1402 lines
43 KiB
JSON
1402 lines
43 KiB
JSON
[
|
|
{
|
|
"task_id": 1,
|
|
"episode_id": "SQ4_TN_B_0963",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TN_B_0963.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TN",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 2,
|
|
"episode_id": "SQ2_TP_A_0191",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TP_A_0191.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "safety_device_failure",
|
|
"gt_category": "fire_gas",
|
|
"gt_scenario_id": "FG-04",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 3,
|
|
"episode_id": "SQ3_FP_D_0615",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_FP_D_0615.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "FP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-03-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 4,
|
|
"episode_id": "SQ5_TN_D_1185",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TN_D_1185.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TN",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 5,
|
|
"episode_id": "SQ4_FP_C_0865",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_FP_C_0865.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "FP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-04-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 6,
|
|
"episode_id": "SQ4_TN_C_1005",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TN_C_1005.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TN",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 7,
|
|
"episode_id": "SQ5_TN_D_1189",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TN_D_1189.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TN",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 8,
|
|
"episode_id": "SQ2_TN_D_0379",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TN_D_0379.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TN",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 9,
|
|
"episode_id": "SQ1_TP_A_0057",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TP_A_0057.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "actuator_stuck",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-06",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 9
|
|
},
|
|
{
|
|
"task_id": 10,
|
|
"episode_id": "SQ3_TP_C_0543",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TP_C_0543.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "credential_theft",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-05",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 11,
|
|
"episode_id": "SQ3_FP_C_0614",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_FP_C_0614.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "FP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "child_specific",
|
|
"gt_scenario_id": "CH-02-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 12,
|
|
"episode_id": "SQ2_TP_B_0269",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TP_B_0269.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "safety_device_failure",
|
|
"gt_category": "fire_gas",
|
|
"gt_scenario_id": "FG-04",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 13,
|
|
"episode_id": "SQ5_TP_D_1044",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TP_D_1044.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "credential_theft",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-05",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 14,
|
|
"episode_id": "SQ1_FP_A_0083",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_FP_A_0083.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "FP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-02-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 15,
|
|
"episode_id": "SQ3_TP_B_0434",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TP_B_0434.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "behavioral_anomaly",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-04",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 16,
|
|
"episode_id": "SQ1_TN_D_0184",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TN_D_0184.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TN",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 17,
|
|
"episode_id": "SQ5_TP_D_1031",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TP_D_1031.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "water_leak",
|
|
"gt_category": "water_damage",
|
|
"gt_scenario_id": "WD-03",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 18,
|
|
"episode_id": "SQ4_TP_C_0775",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TP_C_0775.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "child_safety",
|
|
"gt_category": "child_specific",
|
|
"gt_scenario_id": "CH-02",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 19,
|
|
"episode_id": "SQ4_TP_C_0827",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TP_C_0827.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "safety_device_failure",
|
|
"gt_category": "fire_gas",
|
|
"gt_scenario_id": "FG-04",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 20,
|
|
"episode_id": "SQ3_TN_B_0700",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TN_B_0700.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TN",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 21,
|
|
"episode_id": "SQ5_FP_A_1159",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_FP_A_1159.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "FP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "fire_gas",
|
|
"gt_scenario_id": "FG-02-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 22,
|
|
"episode_id": "SQ5_TP_C_1114",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TP_C_1114.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "tailgating",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-03",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 12
|
|
},
|
|
{
|
|
"task_id": 23,
|
|
"episode_id": "SQ1_FP_B_0098",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_FP_B_0098.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "FP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-03-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 9
|
|
},
|
|
{
|
|
"task_id": 24,
|
|
"episode_id": "SQ3_TP_A_0487",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TP_A_0487.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "behavioral_anomaly",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-01",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 25,
|
|
"episode_id": "SQ2_TN_C_0410",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TN_C_0410.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TN",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 26,
|
|
"episode_id": "SQ4_TN_C_0967",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TN_C_0967.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TN",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 27,
|
|
"episode_id": "SQ5_TP_B_1043",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TP_B_1043.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "tailgating",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-03",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 12
|
|
},
|
|
{
|
|
"task_id": 28,
|
|
"episode_id": "SQ3_TP_D_0430",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TP_D_0430.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "behavioral_anomaly",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-03",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 29,
|
|
"episode_id": "SQ5_TP_D_1018",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TP_D_1018.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "credential_theft",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-05",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 30,
|
|
"episode_id": "SQ1_TN_D_0148",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TN_D_0148.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TN",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 31,
|
|
"episode_id": "SQ5_TP_D_1022",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TP_D_1022.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "credential_theft",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-05",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 32,
|
|
"episode_id": "SQ4_TN_D_0962",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TN_D_0962.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TN",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 33,
|
|
"episode_id": "SQ2_TP_D_0290",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TP_D_0290.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "intrusion",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-02",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 8
|
|
},
|
|
{
|
|
"task_id": 34,
|
|
"episode_id": "SQ3_TN_A_0710",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TN_A_0710.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TN",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 35,
|
|
"episode_id": "SQ1_TP_D_0065",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TP_D_0065.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "sensor_stuck",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-01",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 12
|
|
},
|
|
{
|
|
"task_id": 36,
|
|
"episode_id": "SQ3_TN_A_0702",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TN_A_0702.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TN",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 37,
|
|
"episode_id": "SQ1_TP_C_0027",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TP_C_0027.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "sensor_drift",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-02",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 38,
|
|
"episode_id": "SQ3_TP_D_0553",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TP_D_0553.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "health_concern",
|
|
"gt_category": "elderly_specific",
|
|
"gt_scenario_id": "EL-05",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 12
|
|
},
|
|
{
|
|
"task_id": 39,
|
|
"episode_id": "SQ3_TN_B_0709",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TN_B_0709.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TN",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 40,
|
|
"episode_id": "SQ1_FP_C_0123",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_FP_C_0123.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "FP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-06-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 9
|
|
},
|
|
{
|
|
"task_id": 41,
|
|
"episode_id": "SQ1_TP_A_0019",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TP_A_0019.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "actuator_stuck",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-06",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 9
|
|
},
|
|
{
|
|
"task_id": 42,
|
|
"episode_id": "SQ2_TN_D_0396",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TN_D_0396.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TN",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 43,
|
|
"episode_id": "SQ4_TP_B_0843",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TP_B_0843.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "behavioral_anomaly",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-01",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 44,
|
|
"episode_id": "SQ2_TP_C_0210",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TP_C_0210.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "intrusion",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-02",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 8
|
|
},
|
|
{
|
|
"task_id": 45,
|
|
"episode_id": "SQ4_TP_A_0773",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TP_A_0773.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "carbon_monoxide",
|
|
"gt_category": "fire_gas",
|
|
"gt_scenario_id": "FG-03",
|
|
"gt_difficulty_label": "L1_basic_detection",
|
|
"gt_difficulty_score": 6
|
|
},
|
|
{
|
|
"task_id": 46,
|
|
"episode_id": "SQ5_FP_B_1131",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_FP_B_1131.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "FP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-03-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 12
|
|
},
|
|
{
|
|
"task_id": 47,
|
|
"episode_id": "SQ5_TN_A_1180",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TN_A_1180.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TN",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 48,
|
|
"episode_id": "SQ5_TP_B_1023",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TP_B_1023.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "tailgating",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-03",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 12
|
|
},
|
|
{
|
|
"task_id": 49,
|
|
"episode_id": "SQ4_TP_A_0804",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TP_A_0804.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "behavioral_anomaly",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-01",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 50,
|
|
"episode_id": "SQ3_TP_C_0476",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TP_C_0476.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "intrusion",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-05",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 51,
|
|
"episode_id": "SQ1_TP_C_0040",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TP_C_0040.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "sensor_drift",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-02",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 52,
|
|
"episode_id": "SQ1_TN_B_0158",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TN_B_0158.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TN",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 53,
|
|
"episode_id": "SQ5_TN_B_1181",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TN_B_1181.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TN",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 54,
|
|
"episode_id": "SQ2_TN_A_0418",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TN_A_0418.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TN",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 55,
|
|
"episode_id": "SQ1_TP_D_0035",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TP_D_0035.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "sensor_drift",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-02",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 56,
|
|
"episode_id": "SQ2_FP_B_0356",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_FP_B_0356.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "FP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "fire_gas",
|
|
"gt_scenario_id": "FG-04-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 57,
|
|
"episode_id": "SQ4_FP_D_0953",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_FP_D_0953.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "FP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "elderly_specific",
|
|
"gt_scenario_id": "EL-02-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 12
|
|
},
|
|
{
|
|
"task_id": 58,
|
|
"episode_id": "SQ5_FP_B_1148",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_FP_B_1148.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "FP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-04-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 59,
|
|
"episode_id": "SQ1_TP_A_0021",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TP_A_0021.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "safety_device_failure",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-05",
|
|
"gt_difficulty_label": "L1_basic_detection",
|
|
"gt_difficulty_score": 6
|
|
},
|
|
{
|
|
"task_id": 60,
|
|
"episode_id": "SQ4_TN_D_0965",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TN_D_0965.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TN",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 61,
|
|
"episode_id": "SQ2_TN_D_0387",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TN_D_0387.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TN",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 62,
|
|
"episode_id": "SQ4_TN_A_0984",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TN_A_0984.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TN",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 63,
|
|
"episode_id": "SQ3_FP_C_0596",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_FP_C_0596.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "FP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "child_specific",
|
|
"gt_scenario_id": "CH-05-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 64,
|
|
"episode_id": "SQ3_FP_A_0662",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_FP_A_0662.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "FP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-03-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 65,
|
|
"episode_id": "SQ3_TN_B_0672",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TN_B_0672.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TN",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 66,
|
|
"episode_id": "SQ4_FP_D_0876",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_FP_D_0876.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "FP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "elderly_specific",
|
|
"gt_scenario_id": "EL-06-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 67,
|
|
"episode_id": "SQ1_TN_A_0157",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TN_A_0157.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TN",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 68,
|
|
"episode_id": "SQ2_FP_B_0363",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_FP_B_0363.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "FP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-04-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 69,
|
|
"episode_id": "SQ3_FP_D_0631",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_FP_D_0631.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "FP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-01-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 70,
|
|
"episode_id": "SQ5_FP_C_1118",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_FP_C_1118.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "FP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "fire_gas",
|
|
"gt_scenario_id": "FG-02-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 71,
|
|
"episode_id": "SQ1_FP_A_0113",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_FP_A_0113.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "FP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-04-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 8
|
|
},
|
|
{
|
|
"task_id": 72,
|
|
"episode_id": "SQ5_FP_B_1124",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_FP_B_1124.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "FP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "water_damage",
|
|
"gt_scenario_id": "WD-01-FP",
|
|
"gt_difficulty_label": "L1_basic_detection",
|
|
"gt_difficulty_score": 6
|
|
},
|
|
{
|
|
"task_id": 73,
|
|
"episode_id": "SQ4_FP_C_0896",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_FP_C_0896.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "FP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "child_specific",
|
|
"gt_scenario_id": "CH-03-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 9
|
|
},
|
|
{
|
|
"task_id": 74,
|
|
"episode_id": "SQ2_TP_D_0200",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TP_D_0200.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "water_leak",
|
|
"gt_category": "water_damage",
|
|
"gt_scenario_id": "WD-01",
|
|
"gt_difficulty_label": "L1_basic_detection",
|
|
"gt_difficulty_score": 6
|
|
},
|
|
{
|
|
"task_id": 75,
|
|
"episode_id": "SQ4_FP_A_0912",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_FP_A_0912.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "FP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "fire_gas",
|
|
"gt_scenario_id": "FG-01-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 76,
|
|
"episode_id": "SQ2_TN_C_0377",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TN_C_0377.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TN",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 77,
|
|
"episode_id": "SQ2_TP_C_0256",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TP_C_0256.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "intrusion",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-04",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 78,
|
|
"episode_id": "SQ5_FP_A_1141",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_FP_A_1141.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "FP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "water_damage",
|
|
"gt_scenario_id": "WD-03-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 79,
|
|
"episode_id": "SQ4_TN_B_0987",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TN_B_0987.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TN",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 80,
|
|
"episode_id": "SQ2_TP_B_0230",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TP_B_0230.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "water_leak",
|
|
"gt_category": "water_damage",
|
|
"gt_scenario_id": "WD-01",
|
|
"gt_difficulty_label": "L1_basic_detection",
|
|
"gt_difficulty_score": 6
|
|
},
|
|
{
|
|
"task_id": 81,
|
|
"episode_id": "SQ5_TN_D_1187",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TN_D_1187.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TN",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 82,
|
|
"episode_id": "SQ3_TP_A_0557",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TP_A_0557.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "behavioral_anomaly",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-03",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 83,
|
|
"episode_id": "SQ5_TN_C_1183",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_TN_C_1183.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "TN",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 84,
|
|
"episode_id": "SQ1_TN_C_0183",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TN_C_0183.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TN",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 85,
|
|
"episode_id": "SQ2_FP_A_0327",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_FP_A_0327.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "FP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "water_damage",
|
|
"gt_scenario_id": "WD-01-FP",
|
|
"gt_difficulty_label": "L1_basic_detection",
|
|
"gt_difficulty_score": 6
|
|
},
|
|
{
|
|
"task_id": 86,
|
|
"episode_id": "SQ2_FP_D_0317",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_FP_D_0317.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "FP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "water_damage",
|
|
"gt_scenario_id": "WD-01-FP",
|
|
"gt_difficulty_label": "L1_basic_detection",
|
|
"gt_difficulty_score": 6
|
|
},
|
|
{
|
|
"task_id": 87,
|
|
"episode_id": "SQ2_FP_D_0351",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_FP_D_0351.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "FP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-05-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 88,
|
|
"episode_id": "SQ5_FP_A_1140",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq5\\SQ5_FP_A_1140.json",
|
|
"sq_type": "SQ5",
|
|
"variant": "FP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "water_damage",
|
|
"gt_scenario_id": "WD-03-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 89,
|
|
"episode_id": "SQ3_TP_C_0537",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TP_C_0537.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "credential_theft",
|
|
"gt_category": "intrusion",
|
|
"gt_scenario_id": "INS-05",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 13
|
|
},
|
|
{
|
|
"task_id": 90,
|
|
"episode_id": "SQ4_TP_B_0817",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_TP_B_0817.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "TP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "unattended_cooking",
|
|
"gt_category": "fire_gas",
|
|
"gt_scenario_id": "FG-01",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 91,
|
|
"episode_id": "SQ2_TP_C_0251",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_TP_C_0251.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "TP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": true,
|
|
"gt_threat_type": "water_leak",
|
|
"gt_category": "water_damage",
|
|
"gt_scenario_id": "WD-02",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 8
|
|
},
|
|
{
|
|
"task_id": 92,
|
|
"episode_id": "SQ2_FP_A_0357",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq2\\SQ2_FP_A_0357.json",
|
|
"sq_type": "SQ2",
|
|
"variant": "FP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "water_damage",
|
|
"gt_scenario_id": "WD-03-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 93,
|
|
"episode_id": "SQ4_FP_A_0882",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq4\\SQ4_FP_A_0882.json",
|
|
"sq_type": "SQ4",
|
|
"variant": "FP",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-01-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 94,
|
|
"episode_id": "SQ3_TN_A_0685",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TN_A_0685.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TN",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 95,
|
|
"episode_id": "SQ3_TN_D_0690",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_TN_D_0690.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "TN",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 96,
|
|
"episode_id": "SQ3_FP_D_0654",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_FP_D_0654.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "FP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "elderly_specific",
|
|
"gt_scenario_id": "EL-03-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 10
|
|
},
|
|
{
|
|
"task_id": 97,
|
|
"episode_id": "SQ1_TN_A_0172",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_TN_A_0172.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "TN",
|
|
"layout_id": "A",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "none",
|
|
"gt_scenario_id": "TN",
|
|
"gt_difficulty_label": "TN_baseline",
|
|
"gt_difficulty_score": 0
|
|
},
|
|
{
|
|
"task_id": 98,
|
|
"episode_id": "SQ3_FP_C_0568",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq3\\SQ3_FP_C_0568.json",
|
|
"sq_type": "SQ3",
|
|
"variant": "FP",
|
|
"layout_id": "C",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "behavioral_anomaly",
|
|
"gt_scenario_id": "BA-04-FP",
|
|
"gt_difficulty_label": "L3_composite_reasoning",
|
|
"gt_difficulty_score": 11
|
|
},
|
|
{
|
|
"task_id": 99,
|
|
"episode_id": "SQ1_FP_B_0092",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_FP_B_0092.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "FP",
|
|
"layout_id": "B",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-05-FP",
|
|
"gt_difficulty_label": "L1_basic_detection",
|
|
"gt_difficulty_score": 6
|
|
},
|
|
{
|
|
"task_id": 100,
|
|
"episode_id": "SQ1_FP_D_0087",
|
|
"episode_path": "C:\\Users\\Wal1et\\Downloads\\llmiotsafe\\data\\benchmark\\sq1\\SQ1_FP_D_0087.json",
|
|
"sq_type": "SQ1",
|
|
"variant": "FP",
|
|
"layout_id": "D",
|
|
"gt_is_anomaly": false,
|
|
"gt_threat_type": "none",
|
|
"gt_category": "device_fault",
|
|
"gt_scenario_id": "DF-03-FP",
|
|
"gt_difficulty_label": "L2_reasoning_detection",
|
|
"gt_difficulty_score": 9
|
|
}
|
|
] |