"""Prompt builders for a smart-home anomaly debate pipeline.

Each ``build_*_prompt`` function returns a ``{"system": ..., "user": ...}``
dict for one debate role: extractor, prosecutor, defender, or judge.
"""

import json
from typing import Dict

# Slash-separated menu of allowed threat labels. It is interpolated verbatim
# into the JSON templates at the end of the prompts below.
THREAT_TYPES = (
    "intrusion / tailgating / credential_theft / fire_risk / unattended_cooking / "
    "carbon_monoxide / sensor_stuck / sensor_drift / sensor_malfunction / actuator_stuck / "
    "lock_malfunction / safety_device_failure / water_leak / possible_fall / "
    "abnormal_inactivity / health_concern / child_safety / behavioral_anomaly / none"
)


def _protocol_notes_block(material: Dict) -> str:
    """Render ``material["protocol_notes"]`` as a ``- note`` bullet list.

    Returns an empty string when the key is missing, ``None``, or empty.
    A bare string is treated as a single note so it is not split into
    one bullet per character.
    """
    notes = material.get("protocol_notes") or []
    if isinstance(notes, str):
        notes = [notes]
    return "\n".join(f"- {note}" for note in notes)


def _json_block(value) -> str:
    """Pretty-print *value* as indented JSON, keeping non-ASCII text readable."""
    return json.dumps(value, ensure_ascii=False, indent=2)


def build_extractor_prompt(material: Dict) -> Dict[str, str]:
    """Build the evidence-extractor prompt.

    Args:
        material: Case data. Reads ``query``, ``layout_summary``, ``signals``
            and ``chunk_index`` (required) and ``protocol_notes`` (optional).

    Returns:
        A dict with ``"system"`` and ``"user"`` prompt strings.

    Raises:
        KeyError: If a required key is missing from *material*.
    """
    system = (
        "You are a neutral evidence extractor for smart-home anomaly analysis. "
        "Do not decide the final verdict. "
        "Your job is to infer the query intent and choose the most relevant chunks for debate."
    )
    # Literal braces of the JSON template are doubled because this is an f-string.
    user = f"""## Query
{material['query']}

## Matter Notes
{_protocol_notes_block(material)}

## Layout
{material['layout_summary']}

## Deterministic Signals
{_json_block(material['signals'])}

## Chunk Index
{_json_block(material['chunk_index'])}

Rules:
- Infer the task from the query without hidden benchmark labels.
- Choose exactly one `primary_task_profile`.
- Use `secondary_task_profile=device-health` only if direct device-fault evidence seems materially relevant.
- Select 4-6 `focus_chunk_ids` for the debate stage.
- Prefer evidence coverage rather than narrow certainty.
- For `device-health`, include:
  - the suspicious event,
  - the immediate follow-up/retry sequence,
  - and any later recovery or non-recovery evidence.
- For `single-event-safety` and `behavior-sequence`, include:
  - the trigger or access-path chunk,
  - nearby human/device sequence context,
  - and one chunk that could support a benign alternative explanation.
- For `composite-safety` and `emergency-response`, include:
  - the local hazard trigger,
  - nearby human/vulnerability context,
  - and mitigation/outcome context.
- Missing logs alone are not enough to frame a case as device failure.
- Do not decide whether the anomaly is real; only select evidence and candidate lines of inquiry.

Return JSON only:
{{
  "primary_task_profile": "device-health | single-event-safety | behavior-sequence | composite-safety | emergency-response",
  "secondary_task_profile": "none | device-health | single-event-safety | behavior-sequence | composite-safety | emergency-response",
  "query_anchor": {{
    "target_rooms": ["..."],
    "target_devices": ["..."],
    "target_question": "..."
  }},
  "focus_chunk_ids": ["C00", "C01", "C02", "C03"],
  "candidate_threats": ["{THREAT_TYPES}"],
  "why_these_chunks": ["..."],
  "open_questions": ["..."]
}}"""
    return {"system": system, "user": user}


def build_prosecutor_prompt(material: Dict, extractor_text: str, focused_chunks: str) -> Dict[str, str]:
    """Build the recall-oriented Prosecutor prompt.

    Args:
        material: Case data. Reads ``query`` and ``signals`` (required) and
            ``protocol_notes`` (optional).
        extractor_text: Text embedded under "Extractor Output".
        focused_chunks: Text embedded under "Focused Chunks".

    Returns:
        A dict with ``"system"`` and ``"user"`` prompt strings.

    Raises:
        KeyError: If a required key is missing from *material*.
    """
    system = (
        "You are the Prosecutor in a smart-home safety debate. "
        "Your role is recall-oriented: surface the strongest supported anomaly case and do not prematurely dismiss risk."
    )
    user = f"""## Query
{material['query']}

## Matter Notes
{_protocol_notes_block(material)}

## Structured Signals
{_json_block(material['signals'])}

## Extractor Output
{extractor_text}

## Focused Chunks
{focused_chunks}

Rules:
- Build the strongest anomaly case that is still evidence-based.
- Prefer the best-supported anomaly type, not necessarily the most severe one.
- Cite exact sequence evidence, chunk references, and cross-device relations.
- You are allowed to be aggressive about recall, but not to invent unsupported events.
- A single transient `None`, missing log, or isolated spike is weak by itself for device-fault labels.
- For intrusion-style claims, prefer access-path inconsistency, impossible timing, lock/contact conflicts, motion progression, or identity-like anomalies.
- For behavioral anomalies, focus on sequence-level inconsistency rather than raw telemetry oddities.
- For unattended cooking or fire risk, explain why supervision appears missing and why the hazard window is meaningful.
- A later recovery does not erase a real unsafe event if the logged sequence already shows one.

Return JSON only:
{{
  "position": "anomaly | weak_anomaly",
  "best_threat_type": "{THREAT_TYPES}",
  "core_claim": "...",
  "supporting_evidence": ["..."],
  "why_normal_explanation_is_weaker": ["..."],
  "weaknesses": ["..."],
  "confidence": "high | medium | low",
  "missing_but_not_required": ["..."]
}}"""
    return {"system": system, "user": user}


def build_defender_prompt(material: Dict, extractor_text: str, focused_chunks: str, prosecutor_text: str) -> Dict[str, str]:
    """Build the precision-oriented Defender prompt.

    Args:
        material: Case data. Reads ``query`` and ``signals`` (required) and
            ``protocol_notes`` (optional).
        extractor_text: Text embedded under "Extractor Output".
        focused_chunks: Text embedded under "Focused Chunks".
        prosecutor_text: Text embedded under "Prosecutor".

    Returns:
        A dict with ``"system"`` and ``"user"`` prompt strings.

    Raises:
        KeyError: If a required key is missing from *material*.
    """
    system = (
        "You are the Defender in a smart-home safety debate. "
        "Your role is precision-oriented: directly challenge weak anomaly claims and protect against false alarms."
    )
    user = f"""## Query
{material['query']}

## Matter Notes
{_protocol_notes_block(material)}

## Structured Signals
{_json_block(material['signals'])}

## Extractor Output
{extractor_text}

## Prosecutor
{prosecutor_text}

## Focused Chunks
{focused_chunks}

Rules:
- Directly rebut the Prosecutor's key claim, not just give a separate normal story.
- For each major Prosecutor evidence point, ask:
  - is it direct evidence or only an absence-based inference?
  - is there a coherent ordinary-routine explanation?
  - does the raw evidence actually contradict normal behavior?
- Build the strongest benign or insufficient-evidence explanation supported by the logs.
- Attack anomaly claims that rely mainly on:
  - missing logs,
  - one transient telemetry glitch,
  - long but coherent idle intervals,
  - or a plausible ordinary routine.
- For device faults, require persistence, repeated failure, contradictory states, or direct fault evidence.
- For intrusion-style claims, challenge whether the access path is truly inconsistent rather than merely sparse.
- For unattended cooking or fire risk, challenge whether supervision is truly absent versus simply not directly logged.
- Do not deny a real anomaly if the sequence is clearly unsafe; precision means challenging weak alarms, not ignoring evidence.

Return JSON only:
{{
  "position": "normal | insufficient_evidence",
  "core_claim": "...",
  "rebuttals_to_prosecutor": [
    {{"claim": "...", "why_not_proven": "...", "counterevidence": ["..."]}}
  ],
  "supporting_evidence": ["..."],
  "why_anomaly_explanation_is_weaker": ["..."],
  "weaknesses": ["..."],
  "confidence": "high | medium | low",
  "missing_but_not_required": ["..."]
}}"""
    return {"system": system, "user": user}


def build_judge_prompt(
    material: Dict,
    extractor_text: str,
    prosecutor_text: str,
    defender_text: str,
    focused_chunks: str,
) -> Dict[str, str]:
    """Build the Judge prompt that weighs both sides against the raw chunks.

    Args:
        material: Case data. Reads ``query`` and ``signals`` (required) and
            ``protocol_notes`` (optional).
        extractor_text: Text embedded under "Extractor Output".
        prosecutor_text: Text embedded under "Prosecutor".
        defender_text: Text embedded under "Defender".
        focused_chunks: Text embedded under "Focused Chunks".

    Returns:
        A dict with ``"system"`` and ``"user"`` prompt strings.

    Raises:
        KeyError: If a required key is missing from *material*.
    """
    system = (
        "You are the Judge in a smart-home anomaly debate. "
        "You must compare both sides against the raw evidence and apply a burden-of-proof standard."
    )
    user = f"""## Query
{material['query']}

## Matter Notes
{_protocol_notes_block(material)}

## Structured Signals
{_json_block(material['signals'])}

## Extractor Output
{extractor_text}

## Focused Chunks
{focused_chunks}

## Prosecutor
{prosecutor_text}

## Defender
{defender_text}

Decision Rules:
- Compare both sides by evidence quality, directness, temporal coverage, and consistency with the query.
- Do not just average the two sides; judge the strength of the cited evidence yourself.
- You can disagree with both parties.
- Use this burden-of-proof test:
  - Step 1: Did the Prosecutor establish at least one concrete anomaly chain, or mostly a plausible story?
  - Step 2: Did the Defender show that the core claim depends mainly on absence-based assumptions, sparse telemetry, or a coherent ordinary routine?
  - Step 3: If the Prosecutor's key links are weakly grounded and the Defender offers a coherent non-anomalous story, favor `none`.
- Device-fault labels require direct fault evidence or persistent contradiction.
- Intrusion / tailgating / credential-theft labels require access-path or identity-path evidence, not just sparse occupancy.
- Behavioral anomaly requires a sequence that is genuinely inconsistent with a normal household story, not just unusual timing alone.
- Unattended cooking / fire risk requires a meaningful hazard window plus missing or weak supervision/mitigation, not just cooking plus incomplete telemetry.
- For composite safety, local hazard evidence is enough only when it is paired with vulnerable/unsupervised context or weak mitigation.
- If the Prosecutor's case directly answers the query and the Defender mostly attacks telemetry incompleteness, anomaly may still win.
- If the anomaly story depends mainly on absence-based assumptions, sparse access logs, or one ambiguous telemetry inconsistency, favor `none`.
- For `behavior-sequence`, `single-event-safety`, and `composite-safety`, do not let the Prosecutor win merely by telling a more vivid story. Require at least one key link that is directly supported rather than only inferred from missing signals.
- For `SQ3`-style behavior cases in particular, unusual timing, unlocked intervals, or lack of kitchen occupancy are not enough unless they conflict with the broader routine in a concrete way.
- If the true concern is anomalous but the exact subtype is uncertain, choose the best-supported anomaly type rather than forcing `none`.
- You must read the raw focused chunks, not only the two argument summaries.

Return JSON only:
{{
  "winner": "prosecutor | defender | mixed",
  "burden_test": {{
    "prosecutor_has_direct_case": true/false,
    "defender_showed_plausible_normal_story": true/false,
    "core_uncertainty_type": "direct_conflict | absence_based_inference | sparse_sequence | type_selection | none"
  }},
  "is_anomaly": true/false,
  "confidence": "high/medium/low",
  "threat_type": "{THREAT_TYPES}",
  "threat_description": "one-sentence conclusion",
  "reasoning": ["step 1", "step 2", "step 3"],
  "key_evidence": ["evidence 1", "evidence 2"],
  "recommended_actions": ["action 1", "action 2"]
}}"""
    return {"system": system, "user": user}