224 lines
8.5 KiB
Python
224 lines
8.5 KiB
Python
import json
|
|
from typing import Dict
|
|
|
|
|
|
# Closed vocabulary of threat labels. It is kept as one " / "-separated string
# because the prompt builders interpolate it verbatim into their JSON templates.
THREAT_TYPES = " / ".join(
    (
        "intrusion",
        "tailgating",
        "credential_theft",
        "fire_risk",
        "unattended_cooking",
        "carbon_monoxide",
        "sensor_stuck",
        "sensor_drift",
        "sensor_malfunction",
        "actuator_stuck",
        "lock_malfunction",
        "safety_device_failure",
        "water_leak",
        "possible_fall",
        "abnormal_inactivity",
        "health_concern",
        "child_safety",
        "behavioral_anomaly",
        "none",
    )
)
|
|
|
|
|
|
def _protocol_notes_block(material: Dict) -> str:
|
|
notes = material.get("protocol_notes", [])
|
|
return "\n".join(f"- {note}" for note in notes)
|
|
|
|
|
|
def build_extractor_prompt(material: Dict) -> Dict[str, str]:
    """Assemble the system/user prompt pair for the evidence-extraction stage.

    Args:
        material: Mapping that must provide ``query``, ``layout_summary``,
            ``signals`` and ``chunk_index``. ``protocol_notes`` is rendered
            through ``_protocol_notes_block``.

    Returns:
        A dict with the keys ``"system"`` and ``"user"`` holding the two
        prompt texts.

    Raises:
        KeyError: If one of the required keys is missing from ``material``.
    """
    system_prompt = " ".join(
        (
            "You are a neutral evidence extractor for smart-home anomaly analysis.",
            "Do not decide the final verdict.",
            "Your job is to infer the query intent and choose the most relevant chunks for debate.",
        )
    )

    # Resolve the dynamic values first, in the order they appear in the prompt.
    query = material["query"]
    notes = _protocol_notes_block(material)
    layout = material["layout_summary"]
    signals_json = json.dumps(material["signals"], ensure_ascii=False, indent=2)
    chunk_index_json = json.dumps(material["chunk_index"], ensure_ascii=False, indent=2)

    context_sections = [
        f"## Query\n{query}",
        f"## Matter Notes\n{notes}",
        f"## Layout\n{layout}",
        f"## Deterministic Signals\n{signals_json}",
        f"## Chunk Index\n{chunk_index_json}",
    ]

    # NOTE(review): nested bullets and JSON fields are written flush-left,
    # exactly as they appear in the reviewed source. If the original file
    # indents them, restore that indentation here - TODO confirm.
    rule_lines = [
        "Rules:",
        "- Infer the task from the query without hidden benchmark labels.",
        "- Choose exactly one `primary_task_profile`.",
        "- Use `secondary_task_profile=device-health` only if direct device-fault evidence seems materially relevant.",
        "- Select 4-6 `focus_chunk_ids` for the debate stage.",
        "- Prefer evidence coverage rather than narrow certainty.",
        "- For `device-health`, include:",
        "- the suspicious event,",
        "- the immediate follow-up/retry sequence,",
        "- and any later recovery or non-recovery evidence.",
        "- For `single-event-safety` and `behavior-sequence`, include:",
        "- the trigger or access-path chunk,",
        "- nearby human/device sequence context,",
        "- and one chunk that could support a benign alternative explanation.",
        "- For `composite-safety` and `emergency-response`, include:",
        "- the local hazard trigger,",
        "- nearby human/vulnerability context,",
        "- and mitigation/outcome context.",
        "- Missing logs alone are not enough to frame a case as device failure.",
        "- Do not decide whether the anomaly is real; only select evidence and candidate lines of inquiry.",
    ]

    schema_lines = [
        "Return JSON only:",
        "{",
        '"primary_task_profile": "device-health | single-event-safety | behavior-sequence | composite-safety | emergency-response",',
        '"secondary_task_profile": "none | device-health | single-event-safety | behavior-sequence | composite-safety | emergency-response",',
        '"query_anchor": {',
        '"target_rooms": ["..."],',
        '"target_devices": ["..."],',
        '"target_question": "..."',
        "},",
        '"focus_chunk_ids": ["C00", "C01", "C02", "C03"],',
        f'"candidate_threats": ["{THREAT_TYPES}"],',
        '"why_these_chunks": ["..."],',
        '"open_questions": ["..."]',
        "}",
    ]

    # Sections are separated by exactly one blank line; lines inside the rule
    # and schema blocks are separated by single newlines.
    user_prompt = "\n\n".join(
        context_sections + ["\n".join(rule_lines), "\n".join(schema_lines)]
    )
    return {"system": system_prompt, "user": user_prompt}
|
|
|
|
|
|
def build_prosecutor_prompt(material: Dict, extractor_text: str, focused_chunks: str) -> Dict[str, str]:
    """Assemble the system/user prompt pair for the Prosecutor debate role.

    Args:
        material: Mapping that must provide ``query`` and ``signals``.
            ``protocol_notes`` is rendered through ``_protocol_notes_block``.
        extractor_text: Text inserted under the "Extractor Output" heading.
        focused_chunks: Text inserted under the "Focused Chunks" heading.

    Returns:
        A dict with the keys ``"system"`` and ``"user"`` holding the two
        prompt texts.

    Raises:
        KeyError: If ``query`` or ``signals`` is missing from ``material``.
    """
    system_prompt = " ".join(
        (
            "You are the Prosecutor in a smart-home safety debate.",
            "Your role is recall-oriented: surface the strongest supported anomaly case and do not prematurely dismiss risk.",
        )
    )

    # Resolve the dynamic values first, in the order they appear in the prompt.
    query = material["query"]
    notes = _protocol_notes_block(material)
    signals_json = json.dumps(material["signals"], ensure_ascii=False, indent=2)

    context_sections = [
        f"## Query\n{query}",
        f"## Matter Notes\n{notes}",
        f"## Structured Signals\n{signals_json}",
        f"## Extractor Output\n{extractor_text}",
        f"## Focused Chunks\n{focused_chunks}",
    ]

    rule_lines = [
        "Rules:",
        "- Build the strongest anomaly case that is still evidence-based.",
        "- Prefer the best-supported anomaly type, not necessarily the most severe one.",
        "- Cite exact sequence evidence, chunk references, and cross-device relations.",
        "- You are allowed to be aggressive about recall, but not to invent unsupported events.",
        "- A single transient `None`, missing log, or isolated spike is weak by itself for device-fault labels.",
        "- For intrusion-style claims, prefer access-path inconsistency, impossible timing, lock/contact conflicts, motion progression, or identity-like anomalies.",
        "- For behavioral anomalies, focus on sequence-level inconsistency rather than raw telemetry oddities.",
        "- For unattended cooking or fire risk, explain why supervision appears missing and why the hazard window is meaningful.",
        "- A later recovery does not erase a real unsafe event if the logged sequence already shows one.",
    ]

    # NOTE(review): JSON fields are written flush-left, exactly as they appear
    # in the reviewed source. If the original file indents them, restore that
    # indentation here - TODO confirm.
    schema_lines = [
        "Return JSON only:",
        "{",
        '"position": "anomaly | weak_anomaly",',
        f'"best_threat_type": "{THREAT_TYPES}",',
        '"core_claim": "...",',
        '"supporting_evidence": ["..."],',
        '"why_normal_explanation_is_weaker": ["..."],',
        '"weaknesses": ["..."],',
        '"confidence": "high | medium | low",',
        '"missing_but_not_required": ["..."]',
        "}",
    ]

    # Sections are separated by exactly one blank line; lines inside the rule
    # and schema blocks are separated by single newlines.
    user_prompt = "\n\n".join(
        context_sections + ["\n".join(rule_lines), "\n".join(schema_lines)]
    )
    return {"system": system_prompt, "user": user_prompt}
|
|
|
|
|
|
def build_defender_prompt(material: Dict, extractor_text: str, focused_chunks: str) -> Dict[str, str]:
    """Assemble the system/user prompt pair for the Defender debate role.

    Args:
        material: Mapping that must provide ``query`` and ``signals``.
            ``protocol_notes`` is rendered through ``_protocol_notes_block``.
        extractor_text: Text inserted under the "Extractor Output" heading.
        focused_chunks: Text inserted under the "Focused Chunks" heading.

    Returns:
        A dict with the keys ``"system"`` and ``"user"`` holding the two
        prompt texts.

    Raises:
        KeyError: If ``query`` or ``signals`` is missing from ``material``.
    """
    system_prompt = " ".join(
        (
            "You are the Defender in a smart-home safety debate.",
            "Your role is precision-oriented: build the strongest supported case that the logs remain normal or insufficient for alarm.",
        )
    )

    # Resolve the dynamic values first, in the order they appear in the prompt.
    query = material["query"]
    notes = _protocol_notes_block(material)
    signals_json = json.dumps(material["signals"], ensure_ascii=False, indent=2)

    context_sections = [
        f"## Query\n{query}",
        f"## Matter Notes\n{notes}",
        f"## Structured Signals\n{signals_json}",
        f"## Extractor Output\n{extractor_text}",
        f"## Focused Chunks\n{focused_chunks}",
    ]

    # NOTE(review): nested bullets and JSON fields are written flush-left,
    # exactly as they appear in the reviewed source. If the original file
    # indents them, restore that indentation here - TODO confirm.
    rule_lines = [
        "Rules:",
        "- Build the strongest benign or insufficient-evidence explanation supported by the logs.",
        "- Attack anomaly claims that rely mainly on:",
        "- missing logs,",
        "- one transient telemetry glitch,",
        "- long but coherent idle intervals,",
        "- or a plausible ordinary routine.",
        "- For device faults, require persistence, repeated failure, contradictory states, or direct fault evidence.",
        "- For intrusion-style claims, challenge whether the access path is truly inconsistent rather than merely sparse.",
        "- For unattended cooking or fire risk, challenge whether supervision is truly absent versus simply not directly logged.",
        "- Do not deny a real anomaly if the sequence is clearly unsafe; precision means challenging weak alarms, not ignoring evidence.",
    ]

    schema_lines = [
        "Return JSON only:",
        "{",
        '"position": "normal | insufficient_evidence",',
        '"core_claim": "...",',
        '"supporting_evidence": ["..."],',
        '"why_anomaly_explanation_is_weaker": ["..."],',
        '"weaknesses": ["..."],',
        '"confidence": "high | medium | low",',
        '"missing_but_not_required": ["..."]',
        "}",
    ]

    # Sections are separated by exactly one blank line; lines inside the rule
    # and schema blocks are separated by single newlines.
    user_prompt = "\n\n".join(
        context_sections + ["\n".join(rule_lines), "\n".join(schema_lines)]
    )
    return {"system": system_prompt, "user": user_prompt}
|
|
|
|
|
|
def build_judge_prompt(
    material: Dict,
    extractor_text: str,
    prosecutor_text: str,
    defender_text: str,
    focused_chunks: str,
) -> Dict[str, str]:
    """Assemble the system/user prompt pair for the Judge debate role.

    Args:
        material: Mapping that must provide ``query`` and ``signals``.
            ``protocol_notes`` is rendered through ``_protocol_notes_block``.
        extractor_text: Text inserted under the "Extractor Output" heading.
        prosecutor_text: Text inserted under the "Prosecutor" heading.
        defender_text: Text inserted under the "Defender" heading.
        focused_chunks: Text inserted under the "Focused Chunks" heading.

    Returns:
        A dict with the keys ``"system"`` and ``"user"`` holding the two
        prompt texts.

    Raises:
        KeyError: If ``query`` or ``signals`` is missing from ``material``.
    """
    system_prompt = " ".join(
        (
            "You are the Judge in a smart-home anomaly debate.",
            "You must compare both sides against the raw evidence and make the final decision.",
        )
    )

    # Resolve the dynamic values first, in the order they appear in the prompt.
    query = material["query"]
    notes = _protocol_notes_block(material)
    signals_json = json.dumps(material["signals"], ensure_ascii=False, indent=2)

    # The focused chunks come before the two arguments in the prompt.
    context_sections = [
        f"## Query\n{query}",
        f"## Matter Notes\n{notes}",
        f"## Structured Signals\n{signals_json}",
        f"## Extractor Output\n{extractor_text}",
        f"## Focused Chunks\n{focused_chunks}",
        f"## Prosecutor\n{prosecutor_text}",
        f"## Defender\n{defender_text}",
    ]

    rule_lines = [
        "Decision Rules:",
        "- Compare both sides by evidence quality, directness, temporal coverage, and consistency with the query.",
        "- Do not just average the two sides; judge the strength of the cited evidence yourself.",
        "- You can disagree with both parties.",
        "- Device-fault labels require direct fault evidence or persistent contradiction.",
        "- Intrusion / tailgating / credential-theft labels require access-path or identity-path evidence, not just sparse occupancy.",
        "- Behavioral anomaly requires a sequence that is genuinely inconsistent with a normal household story, not just unusual timing alone.",
        "- Unattended cooking / fire risk requires a meaningful hazard window plus missing or weak supervision/mitigation, not just cooking plus incomplete telemetry.",
        "- For composite safety, local hazard evidence is enough only when it is paired with vulnerable/unsupervised context or weak mitigation.",
        "- If the Prosecutor's case directly answers the query and the Defender mostly attacks telemetry incompleteness, anomaly may still win.",
        "- If the anomaly story depends mainly on absence-based assumptions, sparse access logs, or one ambiguous telemetry inconsistency, favor `none`.",
        "- If the true concern is anomalous but the exact subtype is uncertain, choose the best-supported anomaly type rather than forcing `none`.",
        "- You must read the raw focused chunks, not only the two argument summaries.",
    ]

    # NOTE(review): JSON fields are written flush-left, exactly as they appear
    # in the reviewed source. If the original file indents them, restore that
    # indentation here - TODO confirm.
    schema_lines = [
        "Return JSON only:",
        "{",
        '"is_anomaly": true/false,',
        '"confidence": "high/medium/low",',
        f'"threat_type": "{THREAT_TYPES}",',
        '"threat_description": "one-sentence conclusion",',
        '"reasoning": ["step 1", "step 2", "step 3"],',
        '"key_evidence": ["evidence 1", "evidence 2"],',
        '"recommended_actions": ["action 1", "action 2"]',
        "}",
    ]

    # Sections are separated by exactly one blank line; lines inside the rule
    # and schema blocks are separated by single newlines.
    user_prompt = "\n\n".join(
        context_sections + ["\n".join(rule_lines), "\n".join(schema_lines)]
    )
    return {"system": system_prompt, "user": user_prompt}
|