# Files
# llmiotsafe/EGPv4/extractor.py
# 2026-05-12 17:01:39 +08:00
#
# 244 lines
# 8.9 KiB
# Python

from collections import Counter, defaultdict
from datetime import datetime
from typing import Dict, List
from EGPv4.signals import build_case_material
# Attribute names whose "attribute_change" events are always kept when
# selecting key events, whatever value they carry.
IMPORTANT_ATTRS = {
    "CurrentLevel",
    "CurrentPositionLiftPercent100ths",
    "LockState",
    "Occupancy",
    "OnOff",
    "OperationalState",
    "StateValue",
    "SystemMode",
}
def infer_query_profile(query: str) -> str:
    """Classify a query into a coarse profile label by keyword matching.

    The rules are evaluated in a fixed order and the first one that matches
    wins, so a query containing keywords of several profiles gets the label
    of the earliest rule.

    Args:
        query: The query text. ``None`` and other falsy values are treated
            as an empty string; any other value is converted with ``str``.

    Returns:
        One of ``"device-health"``, ``"emergency-response"``,
        ``"composite-safety"``, ``"behavior-sequence"``,
        ``"single-event-safety"`` or ``"unknown"``.
    """
    text = str(query or "")

    def has(*keywords: str) -> bool:
        # True only when every given keyword occurs in the query.
        return all(keyword in text for keyword in keywords)

    if has("工作正常") or has("故障类型"):
        return "device-health"
    if has("应急响应") or has("严重程度", "威胁类型"):
        return "emergency-response"
    if has("综合分析", "安全状况") or has("潜在风险"):
        return "composite-safety"
    if has("异常行为模式"):
        return "behavior-sequence"
    if has("安全威胁"):
        return "single-event-safety"
    return "unknown"
def _device_room_map(home_state: Dict) -> Dict[str, str]:
    """Build a ``device_id -> room_id`` lookup from ``home_state["devices"]``.

    Args:
        home_state: Mapping expected to hold a ``"devices"`` dict whose values
            are per-device dicts. A missing or ``None`` ``home_state`` or
            ``"devices"`` entry is treated as "no devices".

    Returns:
        A dict with one entry per device. Devices without a ``"room_id"`` key
        (or whose entry is not a dict) map to ``"unknown"``.
    """
    devices = (home_state or {}).get("devices") or {}
    return {
        device_id: info.get("room_id", "unknown") if isinstance(info, dict) else "unknown"
        for device_id, info in devices.items()
    }
def _format_temp(value) -> str:
    """Render a temperature reading as ``"<celsius>C(raw=<value>)"``.

    Readings whose magnitude is at least 100 are divided by 100 before
    display (presumably raw values in hundredths of a degree — confirm
    against the data source); smaller readings are shown as-is.

    Args:
        value: The raw reading. Anything that is not an ``int``/``float``
            (including ``bool`` and ``None``) is returned via ``str(value)``.

    Returns:
        The formatted string with two decimals and the raw value appended.
    """
    # bool is a subclass of int; a True/False reading is not a temperature.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return str(value)
    celsius = value / 100 if abs(value) >= 100 else value
    return f"{celsius:.2f}C(raw={value})"
def _event_to_compact_line(event: Dict, room_map: Dict[str, str]) -> str:
    """Format one event as a single ``"timestamp | room/device | detail"`` line.

    The detail part depends on ``event["event_type"]``:

    * ``"attribute_change"`` -> ``"<cluster>.<attribute>=<value>"``; the value
      goes through ``_format_temp`` when the attribute is ``MeasuredValue``
      and the cluster name contains ``"Temperature"``.
    * ``"device_event"`` -> ``"Event:<event_name>(k=v, ...)"``.
    * ``"command"`` -> ``"Command:<command>"``.
    * anything else -> the event type itself.

    Args:
        event: The raw event dict. Missing timestamp/device render as ``"?"``.
        room_map: ``device_id -> room_id`` lookup; unmapped devices render
            with room ``"unknown"``.

    Returns:
        The compact one-line description.
    """
    ts = event.get("timestamp", "?")
    dev = event.get("device_id", "?")
    room = room_map.get(dev, "unknown")
    etype = event.get("event_type", "")
    prefix = f"{ts} | {room}/{dev} | "

    if etype == "attribute_change":
        cluster = event.get("cluster", "")
        attr = event.get("attribute", "")
        value = event.get("value")
        # Guard the substring test: a cluster that is None (or otherwise not
        # a string) must not raise, it simply is not a temperature cluster.
        is_temperature = (
            attr == "MeasuredValue"
            and isinstance(cluster, str)
            and "Temperature" in cluster
        )
        value_str = _format_temp(value) if is_temperature else str(value)
        return f"{prefix}{cluster}.{attr}={value_str}"

    if etype == "device_event":
        # "fields" may be present but None; treat that like no fields.
        fields = event.get("fields") or {}
        rendered = ", ".join(f"{k}={v}" for k, v in fields.items())
        return f"{prefix}Event:{event.get('event_name', '')}({rendered})"

    if etype == "command":
        return f"{prefix}Command:{event.get('command', '')}"

    return f"{prefix}{etype}"
def _select_key_events(events: List[Dict], room_map: Dict[str, str], max_lines: int = 80) -> List[str]:
    """Pick the most informative events and return them as compact lines.

    An event is kept when it is:

    * a ``"device_event"`` or a ``"command"``;
    * an ``"attribute_change"`` whose attribute is in ``IMPORTANT_ATTRS``;
    * an ``"attribute_change"`` whose value is ``None``;
    * a temperature ``MeasuredValue`` change that differs from the previous
      numeric reading of the same device by at least 120 raw units. The first
      reading of a device only sets the baseline and is not kept by this rule.

    Args:
        events: Raw event dicts, scanned in the given order.
        room_map: ``device_id -> room_id`` lookup used when formatting.
        max_lines: Maximum number of lines to return; scanning stops once it
            is reached. Non-positive values yield an empty list.

    Returns:
        The formatted lines of the kept events, in input order.
    """
    selected: List[str] = []
    if max_lines <= 0:
        return selected

    last_temp = {}
    for event in events:
        etype = event.get("event_type", "")
        take = False
        if etype in ("device_event", "command"):
            take = True
        elif etype == "attribute_change":
            attr = event.get("attribute", "")
            cluster = event.get("cluster", "")
            value = event.get("value")
            if attr in IMPORTANT_ATTRS or value is None:
                take = True
            elif (
                attr == "MeasuredValue"
                and isinstance(cluster, str)
                and "Temperature" in cluster
                and isinstance(value, (int, float))
            ):
                dev = event.get("device_id", "")
                prev = last_temp.get(dev)
                take = prev is not None and abs(value - prev) >= 120
                # Only numeric readings become the baseline; remembering a
                # non-numeric value would make the next subtraction raise.
                last_temp[dev] = value
        if take:
            selected.append(_event_to_compact_line(event, room_map))
            if len(selected) >= max_lines:
                break
    return selected
def _temperature_trend_summaries(events: List[Dict]) -> List[str]:
    """Summarise each device's temperature readings as one line.

    Only ``"attribute_change"`` events for ``MeasuredValue`` on a cluster
    whose name contains ``"Temperature"`` with a numeric value are used.
    Devices with fewer than four readings are skipped.

    Each line reports start, end, min and max (via ``_format_temp``), the
    span (max - min), the net change (end - start) and the sample count.

    Args:
        events: Raw event dicts, in chronological order as supplied.

    Returns:
        At most 12 summary lines, in order of each device's first reading.
    """
    series = defaultdict(list)
    for event in events:
        if event.get("event_type") != "attribute_change":
            continue
        if event.get("attribute") != "MeasuredValue":
            continue
        cluster = event.get("cluster", "")
        if not isinstance(cluster, str) or "Temperature" not in cluster:
            continue
        value = event.get("value")
        if isinstance(value, (int, float)):
            series[event.get("device_id", "")].append(value)

    summaries = []
    for device_id, vals in series.items():
        if len(vals) < 4:
            continue
        min_v = min(vals)
        max_v = max(vals)
        start_v = vals[0]
        end_v = vals[-1]
        # Choose the unit of the deltas from the readings, not from the size
        # of the delta: with readings like 2300 -> 2350 a difference of 50
        # must be shown as 0.50C, consistent with how the readings print.
        scale = 100 if any(abs(v) >= 100 for v in vals) else 1
        span = (max_v - min_v) / scale
        drift = (end_v - start_v) / scale
        summaries.append(
            f"{device_id}: start={_format_temp(start_v)}, end={_format_temp(end_v)}, "
            f"min={_format_temp(min_v)}, max={_format_temp(max_v)}, "
            f"span={span:.2f}C, net_change={drift:.2f}C, samples={len(vals)}"
        )
    return summaries[:12]
def _occupancy_summaries(events: List[Dict], room_map: Dict[str, str]) -> List[str]:
    """Summarise ``Occupancy`` attribute changes per device.

    For every device that reported at least one ``Occupancy`` change the
    summary gives the number of updates, how many of them had value ``1``,
    and the timestamps of the first and last update.

    Args:
        events: Raw event dicts, scanned in the given order.
        room_map: ``device_id -> room_id`` lookup; unmapped devices are shown
            under room ``"unknown"``.

    Returns:
        One line per device for the 12 devices with the most updates, most
        active first.
    """
    updates = Counter()
    occupied = Counter()
    first_seen = {}
    last_seen = {}
    for event in events:
        if event.get("event_type") != "attribute_change":
            continue
        if event.get("attribute") != "Occupancy":
            continue
        dev = event.get("device_id", "")
        ts = event.get("timestamp", "")
        updates[dev] += 1
        first_seen.setdefault(dev, ts)
        last_seen[dev] = ts
        if event.get("value") == 1:
            # Count per device so the figure matches the per-device line it
            # is printed on (a per-room total could exceed the update count
            # and would merge every unmapped device under "unknown").
            occupied[dev] += 1

    return [
        f"{room_map.get(dev, 'unknown')}/{dev}: occupancy_updates={n}, "
        f"occupied_events={occupied[dev]}, "
        f"first={first_seen.get(dev, '?')}, last={last_seen.get(dev, '?')}"
        for dev, n in updates.most_common(12)
    ]
def _alert_summaries(events: List[Dict], room_map: Dict[str, str], max_lines: int = 30) -> List[str]:
    """Collect the compact lines of events that look like alerts or errors.

    An event qualifies when its formatted line contains any of the substrings
    ``"Alarm"``, ``"Fault"``, ``"Error"``, ``"None"`` or ``"Battery"``. The
    match is made on the whole line, so it also covers the room and device
    parts, and ``"None"`` catches values that were rendered from ``None``.

    Args:
        events: Raw event dicts, scanned in the given order.
        room_map: ``device_id -> room_id`` lookup used when formatting.
        max_lines: Maximum number of lines to return (default 30). Scanning
            stops as soon as the limit is reached.

    Returns:
        The matching lines in input order, at most ``max_lines`` of them.
    """
    tokens = ("Alarm", "Fault", "Error", "None", "Battery")
    lines: List[str] = []
    if max_lines <= 0:
        return lines
    for event in events:
        line = _event_to_compact_line(event, room_map)
        if any(token in line for token in tokens):
            lines.append(line)
            if len(lines) >= max_lines:
                break
    return lines
def _focus_chunk_ids(material: Dict, query_profile: str, max_focus_chunks: int) -> List[str]:
    """Choose the ids of the chunks that deserve the most attention.

    Chunks from ``material["chunk_index"]`` are ranked by
    ``(alarmish_count, event_count)``, highest first, and the top
    ``max_focus_chunks`` are selected. For the sequence-sensitive profiles
    (``single-event-safety``, ``behavior-sequence``, ``composite-safety``,
    ``emergency-response``) the selected ids are returned in their original
    ``chunk_index`` order; for every other profile they are returned in
    ranking order.

    Args:
        material: Case material; must contain a ``"chunk_index"`` list of
            dicts that each have a ``"chunk_id"``.
        query_profile: Profile label of the query.
        max_focus_chunks: Maximum number of ids to return. Non-positive
            values yield an empty list.

    Returns:
        A list of at most ``max_focus_chunks`` chunk ids.

    Raises:
        KeyError: If ``"chunk_index"`` or a chunk's ``"chunk_id"`` is missing.
    """
    if max_focus_chunks <= 0:
        return []

    chunks = material["chunk_index"]
    ranked = sorted(
        chunks,
        key=lambda c: (c.get("alarmish_count", 0), c.get("event_count", 0)),
        reverse=True,
    )
    selected = [c["chunk_id"] for c in ranked[:max_focus_chunks]]

    ordered_profiles = (
        "single-event-safety",
        "behavior-sequence",
        "composite-safety",
        "emergency-response",
    )
    if query_profile in ordered_profiles:
        # Build the membership set once instead of once per chunk.
        wanted = set(selected)
        in_order = [c["chunk_id"] for c in chunks if c["chunk_id"] in wanted]
        return in_order[:max_focus_chunks]
    return selected
def build_evidence_packet(episode: Dict, chunk_size: int = 80, max_focus_chunks: int = 6) -> Dict:
    """Assemble the evidence packet for one episode.

    The episode is first turned into case material by ``build_case_material``;
    the packet then combines fields copied from that material with summaries
    computed over all raw events of all chunks.

    Args:
        episode: The episode dict. Its optional ``"home_state"`` entry is
            used to resolve device rooms; a missing or ``None`` entry means
            no room information.
        chunk_size: Forwarded to ``build_case_material``.
        max_focus_chunks: Maximum number of focus chunks to select.

    Returns:
        A dict with the keys ``episode_id``, ``query``, ``query_profile``,
        ``layout_summary``, ``protocol_notes``, ``signals``,
        ``focus_chunk_ids``, ``focus_chunk_summaries``, ``key_events``,
        ``temperature_trends``, ``occupancy_summaries``, ``alert_events`` and
        ``event_count``.

    Raises:
        KeyError: If the material lacks ``"chunks"`` / ``"chunk_index"`` or a
            chunk lacks ``"raw_events"`` / ``"chunk_id"``.
    """
    material = build_case_material(episode, chunk_size=chunk_size)
    room_map = _device_room_map(episode.get("home_state") or {})

    query = material.get("query", "")
    query_profile = infer_query_profile(query)
    focus_chunk_ids = _focus_chunk_ids(material, query_profile, max_focus_chunks=max_focus_chunks)
    # Build the membership set once rather than once per chunk.
    focus_id_set = set(focus_chunk_ids)

    all_events = [
        event
        for chunk in material["chunks"]
        for event in chunk["raw_events"]
    ]

    return {
        "episode_id": material.get("episode_id", ""),
        "query": query,
        "query_profile": query_profile,
        "layout_summary": material.get("layout_summary", ""),
        "protocol_notes": material.get("protocol_notes", []),
        "signals": material.get("signals", {}),
        "focus_chunk_ids": focus_chunk_ids,
        "focus_chunk_summaries": [
            chunk for chunk in material["chunk_index"] if chunk["chunk_id"] in focus_id_set
        ],
        "key_events": _select_key_events(all_events, room_map, max_lines=80),
        "temperature_trends": _temperature_trend_summaries(all_events),
        "occupancy_summaries": _occupancy_summaries(all_events, room_map),
        "alert_events": _alert_summaries(all_events, room_map),
        "event_count": material.get("event_count", 0),
    }
def materialize_evidence_packet(packet: Dict) -> str:
    """Render an evidence packet as a sectioned, Markdown-style text.

    Sections appear in this order: Query, Query Profile, Matter Notes,
    Layout, Structured Signals, Focus Chunks, Key Events, Temperature Trends,
    Occupancy Summaries, Alert / Error Events. List-valued sections are
    rendered as ``"- item"`` bullets; signals and focus chunk summaries are
    rendered with ``str()``.

    Args:
        packet: The packet dict. Every key is optional; a missing or ``None``
            text field renders as an empty line (``"unknown"`` for the query
            profile) and a missing or ``None`` list field as no bullets.

    Returns:
        The sections joined with newlines.
    """

    def text(key: str, default: str = "") -> str:
        # str.join only accepts strings, so coerce and map None to a default.
        value = packet.get(key)
        return default if value is None else str(value)

    def bullets(key: str) -> List[str]:
        return [f"- {item}" for item in (packet.get(key) or [])]

    sections = [
        "## Query",
        text("query"),
        "",
        "## Query Profile",
        text("query_profile", "unknown"),
        "",
        "## Matter Notes",
        *bullets("protocol_notes"),
        "",
        "## Layout",
        text("layout_summary"),
        "",
        "## Structured Signals",
        str(packet.get("signals", {})),
        "",
        "## Focus Chunks",
        str(packet.get("focus_chunk_summaries", [])),
        "",
        "## Key Events",
        *bullets("key_events"),
        "",
        "## Temperature Trends",
        *bullets("temperature_trends"),
        "",
        "## Occupancy Summaries",
        *bullets("occupancy_summaries"),
        "",
        "## Alert / Error Events",
        *bullets("alert_events"),
    ]
    return "\n".join(sections)