From 623c373d02e4793a24be094cc75f1bd0d9003132 Mon Sep 17 00:00:00 2001 From: BattleTag Date: Thu, 7 May 2026 21:17:01 +0800 Subject: [PATCH] Move aggregator scripts to scripts/aggregate/ (preserve tools before nuking artifacts/) --- scripts/aggregate/PROTOCOL.md | 83 +++++++++ scripts/aggregate/aggregate_cross.py | 174 +++++++++++++++++++ scripts/aggregate/aggregate_cross_matrix.py | 176 +++++++++++++++++++ scripts/aggregate/aggregate_full_sota.py | 84 +++++++++ scripts/aggregate/aggregate_results.py | 94 ++++++++++ scripts/aggregate/aggregate_score_router.py | 180 +++++++++++++++++++ scripts/aggregate/aggregate_v2.py | 182 ++++++++++++++++++++ scripts/aggregate/run_ac_combo_evals.sh | 70 ++++++++ scripts/aggregate/run_all_phase1.sh | 68 ++++++++ scripts/aggregate/run_cross_all.sh | 105 +++++++++++ scripts/aggregate/run_full_cross_matrix.sh | 88 ++++++++++ scripts/aggregate/run_phase1_all.sh | 45 +++++ 12 files changed, 1349 insertions(+) create mode 100644 scripts/aggregate/PROTOCOL.md create mode 100644 scripts/aggregate/aggregate_cross.py create mode 100644 scripts/aggregate/aggregate_cross_matrix.py create mode 100644 scripts/aggregate/aggregate_full_sota.py create mode 100644 scripts/aggregate/aggregate_results.py create mode 100644 scripts/aggregate/aggregate_score_router.py create mode 100644 scripts/aggregate/aggregate_v2.py create mode 100755 scripts/aggregate/run_ac_combo_evals.sh create mode 100755 scripts/aggregate/run_all_phase1.sh create mode 100755 scripts/aggregate/run_cross_all.sh create mode 100755 scripts/aggregate/run_full_cross_matrix.sh create mode 100755 scripts/aggregate/run_phase1_all.sh diff --git a/scripts/aggregate/PROTOCOL.md b/scripts/aggregate/PROTOCOL.md new file mode 100644 index 0000000..9cfaf6f --- /dev/null +++ b/scripts/aggregate/PROTOCOL.md @@ -0,0 +1,83 @@ +# Route Comparison Protocol + +Goal: compare three FM-mechanism × traffic-property route variants on a unified +training base. 
All routes start from the current `Unified_CFM` SOTA recipe and +change one mechanism axis. + +## Unified base (LOCKED) + +| Item | Value | +|---|---| +| Dataset | CICIoT2023 | +| Source store | `datasets/ciciot2023/processed/full_store/` | +| Flows | `datasets/ciciot2023/processed/full_store/flows.parquet` | +| Flow features | `datasets/ciciot2023/processed/flow_features.parquet` (canonical 20-d) | +| Train: benign | 10,000 (Shafir within-dataset protocol) | +| Sequence length | T = 64 | +| Packet preprocess | `mixed_dequant` (Routes A/B); raw binaries (Route C) | +| Benign split | 80/20, `split_seed=42` | +| Val cap | 10,000 | +| Attack cap | 20,000 (stratified) | +| Multi-seed | {42, 43, 44} | + +## Architecture base (LOCKED) + +| Item | Value | +|---|---| +| `d_model` | 128 | +| `n_layers` | 4 | +| `n_heads` | 4 | +| `mlp_ratio` | 4.0 | +| `time_dim` | 64 | +| `sigma` | 0.1 | +| `use_ot` | True | +| `lambda_flow / lambda_packet` | 0.3 / 0.3 | +| `packet_mask_ratio` | 0.5 | +| Optimizer | AdamW, lr=3e-4, wd=0.01, grad_clip=1.0 | +| Schedule | CosineAnnealingLR over total steps | +| Epochs | 50 | +| Batch size | 256 | + +## Routes + +| Route | Mechanism axis | Traffic property targeted | +|---|---|---| +| **Baseline** | Standard UnifiedCFM (current SOTA) | — | +| **A: Causal** | Packet-causal attention mask | Protocol causality (TCP/HTTP handshake) | +| **B: Spectral** | Append K=8-band DFT of (size, IAT) — 32 dims — to flow features (`flow_dim` 20→52); model architecture unchanged | Burstiness / LRD / self-similarity | +| **C: Mixed FM** | Continuous-CFM on (size,IAT,win) + DFM on flags | Discrete-continuous mixed channels | + +Route D (Edit Flows) is deferred until A/B/C show signal. 
# Artifact layout (relative to the repository root, two levels above this file's
# directory): artifacts/route_comparison/ holds one directory per run plus a
# `cross/` directory with the cross-dataset evaluation outputs.
ROOT = Path(__file__).resolve().parents[2] / 'artifacts' / 'route_comparison'
CROSS_DIR = ROOT / 'cross'

# Cross-eval result stems look like "<route>_seed<N>_to_<target>".  The group
# names are required: _collect() reads them back with m.group('route'),
# m.group('seed') and m.group('target').  Without the names the pattern is not
# even valid regex syntax and re.compile() raises at import time.
NAME_RE = re.compile(
    r'^(?P<route>.+?)_seed(?P<seed>\d+)_to_(?P<target>cicids2017|cicddos2019)$'
)

# (label shown in the report, directory / file-name prefix of the run)
ROUTES = [
    ('baseline', 'baseline'),
    ('A: causal', 'route_a_causal'),
    ('B: spectral', 'route_b_spectral'),
    ('C: mixed', 'route_c_mixed'),
    ('A+C combo', 'route_ac_combo'),
]

# Target datasets of the cross-dataset evaluation (must agree with NAME_RE).
TARGETS = ['cicids2017', 'cicddos2019']

# Scores listed, in this order, in the "All key scores" tables.
PRIMARY_SCORES = [
    'terminal_norm',
    'terminal_flow',
    'terminal_packet',
    'flow_consistency',
    'packet_consistency',
    'consistency_total',
    'causal_surprisal_packet_median',
    'causal_surprisal_total',
    'direction_drift_packet_median',
    'pna_packet_median',
    'kappa2_speed2norm_packet_median',
    'curvature_packet',
    'disc_nll_total',
    'disc_nll_ch3',
    'disc_nll_ch7',
]
# Blend weights swept for the Route C ensemble; shared by the sweep itself and
# by the table that reports it so the two can never drift apart.
_ENSEMBLE_ALPHAS = (0.0, 0.5, 0.7, 0.8, 0.9, 1.0)
_NAN = float('nan')


def _collect() -> dict[tuple[str, str], dict[int, dict]]:
    """Load every cross-eval summary found in ``CROSS_DIR``.

    Returns:
        ``{(route_prefix, target): {seed: parsed_json}}`` for each ``*.json``
        file whose stem matches ``NAME_RE``; other files are ignored.
    """
    out: dict[tuple[str, str], dict[int, dict]] = {}
    for f in sorted(CROSS_DIR.glob('*.json')):
        m = NAME_RE.match(f.stem)
        if not m:
            continue
        key = (m.group('route'), m.group('target'))
        out.setdefault(key, {})[int(m.group('seed'))] = json.loads(f.read_text(encoding='utf-8'))
    return out


def _ensemble_sweep(npz_path: Path) -> dict[float, float] | None:
    """AUROC of a z-scored ``terminal_norm`` / ``disc_nll_total`` blend per alpha.

    Both scores are standardised with the mean/std of their ``b_*`` array, mixed
    as ``alpha * terminal_norm + (1 - alpha) * disc_nll_total`` and scored with
    ``b_*`` rows labelled 0 and ``a_*`` rows labelled 1.

    Args:
        npz_path: score archive holding ``b_``/``a_`` prefixed arrays.

    Returns:
        ``{alpha: auroc}`` for every value in ``_ENSEMBLE_ALPHAS``, or ``None``
        when the file does not exist or lacks any of the four required arrays.
    """
    if not npz_path.exists():
        return None
    needed = ('b_terminal_norm', 'a_terminal_norm', 'b_disc_nll_total', 'a_disc_nll_total')
    # NOTE(review): allow_pickle=True will unpickle object arrays on access;
    # keep this pointed at archives produced by our own eval scripts only.
    with np.load(npz_path, allow_pickle=True) as z:
        # All four arrays are read below, so all four must be present (checking
        # only the b_* pair let a partial archive fail later with KeyError).
        if any(k not in z.files for k in needed):
            return None
        v_tn, a_tn, v_dn, a_dn = (z[k] for k in needed)

    def zsc(v, a):
        # Standardise both populations with the b_* statistics only.
        mu, sd = v.mean(), v.std() + 1e-09
        return (v - mu) / sd, (a - mu) / sd

    v_tn_z, a_tn_z = zsc(v_tn, a_tn)
    v_dn_z, a_dn_z = zsc(v_dn, a_dn)
    out: dict[float, float] = {}
    for alpha in _ENSEMBLE_ALPHAS:
        s_v = alpha * v_tn_z + (1.0 - alpha) * v_dn_z
        s_a = alpha * a_tn_z + (1.0 - alpha) * a_dn_z
        y = np.r_[np.zeros(len(s_v)), np.ones(len(s_a))]
        out[alpha] = float(roc_auc_score(y, np.r_[s_v, s_a]))
    return out


def _mean_std(vs: list[float]) -> tuple[float, float]:
    """Return ``(mean, population std)`` of the non-NaN values, or ``(nan, nan)`` if none."""
    arr = np.asarray([v for v in vs if v == v], dtype=np.float64)  # v == v drops NaN
    if arr.size == 0:
        return (_NAN, _NAN)
    return (float(arr.mean()), float(arr.std()))


def _auroc(summary: dict, score: str) -> float:
    """Return ``summary['overall'][score]['auroc']``, or NaN when any level is absent."""
    return summary.get('overall', {}).get(score, {}).get('auroc', _NAN)


def _fmt_pm(vals: list[float]) -> str:
    """Format ``vals`` as ``mean ± std`` (4 decimals), or an em dash when all are missing."""
    m, sd = _mean_std(vals)
    return f'{m:.4f} ± {sd:.4f}' if m == m else '—'


def _within_reference() -> dict[str, tuple[float, float]]:
    """Within-dataset ``terminal_norm`` (mean, std) per route label.

    Reads ``<prefix>_seed*/phase1_summary.json`` under ``ROOT``; a route with no
    usable summary falls back to the hard-coded reference numbers below.
    """
    fallback = {
        'baseline': (0.9612, 0.0017),
        'A: causal': (0.9636, 0.0006),
        'B: spectral': (0.9619, 0.0013),
        'C: mixed': (0.9625, 0.0028),
        'A+C combo': (0.9587, 0.0017),
    }
    ref: dict[str, tuple[float, float]] = {}
    for label, prefix in ROUTES:
        vals: list[float] = []
        for f in sorted(ROOT.glob(f'{prefix}_seed*/phase1_summary.json')):
            try:
                summary = json.loads(f.read_text(encoding='utf-8'))
                v = summary.get('overall', {}).get('terminal_norm', {}).get('auroc')
            except (OSError, ValueError, AttributeError) as exc:
                # Still best-effort, but say which file was skipped and why.
                print(f'[warn] skipping unreadable summary {f}: {exc}')
                continue
            if v is not None:
                vals.append(v)
        ref[label] = _mean_std(vals) if vals else fallback.get(label, (_NAN, _NAN))
    return ref


def _primary_rows(data: dict) -> list[str]:
    """Table of ``terminal_norm``: within-dataset reference plus one column per target."""
    rows = [
        '## Primary score: `terminal_norm`',
        '',
        '| Route | within-CICIoT2023 (ref) | → CICIDS2017 | → CICDDoS2019 |',
        '|---|---|---|---|',
    ]
    within = _within_reference()
    for label, prefix in ROUTES:
        wm, ws = within[label]
        cells = [label, f'{wm:.4f} ± {ws:.4f}']
        for tgt in TARGETS:
            seeds = data.get((prefix, tgt), {})
            cells.append(_fmt_pm([_auroc(s, 'terminal_norm') for s in seeds.values()]))
        rows.append('| ' + ' | '.join(cells) + ' |')
    rows.append('')
    return rows


def _best_score_rows(data: dict) -> list[str]:
    """Per target: each route's highest-mean score and its gap to ``terminal_norm``."""
    rows = ["## Each route's best score per target", '']
    for tgt in TARGETS:
        rows += [
            f'### → {tgt}',
            '',
            "| Route | Best score | AUROC | Δ (vs same-route's terminal_norm) |",
            '|---|---|---|---|',
        ]
        for label, prefix in ROUTES:
            per_score: dict[str, list[float]] = {}
            for summary in data.get((prefix, tgt), {}).values():
                for name, entry in summary.get('overall', {}).items():
                    per_score.setdefault(name, []).append(entry.get('auroc', _NAN))
            means = {name: _mean_std(vals)[0] for name, vals in per_score.items()}
            means = {name: m for name, m in means.items() if m == m}
            if not means:  # no runs at all, or no finite score
                rows.append(f'| {label} | — | — | — |')
                continue
            best = max(means, key=means.get)
            best_v = means[best]
            best_sd = _mean_std(per_score[best])[1]
            tn = means.get('terminal_norm', _NAN)
            delta = f'{best_v - tn:+.4f}' if tn == tn else '—'
            rows.append(f'| {label} | `{best}` | {best_v:.4f} ± {best_sd:.4f} | {delta} |')
        rows.append('')
    return rows


def _all_scores_rows(data: dict) -> list[str]:
    """Per target: every score in ``PRIMARY_SCORES`` (rows) by route (columns)."""
    rows: list[str] = []
    for tgt in TARGETS:
        rows += [
            f'## All key scores → {tgt}',
            '',
            '| Score | ' + ' | '.join(label for label, _ in ROUTES) + ' |',
            '|---' * (1 + len(ROUTES)) + '|',
        ]
        for sc in PRIMARY_SCORES:
            cells = [f'`{sc}`']
            for _, prefix in ROUTES:
                seeds = data.get((prefix, tgt), {})
                cells.append(_fmt_pm([_auroc(s, sc) for s in seeds.values()]))
            rows.append('| ' + ' | '.join(cells) + ' |')
        rows.append('')
    return rows


def _ensemble_rows(data: dict) -> list[str]:
    """Per target: Route C blend sweep, one column per seed plus mean ± std."""
    rows: list[str] = []
    for tgt in TARGETS:
        rows += [f'## Route C ensemble (terminal_norm + disc_nll) → {tgt}', '']
        c_seeds = sorted(data.get(('route_c_mixed', tgt), {}))
        if not c_seeds:
            continue
        rows.append('| α | ' + ' | '.join(f'seed{s}' for s in c_seeds) + ' | mean ± std |')
        rows.append('|---' * (2 + len(c_seeds)) + '|')
        sweeps = {
            s: _ensemble_sweep(CROSS_DIR / f'route_c_mixed_seed{s}_to_{tgt}.npz') or {}
            for s in c_seeds
        }
        for a in _ENSEMBLE_ALPHAS:
            vals = [sweeps[s].get(a, _NAN) for s in c_seeds]
            # Missing sweeps render as an em dash, like every other table here.
            cells = [f'{a:.2f}'] + [f'{v:.4f}' if v == v else '—' for v in vals]
            m, sd = _mean_std(vals)
            cells.append(f'**{m:.4f} ± {sd:.4f}**' if m == m else '—')
            rows.append('| ' + ' | '.join(cells) + ' |')
        rows.append('')
    return rows


def _inventory_rows(data: dict) -> list[str]:
    """List which seeds were found for every (route, target) pair."""
    rows = ['## Run inventory', '']
    for label, prefix in ROUTES:
        for tgt in TARGETS:
            seeds = sorted(data.get((prefix, tgt), {}).keys())
            rows.append(f"- {label} → {tgt}: seeds = {(seeds if seeds else '(none)')}")
    return rows


def main() -> None:
    """Aggregate the cross-dataset summaries into ``ROOT / 'CROSS_RESULTS.md'``."""
    data = _collect()
    rows: list[str] = [
        '# Cross-Dataset Eval — CICIoT2023 → {CICIDS2017, CICDDoS2019}',
        '',
        "All models trained on CICIoT2023 (10K benign), evaluated on each target's",
        '10K benign + 10K stratified attack. Source-domain norm stats applied.',
        '3 seeds each. AUROC mean ± std.',
        '',
    ]
    rows += _primary_rows(data)
    rows += _best_score_rows(data)
    rows += _all_scores_rows(data)
    rows += _ensemble_rows(data)
    rows += _inventory_rows(data)
    out = ROOT / 'CROSS_RESULTS.md'
    # The report contains non-ASCII characters (→, Δ, α, ±, —); do not depend on
    # the locale's default encoding.
    out.write_text('\n'.join(rows) + '\n', encoding='utf-8')
    print(f'[wrote] {out}')
def _mahal_eval(npz_path: Path, val_prefix: str, atk_prefix: str) -> float:
    """AUROC of a Mahalanobis distance fitted with OAS on the ``val_prefix`` scores.

    Every array named ``<val_prefix><score>`` (except names ending in
    ``labels``) becomes one column of the score matrix; the matching
    ``<atk_prefix><score>`` arrays form the second population.  The mean and
    OAS covariance come from the ``val_prefix`` rows only; those rows are
    labelled 0 and the ``atk_prefix`` rows 1.

    Returns:
        The AUROC, or NaN when the file is missing, has no usable score
        columns, lacks a matching ``atk_prefix`` array, has fewer than 50 rows
        in either population, or the covariance fit / AUROC cannot be computed.
    """
    nan = float('nan')
    if not npz_path.exists():
        return nan
    # NOTE(review): allow_pickle=True will unpickle object arrays on access;
    # only feed this archives written by our own eval scripts.
    with np.load(npz_path, allow_pickle=True) as z:
        files = set(z.files)
        # Strip only the *leading* prefix; str.replace would also delete later
        # occurrences inside the score name and break the lookups below.
        keys = sorted(
            k[len(val_prefix):]
            for k in files
            if k.startswith(val_prefix) and not k.endswith('labels')
        )
        if not keys or any(f'{atk_prefix}{k}' not in files for k in keys):
            return nan
        val_S = np.stack([z[f'{val_prefix}{k}'] for k in keys], axis=1)
        atk_S = np.stack([z[f'{atk_prefix}{k}'] for k in keys], axis=1)
    val_S = np.nan_to_num(val_S, nan=0.0, posinf=1000000.0, neginf=-1000000.0)
    atk_S = np.nan_to_num(atk_S, nan=0.0, posinf=1000000.0, neginf=-1000000.0)
    if len(val_S) < 50 or len(atk_S) < 50:
        return nan
    y = np.r_[np.zeros(len(val_S)), np.ones(len(atk_S))]
    K = val_S.shape[1]
    try:
        oas = OAS().fit(val_S)
        # Tiny ridge keeps the inverse defined for a near-singular estimate.
        inv_cov = np.linalg.inv(oas.covariance_ + 1e-09 * np.eye(K))
    except (ValueError, FloatingPointError, np.linalg.LinAlgError):
        return nan
    mu = val_S.mean(0)

    def m(S):
        d = S - mu
        return np.einsum('ni,ij,nj->n', d, inv_cov, d)

    s = np.r_[m(val_S), m(atk_S)]
    s = np.nan_to_num(s, nan=0.0, posinf=1000000000000.0, neginf=-1000000000000.0)
    try:
        return float(roc_auc_score(y, s))
    except ValueError:
        return nan


def _summary_terminal_norm(path: Path) -> float:
    """Return ``overall.terminal_norm.auroc`` from a summary JSON, or NaN if absent."""
    if not path.exists():
        return float('nan')
    summary = json.loads(path.read_text(encoding='utf-8'))
    return summary.get('overall', {}).get('terminal_norm', {}).get('auroc', float('nan'))


def _within_mahal(ds: str, seed: int) -> float:
    """Within-dataset Mahalanobis AUROC from ``route_ac_combo_<ds>_seed<seed>/phase1_scores.npz``."""
    md = ROOT / f'route_ac_combo_{ds}_seed{seed}'
    return _mahal_eval(md / 'phase1_scores.npz', 'val_', 'atk_')


def _within_terminal_norm(ds: str, seed: int) -> float:
    """Within-dataset ``terminal_norm`` AUROC from the run's ``phase1_summary.json``."""
    return _summary_terminal_norm(ROOT / f'route_ac_combo_{ds}_seed{seed}' / 'phase1_summary.json')


def _src_aliases(src: str) -> list[str]:
    """Names under which ``src`` may appear in cross-eval file names."""
    aliases = [src]
    if src == 'cicddos2019':
        aliases.append('ddos2019')
    return aliases


def _cross_candidates(src: str, tgt: str, seed: int, suffix: str) -> list[Path]:
    """Candidate cross-eval files for ``src → tgt``, in lookup order.

    Files are named ``route_ac_combo_seed<seed>_<alias>_to_<tgt><suffix>``; for
    ``ciciot2023`` the name without a source component is tried last.
    """
    candidates = [
        CROSS_DIR / f'route_ac_combo_seed{seed}_{alias}_to_{tgt}{suffix}'
        for alias in _src_aliases(src)
    ]
    if src == 'ciciot2023':
        candidates.append(CROSS_DIR / f'route_ac_combo_seed{seed}_to_{tgt}{suffix}')
    return candidates


def _cross_mahal(src: str, tgt: str, seed: int) -> float:
    """Cross-dataset Mahalanobis AUROC from the first existing candidate ``.npz`` (NaN if none)."""
    for c in _cross_candidates(src, tgt, seed, '.npz'):
        if c.exists():
            return _mahal_eval(c, 'b_', 'a_')
    return float('nan')


def _cross_terminal_norm(src: str, tgt: str, seed: int) -> float:
    """Cross-dataset ``terminal_norm`` AUROC from the first existing candidate ``.json`` (NaN if none)."""
    for c in _cross_candidates(src, tgt, seed, '.json'):
        if c.exists():
            return _summary_terminal_norm(c)
    return float('nan')


def _ms(vals: list[float]) -> str:
    """Format non-NaN ``vals``: em dash if none, the value if one, else ``mean±std``."""
    arr = np.asarray([v for v in vals if not np.isnan(v)], dtype=np.float64)
    if arr.size == 0:
        return '—'
    if arr.size == 1:
        return f'{arr[0]:.4f}'
    return f'{arr.mean():.4f}±{arr.std():.4f}'


def _finite_mean(vals: list[float]) -> float:
    """Mean of the non-NaN values; NaN (without a NumPy warning) when there are none."""
    kept = [v for v in vals if not np.isnan(v)]
    return float(np.mean(kept)) if kept else float('nan')


def _delta_cell(mahal_vals: list[float], tnorm_vals: list[float]) -> str:
    """Render mean(Mahalanobis) − mean(terminal_norm); bold if ≥ +0.005, italic if ≤ −0.005."""
    m = _finite_mean(mahal_vals)
    t = _finite_mean(tnorm_vals)
    if np.isnan(m) or np.isnan(t):
        return '—'
    d = m - t
    if abs(d) < 0.005:
        return f'{d:+.4f}'
    return f'**{d:+.4f}**' if d > 0 else f'_{d:+.4f}_'


def _matrix_table(cell) -> list[str]:
    """Render a source × target markdown table; ``cell(src, tgt)`` supplies each entry."""
    rows = [
        '| Source ↓ \\ Target → | ' + ' | '.join(DATASETS) + ' |',
        '|---' * (1 + len(DATASETS)) + '|',
    ]
    for src in DATASETS:
        rows.append('| ' + ' | '.join([src] + [cell(src, tgt) for tgt in DATASETS]) + ' |')
    rows.append('')
    return rows


def main() -> None:
    """Write the 4×4 cross-dataset report to ``ROOT / 'CROSS_MATRIX.md'``."""
    # Evaluate every (source, target, seed) cell exactly once; the three tables
    # below all reuse these values instead of refitting the covariance.
    mahal: dict[tuple[str, str], list[float]] = {}
    tnorm: dict[tuple[str, str], list[float]] = {}
    for src in DATASETS:
        for tgt in DATASETS:
            if src == tgt:
                mahal[src, tgt] = [_within_mahal(src, s) for s in SEEDS]
                tnorm[src, tgt] = [_within_terminal_norm(src, s) for s in SEEDS]
            else:
                mahal[src, tgt] = [_cross_mahal(src, tgt, s) for s in SEEDS]
                tnorm[src, tgt] = [_cross_terminal_norm(src, tgt, s) for s in SEEDS]

    def _auroc_cell(table):
        # Diagonal (within-dataset) cells are italicised.
        def cell(src, tgt):
            text = _ms(table[src, tgt])
            return f'_{text}_' if src == tgt else text
        return cell

    rows: list[str] = [
        '# Full 4×4 Cross Matrix — A+C combo + Mahalanobis-OAS',
        '',
        '3-seed mean ± std. Diagonal = within-dataset; off-diagonal = cross.',
        'Aggregator: Mahalanobis-OAS over 10-d A+C combo score vector,',
        'fit on **target-dataset benign val only** (no attack labels).',
        '',
        '## Mahalanobis-OAS AUROC (4×4)',
        '',
    ]
    rows += _matrix_table(_auroc_cell(mahal))
    rows += ['(Italic diagonal = within-dataset reference)', '']
    rows += [
        '## `terminal_norm` AUROC (4×4) — for comparison (selection-bias-free single fixed score)',
        '',
    ]
    rows += _matrix_table(_auroc_cell(tnorm))
    rows += ['## Δ Mahalanobis − terminal_norm (where positive, Mahalanobis is better)', '']
    rows += _matrix_table(lambda src, tgt: _delta_cell(mahal[src, tgt], tnorm[src, tgt]))
    rows += [
        '## Per-source averaged cross-AUROC (Mahalanobis, off-diagonal mean)',
        '',
        '| Source | mean off-diag Mahalanobis | mean off-diag terminal_norm |',
        '|---|---|---|',
    ]
    for src in DATASETS:
        # Pool every finite off-diagonal value (all targets, all seeds).
        m_mean = _finite_mean([v for tgt in DATASETS if tgt != src for v in mahal[src, tgt]])
        t_mean = _finite_mean([v for tgt in DATASETS if tgt != src for v in tnorm[src, tgt]])
        m_txt = '—' if np.isnan(m_mean) else f'{m_mean:.4f}'
        t_txt = '—' if np.isnan(t_mean) else f'{t_mean:.4f}'
        rows.append(f'| {src} | {m_txt} | {t_txt} |')
    out = ROOT / 'CROSS_MATRIX.md'
    # Report text is non-ASCII (×, ↓, →, Δ, −, ±); be explicit about the encoding.
    out.write_text('\n'.join(rows) + '\n', encoding='utf-8')
    print(f'[wrote] {out}')
[_cross_terminal_norm(src, tgt, s) for s in SEEDS] if not np.isnan(v)]) + if np.isnan(m) or np.isnan(t): + cells.append('—') + else: + d = m - t + if abs(d) < 0.005: + cells.append(f'{d:+.4f}') + elif d > 0: + cells.append(f'**{d:+.4f}**') + else: + cells.append(f'_{d:+.4f}_') + rows.append('| ' + ' | '.join(cells) + ' |') + rows.append('') + rows.append('## Per-source averaged cross-AUROC (Mahalanobis, off-diagonal mean)') + rows.append('') + rows.append('| Source | mean off-diag Mahalanobis | mean off-diag terminal_norm |') + rows.append('|---|---|---|') + for src in DATASETS: + m_offs = [] + t_offs = [] + for tgt in DATASETS: + if src == tgt: + continue + m_vals = [_cross_mahal(src, tgt, s) for s in SEEDS] + t_vals = [_cross_terminal_norm(src, tgt, s) for s in SEEDS] + m_offs.extend([v for v in m_vals if not np.isnan(v)]) + t_offs.extend([v for v in t_vals if not np.isnan(v)]) + m_mean = np.mean(m_offs) if m_offs else float('nan') + t_mean = np.mean(t_offs) if t_offs else float('nan') + rows.append(f'| {src} | {m_mean:.4f} | {t_mean:.4f} |') + out = ROOT / 'CROSS_MATRIX.md' + out.write_text('\n'.join(rows) + '\n') + print(f'[wrote] {out}') +if __name__ == '__main__': + main() diff --git a/scripts/aggregate/aggregate_full_sota.py b/scripts/aggregate/aggregate_full_sota.py new file mode 100644 index 0000000..c09baf3 --- /dev/null +++ b/scripts/aggregate/aggregate_full_sota.py @@ -0,0 +1,84 @@ +from __future__ import annotations +import json +import re +from pathlib import Path +import numpy as np +ROOT = Path(__file__).resolve().parents[2] / 'artifacts' / 'route_comparison' +SEED_RE = re.compile('_seed(\\d+)$') +EXISTING_SOTA = {'ISCXTor2016 (NonTor → Tor)': {'shafir_baseline': 0.8731, 'shafir_ref': 'Table VI', 'ours_existing': (0.9945, 0.0011), 'ours_score': 'terminal_norm', 'sigma': 0.1, 'ac_prefix': 'route_ac_combo_iscxtor2016'}, 'CICIDS2017 within (Shafir 10k/10k)': {'shafir_baseline': 0.9303, 'shafir_ref': 'Table VII', 'ours_existing': (0.9858, 0.0021), 
# Scores shown as individual columns of the headline table, in tie-break order
# (on equal means the earlier entry is reported as best).
_HEADLINE_SCORES = ('terminal_norm', 'terminal_packet', 'disc_nll_total')


def _seeds(prefix: str) -> dict[int, Path]:
    """Map seed → run directory for every ``<prefix>_seed<N>`` dir that has a summary."""
    out: dict[int, Path] = {}
    for d in sorted(ROOT.glob(f'{prefix}_seed*')):
        m = SEED_RE.search(d.name)
        if m and (d / 'phase1_summary.json').exists():
            out[int(m.group(1))] = d
    return out


def _load(d: Path) -> dict:
    """Parse ``phase1_summary.json`` inside run directory ``d``."""
    return json.loads((d / 'phase1_summary.json').read_text(encoding='utf-8'))


def _mean_std(vs: list[float]) -> tuple[float, float]:
    """Return ``(mean, population std)`` of the non-NaN values, or ``(nan, nan)`` if none."""
    arr = np.asarray([v for v in vs if v == v], dtype=np.float64)  # v == v drops NaN
    if arr.size == 0:
        return (float('nan'), float('nan'))
    return (float(arr.mean()), float(arr.std()))


def _score_stats(summaries: list[dict], score: str) -> tuple[float, float]:
    """Mean/std over seeds of ``overall[score]['auroc']`` (missing entries count as NaN)."""
    return _mean_std(
        [s.get('overall', {}).get(score, {}).get('auroc', float('nan')) for s in summaries]
    )


def _pm(m: float, sd: float) -> str:
    """Format ``mean ± std`` with 4 decimals, or an em dash when the mean is NaN."""
    return f'{m:.4f} ± {sd:.4f}' if m == m else '—'


def _pick_best(stats: dict[str, tuple[float, float]]) -> tuple[str, float, float] | None:
    """Return ``(name, mean, std)`` of the highest finite mean, or ``None`` if all are NaN.

    NaN means are skipped explicitly: every comparison against NaN is False, so
    a NaN first candidate would otherwise stay "best" no matter what follows.
    Ties keep the earlier entry of ``stats``.
    """
    best: tuple[str, float, float] | None = None
    for name, (m, sd) in stats.items():
        if m != m:
            continue
        if best is None or m > best[1]:
            best = (name, m, sd)
    return best


def main() -> None:
    """Write the A+C-combo vs existing-SOTA comparison to ``ROOT / 'SOTA_COMPARISON.md'``."""
    # Parse every summary once; both report sections reuse the parsed dicts.
    loaded: dict[str, dict[int, dict]] = {
        label: {seed: _load(d) for seed, d in _seeds(meta['ac_prefix']).items()}
        for label, meta in EXISTING_SOTA.items()
    }
    rows: list[str] = [
        '# SOTA Comparison: A+C combo vs existing UnifiedCFM',
        '',
        'All 4 datasets, 3 seeds each, within-dataset Shafir 10K/10K protocol.',
        'Existing UnifiedCFM uses Phase-2 consistency loss (λ_flow=λ_packet=0.3).',
        'A+C combo uses Mixed_CFM (continuous CFM + DFM) + causal-packet attention,',
        '**no Phase-2 consistency loss**. lambda_disc=1.0, sigma=0.1, use_ot=True.',
        '',
        "## Headline: A+C combo's best score per dataset",
        '',
        '| Dataset | Shafir 2026 | Existing UnifiedCFM (SOTA) | A+C combo `terminal_norm` | A+C combo `terminal_packet` | A+C combo `disc_nll_total` | A+C best | New SOTA? |',
        '|---|---|---|---|---|---|---|---|',
    ]
    for label, meta in EXISTING_SOTA.items():
        summaries = list(loaded[label].values())
        shafir = meta['shafir_baseline']
        # `is not None`, not truthiness: a baseline of 0.0 is still a number.
        shafir_str = f'{shafir:.4f}' if shafir is not None else '—'
        existing_m, existing_sd = meta['ours_existing']
        existing_str = f'{existing_m:.4f} ± {existing_sd:.4f}'
        if not summaries:
            rows.append(f'| {label} | {shafir_str} | {existing_str} | (running) | — | — | — | — |')
            continue
        stats = {name: _score_stats(summaries, name) for name in _HEADLINE_SCORES}
        best = _pick_best(stats)
        if best is None:
            best_cell, verdict = '—', '—'
        else:
            best_score, best_m, best_sd = best
            best_cell = f'`{best_score}` {best_m:.4f} ± {best_sd:.4f}'
            beats = '✅' if best_m > existing_m else '❌'
            verdict = f'{beats} {best_m - existing_m:+.4f}'
        score_cells = ' | '.join(_pm(*stats[name]) for name in _HEADLINE_SCORES)
        rows.append(
            f'| {label} | {shafir_str} | {existing_str} | {score_cells} | {best_cell} | {verdict} |'
        )
    rows.append('')
    rows.append('## Per-dataset full scoring')
    rows.append('')
    score_keys = [
        'terminal_norm', 'terminal_flow', 'terminal_packet', 'disc_nll_total',
        'disc_nll_ch3', 'disc_nll_ch4', 'disc_nll_ch5', 'disc_nll_ch7',
    ]
    for label in EXISTING_SOTA:
        rows.append(f'### {label}')
        rows.append('')
        by_seed = loaded[label]
        if not by_seed:
            rows.append('(not yet completed)\n')
            continue
        rows.append('| Score | mean ± std | seeds |')
        rows.append('|---|---|---|')
        summaries = list(by_seed.values())
        for sc in score_keys:
            m, sd = _score_stats(summaries, sc)
            if m == m:  # scores absent from every seed are left out
                rows.append(f'| `{sc}` | {m:.4f} ± {sd:.4f} | {sorted(by_seed)} |')
        rows.append('')
    out = ROOT / 'SOTA_COMPARISON.md'
    # Report text is non-ASCII (λ, ±, ✅, ❌, —); be explicit about the encoding.
    out.write_text('\n'.join(rows) + '\n', encoding='utf-8')
    print(f'[wrote] {out}')
def _collect(prefix: str) -> dict[int, dict]:
    """Map seed → parsed ``phase1_summary.json`` for every ``<prefix>_seed<N>`` run dir.

    Directories whose name does not end in ``_seed<N>`` or that have no summary
    file are skipped.
    """
    out: dict[int, dict] = {}
    for d in sorted(ROOT.glob(f'{prefix}_seed*')):
        m = SEED_RE.search(d.name)
        if not m:
            continue
        f = d / 'phase1_summary.json'
        if not f.exists():
            continue
        out[int(m.group(1))] = json.loads(f.read_text(encoding='utf-8'))
    return out


def _mean_std(values: list[float]) -> tuple[float, float]:
    """Return ``(mean, population std)`` of the non-NaN values, or ``(nan, nan)`` if none."""
    arr = np.asarray([v for v in values if v == v], dtype=np.float64)  # v == v drops NaN
    if arr.size == 0:
        return (float('nan'), float('nan'))
    return (float(arr.mean()), float(arr.std()))


def _rank_classes(summaries: list[dict], top_k: int = 12) -> list[tuple[str, float]]:
    """Return the ``top_k`` ``(class, support)`` pairs, largest support first.

    Support is the maximum ``_n`` recorded for the class across ``summaries``;
    ``per_class`` keys starting with an underscore are skipped.
    """
    support: dict[str, float] = {}
    for s in summaries:
        for cls, cls_data in s.get('per_class', {}).items():
            if cls.startswith('_'):
                continue
            support[cls] = max(support.get(cls, 0.0), cls_data.get('_n', 0.0))
    return sorted(support.items(), key=lambda kv: -kv[1])[:top_k]


def main(top_k: int = 12) -> None:
    """Write the within-dataset route comparison to ``ROOT / 'RESULTS.md'``.

    Args:
        top_k: number of attack classes (ranked by support in the baseline
            runs) listed in the per-class table.
    """
    routes_data = {label: _collect(prefix) for (label, prefix) in ROUTES}
    route_cols = ' | '.join(label for (label, _) in ROUTES)

    def _cell(vals: list[float], digits: int) -> str:
        mean, std = _mean_std(vals)
        return f'{mean:.{digits}f} ± {std:.{digits}f}' if mean == mean else '—'

    rows: list[str] = [
        '# Route Comparison Results — CICIoT2023',
        '',
        'All routes trained on CICIoT2023 with the protocol locked in `PROTOCOL.md`. ',
        'Numbers are AUROC over benign val (10k cap) vs all attacks (10k cap), ',
        '3 seeds each. ± std across seeds.',
        '',
        '## Overall AUROC by score',
        '',
        '| Score | ' + route_cols + ' |',
        '|---' * (1 + len(ROUTES)) + '|',
    ]
    for score in PRIMARY_SCORES:
        cells = [f'`{score}`']
        for (label, _) in ROUTES:
            seeds = routes_data[label]
            if not seeds:
                cells.append('—')
                continue
            cells.append(_cell(
                [s.get('overall', {}).get(score, {}).get('auroc', float('nan')) for s in seeds.values()],
                4,
            ))
        rows.append('| ' + ' | '.join(cells) + ' |')
    rows.append('')
    rows.append(f'## Per-attack-class `terminal_norm` AUROC (top {top_k} by support)')
    rows.append('')
    # Class ranking is taken from the baseline runs only.
    seed_dicts = list(routes_data['baseline'].values())
    if seed_dicts:
        rows.append('| Class | n | ' + route_cols + ' |')
        rows.append('|---' * (2 + len(ROUTES)) + '|')
        for (cls, n) in _rank_classes(seed_dicts, top_k):
            cells = [cls, f'{int(n)}']
            for (label, _) in ROUTES:
                seeds = routes_data[label]
                if not seeds:
                    cells.append('—')
                    continue
                cells.append(_cell(
                    [s.get('per_class', {}).get(cls, {}).get('terminal_norm', float('nan')) for s in seeds.values()],
                    3,
                ))
            rows.append('| ' + ' | '.join(cells) + ' |')
        rows.append('')
    rows.append('## Run inventory')
    rows.append('')
    for (label, prefix) in ROUTES:
        seeds = sorted(routes_data[label].keys())
        rows.append(f"- **{label}** (`{prefix}_seed*`): seeds = {(seeds if seeds else '(none yet)')}")
    out = ROOT / 'RESULTS.md'
    # Report text is non-ASCII (—, ±); be explicit about the encoding.
    out.write_text('\n'.join(rows) + '\n', encoding='utf-8')
    print(f'[wrote] {out}')
def _aggregators(val_S: np.ndarray, test_S_list: list[np.ndarray]) -> dict[str, list[np.ndarray]]:
    """Collapse multi-score matrices into one anomaly score per row, five ways.

    All statistics (per-column mean/std and the three covariance estimates) are
    fitted on ``val_S`` only, after NaN/±inf have been replaced by 0 / ±1e6.

    Args:
        val_S: reference score matrix, one row per sample, one column per score.
        test_S_list: matrices with the same columns to be scored.

    Returns:
        ``{tag: [scores for each matrix in test_S_list]}`` with tags
        ``max_abs_z``, ``max_pos_z``, ``mahal_plain`` (empirical covariance with
        a 1e-3 ridge), ``mahal_lw`` (Ledoit-Wolf) and ``mahal_oas`` (OAS).
    """

    def _sanitize(a):
        return np.nan_to_num(a, nan=0.0, posinf=1000000.0, neginf=-1000000.0)

    val_S = _sanitize(val_S)
    cleaned = [_sanitize(t) for t in test_S_list]

    mu = val_S.mean(axis=0)
    sigma = val_S.std(axis=0) + 1e-09
    eye = np.eye(val_S.shape[1])

    # Inverse covariances, built in the same order as the tags are reported.
    precisions = {}
    precisions['mahal_plain'] = np.linalg.inv(np.cov(val_S, rowvar=False) + 0.001 * eye)
    precisions['mahal_lw'] = np.linalg.inv(LedoitWolf().fit(val_S).covariance_ + 1e-09 * eye)
    precisions['mahal_oas'] = np.linalg.inv(OAS().fit(val_S).covariance_ + 1e-09 * eye)

    def _zscores(S):
        return (S - mu) / sigma

    def _quadratic_form(precision):
        def score(S):
            centred = S - mu
            return np.einsum('ni,ij,nj->n', centred, precision, centred)
        return score

    scorers = {
        'max_abs_z': lambda S: np.abs(_zscores(S)).max(axis=1),
        'max_pos_z': lambda S: _zscores(S).max(axis=1),
    }
    for tag, precision in precisions.items():
        scorers[tag] = _quadratic_form(precision)

    return {tag: [fn(t) for t in cleaned] for tag, fn in scorers.items()}
in all_keys], axis=1) + atk_S = np.stack([z[f'{atk_prefix}{k}'] for k in all_keys], axis=1) + val_S = np.nan_to_num(val_S, nan=0.0, posinf=1000000000000.0, neginf=-1000000000000.0) + atk_S = np.nan_to_num(atk_S, nan=0.0, posinf=1000000000000.0, neginf=-1000000000000.0) + y = np.r_[np.zeros(val_S.shape[0]), np.ones(atk_S.shape[0])] + per_score = {} + for (i, k) in enumerate(all_keys): + s = np.r_[val_S[:, i], atk_S[:, i]] + s = np.nan_to_num(s, nan=0.0, posinf=1000000000000.0, neginf=-1000000000000.0) + a1 = roc_auc_score(y, s) + per_score[k] = max(a1, 1 - a1) + best_score = max(per_score, key=per_score.get) + out['auc_best_fixed'] = per_score[best_score] + out['best_fixed_name'] = best_score + out['auc_term_norm'] = per_score.get('terminal_norm', float('nan')) + out['auc_term_pkt'] = per_score.get('terminal_packet', float('nan')) + out['auc_disc_total'] = per_score.get('disc_nll_total', float('nan')) + return out + +def _mean_std(vs: list[float]) -> tuple[float, float]: + arr = np.asarray([v for v in vs if v == v], dtype=np.float64) + if arr.size == 0: + return (float('nan'), float('nan')) + return (float(arr.mean()), float(arr.std())) + +def main() -> None: + rows: list[str] = [] + rows.append('# Score-vector auto-selection: max-of-|z| / Mahalanobis vs fixed scores') + rows.append('') + rows.append('Aggregators are fit on **benign val only** (no attack labels). All numbers') + rows.append('are 3-seed mean ± std on A+C combo (Mixed_CFM + causal-packet attention).') + rows.append('') + rows.append('Note on fairness: `auc_best_fixed` is selection-biased (picks per-dataset best') + rows.append('score post-hoc on test set). 
`max_abs_z` and `mahalanobis` are NOT — they only') + rows.append('use benign val to fit aggregator parameters.') + rows.append('') + rows.append("## Within-dataset(A+C combo on each dataset's own benign/attack)") + rows.append('') + rows.append('| Dataset | term_norm | best fixed | max-\\|z\\| (all) | mahal-OAS (all) | **mahal-OAS (term3)** | **mahal-OAS (disc7)** |') + rows.append('|---|---|---|---|---|---|---|') + for ds in WITHIN_DATASETS: + rows_per_seed: list[dict] = [] + for s in SEEDS: + md = ROOT / f'route_ac_combo_{ds}_seed{s}' + npz = md / 'phase1_scores.npz' + if not npz.exists(): + continue + rows_per_seed.append(_evaluate(npz, 'val_', 'atk_')) + if not rows_per_seed: + rows.append(f'| {ds} | (no data) | | | | | |') + continue + + def col(field):  # _evaluate omits a subset's keys when it has < 2 scores; treat as NaN + (m, sd) = _mean_std([r.get(field, float('nan')) for r in rows_per_seed]) + return f'{m:.4f} ± {sd:.4f}' if m == m else '—' + rows.append(f"| {ds} | {col('auc_term_norm')} | {col('auc_best_fixed')} | {col('auc_max_abs_z_all')} | {col('auc_mahal_oas_all')} | **{col('auc_mahal_oas_terminal3')}** | **{col('auc_mahal_oas_disc7')}** |") + rows.append('') + rows.append('## Cross-dataset(A+C combo trained on CICIoT2023 → eval on target)') + rows.append('') + rows.append('| Target | term_norm | best fixed | max-\\|z\\| (all) | mahal-OAS (all) | **mahal-OAS (term3)** | **mahal-OAS (disc7)** |') + rows.append('|---|---|---|---|---|---|---|') + for tgt in CROSS_TARGETS: + rows_per_seed: list[dict] = [] + for s in SEEDS: + npz = CROSS_DIR / f'route_ac_combo_seed{s}_to_{tgt}.npz' + if not npz.exists(): + continue + rows_per_seed.append(_evaluate(npz, 'b_', 'a_')) + if not rows_per_seed: + rows.append(f'| {tgt} | (no data) | | | | | |') + continue + + def col(field):  # _evaluate omits a subset's keys when it has < 2 scores; treat as NaN + (m, sd) = _mean_std([r.get(field, float('nan')) for r in rows_per_seed]) + return f'{m:.4f} ± {sd:.4f}' if m == m else '—' + rows.append(f"| {tgt} | {col('auc_term_norm')} | {col('auc_best_fixed')} | {col('auc_max_abs_z_all')} | {col('auc_mahal_oas_all')} | **{col('auc_mahal_oas_terminal3')}** | **{col('auc_mahal_oas_disc7')}** 
|") + rows.append('') + rows.append('## Best-fixed-score winner per setup') + rows.append('') + rows.append('| Setup | seed42 | seed43 | seed44 |') + rows.append('|---|---|---|---|') + for ds in WITHIN_DATASETS: + cells = [f'within {ds}'] + for s in SEEDS: + npz = ROOT / f'route_ac_combo_{ds}_seed{s}/phase1_scores.npz' + if not npz.exists(): + cells.append('—') + continue + r = _evaluate(npz, 'val_', 'atk_') + cells.append(f"{r['best_fixed_name']} ({r['auc_best_fixed']:.4f})") + rows.append('| ' + ' | '.join(cells) + ' |') + for tgt in CROSS_TARGETS: + cells = [f'cross→{tgt}'] + for s in SEEDS: + npz = CROSS_DIR / f'route_ac_combo_seed{s}_to_{tgt}.npz' + if not npz.exists(): + cells.append('—') + continue + r = _evaluate(npz, 'b_', 'a_') + cells.append(f"{r['best_fixed_name']} ({r['auc_best_fixed']:.4f})") + rows.append('| ' + ' | '.join(cells) + ' |') + out = ROOT / 'SCORE_ROUTER.md' + out.write_text('\n'.join(rows) + '\n') + print(f'[wrote] {out}') +if __name__ == '__main__': + main() diff --git a/scripts/aggregate/aggregate_v2.py b/scripts/aggregate/aggregate_v2.py new file mode 100644 index 0000000..648f0d1 --- /dev/null +++ b/scripts/aggregate/aggregate_v2.py @@ -0,0 +1,182 @@ +from __future__ import annotations +import json +import re +from pathlib import Path +import numpy as np +from sklearn.metrics import roc_auc_score +ROOT = Path(__file__).resolve().parents[2] / 'artifacts' / 'route_comparison' +SEED_RE = re.compile('_seed(\\d+)$') +ROUTES = [('baseline', 'baseline_ciciot2023'), ('A: causal', 'route_a_causal_ciciot2023'), ('B: spectral', 'route_b_spectral_ciciot2023'), ('C: mixed', 'route_c_mixed_ciciot2023'), ('A+C combo', 'route_ac_combo_ciciot2023')] + +def _seeds(prefix: str) -> dict[int, Path]: + out = {} + for d in sorted(ROOT.glob(f'{prefix}_seed*')): + m = SEED_RE.search(d.name) + if m and (d / 'phase1_summary.json').exists(): + out[int(m.group(1))] = d + return out + +def _load_summary(d: Path) -> dict: + return json.loads((d / 
'phase1_summary.json').read_text()) + +def _ensemble_sweep(d: Path) -> dict[float, float] | None: + f = d / 'phase1_scores.npz' + if not f.exists(): + return None + z = np.load(f, allow_pickle=True) + keys = set(z.files) + if 'val_terminal_norm' not in keys or 'val_disc_nll_total' not in keys: + return None + v_tn = z['val_terminal_norm'] + a_tn = z['atk_terminal_norm'] + v_dn = z['val_disc_nll_total'] + a_dn = z['atk_disc_nll_total'] + + def zsc(v, a): + (mu, sd) = (v.mean(), v.std() + 1e-09) + return ((v - mu) / sd, (a - mu) / sd) + (v_tn_z, a_tn_z) = zsc(v_tn, a_tn) + (v_dn_z, a_dn_z) = zsc(v_dn, a_dn) + out: dict[float, float] = {} + for alpha in (0.0, 0.25, 0.5, 0.7, 0.8, 0.9, 1.0): + s_v = alpha * v_tn_z + (1.0 - alpha) * v_dn_z + s_a = alpha * a_tn_z + (1.0 - alpha) * a_dn_z + y = np.r_[np.zeros(len(s_v)), np.ones(len(s_a))] + s = np.r_[s_v, s_a] + out[alpha] = float(roc_auc_score(y, s)) + return out + +def _ensemble_score(d: Path) -> tuple[float, float] | None: + sweep = _ensemble_sweep(d) + if sweep is None: + return None + best_alpha = max(sweep, key=sweep.get) + return (sweep[best_alpha], best_alpha) + +def _mean_std(vals: list[float]) -> tuple[float, float]: + arr = np.asarray([v for v in vals if v == v], dtype=np.float64) + if arr.size == 0: + return (float('nan'), float('nan')) + return (float(arr.mean()), float(arr.std())) + +def main() -> None: + routes_data: dict[str, dict[int, dict]] = {} + routes_dirs: dict[str, dict[int, Path]] = {} + for (label, prefix) in ROUTES: + seeds = _seeds(prefix) + routes_dirs[label] = seeds + routes_data[label] = {s: _load_summary(d) for (s, d) in seeds.items()} + rows: list[str] = [] + rows.append('# Route Comparison Results — CICIoT2023 (multi-seed)') + rows.append('') + rows.append('Phase1 eval: AUROC over benign val (5k cap) vs all attacks (10k cap), 3 seeds each.') + rows.append('') + rows.append("## Each route's best AUROC (overall)") + rows.append('') + rows.append('| Route | Best score | AUROC | Δ vs 
baseline-best |') + rows.append('|---|---|---|---|') + baseline_best = None + for (label, _) in ROUTES: + seeds = routes_data[label] + if not seeds: + rows.append(f'| {label} | — | — | — |') + continue + all_scores: dict[str, list[float]] = {} + for s in seeds.values(): + for (k, v) in s.get('overall', {}).items(): + all_scores.setdefault(k, []).append(v.get('auroc', float('nan'))) + score_means = {k: _mean_std(v)[0] for (k, v) in all_scores.items()} + score_means = {k: v for (k, v) in score_means.items() if v == v} + if not score_means: + rows.append(f'| {label} | — | — | — |') + continue + best_score = max(score_means, key=score_means.get) + best_val = score_means[best_score] + if label == 'baseline': + baseline_best = best_val + delta_str = '—' + else: + delta_str = f'{best_val - baseline_best:+.4f}' if baseline_best else '—' + std = _mean_std(all_scores[best_score])[1] + rows.append(f'| {label} | `{best_score}` | {best_val:.4f} ± {std:.4f} | {delta_str} |') + rows.append('') + rows.append('## Primary score: `terminal_norm`') + rows.append('') + rows.append('| Route | mean ± std | seeds |') + rows.append('|---|---|---|') + for (label, _) in ROUTES: + seeds = routes_data[label] + if not seeds: + rows.append(f'| {label} | — | — |') + continue + vals = [s['overall'].get('terminal_norm', {}).get('auroc', float('nan')) for s in seeds.values()] + (m, sd) = _mean_std(vals) + rows.append(f'| {label} | {m:.4f} ± {sd:.4f} | {sorted(seeds.keys())} |') + rows.append('') + rows.append('## Route-specific signature scores (mean ± std, 3 seeds)') + rows.append('') + score_groups = [('Route A signature (consistency family)', ['flow_consistency', 'packet_consistency', 'consistency_total', 'causal_surprisal_total', 'causal_surprisal_packet_median']), ('Route B signature (curvature/dynamics)', ['kappa2_speed2norm_packet_median', 'direction_drift_packet_median', 'pna_packet_median', 'curvature_packet']), ('Route C signature (discrete NLL)', ['disc_nll_total', 'disc_nll_ch3', 
'disc_nll_ch4', 'disc_nll_ch5', 'disc_nll_ch7'])] + for (grp_name, scores) in score_groups: + rows.append(f'### {grp_name}') + rows.append('') + rows.append('| Score | ' + ' | '.join((label for (label, _) in ROUTES)) + ' |') + rows.append('|---' * (1 + len(ROUTES)) + '|') + for sc in scores: + cells = [f'`{sc}`'] + for (label, _) in ROUTES: + seeds = routes_data[label] + vals = [s['overall'].get(sc, {}).get('auroc', float('nan')) for s in seeds.values()] + (m, sd) = _mean_std(vals) + cells.append(f'{m:.4f} ± {sd:.4f}' if m == m else '—') + rows.append('| ' + ' | '.join(cells) + ' |') + rows.append('') + rows.append('## Route C ensemble: α·terminal_norm + (1−α)·disc_nll_total (z-scored)') + rows.append('') + c_dirs = routes_dirs.get('C: mixed', {}) + if c_dirs: + alphas = (0.0, 0.25, 0.5, 0.7, 0.8, 0.9, 1.0) + rows.append('| α | ' + ' | '.join((f'seed{s}' for s in sorted(c_dirs.keys()))) + ' | mean ± std |') + rows.append('|---' * (2 + len(c_dirs)) + '|') + per_alpha: dict[float, list[float]] = {a: [] for a in alphas} + per_seed_sweeps = {s: _ensemble_sweep(d) or {} for (s, d) in c_dirs.items()} + for a in alphas: + cells = [f'{a:.2f}'] + vals = [] + for s in sorted(c_dirs.keys()): + v = per_seed_sweeps[s].get(a, float('nan')) + cells.append(f'{v:.4f}') + vals.append(v) + (m, sd) = _mean_std(vals) + cells.append(f'**{m:.4f} ± {sd:.4f}**') + rows.append('| ' + ' | '.join(cells) + ' |') + rows.append('') + rows.append('(α=1.0 = terminal_norm only; α=0.0 = disc_nll only.)') + rows.append('') + rows.append('## Per-attack-class AUROC (top 12, terminal_norm)') + rows.append('') + if routes_data['baseline']: + any_summary = next(iter(routes_data['baseline'].values())) + classes = sorted([(c, d.get('_n', 0)) for (c, d) in any_summary.get('per_class', {}).items() if not c.startswith('_')], key=lambda kv: -kv[1])[:12] + header = '| Class | n | ' + ' | '.join((label for (label, _) in ROUTES)) + ' |' + sep = '|---' * (2 + len(ROUTES)) + '|' + rows.append(header) + 
rows.append(sep) + for (cls, n) in classes: + cells = [cls, f'{int(n)}'] + for (label, _) in ROUTES: + seeds = routes_data[label] + vals = [s.get('per_class', {}).get(cls, {}).get('terminal_norm', float('nan')) for s in seeds.values()] + (m, sd) = _mean_std(vals) + cells.append(f'{m:.3f}±{sd:.3f}' if m == m else '—') + rows.append('| ' + ' | '.join(cells) + ' |') + rows.append('') + rows.append('## Run inventory') + rows.append('') + for (label, prefix) in ROUTES: + seeds = sorted(routes_data[label].keys()) + rows.append(f"- **{label}** (`{prefix}_seed*`): seeds = {(seeds if seeds else '(none yet)')}") + out = ROOT / 'RESULTS.md' + out.write_text('\n'.join(rows) + '\n') + print(f'[wrote] {out}') +if __name__ == '__main__': + main() diff --git a/scripts/aggregate/run_ac_combo_evals.sh b/scripts/aggregate/run_ac_combo_evals.sh new file mode 100755 index 0000000..f13c283 --- /dev/null +++ b/scripts/aggregate/run_ac_combo_evals.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# Phase1 + cross eval for the 3 A+C combo seeds. +set -e +ROOT=/home/chy/JANUS +MIXED_PHASE1=${ROOT}/Mixed_CFM/eval_phase1.py +MIXED_CROSS=${ROOT}/Mixed_CFM/eval_cross.py +CROSS_DIR=${ROOT}/artifacts/route_comparison/cross +mkdir -p ${CROSS_DIR} + +# GPU 0: phase1 + cross→IDS2017 for all 3 seeds +{ +for seed in 42 43 44; do + md=${ROOT}/artifacts/route_comparison/route_ac_combo_ciciot2023_seed${seed} + [ -f "${md}/model.pt" ] || { echo "[wait] seed${seed} model.pt not yet"; continue; } + + if [ ! -f "${md}/phase1_summary.json" ]; then + echo "[gpu0 phase1] seed${seed}" + cd ${ROOT}/Mixed_CFM + CUDA_VISIBLE_DEVICES=0 stdbuf -oL uv run --no-sync python -u ${MIXED_PHASE1} \ + --model-dir ${md} --out-dir ${md} \ + --batch-size 256 --n-steps 16 \ + --n-val-cap 5000 --n-atk-cap 10000 \ + > ${md}/phase1.log 2>&1 + fi + + ids_out=${CROSS_DIR}/route_ac_combo_seed${seed}_to_cicids2017.json + if [ ! 
-f "${ids_out}" ]; then + echo "[gpu0 cross→ids2017] seed${seed}" + cd ${ROOT}/Mixed_CFM + CUDA_VISIBLE_DEVICES=0 stdbuf -oL uv run --no-sync python -u ${MIXED_CROSS} \ + --model-dir ${md} \ + --target-store ${ROOT}/datasets/cicids2017/processed/full_store \ + --target-flows ${ROOT}/datasets/cicids2017/processed/flows.parquet \ + --target-flow-features ${ROOT}/datasets/cicids2017/processed/flow_features.parquet \ + --out ${ids_out} \ + --n-benign 10000 --n-attack 10000 --seed 42 --T 64 --batch-size 256 --n-steps 16 \ + > ${CROSS_DIR}/route_ac_combo_seed${seed}_to_cicids2017.log 2>&1 + fi +done +echo "[gpu0 done]" +} > /tmp/ac_eval_gpu0.log 2>&1 & +GPU0=$! + +# GPU 1: cross→DDoS19 for all 3 seeds +{ +for seed in 42 43 44; do + md=${ROOT}/artifacts/route_comparison/route_ac_combo_ciciot2023_seed${seed} + [ -f "${md}/model.pt" ] || { echo "[wait] seed${seed} model.pt not yet"; continue; } + + ddos_out=${CROSS_DIR}/route_ac_combo_seed${seed}_to_cicddos2019.json + if [ ! -f "${ddos_out}" ]; then + echo "[gpu1 cross→ddos19] seed${seed}" + cd ${ROOT}/Mixed_CFM + CUDA_VISIBLE_DEVICES=1 stdbuf -oL uv run --no-sync python -u ${MIXED_CROSS} \ + --model-dir ${md} \ + --target-store ${ROOT}/datasets/cicddos2019/processed/full_store \ + --target-flows ${ROOT}/datasets/cicddos2019/processed/flows.parquet \ + --target-flow-features ${ROOT}/datasets/cicddos2019/processed/flow_features.parquet \ + --out ${ddos_out} \ + --n-benign 10000 --n-attack 10000 --seed 42 --T 64 --batch-size 256 --n-steps 16 \ + > ${CROSS_DIR}/route_ac_combo_seed${seed}_to_cicddos2019.log 2>&1 + fi +done +echo "[gpu1 done]" +} > /tmp/ac_eval_gpu1.log 2>&1 & +GPU1=$! + +wait $GPU0 +wait $GPU1 +echo "[all ac combo evals done]" diff --git a/scripts/aggregate/run_all_phase1.sh b/scripts/aggregate/run_all_phase1.sh new file mode 100755 index 0000000..852a18e --- /dev/null +++ b/scripts/aggregate/run_all_phase1.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# Run phase1 eval on all routes after trainings complete. 
+# Splits across 2 GPUs in parallel chains. + +set -e +ROOT=/home/chy/JANUS +UNIFIED_EVAL=${ROOT}/artifacts/verify_2026_04_24/eval_phase1_unified.py +MIXED_EVAL=${ROOT}/Mixed_CFM/eval_phase1.py + +cd ${ROOT} + +# GPU 0: baselines + route_a (6 models) +{ +for prefix in baseline_ciciot2023 route_a_causal_ciciot2023; do + for seed in 42 43 44; do + name=${prefix}_seed${seed} + md=${ROOT}/artifacts/route_comparison/${name} + [ -f "${md}/model.pt" ] || continue + [ -f "${md}/phase1_summary.json" ] && continue + echo "[GPU0 eval] ${name}" + cd ${ROOT}/Unified_CFM + CUDA_VISIBLE_DEVICES=0 stdbuf -oL uv run --no-sync python -u ${UNIFIED_EVAL} \ + --model-dir ${md} --out-dir ${md} \ + --batch-size 256 --n-steps 16 --jacobian-n-eps 4 \ + --n-val-cap 5000 --n-atk-cap 10000 \ + > ${md}/phase1.log 2>&1 + done +done +echo "[GPU0 done]" +} & +GPU0_PID=$! + +# GPU 1: route_b + route_c (6 models) +{ +for seed in 42 43 44; do + name=route_b_spectral_ciciot2023_seed${seed} + md=${ROOT}/artifacts/route_comparison/${name} + [ -f "${md}/model.pt" ] || continue + [ -f "${md}/phase1_summary.json" ] && continue + echo "[GPU1 eval] ${name}" + cd ${ROOT}/Unified_CFM + CUDA_VISIBLE_DEVICES=1 stdbuf -oL uv run --no-sync python -u ${UNIFIED_EVAL} \ + --model-dir ${md} --out-dir ${md} \ + --batch-size 256 --n-steps 16 --jacobian-n-eps 4 \ + --n-val-cap 5000 --n-atk-cap 10000 \ + > ${md}/phase1.log 2>&1 +done +for seed in 42 43 44; do + name=route_c_mixed_ciciot2023_seed${seed} + md=${ROOT}/artifacts/route_comparison/${name} + [ -f "${md}/model.pt" ] || continue + [ -f "${md}/phase1_summary.json" ] && continue + echo "[GPU1 eval] ${name}" + cd ${ROOT}/Mixed_CFM + CUDA_VISIBLE_DEVICES=1 stdbuf -oL uv run --no-sync python -u ${MIXED_EVAL} \ + --model-dir ${md} --out-dir ${md} \ + --batch-size 256 --n-steps 16 \ + --n-val-cap 5000 --n-atk-cap 10000 \ + > ${md}/phase1.log 2>&1 +done +echo "[GPU1 done]" +} & +GPU1_PID=$! 
+ +wait $GPU0_PID +wait $GPU1_PID +echo "[all phase1 done]" +cd ${ROOT} && uv run --no-sync python scripts/aggregate/aggregate_results.py diff --git a/scripts/aggregate/run_cross_all.sh b/scripts/aggregate/run_cross_all.sh new file mode 100755 index 0000000..96db7ba --- /dev/null +++ b/scripts/aggregate/run_cross_all.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# Cross-dataset eval for all 4 routes × 2 targets × 3 seeds = 24 runs. +# Source: CICIoT2023 (where all models were trained). +# Targets: CICIDS2017 + CICDDoS2019. + +set -e +ROOT=/home/chy/JANUS +UNIFIED_EVAL=${ROOT}/artifacts/verify_2026_04_24/eval_phase2_cross_cicddos2019.py +MIXED_EVAL=${ROOT}/Mixed_CFM/eval_cross.py +CROSS_DIR=${ROOT}/artifacts/route_comparison/cross +mkdir -p ${CROSS_DIR} + +# Target dataset paths +declare -A TARGETS +TARGETS[cicids2017_store]=${ROOT}/datasets/cicids2017/processed/full_store +TARGETS[cicids2017_flows]=${ROOT}/datasets/cicids2017/processed/flows.parquet +TARGETS[cicids2017_features]=${ROOT}/datasets/cicids2017/processed/flow_features.parquet +TARGETS[cicids2017_features_spectral]=${ROOT}/datasets/cicids2017/processed/flow_features_spectral.parquet + +TARGETS[cicddos2019_store]=${ROOT}/datasets/cicddos2019/processed/full_store +TARGETS[cicddos2019_flows]=${ROOT}/datasets/cicddos2019/processed/flows.parquet +TARGETS[cicddos2019_features]=${ROOT}/datasets/cicddos2019/processed/flow_features.parquet +TARGETS[cicddos2019_features_spectral]=${ROOT}/datasets/cicddos2019/processed/flow_features_spectral.parquet + +run_unified_eval() { + local gpu=$1 model_dir=$2 target=$3 features=$4 out_name=$5 + local out=${CROSS_DIR}/${out_name}.json + [ -f "${out}" ] && { echo "[skip] ${out_name}"; return; } + echo "[gpu${gpu} eval] ${out_name}" + cd ${ROOT}/Unified_CFM + CUDA_VISIBLE_DEVICES=${gpu} stdbuf -oL uv run --no-sync python -u ${UNIFIED_EVAL} \ + --model-dir ${model_dir} \ + --target-store ${TARGETS[${target}_store]} \ + --target-flows ${TARGETS[${target}_flows]} \ +
--target-flow-features ${features} \ + --out ${out} \ + --n-benign 10000 --n-attack 10000 --seed 42 \ + --T 64 --batch-size 256 --n-steps 16 \ + > ${CROSS_DIR}/${out_name}.log 2>&1 +} + +run_mixed_eval() { + local gpu=$1 model_dir=$2 target=$3 out_name=$4 + local out=${CROSS_DIR}/${out_name}.json + [ -f "${out}" ] && { echo "[skip] ${out_name}"; return; } + echo "[gpu${gpu} mixed eval] ${out_name}" + cd ${ROOT}/Mixed_CFM + CUDA_VISIBLE_DEVICES=${gpu} stdbuf -oL uv run --no-sync python -u ${MIXED_EVAL} \ + --model-dir ${model_dir} \ + --target-store ${TARGETS[${target}_store]} \ + --target-flows ${TARGETS[${target}_flows]} \ + --target-flow-features ${TARGETS[${target}_features]} \ + --out ${out} \ + --n-benign 10000 --n-attack 10000 --seed 42 \ + --T 64 --batch-size 256 --n-steps 16 \ + > ${CROSS_DIR}/${out_name}.log 2>&1 +} + +# === GPU 0 chain: baselines + route_a, both targets === +{ +for prefix_route in "baseline_ciciot2023:baseline" "route_a_causal_ciciot2023:route_a_causal"; do + prefix=${prefix_route%:*} + short=${prefix_route#*:} + for seed in 42 43 44; do + md=${ROOT}/artifacts/route_comparison/${prefix}_seed${seed} + [ -f "${md}/model.pt" ] || continue + for target in cicids2017 cicddos2019; do + run_unified_eval 0 "${md}" "${target}" "${TARGETS[${target}_features]}" \ + "${short}_seed${seed}_to_${target}" + done + done +done +echo "[gpu0 cross chain done]" +} > /tmp/cross_gpu0.log 2>&1 & +GPU0=$! 
+ +# === GPU 1 chain: route_b (uses spectral features) + route_c (mixed) === +{ +# route_b: must use flow_features_spectral.parquet +for seed in 42 43 44; do + md=${ROOT}/artifacts/route_comparison/route_b_spectral_ciciot2023_seed${seed} + [ -f "${md}/model.pt" ] || continue + for target in cicids2017 cicddos2019; do + run_unified_eval 1 "${md}" "${target}" "${TARGETS[${target}_features_spectral]}" \ + "route_b_spectral_seed${seed}_to_${target}" + done +done + +# route_c: Mixed_CFM eval (uses canonical flow_features) +for seed in 42 43 44; do + md=${ROOT}/artifacts/route_comparison/route_c_mixed_ciciot2023_seed${seed} + [ -f "${md}/model.pt" ] || continue + for target in cicids2017 cicddos2019; do + run_mixed_eval 1 "${md}" "${target}" \ + "route_c_mixed_seed${seed}_to_${target}" + done +done +echo "[gpu1 cross chain done]" +} > /tmp/cross_gpu1.log 2>&1 & +GPU1=$! + +wait $GPU0 +wait $GPU1 +echo "[all cross done]" +ls -la ${CROSS_DIR}/*.json | wc -l diff --git a/scripts/aggregate/run_full_cross_matrix.sh b/scripts/aggregate/run_full_cross_matrix.sh new file mode 100755 index 0000000..fa83217 --- /dev/null +++ b/scripts/aggregate/run_full_cross_matrix.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# Run all missing cross-direction evals for A+C combo. +# Targets are routed to packets-npz or full_store as appropriate. 
+ +set -e +ROOT=/home/chy/JANUS +EVAL=${ROOT}/Mixed_CFM/eval_cross.py +CROSS_DIR=${ROOT}/artifacts/route_comparison/cross +mkdir -p ${CROSS_DIR} + +# Target paths +TGT_iscxtor2016_npz=${ROOT}/datasets/iscxtor2016/processed/packets.npz +TGT_iscxtor2016_flows=${ROOT}/datasets/iscxtor2016/processed/flows.parquet +TGT_iscxtor2016_features=${ROOT}/datasets/iscxtor2016/processed/flow_features.parquet +TGT_iscxtor2016_label=nontor +TGT_iscxtor2016_natk=1888 + +TGT_cicids2017_store=${ROOT}/datasets/cicids2017/processed/full_store +TGT_cicids2017_flows=${ROOT}/datasets/cicids2017/processed/flows.parquet +TGT_cicids2017_features=${ROOT}/datasets/cicids2017/processed/flow_features.parquet +TGT_cicids2017_label=normal + +TGT_cicddos2019_store=${ROOT}/datasets/cicddos2019/processed/full_store +TGT_cicddos2019_flows=${ROOT}/datasets/cicddos2019/processed/flows.parquet +TGT_cicddos2019_features=${ROOT}/datasets/cicddos2019/processed/flow_features.parquet +TGT_cicddos2019_label=normal + +TGT_ciciot2023_store=${ROOT}/datasets/ciciot2023/processed/full_store +TGT_ciciot2023_flows=${ROOT}/datasets/ciciot2023/processed/full_store/flows.parquet +TGT_ciciot2023_features=${ROOT}/datasets/ciciot2023/processed/flow_features.parquet +TGT_ciciot2023_label=normal + +run_one() { + local gpu=$1 src=$2 tgt=$3 seed=$4 + local md=${ROOT}/artifacts/route_comparison/route_ac_combo_${src}_seed${seed} + local out=${CROSS_DIR}/route_ac_combo_seed${seed}_${src}_to_${tgt}.json + if [ -f "${out}" ]; then echo "[skip] ${src}→${tgt} seed${seed}"; return; fi + if [ ! 
-f "${md}/model.pt" ]; then echo "[missing] ${md}/model.pt"; return; fi + + # Resolve target args + local tgt_args + if [ "${tgt}" = "iscxtor2016" ]; then + tgt_args="--target-packets-npz ${TGT_iscxtor2016_npz} --target-flows ${TGT_iscxtor2016_flows} --target-flow-features ${TGT_iscxtor2016_features} --benign-label nontor --n-attack 1888" + elif [ "${tgt}" = "cicids2017" ]; then + tgt_args="--target-store ${TGT_cicids2017_store} --target-flows ${TGT_cicids2017_flows} --target-flow-features ${TGT_cicids2017_features} --benign-label normal --n-attack 10000" + elif [ "${tgt}" = "cicddos2019" ]; then + tgt_args="--target-store ${TGT_cicddos2019_store} --target-flows ${TGT_cicddos2019_flows} --target-flow-features ${TGT_cicddos2019_features} --benign-label normal --n-attack 10000" + elif [ "${tgt}" = "ciciot2023" ]; then + tgt_args="--target-store ${TGT_ciciot2023_store} --target-flows ${TGT_ciciot2023_flows} --target-flow-features ${TGT_ciciot2023_features} --benign-label normal --n-attack 10000" + fi + + echo "[gpu${gpu}] ${src} → ${tgt} seed${seed}" + cd ${ROOT}/Mixed_CFM + CUDA_VISIBLE_DEVICES=${gpu} stdbuf -oL uv run --no-sync python -u ${EVAL} \ + --model-dir ${md} \ + ${tgt_args} \ + --out ${out} \ + --n-benign 10000 --seed 42 --T 64 --batch-size 256 --n-steps 16 \ + > ${CROSS_DIR}/route_ac_combo_seed${seed}_${src}_to_${tgt}.log 2>&1 +} + +# 8 missing directions × 3 seeds = 24 evals +# Split across 2 GPUs to balance load +{ +for dir in "ciciot2023:iscxtor2016" "cicids2017:iscxtor2016" "cicddos2019:iscxtor2016" "iscxtor2016:cicids2017"; do + src=${dir%:*}; tgt=${dir#*:} + for seed in 42 43 44; do + run_one 0 ${src} ${tgt} ${seed} + done +done +echo "[gpu0 done]" +} > /tmp/cross_matrix_gpu0.log 2>&1 & +G0=$! 
+ +{ +for dir in "cicids2017:ciciot2023" "cicddos2019:ciciot2023" "iscxtor2016:cicddos2019" "iscxtor2016:ciciot2023"; do + src=${dir%:*}; tgt=${dir#*:} + for seed in 42 43 44; do + run_one 1 ${src} ${tgt} ${seed} + done +done +echo "[gpu1 done]" +} > /tmp/cross_matrix_gpu1.log 2>&1 & +G1=$! + +wait $G0 +wait $G1 +echo "[all done]" diff --git a/scripts/aggregate/run_phase1_all.sh b/scripts/aggregate/run_phase1_all.sh new file mode 100755 index 0000000..5fbce13 --- /dev/null +++ b/scripts/aggregate/run_phase1_all.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Run phase1 eval on the baseline and Route A route_comparison models listed below. +# Output: ${model_dir}/phase1_summary.json + ${model_dir}/phase1_scores.npz +# +# Usage: +# bash scripts/aggregate/run_phase1_all.sh [GPU_ID] +# +# Default GPU_ID = 0. Each eval takes ~3-5 min with the caps below. + +set -e +GPU_ID="${1:-0}" +ROOT=/home/chy/JANUS +EVAL=${ROOT}/artifacts/verify_2026_04_24/eval_phase1_unified.py + +models=( + baseline_ciciot2023_seed42 + baseline_ciciot2023_seed43 + baseline_ciciot2023_seed44 + route_a_causal_ciciot2023_seed42 + route_a_causal_ciciot2023_seed43 + route_a_causal_ciciot2023_seed44 +) + +cd ${ROOT}/Unified_CFM +for name in "${models[@]}"; do + model_dir=${ROOT}/artifacts/route_comparison/${name} + if [ ! -f "${model_dir}/model.pt" ]; then + echo "[skip] ${name}: model.pt missing" + continue + fi + out_dir=${model_dir} + if [ -f "${out_dir}/phase1_summary.json" ]; then + echo "[skip] ${name}: phase1_summary.json exists" + continue + fi + echo "[eval] ${name}" + CUDA_VISIBLE_DEVICES=${GPU_ID} stdbuf -oL uv run --no-sync python -u ${EVAL} \ + --model-dir ${model_dir} --out-dir ${out_dir} \ + --batch-size 256 --n-steps 16 \ + --jacobian-n-eps 4 \ + --n-val-cap 5000 --n-atk-cap 10000 \ + 2>&1 | tee ${model_dir}/phase1.log | tail -5 + echo "[done] ${name}" +done +echo "[all done]"