Move aggregator scripts to scripts/aggregate/ (preserve tools before nuking artifacts/)
This commit is contained in:
83
scripts/aggregate/PROTOCOL.md
Normal file
83
scripts/aggregate/PROTOCOL.md
Normal file
@@ -0,0 +1,83 @@
|
||||
# Route Comparison Protocol
|
||||
|
||||
Goal: compare three FM-mechanism × traffic-property route variants on a unified
|
||||
training base. All routes start from the current `Unified_CFM` SOTA recipe and
|
||||
change one mechanism axis.
|
||||
|
||||
## Unified base (LOCKED)
|
||||
|
||||
| Item | Value |
|
||||
|---|---|
|
||||
| Dataset | CICIoT2023 |
|
||||
| Source store | `datasets/ciciot2023/processed/full_store/` |
|
||||
| Flows | `datasets/ciciot2023/processed/full_store/flows.parquet` |
|
||||
| Flow features | `datasets/ciciot2023/processed/flow_features.parquet` (canonical 20-d) |
|
||||
| Train: benign | 10,000 (Shafir within-dataset protocol) |
|
||||
| Sequence length | T = 64 |
|
||||
| Packet preprocess | `mixed_dequant` (Routes A/B); raw binaries (Route C) |
|
||||
| Benign split | 80/20, `split_seed=42` |
|
||||
| Val cap | 10,000 |
|
||||
| Attack cap | 20,000 (stratified) |
|
||||
| Multi-seed | {42, 43, 44} |
|
||||
|
||||
## Architecture base (LOCKED)
|
||||
|
||||
| Item | Value |
|
||||
|---|---|
|
||||
| `d_model` | 128 |
|
||||
| `n_layers` | 4 |
|
||||
| `n_heads` | 4 |
|
||||
| `mlp_ratio` | 4.0 |
|
||||
| `time_dim` | 64 |
|
||||
| `sigma` | 0.1 |
|
||||
| `use_ot` | True |
|
||||
| `lambda_flow / lambda_packet` | 0.3 / 0.3 |
|
||||
| `packet_mask_ratio` | 0.5 |
|
||||
| Optimizer | AdamW, lr=3e-4, wd=0.01, grad_clip=1.0 |
|
||||
| Schedule | CosineAnnealingLR over total steps |
|
||||
| Epochs | 50 |
|
||||
| Batch size | 256 |
|
||||
|
||||
## Routes
|
||||
|
||||
| Route | Mechanism axis | Traffic property targeted |
|
||||
|---|---|---|
|
||||
| **Baseline** | Standard UnifiedCFM (current SOTA) | — |
|
||||
| **A: Causal** | Packet-causal attention mask | Protocol causality (TCP/HTTP handshake) |
|
||||
| **B: Spectral** | Append K=8-band DFT of (size, IAT) — 32 dims — to flow features (`flow_dim` 20→52); model architecture unchanged | Burstiness / LRD / self-similarity |
|
||||
| **C: Mixed FM** | Continuous-CFM on (size,IAT,win) + DFM on flags | Discrete-continuous mixed channels |
|
||||
|
||||
Route D (Edit Flows) is deferred until A/B/C show signal.
|
||||
|
||||
## Reporting
|
||||
|
||||
Each route × seed produces:
|
||||
|
||||
```
|
||||
artifacts/route_comparison/<route>_seed<S>/
|
||||
├── model.pt
|
||||
├── config.yaml # actual config used
|
||||
├── history.json
|
||||
├── phase1_summary.json # 34-score per-attack-class AUROC table
|
||||
└── train.log
|
||||
```
|
||||
|
||||
Final aggregate at `artifacts/route_comparison/RESULTS.md`:
|
||||
|
||||
```
|
||||
| Route | terminal_norm | route-specific score | param count | train wall |
|
||||
| baseline | 0.962 (existing) | — | 1.23M | ~2 min |
|
||||
| A | ? | causal_surprisal_packet_median | ? | ? |
|
||||
| B | ? | velocity_freq | ? | ? |
|
||||
| C | ? | nll_disc + terminal_cont | ? | ? |
|
||||
```
|
||||
|
||||
Plus per-attack-class breakdown for the top 10 attack labels by support.
|
||||
|
||||
## Baseline reference (single-seed, from existing run)
|
||||
|
||||
`artifacts/runs/unified_cfm_ciciot2023_2026_04_29/`:
|
||||
- 50 epochs, σ=0.1, λ=0.3
|
||||
- final `auroc_terminal_norm` = **0.962**
|
||||
- This is the number to compare against; we'll re-run it under multi-seed for
|
||||
fair comparison.
|
||||
174
scripts/aggregate/aggregate_cross.py
Normal file
174
scripts/aggregate/aggregate_cross.py
Normal file
@@ -0,0 +1,174 @@
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
from sklearn.metrics import roc_auc_score
|
||||
# Directory holding all route-comparison artifacts: parents[2] of this file's
# resolved path, then artifacts/route_comparison.
ROOT = Path(__file__).resolve().parents[2] / 'artifacts' / 'route_comparison'
# Sub-directory scanned for cross-dataset evaluation outputs (*.json / *.npz).
CROSS_DIR = ROOT / 'cross'
# Splits a cross-eval file stem into a route prefix, a numeric seed and one of
# the two accepted target dataset names.
NAME_RE = re.compile('^(?P<route>.+?)_seed(?P<seed>\\d+)_to_(?P<target>cicids2017|cicddos2019)$')
# (display label, file/directory prefix) pairs, in the order used for report rows/columns.
ROUTES = [('baseline', 'baseline'), ('A: causal', 'route_a_causal'), ('B: spectral', 'route_b_spectral'), ('C: mixed', 'route_c_mixed'), ('A+C combo', 'route_ac_combo')]
# Target datasets reported on; same names that NAME_RE accepts.
TARGETS = ['cicids2017', 'cicddos2019']
# Score names listed (in this order) in the "All key scores" tables.
PRIMARY_SCORES = ['terminal_norm', 'terminal_flow', 'terminal_packet', 'flow_consistency', 'packet_consistency', 'consistency_total', 'causal_surprisal_packet_median', 'causal_surprisal_total', 'direction_drift_packet_median', 'pna_packet_median', 'kappa2_speed2norm_packet_median', 'curvature_packet', 'disc_nll_total', 'disc_nll_ch3', 'disc_nll_ch7']
|
||||
|
||||
def _collect() -> dict[tuple[str, str], dict[int, dict]]:
    """Load every cross-eval JSON in ``CROSS_DIR`` whose stem matches ``NAME_RE``.

    Returns:
        A mapping ``(route, target) -> {seed: parsed JSON}``. Files are visited
        in sorted path order; stems that do not match ``NAME_RE`` are ignored.
    """
    grouped: dict[tuple[str, str], dict[int, dict]] = {}
    for json_path in sorted(CROSS_DIR.glob('*.json')):
        parsed = NAME_RE.match(json_path.stem)
        if parsed is None:
            continue
        (route, seed, target) = parsed.group('route', 'seed', 'target')
        # Parse first so a malformed file never leaves a half-filled entry behind.
        payload = json.loads(json_path.read_text())
        grouped.setdefault((route, target), {})[int(seed)] = payload
    return grouped
|
||||
|
||||
def _ensemble_sweep(npz_path: Path) -> dict[float, float] | None:
|
||||
if not npz_path.exists():
|
||||
return None
|
||||
z = np.load(npz_path, allow_pickle=True)
|
||||
keys = set(z.files)
|
||||
if 'b_terminal_norm' not in keys or 'b_disc_nll_total' not in keys:
|
||||
return None
|
||||
v_tn = z['b_terminal_norm']
|
||||
a_tn = z['a_terminal_norm']
|
||||
v_dn = z['b_disc_nll_total']
|
||||
a_dn = z['a_disc_nll_total']
|
||||
|
||||
def zsc(v, a):
|
||||
(mu, sd) = (v.mean(), v.std() + 1e-09)
|
||||
return ((v - mu) / sd, (a - mu) / sd)
|
||||
(v_tn_z, a_tn_z) = zsc(v_tn, a_tn)
|
||||
(v_dn_z, a_dn_z) = zsc(v_dn, a_dn)
|
||||
out = {}
|
||||
for alpha in (0.0, 0.5, 0.7, 0.8, 0.9, 1.0):
|
||||
s_v = alpha * v_tn_z + (1.0 - alpha) * v_dn_z
|
||||
s_a = alpha * a_tn_z + (1.0 - alpha) * a_dn_z
|
||||
y = np.r_[np.zeros(len(s_v)), np.ones(len(s_a))]
|
||||
s = np.r_[s_v, s_a]
|
||||
out[alpha] = float(roc_auc_score(y, s))
|
||||
return out
|
||||
|
||||
def _mean_std(vs: list[float]) -> tuple[float, float]:
|
||||
arr = np.asarray([v for v in vs if v == v], dtype=np.float64)
|
||||
if arr.size == 0:
|
||||
return (float('nan'), float('nan'))
|
||||
return (float(arr.mean()), float(arr.std()))
|
||||
|
||||
def main() -> None:
    """Render the cross-dataset report and write it to ``ROOT / 'CROSS_RESULTS.md'``.

    Sections, in order: a ``terminal_norm`` table (within-dataset reference plus
    one column per target), each route's best-scoring score per target, the
    full ``PRIMARY_SCORES`` table per target, the Route C alpha sweep from
    ``_ensemble_sweep``, and a run inventory. The path written is printed.
    """
    nan = float('nan')

    def _auroc(summary: dict, score: str) -> float:
        # Tolerate summaries without an 'overall' section or without the score.
        return summary.get('overall', {}).get(score, {}).get('auroc', nan)

    def _fmt(mean: float, std: float) -> str:
        return f'{mean:.4f} ± {std:.4f}' if mean == mean else '—'

    data = _collect()
    rows: list[str] = [
        '# Cross-Dataset Eval — CICIoT2023 → {CICIDS2017, CICDDoS2019}',
        '',
        "All models trained on CICIoT2023 (10K benign), evaluated on each target's",
        '10K benign + 10K stratified attack. Source-domain norm stats applied.',
        '3 seeds each. AUROC mean ± std.',
        '',
        '## Primary score: `terminal_norm`',
        '',
        '| Route | within-CICIoT2023 (ref) | → CICIDS2017 | → CICDDoS2019 |',
        '|---|---|---|---|',
    ]

    # Hard-coded (mean, std) used when no within-dataset summary is found on disk.
    within_fallback = {'baseline': (0.9612, 0.0017), 'A: causal': (0.9636, 0.0006), 'B: spectral': (0.9619, 0.0013), 'C: mixed': (0.9625, 0.0028), 'A+C combo': (0.9587, 0.0017)}
    within_terminal: dict[str, tuple[float, float]] = {}
    for (label, prefix) in ROUTES:
        # NOTE(review): this looks for '<prefix>_seed*' run directories; confirm
        # that matches the on-disk naming of the within-dataset runs.
        vals: list[float] = []
        for f in sorted(ROOT.glob(f'{prefix}_seed*/phase1_summary.json')):
            try:
                v = json.loads(f.read_text()).get('overall', {}).get('terminal_norm', {}).get('auroc')
            except (OSError, ValueError, AttributeError) as err:
                # Still best-effort, but no longer silent about what was skipped.
                print(f'[warn] skipping unreadable summary {f}: {err}')
                continue
            if v is not None:
                vals.append(v)
        within_terminal[label] = _mean_std(vals) if vals else within_fallback.get(label, (nan, nan))

    for (label, prefix) in ROUTES:
        (wm, ws) = within_terminal[label]
        cells = [label, f'{wm:.4f} ± {ws:.4f}']
        for tgt in TARGETS:
            seeds = data.get((prefix, tgt), {})
            cells.append(_fmt(*_mean_std([_auroc(s, 'terminal_norm') for s in seeds.values()])))
        rows.append('| ' + ' | '.join(cells) + ' |')
    rows.append('')

    rows.append("## Each route's best score per target")
    rows.append('')
    for tgt in TARGETS:
        rows.append(f'### → {tgt}')
        rows.append('')
        rows.append("| Route | Best score | AUROC | Δ (vs same-route's terminal_norm) |")
        rows.append('|---|---|---|---|')
        for (label, prefix) in ROUTES:
            seeds = data.get((prefix, tgt), {})
            if not seeds:
                rows.append(f'| {label} | — | — | — |')
                continue
            # score name -> per-seed AUROC values
            score_lists: dict[str, list[float]] = {}
            for s in seeds.values():
                for (k, v) in s.get('overall', {}).items():
                    score_lists.setdefault(k, []).append(v.get('auroc', nan))
            mean_per_score = {k: _mean_std(v)[0] for (k, v) in score_lists.items()}
            mean_per_score = {k: v for (k, v) in mean_per_score.items() if v == v}
            if not mean_per_score:
                rows.append(f'| {label} | — | — | — |')
                continue
            best = max(mean_per_score, key=mean_per_score.get)
            best_v = mean_per_score[best]
            best_sd = _mean_std(score_lists[best])[1]
            tn = mean_per_score.get('terminal_norm', nan)
            delta = f'{best_v - tn:+.4f}' if tn == tn else '—'
            rows.append(f'| {label} | `{best}` | {best_v:.4f} ± {best_sd:.4f} | {delta} |')
        rows.append('')

    for tgt in TARGETS:
        rows.append(f'## All key scores → {tgt}')
        rows.append('')
        rows.append('| Score | ' + ' | '.join((label for (label, _) in ROUTES)) + ' |')
        rows.append('|---' * (1 + len(ROUTES)) + '|')
        for sc in PRIMARY_SCORES:
            cells = [f'`{sc}`']
            for (label, prefix) in ROUTES:
                seeds = data.get((prefix, tgt), {})
                cells.append(_fmt(*_mean_std([_auroc(s, sc) for s in seeds.values()])))
            rows.append('| ' + ' | '.join(cells) + ' |')
        rows.append('')

    for tgt in TARGETS:
        rows.append(f'## Route C ensemble (terminal_norm + disc_nll) → {tgt}')
        rows.append('')
        c_seeds = data.get(('route_c_mixed', tgt), {})
        if c_seeds:
            alphas = (0.0, 0.5, 0.7, 0.8, 0.9, 1.0)
            ordered_seeds = sorted(c_seeds.keys())
            rows.append('| α | ' + ' | '.join((f'seed{s}' for s in ordered_seeds)) + ' | mean ± std |')
            rows.append('|---' * (2 + len(c_seeds)) + '|')
            # A missing/incomplete archive yields an empty sweep (all cells NaN).
            seed_sweeps = {s: _ensemble_sweep(CROSS_DIR / f'route_c_mixed_seed{s}_to_{tgt}.npz') or {} for s in ordered_seeds}
            for a in alphas:
                cells = [f'{a:.2f}']
                vals = []
                for s in ordered_seeds:
                    v = seed_sweeps[s].get(a, nan)
                    cells.append(f'{v:.4f}')
                    vals.append(v)
                (m, sd) = _mean_std(vals)
                cells.append(f'**{m:.4f} ± {sd:.4f}**')
                rows.append('| ' + ' | '.join(cells) + ' |')
        rows.append('')

    rows.append('## Run inventory')
    rows.append('')
    for (label, prefix) in ROUTES:
        for tgt in TARGETS:
            seeds = sorted(data.get((prefix, tgt), {}).keys())
            rows.append(f"- {label} → {tgt}: seeds = {(seeds if seeds else '(none)')}")

    out = ROOT / 'CROSS_RESULTS.md'
    # The report contains non-ASCII characters (→, α, Δ, ±); pin the encoding so
    # the write does not depend on the platform's locale default.
    out.write_text('\n'.join(rows) + '\n', encoding='utf-8')
    print(f'[wrote] {out}')


if __name__ == '__main__':
    main()
|
||||
176
scripts/aggregate/aggregate_cross_matrix.py
Normal file
176
scripts/aggregate/aggregate_cross_matrix.py
Normal file
@@ -0,0 +1,176 @@
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
from sklearn.covariance import OAS
|
||||
from sklearn.metrics import roc_auc_score
|
||||
# Directory holding all route-comparison artifacts: parents[2] of this file's
# resolved path, then artifacts/route_comparison.
ROOT = Path(__file__).resolve().parents[2] / 'artifacts' / 'route_comparison'
# Sub-directory holding cross-dataset score archives and summaries.
CROSS_DIR = ROOT / 'cross'
# Dataset names used both as matrix rows (source) and columns (target).
DATASETS = ['iscxtor2016', 'cicids2017', 'cicddos2019', 'ciciot2023']
# Seeds aggregated into every matrix cell.
SEEDS = [42, 43, 44]
|
||||
|
||||
def _mahal_eval(npz_path: Path, val_prefix: str, atk_prefix: str) -> float:
|
||||
if not npz_path.exists():
|
||||
return float('nan')
|
||||
z = np.load(npz_path, allow_pickle=True)
|
||||
keys = sorted([k.replace(val_prefix, '') for k in z.files if k.startswith(val_prefix) and (not k.endswith('labels'))])
|
||||
val_S = np.stack([z[f'{val_prefix}{k}'] for k in keys], axis=1)
|
||||
atk_S = np.stack([z[f'{atk_prefix}{k}'] for k in keys], axis=1)
|
||||
val_S = np.nan_to_num(val_S, nan=0.0, posinf=1000000.0, neginf=-1000000.0)
|
||||
atk_S = np.nan_to_num(atk_S, nan=0.0, posinf=1000000.0, neginf=-1000000.0)
|
||||
if len(val_S) < 50 or len(atk_S) < 50:
|
||||
return float('nan')
|
||||
y = np.r_[np.zeros(len(val_S)), np.ones(len(atk_S))]
|
||||
K = val_S.shape[1]
|
||||
try:
|
||||
oas = OAS().fit(val_S)
|
||||
inv_cov = np.linalg.inv(oas.covariance_ + 1e-09 * np.eye(K))
|
||||
except Exception:
|
||||
return float('nan')
|
||||
mu = val_S.mean(0)
|
||||
|
||||
def m(S):
|
||||
d = S - mu
|
||||
return np.einsum('ni,ij,nj->n', d, inv_cov, d)
|
||||
s = np.r_[m(val_S), m(atk_S)]
|
||||
s = np.nan_to_num(s, nan=0.0, posinf=1000000000000.0, neginf=-1000000000000.0)
|
||||
try:
|
||||
return float(roc_auc_score(y, s))
|
||||
except ValueError:
|
||||
return float('nan')
|
||||
|
||||
def _within_mahal(ds: str, seed: int) -> float:
    """Mahalanobis AUROC of the within-dataset A+C combo run for *ds* / *seed*.

    Reads ``phase1_scores.npz`` from ``ROOT / route_ac_combo_<ds>_seed<seed>``
    and evaluates it with the ``val_`` / ``atk_`` key prefixes.
    """
    scores_file = ROOT / f'route_ac_combo_{ds}_seed{seed}' / 'phase1_scores.npz'
    return _mahal_eval(scores_file, 'val_', 'atk_')
|
||||
|
||||
def _within_terminal_norm(ds: str, seed: int) -> float:
    """``terminal_norm`` AUROC of the within-dataset A+C combo run for *ds* / *seed*.

    Returns NaN when ``phase1_summary.json`` is absent or carries no
    ``terminal_norm`` AUROC under ``overall``.
    """
    summary_file = ROOT / f'route_ac_combo_{ds}_seed{seed}' / 'phase1_summary.json'
    if summary_file.exists():
        overall = json.loads(summary_file.read_text())['overall']
        return overall.get('terminal_norm', {}).get('auroc', float('nan'))
    return float('nan')
|
||||
|
||||
def _src_aliases(src: str) -> list[str]:
|
||||
aliases = [src]
|
||||
if src == 'cicddos2019':
|
||||
aliases.append('ddos2019')
|
||||
return aliases
|
||||
|
||||
def _cross_mahal(src: str, tgt: str, seed: int) -> float:
    """Mahalanobis AUROC for the A+C combo model trained on *src*, scored on *tgt*.

    Candidate archives in ``CROSS_DIR`` are tried in order: one per source
    alias (``route_ac_combo_seed<seed>_<alias>_to_<tgt>.npz``) and, for source
    ``'ciciot2023'`` only, the alias-free ``route_ac_combo_seed<seed>_to_<tgt>.npz``.
    The first existing file is evaluated with the ``b_`` / ``a_`` prefixes;
    NaN is returned when none exists.
    """
    stems = [f'route_ac_combo_seed{seed}_{alias}_to_{tgt}' for alias in _src_aliases(src)]
    if src == 'ciciot2023':
        stems.append(f'route_ac_combo_seed{seed}_to_{tgt}')
    existing = (path for path in (CROSS_DIR / f'{stem}.npz' for stem in stems) if path.exists())
    first_hit = next(existing, None)
    if first_hit is None:
        return float('nan')
    return _mahal_eval(first_hit, 'b_', 'a_')
|
||||
|
||||
def _cross_terminal_norm(src: str, tgt: str, seed: int) -> float:
    """``terminal_norm`` AUROC for the A+C combo model trained on *src*, scored on *tgt*.

    Candidate summaries in ``CROSS_DIR`` are tried in order: one per source
    alias (``route_ac_combo_seed<seed>_<alias>_to_<tgt>.json``) and, for source
    ``'ciciot2023'`` only, the alias-free ``route_ac_combo_seed<seed>_to_<tgt>.json``.
    The first existing file decides the result; NaN is returned when none
    exists or the AUROC entry is missing.
    """
    stems = [f'route_ac_combo_seed{seed}_{alias}_to_{tgt}' for alias in _src_aliases(src)]
    if src == 'ciciot2023':
        stems.append(f'route_ac_combo_seed{seed}_to_{tgt}')
    for stem in stems:
        summary_file = CROSS_DIR / f'{stem}.json'
        if not summary_file.exists():
            continue
        overall = json.loads(summary_file.read_text())['overall']
        return overall.get('terminal_norm', {}).get('auroc', float('nan'))
    return float('nan')
|
||||
|
||||
def _ms(vals: list[float]) -> str:
|
||||
arr = np.asarray([v for v in vals if not np.isnan(v)], dtype=np.float64)
|
||||
if arr.size == 0:
|
||||
return '—'
|
||||
if arr.size == 1:
|
||||
return f'{arr[0]:.4f}'
|
||||
return f'{arr.mean():.4f}±{arr.std():.4f}'
|
||||
|
||||
def main() -> None:
    """Render the source x target matrices and write ``ROOT / 'CROSS_MATRIX.md'``.

    Four sections are produced: the Mahalanobis-OAS AUROC matrix, the
    ``terminal_norm`` AUROC matrix, their per-cell difference, and the
    per-source mean over off-diagonal cells. Diagonal cells use the
    within-dataset helpers, off-diagonal cells the cross helpers. Each cell's
    per-seed values are computed once and reused by every section (the
    evaluation reloads archives and refits the covariance, so recomputing it
    per section is wasted work). The path written is printed.
    """
    nan = float('nan')

    def _finite_mean(vals: list[float]) -> float:
        # Mean of the non-NaN entries; NaN (without a NumPy warning) if none.
        kept = [v for v in vals if not np.isnan(v)]
        return float(np.mean(kept)) if kept else nan

    # (src, tgt) -> per-seed values, evaluated exactly once per cell.
    mahal: dict[tuple[str, str], list[float]] = {}
    tnorm: dict[tuple[str, str], list[float]] = {}
    for src in DATASETS:
        for tgt in DATASETS:
            if src == tgt:
                mahal[(src, tgt)] = [_within_mahal(src, s) for s in SEEDS]
                tnorm[(src, tgt)] = [_within_terminal_norm(src, s) for s in SEEDS]
            else:
                mahal[(src, tgt)] = [_cross_mahal(src, tgt, s) for s in SEEDS]
                tnorm[(src, tgt)] = [_cross_terminal_norm(src, tgt, s) for s in SEEDS]

    matrix_header = '| Source ↓ \\ Target → | ' + ' | '.join(DATASETS) + ' |'
    matrix_sep = '|---' * (1 + len(DATASETS)) + '|'

    def _matrix_rows(table: dict[tuple[str, str], list[float]]) -> list[str]:
        # One markdown row per source; diagonal cells are italicised.
        lines = []
        for src in DATASETS:
            cells = [src]
            for tgt in DATASETS:
                text = _ms(table[(src, tgt)])
                cells.append(f'_{text}_' if src == tgt else text)
            lines.append('| ' + ' | '.join(cells) + ' |')
        return lines

    rows: list[str] = [
        '# Full 4×4 Cross Matrix — A+C combo + Mahalanobis-OAS',
        '',
        '3-seed mean ± std. Diagonal = within-dataset; off-diagonal = cross.',
        'Aggregator: Mahalanobis-OAS over 10-d A+C combo score vector,',
        'fit on **target-dataset benign val only** (no attack labels).',
        '',
        '## Mahalanobis-OAS AUROC (4×4)',
        '',
        matrix_header,
        matrix_sep,
    ]
    rows.extend(_matrix_rows(mahal))
    rows.append('')
    rows.append('(Italic diagonal = within-dataset reference)')
    rows.append('')

    rows.append('## `terminal_norm` AUROC (4×4) — for comparison (selection-bias-free single fixed score)')
    rows.append('')
    rows.append(matrix_header)
    rows.append(matrix_sep)
    rows.extend(_matrix_rows(tnorm))
    rows.append('')

    rows.append('## Δ Mahalanobis − terminal_norm (where positive, Mahalanobis is better)')
    rows.append('')
    rows.append(matrix_header)
    rows.append(matrix_sep)
    for src in DATASETS:
        cells = [src]
        for tgt in DATASETS:
            m = _finite_mean(mahal[(src, tgt)])
            t = _finite_mean(tnorm[(src, tgt)])
            if np.isnan(m) or np.isnan(t):
                cells.append('—')
                continue
            d = m - t
            if abs(d) < 0.005:
                # Differences below 0.005 are shown without emphasis.
                cells.append(f'{d:+.4f}')
            elif d > 0:
                cells.append(f'**{d:+.4f}**')
            else:
                cells.append(f'_{d:+.4f}_')
        rows.append('| ' + ' | '.join(cells) + ' |')
    rows.append('')

    rows.append('## Per-source averaged cross-AUROC (Mahalanobis, off-diagonal mean)')
    rows.append('')
    rows.append('| Source | mean off-diag Mahalanobis | mean off-diag terminal_norm |')
    rows.append('|---|---|---|')
    for src in DATASETS:
        m_offs: list[float] = []
        t_offs: list[float] = []
        for tgt in DATASETS:
            if src == tgt:
                continue
            m_offs.extend(v for v in mahal[(src, tgt)] if not np.isnan(v))
            t_offs.extend(v for v in tnorm[(src, tgt)] if not np.isnan(v))
        m_mean = np.mean(m_offs) if m_offs else nan
        t_mean = np.mean(t_offs) if t_offs else nan
        rows.append(f'| {src} | {m_mean:.4f} | {t_mean:.4f} |')

    out = ROOT / 'CROSS_MATRIX.md'
    # The report contains non-ASCII characters (×, —, ↓, →, Δ, −, ±); pin the
    # encoding so the write does not depend on the platform's locale default.
    out.write_text('\n'.join(rows) + '\n', encoding='utf-8')
    print(f'[wrote] {out}')


if __name__ == '__main__':
    main()
|
||||
84
scripts/aggregate/aggregate_full_sota.py
Normal file
84
scripts/aggregate/aggregate_full_sota.py
Normal file
@@ -0,0 +1,84 @@
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
# Directory holding all route-comparison artifacts: parents[2] of this file's
# resolved path, then artifacts/route_comparison.
ROOT = Path(__file__).resolve().parents[2] / 'artifacts' / 'route_comparison'
# Extracts the trailing seed number from a run-directory name.
SEED_RE = re.compile('_seed(\\d+)$')
# One entry per report row, keyed by display label. Fields read by main():
#   'shafir_baseline' - published reference number, or None when there is none
#   'ours_existing'   - (mean, std) of the existing model's AUROC
#   'ac_prefix'       - run-directory prefix of the A+C combo runs
# 'shafir_ref', 'ours_score' and 'sigma' are carried along as metadata.
EXISTING_SOTA = {'ISCXTor2016 (NonTor → Tor)': {'shafir_baseline': 0.8731, 'shafir_ref': 'Table VI', 'ours_existing': (0.9945, 0.0011), 'ours_score': 'terminal_norm', 'sigma': 0.1, 'ac_prefix': 'route_ac_combo_iscxtor2016'}, 'CICIDS2017 within (Shafir 10k/10k)': {'shafir_baseline': 0.9303, 'shafir_ref': 'Table VII', 'ours_existing': (0.9858, 0.0021), 'ours_score': 'terminal_norm', 'sigma': 0.6, 'ac_prefix': 'route_ac_combo_cicids2017'}, 'CICDDoS2019 within': {'shafir_baseline': 0.93, 'shafir_ref': 'Table IX, row 1', 'ours_existing': (0.996, 0.001), 'ours_score': 'terminal_norm', 'sigma': 0.1, 'ac_prefix': 'route_ac_combo_cicddos2019'}, 'CICIoT2023 within (multi-seed)': {'shafir_baseline': None, 'shafir_ref': None, 'ours_existing': (0.9612, 0.0017), 'ours_score': 'terminal_norm', 'sigma': 0.1, 'ac_prefix': 'route_ac_combo_ciciot2023'}}
|
||||
|
||||
def _seeds(prefix: str) -> dict[int, Path]:
    """Map seed number to run directory for every completed ``<prefix>_seed*`` run.

    A directory under ``ROOT`` counts only if its name ends in ``_seed<digits>``
    and it contains ``phase1_summary.json``. Directories are visited in sorted
    order, so a later duplicate seed replaces an earlier one.
    """
    candidates = ((SEED_RE.search(run_dir.name), run_dir) for run_dir in sorted(ROOT.glob(f'{prefix}_seed*')))
    return {
        int(found.group(1)): run_dir
        for (found, run_dir) in candidates
        if found and (run_dir / 'phase1_summary.json').exists()
    }
|
||||
|
||||
def _load(d: Path) -> dict:
|
||||
return json.loads((d / 'phase1_summary.json').read_text())
|
||||
|
||||
def _mean_std(vs: list[float]) -> tuple[float, float]:
|
||||
arr = np.asarray([v for v in vs if v == v], dtype=np.float64)
|
||||
if arr.size == 0:
|
||||
return (float('nan'), float('nan'))
|
||||
return (float(arr.mean()), float(arr.std()))
|
||||
|
||||
def main() -> None:
    """Render the SOTA comparison and write ``ROOT / 'SOTA_COMPARISON.md'``.

    For every entry of ``EXISTING_SOTA`` the headline table shows the reference
    numbers, the A+C combo mean ± std for ``terminal_norm``,
    ``terminal_packet`` and ``disc_nll_total``, the best of those three, and
    whether that best mean exceeds the existing model's mean. A second section
    lists a fixed set of scores per dataset. Each run summary is parsed once
    and reused by both sections. The path written is printed.
    """
    nan = float('nan')

    def _scores(summaries: list[dict], score: str) -> list[float]:
        return [s.get('overall', {}).get(score, {}).get('auroc', nan) for s in summaries]

    rows: list[str] = [
        '# SOTA Comparison: A+C combo vs existing UnifiedCFM',
        '',
        'All 4 datasets, 3 seeds each, within-dataset Shafir 10K/10K protocol.',
        'Existing UnifiedCFM uses Phase-2 consistency loss (λ_flow=λ_packet=0.3).',
        'A+C combo uses Mixed_CFM (continuous CFM + DFM) + causal-packet attention,',
        '**no Phase-2 consistency loss**. lambda_disc=1.0, sigma=0.1, use_ot=True.',
        '',
        "## Headline: A+C combo's best score per dataset",
        '',
        '| Dataset | Shafir 2026 | Existing UnifiedCFM (SOTA) | A+C combo `terminal_norm` | A+C combo `terminal_packet` | A+C combo `disc_nll_total` | A+C best | New SOTA? |',
        '|---|---|---|---|---|---|---|---|',
    ]

    # label -> {seed: parsed summary}; loaded once instead of once per score.
    loaded: dict[str, dict[int, dict]] = {
        label: {seed: _load(run_dir) for (seed, run_dir) in _seeds(meta['ac_prefix']).items()}
        for (label, meta) in EXISTING_SOTA.items()
    }

    for (label, meta) in EXISTING_SOTA.items():
        reference = meta['shafir_baseline']
        # `is not None` rather than truthiness, so a legitimate 0.0 is still shown.
        shafir_str = f'{reference:.4f}' if reference is not None else '—'
        (existing_m, existing_sd) = meta['ours_existing']
        existing_str = f'{existing_m:.4f} ± {existing_sd:.4f}'
        summaries = list(loaded[label].values())
        if not summaries:
            rows.append(f'| {label} | {shafir_str} | {existing_str} | (running) | — | — | — | — |')
            continue
        (m_t, s_t) = _mean_std(_scores(summaries, 'terminal_norm'))
        (m_p, s_p) = _mean_std(_scores(summaries, 'terminal_packet'))
        (m_d, s_d) = _mean_std(_scores(summaries, 'disc_nll_total'))
        # Only scores with a real mean compete; a NaN terminal_norm must not
        # block a valid alternative (every comparison against NaN is False).
        candidates = [('terminal_norm', m_t, s_t), ('terminal_packet', m_p, s_p), ('disc_nll_total', m_d, s_d)]
        valid = [c for c in candidates if c[1] == c[1]]
        if valid:
            (best_score, best_m, best_sd) = valid[0]
            for (name, mean, std) in valid[1:]:
                if mean > best_m:  # strict: earlier score wins ties
                    (best_score, best_m, best_sd) = (name, mean, std)
            beats = '✅' if best_m > existing_m else '❌'
            best_cell = f'`{best_score}` {best_m:.4f} ± {best_sd:.4f}'
            sota_cell = f'{beats} {best_m - existing_m:+.4f}'
        else:
            best_cell = sota_cell = '—'
        rows.append(f'| {label} | {shafir_str} | {existing_str} | {m_t:.4f} ± {s_t:.4f} | {m_p:.4f} ± {s_p:.4f} | {m_d:.4f} ± {s_d:.4f} | {best_cell} | {sota_cell} |')
    rows.append('')

    rows.append('## Per-dataset full scoring')
    rows.append('')
    score_keys = ['terminal_norm', 'terminal_flow', 'terminal_packet', 'disc_nll_total', 'disc_nll_ch3', 'disc_nll_ch4', 'disc_nll_ch5', 'disc_nll_ch7']
    for label in EXISTING_SOTA:
        rows.append(f'### {label}')
        rows.append('')
        by_seed = loaded[label]
        if not by_seed:
            rows.append('(not yet completed)\n')
            continue
        rows.append('| Score | mean ± std | seeds |')
        rows.append('|---|---|---|')
        summaries = list(by_seed.values())
        seed_list = sorted(by_seed.keys())
        for sc in score_keys:
            (m, sd) = _mean_std(_scores(summaries, sc))
            if m == m:  # skip scores with no usable value
                rows.append(f'| `{sc}` | {m:.4f} ± {sd:.4f} | {seed_list} |')
        rows.append('')

    out = ROOT / 'SOTA_COMPARISON.md'
    # The report contains non-ASCII characters (✅, ❌, λ, ±, —); pin the encoding
    # so the write does not depend on the platform's locale default.
    out.write_text('\n'.join(rows) + '\n', encoding='utf-8')
    print(f'[wrote] {out}')


if __name__ == '__main__':
    main()
|
||||
94
scripts/aggregate/aggregate_results.py
Normal file
94
scripts/aggregate/aggregate_results.py
Normal file
@@ -0,0 +1,94 @@
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
# Directory holding all route-comparison artifacts: parents[2] of this file's
# resolved path, then artifacts/route_comparison.
ROOT = Path(__file__).resolve().parents[2] / 'artifacts' / 'route_comparison'
# Extracts the trailing seed number from a run-directory name.
SEED_RE = re.compile('_seed(\\d+)$')
# (display label, run-directory prefix) pairs, in report column order.
ROUTES = [('baseline', 'baseline_ciciot2023'), ('A: causal', 'route_a_causal_ciciot2023'), ('B: spectral', 'route_b_spectral_ciciot2023'), ('C: mixed', 'route_c_mixed_ciciot2023')]
# Score names listed (in this order) in the "Overall AUROC by score" table.
PRIMARY_SCORES = ['terminal_norm', 'terminal_flow', 'terminal_packet', 'causal_surprisal_packet_median', 'causal_surprisal_packet_max', 'causal_surprisal_total', 'consistency_total', 'flow_consistency', 'packet_consistency', 'kappa2_speed2norm_packet_median', 'direction_drift_packet_median', 'pna_packet_median', 'disc_nll_total', 'disc_nll_ch2', 'disc_nll_ch3', 'disc_nll_ch4', 'disc_nll_ch5', 'disc_nll_ch6', 'disc_nll_ch7']
|
||||
|
||||
def _collect(prefix: str) -> dict[int, dict]:
    """Load ``phase1_summary.json`` from every ``<prefix>_seed*`` run under ``ROOT``.

    Returns:
        ``{seed: parsed summary}``. Directories whose name does not end in
        ``_seed<digits>`` or that lack the summary file are skipped.
    """
    summaries: dict[int, dict] = {}
    for run_dir in sorted(ROOT.glob(f'{prefix}_seed*')):
        seed_match = SEED_RE.search(run_dir.name)
        if seed_match is None:
            continue
        summary_file = run_dir / 'phase1_summary.json'
        if summary_file.exists():
            summaries[int(seed_match.group(1))] = json.loads(summary_file.read_text())
    return summaries
|
||||
|
||||
def _mean_std(values: list[float]) -> tuple[float, float]:
|
||||
arr = np.asarray([v for v in values if v == v], dtype=np.float64)
|
||||
if arr.size == 0:
|
||||
return (float('nan'), float('nan'))
|
||||
return (float(arr.mean()), float(arr.std()))
|
||||
|
||||
def main() -> None:
    """Render the within-dataset route comparison and write ``ROOT / 'RESULTS.md'``.

    Sections, in order: overall AUROC (mean ± std over seeds) for every score
    in ``PRIMARY_SCORES`` and every route in ``ROUTES``; per-attack-class
    ``terminal_norm`` values for the 12 classes with the largest ``_n`` in the
    baseline summaries; and a run inventory. The path written is printed.
    """
    nan = float('nan')
    routes_data = {label: _collect(prefix) for (label, prefix) in ROUTES}
    route_labels = [label for (label, _) in ROUTES]

    rows: list[str] = [
        '# Route Comparison Results — CICIoT2023',
        '',
        'All routes trained on CICIoT2023 with the protocol locked in `PROTOCOL.md`. ',
        'Numbers are AUROC over benign val (10k cap) vs all attacks (10k cap), ',
        '3 seeds each. ± std across seeds.',
        '',
        '## Overall AUROC by score',
        '',
        '| Score | ' + ' | '.join(route_labels) + ' |',
        '|---' * (1 + len(ROUTES)) + '|',
    ]
    for score in PRIMARY_SCORES:
        cells = [f'`{score}`']
        for label in route_labels:
            seeds = routes_data[label]
            if not seeds:
                cells.append('—')
                continue
            vals = [summary.get('overall', {}).get(score, {}).get('auroc', nan) for summary in seeds.values()]
            (mean, std) = _mean_std(vals)
            cells.append(f'{mean:.4f} ± {std:.4f}' if mean == mean else '—')
        rows.append('| ' + ' | '.join(cells) + ' |')
    rows.append('')

    # NOTE(review): the table shows the top 12 classes; check this against the
    # reporting protocol if a different cut-off is intended.
    rows.append('## Per-attack-class `terminal_norm` AUROC (top 12 by support)')
    rows.append('')
    # Class ranking is derived from the baseline route's summaries only.
    seed_dicts = list(routes_data['baseline'].values())
    if seed_dicts:
        # class name -> largest '_n' seen across baseline seeds
        all_classes: dict[str, float] = {}
        for s in seed_dicts:
            for (cls, cls_data) in s.get('per_class', {}).items():
                if cls.startswith('_'):
                    # underscore-prefixed keys are not treated as attack classes
                    continue
                n = cls_data.get('_n', 0.0)
                all_classes[cls] = max(all_classes.get(cls, 0.0), n)
        ranked = sorted(all_classes.items(), key=lambda kv: -kv[1])[:12]
        rows.append('| Class | n | ' + ' | '.join(route_labels) + ' |')
        rows.append('|---' * (2 + len(ROUTES)) + '|')
        for (cls, n) in ranked:
            cells = [cls, f'{int(n)}']
            for label in route_labels:
                seeds = routes_data[label]
                if not seeds:
                    cells.append('—')
                    continue
                vals = [summary.get('per_class', {}).get(cls, {}).get('terminal_norm', nan) for summary in seeds.values()]
                (mean, std) = _mean_std(vals)
                cells.append(f'{mean:.3f} ± {std:.3f}' if mean == mean else '—')
            rows.append('| ' + ' | '.join(cells) + ' |')
    rows.append('')

    rows.append('## Run inventory')
    rows.append('')
    for (label, prefix) in ROUTES:
        seeds = sorted(routes_data[label].keys())
        rows.append(f"- **{label}** (`{prefix}_seed*`): seeds = {(seeds if seeds else '(none yet)')}")

    out = ROOT / 'RESULTS.md'
    # The report contains non-ASCII characters (—, ±); pin the encoding so the
    # write does not depend on the platform's locale default.
    out.write_text('\n'.join(rows) + '\n', encoding='utf-8')
    print(f'[wrote] {out}')


if __name__ == '__main__':
    main()
|
||||
180
scripts/aggregate/aggregate_score_router.py
Normal file
180
scripts/aggregate/aggregate_score_router.py
Normal file
@@ -0,0 +1,180 @@
|
||||
from __future__ import annotations
|
||||
import json
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
from sklearn.covariance import LedoitWolf, OAS, GraphicalLassoCV
|
||||
from sklearn.metrics import roc_auc_score
|
||||
# Directory holding all route-comparison artifacts: parents[2] of this file's
# resolved path, then artifacts/route_comparison.
ROOT = Path(__file__).resolve().parents[2] / 'artifacts' / 'route_comparison'
# Sub-directory holding cross-dataset score archives.
CROSS_DIR = ROOT / 'cross'
# Dataset names for within-dataset evaluation.
WITHIN_DATASETS = ['iscxtor2016', 'cicids2017', 'cicddos2019', 'ciciot2023']
# Dataset names for cross-dataset evaluation targets.
CROSS_TARGETS = ['cicids2017', 'cicddos2019']
# Seeds to aggregate over.
SEEDS = [42, 43, 44]
|
||||
|
||||
def _aggregators(val_S: np.ndarray, test_S_list: list[np.ndarray]) -> dict[str, list[np.ndarray]]:
    """Score every matrix in *test_S_list* with five aggregators fitted on *val_S*.

    Non-finite entries are first replaced (NaN -> 0, +/-inf -> +/-1e6). Column
    means and standard deviations of *val_S* define z-scores; three covariance
    estimates of *val_S* (empirical + 1e-3 ridge, Ledoit-Wolf, OAS) define
    squared Mahalanobis distances to the *val_S* mean.

    Returns:
        ``{tag: [per-row scores for each test matrix]}`` with tags
        ``max_abs_z``, ``max_pos_z``, ``mahal_plain``, ``mahal_lw`` and
        ``mahal_oas``, in that order.
    """

    def _clean(a):
        return np.nan_to_num(a, nan=0.0, posinf=1000000.0, neginf=-1000000.0)

    val_S = _clean(val_S)
    test_S_list = [_clean(t) for t in test_S_list]

    center = val_S.mean(axis=0)
    spread = val_S.std(axis=0) + 1e-09  # epsilon avoids division by zero
    ridge_eye = np.eye(val_S.shape[1])

    # Inverse covariance per Mahalanobis variant, estimated in the same order
    # as they are reported.
    precisions = {
        'mahal_plain': np.linalg.inv(np.cov(val_S, rowvar=False) + 0.001 * ridge_eye),
        'mahal_lw': np.linalg.inv(LedoitWolf().fit(val_S).covariance_ + 1e-09 * ridge_eye),
        'mahal_oas': np.linalg.inv(OAS().fit(val_S).covariance_ + 1e-09 * ridge_eye),
    }

    def _zscores(S):
        return (S - center) / spread

    scorers = {
        'max_abs_z': lambda S: np.abs(_zscores(S)).max(axis=1),
        'max_pos_z': lambda S: _zscores(S).max(axis=1),
    }
    for (tag, precision) in precisions.items():
        # Bind `precision` as a default so each closure keeps its own matrix.
        scorers[tag] = lambda S, P=precision: np.einsum('ni,ij,nj->n', S - center, P, S - center)

    return {tag: [scorer(t) for t in test_S_list] for (tag, scorer) in scorers.items()}
|
||||
# Named groups of score keys evaluated separately; None means "use every
# score key found in the archive".
SCORE_SUBSETS = {'all': None, 'terminal3': ['terminal_norm', 'terminal_flow', 'terminal_packet'], 'disc7': ['disc_nll_total', 'disc_nll_ch2', 'disc_nll_ch3', 'disc_nll_ch4', 'disc_nll_ch5', 'disc_nll_ch6', 'disc_nll_ch7']}
|
||||
|
||||
def _evaluate(npz: Path, val_prefix: str, atk_prefix: str) -> dict:
    """Compute aggregated and per-score AUROCs for one score archive.

    The archive is expected to hold, for every score name ``k``, one array
    under ``f'{val_prefix}{k}'`` (benign, label 0) and one under
    ``f'{atk_prefix}{k}'`` (attack, label 1). Keys ending in ``'labels'`` are
    ignored.

    Args:
        npz: path to the ``.npz`` archive.
        val_prefix: key prefix of the benign arrays.
        atk_prefix: key prefix of the attack arrays.

    Returns:
        Dict with ``n_val`` / ``n_atk``, ``auc_{aggregator}_{subset}`` for
        every aggregator of ``_aggregators`` and every usable entry of
        ``SCORE_SUBSETS``, un-suffixed aliases for the ``all`` subset,
        ``auc_best_fixed`` / ``best_fixed_name`` (best single score, using
        ``max(auc, 1 - auc)``), and ``auc_term_norm`` / ``auc_term_pkt`` /
        ``auc_disc_total``. An AUROC that cannot be computed is NaN.

    Raises:
        ValueError: if the archive holds no score array under ``val_prefix``.
        KeyError: if a score has a benign array but no attack array.
    """

    def _finite(arr):
        return np.nan_to_num(arr, nan=0.0, posinf=1000000000000.0, neginf=-1000000000000.0)

    def _safe_auc(y_true, scores):
        # roc_auc_score raises ValueError e.g. when only one class is present.
        try:
            return float(roc_auc_score(y_true, scores))
        except ValueError:
            return float('nan')

    # np.load on an .npz returns an NpzFile that keeps the archive open, so
    # read everything needed inside a ``with`` block and let it close.
    # NOTE(review): allow_pickle=True unpickles object arrays; keep it only
    # while every archive comes from a trusted source.
    with np.load(npz, allow_pickle=True) as z:
        # Slice off the leading prefix only; str.replace would also remove
        # any later occurrence of the prefix inside the score name.
        all_keys = sorted(
            k[len(val_prefix):]
            for k in z.files
            if k.startswith(val_prefix) and not k.endswith('labels')
        )
        val_cols = {k: z[f'{val_prefix}{k}'] for k in all_keys}
        atk_cols = {k: z[f'{atk_prefix}{k}'] for k in all_keys}

    if not all_keys:
        raise ValueError(f'{npz}: no score arrays found with prefix {val_prefix!r}')

    out: dict = {'n_val': None, 'n_atk': None}
    for (subset_name, subset_keys) in SCORE_SUBSETS.items():
        if subset_keys is None:
            keys = all_keys
        else:
            keys = [k for k in subset_keys if k in val_cols]
            if len(keys) < 2:
                # Not enough scores of this family were saved to aggregate.
                continue
        val_S = np.stack([val_cols[k] for k in keys], axis=1)
        atk_S = np.stack([atk_cols[k] for k in keys], axis=1)
        (n_val, n_atk) = (val_S.shape[0], atk_S.shape[0])
        out['n_val'] = n_val
        out['n_atk'] = n_atk
        y = np.r_[np.zeros(n_val), np.ones(n_atk)]
        # Aggregator statistics are fit on the benign matrix only.
        for (tag, (v_agg, a_agg)) in _aggregators(val_S, [val_S, atk_S]).items():
            out[f'auc_{tag}_{subset_name}'] = _safe_auc(y, _finite(np.r_[v_agg, a_agg]))

    # Un-suffixed aliases for the 'all' subset.
    for tag in ('max_abs_z', 'max_pos_z', 'mahal_plain', 'mahal_lw', 'mahal_oas'):
        out[f'auc_{tag}'] = out.get(f'auc_{tag}_all')

    # Per-score AUROC, direction-agnostic via max(auc, 1 - auc).
    first = all_keys[0]
    y = np.r_[np.zeros(len(val_cols[first])), np.ones(len(atk_cols[first]))]
    per_score = {}
    for k in all_keys:
        auc = _safe_auc(y, _finite(np.r_[val_cols[k], atk_cols[k]]))
        per_score[k] = max(auc, 1 - auc)

    def _rank(name):
        # NaN never compares greater, so rank it below every real AUROC.
        value = per_score[name]
        return value if value == value else float('-inf')

    best_score = max(per_score, key=_rank)
    out['auc_best_fixed'] = per_score[best_score]
    out['best_fixed_name'] = best_score
    out['auc_term_norm'] = per_score.get('terminal_norm', float('nan'))
    out['auc_term_pkt'] = per_score.get('terminal_packet', float('nan'))
    out['auc_disc_total'] = per_score.get('disc_nll_total', float('nan'))
    return out
|
||||
|
||||
def _mean_std(vs: list[float]) -> tuple[float, float]:
    """Return ``(mean, std)`` of ``vs``, ignoring missing values.

    NaN and ``None`` entries are dropped before averaging. The standard
    deviation is NumPy's default population form (``ddof=0``).

    Returns:
        ``(nan, nan)`` when no usable value remains.
    """
    # ``v == v`` is False only for NaN. None must be excluded explicitly:
    # it would pass that test and then be converted to NaN by np.asarray,
    # turning the whole mean into NaN.
    arr = np.asarray([v for v in vs if v is not None and v == v], dtype=np.float64)
    if arr.size == 0:
        return (float('nan'), float('nan'))
    return (float(arr.mean()), float(arr.std()))
|
||||
|
||||
def main() -> None:
    """Write ``SCORE_ROUTER.md`` under ``ROOT``.

    The report has three tables: within-dataset results (one row per entry
    of ``WITHIN_DATASETS``), cross-dataset results (one row per entry of
    ``CROSS_TARGETS``) and the best single score per setup and seed. Score
    archives that do not exist are skipped; a setup with no archive at all
    gets a ``(no data)`` row.
    """
    nan = float('nan')
    summary_fields = (
        'auc_term_norm',
        'auc_best_fixed',
        'auc_max_abs_z_all',
        'auc_mahal_oas_all',
        'auc_mahal_oas_terminal3',
        'auc_mahal_oas_disc7',
    )

    def load_setup(npz_for_seed, val_prefix, atk_prefix):
        # Evaluate each existing archive exactly once; the result feeds both
        # the summary table and the winner table.
        results = {}
        for seed in SEEDS:
            npz = npz_for_seed(seed)
            if npz.exists():
                results[seed] = _evaluate(npz, val_prefix, atk_prefix)
        return results

    def summary_header(first_col):
        return [
            f'| {first_col} | term_norm | best fixed | max-\\|z\\| (all) | mahal-OAS (all) | **mahal-OAS (term3)** | **mahal-OAS (disc7)** |',
            '|---|---|---|---|---|---|---|',
        ]

    def summary_row(name, results):
        if not results:
            return f'| {name} | (no data) | | | | | |'
        cells = []
        for field in summary_fields:
            # A subset skipped by _evaluate has no key; treat it as missing
            # instead of raising KeyError.
            vals = [r.get(field, nan) for r in results.values()]
            (m, sd) = _mean_std([nan if v is None else v for v in vals])
            cells.append(f'{m:.4f} ± {sd:.4f}')
        (term_norm, best_fixed, max_abs_z, oas_all, oas_term3, oas_disc7) = cells
        return f'| {name} | {term_norm} | {best_fixed} | {max_abs_z} | {oas_all} | **{oas_term3}** | **{oas_disc7}** |'

    def winner_row(label, results):
        cells = [label]
        for seed in SEEDS:
            r = results.get(seed)
            cells.append('—' if r is None else f"{r['best_fixed_name']} ({r['auc_best_fixed']:.4f})")
        return '| ' + ' | '.join(cells) + ' |'

    within = {
        ds: load_setup(
            lambda seed, ds=ds: ROOT / f'route_ac_combo_{ds}_seed{seed}' / 'phase1_scores.npz',
            'val_',
            'atk_',
        )
        for ds in WITHIN_DATASETS
    }
    cross = {
        tgt: load_setup(
            lambda seed, tgt=tgt: CROSS_DIR / f'route_ac_combo_seed{seed}_to_{tgt}.npz',
            'b_',
            'a_',
        )
        for tgt in CROSS_TARGETS
    }

    rows: list[str] = [
        '# Score-vector auto-selection: max-of-|z| / Mahalanobis vs fixed scores',
        '',
        'Aggregators are fit on **benign val only** (no attack labels). All numbers',
        'are 3-seed mean ± std on A+C combo (Mixed_CFM + causal-packet attention).',
        '',
        'Note on fairness: `auc_best_fixed` is selection-biased (picks per-dataset best',
        'score post-hoc on test set). `max_abs_z` and `mahalanobis` are NOT — they only',
        'use benign val to fit aggregator parameters.',
        '',
        "## Within-dataset(A+C combo on each dataset's own benign/attack)",
        '',
    ]
    rows.extend(summary_header('Dataset'))
    rows.extend(summary_row(ds, within[ds]) for ds in WITHIN_DATASETS)

    rows.append('')
    rows.append('## Cross-dataset(A+C combo trained on CICIoT2023 → eval on target)')
    rows.append('')
    rows.extend(summary_header('Target'))
    rows.extend(summary_row(tgt, cross[tgt]) for tgt in CROSS_TARGETS)

    rows.append('')
    rows.append('## Best-fixed-score winner per setup')
    rows.append('')
    # Header and separator follow SEEDS so they cannot drift from the data.
    rows.append('| Setup | ' + ' | '.join(f'seed{seed}' for seed in SEEDS) + ' |')
    rows.append('|---' * (1 + len(SEEDS)) + '|')
    rows.extend(winner_row(f'within {ds}', within[ds]) for ds in WITHIN_DATASETS)
    rows.extend(winner_row(f'cross→{tgt}', cross[tgt]) for tgt in CROSS_TARGETS)

    out = ROOT / 'SCORE_ROUTER.md'
    # The report contains non-ASCII characters (±, →, —); write UTF-8
    # explicitly instead of depending on the locale's default encoding.
    out.write_text('\n'.join(rows) + '\n', encoding='utf-8')
    print(f'[wrote] {out}')
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
182
scripts/aggregate/aggregate_v2.py
Normal file
182
scripts/aggregate/aggregate_v2.py
Normal file
@@ -0,0 +1,182 @@
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
from sklearn.metrics import roc_auc_score
|
||||
ROOT = Path(__file__).resolve().parents[2] / 'artifacts' / 'route_comparison'
|
||||
SEED_RE = re.compile('_seed(\\d+)$')
|
||||
ROUTES = [('baseline', 'baseline_ciciot2023'), ('A: causal', 'route_a_causal_ciciot2023'), ('B: spectral', 'route_b_spectral_ciciot2023'), ('C: mixed', 'route_c_mixed_ciciot2023'), ('A+C combo', 'route_ac_combo_ciciot2023')]
|
||||
|
||||
def _seeds(prefix: str) -> dict[int, Path]:
    """Map seed number to run directory for every evaluated run of a route.

    Scans ``ROOT`` for entries named ``{prefix}_seed*`` and keeps those whose
    name ends in ``_seed<digits>`` and which contain ``phase1_summary.json``.
    """
    found = {}
    for run_dir in sorted(ROOT.glob(f'{prefix}_seed*')):
        match = SEED_RE.search(run_dir.name)
        if match is None:
            continue
        if not (run_dir / 'phase1_summary.json').exists():
            continue
        found[int(match.group(1))] = run_dir
    return found
|
||||
|
||||
def _load_summary(d: Path) -> dict:
    """Parse and return ``phase1_summary.json`` from run directory ``d``.

    Raises:
        FileNotFoundError: if the summary file does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Decode as UTF-8 explicitly; read_text() would otherwise use the
    # locale's default encoding, which can mis-decode non-ASCII content.
    return json.loads((d / 'phase1_summary.json').read_text(encoding='utf-8'))
|
||||
|
||||
def _ensemble_sweep(d: Path) -> dict[float, float] | None:
    """AUROC of ``alpha*terminal_norm + (1-alpha)*disc_nll_total`` per alpha.

    Both scores are z-scored with the mean/std of their benign (``val_``)
    array before mixing; benign samples are labelled 0 and attack (``atk_``)
    samples 1.

    Args:
        d: run directory expected to contain ``phase1_scores.npz``.

    Returns:
        Mapping alpha -> AUROC for the fixed alpha grid (NaN where the AUROC
        cannot be computed), or ``None`` when the archive is missing or lacks
        any of the four required arrays.
    """
    f = d / 'phase1_scores.npz'
    if not f.exists():
        return None

    needed = ('val_terminal_norm', 'atk_terminal_norm', 'val_disc_nll_total', 'atk_disc_nll_total')
    # NpzFile keeps the archive open until closed, so read inside ``with``.
    # NOTE(review): allow_pickle=True unpickles object arrays; keep it only
    # while every archive comes from a trusted source.
    with np.load(f, allow_pickle=True) as z:
        # Check the attack arrays too, not only the benign ones; otherwise a
        # partially written archive raises KeyError below.
        if not set(needed) <= set(z.files):
            return None
        (v_tn, a_tn, v_dn, a_dn) = (z[k] for k in needed)

    def zsc(v, a):
        # Standardise both arrays with benign statistics only.
        (mu, sd) = (v.mean(), v.std() + 1e-09)
        return ((v - mu) / sd, (a - mu) / sd)

    (v_tn_z, a_tn_z) = zsc(v_tn, a_tn)
    (v_dn_z, a_dn_z) = zsc(v_dn, a_dn)

    out: dict[float, float] = {}
    for alpha in (0.0, 0.25, 0.5, 0.7, 0.8, 0.9, 1.0):
        s_v = alpha * v_tn_z + (1.0 - alpha) * v_dn_z
        s_a = alpha * a_tn_z + (1.0 - alpha) * a_dn_z
        y = np.r_[np.zeros(len(s_v)), np.ones(len(s_a))]
        s = np.r_[s_v, s_a]
        try:
            out[alpha] = float(roc_auc_score(y, s))
        except ValueError:
            # e.g. only one class present, or non-finite scores.
            out[alpha] = float('nan')
    return out
|
||||
|
||||
def _ensemble_score(d: Path) -> tuple[float, float] | None:
    """Return ``(best AUROC, alpha achieving it)`` from the ensemble sweep.

    Returns ``None`` when ``_ensemble_sweep`` has no result for ``d``. The
    alpha is chosen by maximising AUROC over the sweep itself, so the value
    is a post-hoc optimum.
    """
    sweep = _ensemble_sweep(d)
    if sweep is None:
        return None
    (top_alpha, top_auc) = max(sweep.items(), key=lambda item: item[1])
    return (top_auc, top_alpha)
|
||||
|
||||
def _mean_std(vals: list[float]) -> tuple[float, float]:
    """Return ``(mean, std)`` of ``vals``, ignoring missing values.

    NaN and ``None`` entries (``None`` is what a JSON ``null`` decodes to)
    are dropped before averaging. The standard deviation is NumPy's default
    population form (``ddof=0``).

    Returns:
        ``(nan, nan)`` when no usable value remains.
    """
    # ``v == v`` is False only for NaN. None must be excluded explicitly:
    # it would pass that test and then be converted to NaN by np.asarray,
    # turning the whole mean into NaN.
    arr = np.asarray([v for v in vals if v is not None and v == v], dtype=np.float64)
    if arr.size == 0:
        return (float('nan'), float('nan'))
    return (float(arr.mean()), float(arr.std()))
|
||||
|
||||
def main() -> None:
    """Write ``RESULTS.md`` under ``ROOT`` from the per-run phase1 summaries.

    Sections, in order: each route's best overall score, the
    ``terminal_norm`` score per route, route-specific signature scores, the
    route C alpha-ensemble sweep, per-attack-class ``terminal_norm`` AUROC
    for the 12 largest classes of the first baseline summary, and a run
    inventory. Routes without any evaluated run are rendered as dashes.
    """
    nan = float('nan')

    def overall_auroc(summary: dict, score: str) -> float:
        # Tolerate summaries without an 'overall' section or without this
        # score, and JSON nulls (decoded as None).
        value = summary.get('overall', {}).get(score, {}).get('auroc', nan)
        return nan if value is None else value

    def class_auroc(summary: dict, cls: str) -> float:
        value = summary.get('per_class', {}).get(cls, {}).get('terminal_norm', nan)
        return nan if value is None else value

    routes_data: dict[str, dict[int, dict]] = {}
    routes_dirs: dict[str, dict[int, Path]] = {}
    for (label, prefix) in ROUTES:
        seeds = _seeds(prefix)
        routes_dirs[label] = seeds
        routes_data[label] = {s: _load_summary(d) for (s, d) in seeds.items()}

    route_labels = [label for (label, _) in ROUTES]

    rows: list[str] = [
        '# Route Comparison Results — CICIoT2023 (multi-seed)',
        '',
        'Phase1 eval: AUROC over benign val (5k cap) vs all attacks (10k cap), 3 seeds each.',
        '',
        "## Each route's best AUROC (overall)",
        '',
        '| Route | Best score | AUROC | Δ vs baseline-best |',
        '|---|---|---|---|',
    ]

    baseline_best = None
    for label in route_labels:
        summaries = routes_data[label]
        if not summaries:
            rows.append(f'| {label} | — | — | — |')
            continue
        all_scores: dict[str, list[float]] = {}
        for summary in summaries.values():
            for score in summary.get('overall', {}):
                all_scores.setdefault(score, []).append(overall_auroc(summary, score))
        score_means = {k: _mean_std(v)[0] for (k, v) in all_scores.items()}
        score_means = {k: v for (k, v) in score_means.items() if v == v}  # drop NaN means
        if not score_means:
            rows.append(f'| {label} | — | — | — |')
            continue
        best_score = max(score_means, key=score_means.get)
        best_val = score_means[best_score]
        if label == 'baseline':
            baseline_best = best_val
            delta_str = '—'
        elif baseline_best is not None:
            # Test against None explicitly: a baseline of 0.0 is still a
            # valid reference and must not be treated as missing.
            delta_str = f'{best_val - baseline_best:+.4f}'
        else:
            delta_str = '—'
        std = _mean_std(all_scores[best_score])[1]
        rows.append(f'| {label} | `{best_score}` | {best_val:.4f} ± {std:.4f} | {delta_str} |')

    rows.append('')
    rows.append('## Primary score: `terminal_norm`')
    rows.append('')
    rows.append('| Route | mean ± std | seeds |')
    rows.append('|---|---|---|')
    for label in route_labels:
        summaries = routes_data[label]
        if not summaries:
            rows.append(f'| {label} | — | — |')
            continue
        (m, sd) = _mean_std([overall_auroc(s, 'terminal_norm') for s in summaries.values()])
        rows.append(f'| {label} | {m:.4f} ± {sd:.4f} | {sorted(summaries.keys())} |')

    rows.append('')
    rows.append('## Route-specific signature scores (mean ± std, 3 seeds)')
    rows.append('')
    score_groups = [
        ('Route A signature (consistency family)', ['flow_consistency', 'packet_consistency', 'consistency_total', 'causal_surprisal_total', 'causal_surprisal_packet_median']),
        ('Route B signature (curvature/dynamics)', ['kappa2_speed2norm_packet_median', 'direction_drift_packet_median', 'pna_packet_median', 'curvature_packet']),
        ('Route C signature (discrete NLL)', ['disc_nll_total', 'disc_nll_ch3', 'disc_nll_ch4', 'disc_nll_ch5', 'disc_nll_ch7']),
    ]
    for (grp_name, scores) in score_groups:
        rows.append(f'### {grp_name}')
        rows.append('')
        rows.append('| Score | ' + ' | '.join(route_labels) + ' |')
        rows.append('|---' * (1 + len(ROUTES)) + '|')
        for sc in scores:
            cells = [f'`{sc}`']
            for label in route_labels:
                vals = [overall_auroc(s, sc) for s in routes_data[label].values()]
                (m, sd) = _mean_std(vals)
                cells.append(f'{m:.4f} ± {sd:.4f}' if m == m else '—')
            rows.append('| ' + ' | '.join(cells) + ' |')
        rows.append('')

    rows.append('## Route C ensemble: α·terminal_norm + (1−α)·disc_nll_total (z-scored)')
    rows.append('')
    c_dirs = routes_dirs.get('C: mixed', {})
    if c_dirs:
        alphas = (0.0, 0.25, 0.5, 0.7, 0.8, 0.9, 1.0)
        c_seeds = sorted(c_dirs.keys())
        rows.append('| α | ' + ' | '.join(f'seed{s}' for s in c_seeds) + ' | mean ± std |')
        rows.append('|---' * (2 + len(c_dirs)) + '|')
        # A run without a usable score archive contributes NaN cells.
        per_seed_sweeps = {s: _ensemble_sweep(d) or {} for (s, d) in c_dirs.items()}
        for a in alphas:
            vals = [per_seed_sweeps[s].get(a, nan) for s in c_seeds]
            cells = [f'{a:.2f}'] + [f'{v:.4f}' for v in vals]
            (m, sd) = _mean_std(vals)
            cells.append(f'**{m:.4f} ± {sd:.4f}**')
            rows.append('| ' + ' | '.join(cells) + ' |')
        rows.append('')
        rows.append('(α=1.0 = terminal_norm only; α=0.0 = disc_nll only.)')
        rows.append('')

    rows.append('## Per-attack-class AUROC (top 12, terminal_norm)')
    rows.append('')
    if routes_data['baseline']:
        # The class list and counts come from the first baseline summary.
        any_summary = next(iter(routes_data['baseline'].values()))
        classes = sorted(
            [(c, d.get('_n', 0)) for (c, d) in any_summary.get('per_class', {}).items() if not c.startswith('_')],
            key=lambda kv: -kv[1],
        )[:12]
        rows.append('| Class | n | ' + ' | '.join(route_labels) + ' |')
        rows.append('|---' * (2 + len(ROUTES)) + '|')
        for (cls, n) in classes:
            cells = [cls, f'{int(n)}']
            for label in route_labels:
                vals = [class_auroc(s, cls) for s in routes_data[label].values()]
                (m, sd) = _mean_std(vals)
                cells.append(f'{m:.3f}±{sd:.3f}' if m == m else '—')
            rows.append('| ' + ' | '.join(cells) + ' |')
        rows.append('')

    rows.append('## Run inventory')
    rows.append('')
    for (label, prefix) in ROUTES:
        seeds = sorted(routes_data[label].keys())
        rows.append(f"- **{label}** (`{prefix}_seed*`): seeds = {(seeds if seeds else '(none yet)')}")

    out = ROOT / 'RESULTS.md'
    # The report contains non-ASCII characters (Δ, ±, α, —); write UTF-8
    # explicitly instead of depending on the locale's default encoding.
    out.write_text('\n'.join(rows) + '\n', encoding='utf-8')
    print(f'[wrote] {out}')
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
70
scripts/aggregate/run_ac_combo_evals.sh
Executable file
70
scripts/aggregate/run_ac_combo_evals.sh
Executable file
@@ -0,0 +1,70 @@
|
||||
#!/bin/bash
# Phase1 + cross eval for the 3 A+C combo seeds.
#
# Two chains run in parallel, one per GPU:
#   GPU 0: phase1 eval, then cross→CICIDS2017
#   GPU 1: cross→CICDDoS2019
# A step is skipped when its output file already exists, so the script can
# be re-run. Chain output goes to /tmp/ac_eval_gpu{0,1}.log; the output of
# each eval goes to the .log file next to its result.

set -e
# JANUS_ROOT overrides the checkout location; the default is the original path.
ROOT=${JANUS_ROOT:-/home/chy/JANUS}
MIXED_PHASE1=${ROOT}/Mixed_CFM/eval_phase1.py
MIXED_CROSS=${ROOT}/Mixed_CFM/eval_cross.py
CROSS_DIR=${ROOT}/artifacts/route_comparison/cross
mkdir -p "${CROSS_DIR}"

# GPU 0: phase1 + cross→IDS2017 for all 3 seeds
{
    for seed in 42 43 44; do
        md=${ROOT}/artifacts/route_comparison/route_ac_combo_ciciot2023_seed${seed}
        [ -f "${md}/model.pt" ] || { echo "[wait] seed${seed} model.pt not yet"; continue; }

        if [ ! -f "${md}/phase1_summary.json" ]; then
            echo "[gpu0 phase1] seed${seed}"
            cd "${ROOT}/Mixed_CFM"
            CUDA_VISIBLE_DEVICES=0 stdbuf -oL uv run --no-sync python -u "${MIXED_PHASE1}" \
                --model-dir "${md}" --out-dir "${md}" \
                --batch-size 256 --n-steps 16 \
                --n-val-cap 5000 --n-atk-cap 10000 \
                > "${md}/phase1.log" 2>&1
        fi

        ids_out=${CROSS_DIR}/route_ac_combo_seed${seed}_to_cicids2017.json
        if [ ! -f "${ids_out}" ]; then
            echo "[gpu0 cross→ids2017] seed${seed}"
            cd "${ROOT}/Mixed_CFM"
            CUDA_VISIBLE_DEVICES=0 stdbuf -oL uv run --no-sync python -u "${MIXED_CROSS}" \
                --model-dir "${md}" \
                --target-store "${ROOT}/datasets/cicids2017/processed/full_store" \
                --target-flows "${ROOT}/datasets/cicids2017/processed/flows.parquet" \
                --target-flow-features "${ROOT}/datasets/cicids2017/processed/flow_features.parquet" \
                --out "${ids_out}" \
                --n-benign 10000 --n-attack 10000 --seed 42 --T 64 --batch-size 256 --n-steps 16 \
                > "${CROSS_DIR}/route_ac_combo_seed${seed}_to_cicids2017.log" 2>&1
        fi
    done
    echo "[gpu0 done]"
} > /tmp/ac_eval_gpu0.log 2>&1 &
GPU0=$!

# GPU 1: cross→DDoS19 for all 3 seeds
{
    for seed in 42 43 44; do
        md=${ROOT}/artifacts/route_comparison/route_ac_combo_ciciot2023_seed${seed}
        [ -f "${md}/model.pt" ] || { echo "[wait] seed${seed} model.pt not yet"; continue; }

        ddos_out=${CROSS_DIR}/route_ac_combo_seed${seed}_to_cicddos2019.json
        if [ ! -f "${ddos_out}" ]; then
            echo "[gpu1 cross→ddos19] seed${seed}"
            cd "${ROOT}/Mixed_CFM"
            CUDA_VISIBLE_DEVICES=1 stdbuf -oL uv run --no-sync python -u "${MIXED_CROSS}" \
                --model-dir "${md}" \
                --target-store "${ROOT}/datasets/cicddos2019/processed/full_store" \
                --target-flows "${ROOT}/datasets/cicddos2019/processed/flows.parquet" \
                --target-flow-features "${ROOT}/datasets/cicddos2019/processed/flow_features.parquet" \
                --out "${ddos_out}" \
                --n-benign 10000 --n-attack 10000 --seed 42 --T 64 --batch-size 256 --n-steps 16 \
                > "${CROSS_DIR}/route_ac_combo_seed${seed}_to_cicddos2019.log" 2>&1
        fi
    done
    echo "[gpu1 done]"
} > /tmp/ac_eval_gpu1.log 2>&1 &
GPU1=$!

# Wait for BOTH chains before deciding the outcome. A bare `wait $GPU0`
# under `set -e` would exit on the first failed chain and leave the other
# one running unattended.
rc=0
wait "${GPU0}" || rc=$?
wait "${GPU1}" || rc=$?
if [ "${rc}" -ne 0 ]; then
    echo "[ac combo evals FAILED] see /tmp/ac_eval_gpu0.log and /tmp/ac_eval_gpu1.log" >&2
    exit "${rc}"
fi
echo "[all ac combo evals done]"
|
||||
68
scripts/aggregate/run_all_phase1.sh
Executable file
68
scripts/aggregate/run_all_phase1.sh
Executable file
@@ -0,0 +1,68 @@
|
||||
#!/bin/bash
# Run phase1 eval on all routes after trainings complete.
# Splits across 2 GPUs in parallel chains.
#
# A run is skipped when its model.pt is missing or its phase1_summary.json
# already exists, so the script can be re-run. Per-run output goes to
# <model_dir>/phase1.log.

set -e
# JANUS_ROOT overrides the checkout location; the default is the original path.
ROOT=${JANUS_ROOT:-/home/chy/JANUS}
UNIFIED_EVAL=${ROOT}/artifacts/verify_2026_04_24/eval_phase1_unified.py
MIXED_EVAL=${ROOT}/Mixed_CFM/eval_phase1.py

cd "${ROOT}"

# GPU 0: baselines + route_a (6 models)
{
    for prefix in baseline_ciciot2023 route_a_causal_ciciot2023; do
        for seed in 42 43 44; do
            name=${prefix}_seed${seed}
            md=${ROOT}/artifacts/route_comparison/${name}
            [ -f "${md}/model.pt" ] || continue
            [ -f "${md}/phase1_summary.json" ] && continue
            echo "[GPU0 eval] ${name}"
            cd "${ROOT}/Unified_CFM"
            CUDA_VISIBLE_DEVICES=0 stdbuf -oL uv run --no-sync python -u "${UNIFIED_EVAL}" \
                --model-dir "${md}" --out-dir "${md}" \
                --batch-size 256 --n-steps 16 --jacobian-n-eps 4 \
                --n-val-cap 5000 --n-atk-cap 10000 \
                > "${md}/phase1.log" 2>&1
        done
    done
    echo "[GPU0 done]"
} &
GPU0_PID=$!

# GPU 1: route_b + route_c (6 models)
{
    for seed in 42 43 44; do
        name=route_b_spectral_ciciot2023_seed${seed}
        md=${ROOT}/artifacts/route_comparison/${name}
        [ -f "${md}/model.pt" ] || continue
        [ -f "${md}/phase1_summary.json" ] && continue
        echo "[GPU1 eval] ${name}"
        cd "${ROOT}/Unified_CFM"
        CUDA_VISIBLE_DEVICES=1 stdbuf -oL uv run --no-sync python -u "${UNIFIED_EVAL}" \
            --model-dir "${md}" --out-dir "${md}" \
            --batch-size 256 --n-steps 16 --jacobian-n-eps 4 \
            --n-val-cap 5000 --n-atk-cap 10000 \
            > "${md}/phase1.log" 2>&1
    done
    for seed in 42 43 44; do
        name=route_c_mixed_ciciot2023_seed${seed}
        md=${ROOT}/artifacts/route_comparison/${name}
        [ -f "${md}/model.pt" ] || continue
        [ -f "${md}/phase1_summary.json" ] && continue
        echo "[GPU1 eval] ${name}"
        cd "${ROOT}/Mixed_CFM"
        CUDA_VISIBLE_DEVICES=1 stdbuf -oL uv run --no-sync python -u "${MIXED_EVAL}" \
            --model-dir "${md}" --out-dir "${md}" \
            --batch-size 256 --n-steps 16 \
            --n-val-cap 5000 --n-atk-cap 10000 \
            > "${md}/phase1.log" 2>&1
    done
    echo "[GPU1 done]"
} &
GPU1_PID=$!

# Wait for BOTH chains before deciding the outcome. A bare `wait` under
# `set -e` would exit on the first failed chain and leave the other one
# running unattended.
rc=0
wait "${GPU0_PID}" || rc=$?
wait "${GPU1_PID}" || rc=$?
if [ "${rc}" -ne 0 ]; then
    echo "[phase1 FAILED] check phase1.log in the affected model directories" >&2
    exit "${rc}"
fi
echo "[all phase1 done]"
# NOTE(review): this still points at artifacts/route_comparison/; confirm the
# aggregator was not moved to scripts/aggregate/ along with this script.
cd "${ROOT}" && uv run --no-sync python artifacts/route_comparison/aggregate_results.py
|
||||
105
scripts/aggregate/run_cross_all.sh
Executable file
105
scripts/aggregate/run_cross_all.sh
Executable file
@@ -0,0 +1,105 @@
|
||||
#!/bin/bash
# Cross-dataset eval for all 4 routes × 2 targets × 3 seeds = 24 runs.
# Source: CICIoT2023 (where all models were trained).
# Targets: CICIDS2017 + CICDDoS2019.
#
# An eval is skipped when its result JSON already exists, so the script can
# be re-run. Chain output goes to /tmp/cross_gpu{0,1}.log; the output of each
# eval goes to ${CROSS_DIR}/<out_name>.log.

set -e
# JANUS_ROOT overrides the checkout location; the default is the original path.
ROOT=${JANUS_ROOT:-/home/chy/JANUS}
UNIFIED_EVAL=${ROOT}/artifacts/verify_2026_04_24/eval_phase2_cross_cicddos2019.py
MIXED_EVAL=${ROOT}/Mixed_CFM/eval_cross.py
CROSS_DIR=${ROOT}/artifacts/route_comparison/cross
mkdir -p "${CROSS_DIR}"

# Target dataset paths, keyed as <target>_<kind>.
declare -A TARGETS
TARGETS[cicids2017_store]=${ROOT}/datasets/cicids2017/processed/full_store
TARGETS[cicids2017_flows]=${ROOT}/datasets/cicids2017/processed/flows.parquet
TARGETS[cicids2017_features]=${ROOT}/datasets/cicids2017/processed/flow_features.parquet
TARGETS[cicids2017_features_spectral]=${ROOT}/datasets/cicids2017/processed/flow_features_spectral.parquet

TARGETS[cicddos2019_store]=${ROOT}/datasets/cicddos2019/processed/full_store
TARGETS[cicddos2019_flows]=${ROOT}/datasets/cicddos2019/processed/flows.parquet
TARGETS[cicddos2019_features]=${ROOT}/datasets/cicddos2019/processed/flow_features.parquet
TARGETS[cicddos2019_features_spectral]=${ROOT}/datasets/cicddos2019/processed/flow_features_spectral.parquet

# run_unified_eval GPU MODEL_DIR TARGET FEATURES_PARQUET OUT_NAME
# Cross eval with the Unified_CFM script; the caller picks the feature file.
run_unified_eval() {
    local gpu=$1 model_dir=$2 target=$3 features=$4 out_name=$5
    local out=${CROSS_DIR}/${out_name}.json
    [ -f "${out}" ] && { echo "[skip] ${out_name}"; return; }
    echo "[gpu${gpu} eval] ${out_name}"
    cd "${ROOT}/Unified_CFM"
    CUDA_VISIBLE_DEVICES=${gpu} stdbuf -oL uv run --no-sync python -u "${UNIFIED_EVAL}" \
        --model-dir "${model_dir}" \
        --target-store "${TARGETS[${target}_store]}" \
        --target-flows "${TARGETS[${target}_flows]}" \
        --target-flow-features "${features}" \
        --out "${out}" \
        --n-benign 10000 --n-attack 10000 --seed 42 \
        --T 64 --batch-size 256 --n-steps 16 \
        > "${CROSS_DIR}/${out_name}.log" 2>&1
}

# run_mixed_eval GPU MODEL_DIR TARGET OUT_NAME
# Cross eval with the Mixed_CFM script; always uses the canonical features.
run_mixed_eval() {
    local gpu=$1 model_dir=$2 target=$3 out_name=$4
    local out=${CROSS_DIR}/${out_name}.json
    [ -f "${out}" ] && { echo "[skip] ${out_name}"; return; }
    echo "[gpu${gpu} mixed eval] ${out_name}"
    cd "${ROOT}/Mixed_CFM"
    CUDA_VISIBLE_DEVICES=${gpu} stdbuf -oL uv run --no-sync python -u "${MIXED_EVAL}" \
        --model-dir "${model_dir}" \
        --target-store "${TARGETS[${target}_store]}" \
        --target-flows "${TARGETS[${target}_flows]}" \
        --target-flow-features "${TARGETS[${target}_features]}" \
        --out "${out}" \
        --n-benign 10000 --n-attack 10000 --seed 42 \
        --T 64 --batch-size 256 --n-steps 16 \
        > "${CROSS_DIR}/${out_name}.log" 2>&1
}

# === GPU 0 chain: baselines + route_a, both targets ===
{
    for prefix_route in "baseline_ciciot2023:baseline" "route_a_causal_ciciot2023:route_a_causal"; do
        prefix=${prefix_route%:*}   # run-directory prefix
        short=${prefix_route#*:}    # name used in the output file
        for seed in 42 43 44; do
            md=${ROOT}/artifacts/route_comparison/${prefix}_seed${seed}
            [ -f "${md}/model.pt" ] || continue
            for target in cicids2017 cicddos2019; do
                run_unified_eval 0 "${md}" "${target}" "${TARGETS[${target}_features]}" \
                    "${short}_seed${seed}_to_${target}"
            done
        done
    done
    echo "[gpu0 cross chain done]"
} > /tmp/cross_gpu0.log 2>&1 &
GPU0=$!

# === GPU 1 chain: route_b (uses spectral features) + route_c (mixed) ===
{
    # route_b: must use flow_features_spectral.parquet
    for seed in 42 43 44; do
        md=${ROOT}/artifacts/route_comparison/route_b_spectral_ciciot2023_seed${seed}
        [ -f "${md}/model.pt" ] || continue
        for target in cicids2017 cicddos2019; do
            run_unified_eval 1 "${md}" "${target}" "${TARGETS[${target}_features_spectral]}" \
                "route_b_spectral_seed${seed}_to_${target}"
        done
    done

    # route_c: Mixed_CFM eval (uses canonical flow_features)
    for seed in 42 43 44; do
        md=${ROOT}/artifacts/route_comparison/route_c_mixed_ciciot2023_seed${seed}
        [ -f "${md}/model.pt" ] || continue
        for target in cicids2017 cicddos2019; do
            run_mixed_eval 1 "${md}" "${target}" \
                "route_c_mixed_seed${seed}_to_${target}"
        done
    done
    echo "[gpu1 cross chain done]"
} > /tmp/cross_gpu1.log 2>&1 &
GPU1=$!

# Wait for BOTH chains before deciding the outcome. A bare `wait $GPU0`
# under `set -e` would exit on the first failed chain and leave the other
# one running unattended.
rc=0
wait "${GPU0}" || rc=$?
wait "${GPU1}" || rc=$?
if [ "${rc}" -ne 0 ]; then
    echo "[cross FAILED] see /tmp/cross_gpu0.log and /tmp/cross_gpu1.log" >&2
    exit "${rc}"
fi
echo "[all cross done]"
# Number of result files; `find` prints nothing (count 0) when none exist,
# where `ls` on an unmatched glob would print an error.
find "${CROSS_DIR}" -maxdepth 1 -name '*.json' | wc -l
|
||||
88
scripts/aggregate/run_full_cross_matrix.sh
Executable file
88
scripts/aggregate/run_full_cross_matrix.sh
Executable file
@@ -0,0 +1,88 @@
|
||||
#!/bin/bash
# Run all missing cross-direction evals for A+C combo.
# Targets are routed to packets-npz or full_store as appropriate.
#
# An eval is skipped when its result JSON already exists or the source model
# is missing, so the script can be re-run. Chain output goes to
# /tmp/cross_matrix_gpu{0,1}.log; the output of each eval goes to the .log
# file next to its result.

set -e
# JANUS_ROOT overrides the checkout location; the default is the original path.
ROOT=${JANUS_ROOT:-/home/chy/JANUS}
EVAL=${ROOT}/Mixed_CFM/eval_cross.py
CROSS_DIR=${ROOT}/artifacts/route_comparison/cross
mkdir -p "${CROSS_DIR}"

# Target paths
TGT_iscxtor2016_npz=${ROOT}/datasets/iscxtor2016/processed/packets.npz
TGT_iscxtor2016_flows=${ROOT}/datasets/iscxtor2016/processed/flows.parquet
TGT_iscxtor2016_features=${ROOT}/datasets/iscxtor2016/processed/flow_features.parquet
TGT_iscxtor2016_label=nontor
TGT_iscxtor2016_natk=1888

TGT_cicids2017_store=${ROOT}/datasets/cicids2017/processed/full_store
TGT_cicids2017_flows=${ROOT}/datasets/cicids2017/processed/flows.parquet
TGT_cicids2017_features=${ROOT}/datasets/cicids2017/processed/flow_features.parquet
TGT_cicids2017_label=normal

TGT_cicddos2019_store=${ROOT}/datasets/cicddos2019/processed/full_store
TGT_cicddos2019_flows=${ROOT}/datasets/cicddos2019/processed/flows.parquet
TGT_cicddos2019_features=${ROOT}/datasets/cicddos2019/processed/flow_features.parquet
TGT_cicddos2019_label=normal

TGT_ciciot2023_store=${ROOT}/datasets/ciciot2023/processed/full_store
TGT_ciciot2023_flows=${ROOT}/datasets/ciciot2023/processed/full_store/flows.parquet
TGT_ciciot2023_features=${ROOT}/datasets/ciciot2023/processed/flow_features.parquet
TGT_ciciot2023_label=normal

# run_one GPU SRC TGT SEED
# Evaluate the A+C combo model trained on SRC (given seed) against TGT.
run_one() {
    local gpu=$1 src=$2 tgt=$3 seed=$4
    local md=${ROOT}/artifacts/route_comparison/route_ac_combo_${src}_seed${seed}
    local out=${CROSS_DIR}/route_ac_combo_seed${seed}_${src}_to_${tgt}.json
    if [ -f "${out}" ]; then echo "[skip] ${src}→${tgt} seed${seed}"; return; fi
    if [ ! -f "${md}/model.pt" ]; then echo "[missing] ${md}/model.pt"; return; fi

    # Resolve target args. An array keeps each path a single argument even
    # if it contains whitespace; the label / attack-count values come from
    # the TGT_* variables declared above.
    local -a tgt_args
    case "${tgt}" in
        iscxtor2016)
            tgt_args=(--target-packets-npz "${TGT_iscxtor2016_npz}"
                      --target-flows "${TGT_iscxtor2016_flows}"
                      --target-flow-features "${TGT_iscxtor2016_features}"
                      --benign-label "${TGT_iscxtor2016_label}"
                      --n-attack "${TGT_iscxtor2016_natk}")
            ;;
        cicids2017)
            tgt_args=(--target-store "${TGT_cicids2017_store}"
                      --target-flows "${TGT_cicids2017_flows}"
                      --target-flow-features "${TGT_cicids2017_features}"
                      --benign-label "${TGT_cicids2017_label}"
                      --n-attack 10000)
            ;;
        cicddos2019)
            tgt_args=(--target-store "${TGT_cicddos2019_store}"
                      --target-flows "${TGT_cicddos2019_flows}"
                      --target-flow-features "${TGT_cicddos2019_features}"
                      --benign-label "${TGT_cicddos2019_label}"
                      --n-attack 10000)
            ;;
        ciciot2023)
            tgt_args=(--target-store "${TGT_ciciot2023_store}"
                      --target-flows "${TGT_ciciot2023_flows}"
                      --target-flow-features "${TGT_ciciot2023_features}"
                      --benign-label "${TGT_ciciot2023_label}"
                      --n-attack 10000)
            ;;
        *)
            # Without this branch the eval would be launched with no target
            # arguments at all.
            echo "[error] unknown target '${tgt}'" >&2
            return 1
            ;;
    esac

    echo "[gpu${gpu}] ${src} → ${tgt} seed${seed}"
    cd "${ROOT}/Mixed_CFM"
    CUDA_VISIBLE_DEVICES=${gpu} stdbuf -oL uv run --no-sync python -u "${EVAL}" \
        --model-dir "${md}" \
        "${tgt_args[@]}" \
        --out "${out}" \
        --n-benign 10000 --seed 42 --T 64 --batch-size 256 --n-steps 16 \
        > "${CROSS_DIR}/route_ac_combo_seed${seed}_${src}_to_${tgt}.log" 2>&1
}

# 8 missing directions × 3 seeds = 24 evals
# Split across 2 GPUs to balance load
{
    for dir in "ciciot2023:iscxtor2016" "cicids2017:iscxtor2016" "cicddos2019:iscxtor2016" "iscxtor2016:cicids2017"; do
        src=${dir%:*}; tgt=${dir#*:}
        for seed in 42 43 44; do
            run_one 0 "${src}" "${tgt}" "${seed}"
        done
    done
    echo "[gpu0 done]"
} > /tmp/cross_matrix_gpu0.log 2>&1 &
G0=$!

{
    for dir in "cicids2017:ciciot2023" "cicddos2019:ciciot2023" "iscxtor2016:cicddos2019" "iscxtor2016:ciciot2023"; do
        src=${dir%:*}; tgt=${dir#*:}
        for seed in 42 43 44; do
            run_one 1 "${src}" "${tgt}" "${seed}"
        done
    done
    echo "[gpu1 done]"
} > /tmp/cross_matrix_gpu1.log 2>&1 &
G1=$!

# Wait for BOTH chains before deciding the outcome. A bare `wait $G0` under
# `set -e` would exit on the first failed chain and leave the other one
# running unattended.
rc=0
wait "${G0}" || rc=$?
wait "${G1}" || rc=$?
if [ "${rc}" -ne 0 ]; then
    echo "[cross matrix FAILED] see /tmp/cross_matrix_gpu0.log and /tmp/cross_matrix_gpu1.log" >&2
    exit "${rc}"
fi
echo "[all done]"
|
||||
45
scripts/aggregate/run_phase1_all.sh
Executable file
45
scripts/aggregate/run_phase1_all.sh
Executable file
@@ -0,0 +1,45 @@
|
||||
#!/bin/bash
# Run phase1 eval on all route_comparison models.
# Output: <model_dir>/phase1_summary.json + phase1_scores.npz
#
# Usage:
#   bash scripts/aggregate/run_phase1_all.sh [GPU_ID]
#
# Default GPU_ID = 0. Each eval takes ~3-5 min with the caps below.

set -e
# Without pipefail the status of `eval | tee | tail` is tail's, so a crashed
# eval would go unnoticed and still be reported as "[done]".
set -o pipefail
GPU_ID="${1:-0}"
# JANUS_ROOT overrides the checkout location; the default is the original path.
ROOT=${JANUS_ROOT:-/home/chy/JANUS}
EVAL=${ROOT}/artifacts/verify_2026_04_24/eval_phase1_unified.py

models=(
    baseline_ciciot2023_seed42
    baseline_ciciot2023_seed43
    baseline_ciciot2023_seed44
    route_a_causal_ciciot2023_seed42
    route_a_causal_ciciot2023_seed43
    route_a_causal_ciciot2023_seed44
)

cd "${ROOT}/Unified_CFM"
for name in "${models[@]}"; do
    model_dir=${ROOT}/artifacts/route_comparison/${name}
    if [ ! -f "${model_dir}/model.pt" ]; then
        echo "[skip] ${name}: model.pt missing"
        continue
    fi
    out_dir=${model_dir}
    if [ -f "${out_dir}/phase1_summary.json" ]; then
        echo "[skip] ${name}: phase1_summary.json exists"
        continue
    fi
    echo "[eval] ${name}"
    # Full output goes to phase1.log; only the last 5 lines reach the terminal.
    if ! CUDA_VISIBLE_DEVICES=${GPU_ID} stdbuf -oL uv run --no-sync python -u "${EVAL}" \
        --model-dir "${model_dir}" --out-dir "${out_dir}" \
        --batch-size 256 --n-steps 16 \
        --jacobian-n-eps 4 \
        --n-val-cap 5000 --n-atk-cap 10000 \
        2>&1 | tee "${model_dir}/phase1.log" | tail -5; then
        echo "[FAILED] ${name}: see ${model_dir}/phase1.log" >&2
        exit 1
    fi
    echo "[done] ${name}"
done
echo "[all done]"
|
||||
Reference in New Issue
Block a user