Rename A+C combo → JANUS: configs, model dirs, scripts, docs
This commit is contained in:
@@ -7,7 +7,7 @@ from sklearn.metrics import roc_auc_score
|
||||
ROOT = Path(__file__).resolve().parents[2] / 'artifacts' / 'route_comparison'
|
||||
CROSS_DIR = ROOT / 'cross'
|
||||
NAME_RE = re.compile('^(?P<route>.+?)_seed(?P<seed>\\d+)_to_(?P<target>cicids2017|cicddos2019)$')
|
||||
ROUTES = [('baseline', 'baseline'), ('A: causal', 'route_a_causal'), ('B: spectral', 'route_b_spectral'), ('C: mixed', 'route_c_mixed'), ('A+C combo', 'route_ac_combo')]
|
||||
ROUTES = [('baseline', 'baseline'), ('A: causal', 'route_a_causal'), ('B: spectral', 'route_b_spectral'), ('C: mixed', 'route_c_mixed'), ('JANUS', 'route_janus')]
|
||||
TARGETS = ['cicids2017', 'cicddos2019']
|
||||
PRIMARY_SCORES = ['terminal_norm', 'terminal_flow', 'terminal_packet', 'flow_consistency', 'packet_consistency', 'consistency_total', 'causal_surprisal_packet_median', 'causal_surprisal_total', 'direction_drift_packet_median', 'pna_packet_median', 'kappa2_speed2norm_packet_median', 'curvature_packet', 'disc_nll_total', 'disc_nll_ch3', 'disc_nll_ch7']
|
||||
|
||||
@@ -67,7 +67,7 @@ def main() -> None:
|
||||
header = '| Route | within-CICIoT2023 (ref) | → CICIDS2017 | → CICDDoS2019 |'
|
||||
rows.append(header)
|
||||
rows.append('|---|---|---|---|')
|
||||
within_fallback = {'baseline': (0.9612, 0.0017), 'A: causal': (0.9636, 0.0006), 'B: spectral': (0.9619, 0.0013), 'C: mixed': (0.9625, 0.0028), 'A+C combo': (0.9587, 0.0017)}
|
||||
within_fallback = {'baseline': (0.9612, 0.0017), 'A: causal': (0.9636, 0.0006), 'B: spectral': (0.9619, 0.0013), 'C: mixed': (0.9625, 0.0028), 'JANUS': (0.9587, 0.0017)}
|
||||
within_terminal: dict[str, tuple[float, float]] = {}
|
||||
for (label, prefix) in ROUTES:
|
||||
within_seeds = sorted(ROOT.glob(f'{prefix}_seed*/phase1_summary.json'))
|
||||
|
||||
@@ -41,11 +41,11 @@ def _mahal_eval(npz_path: Path, val_prefix: str, atk_prefix: str) -> float:
|
||||
return float('nan')
|
||||
|
||||
def _within_mahal(ds: str, seed: int) -> float:
|
||||
md = ROOT / f'route_ac_combo_{ds}_seed{seed}'
|
||||
md = ROOT / f'janus_{ds}_seed{seed}'
|
||||
return _mahal_eval(md / 'phase1_scores.npz', 'val_', 'atk_')
|
||||
|
||||
def _within_terminal_norm(ds: str, seed: int) -> float:
|
||||
f = ROOT / f'route_ac_combo_{ds}_seed{seed}' / 'phase1_summary.json'
|
||||
f = ROOT / f'janus_{ds}_seed{seed}' / 'phase1_summary.json'
|
||||
if not f.exists():
|
||||
return float('nan')
|
||||
return json.loads(f.read_text())['overall'].get('terminal_norm', {}).get('auroc', float('nan'))
|
||||
@@ -59,9 +59,9 @@ def _src_aliases(src: str) -> list[str]:
|
||||
def _cross_mahal(src: str, tgt: str, seed: int) -> float:
|
||||
candidates = []
|
||||
for alias in _src_aliases(src):
|
||||
candidates.append(CROSS_DIR / f'route_ac_combo_seed{seed}_{alias}_to_{tgt}.npz')
|
||||
candidates.append(CROSS_DIR / f'janus_seed{seed}_{alias}_to_{tgt}.npz')
|
||||
if src == 'ciciot2023':
|
||||
candidates.append(CROSS_DIR / f'route_ac_combo_seed{seed}_to_{tgt}.npz')
|
||||
candidates.append(CROSS_DIR / f'janus_seed{seed}_to_{tgt}.npz')
|
||||
for c in candidates:
|
||||
if c.exists():
|
||||
return _mahal_eval(c, 'b_', 'a_')
|
||||
@@ -70,9 +70,9 @@ def _cross_mahal(src: str, tgt: str, seed: int) -> float:
|
||||
def _cross_terminal_norm(src: str, tgt: str, seed: int) -> float:
|
||||
candidates = []
|
||||
for alias in _src_aliases(src):
|
||||
candidates.append(CROSS_DIR / f'route_ac_combo_seed{seed}_{alias}_to_{tgt}.json')
|
||||
candidates.append(CROSS_DIR / f'janus_seed{seed}_{alias}_to_{tgt}.json')
|
||||
if src == 'ciciot2023':
|
||||
candidates.append(CROSS_DIR / f'route_ac_combo_seed{seed}_to_{tgt}.json')
|
||||
candidates.append(CROSS_DIR / f'janus_seed{seed}_to_{tgt}.json')
|
||||
for c in candidates:
|
||||
if c.exists():
|
||||
d = json.loads(c.read_text())
|
||||
@@ -89,10 +89,10 @@ def _ms(vals: list[float]) -> str:
|
||||
|
||||
def main() -> None:
|
||||
rows: list[str] = []
|
||||
rows.append('# Full 4×4 Cross Matrix — A+C combo + Mahalanobis-OAS')
|
||||
rows.append('# Full 4×4 Cross Matrix — JANUS + Mahalanobis-OAS')
|
||||
rows.append('')
|
||||
rows.append('3-seed mean ± std. Diagonal = within-dataset; off-diagonal = cross.')
|
||||
rows.append('Aggregator: Mahalanobis-OAS over 10-d A+C combo score vector,')
|
||||
rows.append('Aggregator: Mahalanobis-OAS over 10-d JANUS score vector,')
|
||||
rows.append('fit on **target-dataset benign val only** (no attack labels).')
|
||||
rows.append('')
|
||||
rows.append('## Mahalanobis-OAS AUROC (4×4)')
|
||||
|
||||
@@ -5,7 +5,7 @@ from pathlib import Path
|
||||
import numpy as np
|
||||
ROOT = Path(__file__).resolve().parents[2] / 'artifacts' / 'route_comparison'
|
||||
SEED_RE = re.compile('_seed(\\d+)$')
|
||||
EXISTING_SOTA = {'ISCXTor2016 (NonTor → Tor)': {'shafir_baseline': 0.8731, 'shafir_ref': 'Table VI', 'ours_existing': (0.9945, 0.0011), 'ours_score': 'terminal_norm', 'sigma': 0.1, 'ac_prefix': 'route_ac_combo_iscxtor2016'}, 'CICIDS2017 within (Shafir 10k/10k)': {'shafir_baseline': 0.9303, 'shafir_ref': 'Table VII', 'ours_existing': (0.9858, 0.0021), 'ours_score': 'terminal_norm', 'sigma': 0.6, 'ac_prefix': 'route_ac_combo_cicids2017'}, 'CICDDoS2019 within': {'shafir_baseline': 0.93, 'shafir_ref': 'Table IX, row 1', 'ours_existing': (0.996, 0.001), 'ours_score': 'terminal_norm', 'sigma': 0.1, 'ac_prefix': 'route_ac_combo_cicddos2019'}, 'CICIoT2023 within (multi-seed)': {'shafir_baseline': None, 'shafir_ref': None, 'ours_existing': (0.9612, 0.0017), 'ours_score': 'terminal_norm', 'sigma': 0.1, 'ac_prefix': 'route_ac_combo_ciciot2023'}}
|
||||
EXISTING_SOTA = {'ISCXTor2016 (NonTor → Tor)': {'shafir_baseline': 0.8731, 'shafir_ref': 'Table VI', 'ours_existing': (0.9945, 0.0011), 'ours_score': 'terminal_norm', 'sigma': 0.1, 'ac_prefix': 'janus_iscxtor2016'}, 'CICIDS2017 within (Shafir 10k/10k)': {'shafir_baseline': 0.9303, 'shafir_ref': 'Table VII', 'ours_existing': (0.9858, 0.0021), 'ours_score': 'terminal_norm', 'sigma': 0.6, 'ac_prefix': 'janus_cicids2017'}, 'CICDDoS2019 within': {'shafir_baseline': 0.93, 'shafir_ref': 'Table IX, row 1', 'ours_existing': (0.996, 0.001), 'ours_score': 'terminal_norm', 'sigma': 0.1, 'ac_prefix': 'janus_cicddos2019'}, 'CICIoT2023 within (multi-seed)': {'shafir_baseline': None, 'shafir_ref': None, 'ours_existing': (0.9612, 0.0017), 'ours_score': 'terminal_norm', 'sigma': 0.1, 'ac_prefix': 'janus_ciciot2023'}}
|
||||
|
||||
def _seeds(prefix: str) -> dict[int, Path]:
|
||||
out = {}
|
||||
@@ -26,16 +26,16 @@ def _mean_std(vs: list[float]) -> tuple[float, float]:
|
||||
|
||||
def main() -> None:
|
||||
rows: list[str] = []
|
||||
rows.append('# SOTA Comparison: A+C combo vs existing UnifiedCFM')
|
||||
rows.append('# SOTA Comparison: JANUS vs existing UnifiedCFM')
|
||||
rows.append('')
|
||||
rows.append('All 4 datasets, 3 seeds each, within-dataset Shafir 10K/10K protocol.')
|
||||
rows.append('Existing UnifiedCFM uses Phase-2 consistency loss (λ_flow=λ_packet=0.3).')
|
||||
rows.append('A+C combo uses Mixed_CFM (continuous CFM + DFM) + causal-packet attention,')
|
||||
rows.append('JANUS uses Mixed_CFM (continuous CFM + DFM) + causal-packet attention,')
|
||||
rows.append('**no Phase-2 consistency loss**. lambda_disc=1.0, sigma=0.1, use_ot=True.')
|
||||
rows.append('')
|
||||
rows.append("## Headline: A+C combo's best score per dataset")
|
||||
rows.append("## Headline: JANUS's best score per dataset")
|
||||
rows.append('')
|
||||
rows.append('| Dataset | Shafir 2026 | Existing UnifiedCFM (SOTA) | A+C combo `terminal_norm` | A+C combo `terminal_packet` | A+C combo `disc_nll_total` | A+C best | New SOTA? |')
|
||||
rows.append('| Dataset | Shafir 2026 | Existing UnifiedCFM (SOTA) | JANUS `terminal_norm` | JANUS `terminal_packet` | JANUS `disc_nll_total` | JANUS best | New SOTA? |')
|
||||
rows.append('|---|---|---|---|---|---|---|---|')
|
||||
for (label, meta) in EXISTING_SOTA.items():
|
||||
seeds = _seeds(meta['ac_prefix'])
|
||||
|
||||
@@ -102,20 +102,20 @@ def main() -> None:
|
||||
rows.append('# Score-vector auto-selection: max-of-|z| / Mahalanobis vs fixed scores')
|
||||
rows.append('')
|
||||
rows.append('Aggregators are fit on **benign val only** (no attack labels). All numbers')
|
||||
rows.append('are 3-seed mean ± std on A+C combo (Mixed_CFM + causal-packet attention).')
|
||||
rows.append('are 3-seed mean ± std on JANUS (Mixed_CFM + causal-packet attention).')
|
||||
rows.append('')
|
||||
rows.append('Note on fairness: `auc_best_fixed` is selection-biased (picks per-dataset best')
|
||||
rows.append('score post-hoc on test set). `max_abs_z` and `mahalanobis` are NOT — they only')
|
||||
rows.append('use benign val to fit aggregator parameters.')
|
||||
rows.append('')
|
||||
rows.append("## Within-dataset(A+C combo on each dataset's own benign/attack)")
|
||||
rows.append("## Within-dataset(JANUS on each dataset's own benign/attack)")
|
||||
rows.append('')
|
||||
rows.append('| Dataset | term_norm | best fixed | max-\\|z\\| (all) | mahal-OAS (all) | **mahal-OAS (term3)** | **mahal-OAS (disc7)** |')
|
||||
rows.append('|---|---|---|---|---|---|---|')
|
||||
for ds in WITHIN_DATASETS:
|
||||
rows_per_seed: list[dict] = []
|
||||
for s in SEEDS:
|
||||
md = ROOT / f'route_ac_combo_{ds}_seed{s}'
|
||||
md = ROOT / f'janus_{ds}_seed{s}'
|
||||
npz = md / 'phase1_scores.npz'
|
||||
if not npz.exists():
|
||||
continue
|
||||
@@ -129,14 +129,14 @@ def main() -> None:
|
||||
return f'{m:.4f} ± {sd:.4f}'
|
||||
rows.append(f"| {ds} | {col('auc_term_norm')} | {col('auc_best_fixed')} | {col('auc_max_abs_z_all')} | {col('auc_mahal_oas_all')} | **{col('auc_mahal_oas_terminal3')}** | **{col('auc_mahal_oas_disc7')}** |")
|
||||
rows.append('')
|
||||
rows.append('## Cross-dataset(A+C combo trained on CICIoT2023 → eval on target)')
|
||||
rows.append('## Cross-dataset(JANUS trained on CICIoT2023 → eval on target)')
|
||||
rows.append('')
|
||||
rows.append('| Target | term_norm | best fixed | max-\\|z\\| (all) | mahal-OAS (all) | **mahal-OAS (term3)** | **mahal-OAS (disc7)** |')
|
||||
rows.append('|---|---|---|---|---|---|---|')
|
||||
for tgt in CROSS_TARGETS:
|
||||
rows_per_seed: list[dict] = []
|
||||
for s in SEEDS:
|
||||
npz = CROSS_DIR / f'route_ac_combo_seed{s}_to_{tgt}.npz'
|
||||
npz = CROSS_DIR / f'janus_seed{s}_to_{tgt}.npz'
|
||||
if not npz.exists():
|
||||
continue
|
||||
rows_per_seed.append(_evaluate(npz, 'b_', 'a_'))
|
||||
@@ -156,7 +156,7 @@ def main() -> None:
|
||||
for ds in WITHIN_DATASETS:
|
||||
cells = [f'within {ds}']
|
||||
for s in SEEDS:
|
||||
npz = ROOT / f'route_ac_combo_{ds}_seed{s}/phase1_scores.npz'
|
||||
npz = ROOT / f'janus_{ds}_seed{s}/phase1_scores.npz'
|
||||
if not npz.exists():
|
||||
cells.append('—')
|
||||
continue
|
||||
@@ -166,7 +166,7 @@ def main() -> None:
|
||||
for tgt in CROSS_TARGETS:
|
||||
cells = [f'cross→{tgt}']
|
||||
for s in SEEDS:
|
||||
npz = CROSS_DIR / f'route_ac_combo_seed{s}_to_{tgt}.npz'
|
||||
npz = CROSS_DIR / f'janus_seed{s}_to_{tgt}.npz'
|
||||
if not npz.exists():
|
||||
cells.append('—')
|
||||
continue
|
||||
|
||||
@@ -6,7 +6,7 @@ import numpy as np
|
||||
from sklearn.metrics import roc_auc_score
|
||||
ROOT = Path(__file__).resolve().parents[2] / 'artifacts' / 'route_comparison'
|
||||
SEED_RE = re.compile('_seed(\\d+)$')
|
||||
ROUTES = [('baseline', 'baseline_ciciot2023'), ('A: causal', 'route_a_causal_ciciot2023'), ('B: spectral', 'route_b_spectral_ciciot2023'), ('C: mixed', 'route_c_mixed_ciciot2023'), ('A+C combo', 'route_ac_combo_ciciot2023')]
|
||||
ROUTES = [('baseline', 'baseline_ciciot2023'), ('A: causal', 'route_a_causal_ciciot2023'), ('B: spectral', 'route_b_spectral_ciciot2023'), ('C: mixed', 'route_c_mixed_ciciot2023'), ('JANUS', 'janus_ciciot2023')]
|
||||
|
||||
def _seeds(prefix: str) -> dict[int, Path]:
|
||||
out = {}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#!/bin/bash
|
||||
# Run all missing cross-direction evals for A+C combo.
|
||||
# Run all missing cross-direction evals for JANUS.
|
||||
# Targets are routed to packets-npz or full_store as appropriate.
|
||||
|
||||
set -e
|
||||
@@ -32,8 +32,8 @@ TGT_ciciot2023_label=normal
|
||||
|
||||
run_one() {
|
||||
local gpu=$1 src=$2 tgt=$3 seed=$4
|
||||
local md=${ROOT}/artifacts/route_comparison/route_ac_combo_${src}_seed${seed}
|
||||
local out=${CROSS_DIR}/route_ac_combo_seed${seed}_${src}_to_${tgt}.json
|
||||
local md=${ROOT}/artifacts/route_comparison/janus_${src}_seed${seed}
|
||||
local out=${CROSS_DIR}/janus_seed${seed}_${src}_to_${tgt}.json
|
||||
if [ -f "${out}" ]; then echo "[skip] ${src}→${tgt} seed${seed}"; return; fi
|
||||
if [ ! -f "${md}/model.pt" ]; then echo "[missing] ${md}/model.pt"; return; fi
|
||||
|
||||
@@ -56,7 +56,7 @@ run_one() {
|
||||
${tgt_args} \
|
||||
--out ${out} \
|
||||
--n-benign 10000 --seed 42 --T 64 --batch-size 256 --n-steps 16 \
|
||||
> ${CROSS_DIR}/route_ac_combo_seed${seed}_${src}_to_${tgt}.log 2>&1
|
||||
> ${CROSS_DIR}/janus_seed${seed}_${src}_to_${tgt}.log 2>&1
|
||||
}
|
||||
|
||||
# 8 missing directions × 3 seeds = 24 evals
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#!/bin/bash
|
||||
# Phase1 + cross eval for the 3 A+C combo seeds.
|
||||
# Phase1 + cross eval for the 3 JANUS seeds.
|
||||
set -e
|
||||
ROOT=/home/chy/JANUS
|
||||
MIXED_PHASE1=${ROOT}/Mixed_CFM/eval_phase1.py
|
||||
@@ -10,7 +10,7 @@ mkdir -p ${CROSS_DIR}
|
||||
# GPU 0: phase1 + cross→IDS2017 for all 3 seeds
|
||||
{
|
||||
for seed in 42 43 44; do
|
||||
md=${ROOT}/artifacts/route_comparison/route_ac_combo_ciciot2023_seed${seed}
|
||||
md=${ROOT}/artifacts/route_comparison/janus_ciciot2023_seed${seed}
|
||||
[ -f "${md}/model.pt" ] || { echo "[wait] seed${seed} model.pt not yet"; continue; }
|
||||
|
||||
if [ ! -f "${md}/phase1_summary.json" ]; then
|
||||
@@ -23,7 +23,7 @@ for seed in 42 43 44; do
|
||||
> ${md}/phase1.log 2>&1
|
||||
fi
|
||||
|
||||
ids_out=${CROSS_DIR}/route_ac_combo_seed${seed}_to_cicids2017.json
|
||||
ids_out=${CROSS_DIR}/janus_seed${seed}_to_cicids2017.json
|
||||
if [ ! -f "${ids_out}" ]; then
|
||||
echo "[gpu0 cross→ids2017] seed${seed}"
|
||||
cd ${ROOT}/Mixed_CFM
|
||||
@@ -34,7 +34,7 @@ for seed in 42 43 44; do
|
||||
--target-flow-features ${ROOT}/datasets/cicids2017/processed/flow_features.parquet \
|
||||
--out ${ids_out} \
|
||||
--n-benign 10000 --n-attack 10000 --seed 42 --T 64 --batch-size 256 --n-steps 16 \
|
||||
> ${CROSS_DIR}/route_ac_combo_seed${seed}_to_cicids2017.log 2>&1
|
||||
> ${CROSS_DIR}/janus_seed${seed}_to_cicids2017.log 2>&1
|
||||
fi
|
||||
done
|
||||
echo "[gpu0 done]"
|
||||
@@ -44,10 +44,10 @@ GPU0=$!
|
||||
# GPU 1: cross→DDoS19 for all 3 seeds
|
||||
{
|
||||
for seed in 42 43 44; do
|
||||
md=${ROOT}/artifacts/route_comparison/route_ac_combo_ciciot2023_seed${seed}
|
||||
md=${ROOT}/artifacts/route_comparison/janus_ciciot2023_seed${seed}
|
||||
[ -f "${md}/model.pt" ] || { echo "[wait] seed${seed} model.pt not yet"; continue; }
|
||||
|
||||
ddos_out=${CROSS_DIR}/route_ac_combo_seed${seed}_to_cicddos2019.json
|
||||
ddos_out=${CROSS_DIR}/janus_seed${seed}_to_cicddos2019.json
|
||||
if [ ! -f "${ddos_out}" ]; then
|
||||
echo "[gpu1 cross→ddos19] seed${seed}"
|
||||
cd ${ROOT}/Mixed_CFM
|
||||
@@ -58,7 +58,7 @@ for seed in 42 43 44; do
|
||||
--target-flow-features ${ROOT}/datasets/cicddos2019/processed/flow_features.parquet \
|
||||
--out ${ddos_out} \
|
||||
--n-benign 10000 --n-attack 10000 --seed 42 --T 64 --batch-size 256 --n-steps 16 \
|
||||
> ${CROSS_DIR}/route_ac_combo_seed${seed}_to_cicddos2019.log 2>&1
|
||||
> ${CROSS_DIR}/janus_seed${seed}_to_cicddos2019.log 2>&1
|
||||
fi
|
||||
done
|
||||
echo "[gpu1 done]"
|
||||
Reference in New Issue
Block a user