ablation: add Group A (aggregator) + Group B (architecture) infrastructure

Extends MixedCFMConfig with 5 backwards-compatible flags (use_flow_token, n_packet_tokens, disc_as_cont, cont_as_disc + cont_n_bins) so existing JANUS-full checkpoints load with 0 missing/unexpected keys.

Adds:
- 60 ablation training configs (5 variants × 4 datasets × 3 seeds)
- scripts/ablation/{generate_configs.py, run_groupB.sh, run_cross_groupB.sh, smoke_test.sh} — config generation + GPU drivers
- scripts/aggregate/aggregate_ablation{,_cross,_cross_B}.py — produces within-dataset and cross-dataset (3×3) ablation tables with 3-seed mean ± 95% t-CI plus optional paired DeLong p-values

README updated with ablation section pointing at artifacts/ablation/ABLATION_SUMMARY.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 23:59:27 +08:00
parent 1d8862fbeb
commit a6bcbbd299
72 changed files with 3642 additions and 96 deletions
--- a/Mixed_CFM/configs/ablation/b1_noflow/cicddos2019_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b1_noflow/cicddos2019_seed42.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b1_noflow
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+use_flow_token: false
--- a/Mixed_CFM/configs/ablation/b1_noflow/cicddos2019_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b1_noflow/cicddos2019_seed43.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b1_noflow
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+use_flow_token: false
--- a/Mixed_CFM/configs/ablation/b1_noflow/cicddos2019_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b1_noflow/cicddos2019_seed44.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b1_noflow
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+use_flow_token: false
--- a/Mixed_CFM/configs/ablation/b1_noflow/cicids2017_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b1_noflow/cicids2017_seed42.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b1_noflow
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+use_flow_token: false
--- a/Mixed_CFM/configs/ablation/b1_noflow/cicids2017_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b1_noflow/cicids2017_seed43.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b1_noflow
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+use_flow_token: false
--- a/Mixed_CFM/configs/ablation/b1_noflow/cicids2017_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b1_noflow/cicids2017_seed44.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b1_noflow
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+use_flow_token: false
--- a/Mixed_CFM/configs/ablation/b1_noflow/ciciot2023_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b1_noflow/ciciot2023_seed42.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b1_noflow
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+device: auto
+reference_mode: causal_packets
+use_flow_token: false
--- a/Mixed_CFM/configs/ablation/b1_noflow/ciciot2023_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b1_noflow/ciciot2023_seed43.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b1_noflow
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+device: auto
+reference_mode: causal_packets
+use_flow_token: false
--- a/Mixed_CFM/configs/ablation/b1_noflow/ciciot2023_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b1_noflow/ciciot2023_seed44.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b1_noflow
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+device: auto
+reference_mode: causal_packets
+use_flow_token: false
--- a/Mixed_CFM/configs/ablation/b1_noflow/iscxtor2016_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b1_noflow/iscxtor2016_seed42.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b1_noflow
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+use_flow_token: false
--- a/Mixed_CFM/configs/ablation/b1_noflow/iscxtor2016_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b1_noflow/iscxtor2016_seed43.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b1_noflow
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+use_flow_token: false
--- a/Mixed_CFM/configs/ablation/b1_noflow/iscxtor2016_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b1_noflow/iscxtor2016_seed44.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b1_noflow
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+use_flow_token: false
--- a/Mixed_CFM/configs/ablation/b2_flowonly/cicddos2019_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b2_flowonly/cicddos2019_seed42.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b2_flowonly
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+n_packet_tokens: 0
--- a/Mixed_CFM/configs/ablation/b2_flowonly/cicddos2019_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b2_flowonly/cicddos2019_seed43.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b2_flowonly
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+n_packet_tokens: 0
--- a/Mixed_CFM/configs/ablation/b2_flowonly/cicddos2019_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b2_flowonly/cicddos2019_seed44.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b2_flowonly
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+n_packet_tokens: 0
--- a/Mixed_CFM/configs/ablation/b2_flowonly/cicids2017_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b2_flowonly/cicids2017_seed42.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b2_flowonly
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+n_packet_tokens: 0
--- a/Mixed_CFM/configs/ablation/b2_flowonly/cicids2017_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b2_flowonly/cicids2017_seed43.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b2_flowonly
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+n_packet_tokens: 0
--- a/Mixed_CFM/configs/ablation/b2_flowonly/cicids2017_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b2_flowonly/cicids2017_seed44.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b2_flowonly
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+n_packet_tokens: 0
--- a/Mixed_CFM/configs/ablation/b2_flowonly/ciciot2023_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b2_flowonly/ciciot2023_seed42.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b2_flowonly
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+device: auto
+reference_mode: causal_packets
+n_packet_tokens: 0
--- a/Mixed_CFM/configs/ablation/b2_flowonly/ciciot2023_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b2_flowonly/ciciot2023_seed43.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b2_flowonly
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+device: auto
+reference_mode: causal_packets
+n_packet_tokens: 0
--- a/Mixed_CFM/configs/ablation/b2_flowonly/ciciot2023_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b2_flowonly/ciciot2023_seed44.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b2_flowonly
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+device: auto
+reference_mode: causal_packets
+n_packet_tokens: 0
--- a/Mixed_CFM/configs/ablation/b2_flowonly/iscxtor2016_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b2_flowonly/iscxtor2016_seed42.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b2_flowonly
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+n_packet_tokens: 0
--- a/Mixed_CFM/configs/ablation/b2_flowonly/iscxtor2016_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b2_flowonly/iscxtor2016_seed43.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b2_flowonly
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+n_packet_tokens: 0
--- a/Mixed_CFM/configs/ablation/b2_flowonly/iscxtor2016_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b2_flowonly/iscxtor2016_seed44.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b2_flowonly
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+n_packet_tokens: 0
--- a/Mixed_CFM/configs/ablation/b3_allcont/cicddos2019_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b3_allcont/cicddos2019_seed42.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b3_allcont
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+disc_as_cont: true
--- a/Mixed_CFM/configs/ablation/b3_allcont/cicddos2019_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b3_allcont/cicddos2019_seed43.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b3_allcont
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+disc_as_cont: true
--- a/Mixed_CFM/configs/ablation/b3_allcont/cicddos2019_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b3_allcont/cicddos2019_seed44.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b3_allcont
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+disc_as_cont: true
--- a/Mixed_CFM/configs/ablation/b3_allcont/cicids2017_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b3_allcont/cicids2017_seed42.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b3_allcont
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+disc_as_cont: true
--- a/Mixed_CFM/configs/ablation/b3_allcont/cicids2017_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b3_allcont/cicids2017_seed43.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b3_allcont
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+disc_as_cont: true
--- a/Mixed_CFM/configs/ablation/b3_allcont/cicids2017_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b3_allcont/cicids2017_seed44.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b3_allcont
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+disc_as_cont: true
--- a/Mixed_CFM/configs/ablation/b3_allcont/ciciot2023_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b3_allcont/ciciot2023_seed42.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b3_allcont
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+device: auto
+reference_mode: causal_packets
+disc_as_cont: true
--- a/Mixed_CFM/configs/ablation/b3_allcont/ciciot2023_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b3_allcont/ciciot2023_seed43.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b3_allcont
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+device: auto
+reference_mode: causal_packets
+disc_as_cont: true
--- a/Mixed_CFM/configs/ablation/b3_allcont/ciciot2023_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b3_allcont/ciciot2023_seed44.yaml
@@ -0,0 +1,36 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b3_allcont
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+device: auto
+reference_mode: causal_packets
+disc_as_cont: true
--- a/Mixed_CFM/configs/ablation/b3_allcont/iscxtor2016_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b3_allcont/iscxtor2016_seed42.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b3_allcont
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+disc_as_cont: true
--- a/Mixed_CFM/configs/ablation/b3_allcont/iscxtor2016_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b3_allcont/iscxtor2016_seed43.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b3_allcont
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+disc_as_cont: true
--- a/Mixed_CFM/configs/ablation/b3_allcont/iscxtor2016_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b3_allcont/iscxtor2016_seed44.yaml
@@ -0,0 +1,34 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b3_allcont
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
+disc_as_cont: true
--- a/Mixed_CFM/configs/ablation/b4_alldisc/cicddos2019_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b4_alldisc/cicddos2019_seed42.yaml
@@ -0,0 +1,37 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b4_alldisc
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+cont_as_disc: true
+n_disc_classes: 8
--- a/Mixed_CFM/configs/ablation/b4_alldisc/cicddos2019_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b4_alldisc/cicddos2019_seed43.yaml
@@ -0,0 +1,37 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b4_alldisc
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+cont_as_disc: true
+n_disc_classes: 8
--- a/Mixed_CFM/configs/ablation/b4_alldisc/cicddos2019_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b4_alldisc/cicddos2019_seed44.yaml
@@ -0,0 +1,37 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b4_alldisc
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+cont_as_disc: true
+n_disc_classes: 8
--- a/Mixed_CFM/configs/ablation/b4_alldisc/cicids2017_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b4_alldisc/cicids2017_seed42.yaml
@@ -0,0 +1,35 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b4_alldisc
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+cont_as_disc: true
+n_disc_classes: 8
--- a/Mixed_CFM/configs/ablation/b4_alldisc/cicids2017_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b4_alldisc/cicids2017_seed43.yaml
@@ -0,0 +1,35 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b4_alldisc
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+cont_as_disc: true
+n_disc_classes: 8
--- a/Mixed_CFM/configs/ablation/b4_alldisc/cicids2017_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b4_alldisc/cicids2017_seed44.yaml
@@ -0,0 +1,35 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b4_alldisc
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+cont_as_disc: true
+n_disc_classes: 8
--- a/Mixed_CFM/configs/ablation/b4_alldisc/ciciot2023_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b4_alldisc/ciciot2023_seed42.yaml
@@ -0,0 +1,37 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b4_alldisc
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+device: auto
+reference_mode: causal_packets
+cont_as_disc: true
+n_disc_classes: 8
--- a/Mixed_CFM/configs/ablation/b4_alldisc/ciciot2023_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b4_alldisc/ciciot2023_seed43.yaml
@@ -0,0 +1,37 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b4_alldisc
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+device: auto
+reference_mode: causal_packets
+cont_as_disc: true
+n_disc_classes: 8
--- a/Mixed_CFM/configs/ablation/b4_alldisc/ciciot2023_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b4_alldisc/ciciot2023_seed44.yaml
@@ -0,0 +1,37 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b4_alldisc
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+device: auto
+reference_mode: causal_packets
+cont_as_disc: true
+n_disc_classes: 8
--- a/Mixed_CFM/configs/ablation/b4_alldisc/iscxtor2016_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b4_alldisc/iscxtor2016_seed42.yaml
@@ -0,0 +1,35 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b4_alldisc
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+cont_as_disc: true
+n_disc_classes: 8
--- a/Mixed_CFM/configs/ablation/b4_alldisc/iscxtor2016_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b4_alldisc/iscxtor2016_seed43.yaml
@@ -0,0 +1,35 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b4_alldisc
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+cont_as_disc: true
+n_disc_classes: 8
--- a/Mixed_CFM/configs/ablation/b4_alldisc/iscxtor2016_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b4_alldisc/iscxtor2016_seed44.yaml
@@ -0,0 +1,35 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b4_alldisc
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 1.0
+reference_mode: causal_packets
+device: auto
+cont_as_disc: true
+n_disc_classes: 8
--- a/Mixed_CFM/configs/ablation/b5_nodisc/cicddos2019_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b5_nodisc/cicddos2019_seed42.yaml
@@ -0,0 +1,35 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b5_nodisc
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
--- a/Mixed_CFM/configs/ablation/b5_nodisc/cicddos2019_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b5_nodisc/cicddos2019_seed43.yaml
@@ -0,0 +1,35 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b5_nodisc
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
--- a/Mixed_CFM/configs/ablation/b5_nodisc/cicddos2019_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b5_nodisc/cicddos2019_seed44.yaml
@@ -0,0 +1,35 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b5_nodisc
+source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+val_cap: 20000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 10000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
--- a/Mixed_CFM/configs/ablation/b5_nodisc/cicids2017_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b5_nodisc/cicids2017_seed42.yaml
@@ -0,0 +1,33 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b5_nodisc
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
--- a/Mixed_CFM/configs/ablation/b5_nodisc/cicids2017_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b5_nodisc/cicids2017_seed43.yaml
@@ -0,0 +1,33 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b5_nodisc
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
--- a/Mixed_CFM/configs/ablation/b5_nodisc/cicids2017_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b5_nodisc/cicids2017_seed44.yaml
@@ -0,0 +1,33 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b5_nodisc
+packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
--- a/Mixed_CFM/configs/ablation/b5_nodisc/ciciot2023_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b5_nodisc/ciciot2023_seed42.yaml
@@ -0,0 +1,35 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b5_nodisc
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+device: auto
+reference_mode: causal_packets
--- a/Mixed_CFM/configs/ablation/b5_nodisc/ciciot2023_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b5_nodisc/ciciot2023_seed43.yaml
@@ -0,0 +1,35 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b5_nodisc
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+device: auto
+reference_mode: causal_packets
--- a/Mixed_CFM/configs/ablation/b5_nodisc/ciciot2023_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b5_nodisc/ciciot2023_seed44.yaml
@@ -0,0 +1,35 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b5_nodisc
+source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
+flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: normal
+val_cap: 10000
+attack_cap: 20000
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+device: auto
+reference_mode: causal_packets
--- a/Mixed_CFM/configs/ablation/b5_nodisc/iscxtor2016_seed42.yaml
+++ b/Mixed_CFM/configs/ablation/b5_nodisc/iscxtor2016_seed42.yaml
@@ -0,0 +1,33 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b5_nodisc
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 42
+data_seed: 42
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
--- a/Mixed_CFM/configs/ablation/b5_nodisc/iscxtor2016_seed43.yaml
+++ b/Mixed_CFM/configs/ablation/b5_nodisc/iscxtor2016_seed43.yaml
@@ -0,0 +1,33 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b5_nodisc
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 43
+data_seed: 43
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
--- a/Mixed_CFM/configs/ablation/b5_nodisc/iscxtor2016_seed44.yaml
+++ b/Mixed_CFM/configs/ablation/b5_nodisc/iscxtor2016_seed44.yaml
@@ -0,0 +1,33 @@
+save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b5_nodisc
+packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
+flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
+flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
+flow_features_align: auto
+T: 64
+n_train: 10000
+min_len: 2
+seed: 44
+data_seed: 44
+train_ratio: 0.8
+benign_label: nontor
+d_model: 128
+n_layers: 4
+n_heads: 4
+mlp_ratio: 4.0
+time_dim: 64
+token_dim: null
+batch_size: 256
+num_workers: 0
+epochs: 50
+lr: 0.0003
+weight_decay: 0.01
+grad_clip: 1.0
+eval_every: 10
+eval_n: 20000
+eval_batch_size: 512
+eval_n_steps: 8
+sigma: 0.1
+use_ot: true
+lambda_disc: 0.0
+reference_mode: causal_packets
+device: auto
--- a/Mixed_CFM/eval_cross.py
+++ b/Mixed_CFM/eval_cross.py
@@ -20,7 +20,7 @@ def _device(arg: str) -> torch.device:
        return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return torch.device(arg)

-def _score_batch(model, flow_z, cont_z, disc_int, lens, device, batch_size=256, n_steps=16):
+def _score_batch(model, flow_z, cont_z, disc_int, lens, device, batch_size=256, n_steps=16, cont_bin_edges=None):
    out: dict[str, list[np.ndarray]] = {}
    for start in range(0, len(flow_z), batch_size):
        sl = slice(start, start + batch_size)
@@ -29,8 +29,8 @@ def _score_batch(model, flow_z, cont_z, disc_int, lens, device, batch_size=256,
        d = torch.from_numpy(disc_int[sl]).long().to(device)
        l = torch.from_numpy(lens[sl]).long().to(device)
        with torch.no_grad():
-            traj = model.trajectory_metrics(f, c, d, l, n_steps=n_steps)
-            nll = model.disc_nll_score(f, c, d, l)
+            traj = model.trajectory_metrics(f, c, d, l, n_steps=n_steps, cont_bin_edges=cont_bin_edges)
+            nll = model.disc_nll_score(f, c, d, l, cont_bin_edges=cont_bin_edges)
        for src in (traj, nll):
            for (k, v) in src.items():
                out.setdefault(k, []).append(v.detach().cpu().numpy())
@@ -63,6 +63,10 @@ def main() -> None:
    model = MixedTokenCFM(model_cfg).to(device)
    model.load_state_dict(ckpt['model_state_dict'])
    model.eval()
+    cont_bin_edges = None
+    if 'cont_bin_edges' in ckpt:
+        cont_bin_edges = torch.from_numpy(np.asarray(ckpt['cont_bin_edges'])).to(device)
+        print(f'[model] cont_bin_edges shape={tuple(cont_bin_edges.shape)} (B4 mode; src edges applied to target)')
    cont_mean = np.asarray(ckpt['cont_mean'], dtype=np.float32)
    cont_std = np.asarray(ckpt['cont_std'], dtype=np.float32)
    flow_mean = np.asarray(ckpt['flow_mean'], dtype=np.float32)
@@ -140,11 +144,11 @@ def main() -> None:
    a_flow_z = ((a_flow - flow_mean) / np.maximum(flow_std, 1e-06)).astype(np.float32)
    t0 = time.time()
    print('[eval] benign...')
-    b_scores = _score_batch(model, b_flow_z, b_cont, b_disc, b_len, device, batch_size=args.batch_size, n_steps=args.n_steps)
+    b_scores = _score_batch(model, b_flow_z, b_cont, b_disc, b_len, device, batch_size=args.batch_size, n_steps=args.n_steps, cont_bin_edges=cont_bin_edges)
    print(f'[eval] benign done {time.time() - t0:.1f}s')
    t0 = time.time()
    print('[eval] attack...')
-    a_scores = _score_batch(model, a_flow_z, a_cont, a_disc, a_len, device, batch_size=args.batch_size, n_steps=args.n_steps)
+    a_scores = _score_batch(model, a_flow_z, a_cont, a_disc, a_len, device, batch_size=args.batch_size, n_steps=args.n_steps, cont_bin_edges=cont_bin_edges)
    print(f'[eval] attack done {time.time() - t0:.1f}s')
    keys = sorted(set(b_scores) & set(a_scores))
    overall = {}
--- a/Mixed_CFM/eval_phase1.py
+++ b/Mixed_CFM/eval_phase1.py
@@ -18,7 +18,7 @@ def _device(arg: str) -> torch.device:
        return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return torch.device(arg)

-def _score_batch(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray, disc_np: np.ndarray, len_np: np.ndarray, device: torch.device, *, batch_size: int, n_steps: int) -> dict[str, np.ndarray]:
+def _score_batch(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray, disc_np: np.ndarray, len_np: np.ndarray, device: torch.device, *, batch_size: int, n_steps: int, cont_bin_edges: torch.Tensor | None = None) -> dict[str, np.ndarray]:
    out: dict[str, list[np.ndarray]] = {}
    for start in range(0, len(flow_np), batch_size):
        sl = slice(start, start + batch_size)
@@ -27,8 +27,8 @@ def _score_batch(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray,
        disc = torch.from_numpy(disc_np[sl]).long().to(device)
        lens = torch.from_numpy(len_np[sl]).long().to(device)
        with torch.no_grad():
-            traj = model.trajectory_metrics(flow, cont, disc, lens, n_steps=n_steps)
-            nll = model.disc_nll_score(flow, cont, disc, lens)
+            traj = model.trajectory_metrics(flow, cont, disc, lens, n_steps=n_steps, cont_bin_edges=cont_bin_edges)
+            nll = model.disc_nll_score(flow, cont, disc, lens, cont_bin_edges=cont_bin_edges)
        for d in (traj, nll):
            for (k, v) in d.items():
                out.setdefault(k, []).append(v.detach().cpu().numpy())
@@ -65,7 +65,11 @@ def main() -> None:
    model = MixedTokenCFM(model_cfg).to(device)
    model.load_state_dict(ckpt['model_state_dict'])
    model.eval()
-    print(f'[model] T={model_cfg.T} flow_dim={model_cfg.flow_dim}')
+    cont_bin_edges = None
+    if 'cont_bin_edges' in ckpt:
+        cont_bin_edges = torch.from_numpy(np.asarray(ckpt['cont_bin_edges'])).to(device)
+        print(f'[model] cont_bin_edges shape={tuple(cont_bin_edges.shape)} (B4 mode)')
+    print(f'[model] T={model_cfg.T} flow_dim={model_cfg.flow_dim} use_flow_token={model_cfg.use_flow_token} n_packet_tokens={model_cfg.n_packet_tokens} disc_as_cont={model_cfg.disc_as_cont} cont_as_disc={model_cfg.cont_as_disc}')
    data = load_mixed_data(packets_npz=Path(cfg['packets_npz']) if cfg.get('packets_npz') else None, source_store=Path(cfg['source_store']) if cfg.get('source_store') else None, flows_parquet=Path(cfg['flows_parquet']), flow_features_path=Path(cfg['flow_features_path']), flow_features_align=str(cfg.get('flow_features_align', 'auto')), T=int(cfg['T']), split_seed=int(cfg.get('data_seed', cfg.get('seed', 42))), train_ratio=float(cfg.get('train_ratio', 0.8)), benign_label=str(cfg.get('benign_label', 'normal')), min_len=int(cfg.get('min_len', 2)), attack_cap=int(cfg['attack_cap']) if cfg.get('attack_cap') else None, val_cap=int(cfg['val_cap']) if cfg.get('val_cap') else None)
    print(f'[data] val={len(data.val_flow):,} attack={len(data.attack_flow):,}')
    rng = np.random.default_rng(0)
@@ -81,10 +85,10 @@ def main() -> None:
        atk_labels = atk_labels[idx]
    print(f'[eval] scoring val={len(val_flow):,} atk={len(atk_flow):,}')
    t0 = time.time()
-    val = _score_batch(model, val_flow, val_cont, val_disc, val_len, device, batch_size=args.batch_size, n_steps=args.n_steps)
+    val = _score_batch(model, val_flow, val_cont, val_disc, val_len, device, batch_size=args.batch_size, n_steps=args.n_steps, cont_bin_edges=cont_bin_edges)
    print(f'[eval] val done {time.time() - t0:.1f}s')
    t0 = time.time()
-    atk = _score_batch(model, atk_flow, atk_cont, atk_disc, atk_len, device, batch_size=args.batch_size, n_steps=args.n_steps)
+    atk = _score_batch(model, atk_flow, atk_cont, atk_disc, atk_len, device, batch_size=args.batch_size, n_steps=args.n_steps, cont_bin_edges=cont_bin_edges)
    print(f'[eval] atk done {time.time() - t0:.1f}s')
    keys = sorted(set(val) & set(atk))
    overall: dict[str, dict[str, float]] = {}
--- a/Mixed_CFM/model.py
+++ b/Mixed_CFM/model.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 import math
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -19,6 +19,7 @@ AdaLNBlock = _unified.AdaLNBlock
 SinusoidalTimeEmb = _unified.SinusoidalTimeEmb
 _sinkhorn_coupling = _unified._sinkhorn_coupling

+
@dataclass
 class MixedCFMConfig:
    T: int = 64
@@ -40,6 +41,11 @@ class MixedCFMConfig:
    lambda_disc: float = 1.0
    disc_path: str = 'uniform'
    disc_embed_scale: float = 1.0
+    # ---- B-group ablation flags (defaults preserve JANUS-full behavior) ----
+    use_flow_token: bool = True       # B1: False removes the [FLOW] token
+    n_packet_tokens: int = -1         # B2: 0 removes packet tokens entirely; -1 = use cfg.T
+    disc_as_cont: bool = False        # B3: feed 6 disc bits through CFM head as continuous values
+    cont_as_disc: bool = False        # B4: quantize 3 cont channels into n_disc_classes bins (mask-pred only)

    def __post_init__(self) -> None:
        if len(self.cont_pkt_idx) != self.n_cont_pkt:
@@ -48,10 +54,13 @@ class MixedCFMConfig:
            raise ValueError('disc_pkt_idx length mismatch n_disc_pkt')
        if self.disc_path != 'uniform':
            raise NotImplementedError(f'disc_path={self.disc_path}')
+        if self.disc_as_cont and self.cont_as_disc:
+            raise ValueError('disc_as_cont and cont_as_disc are mutually exclusive')
+

 class MixedVelocity(nn.Module):

-    def __init__(self, token_dim: int, seq_len: int, n_disc: int, n_classes: int, d_model: int=128, n_layers: int=4, n_heads: int=4, mlp_ratio: float=4.0, time_dim: int=64, reference_mode: str | None=None) -> None:
+    def __init__(self, token_dim: int, seq_len: int, n_disc: int, n_classes: int, d_model: int=128, n_layers: int=4, n_heads: int=4, mlp_ratio: float=4.0, time_dim: int=64, reference_mode: str | None=None, has_flow_token: bool=True) -> None:
        super().__init__()
        if reference_mode not in (None, 'causal_packets', 'causal_all'):
            raise ValueError(f'reference_mode={reference_mode!r}')
@@ -60,6 +69,7 @@ class MixedVelocity(nn.Module):
        self.n_disc = n_disc
        self.n_classes = n_classes
        self.reference_mode = reference_mode
+        self.has_flow_token = has_flow_token
        self.input_proj = nn.Linear(token_dim, d_model)
        self.pos_emb = nn.Parameter(torch.zeros(1, seq_len, d_model))
        self.type_emb = nn.Embedding(2, d_model)
@@ -70,12 +80,15 @@ class MixedVelocity(nn.Module):
        self.blocks = nn.ModuleList([AdaLNBlock(d_model, n_heads, mlp_ratio, cond_dim=d_model) for _ in range(n_layers)])
        self.out_norm = nn.LayerNorm(d_model, elementwise_affine=False)
        self.head_v = nn.Linear(d_model, token_dim)
-        self.head_disc = nn.Linear(d_model, n_disc * n_classes)
+        # head_disc only meaningful when n_disc > 0
+        out_disc = max(n_disc * n_classes, 1)
+        self.head_disc = nn.Linear(d_model, out_disc)
        for layer in (self.head_v, self.head_disc):
            nn.init.zeros_(layer.weight)
            nn.init.zeros_(layer.bias)
        type_ids = torch.ones(seq_len, dtype=torch.long)
-        type_ids[0] = 0
+        if has_flow_token and seq_len >= 1:
+            type_ids[0] = 0
        self.register_buffer('type_ids', type_ids, persistent=False)

    def _attn_mask(self, L: int, device: torch.device) -> torch.Tensor | None:
@@ -83,8 +96,11 @@ class MixedVelocity(nn.Module):
            return None
        if self.reference_mode == 'causal_packets':
            mask = torch.zeros((L, L), dtype=torch.bool, device=device)
-            if L > 1:
-                mask[1:, 1:] = torch.triu(torch.ones(L - 1, L - 1, dtype=torch.bool, device=device), diagonal=1)
+            offset = 1 if self.has_flow_token else 0
+            if L > offset:
+                M = L - offset
+                if M > 1:
+                    mask[offset:, offset:] = torch.triu(torch.ones(M, M, dtype=torch.bool, device=device), diagonal=1)
            return mask
        return torch.triu(torch.ones(L, L, dtype=torch.bool, device=device), diagonal=1)

@@ -100,143 +116,339 @@ class MixedVelocity(nn.Module):
            h = block(h, cond, key_padding_mask, attn_mask=attn_mask)
        h = self.out_norm(h)
        v = self.head_v(h)
-        d = self.head_disc(h).view(B, L, self.n_disc, self.n_classes)
+        if self.n_disc > 0:
+            d = self.head_disc(h).view(B, L, self.n_disc, self.n_classes)
+        else:
+            d = h.new_zeros((B, L, 0, self.n_classes))
        return (v, d)

+
 class MixedTokenCFM(nn.Module):

    def __init__(self, cfg: MixedCFMConfig) -> None:
+        """Derive the effective token layout from ``cfg`` and build the velocity net.
+
+        Sets ``eff_T`` (packet tokens per sequence), ``eff_n_cont`` / ``eff_n_disc``
+        (per-packet continuous / discrete channel counts after the ablation flags
+        are applied), ``token_dim`` and ``seq_len``.
+
+        Raises:
+            ValueError: if both the FLOW token and all packet tokens are disabled,
+                or if ``token_dim`` is smaller than ``1 + max(flow_dim, cont_size)``.
+        """
        super().__init__()
        self.cfg = cfg
-        cont_size = cfg.n_cont_pkt + cfg.n_disc_pkt
+        # Effective packet count (B2: n_packet_tokens=0 → no packets)
+        self.eff_T = cfg.T if cfg.n_packet_tokens < 0 else int(cfg.n_packet_tokens)
+        if not cfg.use_flow_token and self.eff_T == 0:
+            raise ValueError('cannot disable both FLOW token and packet tokens')
+        # Effective per-packet feature split
+        if cfg.disc_as_cont:
+            # B3: 9 cont, 0 disc (CFM head only)
+            self.eff_n_cont = cfg.n_cont_pkt + cfg.n_disc_pkt
+            self.eff_n_disc = 0
+        elif cfg.cont_as_disc:
+            # B4: 0 cont, 9 disc (mask-pred head only)
+            self.eff_n_cont = 0
+            self.eff_n_disc = cfg.n_cont_pkt + cfg.n_disc_pkt
+        else:
+            self.eff_n_cont = cfg.n_cont_pkt
+            self.eff_n_disc = cfg.n_disc_pkt
+        cont_size = self.eff_n_cont + self.eff_n_disc
+        # Token layout: [type_flag(1) | flow_dim or cont_size]
        self.token_dim = cfg.token_dim or 1 + max(cfg.flow_dim, cont_size)
        if self.token_dim < 1 + max(cfg.flow_dim, cont_size):
            raise ValueError('token_dim too small')
-        self.seq_len = cfg.T + 1
-        self.velocity = MixedVelocity(token_dim=self.token_dim, seq_len=self.seq_len, n_disc=cfg.n_disc_pkt, n_classes=cfg.n_disc_classes, d_model=cfg.d_model, n_layers=cfg.n_layers, n_heads=cfg.n_heads, mlp_ratio=cfg.mlp_ratio, time_dim=cfg.time_dim, reference_mode=cfg.reference_mode)
+        # One optional FLOW token followed by eff_T packet tokens.
+        self.seq_len = (1 if cfg.use_flow_token else 0) + self.eff_T
+        self.velocity = MixedVelocity(
+            token_dim=self.token_dim, seq_len=self.seq_len,
+            n_disc=self.eff_n_disc, n_classes=cfg.n_disc_classes,
+            d_model=cfg.d_model, n_layers=cfg.n_layers, n_heads=cfg.n_heads,
+            mlp_ratio=cfg.mlp_ratio, time_dim=cfg.time_dim,
+            reference_mode=cfg.reference_mode, has_flow_token=cfg.use_flow_token,
+        )

+    # ------------------------------------------------------------------ #
+    # token assembly                                                     #
+    # ------------------------------------------------------------------ #
    def _embed_disc(self, x_disc_int: torch.Tensor) -> torch.Tensor:
+        """Map integer class ids to centred floats for the discrete token slots.
+
+        With ``n = cfg.n_disc_classes`` and ``s = cfg.disc_embed_scale`` the result
+        is ``(x / (n - 1) - 0.5) * s``. When ``n <= 1`` an all-zero float tensor of
+        the same shape is returned instead, which avoids dividing by zero.
+        """
+        n = self.cfg.n_disc_classes
        s = self.cfg.disc_embed_scale
-        return (x_disc_int.float() - 0.5) * s
+        if n <= 1:
+            return x_disc_int.float() * 0.0
+        # Map integers in [0, n-1] to centered floats in [-s/2, +s/2].
+        # Backwards-compatible with old (x - 0.5)*s formula when n=2.
+        return (x_disc_int.float() / (n - 1) - 0.5) * s
+
+    def _flow_dim(self) -> int:
+        """Return the configured flow-feature width (``cfg.flow_dim``)."""
+        cfg = self.cfg
+        return cfg.flow_dim

    def build_tokens(self, flow: torch.Tensor, packets_cont: torch.Tensor, x_disc_t_int: torch.Tensor) -> torch.Tensor:
-        (B, T, Cp) = packets_cont.shape
-        assert T == self.cfg.T and Cp == self.cfg.n_cont_pkt
-        z = packets_cont.new_zeros((B, T + 1, self.token_dim))
-        z[:, 0, 0] = -1.0
-        z[:, 0, 1:1 + self.cfg.flow_dim] = flow
-        z[:, 1:, 0] = 1.0
-        z[:, 1:, 1:1 + self.cfg.n_cont_pkt] = packets_cont
-        z[:, 1:, 1 + self.cfg.n_cont_pkt:1 + self.cfg.n_cont_pkt + self.cfg.n_disc_pkt] = self._embed_disc(x_disc_t_int)
+        """Assemble the token tensor [B, seq_len, token_dim].
+
+        Layout: an optional FLOW token (type flag -1.0 in slot 0, ``flow`` in the
+        following ``flow_dim`` slots) followed by ``eff_T`` packet tokens (type
+        flag 1.0, then the continuous channels, then the embedded discrete ids).
+        Slots that are not written stay zero.
+
+        packets_cont: [B, eff_T, eff_n_cont] (may be empty in last dim)
+        x_disc_t_int: [B, eff_T, eff_n_disc] integer ids in [0, n_disc_classes-1]
+
+        Both packet tensors are ignored when ``eff_T == 0``; each is ignored when
+        its effective channel count is zero.
+        """
+        B = flow.shape[0]
+        T = self.eff_T
+        # Allocate from `flow` so dtype/device follow the flow features.
+        z = flow.new_zeros((B, self.seq_len, self.token_dim))
+        cur = 0  # index of the first packet token within the sequence
+        if self.cfg.use_flow_token:
+            z[:, 0, 0] = -1.0  # type flag
+            z[:, 0, 1:1 + self._flow_dim()] = flow
+            cur = 1
+        if T > 0:
+            z[:, cur:cur + T, 0] = 1.0  # type flag
+            base = 1
+            if self.eff_n_cont > 0:
+                z[:, cur:cur + T, base:base + self.eff_n_cont] = packets_cont
+                base += self.eff_n_cont
+            if self.eff_n_disc > 0:
+                z[:, cur:cur + T, base:base + self.eff_n_disc] = self._embed_disc(x_disc_t_int)
        return z

    def key_padding_mask(self, lens: torch.Tensor) -> torch.Tensor:
+        """Return a bool mask that is True at padded token positions.
+
+        The FLOW token (when enabled) is always treated as real; packet position
+        ``i`` of sample ``b`` is padding when ``i >= lens[b]``. The result has one
+        column per token, i.e. (FLOW flag + ``eff_T``) columns.
+        """
        B = lens.shape[0]
-        idx = torch.arange(self.cfg.T, device=lens.device)[None, :]
-        packet_real = idx < lens[:, None]
-        real = torch.cat([torch.ones(B, 1, dtype=torch.bool, device=lens.device), packet_real], dim=1)
+        device = lens.device
+        T = self.eff_T
+        pieces = []
+        if self.cfg.use_flow_token:
+            pieces.append(torch.ones(B, 1, dtype=torch.bool, device=device))
+        if T > 0:
+            idx = torch.arange(T, device=device)[None, :]
+            pieces.append(idx < lens[:, None])
+        # Fallback keeps the return shape well-defined if neither piece exists.
+        real = torch.cat(pieces, dim=1) if pieces else torch.ones(B, 0, dtype=torch.bool, device=device)
        return ~real

    def _loss_mask(self, lens: torch.Tensor) -> torch.Tensor:
+        """Float complement of ``key_padding_mask``: 1.0 at real tokens, 0.0 at padding."""
        return (~self.key_padding_mask(lens)).float()

-    def compute_loss(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, *, return_components: bool=False) -> torch.Tensor | dict[str, torch.Tensor]:
-        (B, T, _) = packets_cont.shape
-        device = packets_cont.device
+    # ------------------------------------------------------------------ #
+    # B4 helper: quantize cont -> integer bins                           #
+    # ------------------------------------------------------------------ #
+    def quantize_cont(self, packets_cont: torch.Tensor, bin_edges: torch.Tensor) -> torch.Tensor:
+        """Quantize continuous packet channels into integer bin ids (B4 ablation).
+
+        Args:
+            packets_cont: [B, T, n_cont_orig] continuous channels (expected to be
+                already z-scored).
+            bin_edges: [n_cont_orig, n_classes-1] per-channel bin boundaries; row
+                ``c`` is used for channel ``c``. Moved to ``packets_cont``'s
+                device if it lives elsewhere.
+
+        Returns:
+            int64 tensor [B, T, n_cont_orig] with values clamped to
+            ``[0, cfg.n_disc_classes - 1]``.
+
+        Raises:
+            ValueError: if ``bin_edges`` is not 2-D or has fewer rows than
+                ``packets_cont`` has channels.
+        """
+        B, T, C = packets_cont.shape
+        if bin_edges.dim() != 2 or bin_edges.shape[0] < C:
+            raise ValueError(
+                f'bin_edges must be 2-D with at least {C} rows, got shape {tuple(bin_edges.shape)}'
+            )
+        # Edges restored from a checkpoint may still sit on another device.
+        edges_all = bin_edges.to(packets_cont.device)
+        out = torch.zeros((B, T, C), dtype=torch.long, device=packets_cont.device)
+        for c in range(C):
+            # bucketize: returns 0..n for n edges
+            out[:, :, c] = torch.bucketize(packets_cont[:, :, c].contiguous(), edges_all[c].contiguous())
+        # Guards against edge rows that hold more than n_disc_classes-1 boundaries.
+        out.clamp_(0, self.cfg.n_disc_classes - 1)
+        return out
+
+    # ------------------------------------------------------------------ #
+    # Loss                                                               #
+    # ------------------------------------------------------------------ #
+    def compute_loss(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, *, return_components: bool=False, cont_bin_edges: torch.Tensor | None=None) -> torch.Tensor | dict[str, torch.Tensor]:
+        """Training loss: CFM velocity regression plus mask-prediction cross-entropy.
+
+        The raw per-packet inputs are first remapped according to the ablation
+        flags: with ``cfg.disc_as_cont`` the discrete ids are embedded and appended
+        to the continuous channels; with ``cfg.cont_as_disc`` the continuous
+        channels are quantized via ``quantize_cont`` and prepended to the discrete
+        ids; otherwise both are used as given.
+
+        Args:
+            flow: per-flow features; first dim is the batch.
+            packets_cont: per-packet continuous channels.
+            packets_disc: per-packet integer discrete channels.
+            lens: number of real (non-padded) packets per sample.
+            return_components: return a dict of loss terms instead of the scalar.
+            cont_bin_edges: bin edges for ``quantize_cont``; required when
+                ``cfg.cont_as_disc`` is set and packet tokens exist.
+
+        Returns:
+            ``L_cont + cfg.lambda_disc * L_disc``, or a dict with keys ``total``,
+            ``main``, ``aux_disc``, ``aux_flow``, ``aux_packet`` (the last two are
+            always zero) when ``return_components`` is true.
+
+        Raises:
+            ValueError: ``cfg.cont_as_disc`` with packet tokens but no
+                ``cont_bin_edges``.
+        """
+        cfg = self.cfg
+        B = flow.shape[0]
+        T = self.eff_T
+        device = flow.device
+        # Layout constants shared by the corruption, regression and CE sections.
+        cur_offset = 1 if cfg.use_flow_token else 0  # index of the first packet token
+        disc_start_in_token = 1 + self.eff_n_cont  # first discrete slot inside a packet token
+        disc_end_in_token = disc_start_in_token + self.eff_n_disc
+        has_disc = T > 0 and self.eff_n_disc > 0
+
+        # Resolve effective cont/disc tensors per ablation mode
+        if cfg.disc_as_cont:
+            # 9 cont = original 3 cont + 6 disc-as-float
+            disc_as_cont_float = self._embed_disc(packets_disc) if T > 0 else None
+            if T > 0:
+                eff_cont = torch.cat([packets_cont, disc_as_cont_float], dim=-1) if cfg.n_cont_pkt > 0 else disc_as_cont_float
+            else:
+                eff_cont = packets_cont.new_zeros((B, 0, 0))
+            eff_disc_int = torch.zeros((B, T, 0), dtype=torch.long, device=device)
+        elif cfg.cont_as_disc:
+            # 0 cont, 9 disc: quantize cont via supplied bin_edges
+            if T > 0:
+                if cont_bin_edges is None:
+                    raise ValueError('cont_as_disc requires cont_bin_edges')
+                cont_int = self.quantize_cont(packets_cont, cont_bin_edges)
+                eff_disc_int = torch.cat([cont_int, packets_disc.long()], dim=-1)
+            else:
+                eff_disc_int = torch.zeros((B, 0, self.eff_n_disc), dtype=torch.long, device=device)
+            eff_cont = flow.new_zeros((B, T, 0))
+        else:
+            eff_cont = packets_cont if T > 0 else packets_cont.new_zeros((B, 0, cfg.n_cont_pkt))
+            eff_disc_int = packets_disc.long() if T > 0 else torch.zeros((B, 0, cfg.n_disc_pkt), dtype=torch.long, device=device)
+
+        # Build x_1 (data tokens; mask-pred path uses zero ids for disc at packet positions during CFM regression)
+        zero_disc = torch.zeros_like(eff_disc_int)
+        x_1_cont = self.build_tokens(flow, eff_cont, zero_disc)
+
        mask = self._loss_mask(lens)
        kpm = mask == 0
-        x_1_cont = self.build_tokens(flow, packets_cont, torch.zeros_like(packets_disc))
+
        x_0_cont = torch.randn_like(x_1_cont)
-        if self.cfg.use_ot:
+
+        if cfg.use_ot:
            flat0 = (x_0_cont * mask[:, :, None]).reshape(B, -1)
            flat1 = (x_1_cont * mask[:, :, None]).reshape(B, -1)
            col = _sinkhorn_coupling(torch.cdist(flat0.float(), flat1.float()))
            x_1_cont = x_1_cont[col]
-            packets_cont = packets_cont[col]
+            eff_cont = eff_cont[col] if eff_cont.numel() > 0 else eff_cont
+            eff_disc_int = eff_disc_int[col] if eff_disc_int.numel() > 0 else eff_disc_int
            packets_disc = packets_disc[col]
            flow = flow[col]
            lens = lens[col]
            mask = self._loss_mask(lens)
            kpm = mask == 0
+
        t = torch.rand(B, device=device)
        x_t_cont = (1.0 - t[:, None, None]) * x_0_cont + t[:, None, None] * x_1_cont
-        if self.cfg.sigma > 0:
-            std = self.cfg.sigma * torch.sqrt(t * (1.0 - t))[:, None, None]
+        if cfg.sigma > 0:
+            std = cfg.sigma * torch.sqrt(t * (1.0 - t))[:, None, None]
            x_t_cont = x_t_cont + std * torch.randn_like(x_t_cont)
        target_cont = x_1_cont - x_0_cont
-        u = torch.rand(B, T, self.cfg.n_disc_pkt, device=device)
-        keep = u < t[:, None, None]
-        rand_disc = torch.randint(0, self.cfg.n_disc_classes, packets_disc.shape, device=device)
-        x_disc_t = torch.where(keep, packets_disc, rand_disc)
-        disc_start = 1 + self.cfg.n_cont_pkt
-        x_t_full = x_t_cont.clone()
-        x_t_full[:, 1:, disc_start:disc_start + self.cfg.n_disc_pkt] = self._embed_disc(x_disc_t)
+
+        # Disc corruption schedule (mask-pred): keep fraction t of true labels
+        if has_disc:
+            u = torch.rand(B, T, self.eff_n_disc, device=device)
+            keep = u < t[:, None, None]
+            rand_disc = torch.randint(0, cfg.n_disc_classes, eff_disc_int.shape, device=device)
+            x_disc_t = torch.where(keep, eff_disc_int, rand_disc)
+            # Within each packet token: [type(1) | cont(eff_n_cont) | disc(eff_n_disc) | pad...]
+            x_t_full = x_t_cont.clone()
+            x_t_full[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token] = self._embed_disc(x_disc_t)
+        else:
+            # No discrete channels: the interpolated tokens are fed to the network unchanged.
+            x_t_full = x_t_cont
+            keep = None
+
        (v_pred, d_logits) = self.velocity(x_t_full, t, key_padding_mask=kpm)
+
+        # CFM regression loss on cont slots (mask out disc slots)
        v_err = (v_pred - target_cont).square()
-        v_err[:, :, disc_start:disc_start + self.cfg.n_disc_pkt] = 0.0
+        if has_disc:
+            v_err[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token] = 0.0
        v_per_token = v_err.mean(dim=-1)
        per_sample = (v_per_token * mask).sum(dim=-1) / mask.sum(dim=-1).clamp_min(1.0)
        L_cont = per_sample.mean()
-        pkt_logits = d_logits[:, 1:]
-        pkt_real = mask[:, 1:].bool()
-        corrupt = ~keep & pkt_real[:, :, None]
-        flat_logits = pkt_logits.reshape(-1, self.cfg.n_disc_classes)
-        flat_targets = packets_disc.reshape(-1).long()
-        flat_ce = F.cross_entropy(flat_logits, flat_targets, reduction='none')
-        flat_ce = flat_ce.view(B, T, self.cfg.n_disc_pkt)
-        flat_ce = flat_ce * corrupt.float()
-        denom = corrupt.float().sum().clamp_min(1.0)
-        L_disc = flat_ce.sum() / denom
-        total = L_cont + self.cfg.lambda_disc * L_disc
+
+        # Mask-pred CE on corrupted disc positions
+        if keep is not None:
+            pkt_logits = d_logits[:, cur_offset:cur_offset + T]
+            pkt_real = mask[:, cur_offset:cur_offset + T].bool()
+            # Only positions that were resampled AND belong to real packets contribute.
+            corrupt = ~keep & pkt_real[:, :, None]
+            flat_logits = pkt_logits.reshape(-1, cfg.n_disc_classes)
+            flat_targets = eff_disc_int.reshape(-1).long()
+            flat_ce = F.cross_entropy(flat_logits, flat_targets, reduction='none')
+            flat_ce = flat_ce.view(B, T, self.eff_n_disc)
+            flat_ce = flat_ce * corrupt.float()
+            denom = corrupt.float().sum().clamp_min(1.0)
+            L_disc = flat_ce.sum() / denom
+        else:
+            L_disc = L_cont.new_zeros(())
+
+        total = L_cont + cfg.lambda_disc * L_disc
        if return_components:
-            return {'total': total, 'main': L_cont.detach(), 'aux_disc': L_disc.detach(), 'aux_flow': L_cont.new_zeros(()), 'aux_packet': L_cont.new_zeros(())}
+            return {'total': total, 'main': L_cont.detach(), 'aux_disc': L_disc.detach(),
+                    'aux_flow': L_cont.new_zeros(()), 'aux_packet': L_cont.new_zeros(())}
        return total

+    # ------------------------------------------------------------------ #
+    # Scoring                                                            #
+    # ------------------------------------------------------------------ #
    @torch.no_grad()
-    def trajectory_metrics(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, n_steps: int=16) -> dict[str, torch.Tensor]:
-        z = self.build_tokens(flow, packets_cont, packets_disc)
+    def trajectory_metrics(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, n_steps: int=16, cont_bin_edges: torch.Tensor | None=None) -> dict[str, torch.Tensor]:
+        """Integrate the velocity field from t=1 downwards and score the end state.
+
+        Starting from the data tokens, ``n_steps`` explicit Euler steps
+        ``z <- z - v(z, t) * dt`` are taken at ``t = 1, 1 - dt, ...``. The discrete
+        embedding slots are held fixed throughout and excluded from the norms.
+
+        Returns a dict with ``terminal_norm`` (all real tokens), plus
+        ``terminal_flow`` when the FLOW token is enabled and ``terminal_packet``
+        when packet tokens exist.
+
+        Raises:
+            ValueError: ``cfg.cont_as_disc`` with packet tokens but no
+                ``cont_bin_edges``.
+        """
+        cfg = self.cfg
+        B = flow.shape[0]
+        T = self.eff_T
+
+        # Build effective cont / disc tensors per ablation mode
+        if cfg.disc_as_cont:
+            disc_float = self._embed_disc(packets_disc) if T > 0 else None
+            if T > 0:
+                eff_cont = torch.cat([packets_cont, disc_float], dim=-1) if cfg.n_cont_pkt > 0 else disc_float
+            else:
+                eff_cont = packets_cont.new_zeros((B, 0, 0))
+            eff_disc_int = torch.zeros((B, T, 0), dtype=torch.long, device=flow.device)
+        elif cfg.cont_as_disc:
+            if T > 0:
+                if cont_bin_edges is None:
+                    raise ValueError('cont_as_disc requires cont_bin_edges at scoring time')
+                cont_int = self.quantize_cont(packets_cont, cont_bin_edges)
+                eff_disc_int = torch.cat([cont_int, packets_disc.long()], dim=-1)
+            else:
+                eff_disc_int = torch.zeros((B, 0, 0), dtype=torch.long, device=flow.device)
+            eff_cont = flow.new_zeros((B, T, 0))
+        else:
+            eff_cont = packets_cont if T > 0 else packets_cont.new_zeros((B, 0, cfg.n_cont_pkt))
+            eff_disc_int = packets_disc.long() if T > 0 else torch.zeros((B, 0, cfg.n_disc_pkt), dtype=torch.long, device=flow.device)
+
+        z = self.build_tokens(flow, eff_cont, eff_disc_int)
        mask = self._loss_mask(lens)
        kpm = mask == 0
-        B = z.shape[0]
        dt = 1.0 / n_steps
-        disc_start = 1 + self.cfg.n_cont_pkt
-        disc_end = disc_start + self.cfg.n_disc_pkt
-        disc_embed = z[:, 1:, disc_start:disc_end].clone()
+
+        # Disc embed slot bounds (within token vector) for "freeze disc during ODE"
+        cur_offset = 1 if cfg.use_flow_token else 0
+        disc_start_in_token = 1 + self.eff_n_cont
+        disc_end_in_token = disc_start_in_token + self.eff_n_disc
+        if self.eff_n_disc > 0 and T > 0:
+            disc_embed = z[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token].clone()
+        else:
+            disc_embed = None
+
        for k in range(n_steps):
            t_val = 1.0 - k * dt
            t = torch.full((B,), t_val, device=z.device)
            (v, _) = self.velocity(z, t, key_padding_mask=kpm)
-            v[:, :, disc_start:disc_end] = 0.0
+            if self.eff_n_disc > 0 and T > 0:
+                v[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token] = 0.0
            z = z - v * dt
-            z[:, 1:, disc_start:disc_end] = disc_embed
+            # Restore the frozen discrete embeddings after every step.
+            if disc_embed is not None:
+                z[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token] = disc_embed
+
+        # Compute terminal-norm scores. Zero out the discrete embed slots so they don't pollute.
        z_real = z * mask[:, :, None]
        z_cont = z_real.clone()
-        z_cont[:, 1:, disc_start:disc_end] = 0.0
-        packet_count = mask[:, 1:].sum(dim=-1).clamp_min(1.0)
-        terminal = z_cont.reshape(B, -1).norm(dim=-1) / (mask.sum(dim=-1) * self.token_dim).clamp_min(1.0).sqrt()
-        terminal_flow = z_cont[:, 0].norm(dim=-1) / math.sqrt(self.token_dim)
-        terminal_packet = (z_cont[:, 1:] * mask[:, 1:, None]).reshape(B, -1).norm(dim=-1) / (packet_count * self.token_dim).sqrt()
-        return {'terminal_norm': terminal, 'terminal_flow': terminal_flow, 'terminal_packet': terminal_packet}
+        if self.eff_n_disc > 0 and T > 0:
+            z_cont[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token] = 0.0
+
+        # Norms are divided by sqrt(#real tokens * token_dim).
+        full_norm = z_cont.reshape(B, -1).norm(dim=-1) / (mask.sum(dim=-1) * self.token_dim).clamp_min(1.0).sqrt()
+        out = {'terminal_norm': full_norm}
+        if cfg.use_flow_token:
+            out['terminal_flow'] = z_cont[:, 0].norm(dim=-1) / math.sqrt(self.token_dim)
+        if T > 0:
+            packet_count = mask[:, cur_offset:cur_offset + T].sum(dim=-1).clamp_min(1.0)
+            out['terminal_packet'] = (z_cont[:, cur_offset:cur_offset + T] * mask[:, cur_offset:cur_offset + T, None]).reshape(B, -1).norm(dim=-1) / (packet_count * self.token_dim).sqrt()
+        return out

    @torch.no_grad()
-    def disc_nll_score(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, t_eval: float=0.5) -> dict[str, torch.Tensor]:
-        (B, T, _) = packets_cont.shape
-        device = packets_cont.device
+    def disc_nll_score(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, t_eval: float=0.5, cont_bin_edges: torch.Tensor | None=None) -> dict[str, torch.Tensor]:
+        """Cross-entropy of the discrete head on the uncorrupted tokens at ``t_eval``.
+
+        Runs the velocity network once at time ``t_eval`` and measures the
+        per-channel cross-entropy of its discrete logits against the true ids,
+        averaged over real packets.
+
+        Returns an empty dict when there are no packet tokens or no discrete
+        channels. Otherwise returns ``disc_nll_total`` (CE summed over channels)
+        and one ``disc_nll_ch{idx}`` entry per channel, where ``idx`` comes from
+        ``cfg.disc_pkt_idx`` (preceded by ``cfg.cont_pkt_idx`` in
+        ``cont_as_disc`` mode).
+
+        Raises:
+            ValueError: ``cfg.cont_as_disc`` without ``cont_bin_edges``.
+        """
+        cfg = self.cfg
+        B = flow.shape[0]
+        T = self.eff_T
+        device = flow.device
+        if T == 0 or self.eff_n_disc == 0:
+            return {}  # no disc head to score
+
+        # Build effective disc int per mode
+        if cfg.cont_as_disc:
+            if cont_bin_edges is None:
+                raise ValueError('cont_as_disc requires cont_bin_edges at scoring time')
+            cont_int = self.quantize_cont(packets_cont, cont_bin_edges)
+            eff_disc_int = torch.cat([cont_int, packets_disc.long()], dim=-1)
+            eff_cont = flow.new_zeros((B, T, 0))
+            ch_idx_list = list(cfg.cont_pkt_idx) + list(cfg.disc_pkt_idx)
+        else:
+            eff_disc_int = packets_disc.long()
+            eff_cont = packets_cont
+            ch_idx_list = list(cfg.disc_pkt_idx)
+
        mask = self._loss_mask(lens)
        kpm = mask == 0
-        z = self.build_tokens(flow, packets_cont, packets_disc)
+        z = self.build_tokens(flow, eff_cont, eff_disc_int)
        t = torch.full((B,), float(t_eval), device=device)
        (_, d_logits) = self.velocity(z, t, key_padding_mask=kpm)
-        pkt_logits = d_logits[:, 1:]
-        flat_logits = pkt_logits.reshape(-1, self.cfg.n_disc_classes)
-        flat_targets = packets_disc.reshape(-1).long()
+        # Skip the FLOW token (if any): only packet positions carry discrete logits to score.
+        cur_offset = 1 if cfg.use_flow_token else 0
+        pkt_logits = d_logits[:, cur_offset:cur_offset + T]
+        flat_logits = pkt_logits.reshape(-1, cfg.n_disc_classes)
+        flat_targets = eff_disc_int.reshape(-1).long()
        ce = F.cross_entropy(flat_logits, flat_targets, reduction='none')
-        ce = ce.view(B, T, self.cfg.n_disc_pkt)
-        pkt_real = mask[:, 1:].bool().float()
+        ce = ce.view(B, T, self.eff_n_disc)
+        pkt_real = mask[:, cur_offset:cur_offset + T].bool().float()
        per_sample = (ce.sum(dim=-1) * pkt_real).sum(dim=-1) / pkt_real.sum(dim=-1).clamp_min(1.0)
        per_ch = (ce * pkt_real[:, :, None]).sum(dim=1) / pkt_real.sum(dim=1).clamp_min(1.0)[:, None]
        out = {'disc_nll_total': per_sample}
-        for (c, idx) in enumerate(self.cfg.disc_pkt_idx):
+        for c, idx in enumerate(ch_idx_list):
            out[f'disc_nll_ch{idx}'] = per_ch[:, c]
        return out

--- a/Mixed_CFM/train.py
+++ b/Mixed_CFM/train.py
@@ -21,7 +21,7 @@ def _device(arg: str) -> torch.device:
        return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return torch.device(arg)

-def _batch_score(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray, disc_np: np.ndarray, len_np: np.ndarray, device: torch.device, *, batch_size: int, n_steps: int) -> dict[str, np.ndarray]:
+def _batch_score(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray, disc_np: np.ndarray, len_np: np.ndarray, device: torch.device, *, batch_size: int, n_steps: int, cont_bin_edges: torch.Tensor | None = None) -> dict[str, np.ndarray]:
    out: dict[str, list[np.ndarray]] = {}
    model.eval()
    for start in range(0, len(flow_np), batch_size):
@@ -30,14 +30,14 @@ def _batch_score(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray,
        cont = torch.from_numpy(cont_np[sl]).float().to(device)
        disc = torch.from_numpy(disc_np[sl]).long().to(device)
        lens = torch.from_numpy(len_np[sl]).long().to(device)
-        m = model.trajectory_metrics(flow, cont, disc, lens, n_steps=n_steps)
-        d = model.disc_nll_score(flow, cont, disc, lens)
+        m = model.trajectory_metrics(flow, cont, disc, lens, n_steps=n_steps, cont_bin_edges=cont_bin_edges)
+        d = model.disc_nll_score(flow, cont, disc, lens, cont_bin_edges=cont_bin_edges)
        for src in (m, d):
            for (k, v) in src.items():
                out.setdefault(k, []).append(v.detach().cpu().numpy())
    return {k: np.concatenate(v, axis=0) for (k, v) in out.items()}

-def _quick_eval(model: MixedTokenCFM, data: MixedData, device: torch.device, cfg: dict[str, Any]) -> dict[str, float]:
+def _quick_eval(model: MixedTokenCFM, data: MixedData, device: torch.device, cfg: dict[str, Any], cont_bin_edges: torch.Tensor | None = None) -> dict[str, float]:
    n_eval = int(cfg.get('eval_n', 2000))
    rng = np.random.default_rng(0)

@@ -46,8 +46,8 @@ def _quick_eval(model: MixedTokenCFM, data: MixedData, device: torch.device, cfg
        return rng.choice(n, m, replace=False)
    vi = pick(len(data.val_flow))
    ai = pick(len(data.attack_flow))
-    v = _batch_score(model, data.val_flow[vi], data.val_cont[vi], data.val_disc[vi], data.val_len[vi], device, batch_size=int(cfg.get('eval_batch_size', 512)), n_steps=int(cfg.get('eval_n_steps', 8)))
-    a = _batch_score(model, data.attack_flow[ai], data.attack_cont[ai], data.attack_disc[ai], data.attack_len[ai], device, batch_size=int(cfg.get('eval_batch_size', 512)), n_steps=int(cfg.get('eval_n_steps', 8)))
+    v = _batch_score(model, data.val_flow[vi], data.val_cont[vi], data.val_disc[vi], data.val_len[vi], device, batch_size=int(cfg.get('eval_batch_size', 512)), n_steps=int(cfg.get('eval_n_steps', 8)), cont_bin_edges=cont_bin_edges)
+    a = _batch_score(model, data.attack_flow[ai], data.attack_cont[ai], data.attack_disc[ai], data.attack_len[ai], device, batch_size=int(cfg.get('eval_batch_size', 512)), n_steps=int(cfg.get('eval_n_steps', 8)), cont_bin_edges=cont_bin_edges)
    y = np.concatenate([np.zeros(len(vi)), np.ones(len(ai))])
    out: dict[str, float] = {}
    for k in sorted(v.keys()):
@@ -73,9 +73,36 @@ def train(cfg: dict[str, Any]) -> Path:
    ds = TensorDataset(torch.from_numpy(tr_f).float(), torch.from_numpy(tr_c).float(), torch.from_numpy(tr_d).long(), torch.from_numpy(tr_l).long())
    loader = DataLoader(ds, batch_size=int(cfg['batch_size']), shuffle=True, drop_last=True, num_workers=int(cfg.get('num_workers', 0)), pin_memory=device.type == 'cuda')
    print(f'[data] training on {len(ds):,} flows')
-    model_cfg = MixedCFMConfig(T=data.T, flow_dim=data.flow_dim, token_dim=cfg.get('token_dim'), d_model=int(cfg['d_model']), n_layers=int(cfg['n_layers']), n_heads=int(cfg['n_heads']), mlp_ratio=float(cfg.get('mlp_ratio', 4.0)), time_dim=int(cfg.get('time_dim', 64)), sigma=float(cfg.get('sigma', 0.1)), use_ot=bool(cfg.get('use_ot', False)), reference_mode=cfg.get('reference_mode'), lambda_disc=float(cfg.get('lambda_disc', 1.0)))
+    n_disc_classes = int(cfg.get('n_disc_classes', 2))
+    model_cfg = MixedCFMConfig(
+        T=data.T, flow_dim=data.flow_dim, token_dim=cfg.get('token_dim'),
+        d_model=int(cfg['d_model']), n_layers=int(cfg['n_layers']), n_heads=int(cfg['n_heads']),
+        mlp_ratio=float(cfg.get('mlp_ratio', 4.0)), time_dim=int(cfg.get('time_dim', 64)),
+        sigma=float(cfg.get('sigma', 0.1)), use_ot=bool(cfg.get('use_ot', False)),
+        reference_mode=cfg.get('reference_mode'), lambda_disc=float(cfg.get('lambda_disc', 1.0)),
+        n_disc_classes=n_disc_classes,
+        # B-group ablation flags
+        use_flow_token=bool(cfg.get('use_flow_token', True)),
+        n_packet_tokens=int(cfg.get('n_packet_tokens', -1)),
+        disc_as_cont=bool(cfg.get('disc_as_cont', False)),
+        cont_as_disc=bool(cfg.get('cont_as_disc', False)),
+    )
    model = MixedTokenCFM(model_cfg).to(device)
-    print(f'[model] params={model.param_count():,} token_dim={model.token_dim} sigma={model_cfg.sigma} use_ot={model_cfg.use_ot} lambda_disc={model_cfg.lambda_disc}')
+    # B4: compute bin edges from benign train cont (z-scored, masked) for cont_as_disc quantization
+    cont_bin_edges = None
+    if model_cfg.cont_as_disc:
+        n_bins = n_disc_classes
+        n_cont_orig = model_cfg.n_cont_pkt
+        # gather real cont samples per channel (mask padding)
+        masks = np.arange(data.train_cont.shape[1])[None, :] < data.train_len[:, None]
+        edges = np.zeros((n_cont_orig, n_bins - 1), dtype=np.float32)
+        for c in range(n_cont_orig):
+            vals = data.train_cont[..., c][masks]
+            qs = np.linspace(0, 1, n_bins + 1)[1:-1]  # interior quantiles
+            edges[c] = np.quantile(vals, qs).astype(np.float32)
+        cont_bin_edges = torch.from_numpy(edges).to(device)
+        print(f'[B4] cont_bin_edges shape={tuple(edges.shape)}  (n_bins={n_bins})')
+    print(f'[model] params={model.param_count():,} token_dim={model.token_dim} sigma={model_cfg.sigma} use_ot={model_cfg.use_ot} lambda_disc={model_cfg.lambda_disc} use_flow_token={model_cfg.use_flow_token} n_packet_tokens={model_cfg.n_packet_tokens} disc_as_cont={model_cfg.disc_as_cont} cont_as_disc={model_cfg.cont_as_disc}')
    opt = torch.optim.AdamW(model.parameters(), lr=float(cfg['lr']), weight_decay=float(cfg.get('weight_decay', 0.01)))
    total_steps = max(1, int(cfg['epochs']) * len(loader))
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=total_steps)
@@ -91,7 +118,7 @@ def train(cfg: dict[str, Any]) -> Path:
            cont = cont.to(device, non_blocking=True)
            disc = disc.to(device, non_blocking=True)
            lens = lens.to(device, non_blocking=True)
-            comp = model.compute_loss(flow, cont, disc, lens, return_components=True)
+            comp = model.compute_loss(flow, cont, disc, lens, return_components=True, cont_bin_edges=cont_bin_edges)
            loss = comp['total']
            ldisc_sum += float(comp['aux_disc'].item())
            opt.zero_grad(set_to_none=True)
@@ -104,7 +131,7 @@ def train(cfg: dict[str, Any]) -> Path:
        mean_loss = float(np.mean(losses)) if losses else float('nan')
        eval_metrics: dict[str, float] | None = None
        if epoch % int(cfg.get('eval_every', 5)) == 0 or epoch == int(cfg['epochs']):
-            eval_metrics = _quick_eval(model, data, device, cfg)
+            eval_metrics = _quick_eval(model, data, device, cfg, cont_bin_edges=cont_bin_edges)
        history['epoch'].append(epoch)
        history['loss'].append(mean_loss)
        history['eval'].append(eval_metrics)
@@ -120,6 +147,8 @@ def train(cfg: dict[str, Any]) -> Path:
        if not np.isfinite(mean_loss):
            raise RuntimeError(f'non-finite loss at epoch {epoch}')
    payload = {'model_state_dict': model.state_dict(), 'model_cfg': asdict(model_cfg), 'cont_mean': data.cont_mean, 'cont_std': data.cont_std, 'flow_mean': data.flow_mean, 'flow_std': data.flow_std, 'flow_feature_names': np.asarray(data.flow_feature_names), 'packet_feature_names': np.asarray(data.packet_feature_names)}
+    if cont_bin_edges is not None:
+        payload['cont_bin_edges'] = cont_bin_edges.detach().cpu().numpy()
    torch.save(payload, save_dir / 'model.pt')
    with open(save_dir / 'history.json', 'w') as f:
        json.dump(history, f, indent=2, default=str)
--- a/README.md
+++ b/README.md
@@ -51,6 +51,28 @@ Source (rows) trained on 10K benign of source dataset; target (columns) tested o

 Forward CICIDS17→CICDDoS19 (0.969) beats Shafir 0.89 by **+0.08**; reverse CICDDoS19→CICIDS17 (0.941) approximately matches Shafir 0.93. CICIoT23 is hardest both as source and target — its IoT-protocol diversity makes the "benign of source ≈ benign of target" assumption brittle. Full table at `artifacts/route_comparison/CROSS_MATRIX_3x3.md`.

+### Ablations (architecture & aggregator)
+
+Two orthogonal ablation axes, each evaluated **within-dataset** (4 datasets × 3 seeds) **and** **cross-dataset** (3×3 transfer × 3 seeds):
+
+- **Group A** — 7 aggregators (A1–A7, where A5 is the JANUS-full aggregator) applied to the same JANUS-full sub-score vector (post-processing only; no retraining).
+- **Group B** — 5 architecture variants, each retrained on 4 datasets × 3 seeds (60 training runs in total) and then cross-evaluated (90 cross-evals in total).
+
+Every load-bearing JANUS design choice has the **same shape of ablation curve**: small in-distribution cost, large cross-dataset gain.
+
+| Component (removed in ablation) | Variant | Within Δ | Cross-mean Δ | Cross-worst Δ |
+|---|---|---:|---:|---:|
+| FLOW token (global context) | B1 | **−0.94** | −6.70 | −19.97 |
+| Packet sequence | B2 | +0.15 | **−23.82** | **−36.27** |
+| Cont/disc head split (drop disc head) | B3 | +0.44 | **−13.14** | **−25.03** |
+| CFM head (drop continuous side) | B4 | **−2.37** | −2.03 | −2.86 |
+| Joint training of two heads | B5 | +0.20 | **−18.93** | **−27.54** |
+| OAS Mahalanobis aggregator | A1 vs A5 | +0.37 | **−15.88** | **−27.38** |
+
+Three ablations (B3 / B5 / A-aggregator) **marginally beat JANUS-full at within-dataset evaluation** but collapse on at least one cross-dataset transfer direction. The disc head, joint training, and OAS aggregator are deliberate trades: their value is exclusively in cross-dataset robustness.
+
+Full headline summary: `artifacts/ablation/ABLATION_SUMMARY.md`. Per-variant 3×3 cross matrices: `artifacts/ablation/ABLATION_CROSS_B_full.md` and `artifacts/ablation/ABLATION_TABLE_CROSS_full.md`.
+
 ## Layout

 ```
@@ -74,6 +96,12 @@ scripts/                   Workspace-level pcap → artifact pipeline,
                           orchestration. aggregate_score_router.py is the
                           deployable score path; run_cross_3x3.sh +
                           cross_3x3_table.py produce the cross matrix.
+                           aggregate_ablation.py / aggregate_ablation_cross.py /
+                           aggregate_ablation_cross_B.py produce the ablation
+                           tables in artifacts/ablation/.
+  ablation/                B-group ablation training/eval drivers
+                           (generate_configs.py, run_groupB.sh,
+                           run_cross_groupB.sh).
 tests/                     Data-contract unit tests.
 ```

@@ -177,7 +205,8 @@ Common gotcha: if CSV timestamps and pcap epochs are in different time zones, `e

 ## Authoritative documents

- `RESULTS.md` — full headline tables, ablations, per-attack analysis, JANUS configuration, thresholded operating-point metrics, what the experiments proved / disproved.
+- `RESULTS.md` — full headline tables, per-attack analysis, JANUS configuration, thresholded operating-point metrics, what the experiments proved / disproved.
+- `artifacts/ablation/ABLATION_SUMMARY.md` — paper-facing ablation summary (Group A aggregator + Group B architecture, both within and cross views).
 - `Mixed_CFM/model.py` and `common/data_contract.py` — model + data-contract source of truth.

 ## Python environment
--- a/scripts/ablation/generate_configs.py
+++ b/scripts/ablation/generate_configs.py
@@ -0,0 +1,56 @@
+"""Generate 60 B-group ablation configs from existing 12 base configs.
+
+Reads:
+  Mixed_CFM/configs/<ds>_seed<S>.yaml          (4 datasets × 3 seeds = 12 base)
+
+Writes:
+  Mixed_CFM/configs/ablation/<gid>/<ds>_seed<S>.yaml   (5 variants × 12 = 60)
+
+Each variant overrides save_dir → artifacts/ablation/janus_<ds>_seed<S>_<gid>/
+plus the variant-specific flags. CICIoT2023 base is `ciciot2023_seed42.yaml`
+(NOT `ciciot2023_route_c_seed42.yaml`, which is a different score-router config).
+"""
+from __future__ import annotations
+from pathlib import Path
+import yaml
+
+# Repository root: two directories above scripts/ablation/.
+ROOT = Path(__file__).resolve().parents[2]
+# Base configs are read from BASE_DIR; generated variants are written under OUT_DIR/<gid>/.
+BASE_DIR = ROOT / "Mixed_CFM" / "configs"
+OUT_DIR = ROOT / "Mixed_CFM" / "configs" / "ablation"
+
+# One base config is expected per (dataset, seed) pair: <ds>_seed<S>.yaml.
+DATASETS = ["iscxtor2016", "cicids2017", "cicddos2019", "ciciot2023"]
+SEEDS = [42, 43, 44]
+
+# Variant id -> config keys overridden on top of the base config. The id is
+# also used as the output sub-directory name and as the save_dir suffix.
+VARIANTS = {
+    "b1_noflow":    {"use_flow_token": False},
+    "b2_flowonly":  {"n_packet_tokens": 0, "lambda_disc": 0.0},
+    "b3_allcont":   {"disc_as_cont": True, "lambda_disc": 0.0},
+    "b4_alldisc":   {"cont_as_disc": True, "n_disc_classes": 8},
+    "b5_nodisc":    {"lambda_disc": 0.0},
+}
+
+
+def main() -> None:
+    """Write one ablation config per (variant, dataset, seed) combination.
+
+    For every base config ``BASE_DIR/<ds>_seed<S>.yaml`` that exists, each
+    entry of ``VARIANTS`` produces ``OUT_DIR/<gid>/<ds>_seed<S>.yaml``: a copy
+    of the base config with ``save_dir`` pointed at
+    ``artifacts/ablation/janus_<ds>_seed<S>_<gid>`` and the variant overrides
+    applied last. Missing base configs are reported with ``[miss]`` and
+    skipped. Key order of the base config is preserved in the output.
+    """
+    # parents=True also creates OUT_DIR itself, so no separate mkdir is needed.
+    for gid in VARIANTS:
+        (OUT_DIR / gid).mkdir(parents=True, exist_ok=True)
+    n_written = 0
+    for ds in DATASETS:
+        for seed in SEEDS:
+            base_path = BASE_DIR / f"{ds}_seed{seed}.yaml"
+            if not base_path.exists():
+                print(f"[miss] {base_path}")
+                continue
+            # YAML is UTF-8 by convention; be explicit instead of relying on
+            # the platform's locale encoding.
+            base_cfg = yaml.safe_load(base_path.read_text(encoding="utf-8"))
+            for gid, overrides in VARIANTS.items():
+                save_dir = ROOT / "artifacts" / "ablation" / f"janus_{ds}_seed{seed}_{gid}"
+                # Later keys win: save_dir first, then the variant flags.
+                cfg = {**base_cfg, "save_dir": str(save_dir), **overrides}
+                out = OUT_DIR / gid / f"{ds}_seed{seed}.yaml"
+                out.write_text(yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8")
+                n_written += 1
+    print(f"[wrote] {n_written} config files under {OUT_DIR}")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/ablation/run_cross_groupB.sh
+++ b/scripts/ablation/run_cross_groupB.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+# Cross-dataset evaluation for B-group ablation models.
+# 5 variants × 6 off-diagonal directions × 3 seeds = 90 cross evals.
+#
+# Each B-variant model dir is artifacts/ablation/janus_<ds>_seed<S>_<gid>/.
+# We only cross within the 3-dataset matrix (cicids2017, cicddos2019, ciciot2023);
+# ISCXTor16 has different feature space for cross.
+#
+# Usage:
+#   bash scripts/ablation/run_cross_groupB.sh                     # all 90
+#   bash scripts/ablation/run_cross_groupB.sh b1_noflow b3_allcont
+set -euo pipefail
+# Workspace root. Defaults to the original hard-coded location; set JANUS_ROOT
+# in the environment to run against a checkout that lives elsewhere.
+ROOT="${JANUS_ROOT:-/home/chy/JANUS}"
+EVAL=${ROOT}/Mixed_CFM/eval_cross.py
+OUT_DIR=${ROOT}/artifacts/ablation/cross
+mkdir -p "${OUT_DIR}"
+
+# Per-target-dataset inputs handed to eval_cross.py, keyed by dataset id.
+declare -A STORE FLOWS FEATS
+STORE[cicids2017]=${ROOT}/datasets/cicids2017/processed/full_store
+FLOWS[cicids2017]=${ROOT}/datasets/cicids2017/processed/flows.parquet
+FEATS[cicids2017]=${ROOT}/datasets/cicids2017/processed/flow_features.parquet
+STORE[cicddos2019]=${ROOT}/datasets/cicddos2019/processed/full_store
+FLOWS[cicddos2019]=${ROOT}/datasets/cicddos2019/processed/flows.parquet
+FEATS[cicddos2019]=${ROOT}/datasets/cicddos2019/processed/flow_features.parquet
+STORE[ciciot2023]=${ROOT}/datasets/ciciot2023/processed/full_store
+# NOTE(review): unlike the other two datasets, flows.parquet is read from
+# inside full_store/ here — confirm this matches the on-disk layout.
+FLOWS[ciciot2023]=${ROOT}/datasets/ciciot2023/processed/full_store/flows.parquet
+FEATS[ciciot2023]=${ROOT}/datasets/ciciot2023/processed/flow_features.parquet
+
+ALL_GIDS=(b1_noflow b2_flowonly b3_allcont b4_alldisc b5_nodisc)
+DATASETS=(cicids2017 cicddos2019 ciciot2023)
+SEEDS=(42 43 44)
+# Single GPU index exported as CUDA_VISIBLE_DEVICES for every eval.
+GPU="${GPU:-0}"
+
+# Positional arguments, if any, select a subset of variant ids.
+if [[ $# -gt 0 ]]; then
+  GIDS=("$@")
+else
+  GIDS=("${ALL_GIDS[@]}")
+fi
+
+# Run one cross-dataset evaluation: model trained on <src> (variant <gid>,
+# seed <seed>) scored on target dataset <tgt>.
+#   $1 gid   variant id (e.g. b1_noflow)
+#   $2 src   source dataset id (selects the model directory)
+#   $3 tgt   target dataset id (selects STORE/FLOWS/FEATS)
+#   $4 seed  training seed, also passed to the evaluator
+# Skips when the output JSON already exists or the model checkpoint is
+# missing. On evaluator failure, prints which run failed and where its log is,
+# then returns the evaluator's exit status (which still aborts the script
+# under `set -e`, as before).
+run_one() {
+  local gid=$1 src=$2 tgt=$3 seed=$4
+  local md="${ROOT}/artifacts/ablation/janus_${src}_seed${seed}_${gid}"
+  local stem="${OUT_DIR}/${gid}__seed${seed}_${src}_to_${tgt}"
+  local out="${stem}.json"
+  local log="${stem}.log"
+  if [[ -f "${out}" ]]; then echo "[skip] $gid ${src}→${tgt} seed${seed}"; return; fi
+  if [[ ! -f "${md}/model.pt" ]]; then echo "[missing model] ${md}/model.pt"; return; fi
+  echo "[gpu${GPU}] $(date +%H:%M:%S) $gid ${src} → ${tgt} seed${seed}"
+  cd "${ROOT}/Mixed_CFM"
+  # All evaluator output goes to the log file, so without the handler below a
+  # failure would terminate the script with nothing on the console.
+  CUDA_VISIBLE_DEVICES="${GPU}" uv run --no-sync python -u "${EVAL}" \
+    --model-dir "${md}" \
+    --target-store "${STORE[$tgt]}" --target-flows "${FLOWS[$tgt]}" --target-flow-features "${FEATS[$tgt]}" \
+    --benign-label normal --n-benign 10000 --n-attack 1000000 \
+    --out "${out}" --seed "${seed}" --T 64 --batch-size 512 --n-steps 16 \
+    > "${log}" 2>&1 || {
+      local rc=$?
+      echo "[fail] $gid ${src}→${tgt} seed${seed} (exit ${rc}) — see ${log}" >&2
+      return "${rc}"
+    }
+}
+
+# Visit every (variant, source, target, seed) combination; the diagonal
+# (source == target) is not a cross evaluation and is left out.
+for gid in "${GIDS[@]}"; do
+  for src in "${DATASETS[@]}"; do
+    for tgt in "${DATASETS[@]}"; do
+      if [[ "$src" != "$tgt" ]]; then
+        for seed in "${SEEDS[@]}"; do
+          run_one "$gid" "$src" "$tgt" "$seed"
+        done
+      fi
+    done
+  done
+done
+echo "[done] cross evals complete"
--- a/scripts/ablation/run_groupB.sh
+++ b/scripts/ablation/run_groupB.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+# Run all 60 B-group ablation training + phase1-eval runs.
+#
+# Splits work across two GPUs round-robin (set GPUS env to override).
+# Per-run logs go to <save_dir>/{train,phase1}.log, where <save_dir> is read from each config's save_dir field.
+#
+# Usage:
+#   bash scripts/ablation/run_groupB.sh                     # all 60 runs
+#   bash scripts/ablation/run_groupB.sh b1_noflow b5_nodisc # subset of groups
+#   GPUS=0 bash scripts/ablation/run_groupB.sh              # single-GPU serial
+set -euo pipefail
+# Work from the repository root so the relative Mixed_CFM/... paths resolve.
+cd "$(dirname "$0")/../.."
+
+ALL_GIDS=(b1_noflow b2_flowonly b3_allcont b4_alldisc b5_nodisc)
+DATASETS=(iscxtor2016 cicids2017 cicddos2019 ciciot2023)
+SEEDS=(42 43 44)
+# Comma-separated GPU indices; split into GPU_ARR, one concurrent run per entry.
+GPUS="${GPUS:-0,1}"
+IFS=',' read -ra GPU_ARR <<< "$GPUS"
+N_GPU=${#GPU_ARR[@]}
+
+# Positional arguments, if any, select a subset of variant ids.
+if [[ $# -gt 0 ]]; then
+  GIDS=("$@")
+else
+  GIDS=("${ALL_GIDS[@]}")
+fi
+
+# Build the full run list
+# Each entry is "gid|dataset|seed"; run_one splits it back apart on '|'.
+runs=()
+for gid in "${GIDS[@]}"; do
+  for ds in "${DATASETS[@]}"; do
+    for seed in "${SEEDS[@]}"; do
+      runs+=("${gid}|${ds}|${seed}")
+    done
+  done
+done
+
+n_runs=${#runs[@]}
+echo "[plan] ${n_runs} runs across GPUs ${GPUS} (gids=${GIDS[*]})"
+
+# Train one ablation variant and run its phase-1 evaluation on one GPU.
+#   $1 spec    "gid|dataset|seed" entry from the run list
+#   $2 gpu_id  value exported as CUDA_VISIBLE_DEVICES for both steps
+# The save directory is read from the config's save_dir field; training output
+# goes to <save_dir>/train.log and evaluation output to <save_dir>/phase1.log.
+run_one() {
+  local spec="$1" gpu_id="$2"
+  # Keep the parsed fields local so serial (single-GPU) calls do not overwrite
+  # same-named globals in the calling script.
+  local gid ds seed
+  IFS='|' read -r gid ds seed <<< "$spec"
+  local cfg="Mixed_CFM/configs/ablation/${gid}/${ds}_seed${seed}.yaml"
+  local save_dir
+  # Pass the path as an argument rather than splicing it into the Python
+  # source, so quotes or backslashes in the path cannot break the snippet.
+  save_dir=$(uv run --no-sync python -c \
+    "import sys, yaml; f = open(sys.argv[1]); print(yaml.safe_load(f)['save_dir']); f.close()" \
+    "$cfg")
+  mkdir -p "$save_dir"
+  echo "[gpu${gpu_id}] $(date +%H:%M:%S) START $gid $ds seed${seed}"
+  CUDA_VISIBLE_DEVICES="$gpu_id" uv run --no-sync python Mixed_CFM/train.py \
+    --config "$cfg" >"$save_dir/train.log" 2>&1
+  CUDA_VISIBLE_DEVICES="$gpu_id" uv run --no-sync python Mixed_CFM/eval_phase1.py \
+    --model-dir "$save_dir" --out-dir "$save_dir" \
+    --batch-size 256 --n-steps 16 \
+    --n-val-cap 30000 --n-atk-cap 30000 >"$save_dir/phase1.log" 2>&1
+  echo "[gpu${gpu_id}] $(date +%H:%M:%S) DONE  $gid $ds seed${seed}"
+}
+
+# Round-robin assignment
+# Run i goes to GPU (i mod N_GPU). With several GPUs, runs are launched in
+# batches of N_GPU background jobs and the batch is drained before the next
+# one starts. With a single GPU, runs execute serially in the foreground.
+pids=()
+pid_specs=()
+n_failed=0
+
+# Wait for every in-flight background run and record the ones that failed.
+# A failed run does not stop the remaining runs, but it is reported instead of
+# being silently discarded. The index loop avoids expanding "${pids[@]}" on an
+# empty array, which `set -u` rejects on bash older than 4.4.
+drain_batch() {
+  local k
+  for (( k = 0; k < ${#pids[@]}; k++ )); do
+    if ! wait "${pids[$k]}"; then
+      echo "[fail] ${pid_specs[$k]} (see train.log / phase1.log in its save_dir)" >&2
+      n_failed=$((n_failed + 1))
+    fi
+  done
+  pids=()
+  pid_specs=()
+}
+
+for i in "${!runs[@]}"; do
+  spec="${runs[$i]}"
+  gpu_id="${GPU_ARR[$((i % N_GPU))]}"
+  # If single GPU: serial; if multi-GPU: parallel up to N_GPU at a time
+  if [[ $N_GPU -eq 1 ]]; then
+    run_one "$spec" "$gpu_id"
+  else
+    run_one "$spec" "$gpu_id" &
+    pids+=($!)
+    pid_specs+=("$spec")
+    # Cap concurrency at N_GPU
+    if (( (i + 1) % N_GPU == 0 )); then
+      drain_batch
+    fi
+  fi
+done
+# Collect the final, possibly partial, batch.
+drain_batch
+if (( n_failed > 0 )); then
+  echo "[done] $((n_runs - n_failed))/${n_runs} runs complete, ${n_failed} failed" >&2
+  exit 1
+fi
+echo "[done] all ${n_runs} runs complete"
--- a/scripts/ablation/smoke_test.sh
+++ b/scripts/ablation/smoke_test.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+# Smoke-test all 5 B-group variants on cicids2017 seed42 with reduced epochs
+# and tiny train set, on CPU (so VLLM workers on the GPUs are not disturbed).
+#
+# After: each artifacts/ablation_smoke/<gid>/ should contain model.pt
+# + phase1_scores.npz with the variant-specific score keys.
+set -euo pipefail
+cd "$(dirname "$0")/../.."
+
+GIDS=(b1_noflow b2_flowonly b3_allcont b4_alldisc b5_nodisc)
+DS=cicids2017
+SEED=42
+# All smoke outputs live under one root, one sub-directory per variant.
+SMOKE_ROOT=/home/chy/JANUS/artifacts/ablation_smoke
+RULE="=================================================="
+
+# Pass 1: short CPU training run followed by a small phase-1 evaluation.
+for gid in "${GIDS[@]}"; do
+  cfg="Mixed_CFM/configs/ablation/${gid}/${DS}_seed${SEED}.yaml"
+  smoke_dir="${SMOKE_ROOT}/${gid}"
+  echo "${RULE}"
+  echo "[smoke] $gid"
+  echo "${RULE}"
+  # Only the last few lines of each step are shown to keep the output short.
+  uv run --no-sync python Mixed_CFM/train.py \
+    --config "$cfg" \
+    --override "device=cpu" "epochs=2" "n_train=500" "eval_n=200" "eval_every=2" \
+    "save_dir=${smoke_dir}" 2>&1 | tail -8
+  uv run --no-sync python Mixed_CFM/eval_phase1.py \
+    --model-dir "${smoke_dir}" \
+    --out-dir "${smoke_dir}" \
+    --device cpu --batch-size 64 --n-steps 4 \
+    --n-val-cap 200 --n-atk-cap 200 2>&1 | tail -4
+  echo
+done
+
+# Pass 2: list the val_terminal* / val_disc* keys each variant produced.
+echo "=== Smoke summary ==="
+for gid in "${GIDS[@]}"; do
+  npz="${SMOKE_ROOT}/${gid}/phase1_scores.npz"
+  if [[ ! -f "$npz" ]]; then
+    echo "$gid: MISSING"
+  else
+    keys=$(uv run --no-sync python -c "import numpy as np; z=np.load('$npz', allow_pickle=True); print(','.join(sorted(k for k in z.files if k.startswith(('val_terminal','val_disc')))))")
+    echo "$gid: $keys"
+  fi
+done
--- a/scripts/aggregate/aggregate_ablation.py
+++ b/scripts/aggregate/aggregate_ablation.py
@@ -0,0 +1,533 @@
+"""JANUS ablation aggregator (Groups A + B).
+
+Reads phase1_scores.npz from:
+  artifacts/route_comparison/janus_<ds>_seed<S>/      (A + JANUS-full anchor)
+  artifacts/ablation/janus_<ds>_seed<S>_<gid>/        (B variants)
+
+Produces:
+  artifacts/ablation/ABLATION_TABLE.md                final markdown table
+  artifacts/ablation/ABLATION_TABLE_RAW.json          per-cell mean / std / CI / per-seed
+  artifacts/ablation/ABLATION_DELONG.md               paired DeLong p-values vs JANUS-full
+
+Group A operates entirely on existing route_comparison npz files (no GPU).
+Group B requires the 60 B-variant runs to have completed.
+"""
+from __future__ import annotations
+import argparse
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterable
+
+import numpy as np
+from sklearn.covariance import OAS
+from sklearn.metrics import roc_auc_score
+
+# Repository root: two directories above scripts/aggregate/.
+ROOT = Path(__file__).resolve().parents[2]
+ROUTE = ROOT / "artifacts" / "route_comparison"
+ABL = ROOT / "artifacts" / "ablation"
+
+# Dataset ids as they appear in artifact directory names, and their display names.
+DATASETS = ["iscxtor2016", "cicids2017", "cicddos2019", "ciciot2023"]
+PRETTY = {
+    "iscxtor2016": "ISCXTor16",
+    "cicids2017": "CICIDS17",
+    "cicddos2019": "CICDDoS19",
+    "ciciot2023": "CICIoT23",
+}
+SEEDS = [42, 43, 44]
+T_975_N3 = 4.302653  # 95% t-CI factor for n=3 (df=2)
+
+# Score names expected in a JANUS-full npz once the val_/atk_ prefix is stripped.
+CONT_KEYS = ["terminal_norm", "terminal_flow", "terminal_packet"]
+DISC_KEYS = ["disc_nll_total", "disc_nll_ch2", "disc_nll_ch3",
+             "disc_nll_ch4", "disc_nll_ch5", "disc_nll_ch6", "disc_nll_ch7"]
+ALL_KEYS = CONT_KEYS + DISC_KEYS  # 10-d
+
+
+# --------------------------------------------------------------------------- #
+# I/O                                                                         #
+# --------------------------------------------------------------------------- #
+def _load_npz(npz_path: Path):
+    """Load a within-dataset score archive and split it by key prefix.
+
+    Keys named ``val_<score>`` go into the first dict and ``atk_<score>`` into
+    the second, each stored under the bare ``<score>`` name. ``val_labels``
+    and ``atk_labels`` are skipped; keys with any other prefix are ignored.
+
+    Returns:
+        ``(val, atk)`` dicts mapping score name to the stored array.
+    """
+    val = {}
+    atk = {}
+    # np.load on an .npz returns an NpzFile that keeps the archive open; the
+    # context manager closes it once every array has been read (indexing
+    # ``z[k]`` materializes the array, so the dicts stay valid afterwards).
+    # NOTE(review): allow_pickle=True lets a crafted archive execute code on
+    # load; presumably acceptable because these files are produced locally.
+    with np.load(npz_path, allow_pickle=True) as z:
+        for k in z.files:
+            if k.startswith("val_") and k != "val_labels":
+                val[k[4:]] = z[k]
+            elif k.startswith("atk_") and k != "atk_labels":
+                atk[k[4:]] = z[k]
+    return val, atk
+
+
+def _load_cross_npz(npz_path: Path):
+    """Load a cross-dataset score archive and split it by key prefix.
+
+    Cross npz schema:  b_<key> = target benign,  a_<key> = target attacks.
+    ``b_labels`` and ``a_labels`` are skipped; keys with any other prefix are
+    ignored.
+
+    Returns:
+        ``(val, atk)`` dicts mapping the bare score name to the stored array,
+        with target-benign scores in ``val`` and target-attack scores in ``atk``.
+    """
+    val = {}
+    atk = {}
+    # Close the underlying archive once all arrays have been materialized.
+    # NOTE(review): allow_pickle=True lets a crafted archive execute code on
+    # load; presumably acceptable because these files are produced locally.
+    with np.load(npz_path, allow_pickle=True) as z:
+        for k in z.files:
+            if k.startswith("b_") and k != "b_labels":
+                val[k[2:]] = z[k]
+            elif k.startswith("a_") and k != "a_labels":
+                atk[k[2:]] = z[k]
+    return val, atk
+
+
+def _stack(d: dict, keys: list[str]) -> np.ndarray | None:
+    """Column-stack the score arrays named by *keys* into one float64 matrix.
+
+    Args:
+        d: Mapping from score name to a per-sample array.
+        keys: Score names to stack, in column order.
+
+    Returns:
+        An array with one column per key (stacked along axis 1), with NaN
+        replaced by 0.0 and +/-inf clipped to +/-1e6, or ``None`` if any key
+        is absent from *d*.
+    """
+    if any(k not in d for k in keys):
+        # variant doesn't produce this score (e.g. B2 has no disc, B5 disc untrained)
+        return None
+    out = np.stack([d[k] for k in keys], axis=1).astype(np.float64)
+    return np.nan_to_num(out, nan=0.0, posinf=1e6, neginf=-1e6)
+
+
+# --------------------------------------------------------------------------- #
+# Score functions (Group A definitions)                                       #
+# --------------------------------------------------------------------------- #
+def _mahal(S, mu, inv_cov):
+    """Quadratic form ``(S[n] - mu) @ inv_cov @ (S[n] - mu)`` for every row ``n``.
+
+    This is the squared Mahalanobis distance when *inv_cov* is an inverse
+    covariance; no square root is taken.
+    """
+    centred = S - mu
+    return np.einsum("ni,ij,nj->n", centred, inv_cov, centred)
+
+
+def _oas_mahal(val_S, atk_S):
+    """Score both sets by quadratic-form distance to the *val_S* distribution.
+
+    The centre is the column mean of *val_S* and the covariance is the OAS
+    estimate fitted on *val_S* only; *atk_S* is scored against those same
+    statistics. Returns ``(val_scores, atk_scores)``.
+    """
+    centre = val_S.mean(axis=0)
+    shrunk_cov = OAS().fit(val_S).covariance_
+    # Small diagonal ridge added before inversion (presumably to guard against
+    # a numerically singular estimate).
+    ridge = 1e-9 * np.eye(shrunk_cov.shape[0])
+    precision = np.linalg.inv(shrunk_cov + ridge)
+    return _mahal(val_S, centre, precision), _mahal(atk_S, centre, precision)
+
+
+def _zscore_agg(val_S, atk_S, mode="mean"):
+    """Aggregate per-column z-scores into one score per row.
+
+    Columns are standardized with the mean and standard deviation of *val_S*
+    (plus 1e-9 on the deviation), then reduced across columns with the mean
+    (``mode="mean"``) or the maximum (``mode="max"``).
+
+    Returns:
+        ``(val_scores, atk_scores)``.
+
+    Raises:
+        ValueError: if *mode* is neither ``"mean"`` nor ``"max"``.
+    """
+    centre = val_S.mean(axis=0)
+    scale = val_S.std(axis=0) + 1e-9
+    z_val, z_atk = ((S - centre) / scale for S in (val_S, atk_S))
+    if mode == "mean":
+        return z_val.mean(axis=1), z_atk.mean(axis=1)
+    if mode == "max":
+        return z_val.max(axis=1), z_atk.max(axis=1)
+    raise ValueError(mode)
+
+
+def score_a1_terminal_norm(val, atk):
+    """Use the raw ``terminal_norm`` score, with no aggregation.
+
+    Returns ``(val_scores, atk_scores)``, or ``None`` when either side lacks
+    the key, matching the other ``score_*`` functions so callers can skip the
+    cell instead of crashing with ``KeyError``.
+    """
+    if "terminal_norm" not in val or "terminal_norm" not in atk:
+        return None
+    return val["terminal_norm"], atk["terminal_norm"]
+
+
+def score_a2_disc_total(val, atk):
+    """Use the raw ``disc_nll_total`` score, with no aggregation.
+
+    Returns ``(val_scores, atk_scores)``, or ``None`` when either side lacks
+    the key (previously only *val* was checked, so a missing *atk* key raised
+    ``KeyError``).
+    """
+    if "disc_nll_total" not in val or "disc_nll_total" not in atk:
+        return None
+    return val["disc_nll_total"], atk["disc_nll_total"]
+
+
+def score_a3_oas_term3(val, atk):
+    """OAS-Mahalanobis score over the ``CONT_KEYS`` columns; ``None`` if any key is absent."""
+    stacked = [_stack(side, CONT_KEYS) for side in (val, atk)]
+    if any(m is None for m in stacked):
+        return None
+    return _oas_mahal(*stacked)
+
+
+def score_a4_oas_disc7(val, atk):
+    """OAS-Mahalanobis score over the ``DISC_KEYS`` columns; ``None`` if any key is absent."""
+    stacked = [_stack(side, DISC_KEYS) for side in (val, atk)]
+    if any(m is None for m in stacked):
+        return None
+    return _oas_mahal(*stacked)
+
+
+def score_a5_oas_all10(val, atk):
+    """OAS-Mahalanobis score over all ``ALL_KEYS`` columns; ``None`` if any key is absent."""
+    stacked = [_stack(side, ALL_KEYS) for side in (val, atk)]
+    if any(m is None for m in stacked):
+        return None
+    return _oas_mahal(*stacked)
+
+
+def score_a6_zmean(val, atk):
+    """Mean of per-column z-scores over ``ALL_KEYS``; ``None`` if any key is absent."""
+    stacked = [_stack(side, ALL_KEYS) for side in (val, atk)]
+    if any(m is None for m in stacked):
+        return None
+    return _zscore_agg(*stacked, "mean")
+
+
+def score_a7_zmax(val, atk):
+    """Maximum of per-column z-scores over ``ALL_KEYS``; ``None`` if any key is absent."""
+    stacked = [_stack(side, ALL_KEYS) for side in (val, atk)]
+    if any(m is None for m in stacked):
+        return None
+    return _zscore_agg(*stacked, "max")
+
+
+def score_oas_disc_all(val, atk):
+    """OAS-Mahalanobis score over every ``disc_nll_*`` key found in *val*.
+
+    The key set is discovered from *val* and sorted for a stable column order,
+    so a variant with a non-standard set of discrete channels still works (the
+    registry in this module selects it for B4). Returns ``None`` when *val*
+    has no such key or *atk* is missing one of them.
+    """
+    disc_keys = sorted(name for name in val if name.startswith("disc_nll_"))
+    if len(disc_keys) == 0:
+        return None
+    Sv, Sa = _stack(val, disc_keys), _stack(atk, disc_keys)
+    if Sv is None or Sa is None:
+        return None
+    return _oas_mahal(Sv, Sa)
+
+
+def score_oas_all_available(val, atk):
+    """OAS-Mahalanobis score over every ``terminal_*`` or ``disc_nll_*`` key in *val*.
+
+    Handles any subset of the standard keys (the registry in this module
+    selects it for B1). With exactly one matching key the raw score is
+    returned unchanged. Returns ``None`` when *val* has no matching key or
+    *atk* is missing one of the stacked keys.
+    """
+    prefixes = ("terminal_", "disc_nll_")
+    found = sorted(name for name in val if name.startswith(prefixes))
+    if not found:
+        return None
+    if len(found) == 1:
+        only = found[0]
+        return val[only], atk[only]
+    Sv, Sa = _stack(val, found), _stack(atk, found)
+    if Sv is None or Sa is None:
+        return None
+    return _oas_mahal(Sv, Sa)
+
+
+def score_oas_term_all(val, atk):
+    """OAS-Mahalanobis score over every ``terminal_*`` key found in *val*.
+
+    With exactly one matching key the raw score is returned unchanged.
+    Returns ``None`` when *val* has no matching key or *atk* is missing one of
+    the stacked keys.
+
+    NOTE(review): the B-group registry in this module selects other score ids
+    for B1 and B3 — confirm whether this function is still in use.
+    """
+    term_keys = sorted(name for name in val if name.startswith("terminal_"))
+    if len(term_keys) == 0:
+        return None
+    if len(term_keys) == 1:
+        # A single scalar score needs no covariance model.
+        only = term_keys[0]
+        return val[only], atk[only]
+    Sv, Sa = _stack(val, term_keys), _stack(atk, term_keys)
+    if Sv is None or Sa is None:
+        return None
+    return _oas_mahal(Sv, Sa)
+
+
+# Registry of score functions, looked up by VariantSpec.score_fn_id. Each
+# callable takes (val, atk) score dicts and returns (val_scores, atk_scores),
+# or None when the npz lacks the keys it needs.
+SCORE_FNS = {
+    "A1_terminal_norm": score_a1_terminal_norm,
+    "A2_disc_nll_total": score_a2_disc_total,
+    "A3_OAS_term3": score_a3_oas_term3,
+    "A4_OAS_disc7": score_a4_oas_disc7,
+    "A5_OAS_all10": score_a5_oas_all10,
+    "A6_zmean_all10": score_a6_zmean,
+    "A7_zmax_all10": score_a7_zmax,
+    "OAS_disc_all": score_oas_disc_all,
+    "OAS_term_all": score_oas_term_all,
+    "OAS_all_available": score_oas_all_available,
+}
+
+
+# --------------------------------------------------------------------------- #
+# Stats                                                                       #
+# --------------------------------------------------------------------------- #
+def _auroc(s_v, s_a):
+    """AUROC with *s_v* as the negative class (label 0) and *s_a* as the positive class (label 1)."""
+    labels = np.concatenate([np.zeros(len(s_v)), np.ones(len(s_a))])
+    scores = np.concatenate([s_v, s_a])
+    return float(roc_auc_score(labels, scores))
+
+
+def _mean_ci(values: list[float]):
+    """Mean with a two-sided 95% Student-t confidence half-width.
+
+    ``None`` and NaN entries are dropped first. The half-width uses the t
+    critical value for ``n - 1`` degrees of freedom for every ``n >= 2``
+    (about 4.30 for the usual 3 seeds, about 12.71 when only 2 survive), so a
+    cell with a missing seed is no longer reported with the much narrower
+    normal-approximation factor 1.96.
+
+    Returns:
+        ``None`` when no usable value remains; otherwise a dict with keys
+        ``mean``, ``std`` (sample std, ddof=1), ``ci`` (half-width), ``n`` and
+        ``vals``. With a single value, ``std`` and ``ci`` are 0.0.
+    """
+    a = np.asarray([v for v in values if v is not None and not np.isnan(v)], dtype=float)
+    if a.size == 0:
+        return None
+    if a.size == 1:
+        return {"mean": float(a[0]), "std": 0.0, "ci": 0.0, "n": 1, "vals": a.tolist()}
+    # Function-scope import, like the other scipy use in this module.
+    from scipy.stats import t as student_t
+    std = a.std(ddof=1)
+    se = std / np.sqrt(a.size)
+    t_crit = student_t.ppf(0.975, df=a.size - 1)
+    return {
+        "mean": float(a.mean()),
+        "std": float(std),
+        "ci": float(t_crit * se),
+        "n": int(a.size),
+        "vals": a.tolist(),
+    }
+
+
+def _midrank(x):
+    """Return 1-based ranks of *x* as floats, giving tied values their average rank."""
+    x = np.asarray(x)
+    order = np.argsort(x, kind="mergesort")
+    xs = x[order]
+    n = xs.size
+    # Mark the first element of each run of equal values in sorted order.
+    is_start = np.ones(n, dtype=bool)
+    is_start[1:] = xs[1:] != xs[:-1]
+    starts = np.flatnonzero(is_start)
+    ends = np.append(starts[1:], n)
+    # A run occupying sorted positions [i, j) has midrank (i + j - 1) / 2 + 1.
+    run_rank = (starts + ends - 1) / 2.0 + 1
+    ranks = np.empty(n, dtype=float)
+    ranks[order] = run_rank[np.cumsum(is_start) - 1]
+    return ranks
+
+
+def _delong_var(s_v, s_a):
+    """Compute DeLong AUROC variance (Sun & Xu 2014, fast O(n log n)).
+
+    Args:
+        s_v: Scores of the negative class.
+        s_a: Scores of the positive class.
+
+    Returns:
+        ``(auc, var, V10, V01)`` where ``V10`` has one structural component
+        per positive sample and ``V01`` one per negative sample.
+
+    Midranks are always computed in floating point, so integer-typed scores
+    with ties no longer have their half-ranks truncated.
+    """
+    s_v = np.asarray(s_v)
+    s_a = np.asarray(s_a)
+    n0, n1 = len(s_v), len(s_a)
+    pooled = _midrank(np.concatenate([s_a, s_v]))  # positives first
+    L_a = pooled[:n1]
+    L_v = pooled[n1:]
+    # Midranks within each class on its own.
+    L_aa = _midrank(s_a)
+    L_vv = _midrank(s_v)
+    auc = (L_a.sum() / n1 - (n1 + 1) / 2) / n0
+    V10 = (L_a - L_aa) / n0  # length n1
+    V01 = 1 - (L_v - L_vv) / n1  # length n0
+    s10 = V10.var(ddof=1)
+    s01 = V01.var(ddof=1)
+    var = s10 / n1 + s01 / n0
+    return float(auc), float(var), V10, V01
+
+
+def _delong_paired_p(s_v, s_a, t_v, t_a):
+    """Paired DeLong test for two AUROCs on the same data.
+
+    Returns (auc1 - auc2, p_value_two_sided).
+    s_*: candidate scores; t_*: reference (JANUS-full) scores.
+    Both arrays must align flow-by-flow.
+
+    A non-positive variance of the difference yields ``p = 1.0``.
+    """
+    auc1, var1, V10_1, V01_1 = _delong_var(s_v, s_a)
+    auc2, var2, V10_2, V01_2 = _delong_var(t_v, t_a)
+    n1, n0 = len(s_a), len(s_v)
+    # Covariance of the two scorers' structural components, per class.
+    cov10 = np.cov(np.stack([V10_1, V10_2]), ddof=1)[0, 1]
+    cov01 = np.cov(np.stack([V01_1, V01_2]), ddof=1)[0, 1]
+    cov12 = cov10 / n1 + cov01 / n0
+    var_diff = var1 + var2 - 2 * cov12
+    if var_diff <= 0:
+        return auc1 - auc2, 1.0
+    z = (auc1 - auc2) / np.sqrt(var_diff)
+    # two-sided
+    from scipy.stats import norm
+    # The survival function keeps precision in the far tail, where
+    # 1 - cdf(|z|) rounds to exactly 0 and would report p = 0.0.
+    p = 2 * norm.sf(abs(z))
+    return auc1 - auc2, float(p)
+
+
+# --------------------------------------------------------------------------- #
+# Aggregation entry points                                                    #
+# --------------------------------------------------------------------------- #
+@dataclass
+class VariantSpec:
+    """One row of the ablation table: where its npz files live and how to score them."""
+    vid: str  # short variant id, e.g. "A1" or "B3"
+    label: str  # human-readable row label
+    what_removed: str  # description of the component this variant removes
+    npz_dir_pattern: str  # e.g. "route_comparison/janus_{ds}_seed{seed}" or "ablation/janus_{ds}_seed{seed}_{gid}"
+    score_fn_id: str  # which Group A score to apply on the npz (usually "A5_OAS_all10")
+    gid: str = ""  # for B variants
+
+
+def _expand_path(spec: VariantSpec, ds: str, seed: int) -> Path:
+    """Return the ``phase1_scores.npz`` path for *spec* at one (dataset, seed)."""
+    run_dir = spec.npz_dir_pattern.format(ds=ds, seed=seed, gid=spec.gid)
+    artifacts = ROOT / "artifacts"
+    return artifacts / run_dir / "phase1_scores.npz"
+
+
+def collect_variant(spec: VariantSpec) -> dict:
+    """Compute the AUROC of *spec* for every available (dataset, seed) run.
+
+    Runs whose npz file is missing, or whose score function returns ``None``,
+    are skipped. The result holds the spec's identifying fields, the per-seed
+    AUROCs under ``per_seed`` and their per-dataset summary from ``_mean_ci``
+    under ``per_dataset``.
+    """
+    per_seed: dict[str, dict[int, float]] = {ds: {} for ds in DATASETS}
+    for ds in DATASETS:
+        for seed in SEEDS:
+            npz = _expand_path(spec, ds, seed)
+            if not npz.exists():
+                continue
+            val, atk = _load_npz(npz)
+            scored = SCORE_FNS[spec.score_fn_id](val, atk)
+            if scored is None:
+                continue
+            per_seed[ds][seed] = _auroc(*scored)
+    # Seeds are inserted in SEEDS order, so the value lists keep that order.
+    summary = {ds: _mean_ci(list(per_seed[ds].values())) for ds in DATASETS}
+    result = {
+        "vid": spec.vid,
+        "label": spec.label,
+        "what_removed": spec.what_removed,
+        "score_fn_id": spec.score_fn_id,
+        "gid": spec.gid,
+    }
+    result["per_dataset"] = summary
+    result["per_seed"] = per_seed
+    return result
+
+
+def collect_delong_pvals(spec: VariantSpec, ref_spec: VariantSpec) -> dict:
+    """Paired DeLong test: spec vs ref_spec, on each (ds, seed).
+
+    Returns a dict mapping dataset id to a list of
+    ``{"seed", "delta", "p"}`` records, one per run that could be compared.
+    A run is skipped when either npz file is missing, either score function
+    returns ``None``, or the two runs' score arrays differ in length.
+    """
+    out: dict[str, list[dict]] = {ds: [] for ds in DATASETS}
+    for ds in DATASETS:
+        for seed in SEEDS:
+            npz_s = _expand_path(spec, ds, seed)
+            npz_r = _expand_path(ref_spec, ds, seed)
+            if not npz_s.exists() or not npz_r.exists():
+                continue
+            val_s, atk_s = _load_npz(npz_s)
+            val_r, atk_r = _load_npz(npz_r)
+            res_s = SCORE_FNS[spec.score_fn_id](val_s, atk_s)
+            res_r = SCORE_FNS[ref_spec.score_fn_id](val_r, atk_r)
+            if res_s is None or res_r is None:
+                continue
+            (sv_s, sa_s), (sv_r, sa_r) = res_s, res_r
+            # The paired test needs both runs scored on the same flows in the
+            # same order. Only the lengths are checked here; runs whose
+            # lengths disagree are skipped rather than re-aligned.
+            same_shape = len(sv_s) == len(sv_r) and len(sa_s) == len(sa_r)
+            if not same_shape:
+                continue
+            delta, p_value = _delong_paired_p(sv_s, sa_s, sv_r, sa_r)
+            out[ds].append({"seed": seed, "delta": delta, "p": p_value})
+    return out
+
+
+# --------------------------------------------------------------------------- #
+# Variant registry                                                            #
+# --------------------------------------------------------------------------- #
+# Run-directory patterns relative to artifacts/; _expand_path fills in ds, seed and gid.
+ROUTE_DIR = "route_comparison/janus_{ds}_seed{seed}"
+ABL_DIR = "ablation/janus_{ds}_seed{seed}_{gid}"
+
+
+def _group_a_specs() -> list[VariantSpec]:
+    base = ROUTE_DIR
+    return [
+        VariantSpec("JANUS-full", "JANUS-full (A5)", "—", base, "A5_OAS_all10"),
+        VariantSpec("A1", "A1 terminal_norm", "OAS aggregator + disc head", base, "A1_terminal_norm"),
+        VariantSpec("A2", "A2 disc_nll_total", "OAS aggregator + CFM head", base, "A2_disc_nll_total"),
+        VariantSpec("A3", "A3 OAS-Mahal term3", "disc head", base, "A3_OAS_term3"),
+        VariantSpec("A4", "A4 OAS-Mahal disc7", "CFM head", base, "A4_OAS_disc7"),
+        VariantSpec("A6", "A6 z-score mean (10-d)", "covariance structure", base, "A6_zmean_all10"),
+        VariantSpec("A7", "A7 z-score max (10-d)", "weighted aggregation", base, "A7_zmax_all10"),
+    ]
+
+
+def _group_b_specs() -> list[VariantSpec]:
+    return [
+        # B1 has 2 terminal keys (no terminal_flow) + full disc7 → use auto-key OAS (9-d in this case)
+        VariantSpec("B1", "B1 no FLOW token", "global context",        ABL_DIR, "OAS_all_available", gid="b1_noflow"),
+        # B2 has only terminal_flow (= terminal_norm); single scalar
+        VariantSpec("B2", "B2 flow-only",     "packet sequence",        ABL_DIR, "A1_terminal_norm", gid="b2_flowonly"),
+        # B3 has terminal_norm/flow/packet covering all 9 dims (cont + disc-as-cont); OAS on 3-tuple
+        VariantSpec("B3", "B3 all-cont",      "cont/disc split",        ABL_DIR, "A3_OAS_term3", gid="b3_allcont"),
+        # B4 has 9 disc channels + total; auto-discover keys
+        VariantSpec("B4", "B4 all-disc",      "cont/disc split (rev)",  ABL_DIR, "OAS_disc_all", gid="b4_alldisc"),
+        # B5 has full schema but disc head is untrained noise; use term3 only
+        VariantSpec("B5", "B5 λ_disc=0",      "joint training",         ABL_DIR, "A3_OAS_term3", gid="b5_nodisc"),
+    ]
+
+
+# --------------------------------------------------------------------------- #
+# Markdown writer                                                             #
+# --------------------------------------------------------------------------- #
+def _fmt_cell(c: dict | None) -> str:
+    if c is None:
+        return "—"
+    if c["n"] == 1:
+        return f"{100 * c['mean']:.2f}"
+    return f"{100 * c['mean']:.2f} ± {100 * c['ci']:.2f}"
+
+
+def write_table(rows: list[dict], path: Path, *, title: str = "JANUS ablation"):
+    lines = [f"# {title}", ""]
+    lines.append(f"3-seed mean ± 95% t-CI AUROC (%). Seeds = {SEEDS}.")
+    lines.append("")
+    header = ["Variant", "What removed"] + [PRETTY[ds] for ds in DATASETS] + ["Mean"]
+    lines.append("| " + " | ".join(header) + " |")
+    lines.append("|" + "|".join("---" for _ in header) + "|")
+    for r in rows:
+        cells = [r["label"], r["what_removed"]]
+        ds_means = []
+        for ds in DATASETS:
+            c = r["per_dataset"].get(ds)
+            cells.append(_fmt_cell(c))
+            if c is not None:
+                ds_means.append(c["mean"])
+        cells.append(f"{100 * np.mean(ds_means):.2f}" if ds_means else "—")
+        lines.append("| " + " | ".join(cells) + " |")
+    lines.append("")
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text("\n".join(lines))
+
+
+def write_delong(records: list[dict], path: Path):
+    lines = ["# Paired DeLong p-values vs JANUS-full",
+             "",
+             f"Seeds = {SEEDS}. p reported per (variant, dataset, seed). "
+             "Holm-Bonferroni-correctable; raw p shown.",
+             ""]
+    for rec in records:
+        lines.append(f"## {rec['label']}  ({rec['vid']})")
+        lines.append("")
+        header = ["Seed"] + [PRETTY[ds] for ds in DATASETS]
+        lines.append("| " + " | ".join(header) + " |")
+        lines.append("|" + "|".join("---" for _ in header) + "|")
+        for seed in SEEDS:
+            row = [str(seed)]
+            for ds in DATASETS:
+                hits = [x for x in rec["delong"][ds] if x["seed"] == seed]
+                if hits:
+                    h = hits[0]
+                    sign = "+" if h["delta"] >= 0 else "−"
+                    row.append(f"Δ={sign}{abs(h['delta']):.4f}, p={h['p']:.3g}")
+                else:
+                    row.append("—")
+            lines.append("| " + " | ".join(row) + " |")
+        lines.append("")
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text("\n".join(lines))
+
+
+# --------------------------------------------------------------------------- #
+# Main                                                                        #
+# --------------------------------------------------------------------------- #
+def main() -> None:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--group", choices=["A", "B", "all"], default="A")
+    ap.add_argument("--delong", action="store_true",
+                    help="Compute paired DeLong p-values vs JANUS-full (CPU heavy on big eval sets).")
+    args = ap.parse_args()
+
+    ABL.mkdir(parents=True, exist_ok=True)
+    specs: list[VariantSpec] = []
+    if args.group in ("A", "all"):
+        specs.extend(_group_a_specs())
+    if args.group in ("B", "all"):
+        specs.extend(_group_b_specs())
+
+    rows = []
+    for spec in specs:
+        r = collect_variant(spec)
+        rows.append(r)
+        n_ok = sum(1 for ds in DATASETS if r["per_dataset"][ds] is not None)
+        print(f"[ok] {spec.vid:14s}  datasets_with_data={n_ok}/{len(DATASETS)}", flush=True)
+
+    out_md = ABL / f"ABLATION_TABLE_{args.group}.md"
+    write_table(rows, out_md, title=f"JANUS ablation (group {args.group})")
+    out_json = ABL / f"ABLATION_TABLE_{args.group}.json"
+    out_json.write_text(json.dumps(rows, indent=2, default=lambda o: None))
+    print(f"[wrote] {out_md}")
+    print(f"[wrote] {out_json}")
+
+    if args.delong:
+        ref = next(s for s in _group_a_specs() if s.vid == "JANUS-full")
+        recs = []
+        for spec in specs:
+            if spec.vid == "JANUS-full":
+                continue
+            d = collect_delong_pvals(spec, ref)
+            recs.append({"vid": spec.vid, "label": spec.label, "delong": d})
+            print(f"[delong] {spec.vid}", flush=True)
+        write_delong(recs, ABL / f"ABLATION_DELONG_{args.group}.md")
+        print(f"[wrote] {ABL / f'ABLATION_DELONG_{args.group}.md'}")
+
+
+# Run the CLI only when executed as a script, not when imported as a module.
+if __name__ == "__main__":
+    main()
--- a/scripts/aggregate/aggregate_ablation_cross.py
+++ b/scripts/aggregate/aggregate_ablation_cross.py
@@ -0,0 +1,218 @@
+"""Cross-dataset version of the Group-A score-aggregator ablation.
+
+For each (src, tgt, seed) cell we have a phase1-style npz with:
+  b_<key>   target benign val  (aggregator fit on this)
+  a_<key>   target attacks
+
+Within-dataset (src == tgt) cells reuse the standard
+artifacts/route_comparison/janus_<ds>_seed<S>/phase1_scores.npz
+(val_/atk_ prefixes — handled via the same _load_npz path).
+
+We score 7 aggregators (A1–A4, A6, A7, plus JANUS-full's deployed A5) across all
+3×3 cells × 3 seeds, then summarize with two complementary views:
+
+  ABLATION_TABLE_CROSS_summary.md
+    | Aggregator | Within mean | Cross mean | Cross worst cell | Within − Cross |
+    Shows whether OAS's value lives in cross-dataset robustness.
+
+  ABLATION_TABLE_CROSS_full.md
+    Per-aggregator full 3×3 matrix (each cell = 3-seed mean ± 95% t-CI).
+"""
+from __future__ import annotations
+import argparse
+import json
+from pathlib import Path
+import numpy as np
+
+from aggregate_ablation import (
+    SCORE_FNS, T_975_N3, _auroc, _load_npz, _load_cross_npz,
+)
+
+# parents[2] of scripts/aggregate/<this file> is the repository root.
+ROOT = Path(__file__).resolve().parents[2]
+# Within-dataset score files live under ROUTE, cross-dataset ones under CROSS.
+ROUTE = ROOT / "artifacts" / "route_comparison"
+CROSS = ROUTE / "cross"
+# Default output directory for the reports written by main().
+ABL = ROOT / "artifacts" / "ablation"
+
+# 3x3 cross matrix datasets (no ISCXTor16 — different feature space)
+CROSS_DATASETS = ["cicids2017", "cicddos2019", "ciciot2023"]
+# Display names used in table headers and worst-cell labels.
+PRETTY = {
+    "cicids2017": "CICIDS17",
+    "cicddos2019": "CICDDoS19",
+    "ciciot2023": "CICIoT23",
+}
+SEEDS = [42, 43, 44]
+
+# (label, SCORE_FNS key, short description) for every aggregator that is scored.
+AGGREGATORS = [
+    ("JANUS-full (A5)", "A5_OAS_all10",     "deployed JANUS"),
+    ("A1 terminal_norm","A1_terminal_norm", "raw scalar (CFM head)"),
+    ("A2 disc_total",   "A2_disc_nll_total","raw scalar (disc head)"),
+    ("A3 OAS term3",    "A3_OAS_term3",     "OAS on 3 cont sub-scores"),
+    ("A4 OAS disc7",    "A4_OAS_disc7",     "OAS on 7 disc sub-scores"),
+    ("A6 z-score mean", "A6_zmean_all10",   "equal-weight z-score sum"),
+    ("A7 z-score max",  "A7_zmax_all10",    "equal-weight z-score max"),
+]
+
+
+# --------------------------------------------------------------------------- #
+def _cell_path(src: str, tgt: str, seed: int) -> Path | None:
+    """Return npz path for (src, tgt, seed) cell, or None if missing."""
+    if src == tgt:
+        p = ROUTE / f"janus_{src}_seed{seed}" / "phase1_scores.npz"
+        return p if p.exists() else None
+    p = CROSS / f"janus_seed{seed}_{src}_to_{tgt}.npz"
+    return p if p.exists() else None
+
+
+def _load_cell(src: str, tgt: str, seed: int):
+    p = _cell_path(src, tgt, seed)
+    if p is None:
+        return None, None
+    if src == tgt:
+        return _load_npz(p)
+    return _load_cross_npz(p)
+
+
+def _score_cell(src: str, tgt: str, seed: int, score_fn_id: str) -> float | None:
+    val, atk = _load_cell(src, tgt, seed)
+    if val is None:
+        return None
+    fn = SCORE_FNS[score_fn_id]
+    res = fn(val, atk)
+    if res is None:
+        return None
+    sv, sa = res
+    return _auroc(sv, sa)
+
+
+def _seed_means(src: str, tgt: str, score_fn_id: str) -> dict | None:
+    """3-seed AUROC for cell (src,tgt). Returns dict with mean/std/ci, or None."""
+    vals = []
+    for seed in SEEDS:
+        v = _score_cell(src, tgt, seed, score_fn_id)
+        if v is not None and not np.isnan(v):
+            vals.append(v)
+    if not vals:
+        return None
+    a = np.asarray(vals)
+    if a.size == 1:
+        return {"mean": float(a[0]), "std": 0.0, "ci": 0.0, "n": 1, "vals": a.tolist()}
+    se = a.std(ddof=1) / np.sqrt(a.size)
+    return {
+        "mean": float(a.mean()),
+        "std": float(a.std(ddof=1)),
+        "ci": float(T_975_N3 * se) if a.size == 3 else float(1.96 * se),
+        "n": int(a.size),
+        "vals": a.tolist(),
+    }
+
+
+# --------------------------------------------------------------------------- #
+def _fmt_cell(c):
+    if c is None:
+        return "—"
+    if c["n"] == 1:
+        return f"{100 * c['mean']:.2f}"
+    return f"{100 * c['mean']:.2f} ± {100 * c['ci']:.2f}"
+
+
+def _summary_row(rows_3x3: dict[tuple[str, str], dict | None]) -> tuple[float, float, float, dict | None]:
+    """Return (within_mean, cross_mean, cross_worst, worst_cell_summary)."""
+    within = []
+    cross = []
+    worst_v = None
+    worst_cell = None
+    for (src, tgt), cell in rows_3x3.items():
+        if cell is None:
+            continue
+        if src == tgt:
+            within.append(cell["mean"])
+        else:
+            cross.append(cell["mean"])
+            if worst_v is None or cell["mean"] < worst_v:
+                worst_v = cell["mean"]
+                worst_cell = (src, tgt, cell)
+    w = float(np.mean(within)) if within else float("nan")
+    c = float(np.mean(cross)) if cross else float("nan")
+    cw = worst_v if worst_v is not None else float("nan")
+    return w, c, cw, worst_cell
+
+
+# --------------------------------------------------------------------------- #
+def main() -> None:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--out-dir", type=Path, default=ABL)
+    args = ap.parse_args()
+    args.out_dir.mkdir(parents=True, exist_ok=True)
+
+    full = {}  # aggregator label -> {(src, tgt) -> cell summary}
+    for label, fn_id, _why in AGGREGATORS:
+        rows = {}
+        for src in CROSS_DATASETS:
+            for tgt in CROSS_DATASETS:
+                rows[(src, tgt)] = _seed_means(src, tgt, fn_id)
+        full[label] = rows
+        n_ok = sum(1 for v in rows.values() if v is not None)
+        print(f"[ok] {label:20s} cells={n_ok}/{len(rows)}", flush=True)
+
+    # Summary table: within mean, cross mean, cross worst
+    summary_lines = ["# Cross-dataset Group-A summary",
+                     "",
+                     f"3-seed mean ± 95% t-CI AUROC. Datasets = {CROSS_DATASETS}.",
+                     "Aggregator fit on **target** benign val only.",
+                     "",
+                     "| Aggregator | Within (3 cells, mean) | Cross (6 cells, mean) | Cross worst cell | Within − Cross |",
+                     "|---|---|---|---|---|"]
+    summary_data = {}
+    for label, fn_id, _why in AGGREGATORS:
+        rows = full[label]
+        w, c, cw, worst_cell = _summary_row(rows)
+        gap = (w - c) * 100 if not np.isnan(w) and not np.isnan(c) else float("nan")
+        worst_str = "—"
+        if worst_cell is not None:
+            src, tgt, cell = worst_cell
+            worst_str = f"{PRETTY[src]}→{PRETTY[tgt]}: {_fmt_cell(cell)}"
+        summary_lines.append(
+            f"| {label} | {100 * w:.2f} | {100 * c:.2f} | {worst_str} | {gap:+.2f} |"
+        )
+        summary_data[label] = {"within_mean": w, "cross_mean": c, "cross_worst": cw, "worst_cell": worst_cell}
+    summary_path = args.out_dir / "ABLATION_TABLE_CROSS_summary.md"
+    summary_path.write_text("\n".join(summary_lines) + "\n")
+    print(f"[wrote] {summary_path}")
+
+    # Full per-aggregator 3x3 matrices
+    full_lines = ["# Cross-dataset Group-A full matrices",
+                  "",
+                  "Per aggregator: 3×3 matrix (rows = source / training, columns = target / test).",
+                  "Each cell = 3-seed mean ± 95% t-CI AUROC (%). Diagonal italic = within-dataset.",
+                  ""]
+    for label, fn_id, why in AGGREGATORS:
+        full_lines.append(f"## {label}  ({why})")
+        full_lines.append("")
+        header = ["Source ↓ / Target →"] + [PRETTY[d] for d in CROSS_DATASETS]
+        full_lines.append("| " + " | ".join(header) + " |")
+        full_lines.append("|" + "|".join("---" for _ in header) + "|")
+        for src in CROSS_DATASETS:
+            row = [f"**{PRETTY[src]}**"]
+            for tgt in CROSS_DATASETS:
+                cell = full[label][(src, tgt)]
+                txt = _fmt_cell(cell)
+                if src == tgt:
+                    txt = f"_{txt}_"
+                row.append(txt)
+            full_lines.append("| " + " | ".join(row) + " |")
+        full_lines.append("")
+    full_path = args.out_dir / "ABLATION_TABLE_CROSS_full.md"
+    full_path.write_text("\n".join(full_lines))
+    print(f"[wrote] {full_path}")
+
+    json_path = args.out_dir / "ABLATION_TABLE_CROSS.json"
+    json_path.write_text(json.dumps({
+        "summary": summary_data,
+        "full": {label: {f"{src}->{tgt}": cell for (src, tgt), cell in rows.items()}
+                 for label, rows in full.items()},
+    }, indent=2, default=lambda o: None))
+    print(f"[wrote] {json_path}")
+
+
+# Run the CLI only when executed as a script, not when imported as a module.
+if __name__ == "__main__":
+    main()
--- a/scripts/aggregate/aggregate_ablation_cross_B.py
+++ b/scripts/aggregate/aggregate_ablation_cross_B.py
@@ -0,0 +1,180 @@
+"""B-variant cross-dataset aggregation.
+
+Reads:
+  artifacts/ablation/janus_<ds>_seed<S>_<gid>/phase1_scores.npz   (within-dataset)
+  artifacts/ablation/cross/<gid>__seed<S>_<src>_to_<tgt>.npz      (cross-dataset)
+
+For each B-variant we apply the variant-appropriate aggregator (auto-key OAS
+fits whatever sub-scores the variant produces). JANUS-full anchor is read from
+the production route_comparison/ paths.
+
+Outputs:
+  ABLATION_CROSS_B_summary.md   within mean / cross mean / cross worst per gid
+  ABLATION_CROSS_B_full.md      per-gid 3×3 matrices
+"""
+from __future__ import annotations
+import argparse
+import json
+from pathlib import Path
+import numpy as np
+
+from aggregate_ablation import (
+    SCORE_FNS, T_975_N3, _auroc, _load_npz, _load_cross_npz,
+)
+
+# parents[2] of scripts/aggregate/<this file> is the repository root.
+ROOT = Path(__file__).resolve().parents[2]
+# JANUS-full anchor files (within-dataset under ROUTE, cross under ROUTE_CROSS).
+ROUTE = ROOT / "artifacts" / "route_comparison"
+ROUTE_CROSS = ROUTE / "cross"
+# B-variant files (within-dataset under ABL, cross under ABL_CROSS); ABL is
+# also the default output directory.
+ABL = ROOT / "artifacts" / "ablation"
+ABL_CROSS = ABL / "cross"
+
+CROSS_DATASETS = ["cicids2017", "cicddos2019", "ciciot2023"]
+# Display names used in table headers and worst-cell labels.
+PRETTY = {
+    "cicids2017": "CICIDS17",
+    "cicddos2019": "CICDDoS19",
+    "ciciot2023": "CICIoT23",
+}
+SEEDS = [42, 43, 44]
+
+# (gid, label, what_removed, score_fn_id)
+# NOTE(review): aggregate_ablation.py's within-dataset specs use
+# "A5_OAS_all10" for JANUS-full and "A3_OAS_term3" for B3/B5, while this table
+# uses "OAS_all_available" and "OAS_term_all" — confirm the difference is intended.
+B_VARIANTS = [
+    ("janus_full",  "JANUS-full",      "—",                       "OAS_all_available"),
+    ("b1_noflow",   "B1 no FLOW token","global context",          "OAS_all_available"),
+    ("b2_flowonly", "B2 flow-only",    "packet sequence",         "A1_terminal_norm"),
+    ("b3_allcont",  "B3 all-cont",     "cont/disc split",         "OAS_term_all"),
+    ("b4_alldisc", "B4 all-disc",      "cont/disc split (rev)",   "OAS_disc_all"),
+    ("b5_nodisc",   "B5 λ_disc=0",     "joint training",          "OAS_term_all"),
+]
+
+
+def _within_path(gid: str, ds: str, seed: int) -> Path:
+    if gid == "janus_full":
+        return ROUTE / f"janus_{ds}_seed{seed}" / "phase1_scores.npz"
+    return ABL / f"janus_{ds}_seed{seed}_{gid}" / "phase1_scores.npz"
+
+
+def _cross_path(gid: str, src: str, tgt: str, seed: int) -> Path:
+    if gid == "janus_full":
+        return ROUTE_CROSS / f"janus_seed{seed}_{src}_to_{tgt}.npz"
+    return ABL_CROSS / f"{gid}__seed{seed}_{src}_to_{tgt}.npz"
+
+
+def _cell_score(gid: str, src: str, tgt: str, seed: int, fn_id: str):
+    if src == tgt:
+        p = _within_path(gid, src, seed)
+        if not p.exists():
+            return None
+        val, atk = _load_npz(p)
+    else:
+        p = _cross_path(gid, src, tgt, seed)
+        if not p.exists():
+            return None
+        val, atk = _load_cross_npz(p)
+    fn = SCORE_FNS[fn_id]
+    res = fn(val, atk)
+    if res is None:
+        return None
+    sv, sa = res
+    return _auroc(sv, sa)
+
+
+def _seed_summary(vals: list[float]):
+    a = np.asarray([v for v in vals if v is not None and not np.isnan(v)])
+    if a.size == 0:
+        return None
+    if a.size == 1:
+        return {"mean": float(a[0]), "ci": 0.0, "n": 1}
+    se = a.std(ddof=1) / np.sqrt(a.size)
+    return {"mean": float(a.mean()),
+            "ci": float(T_975_N3 * se) if a.size == 3 else float(1.96 * se),
+            "n": int(a.size)}
+
+
+def _fmt(c):
+    if c is None:
+        return "—"
+    if c["n"] == 1:
+        return f"{100 * c['mean']:.2f}"
+    return f"{100 * c['mean']:.2f} ± {100 * c['ci']:.2f}"
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--out-dir", type=Path, default=ABL)
+    args = ap.parse_args()
+    args.out_dir.mkdir(parents=True, exist_ok=True)
+
+    full = {}
+    for gid, label, _why, fn_id in B_VARIANTS:
+        rows = {}
+        for src in CROSS_DATASETS:
+            for tgt in CROSS_DATASETS:
+                vals = [_cell_score(gid, src, tgt, s, fn_id) for s in SEEDS]
+                rows[(src, tgt)] = _seed_summary(vals)
+        full[gid] = (label, rows)
+        n_ok = sum(1 for v in rows.values() if v is not None)
+        print(f"[ok] {label:20s} cells={n_ok}/{len(rows)}", flush=True)
+
+    # Summary
+    lines = ["# B-variant cross-dataset summary",
+             "",
+             f"3-seed mean ± 95% t-CI AUROC. Datasets = {CROSS_DATASETS}.",
+             "All B variants share the same aggregator-fit-on-target-benign protocol as JANUS-full.",
+             "",
+             "| Variant | What removed | Within (3 cells) | Cross (6 cells) | Cross worst | Within − Cross |",
+             "|---|---|---|---|---|---|"]
+    for gid, label, why, fn_id in B_VARIANTS:
+        _, rows = full[gid]
+        within = [v["mean"] for (s, t), v in rows.items() if s == t and v is not None]
+        cross = [v["mean"] for (s, t), v in rows.items() if s != t and v is not None]
+        cross_pairs = [((s, t), v) for (s, t), v in rows.items() if s != t and v is not None]
+        worst = min(cross_pairs, key=lambda x: x[1]["mean"], default=None)
+        w = float(np.mean(within)) if within else float("nan")
+        c = float(np.mean(cross)) if cross else float("nan")
+        worst_str = "—"
+        if worst is not None:
+            (s, t), v = worst
+            worst_str = f"{PRETTY[s]}→{PRETTY[t]}: {_fmt(v)}"
+        gap = (w - c) * 100 if not np.isnan(w) and not np.isnan(c) else float("nan")
+        lines.append(f"| {label} | {why} | {100 * w:.2f} | {100 * c:.2f} | {worst_str} | {gap:+.2f} |")
+    summary_path = args.out_dir / "ABLATION_CROSS_B_summary.md"
+    summary_path.write_text("\n".join(lines) + "\n")
+    print(f"[wrote] {summary_path}")
+
+    # Full per-variant 3x3 matrices
+    flines = ["# B-variant cross-dataset full matrices",
+              "",
+              "Per variant: 3×3 matrix (rows = source, columns = target). Diagonal italic.",
+              "Each cell = 3-seed mean ± 95% t-CI AUROC (%).",
+              ""]
+    for gid, label, why, fn_id in B_VARIANTS:
+        _, rows = full[gid]
+        flines.append(f"## {label}  ({why})")
+        flines.append("")
+        header = ["Source ↓ / Target →"] + [PRETTY[d] for d in CROSS_DATASETS]
+        flines.append("| " + " | ".join(header) + " |")
+        flines.append("|" + "|".join("---" for _ in header) + "|")
+        for src in CROSS_DATASETS:
+            row = [f"**{PRETTY[src]}**"]
+            for tgt in CROSS_DATASETS:
+                cell = rows[(src, tgt)]
+                txt = _fmt(cell)
+                if src == tgt:
+                    txt = f"_{txt}_"
+                row.append(txt)
+            flines.append("| " + " | ".join(row) + " |")
+        flines.append("")
+    full_path = args.out_dir / "ABLATION_CROSS_B_full.md"
+    full_path.write_text("\n".join(flines))
+    print(f"[wrote] {full_path}")
+
+    json_path = args.out_dir / "ABLATION_CROSS_B.json"
+    json_path.write_text(json.dumps({
+        gid: {"label": label, "rows": {f"{s}->{t}": v for (s, t), v in rows.items()}}
+        for gid, (label, rows) in full.items()
+    }, indent=2, default=lambda o: None))
+    print(f"[wrote] {json_path}")
+
+
+# Run the CLI only when executed as a script, not when imported as a module.
+if __name__ == "__main__":
+    main()