ablation: add Group A (aggregator) + Group B (architecture) infrastructure

Extends MixedCFMConfig with 5 backwards-compatible flags (use_flow_token,
n_packet_tokens, disc_as_cont, cont_as_disc, and its companion cont_n_bins)
so that existing JANUS-full checkpoints load with 0 missing/unexpected keys.

Adds:
- 60 ablation training configs (5 variants × 4 datasets × 3 seeds)
- scripts/ablation/{generate_configs.py, run_groupB.sh, run_cross_groupB.sh,
  smoke_test.sh} — config generation + GPU drivers
- scripts/aggregate/aggregate_ablation{,_cross,_cross_B}.py — produces
  within-dataset and cross-dataset (3×3) ablation tables with 3-seed mean
  ± 95% t-CI plus optional paired DeLong p-values

README updated with ablation section pointing at
artifacts/ablation/ABLATION_SUMMARY.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-08 23:59:27 +08:00
parent 1d8862fbeb
commit a6bcbbd299
72 changed files with 3642 additions and 96 deletions

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b1_noflow
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b1_noflow
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b1_noflow
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b1_noflow
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b1_noflow
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b1_noflow
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b1_noflow
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
device: auto
reference_mode: causal_packets
use_flow_token: false

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b1_noflow
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
device: auto
reference_mode: causal_packets
use_flow_token: false

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b1_noflow
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
device: auto
reference_mode: causal_packets
use_flow_token: false

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b1_noflow
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b1_noflow
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b1_noflow
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b2_flowonly
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b2_flowonly
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b2_flowonly
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b2_flowonly
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b2_flowonly
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b2_flowonly
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b2_flowonly
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets
n_packet_tokens: 0

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b2_flowonly
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets
n_packet_tokens: 0

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b2_flowonly
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets
n_packet_tokens: 0

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b2_flowonly
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b2_flowonly
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b2_flowonly
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b3_allcont
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b3_allcont
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b3_allcont
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b3_allcont
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b3_allcont
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b3_allcont
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b3_allcont
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets
disc_as_cont: true

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b3_allcont
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets
disc_as_cont: true

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b3_allcont
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets
disc_as_cont: true

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b3_allcont
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b3_allcont
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b3_allcont
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,37 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b4_alldisc
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,37 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b4_alldisc
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,37 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b4_alldisc
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b4_alldisc
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b4_alldisc
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b4_alldisc
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,37 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b4_alldisc
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
device: auto
reference_mode: causal_packets
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,37 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b4_alldisc
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
device: auto
reference_mode: causal_packets
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,37 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b4_alldisc
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
device: auto
reference_mode: causal_packets
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b4_alldisc
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b4_alldisc
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b4_alldisc
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b5_nodisc
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b5_nodisc
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b5_nodisc
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,33 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b5_nodisc
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,33 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b5_nodisc
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,33 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b5_nodisc
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b5_nodisc
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b5_nodisc
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b5_nodisc
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets

View File

@@ -0,0 +1,33 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b5_nodisc
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,33 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b5_nodisc
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,33 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b5_nodisc
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -20,7 +20,7 @@ def _device(arg: str) -> torch.device:
return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
return torch.device(arg)
def _score_batch(model, flow_z, cont_z, disc_int, lens, device, batch_size=256, n_steps=16):
def _score_batch(model, flow_z, cont_z, disc_int, lens, device, batch_size=256, n_steps=16, cont_bin_edges=None):
out: dict[str, list[np.ndarray]] = {}
for start in range(0, len(flow_z), batch_size):
sl = slice(start, start + batch_size)
@@ -29,8 +29,8 @@ def _score_batch(model, flow_z, cont_z, disc_int, lens, device, batch_size=256,
d = torch.from_numpy(disc_int[sl]).long().to(device)
l = torch.from_numpy(lens[sl]).long().to(device)
with torch.no_grad():
traj = model.trajectory_metrics(f, c, d, l, n_steps=n_steps)
nll = model.disc_nll_score(f, c, d, l)
traj = model.trajectory_metrics(f, c, d, l, n_steps=n_steps, cont_bin_edges=cont_bin_edges)
nll = model.disc_nll_score(f, c, d, l, cont_bin_edges=cont_bin_edges)
for src in (traj, nll):
for (k, v) in src.items():
out.setdefault(k, []).append(v.detach().cpu().numpy())
@@ -63,6 +63,10 @@ def main() -> None:
model = MixedTokenCFM(model_cfg).to(device)
model.load_state_dict(ckpt['model_state_dict'])
model.eval()
cont_bin_edges = None
if 'cont_bin_edges' in ckpt:
cont_bin_edges = torch.from_numpy(np.asarray(ckpt['cont_bin_edges'])).to(device)
print(f'[model] cont_bin_edges shape={tuple(cont_bin_edges.shape)} (B4 mode; src edges applied to target)')
cont_mean = np.asarray(ckpt['cont_mean'], dtype=np.float32)
cont_std = np.asarray(ckpt['cont_std'], dtype=np.float32)
flow_mean = np.asarray(ckpt['flow_mean'], dtype=np.float32)
@@ -140,11 +144,11 @@ def main() -> None:
a_flow_z = ((a_flow - flow_mean) / np.maximum(flow_std, 1e-06)).astype(np.float32)
t0 = time.time()
print('[eval] benign...')
b_scores = _score_batch(model, b_flow_z, b_cont, b_disc, b_len, device, batch_size=args.batch_size, n_steps=args.n_steps)
b_scores = _score_batch(model, b_flow_z, b_cont, b_disc, b_len, device, batch_size=args.batch_size, n_steps=args.n_steps, cont_bin_edges=cont_bin_edges)
print(f'[eval] benign done {time.time() - t0:.1f}s')
t0 = time.time()
print('[eval] attack...')
a_scores = _score_batch(model, a_flow_z, a_cont, a_disc, a_len, device, batch_size=args.batch_size, n_steps=args.n_steps)
a_scores = _score_batch(model, a_flow_z, a_cont, a_disc, a_len, device, batch_size=args.batch_size, n_steps=args.n_steps, cont_bin_edges=cont_bin_edges)
print(f'[eval] attack done {time.time() - t0:.1f}s')
keys = sorted(set(b_scores) & set(a_scores))
overall = {}

View File

@@ -18,7 +18,7 @@ def _device(arg: str) -> torch.device:
return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
return torch.device(arg)
def _score_batch(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray, disc_np: np.ndarray, len_np: np.ndarray, device: torch.device, *, batch_size: int, n_steps: int) -> dict[str, np.ndarray]:
def _score_batch(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray, disc_np: np.ndarray, len_np: np.ndarray, device: torch.device, *, batch_size: int, n_steps: int, cont_bin_edges: torch.Tensor | None = None) -> dict[str, np.ndarray]:
out: dict[str, list[np.ndarray]] = {}
for start in range(0, len(flow_np), batch_size):
sl = slice(start, start + batch_size)
@@ -27,8 +27,8 @@ def _score_batch(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray,
disc = torch.from_numpy(disc_np[sl]).long().to(device)
lens = torch.from_numpy(len_np[sl]).long().to(device)
with torch.no_grad():
traj = model.trajectory_metrics(flow, cont, disc, lens, n_steps=n_steps)
nll = model.disc_nll_score(flow, cont, disc, lens)
traj = model.trajectory_metrics(flow, cont, disc, lens, n_steps=n_steps, cont_bin_edges=cont_bin_edges)
nll = model.disc_nll_score(flow, cont, disc, lens, cont_bin_edges=cont_bin_edges)
for d in (traj, nll):
for (k, v) in d.items():
out.setdefault(k, []).append(v.detach().cpu().numpy())
@@ -65,7 +65,11 @@ def main() -> None:
model = MixedTokenCFM(model_cfg).to(device)
model.load_state_dict(ckpt['model_state_dict'])
model.eval()
print(f'[model] T={model_cfg.T} flow_dim={model_cfg.flow_dim}')
cont_bin_edges = None
if 'cont_bin_edges' in ckpt:
cont_bin_edges = torch.from_numpy(np.asarray(ckpt['cont_bin_edges'])).to(device)
print(f'[model] cont_bin_edges shape={tuple(cont_bin_edges.shape)} (B4 mode)')
print(f'[model] T={model_cfg.T} flow_dim={model_cfg.flow_dim} use_flow_token={model_cfg.use_flow_token} n_packet_tokens={model_cfg.n_packet_tokens} disc_as_cont={model_cfg.disc_as_cont} cont_as_disc={model_cfg.cont_as_disc}')
data = load_mixed_data(packets_npz=Path(cfg['packets_npz']) if cfg.get('packets_npz') else None, source_store=Path(cfg['source_store']) if cfg.get('source_store') else None, flows_parquet=Path(cfg['flows_parquet']), flow_features_path=Path(cfg['flow_features_path']), flow_features_align=str(cfg.get('flow_features_align', 'auto')), T=int(cfg['T']), split_seed=int(cfg.get('data_seed', cfg.get('seed', 42))), train_ratio=float(cfg.get('train_ratio', 0.8)), benign_label=str(cfg.get('benign_label', 'normal')), min_len=int(cfg.get('min_len', 2)), attack_cap=int(cfg['attack_cap']) if cfg.get('attack_cap') else None, val_cap=int(cfg['val_cap']) if cfg.get('val_cap') else None)
print(f'[data] val={len(data.val_flow):,} attack={len(data.attack_flow):,}')
rng = np.random.default_rng(0)
@@ -81,10 +85,10 @@ def main() -> None:
atk_labels = atk_labels[idx]
print(f'[eval] scoring val={len(val_flow):,} atk={len(atk_flow):,}')
t0 = time.time()
val = _score_batch(model, val_flow, val_cont, val_disc, val_len, device, batch_size=args.batch_size, n_steps=args.n_steps)
val = _score_batch(model, val_flow, val_cont, val_disc, val_len, device, batch_size=args.batch_size, n_steps=args.n_steps, cont_bin_edges=cont_bin_edges)
print(f'[eval] val done {time.time() - t0:.1f}s')
t0 = time.time()
atk = _score_batch(model, atk_flow, atk_cont, atk_disc, atk_len, device, batch_size=args.batch_size, n_steps=args.n_steps)
atk = _score_batch(model, atk_flow, atk_cont, atk_disc, atk_len, device, batch_size=args.batch_size, n_steps=args.n_steps, cont_bin_edges=cont_bin_edges)
print(f'[eval] atk done {time.time() - t0:.1f}s')
keys = sorted(set(val) & set(atk))
overall: dict[str, dict[str, float]] = {}

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
import math
from dataclasses import dataclass, field
from dataclasses import dataclass
import torch
import torch.nn as nn
import torch.nn.functional as F
@@ -19,6 +19,7 @@ AdaLNBlock = _unified.AdaLNBlock
SinusoidalTimeEmb = _unified.SinusoidalTimeEmb
_sinkhorn_coupling = _unified._sinkhorn_coupling
@dataclass
class MixedCFMConfig:
T: int = 64
@@ -40,6 +41,11 @@ class MixedCFMConfig:
lambda_disc: float = 1.0
disc_path: str = 'uniform'
disc_embed_scale: float = 1.0
# ---- B-group ablation flags (defaults preserve JANUS-full behavior) ----
use_flow_token: bool = True # B1: False removes the [FLOW] token
n_packet_tokens: int = -1 # B2: 0 removes packet tokens entirely; -1 = use cfg.T
disc_as_cont: bool = False # B3: feed 6 disc bits through CFM head as continuous values
cont_as_disc: bool = False # B4: quantize 3 cont channels into n_disc_classes bins (mask-pred only)
def __post_init__(self) -> None:
if len(self.cont_pkt_idx) != self.n_cont_pkt:
@@ -48,10 +54,13 @@ class MixedCFMConfig:
raise ValueError('disc_pkt_idx length mismatch n_disc_pkt')
if self.disc_path != 'uniform':
raise NotImplementedError(f'disc_path={self.disc_path}')
if self.disc_as_cont and self.cont_as_disc:
raise ValueError('disc_as_cont and cont_as_disc are mutually exclusive')
class MixedVelocity(nn.Module):
def __init__(self, token_dim: int, seq_len: int, n_disc: int, n_classes: int, d_model: int=128, n_layers: int=4, n_heads: int=4, mlp_ratio: float=4.0, time_dim: int=64, reference_mode: str | None=None) -> None:
def __init__(self, token_dim: int, seq_len: int, n_disc: int, n_classes: int, d_model: int=128, n_layers: int=4, n_heads: int=4, mlp_ratio: float=4.0, time_dim: int=64, reference_mode: str | None=None, has_flow_token: bool=True) -> None:
super().__init__()
if reference_mode not in (None, 'causal_packets', 'causal_all'):
raise ValueError(f'reference_mode={reference_mode!r}')
@@ -60,6 +69,7 @@ class MixedVelocity(nn.Module):
self.n_disc = n_disc
self.n_classes = n_classes
self.reference_mode = reference_mode
self.has_flow_token = has_flow_token
self.input_proj = nn.Linear(token_dim, d_model)
self.pos_emb = nn.Parameter(torch.zeros(1, seq_len, d_model))
self.type_emb = nn.Embedding(2, d_model)
@@ -70,12 +80,15 @@ class MixedVelocity(nn.Module):
self.blocks = nn.ModuleList([AdaLNBlock(d_model, n_heads, mlp_ratio, cond_dim=d_model) for _ in range(n_layers)])
self.out_norm = nn.LayerNorm(d_model, elementwise_affine=False)
self.head_v = nn.Linear(d_model, token_dim)
self.head_disc = nn.Linear(d_model, n_disc * n_classes)
# head_disc only meaningful when n_disc > 0
out_disc = max(n_disc * n_classes, 1)
self.head_disc = nn.Linear(d_model, out_disc)
for layer in (self.head_v, self.head_disc):
nn.init.zeros_(layer.weight)
nn.init.zeros_(layer.bias)
type_ids = torch.ones(seq_len, dtype=torch.long)
type_ids[0] = 0
if has_flow_token and seq_len >= 1:
type_ids[0] = 0
self.register_buffer('type_ids', type_ids, persistent=False)
def _attn_mask(self, L: int, device: torch.device) -> torch.Tensor | None:
@@ -83,8 +96,11 @@ class MixedVelocity(nn.Module):
return None
if self.reference_mode == 'causal_packets':
mask = torch.zeros((L, L), dtype=torch.bool, device=device)
if L > 1:
mask[1:, 1:] = torch.triu(torch.ones(L - 1, L - 1, dtype=torch.bool, device=device), diagonal=1)
offset = 1 if self.has_flow_token else 0
if L > offset:
M = L - offset
if M > 1:
mask[offset:, offset:] = torch.triu(torch.ones(M, M, dtype=torch.bool, device=device), diagonal=1)
return mask
return torch.triu(torch.ones(L, L, dtype=torch.bool, device=device), diagonal=1)
@@ -100,143 +116,339 @@ class MixedVelocity(nn.Module):
h = block(h, cond, key_padding_mask, attn_mask=attn_mask)
h = self.out_norm(h)
v = self.head_v(h)
d = self.head_disc(h).view(B, L, self.n_disc, self.n_classes)
if self.n_disc > 0:
d = self.head_disc(h).view(B, L, self.n_disc, self.n_classes)
else:
d = h.new_zeros((B, L, 0, self.n_classes))
return (v, d)
class MixedTokenCFM(nn.Module):
def __init__(self, cfg: MixedCFMConfig) -> None:
    """Build the mixed-token CFM model from ``cfg``.

    Derives the effective sequence layout from the ablation flags and
    constructs the velocity network exactly once.

    Attributes set here:
        eff_T: number of packet tokens (``cfg.T`` when
            ``cfg.n_packet_tokens`` is negative, else ``cfg.n_packet_tokens``).
        eff_n_cont / eff_n_disc: per-packet channel split after applying
            ``disc_as_cont`` / ``cont_as_disc``.
        token_dim: width of each token (1 type-flag channel + payload).
        seq_len: optional flow token + ``eff_T`` packet tokens.

    Raises:
        ValueError: if both the flow token and all packet tokens are
            disabled, or if an explicit ``cfg.token_dim`` is too small.
    """
    super().__init__()
    self.cfg = cfg

    # Effective packet count (B2: n_packet_tokens=0 -> no packets).
    self.eff_T = cfg.T if cfg.n_packet_tokens < 0 else int(cfg.n_packet_tokens)
    if not cfg.use_flow_token and self.eff_T == 0:
        raise ValueError('cannot disable both FLOW token and packet tokens')

    # Effective per-packet feature split.
    if cfg.disc_as_cont:
        # B3: every packet channel is continuous (CFM head only).
        self.eff_n_cont = cfg.n_cont_pkt + cfg.n_disc_pkt
        self.eff_n_disc = 0
    elif cfg.cont_as_disc:
        # B4: every packet channel is discrete (mask-pred head only).
        self.eff_n_cont = 0
        self.eff_n_disc = cfg.n_cont_pkt + cfg.n_disc_pkt
    else:
        self.eff_n_cont = cfg.n_cont_pkt
        self.eff_n_disc = cfg.n_disc_pkt
    cont_size = self.eff_n_cont + self.eff_n_disc

    # Token layout: [type_flag(1) | flow_dim or cont_size].
    min_token_dim = 1 + max(cfg.flow_dim, cont_size)
    self.token_dim = cfg.token_dim or min_token_dim
    if self.token_dim < min_token_dim:
        raise ValueError('token_dim too small')

    # The velocity network is built a single time, with the effective
    # layout; a second construction would waste memory and RNG draws.
    self.seq_len = (1 if cfg.use_flow_token else 0) + self.eff_T
    self.velocity = MixedVelocity(
        token_dim=self.token_dim, seq_len=self.seq_len,
        n_disc=self.eff_n_disc, n_classes=cfg.n_disc_classes,
        d_model=cfg.d_model, n_layers=cfg.n_layers, n_heads=cfg.n_heads,
        mlp_ratio=cfg.mlp_ratio, time_dim=cfg.time_dim,
        reference_mode=cfg.reference_mode, has_flow_token=cfg.use_flow_token,
    )
# ------------------------------------------------------------------ #
# token assembly #
# ------------------------------------------------------------------ #
def _embed_disc(self, x_disc_int: torch.Tensor) -> torch.Tensor:
    """Map integer class ids to centered float values.

    Ids in ``[0, n - 1]`` (``n = cfg.n_disc_classes``) are scaled to
    ``[-s / 2, +s / 2]`` with ``s = cfg.disc_embed_scale``. For ``n == 2``
    this equals the former ``(x - 0.5) * s`` formula. With ``n <= 1``
    there is nothing to encode and zeros are returned.

    Args:
        x_disc_int: integer tensor of class ids (any shape).

    Returns:
        Float tensor of the same shape as ``x_disc_int``.
    """
    n = self.cfg.n_disc_classes
    s = self.cfg.disc_embed_scale
    if n <= 1:
        return x_disc_int.float() * 0.0
    # Normalise by (n - 1) before centering so the range stays
    # [-s/2, +s/2] for any class count, not only the binary case.
    return (x_disc_int.float() / (n - 1) - 0.5) * s
def _flow_dim(self) -> int:
    """Return the flow-feature width configured in ``cfg.flow_dim``."""
    width = self.cfg.flow_dim
    return width
def build_tokens(self, flow: torch.Tensor, packets_cont: torch.Tensor, x_disc_t_int: torch.Tensor) -> torch.Tensor:
    """Assemble the token tensor of shape ``[B, seq_len, token_dim]``.

    Channel 0 of every token is a type flag: ``-1.0`` for the flow token
    and ``+1.0`` for packet tokens. The flow token (present only when
    ``cfg.use_flow_token``) carries ``flow`` in the channels after the
    flag. Each packet token carries the continuous channels followed by
    the embedded discrete channels. Unused channels stay zero.

    Args:
        flow: ``[B, flow_dim]`` flow features; also supplies batch size,
            dtype and device of the result.
        packets_cont: ``[B, eff_T, eff_n_cont]`` (last dim may be empty).
        x_disc_t_int: ``[B, eff_T, eff_n_disc]`` integer ids in
            ``[0, n_disc_classes - 1]``.

    Returns:
        The assembled token tensor.
    """
    B = flow.shape[0]
    T = self.eff_T
    z = flow.new_zeros((B, self.seq_len, self.token_dim))
    cur = 0  # index of the first packet token
    if self.cfg.use_flow_token:
        z[:, 0, 0] = -1.0  # type flag
        z[:, 0, 1:1 + self._flow_dim()] = flow
        cur = 1
    if T > 0:
        z[:, cur:cur + T, 0] = 1.0  # type flag
        base = 1  # next free channel inside a packet token
        if self.eff_n_cont > 0:
            z[:, cur:cur + T, base:base + self.eff_n_cont] = packets_cont
            base += self.eff_n_cont
        if self.eff_n_disc > 0:
            z[:, cur:cur + T, base:base + self.eff_n_disc] = self._embed_disc(x_disc_t_int)
    return z
def key_padding_mask(self, lens: torch.Tensor) -> torch.Tensor:
    """Return a boolean mask that is True at padded token positions.

    The mask has one column for the flow token (always real) when
    ``cfg.use_flow_token`` is set, followed by ``eff_T`` packet columns; packet
    position ``i`` is real when ``i < lens[b]``. With neither a flow token nor
    packet tokens the result has shape [B, 0].

    Args:
        lens: [B] number of real packets per sample.

    Returns:
        Bool tensor [B, n_tokens] on ``lens``'s device.
    """
    B = lens.shape[0]
    device = lens.device
    T = self.eff_T
    pieces = []
    if self.cfg.use_flow_token:
        pieces.append(torch.ones(B, 1, dtype=torch.bool, device=device))
    if T > 0:
        idx = torch.arange(T, device=device)[None, :]
        pieces.append(idx < lens[:, None])
    real = torch.cat(pieces, dim=1) if pieces else torch.ones(B, 0, dtype=torch.bool, device=device)
    return ~real
def _loss_mask(self, lens: torch.Tensor) -> torch.Tensor:
return (~self.key_padding_mask(lens)).float()
# NOTE(review): this truncated compute_loss is redefined further down in the
# same scope (with an extra cont_bin_edges keyword), so the later definition
# replaces this one at runtime — presumably a leftover of the pre-ablation
# version; confirm and remove.
def compute_loss(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, *, return_components: bool=False) -> torch.Tensor | dict[str, torch.Tensor]:
(B, T, _) = packets_cont.shape
device = packets_cont.device
# ------------------------------------------------------------------ #
# B4 helper: quantize cont -> integer bins #
# ------------------------------------------------------------------ #
def quantize_cont(self, packets_cont: torch.Tensor, bin_edges: torch.Tensor) -> torch.Tensor:
    """Bucket each continuous packet channel into integer bin ids.

    packets_cont is [B, T, n_cont_orig]; bin_edges is [n_cont_orig, n_classes-1]
    with one row of edges per channel. Returns an int64 tensor of the same
    [B, T, n_cont_orig] shape whose values are clamped to [0, n_classes-1].
    """
    n_batch, n_steps, n_ch = packets_cont.shape
    binned = packets_cont.new_zeros((n_batch, n_steps, n_ch), dtype=torch.long)
    for ch, channel_vals in enumerate(packets_cont.unbind(dim=-1)):
        # bucketize yields ids 0..len(edges) for this channel's edge row
        binned[..., ch] = torch.bucketize(channel_vals.contiguous(), bin_edges[ch])
    max_id = self.cfg.n_disc_classes - 1
    binned.clamp_(0, max_id)
    return binned
# ------------------------------------------------------------------ #
# Loss #
# ------------------------------------------------------------------ #
def compute_loss(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, *, return_components: bool=False, cont_bin_edges: torch.Tensor | None=None) -> torch.Tensor | dict[str, torch.Tensor]:
cfg = self.cfg
B = flow.shape[0]
T = self.eff_T
device = flow.device
# Resolve effective cont/disc tensors per ablation mode
if cfg.disc_as_cont:
# 9 cont = original 3 cont + 6 disc-as-float
disc_as_cont_float = self._embed_disc(packets_disc) if T > 0 else None
if T > 0:
eff_cont = torch.cat([packets_cont, disc_as_cont_float], dim=-1) if cfg.n_cont_pkt > 0 else disc_as_cont_float
else:
eff_cont = packets_cont.new_zeros((B, 0, 0))
eff_disc_int = torch.zeros((B, T, 0), dtype=torch.long, device=device)
elif cfg.cont_as_disc:
# 0 cont, 9 disc: quantize cont via supplied bin_edges
if T > 0:
if cont_bin_edges is None:
raise ValueError('cont_as_disc requires cont_bin_edges')
cont_int = self.quantize_cont(packets_cont, cont_bin_edges)
eff_disc_int = torch.cat([cont_int, packets_disc.long()], dim=-1)
else:
eff_disc_int = torch.zeros((B, 0, self.eff_n_disc), dtype=torch.long, device=device)
eff_cont = flow.new_zeros((B, T, 0))
else:
eff_cont = packets_cont if T > 0 else packets_cont.new_zeros((B, 0, cfg.n_cont_pkt))
eff_disc_int = packets_disc.long() if T > 0 else torch.zeros((B, 0, cfg.n_disc_pkt), dtype=torch.long, device=device)
# Build x_1 (data tokens; mask-pred path uses zero ids for disc at packet positions during CFM regression)
zero_disc = torch.zeros_like(eff_disc_int)
x_1_cont = self.build_tokens(flow, eff_cont, zero_disc)
mask = self._loss_mask(lens)
kpm = mask == 0
x_1_cont = self.build_tokens(flow, packets_cont, torch.zeros_like(packets_disc))
x_0_cont = torch.randn_like(x_1_cont)
if self.cfg.use_ot:
if cfg.use_ot:
flat0 = (x_0_cont * mask[:, :, None]).reshape(B, -1)
flat1 = (x_1_cont * mask[:, :, None]).reshape(B, -1)
col = _sinkhorn_coupling(torch.cdist(flat0.float(), flat1.float()))
x_1_cont = x_1_cont[col]
packets_cont = packets_cont[col]
eff_cont = eff_cont[col] if eff_cont.numel() > 0 else eff_cont
eff_disc_int = eff_disc_int[col] if eff_disc_int.numel() > 0 else eff_disc_int
packets_disc = packets_disc[col]
flow = flow[col]
lens = lens[col]
mask = self._loss_mask(lens)
kpm = mask == 0
t = torch.rand(B, device=device)
x_t_cont = (1.0 - t[:, None, None]) * x_0_cont + t[:, None, None] * x_1_cont
if self.cfg.sigma > 0:
std = self.cfg.sigma * torch.sqrt(t * (1.0 - t))[:, None, None]
if cfg.sigma > 0:
std = cfg.sigma * torch.sqrt(t * (1.0 - t))[:, None, None]
x_t_cont = x_t_cont + std * torch.randn_like(x_t_cont)
target_cont = x_1_cont - x_0_cont
u = torch.rand(B, T, self.cfg.n_disc_pkt, device=device)
keep = u < t[:, None, None]
rand_disc = torch.randint(0, self.cfg.n_disc_classes, packets_disc.shape, device=device)
x_disc_t = torch.where(keep, packets_disc, rand_disc)
disc_start = 1 + self.cfg.n_cont_pkt
x_t_full = x_t_cont.clone()
x_t_full[:, 1:, disc_start:disc_start + self.cfg.n_disc_pkt] = self._embed_disc(x_disc_t)
# Disc corruption schedule (mask-pred): keep fraction t of true labels
if T > 0 and self.eff_n_disc > 0:
u = torch.rand(B, T, self.eff_n_disc, device=device)
keep = u < t[:, None, None]
rand_disc = torch.randint(0, cfg.n_disc_classes, eff_disc_int.shape, device=device)
x_disc_t = torch.where(keep, eff_disc_int, rand_disc)
disc_start = (1 if cfg.use_flow_token else 0) + 0 # placeholder; overwritten below
# Where in x_t_full do disc embeds go?
# Within each packet token: [type(1) | cont(eff_n_cont) | disc(eff_n_disc) | pad...]
disc_start_in_token = 1 + self.eff_n_cont
cur_offset = 1 if cfg.use_flow_token else 0
x_t_full = x_t_cont.clone()
x_t_full[:, cur_offset:cur_offset + T, disc_start_in_token:disc_start_in_token + self.eff_n_disc] = self._embed_disc(x_disc_t)
else:
x_t_full = x_t_cont
x_disc_t = eff_disc_int # unused
keep = None
(v_pred, d_logits) = self.velocity(x_t_full, t, key_padding_mask=kpm)
# CFM regression loss on cont slots (mask out disc slots)
v_err = (v_pred - target_cont).square()
v_err[:, :, disc_start:disc_start + self.cfg.n_disc_pkt] = 0.0
if T > 0 and self.eff_n_disc > 0:
disc_start_in_token = 1 + self.eff_n_cont
cur_offset = 1 if cfg.use_flow_token else 0
v_err[:, cur_offset:cur_offset + T, disc_start_in_token:disc_start_in_token + self.eff_n_disc] = 0.0
v_per_token = v_err.mean(dim=-1)
per_sample = (v_per_token * mask).sum(dim=-1) / mask.sum(dim=-1).clamp_min(1.0)
L_cont = per_sample.mean()
pkt_logits = d_logits[:, 1:]
pkt_real = mask[:, 1:].bool()
corrupt = ~keep & pkt_real[:, :, None]
flat_logits = pkt_logits.reshape(-1, self.cfg.n_disc_classes)
flat_targets = packets_disc.reshape(-1).long()
flat_ce = F.cross_entropy(flat_logits, flat_targets, reduction='none')
flat_ce = flat_ce.view(B, T, self.cfg.n_disc_pkt)
flat_ce = flat_ce * corrupt.float()
denom = corrupt.float().sum().clamp_min(1.0)
L_disc = flat_ce.sum() / denom
total = L_cont + self.cfg.lambda_disc * L_disc
# Mask-pred CE on corrupted disc positions
if T > 0 and self.eff_n_disc > 0 and keep is not None:
cur_offset = 1 if cfg.use_flow_token else 0
pkt_logits = d_logits[:, cur_offset:cur_offset + T]
pkt_real = mask[:, cur_offset:cur_offset + T].bool()
corrupt = ~keep & pkt_real[:, :, None]
flat_logits = pkt_logits.reshape(-1, cfg.n_disc_classes)
flat_targets = eff_disc_int.reshape(-1).long()
flat_ce = F.cross_entropy(flat_logits, flat_targets, reduction='none')
flat_ce = flat_ce.view(B, T, self.eff_n_disc)
flat_ce = flat_ce * corrupt.float()
denom = corrupt.float().sum().clamp_min(1.0)
L_disc = flat_ce.sum() / denom
else:
L_disc = L_cont.new_zeros(())
total = L_cont + cfg.lambda_disc * L_disc
if return_components:
return {'total': total, 'main': L_cont.detach(), 'aux_disc': L_disc.detach(), 'aux_flow': L_cont.new_zeros(()), 'aux_packet': L_cont.new_zeros(())}
return {'total': total, 'main': L_cont.detach(), 'aux_disc': L_disc.detach(),
'aux_flow': L_cont.new_zeros(()), 'aux_packet': L_cont.new_zeros(())}
return total
# ------------------------------------------------------------------ #
# Scoring #
# ------------------------------------------------------------------ #
@torch.no_grad()
def trajectory_metrics(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, n_steps: int=16) -> dict[str, torch.Tensor]:
z = self.build_tokens(flow, packets_cont, packets_disc)
def trajectory_metrics(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, n_steps: int=16, cont_bin_edges: torch.Tensor | None=None) -> dict[str, torch.Tensor]:
cfg = self.cfg
B = flow.shape[0]
T = self.eff_T
# Build effective cont / disc tensors per ablation mode
if cfg.disc_as_cont:
disc_float = self._embed_disc(packets_disc) if T > 0 else None
if T > 0:
eff_cont = torch.cat([packets_cont, disc_float], dim=-1) if cfg.n_cont_pkt > 0 else disc_float
else:
eff_cont = packets_cont.new_zeros((B, 0, 0))
eff_disc_int = torch.zeros((B, T, 0), dtype=torch.long, device=flow.device)
elif cfg.cont_as_disc:
if T > 0:
if cont_bin_edges is None:
raise ValueError('cont_as_disc requires cont_bin_edges at scoring time')
cont_int = self.quantize_cont(packets_cont, cont_bin_edges)
eff_disc_int = torch.cat([cont_int, packets_disc.long()], dim=-1)
else:
eff_disc_int = torch.zeros((B, 0, 0), dtype=torch.long, device=flow.device)
eff_cont = flow.new_zeros((B, T, 0))
else:
eff_cont = packets_cont if T > 0 else packets_cont.new_zeros((B, 0, cfg.n_cont_pkt))
eff_disc_int = packets_disc.long() if T > 0 else torch.zeros((B, 0, cfg.n_disc_pkt), dtype=torch.long, device=flow.device)
z = self.build_tokens(flow, eff_cont, eff_disc_int)
mask = self._loss_mask(lens)
kpm = mask == 0
B = z.shape[0]
dt = 1.0 / n_steps
disc_start = 1 + self.cfg.n_cont_pkt
disc_end = disc_start + self.cfg.n_disc_pkt
disc_embed = z[:, 1:, disc_start:disc_end].clone()
# Disc embed slot bounds (within token vector) for "freeze disc during ODE"
cur_offset = 1 if cfg.use_flow_token else 0
disc_start_in_token = 1 + self.eff_n_cont
disc_end_in_token = disc_start_in_token + self.eff_n_disc
if self.eff_n_disc > 0 and T > 0:
disc_embed = z[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token].clone()
else:
disc_embed = None
for k in range(n_steps):
t_val = 1.0 - k * dt
t = torch.full((B,), t_val, device=z.device)
(v, _) = self.velocity(z, t, key_padding_mask=kpm)
v[:, :, disc_start:disc_end] = 0.0
if self.eff_n_disc > 0 and T > 0:
v[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token] = 0.0
z = z - v * dt
z[:, 1:, disc_start:disc_end] = disc_embed
if disc_embed is not None:
z[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token] = disc_embed
# Compute terminal-norm scores. Zero out the discrete embed slots so they don't pollute.
z_real = z * mask[:, :, None]
z_cont = z_real.clone()
z_cont[:, 1:, disc_start:disc_end] = 0.0
packet_count = mask[:, 1:].sum(dim=-1).clamp_min(1.0)
terminal = z_cont.reshape(B, -1).norm(dim=-1) / (mask.sum(dim=-1) * self.token_dim).clamp_min(1.0).sqrt()
terminal_flow = z_cont[:, 0].norm(dim=-1) / math.sqrt(self.token_dim)
terminal_packet = (z_cont[:, 1:] * mask[:, 1:, None]).reshape(B, -1).norm(dim=-1) / (packet_count * self.token_dim).sqrt()
return {'terminal_norm': terminal, 'terminal_flow': terminal_flow, 'terminal_packet': terminal_packet}
if self.eff_n_disc > 0 and T > 0:
z_cont[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token] = 0.0
full_norm = z_cont.reshape(B, -1).norm(dim=-1) / (mask.sum(dim=-1) * self.token_dim).clamp_min(1.0).sqrt()
out = {'terminal_norm': full_norm}
if cfg.use_flow_token:
out['terminal_flow'] = z_cont[:, 0].norm(dim=-1) / math.sqrt(self.token_dim)
if T > 0:
packet_count = mask[:, cur_offset:cur_offset + T].sum(dim=-1).clamp_min(1.0)
out['terminal_packet'] = (z_cont[:, cur_offset:cur_offset + T] * mask[:, cur_offset:cur_offset + T, None]).reshape(B, -1).norm(dim=-1) / (packet_count * self.token_dim).sqrt()
return out
@torch.no_grad()
def disc_nll_score(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, t_eval: float=0.5) -> dict[str, torch.Tensor]:
(B, T, _) = packets_cont.shape
device = packets_cont.device
def disc_nll_score(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, t_eval: float=0.5, cont_bin_edges: torch.Tensor | None=None) -> dict[str, torch.Tensor]:
cfg = self.cfg
B = flow.shape[0]
T = self.eff_T
device = flow.device
if T == 0 or self.eff_n_disc == 0:
return {} # no disc head to score
# Build effective disc int per mode
if cfg.cont_as_disc:
if cont_bin_edges is None:
raise ValueError('cont_as_disc requires cont_bin_edges at scoring time')
cont_int = self.quantize_cont(packets_cont, cont_bin_edges)
eff_disc_int = torch.cat([cont_int, packets_disc.long()], dim=-1)
eff_cont = flow.new_zeros((B, T, 0))
ch_idx_list = list(cfg.cont_pkt_idx) + list(cfg.disc_pkt_idx)
else:
eff_disc_int = packets_disc.long()
eff_cont = packets_cont
ch_idx_list = list(cfg.disc_pkt_idx)
mask = self._loss_mask(lens)
kpm = mask == 0
z = self.build_tokens(flow, packets_cont, packets_disc)
z = self.build_tokens(flow, eff_cont, eff_disc_int)
t = torch.full((B,), float(t_eval), device=device)
(_, d_logits) = self.velocity(z, t, key_padding_mask=kpm)
pkt_logits = d_logits[:, 1:]
flat_logits = pkt_logits.reshape(-1, self.cfg.n_disc_classes)
flat_targets = packets_disc.reshape(-1).long()
cur_offset = 1 if cfg.use_flow_token else 0
pkt_logits = d_logits[:, cur_offset:cur_offset + T]
flat_logits = pkt_logits.reshape(-1, cfg.n_disc_classes)
flat_targets = eff_disc_int.reshape(-1).long()
ce = F.cross_entropy(flat_logits, flat_targets, reduction='none')
ce = ce.view(B, T, self.cfg.n_disc_pkt)
pkt_real = mask[:, 1:].bool().float()
ce = ce.view(B, T, self.eff_n_disc)
pkt_real = mask[:, cur_offset:cur_offset + T].bool().float()
per_sample = (ce.sum(dim=-1) * pkt_real).sum(dim=-1) / pkt_real.sum(dim=-1).clamp_min(1.0)
per_ch = (ce * pkt_real[:, :, None]).sum(dim=1) / pkt_real.sum(dim=1).clamp_min(1.0)[:, None]
out = {'disc_nll_total': per_sample}
for (c, idx) in enumerate(self.cfg.disc_pkt_idx):
for c, idx in enumerate(ch_idx_list):
out[f'disc_nll_ch{idx}'] = per_ch[:, c]
return out

View File

@@ -21,7 +21,7 @@ def _device(arg: str) -> torch.device:
return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
return torch.device(arg)
def _batch_score(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray, disc_np: np.ndarray, len_np: np.ndarray, device: torch.device, *, batch_size: int, n_steps: int) -> dict[str, np.ndarray]:
def _batch_score(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray, disc_np: np.ndarray, len_np: np.ndarray, device: torch.device, *, batch_size: int, n_steps: int, cont_bin_edges: torch.Tensor | None = None) -> dict[str, np.ndarray]:
out: dict[str, list[np.ndarray]] = {}
model.eval()
for start in range(0, len(flow_np), batch_size):
@@ -30,14 +30,14 @@ def _batch_score(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray,
cont = torch.from_numpy(cont_np[sl]).float().to(device)
disc = torch.from_numpy(disc_np[sl]).long().to(device)
lens = torch.from_numpy(len_np[sl]).long().to(device)
m = model.trajectory_metrics(flow, cont, disc, lens, n_steps=n_steps)
d = model.disc_nll_score(flow, cont, disc, lens)
m = model.trajectory_metrics(flow, cont, disc, lens, n_steps=n_steps, cont_bin_edges=cont_bin_edges)
d = model.disc_nll_score(flow, cont, disc, lens, cont_bin_edges=cont_bin_edges)
for src in (m, d):
for (k, v) in src.items():
out.setdefault(k, []).append(v.detach().cpu().numpy())
return {k: np.concatenate(v, axis=0) for (k, v) in out.items()}
def _quick_eval(model: MixedTokenCFM, data: MixedData, device: torch.device, cfg: dict[str, Any]) -> dict[str, float]:
def _quick_eval(model: MixedTokenCFM, data: MixedData, device: torch.device, cfg: dict[str, Any], cont_bin_edges: torch.Tensor | None = None) -> dict[str, float]:
n_eval = int(cfg.get('eval_n', 2000))
rng = np.random.default_rng(0)
@@ -46,8 +46,8 @@ def _quick_eval(model: MixedTokenCFM, data: MixedData, device: torch.device, cfg
return rng.choice(n, m, replace=False)
vi = pick(len(data.val_flow))
ai = pick(len(data.attack_flow))
v = _batch_score(model, data.val_flow[vi], data.val_cont[vi], data.val_disc[vi], data.val_len[vi], device, batch_size=int(cfg.get('eval_batch_size', 512)), n_steps=int(cfg.get('eval_n_steps', 8)))
a = _batch_score(model, data.attack_flow[ai], data.attack_cont[ai], data.attack_disc[ai], data.attack_len[ai], device, batch_size=int(cfg.get('eval_batch_size', 512)), n_steps=int(cfg.get('eval_n_steps', 8)))
v = _batch_score(model, data.val_flow[vi], data.val_cont[vi], data.val_disc[vi], data.val_len[vi], device, batch_size=int(cfg.get('eval_batch_size', 512)), n_steps=int(cfg.get('eval_n_steps', 8)), cont_bin_edges=cont_bin_edges)
a = _batch_score(model, data.attack_flow[ai], data.attack_cont[ai], data.attack_disc[ai], data.attack_len[ai], device, batch_size=int(cfg.get('eval_batch_size', 512)), n_steps=int(cfg.get('eval_n_steps', 8)), cont_bin_edges=cont_bin_edges)
y = np.concatenate([np.zeros(len(vi)), np.ones(len(ai))])
out: dict[str, float] = {}
for k in sorted(v.keys()):
@@ -73,9 +73,36 @@ def train(cfg: dict[str, Any]) -> Path:
ds = TensorDataset(torch.from_numpy(tr_f).float(), torch.from_numpy(tr_c).float(), torch.from_numpy(tr_d).long(), torch.from_numpy(tr_l).long())
loader = DataLoader(ds, batch_size=int(cfg['batch_size']), shuffle=True, drop_last=True, num_workers=int(cfg.get('num_workers', 0)), pin_memory=device.type == 'cuda')
print(f'[data] training on {len(ds):,} flows')
model_cfg = MixedCFMConfig(T=data.T, flow_dim=data.flow_dim, token_dim=cfg.get('token_dim'), d_model=int(cfg['d_model']), n_layers=int(cfg['n_layers']), n_heads=int(cfg['n_heads']), mlp_ratio=float(cfg.get('mlp_ratio', 4.0)), time_dim=int(cfg.get('time_dim', 64)), sigma=float(cfg.get('sigma', 0.1)), use_ot=bool(cfg.get('use_ot', False)), reference_mode=cfg.get('reference_mode'), lambda_disc=float(cfg.get('lambda_disc', 1.0)))
n_disc_classes = int(cfg.get('n_disc_classes', 2))
model_cfg = MixedCFMConfig(
T=data.T, flow_dim=data.flow_dim, token_dim=cfg.get('token_dim'),
d_model=int(cfg['d_model']), n_layers=int(cfg['n_layers']), n_heads=int(cfg['n_heads']),
mlp_ratio=float(cfg.get('mlp_ratio', 4.0)), time_dim=int(cfg.get('time_dim', 64)),
sigma=float(cfg.get('sigma', 0.1)), use_ot=bool(cfg.get('use_ot', False)),
reference_mode=cfg.get('reference_mode'), lambda_disc=float(cfg.get('lambda_disc', 1.0)),
n_disc_classes=n_disc_classes,
# B-group ablation flags
use_flow_token=bool(cfg.get('use_flow_token', True)),
n_packet_tokens=int(cfg.get('n_packet_tokens', -1)),
disc_as_cont=bool(cfg.get('disc_as_cont', False)),
cont_as_disc=bool(cfg.get('cont_as_disc', False)),
)
model = MixedTokenCFM(model_cfg).to(device)
print(f'[model] params={model.param_count():,} token_dim={model.token_dim} sigma={model_cfg.sigma} use_ot={model_cfg.use_ot} lambda_disc={model_cfg.lambda_disc}')
# B4: compute bin edges from benign train cont (z-scored, masked) for cont_as_disc quantization
cont_bin_edges = None
if model_cfg.cont_as_disc:
n_bins = n_disc_classes
n_cont_orig = model_cfg.n_cont_pkt
# gather real cont samples per channel (mask padding)
masks = np.arange(data.train_cont.shape[1])[None, :] < data.train_len[:, None]
edges = np.zeros((n_cont_orig, n_bins - 1), dtype=np.float32)
for c in range(n_cont_orig):
vals = data.train_cont[..., c][masks]
qs = np.linspace(0, 1, n_bins + 1)[1:-1] # interior quantiles
edges[c] = np.quantile(vals, qs).astype(np.float32)
cont_bin_edges = torch.from_numpy(edges).to(device)
print(f'[B4] cont_bin_edges shape={tuple(edges.shape)} (n_bins={n_bins})')
print(f'[model] params={model.param_count():,} token_dim={model.token_dim} sigma={model_cfg.sigma} use_ot={model_cfg.use_ot} lambda_disc={model_cfg.lambda_disc} use_flow_token={model_cfg.use_flow_token} n_packet_tokens={model_cfg.n_packet_tokens} disc_as_cont={model_cfg.disc_as_cont} cont_as_disc={model_cfg.cont_as_disc}')
opt = torch.optim.AdamW(model.parameters(), lr=float(cfg['lr']), weight_decay=float(cfg.get('weight_decay', 0.01)))
total_steps = max(1, int(cfg['epochs']) * len(loader))
sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=total_steps)
@@ -91,7 +118,7 @@ def train(cfg: dict[str, Any]) -> Path:
cont = cont.to(device, non_blocking=True)
disc = disc.to(device, non_blocking=True)
lens = lens.to(device, non_blocking=True)
comp = model.compute_loss(flow, cont, disc, lens, return_components=True)
comp = model.compute_loss(flow, cont, disc, lens, return_components=True, cont_bin_edges=cont_bin_edges)
loss = comp['total']
ldisc_sum += float(comp['aux_disc'].item())
opt.zero_grad(set_to_none=True)
@@ -104,7 +131,7 @@ def train(cfg: dict[str, Any]) -> Path:
mean_loss = float(np.mean(losses)) if losses else float('nan')
eval_metrics: dict[str, float] | None = None
if epoch % int(cfg.get('eval_every', 5)) == 0 or epoch == int(cfg['epochs']):
eval_metrics = _quick_eval(model, data, device, cfg)
eval_metrics = _quick_eval(model, data, device, cfg, cont_bin_edges=cont_bin_edges)
history['epoch'].append(epoch)
history['loss'].append(mean_loss)
history['eval'].append(eval_metrics)
@@ -120,6 +147,8 @@ def train(cfg: dict[str, Any]) -> Path:
if not np.isfinite(mean_loss):
raise RuntimeError(f'non-finite loss at epoch {epoch}')
payload = {'model_state_dict': model.state_dict(), 'model_cfg': asdict(model_cfg), 'cont_mean': data.cont_mean, 'cont_std': data.cont_std, 'flow_mean': data.flow_mean, 'flow_std': data.flow_std, 'flow_feature_names': np.asarray(data.flow_feature_names), 'packet_feature_names': np.asarray(data.packet_feature_names)}
if cont_bin_edges is not None:
payload['cont_bin_edges'] = cont_bin_edges.detach().cpu().numpy()
torch.save(payload, save_dir / 'model.pt')
with open(save_dir / 'history.json', 'w') as f:
json.dump(history, f, indent=2, default=str)

View File

@@ -51,6 +51,28 @@ Source (rows) trained on 10K benign of source dataset; target (columns) tested o
Forward CICIDS17→CICDDoS19 (0.969) beats Shafir 0.89 by **+0.08**; reverse CICDDoS19→CICIDS17 (0.941) approximately matches Shafir 0.93. CICIoT23 is hardest both as source and target — its IoT-protocol diversity makes the "benign of source ≈ benign of target" assumption brittle. Full table at `artifacts/route_comparison/CROSS_MATRIX_3x3.md`.
### Ablations (architecture & aggregator)
Two orthogonal ablation axes, each evaluated **within-dataset** (4 datasets × 3 seeds) **and** **cross-dataset** (3×3 transfer × 3 seeds):
- **Group A** — 7 alternative aggregators on the same JANUS-full sub-score vector (post-processing only; no retraining).
- **Group B** — 5 architecture variants, each retrained 4 datasets × 3 seeds = 60 runs + 90 cross-evals.
Every load-bearing JANUS design choice shows the **same ablation pattern**: removing it changes within-dataset performance only slightly, but costs substantially on cross-dataset transfer.
| Component (removed in ablation) | Variant | Within Δ | Cross-mean Δ | Cross-worst Δ |
|---|---|---:|---:|---:|
| FLOW token (global context) | B1 | **0.94** | 6.70 | 19.97 |
| Packet sequence | B2 | +0.15 | **23.82** | **36.27** |
| Cont/disc head split (drop disc head) | B3 | +0.44 | **13.14** | **25.03** |
| CFM head (drop continuous side) | B4 | **2.37** | 2.03 | 2.86 |
| Joint training of two heads | B5 | +0.20 | **18.93** | **27.54** |
| OAS Mahalanobis aggregator | A1 vs A5 | +0.37 | **15.88** | **27.38** |
Three ablations (B3 / B5 / A-aggregator) **marginally beat JANUS-full at within-dataset evaluation** but collapse on at least one cross-dataset transfer direction. The disc head, joint training, and OAS aggregator are deliberate trades: their value is exclusively in cross-dataset robustness.
Full headline summary: `artifacts/ablation/ABLATION_SUMMARY.md`. Per-variant 3×3 cross matrices: `artifacts/ablation/ABLATION_CROSS_B_full.md` and `artifacts/ablation/ABLATION_TABLE_CROSS_full.md`.
## Layout
```
@@ -74,6 +96,12 @@ scripts/ Workspace-level pcap → artifact pipeline,
orchestration. aggregate_score_router.py is the
deployable score path; run_cross_3x3.sh +
cross_3x3_table.py produce the cross matrix.
aggregate_ablation.py / aggregate_ablation_cross.py /
aggregate_ablation_cross_B.py produce the ablation
tables in artifacts/ablation/.
ablation/ B-group ablation training/eval drivers
(generate_configs.py, run_groupB.sh,
run_cross_groupB.sh).
tests/ Data-contract unit tests.
```
@@ -177,7 +205,8 @@ Common gotcha: if CSV timestamps and pcap epochs are in different time zones, `e
## Authoritative documents
- `RESULTS.md` — full headline tables, ablations, per-attack analysis, JANUS configuration, thresholded operating-point metrics, what the experiments proved / disproved.
- `RESULTS.md` — full headline tables, per-attack analysis, JANUS configuration, thresholded operating-point metrics, what the experiments proved / disproved.
- `artifacts/ablation/ABLATION_SUMMARY.md` — paper-facing ablation summary (Group A aggregator + Group B architecture, both within and cross views).
- `Mixed_CFM/model.py` and `common/data_contract.py` — model + data-contract source of truth.
## Python environment

View File

@@ -0,0 +1,56 @@
"""Generate 60 B-group ablation configs from existing 12 base configs.
Reads:
Mixed_CFM/configs/<ds>_seed<S>.yaml (4 datasets × 3 seeds = 12 base)
Writes:
Mixed_CFM/configs/ablation/<gid>/<ds>_seed<S>.yaml (5 variants × 12 = 60)
Each variant overrides save_dir → artifacts/ablation/janus_<ds>_seed<S>_<gid>/
plus the variant-specific flags. CICIoT2023 base is `ciciot2023_seed42.yaml`
(NOT `ciciot2023_route_c_seed42.yaml`, which is a different score-router config).
"""
from __future__ import annotations
from pathlib import Path
import yaml
# Directory two levels above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[2]
# Base configs are read from here; ablation configs are written to OUT_DIR/<gid>/.
BASE_DIR = ROOT / "Mixed_CFM" / "configs"
OUT_DIR = ROOT / "Mixed_CFM" / "configs" / "ablation"
# One base config is expected per (dataset, seed) pair: <ds>_seed<S>.yaml.
DATASETS = ["iscxtor2016", "cicids2017", "cicddos2019", "ciciot2023"]
SEEDS = [42, 43, 44]
# Variant id -> keys overridden (or added) on top of each base config.
VARIANTS = {
"b1_noflow": {"use_flow_token": False},
"b2_flowonly": {"n_packet_tokens": 0, "lambda_disc": 0.0},
"b3_allcont": {"disc_as_cont": True, "lambda_disc": 0.0},
"b4_alldisc": {"cont_as_disc": True, "n_disc_classes": 8},
"b5_nodisc": {"lambda_disc": 0.0},
}
def main() -> None:
    """Write one ablation config per (dataset, seed, variant) combination.

    For every base config ``BASE_DIR/<ds>_seed<S>.yaml`` that exists, each
    variant in ``VARIANTS`` produces ``OUT_DIR/<gid>/<ds>_seed<S>.yaml``: a copy
    of the base config with ``save_dir`` redirected to
    ``artifacts/ablation/janus_<ds>_seed<S>_<gid>`` and the variant's overrides
    applied last. Missing base configs are reported and skipped.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    for gid in VARIANTS:
        (OUT_DIR / gid).mkdir(parents=True, exist_ok=True)

    n_written = 0
    for ds in DATASETS:
        for seed in SEEDS:
            base_path = BASE_DIR / f"{ds}_seed{seed}.yaml"
            if not base_path.exists():
                print(f"[miss] {base_path}")
                continue
            # Explicit encoding keeps the output independent of the platform locale.
            base_cfg = yaml.safe_load(base_path.read_text(encoding="utf-8"))
            for gid, overrides in VARIANTS.items():
                cfg = dict(base_cfg)
                cfg["save_dir"] = str(ROOT / "artifacts" / "ablation" / f"janus_{ds}_seed{seed}_{gid}")
                cfg.update(overrides)
                out = OUT_DIR / gid / f"{ds}_seed{seed}.yaml"
                out.write_text(yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8")
                n_written += 1
    print(f"[wrote] {n_written} config files under {OUT_DIR}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,66 @@
#!/usr/bin/env bash
# Cross-dataset evaluation for B-group ablation models.
# 5 variants × 6 off-diagonal directions × 3 seeds = 90 cross evals.
#
# Each B-variant model dir is artifacts/ablation/janus_<ds>_seed<S>_<gid>/.
# We only cross within the 3-dataset matrix (cicids2017, cicddos2019, ciciot2023);
# ISCXTor16 has different feature space for cross.
#
# Usage:
# bash scripts/ablation/run_cross_groupB.sh # all 90
# bash scripts/ablation/run_cross_groupB.sh b1_noflow b3_allcont
set -euo pipefail
ROOT=/home/chy/JANUS
EVAL=${ROOT}/Mixed_CFM/eval_cross.py
OUT_DIR=${ROOT}/artifacts/ablation/cross
mkdir -p "${OUT_DIR}"
# Per-target-dataset inputs passed to eval_cross.py:
#   STORE -> --target-store, FLOWS -> --target-flows, FEATS -> --target-flow-features
declare -A STORE FLOWS FEATS
STORE[cicids2017]=${ROOT}/datasets/cicids2017/processed/full_store
FLOWS[cicids2017]=${ROOT}/datasets/cicids2017/processed/flows.parquet
FEATS[cicids2017]=${ROOT}/datasets/cicids2017/processed/flow_features.parquet
STORE[cicddos2019]=${ROOT}/datasets/cicddos2019/processed/full_store
FLOWS[cicddos2019]=${ROOT}/datasets/cicddos2019/processed/flows.parquet
FEATS[cicddos2019]=${ROOT}/datasets/cicddos2019/processed/flow_features.parquet
STORE[ciciot2023]=${ROOT}/datasets/ciciot2023/processed/full_store
# NOTE(review): unlike the other two datasets, this flows.parquet path sits
# inside full_store/ — confirm this matches the on-disk layout.
FLOWS[ciciot2023]=${ROOT}/datasets/ciciot2023/processed/full_store/flows.parquet
FEATS[ciciot2023]=${ROOT}/datasets/ciciot2023/processed/flow_features.parquet
ALL_GIDS=(b1_noflow b2_flowonly b3_allcont b4_alldisc b5_nodisc)
DATASETS=(cicids2017 cicddos2019 ciciot2023)
SEEDS=(42 43 44)
GPU="${GPU:-0}"
if [[ $# -gt 0 ]]; then
GIDS=("$@")
else
GIDS=("${ALL_GIDS[@]}")
fi
# run_one GID SRC TGT SEED
# Evaluate the model trained on SRC (variant GID, seed SEED) against dataset TGT.
# Writes <gid>__seed<S>_<src>_to_<tgt>.json and a matching .log under OUT_DIR.
# Skips the run when the result file already exists or the model is missing.
run_one() {
  local gid=$1 src=$2 tgt=$3 seed=$4
  local md="${ROOT}/artifacts/ablation/janus_${src}_seed${seed}_${gid}"
  local tag="${gid}__seed${seed}_${src}_to_${tgt}"
  local out="${OUT_DIR}/${tag}.json"
  if [[ -f "${out}" ]]; then echo "[skip] $gid ${src}->${tgt} seed${seed}"; return; fi
  if [[ ! -f "${md}/model.pt" ]]; then echo "[missing model] ${md}/model.pt"; return; fi
  echo "[gpu${GPU}] $(date +%H:%M:%S) $gid ${src}->${tgt} seed${seed}"
  cd "${ROOT}/Mixed_CFM"
  # All path expansions are quoted so directories containing spaces survive.
  CUDA_VISIBLE_DEVICES="${GPU}" uv run --no-sync python -u "${EVAL}" \
    --model-dir "${md}" \
    --target-store "${STORE[$tgt]}" --target-flows "${FLOWS[$tgt]}" --target-flow-features "${FEATS[$tgt]}" \
    --benign-label normal --n-benign 10000 --n-attack 1000000 \
    --out "${out}" --seed "${seed}" --T 64 --batch-size 512 --n-steps 16 \
    > "${OUT_DIR}/${tag}.log" 2>&1
}
# Sweep every selected variant over all ordered (source, target) dataset pairs
# and all seeds; the diagonal (source == target) is skipped.
for gid in "${GIDS[@]}"; do
for src in "${DATASETS[@]}"; do
for tgt in "${DATASETS[@]}"; do
[[ "$src" == "$tgt" ]] && continue
for seed in "${SEEDS[@]}"; do
run_one "$gid" "$src" "$tgt" "$seed"
done
done
done
done
echo "[done] cross evals complete"

76
scripts/ablation/run_groupB.sh Executable file
View File

@@ -0,0 +1,76 @@
#!/usr/bin/env bash
# Run all 60 B-group ablation training + phase1-eval runs.
#
# Splits work across two GPUs round-robin (set GPUS env to override).
# Logs per-run go to artifacts/ablation/<save_dir>/{train,phase1}.log.
#
# Usage:
# bash scripts/ablation/run_groupB.sh # all 60 runs
# bash scripts/ablation/run_groupB.sh b1_noflow b5_nodisc # subset of groups
# GPUS=0 bash scripts/ablation/run_groupB.sh # single-GPU serial
set -euo pipefail
cd "$(dirname "$0")/../.."
ALL_GIDS=(b1_noflow b2_flowonly b3_allcont b4_alldisc b5_nodisc)
DATASETS=(iscxtor2016 cicids2017 cicddos2019 ciciot2023)
SEEDS=(42 43 44)
GPUS="${GPUS:-0,1}"
IFS=',' read -ra GPU_ARR <<< "$GPUS"
N_GPU=${#GPU_ARR[@]}
if [[ $# -gt 0 ]]; then
GIDS=("$@")
else
GIDS=("${ALL_GIDS[@]}")
fi
# Build the full run list
# Each entry is a "gid|dataset|seed" spec string; run_one splits it on '|'.
runs=()
for gid in "${GIDS[@]}"; do
for ds in "${DATASETS[@]}"; do
for seed in "${SEEDS[@]}"; do
runs+=("${gid}|${ds}|${seed}")
done
done
done
n_runs=${#runs[@]}
echo "[plan] ${n_runs} runs across GPUs ${GPUS} (gids=${GIDS[*]})"
# run_one SPEC GPU_ID
# Train one ablation config and then run the phase-1 evaluation on the result.
# SPEC is "gid|dataset|seed"; GPU_ID is exported as CUDA_VISIBLE_DEVICES.
# Logs go to <save_dir>/train.log and <save_dir>/phase1.log, where save_dir is
# read from the config file itself.
run_one() {
  local spec="$1" gpu_id="$2"
  # Keep the parsed fields local so they do not overwrite the caller's variables.
  local gid ds seed
  IFS='|' read -r gid ds seed <<< "$spec"
  local cfg="Mixed_CFM/configs/ablation/${gid}/${ds}_seed${seed}.yaml"
  local save_dir
  # The config path is passed as an argument rather than spliced into the
  # Python source, so quotes in the path cannot break the snippet.
  save_dir=$(uv run --no-sync python -c "import sys, yaml; print(yaml.safe_load(open(sys.argv[1]))['save_dir'])" "$cfg")
  mkdir -p "$save_dir"
  echo "[gpu${gpu_id}] $(date +%H:%M:%S) START $gid $ds seed${seed}"
  CUDA_VISIBLE_DEVICES="$gpu_id" uv run --no-sync python Mixed_CFM/train.py \
    --config "$cfg" >"$save_dir/train.log" 2>&1
  CUDA_VISIBLE_DEVICES="$gpu_id" uv run --no-sync python Mixed_CFM/eval_phase1.py \
    --model-dir "$save_dir" --out-dir "$save_dir" \
    --batch-size 256 --n-steps 16 \
    --n-val-cap 30000 --n-atk-cap 30000 >"$save_dir/phase1.log" 2>&1
  echo "[gpu${gpu_id}] $(date +%H:%M:%S) DONE  $gid $ds seed${seed}"
}
# Round-robin assignment
# Single GPU: runs execute serially in the foreground.
# Multiple GPUs: runs are launched in batches of N_GPU background jobs, one per
# GPU, and the whole batch is awaited before the next one starts.
pids=()
pid_specs=()
n_failed=0

# Wait for every job of the current batch; a failed job is reported on stderr
# and counted, but does not stop the remaining runs.
wait_batch() {
  local j
  # Guard first: expanding an empty array trips `set -u` on bash < 4.4.
  if (( ${#pids[@]} == 0 )); then return 0; fi
  for j in "${!pids[@]}"; do
    if ! wait "${pids[$j]}"; then
      echo "[fail] ${pid_specs[$j]}" >&2
      n_failed=$((n_failed + 1))
    fi
  done
  pids=()
  pid_specs=()
}

for i in "${!runs[@]}"; do
  spec="${runs[$i]}"
  gpu_id="${GPU_ARR[$((i % N_GPU))]}"
  if [[ $N_GPU -eq 1 ]]; then
    run_one "$spec" "$gpu_id"
  else
    run_one "$spec" "$gpu_id" &
    pids+=($!)
    pid_specs+=("$spec")
    # Cap concurrency at N_GPU
    if (( (i + 1) % N_GPU == 0 )); then
      wait_batch
    fi
  fi
done
wait_batch
if (( n_failed > 0 )); then
  echo "[warn] ${n_failed} of ${n_runs} runs failed; see the per-run logs" >&2
fi
echo "[done] all ${n_runs} runs complete"

39
scripts/ablation/smoke_test.sh Executable file
View File

@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Smoke-test all 5 B-group variants on cicids2017 seed42 with reduced epochs
# and tiny train set, on CPU (so VLLM workers on the GPUs are not disturbed).
#
# After: each ${SMOKE_ROOT}/<gid>/ (default
# /home/chy/JANUS/artifacts/ablation_smoke/<gid>/) should contain model.pt
# + phase1_scores.npz with the variant-specific score keys.
set -euo pipefail
cd "$(dirname "$0")/../.."
GIDS=(b1_noflow b2_flowonly b3_allcont b4_alldisc b5_nodisc)
DS=cicids2017
SEED=42
# Root directory for smoke outputs; override via the environment if needed.
SMOKE_ROOT="${SMOKE_ROOT:-/home/chy/JANUS/artifacts/ablation_smoke}"

for gid in "${GIDS[@]}"; do
    cfg="Mixed_CFM/configs/ablation/${gid}/${DS}_seed${SEED}.yaml"
    out_dir="${SMOKE_ROOT}/${gid}"
    echo "=================================================="
    echo "[smoke] $gid"
    echo "=================================================="
    # Only the tail of each log is shown; pipefail still aborts on a failed run.
    uv run --no-sync python Mixed_CFM/train.py \
        --config "$cfg" \
        --override "device=cpu" "epochs=2" "n_train=500" "eval_n=200" "eval_every=2" \
        "save_dir=${out_dir}" 2>&1 | tail -8
    uv run --no-sync python Mixed_CFM/eval_phase1.py \
        --model-dir "$out_dir" \
        --out-dir "$out_dir" \
        --device cpu --batch-size 64 --n-steps 4 \
        --n-val-cap 200 --n-atk-cap 200 2>&1 | tail -4
    echo
done

echo "=== Smoke summary ==="
for gid in "${GIDS[@]}"; do
    npz="${SMOKE_ROOT}/${gid}/phase1_scores.npz"
    if [[ -f "$npz" ]]; then
        # List the val_terminal* / val_disc* arrays each variant produced.
        keys=$(uv run --no-sync python -c "import sys, numpy as np; z=np.load(sys.argv[1], allow_pickle=True); print(','.join(sorted(k for k in z.files if k.startswith(('val_terminal','val_disc')))))" "$npz")
        echo "$gid: $keys"
    else
        echo "$gid: MISSING"
    fi
done

View File

@@ -0,0 +1,533 @@
"""JANUS ablation aggregator (Groups A + B).
Reads phase1_scores.npz from:
artifacts/route_comparison/janus_<ds>_seed<S>/ (A + JANUS-full anchor)
artifacts/ablation/janus_<ds>_seed<S>_<gid>/ (B variants)
Produces:
artifacts/ablation/ABLATION_TABLE_<group>.md final markdown table (<group> = A, B or all)
artifacts/ablation/ABLATION_TABLE_<group>.json per-cell mean / std / CI / per-seed
artifacts/ablation/ABLATION_DELONG_<group>.md paired DeLong p-values vs JANUS-full (only with --delong)
Group A operates entirely on existing route_comparison npz files (no GPU).
Group B requires the 60 B-variant runs to have completed.
"""
from __future__ import annotations
import argparse
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
import numpy as np
from sklearn.covariance import OAS
from sklearn.metrics import roc_auc_score
# parents[2] is two directory levels above the directory that holds this file.
ROOT = Path(__file__).resolve().parents[2]
# Artifact directories under ROOT.
ROUTE = ROOT / "artifacts" / "route_comparison"
ABL = ROOT / "artifacts" / "ablation"
# Dataset ids in table-column order, and their display names.
DATASETS = ["iscxtor2016", "cicids2017", "cicddos2019", "ciciot2023"]
PRETTY = {
"iscxtor2016": "ISCXTor16",
"cicids2017": "CICIDS17",
"cicddos2019": "CICDDoS19",
"ciciot2023": "CICIoT23",
}
SEEDS = [42, 43, 44]
T_975_N3 = 4.302653 # 95% t-CI factor for n=3 (df=2)
# Sub-score names (npz keys with the val_/atk_ prefix stripped) used by the Group A scorers.
CONT_KEYS = ["terminal_norm", "terminal_flow", "terminal_packet"]
DISC_KEYS = ["disc_nll_total", "disc_nll_ch2", "disc_nll_ch3",
"disc_nll_ch4", "disc_nll_ch5", "disc_nll_ch6", "disc_nll_ch7"]
ALL_KEYS = CONT_KEYS + DISC_KEYS # 10-d
# --------------------------------------------------------------------------- #
# I/O #
# --------------------------------------------------------------------------- #
def _load_npz(npz_path: Path):
    """Load a within-dataset score npz and split its arrays by prefix.

    Arrays named ``val_<key>`` go into the first dict and ``atk_<key>`` into
    the second, keyed by ``<key>``. The ``val_labels`` / ``atk_labels``
    entries and any array with another prefix are ignored.

    Returns:
        ``(val, atk)``: two dicts mapping sub-score name to array.
    """
    val = {}
    atk = {}
    # The context manager closes the underlying zip file once the arrays are read.
    # NOTE(review): allow_pickle=True unpickles object arrays; only load trusted files.
    with np.load(npz_path, allow_pickle=True) as z:
        for k in z.files:
            if k.startswith("val_") and k != "val_labels":
                val[k[4:]] = z[k]
            elif k.startswith("atk_") and k != "atk_labels":
                atk[k[4:]] = z[k]
    return val, atk
def _load_cross_npz(npz_path: Path):
    """Load a cross-dataset score npz and split its arrays by prefix.

    Cross npz schema: ``b_<key>`` = target benign, ``a_<key>`` = target attacks.
    The ``b_labels`` / ``a_labels`` entries and arrays with any other prefix
    are ignored.

    Returns:
        ``(val, atk)``: benign and attack dicts mapping sub-score name to array.
    """
    val = {}
    atk = {}
    # The context manager closes the underlying zip file once the arrays are read.
    # NOTE(review): allow_pickle=True unpickles object arrays; only load trusted files.
    with np.load(npz_path, allow_pickle=True) as z:
        for k in z.files:
            if k.startswith("b_") and k != "b_labels":
                val[k[2:]] = z[k]
            elif k.startswith("a_") and k != "a_labels":
                atk[k[2:]] = z[k]
    return val, atk
def _stack(d: dict, keys: list[str]) -> np.ndarray:
arrs = []
for k in keys:
if k in d:
arrs.append(d[k])
else:
# variant doesn't produce this score (e.g. B2 has no disc, B5 disc untrained)
return None
out = np.stack(arrs, axis=1).astype(np.float64)
return np.nan_to_num(out, nan=0.0, posinf=1e6, neginf=-1e6)
# --------------------------------------------------------------------------- #
# Score functions (Group A definitions) #
# --------------------------------------------------------------------------- #
def _mahal(S, mu, inv_cov):
    """Return the quadratic form ``(s - mu)^T inv_cov (s - mu)`` for each row ``s`` of ``S``."""
    centered = S - mu
    return np.einsum("ni,ij,nj->n", centered, inv_cov, centered)
def _oas_mahal(val_S, atk_S):
    """Score both sets by ``_mahal`` using statistics fitted on ``val_S`` only.

    The mean and the OAS covariance estimate come from ``val_S``; a 1e-9
    multiple of the identity is added before inversion.

    Returns:
        ``(val_scores, atk_scores)``.
    """
    center = val_S.mean(axis=0)
    shrunk_cov = OAS().fit(val_S).covariance_
    ridge = 1e-9 * np.eye(shrunk_cov.shape[0])
    precision = np.linalg.inv(shrunk_cov + ridge)
    val_scores = _mahal(val_S, center, precision)
    atk_scores = _mahal(atk_S, center, precision)
    return val_scores, atk_scores
def _zscore_agg(val_S, atk_S, mode="mean"):
    """Z-score every column with ``val_S`` statistics, then reduce across columns.

    Args:
        val_S: matrix used to fit the per-column mean and standard deviation.
        atk_S: matrix standardised with the same statistics.
        mode: ``"mean"`` or ``"max"``, the per-row reduction.

    Returns:
        ``(val_scores, atk_scores)``, one value per row.

    Raises:
        ValueError: if ``mode`` is neither ``"mean"`` nor ``"max"``.
    """
    center = val_S.mean(axis=0)
    scale = val_S.std(axis=0) + 1e-9  # epsilon keeps constant columns finite
    z_val = (val_S - center) / scale
    z_atk = (atk_S - center) / scale
    if mode not in ("mean", "max"):
        raise ValueError(mode)
    if mode == "max":
        return z_val.max(axis=1), z_atk.max(axis=1)
    return z_val.mean(axis=1), z_atk.mean(axis=1)
def score_a1_terminal_norm(val, atk):
    """A1: use the raw ``terminal_norm`` sub-score as the anomaly score.

    Returns:
        ``(val_scores, atk_scores)``, or ``None`` when either side lacks the
        key. This matches the other scorers, so callers skip the cell instead
        of failing with a ``KeyError``.
    """
    if "terminal_norm" not in val or "terminal_norm" not in atk:
        return None
    return val["terminal_norm"], atk["terminal_norm"]
def score_a2_disc_total(val, atk):
    """A2: use the raw ``disc_nll_total`` sub-score as the anomaly score.

    Returns:
        ``(val_scores, atk_scores)``, or ``None`` when either side lacks the
        key. Both sides are checked, not only ``val``.
    """
    if "disc_nll_total" not in val or "disc_nll_total" not in atk:
        return None
    return val["disc_nll_total"], atk["disc_nll_total"]
def score_a3_oas_term3(val, atk):
    """A3: ``_oas_mahal`` over the ``CONT_KEYS`` sub-scores; ``None`` if any key is missing."""
    mat_val, mat_atk = (_stack(side, CONT_KEYS) for side in (val, atk))
    if mat_val is None or mat_atk is None:
        return None
    return _oas_mahal(mat_val, mat_atk)
def score_a4_oas_disc7(val, atk):
    """A4: ``_oas_mahal`` over the ``DISC_KEYS`` sub-scores; ``None`` if any key is missing."""
    mat_val, mat_atk = (_stack(side, DISC_KEYS) for side in (val, atk))
    if mat_val is None or mat_atk is None:
        return None
    return _oas_mahal(mat_val, mat_atk)
def score_a5_oas_all10(val, atk):
    """A5: ``_oas_mahal`` over all ``ALL_KEYS`` sub-scores; ``None`` if any key is missing."""
    mat_val, mat_atk = (_stack(side, ALL_KEYS) for side in (val, atk))
    if mat_val is None or mat_atk is None:
        return None
    return _oas_mahal(mat_val, mat_atk)
def score_a6_zmean(val, atk):
    """A6: per-row mean of the z-scored ``ALL_KEYS`` sub-scores; ``None`` if any key is missing."""
    mat_val, mat_atk = (_stack(side, ALL_KEYS) for side in (val, atk))
    if mat_val is None or mat_atk is None:
        return None
    return _zscore_agg(mat_val, mat_atk, "mean")
def score_a7_zmax(val, atk):
    """A7: per-row max of the z-scored ``ALL_KEYS`` sub-scores; ``None`` if any key is missing."""
    mat_val, mat_atk = (_stack(side, ALL_KEYS) for side in (val, atk))
    if mat_val is None or mat_atk is None:
        return None
    return _zscore_agg(mat_val, mat_atk, "max")
def score_oas_disc_all(val, atk):
    """Auto-discover all `disc_nll_*` keys; OAS-Mahal over them. Used by B4.

    Keys are discovered from ``val`` (sorted); returns ``None`` when there
    are none or when ``atk`` lacks one of them.
    """
    disc_keys = sorted(name for name in val if name.startswith("disc_nll_"))
    if len(disc_keys) == 0:
        return None
    mat_val = _stack(val, disc_keys)
    mat_atk = _stack(atk, disc_keys)
    if mat_val is None or mat_atk is None:
        return None
    return _oas_mahal(mat_val, mat_atk)
def score_oas_all_available(val, atk):
    """OAS-Mahal over all `terminal_*` and `disc_nll_*` keys present in the npz.

    Used by B1 (no terminal_flow). Handles arbitrary subset of the 10 standard keys.
    Keys are discovered from ``val`` (sorted). A single key is returned raw;
    no key at all, or a key missing from ``atk`` when stacking, gives ``None``.
    """
    prefixes = ("terminal_", "disc_nll_")
    found = sorted(name for name in val if name.startswith(prefixes))
    if len(found) == 0:
        return None
    if len(found) == 1:
        only = found[0]
        return val[only], atk[only]
    mat_val = _stack(val, found)
    mat_atk = _stack(atk, found)
    if mat_val is None or mat_atk is None:
        return None
    return _oas_mahal(mat_val, mat_atk)
def score_oas_term_all(val, atk):
    """Auto-discover all `terminal_*` keys; OAS-Mahal. Used by B3 (3 keys) / B1 (2 keys).

    Keys are discovered from ``val`` (sorted). Returns ``None`` when there are
    none, or when ``atk`` lacks one of them while stacking.
    """
    term_keys = sorted(name for name in val if name.startswith("terminal_"))
    if len(term_keys) == 0:
        return None
    if len(term_keys) == 1:
        # single scalar: just return raw
        only = term_keys[0]
        return val[only], atk[only]
    mat_val = _stack(val, term_keys)
    mat_atk = _stack(atk, term_keys)
    if mat_val is None or mat_atk is None:
        return None
    return _oas_mahal(mat_val, mat_atk)
# Registry mapping a score-function id (the `score_fn_id` stored on each variant
# spec) to its implementation. Every function takes (val, atk) dicts.
SCORE_FNS = {
"A1_terminal_norm": score_a1_terminal_norm,
"A2_disc_nll_total": score_a2_disc_total,
"A3_OAS_term3": score_a3_oas_term3,
"A4_OAS_disc7": score_a4_oas_disc7,
"A5_OAS_all10": score_a5_oas_all10,
"A6_zmean_all10": score_a6_zmean,
"A7_zmax_all10": score_a7_zmax,
# Auto-key variants: operate on whichever matching keys the npz contains.
"OAS_disc_all": score_oas_disc_all,
"OAS_term_all": score_oas_term_all,
"OAS_all_available": score_oas_all_available,
}
# --------------------------------------------------------------------------- #
# Stats #
# --------------------------------------------------------------------------- #
def _auroc(s_v, s_a):
    """AUROC with ``s_v`` labelled 0 and ``s_a`` labelled 1."""
    labels = np.concatenate([np.zeros(len(s_v)), np.ones(len(s_a))])
    scores = np.r_[s_v, s_a]
    return float(roc_auc_score(labels, scores))
def _mean_ci(values: list[float]):
    """Mean with a two-sided 95% Student-t confidence half-width.

    ``None`` and NaN entries are dropped first. With all 3 seeds present the
    tabulated factor ``T_975_N3`` is used. For any other count ``n >= 2`` the
    factor is the t quantile with ``n - 1`` degrees of freedom, not the
    normal 1.96, which is far too narrow for so few seeds.

    Returns:
        ``None`` if nothing remains; otherwise a dict with ``mean``, ``std``
        (sample std, ddof=1), ``ci`` (half-width), ``n`` and ``vals``.
        A single value gets ``std = ci = 0.0``.
    """
    a = np.asarray([v for v in values if v is not None and not np.isnan(v)], dtype=float)
    n = int(a.size)
    if n == 0:
        return None
    if n == 1:
        return {"mean": float(a[0]), "std": 0.0, "ci": 0.0, "n": 1, "vals": a.tolist()}
    std = float(a.std(ddof=1))
    se = std / np.sqrt(n)
    if n == 3:
        t_crit = T_975_N3
    else:
        from scipy.stats import t as student_t
        t_crit = float(student_t.ppf(0.975, df=n - 1))
    return {
        "mean": float(a.mean()),
        "std": std,
        "ci": float(t_crit * se),
        "n": n,
        "vals": a.tolist(),
    }
def _delong_var(s_v, s_a):
    """Compute the AUROC and its DeLong variance via midranks (Sun & Xu 2014).

    Args:
        s_v: scores of the negative class.
        s_a: scores of the positive class.

    Returns:
        ``(auc, var, V10, V01)``: the AUROC, its variance estimate, and the
        structural components per positive (length ``len(s_a)``) and per
        negative (length ``len(s_v)``) sample.
    """
    from scipy.stats import rankdata

    # Rank in float64 whatever the score dtype is: integer scores must not
    # truncate tied midranks such as 5.5, and float32 must not limit the rank sums.
    s_v = np.asarray(s_v, dtype=np.float64)
    s_a = np.asarray(s_a, dtype=np.float64)
    n0, n1 = len(s_v), len(s_a)

    # method="average" yields 1-based midranks (ties share the mean rank).
    pooled = rankdata(np.concatenate([s_a, s_v]), method="average")  # positives first
    L_a = pooled[:n1]
    L_v = pooled[n1:]
    # midrank within each class
    L_aa = rankdata(s_a, method="average")
    L_vv = rankdata(s_v, method="average")

    auc = (L_a.sum() / n1 - (n1 + 1) / 2) / n0
    V10 = (L_a - L_aa) / n0      # length n1
    V01 = 1 - (L_v - L_vv) / n1  # length n0
    s10 = V10.var(ddof=1)
    s01 = V01.var(ddof=1)
    var = s10 / n1 + s01 / n0
    return float(auc), float(var), V10, V01
def _delong_paired_p(s_v, s_a, t_v, t_a):
    """Paired DeLong test for two AUROCs on the same data.

    Returns (auc1 - auc2, p_value_two_sided).
    s_*: candidate scores; t_*: reference (JANUS-full) scores.
    Both arrays must align flow-by-flow.

    When the variance of the difference is non-positive or not finite, the
    test is undefined and the p-value is reported as 1.0.
    """
    auc1, var1, V10_1, V01_1 = _delong_var(s_v, s_a)
    auc2, var2, V10_2, V01_2 = _delong_var(t_v, t_a)
    n1, n0 = len(s_a), len(s_v)
    # Covariance between the two scorers' structural components, per class.
    cov10 = np.cov(np.stack([V10_1, V10_2]), ddof=1)[0, 1]
    cov01 = np.cov(np.stack([V01_1, V01_2]), ddof=1)[0, 1]
    cov12 = cov10 / n1 + cov01 / n0
    var_diff = var1 + var2 - 2 * cov12
    delta = auc1 - auc2
    if not np.isfinite(var_diff) or var_diff <= 0:
        return delta, 1.0
    z = delta / np.sqrt(var_diff)
    from scipy.stats import norm
    # two-sided; the survival function keeps precision where 1 - cdf would
    # round to exactly 0 for large |z|.
    p = 2 * norm.sf(abs(z))
    return delta, float(p)
# --------------------------------------------------------------------------- #
# Aggregation entry points #
# --------------------------------------------------------------------------- #
@dataclass
class VariantSpec:
    """One ablation table row: where its score files live and how they are scored."""

    # Short variant id, also used to pick out the "JANUS-full" reference.
    vid: str
    # Row label shown in the generated markdown.
    label: str
    # Text for the "What removed" column.
    what_removed: str
    # Directory template relative to artifacts/, e.g.
    # "route_comparison/janus_{ds}_seed{seed}" or "ablation/janus_{ds}_seed{seed}_{gid}".
    npz_dir_pattern: str
    # Key into SCORE_FNS: which score to apply on the npz (usually "A5_OAS_all10").
    score_fn_id: str
    # Value substituted for {gid} in the template; for B variants.
    gid: str = ""
def _expand_path(spec: VariantSpec, ds: str, seed: int) -> Path:
    """Return the ``phase1_scores.npz`` path of ``spec`` for one (dataset, seed)."""
    run_dir = spec.npz_dir_pattern.format(ds=ds, seed=seed, gid=spec.gid)
    return ROOT / "artifacts" / run_dir / "phase1_scores.npz"
def collect_variant(spec: VariantSpec) -> dict:
    """Compute per-seed AUROCs of one variant on every dataset and summarise them.

    Seeds whose npz file is missing, or whose score function returns ``None``,
    are skipped.

    Returns:
        A dict with the spec's identifying fields, ``per_dataset`` (the
        ``_mean_ci`` summary per dataset, ``None`` when no seed was available)
        and ``per_seed`` (dataset -> seed -> AUROC).
    """
    per_seed: dict[str, dict[int, float]] = {}
    summary = {}
    for ds in DATASETS:
        seed_to_auc: dict[int, float] = {}
        for seed in SEEDS:
            npz = _expand_path(spec, ds, seed)
            if not npz.exists():
                continue
            val, atk = _load_npz(npz)
            scored = SCORE_FNS[spec.score_fn_id](val, atk)
            if scored is None:
                continue
            sv, sa = scored
            seed_to_auc[seed] = _auroc(sv, sa)
        per_seed[ds] = seed_to_auc
        # dicts preserve insertion order, so values() follows SEEDS order
        summary[ds] = _mean_ci(list(seed_to_auc.values()))
    return dict(
        vid=spec.vid,
        label=spec.label,
        what_removed=spec.what_removed,
        score_fn_id=spec.score_fn_id,
        gid=spec.gid,
        per_dataset=summary,
        per_seed=per_seed,
    )
def collect_delong_pvals(spec: VariantSpec, ref_spec: VariantSpec) -> dict:
    """Paired DeLong test: spec vs ref_spec, on each (ds, seed).

    A (dataset, seed) pair is skipped when either npz is missing, either score
    function returns ``None``, or the two score vectors differ in length.

    Returns:
        dataset -> list of ``{"seed", "delta", "p"}`` records.
    """
    results: dict[str, list[dict]] = {ds: [] for ds in DATASETS}
    for ds in DATASETS:
        for seed in SEEDS:
            path_s = _expand_path(spec, ds, seed)
            path_r = _expand_path(ref_spec, ds, seed)
            if not path_s.exists() or not path_r.exists():
                continue
            val_s, atk_s = _load_npz(path_s)
            val_r, atk_r = _load_npz(path_r)
            scored_s = SCORE_FNS[spec.score_fn_id](val_s, atk_s)
            scored_r = SCORE_FNS[ref_spec.score_fn_id](val_r, atk_r)
            if scored_s is None or scored_r is None:
                continue
            sv_s, sa_s = scored_s
            sv_r, sa_r = scored_r
            # The paired test needs flow-by-flow aligned scores; only the lengths
            # can be checked here. In practice for B variants the npz is from the
            # SAME data as JANUS-full at that (ds, seed).
            same_shape = len(sv_s) == len(sv_r) and len(sa_s) == len(sa_r)
            if not same_shape:
                continue
            delta, p_val = _delong_paired_p(sv_s, sa_s, sv_r, sa_r)
            results[ds].append({"seed": seed, "delta": delta, "p": p_val})
    return results
# --------------------------------------------------------------------------- #
# Variant registry #
# --------------------------------------------------------------------------- #
# Directory templates relative to artifacts/; filled in with str.format(ds=..., seed=..., gid=...).
ROUTE_DIR = "route_comparison/janus_{ds}_seed{seed}"
ABL_DIR = "ablation/janus_{ds}_seed{seed}_{gid}"
def _group_a_specs() -> list[VariantSpec]:
    """Return the Group A rows; all of them read the ``ROUTE_DIR`` npz files."""
    # (vid, label, what_removed, score_fn_id)
    table = [
        ("JANUS-full", "JANUS-full (A5)", "", "A5_OAS_all10"),
        ("A1", "A1 terminal_norm", "OAS aggregator + disc head", "A1_terminal_norm"),
        ("A2", "A2 disc_nll_total", "OAS aggregator + CFM head", "A2_disc_nll_total"),
        ("A3", "A3 OAS-Mahal term3", "disc head", "A3_OAS_term3"),
        ("A4", "A4 OAS-Mahal disc7", "CFM head", "A4_OAS_disc7"),
        ("A6", "A6 z-score mean (10-d)", "covariance structure", "A6_zmean_all10"),
        ("A7", "A7 z-score max (10-d)", "weighted aggregation", "A7_zmax_all10"),
    ]
    return [
        VariantSpec(vid, label, removed, ROUTE_DIR, fn_id)
        for vid, label, removed, fn_id in table
    ]
def _group_b_specs() -> list[VariantSpec]:
    """Return the Group B rows; each reads the ``ABL_DIR`` npz files of its ``gid``."""
    # (vid, label, what_removed, score_fn_id, gid)
    table = [
        # B1 has 2 terminal keys (no terminal_flow) + full disc7 → use auto-key OAS (9-d in this case)
        ("B1", "B1 no FLOW token", "global context", "OAS_all_available", "b1_noflow"),
        # B2 has only terminal_flow (= terminal_norm); single scalar
        ("B2", "B2 flow-only", "packet sequence", "A1_terminal_norm", "b2_flowonly"),
        # B3 has terminal_norm/flow/packet covering all 9 dims (cont + disc-as-cont); OAS on 3-tuple
        ("B3", "B3 all-cont", "cont/disc split", "A3_OAS_term3", "b3_allcont"),
        # B4 has 9 disc channels + total; auto-discover keys
        ("B4", "B4 all-disc", "cont/disc split (rev)", "OAS_disc_all", "b4_alldisc"),
        # B5 has full schema but disc head is untrained noise; use term3 only
        ("B5", "B5 λ_disc=0", "joint training", "A3_OAS_term3", "b5_nodisc"),
    ]
    return [
        VariantSpec(vid, label, removed, ABL_DIR, fn_id, gid=gid)
        for vid, label, removed, fn_id, gid in table
    ]
# --------------------------------------------------------------------------- #
# Markdown writer #
# --------------------------------------------------------------------------- #
def _fmt_cell(c: dict | None) -> str:
if c is None:
return ""
if c["n"] == 1:
return f"{100 * c['mean']:.2f}"
return f"{100 * c['mean']:.2f} ± {100 * c['ci']:.2f}"
def write_table(rows: list[dict], path: Path, *, title: str = "JANUS ablation"):
    """Write the ablation results as a markdown table.

    Args:
        rows: dicts as returned by ``collect_variant``.
        path: output file; missing parent directories are created.
        title: top-level markdown heading.

    The last column is the unweighted mean over the datasets that have data.
    The file is written as UTF-8 explicitly because the table contains
    non-ASCII characters (e.g. "±") that a platform default encoding may not
    be able to represent.
    """
    lines = [f"# {title}", ""]
    lines.append(f"3-seed mean ± 95% t-CI AUROC (%). Seeds = {SEEDS}.")
    lines.append("")
    header = ["Variant", "What removed"] + [PRETTY[ds] for ds in DATASETS] + ["Mean"]
    lines.append("| " + " | ".join(header) + " |")
    lines.append("|" + "|".join("---" for _ in header) + "|")
    for r in rows:
        cells = [r["label"], r["what_removed"]]
        ds_means = []
        for ds in DATASETS:
            c = r["per_dataset"].get(ds)
            cells.append(_fmt_cell(c))
            if c is not None:
                ds_means.append(c["mean"])
        cells.append(f"{100 * np.mean(ds_means):.2f}" if ds_means else "")
        lines.append("| " + " | ".join(cells) + " |")
    lines.append("")
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")
def write_delong(records: list[dict], path: Path):
    """Write the per-(variant, dataset, seed) paired DeLong results as markdown.

    Args:
        records: dicts with ``vid``, ``label`` and ``delong`` (dataset -> list
            of ``{"seed", "delta", "p"}``), one per variant.
        path: output file; missing parent directories are created.

    Each delta is printed with an explicit sign, so a variant that is worse
    than the reference shows up as negative. The file is written as UTF-8
    because the cells contain "Δ".
    """
    lines = ["# Paired DeLong p-values vs JANUS-full",
             "",
             f"Seeds = {SEEDS}. p reported per (variant, dataset, seed). "
             "Holm-Bonferroni-correctable; raw p shown.",
             ""]
    for rec in records:
        lines.append(f"## {rec['label']} ({rec['vid']})")
        lines.append("")
        header = ["Seed"] + [PRETTY[ds] for ds in DATASETS]
        lines.append("| " + " | ".join(header) + " |")
        lines.append("|" + "|".join("---" for _ in header) + "|")
        for seed in SEEDS:
            row = [str(seed)]
            for ds in DATASETS:
                hit = next((x for x in rec["delong"][ds] if x["seed"] == seed), None)
                if hit is None:
                    row.append("")
                else:
                    # "+" format flag: always emit the sign, never drop a minus.
                    row.append(f"Δ={hit['delta']:+.4f}, p={hit['p']:.3g}")
            lines.append("| " + " | ".join(row) + " |")
        lines.append("")
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")
# --------------------------------------------------------------------------- #
# Main #
# --------------------------------------------------------------------------- #
def main() -> None:
    """CLI entry point: build the requested ablation table(s) under ``ABL``.

    ``--group`` selects the Group A specs, the Group B specs or both;
    ``--delong`` additionally writes paired DeLong p-values of every selected
    variant against the JANUS-full Group A spec.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--group", choices=["A", "B", "all"], default="A")
    parser.add_argument("--delong", action="store_true",
                        help="Compute paired DeLong p-values vs JANUS-full (CPU heavy on big eval sets).")
    args = parser.parse_args()
    ABL.mkdir(parents=True, exist_ok=True)

    specs: list[VariantSpec] = []
    if args.group in ("A", "all"):
        specs += _group_a_specs()
    if args.group in ("B", "all"):
        specs += _group_b_specs()

    rows = []
    for spec in specs:
        row = collect_variant(spec)
        rows.append(row)
        n_ok = len([ds for ds in DATASETS if row["per_dataset"][ds] is not None])
        print(f"[ok] {spec.vid:14s} datasets_with_data={n_ok}/{len(DATASETS)}", flush=True)

    out_md = ABL / f"ABLATION_TABLE_{args.group}.md"
    out_json = ABL / f"ABLATION_TABLE_{args.group}.json"
    write_table(rows, out_md, title=f"JANUS ablation (group {args.group})")
    out_json.write_text(json.dumps(rows, indent=2, default=lambda o: None))
    print(f"[wrote] {out_md}")
    print(f"[wrote] {out_json}")

    if not args.delong:
        return
    ref = next(s for s in _group_a_specs() if s.vid == "JANUS-full")
    recs = []
    for spec in specs:
        if spec.vid == "JANUS-full":
            continue  # the reference is not compared against itself
        pvals = collect_delong_pvals(spec, ref)
        recs.append({"vid": spec.vid, "label": spec.label, "delong": pvals})
        print(f"[delong] {spec.vid}", flush=True)
    write_delong(recs, ABL / f"ABLATION_DELONG_{args.group}.md")
    print(f"[wrote] {ABL / f'ABLATION_DELONG_{args.group}.md'}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,218 @@
"""Cross-dataset version of the Group-A score-aggregator ablation.
For each (src, tgt, seed) cell we have a phase1-style npz with:
b_<key> target benign val (aggregator fit on this)
a_<key> target attacks
Within-dataset (src == tgt) cells reuse the standard
artifacts/route_comparison/janus_<ds>_seed<S>/phase1_scores.npz
(val_/atk_ prefixes — handled via the same _load_npz path).
We score the 7 aggregators A1..A7 (A5 is JANUS-full's deployed aggregator) across all
3×3 cells × 3 seeds, then summarize with two complementary views:
ABLATION_TABLE_CROSS_summary.md
| Aggregator | Within mean | Cross mean | Cross min (worst cell) |
Shows whether OAS's value lives in cross-dataset robustness.
ABLATION_TABLE_CROSS_full.md
Per-aggregator full 3×3 matrix (each cell = 3-seed mean ± 95% t-CI).
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
import numpy as np
from aggregate_ablation import (
SCORE_FNS, T_975_N3, _auroc, _load_npz, _load_cross_npz,
)
# parents[2] is two directory levels above the directory that holds this file.
ROOT = Path(__file__).resolve().parents[2]
# Within-dataset npz files live under ROUTE, cross-dataset ones under CROSS.
ROUTE = ROOT / "artifacts" / "route_comparison"
CROSS = ROUTE / "cross"
# Default output directory of this script.
ABL = ROOT / "artifacts" / "ablation"
# 3x3 cross matrix datasets (no ISCXTor16 — different feature space)
CROSS_DATASETS = ["cicids2017", "cicddos2019", "ciciot2023"]
PRETTY = {
"cicids2017": "CICIDS17",
"cicddos2019": "CICDDoS19",
"ciciot2023": "CICIoT23",
}
SEEDS = [42, 43, 44]
# (row label, SCORE_FNS id, short description used in section headings)
AGGREGATORS = [
("JANUS-full (A5)", "A5_OAS_all10", "deployed JANUS"),
("A1 terminal_norm","A1_terminal_norm", "raw scalar (CFM head)"),
("A2 disc_total", "A2_disc_nll_total","raw scalar (disc head)"),
("A3 OAS term3", "A3_OAS_term3", "OAS on 3 cont sub-scores"),
("A4 OAS disc7", "A4_OAS_disc7", "OAS on 7 disc sub-scores"),
("A6 z-score mean", "A6_zmean_all10", "equal-weight z-score sum"),
("A7 z-score max", "A7_zmax_all10", "equal-weight z-score max"),
]
# --------------------------------------------------------------------------- #
def _cell_path(src: str, tgt: str, seed: int) -> Path | None:
    """Return npz path for (src, tgt, seed) cell, or None if missing.

    Diagonal cells (``src == tgt``) use the within-dataset file under
    ``ROUTE``; off-diagonal cells use the file under ``CROSS``.
    """
    if src == tgt:
        candidate = ROUTE / f"janus_{src}_seed{seed}" / "phase1_scores.npz"
    else:
        candidate = CROSS / f"janus_seed{seed}_{src}_to_{tgt}.npz"
    if candidate.exists():
        return candidate
    return None
def _load_cell(src: str, tgt: str, seed: int):
    """Load the ``(val, atk)`` score dicts of one cell, or ``(None, None)`` if its file is missing."""
    npz_path = _cell_path(src, tgt, seed)
    if npz_path is None:
        return None, None
    # Diagonal cells use the val_/atk_ schema, cross cells the b_/a_ schema.
    loader = _load_npz if src == tgt else _load_cross_npz
    return loader(npz_path)
def _score_cell(src: str, tgt: str, seed: int, score_fn_id: str) -> float | None:
    """AUROC of one (src, tgt, seed) cell under the given score function.

    Returns ``None`` when the cell's npz is missing or the score function
    cannot be applied to it.
    """
    val, atk = _load_cell(src, tgt, seed)
    if val is None:
        return None
    scored = SCORE_FNS[score_fn_id](val, atk)
    if scored is None:
        return None
    benign_scores, attack_scores = scored
    return _auroc(benign_scores, attack_scores)
def _seed_means(src: str, tgt: str, score_fn_id: str) -> dict | None:
    """Summarise the per-seed AUROCs of cell (src, tgt).

    Seeds without a usable score (``None`` or NaN) are dropped. With all 3
    seeds present the tabulated factor ``T_975_N3`` is used. For any other
    count ``n >= 2`` the factor is the Student-t quantile with ``n - 1``
    degrees of freedom, not the normal 1.96, so a partially complete cell does
    not get a misleadingly narrow interval.

    Returns:
        ``None`` if no seed is available; otherwise a dict with ``mean``,
        ``std`` (ddof=1), ``ci`` (95% half-width), ``n`` and ``vals``.
        A single seed gets ``std = ci = 0.0``.
    """
    vals = []
    for seed in SEEDS:
        v = _score_cell(src, tgt, seed, score_fn_id)
        if v is not None and not np.isnan(v):
            vals.append(v)
    if not vals:
        return None
    a = np.asarray(vals)
    n = int(a.size)
    if n == 1:
        return {"mean": float(a[0]), "std": 0.0, "ci": 0.0, "n": 1, "vals": a.tolist()}
    std = float(a.std(ddof=1))
    se = std / np.sqrt(n)
    if n == 3:
        t_crit = T_975_N3
    else:
        from scipy.stats import t as student_t
        t_crit = float(student_t.ppf(0.975, df=n - 1))
    return {
        "mean": float(a.mean()),
        "std": std,
        "ci": float(t_crit * se),
        "n": n,
        "vals": a.tolist(),
    }
# --------------------------------------------------------------------------- #
def _fmt_cell(c):
    """Render a cell summary as ``"mean ± ci"`` in percent (mean alone when ``n == 1``, "" for ``None``)."""
    if c is None:
        return ""
    mean_txt = f"{100 * c['mean']:.2f}"
    if c["n"] == 1:
        return mean_txt
    return f"{mean_txt} ± {100 * c['ci']:.2f}"
def _summary_row(rows_3x3: dict[tuple[str, str], dict | None]) -> tuple[float, float, float, dict | None]:
"""Return (within_mean, cross_mean, cross_worst, worst_cell_summary)."""
within = []
cross = []
worst_v = None
worst_cell = None
for (src, tgt), cell in rows_3x3.items():
if cell is None:
continue
if src == tgt:
within.append(cell["mean"])
else:
cross.append(cell["mean"])
if worst_v is None or cell["mean"] < worst_v:
worst_v = cell["mean"]
worst_cell = (src, tgt, cell)
w = float(np.mean(within)) if within else float("nan")
c = float(np.mean(cross)) if cross else float("nan")
cw = worst_v if worst_v is not None else float("nan")
return w, c, cw, worst_cell
# --------------------------------------------------------------------------- #
def main() -> None:
    """CLI entry point: score every aggregator on the 3×3 cross matrix.

    Writes, into ``--out-dir`` (default ``ABL``):
      * ``ABLATION_TABLE_CROSS_summary.md``: within / cross / worst-cell view,
      * ``ABLATION_TABLE_CROSS_full.md``: one 3×3 matrix per aggregator,
      * ``ABLATION_TABLE_CROSS.json``: the same data in JSON.

    The markdown files are written as UTF-8 explicitly since they contain
    non-ASCII characters (±, →, ↓, ×).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out-dir", type=Path, default=ABL)
    args = ap.parse_args()
    args.out_dir.mkdir(parents=True, exist_ok=True)

    full = {}  # aggregator label -> {(src, tgt) -> cell summary}
    for label, fn_id, _why in AGGREGATORS:
        rows = {
            (src, tgt): _seed_means(src, tgt, fn_id)
            for src in CROSS_DATASETS
            for tgt in CROSS_DATASETS
        }
        full[label] = rows
        n_ok = sum(1 for v in rows.values() if v is not None)
        print(f"[ok] {label:20s} cells={n_ok}/{len(rows)}", flush=True)

    # Summary table: within mean, cross mean, cross worst
    summary_lines = ["# Cross-dataset Group-A summary",
                     "",
                     f"3-seed mean ± 95% t-CI AUROC. Datasets = {CROSS_DATASETS}.",
                     "Aggregator fit on **target** benign val only.",
                     "",
                     "| Aggregator | Within (3 cells, mean) | Cross (6 cells, mean) | Cross worst cell | Within − Cross |",
                     "|---|---|---|---|---|"]
    summary_data = {}
    for label, _fn_id, _why in AGGREGATORS:
        w, c, cw, worst_cell = _summary_row(full[label])
        gap = (w - c) * 100 if not np.isnan(w) and not np.isnan(c) else float("nan")
        worst_str = ""
        if worst_cell is not None:
            src, tgt, cell = worst_cell
            # Separate source and target so the label reads "SRC→TGT".
            worst_str = f"{PRETTY[src]}→{PRETTY[tgt]}: {_fmt_cell(cell)}"
        summary_lines.append(
            f"| {label} | {100 * w:.2f} | {100 * c:.2f} | {worst_str} | {gap:+.2f} |"
        )
        summary_data[label] = {"within_mean": w, "cross_mean": c, "cross_worst": cw, "worst_cell": worst_cell}
    summary_path = args.out_dir / "ABLATION_TABLE_CROSS_summary.md"
    summary_path.write_text("\n".join(summary_lines) + "\n", encoding="utf-8")
    print(f"[wrote] {summary_path}")

    # Full per-aggregator 3x3 matrices
    full_lines = ["# Cross-dataset Group-A full matrices",
                  "",
                  "Per aggregator: 3×3 matrix (rows = source / training, columns = target / test).",
                  "Each cell = 3-seed mean ± 95% t-CI AUROC (%). Diagonal italic = within-dataset.",
                  ""]
    for label, _fn_id, why in AGGREGATORS:
        full_lines.append(f"## {label} ({why})")
        full_lines.append("")
        header = ["Source ↓ / Target →"] + [PRETTY[d] for d in CROSS_DATASETS]
        full_lines.append("| " + " | ".join(header) + " |")
        full_lines.append("|" + "|".join("---" for _ in header) + "|")
        for src in CROSS_DATASETS:
            row = [f"**{PRETTY[src]}**"]
            for tgt in CROSS_DATASETS:
                txt = _fmt_cell(full[label][(src, tgt)])
                if src == tgt:
                    txt = f"_{txt}_"
                row.append(txt)
            full_lines.append("| " + " | ".join(row) + " |")
        full_lines.append("")
    full_path = args.out_dir / "ABLATION_TABLE_CROSS_full.md"
    full_path.write_text("\n".join(full_lines), encoding="utf-8")
    print(f"[wrote] {full_path}")

    json_path = args.out_dir / "ABLATION_TABLE_CROSS.json"
    json_path.write_text(json.dumps({
        "summary": summary_data,
        # JSON keys must be strings, so (src, tgt) becomes "src->tgt".
        "full": {label: {f"{src}->{tgt}": cell for (src, tgt), cell in rows.items()}
                 for label, rows in full.items()},
    }, indent=2, default=lambda o: None))
    print(f"[wrote] {json_path}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,180 @@
"""B-variant cross-dataset aggregation.
Reads:
artifacts/ablation/janus_<ds>_seed<S>_<gid>/phase1_scores.npz (within-dataset)
artifacts/ablation/cross/<gid>__seed<S>_<src>_to_<tgt>.npz (cross-dataset)
For each B-variant we apply the variant-appropriate aggregator (auto-key OAS
fits whatever sub-scores the variant produces). JANUS-full anchor is read from
the production route_comparison/ paths.
Outputs:
ABLATION_CROSS_B_summary.md within mean / cross mean / cross worst per gid
ABLATION_CROSS_B_full.md per-gid 3×3 matrices
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
import numpy as np
from aggregate_ablation import (
SCORE_FNS, T_975_N3, _auroc, _load_npz, _load_cross_npz,
)
# parents[2] is two directory levels above the directory that holds this file.
ROOT = Path(__file__).resolve().parents[2]
# JANUS-full anchor files (within-dataset under ROUTE, cross-dataset under ROUTE_CROSS).
ROUTE = ROOT / "artifacts" / "route_comparison"
ROUTE_CROSS = ROUTE / "cross"
# B-variant files (within-dataset under ABL, cross-dataset under ABL_CROSS).
ABL = ROOT / "artifacts" / "ablation"
ABL_CROSS = ABL / "cross"
CROSS_DATASETS = ["cicids2017", "cicddos2019", "ciciot2023"]
PRETTY = {
"cicids2017": "CICIDS17",
"cicddos2019": "CICDDoS19",
"ciciot2023": "CICIoT23",
}
SEEDS = [42, 43, 44]
# (gid, label, what_removed, score_fn_id)
# The gid "janus_full" is special-cased by the path helpers to read from ROUTE.
B_VARIANTS = [
("janus_full", "JANUS-full", "", "OAS_all_available"),
("b1_noflow", "B1 no FLOW token","global context", "OAS_all_available"),
("b2_flowonly", "B2 flow-only", "packet sequence", "A1_terminal_norm"),
("b3_allcont", "B3 all-cont", "cont/disc split", "OAS_term_all"),
("b4_alldisc", "B4 all-disc", "cont/disc split (rev)", "OAS_disc_all"),
("b5_nodisc", "B5 λ_disc=0", "joint training", "OAS_term_all"),
]
def _within_path(gid: str, ds: str, seed: int) -> Path:
    """Path of the within-dataset ``phase1_scores.npz`` for a variant.

    The ``"janus_full"`` anchor lives under ``ROUTE``; every other gid under ``ABL``.
    """
    is_anchor = gid == "janus_full"
    run_dir = ROUTE / f"janus_{ds}_seed{seed}" if is_anchor else ABL / f"janus_{ds}_seed{seed}_{gid}"
    return run_dir / "phase1_scores.npz"
def _cross_path(gid: str, src: str, tgt: str, seed: int) -> Path:
    """Path of the cross-dataset npz for a variant and a (src, tgt, seed) cell.

    The ``"janus_full"`` anchor lives under ``ROUTE_CROSS``; every other gid
    under ``ABL_CROSS`` with the gid as file-name prefix.
    """
    cell = f"seed{seed}_{src}_to_{tgt}.npz"
    if gid != "janus_full":
        return ABL_CROSS / f"{gid}__{cell}"
    return ROUTE_CROSS / f"janus_{cell}"
def _cell_score(gid: str, src: str, tgt: str, seed: int, fn_id: str):
    """AUROC of one variant on one (src, tgt, seed) cell.

    Returns ``None`` when the cell's npz does not exist or the score function
    cannot be applied to it.
    """
    if src == tgt:
        npz_path, loader = _within_path(gid, src, seed), _load_npz
    else:
        npz_path, loader = _cross_path(gid, src, tgt, seed), _load_cross_npz
    if not npz_path.exists():
        return None
    val, atk = loader(npz_path)
    scored = SCORE_FNS[fn_id](val, atk)
    if scored is None:
        return None
    benign_scores, attack_scores = scored
    return _auroc(benign_scores, attack_scores)
def _seed_summary(vals: list[float]):
    """Mean and 95% Student-t half-width of the per-seed AUROCs.

    ``None`` and NaN entries are dropped. With all 3 seeds present the
    tabulated factor ``T_975_N3`` is used. For any other count ``n >= 2`` the
    factor is the t quantile with ``n - 1`` degrees of freedom, not the
    normal 1.96, which is far too narrow for so few seeds.

    Returns:
        ``None`` if nothing remains; otherwise ``{"mean", "ci", "n"}``
        (``ci`` is 0.0 for a single value).
    """
    a = np.asarray([v for v in vals if v is not None and not np.isnan(v)])
    n = int(a.size)
    if n == 0:
        return None
    if n == 1:
        return {"mean": float(a[0]), "ci": 0.0, "n": 1}
    se = a.std(ddof=1) / np.sqrt(n)
    if n == 3:
        t_crit = T_975_N3
    else:
        from scipy.stats import t as student_t
        t_crit = float(student_t.ppf(0.975, df=n - 1))
    return {"mean": float(a.mean()),
            "ci": float(t_crit * se),
            "n": n}
def _fmt(c):
    """Render a cell summary as ``"mean ± ci"`` in percent (mean alone when ``n == 1``, "" for ``None``)."""
    if c is None:
        return ""
    mean_txt = f"{100 * c['mean']:.2f}"
    if c["n"] == 1:
        return mean_txt
    return f"{mean_txt} ± {100 * c['ci']:.2f}"
def main() -> None:
    """CLI entry point: score every B variant (plus JANUS-full) on the 3×3 cross matrix.

    Writes, into ``--out-dir`` (default ``ABL``):
      * ``ABLATION_CROSS_B_summary.md``: within mean / cross mean / cross worst per variant,
      * ``ABLATION_CROSS_B_full.md``: one 3×3 matrix per variant,
      * ``ABLATION_CROSS_B.json``: the same data in JSON.

    The markdown files are written as UTF-8 explicitly since they contain
    non-ASCII characters (±, →, ↓, ×, λ).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out-dir", type=Path, default=ABL)
    args = ap.parse_args()
    args.out_dir.mkdir(parents=True, exist_ok=True)

    full = {}  # gid -> (label, {(src, tgt) -> cell summary})
    for gid, label, _why, fn_id in B_VARIANTS:
        rows = {}
        for src in CROSS_DATASETS:
            for tgt in CROSS_DATASETS:
                vals = [_cell_score(gid, src, tgt, s, fn_id) for s in SEEDS]
                rows[(src, tgt)] = _seed_summary(vals)
        full[gid] = (label, rows)
        n_ok = sum(1 for v in rows.values() if v is not None)
        print(f"[ok] {label:20s} cells={n_ok}/{len(rows)}", flush=True)

    # Summary
    lines = ["# B-variant cross-dataset summary",
             "",
             f"3-seed mean ± 95% t-CI AUROC. Datasets = {CROSS_DATASETS}.",
             "All B variants share the same aggregator-fit-on-target-benign protocol as JANUS-full.",
             "",
             "| Variant | What removed | Within (3 cells) | Cross (6 cells) | Cross worst | Within − Cross |",
             "|---|---|---|---|---|---|"]
    for gid, label, why, _fn_id in B_VARIANTS:
        _, rows = full[gid]
        within = [v["mean"] for (s, t), v in rows.items() if s == t and v is not None]
        cross_pairs = [((s, t), v) for (s, t), v in rows.items() if s != t and v is not None]
        cross = [v["mean"] for _, v in cross_pairs]
        worst = min(cross_pairs, key=lambda x: x[1]["mean"], default=None)
        w = float(np.mean(within)) if within else float("nan")
        c = float(np.mean(cross)) if cross else float("nan")
        worst_str = ""
        if worst is not None:
            (s, t), v = worst
            # Separate source and target so the label reads "SRC→TGT".
            worst_str = f"{PRETTY[s]}→{PRETTY[t]}: {_fmt(v)}"
        gap = (w - c) * 100 if not np.isnan(w) and not np.isnan(c) else float("nan")
        lines.append(f"| {label} | {why} | {100 * w:.2f} | {100 * c:.2f} | {worst_str} | {gap:+.2f} |")
    summary_path = args.out_dir / "ABLATION_CROSS_B_summary.md"
    summary_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(f"[wrote] {summary_path}")

    # Full per-variant 3x3 matrices
    flines = ["# B-variant cross-dataset full matrices",
              "",
              "Per variant: 3×3 matrix (rows = source, columns = target). Diagonal italic.",
              "Each cell = 3-seed mean ± 95% t-CI AUROC (%).",
              ""]
    for gid, label, why, _fn_id in B_VARIANTS:
        _, rows = full[gid]
        flines.append(f"## {label} ({why})")
        flines.append("")
        header = ["Source ↓ / Target →"] + [PRETTY[d] for d in CROSS_DATASETS]
        flines.append("| " + " | ".join(header) + " |")
        flines.append("|" + "|".join("---" for _ in header) + "|")
        for src in CROSS_DATASETS:
            row = [f"**{PRETTY[src]}**"]
            for tgt in CROSS_DATASETS:
                txt = _fmt(rows[(src, tgt)])
                if src == tgt:
                    txt = f"_{txt}_"
                row.append(txt)
            flines.append("| " + " | ".join(row) + " |")
        flines.append("")
    full_path = args.out_dir / "ABLATION_CROSS_B_full.md"
    full_path.write_text("\n".join(flines), encoding="utf-8")
    print(f"[wrote] {full_path}")

    json_path = args.out_dir / "ABLATION_CROSS_B.json"
    json_path.write_text(json.dumps({
        # JSON keys must be strings, so (src, tgt) becomes "src->tgt".
        gid: {"label": label, "rows": {f"{s}->{t}": v for (s, t), v in rows.items()}}
        for gid, (label, rows) in full.items()
    }, indent=2, default=lambda o: None))
    print(f"[wrote] {json_path}")


if __name__ == "__main__":
    main()