Compare commits

..

4 Commits

Author SHA1 Message Date
539b8aaeaf gitignore: ignore rendered figure output dirs at repo root
Adds /unified_figures_*/ and /janus_figures_*/ — these are PDF/PNG outputs
of the figure-generation scripts under scripts/figures/, not source.
They live on the dev box alongside artifacts/ but should not enter the repo
(8.4MB of binaries currently sit in unified_figures_2026_04_26/).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 00:01:04 +08:00
0509ee2df9 figures: add JANUS mechanism figure scripts (trajectory + field view + score hist)
scripts/figures/ contains the per-dataset figure generators used to render
the JANUS mechanism figures (reverse-flow trajectory PCA, t=0.5 velocity
field view with sparse benign overlay, score-distribution histograms with
within-class fraction weighting). Outputs go to
artifacts/janus_mechanism_figures_<date>/ (gitignored under artifacts/).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 23:59:52 +08:00
0ccd758600 baselines: update Kitsune Path A to JANUS route_comparison checkpoints
Replaces stale phase25_* checkpoint paths with the current janus_<ds>_seed<S>
layout under route_comparison/, adds CICIoT2023 to PCAP_GLOBS / WITHIN_DIRS,
and removes the per-dataset n_atk caps so within-dataset eval uses the same
sample budget as JANUS phase1.

Adds cython (3.2.4) — required by Kitsune's KitNET cluster compile path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 23:59:40 +08:00
a6bcbbd299 ablation: add Group A (aggregator) + Group B (architecture) infrastructure
Extends MixedCFMConfig with 5 backwards-compatible flags (use_flow_token,
n_packet_tokens, disc_as_cont, cont_as_disc, and cont_n_bins) so existing
JANUS-full checkpoints load with 0 missing/unexpected keys.

Adds:
- 60 ablation training configs (5 variants × 4 datasets × 3 seeds)
- scripts/ablation/{generate_configs.py, run_groupB.sh, run_cross_groupB.sh,
  smoke_test.sh} — config generation + GPU drivers
- scripts/aggregate/aggregate_ablation{,_cross,_cross_B}.py — produces
  within-dataset and cross-dataset (3×3) ablation tables with 3-seed mean
  ± 95% t-CI plus optional paired DeLong p-values

README updated with ablation section pointing at
artifacts/ablation/ABLATION_SUMMARY.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 23:59:27 +08:00
81 changed files with 4748 additions and 100 deletions

4
.gitignore vendored
View File

@@ -26,4 +26,8 @@ Thumbs.db
/paper/
# rendered figure outputs (PDFs/PNGs at repo root from figure-generation runs)
/unified_figures_*/
/janus_figures_*/
*.tmp

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b1_noflow
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b1_noflow
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b1_noflow
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b1_noflow
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b1_noflow
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b1_noflow
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b1_noflow
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
device: auto
reference_mode: causal_packets
use_flow_token: false

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b1_noflow
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
device: auto
reference_mode: causal_packets
use_flow_token: false

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b1_noflow
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
device: auto
reference_mode: causal_packets
use_flow_token: false

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b1_noflow
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b1_noflow
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b1_noflow
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
use_flow_token: false

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b2_flowonly
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b2_flowonly
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b2_flowonly
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b2_flowonly
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b2_flowonly
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b2_flowonly
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b2_flowonly
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets
n_packet_tokens: 0

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b2_flowonly
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets
n_packet_tokens: 0

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b2_flowonly
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets
n_packet_tokens: 0

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b2_flowonly
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b2_flowonly
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b2_flowonly
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
n_packet_tokens: 0

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b3_allcont
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b3_allcont
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b3_allcont
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b3_allcont
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b3_allcont
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b3_allcont
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b3_allcont
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets
disc_as_cont: true

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b3_allcont
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets
disc_as_cont: true

View File

@@ -0,0 +1,36 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b3_allcont
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets
disc_as_cont: true

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b3_allcont
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b3_allcont
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,34 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b3_allcont
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto
disc_as_cont: true

View File

@@ -0,0 +1,37 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b4_alldisc
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,37 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b4_alldisc
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,37 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b4_alldisc
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b4_alldisc
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b4_alldisc
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b4_alldisc
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,37 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b4_alldisc
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
device: auto
reference_mode: causal_packets
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,37 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b4_alldisc
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
device: auto
reference_mode: causal_packets
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,37 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b4_alldisc
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
device: auto
reference_mode: causal_packets
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b4_alldisc
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b4_alldisc
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b4_alldisc
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 1.0
reference_mode: causal_packets
device: auto
cont_as_disc: true
n_disc_classes: 8

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed42_b5_nodisc
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed43_b5_nodisc
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicddos2019_seed44_b5_nodisc
source_store: /home/chy/JANUS/datasets/cicddos2019/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/cicddos2019/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicddos2019/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 20000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 10000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,33 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed42_b5_nodisc
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,33 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed43_b5_nodisc
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,33 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_cicids2017_seed44_b5_nodisc
packets_npz: /home/chy/JANUS/datasets/cicids2017/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/cicids2017/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/cicids2017/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed42_b5_nodisc
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed43_b5_nodisc
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets

View File

@@ -0,0 +1,35 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_ciciot2023_seed44_b5_nodisc
source_store: /home/chy/JANUS/datasets/ciciot2023/processed/full_store
flows_parquet: /home/chy/JANUS/datasets/ciciot2023/processed/full_store/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/ciciot2023/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: normal
val_cap: 10000
attack_cap: 20000
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
device: auto
reference_mode: causal_packets

View File

@@ -0,0 +1,33 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed42_b5_nodisc
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 42
data_seed: 42
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,33 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed43_b5_nodisc
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 43
data_seed: 43
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -0,0 +1,33 @@
save_dir: /home/chy/JANUS/artifacts/ablation/janus_iscxtor2016_seed44_b5_nodisc
packets_npz: /home/chy/JANUS/datasets/iscxtor2016/processed/packets.npz
flows_parquet: /home/chy/JANUS/datasets/iscxtor2016/processed/flows.parquet
flow_features_path: /home/chy/JANUS/datasets/iscxtor2016/processed/flow_features.parquet
flow_features_align: auto
T: 64
n_train: 10000
min_len: 2
seed: 44
data_seed: 44
train_ratio: 0.8
benign_label: nontor
d_model: 128
n_layers: 4
n_heads: 4
mlp_ratio: 4.0
time_dim: 64
token_dim: null
batch_size: 256
num_workers: 0
epochs: 50
lr: 0.0003
weight_decay: 0.01
grad_clip: 1.0
eval_every: 10
eval_n: 20000
eval_batch_size: 512
eval_n_steps: 8
sigma: 0.1
use_ot: true
lambda_disc: 0.0
reference_mode: causal_packets
device: auto

View File

@@ -20,7 +20,7 @@ def _device(arg: str) -> torch.device:
return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
return torch.device(arg)
def _score_batch(model, flow_z, cont_z, disc_int, lens, device, batch_size=256, n_steps=16):
def _score_batch(model, flow_z, cont_z, disc_int, lens, device, batch_size=256, n_steps=16, cont_bin_edges=None):
out: dict[str, list[np.ndarray]] = {}
for start in range(0, len(flow_z), batch_size):
sl = slice(start, start + batch_size)
@@ -29,8 +29,8 @@ def _score_batch(model, flow_z, cont_z, disc_int, lens, device, batch_size=256,
d = torch.from_numpy(disc_int[sl]).long().to(device)
l = torch.from_numpy(lens[sl]).long().to(device)
with torch.no_grad():
traj = model.trajectory_metrics(f, c, d, l, n_steps=n_steps)
nll = model.disc_nll_score(f, c, d, l)
traj = model.trajectory_metrics(f, c, d, l, n_steps=n_steps, cont_bin_edges=cont_bin_edges)
nll = model.disc_nll_score(f, c, d, l, cont_bin_edges=cont_bin_edges)
for src in (traj, nll):
for (k, v) in src.items():
out.setdefault(k, []).append(v.detach().cpu().numpy())
@@ -63,6 +63,10 @@ def main() -> None:
model = MixedTokenCFM(model_cfg).to(device)
model.load_state_dict(ckpt['model_state_dict'])
model.eval()
cont_bin_edges = None
if 'cont_bin_edges' in ckpt:
cont_bin_edges = torch.from_numpy(np.asarray(ckpt['cont_bin_edges'])).to(device)
print(f'[model] cont_bin_edges shape={tuple(cont_bin_edges.shape)} (B4 mode; src edges applied to target)')
cont_mean = np.asarray(ckpt['cont_mean'], dtype=np.float32)
cont_std = np.asarray(ckpt['cont_std'], dtype=np.float32)
flow_mean = np.asarray(ckpt['flow_mean'], dtype=np.float32)
@@ -140,11 +144,11 @@ def main() -> None:
a_flow_z = ((a_flow - flow_mean) / np.maximum(flow_std, 1e-06)).astype(np.float32)
t0 = time.time()
print('[eval] benign...')
b_scores = _score_batch(model, b_flow_z, b_cont, b_disc, b_len, device, batch_size=args.batch_size, n_steps=args.n_steps)
b_scores = _score_batch(model, b_flow_z, b_cont, b_disc, b_len, device, batch_size=args.batch_size, n_steps=args.n_steps, cont_bin_edges=cont_bin_edges)
print(f'[eval] benign done {time.time() - t0:.1f}s')
t0 = time.time()
print('[eval] attack...')
a_scores = _score_batch(model, a_flow_z, a_cont, a_disc, a_len, device, batch_size=args.batch_size, n_steps=args.n_steps)
a_scores = _score_batch(model, a_flow_z, a_cont, a_disc, a_len, device, batch_size=args.batch_size, n_steps=args.n_steps, cont_bin_edges=cont_bin_edges)
print(f'[eval] attack done {time.time() - t0:.1f}s')
keys = sorted(set(b_scores) & set(a_scores))
overall = {}

View File

@@ -18,7 +18,7 @@ def _device(arg: str) -> torch.device:
return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
return torch.device(arg)
def _score_batch(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray, disc_np: np.ndarray, len_np: np.ndarray, device: torch.device, *, batch_size: int, n_steps: int) -> dict[str, np.ndarray]:
def _score_batch(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray, disc_np: np.ndarray, len_np: np.ndarray, device: torch.device, *, batch_size: int, n_steps: int, cont_bin_edges: torch.Tensor | None = None) -> dict[str, np.ndarray]:
out: dict[str, list[np.ndarray]] = {}
for start in range(0, len(flow_np), batch_size):
sl = slice(start, start + batch_size)
@@ -27,8 +27,8 @@ def _score_batch(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray,
disc = torch.from_numpy(disc_np[sl]).long().to(device)
lens = torch.from_numpy(len_np[sl]).long().to(device)
with torch.no_grad():
traj = model.trajectory_metrics(flow, cont, disc, lens, n_steps=n_steps)
nll = model.disc_nll_score(flow, cont, disc, lens)
traj = model.trajectory_metrics(flow, cont, disc, lens, n_steps=n_steps, cont_bin_edges=cont_bin_edges)
nll = model.disc_nll_score(flow, cont, disc, lens, cont_bin_edges=cont_bin_edges)
for d in (traj, nll):
for (k, v) in d.items():
out.setdefault(k, []).append(v.detach().cpu().numpy())
@@ -65,7 +65,11 @@ def main() -> None:
model = MixedTokenCFM(model_cfg).to(device)
model.load_state_dict(ckpt['model_state_dict'])
model.eval()
print(f'[model] T={model_cfg.T} flow_dim={model_cfg.flow_dim}')
cont_bin_edges = None
if 'cont_bin_edges' in ckpt:
cont_bin_edges = torch.from_numpy(np.asarray(ckpt['cont_bin_edges'])).to(device)
print(f'[model] cont_bin_edges shape={tuple(cont_bin_edges.shape)} (B4 mode)')
print(f'[model] T={model_cfg.T} flow_dim={model_cfg.flow_dim} use_flow_token={model_cfg.use_flow_token} n_packet_tokens={model_cfg.n_packet_tokens} disc_as_cont={model_cfg.disc_as_cont} cont_as_disc={model_cfg.cont_as_disc}')
data = load_mixed_data(packets_npz=Path(cfg['packets_npz']) if cfg.get('packets_npz') else None, source_store=Path(cfg['source_store']) if cfg.get('source_store') else None, flows_parquet=Path(cfg['flows_parquet']), flow_features_path=Path(cfg['flow_features_path']), flow_features_align=str(cfg.get('flow_features_align', 'auto')), T=int(cfg['T']), split_seed=int(cfg.get('data_seed', cfg.get('seed', 42))), train_ratio=float(cfg.get('train_ratio', 0.8)), benign_label=str(cfg.get('benign_label', 'normal')), min_len=int(cfg.get('min_len', 2)), attack_cap=int(cfg['attack_cap']) if cfg.get('attack_cap') else None, val_cap=int(cfg['val_cap']) if cfg.get('val_cap') else None)
print(f'[data] val={len(data.val_flow):,} attack={len(data.attack_flow):,}')
rng = np.random.default_rng(0)
@@ -81,10 +85,10 @@ def main() -> None:
atk_labels = atk_labels[idx]
print(f'[eval] scoring val={len(val_flow):,} atk={len(atk_flow):,}')
t0 = time.time()
val = _score_batch(model, val_flow, val_cont, val_disc, val_len, device, batch_size=args.batch_size, n_steps=args.n_steps)
val = _score_batch(model, val_flow, val_cont, val_disc, val_len, device, batch_size=args.batch_size, n_steps=args.n_steps, cont_bin_edges=cont_bin_edges)
print(f'[eval] val done {time.time() - t0:.1f}s')
t0 = time.time()
atk = _score_batch(model, atk_flow, atk_cont, atk_disc, atk_len, device, batch_size=args.batch_size, n_steps=args.n_steps)
atk = _score_batch(model, atk_flow, atk_cont, atk_disc, atk_len, device, batch_size=args.batch_size, n_steps=args.n_steps, cont_bin_edges=cont_bin_edges)
print(f'[eval] atk done {time.time() - t0:.1f}s')
keys = sorted(set(val) & set(atk))
overall: dict[str, dict[str, float]] = {}

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
import math
from dataclasses import dataclass, field
from dataclasses import dataclass
import torch
import torch.nn as nn
import torch.nn.functional as F
@@ -19,6 +19,7 @@ AdaLNBlock = _unified.AdaLNBlock
SinusoidalTimeEmb = _unified.SinusoidalTimeEmb
_sinkhorn_coupling = _unified._sinkhorn_coupling
@dataclass
class MixedCFMConfig:
T: int = 64
@@ -40,6 +41,11 @@ class MixedCFMConfig:
lambda_disc: float = 1.0
disc_path: str = 'uniform'
disc_embed_scale: float = 1.0
# ---- B-group ablation flags (defaults preserve JANUS-full behavior) ----
use_flow_token: bool = True # B1: False removes the [FLOW] token
n_packet_tokens: int = -1 # B2: 0 removes packet tokens entirely; -1 = use cfg.T
disc_as_cont: bool = False # B3: feed 6 disc bits through CFM head as continuous values
cont_as_disc: bool = False # B4: quantize 3 cont channels into n_disc_classes bins (mask-pred only)
def __post_init__(self) -> None:
if len(self.cont_pkt_idx) != self.n_cont_pkt:
@@ -48,10 +54,13 @@ class MixedCFMConfig:
raise ValueError('disc_pkt_idx length mismatch n_disc_pkt')
if self.disc_path != 'uniform':
raise NotImplementedError(f'disc_path={self.disc_path}')
if self.disc_as_cont and self.cont_as_disc:
raise ValueError('disc_as_cont and cont_as_disc are mutually exclusive')
class MixedVelocity(nn.Module):
def __init__(self, token_dim: int, seq_len: int, n_disc: int, n_classes: int, d_model: int=128, n_layers: int=4, n_heads: int=4, mlp_ratio: float=4.0, time_dim: int=64, reference_mode: str | None=None) -> None:
def __init__(self, token_dim: int, seq_len: int, n_disc: int, n_classes: int, d_model: int=128, n_layers: int=4, n_heads: int=4, mlp_ratio: float=4.0, time_dim: int=64, reference_mode: str | None=None, has_flow_token: bool=True) -> None:
super().__init__()
if reference_mode not in (None, 'causal_packets', 'causal_all'):
raise ValueError(f'reference_mode={reference_mode!r}')
@@ -60,6 +69,7 @@ class MixedVelocity(nn.Module):
self.n_disc = n_disc
self.n_classes = n_classes
self.reference_mode = reference_mode
self.has_flow_token = has_flow_token
self.input_proj = nn.Linear(token_dim, d_model)
self.pos_emb = nn.Parameter(torch.zeros(1, seq_len, d_model))
self.type_emb = nn.Embedding(2, d_model)
@@ -70,12 +80,15 @@ class MixedVelocity(nn.Module):
self.blocks = nn.ModuleList([AdaLNBlock(d_model, n_heads, mlp_ratio, cond_dim=d_model) for _ in range(n_layers)])
self.out_norm = nn.LayerNorm(d_model, elementwise_affine=False)
self.head_v = nn.Linear(d_model, token_dim)
self.head_disc = nn.Linear(d_model, n_disc * n_classes)
# head_disc only meaningful when n_disc > 0
out_disc = max(n_disc * n_classes, 1)
self.head_disc = nn.Linear(d_model, out_disc)
for layer in (self.head_v, self.head_disc):
nn.init.zeros_(layer.weight)
nn.init.zeros_(layer.bias)
type_ids = torch.ones(seq_len, dtype=torch.long)
type_ids[0] = 0
if has_flow_token and seq_len >= 1:
type_ids[0] = 0
self.register_buffer('type_ids', type_ids, persistent=False)
def _attn_mask(self, L: int, device: torch.device) -> torch.Tensor | None:
@@ -83,8 +96,11 @@ class MixedVelocity(nn.Module):
return None
if self.reference_mode == 'causal_packets':
mask = torch.zeros((L, L), dtype=torch.bool, device=device)
if L > 1:
mask[1:, 1:] = torch.triu(torch.ones(L - 1, L - 1, dtype=torch.bool, device=device), diagonal=1)
offset = 1 if self.has_flow_token else 0
if L > offset:
M = L - offset
if M > 1:
mask[offset:, offset:] = torch.triu(torch.ones(M, M, dtype=torch.bool, device=device), diagonal=1)
return mask
return torch.triu(torch.ones(L, L, dtype=torch.bool, device=device), diagonal=1)
@@ -100,143 +116,339 @@ class MixedVelocity(nn.Module):
h = block(h, cond, key_padding_mask, attn_mask=attn_mask)
h = self.out_norm(h)
v = self.head_v(h)
d = self.head_disc(h).view(B, L, self.n_disc, self.n_classes)
if self.n_disc > 0:
d = self.head_disc(h).view(B, L, self.n_disc, self.n_classes)
else:
d = h.new_zeros((B, L, 0, self.n_classes))
return (v, d)
class MixedTokenCFM(nn.Module):
def __init__(self, cfg: MixedCFMConfig) -> None:
    """Build the mixed-token CFM model described by ``cfg``.

    Derives the effective sequence layout and per-packet feature split from
    the B-group ablation flags, then constructs the velocity network once.

    Attributes set here:
        eff_T: number of packet tokens (``cfg.T`` when ``n_packet_tokens < 0``).
        eff_n_cont / eff_n_disc: per-packet channel counts routed to the
            continuous (CFM) head and the discrete head respectively.
        token_dim: width of one token, ``1`` type-flag slot plus the payload.
        seq_len: optional [FLOW] token plus ``eff_T`` packet tokens.

    Raises:
        ValueError: if both the [FLOW] token and all packet tokens are
            disabled, or if ``cfg.token_dim`` cannot hold the widest payload.
    """
    super().__init__()
    self.cfg = cfg

    # Effective packet count (B2: n_packet_tokens=0 removes packet tokens; <0 means cfg.T).
    self.eff_T = cfg.T if cfg.n_packet_tokens < 0 else int(cfg.n_packet_tokens)
    if not cfg.use_flow_token and self.eff_T == 0:
        raise ValueError('cannot disable both FLOW token and packet tokens')

    # Effective per-packet feature split.
    n_pkt_channels = cfg.n_cont_pkt + cfg.n_disc_pkt
    if cfg.disc_as_cont:
        # B3: every channel is continuous (CFM head only).
        self.eff_n_cont = n_pkt_channels
        self.eff_n_disc = 0
    elif cfg.cont_as_disc:
        # B4: every channel is discrete (discrete head only).
        self.eff_n_cont = 0
        self.eff_n_disc = n_pkt_channels
    else:
        self.eff_n_cont = cfg.n_cont_pkt
        self.eff_n_disc = cfg.n_disc_pkt
    cont_size = self.eff_n_cont + self.eff_n_disc

    # Token layout: [type_flag(1) | flow_dim or cont_size]
    min_token_dim = 1 + max(cfg.flow_dim, cont_size)
    self.token_dim = cfg.token_dim or min_token_dim
    if self.token_dim < min_token_dim:
        raise ValueError('token_dim too small')

    self.seq_len = (1 if cfg.use_flow_token else 0) + self.eff_T

    # Construct the velocity network exactly once, with the effective sizes;
    # building a throwaway network first would waste memory and consume RNG.
    self.velocity = MixedVelocity(
        token_dim=self.token_dim,
        seq_len=self.seq_len,
        n_disc=self.eff_n_disc,
        n_classes=cfg.n_disc_classes,
        d_model=cfg.d_model,
        n_layers=cfg.n_layers,
        n_heads=cfg.n_heads,
        mlp_ratio=cfg.mlp_ratio,
        time_dim=cfg.time_dim,
        reference_mode=cfg.reference_mode,
        has_flow_token=cfg.use_flow_token,
    )
# ------------------------------------------------------------------ #
# token assembly #
# ------------------------------------------------------------------ #
def _embed_disc(self, x_disc_int: torch.Tensor) -> torch.Tensor:
n = self.cfg.n_disc_classes
s = self.cfg.disc_embed_scale
return (x_disc_int.float() - 0.5) * s
if n <= 1:
return x_disc_int.float() * 0.0
# Map integers in [0, n-1] to centered floats in [-s/2, +s/2].
# Backwards-compatible with old (x - 0.5)*s formula when n=2.
return (x_disc_int.float() / (n - 1) - 0.5) * s
# Accessor for the configured flow-feature width (cfg.flow_dim).
def _flow_dim(self) -> int:
return self.cfg.flow_dim
def build_tokens(self, flow: torch.Tensor, packets_cont: torch.Tensor, x_disc_t_int: torch.Tensor) -> torch.Tensor:
    """Assemble the token tensor of shape [B, seq_len, token_dim].

    Token layout is ``[type_flag(1) | payload | zero padding]``. The optional
    FLOW token (type flag -1) comes first, followed by ``eff_T`` packet tokens
    (type flag +1) whose payload is ``[cont(eff_n_cont) | disc(eff_n_disc)]``.

    Args:
        flow: [B, flow_dim] flow-level features; also supplies dtype/device.
        packets_cont: [B, eff_T, eff_n_cont] (may be empty in last dim).
        x_disc_t_int: [B, eff_T, eff_n_disc] integer ids in
            [0, n_disc_classes-1]; embedded through ``_embed_disc``.

    Returns:
        Tensor [B, seq_len, token_dim]; slots not written stay zero.
    """
    B = flow.shape[0]
    T = self.eff_T
    z = flow.new_zeros((B, self.seq_len, self.token_dim))
    cur = 0  # index of the first packet token
    if self.cfg.use_flow_token:
        z[:, 0, 0] = -1.0  # type flag
        z[:, 0, 1:1 + self._flow_dim()] = flow
        cur = 1
    if T > 0:
        z[:, cur:cur + T, 0] = 1.0  # type flag
        base = 1
        if self.eff_n_cont > 0:
            z[:, cur:cur + T, base:base + self.eff_n_cont] = packets_cont
            base += self.eff_n_cont
        if self.eff_n_disc > 0:
            z[:, cur:cur + T, base:base + self.eff_n_disc] = self._embed_disc(x_disc_t_int)
    return z
def key_padding_mask(self, lens: torch.Tensor) -> torch.Tensor:
    """Return a boolean mask [B, seq_len] that is True at padded positions.

    The FLOW token (when ``cfg.use_flow_token``) is always real. Packet
    position ``i`` (0 <= i < eff_T) is real when ``i < lens[b]``.

    Args:
        lens: [B] integer tensor of real packet counts per sample.
    """
    B = lens.shape[0]
    device = lens.device
    T = self.eff_T
    pieces = []
    if self.cfg.use_flow_token:
        pieces.append(torch.ones(B, 1, dtype=torch.bool, device=device))
    if T > 0:
        idx = torch.arange(T, device=device)[None, :]
        pieces.append(idx < lens[:, None])
    # With neither flow nor packet tokens the sequence is empty.
    real = torch.cat(pieces, dim=1) if pieces else torch.ones(B, 0, dtype=torch.bool, device=device)
    return ~real
# Float version of the "real position" mask: 1.0 where key_padding_mask(lens)
# is False (real token), 0.0 where it is True (padding).
def _loss_mask(self, lens: torch.Tensor) -> torch.Tensor:
return (~self.key_padding_mask(lens)).float()
# NOTE(review): a second compute_loss definition, accepting cont_bin_edges,
# appears further down and would shadow this one — this header and its two
# statements look like pre-change residue; confirm and remove.
def compute_loss(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, *, return_components: bool=False) -> torch.Tensor | dict[str, torch.Tensor]:
(B, T, _) = packets_cont.shape
device = packets_cont.device
# ------------------------------------------------------------------ #
# B4 helper: quantize cont -> integer bins #
# ------------------------------------------------------------------ #
def quantize_cont(self, packets_cont: torch.Tensor, bin_edges: torch.Tensor) -> torch.Tensor:
    """Quantize continuous packet features into integer bin ids (B4 helper).

    Args:
        packets_cont: [B, T, n_cont_orig] continuous features (already z-scored).
        bin_edges: [n_cont_orig, n_classes-1] per-channel bin boundaries; each
            row must be sorted ascending, as ``torch.bucketize`` requires.

    Returns:
        int64 tensor [B, T, n_cont_orig] with values in [0, n_disc_classes-1].

    Raises:
        ValueError: if ``bin_edges`` is not 2-D or its first dimension does not
            match the channel count of ``packets_cont``.
    """
    B, T, C = packets_cont.shape
    if bin_edges.dim() != 2 or bin_edges.shape[0] != C:
        raise ValueError(
            f'bin_edges must have shape [{C}, n_classes-1]; got {tuple(bin_edges.shape)}'
        )
    # bucketize needs values and boundaries on the same device.
    edges_all = bin_edges.to(device=packets_cont.device)
    out = torch.zeros((B, T, C), dtype=torch.long, device=packets_cont.device)
    for c in range(C):
        # bucketize returns an index in 0..n for n boundaries.
        out[:, :, c] = torch.bucketize(packets_cont[:, :, c].contiguous(), edges_all[c])
    # Guard against edge tables with more boundaries than n_disc_classes - 1.
    out.clamp_(0, self.cfg.n_disc_classes - 1)
    return out
# ------------------------------------------------------------------ #
# Loss #
# ------------------------------------------------------------------ #
def compute_loss(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, *, return_components: bool=False, cont_bin_edges: torch.Tensor | None=None) -> torch.Tensor | dict[str, torch.Tensor]:
    """Training loss: CFM regression on continuous slots plus mask-pred CE on discrete slots.

    The effective continuous/discrete split depends on the ablation mode:
    ``cfg.disc_as_cont`` folds the embedded discrete ids into the continuous
    block (no discrete head), ``cfg.cont_as_disc`` quantizes the continuous
    features with ``cont_bin_edges`` (no continuous packet block), otherwise
    the inputs are used as given.

    Args:
        flow: [B, flow_dim] flow-level features.
        packets_cont: [B, T, n_cont_pkt] continuous packet features.
        packets_disc: [B, T, n_disc_pkt] integer discrete packet features.
        lens: [B] real packet counts, used for the padding mask.
        return_components: when True return a dict with ``total`` and detached
            ``main`` / ``aux_disc`` terms (``aux_flow`` / ``aux_packet`` are zeros).
        cont_bin_edges: per-channel bin edges; required when
            ``cfg.cont_as_disc`` and there are packet tokens.

    Returns:
        Scalar loss ``L_cont + cfg.lambda_disc * L_disc``, or the component dict.

    Raises:
        ValueError: if ``cfg.cont_as_disc`` is set, ``eff_T > 0`` and
            ``cont_bin_edges`` is None.
    """
    cfg = self.cfg
    B = flow.shape[0]
    T = self.eff_T
    device = flow.device

    # Resolve effective cont/disc tensors per ablation mode.
    if cfg.disc_as_cont:
        if T > 0:
            disc_as_cont_float = self._embed_disc(packets_disc)
            eff_cont = torch.cat([packets_cont, disc_as_cont_float], dim=-1) if cfg.n_cont_pkt > 0 else disc_as_cont_float
        else:
            eff_cont = packets_cont.new_zeros((B, 0, 0))
        eff_disc_int = torch.zeros((B, T, 0), dtype=torch.long, device=device)
    elif cfg.cont_as_disc:
        if T > 0:
            if cont_bin_edges is None:
                raise ValueError('cont_as_disc requires cont_bin_edges')
            cont_int = self.quantize_cont(packets_cont, cont_bin_edges)
            eff_disc_int = torch.cat([cont_int, packets_disc.long()], dim=-1)
        else:
            eff_disc_int = torch.zeros((B, 0, self.eff_n_disc), dtype=torch.long, device=device)
        eff_cont = flow.new_zeros((B, T, 0))
    else:
        eff_cont = packets_cont if T > 0 else packets_cont.new_zeros((B, 0, cfg.n_cont_pkt))
        eff_disc_int = packets_disc.long() if T > 0 else torch.zeros((B, 0, cfg.n_disc_pkt), dtype=torch.long, device=device)

    # Data tokens x_1: discrete slots are built from zero ids here; the
    # corrupted discrete embeddings are written into x_t further down.
    zero_disc = torch.zeros_like(eff_disc_int)
    x_1_cont = self.build_tokens(flow, eff_cont, zero_disc)
    mask = self._loss_mask(lens)
    kpm = mask == 0
    x_0_cont = torch.randn_like(x_1_cont)

    if cfg.use_ot:
        # Re-pair noise and data samples; everything derived from the data
        # side must follow the same permutation.
        flat0 = (x_0_cont * mask[:, :, None]).reshape(B, -1)
        flat1 = (x_1_cont * mask[:, :, None]).reshape(B, -1)
        col = _sinkhorn_coupling(torch.cdist(flat0.float(), flat1.float()))
        x_1_cont = x_1_cont[col]
        eff_cont = eff_cont[col] if eff_cont.numel() > 0 else eff_cont
        eff_disc_int = eff_disc_int[col] if eff_disc_int.numel() > 0 else eff_disc_int
        flow = flow[col]
        lens = lens[col]
        mask = self._loss_mask(lens)
        kpm = mask == 0

    t = torch.rand(B, device=device)
    x_t_cont = (1.0 - t[:, None, None]) * x_0_cont + t[:, None, None] * x_1_cont
    if cfg.sigma > 0:
        std = cfg.sigma * torch.sqrt(t * (1.0 - t))[:, None, None]
        x_t_cont = x_t_cont + std * torch.randn_like(x_t_cont)
    target_cont = x_1_cont - x_0_cont

    # Packet tokens start after the optional FLOW token; inside a packet token
    # the layout is [type(1) | cont(eff_n_cont) | disc(eff_n_disc) | pad...].
    has_disc = T > 0 and self.eff_n_disc > 0
    cur_offset = 1 if cfg.use_flow_token else 0
    disc_lo = 1 + self.eff_n_cont
    disc_hi = disc_lo + self.eff_n_disc

    # Disc corruption schedule (mask-pred): keep each true label with probability t.
    if has_disc:
        u = torch.rand(B, T, self.eff_n_disc, device=device)
        keep = u < t[:, None, None]
        rand_disc = torch.randint(0, cfg.n_disc_classes, eff_disc_int.shape, device=device)
        x_disc_t = torch.where(keep, eff_disc_int, rand_disc)
        x_t_full = x_t_cont.clone()
        x_t_full[:, cur_offset:cur_offset + T, disc_lo:disc_hi] = self._embed_disc(x_disc_t)
    else:
        x_t_full = x_t_cont
        keep = None

    (v_pred, d_logits) = self.velocity(x_t_full, t, key_padding_mask=kpm)

    # CFM regression loss on cont slots (disc slots are excluded).
    v_err = (v_pred - target_cont).square()
    if has_disc:
        v_err[:, cur_offset:cur_offset + T, disc_lo:disc_hi] = 0.0
    v_per_token = v_err.mean(dim=-1)
    per_sample = (v_per_token * mask).sum(dim=-1) / mask.sum(dim=-1).clamp_min(1.0)
    L_cont = per_sample.mean()

    # Mask-pred CE on corrupted disc positions of real packets only.
    if has_disc:
        pkt_logits = d_logits[:, cur_offset:cur_offset + T]
        pkt_real = mask[:, cur_offset:cur_offset + T].bool()
        corrupt = ~keep & pkt_real[:, :, None]
        flat_logits = pkt_logits.reshape(-1, cfg.n_disc_classes)
        flat_targets = eff_disc_int.reshape(-1).long()
        flat_ce = F.cross_entropy(flat_logits, flat_targets, reduction='none')
        flat_ce = flat_ce.view(B, T, self.eff_n_disc)
        flat_ce = flat_ce * corrupt.float()
        denom = corrupt.float().sum().clamp_min(1.0)
        L_disc = flat_ce.sum() / denom
    else:
        L_disc = L_cont.new_zeros(())

    total = L_cont + cfg.lambda_disc * L_disc
    if return_components:
        return {'total': total, 'main': L_cont.detach(), 'aux_disc': L_disc.detach(),
                'aux_flow': L_cont.new_zeros(()), 'aux_packet': L_cont.new_zeros(())}
    return total
# ------------------------------------------------------------------ #
# Scoring #
# ------------------------------------------------------------------ #
@torch.no_grad()
def trajectory_metrics(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, n_steps: int=16, cont_bin_edges: torch.Tensor | None=None) -> dict[str, torch.Tensor]:
    """Integrate the velocity field backwards from t=1 and return terminal-norm scores.

    Runs ``n_steps`` explicit Euler steps ``z <- z - v * dt`` with
    ``t = 1 - k * dt``, keeping the discrete embedding slots of packet tokens
    frozen at their input values, then reports RMS-style norms of the result
    with the discrete slots zeroed.

    Args:
        flow: [B, flow_dim] flow-level features.
        packets_cont: [B, T, n_cont_pkt] continuous packet features.
        packets_disc: [B, T, n_disc_pkt] integer discrete packet features.
        lens: [B] real packet counts.
        n_steps: number of Euler steps.
        cont_bin_edges: required when ``cfg.cont_as_disc`` and ``eff_T > 0``.

    Returns:
        Dict with ``terminal_norm``; plus ``terminal_flow`` when the FLOW
        token is enabled and ``terminal_packet`` when ``eff_T > 0``.

    Raises:
        ValueError: if ``cfg.cont_as_disc`` needs bin edges and none are given.
    """
    cfg = self.cfg
    B = flow.shape[0]
    T = self.eff_T

    # Build effective cont / disc tensors per ablation mode.
    if cfg.disc_as_cont:
        if T > 0:
            disc_float = self._embed_disc(packets_disc)
            eff_cont = torch.cat([packets_cont, disc_float], dim=-1) if cfg.n_cont_pkt > 0 else disc_float
        else:
            eff_cont = packets_cont.new_zeros((B, 0, 0))
        eff_disc_int = torch.zeros((B, T, 0), dtype=torch.long, device=flow.device)
    elif cfg.cont_as_disc:
        if T > 0:
            if cont_bin_edges is None:
                raise ValueError('cont_as_disc requires cont_bin_edges at scoring time')
            cont_int = self.quantize_cont(packets_cont, cont_bin_edges)
            eff_disc_int = torch.cat([cont_int, packets_disc.long()], dim=-1)
        else:
            eff_disc_int = torch.zeros((B, 0, 0), dtype=torch.long, device=flow.device)
        eff_cont = flow.new_zeros((B, T, 0))
    else:
        eff_cont = packets_cont if T > 0 else packets_cont.new_zeros((B, 0, cfg.n_cont_pkt))
        eff_disc_int = packets_disc.long() if T > 0 else torch.zeros((B, 0, cfg.n_disc_pkt), dtype=torch.long, device=flow.device)

    z = self.build_tokens(flow, eff_cont, eff_disc_int)
    mask = self._loss_mask(lens)
    kpm = mask == 0
    dt = 1.0 / n_steps

    # Disc embed slot bounds (within token vector) for "freeze disc during ODE".
    cur_offset = 1 if cfg.use_flow_token else 0
    disc_start_in_token = 1 + self.eff_n_cont
    disc_end_in_token = disc_start_in_token + self.eff_n_disc
    has_disc = self.eff_n_disc > 0 and T > 0
    disc_embed = z[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token].clone() if has_disc else None

    for k in range(n_steps):
        t_val = 1.0 - k * dt
        t = torch.full((B,), t_val, device=z.device)
        (v, _) = self.velocity(z, t, key_padding_mask=kpm)
        if has_disc:
            v[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token] = 0.0
        z = z - v * dt
        if disc_embed is not None:
            # Re-pin the discrete slots so they stay exactly at their input values.
            z[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token] = disc_embed

    # Terminal-norm scores; zero the discrete embed slots so they don't pollute.
    z_real = z * mask[:, :, None]
    z_cont = z_real.clone()
    if has_disc:
        z_cont[:, cur_offset:cur_offset + T, disc_start_in_token:disc_end_in_token] = 0.0
    full_norm = z_cont.reshape(B, -1).norm(dim=-1) / (mask.sum(dim=-1) * self.token_dim).clamp_min(1.0).sqrt()
    out = {'terminal_norm': full_norm}
    if cfg.use_flow_token:
        out['terminal_flow'] = z_cont[:, 0].norm(dim=-1) / math.sqrt(self.token_dim)
    if T > 0:
        pkt_mask = mask[:, cur_offset:cur_offset + T]
        packet_count = pkt_mask.sum(dim=-1).clamp_min(1.0)
        out['terminal_packet'] = (z_cont[:, cur_offset:cur_offset + T] * pkt_mask[:, :, None]).reshape(B, -1).norm(dim=-1) / (packet_count * self.token_dim).sqrt()
    return out
@torch.no_grad()
def disc_nll_score(self, flow: torch.Tensor, packets_cont: torch.Tensor, packets_disc: torch.Tensor, lens: torch.Tensor, t_eval: float=0.5, cont_bin_edges: torch.Tensor | None=None) -> dict[str, torch.Tensor]:
    """Score samples by the discrete head's cross-entropy at a fixed time ``t_eval``.

    Args:
        flow: [B, flow_dim] flow-level features.
        packets_cont: [B, T, n_cont_pkt] continuous packet features.
        packets_disc: [B, T, n_disc_pkt] integer discrete packet features.
        lens: [B] real packet counts.
        t_eval: time value passed to the velocity network.
        cont_bin_edges: required when ``cfg.cont_as_disc``.

    Returns:
        ``{}`` when there is no discrete head to score (``eff_T == 0`` or
        ``eff_n_disc == 0``). Otherwise ``disc_nll_total`` (CE summed over
        channels, averaged over real packets) and one ``disc_nll_ch<idx>``
        entry per scored channel.

    Raises:
        ValueError: if ``cfg.cont_as_disc`` is set and ``cont_bin_edges`` is None.
    """
    cfg = self.cfg
    B = flow.shape[0]
    T = self.eff_T
    device = flow.device
    if T == 0 or self.eff_n_disc == 0:
        return {}  # no disc head to score

    # Build effective disc ids (and matching channel labels) per mode.
    if cfg.cont_as_disc:
        if cont_bin_edges is None:
            raise ValueError('cont_as_disc requires cont_bin_edges at scoring time')
        cont_int = self.quantize_cont(packets_cont, cont_bin_edges)
        eff_disc_int = torch.cat([cont_int, packets_disc.long()], dim=-1)
        eff_cont = flow.new_zeros((B, T, 0))
        ch_idx_list = list(cfg.cont_pkt_idx) + list(cfg.disc_pkt_idx)
    else:
        eff_disc_int = packets_disc.long()
        eff_cont = packets_cont
        ch_idx_list = list(cfg.disc_pkt_idx)

    mask = self._loss_mask(lens)
    kpm = mask == 0
    z = self.build_tokens(flow, eff_cont, eff_disc_int)
    t = torch.full((B,), float(t_eval), device=device)
    (_, d_logits) = self.velocity(z, t, key_padding_mask=kpm)

    # Packet tokens start after the optional FLOW token.
    cur_offset = 1 if cfg.use_flow_token else 0
    pkt_logits = d_logits[:, cur_offset:cur_offset + T]
    flat_logits = pkt_logits.reshape(-1, cfg.n_disc_classes)
    flat_targets = eff_disc_int.reshape(-1).long()
    ce = F.cross_entropy(flat_logits, flat_targets, reduction='none')
    ce = ce.view(B, T, self.eff_n_disc)
    pkt_real = mask[:, cur_offset:cur_offset + T].bool().float()
    per_sample = (ce.sum(dim=-1) * pkt_real).sum(dim=-1) / pkt_real.sum(dim=-1).clamp_min(1.0)
    per_ch = (ce * pkt_real[:, :, None]).sum(dim=1) / pkt_real.sum(dim=1).clamp_min(1.0)[:, None]
    out = {'disc_nll_total': per_sample}
    for c, idx in enumerate(ch_idx_list):
        out[f'disc_nll_ch{idx}'] = per_ch[:, c]
    return out

View File

@@ -21,7 +21,7 @@ def _device(arg: str) -> torch.device:
return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
return torch.device(arg)
def _batch_score(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray, disc_np: np.ndarray, len_np: np.ndarray, device: torch.device, *, batch_size: int, n_steps: int) -> dict[str, np.ndarray]:
def _batch_score(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray, disc_np: np.ndarray, len_np: np.ndarray, device: torch.device, *, batch_size: int, n_steps: int, cont_bin_edges: torch.Tensor | None = None) -> dict[str, np.ndarray]:
out: dict[str, list[np.ndarray]] = {}
model.eval()
for start in range(0, len(flow_np), batch_size):
@@ -30,14 +30,14 @@ def _batch_score(model: MixedTokenCFM, flow_np: np.ndarray, cont_np: np.ndarray,
cont = torch.from_numpy(cont_np[sl]).float().to(device)
disc = torch.from_numpy(disc_np[sl]).long().to(device)
lens = torch.from_numpy(len_np[sl]).long().to(device)
m = model.trajectory_metrics(flow, cont, disc, lens, n_steps=n_steps)
d = model.disc_nll_score(flow, cont, disc, lens)
m = model.trajectory_metrics(flow, cont, disc, lens, n_steps=n_steps, cont_bin_edges=cont_bin_edges)
d = model.disc_nll_score(flow, cont, disc, lens, cont_bin_edges=cont_bin_edges)
for src in (m, d):
for (k, v) in src.items():
out.setdefault(k, []).append(v.detach().cpu().numpy())
return {k: np.concatenate(v, axis=0) for (k, v) in out.items()}
def _quick_eval(model: MixedTokenCFM, data: MixedData, device: torch.device, cfg: dict[str, Any]) -> dict[str, float]:
def _quick_eval(model: MixedTokenCFM, data: MixedData, device: torch.device, cfg: dict[str, Any], cont_bin_edges: torch.Tensor | None = None) -> dict[str, float]:
n_eval = int(cfg.get('eval_n', 2000))
rng = np.random.default_rng(0)
@@ -46,8 +46,8 @@ def _quick_eval(model: MixedTokenCFM, data: MixedData, device: torch.device, cfg
return rng.choice(n, m, replace=False)
vi = pick(len(data.val_flow))
ai = pick(len(data.attack_flow))
v = _batch_score(model, data.val_flow[vi], data.val_cont[vi], data.val_disc[vi], data.val_len[vi], device, batch_size=int(cfg.get('eval_batch_size', 512)), n_steps=int(cfg.get('eval_n_steps', 8)))
a = _batch_score(model, data.attack_flow[ai], data.attack_cont[ai], data.attack_disc[ai], data.attack_len[ai], device, batch_size=int(cfg.get('eval_batch_size', 512)), n_steps=int(cfg.get('eval_n_steps', 8)))
v = _batch_score(model, data.val_flow[vi], data.val_cont[vi], data.val_disc[vi], data.val_len[vi], device, batch_size=int(cfg.get('eval_batch_size', 512)), n_steps=int(cfg.get('eval_n_steps', 8)), cont_bin_edges=cont_bin_edges)
a = _batch_score(model, data.attack_flow[ai], data.attack_cont[ai], data.attack_disc[ai], data.attack_len[ai], device, batch_size=int(cfg.get('eval_batch_size', 512)), n_steps=int(cfg.get('eval_n_steps', 8)), cont_bin_edges=cont_bin_edges)
y = np.concatenate([np.zeros(len(vi)), np.ones(len(ai))])
out: dict[str, float] = {}
for k in sorted(v.keys()):
@@ -73,9 +73,36 @@ def train(cfg: dict[str, Any]) -> Path:
ds = TensorDataset(torch.from_numpy(tr_f).float(), torch.from_numpy(tr_c).float(), torch.from_numpy(tr_d).long(), torch.from_numpy(tr_l).long())
loader = DataLoader(ds, batch_size=int(cfg['batch_size']), shuffle=True, drop_last=True, num_workers=int(cfg.get('num_workers', 0)), pin_memory=device.type == 'cuda')
print(f'[data] training on {len(ds):,} flows')
model_cfg = MixedCFMConfig(T=data.T, flow_dim=data.flow_dim, token_dim=cfg.get('token_dim'), d_model=int(cfg['d_model']), n_layers=int(cfg['n_layers']), n_heads=int(cfg['n_heads']), mlp_ratio=float(cfg.get('mlp_ratio', 4.0)), time_dim=int(cfg.get('time_dim', 64)), sigma=float(cfg.get('sigma', 0.1)), use_ot=bool(cfg.get('use_ot', False)), reference_mode=cfg.get('reference_mode'), lambda_disc=float(cfg.get('lambda_disc', 1.0)))
n_disc_classes = int(cfg.get('n_disc_classes', 2))
model_cfg = MixedCFMConfig(
T=data.T, flow_dim=data.flow_dim, token_dim=cfg.get('token_dim'),
d_model=int(cfg['d_model']), n_layers=int(cfg['n_layers']), n_heads=int(cfg['n_heads']),
mlp_ratio=float(cfg.get('mlp_ratio', 4.0)), time_dim=int(cfg.get('time_dim', 64)),
sigma=float(cfg.get('sigma', 0.1)), use_ot=bool(cfg.get('use_ot', False)),
reference_mode=cfg.get('reference_mode'), lambda_disc=float(cfg.get('lambda_disc', 1.0)),
n_disc_classes=n_disc_classes,
# B-group ablation flags
use_flow_token=bool(cfg.get('use_flow_token', True)),
n_packet_tokens=int(cfg.get('n_packet_tokens', -1)),
disc_as_cont=bool(cfg.get('disc_as_cont', False)),
cont_as_disc=bool(cfg.get('cont_as_disc', False)),
)
model = MixedTokenCFM(model_cfg).to(device)
print(f'[model] params={model.param_count():,} token_dim={model.token_dim} sigma={model_cfg.sigma} use_ot={model_cfg.use_ot} lambda_disc={model_cfg.lambda_disc}')
# B4: compute bin edges from benign train cont (z-scored, masked) for cont_as_disc quantization
cont_bin_edges = None
if model_cfg.cont_as_disc:
n_bins = n_disc_classes
n_cont_orig = model_cfg.n_cont_pkt
# gather real cont samples per channel (mask padding)
masks = np.arange(data.train_cont.shape[1])[None, :] < data.train_len[:, None]
edges = np.zeros((n_cont_orig, n_bins - 1), dtype=np.float32)
for c in range(n_cont_orig):
vals = data.train_cont[..., c][masks]
qs = np.linspace(0, 1, n_bins + 1)[1:-1] # interior quantiles
edges[c] = np.quantile(vals, qs).astype(np.float32)
cont_bin_edges = torch.from_numpy(edges).to(device)
print(f'[B4] cont_bin_edges shape={tuple(edges.shape)} (n_bins={n_bins})')
print(f'[model] params={model.param_count():,} token_dim={model.token_dim} sigma={model_cfg.sigma} use_ot={model_cfg.use_ot} lambda_disc={model_cfg.lambda_disc} use_flow_token={model_cfg.use_flow_token} n_packet_tokens={model_cfg.n_packet_tokens} disc_as_cont={model_cfg.disc_as_cont} cont_as_disc={model_cfg.cont_as_disc}')
opt = torch.optim.AdamW(model.parameters(), lr=float(cfg['lr']), weight_decay=float(cfg.get('weight_decay', 0.01)))
total_steps = max(1, int(cfg['epochs']) * len(loader))
sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=total_steps)
@@ -91,7 +118,7 @@ def train(cfg: dict[str, Any]) -> Path:
cont = cont.to(device, non_blocking=True)
disc = disc.to(device, non_blocking=True)
lens = lens.to(device, non_blocking=True)
comp = model.compute_loss(flow, cont, disc, lens, return_components=True)
comp = model.compute_loss(flow, cont, disc, lens, return_components=True, cont_bin_edges=cont_bin_edges)
loss = comp['total']
ldisc_sum += float(comp['aux_disc'].item())
opt.zero_grad(set_to_none=True)
@@ -104,7 +131,7 @@ def train(cfg: dict[str, Any]) -> Path:
mean_loss = float(np.mean(losses)) if losses else float('nan')
eval_metrics: dict[str, float] | None = None
if epoch % int(cfg.get('eval_every', 5)) == 0 or epoch == int(cfg['epochs']):
eval_metrics = _quick_eval(model, data, device, cfg)
eval_metrics = _quick_eval(model, data, device, cfg, cont_bin_edges=cont_bin_edges)
history['epoch'].append(epoch)
history['loss'].append(mean_loss)
history['eval'].append(eval_metrics)
@@ -120,6 +147,8 @@ def train(cfg: dict[str, Any]) -> Path:
if not np.isfinite(mean_loss):
raise RuntimeError(f'non-finite loss at epoch {epoch}')
payload = {'model_state_dict': model.state_dict(), 'model_cfg': asdict(model_cfg), 'cont_mean': data.cont_mean, 'cont_std': data.cont_std, 'flow_mean': data.flow_mean, 'flow_std': data.flow_std, 'flow_feature_names': np.asarray(data.flow_feature_names), 'packet_feature_names': np.asarray(data.packet_feature_names)}
if cont_bin_edges is not None:
payload['cont_bin_edges'] = cont_bin_edges.detach().cpu().numpy()
torch.save(payload, save_dir / 'model.pt')
with open(save_dir / 'history.json', 'w') as f:
json.dump(history, f, indent=2, default=str)

View File

@@ -51,6 +51,28 @@ Source (rows) trained on 10K benign of source dataset; target (columns) tested o
Forward CICIDS17→CICDDoS19 (0.969) beats Shafir 0.89 by **+0.08**; reverse CICDDoS19→CICIDS17 (0.941) approximately matches Shafir 0.93. CICIoT23 is hardest both as source and target — its IoT-protocol diversity makes the "benign of source ≈ benign of target" assumption brittle. Full table at `artifacts/route_comparison/CROSS_MATRIX_3x3.md`.
### Ablations (architecture & aggregator)
Two orthogonal ablation axes, each evaluated **within-dataset** (4 datasets × 3 seeds) **and** **cross-dataset** (3×3 transfer × 3 seeds):
- **Group A** — 7 alternative aggregators on the same JANUS-full sub-score vector (post-processing only; no retraining).
- **Group B** — 5 architecture variants, each retrained on 4 datasets × 3 seeds (5 × 12 = 60 training runs), plus 90 cross-dataset evals (5 variants × 6 off-diagonal directions × 3 seeds).
Every load-bearing JANUS design choice has the **same shape of ablation curve**: small in-distribution cost, large cross-dataset gain.
| Component (removed in ablation) | Variant | Within Δ | Cross-mean Δ | Cross-worst Δ |
|---|---|---:|---:|---:|
| FLOW token (global context) | B1 | **0.94** | 6.70 | 19.97 |
| Packet sequence | B2 | +0.15 | **23.82** | **36.27** |
| Cont/disc head split (drop disc head) | B3 | +0.44 | **13.14** | **25.03** |
| CFM head (drop continuous side) | B4 | **2.37** | 2.03 | 2.86 |
| Joint training of two heads | B5 | +0.20 | **18.93** | **27.54** |
| OAS Mahalanobis aggregator | A1 vs A5 | +0.37 | **15.88** | **27.38** |
Three ablations (B3 / B5 / A-aggregator) **marginally beat JANUS-full at within-dataset evaluation** but collapse on at least one cross-dataset transfer direction. The disc head, joint training, and OAS aggregator are deliberate trades: their value is exclusively in cross-dataset robustness.
Full headline summary: `artifacts/ablation/ABLATION_SUMMARY.md`. Per-variant 3×3 cross matrices: `artifacts/ablation/ABLATION_CROSS_B_full.md` and `artifacts/ablation/ABLATION_TABLE_CROSS_full.md`.
## Layout
```
@@ -74,6 +96,12 @@ scripts/ Workspace-level pcap → artifact pipeline,
orchestration. aggregate_score_router.py is the
deployable score path; run_cross_3x3.sh +
cross_3x3_table.py produce the cross matrix.
aggregate_ablation.py / aggregate_ablation_cross.py /
aggregate_ablation_cross_B.py produce the ablation
tables in artifacts/ablation/.
ablation/ B-group ablation training/eval drivers
(generate_configs.py, run_groupB.sh,
run_cross_groupB.sh).
tests/ Data-contract unit tests.
```
@@ -177,7 +205,8 @@ Common gotcha: if CSV timestamps and pcap epochs are in different time zones, `e
## Authoritative documents
- `RESULTS.md` — full headline tables, ablations, per-attack analysis, JANUS configuration, thresholded operating-point metrics, what the experiments proved / disproved.
- `RESULTS.md` — full headline tables, per-attack analysis, JANUS configuration, thresholded operating-point metrics, what the experiments proved / disproved.
- `artifacts/ablation/ABLATION_SUMMARY.md` — paper-facing ablation summary (Group A aggregator + Group B architecture, both within and cross views).
- `Mixed_CFM/model.py` and `common/data_contract.py` — model + data-contract source of truth.
## Python environment

View File

@@ -21,6 +21,7 @@ dependencies = [
"pyarrow>=24.0.0",
"pzflow>=4.0.0",
"shap>=0.51.0",
"cython>=3.2.4",
]
[build-system]

View File

@@ -0,0 +1,56 @@
"""Generate 60 B-group ablation configs from existing 12 base configs.
Reads:
Mixed_CFM/configs/<ds>_seed<S>.yaml (4 datasets × 3 seeds = 12 base)
Writes:
Mixed_CFM/configs/ablation/<gid>/<ds>_seed<S>.yaml (5 variants × 12 = 60)
Each variant overrides save_dir → artifacts/ablation/janus_<ds>_seed<S>_<gid>/
plus the variant-specific flags. CICIoT2023 base is `ciciot2023_seed42.yaml`
(NOT `ciciot2023_route_c_seed42.yaml`, which is a different score-router config).
"""
from __future__ import annotations
from pathlib import Path
import yaml
# Repository root: two directory levels above the directory containing this script.
ROOT = Path(__file__).resolve().parents[2]
# Base configs are read from BASE_DIR; generated ablation configs go under OUT_DIR/<gid>/.
BASE_DIR = ROOT / "Mixed_CFM" / "configs"
OUT_DIR = ROOT / "Mixed_CFM" / "configs" / "ablation"
DATASETS = ["iscxtor2016", "cicids2017", "cicddos2019", "ciciot2023"]
SEEDS = [42, 43, 44]
# Variant id -> config keys overridden on top of the base config.
# Keys are applied with dict.update(), so they replace same-named base keys.
VARIANTS = {
"b1_noflow": {"use_flow_token": False},
"b2_flowonly": {"n_packet_tokens": 0, "lambda_disc": 0.0},
"b3_allcont": {"disc_as_cont": True, "lambda_disc": 0.0},
"b4_alldisc": {"cont_as_disc": True, "n_disc_classes": 8},
"b5_nodisc": {"lambda_disc": 0.0},
}
def main() -> None:
    """Write one ablation config per (variant, dataset, seed) combination.

    For every base config ``BASE_DIR/<ds>_seed<S>.yaml`` that exists, each
    variant in ``VARIANTS`` gets a copy with ``save_dir`` pointed at
    ``artifacts/ablation/janus_<ds>_seed<S>_<gid>`` and the variant overrides
    applied, written to ``OUT_DIR/<gid>/<ds>_seed<S>.yaml``. Missing or
    malformed base configs are reported and skipped.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    for gid in VARIANTS:
        (OUT_DIR / gid).mkdir(parents=True, exist_ok=True)

    n_written = 0
    for ds in DATASETS:
        for seed in SEEDS:
            base_path = BASE_DIR / f"{ds}_seed{seed}.yaml"
            if not base_path.exists():
                print(f"[miss] {base_path}")
                continue
            base_cfg = yaml.safe_load(base_path.read_text(encoding="utf-8"))
            if not isinstance(base_cfg, dict):
                # safe_load returns None for an empty file and may return a
                # list/scalar; dict(...) on those would raise or mis-build.
                print(f"[skip] {base_path}: expected a YAML mapping, got {type(base_cfg).__name__}")
                continue
            for gid, overrides in VARIANTS.items():
                # Shallow copy is enough: only top-level keys are replaced.
                cfg = dict(base_cfg)
                cfg["save_dir"] = str(ROOT / "artifacts" / "ablation" / f"janus_{ds}_seed{seed}_{gid}")
                cfg.update(overrides)
                out = OUT_DIR / gid / f"{ds}_seed{seed}.yaml"
                out.write_text(yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8")
                n_written += 1
    print(f"[wrote] {n_written} config files under {OUT_DIR}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,66 @@
#!/usr/bin/env bash
# Cross-dataset evaluation for B-group ablation models.
# 5 variants × 6 off-diagonal directions × 3 seeds = 90 cross evals.
#
# Each B-variant model dir is artifacts/ablation/janus_<ds>_seed<S>_<gid>/.
# We only cross within the 3-dataset matrix (cicids2017, cicddos2019, ciciot2023);
# ISCXTor16 has different feature space for cross.
#
# Usage:
# bash scripts/ablation/run_cross_groupB.sh # all 90
# bash scripts/ablation/run_cross_groupB.sh b1_noflow b3_allcont
set -euo pipefail
# Repository root. Defaults to the original dev-box path; override with
#   ROOT=/path/to/JANUS bash scripts/ablation/run_cross_groupB.sh
ROOT="${ROOT:-/home/chy/JANUS}"
EVAL="${ROOT}/Mixed_CFM/eval_cross.py"
# Per-run JSON results and logs are written here.
OUT_DIR="${ROOT}/artifacts/ablation/cross"
mkdir -p "${OUT_DIR}"
declare -A STORE FLOWS FEATS
STORE[cicids2017]=${ROOT}/datasets/cicids2017/processed/full_store
FLOWS[cicids2017]=${ROOT}/datasets/cicids2017/processed/flows.parquet
FEATS[cicids2017]=${ROOT}/datasets/cicids2017/processed/flow_features.parquet
STORE[cicddos2019]=${ROOT}/datasets/cicddos2019/processed/full_store
FLOWS[cicddos2019]=${ROOT}/datasets/cicddos2019/processed/flows.parquet
FEATS[cicddos2019]=${ROOT}/datasets/cicddos2019/processed/flow_features.parquet
STORE[ciciot2023]=${ROOT}/datasets/ciciot2023/processed/full_store
FLOWS[ciciot2023]=${ROOT}/datasets/ciciot2023/processed/full_store/flows.parquet
FEATS[ciciot2023]=${ROOT}/datasets/ciciot2023/processed/flow_features.parquet
ALL_GIDS=(b1_noflow b2_flowonly b3_allcont b4_alldisc b5_nodisc)
DATASETS=(cicids2017 cicddos2019 ciciot2023)
SEEDS=(42 43 44)
GPU="${GPU:-0}"
if [[ $# -gt 0 ]]; then
GIDS=("$@")
else
GIDS=("${ALL_GIDS[@]}")
fi
# run_one GID SRC TGT SEED
# Evaluate the GID-variant model trained on SRC (given SEED) against dataset TGT.
# Skips when the result JSON already exists or the model checkpoint is missing.
# stdout/stderr of the evaluation go to a .log file next to the JSON output.
run_one() {
  local gid=$1 src=$2 tgt=$3 seed=$4
  local md="${ROOT}/artifacts/ablation/janus_${src}_seed${seed}_${gid}"
  local stem="${OUT_DIR}/${gid}__seed${seed}_${src}_to_${tgt}"
  local out="${stem}.json"
  if [[ -f "${out}" ]]; then echo "[skip] $gid ${src}->${tgt} seed${seed}"; return; fi
  if [[ ! -f "${md}/model.pt" ]]; then echo "[missing model] ${md}/model.pt"; return; fi
  echo "[gpu${GPU}] $(date +%H:%M:%S) $gid ${src}->${tgt} seed${seed}"
  cd "${ROOT}/Mixed_CFM"
  CUDA_VISIBLE_DEVICES="${GPU}" uv run --no-sync python -u "${EVAL}" \
    --model-dir "${md}" \
    --target-store "${STORE[$tgt]}" --target-flows "${FLOWS[$tgt]}" --target-flow-features "${FEATS[$tgt]}" \
    --benign-label normal --n-benign 10000 --n-attack 1000000 \
    --out "${out}" --seed "${seed}" --T 64 --batch-size 512 --n-steps 16 \
    > "${stem}.log" 2>&1
}
for gid in "${GIDS[@]}"; do
for src in "${DATASETS[@]}"; do
for tgt in "${DATASETS[@]}"; do
[[ "$src" == "$tgt" ]] && continue
for seed in "${SEEDS[@]}"; do
run_one "$gid" "$src" "$tgt" "$seed"
done
done
done
done
echo "[done] cross evals complete"

76
scripts/ablation/run_groupB.sh Executable file
View File

@@ -0,0 +1,76 @@
#!/usr/bin/env bash
# Train and phase1-evaluate the B-group ablation runs
# (5 variants x 4 datasets x 3 seeds = 60 runs when no subset is given).
#
# Runs are assigned to GPUs round-robin; the GPUS env var (comma-separated
# device ids, default "0,1") selects the devices.
# Per-run logs are written to <save_dir>/{train,phase1}.log, where save_dir
# is read from each run's YAML config.
#
# Usage:
#   bash scripts/ablation/run_groupB.sh                      # all 60 runs
#   bash scripts/ablation/run_groupB.sh b1_noflow b5_nodisc  # subset of groups
#   GPUS=0 bash scripts/ablation/run_groupB.sh               # single-GPU serial
set -euo pipefail
# Work from the repository root (two levels above this script).
cd "$(dirname "$0")/../.."

ALL_GIDS=(b1_noflow b2_flowonly b3_allcont b4_alldisc b5_nodisc)
DATASETS=(iscxtor2016 cicids2017 cicddos2019 ciciot2023)
SEEDS=(42 43 44)

GPUS="${GPUS:-0,1}"
IFS=',' read -ra GPU_ARR <<< "$GPUS"
N_GPU=${#GPU_ARR[@]}

# Default to every variant; positional arguments narrow the selection.
GIDS=("${ALL_GIDS[@]}")
if (( $# > 0 )); then
  GIDS=("$@")
fi

# Build the full run list as "gid|dataset|seed" specs.
runs=()
for gid in "${GIDS[@]}"; do
  for ds in "${DATASETS[@]}"; do
    for seed in "${SEEDS[@]}"; do
      runs+=("${gid}|${ds}|${seed}")
    done
  done
done
n_runs=${#runs[@]}
echo "[plan] ${n_runs} runs across GPUs ${GPUS} (gids=${GIDS[*]})"
# Train one ablation run, then phase1-evaluate the resulting checkpoint.
#   $1 spec    "gid|dataset|seed"
#   $2 gpu_id  value exported as CUDA_VISIBLE_DEVICES for both steps
# The output directory is the `save_dir` entry of the run's YAML config;
# train.log and phase1.log are written inside it.
run_one() {
  local spec="$1" gpu_id="$2"
  # Keep the parsed fields local so a foreground call cannot clobber
  # same-named variables of the calling script.
  local gid ds seed
  IFS='|' read -r gid ds seed <<< "$spec"
  local cfg="Mixed_CFM/configs/ablation/${gid}/${ds}_seed${seed}.yaml"
  local save_dir
  # The config path is passed as argv instead of being spliced into the Python
  # source, so quotes or backslashes in the path cannot break the snippet.
  save_dir=$(uv run --no-sync python -c \
    "import sys, yaml; print(yaml.safe_load(open(sys.argv[1]))['save_dir'])" "$cfg")
  mkdir -p "$save_dir"
  echo "[gpu${gpu_id}] $(date +%H:%M:%S) START $gid $ds seed${seed}"
  CUDA_VISIBLE_DEVICES="$gpu_id" uv run --no-sync python Mixed_CFM/train.py \
    --config "$cfg" >"$save_dir/train.log" 2>&1
  CUDA_VISIBLE_DEVICES="$gpu_id" uv run --no-sync python Mixed_CFM/eval_phase1.py \
    --model-dir "$save_dir" --out-dir "$save_dir" \
    --batch-size 256 --n-steps 16 \
    --n-val-cap 30000 --n-atk-cap 30000 >"$save_dir/phase1.log" 2>&1
  echo "[gpu${gpu_id}] $(date +%H:%M:%S) DONE $gid $ds seed${seed}"
}
# Round-robin assignment: run i goes to GPU_ARR[i mod N_GPU].
# With one GPU the runs execute serially in the foreground; otherwise they are
# launched in batches of N_GPU background jobs and each batch is fully reaped
# before the next one starts.
pids=()
n_fail=0

# Wait for every job of the current batch and count the ones that failed, so
# failures are reported at the end instead of being silently discarded.
reap_batch() {
  local pid
  # ${arr[@]+...} keeps the empty-array expansion safe under `set -u` on bash < 4.4.
  for pid in ${pids[@]+"${pids[@]}"}; do
    wait "$pid" || n_fail=$((n_fail + 1))
  done
  pids=()
}

for i in "${!runs[@]}"; do
  spec="${runs[$i]}"
  gpu_id="${GPU_ARR[$((i % N_GPU))]}"
  if [[ $N_GPU -eq 1 ]]; then
    # Serial mode: a failing run aborts the sweep via `set -e`.
    run_one "$spec" "$gpu_id"
  else
    run_one "$spec" "$gpu_id" &
    pids+=($!)
    # Cap concurrency at N_GPU
    if (( (i + 1) % N_GPU == 0 )); then
      reap_batch
    fi
  fi
done
reap_batch

if (( n_fail > 0 )); then
  echo "[done] ${n_fail} of ${n_runs} runs FAILED (see the per-run train.log / phase1.log)" >&2
  exit 1
fi
echo "[done] all ${n_runs} runs complete"

39
scripts/ablation/smoke_test.sh Executable file
View File

@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Smoke-test all 5 B-group variants on cicids2017 seed42 with reduced epochs
# and tiny train set, on CPU (so VLLM workers on the GPUs are not disturbed).
#
# After: each ${SMOKE_ROOT}/<gid>/ should contain model.pt
# + phase1_scores.npz with the variant-specific score keys.
#
# SMOKE_ROOT defaults to /home/chy/JANUS/artifacts/ablation_smoke; set it in
# the environment to run the smoke test from a different checkout or machine.
set -euo pipefail
cd "$(dirname "$0")/../.."
SMOKE_ROOT="${SMOKE_ROOT:-/home/chy/JANUS/artifacts/ablation_smoke}"
GIDS=(b1_noflow b2_flowonly b3_allcont b4_alldisc b5_nodisc)
DS=cicids2017
SEED=42
for gid in "${GIDS[@]}"; do
  cfg="Mixed_CFM/configs/ablation/${gid}/${DS}_seed${SEED}.yaml"
  out_dir="${SMOKE_ROOT}/${gid}"
  echo "=================================================="
  echo "[smoke] $gid"
  echo "=================================================="
  # Only the tail of each step's output is shown; pipefail still propagates failures.
  uv run --no-sync python Mixed_CFM/train.py \
    --config "$cfg" \
    --override "device=cpu" "epochs=2" "n_train=500" "eval_n=200" "eval_every=2" \
    "save_dir=${out_dir}" 2>&1 | tail -8
  uv run --no-sync python Mixed_CFM/eval_phase1.py \
    --model-dir "${out_dir}" \
    --out-dir "${out_dir}" \
    --device cpu --batch-size 64 --n-steps 4 \
    --n-val-cap 200 --n-atk-cap 200 2>&1 | tail -4
  echo
done
echo "=== Smoke summary ==="
for gid in "${GIDS[@]}"; do
  npz="${SMOKE_ROOT}/${gid}/phase1_scores.npz"
  if [[ -f "$npz" ]]; then
    # List the val_terminal* / val_disc* keys each variant actually produced.
    keys=$(uv run --no-sync python -c "import sys, numpy as np; z=np.load(sys.argv[1], allow_pickle=True); print(','.join(sorted(k for k in z.files if k.startswith(('val_terminal','val_disc')))))" "$npz")
    echo "$gid: $keys"
  else
    echo "$gid: MISSING"
  fi
done

View File

@@ -0,0 +1,533 @@
"""JANUS ablation aggregator (Groups A + B).
Reads phase1_scores.npz from:
artifacts/route_comparison/janus_<ds>_seed<S>/ (A + JANUS-full anchor)
artifacts/ablation/janus_<ds>_seed<S>_<gid>/ (B variants)
Produces (<group> is the --group argument: A, B or all):
  artifacts/ablation/ABLATION_TABLE_<group>.md     final markdown table
  artifacts/ablation/ABLATION_TABLE_<group>.json   per-cell mean / std / CI / per-seed
  artifacts/ablation/ABLATION_DELONG_<group>.md    paired DeLong p-values vs JANUS-full (only with --delong)
Group A operates entirely on existing route_comparison npz files (no GPU).
Group B requires the 60 B-variant runs to have completed.
"""
from __future__ import annotations
import argparse
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
import numpy as np
from sklearn.covariance import OAS
from sklearn.metrics import roc_auc_score
# Two directory levels above the directory that contains this file.
ROOT = Path(__file__).resolve().parents[2]
ROUTE = ROOT / "artifacts" / "route_comparison"
# Output directory for the generated tables (created in main()).
ABL = ROOT / "artifacts" / "ablation"
# Dataset ids, in the column order used by the markdown tables.
DATASETS = ["iscxtor2016", "cicids2017", "cicddos2019", "ciciot2023"]
# Dataset id -> column heading used in the markdown tables.
PRETTY = {
"iscxtor2016": "ISCXTor16",
"cicids2017": "CICIDS17",
"cicddos2019": "CICDDoS19",
"ciciot2023": "CICIoT23",
}
SEEDS = [42, 43, 44]
T_975_N3 = 4.302653 # 95% t-CI factor for n=3 (df=2)
# Sub-score keys (npz key with its val_/atk_ prefix stripped) grouped by kind.
CONT_KEYS = ["terminal_norm", "terminal_flow", "terminal_packet"]
DISC_KEYS = ["disc_nll_total", "disc_nll_ch2", "disc_nll_ch3",
"disc_nll_ch4", "disc_nll_ch5", "disc_nll_ch6", "disc_nll_ch7"]
ALL_KEYS = CONT_KEYS + DISC_KEYS # 10-d
# --------------------------------------------------------------------------- #
# I/O #
# --------------------------------------------------------------------------- #
def _load_npz(npz_path: Path):
z = np.load(npz_path, allow_pickle=True)
val = {}
atk = {}
for k in z.files:
if k.startswith("val_") and k != "val_labels":
val[k[4:]] = z[k]
elif k.startswith("atk_") and k != "atk_labels":
atk[k[4:]] = z[k]
return val, atk
def _load_cross_npz(npz_path: Path):
"""Cross npz schema: b_<key> = target benign, a_<key> = target attacks."""
z = np.load(npz_path, allow_pickle=True)
val = {}
atk = {}
for k in z.files:
if k.startswith("b_") and k != "b_labels":
val[k[2:]] = z[k]
elif k.startswith("a_") and k != "a_labels":
atk[k[2:]] = z[k]
return val, atk
def _stack(d: dict, keys: list[str]) -> np.ndarray:
arrs = []
for k in keys:
if k in d:
arrs.append(d[k])
else:
# variant doesn't produce this score (e.g. B2 has no disc, B5 disc untrained)
return None
out = np.stack(arrs, axis=1).astype(np.float64)
return np.nan_to_num(out, nan=0.0, posinf=1e6, neginf=-1e6)
# --------------------------------------------------------------------------- #
# Score functions (Group A definitions) #
# --------------------------------------------------------------------------- #
def _mahal(S, mu, inv_cov):
d = S - mu
return np.einsum("ni,ij,nj->n", d, inv_cov, d)
def _oas_mahal(val_S, atk_S):
    """Score both sets with a Mahalanobis form fitted on ``val_S`` only.

    The centre is the column mean of ``val_S`` and the covariance is the OAS
    estimate fitted on ``val_S``; a 1e-9 ridge is added before inversion.

    Returns:
        ``(val_scores, atk_scores)``.
    """
    center = val_S.mean(axis=0)
    shrunk = OAS().fit(val_S).covariance_
    ridge = 1e-9 * np.eye(shrunk.shape[0])
    precision = np.linalg.inv(shrunk + ridge)
    return _mahal(val_S, center, precision), _mahal(atk_S, center, precision)
def _zscore_agg(val_S, atk_S, mode="mean"):
mu = val_S.mean(axis=0)
sd = val_S.std(axis=0) + 1e-9
zv = (val_S - mu) / sd
za = (atk_S - mu) / sd
if mode == "mean":
return zv.mean(axis=1), za.mean(axis=1)
if mode == "max":
return zv.max(axis=1), za.max(axis=1)
raise ValueError(mode)
def score_a1_terminal_norm(val, atk):
    """A1: return the raw ``terminal_norm`` arrays of both sets, unchanged."""
    key = "terminal_norm"
    return val[key], atk[key]
def score_a2_disc_total(val, atk):
    """A2: return the raw ``disc_nll_total`` arrays, or ``None`` when ``val`` lacks that key."""
    key = "disc_nll_total"
    return (val[key], atk[key]) if key in val else None
def score_a3_oas_term3(val, atk):
    """A3: OAS-Mahalanobis over the three ``CONT_KEYS``; ``None`` if either set lacks one."""
    matrices = [_stack(side, CONT_KEYS) for side in (val, atk)]
    if any(m is None for m in matrices):
        return None
    return _oas_mahal(*matrices)
def score_a4_oas_disc7(val, atk):
    """A4: OAS-Mahalanobis over the seven ``DISC_KEYS``; ``None`` if either set lacks one."""
    matrices = [_stack(side, DISC_KEYS) for side in (val, atk)]
    if any(m is None for m in matrices):
        return None
    return _oas_mahal(*matrices)
def score_a5_oas_all10(val, atk):
    """A5: OAS-Mahalanobis over all ten ``ALL_KEYS``; ``None`` if either set lacks one."""
    matrices = [_stack(side, ALL_KEYS) for side in (val, atk)]
    if any(m is None for m in matrices):
        return None
    return _oas_mahal(*matrices)
def score_a6_zmean(val, atk):
    """A6: row-wise mean of the z-scored ``ALL_KEYS`` columns; ``None`` if either set lacks a key."""
    matrices = [_stack(side, ALL_KEYS) for side in (val, atk)]
    if any(m is None for m in matrices):
        return None
    val_matrix, atk_matrix = matrices
    return _zscore_agg(val_matrix, atk_matrix, "mean")
def score_a7_zmax(val, atk):
    """A7: row-wise max of the z-scored ``ALL_KEYS`` columns; ``None`` if either set lacks a key."""
    matrices = [_stack(side, ALL_KEYS) for side in (val, atk)]
    if any(m is None for m in matrices):
        return None
    val_matrix, atk_matrix = matrices
    return _zscore_agg(val_matrix, atk_matrix, "max")
def score_oas_disc_all(val, atk):
    """OAS-Mahalanobis over every ``disc_nll_*`` key found in ``val`` (used by B4).

    Returns ``None`` when ``val`` has no such key or ``atk`` is missing one of them.
    """
    disc_keys = sorted(name for name in val if name.startswith("disc_nll_"))
    if len(disc_keys) == 0:
        return None
    val_matrix = _stack(val, disc_keys)
    atk_matrix = _stack(atk, disc_keys)
    if val_matrix is None or atk_matrix is None:
        return None
    return _oas_mahal(val_matrix, atk_matrix)
def score_oas_all_available(val, atk):
    """OAS-Mahalanobis over every ``terminal_*`` or ``disc_nll_*`` key present in ``val``.

    Used by B1 (no terminal_flow); works for any subset of the standard keys.
    With exactly one key the raw arrays are returned without aggregation.
    Returns ``None`` when no key matches or ``atk`` is missing one of them.
    """
    prefixes = ("terminal_", "disc_nll_")
    found = sorted(name for name in val if name.startswith(prefixes))
    if len(found) == 0:
        return None
    if len(found) == 1:
        only = found[0]
        return val[only], atk[only]
    val_matrix = _stack(val, found)
    atk_matrix = _stack(atk, found)
    if val_matrix is None or atk_matrix is None:
        return None
    return _oas_mahal(val_matrix, atk_matrix)
def score_oas_term_all(val, atk):
    """OAS-Mahalanobis over every ``terminal_*`` key in ``val`` (B3: 3 keys, B1: 2 keys).

    With exactly one key the raw arrays are returned without aggregation.
    Returns ``None`` when no key matches or ``atk`` is missing one of them.
    """
    term_keys = sorted(name for name in val if name.startswith("terminal_"))
    if len(term_keys) == 0:
        return None
    if len(term_keys) == 1:
        # a single scalar score needs no covariance model
        only = term_keys[0]
        return val[only], atk[only]
    val_matrix = _stack(val, term_keys)
    atk_matrix = _stack(atk, term_keys)
    if val_matrix is None or atk_matrix is None:
        return None
    return _oas_mahal(val_matrix, atk_matrix)
# Registry of score functions, looked up by the id stored in
# VariantSpec.score_fn_id. Each function takes (val, atk) score dicts and
# returns (val_scores, atk_scores), or None when the needed keys are missing.
SCORE_FNS = {
"A1_terminal_norm": score_a1_terminal_norm,
"A2_disc_nll_total": score_a2_disc_total,
"A3_OAS_term3": score_a3_oas_term3,
"A4_OAS_disc7": score_a4_oas_disc7,
"A5_OAS_all10": score_a5_oas_all10,
"A6_zmean_all10": score_a6_zmean,
"A7_zmax_all10": score_a7_zmax,
"OAS_disc_all": score_oas_disc_all,
"OAS_term_all": score_oas_term_all,
"OAS_all_available": score_oas_all_available,
}
# --------------------------------------------------------------------------- #
# Stats #
# --------------------------------------------------------------------------- #
def _auroc(s_v, s_a):
    """AUROC with ``s_v`` as the negative class (label 0) and ``s_a`` as the positive class (label 1)."""
    labels = np.concatenate([np.zeros(len(s_v)), np.ones(len(s_a))])
    scores = np.r_[s_v, s_a]
    return float(roc_auc_score(labels, scores))
def _mean_ci(values: list[float]):
"""3-seed mean ± 95% t-CI (n=3, df=2)."""
a = np.asarray([v for v in values if v is not None and not np.isnan(v)], dtype=float)
if a.size == 0:
return None
if a.size == 1:
return {"mean": float(a[0]), "std": 0.0, "ci": 0.0, "n": 1, "vals": a.tolist()}
se = a.std(ddof=1) / np.sqrt(a.size)
return {
"mean": float(a.mean()),
"std": float(a.std(ddof=1)),
"ci": float(T_975_N3 * se) if a.size == 3 else float(1.96 * se),
"n": int(a.size),
"vals": a.tolist(),
}
def _delong_var(s_v, s_a):
"""Compute DeLong AUROC variance (Sun & Xu 2014, fast O(n log n))."""
n0, n1 = len(s_v), len(s_a)
s = np.concatenate([s_a, s_v]) # positives first
order = np.argsort(s, kind="mergesort")
L = np.empty_like(s)
s_sorted = s[order]
# midrank
i = 0
while i < len(s_sorted):
j = i
while j < len(s_sorted) and s_sorted[j] == s_sorted[i]:
j += 1
L[order[i:j]] = (i + j - 1) / 2.0 + 1
i = j
# ranks split
L_a = L[:n1]
L_v = L[n1:]
# midrank within each class
s_a_order = np.argsort(s_a, kind="mergesort")
L_aa = np.empty(n1)
sa_sorted = s_a[s_a_order]
i = 0
while i < n1:
j = i
while j < n1 and sa_sorted[j] == sa_sorted[i]:
j += 1
L_aa[s_a_order[i:j]] = (i + j - 1) / 2.0 + 1
i = j
s_v_order = np.argsort(s_v, kind="mergesort")
L_vv = np.empty(n0)
sv_sorted = s_v[s_v_order]
i = 0
while i < n0:
j = i
while j < n0 and sv_sorted[j] == sv_sorted[i]:
j += 1
L_vv[s_v_order[i:j]] = (i + j - 1) / 2.0 + 1
i = j
auc = (L_a.sum() / n1 - (n1 + 1) / 2) / n0
V10 = (L_a - L_aa) / n0 # length n1
V01 = 1 - (L_v - L_vv) / n1 # length n0
s10 = V10.var(ddof=1)
s01 = V01.var(ddof=1)
var = s10 / n1 + s01 / n0
return float(auc), float(var), V10, V01
def _delong_paired_p(s_v, s_a, t_v, t_a):
    """Paired DeLong test for two AUROCs on the same data.

    Args:
        s_v, s_a: candidate scores for the negative / positive samples.
        t_v, t_a: reference (JANUS-full) scores for the same samples.
            Both score sets must align flow-by-flow.

    Returns:
        ``(auc1 - auc2, p_value_two_sided)``. When the estimated variance of
        the difference is not positive the p-value is reported as 1.0.
    """
    auc1, var1, V10_1, V01_1 = _delong_var(s_v, s_a)
    auc2, var2, V10_2, V01_2 = _delong_var(t_v, t_a)
    n1, n0 = len(s_a), len(s_v)
    cov10 = np.cov(np.stack([V10_1, V10_2]), ddof=1)[0, 1]
    cov01 = np.cov(np.stack([V01_1, V01_2]), ddof=1)[0, 1]
    cov12 = cov10 / n1 + cov01 / n0
    var_diff = var1 + var2 - 2 * cov12
    if var_diff <= 0:
        return auc1 - auc2, 1.0
    z = (auc1 - auc2) / np.sqrt(var_diff)
    from scipy.stats import norm
    # Two-sided p-value via the survival function. `1 - cdf` rounds to exactly
    # 0 once |z| exceeds roughly 8, which happens easily on large eval sets.
    p = 2 * norm.sf(abs(z))
    return auc1 - auc2, float(p)
# --------------------------------------------------------------------------- #
# Aggregation entry points #
# --------------------------------------------------------------------------- #
@dataclass
class VariantSpec:
    """One row of the ablation table.

    Attributes:
        vid: short variant id (e.g. ``"A1"``, ``"JANUS-full"``).
        label: text shown in the table's variant column.
        what_removed: text shown in the table's "What removed" column.
        npz_dir_pattern: directory template relative to ``artifacts/``, e.g.
            ``"route_comparison/janus_{ds}_seed{seed}"`` or
            ``"ablation/janus_{ds}_seed{seed}_{gid}"``.
        score_fn_id: id of the score function applied to the npz
            (usually ``"A5_OAS_all10"``).
        gid: ablation group id substituted into the pattern; used by B variants.
    """

    vid: str
    label: str
    what_removed: str
    npz_dir_pattern: str
    score_fn_id: str
    gid: str = ""
def _expand_path(spec: VariantSpec, ds: str, seed: int) -> Path:
    """Return the ``phase1_scores.npz`` path of ``spec`` for one (dataset, seed) pair."""
    run_dir = spec.npz_dir_pattern.format(ds=ds, seed=seed, gid=spec.gid)
    return ROOT / "artifacts" / run_dir / "phase1_scores.npz"
def collect_variant(spec: VariantSpec) -> dict:
    """Compute per-seed AUROCs of one variant on every dataset and summarise them.

    Cells whose npz is missing, or whose score function returns ``None``, are skipped.

    Returns:
        dict with the spec's descriptive fields plus ``per_dataset``
        (dataset -> ``_mean_ci`` summary or ``None``) and ``per_seed``
        (dataset -> {seed: AUROC}).
    """
    aucs_by_ds: dict[str, list[float]] = {ds: [] for ds in DATASETS}
    per_seed: dict[str, dict[int, float]] = {ds: {} for ds in DATASETS}
    for ds, seed in ((d, s) for d in DATASETS for s in SEEDS):
        scores_file = _expand_path(spec, ds, seed)
        if scores_file.exists():
            val, atk = _load_npz(scores_file)
            scored = SCORE_FNS[spec.score_fn_id](val, atk)
            if scored is not None:
                sv, sa = scored
                auc = _auroc(sv, sa)
                aucs_by_ds[ds].append(auc)
                per_seed[ds][seed] = auc
    return {
        "vid": spec.vid,
        "label": spec.label,
        "what_removed": spec.what_removed,
        "score_fn_id": spec.score_fn_id,
        "gid": spec.gid,
        "per_dataset": {ds: _mean_ci(aucs_by_ds[ds]) for ds in DATASETS},
        "per_seed": per_seed,
    }
def collect_delong_pvals(spec: VariantSpec, ref_spec: VariantSpec) -> dict:
    """Paired DeLong test of ``spec`` against ``ref_spec`` for each (dataset, seed).

    A cell is skipped when either npz is missing, either score function
    returns ``None``, or the two score sets differ in length.

    Returns:
        dataset -> list of ``{"seed", "delta", "p"}`` records.
    """
    results: dict[str, list[dict]] = {ds: [] for ds in DATASETS}
    for ds in DATASETS:
        for seed in SEEDS:
            cand_file = _expand_path(spec, ds, seed)
            ref_file = _expand_path(ref_spec, ds, seed)
            if not cand_file.exists() or not ref_file.exists():
                continue
            cand_val, cand_atk = _load_npz(cand_file)
            ref_val, ref_atk = _load_npz(ref_file)
            cand_fn = SCORE_FNS[spec.score_fn_id]
            ref_fn = SCORE_FNS[ref_spec.score_fn_id]
            cand = cand_fn(cand_val, cand_atk)
            ref = ref_fn(ref_val, ref_atk)
            if cand is None or ref is None:
                continue
            cand_v, cand_a = cand
            ref_v, ref_a = ref
            # The paired test needs the same samples on both sides; B variants
            # are expected to be scored on the same data as JANUS-full at that
            # (ds, seed), so a length mismatch means the cell cannot be paired.
            same_shape = len(cand_v) == len(ref_v) and len(cand_a) == len(ref_a)
            if not same_shape:
                continue
            delta, p_value = _delong_paired_p(cand_v, cand_a, ref_v, ref_a)
            results[ds].append({"seed": seed, "delta": delta, "p": p_value})
    return results
# --------------------------------------------------------------------------- #
# Variant registry #
# --------------------------------------------------------------------------- #
ROUTE_DIR = "route_comparison/janus_{ds}_seed{seed}"
ABL_DIR = "ablation/janus_{ds}_seed{seed}_{gid}"
def _group_a_specs() -> list[VariantSpec]:
    """Group A rows: each applies a different score function to the same JANUS-full npz files."""
    # (vid, label, what_removed, score_fn_id)
    table = [
        ("JANUS-full", "JANUS-full (A5)", "", "A5_OAS_all10"),
        ("A1", "A1 terminal_norm", "OAS aggregator + disc head", "A1_terminal_norm"),
        ("A2", "A2 disc_nll_total", "OAS aggregator + CFM head", "A2_disc_nll_total"),
        ("A3", "A3 OAS-Mahal term3", "disc head", "A3_OAS_term3"),
        ("A4", "A4 OAS-Mahal disc7", "CFM head", "A4_OAS_disc7"),
        ("A6", "A6 z-score mean (10-d)", "covariance structure", "A6_zmean_all10"),
        ("A7", "A7 z-score max (10-d)", "weighted aggregation", "A7_zmax_all10"),
    ]
    return [
        VariantSpec(vid, label, removed, ROUTE_DIR, fn_id)
        for vid, label, removed, fn_id in table
    ]
def _group_b_specs() -> list[VariantSpec]:
    """Group B rows: architecture variants, each scored with the function that fits its sub-scores."""
    # (vid, label, what_removed, score_fn_id, gid)
    table = [
        # B1 has 2 terminal keys (no terminal_flow) + full disc7 -> auto-key OAS (9-d in this case)
        ("B1", "B1 no FLOW token", "global context", "OAS_all_available", "b1_noflow"),
        # B2 has only terminal_flow (= terminal_norm); single scalar
        ("B2", "B2 flow-only", "packet sequence", "A1_terminal_norm", "b2_flowonly"),
        # B3 has terminal_norm/flow/packet covering all 9 dims (cont + disc-as-cont); OAS on 3-tuple
        ("B3", "B3 all-cont", "cont/disc split", "A3_OAS_term3", "b3_allcont"),
        # B4 has 9 disc channels + total; auto-discover keys
        ("B4", "B4 all-disc", "cont/disc split (rev)", "OAS_disc_all", "b4_alldisc"),
        # B5 has full schema but disc head is untrained noise; use term3 only
        ("B5", "B5 λ_disc=0", "joint training", "A3_OAS_term3", "b5_nodisc"),
    ]
    return [
        VariantSpec(vid, label, removed, ABL_DIR, fn_id, gid=gid)
        for vid, label, removed, fn_id, gid in table
    ]
# --------------------------------------------------------------------------- #
# Markdown writer #
# --------------------------------------------------------------------------- #
def _fmt_cell(c: dict | None) -> str:
if c is None:
return ""
if c["n"] == 1:
return f"{100 * c['mean']:.2f}"
return f"{100 * c['mean']:.2f} ± {100 * c['ci']:.2f}"
def write_table(rows: list[dict], path: Path, *, title: str = "JANUS ablation"):
    """Write the ablation results as a markdown table.

    Args:
        rows: ``collect_variant`` results, one table row each.
        path: output file; parent directories are created.
        title: first-level heading of the document.

    The last column is the mean of the per-dataset means that are available.
    """
    header = ["Variant", "What removed", *(PRETTY[ds] for ds in DATASETS), "Mean"]
    out = [
        f"# {title}",
        "",
        f"3-seed mean ± 95% t-CI AUROC (%). Seeds = {SEEDS}.",
        "",
        "| " + " | ".join(header) + " |",
        "|" + "|".join("---" for _ in header) + "|",
    ]
    for r in rows:
        summaries = [r["per_dataset"].get(ds) for ds in DATASETS]
        present = [c["mean"] for c in summaries if c is not None]
        overall = f"{100 * np.mean(present):.2f}" if present else ""
        cells = [r["label"], r["what_removed"], *(_fmt_cell(c) for c in summaries), overall]
        out.append("| " + " | ".join(cells) + " |")
    out.append("")
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(out))
def write_delong(records: list[dict], path: Path):
    """Write the paired DeLong results as markdown, one table per variant.

    Args:
        records: dicts with ``vid``, ``label`` and ``delong``
            (dataset -> list of ``{"seed", "delta", "p"}``).
        path: output file; parent directories are created.

    Each table has one row per seed and one column per dataset. A cell shows
    the signed AUROC difference and the raw p-value, or is empty when that
    (dataset, seed) pair has no result.
    """
    lines = ["# Paired DeLong p-values vs JANUS-full",
             "",
             f"Seeds = {SEEDS}. p reported per (variant, dataset, seed). "
             "Holm-Bonferroni-correctable; raw p shown.",
             ""]
    for rec in records:
        lines.append(f"## {rec['label']} ({rec['vid']})")
        lines.append("")
        header = ["Seed"] + [PRETTY[ds] for ds in DATASETS]
        lines.append("| " + " | ".join(header) + " |")
        lines.append("|" + "|".join("---" for _ in header) + "|")
        for seed in SEEDS:
            row = [str(seed)]
            for ds in DATASETS:
                hits = [x for x in rec["delong"][ds] if x["seed"] == seed]
                if hits:
                    h = hits[0]
                    # "+" format flag prints the sign for both directions.
                    # Before, a negative delta appeared as its bare magnitude
                    # and looked like an improvement.
                    row.append(f"Δ={h['delta']:+.4f}, p={h['p']:.3g}")
                else:
                    row.append("")
            lines.append("| " + " | ".join(row) + " |")
        lines.append("")
    path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding: the text contains non-ASCII characters ("Δ").
    path.write_text("\n".join(lines), encoding="utf-8")
# --------------------------------------------------------------------------- #
# Main #
# --------------------------------------------------------------------------- #
def main() -> None:
    """CLI entry point: build the table for the requested group(s), optionally with DeLong tests.

    Writes ``ABLATION_TABLE_<group>.md`` and ``ABLATION_TABLE_<group>.json``
    into ``ABL``; with ``--delong`` also ``ABLATION_DELONG_<group>.md``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--group", choices=["A", "B", "all"], default="A")
    parser.add_argument("--delong", action="store_true",
                        help="Compute paired DeLong p-values vs JANUS-full (CPU heavy on big eval sets).")
    args = parser.parse_args()
    ABL.mkdir(parents=True, exist_ok=True)

    specs: list[VariantSpec] = []
    for wanted, build in (("A", _group_a_specs), ("B", _group_b_specs)):
        if args.group in (wanted, "all"):
            specs.extend(build())

    rows = []
    for spec in specs:
        summary = collect_variant(spec)
        rows.append(summary)
        n_ok = len([ds for ds in DATASETS if summary["per_dataset"][ds] is not None])
        print(f"[ok] {spec.vid:14s} datasets_with_data={n_ok}/{len(DATASETS)}", flush=True)

    out_md = ABL / f"ABLATION_TABLE_{args.group}.md"
    out_json = ABL / f"ABLATION_TABLE_{args.group}.json"
    write_table(rows, out_md, title=f"JANUS ablation (group {args.group})")
    # Anything json cannot encode is written as null.
    out_json.write_text(json.dumps(rows, indent=2, default=lambda o: None))
    print(f"[wrote] {out_md}")
    print(f"[wrote] {out_json}")

    if not args.delong:
        return
    # The reference is always the JANUS-full row of group A, whichever group was requested.
    ref = next(s for s in _group_a_specs() if s.vid == "JANUS-full")
    recs = []
    for spec in specs:
        if spec.vid == "JANUS-full":
            continue
        pvals = collect_delong_pvals(spec, ref)
        recs.append({"vid": spec.vid, "label": spec.label, "delong": pvals})
        print(f"[delong] {spec.vid}", flush=True)
    write_delong(recs, ABL / f"ABLATION_DELONG_{args.group}.md")
    print(f"[wrote] {ABL / f'ABLATION_DELONG_{args.group}.md'}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,218 @@
"""Cross-dataset version of the Group-A score-aggregator ablation.
For each (src, tgt, seed) cell we have a phase1-style npz with:
b_<key> target benign val (aggregator fit on this)
a_<key> target attacks
Within-dataset (src == tgt) cells reuse the standard
artifacts/route_comparison/janus_<ds>_seed<S>/phase1_scores.npz
(val_/atk_ prefixes — handled via the same _load_npz path).
We score 7 aggregators (A1..A7, where A5 is JANUS-full's deployed aggregator) across all
3×3 cells × 3 seeds, then summarize with two complementary views:
ABLATION_TABLE_CROSS_summary.md
| Aggregator | Within mean | Cross mean | Cross min (worst cell) |
Shows whether OAS's value lives in cross-dataset robustness.
ABLATION_TABLE_CROSS_full.md
Per-aggregator full 3×3 matrix (each cell = 3-seed mean ± 95% t-CI).
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
import numpy as np
from aggregate_ablation import (
SCORE_FNS, T_975_N3, _auroc, _load_npz, _load_cross_npz,
)
ROOT = Path(__file__).resolve().parents[2]
ROUTE = ROOT / "artifacts" / "route_comparison"
CROSS = ROUTE / "cross"
ABL = ROOT / "artifacts" / "ablation"
# 3x3 cross matrix datasets (no ISCXTor16 — different feature space)
CROSS_DATASETS = ["cicids2017", "cicddos2019", "ciciot2023"]
PRETTY = {
"cicids2017": "CICIDS17",
"cicddos2019": "CICDDoS19",
"ciciot2023": "CICIoT23",
}
SEEDS = [42, 43, 44]
AGGREGATORS = [
("JANUS-full (A5)", "A5_OAS_all10", "deployed JANUS"),
("A1 terminal_norm","A1_terminal_norm", "raw scalar (CFM head)"),
("A2 disc_total", "A2_disc_nll_total","raw scalar (disc head)"),
("A3 OAS term3", "A3_OAS_term3", "OAS on 3 cont sub-scores"),
("A4 OAS disc7", "A4_OAS_disc7", "OAS on 7 disc sub-scores"),
("A6 z-score mean", "A6_zmean_all10", "equal-weight z-score sum"),
("A7 z-score max", "A7_zmax_all10", "equal-weight z-score max"),
]
# --------------------------------------------------------------------------- #
def _cell_path(src: str, tgt: str, seed: int) -> Path | None:
    """Return the npz path of the (src, tgt, seed) cell, or ``None`` when the file is missing.

    Within-dataset cells (``src == tgt``) use the standard phase1 npz;
    all other cells use the cross-evaluation directory.
    """
    if src == tgt:
        candidate = ROUTE / f"janus_{src}_seed{seed}" / "phase1_scores.npz"
    else:
        candidate = CROSS / f"janus_seed{seed}_{src}_to_{tgt}.npz"
    return candidate if candidate.exists() else None
def _load_cell(src: str, tgt: str, seed: int):
    """Load the ``(val, atk)`` score dicts of one cell; ``(None, None)`` when its npz is missing."""
    npz = _cell_path(src, tgt, seed)
    if npz is None:
        return None, None
    # Within-dataset files use val_/atk_ prefixes, cross files use b_/a_.
    loader = _load_npz if src == tgt else _load_cross_npz
    return loader(npz)
def _score_cell(src: str, tgt: str, seed: int, score_fn_id: str) -> float | None:
    """AUROC of one aggregator on one cell; ``None`` when the data or the needed keys are missing."""
    val, atk = _load_cell(src, tgt, seed)
    if val is None:
        return None
    scored = SCORE_FNS[score_fn_id](val, atk)
    if scored is None:
        return None
    benign_scores, attack_scores = scored
    return _auroc(benign_scores, attack_scores)
def _seed_means(src: str, tgt: str, score_fn_id: str) -> dict | None:
    """Summarise the per-seed AUROCs of cell (src, tgt) for one aggregator.

    Seeds whose score is ``None`` or NaN are dropped.

    Returns:
        ``None`` when no seed produced a score; otherwise a dict with ``mean``,
        ``std`` (ddof=1), ``ci`` (95% Student-t half-width), ``n`` and ``vals``.
        A single value gets ``std`` and ``ci`` of 0.0.
    """
    vals = []
    for seed in SEEDS:
        v = _score_cell(src, tgt, seed, score_fn_id)
        if v is not None and not np.isnan(v):
            vals.append(v)
    if not vals:
        return None
    a = np.asarray(vals)
    n = int(a.size)
    if n == 1:
        return {"mean": float(a[0]), "std": 0.0, "ci": 0.0, "n": 1, "vals": a.tolist()}
    std = float(a.std(ddof=1))
    se = std / np.sqrt(n)
    if n == 3:
        # Keep the tabulated constant so 3-seed results stay bit-identical.
        t_crit = T_975_N3
    else:
        # The interval is reported as a t-CI, so use the t quantile for the
        # actual sample size. The normal 1.96 factor is far too narrow for
        # small n (df=1 needs about 12.71).
        from scipy.stats import t as _student_t
        t_crit = float(_student_t.ppf(0.975, df=n - 1))
    return {
        "mean": float(a.mean()),
        "std": std,
        "ci": float(t_crit * se),
        "n": n,
        "vals": a.tolist(),
    }
# --------------------------------------------------------------------------- #
def _fmt_cell(c):
if c is None:
return ""
if c["n"] == 1:
return f"{100 * c['mean']:.2f}"
return f"{100 * c['mean']:.2f} ± {100 * c['ci']:.2f}"
def _summary_row(rows_3x3: dict[tuple[str, str], dict | None]) -> tuple[float, float, float, dict | None]:
"""Return (within_mean, cross_mean, cross_worst, worst_cell_summary)."""
within = []
cross = []
worst_v = None
worst_cell = None
for (src, tgt), cell in rows_3x3.items():
if cell is None:
continue
if src == tgt:
within.append(cell["mean"])
else:
cross.append(cell["mean"])
if worst_v is None or cell["mean"] < worst_v:
worst_v = cell["mean"]
worst_cell = (src, tgt, cell)
w = float(np.mean(within)) if within else float("nan")
c = float(np.mean(cross)) if cross else float("nan")
cw = worst_v if worst_v is not None else float("nan")
return w, c, cw, worst_cell
# --------------------------------------------------------------------------- #
def main() -> None:
    """CLI entry point: score every aggregator on the full cell matrix and write the reports.

    Outputs, written into ``--out-dir`` (default ``ABL``):
        ABLATION_TABLE_CROSS_summary.md  within / cross / worst-cell summary per aggregator
        ABLATION_TABLE_CROSS_full.md     per-aggregator source x target matrix
        ABLATION_TABLE_CROSS.json        the same data, machine readable
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out-dir", type=Path, default=ABL)
    args = ap.parse_args()
    args.out_dir.mkdir(parents=True, exist_ok=True)

    full = {}  # aggregator label -> {(src, tgt) -> cell summary}
    for label, fn_id, _why in AGGREGATORS:
        rows = {}
        for src in CROSS_DATASETS:
            for tgt in CROSS_DATASETS:
                rows[(src, tgt)] = _seed_means(src, tgt, fn_id)
        full[label] = rows
        n_ok = sum(1 for v in rows.values() if v is not None)
        print(f"[ok] {label:20s} cells={n_ok}/{len(rows)}", flush=True)

    # Summary table: within mean, cross mean, cross worst.
    # The last column is the within-minus-cross gap in percentage points.
    summary_lines = ["# Cross-dataset Group-A summary",
                     "",
                     f"3-seed mean ± 95% t-CI AUROC. Datasets = {CROSS_DATASETS}.",
                     "Aggregator fit on **target** benign val only.",
                     "",
                     "| Aggregator | Within (3 cells, mean) | Cross (6 cells, mean) | Cross worst cell | Within − Cross |",
                     "|---|---|---|---|---|"]
    summary_data = {}
    for label, fn_id, _why in AGGREGATORS:
        rows = full[label]
        w, c, cw, worst_cell = _summary_row(rows)
        gap = (w - c) * 100 if not np.isnan(w) and not np.isnan(c) else float("nan")
        worst_str = ""
        if worst_cell is not None:
            src, tgt, cell = worst_cell
            # Separate source and target so the worst-cell label is readable.
            worst_str = f"{PRETTY[src]}→{PRETTY[tgt]}: {_fmt_cell(cell)}"
        summary_lines.append(
            f"| {label} | {100 * w:.2f} | {100 * c:.2f} | {worst_str} | {gap:+.2f} |"
        )
        summary_data[label] = {"within_mean": w, "cross_mean": c, "cross_worst": cw, "worst_cell": worst_cell}
    summary_path = args.out_dir / "ABLATION_TABLE_CROSS_summary.md"
    summary_path.write_text("\n".join(summary_lines) + "\n", encoding="utf-8")
    print(f"[wrote] {summary_path}")

    # Full per-aggregator 3x3 matrices
    full_lines = ["# Cross-dataset Group-A full matrices",
                  "",
                  "Per aggregator: 3×3 matrix (rows = source / training, columns = target / test).",
                  "Each cell = 3-seed mean ± 95% t-CI AUROC (%). Diagonal italic = within-dataset.",
                  ""]
    for label, fn_id, why in AGGREGATORS:
        full_lines.append(f"## {label} ({why})")
        full_lines.append("")
        header = ["Source ↓ / Target →"] + [PRETTY[d] for d in CROSS_DATASETS]
        full_lines.append("| " + " | ".join(header) + " |")
        full_lines.append("|" + "|".join("---" for _ in header) + "|")
        for src in CROSS_DATASETS:
            row = [f"**{PRETTY[src]}**"]
            for tgt in CROSS_DATASETS:
                cell = full[label][(src, tgt)]
                txt = _fmt_cell(cell)
                if src == tgt:
                    txt = f"_{txt}_"
                row.append(txt)
            full_lines.append("| " + " | ".join(row) + " |")
        full_lines.append("")
    full_path = args.out_dir / "ABLATION_TABLE_CROSS_full.md"
    full_path.write_text("\n".join(full_lines), encoding="utf-8")
    print(f"[wrote] {full_path}")

    json_path = args.out_dir / "ABLATION_TABLE_CROSS.json"
    json_path.write_text(json.dumps({
        "summary": summary_data,
        "full": {label: {f"{src}->{tgt}": cell for (src, tgt), cell in rows.items()}
                 for label, rows in full.items()},
    }, indent=2, default=lambda o: None))
    print(f"[wrote] {json_path}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,180 @@
"""B-variant cross-dataset aggregation.
Reads:
artifacts/ablation/janus_<ds>_seed<S>_<gid>/phase1_scores.npz (within-dataset)
artifacts/ablation/cross/<gid>__seed<S>_<src>_to_<tgt>.npz (cross-dataset)
For each B-variant we apply the variant-appropriate aggregator (auto-key OAS
fits whatever sub-scores the variant produces). JANUS-full anchor is read from
the production route_comparison/ paths.
Outputs:
ABLATION_CROSS_B_summary.md within mean / cross mean / cross worst per gid
ABLATION_CROSS_B_full.md per-gid 3×3 matrices
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
import numpy as np
from aggregate_ablation import (
SCORE_FNS, T_975_N3, _auroc, _load_npz, _load_cross_npz,
)
ROOT = Path(__file__).resolve().parents[2]
ROUTE = ROOT / "artifacts" / "route_comparison"
ROUTE_CROSS = ROUTE / "cross"
ABL = ROOT / "artifacts" / "ablation"
ABL_CROSS = ABL / "cross"
CROSS_DATASETS = ["cicids2017", "cicddos2019", "ciciot2023"]
PRETTY = {
"cicids2017": "CICIDS17",
"cicddos2019": "CICDDoS19",
"ciciot2023": "CICIoT23",
}
SEEDS = [42, 43, 44]
# (gid, label, what_removed, score_fn_id)
B_VARIANTS = [
("janus_full", "JANUS-full", "", "OAS_all_available"),
("b1_noflow", "B1 no FLOW token","global context", "OAS_all_available"),
("b2_flowonly", "B2 flow-only", "packet sequence", "A1_terminal_norm"),
("b3_allcont", "B3 all-cont", "cont/disc split", "OAS_term_all"),
("b4_alldisc", "B4 all-disc", "cont/disc split (rev)", "OAS_disc_all"),
("b5_nodisc", "B5 λ_disc=0", "joint training", "OAS_term_all"),
]
def _within_path(gid: str, ds: str, seed: int) -> Path:
    """Path of the within-dataset phase1 npz for a variant (``"janus_full"`` reads from ``ROUTE``)."""
    is_anchor = gid == "janus_full"
    run_dir = ROUTE / f"janus_{ds}_seed{seed}" if is_anchor else ABL / f"janus_{ds}_seed{seed}_{gid}"
    return run_dir / "phase1_scores.npz"
def _cross_path(gid: str, src: str, tgt: str, seed: int) -> Path:
    """Path of the cross-dataset npz for a variant (``"janus_full"`` reads from ``ROUTE_CROSS``)."""
    pair = f"seed{seed}_{src}_to_{tgt}.npz"
    if gid != "janus_full":
        return ABL_CROSS / f"{gid}__{pair}"
    return ROUTE_CROSS / f"janus_{pair}"
def _cell_score(gid: str, src: str, tgt: str, seed: int, fn_id: str):
    """AUROC of one variant on one (src, tgt, seed) cell.

    Returns ``None`` when the npz is missing or the score function cannot be
    applied to the keys the file contains.
    """
    within = src == tgt
    npz = _within_path(gid, src, seed) if within else _cross_path(gid, src, tgt, seed)
    if not npz.exists():
        return None
    # Within-dataset files use val_/atk_ prefixes, cross files use b_/a_.
    val, atk = _load_npz(npz) if within else _load_cross_npz(npz)
    scored = SCORE_FNS[fn_id](val, atk)
    if scored is None:
        return None
    benign_scores, attack_scores = scored
    return _auroc(benign_scores, attack_scores)
def _seed_summary(vals: list[float]):
a = np.asarray([v for v in vals if v is not None and not np.isnan(v)])
if a.size == 0:
return None
if a.size == 1:
return {"mean": float(a[0]), "ci": 0.0, "n": 1}
se = a.std(ddof=1) / np.sqrt(a.size)
return {"mean": float(a.mean()),
"ci": float(T_975_N3 * se) if a.size == 3 else float(1.96 * se),
"n": int(a.size)}
def _fmt(c):
if c is None:
return ""
if c["n"] == 1:
return f"{100 * c['mean']:.2f}"
return f"{100 * c['mean']:.2f} ± {100 * c['ci']:.2f}"
def main() -> None:
    """Aggregate B-variant AUROCs over seeds and write the summary artefacts.

    For every variant in ``B_VARIANTS`` and every (source, target) pair of
    ``CROSS_DATASETS`` the per-seed AUROCs are summarised, then three files
    are written to ``--out-dir`` (default ``ABL``):

    * ``ABLATION_CROSS_B_summary.md`` – one row per variant,
    * ``ABLATION_CROSS_B_full.md``    – one source × target matrix per variant,
    * ``ABLATION_CROSS_B.json``       – the raw per-cell summaries.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out-dir", type=Path, default=ABL)
    args = ap.parse_args()
    args.out_dir.mkdir(parents=True, exist_ok=True)

    full = {}
    for gid, label, _why, fn_id in B_VARIANTS:
        rows = {}
        for src in CROSS_DATASETS:
            for tgt in CROSS_DATASETS:
                vals = [_cell_score(gid, src, tgt, s, fn_id) for s in SEEDS]
                rows[(src, tgt)] = _seed_summary(vals)
        full[gid] = (label, rows)
        n_ok = sum(1 for v in rows.values() if v is not None)
        print(f"[ok] {label:20s} cells={n_ok}/{len(rows)}", flush=True)

    # Summary
    lines = ["# B-variant cross-dataset summary",
             "",
             f"3-seed mean ± 95% t-CI AUROC. Datasets = {CROSS_DATASETS}.",
             "All B variants share the same aggregator-fit-on-target-benign protocol as JANUS-full.",
             "",
             # The last column holds (within mean - cross mean) in percentage points.
             "| Variant | What removed | Within (3 cells) | Cross (6 cells) | Cross worst | Within − Cross |",
             "|---|---|---|---|---|---|"]
    for gid, label, why, _fn_id in B_VARIANTS:
        _, rows = full[gid]
        within = [v["mean"] for (s, t), v in rows.items() if s == t and v is not None]
        cross_pairs = [((s, t), v) for (s, t), v in rows.items() if s != t and v is not None]
        cross = [v["mean"] for _, v in cross_pairs]
        worst = min(cross_pairs, key=lambda x: x[1]["mean"], default=None)
        w = float(np.mean(within)) if within else float("nan")
        c = float(np.mean(cross)) if cross else float("nan")
        worst_str = ""
        if worst is not None:
            (s, t), v = worst
            # Separate source and target; without the arrow the two dataset
            # names run together and the pair is unreadable.
            worst_str = f"{PRETTY[s]}→{PRETTY[t]}: {_fmt(v)}"
        gap = (w - c) * 100 if not np.isnan(w) and not np.isnan(c) else float("nan")
        lines.append(f"| {label} | {why} | {100 * w:.2f} | {100 * c:.2f} | {worst_str} | {gap:+.2f} |")
    summary_path = args.out_dir / "ABLATION_CROSS_B_summary.md"
    # Explicit UTF-8: the text contains non-ASCII characters and must not
    # depend on the platform's locale encoding.
    summary_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(f"[wrote] {summary_path}")

    # Full per-variant 3x3 matrices
    flines = ["# B-variant cross-dataset full matrices",
              "",
              "Per variant: 3×3 matrix (rows = source, columns = target). Diagonal italic.",
              "Each cell = 3-seed mean ± 95% t-CI AUROC (%).",
              ""]
    for gid, label, why, _fn_id in B_VARIANTS:
        _, rows = full[gid]
        flines.append(f"## {label} ({why})")
        flines.append("")
        header = ["Source ↓ / Target →"] + [PRETTY[d] for d in CROSS_DATASETS]
        flines.append("| " + " | ".join(header) + " |")
        flines.append("|" + "|".join("---" for _ in header) + "|")
        for src in CROSS_DATASETS:
            row = [f"**{PRETTY[src]}**"]
            for tgt in CROSS_DATASETS:
                txt = _fmt(rows[(src, tgt)])
                if src == tgt:
                    txt = f"_{txt}_"
                row.append(txt)
            flines.append("| " + " | ".join(row) + " |")
        flines.append("")
    full_path = args.out_dir / "ABLATION_CROSS_B_full.md"
    full_path.write_text("\n".join(flines), encoding="utf-8")
    print(f"[wrote] {full_path}")

    json_path = args.out_dir / "ABLATION_CROSS_B.json"
    json_path.write_text(json.dumps({
        gid: {"label": label, "rows": {f"{s}->{t}": v for (s, t), v in rows.items()}}
        for gid, (label, rows) in full.items()
    }, indent=2, default=lambda o: None), encoding="utf-8")
    print(f"[wrote] {json_path}")
if __name__ == "__main__":
main()

View File

@@ -17,8 +17,18 @@ sys.path.insert(0, str(REPO / 'Unified_CFM'))
from FeatureExtractor import FE
from KitNET.KitNET import KitNET
from data import load_unified_data
PCAP_GLOBS = {'iscxtor': str(REPO / 'datasets/iscxtor2016/raw/pcap_extracted/**/*.pcap'), 'cicids2017': str(REPO / 'datasets/cicids2017/raw/pcap/*.pcap'), 'cicddos2019': str(REPO / 'datasets/cicddos2019/raw/pcap/*')}
WITHIN_DIRS = {'iscxtor_within': ('phase25_multiseed_2026_04_25/iscxtor2016_lambda0p3_seed{seed}', 'iscxtor', {'n_val': 10000, 'n_atk': None}), 'cicids_within': ('phase25_sigma06_multiseed_2026_04_25/cicids2017_lambda0p3_sigma0p6_seed{seed}', 'cicids2017', {'n_val': 10000, 'n_atk': 30000}), 'cicddos_within': ('phase25_multiseed_2026_04_25/cicddos2019_lambda0p3_seed{seed}', 'cicddos2019', {'n_val': 10000, 'n_atk': 20000})}
PCAP_GLOBS = {
'iscxtor2016': str(REPO / 'datasets/iscxtor2016/raw/pcap_extracted/**/*.pcap'),
'cicids2017': str(REPO / 'datasets/cicids2017/raw/pcap/*.pcap'),
'cicddos2019': str(REPO / 'datasets/cicddos2019/raw/pcap/*'),
'ciciot2023': str(REPO / 'datasets/ciciot2023/raw/pcap/**/*.pcap'),
}
WITHIN_DIRS = {
'iscxtor_within': ('route_comparison/janus_iscxtor2016_seed{seed}', 'iscxtor2016', {'n_val': 10000, 'n_atk': None}),
'cicids_within': ('route_comparison/janus_cicids2017_seed{seed}', 'cicids2017', {'n_val': 10000, 'n_atk': None}),
'cicddos_within': ('route_comparison/janus_cicddos2019_seed{seed}', 'cicddos2019', {'n_val': 10000, 'n_atk': None}),
'ciciot_within': ('route_comparison/janus_ciciot2023_seed{seed}', 'ciciot2023', {'n_val': 10000, 'n_atk': None}),
}
def _canonical_key(src_ip, dst_ip, src_port, dst_port, protocol) -> tuple:
a = (src_ip, src_port)
@@ -69,11 +79,47 @@ class FEWithMeta(FE):
(srcproto, dstproto, IPtype) = ('icmp', 'icmp', 0)
elif srcIP + srcproto + dstIP + dstproto == '':
(srcIP, dstIP) = (row[2], row[3])
elif self.parse_type == 'scapy':
from scapy.all import IP, IPv6, TCP, UDP, ARP, ICMP
packet = self.scapyin[self.curPacketIndx]
IPtype = np.nan
timestamp = packet.time
framelen = len(packet)
if packet.haslayer(IP):
srcIP = packet[IP].src
dstIP = packet[IP].dst
IPtype = 0
elif packet.haslayer(IPv6):
srcIP = packet[IPv6].src
dstIP = packet[IPv6].dst
IPtype = 1
else:
srcIP = ''
dstIP = ''
if packet.haslayer(TCP):
srcproto = str(packet[TCP].sport)
dstproto = str(packet[TCP].dport)
elif packet.haslayer(UDP):
srcproto = str(packet[UDP].sport)
dstproto = str(packet[UDP].dport)
else:
srcproto = ''
dstproto = ''
srcMAC = packet.src
dstMAC = packet.dst
if srcproto == '':
if packet.haslayer(ARP):
(srcproto, dstproto) = ('arp', 'arp')
(srcIP, dstIP, IPtype) = (packet[ARP].psrc, packet[ARP].pdst, 0)
elif packet.haslayer(ICMP):
(srcproto, dstproto, IPtype) = ('icmp', 'icmp', 0)
elif srcIP + srcproto + dstIP + dstproto == '':
(srcIP, dstIP) = (packet.src, packet.dst)
else:
return []
try:
sp = int(srcproto) if srcproto.isdigit() else 0
dp = int(dstproto) if dstproto.isdigit() else 0
sp = int(srcproto) if str(srcproto).isdigit() else 0
dp = int(dstproto) if str(dstproto).isdigit() else 0
except Exception:
(sp, dp) = (0, 0)
try:

View File

@@ -0,0 +1,132 @@
"""Render Unified-style 3-panel field view per dataset from run_field_view.py output.
Panels (no titles; semantic info encoded in filename):
L: velocity field at t=0.5 (heatmap of log10‖v‖ + streamlines)
M: attack reverse trajectories t=1 → t=0 (lines + endpoints over benign t=1 cloud)
R: forward generation cloud comparison (benign t=1 / N(0,I) / generated overlays)
"""
from __future__ import annotations
import argparse
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
ROOT = Path(__file__).resolve().parents[2]
OUT = ROOT / "artifacts" / "janus_mechanism_figures_2026_05_08"
def _set_lim(ax, x, y, pad=0.08):
xlo, xhi = x.min(), x.max()
ylo, yhi = y.min(), y.max()
sx, sy = xhi - xlo, yhi - ylo
ax.set_xlim(xlo - pad * sx, xhi + pad * sx)
ax.set_ylim(ylo - pad * sy, yhi + pad * sy)
def plot_one(npz: Path, dataset: str) -> Path:
    """Render the 3-panel field view for one dataset and save it as PDF + PNG.

    Panels, left to right: velocity-field heatmap with streamlines and a sparse
    benign overlay; attack reverse trajectories over the benign t=1 cloud;
    forward-generated endpoints against the benign t=1 and t=0 clouds.

    Args:
        npz: Archive holding the arrays read at the top of the function.
        dataset: Display name; used in the panel captions and, lower-cased, in
            the output filename.

    Returns:
        Path of the PDF written under ``OUT``; a PNG with the same stem is
        written next to it.
    """
    # Read everything up front inside a context manager so the archive's file
    # handle is released before plotting (np.load on a .npz keeps it open).
    with np.load(npz) as z:
        GX = z["grid_x"]
        GY = z["grid_y"]
        field_log = z["field_log_norm"]
        field_v = z["field_v_2d"]
        benign_t1 = z["benign_t1_2d"]
        benign_t05 = z["benign_t05_2d"]
        benign_t0 = z["benign_t0_2d"]
        ra = z["reverse_a_2d"]
        fw = z["forward_v_2d"]
        ev = z["pca_explained_var"]

    fig = plt.figure(figsize=(15.5, 5.0), constrained_layout=True)
    gs = fig.add_gridspec(1, 3, width_ratios=[1.05, 1, 1])

    # ========== L: velocity field heatmap + streamplot ==========
    axL = fig.add_subplot(gs[0, 0])
    # Clip the colour range to the 5th-95th percentile of the log-norm field.
    vmin, vmax = np.percentile(field_log, [5, 95])
    pcm = axL.pcolormesh(GX, GY, field_log, cmap="viridis", shading="auto",
                         vmin=vmin, vmax=vmax, rasterized=True)
    cbar = fig.colorbar(pcm, ax=axL, shrink=0.85, pad=0.02)
    cbar.set_label(r"$\log_{10}\|v(x_t,t{=}0.5)\|$ (full token)", fontsize=8)
    cbar.ax.tick_params(labelsize=7)
    # streamlines: width varies with local speed
    speed = np.linalg.norm(field_v, axis=-1)
    lw = 0.35 + 1.6 * (speed / (speed.max() + 1e-9))
    axL.streamplot(GX, GY, field_v[..., 0], field_v[..., 1],
                   color="white", linewidth=lw, density=1.4, arrowsize=0.7)
    # sparse benign t=0.5 cloud overlay (light, doesn't drown out heatmap)
    n_overlay = min(300, benign_t05.shape[0])
    rng = np.random.default_rng(0)  # fixed seed: same overlay subset every run
    idx_ov = rng.choice(benign_t05.shape[0], n_overlay, replace=False)
    axL.scatter(benign_t05[idx_ov, 0], benign_t05[idx_ov, 1],
                s=3, c="white", alpha=0.55, edgecolors="black",
                linewidths=0.15, rasterized=True, zorder=4)
    axL.set_xlabel(f"PC1 ({100*ev[0]:.1f}%)")
    axL.set_ylabel(f"PC2 ({100*ev[1]:.1f}%)")
    axL.text(0.02, 1.02, f"{dataset} · velocity field at t=0.5",
             transform=axL.transAxes, fontsize=10)

    # ========== M: attack reverse trajectories over benign t=1 cloud ==========
    axM = fig.add_subplot(gs[0, 1])
    axM.scatter(benign_t1[:, 0], benign_t1[:, 1], s=6, c="#a6cee3", alpha=0.55,
                edgecolors="none", label="benign cloud (t=1)", rasterized=True)
    for i in range(ra.shape[0]):
        axM.plot(ra[i, :, 0], ra[i, :, 1], color="#d7191c", lw=0.55, alpha=0.55)
    # First snapshot is drawn as the t=1 start, last snapshot as the t=0 end.
    axM.scatter(ra[:, 0, 0], ra[:, 0, 1], s=14, c="#d7191c", marker="o",
                edgecolors="white", linewidths=0.4, label="attack t=1 (start)", zorder=3)
    axM.scatter(ra[:, -1, 0], ra[:, -1, 1], s=18, c="#d7191c", marker="x",
                linewidths=1.0, label="attack t=0 (end)", zorder=3)
    axM.legend(loc="upper left", bbox_to_anchor=(0.0, -0.12), ncol=3,
               fontsize=7, framealpha=0.85, borderaxespad=0.0)
    _set_lim(axM,
             np.r_[benign_t1[:, 0], ra[..., 0].ravel()],
             np.r_[benign_t1[:, 1], ra[..., 1].ravel()])
    axM.set_xlabel("PC1")
    axM.text(0.02, 1.02, f"{dataset} · attack reverse trajectories t=1→0",
             transform=axM.transAxes, fontsize=10)

    # ========== R: forward generation cloud comparison ==========
    axR = fig.add_subplot(gs[0, 2])
    gen = fw[:, -1, :]  # generated samples (t=1 endpoints)
    axR.scatter(benign_t0[:, 0], benign_t0[:, 1], s=6, c="#888888", alpha=0.40,
                edgecolors="none", label="N(0,I) at t=0", rasterized=True)
    axR.scatter(benign_t1[:, 0], benign_t1[:, 1], s=8, c="#1f78b4", alpha=0.55,
                edgecolors="none", label="benign cloud (t=1)", rasterized=True)
    axR.scatter(gen[:, 0], gen[:, 1], s=12, c="#33a02c", alpha=0.75,
                edgecolors="white", linewidths=0.3,
                label="generated (forward t=0→1)", rasterized=True)
    axR.legend(loc="upper left", bbox_to_anchor=(0.0, -0.12), ncol=3,
               fontsize=7, framealpha=0.85, borderaxespad=0.0)
    _set_lim(axR,
             np.r_[benign_t1[:, 0], benign_t0[:, 0], gen[:, 0]],
             np.r_[benign_t1[:, 1], benign_t0[:, 1], gen[:, 1]])
    axR.set_xlabel("PC1")
    axR.text(0.02, 1.02, f"{dataset} · forward generation vs benign cloud",
             transform=axR.transAxes, fontsize=10)

    out = OUT / f"velocity_field_view_{dataset.lower()}.pdf"
    fig.savefig(out, bbox_inches="tight")
    fig.savefig(out.with_suffix(".png"), bbox_inches="tight", dpi=160)
    plt.close(fig)
    return out
def main() -> None:
    """CLI entry point: render the field view for each requested dataset.

    Looks for ``field_<dataset>.npz`` under ``OUT``; datasets whose archive is
    missing are reported and skipped.
    """
    display_names = {
        "cicids2017": "CICIDS2017",
        "cicddos2019": "CICDDoS2019",
        "iscxtor2016": "ISCXTor2016",
        "ciciot2023": "CICIoT2023",
    }

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--datasets",
        nargs="+",
        default=["cicids2017", "cicddos2019", "iscxtor2016", "ciciot2023"],
    )
    args = parser.parse_args()

    OUT.mkdir(parents=True, exist_ok=True)
    mpl.rcParams.update({"font.size": 9, "pdf.fonttype": 42, "ps.fonttype": 42})

    for name in args.datasets:
        archive = OUT / f"field_{name}.npz"
        if archive.exists():
            written = plot_one(archive, display_names.get(name, name))
            print(f"[wrote] {written}")
        else:
            print(f"[skip] missing {archive}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,329 @@
"""Mechanism-level figures for JANUS / Mixed_CFM.
Generates:
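subscore_correlation_benign_val.pdf — 10x10 sub-score correlation per dataset (benign val)
dual_head_oas_ellipses_top__whitened_pca_bottom.pdf — (terminal_norm, disc_nll_total) + OAS ellipses (top row), whitened 2D projection (bottom row)
score_distributions_raw__termOAS__discOAS__allOAS.pdf — raw vs OAS-aggregated score distributions across datasets
Each PDF is written together with a PNG of the same stem.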
Inputs: artifacts/route_comparison/janus_<dataset>_seed42/phase1_scores.npz
"""
from __future__ import annotations
import argparse
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.patches import Ellipse
from sklearn.covariance import OAS
from sklearn.metrics import roc_auc_score
ROOT = Path(__file__).resolve().parents[2]
RUNS = ROOT / "artifacts" / "route_comparison"
OUT = ROOT / "artifacts" / "janus_mechanism_figures_2026_05_08"
DATASETS = ["iscxtor2016", "cicids2017", "cicddos2019", "ciciot2023"]
PRETTY = {
"iscxtor2016": "ISCXTor2016",
"cicids2017": "CICIDS2017",
"cicddos2019": "CICDDoS2019",
"ciciot2023": "CICIoT2023",
}
SCORE_KEYS = [
"terminal_norm", "terminal_flow", "terminal_packet",
"disc_nll_total",
"disc_nll_ch2", "disc_nll_ch3", "disc_nll_ch4",
"disc_nll_ch5", "disc_nll_ch6", "disc_nll_ch7",
]
# Plain-text tick labels, index-aligned with SCORE_KEYS. An earlier LaTeX
# variant of this list was assigned and then immediately overwritten, so only
# the effective definition is kept.
SCORE_LABELS = [
    "term_norm", "term_flow", "term_pkt",
    "disc_total", "disc_ch2", "disc_ch3", "disc_ch4",
    "disc_ch5", "disc_ch6", "disc_ch7",
]
def load_scores(dataset: str, seed: int = 42) -> tuple[np.ndarray, np.ndarray]:
    """Load the per-sample sub-scores of one run as two float64 matrices.

    Reads ``phase1_scores.npz`` from ``RUNS / janus_<dataset>_seed<seed>`` and
    stacks the ``val_<key>`` / ``atk_<key>`` arrays for every key in
    ``SCORE_KEYS`` as columns.

    Returns:
        ``(val_S, atk_S)``, each with one column per entry of ``SCORE_KEYS``.
        NaN is replaced by 0 and ±inf by ±1e6.
    """
    npz = RUNS / f"janus_{dataset}_seed{seed}" / "phase1_scores.npz"
    # NOTE(security): allow_pickle=True lets the archive execute arbitrary code
    # on load. Kept for compatibility with existing archives; only load files
    # produced by trusted runs.
    # The context manager closes the archive's file handle once the arrays
    # have been materialised.
    with np.load(npz, allow_pickle=True) as z:
        val = np.stack([z[f"val_{k}"] for k in SCORE_KEYS], axis=1)
        atk = np.stack([z[f"atk_{k}"] for k in SCORE_KEYS], axis=1)
    val = np.nan_to_num(val, nan=0.0, posinf=1e6, neginf=-1e6).astype(np.float64)
    atk = np.nan_to_num(atk, nan=0.0, posinf=1e6, neginf=-1e6).astype(np.float64)
    return val, atk
def fit_oas(val_S: np.ndarray):
    """Fit an OAS covariance on ``val_S`` and derive Mahalanobis/whitening helpers.

    Returns:
        ``(mu, inv_cov, cov, Linv)`` where ``mu`` is the column mean, ``cov``
        the OAS covariance estimate, ``inv_cov`` the inverse of ``cov`` with a
        ``1e-9`` ridge added, and ``Linv`` the inverse of the Cholesky factor
        of the same ridged matrix (so ``(x - mu) @ Linv.T`` whitens ``x``).
    """
    mu = val_S.mean(axis=0)
    cov = OAS().fit(val_S).covariance_
    dim = cov.shape[0]
    inv_cov = np.linalg.inv(cov + 1e-9 * np.eye(dim))
    # cov = L L^T  =>  whitening applies L^{-1} to the centred data.
    chol = np.linalg.cholesky(cov + 1e-9 * np.eye(dim))
    Linv = np.linalg.solve(chol, np.eye(chol.shape[0]))
    return mu, inv_cov, cov, Linv
def mahal(S: np.ndarray, mu: np.ndarray, inv_cov: np.ndarray) -> np.ndarray:
    """Row-wise quadratic form ``(s - mu)^T inv_cov (s - mu)``.

    This is the squared Mahalanobis distance (no square root is taken).
    """
    centred = S - mu
    return np.einsum("ni,ij,nj->n", centred, inv_cov, centred)
def plot_corr_heatmap() -> Path:
    """Draw one sub-score correlation heatmap per dataset and save PDF + PNG.

    For every dataset in ``DATASETS`` the Pearson correlation of the benign-val
    sub-scores is shown with the diagonal blanked; the caption reports the mean
    absolute value of the remaining (non-NaN) entries.

    Returns:
        Path of the written PDF under ``OUT``.
    """
    fig, axes = plt.subplots(1, 4, figsize=(18, 4.6), constrained_layout=True)
    tick_pos = range(len(SCORE_LABELS))
    for ax, ds in zip(axes, DATASETS):
        val, _ = load_scores(ds)
        # Pearson correlation on benign val; mask diagonals to free visual budget
        C = np.corrcoef(val, rowvar=False)
        np.fill_diagonal(C, np.nan)
        im = ax.imshow(C, vmin=-1, vmax=1, cmap="RdBu_r")
        ax.set_xticks(tick_pos)
        ax.set_yticks(tick_pos)
        ax.set_xticklabels(SCORE_LABELS, rotation=60, ha="right", fontsize=7)
        ax.set_yticklabels(SCORE_LABELS, fontsize=7)
        # Every entry that is not NaN: the blanked diagonal is excluded, as is
        # any NaN that corrcoef itself produced.
        off = C[~np.isnan(C)]
        ax.text(
            0.02, 1.06, f"{PRETTY[ds]} ⟨|ρ|⟩={np.abs(off).mean():.2f}",
            transform=ax.transAxes, fontsize=10,
        )
    # One shared colour bar; all panels use the same fixed [-1, 1] range.
    cbar = fig.colorbar(im, ax=axes, shrink=0.85, location="right", pad=0.01)
    cbar.set_label("Pearson ρ on benign val", fontsize=10)
    out = OUT / "subscore_correlation_benign_val.pdf"
    fig.savefig(out, bbox_inches="tight")
    fig.savefig(out.with_suffix(".png"), bbox_inches="tight", dpi=160)
    plt.close(fig)
    return out
def _ellipse_from_2x2(mu2, cov2, n_sigma, **kw):
    """Build the ``n_sigma`` covariance ellipse of a 2x2 covariance matrix.

    The ellipse is centred on ``mu2``; its full axis lengths are
    ``2 * n_sigma * sqrt(eigenvalue)`` and it is rotated to the direction of
    the eigenvector with the largest eigenvalue. Extra keyword arguments are
    forwarded to ``Ellipse``.
    """
    eig_vals, eig_vecs = np.linalg.eigh(cov2)
    # eigh returns ascending eigenvalues; put the major axis first.
    descending = eig_vals.argsort()[::-1]
    eig_vals = eig_vals[descending]
    eig_vecs = eig_vecs[:, descending]
    major = eig_vecs[:, 0]
    angle = np.degrees(np.arctan2(major[1], major[0]))
    width, height = 2 * n_sigma * np.sqrt(eig_vals)
    return Ellipse(xy=mu2, width=width, height=height, angle=angle, **kw)
def _dual_head_raw_panel(ax, val, atk, rng, title, first_col):
    """Top-row panel: raw terminal_norm vs disc_nll_total scatter with 1/2/3-sigma OAS ellipses."""
    # raw two-axes scatter: terminal_norm (idx 0) vs disc_nll_total (idx 3)
    x_v, y_v = val[:, 0], val[:, 3]
    x_a, y_a = atk[:, 0], atk[:, 3]
    # subsample for legibility
    nv = min(3000, len(x_v))
    na = min(3000, len(x_a))
    iv = rng.choice(len(x_v), nv, replace=False)
    ia = rng.choice(len(x_a), na, replace=False)
    ax.scatter(x_v[iv], y_v[iv], s=3, alpha=0.25, c="#2c7fb8", label="benign", rasterized=True)
    ax.scatter(x_a[ia], y_a[ia], s=3, alpha=0.18, c="#d7191c", label="attack", rasterized=True)
    # 2D OAS on these two cols only
    XY_v = val[:, [0, 3]]
    oas2 = OAS().fit(XY_v)
    mu2 = XY_v.mean(axis=0)
    for ns, ls in [(1, "-"), (2, "--"), (3, ":")]:
        e = _ellipse_from_2x2(
            mu2, oas2.covariance_, ns,
            edgecolor="black", facecolor="none", lw=1.1, ls=ls, alpha=0.85,
        )
        ax.add_patch(e)
    ax.set_xlabel(r"$t_{\mathrm{norm}}$ (continuous head)")
    if first_col:
        ax.set_ylabel(r"$\mathcal{L}_{\mathrm{disc}}$ (discrete head)")
    ax.text(0.02, 1.03, title, transform=ax.transAxes, fontsize=11)
    if first_col:
        ax.legend(loc="upper right", fontsize=8, framealpha=0.85)
    # zoom to the 0.5%-99.5% quantile range of benign and attack combined,
    # padded by 5% of that range on each side
    x_lo = min(np.quantile(x_v, 0.005), np.quantile(x_a, 0.005))
    x_hi = max(np.quantile(x_v, 0.995), np.quantile(x_a, 0.995))
    y_lo = min(np.quantile(y_v, 0.005), np.quantile(y_a, 0.005))
    y_hi = max(np.quantile(y_v, 0.995), np.quantile(y_a, 0.995))
    ax.set_xlim(x_lo - 0.05 * (x_hi - x_lo), x_hi + 0.05 * (x_hi - x_lo))
    ax.set_ylim(y_lo - 0.05 * (y_hi - y_lo), y_hi + 0.05 * (y_hi - y_lo))


def _dual_head_whitened_panel(ax2, val, atk, rng, first_col):
    """Bottom-row panel: OAS-whitened 2D projection with unit circles and the Mahalanobis AUROC."""
    mu, inv_cov, _, Linv = fit_oas(val)
    Wv = (val - mu) @ Linv.T
    Wa = (atk - mu) @ Linv.T
    # Axis 1 is the direction of the attack-minus-benign mean shift in
    # whitened space (no PCA is fitted for it).
    delta = Wa.mean(axis=0) - Wv.mean(axis=0)
    delta_norm = np.linalg.norm(delta) + 1e-12
    u1 = delta / delta_norm
    # u2: top PC of attack-whitened residual orthogonal to u1
    Wa_res = Wa - Wa @ u1[:, None] * u1[None, :]
    _, _, Vt = np.linalg.svd(Wa_res - Wa_res.mean(axis=0), full_matrices=False)
    u2 = Vt[0]
    # Re-orthogonalise against u1 and renormalise.
    u2 = u2 - (u2 @ u1) * u1
    u2 /= np.linalg.norm(u2) + 1e-12
    Wv2 = np.c_[Wv @ u1, Wv @ u2]
    Wa2 = np.c_[Wa @ u1, Wa @ u2]
    nv2 = min(3000, len(Wv2))
    na2 = min(3000, len(Wa2))
    iv2 = rng.choice(len(Wv2), nv2, replace=False)
    ia2 = rng.choice(len(Wa2), na2, replace=False)
    ax2.scatter(Wv2[iv2, 0], Wv2[iv2, 1], s=3, alpha=0.25, c="#2c7fb8", rasterized=True)
    ax2.scatter(Wa2[ia2, 0], Wa2[ia2, 1], s=3, alpha=0.18, c="#d7191c", rasterized=True)
    # benign in whitened space ≈ N(0,I); draw unit-σ Mahalanobis circles
    for ns, ls in [(1, "-"), (2, "--"), (3, ":")]:
        ax2.add_patch(plt.Circle((0, 0), ns, fill=False, edgecolor="black", lw=1.1, ls=ls, alpha=0.85))
    ax2.set_xlabel("whitened PC1 (mean-shift dir)")
    if first_col:
        ax2.set_ylabel("whitened PC2")
    # symlog axes so benign unit-ball is visible alongside far-field attack
    # linthresh = 3 covers the 3σ Mahalanobis circles linearly
    ax2.set_xscale("symlog", linthresh=3)
    ax2.set_yscale("symlog", linthresh=3)
    # set a generous range that shows the unit circles AND the attack mass
    x_max = max(np.quantile(np.abs(Wa2[:, 0]), 0.995), 5)
    y_max = max(np.quantile(np.abs(Wa2[:, 1]), 0.995), 5)
    ax2.set_xlim(-x_max * 1.1, x_max * 1.1)
    ax2.set_ylim(-y_max * 1.1, y_max * 1.1)
    ax2.axhline(0, color="0.7", lw=0.6, zorder=0)
    ax2.axvline(0, color="0.7", lw=0.6, zorder=0)
    # add Mahalanobis AUROC for reference (computed on all rows, not the subsample)
    m_v = mahal(val, mu, inv_cov)
    m_a = mahal(atk, mu, inv_cov)
    y = np.r_[np.zeros(len(m_v)), np.ones(len(m_a))]
    s = np.r_[m_v, m_a]
    auc = roc_auc_score(y, s)
    ax2.text(
        0.02, 0.97, f"AUROC(mahal-OAS)={auc:.4f}",
        transform=ax2.transAxes, ha="left", va="top",
        fontsize=9, bbox=dict(boxstyle="round,pad=0.25", fc="white", ec="0.5", alpha=0.9),
    )


def plot_dual_head() -> Path:
    """Draw the two-row dual-head figure for every dataset and save PDF + PNG.

    Top row: raw (terminal_norm, disc_nll_total) scatter with OAS ellipses.
    Bottom row: OAS-whitened projection with Mahalanobis circles and AUROC.
    A single seeded generator is shared by both rows, so the panels must be
    drawn in top-then-bottom order per dataset to keep the subsamples stable.

    Returns:
        Path of the written PDF under ``OUT``.
    """
    fig = plt.figure(figsize=(16, 8.5), constrained_layout=True)
    gs = fig.add_gridspec(2, 4)
    rng = np.random.default_rng(0)
    for col, ds in enumerate(DATASETS):
        val, atk = load_scores(ds)
        first_col = col == 0
        ax = fig.add_subplot(gs[0, col])
        _dual_head_raw_panel(ax, val, atk, rng, PRETTY[ds], first_col)
        ax2 = fig.add_subplot(gs[1, col])
        _dual_head_whitened_panel(ax2, val, atk, rng, first_col)
    out = OUT / "dual_head_oas_ellipses_top__whitened_pca_bottom.pdf"
    fig.savefig(out, bbox_inches="tight")
    fig.savefig(out.with_suffix(".png"), bbox_inches="tight", dpi=160)
    plt.close(fig)
    return out
def plot_score_hist() -> Path:
    """Draw the 4x4 grid of benign-vs-attack score histograms and save PDF + PNG.

    Columns are the datasets in ``DATASETS``. Rows are: raw ``terminal_norm``
    (linear x), then the OAS quadratic-form score on the three terminal
    sub-scores, on the seven discrete sub-scores, and on all ten (log x).

    Returns:
        Path of the written PDF under ``OUT``.
    """
    terminal_keys = ["terminal_norm", "terminal_flow", "terminal_packet"]
    discrete_keys = [
        "disc_nll_total", "disc_nll_ch2", "disc_nll_ch3",
        "disc_nll_ch4", "disc_nll_ch5", "disc_nll_ch6", "disc_nll_ch7"]

    def _subset_scores(val, atk, keys):
        # OAS fitted on the benign columns selected by `keys`; returns the
        # quadratic-form score of benign and attack rows on that subset.
        cols = [SCORE_KEYS.index(k) for k in keys]
        centre = val[:, cols].mean(axis=0)
        fitted = OAS().fit(val[:, cols])
        precision = np.linalg.inv(fitted.covariance_ + 1e-9 * np.eye(len(cols)))
        return mahal(val[:, cols], centre, precision), mahal(atk[:, cols], centre, precision)

    fig, axes = plt.subplots(4, 4, figsize=(16, 12), constrained_layout=True)
    for col, ds in enumerate(DATASETS):
        val, atk = load_scores(ds)
        mu, inv_cov, _, _ = fit_oas(val)

        # Row 0: raw terminal_norm (linear)
        _hist_panel(axes[0, col], val[:, 0], atk[:, 0], log_x=False)

        # Row 1: OAS-Mahal terminal3 (log)
        benign_s, attack_s = _subset_scores(val, atk, terminal_keys)
        _hist_panel(axes[1, col], benign_s, attack_s, log_x=True)

        # Row 2: OAS-Mahal disc7 (log)
        benign_s, attack_s = _subset_scores(val, atk, discrete_keys)
        _hist_panel(axes[2, col], benign_s, attack_s, log_x=True)

        # Row 3: OAS-Mahal all 10 (log)
        _hist_panel(axes[3, col], mahal(val, mu, inv_cov), mahal(atk, mu, inv_cov), log_x=True)

        axes[0, col].text(0.02, 1.04, PRETTY[ds], transform=axes[0, col].transAxes, fontsize=11)

    # row labels go on the left-most column only
    labels_by_row = (
        "raw terminal_norm",
        "OAS Mahal: term3 (CFM head)",
        "OAS Mahal: disc7 (discrete head)",
        "OAS Mahal: all 10 (deployed)",
    )
    for row_idx, text in enumerate(labels_by_row):
        axes[row_idx, 0].set_ylabel(text, fontsize=10)
    axes[0, 3].legend(loc="upper right", fontsize=8, framealpha=0.85)

    out = OUT / "score_distributions_raw__termOAS__discOAS__allOAS.pdf"
    fig.savefig(out, bbox_inches="tight")
    fig.savefig(out.with_suffix(".png"), bbox_inches="tight", dpi=160)
    plt.close(fig)
    return out
def _hist_panel(ax, sv, sa, log_x: bool = False):
    """Draw overlaid benign/attack histograms on ``ax`` and annotate the AUROC.

    Args:
        ax: Matplotlib axes to draw on.
        sv: Benign scores (labelled 0 for the AUROC).
        sa: Attack scores (labelled 1 for the AUROC).
        log_x: If true, use geometrically spaced bins and a log x-axis;
            otherwise use linear bins over the 0.1%-99.9% quantile range with
            out-of-range values clipped into the edge bins.
    """
    y = np.r_[np.zeros(len(sv)), np.ones(len(sa))]
    s = np.r_[sv, sa]
    auc = roc_auc_score(y, s)
    # Use within-class fraction weighting so heights stay comparable when bin
    # widths are uneven (geomspace on log-x) — density=True compresses right-tail
    # mass invisibly because density-per-linear-unit collapses at high x.
    # The weights are built as floats explicitly: np.full_like would inherit the
    # dtype of the scores and truncate 1/n to 0 for integer-typed input.
    w_v = np.full(np.shape(sv), 1.0 / len(sv), dtype=float)
    w_a = np.full(np.shape(sa), 1.0 / len(sa), dtype=float)
    if log_x:
        # Floor non-positive scores at a small positive value so they can be
        # placed on a log axis.
        eps = max(1e-3, np.quantile(s[s > 0], 0.001) * 0.5) if (s > 0).any() else 1e-3
        sv_p = np.maximum(sv, eps)
        sa_p = np.maximum(sa, eps)
        lo = np.quantile(np.r_[sv_p, sa_p], 0.001)
        hi = max(sv_p.max(), sa_p.max())  # show full right tail
        bins = np.geomspace(max(lo, eps), hi, 80)
        ax.hist(sv_p, bins=bins, color="#2c7fb8", alpha=0.55, label="benign", weights=w_v)
        ax.hist(sa_p, bins=bins, color="#d7191c", alpha=0.55, label="attack", weights=w_a)
        ax.set_xscale("log")
    else:
        lo, hi = np.quantile(s, [0.001, 0.999])
        bins = np.linspace(lo, hi, 80)
        ax.hist(np.clip(sv, lo, hi), bins=bins, color="#2c7fb8", alpha=0.55, label="benign", weights=w_v)
        ax.hist(np.clip(sa, lo, hi), bins=bins, color="#d7191c", alpha=0.55, label="attack", weights=w_a)
    ax.text(
        0.97, 0.95, f"AUROC={auc:.4f}",
        transform=ax.transAxes, ha="right", va="top", fontsize=8,
        bbox=dict(boxstyle="round,pad=0.2", fc="white", ec="0.5", alpha=0.9),
    )
    ax.set_yticks([])
def main() -> None:
    """CLI entry point: render the figure selected by ``--which`` (default: all)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--which", choices=["all", "corr", "dual", "hist"], default="all")
    args = parser.parse_args()

    OUT.mkdir(parents=True, exist_ok=True)
    mpl.rcParams.update({
        "font.size": 10,
        "axes.titlesize": 11,
        "axes.labelsize": 10,
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
    })

    # (selector, renderer) pairs, run in this order when "all" is requested.
    figures = (
        ("corr", plot_corr_heatmap),
        ("dual", plot_dual_head),
        ("hist", plot_score_hist),
    )
    for selector, render in figures:
        if args.which in ("all", selector):
            written = render()
            print(f"[wrote] {written}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,135 @@
"""Plots 4 (CFM trajectory in 2D PCA) and 5 (velocity-norm vs t).
Reads npz produced by run_trajectory_inference.py.
"""
from __future__ import annotations
import argparse
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.decomposition import PCA
ROOT = Path(__file__).resolve().parents[2]
OUT = ROOT / "artifacts" / "janus_mechanism_figures_2026_05_08"
PRETTY = {
"cicids2017": "CICIDS2017",
"cicddos2019": "CICDDoS2019",
"iscxtor2016": "ISCXTor2016",
"ciciot2023": "CICIoT2023",
}
def _pca_fit_project(benign_t1: np.ndarray, benign_traj: np.ndarray, attack_traj: np.ndarray):
    """Fit a 2-component PCA on ``benign_t1`` and project both trajectory sets.

    Each trajectory array is flattened to rows of its last axis, projected,
    and reshaped to ``(shape[0], shape[1], 2)``.

    Returns:
        ``(pca, benign_2d, attack_2d)``.
    """
    pca = PCA(n_components=2).fit(benign_t1)

    def _project(traj):
        flat = traj.reshape(-1, traj.shape[-1])
        return pca.transform(flat).reshape(traj.shape[0], traj.shape[1], 2)

    benign_2d = _project(benign_traj)
    attack_2d = _project(attack_traj)
    return pca, benign_2d, attack_2d
def _draw_traj(ax, traj_v, traj_a, title, n_show=80):
    """Draw a random subset of benign and attack 2D trajectories on ``ax``.

    At most ``n_show`` trajectories per class are drawn as faint lines, with
    the first snapshot marked "o" and the last marked "x". Circles of radius
    1 and 2 around the origin and zero cross-hairs are added for reference.
    The subset is chosen with a fixed seed.
    """
    rng = np.random.default_rng(0)
    picks_v = rng.choice(traj_v.shape[0], min(n_show, traj_v.shape[0]), replace=False)
    picks_a = rng.choice(traj_a.shape[0], min(n_show, traj_a.shape[0]), replace=False)

    benign = (traj_v, picks_v, "#2c7fb8", "benign t=1 (data)", "benign t=0 (post-flow)")
    attack = (traj_a, picks_a, "#d7191c", "attack t=1 (data)", "attack t=0 (post-flow)")

    # trajectories: thin alpha lines (all benign first, then all attack)
    for traj, picks, colour, _start, _end in (benign, attack):
        for i in picks:
            ax.plot(traj[i, :, 0], traj[i, :, 1], color=colour, alpha=0.18, lw=0.6, zorder=1)

    # endpoints
    for traj, picks, colour, start_label, end_label in (benign, attack):
        ax.scatter(traj[picks, 0, 0], traj[picks, 0, 1], s=14, c=colour, marker="o",
                   edgecolors="white", linewidths=0.4, zorder=3, label=start_label)
        ax.scatter(traj[picks, -1, 0], traj[picks, -1, 1], s=14, c=colour, marker="x",
                   linewidths=0.9, alpha=0.85, zorder=3, label=end_label)

    # unit circle (target N(0,I) for benign post-flow if flow learned correctly)
    theta = np.linspace(0, 2 * np.pi, 120)
    for radius, dash in ((1, "-"), (2, "--")):
        ax.plot(radius * np.cos(theta), radius * np.sin(theta),
                color="black", lw=0.8, ls=dash, alpha=0.5, zorder=2)

    ax.axhline(0, color="0.85", lw=0.5, zorder=0)
    ax.axvline(0, color="0.85", lw=0.5, zorder=0)
    ax.set_title(title, fontsize=11)
    ax.set_aspect("equal", adjustable="datalim")
def plot_trajectory(npz_paths: dict[str, Path]) -> Path:
    """Plot reverse-flow trajectories in 2D PCA space, one column per dataset.

    Top row uses the ``z_traj_flow_*`` arrays, bottom row the ``z_traj_pkt_*``
    arrays; in both cases the PCA is fitted on the first snapshot of the
    benign trajectories.

    Args:
        npz_paths: Mapping of dataset key (must be a key of ``PRETTY``) to the
            trajectory archive for that dataset.

    Returns:
        Path of the written PDF under ``OUT`` (a PNG is written alongside).
    """
    n_cols = len(npz_paths)
    # squeeze=False keeps `axes` two-dimensional even for a single dataset.
    fig, axes = plt.subplots(2, n_cols, figsize=(7.5 * n_cols, 12),
                             constrained_layout=True, squeeze=False)
    for col, (ds, npz) in enumerate(npz_paths.items()):
        # Materialise the arrays inside a context manager so the archive's
        # file handle is closed before plotting.
        with np.load(npz) as z:
            ftv = z["z_traj_flow_v"]  # [n, S, D]
            fta = z["z_traj_flow_a"]
            ptv = z["z_traj_pkt_v"]
            pta = z["z_traj_pkt_a"]
        # FLOW-token trajectory
        _, bt, at = _pca_fit_project(ftv[:, 0], ftv, fta)
        _draw_traj(axes[0, col], bt, at, f"{PRETTY[ds]} — FLOW token (PC1 vs PC2 of benign t=1)")
        # mean-packet trajectory
        _, bt2, at2 = _pca_fit_project(ptv[:, 0], ptv, pta)
        _draw_traj(axes[1, col], bt2, at2, f"{PRETTY[ds]} — mean packet token")
    axes[0, 0].legend(loc="upper right", fontsize=7, framealpha=0.85)
    fig.suptitle(
        "Reverse CFM flow (t=1 → t=0): benign collapses toward learned source (≈ N(0,I) inside dashed circles); "
        "attack endpoints land off-distribution",
        fontsize=12,
    )
    out = OUT / "fig4_trajectory_pca.pdf"
    fig.savefig(out, bbox_inches="tight")
    fig.savefig(out.with_suffix(".png"), bbox_inches="tight", dpi=160)
    plt.close(fig)
    return out
def plot_velocity_norm(npz_paths: dict[str, Path]) -> Path:
    """Plot mean ± std of the velocity norm against CFM time, one panel per dataset.

    Args:
        npz_paths: Mapping of dataset key (must be a key of ``PRETTY``) to the
            trajectory archive holding ``vnorm_v`` and ``vnorm_a``.

    Returns:
        Path of the written PDF under ``OUT`` (a PNG is written alongside).
    """
    n_cols = len(npz_paths)
    # squeeze=False keeps `axes` two-dimensional even for a single dataset.
    fig, axes = plt.subplots(1, n_cols, figsize=(6.5 * n_cols, 5.6),
                             constrained_layout=True, squeeze=False)
    for ax, (ds, npz) in zip(axes[0], npz_paths.items()):
        # Close the archive's file handle as soon as the arrays are read.
        with np.load(npz) as z:
            vn_v = z["vnorm_v"]  # [n, n_steps]
            vn_a = z["vnorm_a"]
        # t at integration step k corresponds to t = 1 - k*dt, k=0..n_steps-1
        n_steps = vn_v.shape[1]
        t_steps = 1.0 - np.arange(n_steps) / n_steps
        # mean ± std band
        m_v, s_v = vn_v.mean(0), vn_v.std(0)
        m_a, s_a = vn_a.mean(0), vn_a.std(0)
        ax.plot(t_steps, m_v, color="#2c7fb8", lw=1.6, label="benign mean")
        ax.fill_between(t_steps, m_v - s_v, m_v + s_v, color="#2c7fb8", alpha=0.18)
        ax.plot(t_steps, m_a, color="#d7191c", lw=1.6, label="attack mean")
        ax.fill_between(t_steps, m_a - s_a, m_a + s_a, color="#d7191c", alpha=0.18)
        ax.set_xlabel("CFM time t (1 = data → 0 = source)")
        ax.set_ylabel("‖v(x_t, t)‖ per real token (mean over flow)")
        ax.text(0.02, 1.02, PRETTY[ds], transform=ax.transAxes, fontsize=11)
        ax.invert_xaxis()  # so left is t=1 (data), right is t=0 (source)
        ax.legend(fontsize=8, loc="upper left", framealpha=0.85)
    out = OUT / "velocity_norm_vs_t_benign_vs_attack.pdf"
    fig.savefig(out, bbox_inches="tight")
    fig.savefig(out.with_suffix(".png"), bbox_inches="tight", dpi=160)
    plt.close(fig)
    return out
def main() -> None:
    """CLI entry point: render the trajectory and velocity-norm figures.

    Looks for ``traj_<dataset>.npz`` under ``OUT`` for each requested dataset.
    Datasets whose archive is missing are reported and skipped, matching the
    field-view renderer; if none is available the script exits with an error
    rather than failing inside matplotlib with an empty grid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--datasets", nargs="+",
                        default=["cicids2017", "cicddos2019", "iscxtor2016", "ciciot2023"])
    args = parser.parse_args()
    OUT.mkdir(parents=True, exist_ok=True)
    mpl.rcParams.update({"font.size": 10, "pdf.fonttype": 42, "ps.fonttype": 42})

    npz_paths = {}
    for ds in args.datasets:
        npz = OUT / f"traj_{ds}.npz"
        if not npz.exists():
            print(f"[skip] missing {npz}")
            continue
        npz_paths[ds] = npz
    if not npz_paths:
        raise SystemExit(f"no traj_<dataset>.npz found under {OUT}")

    p4 = plot_trajectory(npz_paths)
    print(f"[wrote] {p4}")
    p5 = plot_velocity_norm(npz_paths)
    print(f"[wrote] {p5}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,266 @@
"""Generate Unified-style 3-panel field-view data for Mixed_CFM:
(1) velocity field at t=0.5 on benign-FLOW-token PCA grid (with streamlines)
(2) reverse-flow trajectories t=1 → t=0
(3) forward-flow trajectories t=0 → t=1 (sample from N(0,I) noise back to data)
All operations run in token space; the visual projection is 2D PCA fit on benign
FLOW token at t=0.5 (so interpolation states align with the source-side flow).
Output npz keys:
pca_components [2, token_dim] PCA basis (benign FLOW @ t=0.5)
pca_mean [token_dim]
pca_explained_var [2]
benign_t1_2d [N, 2] benign FLOW token at t=1 in PCA coords
benign_t05_2d [N, 2] benign FLOW token at t=0.5 (the PCA fit basis)
benign_t0_2d [N, 2] benign FLOW token at t=0 (random N(0,I))
reverse_v_2d [Nrv, S+1, 2] benign reverse trajectory in PCA coords
reverse_a_2d [Nra, S+1, 2] attack reverse trajectory
forward_v_2d [Nfv, S+1, 2] forward trajectory from noise → benign-cond-template
grid_x, grid_y [G, G] field grid coords in PCA space
field_v_2d [G, G, 2] velocity vectors in PCA coords at t=0.5
field_log_norm [G, G] log10 ‖v(x_t,t)‖ at full token scale
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
import numpy as np
import torch
import yaml
from sklearn.decomposition import PCA
ROOT = Path(__file__).resolve().parents[2]
sys.path.insert(0, str(ROOT / "Mixed_CFM"))
from data import load_mixed_data # noqa: E402
from model import MixedCFMConfig, MixedTokenCFM # noqa: E402
@torch.no_grad()
def integrate_reverse(model: MixedTokenCFM, z1: torch.Tensor, lens: torch.Tensor, *, n_steps: int) -> torch.Tensor:
    """Euler-integrate the model velocity backwards from t=1 to t=0.

    ``z1`` is taken as the state at t=1 and is not modified. At every step the
    velocity on the discrete feature columns
    ``[1 + n_cont_pkt, 1 + n_cont_pkt + n_disc_pkt)`` is zeroed, and those
    columns of tokens ``1:`` are reset to their initial values after the
    update.

    Returns:
        The initial state plus one snapshot per step, stacked along dim 1
        (``n_steps + 1`` snapshots).
    """
    cfg = model.cfg
    first_disc = 1 + cfg.n_cont_pkt
    disc = slice(first_disc, first_disc + cfg.n_disc_pkt)

    state = z1.clone()
    padding = model._loss_mask(lens) == 0
    batch = state.shape[0]
    dt = 1.0 / n_steps
    pinned = state[:, 1:, disc].clone()

    history = [state.clone()]
    for k in range(n_steps):
        t = torch.full((batch,), 1.0 - k * dt, device=state.device)
        vel, _ = model.velocity(state, t, key_padding_mask=padding)
        vel[:, :, disc] = 0.0
        state = state - vel * dt
        state[:, 1:, disc] = pinned
        history.append(state.clone())
    return torch.stack(history, dim=1)
@torch.no_grad()
def integrate_forward(model: MixedTokenCFM, z0: torch.Tensor, lens: torch.Tensor, disc_embed: torch.Tensor, *, n_steps: int) -> torch.Tensor:
    """Euler-integrate the model velocity forwards from t=0 to t=1.

    ``z0`` is taken as the state at t=0 and is not modified. Before every
    velocity evaluation the discrete feature columns
    ``[1 + n_cont_pkt, 1 + n_cont_pkt + n_disc_pkt)`` of tokens ``1:`` are
    overwritten with ``disc_embed``; the velocity on those columns is zeroed.

    Returns:
        The initial state plus one snapshot per step, stacked along dim 1
        (``n_steps + 1`` snapshots).
    """
    cfg = model.cfg
    first_disc = 1 + cfg.n_cont_pkt
    disc = slice(first_disc, first_disc + cfg.n_disc_pkt)

    state = z0.clone()
    padding = model._loss_mask(lens) == 0
    batch = state.shape[0]
    dt = 1.0 / n_steps

    history = [state.clone()]
    for k in range(n_steps):
        t = torch.full((batch,), k * dt, device=state.device)
        state[:, 1:, disc] = disc_embed
        vel, _ = model.velocity(state, t, key_padding_mask=padding)
        vel[:, :, disc] = 0.0
        state = state + vel * dt
        history.append(state.clone())
    return torch.stack(history, dim=1)
@torch.no_grad()
def velocity_at(model: MixedTokenCFM, z: torch.Tensor, lens: torch.Tensor, t_val: float) -> torch.Tensor:
    """Evaluate the model velocity at a single time ``t_val`` for every batch row.

    The returned velocity has the discrete feature columns
    ``[1 + n_cont_pkt, 1 + n_cont_pkt + n_disc_pkt)`` set to zero.
    """
    padding = model._loss_mask(lens) == 0
    times = torch.full((z.shape[0],), float(t_val), device=z.device)
    vel, _ = model.velocity(z, times, key_padding_mask=padding)

    first_disc = 1 + model.cfg.n_cont_pkt
    vel[:, :, first_disc:first_disc + model.cfg.n_disc_pkt] = 0.0
    return vel
def main() -> None:
    """Compute the data behind the t=0.5 velocity-field / trajectory figure and save it as an npz.

    Pipeline:
      1. Load the checkpoint and dataset described by ``--model-dir``.
      2. Fit a 2-D PCA on benign FLOW tokens (token index 0) interpolated to t=0.5.
      3. Project reverse trajectories (benign + attack) and forward trajectories
         (from noise) of the FLOW token into that PCA plane.
      4. Evaluate the FLOW-token velocity on a regular grid in the PCA plane,
         averaged over a few benign packet-token templates.
      5. Write everything to ``--out`` via ``np.savez``.

    Requested sample counts are clamped to the available pool size (with a
    warning) instead of letting ``rng.choice(..., replace=False)`` raise.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--model-dir", type=Path, required=True)
    p.add_argument("--out", type=Path, required=True)
    p.add_argument("--n-pca-benign", type=int, default=1500)
    p.add_argument("--n-reverse-benign", type=int, default=30)
    p.add_argument("--n-reverse-attack", type=int, default=30)
    p.add_argument("--n-forward", type=int, default=200)
    p.add_argument("--grid", type=int, default=40)
    p.add_argument("--grid-templates", type=int, default=8, help="Number of benign templates to average velocity over per grid point")
    p.add_argument("--n-steps", type=int, default=32)
    p.add_argument("--device", type=str, default="auto")
    p.add_argument("--batch-size", type=int, default=128)
    args = p.parse_args()

    # Fail early on values that would otherwise surface later as a
    # ZeroDivisionError (n_steps) or a silent NaN velocity field (grid_templates).
    if args.n_steps < 1:
        p.error("--n-steps must be >= 1")
    if args.grid_templates < 1:
        p.error("--grid-templates must be >= 1")

    def clamp_count(flag: str, requested: int, available: int) -> int:
        """Limit a without-replacement sample size to the pool size, warning when it is reduced."""
        if requested > available:
            print(f"[warn] {flag}={requested} exceeds pool size {available}; using {available}")
            return available
        return requested

    # "auto" picks CUDA when available, otherwise CPU; anything else is used verbatim.
    device = torch.device("cuda" if (args.device == "auto" and torch.cuda.is_available()) else (args.device if args.device != "auto" else "cpu"))

    cfg = yaml.safe_load((args.model_dir / "config.yaml").read_text())
    # NOTE(review): weights_only=False unpickles arbitrary objects; only point
    # --model-dir at checkpoints you trust.
    ckpt = torch.load(args.model_dir / "model.pt", map_location="cpu", weights_only=False)
    model_cfg = MixedCFMConfig(**ckpt["model_cfg"])
    model = MixedTokenCFM(model_cfg).to(device)
    model.load_state_dict(ckpt["model_state_dict"])
    model.eval()

    data = load_mixed_data(
        packets_npz=Path(cfg["packets_npz"]) if cfg.get("packets_npz") else None,
        source_store=Path(cfg["source_store"]) if cfg.get("source_store") else None,
        flows_parquet=Path(cfg["flows_parquet"]),
        flow_features_path=Path(cfg["flow_features_path"]),
        flow_features_align=str(cfg.get("flow_features_align", "auto")),
        T=int(cfg["T"]),
        split_seed=int(cfg.get("data_seed", cfg.get("seed", 42))),
        train_ratio=float(cfg.get("train_ratio", 0.8)),
        benign_label=str(cfg.get("benign_label", "normal")),
        min_len=int(cfg.get("min_len", 2)),
        attack_cap=int(cfg["attack_cap"]) if cfg.get("attack_cap") else None,
        val_cap=int(cfg["val_cap"]) if cfg.get("val_cap") else None,
    )
    print(f"[data] val={len(data.val_flow):,} attack={len(data.attack_flow):,}")

    # Index sampling. The order of the rng.choice calls is kept fixed so that
    # results for unclamped sizes are reproducible with seed 0.
    rng = np.random.default_rng(0)
    n_val = len(data.val_flow)
    n_att = len(data.attack_flow)
    nv = clamp_count("--n-pca-benign", args.n_pca_benign, n_val)
    n_rv = clamp_count("--n-reverse-benign", args.n_reverse_benign, n_val)
    n_ra = clamp_count("--n-reverse-attack", args.n_reverse_attack, n_att)
    n_fw = clamp_count("--n-forward", args.n_forward, n_val)
    iv_pca = np.sort(rng.choice(n_val, nv, replace=False))
    irv = np.sort(rng.choice(n_val, n_rv, replace=False))
    ira = np.sort(rng.choice(n_att, n_ra, replace=False))
    ifw_template = np.sort(rng.choice(n_val, n_fw, replace=False))

    def build_z(flow, cont, disc):
        """Move numpy feature arrays to the device and assemble the token tensor."""
        flow_t = torch.from_numpy(flow).float().to(device)
        cont_t = torch.from_numpy(cont).float().to(device)
        disc_t = torch.from_numpy(disc).long().to(device)
        return model.build_tokens(flow_t, cont_t, disc_t)

    def project_flow(snaps: torch.Tensor) -> np.ndarray:
        """Project the FLOW token (index 0) of [B, S, L, D] snapshots to PCA coords [B, S, 2]."""
        flat = snaps[:, :, 0, :].reshape(-1, snaps.shape[-1]).cpu().numpy()
        return pca.transform(flat).reshape(snaps.shape[0], snaps.shape[1], 2)

    # ==== Step 1: build z1 for benign PCA pool, get FLOW token at t=0.5 ====
    z1_pca = build_z(data.val_flow[iv_pca], data.val_cont[iv_pca], data.val_disc[iv_pca])  # [N, L, D]
    lens_pca = torch.from_numpy(data.val_len[iv_pca]).long().to(device)
    flow_t1 = z1_pca[:, 0, :].cpu().numpy()  # [N, D] — FLOW token at t=1
    sigma = float(model_cfg.sigma)
    z0_for_pca = torch.randn_like(z1_pca)
    t_val = 0.5
    # Explicit Python float so no NumPy scalar is mixed into tensor arithmetic.
    noise_std = float(sigma * np.sqrt(t_val * (1 - t_val)))
    z_t05 = (1 - t_val) * z0_for_pca + t_val * z1_pca
    if sigma > 0:
        z_t05 = z_t05 + noise_std * torch.randn_like(z_t05)
    flow_t05 = z_t05[:, 0, :].cpu().numpy()  # [N, D]
    flow_t0 = z0_for_pca[:, 0, :].cpu().numpy()
    pca = PCA(n_components=2).fit(flow_t05)
    print(f"[pca] explained var on benign FLOW @ t=0.5: {pca.explained_variance_ratio_}")
    benign_t1_2d = pca.transform(flow_t1)
    benign_t05_2d = pca.transform(flow_t05)
    benign_t0_2d = pca.transform(flow_t0)

    # ==== Step 2: reverse trajectories ====
    print("[run] reverse benign")
    z1_rv = build_z(data.val_flow[irv], data.val_cont[irv], data.val_disc[irv])
    lens_rv = torch.from_numpy(data.val_len[irv]).long().to(device)
    snaps_rv = integrate_reverse(model, z1_rv, lens_rv, n_steps=args.n_steps)  # [B, S+1, L, D]
    rv_2d = project_flow(snaps_rv)
    print("[run] reverse attack")
    z1_ra = build_z(data.attack_flow[ira], data.attack_cont[ira], data.attack_disc[ira])
    lens_ra = torch.from_numpy(data.attack_len[ira]).long().to(device)
    snaps_ra = integrate_reverse(model, z1_ra, lens_ra, n_steps=args.n_steps)
    ra_2d = project_flow(snaps_ra)

    # ==== Step 3: forward trajectories (sample noise, integrate to t=1, hold disc embed from a benign template) ====
    print("[run] forward from noise")
    z1_fw = build_z(data.val_flow[ifw_template], data.val_cont[ifw_template], data.val_disc[ifw_template])
    lens_fw = torch.from_numpy(data.val_len[ifw_template]).long().to(device)
    cfg_m = model.cfg
    disc_start = 1 + cfg_m.n_cont_pkt
    disc_end = disc_start + cfg_m.n_disc_pkt
    disc_embed_fw = z1_fw[:, 1:, disc_start:disc_end].clone()
    z0_fw = torch.randn_like(z1_fw)
    snaps_fw = integrate_forward(model, z0_fw, lens_fw, disc_embed_fw, n_steps=args.n_steps)
    fw_2d = project_flow(snaps_fw)

    # ==== Step 4: velocity field on PCA grid at t=0.5 ====
    print("[run] velocity field grid")
    pad = 0.3  # fractional margin added around the benign t=0.5 cloud
    x_min, x_max = benign_t05_2d[:, 0].min(), benign_t05_2d[:, 0].max()
    y_min, y_max = benign_t05_2d[:, 1].min(), benign_t05_2d[:, 1].max()
    sx = x_max - x_min
    sy = y_max - y_min
    x_lo, x_hi = x_min - pad * sx, x_max + pad * sx
    y_lo, y_hi = y_min - pad * sy, y_max + pad * sy
    gx = np.linspace(x_lo, x_hi, args.grid)
    gy = np.linspace(y_lo, y_hi, args.grid)
    GX, GY = np.meshgrid(gx, gy)
    grid_2d = np.stack([GX.ravel(), GY.ravel()], axis=1)  # [G^2, 2]
    grid_full = pca.inverse_transform(grid_2d)  # [G^2, D]
    grid_full_t = torch.from_numpy(grid_full).float().to(device)

    # For each grid point, replace FLOW token at t=0.5 of K random benign templates;
    # average velocity at FLOW position over templates.
    K = clamp_count("--grid-templates", args.grid_templates, len(z1_pca))
    # Sample K template z_t05 from PCA pool
    template_idx = rng.choice(len(z1_pca), K, replace=False)
    z1_tpl = z1_pca[template_idx]  # [K, L, D]
    lens_tpl = lens_pca[template_idx]
    z0_tpl = torch.randn_like(z1_tpl)
    z_t05_tpl = (1 - t_val) * z0_tpl + t_val * z1_tpl
    if sigma > 0:
        z_t05_tpl = z_t05_tpl + noise_std * torch.randn_like(z_t05_tpl)

    G2 = grid_full_t.shape[0]
    v_grid_full = torch.zeros((G2, grid_full_t.shape[1]), device=device)
    bs = args.batch_size
    for k in range(K):
        # build a [G^2, L, D] tensor where token 0 = grid point, tokens 1: = template's z_t05 packets
        tpl_packets = z_t05_tpl[k:k + 1, 1:, :].expand(G2, -1, -1).contiguous()
        z_grid = torch.cat([grid_full_t.unsqueeze(1), tpl_packets], dim=1)  # [G^2, L, D]
        lens_grid = lens_tpl[k:k + 1].expand(G2).contiguous()
        # batched velocity eval
        v_chunks = []
        for s in range(0, G2, bs):
            v_chunks.append(velocity_at(model, z_grid[s:s + bs], lens_grid[s:s + bs], t_val=t_val)[:, 0, :])
        v_grid_full = v_grid_full + torch.cat(v_chunks, dim=0)
    v_grid_full = v_grid_full / K
    v_grid_np = v_grid_full.cpu().numpy()
    # Velocities are directions, not positions, so they are projected onto the
    # PCA axes without subtracting the PCA mean: [G^2, D] → [G^2, 2].
    v_grid_2d = v_grid_np @ pca.components_.T
    v_grid_2d = v_grid_2d.reshape(args.grid, args.grid, 2)
    # Small epsilon keeps log10 finite where the velocity is exactly zero.
    log_norm_full = np.log10(np.linalg.norm(v_grid_np, axis=-1) + 1e-9).reshape(args.grid, args.grid)

    args.out.parent.mkdir(parents=True, exist_ok=True)
    np.savez(
        args.out,
        pca_components=pca.components_.astype(np.float32),
        pca_mean=pca.mean_.astype(np.float32),
        pca_explained_var=pca.explained_variance_ratio_.astype(np.float32),
        benign_t1_2d=benign_t1_2d.astype(np.float32),
        benign_t05_2d=benign_t05_2d.astype(np.float32),
        benign_t0_2d=benign_t0_2d.astype(np.float32),
        reverse_v_2d=rv_2d.astype(np.float32),
        reverse_a_2d=ra_2d.astype(np.float32),
        forward_v_2d=fw_2d.astype(np.float32),
        grid_x=GX.astype(np.float32),
        grid_y=GY.astype(np.float32),
        field_v_2d=v_grid_2d.astype(np.float32),
        field_log_norm=log_norm_full.astype(np.float32),
    )
    print(f"[wrote] {args.out}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,157 @@
"""Reverse-flow ODE integration with per-step snapshots, for trajectory + velocity-norm plots.
For a chosen (dataset, seed), loads a small balanced sample (n_per_class benign+attack)
and integrates the velocity field from t=1 (data) to t=0 (Gaussian) using the same Euler
scheme as `MixedTokenCFM.trajectory_metrics`, but saves z snapshots and ‖v‖ at each step.
Outputs npz with keys:
z_traj_flow_v / z_traj_flow_a [n, n_steps+1, token_dim] FLOW-token trajectories
z_traj_pkt_v / z_traj_pkt_a [n, n_steps+1, token_dim] masked-mean packet-token trajectories
vnorm_v / vnorm_a [n, n_steps] per-step velocity norm (real tokens only)
t_grid [n_steps+1] t values; t_grid[0] = 1.0, decreasing
"""
from __future__ import annotations
import argparse
import sys
import time
from pathlib import Path
import numpy as np
import torch
import yaml
ROOT = Path(__file__).resolve().parents[2]
sys.path.insert(0, str(ROOT / "Mixed_CFM"))
from data import load_mixed_data # noqa: E402
from model import MixedCFMConfig, MixedTokenCFM # noqa: E402
@torch.no_grad()
def run_reverse_flow(model: MixedTokenCFM, flow, cont, disc, lens, *, n_steps: int):
    """Integrate the velocity field from t=1 towards t=0 with Euler steps, logging each step.

    Returns a 3-tuple:
      * list of ``n_steps + 1`` token tensors (initial state, then one per step),
      * numpy array ``[B, n_steps]`` holding, for every step, the per-token velocity
        norm averaged over the tokens selected by the loss mask,
      * the loss mask itself as a numpy array.
    """
    state = model.build_tokens(flow, cont, disc)  # tokens at t=1
    token_mask = model._loss_mask(lens)
    pad_mask = token_mask == 0
    batch = state.shape[0]
    step = 1.0 / n_steps

    settings = model.cfg
    first_disc = 1 + settings.n_cont_pkt
    disc_cols = slice(first_disc, first_disc + settings.n_disc_pkt)
    # Discrete-embedding channels of tokens 1: stay at their t=1 values.
    pinned = state[:, 1:, disc_cols].clone()

    history = [state.clone()]
    norm_log = []
    for idx in range(n_steps):
        times = torch.full((batch,), 1.0 - idx * step, device=state.device)
        vel, _ = model.velocity(state, times, key_padding_mask=pad_mask)
        vel[:, :, disc_cols] = 0.0

        # Mean of the per-token velocity norm over masked-in tokens, per sample.
        token_norm = vel.norm(dim=-1)  # [B, L]
        masked_total = (token_norm * token_mask).sum(dim=-1)
        token_count = token_mask.sum(dim=-1).clamp_min(1.0)
        norm_log.append((masked_total / token_count).cpu().numpy())

        state = state - vel * step
        state[:, 1:, disc_cols] = pinned
        history.append(state.clone())
    return history, np.stack(norm_log, axis=1), token_mask.cpu().numpy()
def main() -> None:
    """CLI entry point: run the reverse flow on a balanced benign/attack sample and save an npz.

    Loads the checkpoint and dataset referenced by ``--model-dir``, draws up to
    ``--n-per-class`` benign-validation and attack flows, integrates each batch
    with ``run_reverse_flow``, and stores FLOW-token trajectories, masked-mean
    packet-token trajectories, per-step velocity norms and the time grid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-dir", type=Path, required=True)
    parser.add_argument("--out", type=Path, required=True)
    parser.add_argument("--n-per-class", type=int, default=200)
    parser.add_argument("--n-steps", type=int, default=32)
    parser.add_argument("--device", type=str, default="auto")
    parser.add_argument("--batch-size", type=int, default=128)
    opts = parser.parse_args()

    # Explicit device strings are used as given; "auto" prefers CUDA, else CPU.
    if opts.device != "auto":
        device = torch.device(opts.device)
    elif torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    run_cfg = yaml.safe_load((opts.model_dir / "config.yaml").read_text())
    checkpoint = torch.load(opts.model_dir / "model.pt", map_location="cpu", weights_only=False)
    net_cfg = MixedCFMConfig(**checkpoint["model_cfg"])
    net = MixedTokenCFM(net_cfg).to(device)
    net.load_state_dict(checkpoint["model_state_dict"])
    net.eval()

    def optional_path(key):
        """Return the config entry as a Path, or None when it is missing/empty."""
        return Path(run_cfg[key]) if run_cfg.get(key) else None

    def optional_int(key):
        """Return the config entry as an int, or None when it is missing/empty."""
        return int(run_cfg[key]) if run_cfg.get(key) else None

    data = load_mixed_data(
        packets_npz=optional_path("packets_npz"),
        source_store=optional_path("source_store"),
        flows_parquet=Path(run_cfg["flows_parquet"]),
        flow_features_path=Path(run_cfg["flow_features_path"]),
        flow_features_align=str(run_cfg.get("flow_features_align", "auto")),
        T=int(run_cfg["T"]),
        split_seed=int(run_cfg.get("data_seed", run_cfg.get("seed", 42))),
        train_ratio=float(run_cfg.get("train_ratio", 0.8)),
        benign_label=str(run_cfg.get("benign_label", "normal")),
        min_len=int(run_cfg.get("min_len", 2)),
        attack_cap=optional_int("attack_cap"),
        val_cap=optional_int("val_cap"),
    )
    print(f"[data] val={len(data.val_flow):,} attack={len(data.attack_flow):,}")

    # Fixed-seed subsample; benign indices are drawn before attack indices.
    sampler = np.random.default_rng(0)
    total_benign = len(data.val_flow)
    total_attack = len(data.attack_flow)
    benign_idx = np.sort(sampler.choice(total_benign, min(opts.n_per_class, total_benign), replace=False))
    attack_idx = np.sort(sampler.choice(total_attack, min(opts.n_per_class, total_attack), replace=False))

    def to_t(flow, cont, disc, lens):
        """Convert one numpy batch to device tensors (float, float, long, long)."""
        flow_t = torch.from_numpy(flow).float().to(device)
        cont_t = torch.from_numpy(cont).float().to(device)
        disc_t = torch.from_numpy(disc).long().to(device)
        lens_t = torch.from_numpy(lens).long().to(device)
        return flow_t, cont_t, disc_t, lens_t

    def collect(flows_np, conts_np, discs_np, lens_np):
        """Run the reverse flow batch by batch and concatenate the results along axis 0."""
        total = len(flows_np)
        snap_parts = []
        norm_parts = []
        mask_parts = []
        for begin in range(0, total, opts.batch_size):
            stop = begin + opts.batch_size
            tensors = to_t(flows_np[begin:stop], conts_np[begin:stop], discs_np[begin:stop], lens_np[begin:stop])
            snaps, vn, mask = run_reverse_flow(net, *tensors, n_steps=opts.n_steps)
            snap_parts.append(torch.stack(snaps, dim=1).cpu().numpy())  # [b, n_steps+1, L, D]
            norm_parts.append(vn)
            mask_parts.append(mask)
            print(f" [batch] {min(begin + opts.batch_size, total)}/{total}", flush=True)
        return (
            np.concatenate(snap_parts, axis=0),
            np.concatenate(norm_parts, axis=0),
            np.concatenate(mask_parts, axis=0),
        )

    print("[run] benign val")
    t0 = time.time()
    snaps_v, vn_v, mask_v = collect(
        data.val_flow[benign_idx], data.val_cont[benign_idx], data.val_disc[benign_idx], data.val_len[benign_idx]
    )
    print(f" done {time.time() - t0:.1f}s snaps={snaps_v.shape}")

    print("[run] attack")
    t0 = time.time()
    snaps_a, vn_a, mask_a = collect(
        data.attack_flow[attack_idx], data.attack_cont[attack_idx], data.attack_disc[attack_idx], data.attack_len[attack_idx]
    )
    print(f" done {time.time() - t0:.1f}s snaps={snaps_a.shape}")

    def flow_and_pkt_traj(snaps, mask):
        """Split [n, S, L, D] snapshots into the token-0 trajectory and the masked mean of tokens 1:."""
        # mask is [n, L]; token 0 is the FLOW token, the remaining tokens are packets.
        weights = mask[:, None, 1:, None].astype(np.float32)
        denom = np.clip(weights.sum(axis=2), 1.0, None)  # avoid division by zero
        weighted_sum = (snaps[:, :, 1:, :] * weights).sum(axis=2)
        return snaps[:, :, 0, :], weighted_sum / denom  # each [n, S, D]

    flow_tok_v, pkt_mean_v = flow_and_pkt_traj(snaps_v, mask_v)
    flow_tok_a, pkt_mean_a = flow_and_pkt_traj(snaps_a, mask_a)

    # Time values matching the snapshots: starts at 1.0 and decreases by 1/n_steps.
    n_steps = opts.n_steps
    t_grid = 1.0 - np.arange(n_steps + 1) * (1.0 / n_steps)

    opts.out.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        "z_traj_flow_v": flow_tok_v,
        "z_traj_flow_a": flow_tok_a,
        "z_traj_pkt_v": pkt_mean_v,
        "z_traj_pkt_a": pkt_mean_a,
        "vnorm_v": vn_v,
        "vnorm_a": vn_a,
        "t_grid": t_grid,
    }
    np.savez(opts.out, **{key: value.astype(np.float32) for key, value in payload.items()})
    print(f"[wrote] {opts.out}")
if __name__ == "__main__":
main()

32
uv.lock generated
View File

@@ -243,6 +243,36 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" },
]
[[package]]
name = "cython"
version = "3.2.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/91/85/7574c9cd44b69a27210444b6650f6477f56c75fee1b70d7672d3e4166167/cython-3.2.4.tar.gz", hash = "sha256:84226ecd313b233da27dc2eb3601b4f222b8209c3a7216d8733b031da1dc64e6", size = 3280291, upload-time = "2026-01-04T14:14:14.473Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/91/4d/1eb0c7c196a136b1926f4d7f0492a96c6fabd604d77e6cd43b56a3a16d83/cython-3.2.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:64d7f71be3dd6d6d4a4c575bb3a4674ea06d1e1e5e4cd1b9882a2bc40ed3c4c9", size = 2970064, upload-time = "2026-01-04T14:15:08.567Z" },
{ url = "https://files.pythonhosted.org/packages/03/1c/46e34b08bea19a1cdd1e938a4c123e6299241074642db9d81983cef95e9f/cython-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:869487ea41d004f8b92171f42271fbfadb1ec03bede3158705d16cd570d6b891", size = 3226757, upload-time = "2026-01-04T14:15:10.812Z" },
{ url = "https://files.pythonhosted.org/packages/12/33/3298a44d201c45bcf0d769659725ae70e9c6c42adf8032f6d89c8241098d/cython-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:55b6c44cd30821f0b25220ceba6fe636ede48981d2a41b9bbfe3c7902ce44ea7", size = 3388969, upload-time = "2026-01-04T14:15:12.45Z" },
{ url = "https://files.pythonhosted.org/packages/bb/f3/4275cd3ea0a4cf4606f9b92e7f8766478192010b95a7f516d1b7cf22cb10/cython-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:767b143704bdd08a563153448955935844e53b852e54afdc552b43902ed1e235", size = 2756457, upload-time = "2026-01-04T14:15:14.67Z" },
{ url = "https://files.pythonhosted.org/packages/18/b5/1cfca43b7d20a0fdb1eac67313d6bb6b18d18897f82dd0f17436bdd2ba7f/cython-3.2.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:28e8075087a59756f2d059273184b8b639fe0f16cf17470bd91c39921bc154e0", size = 2960506, upload-time = "2026-01-04T14:15:16.733Z" },
{ url = "https://files.pythonhosted.org/packages/71/bb/8f28c39c342621047fea349a82fac712a5e2b37546d2f737bbde48d5143d/cython-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03893c88299a2c868bb741ba6513357acd104e7c42265809fd58dce1456a36fc", size = 3213148, upload-time = "2026-01-04T14:15:18.804Z" },
{ url = "https://files.pythonhosted.org/packages/7a/d2/16fa02f129ed2b627e88d9d9ebd5ade3eeb66392ae5ba85b259d2d52b047/cython-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f81eda419b5ada7b197bbc3c5f4494090e3884521ffd75a3876c93fbf66c9ca8", size = 3375764, upload-time = "2026-01-04T14:15:20.817Z" },
{ url = "https://files.pythonhosted.org/packages/91/3f/deb8f023a5c10c0649eb81332a58c180fad27c7533bb4aae138b5bc34d92/cython-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:83266c356c13c68ffe658b4905279c993d8a5337bb0160fa90c8a3e297ea9a2e", size = 2754238, upload-time = "2026-01-04T14:15:23.001Z" },
{ url = "https://files.pythonhosted.org/packages/ee/d7/3bda3efce0c5c6ce79cc21285dbe6f60369c20364e112f5a506ee8a1b067/cython-3.2.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d4b4fd5332ab093131fa6172e8362f16adef3eac3179fd24bbdc392531cb82fa", size = 2971496, upload-time = "2026-01-04T14:15:25.038Z" },
{ url = "https://files.pythonhosted.org/packages/89/ed/1021ffc80b9c4720b7ba869aea8422c82c84245ef117ebe47a556bdc00c3/cython-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e3b5ac54e95f034bc7fb07313996d27cbf71abc17b229b186c1540942d2dc28e", size = 3256146, upload-time = "2026-01-04T14:15:26.741Z" },
{ url = "https://files.pythonhosted.org/packages/0c/51/ca221ec7e94b3c5dc4138dcdcbd41178df1729c1e88c5dfb25f9d30ba3da/cython-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90f43be4eaa6afd58ce20d970bb1657a3627c44e1760630b82aa256ba74b4acb", size = 3383458, upload-time = "2026-01-04T14:15:28.425Z" },
{ url = "https://files.pythonhosted.org/packages/79/2e/1388fc0243240cd54994bb74f26aaaf3b2e22f89d3a2cf8da06d75d46ca2/cython-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:983f9d2bb8a896e16fa68f2b37866ded35fa980195eefe62f764ddc5f9f5ef8e", size = 2791241, upload-time = "2026-01-04T14:15:30.448Z" },
{ url = "https://files.pythonhosted.org/packages/0a/8b/fd393f0923c82be4ec0db712fffb2ff0a7a131707b842c99bf24b549274d/cython-3.2.4-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:36bf3f5eb56d5281aafabecbaa6ed288bc11db87547bba4e1e52943ae6961ccf", size = 2875622, upload-time = "2026-01-04T14:15:39.749Z" },
{ url = "https://files.pythonhosted.org/packages/73/48/48530d9b9d64ec11dbe0dd3178a5fe1e0b27977c1054ecffb82be81e9b6a/cython-3.2.4-cp39-abi3-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6d5267f22b6451eb1e2e1b88f6f78a2c9c8733a6ddefd4520d3968d26b824581", size = 3210669, upload-time = "2026-01-04T14:15:41.911Z" },
{ url = "https://files.pythonhosted.org/packages/5e/91/4865fbfef1f6bb4f21d79c46104a53d1a3fa4348286237e15eafb26e0828/cython-3.2.4-cp39-abi3-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3b6e58f73a69230218d5381817850ce6d0da5bb7e87eb7d528c7027cbba40b06", size = 2856835, upload-time = "2026-01-04T14:15:43.815Z" },
{ url = "https://files.pythonhosted.org/packages/fa/39/60317957dbef179572398253f29d28f75f94ab82d6d39ea3237fb6c89268/cython-3.2.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e71efb20048358a6b8ec604a0532961c50c067b5e63e345e2e359fff72feaee8", size = 2994408, upload-time = "2026-01-04T14:15:45.422Z" },
{ url = "https://files.pythonhosted.org/packages/8d/30/7c24d9292650db4abebce98abc9b49c820d40fa7c87921c0a84c32f4efe7/cython-3.2.4-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:28b1e363b024c4b8dcf52ff68125e635cb9cb4b0ba997d628f25e32543a71103", size = 2891478, upload-time = "2026-01-04T14:15:47.394Z" },
{ url = "https://files.pythonhosted.org/packages/86/70/03dc3c962cde9da37a93cca8360e576f904d5f9beecfc9d70b1f820d2e5f/cython-3.2.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:31a90b4a2c47bb6d56baeb926948348ec968e932c1ae2c53239164e3e8880ccf", size = 3225663, upload-time = "2026-01-04T14:15:49.446Z" },
{ url = "https://files.pythonhosted.org/packages/b1/97/10b50c38313c37b1300325e2e53f48ea9a2c078a85c0c9572057135e31d5/cython-3.2.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e65e4773021f8dc8532010b4fbebe782c77f9a0817e93886e518c93bd6a44e9d", size = 3115628, upload-time = "2026-01-04T14:15:51.323Z" },
{ url = "https://files.pythonhosted.org/packages/8f/b1/d6a353c9b147848122a0db370863601fdf56de2d983b5c4a6a11e6ee3cd7/cython-3.2.4-cp39-abi3-win32.whl", hash = "sha256:2b1f12c0e4798293d2754e73cd6f35fa5bbdf072bdc14bc6fc442c059ef2d290", size = 2437463, upload-time = "2026-01-04T14:15:53.787Z" },
{ url = "https://files.pythonhosted.org/packages/2d/d8/319a1263b9c33b71343adfd407e5daffd453daef47ebc7b642820a8b68ed/cython-3.2.4-cp39-abi3-win_arm64.whl", hash = "sha256:3b8e62049afef9da931d55de82d8f46c9a147313b69d5ff6af6e9121d545ce7a", size = 2442754, upload-time = "2026-01-04T14:15:55.382Z" },
{ url = "https://files.pythonhosted.org/packages/ff/fa/d3c15189f7c52aaefbaea76fb012119b04b9013f4bf446cb4eb4c26c4e6b/cython-3.2.4-py3-none-any.whl", hash = "sha256:732fc93bc33ae4b14f6afaca663b916c2fdd5dcbfad7114e17fb2434eeaea45c", size = 1257078, upload-time = "2026-01-04T14:14:12.373Z" },
]
[[package]]
name = "dpkt"
version = "1.9.8"
@@ -433,6 +463,7 @@ version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "causal-conv1d" },
{ name = "cython" },
{ name = "dpkt" },
{ name = "mamba-ssm" },
{ name = "matplotlib" },
@@ -458,6 +489,7 @@ dev = [
[package.metadata]
requires-dist = [
{ name = "causal-conv1d", specifier = ">=1.6.1" },
{ name = "cython", specifier = ">=3.2.4" },
{ name = "dpkt", specifier = ">=1.9.8" },
{ name = "mamba-ssm", specifier = ">=2.3.1" },
{ name = "matplotlib", specifier = ">=3.10.8" },