Mixed_CFM: absorb Unified_CFM primitives; remove Unified_CFM

Mixed_CFM was loading AdaLNBlock, SinusoidalTimeEmb, _sinkhorn_coupling, and the flow-feature helpers from Unified_CFM via importlib spec hacks. This commit moves those symbols into Mixed_CFM/_layers.py (model primitives), inlines the flow-feature loader helpers into Mixed_CFM/data.py, and deletes Unified_CFM/ entirely, along with three dead aggregate shell scripts whose eval entry points under artifacts/verify_2026_04_24/ were already gone. Verified: the historic janus_iscxtor2016_seed42 checkpoint, re-evaluated under the absorbed code, reproduces all 10 phase1 AUROC scores to 6 decimal places; a same-seed retrain converges to within +/-0.001 on terminal_norm (the residual drift is CUDA non-determinism in MultiheadAttention and the Sinkhorn argmax, not the absorption). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 14:17:04 +08:00
parent ee232058b1
commit ff0efa97bf
32 changed files with 175 additions and 2372 deletions
--- a/scripts/aggregate/run_all_phase1.sh
+++ b/scripts/aggregate/run_all_phase1.sh
@@ -1,68 +0,0 @@
-#!/bin/bash
-# Run phase1 eval on all routes after trainings complete.
-# Splits across 2 GPUs in parallel chains.
-
-set -e
-ROOT=/home/chy/JANUS
-UNIFIED_EVAL=${ROOT}/artifacts/verify_2026_04_24/eval_phase1_unified.py
-MIXED_EVAL=${ROOT}/Mixed_CFM/eval_phase1.py
-
-cd ${ROOT}
-
-# GPU 0: baselines + route_a (6 models)
-{
-for prefix in baseline_ciciot2023 route_a_causal_ciciot2023; do
-  for seed in 42 43 44; do
-    name=${prefix}_seed${seed}
-    md=${ROOT}/artifacts/route_comparison/${name}
-    [ -f "${md}/model.pt" ] || continue
-    [ -f "${md}/phase1_summary.json" ] && continue
-    echo "[GPU0 eval] ${name}"
-    cd ${ROOT}/Unified_CFM
-    CUDA_VISIBLE_DEVICES=0 stdbuf -oL uv run --no-sync python -u ${UNIFIED_EVAL} \
-      --model-dir ${md} --out-dir ${md} \
-      --batch-size 256 --n-steps 16 --jacobian-n-eps 4 \
-      --n-val-cap 5000 --n-atk-cap 10000 \
-      > ${md}/phase1.log 2>&1
-  done
-done
-echo "[GPU0 done]"
-} &
-GPU0_PID=$!
-
-# GPU 1: route_b + route_c (6 models)
-{
-for seed in 42 43 44; do
-  name=route_b_spectral_ciciot2023_seed${seed}
-  md=${ROOT}/artifacts/route_comparison/${name}
-  [ -f "${md}/model.pt" ] || continue
-  [ -f "${md}/phase1_summary.json" ] && continue
-  echo "[GPU1 eval] ${name}"
-  cd ${ROOT}/Unified_CFM
-  CUDA_VISIBLE_DEVICES=1 stdbuf -oL uv run --no-sync python -u ${UNIFIED_EVAL} \
-    --model-dir ${md} --out-dir ${md} \
-    --batch-size 256 --n-steps 16 --jacobian-n-eps 4 \
-    --n-val-cap 5000 --n-atk-cap 10000 \
-    > ${md}/phase1.log 2>&1
-done
-for seed in 42 43 44; do
-  name=route_c_mixed_ciciot2023_seed${seed}
-  md=${ROOT}/artifacts/route_comparison/${name}
-  [ -f "${md}/model.pt" ] || continue
-  [ -f "${md}/phase1_summary.json" ] && continue
-  echo "[GPU1 eval] ${name}"
-  cd ${ROOT}/Mixed_CFM
-  CUDA_VISIBLE_DEVICES=1 stdbuf -oL uv run --no-sync python -u ${MIXED_EVAL} \
-    --model-dir ${md} --out-dir ${md} \
-    --batch-size 256 --n-steps 16 \
-    --n-val-cap 5000 --n-atk-cap 10000 \
-    > ${md}/phase1.log 2>&1
-done
-echo "[GPU1 done]"
-} &
-GPU1_PID=$!
-
-wait $GPU0_PID
-wait $GPU1_PID
-echo "[all phase1 done]"
-cd ${ROOT} && uv run --no-sync python artifacts/route_comparison/aggregate_results.py
--- a/scripts/aggregate/run_cross_all.sh
+++ b/scripts/aggregate/run_cross_all.sh
@@ -1,105 +0,0 @@
-#!/bin/bash
-# Cross-dataset eval for all 4 routes × 2 targets × 3 seeds = 24 runs.
-# Source: CICIoT2023 (where all models were trained).
-# Targets: CICIDS2017 + CICDDoS2019.
-
-set -e
-ROOT=/home/chy/JANUS
-UNIFIED_EVAL=${ROOT}/artifacts/verify_2026_04_24/eval_phase2_cross_cicddos2019.py
-MIXED_EVAL=${ROOT}/Mixed_CFM/eval_cross.py
-CROSS_DIR=${ROOT}/artifacts/route_comparison/cross
-mkdir -p ${CROSS_DIR}
-
-# Target dataset paths
-declare -A TARGETS
-TARGETS[cicids2017_store]=${ROOT}/datasets/cicids2017/processed/full_store
-TARGETS[cicids2017_flows]=${ROOT}/datasets/cicids2017/processed/flows.parquet
-TARGETS[cicids2017_features]=${ROOT}/datasets/cicids2017/processed/flow_features.parquet
-TARGETS[cicids2017_features_spectral]=${ROOT}/datasets/cicids2017/processed/flow_features_spectral.parquet
-
-TARGETS[cicddos2019_store]=${ROOT}/datasets/cicddos2019/processed/full_store
-TARGETS[cicddos2019_flows]=${ROOT}/datasets/cicddos2019/processed/flows.parquet
-TARGETS[cicddos2019_features]=${ROOT}/datasets/cicddos2019/processed/flow_features.parquet
-TARGETS[cicddos2019_features_spectral]=${ROOT}/datasets/cicddos2019/processed/flow_features_spectral.parquet
-
-run_unified_eval() {
-  local gpu=$1 model_dir=$2 target=$3 features=$4 out_name=$5
-  local out=${CROSS_DIR}/${out_name}.json
-  [ -f "${out}" ] && { echo "[skip] ${out_name}"; return; }
-  echo "[gpu${gpu} eval] ${out_name}"
-  cd ${ROOT}/Unified_CFM
-  CUDA_VISIBLE_DEVICES=${gpu} stdbuf -oL uv run --no-sync python -u ${UNIFIED_EVAL} \
-    --model-dir ${model_dir} \
-    --target-store ${TARGETS[${target}_store]} \
-    --target-flows ${TARGETS[${target}_flows]} \
-    --target-flow-features ${features} \
-    --out ${out} \
-    --n-benign 10000 --n-attack 10000 --seed 42 \
-    --T 64 --batch-size 256 --n-steps 16 \
-    > ${CROSS_DIR}/${out_name}.log 2>&1
-}
-
-run_mixed_eval() {
-  local gpu=$1 model_dir=$2 target=$3 out_name=$4
-  local out=${CROSS_DIR}/${out_name}.json
-  [ -f "${out}" ] && { echo "[skip] ${out_name}"; return; }
-  echo "[gpu${gpu} mixed eval] ${out_name}"
-  cd ${ROOT}/Mixed_CFM
-  CUDA_VISIBLE_DEVICES=${gpu} stdbuf -oL uv run --no-sync python -u ${MIXED_EVAL} \
-    --model-dir ${model_dir} \
-    --target-store ${TARGETS[${target}_store]} \
-    --target-flows ${TARGETS[${target}_flows]} \
-    --target-flow-features ${TARGETS[${target}_features]} \
-    --out ${out} \
-    --n-benign 10000 --n-attack 10000 --seed 42 \
-    --T 64 --batch-size 256 --n-steps 16 \
-    > ${CROSS_DIR}/${out_name}.log 2>&1
-}
-
-# === GPU 0 chain: baselines + route_a, both targets ===
-{
-for prefix_route in "baseline_ciciot2023:baseline" "route_a_causal_ciciot2023:route_a_causal"; do
-  prefix=${prefix_route%:*}
-  short=${prefix_route#*:}
-  for seed in 42 43 44; do
-    md=${ROOT}/artifacts/route_comparison/${prefix}_seed${seed}
-    [ -f "${md}/model.pt" ] || continue
-    for target in cicids2017 cicddos2019; do
-      run_unified_eval 0 "${md}" "${target}" "${TARGETS[${target}_features]}" \
-        "${short}_seed${seed}_to_${target}"
-    done
-  done
-done
-echo "[gpu0 cross chain done]"
-} > /tmp/cross_gpu0.log 2>&1 &
-GPU0=$!
-
-# === GPU 1 chain: route_b (uses spectral features) + route_c (mixed) ===
-{
-# route_b: must use flow_features_spectral.parquet
-for seed in 42 43 44; do
-  md=${ROOT}/artifacts/route_comparison/route_b_spectral_ciciot2023_seed${seed}
-  [ -f "${md}/model.pt" ] || continue
-  for target in cicids2017 cicddos2019; do
-    run_unified_eval 1 "${md}" "${target}" "${TARGETS[${target}_features_spectral]}" \
-      "route_b_spectral_seed${seed}_to_${target}"
-  done
-done
-
-# route_c: Mixed_CFM eval (uses canonical flow_features)
-for seed in 42 43 44; do
-  md=${ROOT}/artifacts/route_comparison/route_c_mixed_ciciot2023_seed${seed}
-  [ -f "${md}/model.pt" ] || continue
-  for target in cicids2017 cicddos2019; do
-    run_mixed_eval 1 "${md}" "${target}" \
-      "route_c_mixed_seed${seed}_to_${target}"
-  done
-done
-echo "[gpu1 cross chain done]"
-} > /tmp/cross_gpu1.log 2>&1 &
-GPU1=$!
-
-wait $GPU0
-wait $GPU1
-echo "[all cross done]"
-ls -la ${CROSS_DIR}/*.json | wc -l
--- a/scripts/aggregate/run_phase1_all.sh
+++ b/scripts/aggregate/run_phase1_all.sh
@@ -1,45 +0,0 @@
-#!/bin/bash
-# Run phase1 eval on all route_comparison models.
-# Output: <model_dir>/phase1_summary.json + phase1_scores.npz
-#
-# Usage:
-#   bash artifacts/route_comparison/run_phase1_all.sh [GPU_ID]
-#
-# Default GPU_ID = 0. Each eval takes ~3-5 min with the caps below.
-
-set -e
-GPU_ID="${1:-0}"
-ROOT=/home/chy/JANUS
-EVAL=${ROOT}/artifacts/verify_2026_04_24/eval_phase1_unified.py
-
-models=(
-  baseline_ciciot2023_seed42
-  baseline_ciciot2023_seed43
-  baseline_ciciot2023_seed44
-  route_a_causal_ciciot2023_seed42
-  route_a_causal_ciciot2023_seed43
-  route_a_causal_ciciot2023_seed44
-)
-
-cd ${ROOT}/Unified_CFM
-for name in "${models[@]}"; do
-  model_dir=${ROOT}/artifacts/route_comparison/${name}
-  if [ ! -f "${model_dir}/model.pt" ]; then
-    echo "[skip] ${name}: model.pt missing"
-    continue
-  fi
-  out_dir=${model_dir}
-  if [ -f "${out_dir}/phase1_summary.json" ]; then
-    echo "[skip] ${name}: phase1_summary.json exists"
-    continue
-  fi
-  echo "[eval] ${name}"
-  CUDA_VISIBLE_DEVICES=${GPU_ID} stdbuf -oL uv run --no-sync python -u ${EVAL} \
-    --model-dir ${model_dir} --out-dir ${out_dir} \
-    --batch-size 256 --n-steps 16 \
-    --jacobian-n-eps 4 \
-    --n-val-cap 5000 --n-atk-cap 10000 \
-    2>&1 | tee ${model_dir}/phase1.log | tail -5
-  echo "[done] ${name}"
-done
-echo "[all done]"