Mixed_CFM: absorb Unified_CFM primitives; remove Unified_CFM

Mixed_CFM was loading AdaLNBlock / SinusoidalTimeEmb / _sinkhorn_coupling
and flow-feature helpers from Unified_CFM via importlib spec hacks. Pulled
those symbols into Mixed_CFM/_layers.py (model primitives) and inlined
the flow-feature loader helpers into Mixed_CFM/data.py, then deleted
Unified_CFM/ entirely along with three dead aggregate shell scripts whose
referenced eval entry point (artifacts/verify_2026_04_24/) was already gone.

Verified: historic janus_iscxtor2016_seed42 checkpoint re-evaluated under
the absorbed code reproduces all 10 phase1 AUROC scores to 6 decimals;
same-seed retrain converges to within +/-0.001 on terminal_norm (residual
drift is CUDA non-determinism in MultiheadAttention + Sinkhorn argmax,
not the absorption).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-11 14:17:04 +08:00
parent ee232058b1
commit ff0efa97bf
32 changed files with 175 additions and 2372 deletions

View File

@@ -1,68 +0,0 @@
#!/bin/bash
# Run phase1 eval on all routes after trainings complete.
# Splits across 2 GPUs in parallel chains:
#   GPU 0: baseline + route_a            (UNIFIED_EVAL, run from Unified_CFM/)
#   GPU 1: route_b                       (UNIFIED_EVAL, run from Unified_CFM/)
#          route_c                       (MIXED_EVAL,   run from Mixed_CFM/)
# A model is skipped when its model.pt is missing or when its
# phase1_summary.json already exists. Each eval's stdout/stderr is written to
# <model_dir>/phase1.log. Once both chains succeed, aggregate_results.py is
# run from ROOT.
set -e

ROOT=/home/chy/JANUS
UNIFIED_EVAL=${ROOT}/artifacts/verify_2026_04_24/eval_phase1_unified.py
MIXED_EVAL=${ROOT}/Mixed_CFM/eval_phase1.py
SEEDS=(42 43 44)
cd "${ROOT}"

#######################################
# Evaluate one model unless it is missing or already evaluated.
# Globals:   ROOT (read)
# Arguments: $1   - GPU index, passed to the eval as CUDA_VISIBLE_DEVICES
#            $2   - directory the eval is launched from
#            $3   - eval script path
#            $4   - model name under artifacts/route_comparison/
#            $5.. - extra eval flags, inserted right after --n-steps
# Outputs:   one progress line on stdout; eval output in <model_dir>/phase1.log
# Returns:   0 when skipped or when the eval succeeded, non-zero otherwise
#######################################
eval_model() {
  local gpu=$1 workdir=$2 script=$3 name=$4
  shift 4
  local md=${ROOT}/artifacts/route_comparison/${name}

  [ -f "${md}/model.pt" ] || return 0
  [ ! -f "${md}/phase1_summary.json" ] || return 0

  echo "[GPU${gpu} eval] ${name}"
  cd "${workdir}" || return 1
  CUDA_VISIBLE_DEVICES=${gpu} stdbuf -oL uv run --no-sync python -u "${script}" \
    --model-dir "${md}" --out-dir "${md}" \
    --batch-size 256 --n-steps 16 "$@" \
    --n-val-cap 5000 --n-atk-cap 10000 \
    > "${md}/phase1.log" 2>&1
}

# GPU 0: baselines + route_a (6 models). Stops at the first failing eval.
gpu0_chain() {
  local prefix seed
  for prefix in baseline_ciciot2023 route_a_causal_ciciot2023; do
    for seed in "${SEEDS[@]}"; do
      eval_model 0 "${ROOT}/Unified_CFM" "${UNIFIED_EVAL}" \
        "${prefix}_seed${seed}" --jacobian-n-eps 4 || return
    done
  done
  echo "[GPU0 done]"
}

# GPU 1: route_b + route_c (6 models). Stops at the first failing eval.
gpu1_chain() {
  local seed
  for seed in "${SEEDS[@]}"; do
    eval_model 1 "${ROOT}/Unified_CFM" "${UNIFIED_EVAL}" \
      "route_b_spectral_ciciot2023_seed${seed}" --jacobian-n-eps 4 || return
  done
  for seed in "${SEEDS[@]}"; do
    # The Mixed_CFM eval is invoked without --jacobian-n-eps.
    eval_model 1 "${ROOT}/Mixed_CFM" "${MIXED_EVAL}" \
      "route_c_mixed_ciciot2023_seed${seed}" || return
  done
  echo "[GPU1 done]"
}

# Each chain runs in its own background subshell, so the `cd` inside
# eval_model never changes this shell's working directory.
gpu0_chain &
GPU0_PID=$!
gpu1_chain &
GPU1_PID=$!

# Reap BOTH chains before acting on a failure. A bare `wait` under `set -e`
# would exit on the first failing chain and leave the other one running
# unattended in the background.
phase1_rc=0
wait "${GPU0_PID}" || phase1_rc=$?
wait "${GPU1_PID}" || phase1_rc=$?
if [ "${phase1_rc}" -ne 0 ]; then
  echo "[phase1 failed] an eval chain exited non-zero; check the phase1.log files" >&2
  exit "${phase1_rc}"
fi

echo "[all phase1 done]"
cd "${ROOT}" && uv run --no-sync python artifacts/route_comparison/aggregate_results.py

View File

@@ -1,105 +0,0 @@
#!/bin/bash
# Cross-dataset eval for all 4 routes × 2 targets × 3 seeds = 24 runs.
# Source: CICIoT2023 (where all models were trained).
# Targets: CICIDS2017 + CICDDoS2019.
#
# Per-run results and logs are placed under CROSS_DIR, which is created here
# if it does not exist yet.
set -e

ROOT=/home/chy/JANUS
# Eval entry point used for the baseline / route_a / route_b models.
UNIFIED_EVAL=${ROOT}/artifacts/verify_2026_04_24/eval_phase2_cross_cicddos2019.py
# Eval entry point used for the route_c (Mixed_CFM) models.
MIXED_EVAL=${ROOT}/Mixed_CFM/eval_cross.py
CROSS_DIR=${ROOT}/artifacts/route_comparison/cross

# Quoted so the directory is created correctly even if the path ever
# contains whitespace or glob characters.
mkdir -p -- "${CROSS_DIR}"
# Target dataset paths, keyed as "<dataset>_<artifact>" where <artifact> is
# one of: store, flows, features, features_spectral.
declare -A TARGETS
for _ds in cicids2017 cicddos2019; do
  _processed=${ROOT}/datasets/${_ds}/processed
  TARGETS[${_ds}_store]=${_processed}/full_store
  TARGETS[${_ds}_flows]=${_processed}/flows.parquet
  TARGETS[${_ds}_features]=${_processed}/flow_features.parquet
  TARGETS[${_ds}_features_spectral]=${_processed}/flow_features_spectral.parquet
done
# Do not leave the loop temporaries behind in the script's global scope.
unset _ds _processed
#######################################
# Run the UNIFIED_EVAL cross-dataset eval for one model on one target.
# Does nothing but report a skip when the output JSON already exists.
# Globals:   CROSS_DIR, ROOT, UNIFIED_EVAL, TARGETS (all read)
# Arguments: $1 - GPU index, passed to the eval as CUDA_VISIBLE_DEVICES
#            $2 - model directory (--model-dir)
#            $3 - target key prefix in TARGETS (e.g. cicids2017); selects
#                 TARGETS[<target>_store] and TARGETS[<target>_flows]
#            $4 - flow-features path for the target (--target-flow-features)
#            $5 - output basename: <CROSS_DIR>/<name>.json is passed as --out,
#                 and the eval's stdout/stderr go to <CROSS_DIR>/<name>.log
# Outputs:   one "[skip] …" or "[gpu<N> eval] …" line on stdout
# Returns:   0 when skipped, 1 when Unified_CFM/ cannot be entered,
#            otherwise the eval command's exit status
#######################################
run_unified_eval() {
  local gpu=$1 model_dir=$2 target=$3 features=$4 out_name=$5
  local out=${CROSS_DIR}/${out_name}.json

  if [ -f "${out}" ]; then
    echo "[skip] ${out_name}"
    return 0
  fi

  echo "[gpu${gpu} eval] ${out_name}"
  # NOTE: this changes the caller's working directory; the eval is launched
  # from inside Unified_CFM/ (presumably so the script resolves its package
  # imports — confirm before relocating).
  cd "${ROOT}/Unified_CFM" || return 1
  # Every expansion is quoted so paths containing whitespace reach the eval
  # as single arguments.
  CUDA_VISIBLE_DEVICES=${gpu} stdbuf -oL uv run --no-sync python -u "${UNIFIED_EVAL}" \
    --model-dir "${model_dir}" \
    --target-store "${TARGETS[${target}_store]}" \
    --target-flows "${TARGETS[${target}_flows]}" \
    --target-flow-features "${features}" \
    --out "${out}" \
    --n-benign 10000 --n-attack 10000 --seed 42 \
    --T 64 --batch-size 256 --n-steps 16 \
    > "${CROSS_DIR}/${out_name}.log" 2>&1
}
#######################################
# Run the MIXED_EVAL cross-dataset eval for one model on one target.
# Does nothing but report a skip when the output JSON already exists.
# Globals:   CROSS_DIR, ROOT, MIXED_EVAL, TARGETS (all read)
# Arguments: $1 - GPU index, passed to the eval as CUDA_VISIBLE_DEVICES
#            $2 - model directory (--model-dir)
#            $3 - target key prefix in TARGETS (e.g. cicids2017); selects
#                 TARGETS[<target>_store], TARGETS[<target>_flows] and
#                 TARGETS[<target>_features]
#            $4 - output basename: <CROSS_DIR>/<name>.json is passed as --out,
#                 and the eval's stdout/stderr go to <CROSS_DIR>/<name>.log
# Outputs:   one "[skip] …" or "[gpu<N> mixed eval] …" line on stdout
# Returns:   0 when skipped, 1 when Mixed_CFM/ cannot be entered,
#            otherwise the eval command's exit status
#######################################
run_mixed_eval() {
  local gpu=$1 model_dir=$2 target=$3 out_name=$4
  local out=${CROSS_DIR}/${out_name}.json

  if [ -f "${out}" ]; then
    echo "[skip] ${out_name}"
    return 0
  fi

  echo "[gpu${gpu} mixed eval] ${out_name}"
  # NOTE: this changes the caller's working directory; the eval is launched
  # from inside Mixed_CFM/ (presumably so the script resolves its package
  # imports — confirm before relocating).
  cd "${ROOT}/Mixed_CFM" || return 1
  # Every expansion is quoted so paths containing whitespace reach the eval
  # as single arguments.
  CUDA_VISIBLE_DEVICES=${gpu} stdbuf -oL uv run --no-sync python -u "${MIXED_EVAL}" \
    --model-dir "${model_dir}" \
    --target-store "${TARGETS[${target}_store]}" \
    --target-flows "${TARGETS[${target}_flows]}" \
    --target-flow-features "${TARGETS[${target}_features]}" \
    --out "${out}" \
    --n-benign 10000 --n-attack 10000 --seed 42 \
    --T 64 --batch-size 256 --n-steps 16 \
    > "${CROSS_DIR}/${out_name}.log" 2>&1
}
# === GPU 0 chain: baselines + route_a, both targets ===
# Runs in a background subshell; all chain output goes to /tmp/cross_gpu0.log.
{
  # Each entry is "<model-dir prefix>:<short route name>".
  for prefix_route in "baseline_ciciot2023:baseline" "route_a_causal_ciciot2023:route_a_causal"; do
    prefix=${prefix_route%:*}
    short=${prefix_route#*:}
    for seed in 42 43 44; do
      md=${ROOT}/artifacts/route_comparison/${prefix}_seed${seed}
      [ -f "${md}/model.pt" ] || continue
      for target in cicids2017 cicddos2019; do
        run_unified_eval 0 "${md}" "${target}" "${TARGETS[${target}_features]}" \
          "${short}_seed${seed}_to_${target}"
      done
    done
  done
  echo "[gpu0 cross chain done]"
} > /tmp/cross_gpu0.log 2>&1 &
GPU0=$!

# === GPU 1 chain: route_b (uses spectral features) + route_c (mixed) ===
# Runs in a background subshell; all chain output goes to /tmp/cross_gpu1.log.
{
  # route_b: must use flow_features_spectral.parquet
  for seed in 42 43 44; do
    md=${ROOT}/artifacts/route_comparison/route_b_spectral_ciciot2023_seed${seed}
    [ -f "${md}/model.pt" ] || continue
    for target in cicids2017 cicddos2019; do
      run_unified_eval 1 "${md}" "${target}" "${TARGETS[${target}_features_spectral]}" \
        "route_b_spectral_seed${seed}_to_${target}"
    done
  done
  # route_c: Mixed_CFM eval (uses canonical flow_features)
  for seed in 42 43 44; do
    md=${ROOT}/artifacts/route_comparison/route_c_mixed_ciciot2023_seed${seed}
    [ -f "${md}/model.pt" ] || continue
    for target in cicids2017 cicddos2019; do
      run_mixed_eval 1 "${md}" "${target}" \
        "route_c_mixed_seed${seed}_to_${target}"
    done
  done
  echo "[gpu1 cross chain done]"
} > /tmp/cross_gpu1.log 2>&1 &
GPU1=$!

# Reap BOTH chains before acting on a failure. A bare `wait` under `set -e`
# would exit as soon as the GPU 0 chain failed and leave the GPU 1 chain
# running unattended in the background.
cross_rc=0
wait "${GPU0}" || cross_rc=$?
wait "${GPU1}" || cross_rc=$?
if [ "${cross_rc}" -ne 0 ]; then
  echo "[cross failed] a chain exited non-zero; see /tmp/cross_gpu0.log and /tmp/cross_gpu1.log" >&2
  exit "${cross_rc}"
fi
echo "[all cross done]"

# Print how many result files exist. Counting a glob expansion avoids parsing
# `ls` output and prints a clean 0 (no error message) when nothing matches.
shopt -s nullglob
cross_results=("${CROSS_DIR}"/*.json)
shopt -u nullglob
echo "${#cross_results[@]}"

View File

@@ -1,45 +0,0 @@
#!/bin/bash
# Run phase1 eval on all route_comparison models.
# Output: <model_dir>/phase1_summary.json + phase1_scores.npz
#
# Usage:
# bash artifacts/route_comparison/run_phase1_all.sh [GPU_ID]
#
# Default GPU_ID = 0. Each eval takes ~3-5 min with the caps below.
#
# A model is skipped when its model.pt is missing or its phase1_summary.json
# already exists. The full eval output is saved to <model_dir>/phase1.log and
# only its last 5 lines are echoed to the terminal.
#
# Exit status: 0 when every attempted eval succeeded, 1 when at least one
# eval exited non-zero (remaining models are still attempted).
set -e

GPU_ID="${1:-0}"
ROOT=/home/chy/JANUS
EVAL=${ROOT}/artifacts/verify_2026_04_24/eval_phase1_unified.py

models=(
  baseline_ciciot2023_seed42
  baseline_ciciot2023_seed43
  baseline_ciciot2023_seed44
  route_a_causal_ciciot2023_seed42
  route_a_causal_ciciot2023_seed43
  route_a_causal_ciciot2023_seed44
)

# Names of models whose eval exited non-zero.
failed=()

cd "${ROOT}/Unified_CFM"
for name in "${models[@]}"; do
  model_dir=${ROOT}/artifacts/route_comparison/${name}
  if [ ! -f "${model_dir}/model.pt" ]; then
    echo "[skip] ${name}: model.pt missing"
    continue
  fi
  out_dir=${model_dir}
  if [ -f "${out_dir}/phase1_summary.json" ]; then
    echo "[skip] ${name}: phase1_summary.json exists"
    continue
  fi
  echo "[eval] ${name}"
  CUDA_VISIBLE_DEVICES=${GPU_ID} stdbuf -oL uv run --no-sync python -u "${EVAL}" \
    --model-dir "${model_dir}" --out-dir "${out_dir}" \
    --batch-size 256 --n-steps 16 \
    --jacobian-n-eps 4 \
    --n-val-cap 5000 --n-atk-cap 10000 \
    2>&1 | tee "${model_dir}/phase1.log" | tail -5
  # The pipeline's own status is tail's, so `set -e` never sees a crashed
  # eval. Read the first stage's status explicitly; this must be the very
  # next command, before PIPESTATUS is overwritten.
  eval_status=${PIPESTATUS[0]}
  if [ "${eval_status}" -ne 0 ]; then
    echo "[fail] ${name}: eval exited with status ${eval_status}; see ${model_dir}/phase1.log" >&2
    failed+=("${name}")
    continue
  fi
  echo "[done] ${name}"
done

if [ "${#failed[@]}" -gt 0 ]; then
  echo "[failed] ${failed[*]}" >&2
  exit 1
fi
echo "[all done]"