Mixed_CFM: absorb Unified_CFM primitives; remove Unified_CFM
Mixed_CFM was loading AdaLNBlock / SinusoidalTimeEmb / _sinkhorn_coupling and flow-feature helpers from Unified_CFM via importlib spec hacks. Pulled those symbols into Mixed_CFM/_layers.py (model primitives) and inlined the flow-feature loader helpers into Mixed_CFM/data.py, then deleted Unified_CFM/ entirely along with three dead aggregate shell scripts whose referenced eval entry point (artifacts/verify_2026_04_24/) was already gone. Verified: historic janus_iscxtor2016_seed42 checkpoint re-evaluated under the absorbed code reproduces all 10 phase1 AUROC scores to 6 decimals; same-seed retrain converges to within +/-0.001 on terminal_norm (residual drift is CUDA non-determinism in MultiheadAttention + Sinkhorn argmax, not the absorption). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,68 +0,0 @@
|
||||
#!/bin/bash
# Run phase1 eval on all routes after trainings complete.
# Splits across 2 GPUs in parallel chains, then runs the aggregate script.

set -e
ROOT=/home/chy/JANUS
UNIFIED_EVAL="${ROOT}/artifacts/verify_2026_04_24/eval_phase1_unified.py"
MIXED_EVAL="${ROOT}/Mixed_CFM/eval_phase1.py"

cd "${ROOT}"

#######################################
# Run one phase1 eval unless it is not needed.
# Skips silently when <model_dir>/model.pt is missing or when
# <model_dir>/phase1_summary.json already exists.
# Globals:   ROOT (read)
# Arguments: $1 - GPU index exported as CUDA_VISIBLE_DEVICES
#            $2 - directory to cd into before launching the eval
#            $3 - eval script path
#            $4 - model name under artifacts/route_comparison
#            $5.. - extra eval flags, inserted after --n-steps
# Outputs:   progress line on stdout; eval output in <model_dir>/phase1.log
# Returns:   0 on skip; otherwise the eval's exit status
#######################################
run_phase1() {
  local gpu=$1 workdir=$2 eval_script=$3 name=$4
  shift 4
  local md="${ROOT}/artifacts/route_comparison/${name}"

  if [ ! -f "${md}/model.pt" ]; then
    return 0
  fi
  if [ -f "${md}/phase1_summary.json" ]; then
    return 0
  fi

  echo "[GPU${gpu} eval] ${name}"
  cd "${workdir}"
  CUDA_VISIBLE_DEVICES="${gpu}" stdbuf -oL uv run --no-sync python -u "${eval_script}" \
    --model-dir "${md}" --out-dir "${md}" \
    --batch-size 256 --n-steps 16 "$@" \
    --n-val-cap 5000 --n-atk-cap 10000 \
    > "${md}/phase1.log" 2>&1
}

# GPU 0: baselines + route_a (6 models)
# NOTE: run_phase1 is called as a plain command (never inside if/&&/||) so
# that `set -e` stays active inside it and a failed eval aborts the chain.
{
  for prefix in baseline_ciciot2023 route_a_causal_ciciot2023; do
    for seed in 42 43 44; do
      run_phase1 0 "${ROOT}/Unified_CFM" "${UNIFIED_EVAL}" \
        "${prefix}_seed${seed}" --jacobian-n-eps 4
    done
  done
  echo "[GPU0 done]"
} &
GPU0_PID=$!

# GPU 1: route_b + route_c (6 models)
{
  for seed in 42 43 44; do
    run_phase1 1 "${ROOT}/Unified_CFM" "${UNIFIED_EVAL}" \
      "route_b_spectral_ciciot2023_seed${seed}" --jacobian-n-eps 4
  done
  for seed in 42 43 44; do
    run_phase1 1 "${ROOT}/Mixed_CFM" "${MIXED_EVAL}" \
      "route_c_mixed_ciciot2023_seed${seed}"
  done
  echo "[GPU1 done]"
} &
GPU1_PID=$!

# Reap BOTH chains before acting on a failure. A bare `wait` under `set -e`
# would exit on the first failed chain and leave the other one running
# in the background with nobody waiting for it.
gpu0_rc=0
gpu1_rc=0
wait "${GPU0_PID}" || gpu0_rc=$?
wait "${GPU1_PID}" || gpu1_rc=$?
if (( gpu0_rc != 0 || gpu1_rc != 0 )); then
  echo "[phase1 failed] gpu0 status=${gpu0_rc} gpu1 status=${gpu1_rc}" >&2
  exit "$(( gpu0_rc != 0 ? gpu0_rc : gpu1_rc ))"
fi

echo "[all phase1 done]"
cd "${ROOT}"
uv run --no-sync python artifacts/route_comparison/aggregate_results.py
|
||||
@@ -1,105 +0,0 @@
|
||||
#!/bin/bash
# Cross-dataset eval for all 4 routes × 2 targets × 3 seeds = 24 runs.
# Source: CICIoT2023 (where all models were trained).
# Targets: CICIDS2017 + CICDDoS2019.

set -e
ROOT=/home/chy/JANUS
UNIFIED_EVAL="${ROOT}/artifacts/verify_2026_04_24/eval_phase2_cross_cicddos2019.py"
MIXED_EVAL="${ROOT}/Mixed_CFM/eval_cross.py"
CROSS_DIR="${ROOT}/artifacts/route_comparison/cross"
# Quoted so the directory is created as one path even if ROOT is ever
# changed to a location containing whitespace.
mkdir -p "${CROSS_DIR}"

# Target dataset paths, keyed as "<dataset>_<kind>" where kind is one of
# store / flows / features / features_spectral.
declare -A TARGETS
TARGETS[cicids2017_store]="${ROOT}/datasets/cicids2017/processed/full_store"
TARGETS[cicids2017_flows]="${ROOT}/datasets/cicids2017/processed/flows.parquet"
TARGETS[cicids2017_features]="${ROOT}/datasets/cicids2017/processed/flow_features.parquet"
TARGETS[cicids2017_features_spectral]="${ROOT}/datasets/cicids2017/processed/flow_features_spectral.parquet"

TARGETS[cicddos2019_store]="${ROOT}/datasets/cicddos2019/processed/full_store"
TARGETS[cicddos2019_flows]="${ROOT}/datasets/cicddos2019/processed/flows.parquet"
TARGETS[cicddos2019_features]="${ROOT}/datasets/cicddos2019/processed/flow_features.parquet"
TARGETS[cicddos2019_features_spectral]="${ROOT}/datasets/cicddos2019/processed/flow_features_spectral.parquet"
|
||||
|
||||
#######################################
# Run the unified cross-dataset eval for one (model, target) pair.
# Does nothing except print a skip line when the result JSON already exists.
# Globals:   CROSS_DIR, ROOT, UNIFIED_EVAL, TARGETS (all read)
# Arguments: $1 - GPU index exported as CUDA_VISIBLE_DEVICES
#            $2 - model directory passed as --model-dir
#            $3 - target dataset key prefix (looked up as TARGETS[<key>_store]
#                 and TARGETS[<key>_flows])
#            $4 - flow-features file passed as --target-flow-features
#            $5 - output basename; writes <CROSS_DIR>/<name>.json and .log
# Outputs:   one progress/skip line on stdout; eval output goes to the .log
# Returns:   0 on skip, 1 if the working directory cannot be entered,
#            otherwise the eval's exit status
#######################################
run_unified_eval() {
  local gpu=$1 model_dir=$2 target=$3 features=$4 out_name=$5
  local out="${CROSS_DIR}/${out_name}.json"

  if [ -f "${out}" ]; then
    echo "[skip] ${out_name}"
    return
  fi

  echo "[gpu${gpu} eval] ${out_name}"
  # Fail explicitly instead of launching the eval from the wrong directory.
  cd "${ROOT}/Unified_CFM" || return 1
  # Every expansion is quoted so a path containing whitespace stays a
  # single argument instead of being word-split into several.
  CUDA_VISIBLE_DEVICES="${gpu}" stdbuf -oL uv run --no-sync python -u "${UNIFIED_EVAL}" \
    --model-dir "${model_dir}" \
    --target-store "${TARGETS[${target}_store]}" \
    --target-flows "${TARGETS[${target}_flows]}" \
    --target-flow-features "${features}" \
    --out "${out}" \
    --n-benign 10000 --n-attack 10000 --seed 42 \
    --T 64 --batch-size 256 --n-steps 16 \
    > "${CROSS_DIR}/${out_name}.log" 2>&1
}
|
||||
|
||||
#######################################
# Run the Mixed_CFM cross-dataset eval for one (model, target) pair.
# Does nothing except print a skip line when the result JSON already exists.
# Globals:   CROSS_DIR, ROOT, MIXED_EVAL, TARGETS (all read)
# Arguments: $1 - GPU index exported as CUDA_VISIBLE_DEVICES
#            $2 - model directory passed as --model-dir
#            $3 - target dataset key prefix (looked up as TARGETS[<key>_store],
#                 TARGETS[<key>_flows] and TARGETS[<key>_features])
#            $4 - output basename; writes <CROSS_DIR>/<name>.json and .log
# Outputs:   one progress/skip line on stdout; eval output goes to the .log
# Returns:   0 on skip, 1 if the working directory cannot be entered,
#            otherwise the eval's exit status
#######################################
run_mixed_eval() {
  local gpu=$1 model_dir=$2 target=$3 out_name=$4
  local out="${CROSS_DIR}/${out_name}.json"

  if [ -f "${out}" ]; then
    echo "[skip] ${out_name}"
    return
  fi

  echo "[gpu${gpu} mixed eval] ${out_name}"
  # Fail explicitly instead of launching the eval from the wrong directory.
  cd "${ROOT}/Mixed_CFM" || return 1
  # Every expansion is quoted so a path containing whitespace stays a
  # single argument instead of being word-split into several.
  CUDA_VISIBLE_DEVICES="${gpu}" stdbuf -oL uv run --no-sync python -u "${MIXED_EVAL}" \
    --model-dir "${model_dir}" \
    --target-store "${TARGETS[${target}_store]}" \
    --target-flows "${TARGETS[${target}_flows]}" \
    --target-flow-features "${TARGETS[${target}_features]}" \
    --out "${out}" \
    --n-benign 10000 --n-attack 10000 --seed 42 \
    --T 64 --batch-size 256 --n-steps 16 \
    > "${CROSS_DIR}/${out_name}.log" 2>&1
}
|
||||
|
||||
# === GPU 0 chain: baselines + route_a, both targets ===
# Each entry is "<model dir prefix>:<short name used in output files>".
{
  for prefix_route in "baseline_ciciot2023:baseline" "route_a_causal_ciciot2023:route_a_causal"; do
    prefix=${prefix_route%:*}
    short=${prefix_route#*:}
    for seed in 42 43 44; do
      md="${ROOT}/artifacts/route_comparison/${prefix}_seed${seed}"
      # Models that were never trained are skipped, not treated as errors.
      [ -f "${md}/model.pt" ] || continue
      for target in cicids2017 cicddos2019; do
        run_unified_eval 0 "${md}" "${target}" "${TARGETS[${target}_features]}" \
          "${short}_seed${seed}_to_${target}"
      done
    done
  done
  echo "[gpu0 cross chain done]"
} > /tmp/cross_gpu0.log 2>&1 &
GPU0=$!

# === GPU 1 chain: route_b (uses spectral features) + route_c (mixed) ===
{
  # route_b: must use flow_features_spectral.parquet
  for seed in 42 43 44; do
    md="${ROOT}/artifacts/route_comparison/route_b_spectral_ciciot2023_seed${seed}"
    [ -f "${md}/model.pt" ] || continue
    for target in cicids2017 cicddos2019; do
      run_unified_eval 1 "${md}" "${target}" "${TARGETS[${target}_features_spectral]}" \
        "route_b_spectral_seed${seed}_to_${target}"
    done
  done

  # route_c: Mixed_CFM eval (uses canonical flow_features)
  for seed in 42 43 44; do
    md="${ROOT}/artifacts/route_comparison/route_c_mixed_ciciot2023_seed${seed}"
    [ -f "${md}/model.pt" ] || continue
    for target in cicids2017 cicddos2019; do
      run_mixed_eval 1 "${md}" "${target}" \
        "route_c_mixed_seed${seed}_to_${target}"
    done
  done
  echo "[gpu1 cross chain done]"
} > /tmp/cross_gpu1.log 2>&1 &
GPU1=$!

# Reap BOTH chains before acting on a failure. A bare `wait` under `set -e`
# would exit on the first failed chain and leave the other one running
# in the background with nobody waiting for it.
gpu0_rc=0
gpu1_rc=0
wait "${GPU0}" || gpu0_rc=$?
wait "${GPU1}" || gpu1_rc=$?
if (( gpu0_rc != 0 || gpu1_rc != 0 )); then
  echo "[cross failed] gpu0 status=${gpu0_rc} gpu1 status=${gpu1_rc}" >&2
  exit "$(( gpu0_rc != 0 ? gpu0_rc : gpu1_rc ))"
fi

echo "[all cross done]"
# Print the number of result files. Counting a glob array avoids parsing
# `ls` output and prints 0 (without an error) when no result exists yet.
shopt -s nullglob
cross_results=("${CROSS_DIR}"/*.json)
shopt -u nullglob
echo "${#cross_results[@]}"
|
||||
@@ -1,45 +0,0 @@
|
||||
#!/bin/bash
# Run phase1 eval on all route_comparison models.
# Output: <model_dir>/phase1_summary.json + phase1_scores.npz
#
# Usage:
#   bash artifacts/route_comparison/run_phase1_all.sh [GPU_ID]
#
# Default GPU_ID = 0. Each eval takes ~3-5 min with the caps below.

set -e
# The eval is piped through `tee | tail`; without pipefail the pipeline's
# status is tail's, so a crashed eval would be reported as "[done]" and
# `set -e` would never fire.
set -o pipefail
GPU_ID="${1:-0}"
ROOT=/home/chy/JANUS
EVAL="${ROOT}/artifacts/verify_2026_04_24/eval_phase1_unified.py"

models=(
  baseline_ciciot2023_seed42
  baseline_ciciot2023_seed43
  baseline_ciciot2023_seed44
  route_a_causal_ciciot2023_seed42
  route_a_causal_ciciot2023_seed43
  route_a_causal_ciciot2023_seed44
)

cd "${ROOT}/Unified_CFM"
for name in "${models[@]}"; do
  model_dir="${ROOT}/artifacts/route_comparison/${name}"
  if [ ! -f "${model_dir}/model.pt" ]; then
    echo "[skip] ${name}: model.pt missing"
    continue
  fi
  # Results are written next to the model checkpoint.
  out_dir="${model_dir}"
  if [ -f "${out_dir}/phase1_summary.json" ]; then
    echo "[skip] ${name}: phase1_summary.json exists"
    continue
  fi
  echo "[eval] ${name}"
  # Full output goes to phase1.log; only the last 5 lines reach the terminal.
  CUDA_VISIBLE_DEVICES="${GPU_ID}" stdbuf -oL uv run --no-sync python -u "${EVAL}" \
    --model-dir "${model_dir}" --out-dir "${out_dir}" \
    --batch-size 256 --n-steps 16 \
    --jacobian-n-eps 4 \
    --n-val-cap 5000 --n-atk-cap 10000 \
    2>&1 | tee "${model_dir}/phase1.log" | tail -5
  echo "[done] ${name}"
done
echo "[all done]"
|
||||
Reference in New Issue
Block a user