#!/bin/bash
# End-to-end representation experiment: re-extract CICIDS2017 + CICDDoS2019
# with metadata columns, then train E0/E1/E2 with fixed 10k benign and
# evaluate on CICDDoS2019.
#
# Stages (each with wall-clock logging + per-stage log file):
#   S1   re-extract CICIDS2017 → datasets/cicids2017/processed/*
#   S2a  re-extract CICDDoS2019 03-11 shard
#   S2b  re-extract CICDDoS2019 01-12 shard
#   S2c  merge CICDDoS2019 shards
#   S3   train E0 (mixed_dequant, no ctx)      [configs/n10k_baseline.yaml]
#   S4   train E1 (relative_v2, no ctx)        [configs/n10k_relv2.yaml]
#   S5   train E2 (relative_v2, with 8-d ctx)  [configs/n10k_relv2_ctx.yaml]
#   S6   detect+per_class for each on CICDDoS2019 (executed right after each
#        model's training, so S3..S6 are interleaved per experiment)
#   S7   summary table
#
# Any stage's failure aborts the rest and leaves the partial log intact.

# `-e` is not enabled: stage failures are handled by run_stage, and every
# other command whose failure matters is checked explicitly with `|| die`.
set -uo pipefail

#######################################
# Print a timestamped error message to stderr and abort the script.
# Arguments: $* - message text
# Returns:   never; exits with status 1
#######################################
die() {
  printf '!!! %s: %s\n' "$(date)" "$*" >&2
  exit 1
}

ROOT=/home/chy/mambafortrafficmodeling
# All paths below are relative to ROOT, so a failed cd must be fatal.
cd "$ROOT" || die "cannot cd to $ROOT"

STAMP=$(date +%Y%m%d_%H%M%S)
OUT_DIR="runs/repr_experiment_${STAMP}"
mkdir -p "$OUT_DIR" || die "cannot create output directory $OUT_DIR"
MAIN_LOG="$OUT_DIR/orch.log"
# From here on, everything written to stdout/stderr is also appended to
# the orchestration log.
exec > >(tee -a "$MAIN_LOG") 2>&1

# Values passed to every detect run as --n-val / --n-atk / --seed.
N_VAL=20000
N_ATK=100000
SPLIT_SEED=42

echo "========================================================================"
echo "= $(date): repr_experiment start ="
echo "= output root: $OUT_DIR ="
echo "========================================================================"

#######################################
# Run one pipeline stage, capturing its output in a per-stage log file.
# Globals:   OUT_DIR (read)
# Arguments: $1   - stage name (also used as the log file base name)
#            $2.. - command and arguments to execute
# Outputs:   START / OK / FAILED markers, elapsed wall-clock seconds and a
#            tail of the stage log on stdout
# Returns:   0 on success; on failure the whole script exits with status 1
#######################################
run_stage() {
  local name=$1
  shift
  local log="$OUT_DIR/${name}.log"
  # Declared separately from the assignments so `local` does not mask the
  # exit status of the command substitutions.
  local t0 t1
  local rc=0

  echo ""
  echo ">>> $(date): [$name] START"
  echo ">>> $(date): [$name] command: $*"

  t0=$(date +%s)
  "$@" > "$log" 2>&1 || rc=$?
  t1=$(date +%s)

  if (( rc != 0 )); then
    echo "!!! $(date): [$name] FAILED after $((t1 - t0))s — see $log (exit status $rc)"
    tail -30 "$log"
    exit 1
  fi

  echo "<<< $(date): [$name] OK in $((t1 - t0))s (log: $log)"
  # Print tail of log so orch.log shows meaningful progress.
  tail -10 "$log" | sed 's/^/ | /'
}

# ====================================================================
# S1 — re-extract CICIDS2017
# ====================================================================
run_stage "s1_extract_cicids2017" \
  uv run python scripts/extract_cicids2017.py --jobs 5 --time-offset 28800

# ====================================================================
# S2 — re-extract CICDDoS2019 (per-shard) + merge
# ====================================================================
run_stage "s2a_extract_cicddos2019_03-11" \
  uv run python scripts/extract_cicddos2019.py \
  --shards 03-11 --jobs 1 \
  --out-packets datasets/cicddos2019/processed/packets.03-11.npz \
  --out-flows datasets/cicddos2019/processed/flows.03-11.parquet

run_stage "s2b_extract_cicddos2019_01-12" \
  uv run python scripts/extract_cicddos2019.py \
  --shards 01-12 --jobs 1 \
  --out-packets datasets/cicddos2019/processed/packets.01-12.npz \
  --out-flows datasets/cicddos2019/processed/flows.01-12.parquet

run_stage "s2c_merge_cicddos2019" \
  uv run python scripts/merge_shard_artifacts.py \
  --in datasets/cicddos2019/processed/packets.03-11.npz \
  --in datasets/cicddos2019/processed/packets.01-12.npz \
  --out-packets datasets/cicddos2019/processed/packets.npz \
  --out-flows datasets/cicddos2019/processed/flows.parquet

# ====================================================================
# S3..S5 — train E0 / E1 / E2 with the same 10k benign
# ====================================================================

#######################################
# Train one experiment variant, then run detection and per-class
# evaluation for it on the merged CICDDoS2019 artifacts.
# Globals:   OUT_DIR, N_VAL, N_ATK, SPLIT_SEED (read)
# Arguments: $1 - experiment tag (sub-directory of OUT_DIR, stage-name prefix)
#            $2 - path to the training config to copy and patch
# Returns:   0 on success; any failure exits the whole script with status 1
#######################################
train_and_eval() {
  local tag=$1 cfg=$2
  local run_dir="$OUT_DIR/$tag"

  mkdir -p "$run_dir" || die "[$tag] cannot create $run_dir"

  # Copy config and patch save_dir to our per-tag directory.
  cp -- "$cfg" "$run_dir/config.yaml" \
    || die "[$tag] cannot copy $cfg to $run_dir/config.yaml"
  # The sed expression only rewrites a line starting with `save_dir:`; if no
  # such line exists the patch is a silent no-op, so make that visible.
  if ! grep -q '^save_dir:' "$run_dir/config.yaml"; then
    echo "!!! $(date): [$tag] WARNING: no top-level 'save_dir:' key in $cfg;" \
      "save_dir was NOT patched to $run_dir"
  fi
  sed -i "s#^save_dir:.*#save_dir: $run_dir#" "$run_dir/config.yaml" \
    || die "[$tag] cannot patch save_dir in $run_dir/config.yaml"

  run_stage "${tag}_train" \
    uv run python -m train --config "$run_dir/config.yaml"

  run_stage "${tag}_detect_ddos" \
    uv run python -m detect \
    --save-dir "$run_dir" \
    --packets-npz datasets/cicddos2019/processed/packets.npz \
    --flows-parquet datasets/cicddos2019/processed/flows.parquet \
    --n-val "$N_VAL" --n-atk "$N_ATK" --seed "$SPLIT_SEED"

  run_stage "${tag}_per_class" \
    uv run python -m eval.per_class --save-dir "$run_dir"
}

train_and_eval "e0_baseline" "configs/n10k_baseline.yaml"
train_and_eval "e1_relv2" "configs/n10k_relv2.yaml"
train_and_eval "e2_relv2_ctx" "configs/n10k_relv2_ctx.yaml"

# ====================================================================
# S7 — summary table
# ====================================================================
run_stage "s7_summary" \
  uv run python scripts/summarize_repr_exp.py --root "$OUT_DIR"

echo ""
echo "========================================================================"
echo "= $(date): repr_experiment DONE ="
echo "= results under: $OUT_DIR ="
echo "========================================================================"