Initial commit: code, paper, small artifacts
This commit is contained in:
124
scripts/repr_experiment.sh
Executable file
124
scripts/repr_experiment.sh
Executable file
@@ -0,0 +1,124 @@
|
||||
#!/bin/bash
# End-to-end representation experiment: re-extract CICIDS2017 + CICDDoS2019
# with metadata columns, then train E0/E1/E2 with fixed 10k benign and
# evaluate on CICDDoS2019.
#
# Stages (each with wall-clock logging + per-stage log file):
#   S1   re-extract CICIDS2017 → datasets/cicids2017/processed/*
#   S2a  re-extract CICDDoS2019 03-11 shard
#   S2b  re-extract CICDDoS2019 01-12 shard
#   S2c  merge CICDDoS2019 shards
#   S3   train E0 (mixed_dequant, no ctx)       [configs/n10k_baseline.yaml]
#   S4   train E1 (relative_v2, no ctx)         [configs/n10k_relv2.yaml]
#   S5   train E2 (relative_v2, with 8-d ctx)   [configs/n10k_relv2_ctx.yaml]
#   S6   detect+per_class for each on CICDDoS2019
#   S7   summary table
#
# Any stage's failure aborts the rest and leaves the partial log intact.

# errexit (-e) is not enabled, so the setup commands below check their own
# exit status instead of relying on the shell to stop.
set -uo pipefail

ROOT=/home/chy/mambafortrafficmodeling
# Every later path is relative to $ROOT; carrying on after a failed cd would
# read and write datasets/ and runs/ under whatever directory we started in.
if ! cd "$ROOT"; then
  echo "!!! cannot cd to $ROOT" >&2
  exit 1
fi

# One timestamped output directory per invocation.
STAMP=$(date +%Y%m%d_%H%M%S)
OUT_DIR="runs/repr_experiment_${STAMP}"
if ! mkdir -p "$OUT_DIR"; then
  echo "!!! cannot create $OUT_DIR" >&2
  exit 1
fi

# From here on, stdout and stderr of this script are shown on the terminal
# and appended to the orchestration log.
MAIN_LOG="$OUT_DIR/orch.log"
exec > >(tee -a "$MAIN_LOG") 2>&1

# Values passed to the detect step as --n-val / --n-atk / --seed.
N_VAL=20000
N_ATK=100000
SPLIT_SEED=42

echo "========================================================================"
echo "= $(date): repr_experiment start ="
echo "= output root: $OUT_DIR ="
echo "========================================================================"

#######################################
# Run one pipeline stage with timing and a dedicated log file.
# Usage:     run_stage NAME CMD [ARGS...]
# Globals:   OUT_DIR (read) - directory that receives NAME.log
# Arguments: $1  - stage name, used in messages and as the log file stem
#            $2+ - command and arguments to execute
# Outputs:   START / command / OK-or-FAILED marker lines on stdout; the
#            command's own stdout+stderr go only to $OUT_DIR/NAME.log
# Returns:   on success, the status of the final tail|sed pipeline; on
#            failure it does not return: it prints the last 30 log lines
#            and exits the whole script with status 1.
#######################################
run_stage() {
  local name=$1; shift
  local log="$OUT_DIR/${name}.log"
  # Declared separately from the $(date) assignments so `local` does not
  # replace the substitution's exit status with its own.
  local t0 t1

  echo ""
  echo ">>> $(date): [$name] START"
  echo ">>> $(date): [$name] command: $*"

  t0=$(date +%s)
  if ! "$@" > "$log" 2>&1; then
    t1=$(date +%s)
    echo "!!! $(date): [$name] FAILED after $((t1-t0))s — see $log"
    tail -30 "$log"
    exit 1
  fi
  t1=$(date +%s)

  echo "<<< $(date): [$name] OK in $((t1-t0))s (log: $log)"
  # Print tail of log so orch.log shows meaningful progress.
  tail -10 "$log" | sed 's/^/ | /'
}

# ====================================================================
# S1 — re-extract CICIDS2017
# ====================================================================
# Each stage's argv is assembled in an array first and then handed to
# run_stage unchanged, so the stage name and the command stay visually apart.
s1_cmd=(
  uv run python scripts/extract_cicids2017.py
  --jobs 5 --time-offset 28800
)
run_stage "s1_extract_cicids2017" "${s1_cmd[@]}"

# ====================================================================
# S2 — re-extract CICDDoS2019 (per-shard) + merge
# ====================================================================
# The two shards are extracted one after the other, each into its own
# packets/flows artifact pair.
s2a_cmd=(
  uv run python scripts/extract_cicddos2019.py
  --shards 03-11 --jobs 1
  --out-packets datasets/cicddos2019/processed/packets.03-11.npz
  --out-flows datasets/cicddos2019/processed/flows.03-11.parquet
)
run_stage "s2a_extract_cicddos2019_03-11" "${s2a_cmd[@]}"

s2b_cmd=(
  uv run python scripts/extract_cicddos2019.py
  --shards 01-12 --jobs 1
  --out-packets datasets/cicddos2019/processed/packets.01-12.npz
  --out-flows datasets/cicddos2019/processed/flows.01-12.parquet
)
run_stage "s2b_extract_cicddos2019_01-12" "${s2b_cmd[@]}"

# NOTE(review): only the packets shards are passed via --in while a merged
# flows file is also requested; presumably the merge script locates the
# matching flows shards itself — confirm against merge_shard_artifacts.py.
s2c_cmd=(
  uv run python scripts/merge_shard_artifacts.py
  --in datasets/cicddos2019/processed/packets.03-11.npz
  --in datasets/cicddos2019/processed/packets.01-12.npz
  --out-packets datasets/cicddos2019/processed/packets.npz
  --out-flows datasets/cicddos2019/processed/flows.parquet
)
run_stage "s2c_merge_cicddos2019" "${s2c_cmd[@]}"

# ====================================================================
# S3..S5 — train E0 / E1 / E2 with the same 10k benign
# ====================================================================

#######################################
# Train one variant and evaluate it on CICDDoS2019.
# Usage:     train_and_eval TAG CONFIG
# Globals:   OUT_DIR, N_VAL, N_ATK, SPLIT_SEED (all read)
# Arguments: $1 - tag; names the run directory $OUT_DIR/TAG and prefixes
#                 the three stage names (TAG_train, TAG_detect_ddos,
#                 TAG_per_class)
#            $2 - path of the config file to copy into the run directory
# Outputs:   setup errors and warnings on stderr; stage output via run_stage
# Returns:   status of the last run_stage call; exits the script with
#            status 1 if the run directory or its config cannot be prepared.
#######################################
train_and_eval() {
  local tag=$1 cfg=$2
  local run_dir="$OUT_DIR/$tag"
  local run_cfg="$run_dir/config.yaml"

  # The setup steps are checked explicitly: without them, training would be
  # launched against a missing or unpatched config.
  if ! mkdir -p "$run_dir"; then
    echo "!!! $(date): [$tag] cannot create $run_dir" >&2
    exit 1
  fi

  # Copy config and patch save_dir to our per-tag directory.
  if ! cp -- "$cfg" "$run_cfg"; then
    echo "!!! $(date): [$tag] cannot copy $cfg to $run_cfg" >&2
    exit 1
  fi
  # The sed expression only rewrites a line that starts with "save_dir:".
  # With no such line it succeeds without changing anything, so say so.
  if ! grep -q '^save_dir:' "$run_cfg"; then
    echo "!!! $(date): [$tag] warning: no top-level save_dir: line in $cfg;" \
      "save_dir was not patched to $run_dir" >&2
  fi
  if ! sed -i "s#^save_dir:.*#save_dir: $run_dir#" "$run_cfg"; then
    echo "!!! $(date): [$tag] cannot patch save_dir in $run_cfg" >&2
    exit 1
  fi

  run_stage "${tag}_train" \
    uv run python -m train --config "$run_cfg"

  run_stage "${tag}_detect_ddos" \
    uv run python -m detect \
      --save-dir "$run_dir" \
      --packets-npz datasets/cicddos2019/processed/packets.npz \
      --flows-parquet datasets/cicddos2019/processed/flows.parquet \
      --n-val "$N_VAL" --n-atk "$N_ATK" --seed "$SPLIT_SEED"

  run_stage "${tag}_per_class" \
    uv run python -m eval.per_class --save-dir "$run_dir"
}

# Train and evaluate the three variants in order. Each spec is "tag:config";
# the part before the first ':' is the tag, the remainder the config path.
for spec in \
  "e0_baseline:configs/n10k_baseline.yaml" \
  "e1_relv2:configs/n10k_relv2.yaml" \
  "e2_relv2_ctx:configs/n10k_relv2_ctx.yaml"; do
  train_and_eval "${spec%%:*}" "${spec#*:}"
done

# ====================================================================
# S7 — summary table
# ====================================================================
summary_cmd=(uv run python scripts/summarize_repr_exp.py --root "$OUT_DIR")
run_stage "s7_summary" "${summary_cmd[@]}"

# Closing banner: a blank line followed by the four banner lines.
printf '%s\n' \
  "" \
  "========================================================================" \
  "= $(date): repr_experiment DONE =" \
  "= results under: $OUT_DIR =" \
  "========================================================================"
Reference in New Issue
Block a user