#!/bin/bash
# End-to-end representation experiment: re-extract CICIDS2017 + CICDDoS2019
# with metadata columns, then train E0/E1/E2 with fixed 10k benign and
# evaluate on CICDDoS2019.
#
# Stages (each with wall-clock logging + per-stage log file):
#   S1   re-extract CICIDS2017 → datasets/cicids2017/processed/*
#   S2a  re-extract CICDDoS2019 03-11 shard
#   S2b  re-extract CICDDoS2019 01-12 shard
#   S2c  merge CICDDoS2019 shards
#   S3   train E0 (mixed_dequant, no ctx)        [configs/n10k_baseline.yaml]
#   S4   train E1 (relative_v2, no ctx)          [configs/n10k_relv2.yaml]
#   S5   train E2 (relative_v2, with 8-d ctx)    [configs/n10k_relv2_ctx.yaml]
#   S6   detect+per_class for each on CICDDoS2019
#   S7   summary table
#
# Any stage's failure aborts the rest and leaves the partial log intact.

# -u: expanding an unset variable is an error.
# pipefail: a pipeline fails if any of its stages fails.
# -e is not enabled, so commands whose failure matters are checked explicitly.
set -uo pipefail

ROOT=/home/chy/mambafortrafficmodeling
# Every path used below (scripts/, configs/, datasets/, runs/) is relative,
# so refuse to continue if we cannot enter the project root.
cd "$ROOT" || { echo "!!! cannot cd to $ROOT" >&2; exit 1; }

# One timestamped output directory per invocation.
STAMP=$(date +%Y%m%d_%H%M%S)
OUT_DIR="runs/repr_experiment_${STAMP}"
mkdir -p "$OUT_DIR" || { echo "!!! cannot create $OUT_DIR" >&2; exit 1; }
MAIN_LOG="$OUT_DIR/orch.log"
# From here on, stdout and stderr go both to the terminal and (appended) to
# MAIN_LOG via tee.
exec > >(tee -a "$MAIN_LOG") 2>&1

# Passed to the detect stage as --n-val / --n-atk / --seed.
N_VAL=20000
N_ATK=100000
SPLIT_SEED=42

echo "========================================================================"
echo "= $(date): repr_experiment start ="
echo "= output root: $OUT_DIR ="
echo "========================================================================"

#######################################
# Run one pipeline stage with its output captured in a per-stage log file.
# Globals:   OUT_DIR (read) - directory that receives <name>.log
# Arguments: $1   - stage name (used for the log file name and the banners)
#            $2.. - command and arguments to execute
# Outputs:   START / OK / FAILED banners with elapsed wall-clock seconds and
#            a tail of the stage log, all on stdout.
# Returns:   On success, the status of the final tail|sed pipeline.
#            On failure it does not return: the whole script exits with the
#            stage command's own non-zero exit status.
#######################################
run_stage() {
  local name=$1; shift
  local log="$OUT_DIR/${name}.log"
  # Declared separately from the assignments so `local` cannot mask a
  # failing command substitution.
  local t0 t1 rc=0

  echo ""
  echo ">>> $(date): [$name] START"
  echo ">>> $(date): [$name] command: $*"

  t0=$(date +%s)
  # Capture the real exit status; `if ! cmd` would have discarded it.
  "$@" > "$log" 2>&1 || rc=$?
  t1=$(date +%s)

  if (( rc != 0 )); then
    echo "!!! $(date): [$name] FAILED after $((t1-t0))s (exit code $rc) — see $log"
    tail -30 "$log"
    # Abort the remaining stages, propagating the stage's own status.
    exit "$rc"
  fi

  echo "<<< $(date): [$name] OK in $((t1-t0))s (log: $log)"
  # Print tail of log so orch.log shows meaningful progress.
  tail -10 "$log" | sed 's/^/ | /'
}

# ====================================================================
# S1 — re-extract CICIDS2017
# ====================================================================
s1_cmd=(uv run python scripts/extract_cicids2017.py --jobs 5 --time-offset 28800)
run_stage "s1_extract_cicids2017" "${s1_cmd[@]}"

# ====================================================================
# S2 — re-extract CICDDoS2019 (per-shard) + merge
# ====================================================================
ddos_processed=datasets/cicddos2019/processed

# Each entry is "<stage id>:<shard>"; the shards are extracted one after the
# other, and each shard's packets file is queued as a merge input.
merge_inputs=()
for shard_spec in "s2a:03-11" "s2b:01-12"; do
  stage_id=${shard_spec%%:*}
  shard=${shard_spec#*:}
  shard_packets="$ddos_processed/packets.${shard}.npz"
  shard_flows="$ddos_processed/flows.${shard}.parquet"

  run_stage "${stage_id}_extract_cicddos2019_${shard}" \
    uv run python scripts/extract_cicddos2019.py \
    --shards "$shard" --jobs 1 \
    --out-packets "$shard_packets" \
    --out-flows "$shard_flows"

  merge_inputs+=(--in "$shard_packets")
done

# Merge the per-shard artifacts into the combined packets/flows files.
run_stage "s2c_merge_cicddos2019" \
  uv run python scripts/merge_shard_artifacts.py \
  "${merge_inputs[@]}" \
  --out-packets "$ddos_processed/packets.npz" \
  --out-flows "$ddos_processed/flows.parquet"

# ====================================================================
# S3..S6 — train E0 / E1 / E2 with the same 10k benign, then run
#          detect + per_class on CICDDoS2019 for each
# ====================================================================
#######################################
# Train one experiment variant, then run detection and per-class evaluation
# for it against the merged CICDDoS2019 artifacts.
# Globals:   OUT_DIR, N_VAL, N_ATK, SPLIT_SEED (read)
# Arguments: $1 - tag; names the run directory under OUT_DIR and prefixes
#                 the three stage names (<tag>_train, <tag>_detect_ddos,
#                 <tag>_per_class)
#            $2 - path of the YAML config to copy and patch
# Outputs:   the run_stage banners; a "!!!" line if the config cannot be
#            prepared.
# Returns:   Exits the script with status 1 when the run directory or the
#            patched config cannot be prepared. Stage failures abort the
#            script inside run_stage.
#######################################
train_and_eval() {
  local tag=$1 cfg=$2
  local run_dir="$OUT_DIR/$tag"
  local run_cfg="$run_dir/config.yaml"

  mkdir -p "$run_dir" || {
    echo "!!! $(date): [$tag] cannot create run directory $run_dir"
    exit 1
  }

  # Copy config and patch save_dir to our per-tag directory.
  cp -- "$cfg" "$run_cfg" || {
    echo "!!! $(date): [$tag] cannot copy config $cfg to $run_cfg"
    exit 1
  }
  # sed exits 0 even when no line matches, which would leave the copied
  # config unpatched without any error. Require the key to be present.
  if ! grep -q '^save_dir:' "$run_cfg"; then
    echo "!!! $(date): [$tag] no top-level save_dir: key in $cfg — cannot patch"
    exit 1
  fi
  sed -i "s#^save_dir:.*#save_dir: $run_dir#" "$run_cfg" || {
    echo "!!! $(date): [$tag] failed to patch save_dir in $run_cfg"
    exit 1
  }

  run_stage "${tag}_train" \
    uv run python -m train --config "$run_cfg"

  run_stage "${tag}_detect_ddos" \
    uv run python -m detect \
    --save-dir "$run_dir" \
    --packets-npz datasets/cicddos2019/processed/packets.npz \
    --flows-parquet datasets/cicddos2019/processed/flows.parquet \
    --n-val "$N_VAL" --n-atk "$N_ATK" --seed "$SPLIT_SEED"

  run_stage "${tag}_per_class" \
    uv run python -m eval.per_class --save-dir "$run_dir"
}

# Each entry is "<tag>:<config path>"; variants run sequentially in this order.
for variant in \
  "e0_baseline:configs/n10k_baseline.yaml" \
  "e1_relv2:configs/n10k_relv2.yaml" \
  "e2_relv2_ctx:configs/n10k_relv2_ctx.yaml"; do
  train_and_eval "${variant%%:*}" "${variant#*:}"
done

# ====================================================================
# S7 — summary table
# ====================================================================
summary_cmd=(uv run python scripts/summarize_repr_exp.py --root "$OUT_DIR")
run_stage "s7_summary" "${summary_cmd[@]}"

# Closing banner, framed by the same rule line above and below.
banner_rule="========================================================================"
echo ""
echo "$banner_rule"
echo "= $(date): repr_experiment DONE ="
echo "= results under: $OUT_DIR ="
echo "$banner_rule"