JANUS/scripts/repr_experiment.sh

#!/bin/bash
# End-to-end representation experiment: re-extract CICIDS2017 + CICDDoS2019
# with metadata columns, then train E0/E1/E2 with fixed 10k benign and
# evaluate on CICDDoS2019.
#
# Stages (each with wall-clock logging + per-stage log file):
#   S1  re-extract CICIDS2017   → datasets/cicids2017/processed/*
#   S2a re-extract CICDDoS2019 03-11 shard
#   S2b re-extract CICDDoS2019 01-12 shard
#   S2c merge CICDDoS2019 shards
#   S3  train E0  (mixed_dequant, no ctx)          [configs/n10k_baseline.yaml]
#   S4  train E1  (relative_v2,   no ctx)          [configs/n10k_relv2.yaml]
#   S5  train E2  (relative_v2,   with 8-d ctx)    [configs/n10k_relv2_ctx.yaml]
#   S6  detect+per_class for each on CICDDoS2019 (run per model, right after
#       that model's training stage — not as a separate pass at the end)
#   S7  summary table
#
# Any stage's failure aborts the rest and leaves the partial log intact.
# NOTE: errexit (-e) is intentionally not enabled here; stage failures are
# handled by run_stage, and the few setup commands below are guarded explicitly.
set -uo pipefail

# Repository root. All relative paths used below (runs/, datasets/, configs/,
# scripts/) are resolved against it. Defaults to the original hard-coded
# location; set REPR_EXPERIMENT_ROOT to run from another checkout.
ROOT=${REPR_EXPERIMENT_ROOT:-/home/chy/mambafortrafficmodeling}
# Without errexit a failed cd would be ignored and every later stage would
# run against the wrong directory, so abort right here.
cd "$ROOT" || { echo "cannot cd to ROOT: $ROOT" >&2; exit 1; }

# One timestamped output directory per invocation.
STAMP=$(date +%Y%m%d_%H%M%S)
OUT_DIR="runs/repr_experiment_${STAMP}"
mkdir -p "$OUT_DIR" || { echo "cannot create output dir: $OUT_DIR" >&2; exit 1; }
MAIN_LOG="$OUT_DIR/orch.log"
# From here on, stdout and stderr of this script are both shown on the
# terminal and appended to the orchestration log.
exec > >(tee -a "$MAIN_LOG") 2>&1

# Values passed to the detect stage as --n-val / --n-atk / --seed.
N_VAL=20000
N_ATK=100000
SPLIT_SEED=42

echo "========================================================================"
echo "= $(date): repr_experiment start                                       ="
echo "= output root: $OUT_DIR                                                 ="
echo "========================================================================"

#######################################
# Run one pipeline stage, capturing its output in a per-stage log file.
# Globals:   OUT_DIR (read) - directory that receives <name>.log
# Arguments: $1   - stage name (used in messages and as the log file stem)
#            $2.. - command and arguments to execute
# Outputs:   START/command/OK-or-FAILED lines plus a tail of the stage log
#            on stdout; the command's own stdout+stderr go to the log file.
# Exits:     terminates the whole script with status 1 if the command fails.
#######################################
run_stage() {
    local stage=$1
    shift
    local stage_log="$OUT_DIR/${stage}.log"
    local started finished status=0

    printf '\n'
    printf '%s\n' ">>> $(date): [$stage] START"
    printf '%s\n' ">>> $(date): [$stage] command: $*"

    started=$(date +%s)
    "$@" > "$stage_log" 2>&1 || status=$?
    finished=$(date +%s)

    if (( status != 0 )); then
        printf '%s\n' "!!! $(date): [$stage] FAILED after $((finished - started))s — see $stage_log"
        # Show the end of the failing stage's log unmodified, then stop everything.
        tail -n 30 "$stage_log"
        exit 1
    fi

    printf '%s\n' "<<< $(date): [$stage] OK in $((finished - started))s  (log: $stage_log)"
    # Echo the last lines of the stage log, indented, so the orchestration
    # output shows meaningful progress.
    tail -n 10 "$stage_log" | sed 's/^/    | /'
}

# ====================================================================
# S1 — CICIDS2017 re-extraction
# ====================================================================
run_stage "s1_extract_cicids2017" uv run python scripts/extract_cicids2017.py \
    --jobs 5 --time-offset 28800

# ====================================================================
# S2 — CICDDoS2019: extract each shard on its own, then merge
# ====================================================================
# Directory that holds both the per-shard files and the merged artifacts.
ddos_processed=datasets/cicddos2019/processed

# Each entry is "<stage letter>:<shard id>"; the letter only affects the
# stage name (s2a / s2b), the shard id selects the data and output files.
for shard_spec in "a:03-11" "b:01-12"; do
    shard=${shard_spec#*:}
    run_stage "s2${shard_spec%%:*}_extract_cicddos2019_${shard}" \
        uv run python scripts/extract_cicddos2019.py \
            --shards "$shard" --jobs 1 \
            --out-packets "$ddos_processed/packets.${shard}.npz" \
            --out-flows   "$ddos_processed/flows.${shard}.parquet"
done

# NOTE(review): only the packets shards are passed via --in although a merged
# flows file is also written — presumably merge_shard_artifacts.py locates the
# matching flows.*.parquet files itself; confirm against that script.
run_stage "s2c_merge_cicddos2019" \
    uv run python scripts/merge_shard_artifacts.py \
        --in "$ddos_processed/packets.03-11.npz" \
        --in "$ddos_processed/packets.01-12.npz" \
        --out-packets "$ddos_processed/packets.npz" \
        --out-flows   "$ddos_processed/flows.parquet"

# ====================================================================
# S3..S5 — train E0 / E1 / E2 with the same 10k benign
# ====================================================================
#######################################
# Train one model variant and evaluate it on CICDDoS2019.
# Writes a copy of the given config to $OUT_DIR/<tag>/config.yaml in which
# every line starting with "save_dir:" is replaced by one pointing at that
# directory, then runs three stages through run_stage:
#   <tag>_train, <tag>_detect_ddos, <tag>_per_class.
# Globals:   OUT_DIR, N_VAL, N_ATK, SPLIT_SEED (read)
# Arguments: $1 - experiment tag (names the run directory and the stages)
#            $2 - path to the source YAML config
# Exits:     1 if the config is unreadable, the run directory cannot be
#            created, or the patched config cannot be produced (including
#            the case where the config has no line starting with
#            "save_dir:"); stage failures exit via run_stage.
#######################################
train_and_eval() {
    local tag=$1 cfg=$2
    local run_dir="$OUT_DIR/$tag"

    if [[ ! -r "$cfg" ]]; then
        echo "!!! $(date): [$tag] config not readable: $cfg" >&2
        exit 1
    fi
    if ! mkdir -p "$run_dir"; then
        echo "!!! $(date): [$tag] cannot create run dir: $run_dir" >&2
        exit 1
    fi

    # Copy the config while redirecting save_dir to the per-tag directory.
    # A substitution that matches nothing would be silent, and training would
    # then write somewhere other than the directory the later stages read
    # from — so treat "nothing patched" as a hard error before any long stage
    # starts. The path is passed through the environment so it is inserted
    # literally, with no pattern or escape processing.
    if ! RUN_DIR="$run_dir" awk '
            /^save_dir:/ { print "save_dir: " ENVIRON["RUN_DIR"]; patched = 1; next }
            { print }
            END { exit !patched }
        ' "$cfg" > "$run_dir/config.yaml"; then
        echo "!!! $(date): [$tag] could not write patched config $run_dir/config.yaml" \
             "(is there a top-level 'save_dir:' line in $cfg?)" >&2
        exit 1
    fi

    run_stage "${tag}_train" \
        uv run python -m train --config "$run_dir/config.yaml"

    run_stage "${tag}_detect_ddos" \
        uv run python -m detect \
            --save-dir "$run_dir" \
            --packets-npz  datasets/cicddos2019/processed/packets.npz \
            --flows-parquet datasets/cicddos2019/processed/flows.parquet \
            --n-val "$N_VAL" --n-atk "$N_ATK" --seed "$SPLIT_SEED"

    run_stage "${tag}_per_class" \
        uv run python -m eval.per_class --save-dir "$run_dir"
}

# Experiments to run, in order, as "<tag>:<config path>".
experiments=(
    "e0_baseline:configs/n10k_baseline.yaml"
    "e1_relv2:configs/n10k_relv2.yaml"
    "e2_relv2_ctx:configs/n10k_relv2_ctx.yaml"
)
for experiment in "${experiments[@]}"; do
    # Text before the first ':' is the tag, the remainder is the config.
    train_and_eval "${experiment%%:*}" "${experiment#*:}"
done

# ====================================================================
# S7 — summary table over everything written under this run's output root
# ====================================================================
run_stage "s7_summary" uv run python scripts/summarize_repr_exp.py \
    --root "$OUT_DIR"

# Closing banner: blank line, rule, two status lines, rule.
closing_rule="========================================================================"
printf '%s\n' \
    "" \
    "$closing_rule" \
    "= $(date): repr_experiment DONE                                        =" \
    "= results under: $OUT_DIR                                               =" \
    "$closing_rule"