baselines: update Kitsune Path A to JANUS route_comparison checkpoints

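Replaces stale phase25_* checkpoint paths with the current janus_<ds>_seed<S>
layout under route_comparison/, adds CICIoT2023 to PCAP_GLOBS / WITHIN_DIRS,
and removes the per-dataset n_atk caps so within-dataset eval uses the same
sample budget as JANUS phase1. Also renames the ISCXTor dataset key from
'iscxtor' to 'iscxtor2016', and adds a scapy parse branch to FEWithMeta
(port strings are now coerced with str() before the isdigit() check).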

Adds cython (3.2.4) — required by Kitsune's KitNET cluster compile path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-08 23:59:40 +08:00
parent a6bcbbd299
commit 0ccd758600
3 changed files with 83 additions and 4 deletions

View File

@@ -17,8 +17,18 @@ sys.path.insert(0, str(REPO / 'Unified_CFM'))
from FeatureExtractor import FE
from KitNET.KitNET import KitNET
from data import load_unified_data
PCAP_GLOBS = {'iscxtor': str(REPO / 'datasets/iscxtor2016/raw/pcap_extracted/**/*.pcap'), 'cicids2017': str(REPO / 'datasets/cicids2017/raw/pcap/*.pcap'), 'cicddos2019': str(REPO / 'datasets/cicddos2019/raw/pcap/*')}
WITHIN_DIRS = {'iscxtor_within': ('phase25_multiseed_2026_04_25/iscxtor2016_lambda0p3_seed{seed}', 'iscxtor', {'n_val': 10000, 'n_atk': None}), 'cicids_within': ('phase25_sigma06_multiseed_2026_04_25/cicids2017_lambda0p3_sigma0p6_seed{seed}', 'cicids2017', {'n_val': 10000, 'n_atk': 30000}), 'cicddos_within': ('phase25_multiseed_2026_04_25/cicddos2019_lambda0p3_seed{seed}', 'cicddos2019', {'n_val': 10000, 'n_atk': 20000})}
# Per-dataset glob patterns (stored as plain strings) for the raw packet
# captures under REPO / 'datasets'. The keys are the dataset names that the
# second element of each WITHIN_DIRS entry refers to.
# NOTE(review): the '**' segments only descend into nested directories when
# the consumer globs with recursive=True — confirm at the call site.
PCAP_GLOBS = {
'iscxtor2016': str(REPO / 'datasets/iscxtor2016/raw/pcap_extracted/**/*.pcap'),
'cicids2017': str(REPO / 'datasets/cicids2017/raw/pcap/*.pcap'),
# No extension filter on this one: every entry in the directory matches.
'cicddos2019': str(REPO / 'datasets/cicddos2019/raw/pcap/*'),
'ciciot2023': str(REPO / 'datasets/ciciot2023/raw/pcap/**/*.pcap'),
}
# Within-dataset evaluation setups, keyed by run name. Each value is a tuple:
#   (checkpoint directory template, dataset key, sample-budget dict)
# - The template contains a literal '{seed}' placeholder; presumably the
#   caller fills it with str.format(seed=...) — TODO confirm. The path is
#   relative; the base directory it is joined to is not visible here.
# - The dataset key matches an entry in PCAP_GLOBS.
# - The budget dict holds 'n_val' and 'n_atk'. 'n_atk' is None for every
#   dataset; presumably None means "no cap on attack samples" — verify
#   against the code that consumes it.
WITHIN_DIRS = {
'iscxtor_within': ('route_comparison/janus_iscxtor2016_seed{seed}', 'iscxtor2016', {'n_val': 10000, 'n_atk': None}),
'cicids_within': ('route_comparison/janus_cicids2017_seed{seed}', 'cicids2017', {'n_val': 10000, 'n_atk': None}),
'cicddos_within': ('route_comparison/janus_cicddos2019_seed{seed}', 'cicddos2019', {'n_val': 10000, 'n_atk': None}),
'ciciot_within': ('route_comparison/janus_ciciot2023_seed{seed}', 'ciciot2023', {'n_val': 10000, 'n_atk': None}),
}
def _canonical_key(src_ip, dst_ip, src_port, dst_port, protocol) -> tuple:
a = (src_ip, src_port)
@@ -69,11 +79,47 @@ class FEWithMeta(FE):
(srcproto, dstproto, IPtype) = ('icmp', 'icmp', 0)
elif srcIP + srcproto + dstIP + dstproto == '':
(srcIP, dstIP) = (row[2], row[3])
elif self.parse_type == 'scapy':
from scapy.all import IP, IPv6, TCP, UDP, ARP, ICMP
packet = self.scapyin[self.curPacketIndx]
IPtype = np.nan
timestamp = packet.time
framelen = len(packet)
if packet.haslayer(IP):
srcIP = packet[IP].src
dstIP = packet[IP].dst
IPtype = 0
elif packet.haslayer(IPv6):
srcIP = packet[IPv6].src
dstIP = packet[IPv6].dst
IPtype = 1
else:
srcIP = ''
dstIP = ''
if packet.haslayer(TCP):
srcproto = str(packet[TCP].sport)
dstproto = str(packet[TCP].dport)
elif packet.haslayer(UDP):
srcproto = str(packet[UDP].sport)
dstproto = str(packet[UDP].dport)
else:
srcproto = ''
dstproto = ''
srcMAC = packet.src
dstMAC = packet.dst
if srcproto == '':
if packet.haslayer(ARP):
(srcproto, dstproto) = ('arp', 'arp')
(srcIP, dstIP, IPtype) = (packet[ARP].psrc, packet[ARP].pdst, 0)
elif packet.haslayer(ICMP):
(srcproto, dstproto, IPtype) = ('icmp', 'icmp', 0)
elif srcIP + srcproto + dstIP + dstproto == '':
(srcIP, dstIP) = (packet.src, packet.dst)
else:
return []
try:
sp = int(srcproto) if srcproto.isdigit() else 0
dp = int(dstproto) if dstproto.isdigit() else 0
sp = int(srcproto) if str(srcproto).isdigit() else 0
dp = int(dstproto) if str(dstproto).isdigit() else 0
except Exception:
(sp, dp) = (0, 0)
try: