In [None]:
# Apprentissage supervis√© classique (algorithmes d‚Äôensemble, non profond).


import os
import numpy as np
from scapy.all import rdpcap, IP, TCP, UDP, ICMP
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, f1_score
from collections import defaultdict
import joblib   # pour sauvegarder le mod√®le
import psycopg

mapping = {
    # üîä Assistants vocaux / enceintes intelligentes
    "40:f6:bc:bc:89:7b": "Enceinte connect√©e (Echo Dot 4th Gen)",
    "18:48:be:31:4b:49": "Enceinte connect√©e avec √©cran (Echo Show 8)",
    "74:d4:23:32:a2:d7": "Enceinte connect√©e avec √©cran (Echo Show 8)",

    # üì∑ Cam√©ras de s√©curit√© / surveillance
    "70:ee:50:57:95:29": "Cam√©ra connect√©e (Netatmo Indoor Security Camera)",
    "54:af:97:bb:8d:8f": "Cam√©ra connect√©e (TP-Link Tapo Wi-Fi Camera)",
    "00:16:6c:d7:d5:f9": "Cam√©ra connect√©e (Samsung Pan/Tilt Wi-Fi Camera)",
    "40:ac:bf:29:04:d4": "Cam√©ra connect√©e (EZVIZ Security Camera)",
    "90:48:6c:08:da:8a": "Cam√©ra sonnette (Ring Video Doorbell)",

    # üì∫ Affichage multim√©dia
    "70:09:71:9d:ad:10": "√âcran/Moniteur connect√© (Samsung Smart Monitor M80B)",
    "b0:02:47:6f:63:37": "Cadre photo connect√© (Pix-Star Digital Frame)",

    # üîå Domotique (prises, hubs, capteurs)
    "68:3a:48:0d:d4:1c": "Hub domotique (Aeotec Smart Hub)",
    "10:5a:17:b8:a2:0b": "Prise connect√©e (TOPERSUN Smart Plug)",
    "10:5a:17:b8:9f:70": "Prise connect√©e (TOPERSUN Smart Plug)",
    "fc:67:1f:53:fa:6e": "Capteur de mouvement (Perfk Motion Sensor)",
    "1c:90:ff:bf:89:46": "Capteur de mouvement (Perfk Motion Sensor)",
    "cc:a7:c1:6a:b5:78": "D√©tecteur de fum√©e (Nest Protect Smoke Alarm)",
    "70:ee:50:96:bb:dc": "Station m√©t√©o connect√©e (Netatmo Weather Station)",
    "70:3a:2d:4a:48:e2": "Sonnette connect√©e (TUYA Smartdoor Bell)",

    # üè• Sant√© connect√©e (Withings)
    "00:24:e4:e3:15:6e": "Balance connect√©e (Withings Body+)",
    "00:24:e4:e4:55:26": "Balance connect√©e (Withings Body+)",
    "00:24:e4:f6:91:38": "Tensiom√®tre connect√© (Withings Blood Pressure)",
    "00:24:e4:f7:ee:ac": "Tensiom√®tre connect√© (Withings Blood Pressure)",

    # üíª Ordinateurs / p√©riph√©riques classiques
    "84:69:93:27:ad:35": "Ordinateur portable (HP Envy)",

    # ‚åö Objets connect√©s personnels
    "6e:fe:2f:5a:d7:7e": "Montre connect√©e (Samsung Galaxy Watch5 Pro)",
}

In [73]:
# Connexion √† la base
conn = psycopg.connect(
    dbname="sniff",
    user="cialson",
    password="3913",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# R√©cup√©ration des donn√©es
cur.execute("SELECT time, mac, size, prot_transp, prot_appl FROM network_trame")
rows = cur.fetchall()
cur.close()
conn.close()

mac_packets = defaultdict(list)
for row in rows:
    pkt_time, pkt_mac, pkt_size, pkt_prot, pkt_sprot = row
    mac_norm = pkt_mac.lower()
    if mac_norm in mapping:
        mac_packets[mac_norm].append({
            "time": pkt_time,
            "size": pkt_size,
            "prot_transp": pkt_prot,
            "prot_appl": pkt_sprot
        })

print(len(mac_packets))

12


In [74]:
app_protocols = ["HTTP", "DNS", "DHCP", "MDNS", "SSDP", "MQTT", "None"]

def extract_rf_features_from_packets(packets):
    """
    Lit un fichier PCAP et calcule des statistiques simples
    qui serviront de features pour le Random Forest.
    Retourne un vecteur numpy de dimension fixe.
    """
    if len(packets) == 0:
        return None

    sizes, deltas = [], []
    last_time = None
    n_tcp = n_udp = n_icmp = n_other = 0
    app_counts = {proto: 0 for proto in app_protocols}

    for pkt in packets:
        # Taille du paquet
        sizes.append(pkt["size"])

        # Temps depuis le paquet pr√©c√©dent
        if last_time is None:
            delta = 0.0
        else:
            delta = pkt["time"] - last_time
        deltas.append(delta)
        last_time = pkt["time"]

        # Compte des protocoles
        if pkt["prot_transp"] == "TCP":
            n_tcp += 1
        elif pkt["prot_transp"] == "UDP":
            n_udp += 1
        elif pkt["prot_transp"] == "ICMP":
            n_icmp += 1
        else:
            n_other += 1

        # Compte des protocoles application
        proto_appl = str(pkt["prot_appl"]).upper()
        if proto_appl in app_counts:
            app_counts[proto_appl] += 1


    sizes = np.array(sizes)
    deltas = np.array(deltas)

    # Dur√©e totale de la capture
    duration = packets[-1]["time"] - packets[0]["time"] if len(packets) > 1 else 0.0
    # Fr√©quence d'√©mission (paquets par seconde)
    freq = len(packets) / duration if duration > 0 else 0.0

    # Ajoute les ratios des protocoles application
    app_ratios = [app_counts[proto] / len(packets) for proto in app_protocols]

    # Vecteur de features (11 dimensions ici)
    feats = [
        sizes.mean(), sizes.std(), sizes.min(), sizes.max(), # stats tailles
        deltas.mean(), deltas.std(), # stats inter-arriv√©es
        n_tcp/len(packets), n_udp/len(packets), 
        n_icmp/len(packets), n_other/len(packets), # ratios de protocole
        freq # fr√©quence d'√©mission
    ] + app_ratios
    
    return np.array(feats, dtype=np.float32)


X_ext, y_ext = [], []
class_names = []
mac_to_label = {}
window_size = 10
for i, (mac, pkts) in enumerate(mac_packets.items()):
    if len(pkts) < window_size:
        print("oui")
        continue
    mac_to_label[mac] = i
    class_names.append(mapping[mac])
    for start in range(0, len(pkts) - window_size + 1, window_size):
        window = pkts[start:start+window_size]
        feats = extract_rf_features_from_packets(window)
        if feats is not None:
            X_ext.append(feats)
            y_ext.append(i)
X = np.vstack(X_ext)
y = np.array(y_ext)

In [75]:
"""
Entra√Æne un mod√®le Random Forest et √©value ses performances
sur un jeu de test.
"""
from collections import Counter
print("Distribution des classes :", Counter(y))
print("Nombre total d'√©chantillons :", len(y))
print("Nombre de classes :", len(set(y)))

# S√©paration train/test (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Cr√©ation du mod√®le
rf = RandomForestClassifier(
    n_estimators=1000,   # nombre d'arbres (plus √©lev√© = plus robuste mais plus lent)
    max_depth=None,     # profondeur illimit√©e (on peut limiter pour √©viter l'overfitting)
    n_jobs=-1,          # utilise tous les c≈ìurs CPU disponibles
    random_state=42
)
print("entrainement")
# Entra√Ænement
rf.fit(X_train, y_train)

# Pr√©diction sur le test
y_pred = rf.predict(X_test)

# √âvaluation
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average="macro")
print("=== R√©sultats Random Forest ===")
print(f"Accuracy : {acc:.3f}")
print(f"F1-score : {f1:.3f}")

# Importance des features
print("\nImportance des features :")
feature_names = [
    "size_mean","size_std","size_min","size_max",
    "delta_mean","delta_std",
    "tcp_ratio","udp_ratio","icmp_ratio","other_ratio",
    "freq"
] + [f"app_{proto.lower()}_ratio" for proto in app_protocols]
for name, imp in sorted(zip(feature_names, rf.feature_importances_),
                        key=lambda x: x[1], reverse=True):
    print(f"{name:12s}: {imp:.3f}")

# Validation crois√©e rapide (optionnel)
cv_scores = cross_val_score(rf, X, y, cv=5, n_jobs=-1)
print(f"\nCross-validation moyenne (5 folds) : {cv_scores.mean():.3f}")

# Sauvegarde du mod√®le pour une r√©utilisation ult√©rieure
joblib.dump((rf, class_names), "rf_model_short.joblib")
print("\n‚úÖ Mod√®le sauvegard√© dans 'rf_model.joblib'.")

Distribution des classes : Counter({np.int64(9): 1225, np.int64(2): 740, np.int64(7): 731, np.int64(5): 575, np.int64(3): 556, np.int64(6): 375, np.int64(1): 186, np.int64(0): 162, np.int64(4): 71, np.int64(8): 59, np.int64(10): 6, np.int64(11): 6})
Nombre total d'√©chantillons : 4692
Nombre de classes : 12
entrainement
=== R√©sultats Random Forest ===
Accuracy : 0.904
F1-score : 0.748

Importance des features :
size_mean   : 0.137
size_max    : 0.136
delta_std   : 0.103
delta_mean  : 0.099
freq        : 0.099
app_ssdp_ratio: 0.090
udp_ratio   : 0.080
other_ratio : 0.077
size_min    : 0.053
tcp_ratio   : 0.052
size_std    : 0.042
app_dns_ratio: 0.027
icmp_ratio  : 0.003
app_dhcp_ratio: 0.000
app_http_ratio: 0.000
app_mdns_ratio: 0.000
app_mqtt_ratio: 0.000
app_none_ratio: 0.000

Cross-validation moyenne (5 folds) : 0.905

‚úÖ Mod√®le sauvegard√© dans 'rf_model.joblib'.
