In [3]:
import pandas as pd
import numpy as np
import random
from faker import Faker

fake = Faker()

# Seed every random source used by this notebook so the generated dataset is
# reproducible: NumPy drives the numeric columns, the stdlib `random` module
# drives generate_ip(), and Faker drives the user-agent strings.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
Faker.seed(SEED)

def generate_ip():
    """Return a dotted-quad string built from four random octets in 1..255."""
    octets = []
    for _ in range(4):
        # randint is inclusive on both ends, so each octet is in [1, 255].
        octets.append(str(random.randint(1, 255)))
    return ".".join(octets)

def generate_data(n=1000):
    """Build a DataFrame of ``n`` synthetic web-traffic records.

    Each record holds an IP string from ``generate_ip()``, a user-agent string
    from the module-level ``fake`` generator, a time on site in seconds, and a
    request count.

    Args:
        n: Number of rows to generate (default 1000).

    Returns:
        pandas.DataFrame with columns
        ``ip``, ``user_agent``, ``time_on_site``, ``num_requests``.
    """
    def _one_record():
        # Draw order matters for reproducibility: ip, user agent, time, requests.
        address = generate_ip()
        agent = fake.user_agent()
        # Absolute value keeps the normal draw (mean 60, sd 30) non-negative; seconds.
        seconds = np.abs(np.random.normal(60, 30))
        # Truncate the draw (mean 20, sd 10) to an int, then drop the sign.
        requests = np.abs(int(np.random.normal(20, 10)))
        return [address, agent, seconds, requests]

    rows = [_one_record() for _ in range(n)]
    column_names = ["ip", "user_agent", "time_on_site", "num_requests"]
    return pd.DataFrame(rows, columns=column_names)

# Generate and save data
df = generate_data()
# NOTE(review): this absolute path is machine-specific; it is kept unchanged
# because the training cell reads the CSV back from the same location.
df.to_csv(r"C:\Users\varad\mile-mlops-anomaly-detector\data/traffic.csv", index=False)
# Report the actual row count rather than a hard-coded number, so the message
# stays correct if generate_data() is called with a different n.
print(f" traffic.csv created with {len(df)} fake records.")


 traffic.csv created with 1000 fake records.


In [6]:
import pandas as pd
from sklearn.ensemble import IsolationForest
import joblib

# Load the traffic records from the CSV file
df = pd.read_csv(r"C:\Users\varad\mile-mlops-anomaly-detector\data/traffic.csv")

# Keep only the two numeric columns as the feature matrix
feature_columns = ["time_on_site", "num_requests"]
X = df[feature_columns]

# Construct and fit the Isolation Forest in one expression
# (fit() returns the fitted estimator itself)
model = IsolationForest(contamination=0.1, random_state=42).fit(X)

# Persist the fitted model with joblib
model_path = r"C:\Users\varad\mile-mlops-anomaly-detector\models/isolation_forest.pkl"
joblib.dump(model, model_path)

print("✅ Model trained and saved as models/isolation_forest.pkl")


✅ Model trained and saved as models/isolation_forest.pkl


In [42]:
# Define the FastAPI app code
# The source below is held in a raw string and written verbatim to app.py.
# Docstrings inside it use ''' because """ would terminate the outer string.
code = r"""
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import numpy as np
import pandas as pd
import csv
from datetime import datetime
import os

app = FastAPI()

model = joblib.load(r"C:\Users\varad\mile-mlops-anomaly-detector\models/isolation_forest.pkl")

# Column names the model was fitted on, in training order.
# The request fields map onto them positionally:
#   feature1 -> time_on_site, feature2 -> num_requests
FEATURE_COLUMNS = ["time_on_site", "num_requests"]

# Define input schema
class InputData(BaseModel):
    feature1: float
    feature2: float

# Log file path
LOG_FILE = "predictions_log.csv"

# Create file with header if it doesn't exist
if not os.path.exists(LOG_FILE):
    with open(LOG_FILE, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["timestamp", "feature1", "feature2", "prediction"])

@app.post("/predict")
def predict(data: InputData):
    '''Classify one observation as "Anomaly" or "Normal" and log the result.

    Returns a dict of the form {"prediction": label}.
    '''
    # Prepare input as a one-row DataFrame carrying the training column names.
    # The model was fitted on a DataFrame, so passing a bare ndarray makes
    # scikit-learn warn about missing feature names on every request.
    features = pd.DataFrame(
        [[data.feature1, data.feature2]], columns=FEATURE_COLUMNS
    )
    prediction = model.predict(features)[0]
    # IsolationForest.predict returns -1 for outliers and 1 for inliers.
    label = "Anomaly" if prediction == -1 else "Normal"

    # Log input and prediction
    with open(LOG_FILE, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([datetime.now(), data.feature1, data.feature2, label])

    return {"prediction": label}

"""

# Save to your specific path
save_path = r"C:\Users\varad\mile-mlops-anomaly-detector\app.py"
with open(save_path, "w") as f:
    f.write(code)

print(f"✅ app.py successfully created at:\n{save_path}")


✅ app.py successfully created at:
C:\Users\varad\mile-mlops-anomaly-detector\app.py


In [43]:
# Write the FastAPI source held in `code` (defined in the previous cell)
# to the app.py location, overwriting any existing file.
save_path = r"C:\Users\varad\mile-mlops-anomaly-detector\app.py"
with open(save_path, "w") as app_file:
    app_file.write(code)

# Message and path are printed on separate lines.
print("✅ app.py successfully created at:", save_path, sep="\n")

✅ app.py successfully created at:
C:\Users\varad\mile-mlops-anomaly-detector\app.py


In [44]:
import joblib
# Reload the saved model from disk as a sanity check.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
model_path = r"C:\Users\varad\mile-mlops-anomaly-detector\models/isolation_forest.pkl"
model = joblib.load(model_path)
print("✅ model.pkl loaded")
# n_features_in_ is the number of features the estimator saw during fit.
print(f"Expected input shape: {model.n_features_in_}")


✅ model.pkl loaded
Expected input shape: 2
