A machine learning model to detect anomalies in simulated sensor data

In [None]:
!pip install -q pandas numpy scikit-learn matplotlib seaborn streamlit

create synthetic dataset with 1000 normal readings and 50 anomalous readings

In [None]:
import numpy as np
import pandas as pd

# Seed NumPy's global RNG so the generated readings are reproducible.
np.random.seed(42)

# Normal readings: Gaussian centred on 25°C with a 2°C standard deviation
# (so the bulk of values falls roughly within 20–30°C; it is not a hard bound).
normal_data = np.random.normal(loc=25, scale=2, size=1000)

# Anomalous readings: Gaussian centred on 55°C with a 3°C standard deviation.
anomalies = np.random.normal(loc=55, scale=3, size=50)

# Combine the readings and shuffle them BEFORE attaching timestamps.
# Shuffling rows that already carry a timestamp would only scramble the row
# order: every anomaly would still sit in the final 50 minutes of the series.
data = np.concatenate([normal_data, anomalies])
df = pd.DataFrame({'temperature': data})
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# One reading per minute, in chronological order.
# 'min' is the supported alias; 'T' is deprecated in recent pandas releases.
time_index = pd.date_range(start='2025-06-01', periods=len(df), freq='min')
df.insert(0, 'timestamp', time_index)

# Ground-truth labels for evaluation: 1 when the reading exceeds 40°C, else 0.
# Vectorized comparison instead of a per-row Python lambda.
df['is_anomaly'] = (df['temperature'] > 40).astype('int64')

# Persist the dataset for the following steps.
df.to_csv('sensor_data.csv', index=False)
print("Data generated and saved to sensor_data.csv")

preprocess the data

In [None]:
from sklearn.preprocessing import StandardScaler

# Read the generated readings back from disk.
df = pd.read_csv('sensor_data.csv')

# The CSV stores timestamps as text; parse them back into datetime values.
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Report the number of missing values in each column.
missing_per_column = df.isnull().sum()
print(missing_per_column)

# Standardize the temperature column and keep the result alongside the raw
# values. Note that the scaler is fitted on every row of df.
scaler = StandardScaler()
temperature_frame = df[['temperature']]
df['temperature_scaled'] = scaler.fit_transform(temperature_frame)

In [None]:
# Single feature column used for both fitting and scoring.
feature_cols = ['temperature_scaled']

# Fit only on rows labelled normal (is_anomaly == 0); score on every row.
is_normal = df['is_anomaly'] == 0
train_data = df.loc[is_normal, feature_cols]
test_data = df[feature_cols]

build and train the Isolation Forest model

In [None]:
from sklearn.ensemble import IsolationForest

# IsolationForest.predict returns 1 for inliers and -1 for outliers;
# translate that to the 0 (normal) / 1 (anomaly) encoding used by is_anomaly.
label_map = {1: 0, -1: 1}

# contamination=0.01 sets the decision threshold so that about 1% of the
# samples seen during fit are flagged as outliers. fit() returns the
# estimator itself, so construction and fitting are chained.
model = IsolationForest(contamination=0.01, random_state=42).fit(train_data)

# Score every row and store the prediction as 0/1 next to the readings.
raw_pred = model.predict(test_data)
df['anomaly_pred'] = pd.Series(raw_pred, index=df.index).map(label_map)

evaluate model

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Ground truth and model output, both encoded as 0 (normal) / 1 (anomaly).
y_true = df['is_anomaly']
y_pred = df['anomaly_pred']

# Compute the three scores in one pass, each expressed as a percentage.
precision, recall, f1 = (
    scorer(y_true, y_pred) * 100
    for scorer in (precision_score, recall_score, f1_score)
)

print(f"Precision: {precision:.2f} %")
print(f"Recall: {recall:.2f} %")
print(f"F1-Score: {f1:.2f} %")

In [None]:
# Scatter plot of temperature over time, coloured by the model's prediction.
import matplotlib.pyplot as plt
import seaborn as sns

# Map the 0/1 predictions to readable category names and let seaborn build
# the legend from them. Overriding the legend afterwards with
# plt.legend(['Normal', 'Anomaly']) assigns labels to artists purely by
# position, which can attach the wrong label/marker to a legend entry.
# The series name becomes the legend title.
prediction_label = (
    df['anomaly_pred'].map({0: 'Normal', 1: 'Anomaly'}).rename('Prediction')
)
# Fix the category order so colours, markers and legend order are stable.
category_order = ['Normal', 'Anomaly']

plt.figure(figsize=(12, 6))
sns.scatterplot(x=df['timestamp'], y=df['temperature'],
                hue=prediction_label, style=prediction_label,
                hue_order=category_order, style_order=category_order,
                palette={'Normal': 'blue', 'Anomaly': 'red'})

plt.title('Anomaly Detection in Sensor Data')

plt.xlabel('Timestamp')
plt.ylabel('Temperature (°C)')

plt.xticks(rotation=45)
plt.tight_layout()
# Save before show(): show() may clear the current figure.
plt.savefig('anomaly_plot.png')
plt.show()