In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.cluster import KMeans
import pickle as pkl
from flask import Flask

In [2]:
# Load the sensor dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('Dataset_pale.csv')

In [None]:
# Preview the first five rows to sanity-check the load.
df.head()

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and later
# releases dropped the bare 'seaborn' alias, so prefer the new name and fall
# back to the legacy one on older installs.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

def plot_sensor(title, n_sample=None, bottom_n=0, data=None):
    """Plot the Suhu, pH and Amonia series against the sample index.

    Parameters
    ----------
    title : str
        Figure title.
    n_sample : int, optional
        Right edge of the x-axis window and exclusive end of the row slice
        used to compute the y-limits. Defaults to ``len(data) - 1``.
    bottom_n : int, default 0
        Left edge of the x-axis window and first row of the y-limit slice.
    data : pandas.DataFrame, optional
        Frame holding ``Suhu``, ``pH`` and ``Amonia`` columns. Defaults to the
        notebook-level ``df``.

    Notes
    -----
    The figure is created through pyplot and is not shown here; the caller
    is expected to call ``plt.show()``.
    """
    if data is None:
        data = df
    if n_sample is None:
        n_sample = len(data) - 1

    # (column, line colour) for each plotted sensor.
    series_style = (
        ('Suhu', '#F7DC6F'),
        ('pH', '#8E44AD'),
        ('Amonia', '#16A085'),
    )
    sensor_cols = [column for column, _ in series_style]
    sample_index = pd.RangeIndex(len(data))

    plt.figure(figsize=(16, 5))
    for column, color in series_style:
        plt.plot(sample_index, data[column], color=color, label=f'Sensor {column}')

    plt.tick_params(axis='y', labelsize=18)
    plt.tick_params(axis='x', labelsize=18)
    plt.ylabel('Values', fontsize=21)
    plt.xlabel('Sample Index', fontsize=21)
    plt.xlim(bottom_n, n_sample)

    # Fit the y-axis to the visible window only. An empty window would yield
    # NaN limits, which matplotlib rejects, so leave autoscaling in that case.
    window = data[sensor_cols].iloc[bottom_n:n_sample]
    if not window.empty:
        plt.ylim(window.min().min(), window.max().max())

    plt.suptitle(title, fontsize=20, y=0.94, fontweight='heavy')

    leg = plt.legend(loc='best', ncol=1, prop={'size': 14, 'weight': 'bold'})

    # Thicken the legend swatches so the colours stay readable.
    for legend_line in leg.get_lines():
        legend_line.set_linewidth(12)

# Full-range overview first, then a window over samples 100..500.
plot_sensor(title='Plot Sensors')
plot_sensor(title='Plot Sensors (Zoomed)', n_sample=500, bottom_n=100)
plt.show()


In [None]:
# Preview the first five rows again before the summary statistics.
df.head()

In [None]:
# Summary statistics with extra tail percentiles (0.5%, 1%, 99%, 99.5%) alongside the quartiles.
df.describe(percentiles=[.005,.01,.25,.5,.75,.8,.99,.995])

In [None]:
# Count missing values per column.
df.isna().sum()

In [None]:
# Column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# One histogram per numeric column; the trailing ';' hides the returned axes repr.
df.hist(bins = 100, figsize=(20, 15), color = 'brown');

In [10]:
def apply_thresholds(df, column_thresholds=None):
    """Flag out-of-range readings with 0/1 indicator columns.

    For every ``column`` in ``column_thresholds`` a ``<column>_thresholded``
    column is written: 1 when the value is strictly below the lower bound or
    strictly above the upper bound, 0 otherwise. Values equal to a bound, and
    NaN, map to 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns to check. It is modified in place.
    column_thresholds : dict[str, tuple[float, float]], optional
        Mapping of column name to ``(lower_bound, upper_bound)``. Defaults to
        the ranges for ``Suhu``, ``pH`` and ``Amonia`` listed below.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    if column_thresholds is None:
        column_thresholds = {
            'Suhu': (25.0, 30.0),
            'pH': (6.0, 9.0),
            'Amonia': (0.0, 0.8),
        }
    for column, (lower_bound, upper_bound) in column_thresholds.items():
        values = df[column]
        # Vectorized comparison instead of a per-row Python lambda; NaN
        # compares False on both sides and therefore stays 0.
        out_of_range = (values > upper_bound) | (values < lower_bound)
        df[f'{column}_thresholded'] = out_of_range.astype('int64')
    return df
def get_pattern(raw_features):
    """Encode a row's three threshold flags as a binary and a readable string.

    The row is read by position: entries 0-2 supply the sensor names (their
    index labels) and entries 3-5 supply the matching 0/1 threshold flags.

    Parameters
    ----------
    raw_features : pandas.Series
        One DataFrame row, e.g. as passed by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    tuple[str, str]
        ``(pattern_binary, pattern_readable)``, for example
        ``('101', 'Suhu: Failure, pH: Normal, Amonia: Failure, ')``. The
        readable string keeps its trailing ``', '``.
    """
    labels = raw_features.keys()
    binary_parts = []
    readable_parts = []

    for i in range(3):
        # .iloc makes the positional lookup explicit; plain [] with an integer
        # key on a label-indexed Series is deprecated in pandas 2.x.
        flag = raw_features.iloc[i + 3]
        status = 'Failure' if flag == 1 else 'Normal'
        readable_parts.append(f'{labels[i]}: {status}, ')
        binary_parts.append(str(int(flag)))

    return ''.join(binary_parts), ''.join(readable_parts)


In [11]:
# Add the <column>_thresholded flag columns. apply_thresholds writes them onto
# df itself and returns that same object, so the rebinding keeps the name df.
df = apply_thresholds(df)

In [None]:
# Display the frame, now including the threshold flag columns.
df

In [None]:
# First three rows.
df.head(3)

In [None]:
# Last three rows.
df.tail(3)

In [None]:
# Number of distinct values per column.
df.nunique()

In [16]:
# Cluster rows into two groups using columns 3..5 of df
# (presumably the *_thresholded flags -- TODO confirm the CSV has exactly three columns).
# random_state pins the k-means++ initialisation so the fitted centres and the
# 0/1 label assignment are reproducible across Restart & Run All.
# NOTE(review): KMeans label ids carry no meaning by themselves, yet later cells
# treat Condition == 1 as the "Kualitas Air Tidak Baik" cluster -- confirm this
# against cls.cluster_centers_.
cls = KMeans(n_clusters=2, init= 'k-means++', n_init=1000, max_iter=60, random_state=42).fit(df[df.columns[3:6]])
df['Condition'] = cls.labels_

In [17]:
# Run get_pattern on every row; zip(*...) transposes the per-row
# (binary, readable) tuples into one sequence per new column.
df['Binary Pattern'], df['Readable Pattern'] = zip(*df.apply(get_pattern, axis=1))

In [None]:
# Silhouette score of the fitted labels over the same column slice used for clustering.
from sklearn.metrics import silhouette_score
print(silhouette_score(df[df.columns[3:6]], cls.labels_))

In [None]:
# Per-sensor counts of in-range (0) versus out-of-range (1) rows.
df.Amonia_thresholded.value_counts(), df.pH_thresholded.value_counts(), df.Suhu_thresholded.value_counts()

In [20]:
# Persist the fitted KMeans model with the highest pickle protocol.
# NOTE(review): '/content/' is a hard-coded absolute directory (looks like a
# Google Colab working dir); this cell fails wherever that directory is missing.
with open('/content/'+'cluster_model.pkl', 'wb') as f:
  pkl.dump(cls, f, pkl.HIGHEST_PROTOCOL)

In [None]:
# Display the frame with the Condition and pattern columns added.
df

In [None]:
# Rows per cluster label.
df['Condition'].value_counts()

In [None]:
# Frequency of each binary flag pattern.
df['Binary Pattern'].value_counts()

In [None]:
# Frequency of each readable flag pattern.
df['Readable Pattern'].value_counts()

In [None]:
import pickle

# Patterns seen in rows labelled Condition == 1, in value_counts() order
# (most frequent first).
condition_one = df[df.Condition==1]
rules_model = list(condition_one['Binary Pattern'].value_counts().keys())
alert_rules = [
    readable+' -> Kualitas Air Tidak Baik !'
    for readable in condition_one['Readable Pattern'].value_counts().keys()
]

# Write both lists to disk as pickles.
with open('/content/'+'rules_model.pkl', 'wb') as f:
  pickle.dump(rules_model, f, pickle.HIGHEST_PROTOCOL)
with open('/content/'+'alert_rules.pkl', 'wb') as f:
  pickle.dump(alert_rules, f, pickle.HIGHEST_PROTOCOL)

alert_rules


In [None]:
# Binary patterns that landed in the other cluster (Condition == 0).
df[df.Condition==0]['Binary Pattern'].value_counts()

In [None]:
# Show the list of binary patterns collected from Condition == 1 rows.
rules_model

In [None]:
# Show the alert messages built from the readable patterns.
alert_rules