In [3]:
import pandas as pd
import numpy as np
import random
import sqlite3 as sql
from datetime import datetime, timedelta
import os

In [120]:
conn = sql.connect("el.db")
cur = conn.cursor()

In [101]:
db_col = ["Index","EName","KPI1","KPI2","KPI3","KPI4","TimeStamp"]
kpi = ["KPI1","KPI2","KPI3","KPI4"]
is_monday = False

In [166]:
def truncate_table(cmd,name):
    try:
        q = f"DROP TABLE {name}" if cmd == 'Drop' else f"DELETE FROM {name}"
        cur.execute(q)
        conn.commit()
    except Exception as e:
        print(e)
        raise

def create_table():
    cur.execute('''
        CREATE TABLE IF NOT EXISTS Transactions(
            Id INTEGER PRIMARY KEY AUTOINCREMENT,
            EName TEXT NOT NULL,    
            KPI1 REAL DEFAULT 0,
            KPI2 REAL DEFAULT 0,
            KPI3 REAL DEFAULT 0,
            KPI4 REAL DEFAULT 0,
            TimeStamp TEXT NOT NULL
        );
    ''')
    conn.commit()

In [7]:
def generate_daily_data(days):
    e_list = []
    k1 = []
    k2 = []
    k3 = []
    k4 = []
    t = []
    daily_seconds = 24*60*60
    timestamps = pd.date_range(start='2024-09-01 00:00:00', periods=days *daily_seconds, freq='s')
    for j in range(0,daily_seconds * days):
        for i in range(1,11):
            e = 'EL0' if i < 10 else 'EL' + str(i)
            k1Val = random.uniform(48,53)
            k2Val = random.uniform(30,35)
            k3Val = random.uniform(1,2)
            k4Val = random.uniform(490,505)
            tVal = timestamps[j]
            e_list.append(e)  
            t.append(tVal)
            k1.append(k1Val)
            k2.append(k2Val)
            k3.append(k3Val)
            k4.append(k4Val)
    data = {
        "EName":e_list,
        "KPI1":k1,
        "KPI2":k2,
        "KPI3":k3,
        "KPI4":k4,
        "TimeStamp":t
    }
    df = pd.DataFrame(data)
    return df

In [8]:
if os.path.exists("./data.csv"):
    print("file exists")
    data = pd.read_csv("./data.csv")
else:
    print("generating data...")
    data = generate_daily_data(15)
    data.to_csv("./data.csv")
    data.to_sql('Transactions',conn)

print(len(data))
data.head()

file exists
12960000


Unnamed: 0.1,Unnamed: 0,EName,KPI1,KPI2,KPI3,KPI4,TimeStamp
0,0,EL1,50.472902,33.787028,1.698933,502.73144,2024-09-01 00:00:00
1,1,EL2,48.582651,32.049887,1.953103,490.827532,2024-09-01 00:00:00
2,2,EL3,48.305235,32.75545,1.177142,496.012453,2024-09-01 00:00:00
3,3,EL4,52.543139,32.256765,1.467176,496.114154,2024-09-01 00:00:00
4,4,EL5,51.064158,31.798667,1.519589,497.176418,2024-09-01 00:00:00


In [9]:
def get_date(date):
    currDate = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
    return currDate

In [10]:
def get_KPI_avg(data):
    res = []
    for i in range(1,11):
        # el = 'EL0' if i < 10 else 'EL'
        el = "EL" + str(i)
        el_data = data[data["EName"] == el][kpi]
        kpi_means = {j: np.round(el_data[j].mean(), 4).item() if not np.isnan(el_data[j].mean()) else 0.00 for j in kpi}
        res.append({el: kpi_means})
    kpi_avg = np.round((data[kpi].mean()).infer_objects(copy=False).fillna(0),4).to_dict()
    res.append({"total": kpi_avg})
    return res

In [11]:
def fetch_data_within_range(start,end):
    try:
        d = cur.execute(f"SELECT * from Transactions WHERE TimeStamp BETWEEN '{start}' AND '{end}';")
        selected_data = d.fetchall()
        return selected_data
    except sql.Error as e:
        print(e)
        return []

In [12]:
def create_df(data,col):
    data_df = pd.DataFrame(data,columns=col)
    return data_df

In [105]:
def get_prev_week_range(curr_week_start):
    curr_week_start = curr_week_start.replace(hour=0, minute=0, second=0, microsecond=0)
    prev_week_start = curr_week_start - timedelta(weeks=1)
    prev_week_end = curr_week_start - timedelta(seconds=1)
    return prev_week_start,prev_week_end

In [106]:
def get_current_hour_range(current_hour):
    current_hour = current_hour.replace(minute=0, second=0, microsecond=0)
    prev_hour_end = current_hour - timedelta(hours=1)
    return current_hour,prev_hour_end

In [109]:
d = get_date('2024-09-12 00:10:00')

current_hour, prev_hour_start = get_current_hour_range(d)
current_week_start= d - timedelta(days=d.weekday()) if is_monday else d

prev_week_start,prev_week_end = get_prev_week_range(current_week_start)
prev_2_week_start,prev_2_week_end = get_prev_week_range(prev_week_start)

print("current: ",current_hour,prev_hour_start)
print("prev: ",prev_week_start,prev_week_end)
print("prev 2: ",prev_2_week_start,prev_2_week_end)

current:  2024-09-12 00:00:00 2024-09-11 23:00:00
prev:  2024-09-05 00:00:00 2024-09-11 23:59:59
prev 2:  2024-08-29 00:00:00 2024-09-04 23:59:59


In [16]:
curr_hour_data = fetch_data_within_range(prev_hour_start,current_hour)
prev_week_data = fetch_data_within_range(prev_week_start,prev_week_end)
prev_2_week_data = fetch_data_within_range(prev_2_week_start,prev_2_week_end)

In [17]:
print(len(curr_hour_data),len(prev_week_data),len(prev_2_week_data))

36010 864010 0


In [18]:
curr_hour_df = pd.DataFrame(curr_hour_data,columns=db_col)
prev_week_df = pd.DataFrame(prev_week_data,columns=db_col)
prev_2_week_df = pd.DataFrame(prev_2_week_data,columns=db_col)

In [91]:
def get_week_number(curr_hour_df,prev_week_df,prev_2_week_df):
    curr_hour_week_num = get_date(curr_hour_df['TimeStamp'][0]).isocalendar()[1] if len(curr_hour_df) > 0 else -1
    prev_week_num = get_date(prev_week_df['TimeStamp'][0]).isocalendar()[1] if len(prev_week_df) > 0 else -1
    prev_2_week_num = get_date(prev_2_week_df['TimeStamp'][0]).isocalendar()[1] if len(prev_2_week_df) > 0 else -1
    print(curr_hour_week_num,prev_week_num,prev_2_week_num)
    return curr_hour_week_num,prev_week_num,prev_2_week_num

In [96]:
def get_bar_data(curr_hour_df,prev_week_df,prev_2_week_df):
    res = {"x":"Week1,Week2,CurrentHour"}
    print(get_week_number(curr_hour_df,prev_week_df,prev_2_week_df))
    curr_hour_week_num,prev_week_num,prev_2_week_num = get_week_number(curr_hour_df,prev_week_df,prev_2_week_df)
    db_res = {
        "x": [curr_hour_week_num,prev_week_num,prev_2_week_num],
    }
    for i in kpi:
        l = []
        prev_week_avg = np.round(prev_week_df[i].mean(),4) if not np.isnan(prev_week_df[i].mean()) else 0.00
        prev_2_week_avg = np.round(prev_2_week_df[i].mean(),4) if not np.isnan(prev_2_week_df[i].mean()) else 0.00
        curr_hour_avg = np.round(curr_hour_df[i].mean(),4) if not np.isnan(curr_hour_df[i].mean()) else 0.00
        l.append(str(prev_week_avg))
        l.append(str(prev_2_week_avg))
        l.append(str(curr_hour_avg))
        db_res[i] = [curr_hour_avg,prev_week_avg,prev_2_week_avg,]
        s = ",".join(l)
        res[i]= s
    return res,db_res

In [121]:
def table_exists(table_name):
    query = "SELECT * FROM sqlite_master WHERE type='table' AND name=?;"
    cur.execute(query, (table_name,))
    result = cur.fetchone()
    return result is not None

In [164]:
def generate_plant_data(col_list):
    cols = []
    for i in col_list:
        cols.append(i + " REAL Default 0")
    query1 = f'''
        CREATE TABLE IF Not Exists PlantData
        (
            Id INTEGER PRIMARY KEY,
            Week Text Not Null,
            WeekStart DateTime Default "1999-01-01 00:00:00",
            WeekEnd DateTime Default "1999-01-01 00:00:00",
            {", ".join(cols)}
        )
    '''
    
    query2 = f'''
        INSERT INTO PlantData (Week) VALUES ("w1"),("w2"),("h")
    '''
    try:
        cur.execute(query1)
        print("Table created")
        cur.execute(query2)
        conn.commit()
        print("Data inserted")
    except Exception as e:
        print(e)
        raise


In [167]:
truncate_table("Drop","PlantData")

no such table: PlantData


OperationalError: no such table: PlantData

In [195]:
def check_week_data(cols):
    query = f'''SELECT {", ".join(cols)} FROM PlantData WHERE Week IN ('w1', 'w2')'''
    row = cur.execute(query)
    rows = row.fetchall()
    df = pd.DataFrame(rows)
    df_sum_1 = df.iloc[0].sum()
    df_sum_2 = df.iloc[1].sum()
    return True if df_sum_1 > 0 and df_sum_2 > 0 else False

In [None]:
def get_current_week_range(current_date):
    current_week_start = current_date - timedelta(days=current_date.weekday())
    current_week_end = current_date + timedelta(days=6-current_date.weekday()) + timedelta(seconds=86399)
    return current_week_start,current_week_end

In [None]:
def calculate_data(d,is_week_data):
    d = get_date('2024-09-12 00:10:00')
    current_hour, prev_hour_start = get_current_hour_range(d)
    current_week_start,current_week_end= get_current_week_range(d)
    curr_hour_data = fetch_data_within_range(prev_hour_start,current_hour)
    if not is_week_data:
        prev_week_start,prev_week_end = get_prev_week_range(current_week_start)
        prev_2_week_start,prev_2_week_end = get_prev_week_range(prev_week_start)
        prev_week_data = fetch_data_within_range(prev_week_start,prev_week_end)
        prev_2_week_data = fetch_data_within_range(prev_2_week_start,prev_2_week_end)
        

In [196]:
def dump_hourly_data(d):
    if not table_exists("PlantData"):
        generate_plant_data(kpi)
    is_week_data = check_week_data(kpi)
    calculate_data(d,is_week_data)

    
        

    

    
dump_hourly_data(curr_hour_df)
        

0    0.0
1    0.0
2    0.0
3    0.0
Name: 0, dtype: float64
0    0.0
1    0.0
2    0.0
3    0.0
Name: 1, dtype: float64
False


     0    1    2    3
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
0    0.0
1    0.0
2    0.0
3    0.0
dtype: float64 0    0.0
1    0.0
2    0.0
3    0.0
dtype: float64


In [97]:
bar_data,bar_data_dict = get_bar_data(curr_hour_df,prev_week_df,prev_2_week_df)

36 35 -1
(36, 35, -1)
36 35 -1


In [48]:
bar_data

{'x': 'Week1,Week2,CurrentHour',
 'KPI1': '50.4997,0.0,50.495',
 'KPI2': '32.4991,0.0,32.5056',
 'KPI3': '1.5003,0.0,1.502',
 'KPI4': '497.4962,0.0,497.4623'}

In [49]:
bar_data_dict

{'x': ['Week1', 'Week2', 'CurrentHour'],
 'KPI1': [np.float64(50.4997), 0.0, np.float64(50.495)],
 'KPI2': [np.float64(32.4991), 0.0, np.float64(32.5056)],
 'KPI3': [np.float64(1.5003), 0.0, np.float64(1.502)],
 'KPI4': [np.float64(497.4962), 0.0, np.float64(497.4623)]}

In [50]:
def dump_data_db(df,table):
    try:
        df.to_sql(table,conn)

    except sql.Error as e:
        print(e)
        raise

In [98]:
def dump_bar(d):
    print(d)
    print(pd.DataFrame(d,columns=d["x"]))
    return pd.DataFrame(d)

In [99]:
dump_bar(bar_data_dict)

{'x': [36, 35, -1], 'KPI1': [np.float64(50.495), np.float64(50.4997), 0.0], 'KPI2': [np.float64(32.5056), np.float64(32.4991), 0.0], 'KPI3': [np.float64(1.502), np.float64(1.5003), 0.0], 'KPI4': [np.float64(497.4623), np.float64(497.4962), 0.0]}
Empty DataFrame
Columns: [36, 35, -1]
Index: []


Unnamed: 0,x,KPI1,KPI2,KPI3,KPI4
0,36,50.495,32.5056,1.502,497.4623
1,35,50.4997,32.4991,1.5003,497.4962
2,-1,0.0,0.0,0.0,0.0
