In [477]:
import pandas as pd
import numpy as np
import random
import sqlite3 as sql
from datetime import datetime, timedelta
import os

In [478]:
conn = sql.connect("el.db")
cur = conn.cursor()

In [479]:
db_col = ["Index","EName","KPI1","KPI2","KPI3","KPI4","TimeStamp"]
kpi = ["KPI1","KPI2","KPI3","KPI4"]
is_monday = True
d = "2024-09-15 22:00:00"
plant_data_col = ["Week","WeekStart","WeekEnd","IsUpdated",*kpi]

In [480]:
def truncate_table(cmd,name):
    try:
        q = f"DROP TABLE {name}" if cmd == 'Drop' else f"DELETE FROM {name}"
        cur.execute(q)
        conn.commit()
    except Exception as e:
        print(e)
        raise

In [498]:
# truncate_table('Drop','PlantData')

In [482]:
def get_date(date):
    currDate = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
    return currDate

In [483]:
def fetch_data_within_range(start,end):
    try:
        d = cur.execute(f"SELECT * from Transactions WHERE TimeStamp BETWEEN '{start}' AND '{end}';")
        selected_data = d.fetchall()
        df = pd.DataFrame(selected_data,columns=["Index","EName",*kpi,"TimeStamp"])
        return df
    except sql.Error as e:
        print(e)
        return []

In [484]:
def get_current_week_range(current_date):
    current_date = current_date.replace(hour=0, minute=0, second=0, microsecond=0) 
    current_week_start = current_date - timedelta(days=current_date.weekday()) if is_monday else current_date
    current_week_end = current_week_start  + timedelta(days=6, hours=23, minutes=59, seconds=59)
    return current_week_start,current_week_end

In [485]:
def get_prev_week_range(curr_week_start):
    curr_week_start = curr_week_start.replace(hour=0, minute=0, second=0, microsecond=0)
    prev_week_start = curr_week_start - timedelta(weeks=1)
    prev_week_end = curr_week_start - timedelta(seconds=1)
    return prev_week_start,prev_week_end

In [486]:
def get_current_hour_range(current_hour):
    current_hour = current_hour.replace(minute=0, second=0, microsecond=0)
    prev_hour_end = current_hour - timedelta(hours=1)
    return current_hour,prev_hour_end

In [487]:
def table_exists(table_name):
    query = "SELECT * FROM sqlite_master WHERE type='table' AND name=?;"
    cur.execute(query, (table_name,))
    result = cur.fetchone()
    return result is not None

In [488]:
def generate_plant_data(col_list):
    cols = []
    for i in col_list:
        cols.append(i + " REAL Default 0")
    query1 = f'''
        CREATE TABLE IF Not Exists PlantData
        (
            Id INTEGER PRIMARY KEY,
            Week TEXT NOT NULL,
            WeekStart DATETIME DEFAULT "1999-01-01 00:00:00",
            WeekEnd DATETIME DEFAULT "1999-01-01 00:00:00",
            IsUpdated BOOLEAN DEFAULT FALSE,
            {", ".join(cols)}
        )
    '''
    
    query2 = f'''
        INSERT INTO PlantData (Week) VALUES ("w1"),("w2"),("h")
    '''
    try:
        cur.execute(query1)
        print("Table created")
        cur.execute(query2)
        conn.commit()
        print("Data inserted")
    except Exception as e:
        print(e)
        conn.rollback()
        raise


In [489]:
def check_week_data():
    query = f'''SELECT IsUpdated, {", ".join(kpi)} FROM PlantData WHERE Week IN ('w1', 'w2')'''
    row = cur.execute(query)
    rows = row.fetchall()
    df = pd.DataFrame(rows)
    return True if df.loc[0,0] and df.loc[1,0] else False

In [490]:
def calculate_mean(data):
    kpi_avg = np.round((data[kpi].mean()).infer_objects(copy=False).fillna(0),2).tolist()
    return kpi_avg

In [491]:
def fetch_weekly_data(d):
    print("Fetching week data")
    df = pd.DataFrame(columns=plant_data_col)
    current_week_start,_= get_current_week_range(d)
    for i in ['w1','w2']:
        data_l = []
        prev_week_start,prev_week_end = get_prev_week_range(current_week_start)
        print(f"Fetching {i}",prev_week_start,prev_week_end)
        prev_week_data = fetch_data_within_range(prev_week_start,prev_week_end)
        calculated_mean = calculate_mean(prev_week_data)
        data_l.extend([i,prev_week_start,prev_week_end,True])
        data_l.extend(calculated_mean)
        current_week_start = prev_week_start
        df.loc[len(df)] = data_l
        print(f"{i} fetched")
    return df

In [492]:
def fetch_hourly_data(d):
    df = pd.DataFrame(columns=plant_data_col)
    data_l = []
    current_hour, prev_hour_start = get_current_hour_range(d)
    current_week_start,current_week_end= get_current_week_range(d)
    _,prev_week_end = get_prev_week_range(current_week_start)
    print("Fetching hourly data",current_week_start,current_week_end)
    curr_hour_data = fetch_data_within_range(prev_hour_start,current_hour)
    calculated_mean = calculate_mean(curr_hour_data)
    data_l.extend(['h',current_week_start,current_week_end,True])
    data_l.extend(calculated_mean)
    df.loc[len(df)] = data_l
    
    if current_hour >= prev_week_end + timedelta(seconds=1):
        print("Week end")
        week_df = fetch_weekly_data(d)
        df = pd.concat([week_df,df],ignore_index=True)
    return df

In [493]:
def get_data_hourly(d):
    d = get_date(d)
    
    if not table_exists("PlantData"):
        generate_plant_data(kpi)
    
    curr_df = fetch_hourly_data(d)
    
    if len(curr_df) < 2 and not check_week_data():
        week_df = fetch_weekly_data(d)
        df = pd.concat([week_df,curr_df],ignore_index=True)
        return df
    return curr_df

In [496]:
def dump_data(df):
    try:
        if len(df) > 1:
            query_1 = 'DELETE FROM PlantData'
            cur.execute(query_1)
            df.to_sql("PlantData",conn,if_exists='replace')
        else:
            query_1 = '''
                DELETE FROM PlantData
                WHERE Week = 'h';
            '''
            cur.execute(query_1)
            df.to_sql('PlantData',conn,if_exists='append')
        conn.commit()
    except sql.Error as e:
        print(e)
        conn.rollback()
        raise


In [494]:
bar_df = get_data_hourly(d)

Fetching hourly data 2024-09-09 00:00:00 2024-09-15 23:59:59
Week end
Fetching week data
Fetching w1 2024-09-02 00:00:00 2024-09-08 23:59:59


In [495]:
bar_df.head()

Unnamed: 0,Week,WeekStart,WeekEnd,IsUpdated,KPI1,KPI2,KPI3,KPI4
0,w1,2024-09-02,2024-09-08 23:59:59,True,50.5,32.5,1.5,497.5
1,w2,2024-08-26,2024-09-01 23:59:59,True,50.5,32.5,1.5,497.5
2,h,2024-09-09,2024-09-15 23:59:59,True,50.51,32.5,1.5,497.5


In [497]:
dump_data(bar_df)