## Database Metrics

### 0.0. Requirements

#### 0.1. Imports

In [30]:
import os
import re
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from urllib.parse import quote_plus
from warnings import filterwarnings

import pandas as pd
from sqlalchemy import event
from sqlalchemy.engine import create_engine
filterwarnings('ignore')

In [5]:
# Print the platform and the SQLAlchemy version in use, for reproducibility.
from sys import platform
from sqlalchemy import __version__

print(platform)
print(f'SQLAlchemy Version: {__version__}')

linux
SQLAlchemy Version: 1.4.47


#### 0.2. Aux Functions

In [31]:
def get_ssms_connection(connection_string):
    """Create a SQLAlchemy engine for SQL Server with fast bulk inserts enabled.

    Args:
        connection_string: SQLAlchemy URL for the ``mssql+pyodbc`` dialect
            (the ``fast_executemany`` flag is specific to that dialect).

    Returns:
        The engine returned by ``create_engine``.
    """
    # Enable pyodbc's batched executemany through the dialect flag instead of a
    # cursor-level event hook: a hook that calls ``cursor.commit()`` commits the
    # surrounding transaction right before each bulk insert, so a failed insert
    # can no longer be rolled back together with the statements preceding it.
    return create_engine(connection_string, fast_executemany=True)

def get_tables(engine, schemas=None):
    """List the staging, dimension and fact tables of the monitored schemas.

    Tables are classified by name pattern: lowercased name starting with
    ``stg_`` for staging, name like ``D_%`` for dimensions and ``F_%`` for
    facts (``[_]`` in the patterns matches a literal underscore).

    Args:
        engine: Object whose ``connect()`` returns a connection exposing
            ``execute(sql)`` and usable as a context manager.
        schemas: Optional iterable of schema names to search. When omitted,
            the notebook-level ``SCHEMAS_STR`` (an already quoted,
            comma-separated list) is used, as before.

    Returns:
        Tuple ``(stgs, dims, facts)`` of lists of ``"schema.table"`` strings.
    """
    if schemas is None:
        schemas_str = SCHEMAS_STR
    else:
        # Double embedded single quotes so each name stays a single SQL literal.
        schemas_str = ", ".join("'" + str(s).replace("'", "''") + "'" for s in schemas)
        if not schemas_str:
            # ``IN ()`` is not valid SQL; no schemas to search means no tables.
            return [], [], []

    def _qualified_names(con, name_filter):
        # Run the catalog query for one table family and return "schema.table" names.
        rows = con.execute(
            f"SELECT TABLE_SCHEMA, TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE {name_filter} AND TABLE_SCHEMA IN ({schemas_str}) ORDER BY 1"
        ).fetchall()
        return [schema + "." + name for schema, name in rows]

    # NOTE: plain SQL strings are accepted by SQLAlchemy 1.x ``execute``;
    # SQLAlchemy 2.0 requires wrapping them in ``sqlalchemy.text``.
    # The context manager closes the connection even if a query raises.
    with engine.connect() as con:
        stgs = _qualified_names(con, "lower(TABLE_NAME) like 'stg[_]%'")
        dims = _qualified_names(con, "TABLE_NAME like 'D[_]%'")
        facts = _qualified_names(con, "TABLE_NAME like 'F[_]%'")

    return stgs, dims, facts

def get_table_infos(engine, tables):
    """Collect the last processing date and space usage of each table.

    Args:
        engine: Object whose ``connect()`` returns a connection exposing
            ``execute(sql)`` and usable as a context manager.
        tables: Iterable of ``"schema.table"`` names.

    Returns:
        List with one dict per table holding ``SCHEMA``, ``TABLE``,
        ``MAX_DATA_PROCESSAMENTO`` (``None`` when it cannot be read) and the
        stripped column/value pairs of the ``sp_spaceused`` result row.
    """
    results = []

    # NOTE: plain SQL strings are accepted by SQLAlchemy 1.x ``execute``;
    # SQLAlchemy 2.0 requires wrapping them in ``sqlalchemy.text``.
    # The context manager closes the connection even if ``sp_spaceused`` raises.
    with engine.connect() as con:
        for table in tables:
            try:
                row = con.execute(f"SELECT MAX(DATA_PROCESSAMENTO) FROM {table}").fetchone()
                max_processed = row[0] if row else None
            except Exception:
                # Best effort: a failing MAX query (presumably a table without
                # a DATA_PROCESSAMENTO column) is recorded as None. Catching
                # ``Exception`` rather than using a bare ``except`` lets
                # KeyboardInterrupt/SystemExit stop the run.
                max_processed = None

            space_row = con.execute(f"sp_spaceused '{table}'").fetchone()
            # Column names and values are normalised to stripped strings.
            space_table = {str(k).strip(): str(v).strip() for k, v in space_row._mapping.items()}

            results.append({
                'SCHEMA': table.split('.')[0],
                'TABLE': table.split('.')[-1],
                'MAX_DATA_PROCESSAMENTO': max_processed,
            } | space_table)

    return results

def check_control_schema(engine, check_schema):
    """Create the schema ``check_schema`` when ``sys.schemas`` does not list it.

    The name comparison is an exact (case-sensitive) Python string match.

    Args:
        engine: Object whose ``connect()`` returns a connection exposing
            ``execute(sql)`` and usable as a context manager.
        check_schema: Name of the schema that must exist.
    """
    # The context manager closes the connection even if a statement raises.
    with engine.connect() as con:
        existing = {row[0] for row in con.execute("SELECT name FROM sys.schemas").fetchall()}

        if check_schema not in existing:
            con.execute(f'CREATE SCHEMA {check_schema}')

### 1.0. Database Metrics

In [15]:
# Connection settings come from the environment; an unset variable yields None.
ssms_db, ssms_host, ssms_user, ssms_pswd = (
    os.getenv(key) for key in ("ssms_db", "ssms_host", "ssms_user", "ssms_pswd")
)

# Schemas whose tables are measured.
SCHEMAS = ['DMOls', 'StgOls']

# Schema that receives the TABLE_SIZES history table.
DW_SCHEMA = 'DMControl'

# SCHEMAS rendered as a quoted, comma-separated list for SQL ``IN (...)`` clauses.
SCHEMAS_STR = "'{}'".format("', '".join(SCHEMAS))

In [32]:
# Raw ODBC connection string, passed through to pyodbc via ``odbc_connect``.
odbc_connect = (
    "DRIVER={ODBC Driver 17 for SQL Server};"
    f"SERVER={ssms_host};DATABASE={ssms_db};UID={ssms_user};PWD={ssms_pswd}"
)

# URL-encode the ODBC string: it travels as a URL query value, so reserved
# characters in the credentials (e.g. '&', '+', '%', '#') would otherwise be
# misparsed before the string reaches the driver.
string_con_sql_server = f"mssql+pyodbc:///?odbc_connect={quote_plus(odbc_connect)}"

engine = get_ssms_connection(string_con_sql_server)

#### 1.1. Load Data

In [17]:
# Fetch the staging, dimension and fact table names of the data warehouse
stgs, dims, facts = get_tables(engine)

In [22]:
# Collect table infos concurrently: one worker thread each for stages, dims and facts
with ThreadPoolExecutor(max_workers=3) as executor:
    future1, future2, future3 = (
        executor.submit(get_table_infos, engine, table_group)
        for table_group in (stgs, dims, facts)
    )

    # Wait for the three groups in submission order.
    result1, result2, result3 = future1.result(), future2.result(), future3.result()

# Single flat list: stages first, then dims, then facts.
results = [*result1, *result2, *result3]

In [26]:
# One row per table; normalise the column labels and drop the 'TABLE' column.
df_history_table_sizes = (
    pd.DataFrame(results)
    .rename(columns=lambda label: label.strip().upper())
    .drop(columns=['TABLE'])
)

# Positional rename: relies on the remaining columns keeping the order in which
# get_table_infos builds each record.
df_history_table_sizes.columns = [
    'DM',
    'ULTIMA_DATA_PROCESSAMENTO',
    'NOME',
    'LINHAS',
    'TAMANHO_RESERVADO',
    'TAMANHO_DADO',
    'TAMANHO_INDEX',
    'SEM_USO',
]

# Stamp every row with the date of this run.
df_history_table_sizes = df_history_table_sizes.assign(
    DATA_BASE=datetime.now().strftime('%Y-%m-%d')
)

In [44]:
# Make sure the control schema exists before writing to it (it is created if missing)
check_control_schema(engine, DW_SCHEMA)

In [52]:
# Append rather than replace: this notebook is meant to run daily, and each run
# adds one snapshot of the table sizes (tagged by DATA_BASE) to TABLE_SIZES.
df_history_table_sizes.to_sql(
    'TABLE_SIZES',
    con=engine,
    schema=DW_SCHEMA,
    if_exists='append',
    index=False,
)

-1

In [55]:
# Show the last collected row, transposed so each column reads as one line
df_history_table_sizes.tail(1).T

Unnamed: 0,8
DM,DMOls
ULTIMA_DATA_PROCESSAMENTO,2023-06-22 23:02:39.917000
NOME,F_ORDEM_ITEM
LINHAS,103616
TAMANHO_RESERVADO,23440 KB
TAMANHO_DADO,17544 KB
TAMANHO_INDEX,5672 KB
SEM_USO,224 KB
DATA_BASE,2023-06-28
