In [1]:
from datetime import datetime
from init_azure_db import create_tables
from sqlalchemy.engine.base import Engine
import pandas as pd
from connect_mssql import connect_mssql, get_mssql_engine
from load_mssql import empty_table, load_table, migrate_table, load_json_data

In [2]:
# Raise pandas' display limits (up to 150 columns, 2000 characters per line) so wide frames are not truncated.
pd.set_option("display.max_columns", 150, "display.width", 2000)

In [3]:
# Notebook-level engine; it is the default `db_engine` of upsert_table and create_converters_data below.
engine = get_mssql_engine()

In [4]:
# Load the source records for company "recom"; the cells below all read from this `df`.
df = load_json_data(company="recom")

# create tables

In [5]:
# Project helper from init_azure_db; presumably creates the target tables in the "recom" schema (its implementation is not shown here).
create_tables(schema="recom")

# create product series

In [6]:
def create_product_series_data(input_data: pd.DataFrame):
    """Build the rows for the ``product_series`` table.

    Args:
        input_data: Frame with a ``product_series`` column.

    Returns:
        A one-column frame (``name``) holding each distinct value of
        ``product_series`` once, in order of first appearance.
    """
    distinct_series = input_data["product_series"].unique()
    return pd.DataFrame(data={"name": distinct_series})


In [7]:
def upsert_table(
    data: pd.DataFrame,
    table_name: str,
    column_identifier: str,
    schema: str = "recom",
    db_engine: Engine = engine,
):
    """Load only those rows of ``data`` that the target table does not hold yet.

    The current content of ``schema.table_name`` is read, and every row of
    ``data`` whose ``column_identifier`` value already occurs there is skipped.
    Rows that are already present are left untouched by this function (nothing
    is updated), so in effect this is "insert if missing".

    Args:
        data: Candidate rows.
        table_name: Name of the target table.
        column_identifier: Column used to decide whether a row already exists.
        schema: Database schema of the target table.
        db_engine: Engine used for both the read and the load.

    Returns:
        The ``(rows, columns)`` shape of the frame handed to ``load_table``.
    """
    # Snapshot of what is already stored in the target table.
    existing_rows = pd.read_sql_table(table_name=table_name, schema=schema, con=db_engine)

    already_present = data[column_identifier].isin(existing_rows[column_identifier])
    new_rows = data.loc[~already_present].copy()

    load_table(data=new_rows, table_name=table_name, db_engine=db_engine, schema_name=schema)

    return new_rows.shape

In [8]:
# Load the product series names that are not yet in recom.product_series; the cell output is the shape of the loaded frame.
upsert_table(data=create_product_series_data(df), table_name="product_series", column_identifier="name")

(81, 1)

# create certifications

In [9]:
def create_certifications_data(input_data: pd.DataFrame) -> pd.DataFrame:
    """Build the rows for the ``certifications`` table.

    The per-product certification lists are flattened, each name is trimmed
    and lower-cased, and duplicates that only differed in case or surrounding
    whitespace are collapsed.

    Products with a missing or empty certification list explode to a null
    entry; those are dropped so no null name is loaded into the table
    (the same rule ``create_protections_data`` applies).

    Args:
        input_data: Frame with a ``certifications`` column holding lists of
            strings (or a null where a product has none).

    Returns:
        A one-column frame (``name``) of distinct, normalized, non-null
        certification names.
    """
    result = pd.DataFrame(data={"name": input_data["certifications"].explode().unique()})
    result["name"] = result["name"].str.strip().str.lower()

    # Normalization can create new duplicates ("CE" vs " ce "), so dedupe after it.
    result = result.drop_duplicates(subset="name")

    # Drop the null placeholder produced by missing/empty certification lists.
    return result.loc[result["name"].notna()]

In [10]:
# Scratch frame for inspection only; `t` is reassigned by later cells.
t = create_certifications_data(df)

In [11]:
# Load the certification names that are not yet in recom.certifications; the cell output is the shape of the loaded frame.
upsert_table(data=create_certifications_data(df), table_name="certifications", column_identifier="name")

(138, 1)

In [12]:
# Peek at one raw `protections` entry (row 2); the output shows it is a list of strings.
df['protections'].values[2]

['reinforced isolation', '2MOPP']

# create protections

In [13]:
def create_protections_data(input_data: pd.DataFrame) -> pd.DataFrame:
    """Build the rows for the ``protections`` table.

    Flattens the per-product ``protections`` lists, trims and lower-cases
    every name, removes duplicates and finally discards null entries.

    Args:
        input_data: Frame with a ``protections`` column holding lists of
            strings (or a null where a product has none).

    Returns:
        A one-column frame (``name``) of distinct, normalized, non-null
        protection names. The index is not reset.
    """
    raw_names = input_data["protections"].explode().unique()
    frame = pd.DataFrame(data={"name": raw_names})

    # Normalize first so that case/whitespace variants collapse when deduplicating.
    frame["name"] = frame["name"].str.strip().str.lower()
    deduped = frame.drop_duplicates(subset="name")

    return deduped.loc[deduped["name"].notna()]

In [14]:
# Load the protection names that are not yet in recom.protections; the cell output is the shape of the loaded frame.
upsert_table(data=create_protections_data(df), table_name="protections", column_identifier="name")

(28, 1)

In [41]:
# List the columns available in the loaded source frame.
df.columns

Index(['product_series', 'part_number', 'converter_type',
       'ac_voltage_input_min', 'ac_voltage_input_max', 'dc_voltage_input_min',
       'dc_voltage_input_max', 'input_voltage_tolerance', 'power',
       'is_regulated', 'regulation_voltage_range', 'efficiency',
       'isolation_test_voltage', 'voltage_output_1', 'voltage_output_2',
       'voltage_output_3', 'i_out1', 'i_out2', 'i_out3', 'output_type', 'pins',
       'package', 'packaging_type', 'dimensions', 'certifications',
       'protections', 'operating_temperature', 'power_derating', 'company'],
      dtype='object')

## Isolation tests — TODO: come back to this later (depends on converters)

In [46]:
# Peek at the first raw isolation-test entry; the output shows a list of dicts with duration_sec / unit / voltage keys.
df["isolation_test_voltage"].values[0]

[{'duration_sec': 60, 'unit': 'VDC', 'voltage': 5200},
 {'duration_sec': 60, 'unit': 'VAC', 'voltage': 4000}]

In [56]:
def create_isolation_tests_data(input_data: pd.DataFrame) -> pd.DataFrame:
    """Return one row per isolation test.

    The list stored in each ``isolation_test_voltage`` cell is exploded so
    that every list element gets its own row. The original index label is
    repeated for elements that came from the same source row. The elements
    themselves are left as they are (no expansion into separate columns).

    Args:
        input_data: Frame with an ``isolation_test_voltage`` column of lists.

    Returns:
        A single-column frame named ``isolation_test_voltage``.
    """
    exploded_tests = input_data["isolation_test_voltage"].explode()
    return exploded_tests.to_frame()

In [58]:
# Scratch frame: the isolation tests exploded to one row per test, for inspection.
t = create_isolation_tests_data(df)

In [60]:
# Expand each per-test dict into its own columns (duration_sec, unit, voltage in the output below).
t["isolation_test_voltage"].apply(pd.Series)

Unnamed: 0,duration_sec,unit,voltage
0,60.0,VDC,5200
0,60.0,VAC,4000
1,60.0,VDC,5200
2,60.0,VDC,5200
2,60.0,VAC,4000
...,...,...,...
477,60.0,VAC,4200
478,60.0,VAC,4200
479,60.0,VAC,4200
480,60.0,VAC,4200


# create converters

In [15]:
def create_converters_data(input_data: pd.DataFrame, company: str, schema: str="recom", db_engine: Engine=engine) -> pd.DataFrame:
    """Build the rows for the ``converters`` table from the raw product records.

    Steps:
      1. Replace the ``product_series`` name by the database id of the matching
         row in ``<schema>.product_series`` (left join, so unknown series end up
         with a null ``product_series_id``).
      2. Flatten the nested ``pins``, ``package``, ``dimensions`` and
         ``operating_temperature`` fields into scalar columns.
      3. Stamp ``company``, ``created_at`` and ``updated_at``.
      4. Keep only the converter columns and the first row per ``part_number``.

    Args:
        input_data: Raw product records; not modified.
        company: Value written to the ``company`` column of every row
            (overwrites any ``company`` column already present in the input).
        schema: Database schema holding the ``product_series`` table.
        db_engine: Engine used to read the ``product_series`` table.

    Returns:
        A new frame with one row per distinct ``part_number``.
    """
    # Work on a copy of the argument (not the notebook-global frame) so the
    # function really processes what the caller passed in.
    result = input_data.copy()

    product_series_df = pd.read_sql_table(table_name="product_series", schema=schema, con=db_engine)
    # Only id/name are needed; dropping the rest avoids suffixed duplicate
    # columns if the table shares other column names with the input.
    product_series_df = product_series_df[["id", "name"]].rename(columns={"id": "product_series_id"})

    # join the product series table to get DB id from schema
    result = result.merge(product_series_df, left_on="product_series", right_on="name", how="left")
    # drop redundant name col
    result = result.drop(columns=["product_series", "name"])

    # Same None guard as for the other nested fields below.
    result["pin_count"] = result["pins"].map(lambda x: len(x) if x is not None else None)

    for k in ["mounting_type", "connection_type"]:
        result[k] = result["package"].map(lambda x: x.get(k) if x is not None else None)

    for k in ["unit", "length", "width", "height"]:
        result[f"dimensions_{k}"] = result["dimensions"].map(lambda x: x.get(k) if x is not None else None)

    for k in ["min", "max"]:
        result[f"operating_temp_{k}"] = result["operating_temperature"].map(lambda x: x.get(k) if x is not None else None)

    result["company"] = company

    # Take a single timestamp so created_at and updated_at are identical on insert.
    now = datetime.now()
    result["created_at"] = now
    result["updated_at"] = now

    res_columns = [
        "company",
        "product_series_id",
        "part_number",
        "converter_type",
        "ac_voltage_input_min",
        "ac_voltage_input_max",
        "dc_voltage_input_min",
        "dc_voltage_input_max",
        "input_voltage_tolerance",
        "power",
        "is_regulated",
        "regulation_voltage_range",
        "efficiency",
        "voltage_output_1",
        "voltage_output_2",
        "voltage_output_3",
        "i_out1",
        "i_out2",
        "i_out3",
        "output_type",
        "pin_count",
        "mounting_type",
        "connection_type",
        "dimensions_unit",
        "dimensions_length",
        "dimensions_width",
        "dimensions_height",
        "operating_temp_min",
        "operating_temp_max",
        "created_at",
        "updated_at"
    ]

    return result[res_columns].copy().drop_duplicates(subset="part_number")

    

In [16]:
# Build the converter rows for company 'recom'; this reads the product_series table through the default engine.
t = create_converters_data(df, company='recom')

In [21]:
# Load the converters whose part_number is not yet in recom.converters; the cell output is the shape of the loaded frame.
upsert_table(data=t, table_name="converters", column_identifier="part_number", schema="recom")

(412, 31)