In [50]:
import pathlib
import pandas as pd
import numpy as np
from openai import file_from_path
from typing import Any
import json

from sqlalchemy import result_tuple

from load_schema import upsert_table, load_json_data
from connect_mssql import connect_mssql, create_engine


In [2]:
def load_products(path: pathlib.Path) -> list[dict[str, Any]]:
    products = []

    for file in path.iterdir():
        print(file.name)
        if file.name.startswith("_cache"):
            print(f"skipping : {file.name}")
            continue
        try:
            with open(file, "r") as f:
                items = json.load(f)

            products.extend(items['answer']["power_converters"])
        except (json.JSONDecodeError, FileNotFoundError):
            continue

    return products


In [3]:
db_data = load_json_data(company='recom')

In [4]:
mornsun_data_path = pathlib.Path.cwd().parent.parent / "simplify-recom" / "simplified-src" / "mornsun" / "data"
mornsun_data_path.exists()

True

In [5]:
mornsun_data = load_products(mornsun_data_path)

cache_191f10ab93b003be324a4d58c5268d6a124bc2ff.json
cache_3af6625c476e65c22b5461ca87854d674614720f.json
2bf8532698d215cc8bff232101e7300f2e55a75d3a933ebee6f4257f0bcd763b.json
f225113df7f82e52c9c92bb31560d9f0af700aca9f81a21bb74ae600239397de.json
cache_64fd84e62463044eda59021b2b80b19cb9bd0194.json
1c32ad41a91838acb6ed7fc48846f4badea2b7509b1332fec031b960f64eb9b6.json
cache_0fd0d299966f2cb716000e9eeb10a8039830807e.json
cache_d98098fabf8ef509b337b96d20bdefd3f8c298da.json
b1d4f516bff7f043dea79b0d868f738464ed954bc6935799b4dd1d1595017bc9.json
eca3b136a2ea7953e31796fd82eaab7229f6d8a2b28290e0a6577fdf4936bb8c.json
6ee7bfcd769702f1fe17d460929a727160079d260b90db727590b37ab0433308.json
cache_7d74785a3d0248be83777ef6a1d45900e915b64e.json
cache_d7505e9f3937020bb0cc9445e02c5345393bd7e2.json
1d5dca8f8cd85a8dceae6f2be0a475f29bf61de9cda250f951171c4aac94da10.json
d3712a16f7a0f2880bc10cbdab9290a7b29f660146b875ba871e11f5feb7a0a3.json
54a0f97e2eed8b3a46d70a34c2e3b0fd9dddbb36fd00d9abf8868a09456cbb98.json
cache_

In [6]:
df_mornsun = pd.DataFrame(mornsun_data)

In [11]:
data_path = pathlib.Path.cwd()

In [13]:
products_path = data_path / 'crosses_old' / 'recom_products.csv'

In [14]:
db_path = data_path / 'crosses_old' / 'converters.csv'
db_path.exists()

True

In [15]:
with open(products_path, mode='r', encoding='latin-1') as file:
    products = pd.read_csv(file)

In [73]:
missing_cols = []

for column_name in db_data.columns:
    print()
    print(f"column name in database: {column_name}")
    print(f"column available in csv data: {column_name in products.columns}")
    if not column_name in products.columns:
        missing_cols.append(column_name)


column name in database: product_series
column available in csv data: True

column name in database: part_number
column available in csv data: True

column name in database: converter_type
column available in csv data: True

column name in database: ac_voltage_input_min
column available in csv data: True

column name in database: ac_voltage_input_max
column available in csv data: True

column name in database: dc_voltage_input_min
column available in csv data: True

column name in database: dc_voltage_input_max
column available in csv data: True

column name in database: input_voltage_tolerance
column available in csv data: True

column name in database: power
column available in csv data: True

column name in database: is_regulated
column available in csv data: True

column name in database: regulation_voltage_range
column available in csv data: True

column name in database: efficiency
column available in csv data: True

column name in database: isolation_test_voltage
column availab

In [74]:
missing_cols

['power_derating']

In [75]:
for i in missing_cols:
    print(i)
    print(db_data[i].values[:10])

power_derating
[list([{'threshold': {'temperature': 85, 'unit': 'C'}, 'unit': '%', 'rate': 0.0}])
 list([{'threshold': {'temperature': 85, 'unit': 'C'}, 'unit': '%', 'rate': 0.0}])
 list([{'threshold': {'temperature': 85, 'unit': 'C'}, 'unit': '%', 'rate': 30.0}])
 list([{'threshold': {'temperature': 85, 'unit': 'C'}, 'unit': '%', 'rate': 0.0}])
 list([{'threshold': {'temperature': 85, 'unit': 'C'}, 'unit': '%', 'rate': 30.0}])
 list([{'threshold': {'temperature': 85, 'unit': 'C'}, 'unit': '%', 'rate': 30.0}])
 list([{'threshold': {'temperature': 85, 'unit': 'C'}, 'unit': '%', 'rate': 30.0}])
 list([{'threshold': {'temperature': 85, 'unit': 'C'}, 'unit': '%', 'rate': 100.0}])
 list([{'threshold': {'temperature': 85, 'unit': 'C'}, 'unit': '%', 'rate': 100.0}])
 list([{'threshold': {'temperature': 85, 'unit': 'C'}, 'unit': '%', 'rate': 100.0}])]


In [20]:

products['pin_count'].isna().mean()

np.float64(1.0)

# missing columns notes:

1. isolation test voltage --> isolation test voltage VDC & VAC & duration
2. pins --> drop / empty
3. package --> drop
4. packaging type --> drop
5. dimensions --> pack
6. operating temp --> pack

In [23]:
products.columns

Index(['article_number', 'id', 'company', 'product_series', 'part_number',
       'converter_type', 'ac_voltage_input_min', 'ac_voltage_input_max',
       'dc_voltage_input_min', 'dc_voltage_input_max',
       'input_voltage_tolerance', 'power', 'is_regulated',
       'regulation_voltage_range', 'efficiency', 'isolation_test_duration',
       'isolation_test_voltage_VDC', 'isolation_test_voltage_VAC',
       'isolation_test_duration_in_seconds', 'Isolation Grade',
       'voltage_output_1', 'voltage_output_2', 'voltage_output_3', 'i_out1',
       'i_out2', 'i_out3', 'output_type', 'Load_regulation',
       'Load_regulation_typ_max', 'Line_regulation', 'Accuracy',
       'Package_Style', 'pin_count', 'connection_type', 'dimensions_unit',
       'dimensions_length', 'dimensions_width', 'dimensions_height',
       'certifications', 'protections', 'OVC_category', 'operating_temp_min',
       'operating_temp_max', 'Protection SCP', 'Protection OLP',
       'Protection OVP', 'Protection OCP'

In [24]:
with open(db_path, mode='r') as file:
    df = pd.read_csv(file)

# step 1 - create certifications field from columns

## test row from dataframe

In [42]:

products['isolation_test_voltage_VDC'].isna().mean()


np.float64(1.0)

In [48]:
test_row = products.iloc[0].to_dict()

In [46]:
db_data["isolation_test_voltage"].values[0]

[{'duration_sec': 60, 'unit': 'VDC', 'voltage': 5200},
 {'duration_sec': 60, 'unit': 'VAC', 'voltage': 4000}]

### create isolation test information

In [54]:
def create_isolation_test_dict(row: dict) -> list[dict]:

    isolation_test_column_names = ["isolation_test_voltage_VDC", "isolation_test_voltage_VAC"]
# "isolation_test_duration_in_seconds"
    result = []

    for test_name in isolation_test_column_names:
        test_result = {}
        if not np.isnan(row[test_name]):
            test_result["unit"] = test_name.split("_")[-1]
            test_result["voltage"] = int(row[test_name])
            test_result["duration_sec"] = int(row["isolation_test_duration_in_seconds"])

        result.append(test_result)

    return result

In [58]:
products["isolation_test_voltage"] = products.apply(lambda x: create_isolation_test_dict(x), axis=1)

In [105]:
products['pins'] = [[]] * products.shape[0]
products['package'] = None
products['packaging_type'] = None
products['power_derating'] = None



### create dimensions

In [61]:
def create_dimensions(row: dict) -> dict:

    # {'unit': 'mm', 'length': 19.6, 'width': 6.0, 'height': 10.2}

    result = {}

    result["unit"] = row["dimensions_unit"]
    result["length"] = row["dimensions_length"]
    result["width"] = row["dimensions_width"]
    result["height"] = row["dimensions_height"]

    return result



In [64]:
products['dimensions'] = products.apply(lambda x: create_dimensions(x), axis=1)

### create operating temperature

In [69]:
def create_operating_temperature(row: dict) -> dict:
    #{'min': -40.0, 'max': 85.0}

    result = {}

    result["min"] = row["operating_temp_min"]
    result["max"] = row["operating_temp_max"]

    return result

In [70]:
create_operating_temperature(test_row)

{'min': -40.0, 'max': 95.0}

In [72]:
products["operating_temperature"] = products.apply(lambda x: create_operating_temperature(x), axis=1)

In [96]:
recom_certificates = list(filter(lambda x: x.startswith("Certification"), products.columns))
recom_protections = list(filter(lambda x: x.startswith("Protection"), products.columns))


In [59]:
db_data['pins'].values[0]

[{'pin_id': 1, 'type': '+VDC in'},
 {'pin_id': 2, 'type': '-VDC in'},
 {'pin_id': 6, 'type': '-V out'},
 {'pin_id': 7, 'type': '+V out'}]

In [30]:
recom_certificates

['Certification EN 60950-1',
 'Certification UL 60950-1',
 'Certification EN 50155',
 'Certification EN 60601-1',
 'Certification UL 60601-1',
 'Certification EN 61010-1',
 'Certification EN 61347-1',
 'Certification EN 60601-1-2',
 'Certification EN 60335-1',
 'Certification EN 62368-1',
 'Certification UL 62368-1',
 'Certification IEC 60950-1']

In [76]:
products

Unnamed: 0,article_number,id,company,product_series,part_number,converter_type,ac_voltage_input_min,ac_voltage_input_max,dc_voltage_input_min,dc_voltage_input_max,...,Certification EN 60335-1,Certification EN 62368-1,Certification UL 62368-1,Certification IEC 60950-1,isolation_test_voltage,pins,package,packaging_type,dimensions,operating_temperature
0,10003989,1,recom,RxxP2xx,R12P212D,DC/DC-Converters,,,10.8,13.2,...,0,1,1,1,"[{}, {}]",,,,"{'unit': 'mm', 'length': 19.5, 'width': 9.8, '...","{'min': -40.0, 'max': 95.0}"
1,10020435,2,recom,REC20K-Z,REC20K-483737DZ,DC/DC-Converters,,,18.0,75.0,...,0,0,0,0,"[{}, {}]",,,,"{'unit': 'mm', 'length': 25.4, 'width': 25.4, ...","{'min': -40.0, 'max': 85.0}"
2,10019127,3,recom,RS3E,RS3E-2405S/H3,DC/DC-Converters,,,18.0,36.0,...,0,1,1,0,"[{}, {}]",,,,"{'unit': 'mm', 'length': 21.8, 'width': 9.2, '...","{'min': -40.0, 'max': 70.0}"
3,21000705,4,recom,RPK50-W,RPK50-4824SW/R3/X8A,DC/DC-Converters,,,19.0,72.0,...,0,0,0,0,"[{}, {}]",,,,"{'unit': 'mm', 'length': 0.0, 'width': 0.0, 'h...","{'min': nan, 'max': nan}"
4,23000089,5,recom,RACPRO1-T240,ET-RACPRO1-T240/24,AC/DC-Converters,320.0,575.0,,,...,0,0,0,0,"[{}, {}]",,,,"{'unit': 'mm', 'length': 135.0, 'width': 43.0,...","{'min': -40.0, 'max': 60.0}"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34444,90000031,34445,recom,,Miete Überl.BGA IC Solutions GmbH,,,,,,...,0,0,0,0,"[{}, {}]",,,,"{'unit': 'mm', 'length': 0.0, 'width': 0.0, 'h...","{'min': nan, 'max': nan}"
34445,90000008,34446,recom,,Teilnahmegebühr EMV Seminar/Webinar,,,,,,...,0,0,0,0,"[{}, {}]",,,,"{'unit': 'mm', 'length': 0.0, 'width': 0.0, 'h...","{'min': nan, 'max': nan}"
34446,90000014,34447,recom,,Commission Charges,,,,,,...,0,0,0,0,"[{}, {}]",,,,"{'unit': 'mm', 'length': 0.0, 'width': 0.0, 'h...","{'min': nan, 'max': nan}"
34447,90000047,34448,recom,,Weiterverr.IC über Bilanzkonto,,,,,,...,0,0,0,0,"[{}, {}]",,,,"{'unit': 'mm', 'length': 0.0, 'width': 0.0, 'h...","{'min': nan, 'max': nan}"


In [78]:
db_data['certifications'].values[0]

['IEC/EN60601-1', 'ANSI/AAMI ES60601-1', 'EN62368-1', 'IEC60601-1-2']

In [97]:
def extract_certificates(row: dict) -> list[str]:

    result = []
    for name in recom_certificates:
       if row[name] == 1:
           result.append(name)

    return result


def extract_protections(row: dict) -> list[str]:

    result = []
    for name in recom_protections:
       if row[name] == 1:
           result.append(name)

    return result


In [98]:
products['certifications'] = products.apply(lambda x: extract_certificates(x), axis=1)
products['protections'] = products.apply(lambda x: extract_protections(x), axis=1)


In [107]:
df['pins'].values[:10]

array([list([]), list([]), list([]), list([]), list([]), list([]),
       list([]), list([]), list([]), list([])], dtype=object)

In [124]:
df = products[db_data.columns].copy()

In [125]:
df['isolation_test_voltage'] = None

In [87]:
from load_schema import upsert_table, create_product_series_data, create_certifications_data, create_protections_data, create_converters_data, create_isolation_tests_data, create_pins_data, create_derating_data, create_converter_certifications_mapping_table, create_converter_protections_mapping_table
from connect_mssql import get_mssql_engine
import os

In [112]:
engine = get_mssql_engine(
        server=os.environ["MSSQL_HOST_RECOM"],
        username=os.environ["MSSQL_USERNAME_RECOM"],
        password=os.environ["MSSQL_PASSWORD_RECOM"],
        database="Time2Act",
    )
schema_name = "crosslist"
company = "recom"


In [126]:
df['isolation_test_voltage'].values[:10]

array([None, None, None, None, None, None, None, None, None, None],
      dtype=object)

In [128]:
db_data['isolation_test_voltage'].values[0]

[{'duration_sec': 60, 'unit': 'VDC', 'voltage': 5200},
 {'duration_sec': 60, 'unit': 'VAC', 'voltage': 4000}]

In [131]:
upsert_table(
    data=create_product_series_data(df),
    table_name="product_series",
    column_identifier="name",
            schema=schema_name,
            db_engine=engine,
        )
upsert_table(
            data=create_certifications_data(df),
            table_name="certifications",
            column_identifier="name",
            schema=schema_name,
            db_engine=engine,
        )

upsert_table(
            data=create_protections_data(df),
            table_name="protections",
            column_identifier="name",
            schema=schema_name,
            db_engine=engine,
        )

upsert_table(
            data=create_converters_data(
                input_data=df, company=company, schema=schema_name, db_engine=engine
            ),
            table_name="converters",
            column_identifier="part_number",
            schema=schema_name,
            db_engine=engine,
        )

# upsert_table(
#             data=create_isolation_tests_data(
#                 input_data=df, schema=schema_name, db_engine=engine
#             ),
#             table_name="isolation_tests",
#             schema=schema_name,
#             db_engine=engine,
#         )
#
# upsert_table(
#             data=create_pins_data(
#                 input_data=df, schema=schema_name, db_engine=engine
#             ),
#             table_name="pins",
#             schema=schema_name,
#             db_engine=engine,
#         )

# upsert_table(
#             data=create_derating_data(
#                 input_data=df, schema=schema_name, db_engine=engine
#             ),
#             table_name="power_derating",
#             schema=schema_name,
#             db_engine=engine,
#         )

upsert_table(
            data=create_converter_certifications_mapping_table(
                input_data=df, schema=schema_name, db_engine=engine
            ),
            table_name="converter_certifications",
            schema=schema_name,
            db_engine=engine,
        )
upsert_table(
            data=create_converter_protections_mapping_table(
                input_data=df, schema=schema_name, db_engine=engine
            ),
            table_name="converter_protections",
            schema=schema_name,
            db_engine=engine,
        )

(36348, 2)