In [1]:
# Устанавливает библиотеку для работы с БД
pip install pyodbc

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'D:\ProgramFiles\Anaconda3\python.exe -m pip install --upgrade pip' command.


In [1]:
# Загрузка библиотек
import os
import re

import numpy as np
import pandas as pd
import pyodbc
from scipy.stats import mode

# Функции

In [2]:
# Remove unwanted characters from a raw value of the nominals column
def clean_nom(x):
    """Split a raw nominals string into a list of cleaned, non-empty lines.

    Parameters
    ----------
    x : str or None
        Raw text of one nominals cell.

    Returns
    -------
    list of str or None
        ``None`` is passed through unchanged. Otherwise the text is split on
        line breaks and each non-blank line is returned stripped, with tabs
        turned into spaces and runs of spaces collapsed to a single space.
    """
    if x is None:
        return x
    # A /* ... */ comment (with any CRLFs directly around it) becomes a line
    # break, so the text before and after it stays on separate lines.
    x = re.sub(r'(\r\n)*(/\*)+[\w\d\s=-]*(\*/)+(\r\n)*', '\n', x)
    x = x.replace('\r', '').replace('\t', ' ')
    # Collapse whole runs of spaces: replacing only pairs would leave a double
    # space behind for runs of three or more.
    x = re.sub(r' {2,}', ' ', x)
    return [line.strip() for line in x.split('\n') if line.strip()]

# Store parsed nominals in a dictionary
def get_nom_df(x, target=None):
    """Append the nominal values found in ``x`` to a name -> values dictionary.

    Parameters
    ----------
    x : iterable of str or None
        Rows of the form ``"<name> <value>"`` (exactly one space between the
        two parts). ``None`` is ignored.
    target : dict, optional
        Dictionary to fill. When omitted, the module-level ``nom_dict`` is
        used, which must exist before the call.

    Returns
    -------
    None
        Every row that does not split into exactly two parts, or whose value
        is not a valid float, is printed and skipped.
    """
    if x is None:
        return
    store = nom_dict if target is None else target
    for row in x:
        try:
            name, val = row.split(' ')
            # Convert before touching the dictionary so that a bad value never
            # leaves an empty entry behind.
            value = float(val)
        except ValueError:
            print(row)
            continue
        store.setdefault(name, []).append(value)
    return

# Запрос из БД и сохранение данных в таблице

In [3]:
# Database login parameters.
# Every part can be overridden through an environment variable, so the server
# address and credentials do not have to be edited (or stored) in the notebook;
# when a variable is unset the previous literal value is used.
# NOTE(review): the fallback UID/PWD below are secrets kept in plain text -
# rotate them and drop the defaults once the variables are provisioned.
conn_str = (
    'DRIVER={{SQL Server}};'
    'SERVER={server};'
    'DATABASE={database};'
    'UID={uid};'
    'PWD={pwd};'
).format(
    server=os.environ.get('NORDLAB_DB_SERVER', r'192.168.0.250\SQLEXPRESS'),
    database=os.environ.get('NORDLAB_DB_NAME', 'db1'),
    uid=os.environ.get('NORDLAB_DB_UID', 'NordLab'),
    pwd=os.environ.get('NORDLAB_DB_PWD', 'nordnord'),
)

In [4]:
# Query text: selects every column from Channel_States full-outer-joined with
# Devices, Orders, Device_Types, Sensors and Sensor_Type on their ID columns.
query = '''
        Select    *  from Channel_States
        Full outer join Devices on Devices.Device_ID=Channel_States.Device_ID
        Full outer join Orders on Orders.Order_ID=Devices.Order_Number
        Full outer join Device_Types on Device_Types.Device_Type_ID=Devices.Device_Type
        Full outer join Sensors on Sensors.Sensor_ID =Channel_States.Sensor_ID 
        Full outer join Sensor_Type on Sensor_Type.Sensor_Type_ID = Sensors.Sensor_Type
        '''

In [5]:
# Connect to the database (the connection is not closed in this cell)
cnxn = pyodbc.connect(conn_str)
# Run the query and keep the result set as a pd.DataFrame
DF = pd.read_sql_query(sql=query, con=cnxn)

# Статистика номиналов

In [6]:
# NOTE(review): the trailing letter of the value may be a Cyrillic character rather than Latin "A" - confirm it matches the database value exactly
device_type = 'MTSS-1031А' # device type; compared against the Device_Type_Name column below

In [7]:
# Columns needed for the nominal statistics
col_list = ['Device_Type_Name', 'Device_ID', 'NOM', 'Iteration_Number']
new_df = DF[col_list]  # slice of the full query result
# Drop repeated column names, keeping the first occurrence of each, and take
# an explicit copy so the assignment below writes to an independent frame
# instead of a view/slice of DF.
new_df = new_df.loc[:, ~new_df.columns.duplicated()].copy()
# Turn every raw nominals string into a list of cleaned lines
new_df['NOM'] = new_df['NOM'].apply(clean_nom)

In [8]:
# Keep, for every device of the selected type, the row that comes last when
# ordered by Iteration_Number (its latest iteration)
is_selected_type = new_df['Device_Type_Name'] == device_type
sel_df = (
    new_df.loc[is_selected_type]
    .sort_values('Iteration_Number')
    .groupby('Device_ID')
    .tail(1)
)
sel_df

Unnamed: 0,Device_Type_Name,Device_ID,NOM,Iteration_Number
15015,MTSS-1031А,2184.0,"[R1 0.000075, Rt1 0.001, C1 0.0056, R3 0.033, ...",0.0
8138,MTSS-1031А,1464.0,"[R1 0.000075, Rt1 0.001, C1 0.01, R3 0.033, R4...",0.0
8160,MTSS-1031А,1471.0,"[R1 0.000150, Rt1 0.0022, C1 0.01, R3 0.033, R...",0.0
28617,MTSS-1031А,3910.0,"[R1 0.000150, Rt1 0.0022, C1 0.0056, R3 0.033,...",0.0
28618,MTSS-1031А,3911.0,"[R1 0.000150, Rt1 0.0022, C1 0.0056, R3 0.033,...",0.0
...,...,...,...,...
17397,MTSS-1031А,2460.0,"[R1 0.000150, Rt1 0.0022, C1 0.0056, R3 0.011,...",23.0
17360,MTSS-1031А,2454.0,"[R1 0.000075, Rt1 0.001, C1 0.0056, R3 0.018, ...",23.0
30652,MTSS-1031А,4311.0,"[R1 0.000160, Rt1 0.105, C1 0.001, R3 0.0033, ...",27.0
30721,MTSS-1031А,4313.0,"[R1 0.0001, Rt1 0.0261, C1 0.001, R3 0.0033, R...",33.0


In [10]:
# Collect the nominals of the selected rows into nom_dict; get_nom_df prints
# every row it could not add
nom_dict = {}
for nominals in sel_df['NOM']:
    get_nom_df(nominals)
print()

newz
newz
newz
newz
newz
/
* 1-st Stage*/
* 1-st Stage*/
* 1-st Stage*/



In [11]:
# Compute summary statistics for every nominal
col = ['Mean', 'Max', 'Min', 'Median', 'Mode']
idx = nom_dict.keys()
# Build one record per nominal and create the frame in a single step.
# Writing cell by cell through stat_table['Mean'][key] is chained assignment,
# which pandas may apply to a temporary copy instead of the table itself.
stat_rows = []
for key in idx:
    values = np.asarray(nom_dict[key])
    stat_rows.append({
        'Mean': values.mean(),
        'Max': values.max(),
        'Min': values.min(),
        'Median': np.median(values),
        # Depending on the SciPy version, mode() returns either a one-element
        # array or a scalar for 1-D input; atleast_1d handles both.
        'Mode': np.atleast_1d(mode(values)[0])[0],
    })
# Rows follow the insertion order of nom_dict; the columns are numeric.
stat_table = pd.DataFrame(stat_rows, index=list(idx), columns=col)

In [12]:
# Display the table of per-nominal statistics
stat_table

Unnamed: 0,Mean,Max,Min,Median,Mode
R1,0.000116,0.0033,1.5e-05,7.5e-05,7.5e-05
Rt1,0.005273,1.748,0.00043,0.001,0.001
C1,0.007781,0.033,0.001,0.0056,0.0056
R3,0.018118,0.068,0.00033,0.015,0.033
R4,0.074712,0.27,0.0013,0.062,0.13
R5,0.238817,8.794,0.13,0.158,0.2
R7,1.12649,20.0,0.15,0.91,0.51
C3,118014.693385,47000000.2,1.0,2.2,2.2
C4,0.048386,18.0,0.0,0.022,0.022
C5,3212.553337,2000000.0,0.01,0.1,0.1
