In [1]:
# Устанавливает библиотеку для работы с БД
pip install pyodbc

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'D:\ProgramFiles\Anaconda3\python.exe -m pip install --upgrade pip' command.


In [1]:
# Загрузка библиотек
import os
import re
from getpass import getpass

import numpy as np
import pandas as pd
import pyodbc
from scipy.stats import mode

# Функции

In [2]:
# Remove comments and superfluous whitespace from a cell of the NOM column
def clean_nom(x):
    """Split the raw text of a NOM cell into a list of cleaned lines.

    Parameters
    ----------
    x : str or None
        Raw cell text. ``None`` is passed through unchanged.

    Returns
    -------
    list of str or None
        The non-empty lines of ``x`` with ``/* ... */`` comments removed, tabs
        turned into spaces, runs of spaces collapsed to a single space and
        leading/trailing whitespace stripped. ``None`` when ``x`` is ``None``.
    """
    if x is None:
        return x
    # Replace every /* ... */ comment, together with the CRLFs around it, by a
    # single line break. A raw string keeps the regex escapes intact.
    x = re.sub(r'(\r\n)*(\/\*)+[\w\d\s=-]*(\*\/)+(\r\n)*', '\n', x)
    x = x.replace('\r', '').replace('\t', ' ')
    # Collapse runs of spaces of ANY length; replacing only pairs would leave
    # "a   b" with two spaces.
    x = re.sub(r' {2,}', ' ', x)
    return [line.strip() for line in x.split('\n') if line.strip()]

# Store the nominal values in a dictionary
def get_nom_df(x, target=None):
    """Append the values found in ``x`` to a ``{name: [values]}`` dictionary.

    Parameters
    ----------
    x : iterable of str or None
        Lines of the form ``"<name> <value>"``. ``None`` is ignored.
    target : dict, optional
        Dictionary to fill. When omitted, the module-level ``nom_dict`` is
        used, which is the original behaviour.

    Returns
    -------
    None
        The dictionary is updated in place. Lines that do not consist of
        exactly a name and a float-convertible value are printed and skipped.
    """
    if x is None:
        return
    store = nom_dict if target is None else target
    for row in x:
        try:
            # split() without an argument tolerates repeated spaces and tabs
            # between the name and the value.
            name, val = row.split()
            value = float(val)
        except ValueError:
            print(row)
            continue
        store.setdefault(name, []).append(value)

# Запрос из БД и сохранение данных в таблице

In [3]:
# Database connection parameters.
# The credentials are no longer stored in the notebook: the user name and the
# password are read from the NORDLAB_DB_UID / NORDLAB_DB_PWD environment
# variables. When the password variable is not set, it is asked for
# interactively, so it never ends up in the saved file.
db_uid = os.environ.get('NORDLAB_DB_UID', 'NordLab')
db_pwd = os.environ.get('NORDLAB_DB_PWD') or getpass('Database password: ')
conn_str = (
    r'DRIVER={SQL Server};'
    r'SERVER=192.168.0.250\SQLEXPRESS;'
    r'DATABASE=db1;'
    + 'UID=' + db_uid + ';'
    + 'PWD=' + db_pwd + ';')

In [4]:
# Query text: Channel_States joined (FULL OUTER) with Devices, Orders,
# Device_Types, Sensors and Sensor_Type. `Select *` returns the columns of all
# six tables, so the result contains repeated column names (e.g. Comment, Date).
query = '''
        Select    *  from Channel_States
        Full outer join Devices on Devices.Device_ID=Channel_States.Device_ID
        Full outer join Orders on Orders.Order_ID=Devices.Order_Number
        Full outer join Device_Types on Device_Types.Device_Type_ID=Devices.Device_Type
        Full outer join Sensors on Sensors.Sensor_ID =Channel_States.Sensor_ID 
        Full outer join Sensor_Type on Sensor_Type.Sensor_Type_ID = Sensors.Sensor_Type
        '''

In [5]:
# Connect to the database, run the query into a pd.DataFrame and always
# release the connection, even when the query fails.
cnxn = pyodbc.connect(conn_str)
try:
    DF = pd.read_sql_query(query, cnxn)
finally:
    cnxn.close()

In [35]:
# Show the column labels of the query result
DF.columns

Index(['Channel_State_ID', 'Device_ID', 'Sensor_ID', 'SPL_ID', 'Calibr_SPL_ID',
       'Calibr_File', 'Calibr_Data', 'Sensor_Tr_func', 'NOM', 'Date',
       'Iteration_Number', 'Channel_Type', 'Comment', 'Self_Calib_ID',
       'Device_ID', 'Device_Type', 'Device_SN', 'Comment', 'Date', 'In_Work',
       'Z_Locked', 'X_locked', 'Y_Locked', 'Order_Number', 'Sensitivity',
       'Order_ID', 'Number', 'Comment', 'Customer', 'Date', 'In_Work', 'Term',
       'Amount', 'Responsible', 'Device_Type_ID', 'Device_Type_Name',
       'Comment', 'Date', 'Sensitivity', 'Type', 'Default_Calib_File',
       'Default_Calib_SPL', 'Default_Z_Sensor_Type', 'Default_X_Sensor_Type',
       'Default_Y_Sensor_Type', 'Sensor_ID', 'Sensor_Type', 'Sensor_Number',
       'Date', 'Comment', 'FilledBy', 'Sensor_Type_ID', 'Sensor_Type_Name',
       'Date', 'Comment'],
      dtype='object')

# Статистика номиналов

In [20]:
# All device types present in the data
DF['Device_Type_Name'].unique()

array(['CME-4211 BH', 'MTSS-2003', 'MTSS-1001', 'MTSS-1043A', 'MTSS-1011',
       'CME-6011', 'CME-4211', 'MTSS-1003', None, 'MTSS-1031А',
       'MTSS-1021', 'MTSS-1033A', 'MTSS-1043A DIG', 'CME-4311LT',
       'CME-3211', 'MTSS-1041A', 'CME-4311', 'CME-3011h', 'METR-01',
       'CME-6111', 'CME-6011A', 'METR-03', 'MTSS-1031H', 'MTSS-1031G',
       'Imp', 'UMG_hydro', 'Gen_I', 'UMG_Geo', 'NS_hydro', 'METR-01 MHD'],
      dtype=object)

In [21]:
# All channel types present in the data
DF['Channel_Type'].unique()

array(['V', None, 'S', 'Z', 'X', 'Y'], dtype=object)

In [49]:
# Selection parameters for the statistics below.
# NOTE(review): the last letter of the device type may be a Cyrillic letter
# rather than Latin 'A' — copy the value from the Device_Type_Name list
# instead of retyping it.
device_type = 'MTSS-1031А' # device type, compared with Device_Type_Name
channel = 'Z' # channel, compared with Channel_Type
min_iter = 1 # lowest Iteration_Number kept in the selection

In [50]:
# Columns needed for the statistics
col_list = ['Device_Type_Name', 'Device_SN', 'NOM', 'Iteration_Number', 'Channel_Type']
new_df = DF[col_list]  # slice of the full table
# Drop repeated column labels and take an explicit copy, so that the
# assignment below modifies an independent frame instead of a slice of DF
# (avoids SettingWithCopyWarning).
new_df = new_df.loc[:, ~new_df.columns.duplicated()].copy()
# Turn the raw text of each NOM cell into a list of cleaned lines
new_df['NOM'] = new_df['NOM'].apply(clean_nom)

In [51]:
# Keep the last iteration of every device for the chosen type and channel
is_selected = (
    (new_df['Device_Type_Name'] == device_type)
    & (new_df['Channel_Type'] == channel)
    & (new_df['Iteration_Number'] >= min_iter)
)
sel_df = (
    new_df[is_selected]
    .sort_values('Iteration_Number')
    .groupby("Device_SN")
    .tail(1)
)
sel_df

Unnamed: 0,Device_Type_Name,Device_SN,NOM,Iteration_Number,Channel_Type
10400,MTSS-1031А,1!,"[R1 0.000075, Rt1 0.001, C1 0.01, R3 0.020, R4...",1.0,Z
15891,MTSS-1031А,171202,"[R1 0.000075, Rt1 0.001, C1 0.0056, R3 0.033, ...",1.0,Z
6538,MTSS-1031А,272,"[R1 0.000075, Rt1 0.001, C1 0.015, R3 0.033, R...",1.0,Z
24291,MTSS-1031А,86203,"[R1 0.000150, Rt1 0.0022, C1 0.0056, R3 0.033,...",1.0,Z
24290,MTSS-1031А,86303,"[R1 0.000075, Rt1 0.001, C1 0.0056, R3 0.033, ...",1.0,Z
...,...,...,...,...,...
17360,MTSS-1031А,749,"[R1 0.000075, Rt1 0.001, C1 0.0056, R3 0.018, ...",23.0,Z
17397,MTSS-1031А,76501,"[R1 0.000150, Rt1 0.0022, C1 0.0056, R3 0.011,...",23.0,Z
30652,MTSS-1031А,1011582,"[R1 0.000160, Rt1 0.105, C1 0.001, R3 0.0033, ...",27.0,Z
30721,MTSS-1031А,1011722,"[R1 0.0001, Rt1 0.0261, C1 0.001, R3 0.0033, R...",33.0,Z


In [52]:
# Collect the nominals of every selected row into the dictionary;
# lines that could not be added are printed
nom_dict = {}
for nominal_rows in sel_df.NOM:
    get_nom_df(nominal_rows)
print()

/
* 1-st Stage*/
* 1-st Stage*/
* 1-st Stage*/



In [53]:
# Summary statistics for every nominal
col = ['Median', 'Mode', 'Mode_count', 'Mean', 'Max', 'Min',  'Count']
idx = nom_dict.keys()


def _nominal_stats(values):
    """Return the statistics of one nominal as a dict keyed by column name."""
    arr = np.asarray(values)
    # Mode from the sorted unique values and their counts: argmax returns the
    # first maximum, i.e. the smallest of the most frequent values. This does
    # not depend on the return shape of scipy.stats.mode, which differs
    # between SciPy versions.
    uniq, counts = np.unique(arr, return_counts=True)
    top = counts.argmax()
    return {
        'Median': np.median(arr),
        'Mode': uniq[top],
        'Mode_count': counts[top],
        'Mean': np.round(arr.mean(), 4),
        'Max': arr.max(),
        'Min': arr.min(),
        'Count': len(values),
    }


# Build the table in one step instead of assigning cell by cell through
# chained indexing (stat_table['Mean'][key] = ...), which pandas does not
# guarantee to write into the original frame. Columns get numeric dtypes.
stat_table = pd.DataFrame(
    [_nominal_stats(nom_dict[key]) for key in idx],
    columns=col,
    index=list(idx),
)

In [54]:
# Display the statistics table
stat_table

Unnamed: 0,Median,Mode,Mode_count,Mean,Max,Min,Count
R1,7.5e-05,7.5e-05,590,0.0001,0.0033,1.5e-05,915
Rt1,0.001,0.001,586,0.0055,1.748,0.001,915
C1,0.0056,0.0056,546,0.0078,0.033,0.001,915
R3,0.015,0.033,110,0.0174,0.068,0.00033,915
R4,0.062,0.13,106,0.0721,0.27,0.0013,915
R5,0.158,0.2,381,0.242,8.794,0.13,915
R7,0.91,0.51,139,1.145,20.0,0.15,915
C3,2.2,2.2,880,73226.3211,47000000.2,1.0,915
C4,0.022,0.022,124,0.0509,18.0,0.0,915
C5,0.1,0.1,437,3282.3949,2000000.0,0.01,914


In [17]:
# Save the statistics to a CSV file named after the selected device type, so
# the file name always matches the data it contains.
stat_table.to_csv('stat_{}.csv'.format(device_type))