In [1]:
# Устанавливает библиотеку для работы с БД
pip install pyodbc

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'D:\ProgramFiles\Anaconda3\python.exe -m pip install --upgrade pip' command.


In [1]:
# Загрузка библиотек
import getpass
import os
import re

import numpy as np
import pandas as pd
import pyodbc
from scipy.stats import mode

# Функции

In [83]:
# Strip comments and redundant whitespace from the nominals column
def clean_nom(x):
    """Split a raw nominals string into a list of clean, non-empty lines.

    Block comments of the form ``/* ... */`` (together with any CR LF pairs
    directly around them) are replaced by a line break, carriage returns are
    removed, tabs become spaces and every run of spaces is collapsed to a
    single space, so each returned line has single-space separated tokens.

    Parameters
    ----------
    x : str or None
        Raw text of one NOM cell.

    Returns
    -------
    list of str or None
        Stripped, non-empty lines; ``None`` is passed through unchanged.
    """
    if x is None:
        return x
    # Raw strings keep the regex escapes intact (the non-raw originals relied
    # on invalid string escapes such as '\/' and '\w').
    x = re.sub(r'(\r\n)*(/\*)+[\w\s=-]*(\*/)+(\r\n)*', '\n', x)
    x = x.replace('\r', '').replace('\t', ' ')
    # Collapse runs of ANY length: replacing only '  ' pairs leaves
    # "a   b" with two spaces, which a later split(' ') cannot parse.
    x = re.sub(r' {2,}', ' ', x)
    return [line.strip() for line in x.split('\n') if line.strip()]

# Accumulate parsed nominals in a dictionary
def get_nom_df(x, target=None):
    """Parse ``"<name> <value>"`` rows and append the values to a dictionary.

    Each row is split on whitespace into exactly two tokens; the second one is
    converted to ``float`` and appended to the list stored under the first.
    Rows that do not have exactly two tokens, or whose value is not a number,
    are printed and skipped.

    Parameters
    ----------
    x : iterable of str or None
        Rows of one NOM cell; ``None`` is ignored.
    target : dict, optional
        Dictionary to fill. Defaults to the notebook-level ``nom_dict``, so
        existing one-argument calls keep working.

    Returns
    -------
    None
    """
    if x is None:
        return
    if target is None:
        target = nom_dict  # notebook-level accumulator, defined before the call
    for row in x:
        try:
            # split() tolerates repeated spaces/tabs, unlike split(' ')
            name, val = row.split()
            value = float(val)
        except ValueError:
            print(row)
            continue
        target.setdefault(name, []).append(value)
    return

# Запрос из БД и сохранение данных в таблице

In [84]:
# Database connection parameters.
# The password is deliberately NOT stored in the notebook: it is read from the
# NORDLAB_DB_PWD environment variable and, when that is unset or empty,
# requested interactively.
_db_password = os.environ.get('NORDLAB_DB_PWD') or getpass.getpass('Password for DB user NordLab: ')
# ODBC requires values containing ';' or braces to be wrapped in {...},
# with any closing brace doubled; plain passwords are passed through as-is.
if any(ch in _db_password for ch in ';{}'):
    _db_password = '{' + _db_password.replace('}', '}}') + '}'
conn_str = (
    r'DRIVER={SQL Server};'
    r'SERVER=192.168.0.250\SQLEXPRESS;'
    r'DATABASE=db1;'
    r'UID=NordLab;'
    'PWD=' + _db_password + ';')

In [85]:
# Query text: every column of Channel_States joined with Devices, Orders,
# Device_Types, Sensors and Sensor_Type. Full outer joins are used, so rows
# without a match on either side are returned as well, and SELECT * returns
# the join-key columns of each table under their own names.
query = '''
        Select    *  from Channel_States
        Full outer join Devices on Devices.Device_ID=Channel_States.Device_ID
        Full outer join Orders on Orders.Order_ID=Devices.Order_Number
        Full outer join Device_Types on Device_Types.Device_Type_ID=Devices.Device_Type
        Full outer join Sensors on Sensors.Sensor_ID =Channel_States.Sensor_ID 
        Full outer join Sensor_Type on Sensor_Type.Sensor_Type_ID = Sensors.Sensor_Type
        '''

In [230]:
# Connect to the database, run the query and keep the result as a DataFrame.
cnxn = pyodbc.connect(conn_str)
try:
    DF = pd.read_sql_query(query, cnxn)
finally:
    # Release the connection even when the query fails.
    cnxn.close()
# The joined tables share column names; keep the first occurrence of each.
DF = DF.loc[:, ~DF.columns.duplicated()]
# Keep only rows that have both a transfer function and a sensor id.
DF = DF[DF.Sensor_Tr_func.notna() & DF.Sensor_ID.notna()]

In [231]:
# Show the column names that remain after de-duplication
DF.columns

Index(['Channel_State_ID', 'Device_ID', 'Sensor_ID', 'SPL_ID', 'Calibr_SPL_ID',
       'Calibr_File', 'Calibr_Data', 'Sensor_Tr_func', 'NOM', 'Date',
       'Iteration_Number', 'Channel_Type', 'Comment', 'Self_Calib_ID',
       'Device_Type', 'Device_SN', 'In_Work', 'Z_Locked', 'X_locked',
       'Y_Locked', 'Order_Number', 'Sensitivity', 'Order_ID', 'Number',
       'Customer', 'Term', 'Amount', 'Responsible', 'Device_Type_ID',
       'Device_Type_Name', 'Type', 'Default_Calib_File', 'Default_Calib_SPL',
       'Default_Z_Sensor_Type', 'Default_X_Sensor_Type',
       'Default_Y_Sensor_Type', 'Sensor_Type', 'Sensor_Number', 'FilledBy',
       'Sensor_Type_ID', 'Sensor_Type_Name'],
      dtype='object')

# Статистика номиналов

In [232]:
# List every distinct device type name present in DF
DF.Device_Type_Name.unique()

array(['MTSS-1043A', 'MTSS-1003', 'MTSS-1031А', 'CME-4211 BH',
       'MTSS-2003', 'MTSS-1001', 'MTSS-1021', 'MTSS-1033A', 'MTSS-1011',
       'CME-4311LT', 'CME-4211', 'CME-3211', 'MTSS-1041A', 'CME-4311',
       'CME-3011h', 'METR-01', 'CME-6011', 'CME-6111', 'CME-6011A',
       'METR-03', 'MTSS-1031H', 'MTSS-1031G', 'MTSS-1043A DIG',
       'UMG_hydro', 'Gen_I', 'UMG_Geo', 'NS_hydro', 'METR-01 MHD'],
      dtype=object)

In [233]:
# List the distinct channel types present in DF
DF.Channel_Type.unique()

array(['Z', 'X', 'Y'], dtype=object)

In [234]:
device_type = 'MTSS-1031А' # device type; compared for exact equality with Device_Type_Name
channel = 'Z' # channel type; compared for exact equality with Channel_Type
min_iter = 0 # lowest Iteration_Number to keep
year = 1000 # earliest year to keep

In [235]:
# Columns needed for the statistics
col_list = ['Device_Type_Name', 'Device_SN', 'NOM', 'Iteration_Number', 'Channel_Type', 'Date']
new_df = DF[col_list]
# Drop repeated column names and take an explicit copy, so the column
# assignments below modify new_df itself rather than a slice of DF
# (avoids SettingWithCopyWarning and silently lost writes).
new_df = new_df.loc[:, ~new_df.columns.duplicated()].copy()
new_df['NOM'] = new_df['NOM'].apply(clean_nom)  # raw text -> list of "name value" rows
new_df['Date'] = pd.to_datetime(new_df['Date'])  # make sure Date is a datetime column
new_df['Year'] = new_df['Date'].dt.year  # year used for filtering

In [236]:
# For every device serial number keep the row with the highest iteration
# number among the rows matching the selected type, channel, iteration and year.
sel_df = (
    new_df.loc[
        (new_df['Device_Type_Name'] == device_type)
        & (new_df['Channel_Type'] == channel)
        & (new_df['Iteration_Number'] >= min_iter)
        & (new_df['Year'] >= year)
    ]
    .sort_values(by='Iteration_Number')
    .groupby("Device_SN")
    .tail(1)
)
sel_df

Unnamed: 0,Device_Type_Name,Device_SN,NOM,Iteration_Number,Channel_Type,Date,Year
24289,MTSS-1031А,86303,"[R1 0.000075, Rt1 0.001, C1 0.0056, R3 0.033, ...",0.0,Z,2019-06-17 19:50:01.600,2019
24292,MTSS-1031А,86203,"[R1 0.000150, Rt1 0.0022, C1 0.0056, R3 0.033,...",0.0,Z,2019-06-17 19:49:43.527,2019
8775,MTSS-1031А,358,"[R1 0.000150, Rt1 0.0022, C1 0.0068, R3 0.020,...",0.0,Z,2016-06-09 17:20:55.110,2016
8281,MTSS-1031А,339,"[R1 0.000150, Rt1 0.0022, C1 0.0047, R3 0.033,...",0.0,Z,2016-04-06 16:06:13.653,2016
6537,MTSS-1031А,272,"[R1 0.000075, Rt1 0.001, C1 0.015, R3 0.033, R...",0.0,Z,2015-09-12 18:24:53.847,2015
...,...,...,...,...,...,...,...
17359,MTSS-1031А,749,"[R1 0.000075, Rt1 0.001, C1 0.0056, R3 0.018, ...",22.0,Z,2019-02-14 21:41:30.613,2019
17397,MTSS-1031А,76501,"[R1 0.000150, Rt1 0.0022, C1 0.0056, R3 0.011,...",23.0,Z,2019-05-21 10:17:06.250,2019
30656,MTSS-1031А,1011582,"[R1 0.000160, Rt1 0.105, C1 0.001, R3 0.0033, ...",26.0,Z,2021-07-06 11:58:28.187,2021
30718,MTSS-1031А,1011722,"[R1 0.0001, Rt1 0.0261, C1 0.001, R3 0.0033, R...",32.0,Z,2021-07-06 11:55:49.777,2021


In [237]:
# Collect the nominals of every selected row into nom_dict;
# rows that could not be added are printed by get_nom_df.
nom_dict = {}
for nominal_rows in sel_df['NOM']:
    get_nom_df(nominal_rows)
print()

newz
/
* 1-st Stage*/
* 1-st Stage*/
* 1-st Stage*/



In [238]:
# Compute summary statistics for every nominal
col = ['Median', 'Mode', 'Mode_count', 'Mean', 'Max', 'Min',  'Count']
idx = nom_dict.keys()


def _nominal_stats(values):
    """Return the summary statistics of one nominal as a dict keyed by column name.

    The mode is taken from ``np.unique`` counts: the most frequent value,
    and the smallest one when several values are equally frequent. This does
    not depend on the return shape of ``scipy.stats.mode``, which differs
    between SciPy versions.
    """
    arr = np.asarray(values, dtype=float)
    uniq, counts = np.unique(arr, return_counts=True)
    top = counts.argmax()  # first maximum -> smallest of the tied values
    return {
        'Median': np.median(arr),
        'Mode': uniq[top],
        'Mode_count': counts[top],
        'Mean': np.round(arr.mean(), 4),
        'Max': arr.max(),
        'Min': arr.min(),
        'Count': len(arr),
    }


# Build the table in one go instead of writing cell by cell through chained
# indexing (stat_table['Mean'][key] = ...), which does not reliably write
# into the frame.
stat_table = pd.DataFrame(
    [_nominal_stats(values) for values in nom_dict.values()],
    index=list(idx),
    columns=col,
)

In [239]:
# Display the per-nominal statistics table
stat_table

Unnamed: 0,Median,Mode,Mode_count,Mean,Max,Min,Count
R1,7.5e-05,7.5e-05,592,0.0001,0.0033,1.5e-05,918
Rt1,0.001,0.001,588,0.0055,1.748,0.001,918
C1,0.0056,0.0056,546,0.0079,0.033,0.001,918
R3,0.015,0.033,155,0.0187,0.082,0.00024,918
R4,0.068,0.13,156,0.0768,0.33,0.0013,918
R5,0.158,0.2,387,0.2422,8.794,0.13,918
R7,0.82,0.51,191,1.0872,20.0,0.15,918
C3,2.2,2.2,887,124185.2745,47000000.2,1.0,918
C4,0.022,0.022,119,0.0496,18.0,0.0,918
C5,0.1,0.1,465,3380.7071,2000000.0,0.01,917


In [17]:
# Save the statistics; the file name follows the selected device type so it
# cannot go stale when device_type is changed.
stat_table.to_csv('stat_{}.csv'.format(device_type))