# 1. Эксперимент #

Цель работы — освоение алгоритмов классификации на наборе данных, полученном методом Монте-Карло. В наборе данных представлены объекты трёх классов (материалов-замедлителей). Каждый объект описывается двумя признаками: временем замедления нейтрона и числом столкновений.

## Блок нейтронной физики ##

In [1]:
import pandas as pd
import random as rd
import math as math
import scipy.constants as const # Physical and mathematical constants module

# Suppress all warnings (the 'ignore' action applies to every category)
import warnings
warnings.simplefilter('ignore')

# Avogadro constant from scipy.constants
N_A = const.Avogadro

### Функция жизни нейтрона: ###

In [2]:
# Fix the RNG seed so that the generated data are the same on every run
rd.seed(42)

def life_of_neutron(Ein, Eout, dens, A1, A2, Mol_m, Sigma, alpha_1, alpha_2, sigma_1, sigma_2, x1, x2):
    """Simulate one neutron slowing down from ``Ein`` until its energy is <= ``Eout``.

    Each loop iteration is one collision in a two-species medium: a free path
    is sampled, the flight time is accumulated, the struck nucleus is chosen,
    and the new energy is drawn uniformly between ``alpha * E`` and ``E``.

    Parameters:
        Ein: starting energy (eV).
        Eout: energy threshold at which the history stops (eV).
        dens, A1, A2, Mol_m: accepted for interface compatibility; not used here.
        Sigma: macroscopic cross-section in 1/cm.
        alpha_1, alpha_2: lower bound of the post-collision energy fraction
            for the first / second nucleus.
        sigma_1, sigma_2: microscopic cross-sections of the two nuclei.
        x1, x2: number of nuclei of each species per molecule.

    Returns:
        A ``(collisions, time)`` tuple: the number of collisions and the summed
        flight time (path in metres divided by speed in m/s). ``(0, 0)`` is
        returned when ``Ein`` is not greater than ``Eout``.
    """
    Energy = Ein
    colision = 0  # number of collisions so far
    time = 0  # accumulated flight time

    while Energy > Eout:
        # Free path: -ln(r)/Sigma gives centimetres, /100 converts to metres.
        # NOTE: rd.random() is in [0, 1); an exact 0.0 would make log() raise.
        l = -1*math.log(rd.random())/Sigma/100
        # Non-relativistic speed sqrt(2E/m): 1.6e-19 converts eV to J and
        # 1.6749e-27 is the neutron mass in kg.
        V = (2*Energy*1.6*(10**(-19))/(1.6749*10**(-27)))**0.5
        time += l/V
        colision += 1

        # Choose the struck nucleus in proportion to x_i * sigma_i.
        p_first = x1*sigma_1/(x1*sigma_1 + x2*sigma_2)
        alpha = alpha_1 if rd.random() < p_first else alpha_2

        # The energy after scattering is uniform between alpha*E and E.
        Energy = rd.uniform(Energy, Energy*alpha)

    return colision, time

### Входные данные по материалам: ###

In [3]:
# Input table: one tuple per moderator material, columns named explicitly
# so that their order matches the tables shown in the notebook.
df = pd.DataFrame(
    [
        ('Water', 1.0, 1, 16, 18, 'H2O', 30.0, 4.0, 2, 1),
        ('Heavy Plexiglass', 1.6, 12, 2, 28, 'C2D2', 4.9, 3.5, 2, 2),
        ('Heavy Water', 1.1, 2, 16, 20, 'D2O', 3.5, 4.0, 2, 1),
    ],
    columns=[
        'Material', 'Density g/cm^3', 'A_1', 'A_2', 'Molar mass g/mol',
        'Chemical formula', 'sigma_1 b', 'sigma_2 b', 'x1', 'x2',
    ],
)

df

Unnamed: 0,Material,Density g/cm^3,A_1,A_2,Molar mass g/mol,Chemical formula,sigma_1 b,sigma_2 b,x1,x2
0,Water,1.0,1,16,18,H2O,30.0,4.0,2,1
1,Heavy Plexiglass,1.6,12,2,28,C2D2,4.9,3.5,2,2
2,Heavy Water,1.1,2,16,20,D2O,3.5,4.0,2,1


### Расчет макросечения: ###

In [4]:
# Macroscopic cross-section per material, in 1/cm:
#   Sigma = (x1*sigma_1 + x2*sigma_2) * N_A * density / molar_mass / 1e24
# The 1e24 divisor converts the barn-valued sigma columns to cm^2.
# The column is computed in one expression instead of per-row
# `df[col][k] = ...` writes: that form is chained assignment, which pandas
# warns about and which no longer updates `df` under Copy-on-Write.
df['\u03A3, 1/cm'] = (
    (df['x1']*df['sigma_1 b'] + df['x2']*df['sigma_2 b'])
    * N_A*df['Density g/cm^3']/df['Molar mass g/mol']/1e24
)
display(df)

Unnamed: 0,Material,Density g/cm^3,A_1,A_2,Molar mass g/mol,Chemical formula,sigma_1 b,sigma_2 b,x1,x2,"Σ, 1/cm"
0,Water,1.0,1,16,18,H2O,30.0,4.0,2,1,2.141206
1,Heavy Plexiglass,1.6,12,2,28,C2D2,4.9,3.5,2,2,0.578126
2,Heavy Water,1.1,2,16,20,D2O,3.5,4.0,2,1,0.36434


### Расчет ступеньки замедления: ###

In [5]:
# Energy-loss bound for a nucleus of mass number A:
#   alpha = ((A - 1)/(A + 1))**2
# life_of_neutron draws the post-collision energy between alpha*E and E.
# '\u0251' is the "ɑ" character used in the column names.
# Computed column-wise: per-row `df[col][k] = ...` is chained assignment and
# does not update `df` under pandas Copy-on-Write.
df['\u0251_1'] = ((df['A_1'] - 1)/(df['A_1'] + 1))**2
df['\u0251_2'] = ((df['A_2'] - 1)/(df['A_2'] + 1))**2
display(df)

Unnamed: 0,Material,Density g/cm^3,A_1,A_2,Molar mass g/mol,Chemical formula,sigma_1 b,sigma_2 b,x1,x2,"Σ, 1/cm",ɑ_1,ɑ_2
0,Water,1.0,1,16,18,H2O,30.0,4.0,2,1,2.141206,0.0,0.778547
1,Heavy Plexiglass,1.6,12,2,28,C2D2,4.9,3.5,2,2,0.578126,0.715976,0.111111
2,Heavy Water,1.1,2,16,20,D2O,3.5,4.0,2,1,0.36434,0.111111,0.778547


### Создание DataFrame для записи данных ###

In [6]:
# Empty results table with the four output columns; the Monte Carlo loop
# further down adds one row per simulated neutron via df1.loc[counter].
df1 = pd.DataFrame({'time, 10^(-5) c': [],
                   'colisions': [],
                    'Material' : [],
                    'target' : [] 
              })
df1

Unnamed: 0,"time, 10^(-5) c",colisions,Material,target


### Эксперимент методом Монте-Карло ###

In [7]:
Estart = 1_000_000 # 1 MeV
Eend = 1 # 1 eV
# Per-material accumulators, indexed by the row label k of df
# (four slots are allocated; only the labels present in df.index are used).
Colision = [0, 0, 0, 0]
Time = [0, 0, 0, 0]
number_of_event = 300
counter = 0 # Row counter for writing into the DataFrame
for k in df.index: # Loop over materials
    # Material k gets number_of_event*(k+1) histories, so the classes are
    # deliberately of different sizes.
    for i in range(0, number_of_event*(k+1)): # Number of events for each material
        colision, time = life_of_neutron(Estart, Eend, df['Density g/cm^3'][k],
                                          df['A_1'][k], df['A_2'][k],
                                          df['Molar mass g/mol'][k], df['Σ, 1/cm'][k],
                                          df['ɑ_1'][k], df['ɑ_2'][k],
                                          df['sigma_1 b'][k], df['sigma_2 b'][k],
                                          df['x1'][k], df['x2'][k],)
        # time is multiplied by 1e5 to match the 'time, 10^(-5) c' column;
        # the material's row label k is stored as the class target.
        df1.loc[counter] = {'Material': df['Material'][k], 'time, 10^(-5) c': time*100_000, 'colisions': colision, 'target': k}
        Colision[k]+=colision
        Time[k]+=time
        counter+=1


# Turn the sums into per-material means (divide by the number of histories run).
for k in df.index:
    Colision[k] = Colision[k]/number_of_event/(k+1)
    Time[k] = Time[k]/number_of_event/(k+1)
    
# Report material name, mean collision count and mean (unscaled) time.
for k in df.index:
    print(df['Material'][k])
    print(Colision[k])
    print(Time[k])

Water
15.643333333333333
6.787573276044247e-07
Heavy Plexiglass
35.90833333333333
6.268212392367504e-06
Heavy Water
28.513333333333335
7.811704616661977e-06


# 2. SQL vs Pandas

## 2.1 Create a Table

Посмотрим, что представляют собой полученные данные

In [8]:
df1.head()

Unnamed: 0,"time, 10^(-5) c",colisions,Material,target
0,0.060134,18.0,Water,0.0
1,0.015265,11.0,Water,0.0
2,0.058799,17.0,Water,0.0
3,0.047027,14.0,Water,0.0
4,0.018141,10.0,Water,0.0


Еще раз взглянем на входные данные

In [9]:
df.head()

Unnamed: 0,Material,Density g/cm^3,A_1,A_2,Molar mass g/mol,Chemical formula,sigma_1 b,sigma_2 b,x1,x2,"Σ, 1/cm",ɑ_1,ɑ_2
0,Water,1.0,1,16,18,H2O,30.0,4.0,2,1,2.141206,0.0,0.778547
1,Heavy Plexiglass,1.6,12,2,28,C2D2,4.9,3.5,2,2,0.578126,0.715976,0.111111
2,Heavy Water,1.1,2,16,20,D2O,3.5,4.0,2,1,0.36434,0.111111,0.778547


Создадим базу данных из этих двух data frames

In [10]:
import sqlite3

In [11]:
# Open the SQLite database file and create a cursor for raw SQL statements
conn = sqlite3.connect('data/neutron_moderation.db')
c = conn.cursor()

In [12]:
# Store the materials table as SQL table Input_data, replacing any existing one; the index is not written
df.to_sql('Input_data', conn, if_exists='replace', index = False)

In [13]:
# Store the simulation results as SQL table Output_data, replacing any existing one; the index is not written
df1.to_sql('Output_data', conn, if_exists='replace', index = False)

Для тренировки создадим пустую таблицу напрямую через SQL (не из DataFrame), а затем удалим её

In [14]:
# Scratch table created directly through SQL rather than from a DataFrame.
# IF NOT EXISTS keeps the cell re-runnable, in line with the
# DROP TABLE IF EXISTS statement used to remove the table.
c.execute('CREATE TABLE IF NOT EXISTS CARS (Brand text, Price number)')
conn.commit()

In [15]:
# Remove the scratch table; IF EXISTS makes this a no-op when it is absent
c.execute('DROP TABLE IF EXISTS CARS')
conn.commit()

Создадим df из полученной базы данных

In [16]:
# Reload the simulation results from the database into df1 and preview them
df1 = pd.read_sql("SELECT * from Output_data", conn)
df1.head()

Unnamed: 0,"time, 10^(-5) c",colisions,Material,target
0,0.060134,18.0,Water,0.0
1,0.015265,11.0,Water,0.0
2,0.058799,17.0,Water,0.0
3,0.047027,14.0,Water,0.0
4,0.018141,10.0,Water,0.0


In [17]:
# Reload the materials table from the database into df and preview it
df = pd.read_sql("SELECT * from Input_data", conn)
df.head()

Unnamed: 0,Material,Density g/cm^3,A_1,A_2,Molar mass g/mol,Chemical formula,sigma_1 b,sigma_2 b,x1,x2,"Σ, 1/cm",ɑ_1,ɑ_2
0,Water,1.0,1,16,18,H2O,30.0,4.0,2,1,2.141206,0.0,0.778547
1,Heavy Plexiglass,1.6,12,2,28,C2D2,4.9,3.5,2,2,0.578126,0.715976,0.111111
2,Heavy Water,1.1,2,16,20,D2O,3.5,4.0,2,1,0.36434,0.111111,0.778547


## 2.2 INSERT Operation

Добавление через pandas

In [18]:
data = [{"Material": 'Carbon', "Chemical formula": 'C'}]
data

[{'Material': 'Carbon', 'Chemical formula': 'C'}]

In [19]:
data = pd.DataFrame(data)
data

Unnamed: 0,Material,Chemical formula
0,Carbon,C


In [20]:
df = pd.concat([df, data], ignore_index=True)

In [21]:
df

Unnamed: 0,Material,Density g/cm^3,A_1,A_2,Molar mass g/mol,Chemical formula,sigma_1 b,sigma_2 b,x1,x2,"Σ, 1/cm",ɑ_1,ɑ_2
0,Water,1.0,1.0,16.0,18.0,H2O,30.0,4.0,2.0,1.0,2.141206,0.0,0.778547
1,Heavy Plexiglass,1.6,12.0,2.0,28.0,C2D2,4.9,3.5,2.0,2.0,0.578126,0.715976,0.111111
2,Heavy Water,1.1,2.0,16.0,20.0,D2O,3.5,4.0,2.0,1.0,0.36434,0.111111,0.778547
3,Carbon,,,,,C,,,,,,,


## 2.3 DELETE Operation

Удаление через pandas

In [22]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Material          4 non-null      object 
 1   Density g/cm^3    3 non-null      float64
 2   A_1               3 non-null      float64
 3   A_2               3 non-null      float64
 4   Molar mass g/mol  3 non-null      float64
 5   Chemical formula  4 non-null      object 
 6   sigma_1 b         3 non-null      float64
 7   sigma_2 b         3 non-null      float64
 8   x1                3 non-null      float64
 9   x2                3 non-null      float64
 10  Σ, 1/cm           3 non-null      float64
 11  ɑ_1               3 non-null      float64
 12  ɑ_2               3 non-null      float64
dtypes: float64(11), object(2)
memory usage: 544.0+ bytes


In [23]:
df.isnull().sum()

Material            0
Density g/cm^3      1
A_1                 1
A_2                 1
Molar mass g/mol    1
Chemical formula    0
sigma_1 b           1
sigma_2 b           1
x1                  1
x2                  1
Σ, 1/cm             1
ɑ_1                 1
ɑ_2                 1
dtype: int64

In [24]:
df = df.dropna()
df

Unnamed: 0,Material,Density g/cm^3,A_1,A_2,Molar mass g/mol,Chemical formula,sigma_1 b,sigma_2 b,x1,x2,"Σ, 1/cm",ɑ_1,ɑ_2
0,Water,1.0,1.0,16.0,18.0,H2O,30.0,4.0,2.0,1.0,2.141206,0.0,0.778547
1,Heavy Plexiglass,1.6,12.0,2.0,28.0,C2D2,4.9,3.5,2.0,2.0,0.578126,0.715976,0.111111
2,Heavy Water,1.1,2.0,16.0,20.0,D2O,3.5,4.0,2.0,1.0,0.36434,0.111111,0.778547


Добавление данных через SQL

In [25]:
# Open a new connection and cursor for the SQL insert/delete examples.
# NOTE(review): this rebinds `conn` and `c`; the connection opened earlier
# in the notebook is not closed first.
conn = sqlite3.connect('data/neutron_moderation.db')
c = conn.cursor()

In [26]:
def sql_insert(conn, entities):
    """Insert one row into ``Input_data`` and commit it.

    ``entities`` supplies the two bound values, in order: ``Material`` and
    ``x1``. Columns not named in the statement are left to their defaults.
    """
    statement = 'INSERT INTO Input_data (Material, x1) VALUES(?, ?)'
    conn.cursor().execute(statement, entities)
    conn.commit()

In [27]:
# Insert a 'Carbon' row with x1 = 1; sql_insert sets only Material and x1
entities = ('Carbon', 1)
sql_insert(conn, entities)

Удаление через SQL строк, в которых конкретный столбец пуст (равен NULL)

In [28]:
# Delete every row whose x2 is NULL, then commit the change
c.execute('DELETE FROM Input_data WHERE (x2 IS NULL)')
conn.commit()

## 2.4 SELECT Operation, WHERE, DISTINCT, LIMIT

Получим список столбцов через Pandas

In [29]:
# Column names via pandas: list() over a DataFrame yields its column labels.
# NOTE(review): this rebinds `c`, which held the SQLite cursor until now.
c = list(df)
c

['Material',
 'Density g/cm^3',
 'A_1',
 'A_2',
 'Molar mass g/mol',
 'Chemical formula',
 'sigma_1 b',
 'sigma_2 b',
 'x1',
 'x2',
 'Σ, 1/cm',
 'ɑ_1',
 'ɑ_2']

Получим список столбцов через SQL

In [30]:
row_names = []

def sql_fetch(conn):
    """Append the column names of ``Input_data`` to the global ``row_names``.

    The names are taken from the cursor description of a ``select *`` query,
    where the first item of each entry is the column name.
    """
    query = conn.cursor()
    query.execute('select * from Input_data')
    row_names.extend(column[0] for column in query.description)

sql_fetch(conn)

print(row_names)

['Material', 'Density g/cm^3', 'A_1', 'A_2', 'Molar mass g/mol', 'Chemical formula', 'sigma_1 b', 'sigma_2 b', 'x1', 'x2', 'Σ, 1/cm', 'ɑ_1', 'ɑ_2']


Вывод конкретных столбцов через Pandas

In [31]:
names = ["Material", "A_1", "A_2"]
df[names]

Unnamed: 0,Material,A_1,A_2
0,Water,1.0,16.0
1,Heavy Plexiglass,12.0,2.0
2,Heavy Water,2.0,16.0


Вывод конкретных столбцов через SQL

In [46]:
# Column selection in SQL: reopen the database, read three columns of
# Input_data, print them field by field, then close the connection.
conn = sqlite3.connect('data/neutron_moderation.db')
cursor = conn.execute("SELECT Material, A_1, A_2 from Input_data")
for row in cursor.fetchall():
    print("Material = ", row[0])
    print("A_1 = ", row[1])
    print("A_2 = ", row[2], "\n")
conn.close()

Material =  Water
A_1 =  1
A_2 =  16 

Material =  Heavy Plexiglass
A_1 =  12
A_2 =  2 

Material =  Heavy Water
A_1 =  2
A_2 =  16 



Вывод ограниченного числа строк через Pandas

In [43]:
df1.head(3)

Unnamed: 0,"time, 10^(-5) c",colisions,Material,target
0,0.060134,18.0,Water,0.0
1,0.015265,11.0,Water,0.0
2,0.058799,17.0,Water,0.0


Вывод ограниченного числа строк через SQL

In [51]:
# LIMIT in SQL: reopen the database, print the first two rows of Input_data
# (each followed by an empty line), then close the connection.
conn = sqlite3.connect('data/neutron_moderation.db')
cursor = conn.execute("SELECT * FROM Input_data limit 2")
for row in cursor.fetchall():
    print(row, end="\n\n")
conn.close()

('Water', 1.0, 1, 16, 18, 'H2O', 30.0, 4.0, 2, 1, 2.1412056035555556, 0.0, 0.7785467128027681)

('Heavy Plexiglass', 1.6, 12, 2, 28, 'C2D2', 4.9, 3.5, 2, 2, 0.57812551296, 0.7159763313609467, 0.1111111111111111)



Select с множественными условиями Pandas

In [57]:
df[(df.Material != 'Water') & (df.A_1 == 2)]

Unnamed: 0,Material,Density g/cm^3,A_1,A_2,Molar mass g/mol,Chemical formula,sigma_1 b,sigma_2 b,x1,x2,"Σ, 1/cm",ɑ_1,ɑ_2
2,Heavy Water,1.1,2.0,16.0,20.0,D2O,3.5,4.0,2.0,1.0,0.36434,0.111111,0.778547


Select с множественными условиями SQL

In [59]:
# Multi-condition SELECT in SQL. The WHERE clause mirrors the pandas filter
# used for the same task, (df.Material != 'Water') & (df.A_1 == 2), so that
# both sides of the comparison run the same query.
conn = sqlite3.connect('data/neutron_moderation.db')

cursor = conn.execute("SELECT * FROM Input_data where Material != 'Water' and A_1 = 2")

for row in cursor:
    print(row)
    print()

conn.close()

('Heavy Water', 1.1, 2, 16, 20, 'D2O', 3.5, 4.0, 2, 1, 0.36433951598000003, 0.1111111111111111, 0.7785467128027681)



Вывод уникальных значений (и их долей) Pandas

In [64]:
df1['Material'].value_counts(normalize=True)

Heavy Water         0.500000
Heavy Plexiglass    0.333333
Water               0.166667
Name: Material, dtype: float64

Вывод уникальных значений SQL

In [61]:
# DISTINCT in SQL: reopen the database, print each distinct Material found in
# Output_data (each followed by an empty line), then close the connection.
conn = sqlite3.connect('data/neutron_moderation.db')
cursor = conn.execute("SELECT DISTINCT Material FROM Output_data")
for row in cursor.fetchall():
    print(row, end="\n\n")
conn.close()

('Water',)

('Heavy Plexiglass',)

('Heavy Water',)



## 2.5 UPDATE Operation

Изменение данных через Pandas

In [33]:
df

Unnamed: 0,Material,Density g/cm^3,A_1,A_2,Molar mass g/mol,Chemical formula,sigma_1 b,sigma_2 b,x1,x2,"Σ, 1/cm",ɑ_1,ɑ_2
0,Water,1.0,1.0,16.0,18.0,H2O,30.0,4.0,2.0,1.0,2.141206,0.0,0.778547
1,Heavy Plexiglass,1.6,12.0,2.0,28.0,C2D2,4.9,3.5,2.0,2.0,0.578126,0.715976,0.111111
2,Heavy Water,1.1,2.0,16.0,20.0,D2O,3.5,4.0,2.0,1.0,0.36434,0.111111,0.778547


In [34]:
df.loc[df['Chemical formula'] == 'C2D2', 'Material'] = 'Unknow'
df

Unnamed: 0,Material,Density g/cm^3,A_1,A_2,Molar mass g/mol,Chemical formula,sigma_1 b,sigma_2 b,x1,x2,"Σ, 1/cm",ɑ_1,ɑ_2
0,Water,1.0,1.0,16.0,18.0,H2O,30.0,4.0,2.0,1.0,2.141206,0.0,0.778547
1,Unknow,1.6,12.0,2.0,28.0,C2D2,4.9,3.5,2.0,2.0,0.578126,0.715976,0.111111
2,Heavy Water,1.1,2.0,16.0,20.0,D2O,3.5,4.0,2.0,1.0,0.36434,0.111111,0.778547


In [35]:
df.loc[df['Chemical formula'] == 'C2D2', 'Material'] = 'Heavy Plexiglass'
df

Unnamed: 0,Material,Density g/cm^3,A_1,A_2,Molar mass g/mol,Chemical formula,sigma_1 b,sigma_2 b,x1,x2,"Σ, 1/cm",ɑ_1,ɑ_2
0,Water,1.0,1.0,16.0,18.0,H2O,30.0,4.0,2.0,1.0,2.141206,0.0,0.778547
1,Heavy Plexiglass,1.6,12.0,2.0,28.0,C2D2,4.9,3.5,2.0,2.0,0.578126,0.715976,0.111111
2,Heavy Water,1.1,2.0,16.0,20.0,D2O,3.5,4.0,2.0,1.0,0.36434,0.111111,0.778547


Изменение данных через SQL

In [39]:
# UPDATE in SQL: rename 'Water' to 'Unknow', commit, then print the whole
# Input_data table (each row followed by an empty line) and close.
conn = sqlite3.connect('data/neutron_moderation.db')
cursor = conn.execute("UPDATE Input_data set Material = 'Unknow' where Material = 'Water'")
conn.commit()
cursor.execute('SELECT * FROM Input_data')
for row in cursor.fetchall():
    print(row, end="\n\n")
conn.close()

('Unknow', 1.0, 1, 16, 18, 'H2O', 30.0, 4.0, 2, 1, 2.1412056035555556, 0.0, 0.7785467128027681)

('Heavy Plexiglass', 1.6, 12, 2, 28, 'C2D2', 4.9, 3.5, 2, 2, 0.57812551296, 0.7159763313609467, 0.1111111111111111)

('Heavy Water', 1.1, 2, 16, 20, 'D2O', 3.5, 4.0, 2, 1, 0.36433951598000003, 0.1111111111111111, 0.7785467128027681)



In [42]:
# Reverse UPDATE: rename 'Unknow' back to 'Water', commit, then print the
# whole Input_data table (each row followed by an empty line) and close.
conn = sqlite3.connect('data/neutron_moderation.db')
cursor = conn.execute("UPDATE Input_data set Material = 'Water' where Material = 'Unknow'")
conn.commit()
cursor.execute('SELECT * FROM Input_data')
for row in cursor.fetchall():
    print(row, end="\n\n")
conn.close()

('Water', 1.0, 1, 16, 18, 'H2O', 30.0, 4.0, 2, 1, 2.1412056035555556, 0.0, 0.7785467128027681)

('Heavy Plexiglass', 1.6, 12, 2, 28, 'C2D2', 4.9, 3.5, 2, 2, 0.57812551296, 0.7159763313609467, 0.1111111111111111)

('Heavy Water', 1.1, 2, 16, 20, 'D2O', 3.5, 4.0, 2, 1, 0.36433951598000003, 0.1111111111111111, 0.7785467128027681)



## 2.6 ORDER BY

Сортировка в Pandas по возрастанию с условием

In [67]:
df1[df1.Material == 'Water'].sort_values('colisions')

Unnamed: 0,"time, 10^(-5) c",colisions,Material,target
129,0.039810,6.0,Water,0.0
280,0.013819,6.0,Water,0.0
61,0.017364,7.0,Water,0.0
44,0.017380,7.0,Water,0.0
38,0.008106,7.0,Water,0.0
...,...,...,...,...
90,0.248149,26.0,Water,0.0
60,0.041406,26.0,Water,0.0
130,0.113050,26.0,Water,0.0
71,0.135314,27.0,Water,0.0


Сортировка в Pandas по убыванию с условием

In [69]:
df1[df1.Material == 'Water'].sort_values('colisions', ascending = False)

Unnamed: 0,"time, 10^(-5) c",colisions,Material,target
71,0.135314,27.0,Water,0.0
202,0.096467,27.0,Water,0.0
90,0.248149,26.0,Water,0.0
23,0.157339,26.0,Water,0.0
60,0.041406,26.0,Water,0.0
...,...,...,...,...
61,0.017364,7.0,Water,0.0
178,0.019329,7.0,Water,0.0
38,0.008106,7.0,Water,0.0
280,0.013819,6.0,Water,0.0


Сортировка SQL по возрастанию с условием

In [75]:
# ORDER BY ascending in SQL: the ten 'Water' rows of Output_data with the
# fewest collisions, each printed with a trailing empty line.
conn = sqlite3.connect('data/neutron_moderation.db')
cursor = conn.execute(
    "SELECT * FROM Output_data where Material = 'Water' order by colisions limit 10")
for row in cursor.fetchall():
    print(row, end="\n\n")
conn.close()

(0.03981004270665086, 6.0, 'Water', 0.0)

(0.013818766639619376, 6.0, 'Water', 0.0)

(0.008105794157350812, 7.0, 'Water', 0.0)

(0.017379645765128005, 7.0, 'Water', 0.0)

(0.01736400741192504, 7.0, 'Water', 0.0)

(0.01932861098308254, 7.0, 'Water', 0.0)

(0.023179459144930373, 8.0, 'Water', 0.0)

(0.07682616463959462, 8.0, 'Water', 0.0)

(0.0631499341506568, 8.0, 'Water', 0.0)

(0.037781693103645486, 8.0, 'Water', 0.0)



In [77]:
# ORDER BY descending in SQL: the five 'Water' rows of Output_data with the
# most collisions, each printed with a trailing empty line.
conn = sqlite3.connect('data/neutron_moderation.db')
cursor = conn.execute(
    "SELECT * FROM Output_data where Material = 'Water' order by colisions desc limit 5")
for row in cursor.fetchall():
    print(row, end="\n\n")
conn.close()

(0.13531443463957188, 27.0, 'Water', 0.0)

(0.09646672385022056, 27.0, 'Water', 0.0)

(0.15733943266182615, 26.0, 'Water', 0.0)

(0.04140597805212526, 26.0, 'Water', 0.0)

(0.24814923845952933, 26.0, 'Water', 0.0)



## 2.7 IN и NOT IN