In [None]:
#
#
#

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model
from sklearn.metrics import r2_score

from reloading_utils import get_url

In [None]:
# Cartridge catalogue, read from data/reloading_data - CATRIDGES.csv
cartridges = pd.read_csv(get_url('data/reloading_data - CATRIDGES.csv'))
print(cartridges)

# Plain array of (title, volume, diameter) rows, one per cartridge to load below.
cartridge_columns = ['title', 'volume', 'diameter']
cartridges_to_process = cartridges[cartridge_columns].to_numpy()
print(cartridges_to_process)

In [None]:
def process_data_file(c, v, a):
    """Load the load-data CSV of one cartridge and tag its rows.

    Reads ``data/reloading_data - <c>.csv`` (resolved through ``get_url``)
    and appends three constant columns describing the cartridge.

    Parameters
    ----------
    c : cartridge title; used in the file name and stored in the
        ``cartridge_caliber`` column.
    v : value stored in the ``cartridge_volume`` column.
    a : value stored in the ``cartridge_diameter`` column.

    Returns
    -------
    pandas.DataFrame or None
        The tagged frame, or ``None`` when the file could not be read
        (the failure is reported on stdout and otherwise ignored).
    """
    filename = f'data/reloading_data - {c}.csv'
    try:
        d = pd.read_csv(get_url(filename))
    except Exception as exc:
        # Best effort: a missing or unreadable file must not abort the whole
        # load. Catching Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate, and the reason is shown to the reader.
        print("ERROR: Cannot process:", filename, f"({exc!r})")
        return None

    d['cartridge_caliber'] = c
    d['cartridge_volume'] = v
    d['cartridge_diameter'] = a
    return d

# One frame per cartridge row (title, volume, diameter); entries are None for
# files that failed to load.
frames = [ process_data_file(*cartridge_row) for cartridge_row in cartridges_to_process ]

# Stack everything into a single table with a fresh 0..n-1 index.
data = pd.concat(frames, ignore_index=True)

# print(data.head())  # DEBUG
print('data.shape', data.shape)

In [None]:
# reloading_data - powder burning rate chart
powder_rates = pd.read_csv(get_url('data/reloading_data - powder burning rate chart.csv'))
print(powder_rates.head())

#
# verify the data
#
unique_powder_names = powder_rates.powder.unique()
print(len(unique_powder_names), len(powder_rates))
# Comparing the unique names element-wise against the full column only works
# when both have the same length; with duplicates present the comparison is
# between arrays of different lengths and does not reliably reach the
# assertion message. Check the two properties directly instead.
assert powder_rates.powder.notna().all(), "powder_rates has a missing powder name"
assert powder_rates.powder.is_unique, "powder_rates is not unique"

In [None]:
# Flatten the stacked load tables into `lee`.
#
# In `data`, a row whose 'powder' cell looks like "115 grain jacketed" is a
# header announcing the bullet weight and type of the rows that follow it.
# Those header rows are dropped and their weight/type are copied onto every
# following data row as the 'bul_weight' and 'bul_type' columns.

orig_columns = [
    'powder', 'start', 'velocity', 'never exceed', 'velocity max', 'press', 'units', 'min oal',
    'cartridge_caliber', 'cartridge_volume', 'cartridge_diameter'
    ]
new_columns  = [
    'bul_weight', 'bul_type',
    'powder', 's_load', 's_velocity', 'ne_load', 'ne_velocity', 'press', 'units', 'oal',
    'cartridge_caliber', 'cartridge_volume', 'cartridge_diameter'
    ]

bul_weight = None
bul_type = None
records = []
for line in data[orig_columns].itertuples(index=False, name=None):
    # print(123, line) # DEBUG
    if ' grain ' in line[0]: # this is a bullettype "115 grain jacketed"
        # maxsplit=1 keeps any later " grain " inside the type description.
        bul_weight, bul_type = line[0].split(' grain ', 1)
    else:
        if bul_weight is None:
            raise ValueError(
                "data row %r appears before any '<weight> grain <type>' header row" % (line,)
            )
        records.append((int(bul_weight), bul_type, *line))

# Build the frame once from the collected rows: appending with
# lee.loc[len(lee)] copies the frame on every row (quadratic), and constructing
# it in one go lets pandas infer a proper dtype for each column.
lee = pd.DataFrame(records, columns=new_columns)
print('lee.shape', lee.shape)

In [None]:
def powder_id(x):
    """Return the row position of powder ``x`` in ``powder_rates``.

    Parameters
    ----------
    x : powder name, compared for equality with ``powder_rates.powder``.

    Returns
    -------
    int or None
        Position of the first matching row. When the name is not in the
        chart it is printed (so the reader can see which powders are
        missing) and ``None`` is returned.
    """
    matches = np.where(powder_rates.powder == x)[0]
    if len(matches) == 0:
        # Explicit "not found" path instead of a bare except, which would
        # also hide unrelated errors and interrupts.
        print (x)
        return None
    return matches[0]

def bul_type_id(x):
    """Return the integer code of bullet type ``x``.

    The code is the position of ``x`` among the distinct values of
    ``lee.bul_type`` (pandas returns them in order of first appearance).
    Raises IndexError when ``x`` is not one of them.
    """
    known_types = lee.bul_type.unique()
    positions = np.where(known_types == x)[0]
    return positions[0]


In [None]:
# Encode powder and bullet type as integer ids and attach each powder's rate.
lee['powder_id'] = [ powder_id(x) for x in lee.powder ]
lee['bul_type_id'] = [ bul_type_id(x) for x in lee.bul_type ]
# powder_id() yields None for a powder missing from the chart. Indexing
# powder_rates.rate with such a key raises KeyError on current pandas; .map
# looks each id up in the rate Series' index and leaves NaN where there is
# no match, so the row stays in the table with a missing rate.
lee['powder_rate'] = lee.powder_id.map(powder_rates.rate)

In [None]:
# Ad-hoc inspection snippets, all switched off.
# print(lee)
# lee.to_csv('lee.csv')
# print("saved...")

if False:
    feature_cols = ['powder', 'powder_id', 'bul_type_id', 'powder_rate', 'cartridge_volume', 'cartridge_diameter']

    # First feature row of each of two sample powders.
    for p in ('HP38', 'H110'):
        first_row = lee.loc[lee.powder == p, feature_cols].to_numpy()[0]
        print(first_row)

    # All H110 rows: the feature columns first, then every column.
    h110_rows = lee[lee.powder == 'H110']
    print(h110_rows[feature_cols])
    print('-------------')
    print(h110_rows)

In [None]:
# Spot check: the chart row(s) for HP38, then the distinct bullet weights in lee.
hp38_mask = powder_rates.powder == 'HP38'
print(powder_rates.loc[hp38_mask].to_numpy())
print(lee['bul_weight'].unique())

In [None]:
# Keep only the rows that have a value in every column.
lee_clean = lee.dropna(axis=0, how='any')
print(lee_clean.shape)

In [None]:
# Random ~80/20 train/test split of the cleaned rows. The generator is seeded
# so that Restart & Run All reproduces the same split and the same metrics.
SPLIT_SEED = 42
rng = np.random.default_rng(SPLIT_SEED)
msk = rng.random(len(lee_clean)) < 0.8
train = lee_clean[msk]
test = lee_clean[~msk]

# Targets: starting load and never-exceed load. Features are listed in the
# column order the model is trained on.
y_keys = ['s_load', 'ne_load']
x_keys = ['powder_id', 'bul_weight', 'bul_type_id', 'powder_rate', 'cartridge_volume', 'cartridge_diameter' ]

train_x = np.asanyarray(train[x_keys])
train_y = np.asanyarray(train[y_keys])

test_x = np.asanyarray(test[x_keys])
test_y = np.asanyarray(test[y_keys])
print('train_x', train_x.shape)

# Degree-2 polynomial expansion of the features, fitted on the training set.
poly = PolynomialFeatures(degree=2)
train_x_poly = poly.fit_transform(train_x)
print('train_x_poly', train_x_poly.shape)

clf = linear_model.LinearRegression()
# NOTE: fit() returns the fitted estimator itself (not predictions); the name
# train_y_ is kept only so that anything referring to it keeps working.
train_y_ = clf.fit(train_x_poly, train_y)
# The coefficients
if False:
    print ('Coefficients: ', clf.coef_)
    print ('Intercept: ',clf.intercept_)

# Only transform the held-out data; the expansion was already fitted above.
test_x_poly = poly.transform(test_x)
test_y_ = clf.predict(test_x_poly)

print('------------')
print("Mean absolute error: %.2f" % np.mean(np.absolute(test_y_ - test_y)))
# This is the mean of the squared residuals, i.e. the MSE, not a sum.
print("Mean squared error (MSE): %.2f" % np.mean((test_y_ - test_y) ** 2))
# r2_score expects (y_true, y_pred); the score is not symmetric in its arguments.
print("R2-score: %.2f" % r2_score(test_y, test_y_) )

In [None]:
# Distinct bullet types and bullet weights present in lee.
print(lee['bul_type'].unique())
print(lee['bul_weight'].unique())

# Cartridge titles with their volume and diameter.
print(cartridges[['title', 'volume', 'diameter']])

In [None]:
def compute_load(powder, weight, caliber):
    """Print the model's predicted loads for every known bullet type.

    Looks up the powder's id and rate in ``lee`` and the cartridge's volume
    and diameter in ``cartridges``, builds one feature row per bullet-type id
    (same column order as the training features: powder id, bullet weight,
    bullet-type id, powder rate, cartridge volume, cartridge diameter),
    expands the rows with the already fitted ``poly`` and prints the
    predictions of ``clf``.

    Parameters
    ----------
    powder : powder name as it appears in ``lee.powder``.
    weight : bullet weight, used as the ``bul_weight`` feature.
    caliber : cartridge title as it appears in ``cartridges.title``.

    Returns
    -------
    None
        Everything is printed: the looked-up values, the feature matrix, the
        bullet types (in id order) and the predicted rows.

    Raises
    ------
    IndexError
        If ``powder`` has no row in ``lee`` or ``caliber`` has no row in
        ``cartridges``.
    """
    pwd, rate = lee[lee.powder == powder][['powder_id', 'powder_rate']].to_numpy()[0]
    print('powder id', pwd, 'rate', rate)
    vol, diam = cartridges[cartridges.title == caliber][['volume', 'diameter']].to_numpy()[0]
    print('volume', vol, 'diameter', diam)

    # Bullet-type ids are positions in this array, so row i of the output
    # corresponds to bul_types[i].
    bul_types = lee.bul_type.unique()
    actual_x = np.array(
        [[pwd, weight, x, rate, vol, diam] for x in range(len(bul_types))]
    )
    print('X\n', actual_x)
    # Use transform, not fit_transform: predicting must not refit the shared
    # feature expansion that the model was trained with.
    actual_x_poly = poly.transform(actual_x)
    actual_y_ = clf.predict(actual_x_poly)
    print(bul_types)
    print('Y\n', actual_y_)

In [None]:
# Predicted loads for H110 powder, 125-weight bullet, 38 special cartridge.
compute_load(powder='H110', weight=125, caliber='38 special')

In [None]:
# Predicted loads for ACCUR #2 powder, 115-weight bullet, 9mm parabellum cartridge.
compute_load(powder='ACCUR #2', weight=115, caliber='9mm parabellum')

In [None]:
# Predicted loads for HP38 powder, 125-weight bullet, 9mm parabellum cartridge.
compute_load(powder='HP38', weight=125, caliber='9mm parabellum')

In [None]:
# Starting and never-exceed loads side by side, plus their ratio
# (never-exceed load divided by starting load) in a 'rate' column.
load_rate = lee[['s_load', 'ne_load']].assign(
    rate=lambda loads: loads.ne_load / loads.s_load
)

In [None]:
# Summary statistics of load_rate (columns s_load, ne_load and their ratio 'rate').
load_rate.describe()

In [None]:
# Number of rows with a bullet type recorded, per powder.
lee.groupby('powder')['bul_type'].count()

In [None]:
# Non-missing value count of every column, per powder.
lee.groupby(by='powder').count()