## Buckingham-Pi Python module
* [F=MA test case](#fma)
* [Dimensionless Data from data and units files](#dim1)
* [Dimensionless Groups from sympy expressions](#dim2)
* [Dimensionless Data from dimensionless groups and data](#dim3)
* [Function Test](#test1)

In [1]:
import pandas as pd
import numpy as np
import sympy.functions
from sympy import symbols, lambdify, parse_expr, Function
from BuckinghamPy.buckinghampy.buckinghampi import BuckinghamPi
from sympy import S

### F=MA test case <a class="anchor" id="fma"></a>
The BuckinghamPi module appears to have a problem with this case: when the dimensions for the F = MA equation are fed in, it raises an error. It should instead return a single set containing exactly one dimensionless group.

In [2]:
# F = m*a sanity check: register mass, force and acceleration, then ask
# BuckinghamPi for the dimensionless groups.
Example = BuckinghamPi()
# Earlier attempt that spelled the dimensions out in M, L and T, kept for
# reference. NOTE(review): it writes T/L^(2) while the comment further down
# defines Q = L/T^2 -- confirm which was intended.
# Example.add_variable(name='m', dimensions="M")
# Example.add_variable(name='f', dimensions="M*T/L^(2)")
# Example.add_variable(name='a', dimensions = "T/L^(2)", non_repeating=True)
# Current version: the acceleration dimension is collapsed into a single
# placeholder symbol Q.
Example.add_variable(name='m', dimensions="M")
Example.add_variable(name='f', dimensions="M*Q")
Example.add_variable(name='a', dimensions = "Q", non_repeating=True)
#Where Q = L/T^2
Example.generate_pi_terms()

# Display every generated set of pi terms.
Example.print_all()

<IPython.core.display.Math object>

---

### Create Dimensionless data from data and units file <a class="anchor" id="dim1"></a>

In [2]:
def create_dimless_data(units, data):
    """Evaluate the first pi term of every BuckinghamPi set on ``data``.

    Each entry of ``units`` is registered as a variable named ``X0``, ``X1``,
    ... in order; the last entry is registered as the non-repeating variable.

    Args:
        units: sequence of dimension strings understood by
            ``BuckinghamPi.add_variable``.
        data: 2-D array whose columns are passed positionally to the
            lambdified pi terms (assumes column k holds the values of
            ``Xk`` -- TODO confirm against callers).

    Returns:
        A list with one evaluated array per entry of ``trial.pi_terms``; only
        the first term (index 0) of each set is evaluated.
    """
    trial = BuckinghamPi()
    names = ["X" + str(idx) for idx in range(len(units))]

    # Every variable except the last one may be used as a repeating variable.
    for name, dims in zip(names[:-1], units[:-1]):
        trial.add_variable(name=name, dimensions=dims)
    trial.add_variable(
        name="X" + str(len(units) - 1),
        dimensions=units[-1],
        non_repeating=True,
    )

    unit_names = " ".join(names)
    print(unit_names)
    print(units)

    trial.generate_pi_terms()
    symbo = symbols(unit_names)
    print(trial.print_all())

    columns = data.T.tolist()
    output = []  # array containing the dimensionless data
    for pi_set in trial.pi_terms:
        evaluate = np.vectorize(
            lambdify(symbo, pi_set[0], modules=["scipy", "numpy"])
        )
        output.append(evaluate(*columns))
    return output

In [None]:
# Load the space-separated, header-less F = m*a training table as a NumPy array.
data = pd.read_csv('f1_train.txt', header=None, sep=' ').to_numpy()
# One dimension string per variable; create_dimless_data registers the last
# entry as the non-repeating variable.
fma_units = ["M*Q","M","Q"]

In [None]:
# Reduce the F = m*a table to its dimensionless groups and show the result.
dimred_data = create_dimless_data(fma_units, data)
print(dimred_data)

In [None]:
# Preview the units table that expr_vars_to_dimless_groups reads.
units_file = pd.read_csv("units.csv", sep=',')
units_file.head()

### Create dimensionless groups from sympy expressions <a class="anchor" id="dim2"></a>

In [3]:
def _unit_exponents(units_file, var, units_path):
    """Return the five unit exponents stored for ``var`` in the units table."""
    rows = units_file[units_file['Variable'] == var].values
    if len(rows) == 0:
        # The original indexing failed here with an opaque
        # "index 0 is out of bounds"; keep the exception type, name the culprit.
        raise IndexError(
            "variable " + repr(var) + " not found in " + repr(units_path)
        )
    return rows[0, 2:7]


def _dimension_string(exponents, dim_types):
    """Join the non-zero exponents into a string such as ``m^(1.0)*s^(-2.0)``."""
    return '*'.join(
        dim + '^' + '(' + str(power) + ')'
        for dim, power in zip(dim_types, exponents)
        if power != 0
    )


def expr_vars_to_dimless_groups(ind_vars,dep_var, units_path):
    """Build a BuckinghamPi problem from variable names and a units table.

    Args:
        ind_vars: whitespace-separated names of the independent variables.
        dep_var: name of the dependent variable; it is registered last, as
            the non-repeating variable.
        units_path: path to a comma-separated file with a ``Variable`` column.
            The columns at positions 2 to 6 of the matching row are read as
            the exponents of ``m``, ``s``, ``kg``, ``T`` and ``V`` in that
            order.

    Returns:
        The populated ``BuckinghamPi`` instance; ``generate_pi_terms`` has not
        been called on it.

    Raises:
        IndexError: if a variable name has no row in the units table.
    """
    ind_vars = ind_vars.split()
    Example = BuckinghamPi()
    units_file = pd.read_csv(units_path, sep=',')
    dim_types = ['m','s','kg','T','V']

    for var in ind_vars:
        print(var)
        dim_str = _dimension_string(
            _unit_exponents(units_file, var, units_path), dim_types
        )
        if dim_str == '':
            # All exponents are zero: the variable is skipped.
            continue
        Example.add_variable(name=var, dimensions=dim_str)

    # The dependent variable is always registered, even with empty dimensions.
    dep_dim_str = _dimension_string(
        _unit_exponents(units_file, dep_var, units_path), dim_types
    )
    Example.add_variable(name=dep_var, dimensions=dep_dim_str, non_repeating=True)
    return Example

In [43]:
# Scratch version of expr_vars_to_dimless_groups: print the dimension string
# built for each variable. The add_variable call at the bottom is commented
# out, so Example is left as an empty BuckinghamPi instance.
ind_vars = "epsilon c Ef r omega omega_0 p"
ind_vars = ind_vars.split()
Example = BuckinghamPi()
units_file = pd.read_csv("units.csv", sep=',')
# Unit symbols paired, by position, with the five exponent columns read below.
dim_types = ['m','s','kg','T','V']
for var in ind_vars:
    #print(var)
    #print(units_file[units_file['Variable'] == var].values[0,2:7])
    # Exponents come from columns 2..6 of the first row matching the variable.
    var_dat = units_file[units_file['Variable'] == var].values[0,2:7]
    dim_str = ""
    for i in range(0, len(var_dat)):
        if var_dat[i] != 0:
            dim_str = dim_str + '*' + dim_types[i] + '^' + '('+ str(var_dat[i]) + ')'
    # The printed string still carries its leading '*'.
    print(dim_str)
    #print("after dim_Str")
    if dim_str == '':
        continue
    #Example.add_variable(name=var, dimensions=dim_str[1:])

*m^(1.0)*s^(-2.0)*kg^(1.0)*V^(-2.0)
*m^(1.0)*s^(-1.0)
*m^(-1.0)*V^(1.0)
*m^(1.0)
*s^(-1.0)
*s^(-1.0)
*m^(1.0)*s^(-1.0)*kg^(1.0)


In [34]:
# Generate and display the pi terms for whatever Example currently holds.
Example.generate_pi_terms()
Example.print_all()

<IPython.core.display.Math object>

---

### Create dimensionless data from dimensionless groups and data <a class="anchor" id="dim3"></a>

In [5]:
class protected_division(Function):
    """Symbolic two-argument division that yields 1 for a numeric zero divisor.

    ``eval`` only acts when the divisor ``y`` is a sympy Number; for any other
    divisor it returns ``None``.
    """

    @classmethod
    def eval(cls, x, y):
        # Non-numeric divisor: nothing to evaluate.
        if not y.is_Number:
            return None
        return S.One if y.is_zero else x/y
class protected_square_root(Function):
    """Symbolic square root of the absolute value of a numeric argument.

    ``eval`` only acts when ``x`` is a sympy Number; otherwise it returns
    ``None``.
    """

    @classmethod
    def eval(cls, x):
        if not x.is_Number:
            return None
        magnitude = sympy.functions.Abs(x)
        return sympy.functions.sqrt(magnitude)

def dimless_groups_to_dimless_data(data, buckpi, expr_vars):
    """Evaluate every pi term of every BuckinghamPi set on ``data``.

    Args:
        data: 2-D NumPy array; its columns are passed positionally to the
            lambdified pi terms, so the column order is assumed to match the
            name order in ``expr_vars`` (TODO confirm at each call site).
        buckpi: BuckinghamPi instance with its variables already added;
            ``generate_pi_terms`` is called on it here.
        expr_vars: whitespace-separated symbol names handed to
            ``sympy.symbols``.

    Returns:
        A list with one entry per pi set: an array of shape
        ``(len(data), number_of_terms_in_set)``, or the int ``0`` as a
        placeholder when evaluating that set raised an exception.
    """
    buckpi.generate_pi_terms()
    symbo = symbols(expr_vars)
    # In the recorded runs print_all() returns None, so this prints "None"
    # after the displayed terms.
    print(buckpi.print_all())

    columns = data.T.tolist()
    output = []  # array containing the dimensionless data
    for pi_set in buckpi.pi_terms:
        dim_groups_set = np.zeros((len(data), len(pi_set)))
        try:
            for col, term in enumerate(pi_set):
                # NOTE(review): the recorded warnings show the generated code
                # using the plain `/` operator, so the '/' -> protected_division
                # mapping presumably never takes effect -- confirm against the
                # lambdify `modules` documentation.
                evaluate = np.vectorize(
                    lambdify(symbo, term, modules=[{'/':protected_division},"scipy", "numpy"])
                )
                dim_groups_set[:, col] = evaluate(*columns)
        except Exception as err:
            # Best effort: report the failing set together with the cause and
            # keep the output positions aligned with the set indices.
            print("problem with set ", len(output), "-", repr(err))
            output.append(0)
        else:
            output.append(dim_groups_set)
    return output

In [13]:
# Export the dimensionless pi sets for the I_13_12 data: one text file per
# split and per set.
# Symbol names handed to dimless_groups_to_dimless_data; assumed to follow the
# column order of the data files -- TODO confirm.
I_13_12_vars = "G m1 m2 r1 r2 U"

I_13_12_data = pd.read_csv('Test_Data_Final/I_13_12/I_13_12_train.txt', header=None, sep=' ')
bp_e1 = expr_vars_to_dimless_groups("G m1 m2 r1 r2","U", "units.csv")
#bp_e1 = expr_vars_to_dimless_groups("m1 m2 r1 r2","U", "units.csv")
print("____________________")
dimless_data = dimless_groups_to_dimless_data(I_13_12_data.to_numpy(), bp_e1, I_13_12_vars)
for i, group_set in enumerate(dimless_data):
    # Sets that failed to evaluate come back as the int placeholder 0, which
    # np.savetxt cannot write; skip them, as the pg export cells do.
    if isinstance(group_set, np.ndarray):
        np.savetxt("Test_Data_Final/I_13_12_dimless/I_13_12_train_dimless_set_"+str(i)+".txt", group_set)

# The remaining splits go through the same transformation with the same groups.
for split in ("test", "extrap_1", "extrap_2", "extrap_3"):
    I_13_12_extrap_data = pd.read_csv('Test_Data_Final/I_13_12/I_13_12_'+split+'.txt', header=None, sep=' ')
    dimless_data = dimless_groups_to_dimless_data(I_13_12_extrap_data.to_numpy(), bp_e1, I_13_12_vars)
    for i, group_set in enumerate(dimless_data):
        if isinstance(group_set, np.ndarray):
            np.savetxt("Test_Data_Final/I_13_12_dimless/I_13_12_"+split+"_dimless_set_"+str(i)+".txt", group_set)

G
m1
m2
r1
r2
____________________


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


In [9]:
# Export the dimensionless pi sets for the I_29_16 data: one text file per
# split and per set.
# Symbol names handed to dimless_groups_to_dimless_data; assumed to follow the
# column order of the data files -- TODO confirm.
I_29_16_vars = "x1 x2 theta1 theta2 x"

I_29_16_data = pd.read_csv('Test_Data_Final/I_29_16/I_29_16_train.txt', header=None, sep=' ')
bp_e2 = expr_vars_to_dimless_groups("x1 x2 theta1 theta2","x", "units.csv")
print("____________________")
dimless_data = dimless_groups_to_dimless_data(I_29_16_data.to_numpy(), bp_e2, I_29_16_vars)
for i, group_set in enumerate(dimless_data):
    # Sets that failed to evaluate come back as the int placeholder 0, which
    # np.savetxt cannot write; skip them, as the pg export cells do.
    if isinstance(group_set, np.ndarray):
        np.savetxt("Test_Data_Final/I_29_16_dimless/I_29_16_train_dimless_set_"+str(i)+".txt", group_set)

# The remaining splits go through the same transformation with the same groups.
for split in ("test", "extrap_1", "extrap_2", "extrap_3"):
    I_29_16_extrap_data = pd.read_csv('Test_Data_Final/I_29_16/I_29_16_'+split+'.txt', header=None, sep=' ')
    dimless_data = dimless_groups_to_dimless_data(I_29_16_extrap_data.to_numpy(), bp_e2, I_29_16_vars)
    for i, group_set in enumerate(dimless_data):
        if isinstance(group_set, np.ndarray):
            np.savetxt("Test_Data_Final/I_29_16_dimless/I_29_16_"+split+"_dimless_set_"+str(i)+".txt", group_set)

x1
x2
theta1
theta2
____________________


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


In [11]:
# Export the dimensionless pi sets for the I_32_17 data: one text file per
# split and per set.
# Symbol names handed to dimless_groups_to_dimless_data; assumed to follow the
# column order of the data files -- TODO confirm.
I_32_17_vars = "epsilon c Ef r omega omega_0 p"

I_32_17_data = pd.read_csv('Test_Data_Final/I_32_17/I_32_17_train.txt', header=None, sep=' ')
# Note: this rebinds bp_e2, which the I_29_16 cell also uses.
bp_e2 = expr_vars_to_dimless_groups("epsilon c Ef r omega omega_0","p", "units.csv")
print("____________________")
dimless_data = dimless_groups_to_dimless_data(I_32_17_data.to_numpy(), bp_e2, I_32_17_vars)
for i, group_set in enumerate(dimless_data):
    # Sets that failed to evaluate come back as the int placeholder 0, which
    # np.savetxt cannot write; skip them, as the pg export cells do.
    if isinstance(group_set, np.ndarray):
        np.savetxt("Test_Data_Final/I_32_17_dimless/I_32_17_train_dimless_set_"+str(i)+".txt", group_set)

# The remaining splits go through the same transformation with the same groups.
for split in ("test", "extrap_1", "extrap_2", "extrap_3"):
    I_32_17_extrap_data = pd.read_csv('Test_Data_Final/I_32_17/I_32_17_'+split+'.txt', header=None, sep=' ')
    dimless_data = dimless_groups_to_dimless_data(I_32_17_extrap_data.to_numpy(), bp_e2, I_32_17_vars)
    for i, group_set in enumerate(dimless_data):
        if isinstance(group_set, np.ndarray):
            np.savetxt("Test_Data_Final/I_32_17_dimless/I_32_17_"+split+"_dimless_set_"+str(i)+".txt", group_set)

epsilon
c
Ef
r
omega
omega_0
____________________


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None


In [30]:
# Hand-built variant of the m1, m2, r1, r2 -> U problem with the dimensions
# typed in directly (single-letter symbols K, M, S) and G left out.
ex = BuckinghamPi()

ex.add_variable(name='m1', dimensions="K")
ex.add_variable(name='m2', dimensions="K")
ex.add_variable(name='r1', dimensions="M")
ex.add_variable(name='r2', dimensions="M")
#ex.add_variable(name='G', dimensions = "")
ex.add_variable(name='U', dimensions = "M^2*S^(-2)*K", non_repeating=True)

ex.generate_pi_terms()

# The recorded output of this cell is just "None".
print(ex.print_all())

None


In [4]:
# Preview the I_13_12 training table. Note that this path ('Test Data/normal')
# differs from the 'Test_Data_Final' directory used by the export cells.
I_13_12_data = pd.read_csv('Test Data/normal/I_13_12_train.txt', header=None, sep=' ')
I_13_12_data.head()

Unnamed: 0,0,1,2,3,4,5
0,1.64644,2.14557,1.80829,1.63465,1.27096,-1.11821
1,1.31276,2.67532,2.89099,1.15032,2.37518,4.55171
2,1.70413,2.77679,0.213108,0.261388,0.060655,-12.7677
3,2.33447,2.61004,2.93586,2.39748,1.38444,-5.45968
4,0.354823,1.91976,0.43006,2.83401,1.56554,-0.083753


In [None]:
# Synthetic check of dimless_groups_to_dimless_data: five identical rows
# holding the values 2, 2 and 4 for the symbols named in "f m a".
fma_data = np.ones((5, 3)) * np.array([2, 2, 4])
print(fma_data)
dimless_data = dimless_groups_to_dimless_data(fma_data, Example, "f m a")
print(dimless_data)

In [41]:
# Load the pg training table and keep only the columns that carry a unit.
pg_data = pd.read_csv('Test_Data_Final/pg_data/pg_train.txt', header=None, sep=' ')
# One unit string per column; '-' marks a column without a unit.
pg_units = [
    'T', 'kg*s^(-1)', '-', 'T', 'T',
    '-', 'kg*m^(-1)*s^(-2)', 'm', '-', '-',
    '-', '-', '-', '-', '-',
    '-', 'T', '-', '-', 'kg*m^(-3)',
    'kg*m^(-3)', '-', '-', '-', 'kg*m^(-3)',
]
print(pg_data.shape,len(pg_units))
# pg_columns = pg_data.columns
# for i in range(len(pg_units)):
#     if pg_units[i] == '-':
#         del pg_data[pg_columns[i]]
# Positions of the columns that have a unit (computed before the pop below,
# so the last column is included).
index_with_units = [pos for pos, unit in enumerate(pg_units) if unit != '-']
pg_data = pg_data[index_with_units]
# The last unit belongs to the dependent variable; the rest stay in pg_units.
pg_dep_unit = pg_units.pop()
joined_units = " ".join(pg_units)
pg_ind_units = joined_units.replace(' -','')
print(pg_dep_unit, pg_ind_units)
# #pg_column_index = list(pg_data.columns)
#
pg_data.head()

(60, 25) 25
kg*m^(-3) T kg*s^(-1) T T kg*m^(-1)*s^(-2) m T kg*m^(-3) kg*m^(-3)


Unnamed: 0,0,1,3,4,6,7,16,19,20,24
0,0.652778,0.187552,0.104175,0.698214,0.049065,0.217631,0.891566,0.287101,0.475431,0.6
1,0.743056,0.083135,0.124365,0.478571,0.046729,0.247934,0.903614,0.121589,0.05242,0.139394
2,0.673611,0.035598,0.09686,0.485714,0.046729,0.179063,0.777108,0.0,0.0,0.172727
3,0.763889,0.096346,0.182388,0.896429,0.046729,0.041322,0.626506,0.294136,0.197692,0.515152
4,0.763889,0.091979,0.160461,0.780357,0.049065,0.275482,0.668675,0.287402,0.195256,0.209091


In [42]:
# From here on pg_ind_units / pg_dep_unit hold variable names rather than the
# unit strings computed in the preparation cell.
# NOTE(review): the first retained data column has unit 'T' in pg_units, while
# the first name here is rho -- confirm that this name order matches the
# column order of pg_data.
pg_ind_units = " rho T mu_drift T1 T2 pr m T3 rho_0"
pg_dep_unit = "rho_1"
pg_e2 = expr_vars_to_dimless_groups(pg_ind_units,pg_dep_unit, "units.csv")
print("____________________")

rho
T
mu_drift
T1
T2
pr
m
T3
rho_0
____________________


In [21]:
# Show which original column positions were kept because they carry a unit.
index_with_units

[0, 1, 3, 4, 6, 7, 16, 19, 20, 24]

In [43]:
# Symbol names for all retained columns: the independent names followed by
# the dependent one.
pg_all_units = pg_ind_units+" "+pg_dep_unit
dimless_data = dimless_groups_to_dimless_data(pg_data.to_numpy(), pg_e2, pg_all_units)

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None
problem with set  0
problem with set  1
problem with set  2
problem with set  3
problem with set  4
problem with set  7
problem with set  8
problem with set  9
problem with set  11
problem with set  12
problem with set  13
problem with set  14
problem with set  15
problem with set  16
problem with set  17
problem with set  18
problem with set  19
problem with set  20
problem with set  21
problem with set  22
problem with set  23
problem with set  24
problem with set  25
problem with set  26
problem with set  27


  return mu_drift*rho**(1/6)/(m**(2/3)*sqrt(pr))
  return mu_drift*rho**(1/6)/(m**(2/3)*sqrt(pr))


In [None]:
# Export the pg training split: for every pi set that evaluated successfully,
# write columns 2, 9, 10 and 18 of the raw table followed by the pi terms.
pg_all_units = pg_ind_units+" "+pg_dep_unit
dimless_data = dimless_groups_to_dimless_data(pg_data.to_numpy(), pg_e2, pg_all_units)
pg_original = pd.read_csv('Test_Data_Final/pg_data/pg_train.txt', header=None, sep=' ')
pg_original = pg_original[[2,9,10,18]]
for i, group_set in enumerate(dimless_data):
    # Failed sets are stored as a non-array placeholder; only arrays are saved.
    if isinstance(group_set, np.ndarray):
        pi_frame = pd.DataFrame(group_set).reset_index(drop=True)
        full_data = pd.concat([pg_original.reset_index(drop=True), pi_frame], axis=1).to_numpy()
        np.savetxt("Test_Data_Final/pg_data_dimless/pg_train_dimless_set_"+str(i)+".txt",full_data)

In [29]:
# Export the pg test split: for every pi set that evaluated successfully,
# write columns 2, 9, 10 and 18 of the raw table followed by the pi terms.
pg_extrap_data = pd.read_csv('Test_Data_Final/pg_data/pg_test.txt', header=None, sep=' ')
pg_extrap_data_units = pg_extrap_data[index_with_units]
pg_extrap_data = pg_extrap_data[[2,9,10,18]]
dimless_data = dimless_groups_to_dimless_data(pg_extrap_data_units.to_numpy(), pg_e2, pg_all_units)
for i, group_set in enumerate(dimless_data):
    # Failed sets are stored as a non-array placeholder; only arrays are saved.
    if isinstance(group_set, np.ndarray):
        pi_frame = pd.DataFrame(group_set).reset_index(drop=True)
        full_data = pd.concat([pg_extrap_data.reset_index(drop=True), pi_frame], axis=1).to_numpy()
        np.savetxt("Test_Data_Final/pg_data_dimless/pg_test_dimless_set_"+str(i)+".txt", full_data)

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None
problem with set  0
problem with set  1
problem with set  3
problem with set  4
problem with set  5
problem with set  6
problem with set  7
problem with set  8
problem with set  12
problem with set  13
problem with set  14
problem with set  16
problem with set  17
problem with set  18
problem with set  19
problem with set  20
problem with set  21
problem with set  22
problem with set  23
problem with set  24


  return mu_drift*rho**(1/6)/(m**(2/3)*sqrt(pr))
  return mu_drift*rho**(1/6)/(m**(2/3)*sqrt(pr))
  return mu_drift*rho**(1/6)/(m**(2/3)*sqrt(pr))
  return mu_drift*rho**(1/6)/(m**(2/3)*sqrt(pr))
  return mu_drift*rho_0**(1/6)/(m**(2/3)*sqrt(pr))
  return mu_drift*rho_0**(1/6)/(m**(2/3)*sqrt(pr))
  return mu_drift*rho_0**(1/6)/(m**(2/3)*sqrt(pr))
  return mu_drift*rho_0**(1/6)/(m**(2/3)*sqrt(pr))


In [30]:
# Export the pg extrapolation split: for every pi set that evaluated
# successfully, write columns 2, 9, 10 and 18 of the raw table followed by the
# pi terms.
pg_extrap_data = pd.read_csv('Test_Data_Final/pg_data/pg_extrap.txt', header=None, sep=' ')
pg_extrap_data_units = pg_extrap_data[index_with_units]
pg_extrap_data = pg_extrap_data[[2,9,10,18]]
dimless_data = dimless_groups_to_dimless_data(pg_extrap_data_units.to_numpy(), pg_e2, pg_all_units)
for i, group_set in enumerate(dimless_data):
    # Failed sets are stored as a non-array placeholder; only arrays are saved.
    if isinstance(group_set, np.ndarray):
        pi_frame = pd.DataFrame(group_set).reset_index(drop=True)
        full_data = pd.concat([pg_extrap_data.reset_index(drop=True), pi_frame], axis=1).to_numpy()
        np.savetxt("Test_Data_Final/pg_data_dimless/pg_extrap_dimless_set_"+str(i)+".txt", full_data)

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

<IPython.core.display.Math object>

---

None
problem with set  0
problem with set  1
problem with set  2
problem with set  3
problem with set  4
problem with set  5
problem with set  6
problem with set  7
problem with set  8
problem with set  9
problem with set  10
problem with set  11
problem with set  19
problem with set  20
problem with set  21
problem with set  26


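Remake for completion: recompute by hand, with plain NumPy, pi sets that failed in the automatic runs above (saved below as sets 11 and 27).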

In [32]:
# Show the variable-name order used to index the columns in the cell below.
pg_ind_units

' rho T mu_drift T1 T2 pr m T3 rho_0'

In [44]:
# Hand-computed pi groups, written out as "set 11" for every pg split.
# Column positions refer to the unit-bearing columns selected through
# index_with_units; the comments below assume they follow the name order in
# pg_ind_units (0 rho, 1 T, 2 mu_drift, 3 T1, 4 T2, 5 pr, 6 m, 7 T3, 8 rho_0,
# 9 rho_1) -- TODO confirm against the data files.
texts = ["train","test","extrap"]
for text in texts:
    pg_extrap_data = pd.read_csv('Test_Data_Final/pg_data/pg_'+text+'.txt', header=None, sep=' ')
    pg_extrap_data_units = pg_extrap_data[index_with_units].to_numpy()
    m23 = np.cbrt(np.square(pg_extrap_data_units[:, 6]))   # column 6 ** (2/3)
    p6 = pg_extrap_data_units[:, 0]**(1/6)                 # column 0 ** (1/6)
    pi1 = pg_extrap_data_units[:, 1]/pg_extrap_data_units[:, 3]
    pi2 = (pg_extrap_data_units[:, 2]*p6/(m23*np.sqrt(pg_extrap_data_units[:, 5])))
    pi3 = pg_extrap_data_units[:, 4]/pg_extrap_data_units[:, 3]
    pi4 = pg_extrap_data_units[:, 7]/pg_extrap_data_units[:, 3]
    pi5 = pg_extrap_data_units[:, 8]/pg_extrap_data_units[:, 0]
    pi6 = pg_extrap_data_units[:, 9]/pg_extrap_data_units[:, 0]

    # Replace division artefacts with 0. Only +inf is handled, and only for
    # the groups listed here, as before.
    pi2[pi2 == np.inf] = 0
    pi5[pi5 == np.inf] = 0
    # NaN never compares equal to anything, so the former `pi5 == np.NaN`
    # mask was always empty (and the np.NaN alias is gone in NumPy 2.0).
    pi5[np.isnan(pi5)] = 0
    pi6[pi6 == np.inf] = 0
    # Columns 2, 9, 10 and 18 of the raw table are prepended to the pi groups.
    pg_extrap_data = pg_extrap_data[[2,9,10,18]]
    dimless_data = pd.DataFrame(np.hstack([pi1[:,None],pi2[:,None],pi3[:,None],pi4[:,None],pi5[:,None],pi6[:,None]]))
    full_data = pd.concat([pg_extrap_data.reset_index(drop=True), pd.DataFrame(dimless_data).reset_index(drop=True)], axis=1).to_numpy()
    #full_data = full_data[[0,3,4,8,9]]
    np.savetxt("Test_Data_Final/pg_data_dimless/pg_"+text+"_dimless_set_11.txt", full_data)

  pi1 = pg_extrap_data_units[:, 1]/pg_extrap_data_units[:, 3]
  pi2 = (pg_extrap_data_units[:, 2]*p6/(m23*np.sqrt(pg_extrap_data_units[:, 5])))
  pi3 = pg_extrap_data_units[:, 4]/pg_extrap_data_units[:, 3]
  pi4 = pg_extrap_data_units[:, 7]/pg_extrap_data_units[:, 3]
  pi2 = (pg_extrap_data_units[:, 2]*p6/(m23*np.sqrt(pg_extrap_data_units[:, 5])))
  pi5 = pg_extrap_data_units[:, 8]/pg_extrap_data_units[:, 0]
  pi6 = pg_extrap_data_units[:, 9]/pg_extrap_data_units[:, 0]


In [88]:
# Check column 5 of the saved training file for set 10 for NaN entries.
pg_extrap_data = pd.read_csv('Test_Data_Final/pg_data_dimless/pg_train_dimless_set_10.txt', header=None, sep=' ')
#ds = pg_extrap_data.isin([np.inf, -np.inf])
print(np.isnan(pg_extrap_data[5]))
#r = pg_extrap_data.index[np.isinf(pg_extrap_data).any()]
#print(r)

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14    False
15    False
16    False
17    False
18    False
19    False
20    False
21    False
22    False
23    False
24    False
25    False
26    False
27    False
28    False
29    False
30    False
31    False
32    False
33    False
34    False
35    False
36    False
37    False
38    False
39    False
40    False
41    False
42    False
43    False
44    False
45    False
46    False
47    False
48    False
49    False
50    False
51    False
52    False
53    False
54    False
55    False
56    False
57    False
58    False
59    False
Name: 5, dtype: bool


In [95]:
# Correlate every column of the saved set-10 file for the first split in
# `texts` with column 9 (the last column in the recorded output).
pg_extrap_data = pd.read_csv('Test_Data_Final/pg_data_dimless/pg_'+texts[0]+'_dimless_set_10.txt', header=None, sep=' ')
correlations = pg_extrap_data.corr()
print(correlations[9])

0   -0.635012
1   -0.406447
2   -0.375169
3   -0.431741
4    0.551525
5    0.317013
6    0.401227
7    0.372724
8    0.548583
9    1.000000
Name: 9, dtype: float64


In [99]:
# Show the variable-name order used to index the columns in the cell below.
pg_ind_units


' rho T mu_drift T1 T2 pr m T3 rho_0'

In [103]:
# Hand-computed pi groups, written out as "set 27" for every pg split.
# Column positions refer to the unit-bearing columns selected through
# index_with_units; the comments below assume they follow the name order in
# pg_ind_units (0 rho, 1 T, 2 mu_drift, 3 T1, 4 T2, 5 pr, 6 m, 7 T3, 8 rho_0,
# 9 rho_1) -- TODO confirm against the data files.
for text in texts:
    pg_extrap_data = pd.read_csv('Test_Data_Final/pg_data/pg_'+text+'.txt', header=None, sep=' ')
    pg_extrap_data_units = pg_extrap_data[index_with_units].to_numpy()
    m23 = np.cbrt(np.square(pg_extrap_data_units[:, 6]))   # column 6 ** (2/3)
    p6 = pg_extrap_data_units[:, 8]**(1/6)                 # column 8 ** (1/6)
    pi1 = pg_extrap_data_units[:, 0]/pg_extrap_data_units[:, 8]
    pi2 = (pg_extrap_data_units[:, 2]*p6/(m23*np.sqrt(pg_extrap_data_units[:, 5])))
    pi3 = pg_extrap_data_units[:, 1]/pg_extrap_data_units[:, 4]
    pi4 = pg_extrap_data_units[:, 3]/pg_extrap_data_units[:, 4]
    pi5 = pg_extrap_data_units[:, 7]/pg_extrap_data_units[:, 4]
    pi6 = pg_extrap_data_units[:, 9]/pg_extrap_data_units[:, 8]
    # Replace division artefacts with 0. Only +inf is handled, and only for
    # the groups listed here, as before.
    pi1[pi1 == np.inf] = 0
    pi2[pi2 == np.inf] = 0
    pi3[pi3 == np.inf] = 0
    pi5[pi5 == np.inf] = 0
    # NaN never compares equal to anything, so the former `pi5 == np.NaN`
    # mask was always empty (and the np.NaN alias is gone in NumPy 2.0).
    pi5[np.isnan(pi5)] = 0
    pi6[pi6 == np.inf] = 0
    # Columns 2, 9, 10 and 18 of the raw table are prepended to the pi groups.
    pg_extrap_data = pg_extrap_data[[2,9,10,18]]
    dimless_data = pd.DataFrame(np.hstack([pi1[:,None],pi2[:,None],pi3[:,None],pi4[:,None],pi5[:,None],pi6[:,None]]))
    full_data = pd.concat([pg_extrap_data.reset_index(drop=True), pd.DataFrame(dimless_data).reset_index(drop=True)], axis=1).to_numpy()
    #full_data = full_data[[0,3,4,8,9]]
    np.savetxt("Test_Data_Final/pg_data_dimless/pg_"+text+"_dimless_set_27.txt", full_data)

  pi1 = pg_extrap_data_units[:, 0]/pg_extrap_data_units[:, 8]
  pi2 = (pg_extrap_data_units[:, 2]*p6/(m23*np.sqrt(pg_extrap_data_units[:, 5])))
  pi6 = pg_extrap_data_units[:, 9]/pg_extrap_data_units[:, 8]
  pi2 = (pg_extrap_data_units[:, 2]*p6/(m23*np.sqrt(pg_extrap_data_units[:, 5])))
  pi3 = pg_extrap_data_units[:, 1]/pg_extrap_data_units[:, 4]
  pi4 = pg_extrap_data_units[:, 3]/pg_extrap_data_units[:, 4]
  pi5 = pg_extrap_data_units[:, 7]/pg_extrap_data_units[:, 4]


In [39]:

# Prototype for thinning a clustered point set: draw random cluster labels,
# then delete a share of each cluster's points from both `clustering` and its
# mirror `self`, keeping the two lists in lock-step.
clustering = list(np.random.randint(low = 0, high=5,size=30))
self = clustering.copy()
min_points=15
max_points=25
num_clusters = 5
print(clustering)
num_points_remove = int((len(self)-min_points)/num_clusters)
if len(self) > max_points and len(self) > 5:
    # One row per label that actually occurs: [label, number of points].
    cluster_counts = np.array([[x,clustering.count(x)] for x in set(clustering)])
    print("cluster_counts",cluster_counts)
    # Column 1 becomes the number of points to delete from that cluster.
    # NOTE(review): the totals can drop the list below min_points (30 points
    # lose 18 in the recorded run) -- confirm the intended formula.
    cluster_counts[:,1] = (cluster_counts[:,1]/(num_clusters)*num_points_remove).astype(int)
    print("cluster_counts mod",cluster_counts)

    for i in range(cluster_counts.shape[0]):
        # Use the label stored in the row, not the row index: the two only
        # coincide when every label 0..k-1 occurs in the sample.
        label = cluster_counts[i,0]
        clustering_arr = np.array(clustering)
        ii = np.where(clustering_arr == label)[0]
        print("location of item",ii)
        # Never ask for more points than the cluster holds.
        num_delete = min(int(cluster_counts[i,1]), len(ii))
        items_delete = np.random.choice(ii,num_delete,replace=False)
        print("location of item to delete",items_delete)
        # Pop from the highest position down so earlier pops do not shift
        # the positions that are still pending.
        items_delete = np.sort(items_delete)[::-1]
        print("items",clustering)
        print("location of item to delete sorted",items_delete)
        len_b = len(clustering)
        #items_delete = list(items_delete)
        print("num items to delete",len(items_delete))
        for item in items_delete:
            print(item)
            clustering.pop(item)
            self.pop(item)
        print("number of items deleted",-len(clustering)+len_b)
        print("\n")

[1, 3, 0, 3, 0, 1, 4, 0, 0, 1, 3, 0, 2, 1, 0, 0, 4, 2, 1, 1, 1, 0, 3, 4, 0, 2, 2, 2, 4, 0]
cluster_counts [[ 0 10]
 [ 1  7]
 [ 2  5]
 [ 3  4]
 [ 4  4]]
cluster_counts mod [[0 6]
 [1 4]
 [2 3]
 [3 2]
 [4 2]]
location of item [ 2  4  7  8 11 14 15 21 24 29]
location of item to delete [21 14 24 15  7  8]
items [1, 3, 0, 3, 0, 1, 4, 0, 0, 1, 3, 0, 2, 1, 0, 0, 4, 2, 1, 1, 1, 0, 3, 4, 0, 2, 2, 2, 4, 0]
location of item to delete sorted [24 21 15 14  8  7]
num items to delete 6
24
21
15
14
8
7
number of items deleted 6


location of item [ 0  5  7 11 14 15 16]
location of item to delete [15  7  5 11]
items [1, 3, 0, 3, 0, 1, 4, 1, 3, 0, 2, 1, 4, 2, 1, 1, 1, 3, 4, 2, 2, 2, 4, 0]
location of item to delete sorted [15 11  7  5]
num items to delete 4
15
11
7
5
number of items deleted 4


location of item [ 8 10 15 16 17]
location of item to delete [10 16 15]
items [1, 3, 0, 3, 0, 4, 3, 0, 2, 4, 2, 1, 1, 3, 4, 2, 2, 2, 4, 0]
location of item to delete sorted [16 15 10]
num items to delete 3
16
15


In [22]:
# Number of rows in cluster_counts, i.e. how many distinct labels were drawn.
cluster_counts.shape[0]

5