In [1]:
import csv
import pandas as pd
import numpy as np
import networkx as nx
import sympy
import math
import matplotlib.pyplot as plt 
import knowledge_representation as KR
import itertools
import re

generating the knowledge graph that we previously built, to implement our applications on

In [2]:
G = KR.generate_graph(KR.import_equations(), KR.import_variables())
print([x for x in G.nodes()])
print([(x,y,z) for x,y,z in G.edges(data = True)])

['A', 'a', 'A_vec', 'alpha', 'B', 'beta', 'Bx', 'By', 'Bz', 'c', 'C', 'chi', 'd', 'D', 'd1', 'd2', 'delta', 'el_ct', 'E_n', 'E_den', 'Ef', 'epsilon', 'f', 'F', 'flux', 'foc', 'G', 'g', 'g_', 'gamma', 'H', 'h', 'hbar', 'H_G', 'I', 'I_0', 'I1', 'I2', 'Int', 'Int_0', 'j', 'Jz', 'K', 'k', 'k_f', 'k_G', 'k_spring', 'kappa', 'kb', 'L', 'L_rad', 'lambd', 'm', 'M', 'm_0', 'm1', 'm2', 'mob', 'mom', 'mu', 'mu_drift', 'Nn', 'n', 'n_rho', 'n_0', 'omega', 'omega_0', 'p', 'Pol', 'p_d', 'Pwr', 'pr', 'prob', 'q', 'q1', 'q2', 'r', 'R', 'r1', 'r2', 'rho', 'rho_c', 'rho_c_0', 'rho_0', 'sigma', 'sigma_den', 't', 'T', 't1', 'T1', 'T2', 'tau', 'theta', 'theta1', 'theta2', 'U', 'u', 'v', 'V', 'v1', 'V1', 'V2', 'Volt', 'w', 'x', 'x1', 'x2', 'x3', 'y', 'Y', 'y1', 'y2', 'y3', 'z', 'z1', 'z2', 'mu_S', 'L_ind', 'Z_1', 'Z_2', 'Length', 'Mass', 'Temperature', 'Voltage', 'Τime', (-3.0, 0.0, 0.0, 0.0, 0.0), (-3.0, 0.0, 1.0, 0.0, 0.0), (-2.0, 1.0, 0.0, 0.0, 1.0), (-2.0, 3.0, -1.0, 0.0, 2.0), (-2.0, 4.0, -1.0, 0.0, 2.0

Creating the list of formulas, which we use for testing

In [3]:
all_mysteries = KR.import_equations()

implementing dimensional analysis, which queries the graph for parameters based on the units of our observations. It then suggests possible relationships that make use of parameters with the same units as our observations

In [4]:
def dim_analysis_units(G, input_variables_array, output_variables_array=None):
    def strict_search(char, arr):
        for i in char:
            index = 0
            while arr[index:].find(i)>0:
                index = arr.find(i)
                if ((index > 0 and (arr[index-1] in ["*", "+", "/", "-", "(", ")"]) )
                    and (arr[index+len(i)] in ["*", "+", "/", "-", "(", ")"])): return True
        return False

    def strict_search_array(relation, arr):
        for i in arr:
            possible_variables = [x for x,y,z in G.edges(data = True) if z.get("HAS_UNIT") == True and y == i]
#             print(possible_variables, relation)
            if strict_search(possible_variables, relation) ==True: 
#                 print("found ", possible_variables, " in ", relation)
                continue
            else:
                return False
        return True
    
    possible_relations = [x for x,y in G.nodes(data = True) if y['type'] == "Subequation"]
    relations = possible_relations
    relations[:] = [relation for relation in possible_relations if strict_search_array(relation, input_variables_array)]
    if output_variables_array == None: return relations
    possible_outputs = [x for x,y,z in G.edges(data = True) if y ==output_variables_array and z.get("HAS_UNIT")==True]
    
    equation_filter = [(x,y,z) for x,y,z in G.edges(data = True) if z.get("IS_EQUAL") == True 
                       and x in relations and y in possible_outputs]
    return equation_filter


In [5]:
print(dim_analysis_units(G, [(0,0,1,0,0), (1,-1,0,0,0), (1,-1,0,0,0)], (1,-1,1,0,0)))

[('((m_0*v/sqrt(1-v**2/c**2)))', 'p', {'IS_EQUAL': True})]


We run dimensional analysis for all the equations, to see how many can be recovered by dimensional analysis of (1) only units of input variables and (2) units of both input and output variables

In [6]:
#testing dimensional analysis:
# print(all_mysteries)


overall_results_dim_analysis = []
for j in range(len(all_mysteries)):
#     print(all_mysteries['Formula'][j])
    results = []
    variables = []
    for i in range(10):
        new_var = all_mysteries["v%s_name" %(i+1)][j]
#         print(new_var)
        variables.append(new_var)
    variables = list(filter(lambda x: str(x) != "nan", variables))
    input_variable = [y for x,y,z in G.edges(data = True) if  x in variables and z.get("HAS_UNIT")==True]
    output_variable = [y for x,y,z in G.edges(data = True) if x == all_mysteries['Output'][j] and z.get("HAS_UNIT")==True]
    answer = all_mysteries['Formula'][j]
    results.append(answer)
    print('assessing formula: ', answer)
    simple_dim_analysis = dim_analysis_units(G, input_variable)
    results.append(len(simple_dim_analysis))
    print("simple dimensional analysis with no output has ", len(simple_dim_analysis), " possible answers")
    if len(simple_dim_analysis) == 1:
        print(answer, simple_dim_analysis[0])
        print("the length is one and the answer is ", simple_dim_analysis[0].find(answer)>0)
        results.append(simple_dim_analysis[0].find(answer)>0)
    else: results.append(None)
    full_dim_analysis = dim_analysis_units(G, input_variable, output_variable[0])
    results.append(len(full_dim_analysis))
    print("full dimensional analysis has ", len(full_dim_analysis), " possible answers")
    if len(full_dim_analysis) == 1:
        print("the length is one and the answer is ", full_dim_analysis[0][0].find(answer)>0)   
        results.append(full_dim_analysis[0][0].find(answer)>0)
    else: results.append(None)
    overall_results_dim_analysis.append(results)    
    #place into df
    
print(overall_results_dim_analysis)

assessing formula:  exp(-theta**2/2)/sqrt(2*pi)
simple dimensional analysis with no output has  36  possible answers
full dimensional analysis has  10  possible answers
assessing formula:  exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma)
simple dimensional analysis with no output has  36  possible answers
full dimensional analysis has  10  possible answers
assessing formula:  exp(-((theta-theta1)/sigma)**2/2)/(sqrt(2*pi)*sigma)
simple dimensional analysis with no output has  36  possible answers
full dimensional analysis has  10  possible answers
assessing formula:  sqrt((x2-x1)**2+(y2-y1)**2)
simple dimensional analysis with no output has  35  possible answers
full dimensional analysis has  6  possible answers
assessing formula:  G*m1*m2/((x2-x1)**2+(y2-y1)**2+(z2-z1)**2)
simple dimensional analysis with no output has  2  possible answers
full dimensional analysis has  1  possible answers
the length is one and the answer is  True
assessing formula:  m_0/sqrt(1-v**2/c**2)
simple dimensional

simple dimensional analysis with no output has  1  possible answers
1/(gamma-1)*pr*V ((1/(gamma-1)*pr*V))
the length is one and the answer is  True
full dimensional analysis has  1  possible answers
the length is one and the answer is  True
assessing formula:  n*kb*T/V
simple dimensional analysis with no output has  2  possible answers
full dimensional analysis has  1  possible answers
the length is one and the answer is  True
assessing formula:  n_0*exp(-m*g*x/(kb*T))
simple dimensional analysis with no output has  0  possible answers
full dimensional analysis has  0  possible answers
assessing formula:  h/(2*pi)*omega**3/(pi**2*c**2*(exp((h/(2*pi))*omega/(kb*T))-1))
simple dimensional analysis with no output has  1  possible answers
h/(2*pi)*omega**3/(pi**2*c**2*(exp((h/(2*pi))*omega/(kb*T))-1)) ((h/(2*pi)*omega**3/(pi**2*c**2*(exp((h/(2*pi))*omega/(kb*T))-1))))
the length is one and the answer is  True
full dimensional analysis has  1  possible answers
the length is one and the answ

full dimensional analysis has  1  possible answers
the length is one and the answer is  True
assessing formula:  g_*q*B/(2*m)
simple dimensional analysis with no output has  1  possible answers
g_*q*B/(2*m) ((g_*q*B/(2*m)))
the length is one and the answer is  True
full dimensional analysis has  1  possible answers
the length is one and the answer is  True
assessing formula:  q*h/(4*pi*m)
simple dimensional analysis with no output has  3  possible answers
full dimensional analysis has  1  possible answers
the length is one and the answer is  True
assessing formula:  g_*mom*B*Jz/(h/(2*pi))
simple dimensional analysis with no output has  1  possible answers
g_*mom*B*Jz/(h/(2*pi)) ((g_*mom*B*Jz/(h/(2*pi))))
the length is one and the answer is  True
full dimensional analysis has  1  possible answers
the length is one and the answer is  True
assessing formula:  n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T)))
simple dimensional analysis with no output has  2  possible answers
full dimensional ana

In [7]:
# np.savetxt('dim_analysis.csv', overall_results_dim_analysis, delimiter=',', fmt = '%s') 