In [1]:
import csv
import pandas as pd
import numpy as np
import networkx as nx
import sympy
import math
import matplotlib.pyplot as plt 
import knowledge_representation as KR
import itertools
import re

generating the knowledge graph that we previously built, to implement our applications on

In [2]:
G = KR.generate_graph(KR.import_equations(), KR.import_variables())
print([x for x in G.nodes()])
print([(x,y,z) for x,y,z in G.edges(data = True)])

['A', 'a', 'A_vec', 'alpha', 'B', 'beta', 'Bx', 'By', 'Bz', 'c', 'C', 'chi', 'd', 'D', 'd1', 'd2', 'delta', 'el_ct', 'E_n', 'E_den', 'Ef', 'epsilon', 'f', 'F', 'flux', 'foc', 'G', 'g', 'g_', 'gamma', 'H', 'h', 'hbar', 'H_G', 'I', 'I_0', 'I1', 'I2', 'Int', 'Int_0', 'j', 'Jz', 'K', 'k', 'k_f', 'k_G', 'k_spring', 'kappa', 'kb', 'L', 'L_rad', 'lambd', 'm', 'M', 'm_0', 'm1', 'm2', 'mob', 'mom', 'mu', 'mu_drift', 'Nn', 'n', 'n_rho', 'n_0', 'omega', 'omega_0', 'p', 'Pol', 'p_d', 'Pwr', 'pr', 'prob', 'q', 'q1', 'q2', 'r', 'R', 'r1', 'r2', 'rho', 'rho_c', 'rho_c_0', 'rho_0', 'sigma', 'sigma_den', 't', 'T', 't1', 'T1', 'T2', 'tau', 'theta', 'theta1', 'theta2', 'U', 'u', 'v', 'V', 'v1', 'V1', 'V2', 'Volt', 'w', 'x', 'x1', 'x2', 'x3', 'y', 'Y', 'y1', 'y2', 'y3', 'z', 'z1', 'z2', 'mu_S', 'L_ind', 'Z_1', 'Z_2', 'Length', 'Mass', 'Temperature', 'Voltage', 'Τime', (-3.0, 0.0, 0.0, 0.0, 0.0), (-3.0, 0.0, 1.0, 0.0, 0.0), (-2.0, 1.0, 0.0, 0.0, 1.0), (-2.0, 3.0, -1.0, 0.0, 2.0), (-2.0, 4.0, -1.0, 0.0, 2.0

Creating the list of formulas, which we use for testing

In [3]:
all_mysteries = KR.import_equations()

A second way to recover equations when the units of the equation are not known is brute force. The idea behind brute force is that we permute and substitute the input variables into the equation. We do not do apply brute force on all equations, only on equations that match the number of variables as that we have observed.

In [4]:
#count number of variables in the equation
          
            
def count_variables(G, formula):
    variable_list = [x for x,y in G.nodes(data = True) if y["type"] == "Variable"]
    parameter_list = []
    for i in variable_list:
        index = find_parameters(i, formula)
        if index > 0: parameter_list.append(i)
        while index > 0:
            formula = formula[:index:] + formula[index+len(i):]
            index = find_parameters(i, formula)
    return len(parameter_list), parameter_list


def find_parameters(parameter, formula):
    if formula.find(parameter) <0: return -1
    total = 0
    while formula.find(parameter)>-1:
        index = formula.find(parameter)
        total += index 
        if (formula[index-1] in ["*", "+", "/", "-", "(", ")"]) and (formula[index+len(parameter)] in ["*", "+", "/", "-", "(", ")"]): 
            return total
        else: 
            formula = formula[index+1:]
            total += 1
    return -1
        
    
def multiple_replacements(parameters, values, formula):
    for i in range(len(parameters)):
        index = find_parameters(parameters[i], formula)
        while find_parameters(parameters[i], formula)>=0:
            index = find_parameters(parameters[i], formula)
            formula = formula[:index] + str(values[i])+ formula[index+len(parameters[i]):]
    formula = formula.replace("sin", "np.sin") #note that sinh gets replaced as well
    formula = formula.replace("arcnp.sin", "np.arcsin")
    formula = formula.replace("cos", "np.cos")
    formula = formula.replace("tan", "np.tan")
    formula = formula.replace("sqrt", "np.sqrt")
    formula = formula.replace("exp", "np.exp")
    formula = formula.replace("ln", "np.log")
    formula = formula.replace("pi", "np.pi")
    return formula

# print(multiple_replacements(['c', 'm_0', 'v'], (1.7, 1.8, 2.1), "((m_0/sqrt(1-v**2/c**2)))" ))


def brute_force(G, variable_array, subeqns_to_test=None): 
    #can take in either a list or a graph, we need to test it
    if isinstance(subeqns_to_test, list) == True:
        subeqns = subeqns_to_test
    else: subeqns = [x for x,y in G.nodes(data = True) if y["type"] == "Subequation"]
    for i in subeqns:
        number_of_vars, parameter_list = count_variables(G, i)
        if number_of_vars == len(variable_array)-1: 
#             print("match in number of variables: ", i)
            for j in list(itertools.permutations(variable_array[:-1])):
#                 print("trying this set of j:", j)
                try:
                    new_formula = multiple_replacements(parameter_list, j, i)
                    if math.isclose(variable_array[-1], eval(new_formula), rel_tol=variable_array[-1]/10000, abs_tol=variable_array[-1]/10000): 
                        print("matches equation", i)
                        return i
                except:
                    continue
#     print("no equation matches the data")
    return None

            
# print(brute_force(G, [1.7, 2.1, 1.8, 1.408]))
# print(brute_force(G, [1.7, 2.1, 1.8, 1.408], ["((m_0/sqrt(1-v**2/c**2)))", "((m_0*v/sqrt(1-v**2/c**2)))"]))



We run brute force on all the equations to see how many of the equations are correctly identified

Note that tolerancing is an issue

In [5]:
overall_results_brute_force = []


    
for j in range(len(all_mysteries)):
# for j in range(60):
    #creating the test dataset 
    print("formula: ", all_mysteries["Formula"][j])
    counter, parameter_list = count_variables(G, "("+str(all_mysteries["Formula"][j])+")")
    dataset = []
    for i in range(counter): 
        dataset.append(i+np.random.rand(10)) #i want to have 10 test cases per formula to brute force on
    dataset = np.apply_along_axis(np.random.permutation, axis=1, arr=np.transpose(dataset)) #generate random vars
#     print(dataset)
    results = []
    for k in range(len(dataset)):
        results.append(eval(multiple_replacements(parameter_list, dataset[k], "("+all_mysteries["Formula"][j]+")")))
    dataset = np.hstack((dataset,np.transpose([np.asarray(results, dtype=np.float32)]))) # generate answers
    
    correct = 0
    total = 0
    for k in range(len(dataset)):
#         print("dataset: ",  k)
        if str(brute_force(G, dataset[k], subeqns_to_test=None)).find(str(all_mysteries["Formula"][j]))>-1:
            correct+=1
        total +=1
    print(correct, " out of ", total)
    overall_results_brute_force.append([all_mysteries["Formula"][j], correct, total])
    
    
print(overall_results_brute_force)

formula:  exp(-theta**2/2)/sqrt(2*pi)
matches equation ((exp(-theta**2/2)/sqrt(2*pi)))
matches equation ((exp(-theta**2/2)/sqrt(2*pi)))
matches equation ((exp(-theta**2/2)/sqrt(2*pi)))
matches equation ((exp(-theta**2/2)/sqrt(2*pi)))
matches equation ((exp(-theta**2/2)/sqrt(2*pi)))
matches equation ((exp(-theta**2/2)/sqrt(2*pi)))
matches equation ((exp(-theta**2/2)/sqrt(2*pi)))
matches equation ((exp(-theta**2/2)/sqrt(2*pi)))
matches equation ((exp(-theta**2/2)/sqrt(2*pi)))
matches equation ((exp(-theta**2/2)/sqrt(2*pi)))
10  out of  10
formula:  exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma)
matches equation ((exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma)))
matches equation ((exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma)))
matches equation ((exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma)))
matches equation ((exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma)))
matches equation ((exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma)))
matches equation ((exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma)))
matches equat

  """Entry point for launching an IPython kernel.
  """Entry point for launching an IPython kernel.


matches equation ((1/2*m*(v**2+u**2+w**2)))
matches equation ((1/2*m*(v**2+u**2+w**2)))
matches equation ((1/2*m*(v**2+u**2+w**2)))
matches equation ((1/2*m*(v**2+u**2+w**2)))
matches equation ((1/2*m*(v**2+u**2+w**2)))
matches equation ((1/2*m*(v**2+u**2+w**2)))
matches equation ((1/2*m*(v**2+u**2+w**2)))
matches equation ((1/2*m*(v**2+u**2+w**2)))
matches equation ((1/2*m*(v**2+u**2+w**2)))
10  out of  10
formula:  G*m1*m2*(1/r2-1/r1)
matches equation ((G*m1*m2*(1/r2-1/r1)))
1  out of  10
formula:  m*g*z
matches equation ((m*g*z))
matches equation ((m*g*z))
matches equation ((m*g*z))
matches equation ((m*g*z))
matches equation ((m*g*z))
matches equation ((m*g*z))
matches equation ((m*g*z))
matches equation ((m*g*z))
matches equation ((m*g*z))
matches equation ((m*g*z))
10  out of  10
formula:  1/2*k_spring*x**2
matches equation ((1/2*k_spring*x**2))
matches equation ((1/2*k_spring*x**2))
matches equation ((1/2*k_spring*x**2))
matches equation ((1/2*k_spring*x**2))
matches equation ((

matches equation (((1/2*epsilon*c*Ef**2)*(8*pi*r**2/3)*(omega**4/(omega**2-omega_0**2)**2)))
matches equation (((1/2*epsilon*c*Ef**2)*(8*pi*r**2/3)*(omega**4/(omega**2-omega_0**2)**2)))
matches equation (((1/2*epsilon*c*Ef**2)*(8*pi*r**2/3)*(omega**4/(omega**2-omega_0**2)**2)))
matches equation (((1/2*epsilon*c*Ef**2)*(8*pi*r**2/3)*(omega**4/(omega**2-omega_0**2)**2)))
matches equation (((1/2*epsilon*c*Ef**2)*(8*pi*r**2/3)*(omega**4/(omega**2-omega_0**2)**2)))
matches equation (((1/2*epsilon*c*Ef**2)*(8*pi*r**2/3)*(omega**4/(omega**2-omega_0**2)**2)))
10  out of  10
formula:  q*v*B/p
matches equation ((q*v*B/p))
matches equation ((q*v*B/p))
matches equation ((q*v*B/p))
matches equation ((q*v*B/p))
matches equation ((q*v*B/p))
matches equation ((q*v*B/p))
matches equation ((q*v*B/p))
matches equation ((q*v*B/p))
matches equation ((q*v*B/p))
matches equation ((q*v*B/p))
10  out of  10
formula:  omega_0/(1-v/c)
matches equation ((omega_0/(1-v/c)))
matches equation ((omega_0/(1-v/c)))
matc

matches equation ((kappa*(T2-T1)*A/d))
6  out of  10
formula:  Pwr/(4*pi*r**2)
matches equation ((Pwr/(4*pi*r**2)))
matches equation ((Pwr/(4*pi*r**2)))
matches equation ((Pwr/(4*pi*r**2)))
matches equation ((Pwr/(4*pi*r**2)))
matches equation ((Pwr/(4*pi*r**2)))
matches equation ((Pwr/(4*pi*r**2)))
matches equation ((Pwr/(4*pi*r**2)))
matches equation ((Pwr/(4*pi*r**2)))
matches equation ((Pwr/(4*pi*r**2)))
matches equation ((Pwr/(4*pi*r**2)))
10  out of  10
formula:  q/(4*pi*epsilon*r)
matches equation ((q/(4*pi*epsilon*r)))
matches equation ((q/(4*pi*epsilon*r)))
matches equation ((q/(4*pi*epsilon*r)))
matches equation ((q/(4*pi*epsilon*r)))
matches equation ((q/(4*pi*epsilon*r)))
matches equation ((q/(4*pi*epsilon*r)))
matches equation ((q/(4*pi*epsilon*r)))
matches equation ((q/(4*pi*epsilon*r)))
matches equation ((q/(4*pi*epsilon*r)))
matches equation ((q/(4*pi*epsilon*r)))
10  out of  10
formula:  1/(4*pi*epsilon)*p_d*cos(theta)/r**2
matches equation ((1/(4*pi*epsilon)*p_d*cos(t

matches equation ((q*v/(2*pi*r)))
matches equation ((q*v/(2*pi*r)))
matches equation ((q*v/(2*pi*r)))
matches equation ((q*v/(2*pi*r)))
10  out of  10
formula:  q*v*r/2
matches equation ((q*v*r/2))
matches equation ((q*v*r/2))
matches equation ((q*v*r/2))
matches equation ((q*v*r/2))
matches equation ((q*v*r/2))
matches equation ((q*v*r/2))
matches equation ((q*v*r/2))
matches equation ((q*v*r/2))
matches equation ((q*v*r/2))
matches equation ((q*v*r/2))
10  out of  10
formula:  g_*q*B/(2*m)
matches equation ((g_*q*B/(2*m)))
matches equation ((g_*q*B/(2*m)))
matches equation ((g_*q*B/(2*m)))
matches equation ((g_*q*B/(2*m)))
matches equation ((g_*q*B/(2*m)))
matches equation ((g_*q*B/(2*m)))
matches equation ((g_*q*B/(2*m)))
matches equation ((g_*q*B/(2*m)))
matches equation ((g_*q*B/(2*m)))
matches equation ((g_*q*B/(2*m)))
10  out of  10
formula:  q*h/(4*pi*m)
matches equation ((q*h/(4*pi*m)))
matches equation ((q*h/(4*pi*m)))
matches equation ((q*h/(4*pi*m)))
matches equation ((q*h/

  """Entry point for launching an IPython kernel.


matches equation ((n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T)))))
matches equation ((n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T)))))
matches equation ((n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T)))))
matches equation ((n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T)))))
matches equation ((n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T)))))
matches equation ((n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T)))))
matches equation ((n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T)))))
matches equation ((n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T)))))
matches equation ((n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T)))))
10  out of  10
formula:  n_rho*mom*tanh(mom*B/(kb*T))
matches equation ((n_rho*mom*tanh(mom*B/(kb*T))))
matches equation ((n_rho*mom*tanh(mom*B/(kb*T))))
matches equation ((n_rho*mom*tanh(mom*B/(kb*T))))
matches equation ((n_rho*mom*tanh(mom*B/(kb*T))))
matches equation ((n_rho*mom*tanh(mom*B/(kb*T))))
matches equation ((n_rho*p_d**2*Ef/(3*kb*T)))
matches equation ((n_rho*mom*tanh(mom*B/(kb*T))))
matches equation ((n_rho

matches equation ((I_0*(exp(q*Volt/(kb*T))-1)))
matches equation ((I_0*(exp(q*Volt/(kb*T))-1)))
8  out of  10
formula:  2*U*(1-cos(k*d))
matches equation ((2*U*(1-cos(k*d))))
matches equation ((2*U*(1-cos(k*d))))
matches equation ((2*U*(1-cos(k*d))))
matches equation ((2*U*(1-cos(k*d))))
matches equation ((2*U*(1-cos(k*d))))
matches equation ((2*U*(1-cos(k*d))))
matches equation ((2*U*(1-cos(k*d))))
matches equation ((2*U*(1-cos(k*d))))
matches equation ((2*U*(1-cos(k*d))))
matches equation ((2*U*(1-cos(k*d))))
10  out of  10
formula:  (h/(2*pi))**2/(2*E_n*d**2)
matches equation (((h/(2*pi))**2/(2*E_n*d**2)))
matches equation (((h/(2*pi))**2/(2*E_n*d**2)))
matches equation (((h/(2*pi))**2/(2*E_n*d**2)))
matches equation (((h/(2*pi))**2/(2*E_n*d**2)))
matches equation (((h/(2*pi))**2/(2*E_n*d**2)))
matches equation (((h/(2*pi))**2/(2*E_n*d**2)))
matches equation (((h/(2*pi))**2/(2*E_n*d**2)))
matches equation (((h/(2*pi))**2/(2*E_n*d**2)))
matches equation (((h/(2*pi))**2/(2*E_n*d**2)))

In [6]:
np.savetxt('brute_force.csv', overall_results_brute_force, delimiter=',', fmt = '%s') 