In [2]:
import numpy as np 
import sympy as sp
from IPython.display import Latex
from sklearn.feature_extraction import DictVectorizer
from collections import defaultdict

In [3]:
def decay_an_element(mol):
    """
    Render a single chemical element as a LaTeX fragment.

    mol: a sequence whose first item is the element symbol (a string) and
         whose last item is its count, e.g. ("H", 2)
    return a Latex string: the symbol in upright text, followed by the count
    as a subscript unless the count equals 1
    """
    symbol, count = mol[0], mol[-1]
    element = r"\text{" + symbol + r"}"
    if count == 1:
        return element
    # Brace the subscript: without braces LaTeX subscripts only the first
    # character, so a count such as 12 would render as sub-1 followed by 2.
    return element + "_{" + str(count) + "}"

In [4]:
def decay_an_group_element(mol):
    """
    Render an element or a (possibly nested) group of elements as LaTeX.

    mol: a sequence whose last item is a count and whose first item is either
         an element symbol (a string), e.g. ("H", 2), or a list of such
         sequences forming a group, e.g. ([("O", 1), ("H", 1)], 3)
    return a Latex string: a plain element is delegated to decay_an_element;
    a group is the concatenation of its members, wrapped in parentheses with
    a subscript when its count is not 1
    """
    content, count = mol[0], mol[-1]
    if isinstance(content, str):
        return decay_an_element(mol)
    latex_string = "".join(decay_an_group_element(item) for item in content)
    if count == 1:
        # A group that occurs once needs neither parentheses nor a subscript.
        return latex_string
    # Brace the subscript so that multi-digit counts are subscripted in full;
    # an unbraced "_12" would only subscript the leading "1".
    return "(" + latex_string + ")_{" + str(count) + "}"

In [5]:
def chemical_formula(mol):
    """
    Build the LaTeX representation of a whole chemical formula.

    mol: a list of elements / groups, each in the format accepted by
         decay_an_group_element
    return a Latex string made of the rendered items, concatenated in order
    """
    return "".join(decay_an_group_element(part) for part in mol)

In [6]:
def get_list_element_in_group(mol):
    """
    Expand a group so that every element carries its effective count.

    mol: a sequence whose last item is a count and whose first item is either
         an element symbol (a string) or a list of nested elements / groups
    return the element itself as a tuple when mol is a plain element;
    otherwise a (possibly nested) list of the members, each with its count
    multiplied by the count of the enclosing group
    """
    body, multiplier = mol[0], mol[-1]
    if type(body) is str:
        return tuple(mol)
    # Push the group's multiplier down into each member before recursing.
    return [
        get_list_element_in_group(
            tuple(member[:-1]) + (member[-1] * multiplier,)
        )
        for member in body
    ]

In [7]:
def flatten(x):
    """
    Flatten an arbitrarily nested iterable into a single list.

    x: an iterable whose items may themselves be iterables
    return a flat list of the leaves in their original order; strings are
    kept whole as leaves instead of being split into characters
    """
    def _leaves(node):
        # Walk depth-first, yielding everything that is not a nested iterable.
        for entry in node:
            if isinstance(entry, str) or not hasattr(entry, "__iter__"):
                yield entry
            else:
                yield from _leaves(entry)

    return list(_leaves(x))

def get_list_element_chemical_formula(mol):
    """
    Count how many atoms of each element a chemical formula contains.

    mol: a list of elements / groups, each in the format accepted by
         get_list_element_in_group
    return a dict mapping each element symbol to its total count
    """
    expanded = [get_list_element_in_group(part) for part in mol]
    # After flattening, symbols sit at even positions and counts at odd ones.
    sequence = flatten(expanded)
    symbols = sequence[::2]
    counts = sequence[1::2]
    totals = {}
    for symbol, count in zip(symbols, counts):
        totals[symbol] = totals.get(symbol, 0) + count
    return totals


In [8]:
def balance_chemical_equation(lhs, rhs):
    """
    Balance a chemical equation and render it as a LaTeX string.

    lhs: a list of reactant formulas, each in the format accepted by
         chemical_formula
    rhs: a list of product formulas in the same format
    return a Latex string: the reactants joined by plus signs, an arrow, then
    the products joined by plus signs, each formula prefixed with its integer
    coefficient (a coefficient equal to 1 is not written)
    """
    reactant_counts = [get_list_element_chemical_formula(formula) for formula in lhs]
    # Product counts are negated so that a balanced equation makes every
    # element's row sum to zero.
    product_counts = [
        {
            symbol: count * -1
            for symbol, count in get_list_element_chemical_formula(formula).items()
        }
        for formula in rhs
    ]

    vectorizer = DictVectorizer(sparse=False)
    # Transposed so that rows are elements and columns are compounds.
    composition = vectorizer.fit_transform(reactant_counts + product_counts).T
    composition = np.array(composition).astype(int)

    # The first nullspace basis vector holds the (rational) coefficients;
    # scaling by the lcm of the denominators turns them into integers.
    coeffs = sp.Matrix(composition).nullspace()[0]
    coeffs *= sp.lcm([entry.q for entry in coeffs])

    def _term(position, formula):
        # One side's term: coefficient (omitted when 1) followed by the formula.
        multiplier = coeffs[position]
        prefix = "" if multiplier == 1 else str(multiplier)
        return prefix + chemical_formula(formula)

    reactants = [_term(index, formula) for index, formula in enumerate(lhs)]
    products = [
        _term(index, formula)
        for index, formula in enumerate(rhs, start=len(lhs))
    ]

    # An empty side contributes nothing at all (no arrow, no trailing space).
    lhs_sol = (r" + ".join(reactants) + r" \to ") if reactants else ""
    rhs_sol = (r" + ".join(products) + r" ") if products else ""
    return lhs_sol + rhs_sol

In [9]:
# Each formula is a list of (symbol, count) tuples.
H2 = [("H", 2)]
O2 = [("O", 2)]
H2O = [("H", 2), ("O", 1)]

# Balance H2 + O2 -> H2O, then show the result as display-math LaTeX.
sol = balance_chemical_equation([H2, O2], [H2O])
Latex("$$" + sol + "$$")


<IPython.core.display.Latex object>

In [10]:
# A (list, n) tuple encodes a group of elements repeated n times,
# e.g. ([("O", 1), ("H", 1)], 3) stands for (OH)3.
Al_OH3 = [("Al", 1), ([("O", 1), ("H", 1)], 3)]
H2_SO4 = [("H", 2), ([("S", 1), ("O", 4)], 1)]
Al2_SO43 = [("Al", 2), ([("S", 1), ("O", 4)], 3)]
H2O = [("H", 2), ("O", 1)]

# Balance Al(OH)3 + H2SO4 -> Al2(SO4)3 + H2O and display it.
equation = balance_chemical_equation([Al_OH3, H2_SO4], [Al2_SO43, H2O])
Latex("$$" + equation + "$$")

<IPython.core.display.Latex object>

In [11]:
# Balance NaOH + HCl -> NaCl + H2O and display it.
Na_OH = [("Na", 1), ([("O", 1), ("H", 1)], 1)]
HCl = [("H", 1), ("Cl", 1)]
Na_Cl = [("Na", 1), ("Cl", 1)]
H2O = [("H", 2), ("O", 1)]
equation = balance_chemical_equation([Na_OH, HCl], [Na_Cl, H2O])
Latex("$$" + equation + "$$")

<IPython.core.display.Latex object>

In [12]:
# Balance Al + HNO3 -> Al(NO3)3 + H2O + NH4NO3 and display it.
Al = [("Al", 1)]
HNO3 = [("H", 1), ([("N", 1), ("O", 3)], 1)]
# NOTE: NO is defined here but is not passed to the call below.
NO = [("N", 1), ("O", 1)]
NH4NO3 = [([("N", 1),("H", 4)], 1), ([("N", 1), ("O", 3)], 1)]
AlNO33 = [("Al", 1), ([("N", 1), ("O", 3)], 3)]
H2O = [("H", 2), ("O", 1)]
equation = balance_chemical_equation([Al, HNO3], [AlNO33, H2O, NH4NO3])
Latex("$$" + equation + "$$")

<IPython.core.display.Latex object>

In [13]:
# Balance Fe3O4 + HCl -> FeCl3 + FeCl2 + H2O and display it.
Fe3_O4 = [("Fe", 3), ("O", 4)]
HCl = [("H", 1), ("Cl", 1)]
FeCl2 = [("Fe", 1), ("Cl", 2)]
FeCl3 = [("Fe", 1), ("Cl", 3)]
H2O = [("H", 2), ("O", 1)]

equation = balance_chemical_equation([Fe3_O4, HCl], [FeCl3, FeCl2, H2O])
Latex("$$" + equation + "$$")

<IPython.core.display.Latex object>

In [14]:
# Balance CH3CHO + Ag(NH3)2OH -> Ag + NH3 + CH3COONH4 + H2O and display it.
CH3_CHO = [([("C", 1), ("H", 3)], 1), ([("C", 1), ("H", 1), ("O", 1)], 1)]
Ag_NH3_2_OH = [("Ag", 1), ([("N", 1), ("H", 3)], 2), ([("O", 1), ("H", 1)], 1)]
# Written as a flat list with repeated symbols; the per-element counts are
# summed when the formula is converted to a dict for balancing.
CH3COONH4 = [("C",1),("H",3),("C",1),("O",1),("O",1),("N",1),("H",4)]
Ag = [("Ag", 1)]
H2O = [("H", 2), ("O", 1)]
NH3 = [("N", 1), ("H", 3)]
equation = balance_chemical_equation([CH3_CHO, Ag_NH3_2_OH], [Ag, NH3, CH3COONH4, H2O])
Latex("$$" + equation + "$$")

<IPython.core.display.Latex object>