In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from IPython.display import display, Math, Latex

In [2]:
class RecursiveFormula(nn.Module):
    """
    Class used for representing formulas
    
    Attributes:
        depth
        num_variables
        powers
        lambdas - list of linear coefficients
        subformulas - list of subformulas of smaller depth, which are used for computing
        parameters - list of all learnable parameters of formula
        last_subformula - additional subformula, which is not multiplied by variable
    """
    
    def __init__(self, depth=0, num_variables=1):
        super(RecursiveFormula, self).__init__()
        self.depth = depth
        self.num_variables = num_variables
        self.powers = []
        self.lambdas = []
        self.subformulas = nn.ModuleList()
        # When depth is zero, formula is just a real number
        if depth == 0:
            new_lambda = nn.Parameter((2 * torch.randn((1, 1)))).requires_grad_(True)
            self.lambdas.append(new_lambda)
            self.register_parameter("lambda_const_{}".format(depth), new_lambda)
        else:
            for i in range(self.num_variables):
                # When depth is 1, we do not need to create subformulas, since they would be just real numbers
                if self.depth != 1:
                    subformula = RecursiveFormula(self.depth - 1, self.num_variables)
                    self.subformulas.append(subformula)
                new_lambda = nn.Parameter((2 * torch.randn((1, 1)))).requires_grad_(True)
                new_power = nn.Parameter((2 * torch.randn((1, 1)))).requires_grad_(True)
                self.register_parameter("lambda_{}".format(i), new_lambda)
                self.register_parameter("power_{}".format(i), new_power)
                self.lambdas.append(new_lambda)
                self.powers.append(new_power)
            self.last_subformula = RecursiveFormula(self.depth - 1, self.num_variables)
                                
    def forward(self, x):
        """
        Iterate over subformulas, recursively computing result using results of subformulas
        """
        # When depth is 0, we just return the corresponding number
        if self.depth == 0:
            return self.lambdas[0].repeat(x.shape[0], 1)
        
        ans = torch.zeros(x.shape[0], 1)
        for i in range(self.num_variables):
            x_powered = torch.t(x[:, i]**self.powers[i])
            subformula_result = torch.ones((x.shape[0], 1))
            # When depth is 1, we do not need to compute subformulas
            if self.depth != 1:
                subformula_result = self.subformulas[i](x)
            assert subformula_result.shape == (x.shape[0], 1)
            assert x_powered.shape == (x.shape[0], 1)
            ans += self.lambdas[i] * x_powered * subformula_result
            
        ans += self.last_subformula(x)
        return ans

    def __repr__(self):
        """
        Return tex-style string, recursively combining result from representation of subformulas
        """
        if self.depth == 0:
            return "{:.3}".format(self.lambdas[0].item())
        
        ans = ["("]
        for i in range(self.num_variables):
            if i != 0 and self.lambdas[i] > 0:
                ans.append(" + {:.3}".format(self.lambdas[i].item()))
            else:
                ans.append("{:.3}".format(self.lambdas[i].item()))            
            ans.append("x_{}^".format(i + 1) + "{" + "{:.3}".format(self.powers[i].item()) + "}")
            if self.depth != 1:
                ans.append(str(self.subformulas[i]))
        if self.last_subformula.lambdas[0] > 0:        
            ans.append(" + ")
        ans.append(str(self.last_subformula))
        ans.append(")")
        ans = ''.join(ans)
        return ans

In [3]:
def info(formula):
    print("depth: {}, number of variables: {}, total parameters: {}".format(
        formula.depth, formula.num_variables, len(formula.parameters)))
    
def PrintFormula(formula, mode="slow"):
#     info(network)
    if mode == "slow":
        display(Math(str(formula)))   
    else:
        print(formula)

Обучение

Пока модель быстро входит в локальный экстремум, и результат сильно зависит от начальной инициализации весов. До последнего свободного члена градиент не доходит. Буду пробовать разные оптимизаторы и гиперпараметры. Возможно, параметры powers и lambdas имеют разную природу, и надо их регуляризовать по отдельности с разными коэффициентами, которые тоже надо будет подобрать. Модели может быть выгодно просто делать степени большими по модулю отрицательными чистами. Еще надо добавить обучение по батчам.

In [17]:
def LearnFormula(X, y, depth=1, num_epochs=700, verbose=True, seed=13337):
#     torch.random.manual_seed(seed)
    formula = RecursiveFormula(depth, X.shape[1])
    # create your optimizer
    optimizer = optim.Rprop(formula.parameters(), lr=1e-3)
    criterion = nn.MSELoss()
    for epoch in range(num_epochs):
        optimizer.zero_grad()   # zero the gradient buffers
        output = formula(X)
        loss = criterion(output, y) 
        loss.backward()
        if verbose and (epoch + 1) % 20 == 0:
            print("Epoch {}, current loss {:.3}, current formula ".format(epoch + 1, loss.item()), end='')
            PrintFormula(formula, "fast")       
        optimizer.step()  
    return formula

In [12]:
X1 = torch.rand(100, 1) * 10
y1 = 2.5 * X1**2 + 3

X2 = torch.rand(100, 2) * 10
y2 = 2.5 * X2[:, 0]**2 + 0.333 * X2[:, 1]**0.5 + 3    
# y = 1.2 * X[:, 0]**2.1 * X[:, 2] + 2.5 * X[:, 1]**(-3) + 1/2 * X[:, 2]**0.3333 * X[:, 1]**(-4)

In [18]:
PrintFormula(LearnFormula(X1, y1))

Epoch 20, current loss 9.83e+03, current formula (0.579x_1^{-0.982}-2.67)
Epoch 40, current loss 4.42e+02, current formula (3.31x_1^{1.75} + 0.0637)
Epoch 60, current loss 3.49, current formula (3.42x_1^{1.86} + 0.167)
Epoch 80, current loss 3.15, current formula (3.39x_1^{1.86}-0.268)
Epoch 100, current loss 2.72, current formula (3.32x_1^{1.87}-0.0316)
Epoch 120, current loss 2.64, current formula (3.31x_1^{1.87}-0.00988)
Epoch 140, current loss 2.33, current formula (3.25x_1^{1.88} + 0.149)
Epoch 160, current loss 2.0, current formula (3.19x_1^{1.89} + 0.409)
Epoch 180, current loss 1.76, current formula (3.14x_1^{1.9} + 0.541)
Epoch 200, current loss 1.58, current formula (3.11x_1^{1.9} + 0.674)
Epoch 220, current loss 1.34, current formula (3.06x_1^{1.91} + 0.927)
Epoch 240, current loss 1.19, current formula (3.02x_1^{1.91} + 0.959)
Epoch 260, current loss 1.04, current formula (2.98x_1^{1.92} + 1.19)
Epoch 280, current loss 1.01, current formula (2.98x_1^{1.92} + 1.18)
Epoch 300

<IPython.core.display.Math object>

In [19]:
PrintFormula(LearnFormula(X2, y2))

Epoch 20, current loss 1.39e+04, current formula (-0.635x_1^{-1.12}-0.478x_2^{1.75}-0.466)
Epoch 40, current loss 6.52e+03, current formula (2.04x_1^{1.55} + 1.5x_2^{1.68} + 5.5)
Epoch 60, current loss 5.12e+03, current formula (1.7x_1^{1.21} + 1.53x_2^{0.667} + 63.8)
Epoch 80, current loss 5.05e+03, current formula (1.05x_1^{-0.0235} + 1.54x_2^{-0.027} + 79.9)
Epoch 100, current loss 5.05e+03, current formula (1.05x_1^{-0.0114} + 1.54x_2^{0.0136} + 79.7)
Epoch 120, current loss 5.05e+03, current formula (1.05x_1^{-0.000689} + 1.54x_2^{-0.000293} + 79.7)
Epoch 140, current loss 5.05e+03, current formula (1.05x_1^{5.35e-05} + 1.54x_2^{3.74e-06} + 79.7)
Epoch 160, current loss 5.05e+03, current formula (1.05x_1^{1.24e-06} + 1.54x_2^{5.94e-06} + 79.7)
Epoch 180, current loss 5.05e+03, current formula (1.05x_1^{2.44e-06} + 1.54x_2^{-1.63e-06} + 79.7)
Epoch 200, current loss 5.05e+03, current formula (1.05x_1^{2.44e-06} + 1.54x_2^{-1.63e-06} + 79.7)
Epoch 220, current loss 5.05e+03, current

<IPython.core.display.Math object>