In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from IPython.display import display, Math, Latex

In [2]:
class RecursiveFormula(nn.Module):
    """
    Class used for representing formulas
    
    Attributes:
        depth
        num_variables
        powers
        lambdas - list of linear coefficients
        subformulas - list of subformulas of smaller depth, which are used for computing
        parameters - list of all learnable parameters of formula
        last_subformula - additional subformula, which is not multiplied by variable
    """
    
    def __init__(self, depth=0, num_variables=1):
        super(RecursiveFormula, self).__init__()
        self.depth = depth
        self.num_variables = num_variables
        self.powers = []
        self.lambdas = []
        self.subformulas = []
        self.parameters = []
        # When depth is zero, formula is just a real number
        if depth == 0:
            new_lambda = (2 * torch.randn(1, 1)).requires_grad_(True)
            self.lambdas.append(new_lambda)
            self.parameters.append(new_lambda)
        else:
            for i in range(self.num_variables):
                # When depth is 1, we do not need to create subformulas, since they would be just real numbers
                if self.depth != 1:
                    subformula = RecursiveFormula(self.depth - 1, self.num_variables)
                    self.subformulas.append(subformula)
                    self.parameters.extend(subformula.parameters)
                new_lambda = (2 * torch.randn(1, 1)).requires_grad_(True)
                new_power = (2 * torch.randn(1, 1)).requires_grad_(True)
                self.lambdas.append(new_lambda)
                self.powers.append(new_power)
                self.parameters.extend([new_power, new_lambda])
            subformula = RecursiveFormula(self.depth - 1, self.num_variables)
            self.last_subformula = subformula
            self.parameters.extend(subformula.parameters)
                                
    def forward(self, x):
        """
        Iterate over subformulas, recursively computing result using results of subformulas
        """
        # When depth is 0, we just return the corresponding number
        if self.depth == 0:
            return torch.full((x.shape[0], 1), self.lambdas[0].item())
        
        ans = torch.zeros(x.shape[0], 1)
        for i in range(self.num_variables):
            x_powered = torch.t(x[:, i]**self.powers[i])
            subformula_result = torch.ones((x.shape[0], 1))
            # When depth is 1, we do not need to compute subformulas
            if self.depth != 1:
                subformula_result = self.subformulas[i](x)
            assert subformula_result.shape == (x.shape[0], 1)
            assert x_powered.shape == (x.shape[0], 1)
            ans += self.lambdas[i] * x_powered * subformula_result
            
        ans += self.last_subformula(x)
        return ans

    def __repr__(self):
        """
        Return tex-style string, recursively combining result from representation of subformulas
        """
        if self.depth == 0:
            return "{:.3}".format(self.lambdas[0].item())
        
        ans = ["("]
        for i in range(self.num_variables):
            if i != 0 and self.lambdas[i] > 0:
                ans.append(" + {:.3}".format(self.lambdas[i].item()))
            else:
                ans.append("{:.3}".format(self.lambdas[i].item()))            
            ans.append("x_{}^".format(i + 1) + "{" + "{:.3}".format(self.powers[i].item()) + "}")
            if self.depth != 1:
                ans.append(str(self.subformulas[i]))
        if self.last_subformula.lambdas[0] > 0:        
            ans.append(" + ")
        ans.append(str(self.last_subformula))
        ans.append(")")
        ans = ''.join(ans)
        return ans

In [3]:
def info(formula):
    print("depth: {}, number of variables: {}, total parameters: {}".format(
        formula.depth, formula.num_variables, len(formula.parameters)))
    
def PrintFormula(formula, mode="slow"):
#     info(network)
    if mode == "slow":
        display(Math(str(formula)))   
    else:
        print(formula)

Обучение

Пока модель быстро входит в локальный экстремум, и результат сильно зависит от начальной инициализации весов. До последнего свободного члена градиент не доходит. Буду пробовать разные оптимизаторы и гиперпараметры. Возможно, параметры powers и lambdas имеют разную природу, и надо их регуляризовать по отдельности с разными коэффициентами, которые тоже надо будет подобрать. Модели может быть выгодно просто делать степени большими по модулю отрицательными чистами. Еще надо добавить обучение по батчам.

In [4]:
def LearnFormula(X, y, depth=1, num_epochs=3000, verbose=True):
    formula = RecursiveFormula(depth, X.shape[1])
    # create your optimizer
    optimizer = optim.Rprop(formula.parameters)
    criterion = nn.MSELoss()
    for epoch in range(num_epochs):
        optimizer.zero_grad()   # zero the gradient buffers
        output = formula(X)
        loss = criterion(output, y) 
        loss.backward()
        if verbose and (epoch + 1) % 200 == 0:
            print("Epoch {}, current loss {:.3}, current formula ".format(epoch + 1, loss.item()), end='')
            PrintFormula(formula, "fast")       
        optimizer.step()  
    return formula

In [5]:
X1 = torch.rand(100, 1) * 10
y1 = 2.5 * X1**2 + 3

X2 = torch.rand(100, 2) * 10
y2 = 2.5 * X2[:, 0]**2 + 0.333 * X2[:, 1]**0.5 + 3    
# y = 1.2 * X[:, 0]**2.1 * X[:, 2] + 2.5 * X[:, 1]**(-3) + 1/2 * X[:, 2]**0.3333 * X[:, 1]**(-4)

In [6]:
PrintFormula(LearnFormula(X1, y1))

Epoch 200, current loss 8.55, current formula (2.14x_1^{2.08} + 0.968)
Epoch 400, current loss 1.73, current formula (2.58x_1^{1.99} + 0.968)
Epoch 600, current loss 1.03, current formula (2.73x_1^{1.96} + 0.968)
Epoch 800, current loss 0.952, current formula (2.78x_1^{1.96} + 0.968)
Epoch 1000, current loss 0.948, current formula (2.8x_1^{1.95} + 0.968)
Epoch 1200, current loss 0.947, current formula (2.8x_1^{1.95} + 0.968)
Epoch 1400, current loss 0.947, current formula (2.8x_1^{1.95} + 0.968)
Epoch 1600, current loss 0.947, current formula (2.8x_1^{1.95} + 0.968)
Epoch 1800, current loss 0.947, current formula (2.8x_1^{1.95} + 0.968)
Epoch 2000, current loss 0.947, current formula (2.8x_1^{1.95} + 0.968)
Epoch 2200, current loss 0.947, current formula (2.8x_1^{1.95} + 0.968)
Epoch 2400, current loss 0.947, current formula (2.8x_1^{1.95} + 0.968)
Epoch 2600, current loss 0.947, current formula (2.8x_1^{1.95} + 0.968)
Epoch 2800, current loss 0.947, current formula (2.8x_1^{1.95} + 0.

<IPython.core.display.Math object>

In [8]:
PrintFormula(LearnFormula(X2, y2))

Epoch 200, current loss 6.11e+03, current formula (85.2x_1^{6.77e-08} + 6.85x_2^{8.79e-06} + 2.81)
Epoch 400, current loss 6.11e+03, current formula (85.2x_1^{6.77e-08} + 6.85x_2^{6.8e-06} + 2.81)
Epoch 600, current loss 6.11e+03, current formula (85.2x_1^{6.77e-08} + 6.85x_2^{6.8e-06} + 2.81)
Epoch 800, current loss 6.11e+03, current formula (85.2x_1^{6.77e-08} + 6.85x_2^{6.8e-06} + 2.81)
Epoch 1000, current loss 6.11e+03, current formula (85.2x_1^{6.77e-08} + 6.85x_2^{6.8e-06} + 2.81)
Epoch 1200, current loss 6.11e+03, current formula (85.2x_1^{6.77e-08} + 6.85x_2^{6.8e-06} + 2.81)
Epoch 1400, current loss 6.11e+03, current formula (85.2x_1^{6.77e-08} + 6.85x_2^{6.8e-06} + 2.81)
Epoch 1600, current loss 6.11e+03, current formula (85.2x_1^{6.77e-08} + 6.85x_2^{6.8e-06} + 2.81)
Epoch 1800, current loss 6.11e+03, current formula (85.2x_1^{6.77e-08} + 6.85x_2^{6.8e-06} + 2.81)
Epoch 2000, current loss 6.11e+03, current formula (85.2x_1^{6.77e-08} + 6.85x_2^{6.8e-06} + 2.81)
Epoch 2200, c

<IPython.core.display.Math object>