In [33]:
import numpy as np

from invargen.data.expression import *
from dso import DeepSymbolicRegressor
from dso.library import Token, HardCodedConstant
from dso import functions
from invargen.models.alpha_pool import AlphaPool
from invargen.utils import reseed_everything
from invargen_generic.operators import funcs as generic_funcs
from invargen_generic.features import *

import tensorflow as tf

In [34]:
def prime_numbers(n):
    """Return every prime p with 2 <= p <= n, in ascending order.

    Each candidate is tested by trial division with the integers from 2 up to
    int(sqrt(candidate)). An empty list is returned when n < 2.
    """
    def _has_no_divisor(candidate):
        # A candidate is prime when nothing in [2, int(sqrt(candidate))] divides it.
        upper = int(candidate ** 0.5) + 1
        return all(candidate % divisor != 0 for divisor in range(2, upper))

    return [candidate for candidate in range(2, n + 1) if _has_no_divisor(candidate)]


# Primes up to 50; in the visible cells this list is only referenced by
# commented-out code.
prime_list = prime_numbers(50)


In [35]:
# Token library handed to the dso package: one Token per generic operator,
# followed by one terminal Token per input variable.
funcs = dict(
    (func.name, Token(complexity=1, **func._asdict()))
    for func in generic_funcs
)

# Disabled: hard-coded prime constants as extra tokens.
#for v in prime_list:
    #funcs[f'Constant({v})'] = HardCodedConstant(name=f'Constant({v})', value=v)

# Terminals are keyed 'x1', 'x2', 'x3' while the Token itself is named after
# the variable ('a', 'b', 'c'); input_var is the zero-based column index.
for i, feature in enumerate(['a', 'b', 'c']):
    funcs[f'x{i+1}'] = Token(
        name=feature,
        arity=0,
        complexity=1,
        function=None,
        input_var=i,
    )

In [36]:
def const(cls):
    """Build a unary token function that multiplies its operand by a constant.

    The returned callable works on expression *strings*, not numbers: given a
    sequence ``a`` it returns a NumPy array whose i-th entry is the text
    ``Mul(Constant(<cls>),<a[i]>)``.

    Args:
        cls: the constant to embed; it is converted with ``str()``.

    Returns:
        A one-argument function mapping a sequence to ``np.ndarray``.
    """
    def _calc(a):
        wrapped = []
        for position in range(len(a)):
            wrapped.append(f'Mul(Constant({str(cls)}),{a[position]})')
        return np.array(wrapped)

    return _calc
        
# Constants that get their own unary "multiply by k" token.
# range(4, 5) yields only 4, so a single token named '4' is registered.
const_ops = list(range(4, 5))

#funcs = {}
for op in const_ops:
    funcs[str(op)] = Token(
        function=const(str(op)),
        name=str(op),
        arity=1,
        complexity=1,
    )

In [37]:
# Show the registered token names in insertion order.
list(funcs)

['Add', 'Sub', 'Mul', 'x1', 'x2', 'x3', '4']

In [20]:
# NOTE(review): `seed` is not referenced by any other visible cell, so the run
# is not actually seeded — confirm whether a seeding call was intended.
seed = 4

import torch
from torch import Tensor

from invargen_qlib.poly_data import PolyData

device = torch.device('cpu')

# `data` is the reference dataset. data_1 .. data_4 are separately constructed
# copies that the following cells overwrite with transformed versions.
data = PolyData(device=device)
data_1, data_2, data_3, data_4 = (PolyData(device=device) for _ in range(4))

In [21]:
# First transform, labelled (0, 1 // -1, 0): swap rows 0 and 2 of
# data_1.data[0] and negate row 1.
# NOTE(review): presumably the rows are the coefficients (a, b, c), so this is
# (a, b, c) -> (c, -b, a) — confirm the row order against PolyData.
# The edit is in place, so running this cell a second time undoes it.

# List indexing on the right-hand side produces a copy, which is what makes
# this one-line swap safe.
data_1.data[0][[0, 2]] = data_1.data[0][[2, 0]]

data_1.data[0][[1]] = -data_1.data[0][[1]]

data_1.data

tensor([[[  1.,   2.,   2.,  ...,  51.,  55.,  55.],
         [ -0.,  -1.,  -0.,  ...,  -1.,  35., -35.],
         [  1.,   1.,   1.,  ...,  49.,  51.,  51.]]])

In [22]:
# Second transform, labelled (1, 1 // 0, 1):
#   row 2 <- row 2 + row 1 + row 0
#   row 1 <- row 1 + 2 * row 0
# NOTE(review): presumably (a, b, c) -> (a, b + 2a, a + b + c) — confirm the
# row order against PolyData.
# The edit is in place and not idempotent: re-running applies it again.

# Row 2 must be updated first because it needs the *untransformed* row 1.
data_2.data[0][[2]] = data_2.data[0][[2]] + 1 * data_2.data[0][[1]] + data_2.data[0][[0]]

data_2.data[0][[1]] = data_2.data[0][[1]] + data_2.data[0][[0]] * 2

data_2.data

tensor([[[  1.,   1.,   1.,  ...,  49.,  51.,  51.],
         [  2.,   3.,   2.,  ...,  99.,  67., 137.],
         [  2.,   4.,   3.,  ..., 101.,  71., 141.]]])

In [23]:
# Third transform: every entry of the reference data multiplied by 2.
# Unlike the in-place edits on data_1 / data_2, this rebinds data_3.data to a
# new tensor computed from `data`, so the cell can be re-run safely.

data_3.data = data.data * 2

data_3.data

tensor([[[  2.,   2.,   2.,  ...,  98., 102., 102.],
         [  0.,   2.,   0.,  ...,   2., -70.,  70.],
         [  2.,   4.,   4.,  ..., 102., 110., 110.]]])

In [24]:
# Fourth transform: every entry of the reference data multiplied by 4.
# This rebinds data_4.data to a new tensor computed from `data`, so the cell
# can be re-run safely.

data_4.data = data.data * 4

data_4.data

tensor([[[   4.,    4.,    4.,  ...,  196.,  204.,  204.],
         [   0.,    4.,    0.,  ...,    4., -140.,  140.],
         [   4.,    8.,    8.,  ...,  204.,  220.,  220.]]])

In [38]:
# X holds the three variable *names* as strings and y a single dummy value.
# NOTE(review): presumably these are placeholders, since the reward comes from
# the custom metric callback and not from fitting y — confirm.
X = np.array([['a', 'b', 'c']])
y = np.array([[1]])

# Replace dso's function map with the token library built above.
functions.function_map = funcs

# NOTE(review): this pool is passed to Ev, but Ev.alpha_ev_fn never calls it;
# the notebook keeps its own pool state in module-level buffers instead.
pool = AlphaPool(capacity=10, poly_data=data, ic_lower_bound=None)

In [39]:
# Feature handles for the three inputs.
# NOTE(review): presumably the global names a, b, c are also what eval(key)
# resolves inside Ev.alpha_ev_fn — do not rename without checking.
b = Feature(FeatureType.B)
c = Feature(FeatureType.C)
a = Feature(FeatureType.A)

# Reference expression b^2 - 4ac.
# NOTE: the name `exprs` is rebound to a list by a later cell.
exprs = b**2 - 4 * a * c

print(repr(exprs))

# Sanity check of the scaling criterion that Ev uses: with data_3 = 2 * data
# and data_4 = 4 * data, test f(data_4) * f(data) == f(data_3) ** 2 elementwise.
exprs.evaluate(data_4) * exprs.evaluate(data) == exprs.evaluate(data_3) **2

Sub(Pow($b,Constant(2)),Mul(Mul(Constant(4),$a),$c))


tensor([[True, True, True,  ..., True, True, True]])

In [42]:
from invargen.utils.correlation import batch_pearsonr, batch_spearmanr
from typing import Tuple, Optional

# Maximum number of expressions kept in the hand-rolled pool.
capacity = 20

# Parallel buffers with capacity + 1 slots each: the extra slot holds a newly
# added entry until _pop() evicts one.
exprs = [None] * (capacity + 1)        # expression objects
values = [None] * (capacity + 1)       # evaluated factor per expression
single_ics = np.zeros(capacity + 1)    # per-expression score
mutual_ics = np.eye(capacity + 1)      # pairwise correlations, unit diagonal
weights = np.zeros(capacity + 1)       # per-expression weight

# Number of slots currently in use.
size = 0


def _calc_ics(factor, ic_mut_threshold):
    """Correlate a candidate factor with every factor already in the pool.

    Args:
        factor: evaluated values of the candidate expression.
        ic_mut_threshold: upper bound on the (signed) mean Pearson correlation
            with any stored factor.

    Returns:
        A list with one correlation per stored factor, in slot order, or
        ``None`` as soon as one correlation exceeds the threshold.
    """
    correlations = []

    for stored in values[:size]:
        corr = batch_pearsonr(factor, stored).mean().item()
        if corr > ic_mut_threshold:
            # Too similar to something already stored: reject immediately.
            return None
        correlations.append(corr)

    return correlations


def _add_factor(expr, factor, ic_ret, ic_mut):
    """Append a new entry to the pool buffers in the first free slot.

    Args:
        expr: the expression object.
        factor: its evaluated values.
        ic_ret: its score; also used as the initial weight.
        ic_mut: correlations with the entries already stored, in slot order.

    Only the column ``mutual_ics[:slot, slot]`` is filled; the mirrored row is
    left untouched.
    """
    global size

    slot = size

    exprs[slot] = expr
    values[slot] = factor
    single_ics[slot] = ic_ret

    for row in range(slot):
        mutual_ics[row, slot] = ic_mut[row]

    size += 1

    weights[slot] = ic_ret
    

def _optimize():
    """Return the weights of the occupied slots.

    No optimisation is performed: the result is a slice (view) of the global
    ``weights`` array covering the first ``size`` entries.
    """
    occupied = slice(None, size)
    return weights[occupied]


def _pop():
    """Evict one entry when the pool holds more than ``capacity`` entries.

    The entry with the smallest absolute weight (searched over the whole
    ``weights`` array) is swapped into the last slot, index ``capacity``, and
    ``size`` is reset to ``capacity`` so that slot is no longer in use.
    Does nothing when the pool is not over capacity.
    """
    global size

    if size > capacity:
        weakest = np.argmin(np.abs(weights))
        _swap_idx(weakest, capacity)
        size = capacity
    

def _swap_idx(i, j):
    """Exchange pool slots ``i`` and ``j`` in every buffer."""
    # One-dimensional buffers: plain pairwise swap.
    for buffer in (exprs, values, single_ics, weights):
        buffer[i], buffer[j] = buffer[j], buffer[i]

    # Correlation matrix: swap the two columns, then the two rows. List
    # indexing on the right-hand side yields a copy, so the assignment is safe.
    forward, reverse = [i, j], [j, i]
    mutual_ics[:, forward] = mutual_ics[:, reverse]
    mutual_ics[forward, :] = mutual_ics[reverse, :]
    
    


class Ev:
    """Reward callback for DSO: scores one candidate expression per call.

    The evaluator reads the notebook-level datasets (``data``, ``data_1`` ..
    ``data_4``) and the hand-rolled pool state (``exprs``, ``weights``,
    ``size``, ``capacity``) as module globals. No ``global`` declaration is
    needed because those names are only read or mutated in place, never
    rebound, inside the methods.

    Attributes:
        cnt: counter initialised to 0; not updated anywhere in this class.
        pool: pool object supplied by the caller; stored but not used by
            ``alpha_ev_fn``.
        results: dict initialised empty; not written anywhere in this class.
    """

    def __init__(self, pool):
        self.cnt = 0
        self.pool = pool
        self.results = {}

    def alpha_ev_fn(self, key):
        """Evaluate the expression described by ``key`` and return its reward.

        Args:
            key: Python source text of the expression; it is built with
                ``eval`` against the notebook globals.

        Returns:
            ``0`` when the candidate is rejected (scaling check fails, the
            score is NaN, or it is too correlated with a stored factor);
            otherwise the score ``ic_ret`` as a float.
        """
        # NOTE(review): eval() executes arbitrary Python. Here `key` appears to
        # come from the DSO sampler, not external input — confirm before
        # reusing this callback elsewhere.
        expr = eval(key)

        factor = expr.evaluate(data)
        factor_1 = expr.evaluate(data_1)
        factor_2 = expr.evaluate(data_2)
        factor_3 = expr.evaluate(data_3)
        factor_4 = expr.evaluate(data_4)

        # Scaling check: elsewhere in the notebook data_3 and data_4 are set to
        # 2 * data and 4 * data. Require f(4p) * f(p) == f(2p)^2 on every
        # sample.
        ic = torch.sum(factor_4 * factor == torch.pow(factor_3, 2)) / data.n_polys
        if ic.item() < 1:
            return 0

        # Score: the worse of the two correlations between the factor on the
        # reference data and on the two transformed datasets.
        ic_ret = min(
            batch_pearsonr(factor, factor_1).mean().item(),
            batch_pearsonr(factor, factor_2).mean().item(),
        )

        # min() never returns None, so the old `ic_ret is None` test could not
        # fire. Reject NaN explicitly so an undefined correlation is neither
        # stored in the pool nor returned as a reward.
        if np.isnan(ic_ret):
            return 0

        # None means the factor is highly correlated with a stored factor.
        ic_mut = _calc_ics(factor, ic_mut_threshold=0.99)
        if ic_mut is None:
            return 0

        _add_factor(expr, factor, ic_ret, ic_mut)

        if size > 1:
            new_weights = _optimize()
            worst_idx = np.argmin(np.abs(new_weights))

            # Index `capacity` is the slot of a just-added overflow entry; if
            # it is the weakest, the new expression is dropped silently.
            if worst_idx != capacity:
                weights[:size] = new_weights
                print(f"[Pool +] {expr}")
                if size > capacity:
                    print(f"[Pool -] {exprs[worst_idx]}")

            _pop()

        return ic_ret

ev = Ev(pool)


# Configuration for DeepSymbolicRegressor. The lambda looks `ev` up at call
# time, so rebinding `ev` later changes which evaluator is used.
config = {
    'task': {
        'task_type': 'regression',
        'function_set': list(funcs.keys()),
        'metric': 'alphagen',
        'metric_params': [lambda key: ev.alpha_ev_fn(key)],
    },
    'training': {'n_samples': 1000, 'batch_size': 100, 'epsilon': 0.05},
    'prior': {'length': {'min_': 2, 'max_': 20, 'on': True}},
}

In [43]:
# Create the model and run the search; rewards come from the metric callback
# registered in `config`.
model = DeepSymbolicRegressor(config=config)
model.fit(X, y)

# NOTE(review): Ev never writes to `results` in the code shown, so this is
# expected to print an empty dict — confirm whether result logging was intended.
print(ev.results)

[x1, x2, x3]
-- BUILDING PRIOR START -------------
LengthConstraint: Sequences have minimum length 2.
                  Sequences have maximum length 20.
UniformArityPrior: Activated.
SoftLengthPrior: No description available.
-- BUILDING PRIOR END ---------------



  return tf.nn.rnn_cell.LSTMCell(num_units, initializer=initializer)
  logits = tf.layers.dense(outputs, units=self._output_size)


-- RUNNING ITERATIONS START -------------
[Pool +] Add(Mul(Constant(4),$a),Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),Add($c,Sub($b,Mul(Constant(4),Sub($a,Add($a,$c)))))))))
[Pool +] Mul($b,Mul(Mul(Mul(Constant(4),Sub(Mul(Constant(4),$b),$a)),Mul(Constant(4),$c)),Mul(Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),$c))),$a)))
[Pool +] Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),Mul(Mul(Constant(4),$a),$c))))
[Pool +] Sub(Mul(Constant(4),$c),Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),Sub(Sub(Mul(Constant(4),Mul(Constant(4),$c)),$a),$b)))))
[Pool +] Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),Mul($a,Mul($a,Mul(Mul(Constant(4),Add($c,Mul(Constant(4),$b))),$c))))))
[Pool +] Mul(Constant(4),Mul(Constant(4),Mul(Mul(Constant(4),$a),Mul(Constant(4),Mul(Constant(4),Mul(Mul(Constant(4),$b),$b))))))
[Pool +] Mul($c,Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),Mul(Mul(Constant(4),Mul(Constant(4),$c)),Add($c,$b))))))))
[Pool +] Mul(Constant(4),Mu

[Pool +] Mul($b,Mul(Constant(4),Add(Mul(Constant(4),Add($a,$c)),$b)))
[Pool -] Mul(Constant(4),Mul(Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),Mul($b,$b)))),Mul(Constant(4),Mul(Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),$c))),$a))))
-- RUNNING ITERATIONS START -------------
[Pool +] Mul(Constant(4),Mul(Constant(4),Sub(Mul(Constant(4),$c),Mul(Constant(4),Sub(Mul(Constant(4),$b),$a)))))
[Pool -] Mul(Constant(4),Mul(Constant(4),Mul(Mul(Constant(4),$a),Mul(Constant(4),Mul(Constant(4),Mul(Mul(Constant(4),$b),$b))))))
[Pool +] Sub(Mul(Constant(4),$c),Mul(Constant(4),Sub(Mul(Constant(4),Sub(Sub(Mul(Constant(4),$b),$b),$b)),Add($b,$a))))
[Pool -] Add($b,Mul(Constant(4),Mul(Constant(4),Sub(Mul(Constant(4),Mul(Constant(4),$a)),Sub($b,$c)))))
[Pool +] Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),Mul(Constant(4),Add(Mul(Constant(4),Add(Mul(Constant(4),$c),$c)),Mul(Constant(4),$a))))))))
[Pool -] Sub(Mul(Constant(4),$c),Mul(Constant(4),Sub(Mul(Constant(4)

2.0.1+cpu
