In [20]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import pearsonr

In [25]:
np.random.rand(3, 2, dtype='float')

TypeError: rand() got an unexpected keyword argument 'dtype'

In [21]:
class Simulator:
    def __init__(self):
        self.x_train, self.y_train = None, None
        self.x_test, self.y_test = None, None
        self.model = None
    
    def generate_train(self, n, p):
        self._generate_data(n, p)
        self._train()
    
    def _generate_data(self,n, p):
        self.x_train = pd.DataFrame(np.random.rand(n, p, dtype))
        self.y_train = np.random.rand(n, 1).astype(np.float16)
        self.x_test = pd.DataFrame(np.random.rand(n, p))
        self.y_test = np.random.rand(n, 1).astype(np.float16)
        
    def _train(self):
        self.model = RandomForestRegressor(max_depth=5, random_state=0, n_estimators=50)
        self.model.fit(self.x_train ,self.y_train)
        
    def evalute_train_test(self):
        mae_train, pcc_train = self._evaluate(self.x_train, self.y_train)
        mae_test, pcc_test = self._evaluate(self.x_test, self.y_test)
        return mae_train, pcc_train, mae_test, pcc_test
    
    def _evaluate(self,x, y):
        y_hat = self.model.predict(x)
#         plt.scatter(y, y_hat)
#         plt.show()
        pcc = pearsonr(y, y_hat)
        return mean_squared_error(y, y_hat), pcc[0]

In [22]:
simulator = Simulator()

In [26]:
simulator.x_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0.738914,0.821333,0.392434,0.302929,0.731424,0.987838,0.056737,0.932778,0.771957,0.551042,...,0.428768,0.843778,0.880172,0.842483,0.020998,0.158568,0.423596,0.716609,0.239153,0.434414
1,0.171815,0.375275,0.329601,0.905359,0.673053,0.809716,0.867832,0.243554,0.075103,0.748811,...,0.515526,0.172732,0.771311,0.160730,0.078013,0.739971,0.041421,0.453835,0.598082,0.457963
2,0.822667,0.474186,0.003126,0.281423,0.640170,0.201893,0.080002,0.506818,0.217948,0.239518,...,0.561898,0.537647,0.050058,0.104335,0.834292,0.278264,0.007048,0.460494,0.380207,0.664944
3,0.373775,0.982262,0.439441,0.251133,0.702917,0.599619,0.933451,0.613711,0.305897,0.930859,...,0.815183,0.555832,0.465713,0.208423,0.099310,0.438979,0.568839,0.201233,0.912461,0.462678
4,0.257840,0.227456,0.084550,0.535638,0.415564,0.372663,0.555387,0.813214,0.723215,0.908226,...,0.200332,0.427646,0.688791,0.526931,0.864408,0.261520,0.928606,0.794242,0.774542,0.458565
5,0.394365,0.748386,0.867012,0.143541,0.420110,0.535799,0.721612,0.041878,0.919889,0.157583,...,0.968331,0.121830,0.039258,0.179604,0.636287,0.724444,0.042100,0.256018,0.230525,0.130462
6,0.308449,0.934859,0.002268,0.879525,0.135112,0.424155,0.288303,0.377016,0.446023,0.257910,...,0.835720,0.764348,0.067190,0.495429,0.000025,0.311608,0.727537,0.169614,0.661522,0.668011
7,0.150920,0.266053,0.676474,0.570173,0.428494,0.777915,0.272433,0.329544,0.579621,0.408418,...,0.784458,0.229100,0.700735,0.250522,0.096860,0.479075,0.521806,0.873111,0.695770,0.544043
8,0.791512,0.393293,0.780125,0.223905,0.391170,0.303592,0.712714,0.441963,0.000679,0.604129,...,0.072332,0.308936,0.899983,0.208871,0.349422,0.080934,0.469899,0.456927,0.252454,0.738290
9,0.377804,0.713753,0.714460,0.884823,0.257817,0.925832,0.270380,0.897386,0.599117,0.052709,...,0.952169,0.924061,0.157156,0.033952,0.994681,0.002547,0.036082,0.410562,0.474233,0.337425


In [23]:
pccs_train, pccs_test = [], []

for p in [5e1, 5e2]:#, 5e3, 5e4]:
    simulator.generate_train(n=int(1e3), p = int(p))
    mae_train, pcc_train, mae_test, pcc_test = simulator.evalute_train_test()
    # print(mae_train, mae_test)
    print(pcc_train, pcc_test)
    pccs_train.append(pcc_train)
    pccs_test.append(pcc_test)



TypeError: No loop matching the specified signature and casting
was found for ufunc add

In [14]:
simulator.x_test

array([[0.4258 , 0.562  , 0.2437 , ..., 0.4355 , 0.605  , 0.814  ],
       [0.04037, 0.1055 , 0.9893 , ..., 0.9116 , 0.629  , 0.6934 ],
       [0.638  , 0.5923 , 0.7734 , ..., 0.387  , 0.0908 , 0.788  ],
       ...,
       [0.4424 , 0.0323 , 0.583  , ..., 0.6226 , 0.8247 , 0.5054 ],
       [0.2104 , 0.545  , 0.6147 , ..., 0.781  , 0.1539 , 0.657  ],
       [0.0826 , 0.08484, 0.1998 , ..., 0.8096 , 0.7583 , 0.7837 ]],
      dtype=float16)