In [12]:
import numpy as np
import DTLearner as dt
import LinRegLearner as lrl

In [7]:
def best_4_lin_reg(seed=1489683273):
    """Generate a synthetic dataset with a purely linear feature/target relationship.

    The global NumPy RNG is seeded with ``seed``, so repeated calls with the
    same seed return identical arrays (and reset the global RNG state).

    Parameters
    ----------
    seed : int
        Value passed to ``np.random.seed``.

    Returns
    -------
    x : ndarray of shape (1000, 10)
        Standard-normal features.
    y : ndarray of shape (1000,)
        ``2*x0 + 3*x1 + ... + 11*x9`` plus standard-normal noise.
    """
    n_rows, n_cols = 1000, 10
    np.random.seed(seed)
    features = np.random.normal(size=(n_rows, n_cols))

    # Column i carries weight i + 2 (2, 3, ..., 11). The sum is accumulated
    # left to right so the floating-point evaluation order stays fixed.
    target = 2 * features[:, 0]
    for col in range(1, n_cols):
        target = target + (col + 2) * features[:, col]

    # The noise is drawn after the features so the RNG stream order is stable.
    target = target + np.random.normal(size=n_rows)
    return features, target

In [65]:
def best_4_dt(seed=1489683273):
    """Generate a synthetic dataset whose target is a step function of two features.

    Features 0 and 1 are bucketed into integer bin indices; the target is
    ``sin(bin of feature 0) + cos(bin of feature 1)`` plus standard-normal
    noise. Features 2-4 do not influence the target.

    NOTE: this notebook defines ``best_4_dt`` in more than one cell; whichever
    definition was executed last is the one in effect.

    Parameters
    ----------
    seed : int
        Value passed to ``np.random.seed`` (this reseeds the global NumPy RNG).

    Returns
    -------
    x : ndarray of shape (1000, 5)
        Standard-normal features.
    y : ndarray of shape (1000,)
        Noisy, piecewise-constant target.
    """
    np.random.seed(seed)
    features = np.random.normal(size=(1000, 5))

    # Ten evenly spaced edges on [-3, 3]; values outside fall into the end bins.
    edges = np.linspace(-3, 3, 10)
    bin_first = np.digitize(features[:, 0], edges)
    bin_second = np.digitize(features[:, 1], edges)

    # The target depends only on which bin each of the first two features is in.
    signal = np.sin(bin_first) + np.cos(bin_second)
    noise = np.random.normal(size=1000)
    return features, signal + noise

In [57]:
def best_4_dt(seed=1489683273):
    """Generate a noise-free synthetic dataset built from three gated nonlinear terms.

    Each term picks between a sine and a cosine of a powered feature,
    depending on the signs of two gating features. The three terms are summed.

    NOTE: this notebook defines ``best_4_dt`` in more than one cell; whichever
    definition was executed last is the one in effect.

    Parameters
    ----------
    seed : int
        Value passed to ``np.random.seed`` (this reseeds the global NumPy RNG).

    Returns
    -------
    x : ndarray of shape (1000, 10)
        Features drawn uniformly from [-1, 1).
    y : ndarray of shape (1000,)
        Sum of the three gated terms.
    """
    np.random.seed(seed)
    features = np.random.uniform(-1, 1, size=(1000, 10))

    # Boolean gates: each is true when one feature is positive and another negative.
    gate_a = (features[:, 0] > 0) & (features[:, 1] < 0)
    gate_b = (features[:, 4] > 0) & (features[:, 5] < 0)
    gate_c = (features[:, 8] > 0) & (features[:, 9] < 0)

    term_a = np.where(gate_a, np.sin(features[:, 2] ** 3), np.cos(features[:, 3] ** 2))
    term_b = np.where(gate_b, np.sin(features[:, 6] ** 2), np.cos(features[:, 7] ** 3))
    # The third term uses feature 9 in the gate and in both branches.
    term_c = np.where(gate_c, np.sin(features[:, 9] ** 2), np.cos(features[:, 9] ** 3))

    target = term_a + term_b + term_c
    return features, target



In [63]:
def best_4_dt(seed=1489683273):
    """Generate a noise-free synthetic dataset built from two gated nonlinear terms.

    Each term picks between a sine and a cosine of a powered feature,
    depending on the signs of two gating features. Both gates share the
    ``x[:, 1] < 0`` condition.

    NOTE: this notebook defines ``best_4_dt`` in more than one cell; whichever
    definition was executed last is the one in effect.

    Parameters
    ----------
    seed : int
        Value passed to ``np.random.seed`` (this reseeds the global NumPy RNG).

    Returns
    -------
    x : ndarray of shape (1000, 5)
        Features drawn uniformly from [-1, 1).
    y : ndarray of shape (1000,)
        Sum of the two gated terms.
    """
    np.random.seed(seed)
    features = np.random.uniform(-1, 1, size=(1000, 5))

    second_is_negative = features[:, 1] < 0
    gate_a = (features[:, 0] > 0) & second_is_negative
    gate_b = (features[:, 4] > 0) & second_is_negative

    term_a = np.where(gate_a, np.sin(features[:, 2] ** 3), np.cos(features[:, 3] ** 2))
    term_b = np.where(gate_b, np.sin(features[:, 0] ** 2), np.cos(features[:, 2] ** 3))

    return features, term_a + term_b





In [66]:
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Compare three regressors on the best_4_dt dataset by test-set RMSE:
# scikit-learn's decision tree, plus the DTLearner and LinRegLearner classes
# from the modules imported at the top of the notebook as `dt` and `lrl`.
#
# NOTE: best_4_dt is defined by more than one cell above; the definition that
# was executed most recently in the kernel is the one called here.
x, y = best_4_dt(seed=511)

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Reference model: scikit-learn's decision tree regressor.
skl_dt = DecisionTreeRegressor(random_state=0)
skl_dt.fit(x_train, y_train)
skl_dt_preds = skl_dt.predict(x_test)
skl_dt_rmse = np.sqrt(mean_squared_error(y_test, skl_dt_preds))
print(f"Sklearn Decision Tree RMSE: {skl_dt_rmse}")

# Project decision tree, constructed with leaf_size=1.
dt_learner = dt.DTLearner(leaf_size=1)
dt_learner.add_evidence(x_train, y_train)
dt_preds = dt_learner.query(x_test)
dt_rmse = np.sqrt(mean_squared_error(y_test, dt_preds))
print(f"Homemade Decision Tree RMSE: {dt_rmse}")

# Project linear regression learner.
lr_learner = lrl.LinRegLearner()
lr_learner.add_evidence(x_train, y_train)
lr_preds = lr_learner.query(x_test)
lr_rmse = np.sqrt(mean_squared_error(y_test, lr_preds))
print(f"Homemade Linear Regression RMSE: {lr_rmse}")


Sklearn Decision Tree RMSE: 1.588692829622705
Homemade Decision Tree RMSE: 1.6168098711177528
Homemade Linear Regression RMSE: 1.3400050592435633
