In [1]:
import pandas as pd
from sklearn.linear_model import Lasso, LassoCV
from sklearn.model_selection import train_test_split

In [2]:
def linearReg(ticker):
    """Fit a cross-validated Lasso model for one ticker and print its coefficients.

    Reads ``<ticker>_weighted_joined.csv`` (first column used as the index),
    casts ``day`` to a categorical so that ``pd.get_dummies`` one-hot encodes
    it, drops the ``day_0`` dummy, and fits ``LassoCV`` with ``Movement`` as
    the target on the non-test part of an 80/20 split. The held-out 20% is
    produced by the split but is not used here.

    Parameters
    ----------
    ticker : str
        Filename prefix of the ``*_weighted_joined.csv`` file to load.

    Returns
    -------
    LassoCV
        The fitted estimator. A one-column table of its coefficients plus the
        intercept is printed as a side effect.
    """
    frame = pd.read_csv(ticker + '_weighted_joined.csv', index_col=0)

    categorical_cols = ["day"]
    frame[categorical_cols] = frame[categorical_cols].astype('category')

    # One-hot encode, then remove the day_0 dummy column.
    encoded = pd.get_dummies(frame, prefix_sep='_').drop(columns=["day_0"])

    # Fixed seed keeps the split reproducible; only the training part is used.
    train_part, _ = train_test_split(encoded, test_size=0.2, random_state=1)

    target_col = 'Movement'
    features = train_part.drop(columns=[target_col])
    target = train_part[target_col]

    fitted = LassoCV(cv=5, random_state=1, n_jobs=-1).fit(features, target)

    # Single-row frame of coefficients, transposed below so each predictor
    # (and the intercept) becomes its own row in the printed table.
    coef_table = pd.DataFrame(
        fitted.coef_.reshape(1, features.shape[1]),
        columns=features.columns.values,
    )
    coef_table['Intercept'] = fitted.intercept_

    print(coef_table.transpose())
    return fitted

In [3]:
# Fit on the TSLA data; the coefficient table is printed by the function and
# the returned estimator is echoed as the cell's value.
linearReg("TSLA")

                  0
score     -2.049171
day_1      1.069732
day_2      1.290476
day_3      0.245360
day_6      3.355953
Intercept -0.566053


LassoCV(cv=5, n_jobs=-1, random_state=1)

In [4]:
def linearRegPredict(ticker, train_ticker="TSLA"):
    """Predict with a Lasso model for ``ticker`` and export the results to Excel.

    A model is fitted via ``linearReg(train_ticker)``, then applied to
    ``<ticker>_test.csv`` (first column used as the index). Predictions are
    printed and written to ``<ticker>_linearReg.xlsx``.

    Parameters
    ----------
    ticker : str
        Filename prefix of the ``*_test.csv`` file to score and of the
        ``*_linearReg.xlsx`` file that is written.
    train_ticker : str, default "TSLA"
        Ticker whose ``*_weighted_joined.csv`` file the model is trained on.
        NOTE(review): the training ticker used to be hard-coded to "TSLA"
        regardless of ``ticker``; the default keeps that behaviour. Pass
        ``train_ticker=ticker`` if a per-ticker model is intended.

    Raises
    ------
    ValueError
        If the test file lacks a non-dummy column the model was fitted on.
    """
    clf_optimal = linearReg(train_ticker)

    df = pd.read_csv(ticker + "_test.csv", index_col=0)
    cvar = ["day"]
    df[cvar] = df[cvar].astype('category')
    df = pd.get_dummies(df, prefix_sep='_')

    # Align the test features with the columns seen at fit time. Without this,
    # a day that never occurs in the test file (missing dummy), an extra
    # column, or a different column order makes predict() fail.
    expected_columns = getattr(clf_optimal, "feature_names_in_", None)
    if expected_columns is None:
        # Estimator does not expose its fit-time column names: fall back to
        # dropping only the day_0 dummy.
        features = df.drop(columns=["day_0"])
    else:
        dummy_prefixes = tuple(name + "_" for name in cvar)
        missing_inputs = [
            col for col in expected_columns
            if col not in df.columns and not str(col).startswith(dummy_prefixes)
        ]
        if missing_inputs:
            # Only absent dummy columns may be zero-filled; a missing real
            # input must not be silently replaced by zeros.
            raise ValueError(
                ticker + "_test.csv is missing model input column(s): "
                + ", ".join(map(str, missing_inputs))
            )
        features = df.reindex(columns=expected_columns, fill_value=0)

    predicted_results = clf_optimal.predict(features)
    print(predicted_results)

    predictions = pd.DataFrame(predicted_results)
    predictions.to_excel(ticker + "_linearReg.xlsx")

In [5]:
# Score TSLA_test.csv and write TSLA_linearReg.xlsx.
linearRegPredict("TSLA")

                  0
score     -2.049171
day_1      1.069732
day_2      1.290476
day_3      0.245360
day_6      3.355953
Intercept -0.566053
[ 0.52714652 -0.5662152   2.52465304 -0.92517884 -0.31703178  0.10981467
 -1.68204871  2.20052318 -1.11732826  0.30625918  0.33310984 -0.8515908
  2.53222257 -0.11877157 -0.20505895  0.29668766  0.03112908  3.37428406
 -1.29575026  0.28287235  0.32619539 -1.40387262]


In [6]:
# Score NIO_test.csv; the model applied is the one fitted on TSLA data
# (same coefficient table as above).
linearRegPredict("NIO")

                  0
score     -2.049171
day_1      1.069732
day_2      1.290476
day_3      0.245360
day_6      3.355953
Intercept -0.566053
[ 0.29010417 -1.21517873  2.42935442 -0.84353593  0.13997946  0.49633349
 -1.65295557  3.11715736 -0.90101203  0.12465862  0.76464729 -0.53348261
  2.70794659 -0.5258155   0.11574833  0.60256941 -0.68408926  2.57647465
 -1.0640957   0.29947318  0.78680776 -0.36404032]


In [7]:
# Score MSFT_test.csv; the model applied is the one fitted on TSLA data.
linearRegPredict("MSFT")

                  0
score     -2.049171
day_1      1.069732
day_2      1.290476
day_3      0.245360
day_6      3.355953
Intercept -0.566053
[ 0.78093305 -0.91027911  2.78009319 -0.57778435  0.48611964  0.80299036
 -1.74795166  2.43520014 -0.61942227  0.25906591  0.41325197 -0.55349296
  2.46265425 -0.81398379  0.25729283 -0.46866878 -0.45788881  2.64899791
 -0.47395191  0.37395585  0.36797004 -0.44484771]


In [8]:
# Score AAPL_test.csv; the model applied is the one fitted on TSLA data.
linearRegPredict("AAPL")

                  0
score     -2.049171
day_1      1.069732
day_2      1.290476
day_3      0.245360
day_6      3.355953
Intercept -0.566053
[ 0.78096509 -0.91019763  2.78017303 -0.57778944  0.48619072  0.80297495
 -1.75173883  2.43518774 -0.61941154  0.25911498  0.41323476 -0.55348098
  2.4626263  -0.8139667   0.25730845 -0.46866009 -0.45783144  2.6489966
 -0.47407272  0.37396196  0.36094233 -0.44487022]
