## Setting Up....

In [42]:
home_dir = normalizePath("~")
relative_path = file.path("R programs")
full_path = file.path(home_dir, relative_path)
setwd(full_path)
getwd()

In [43]:
library(readxl)
library(caret)
library(lubridate)
library(moments)
library(car)
library(lmtest)
library(sandwich)
library(tidyverse)
library(margins)

In [44]:
Data <- read_excel("week_6/ABC.xlsx")
class(Data$Date)
Data$Date = as.Date(Data$Date)
class(Data$Date)

### Create updown 1 returns are positive and 0 when returns are negative

In [45]:
Data = Data |> mutate(updown= ifelse(ABC>0, 1, 0))
Data = na.omit(Data)
head(Data)

Date,Price,ABC,Sensex,DividendAnnounced,Sentiment,Nifty,updown
<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2000-01-03,718.15,0.07992481,0.073772129,0,0.04893645,0.09581641,1
2000-01-04,712.9,-0.00731045,0.021562349,0,-0.05503706,0.009706008,0
2000-01-05,730.0,0.02398653,-0.024405346,0,0.01913459,-0.032213609,1
2000-01-06,788.35,0.07993151,0.012045921,0,0.08035507,0.011204936,1
2000-01-07,851.4,0.07997717,-0.001300371,0,0.09403754,-0.000397248,1
2000-01-10,919.5,0.07998591,0.019191132,1,0.01522908,0.030167565,1


## Splittin data into trainging and testing dataset

In [46]:
Data = Data |>filter(year(Date)>2006)
indx = sample(1:nrow(Data), as.integer(0.8*nrow(Data)))
train = Data[indx,]
test = Data[-indx,]
head(train)
head(test)

Date,Price,ABC,Sensex,DividendAnnounced,Sentiment,Nifty,updown
<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2017-09-06,276.25,0.007292616,0.00848483,0,0.009931342,0.033892731,1
2008-03-19,176.4,0.053448791,0.039929529,0,-7.67e-05,0.049643752,1
2013-03-27,333.05,0.015706008,0.005015048,0,0.06446592,-0.002174353,1
2013-02-14,338.75,0.011949216,0.003118151,0,-0.011342509,0.014683646,1
2016-09-22,330.4,-0.008552138,0.000991624,1,0.044637134,-0.004574449,0
2017-01-02,269.35,0.008423811,0.013345461,0,0.085421622,0.001864313,1


Date,Price,ABC,Sensex,DividendAnnounced,Sentiment,Nifty,updown
<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2007-01-17,161.8,0.01125,0.014838128,0,0.03160541,0.009882584,1
2007-01-24,163.45,0.017112632,-0.000745927,0,0.04228807,-0.013464147,1
2007-01-30,163.1,0.007100957,0.014213175,0,-0.03569235,0.02329971,1
2007-02-09,166.1,-0.00359928,0.004145497,0,-0.05653594,-0.002741446,0
2007-02-12,165.0,-0.006622517,0.007662234,0,-0.05911901,0.005747302,0
2007-02-19,161.8,-0.017905918,0.002706997,0,-0.01222755,0.014448035,0


In [47]:
train = train |>arrange(Date)
test = test|>arrange(Date)

In [48]:
prop.table(table(Data$updown))*100
prop.table(table(train$updown))*100
prop.table(table(test$updown))*100


       0        1 
49.63942 50.36058 


       0        1 
50.52592 49.47408 


      0       1 
46.0961 53.9039 

In [50]:
linear = lm(updown~ Sensex, data = train)
summary(linear)


Call:
lm(formula = updown ~ Sensex, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.69057 -0.45542 -0.05474  0.45375  1.34696 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.491424   0.009051   54.29   <2e-16 ***
Sensex      12.682979   0.640391   19.80   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4669 on 2660 degrees of freedom
Multiple R-squared:  0.1285,	Adjusted R-squared:  0.1282 
F-statistic: 392.2 on 1 and 2660 DF,  p-value: < 2.2e-16


In [51]:
fitted.results = ifelse(linear$fitted.values>0.4, 1 , 0)
CM = confusionMatrix(as.factor(fitted.results), as.factor(train$updown))
performance_4 = tibble(Threshold =0.4, Accuracy = CM$overall["Accuracy"],Sensitivity = CM$byClass["Sensitivity"],
                Specificity = CM$byClass["Specificity"])

In [55]:
fitted.results = ifelse(linear$fitted.values>0.6, 1 , 0)
CM = confusionMatrix(as.factor(fitted.results), as.factor(train$updown))
performance_6 = tibble(Threshold =0.6, Accuracy = CM$overall["Accuracy"],Sensitivity = CM$byClass["Sensitivity"],
                Specificity = CM$byClass["Specificity"])

In [56]:
fitted.results = ifelse(linear$fitted.values>0.8, 1 , 0)
CM = confusionMatrix(as.factor(fitted.results), as.factor(train$updown))
performance_8 = tibble(Threshold =0.8, Accuracy = CM$overall["Accuracy"],Sensitivity = CM$byClass["Sensitivity"],
                Specificity = CM$byClass["Specificity"])

In [57]:
LinearPerformance = rbind(performance_4,performance_6, performance_8)
LinearPerformance$Class = "Linear"
LinearPerformance

Threshold,Accuracy,Sensitivity,Specificity,Class
<dbl>,<dbl>,<dbl>,<dbl>,<chr>
0.4,0.6115702,0.3286245,0.90053151,Linear
0.6,0.6141998,0.9152416,0.30675778,Linear
0.8,0.5300526,0.9933086,0.05694761,Linear


### Logit performance object

In [58]:
logit = glm(formula(linear), data = train, family = binomial('logit'))
null =glm(updown~1, data = train, family = binomial('logit'))
PseudoRsq = 1-logLik(logit)/logLik(null)
margins(logit)

Unnamed: 0_level_0,Date,Price,ABC,Sensex,DividendAnnounced,Sentiment,Nifty,updown,fitted,se.fitted,dydx_Sensex,Var_dydx_Sensex,_weights,_at_number
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<mrgnlffc>,<dbl>,<dbl>,<int>
1,2007-01-01,153.20,-0.018577835,0.020609571,0,-0.054241830,0.002635723,0,0.8312359,0.01430681,11.200698,0.5775692,,1
2,2007-01-02,158.65,0.035574413,0.023273919,0,0.008467111,0.010307882,1,0.8590158,0.01372075,9.669802,0.5775692,,1
3,2007-01-03,156.10,-0.016073117,-0.006464859,0,0.004034366,-0.010663054,0,0.3618658,0.01218871,18.436363,0.5775692,,1
4,2007-01-04,157.05,0.006085842,0.004787963,0,0.097575403,0.020007297,1,0.5820457,0.01131843,19.422187,0.5775692,,1
5,2007-01-05,159.00,0.012416428,-0.003839180,0,-0.056319409,0.003910649,1,0.4115323,0.01125354,19.334774,0.5775692,,1
6,2007-01-08,160.60,0.010062893,0.020390317,0,0.079457951,0.014303838,1,0.8287659,0.01434245,11.330849,0.5775692,,1
7,2007-01-09,156.40,-0.026151930,0.002224875,0,-0.068338808,-0.009343922,0,0.5315914,0.01065942,19.879867,0.5775692,,1
8,2007-01-10,156.45,0.000319693,0.014961593,0,0.027008654,0.028965824,1,0.7583150,0.01447608,14.632824,0.5775692,,1
9,2007-01-11,161.75,0.033876638,0.005733296,0,-0.006122434,0.006707490,1,0.6002855,0.01165082,19.156711,0.5775692,,1
10,2007-01-12,161.40,-0.002163833,0.000781416,0,0.058066754,-0.012296953,0,0.5028212,0.01051310,19.958899,0.5775692,,1


### Probit Model

In [59]:
fitted.results = ifelse(linear$fitted.values>0.4, 1 , 0)
CM = confusionMatrix(as.factor(fitted.results), as.factor(train$updown))
performance_4 = tibble(Threshold =0.4, Accuracy = CM$overall["Accuracy"],Sensitivity = CM$byClass["Sensitivity"],
                Specificity = CM$byClass["Specificity"])

In [60]:
fitted.results = ifelse(linear$fitted.values>0.6, 1 , 0)
CM = confusionMatrix(as.factor(fitted.results), as.factor(train$updown))
performance_6 = tibble(Threshold =0.6, Accuracy = CM$overall["Accuracy"],Sensitivity = CM$byClass["Sensitivity"],
                Specificity = CM$byClass["Specificity"])

In [61]:
fitted.results = ifelse(linear$fitted.values>0.8, 1 , 0)
CM = confusionMatrix(as.factor(fitted.results), as.factor(train$updown))
performance_6 = tibble(Threshold =0.8, Accuracy = CM$overall["Accuracy"],Sensitivity = CM$byClass["Sensitivity"],
                Specificity = CM$byClass["Specificity"])

In [62]:
LinearPerformance = rbind(performance_4,performance_6, performance_8)
LinearPerformance$Class = "Linear"
LinearPerformance

Threshold,Accuracy,Sensitivity,Specificity,Class
<dbl>,<dbl>,<dbl>,<dbl>,<chr>
0.4,0.6115702,0.3286245,0.90053151,Linear
0.8,0.5300526,0.9933086,0.05694761,Linear
0.8,0.5300526,0.9933086,0.05694761,Linear


In [64]:
probit = glm(formula(linear), data = train, family = binomial('probit'))
null =glm(updown~1, data = train, family = binomial('probit'))
PseudoRsq = 1-logLik(probit)/logLik(null)
margins(probit)

Unnamed: 0_level_0,Date,Price,ABC,Sensex,DividendAnnounced,Sentiment,Nifty,updown,fitted,se.fitted,dydx_Sensex,Var_dydx_Sensex,_weights,_at_number
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<mrgnlffc>,<dbl>,<dbl>,<int>
1,2007-01-01,153.20,-0.018577835,0.020609571,0,-0.054241830,0.002635723,0,0.8151129,0.01533685,11.95742,0.5402411,,1
2,2007-01-02,158.65,0.035574413,0.023273919,0,0.008467111,0.010307882,1,0.8452555,0.01514784,10.66688,0.5402411,,1
3,2007-01-03,156.10,-0.016073117,-0.006464859,0,0.004034366,-0.010663054,0,0.3758484,0.01174714,17.00461,0.5402411,,1
4,2007-01-04,157.05,0.006085842,0.004787963,0,0.097575403,0.020007297,1,0.5745128,0.01091331,17.56467,0.5402411,,1
5,2007-01-05,159.00,0.012416428,-0.003839180,0,-0.056319409,0.003910649,1,0.4212342,0.01084165,17.52783,0.5402411,,1
6,2007-01-08,160.60,0.010062893,0.020390317,0,0.079457951,0.014303838,1,0.8124796,0.01533712,12.06267,0.5402411,,1
7,2007-01-09,156.40,-0.026151930,0.002224875,0,-0.068338808,-0.009343922,0,0.5291026,0.01031476,17.82980,0.5402411,,1
8,2007-01-10,156.45,0.000319693,0.014961593,0,0.027008654,0.028965824,1,0.7401445,0.01460091,14.53148,0.5402411,,1
9,2007-01-11,161.75,0.033876638,0.005733296,0,-0.006122434,0.006707490,1,0.5910469,0.01122530,17.40981,0.5402411,,1
10,2007-01-12,161.40,-0.002163833,0.000781416,0,0.058066754,-0.012296953,0,0.5033223,0.01018317,17.87677,0.5402411,,1


In [65]:
cor(logit$fitted.values, probit$fitted.values)

In [66]:
Corr_ob = cbind.data.frame(linear$fitted.values, logit$fitted.values, probit$fitted.values)

In [67]:
cor(Corr_ob)

Unnamed: 0,linear$fitted.values,logit$fitted.values,probit$fitted.values
linear$fitted.values,1.0,0.9279176,0.9368064
logit$fitted.values,0.9279176,1.0,0.9992691
probit$fitted.values,0.9368064,0.9992691,1.0


ERROR: Error: object 'Logit_Performance' not found
