# Web Scrapping

## Method 1: Selenium

https://stackoverflow.com/questions/52448137/python-selenium-scrape-the-whole-table

In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from datetime import date

d = webdriver.Chrome('C:/webdriver/chromedriver.exe') 
d.get('https://www.investing.com/equities/wct-holdings-bhd-historical-data')

while True:
    try:  #attempt to dismiss banners that could block later clicks
        WebDriverWait(d, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".closer"))).click()
        d.find_element_by_css_selector('.closer').click()
    except TimeoutException:
        break
        print("waiting no enough")

In [6]:
element = d.find_element_by_id('widgetFieldDateRange') #show the date picker
webdriver.ActionChains(d).move_to_element(element ).click(element ).perform()

In [7]:
sDate  = d.find_element_by_id('startDate') # set start date input element into variable
sDate.clear() #clear existing entry
sDate.send_keys('01/01/2018') #add custom entry
eDate = d.find_element_by_id('endDate') #repeat for end date
eDate.clear()
eDate.send_keys(date.today().strftime('%m/%d/%Y'))

In [8]:
d.find_element_by_id('applyBtn').click() #submit changes

In [10]:
AllData = []

for table in WebDriverWait(d,10).until(EC.visibility_of_all_elements_located((By.XPATH,'//*[contains(@id,"results_box")]//tr'))):
    data = [item.text for item in table.find_elements_by_xpath(".//*[self::td or self::th]")]
    AllData.append(data)
    
#d.quit()

In [11]:
d.get('https://www.investing.com/equities/wct-holdings-bhd-related-indices')

while True:
    try:  #attempt to dismiss banners that could block later clicks
        WebDriverWait(d, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".closer"))).click()
        d.find_element_by_css_selector('.closer').click()
    except TimeoutException:
        break
        print("waiting no enough")

relatedIndices = []

for table in WebDriverWait(d,10).until(EC.visibility_of_all_elements_located((By.XPATH,'//*[contains(@id,"stock_component_of_indices")]//tr'))):
    indices = [item.text for item in table.find_elements_by_xpath(".//*[self::td or self::th]")]
    relatedIndices.append(indices)

In [51]:
import pandas as pd
data = pd.DataFrame(AllData, columns = ['date','price','open price','high price','low price','volume','percent'])
relatedIndicesDF = pd.DataFrame(relatedIndices, columns = ['country','indices','last','high','low','change','change_percent','date','nothing'])

data.drop(data.tail(1).index,inplace=True)
data.drop(data.head(1).index,inplace=True)
data.date = pd.to_datetime(data['date'])
data = data.reset_index()
relatedIndicesDF.drop(relatedIndicesDF.head(1).index,inplace=True)
relatedIndicesDF.date = relatedIndicesDF.date + str('/2020')
relatedIndicesDF.date = pd.to_datetime(relatedIndicesDF['date'], format= "%d/%m/%Y")
relatedIndicesDF = relatedIndicesDF.drop(['country','nothing'], axis = 1) 
relatedIndicesDF

Unnamed: 0,indices,last,high,low,change,change_percent,date
1,KL Construction,211.67,214.69,210.47,1.09,+0.52%,2020-01-17


In [53]:
relatedIndicesDF

Unnamed: 0,indices,last,high,low,change,change_percent,date
1,KL Construction,211.67,214.69,210.47,1.09,+0.52%,2020-01-17


## Method 2: Beautiful Soup

# MACD

In [None]:
def myEWMA(df, column, span):
    revert_price_upside_down = df[column][::-1]
    sma = revert_price_upside_down.rolling(window=span, min_periods=span).mean()[:span]
    rest = revert_price_upside_down[span:]
    ewma = pd.concat([sma, rest]).ewm(span=span, adjust=False).mean()
    return ewma

In [None]:
#spanList = [12, 26, 9]
spanList = [5, 35, 5]
data[f'EWMA_{spanList[0]}'] = myEWMA(data, 'price', spanList[0])
data[f'EWMA_{spanList[1]}'] = myEWMA(data, 'price', spanList[1])
data['MACD'] = data[f'EWMA_{spanList[0]}'] - data[f'EWMA_{spanList[1]}']
data[f'MACD_{spanList[2]}'] = myEWMA(data, 'MACD', spanList[2])

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.io as pio
pio.renderers.default = "browser"

layout = go.Layout(title= "WCT",
                  hovermode= 'closest',
                   hoverdistance = -1,
                   spikedistance = -1,
                  xaxis= {'showspikes': True,
                         'spikemode':'toaxis+across+marker',
                         'automargin' : True},
                   yaxis= {'showspikes': True,
                         'spikemode':'toaxis+across',
                         'automargin' : True}
                  )
# subplot setupt
fig = make_subplots(
    rows=3, cols=1, shared_xaxes=True, vertical_spacing = 0.01,
    specs = [[{"rowspan": 2}],[{}],[{}]]
)

fig.add_trace(go.Scatter(x = data['date'],
                         y = data['price'],
                         mode = "lines",
                         marker={"color": "black"},
                         name = "Closing price",
                         showlegend=False),
                         row = 1, col = 1)

fig.add_trace(go.Scatter(x = data['date'],
                         y = data[f"EWMA_{spanList[0]}"],
                         mode = "lines",
                         marker={"color": "blue"},
                         name = f"EWMA_{spanList[0]}"),
                         row = 1, col = 1)

fig.add_trace(go.Scatter(x = data['date'],
                         y = data[f"EWMA_{spanList[1]}"],
                         mode = "lines",
                         marker={"color": "red"},
                         name = f"EWMA_{spanList[1]}"),
                         row = 1, col = 1)

fig.add_trace(go.Scatter(x = data['date'],
                         y = data['MACD'],
                         mode="lines",
                         marker={"color":'red'},
                         name="MACD"),
                         row = 3, col = 1)

fig.add_trace(go.Scatter(x = data['date'],
                         y = data[f"MACD_{spanList[2]}"],
                         mode = "lines",
                         marker={"color": "blue"},
                         name = f"MACD_{spanList[2]}"),
                         row = 3, col = 1)

fig.add_trace(go.Scatter(x = data['date'],
                         y = [0]*len(data.index),
                         mode = "lines",
                         line=go.scatter.Line(color="gray"),
                         name = "threhold",
                         showlegend=False),
                         row = 3, col = 1)

#fig.update_yaxes(range=[1,1], tickfont=dict(color='rgba(0,0,0,0)', size=14), row=3, col=2)
fig.update_xaxes(showgrid=False,
                 showspikes= True,
                 spikemode='toaxis+across',
                 automargin = True, row=3, col=1)
fig.update_yaxes(showgrid=False,
                 showspikes= True,
                 spikemode='toaxis+across',
                 automargin = True, title = 'MACD', side = 'right', row=3, col=1)
fig.update_layout(layout)

fig.show()


if MACD Crossover occurs and price after 2 day - price at crossover > 0% give reward =1, else: -1, other put 0
total up reward = 1 vs reward = -1

In [None]:
import numpy as np
data['MACD_crossover'] = abs(data['MACD']) - abs(data[f'MACD_{spanList[2]}'])
#data['crossover'] = ["Negative Crossover" if (i > 0) & (np.abs(i) < 0.008) else 
#                     "Positive Crossover" if (i < 0) & (np.abs(i) < 0.008) else "noCO" 
#                     for i in data.MACD_crossover]

crossover = []
for i in range(len(data.MACD_crossover)):
    if (np.abs(data.MACD_crossover[i]) < 0.008) & (data.MACD_crossover.shift(-2)[i] > 0):
        crossover.append("Positive Crossover")
    elif (np.abs(data.MACD_crossover[i]) < 0.008) & (data.MACD_crossover.shift(-2)[i] < 0):
        crossover.append("Negative Crossover")
    else: crossover.append("No Crossover")
data['crossover'] = crossover

Compute True Positive of CrossOver: if after 2day price % change >0.005, then true positive

In [None]:
data = data[::-1].reset_index()

In [None]:
data['2dayPriceChange'] = data.price.astype(float).pct_change(periods = 2).shift(-2)

truePositive = []
for i in range(len(data.crossover)): 
    if (data.crossover[i] == "Positive Crossover") and (data['2dayPriceChange'][i] > 0):
        truePositive.append("TP")
    elif data.crossover[i] == "Negative Crossover" and data['2dayPriceChange'][i] < 0:
        truePositive.append("TN")
    elif data.crossover[i] == "Positive Crossover" and data['2dayPriceChange'][i] < 0:
        truePositive.append("FN")
    elif data.crossover[i] == "Negative Crossover" and data['2dayPriceChange'][i] > 0:
        truePositive.append("FP")
    else: truePositive.append("null")

data['truePositive'] = truePositive

In [None]:
data.loc[data.truePositive == "TP",['date', 'crossover','truePositive','2dayPriceChange']]

In [None]:
data[['crossover','truePositive','date']].groupby(['crossover','truePositive']).count()

In [None]:
data.loc[((data.truePositive == "null") & (data.crossover == "Positive Crossover")),['date','crossover','2dayPriceChange']]