# Analysis single stock market price using yfinance module

In [None]:
# Upgrade the module 
# Ref: https://stackoverflow.com/questions/68320184/yfinance-returning-error-when-downloading-data
#!pip install yfinance --upgrade --no-cache-dir

In [1]:
import yfinance as yf
import pandas as pd

import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Set any float number precision 2 decimal points
pd.set_option('precision', 2)

In [2]:
start_date = '2016-01-01'
end_date = '2021-09-15'

In [3]:
# Extract stock price only 'Adj Close' and 'Volume'

df = yf.download('AAPL',start=start_date,end=end_date,interval='1d',rounding=1)[['Adj Close', 'Volume']]#.to_frame()

[*********************100%***********************]  1 of 1 completed


In [4]:
df.head(3)

Unnamed: 0_level_0,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-12-31,24.27,163649200
2016-01-04,24.29,270597600
2016-01-05,23.68,223164000


In [5]:
# Define function for style
def above_zero(val):
  color = 'green' if val > 0 else 'red'
  return 'color: %s' % color

In [6]:
# Copy file to prevent A value is trying to be set on a copy of a slice from a DataFrame
df = df.copy()

# Find Adjusted Close price percentage
df.loc[:,'AdjC_pct'] = df.loc[:,'Adj Close'].pct_change()*100

# Fill NaN value with zero
df.fillna(value = 0, inplace = True)

### Find typical price change %

In [7]:
df['AdjC_pct'].value_counts(bins = 5)

(-2.92, 2.051]      1233
(2.051, 7.021]       132
(-7.89, -2.92]        59
(7.021, 11.991]        7
(-12.886, -7.89]       5
Name: AdjC_pct, dtype: int64

<div class="alert alert-block alert-info">
<b>Key insight:<br/> 
Stock price typically change between -2.92% to 2%.
</div>

In [8]:
# Highlight column 'AdjC_pct' and put bar on 'Volume'
df.sample(3).style.applymap(above_zero, subset=['AdjC_pct'])\
.bar(subset = ['Volume'])\
.format({'Adj Close': "{:.2f}",'AdjC_pct': "{:.2f}",'AdjC_cum': "{:.2f}"})\
.highlight_max()

Unnamed: 0_level_0,Adj Close,Volume,AdjC_pct
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-12-31 00:00:00,24.27,163649200,0.0
2016-01-04 00:00:00,24.29,270597600,0.08
2016-01-05 00:00:00,23.68,223164000,-2.51
2016-01-06 00:00:00,23.21,273829600,-1.98
2016-01-07 00:00:00,22.24,324377600,-4.18
2016-01-08 00:00:00,22.35,283192000,0.49
2016-01-11 00:00:00,22.71,198957600,1.61
2016-01-12 00:00:00,23.04,196616800,1.45
2016-01-13 00:00:00,22.45,249758400,-2.56
2016-01-14 00:00:00,22.94,252680400,2.18


<div class="alert alert-block alert-info">
<b>Key insight:<br/> 
High volume transaction is an indication of price %change quite significant ↓ (or ↑) (might relate with any issue).
</div>

In [9]:
# Create list of days category
dayscat = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Create list of months category
monthscat = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

### Check typical % change in monthly basis

In [10]:
# Groupby months name → find average → highlight on volume
df.groupby(df.index.month_name()).mean().reindex(monthscat).style.applymap(above_zero, subset=['AdjC_pct'])\
.format({'Adj Close': "{:.2f}",\
         'Volume': "{:.0f}",\
         'AdjC_pct': "{:.2f}",\
         'AdjC_cum': "{:.2f}"})\
.bar(subset = ['Volume'])

Unnamed: 0_level_0,Adj Close,Volume,AdjC_pct
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
January,56.1,152608096,0.07
February,57.01,139114776,0.04
March,55.6,148636659,0.15
April,58.24,119572548,0.1
May,57.8,127387525,0.14
June,61.33,115318152,0.2
July,66.38,101909509,0.36
August,70.54,121053710,0.41
September,65.16,146732267,-0.04
October,58.6,119384628,0.11


In [11]:
# Set pivot table based segregate based on year and month name, then calculate the mean aggregate
df2 = df.pivot_table(index=df.index.year,
                     columns = df.index.month_name(),
                     fill_value = 0,
                     aggfunc = {'AdjC_pct':'mean'})

# Remove the first multi index column 
df2.columns = df2.columns.droplevel(0)

# Re-index based on monthscat
df2 = df2.reindex(columns=monthscat)

In [12]:
df2.style.applymap(above_zero).background_gradient(cmap='RdYlGn_r')

Date,January,February,March,April,May,June,July,August,September,October,November,December
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2016,-0.38,0.0,0.55,-0.7,0.34,-0.19,0.45,0.11,0.32,0.02,-0.09,0.22
2017,0.24,0.67,0.21,0.0,0.31,-0.26,0.16,0.45,-0.31,0.43,0.1,-0.07
2018,-0.05,0.37,-0.27,-0.06,0.59,-0.04,0.14,0.81,-0.03,-0.11,-0.91,-0.61
2019,0.3,0.24,0.45,0.27,-0.58,0.62,0.34,-0.05,0.36,0.47,0.38,0.45
2020,0.27,-0.61,-0.13,0.73,0.42,0.65,0.73,0.96,-0.46,-0.24,0.47,0.51
2021,-0.0,-0.43,0.06,0.36,-0.25,0.44,0.31,0.2,-0.27,0.0,0.0,0.0


<div class="alert alert-block alert-info">
<b>Key insight:<br/> 
1. July and August is the month with highest % price change.<br/>
2. September is the month with lowest % price change.<br/>
3. January is the month with highest volume transaction.<br/>
4. July is the month with lowest volume transaction.<br/>
</div>

### Check typical % change in daily basis

In [13]:
# Groupby days name → find average → highlight on volume → set precision
df.groupby(df.index.day_name()).mean().reindex(dayscat).style\
.bar(subset = ['Volume'])\
.format({'Adj Close': "{:.2f}",\
         'Volume': "{:.0f}",\
         'AdjC_pct': "{:.2f}",\
         'AdjC_cum': "{:.2f}"})

Unnamed: 0_level_0,Adj Close,Volume,AdjC_pct
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Monday,60.81,126876300.0,0.32
Tuesday,61.05,125759340.0,0.24
Wednesday,60.63,128258469.0,0.23
Thursday,60.53,124811812.0,-0.05
Friday,60.06,136712512.0,-0.02
Saturday,,,
Sunday,,,


In [14]:
# Set pivot table based segregate based on year and month name, then calculate the mean aggregate
df3 = df.pivot_table(index=df.index.year,
                     columns = df.index.day_name(),
                     fill_value = 0,
                     aggfunc = {'AdjC_pct':'mean'})

# Remove the first multi index column 
df3.columns = df3.columns.droplevel(0)

# Re-index based on monthscat
df3 = df3.reindex(columns=dayscat)

# Fill NaN value with zero
df3.fillna(value = 0, inplace = True)

# Apply style
df3.style.applymap(above_zero).background_gradient(cmap='RdYlGn_r')

Date,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2016,0.13,0.29,0.07,-0.2,-0.0,0.0,0.0
2017,0.47,0.19,0.19,-0.18,0.18,0.0,0.0
2018,-0.08,0.14,0.2,0.18,-0.46,0.0,0.0
2019,0.23,0.19,0.68,-0.02,0.23,0.0,0.0
2020,0.84,0.51,0.39,-0.18,-0.15,0.0,0.0
2021,0.37,0.07,-0.28,0.16,0.11,0.0,0.0


<div class="alert alert-block alert-info">
<b>Key insight:<br/> 
1. Monday is the highest % price change.<br/>
2. Thursday is the lowest % price change.<br/>
3. Friday is the day with highest volume transaction.<br/>
4. Thursday is the day with lowest volume transaction.<br/>
</div>

### Check typical % change in week basis in a month

In [15]:
# Groupby weekday → find average → highlight on volume → set precision
df.groupby(df.index.weekday).mean().style.applymap(above_zero, subset=['AdjC_pct'])\
.bar(subset = ['Volume'])\
.format({'Adj Close': "{:.2f}",\
         'Volume': "{:.0f}",\
         'AdjC_pct': "{:.2f}",\
         'AdjC_cum': "{:.2f}"})

Unnamed: 0_level_0,Adj Close,Volume,AdjC_pct
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,60.81,126876300,0.32
1,61.05,125759340,0.24
2,60.63,128258469,0.23
3,60.53,124811812,-0.05
4,60.06,136712512,-0.02


In [16]:
# Set pivot table based segregate based on year and weekday name, then calculate the mean aggregate
df4 = df.pivot_table(index=df.index.month_name(),
                     columns = df.index.weekday,
                     aggfunc = {'AdjC_pct':'mean'})

# Remove the first multi index column 
df4.columns = df4.columns.droplevel(0)

# Re-index based on monthscat
df4 = df4.T.reindex(columns=monthscat)

# Fill NaN value with zero
df4.fillna(value = 0, inplace = True)

# Apply style
df4.style.applymap(above_zero).background_gradient(cmap='RdYlGn_r')

Date,January,February,March,April,May,June,July,August,September,October,November,December
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,-0.42,0.18,0.15,0.61,0.23,0.38,0.44,0.73,0.67,0.55,-0.41,0.52
1,0.13,0.19,0.9,0.1,-0.03,0.77,0.09,0.22,0.21,0.14,-0.21,0.25
2,0.23,0.57,-0.1,0.06,0.21,0.24,0.72,0.53,0.15,-0.51,0.47,0.15
3,-0.14,-0.48,-0.38,0.01,0.08,-0.12,0.0,0.35,-0.46,0.18,0.35,0.13
4,0.41,-0.25,0.17,-0.36,0.22,-0.27,0.54,0.22,-0.58,0.19,-0.24,-0.45


<div class="alert alert-block alert-info">
<b>Key insight:<br/> 
1. Mostly first week in every month is the highest % price change.<br/>
2. Mostly last week in every month is the lowest % price change.<br/>
3. Last week in a month typically record extreme max & min of volume transaction.<br/>
</div>

### Check typical % change in date basis

In [17]:
# Groupby day name → find average → highlight on volume → set precision
df.groupby(df.index.day).mean().style.applymap(above_zero, subset=['AdjC_pct'])\
.bar(subset = ['Volume'])\
.format({'Adj Close': "{:.2f}",\
         'Volume': "{:.0f}",\
         'AdjC_pct': "{:.2f}",\
         'AdjC_cum': "{:.2f}"})

Unnamed: 0_level_0,Adj Close,Volume,AdjC_pct
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,60.09,146269718,0.94
2,62.54,144432057,0.22
3,61.42,130882548,-0.23
4,62.18,132906750,0.68
5,59.24,121125591,0.12
6,57.96,122539112,0.42
7,59.3,123222131,0.03
8,60.49,118901380,0.09
9,63.5,129094598,0.26
10,62.44,123411391,-0.05


In [18]:
# Set pivot table based segregate based on year and month name, then calculate the mean aggregate
df5 = df.pivot_table(index=df.index.year,
                     columns = df.index.day,
                     fill_value = 0,
                     aggfunc = {'AdjC_pct':'mean'})

# Remove the first multi index column 
df5.columns = df5.columns.droplevel(0)

# Apply style
df5.T.style.applymap(above_zero).background_gradient(cmap='RdYlGn_r')

Date,2015,2016,2017,2018,2019,2020,2021
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0.0,0.27,1.2,1.53,1.18,0.3,1.36
2,0.0,-0.35,0.94,0.02,-0.61,0.96,0.28
3,0.0,0.61,0.3,0.64,-1.33,-1.04,-0.32
4,0.0,0.32,-0.1,0.18,1.95,1.59,-0.57
5,0.0,-0.86,0.14,-0.43,0.2,1.27,0.77
6,0.0,0.16,0.14,0.23,0.4,1.51,-0.17
7,0.0,-0.38,0.46,-0.62,-0.13,-0.21,1.44
8,0.0,0.13,0.32,-0.37,0.35,0.4,-0.36
9,0.0,0.06,-0.14,0.7,0.04,0.02,0.91
10,0.0,-0.38,-0.36,-0.31,0.61,1.08,-1.4


<div class="alert alert-block alert-info">
<b>Key insight:<br/> 
1. Day 1 and Day 15 is the highest % price change.<br/>
2. Day 19 and Day 27 is the lowest % price change.<br/>
3. Day 31 is the day with highest volume transaction.<br/>
4. Day 22 is the day with lowest volume transaction.<br/>
</div>

### Key Takeaway

<div class="alert alert-block alert-success">

General finding:<br/>
1. Stock price typically % change between -2.92% to 2%.<br/>

From month perspective:<br/>
2. July and August seems the best month to harvest the profit.<br/>
3. September seems the best month to buy the stock (lowest % price change).<br/>

From day perspective:<br/>
4. Monday is the highest % price change.<br/>
5. Thursday seems the best day to buy the stock (lowest % price change).<br/> 

From week in a month perspective:<br/>
6. Mostly first week in every month is the highest % price change.<br/>
7. Mostly last week in every month seems the best week to buy the stock (lowest % price change).<br/> 

From date perspective:<br/>
8. Day 1 and Day 15 is the highest % price change.<br/>
9. Day 19 and Day 27 probably the best date to buy the stock (lowest % price change).<br/>
</div>