# Calculate Stock Change - Team Gannett Peak

### Team Members: Congda Xu, Binqi Shen,  Matthew Ko, Isaac Choi

In [1]:
# load packages
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

In [2]:
# import csv to pandas dataframe
data = []
columns = ['N', 'Date', 'Open', 'High', 'Low', 'Close', 'AdjClose', 'Volume', 'Dividend', 'SplitCoefficient', 'Ticker', 'PctChange']
file_numbers = [4]                     # can substitute with a list of multiple filenumbers

for num in file_numbers:
    path = r'financial%d.csv' % num    # all file names are in the format of financial[x].csv
    df = pd.read_csv(path, names=columns, index_col = False).iloc[1:]   # remove first header row of each file
    df.Date = df.Date.astype('datetime64[s]').dt.to_period('M')         # to only contain year-month
    df.Ticker = df.Ticker.astype('str')
    df.AdjClose = df.Close.astype('float')
    data.append(df)
    
financial = pd.concat(data)            # concatinate files to 1 dataframe


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [3]:
financial.head()

Unnamed: 0,N,Date,Open,High,Low,Close,AdjClose,Volume,Dividend,SplitCoefficient,Ticker,PctChange
1,181.0,2020-01,3.04,3.0498,3.04,3.0498,3.0498,1221,0,1,BMRA,0.003223684
2,182.0,2020-01,3.0452,3.0452,3.033,3.033,3.033,971,0,1,BMRA,-0.004006305
3,183.0,2020-01,3.0,3.05,3.0,3.0301,3.0301,16372,0,1,BMRA,0.010033333
4,184.0,2020-01,3.01,3.05,2.985,3.02,3.02,14547,0,1,BMRA,0.003322259
5,185.0,2020-01,3.05,3.05,3.0,3.01,3.01,14006,0,1,BMRA,-0.013114754


In [4]:
financial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 467492 entries, 1 to 467492
Data columns (total 12 columns):
 #   Column            Non-Null Count   Dtype    
---  ------            --------------   -----    
 0   N                 467480 non-null  float64  
 1   Date              467480 non-null  period[M]
 2   Open              467480 non-null  object   
 3   High              467480 non-null  object   
 4   Low               467480 non-null  object   
 5   Close             467480 non-null  object   
 6   AdjClose          467480 non-null  float64  
 7   Volume            467480 non-null  object   
 8   Dividend          467480 non-null  object   
 9   SplitCoefficient  467480 non-null  object   
 10  Ticker            467492 non-null  object   
 11  PctChange         467462 non-null  object   
dtypes: float64(2), object(9), period[M](1)
memory usage: 42.8+ MB


In [5]:
# Get last stock price for each ticker each month using .first()
temp_financial = financial[['Ticker', 'Date', 'AdjClose']].groupby(['Ticker', 'Date']).first() 

temp_financial = temp_financial.reset_index()

# change column name from 'AdjClose' to 'CurrentClose'
temp_financial['CurrentClose'] = temp_financial[['AdjClose']]

# add a column to store previous month's close price
temp_financial['PrevClose'] = temp_financial.groupby('Ticker')['CurrentClose'].shift(1)

# remove the adjclose column
temp_financial = temp_financial[['Ticker', 'Date', 'CurrentClose', 'PrevClose']]

# add a column to calculate close price change
temp_financial['CloseChange(%)'] = 100 * (temp_financial.CurrentClose - temp_financial.PrevClose)/ temp_financial.PrevClose

# add a column to Calculate the monthly return
temp_financial['MonthlyReturn'] = temp_financial.CurrentClose / temp_financial.PrevClose

# same df to cleaned_financial
cleaned_financial = temp_financial

In [6]:
# test to see if everything works the right way
cleaned_financial

Unnamed: 0,Ticker,Date,CurrentClose,PrevClose,CloseChange(%),MonthlyReturn
0,AAPL,2016-01,97.340,,,
1,AAPL,2016-02,96.690,97.34,-0.667762,0.993322
2,AAPL,2016-03,108.990,96.69,12.721067,1.127211
3,AAPL,2016-04,93.740,108.99,-13.992109,0.860079
4,AAPL,2016-05,99.860,93.74,6.528696,1.065287
...,...,...,...,...,...,...
22527,ZIXI,2019-09,7.240,7.35,-1.496599,0.985034
22528,ZIXI,2019-10,6.610,7.24,-8.701657,0.912983
22529,ZIXI,2019-11,7.540,6.61,14.069592,1.140696
22530,ZIXI,2019-12,6.780,7.54,-10.079576,0.899204


In [7]:
# test to make sure the first PrevMonth record for each ticker is NAN
cleaned_financial[cleaned_financial.Ticker == 'ZIXI']

Unnamed: 0,Ticker,Date,CurrentClose,PrevClose,CloseChange(%),MonthlyReturn
22483,ZIXI,2016-01,4.49,,,
22484,ZIXI,2016-02,3.99,4.49,-11.135857,0.888641
22485,ZIXI,2016-03,3.93,3.99,-1.503759,0.984962
22486,ZIXI,2016-04,3.73,3.93,-5.089059,0.949109
22487,ZIXI,2016-05,4.02,3.73,7.774799,1.077748
22488,ZIXI,2016-06,3.75,4.02,-6.716418,0.932836
22489,ZIXI,2016-07,4.06,3.75,8.266667,1.082667
22490,ZIXI,2016-08,3.81,4.06,-6.157635,0.938424
22491,ZIXI,2016-09,4.1,3.81,7.611549,1.076115
22492,ZIXI,2016-10,4.07,4.1,-0.731707,0.992683


In [8]:
# export to csv
for num in file_numbers:
    path = r'CleanStock%d.csv' % num
    cleaned_financial.to_csv(path, index = False)