# FLIP (00): Data Science 
**(Module 02: Data Visualization)**

---
- Materials in this module include resources collected from various open-source online repositories.
- You are free to use, but NOT allowed to change and distribute this package.

Prepared by and for 
**Student Members** |
2006-2018 [TULIP Lab](http://www.tulip.org.au), Australia

---


# Session H - Time Series

In this lesson, we will learn how to handle time data through a stock case study, including reading a time series, DateOffset and TimeDelta, and time series functions.




In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

sns.set_style('whitegrid')
%matplotlib inline
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', 10)

### Read the time series

In [None]:
import os

import wget

link_to_data = 'https://github.com/tuliplab/mds/raw/master/Jupyter/data/gertek_2016_d.csv'
local_csv = 'gertek_2016_d.csv'  # file name the read_csv cells in this notebook open

# Fetch the CSV only when it is not already on disk, so re-running this cell
# does not hit the network again.
# NOTE(review): wget.download appears to save under a new numbered name when
# the target already exists, which read_csv would then ignore -- confirm.
if os.path.exists(local_csv):
    DataSet = local_csv
else:
    DataSet = wget.download(link_to_data, out=local_csv)

In [None]:
# Load the quotes with the default integer index; 'date' remains an ordinary column.
goertek = pd.read_csv(filepath_or_buffer='gertek_2016_d.csv')
goertek.head(n=5)

In [None]:
# Check which container type pandas returns for a single selected column.
date_column = goertek['date']
type(date_column)

In [None]:
# Inspect the Python type of the first value in the unparsed 'date' column.
first_date = goertek['date'][0]
type(first_date)

In [None]:
# Convert the 'date' column with pd.to_datetime, then re-check the element type.
parsed_dates = pd.to_datetime(goertek['date'])
goertek['date'] = parsed_dates
type(goertek['date'][0])

In [None]:
# Promote 'date' from a column to the row index.
goertek = goertek.set_index(keys='date')
goertek.head(n=5)

In [None]:
# Confirm what kind of index the frame carries after set_index.
row_index = goertek.index
type(row_index)

In [None]:
# One-step alternative: have read_csv parse dates and use 'date' as the index.
goertek = pd.read_csv(
    'gertek_2016_d.csv',
    parse_dates=True,
    index_col='date',
)
goertek.head(n=5)

In [None]:
# Check the index type produced by read_csv(index_col='date', parse_dates=True).
parsed_index = goertek.index
type(parsed_index)

### DateOffset and TimeDelta

In [None]:
# Last calendar day of 2016.
# pd.Timestamp replaces the `pd.datetime` alias, which is deprecated and absent
# from current pandas releases; Timestamp is a datetime.datetime subclass, so
# offset and timedelta arithmetic on it works the same way.
lastday = pd.Timestamp('2016-12-31')
lastday

In [None]:
# Step one calendar day past `lastday`.
one_day = pd.DateOffset(days=1)
newyear = lastday + one_day
newyear

In [None]:
# Current local date and time.
# pd.Timestamp.now() replaces `pd.datetime.now()`: the `pd.datetime` alias is
# deprecated and absent from current pandas releases.
today = pd.Timestamp.now()
today

In [None]:
# `today` moved forward by a one-week DateOffset.
one_week = pd.DateOffset(weeks=1)
today + one_week

In [None]:
# Offsets can be multiplied: the (2 years, 6 months) offset is scaled by 2
# before being added to `today`.
doubled_offset = 2 * pd.DateOffset(years=2, months=6)
today + doubled_offset

In [None]:
import datetime

In [None]:
# A standard-library timedelta spanning one week
# (`datetime` here is the module, as imported in the preceding cell).
one_week_spec = {'weeks': 1}
weekDelta = datetime.timedelta(**one_week_spec)
weekDelta

In [None]:
# The first positional argument of timedelta is a number of days.
datetime.timedelta(days=7)

In [None]:
# Refresh `today` with the current local date and time.
# pd.Timestamp.now() replaces `pd.datetime.now()`: the `pd.datetime` alias is
# deprecated and absent from current pandas releases.
today = pd.Timestamp.now()

In [None]:
# Plain timedelta arithmetic: `today` advanced by `weekDelta`.
one_week_later = today + weekDelta
one_week_later

### Time Series Function

In [None]:
# Order the rows by ascending index (sort_index sorts ascending by default).
goertek = goertek.sort_index()
goertek.head(n=5)

In [None]:
# Last five rows of the sorted frame.
goertek.tail(n=5)

In [None]:
# Shift the values by one period while the index stays in place; preview three rows.
shifted_rows = goertek.shift(periods=1)
shifted_rows.head(n=3)

In [None]:
# Shift the index itself forward by one business day (values stay attached to
# their rows). pd.offsets.BDay() replaces `pd.datetools.bday`: the
# `pd.datetools` module no longer exists in current pandas releases.
goertek.shift(1, freq=pd.offsets.BDay()).head(3)

In [None]:
# Re-index to the 'BM' (business month) frequency alias.
# NOTE(review): recent pandas releases spell this alias 'BME' -- confirm the target version.
goertek.asfreq(freq='BM')

In [None]:
# Re-index to the 'H' (hour) frequency alias, forward-filling the newly created rows.
goertek.asfreq(freq='H', method='ffill')

In [None]:
# Fresh copy of the quotes, date-indexed, used for the resampling examples.
goertek_m = pd.read_csv(
    'gertek_2016_d.csv',
    index_col='date',
    parse_dates=True,
)

In [None]:
# First five rows of the frame that is about to be resampled.
goertek_m.head(n=5)

In [None]:
# Group the rows into 30-minute bins. With the pandas 0.18 resample API this
# returns a lazy object that still needs an aggregation call; see
# http://pandas.pydata.org/pandas-docs/version/0.18.0/whatsnew.html#resample-api
goertek_resample = goertek_m.resample(rule='30Min')

In [None]:
# Show the type of the object that resample() returned.
resampler = goertek_resample
type(resampler)

In [None]:
# Aggregate each 30-minute bin to its mean.
bin_means = goertek_resample.mean()
bin_means

+ B, BM: These stand for business day and business month. Business days are the working
days of the month, that is, any day that is not a holiday or a weekend.
+ D, W, M, Q, A: These stand for calendar day, week, month, quarter, and year-end.
+ H, T, S, L, U: These stand for hour, minute, second, millisecond, and
microsecond.

### Stock data case

In [None]:
#!pip install tushare

#!pip install --force-reinstall --upgrade tushare

In [None]:
#!ls -l ./

In [None]:
import tushare as ts
from datetime import datetime

In [None]:
# Stock codes used in this section; the disabled list below holds the
# variable name paired with each code.
auto_list = [
    '000625',
    '600104',
    '002594',
    '601238',
]
#auto_var=['changan','sqjt','byd','gqjt']

In [None]:
# Disabled alternative: fetch every code in a loop and bind each result to a
# global variable named after the matching entry in auto_var.
#for stock,var in zip(auto_list,auto_var):
#    globals()[var] = ts.get_h_data(stock,'2016-01-01','2016-12-23')
# Fetch data for stock code 000625 for the given start/end date strings
# (presumably daily historical quotes -- TODO confirm against the tushare docs).
changan=ts.get_h_data('000625','2016-01-01','2016-12-23')

In [None]:
# Same fetch for stock code 600104 over the same date strings.
sqjt=ts.get_h_data('600104','2016-01-01','2016-12-23')

In [None]:
# Same fetch for stock code 002594 over the same date strings.
byd=ts.get_h_data('002594','2016-01-01','2016-12-23')

In [None]:
# Same fetch for stock code 601238 over the same date strings.
gqjt=ts.get_h_data('601238','2016-01-01','2016-12-23')

In [None]:
# Offline fallback (disabled): load a previously saved copy instead of fetching.
#changan=pd.read_csv('../data/changan.csv',index_col='date',parse_dates=True)
changan.head(n=5)

In [None]:
# Summary statistics for the columns of `changan`.
summary_stats = changan.describe()
summary_stats

In [None]:
# Print the frame's structural summary (index, columns, dtypes, non-null counts).
changan.info()

In [None]:
# Line chart of the closing price.
close_price = changan['close']
close_price.plot(figsize=(10, 4), legend=True)

In [None]:
# Line chart of the traded volume.
traded_volume = changan['volume']
traded_volume.plot(figsize=(10, 4), legend=True)

In [None]:
ma_day = [10, 20, 50]

# Add one rolling-mean column of the closing price per window length.
# NOTE(review): rolling windows follow row order -- confirm `changan` is sorted
# oldest-first before relying on these columns.
for window in ma_day:
    changan["MA for %s days" % str(window)] = changan['close'].rolling(window=window).mean()
    # Legacy spelling of the same computation: pd.rolling_mean(changan['close'], window)

In [None]:
# Closing price together with its three moving averages on a single axis.
price_and_ma_columns = ['close', 'MA for 10 days', 'MA for 20 days', 'MA for 50 days']
changan[price_and_ma_columns].plot(figsize=(10, 4), subplots=False)

In [None]:
# Fractional change of the closing price from one row to the next.
# NOTE(review): pct_change compares each row with the row above it -- confirm
# `changan` is sorted oldest-first so this is a true day-over-day return.
daily_return = changan['close'].pct_change()
changan['Daily Return'] = daily_return
changan['Daily Return'].plot(marker='o', linestyle='--', legend=True, figsize=(10, 4))


In [None]:
# Distribution of the daily returns (missing values dropped first).
# NOTE(review): distplot is deprecated in newer seaborn releases in favour of
# histplot/displot -- confirm the seaborn version this notebook targets.
valid_returns = changan['Daily Return'].dropna()
sns.distplot(valid_returns, bins=100, color='purple');

In [None]:
# The same distribution drawn with pandas' built-in histogram.
daily_returns = changan['Daily Return']
daily_returns.hist(bins=100)

In [None]:
#gqjt=pd.read_csv('../data/gqjt_2016.csv',index_col='date',parse_dates=True)

In [None]:
# Closing prices side by side, one column per stock.
close_by_stock = {
    'changan': changan['close'],
    'gqjt': gqjt['close'],
    'byd': byd['close'],
}
closing_df = pd.DataFrame(close_by_stock)

In [None]:
# Preview the combined closing-price table.
closing_df.head(n=5)

In [None]:
# Row-to-row fractional change of every closing-price column.
auto_rets = closing_df.pct_change(periods=1)

In [None]:
# Preview the returns table.
auto_rets.head(n=5)

In [None]:
# Joint distribution of a return series against itself.
# x, y and data are passed by keyword: current seaborn releases reject them as
# positional arguments, and the keyword form also works on older releases.
sns.jointplot(x='changan', y='changan', data=auto_rets,
              kind='scatter', color='seagreen')

In [None]:
# Joint distribution of the 'changan' returns against the 'byd' returns.
# x, y and data are passed by keyword: current seaborn releases reject them as
# positional arguments, and the keyword form also works on older releases.
sns.jointplot(x='changan', y='byd', data=auto_rets, kind='scatter')

In [None]:
# Pairwise plots for every pair of return columns, using only complete rows.
complete_returns = auto_rets.dropna()
sns.pairplot(complete_returns)

In [None]:
# Annotated correlation matrix of the closing prices.
# sns.corrplot was removed from seaborn; computing the correlations with pandas
# and drawing them with sns.heatmap is the supported way to get this view.
sns.heatmap(closing_df.corr(), annot=True)

In [None]:
# Pairwise correlation between the closing-price columns (Pearson is the pandas default).
corr = closing_df.corr(method='pearson')
corr

In [None]:
# Draw the correlation matrix as a heatmap with the value written in each cell.
sns.heatmap(data=corr, annot=True)

In [None]:
# IPython-only help syntax: the trailing `?` displays the documentation for sns.heatmap.
sns.heatmap?