### 1.1 Reading and Plotting Stock Data

In [10]:
import pandas as pd
import matplotlib.pyplot as plt

#### Read CSV

In [6]:
def run():
    """Load ``data/APPL.csv`` and print its first and last rows.

    The path is relative to the current working directory.
    """
    df = pd.read_csv("data/APPL.csv")
    # Parenthesised single-argument print gives the same output on Python 2
    # and 3; the bare ``print df.head()`` statement fails on Python 3.
    print(df.head())
    print(df.tail())


if __name__ == "__main__":
    run()

         Date        Open        High         Low       Close    Volume  \
0  2016-08-05  106.269997  107.650002  106.180000  107.480003  38415400   
1  2016-08-04  105.580002  106.000000  105.279999  105.870003  27408700   
2  2016-08-03  104.809998  105.839996  104.769997  105.790001  30202600   
3  2016-08-02  106.050003  106.070000  104.000000  104.480003  33816600   
4  2016-08-01  104.410004  106.150002  104.410004  106.050003  38167900   

    Adj Close  
0  107.480003  
1  105.870003  
2  105.220002  
3  103.917063  
4  105.478603  
            Date       Open       High     Low   Close     Volume  Adj Close
8985  1980-12-18  26.625000  26.750000  26.625  26.625   18362400   0.397323
8986  1980-12-17  25.875000  25.999999  25.875  25.875   21610400   0.386130
8987  1980-12-16  25.375000  25.375000  25.250  25.250   26432000   0.376804
8988  1980-12-15  27.375001  27.375001  27.250  27.250   43971200   0.406649
8989  1980-12-12  28.750000  28.875000  28.750  28.750  117258400   

#### compute max closing price

In [8]:
def get_max_close(symbol):
    """Return the largest value in the "Close" column of ``data/<symbol>.csv``.

    The CSV path is relative to the current working directory.
    """
    csv_path = "data/{}.csv".format(symbol)
    closing_prices = pd.read_csv(csv_path)["Close"]
    return closing_prices.max()

def run():
    """Print the maximum closing price for each of the symbols APPL and IBM."""
    for symbol in ["APPL", "IBM"]:
        print("Max Close")
        # Build the "<symbol> <value>" line explicitly: ``print(a, b)`` would
        # print a tuple on Python 2, and ``print a, b`` fails on Python 3.
        print("{} {}".format(symbol, get_max_close(symbol)))


if __name__ == "__main__":
    run()

Max Close
APPL 702.100021
Max Close
IBM 649.000015


#### compute mean volume

In [9]:
def get_mean_volume(symbol):
    """Return the mean of the "Volume" column of ``data/<symbol>.csv``.

    The CSV path is relative to the current working directory.
    """
    csv_path = "data/{}.csv".format(symbol)
    volumes = pd.read_csv(csv_path)["Volume"]
    return volumes.mean()

def run():
    """Print the mean trading volume for each of the symbols APPL and IBM."""
    for symbol in ["APPL", "IBM"]:
        print("Mean Volume")
        # Build the "<symbol> <value>" line explicitly: ``print(a, b)`` would
        # print a tuple on Python 2, and ``print a, b`` fails on Python 3.
        print("{} {}".format(symbol, get_mean_volume(symbol)))


if __name__ == "__main__":
    run()

Mean Volume
APPL 91302202.6585
Mean Volume
IBM 4886525.16007


#### plot high price for IBM

In [16]:
def run():
    """Plot the "High" column of ``data/IBM.csv`` and display the figure."""
    high_prices = pd.read_csv("data/IBM.csv")["High"]
    high_prices.plot()
    plt.show()


if __name__ == "__main__":
    run()

#### plot two columns

In [17]:
def run():
    """Plot the "Close" and "Adj Close" columns of ``data/APPL.csv`` together."""
    wanted_columns = ["Close", "Adj Close"]
    prices = pd.read_csv("data/APPL.csv")
    prices[wanted_columns].plot()
    plt.show()


if __name__ == "__main__":
    run()

### 1.2 Working with Multiple Stocks

In [55]:
import pandas as pd
import os

#### create an empty dataframe

In [22]:
def run():
    """Build a daily date range and print it, first whole and then entry by entry."""
    # Define date range
    start_date = "2010-01-22"
    end_date = "2010-01-26"
    # dates is a DatetimeIndex covering every calendar day in the range
    dates = pd.date_range(start_date, end_date)
    print(dates)
    # Iterating the index yields the same Timestamp objects as dates[i].
    for day in dates:
        print(day)


if __name__ == "__main__":
    run()

DatetimeIndex(['2010-01-22', '2010-01-23', '2010-01-24', '2010-01-25',
               '2010-01-26'],
              dtype='datetime64[ns]', freq='D')
2010-01-22 00:00:00
2010-01-23 00:00:00
2010-01-24 00:00:00
2010-01-25 00:00:00
2010-01-26 00:00:00


In [24]:
def run():
    """Create a DataFrame that has a date index but no columns, and print it."""
    start_date = "2010-01-22"
    end_date = "2010-01-26"
    dates = pd.date_range(start_date, end_date)

    # Only the index is supplied, so the frame prints as "Empty DataFrame".
    df1 = pd.DataFrame(index=dates)
    # print(...) with one argument works on both Python 2 and Python 3.
    print(df1)


if __name__ == "__main__":
    run()

Empty DataFrame
Columns: []
Index: [2010-01-22 00:00:00, 2010-01-23 00:00:00, 2010-01-24 00:00:00, 2010-01-25 00:00:00, 2010-01-26 00:00:00]


#### join SPY data

In [52]:
def run():
    """Join SPY adjusted closes onto a fixed date range and drop dates without data.

    Reads ``data/SPY.csv`` relative to the current working directory and prints
    the head of the SPY frame, the still-empty date frame, and the joined result.
    """
    # Define date range
    start_date = "2010-01-22"
    end_date = "2010-01-26"
    dates = pd.date_range(start_date, end_date)

    # Create an empty dataframe indexed by every calendar day in the range
    df1 = pd.DataFrame(index=dates)

    # Read in SPY data. parse_dates=True turns the "Date" index into a
    # DatetimeIndex, so it can be matched against ``dates`` when joining.
    dfSPY = pd.read_csv("data/SPY.csv",
                        index_col="Date",
                        parse_dates=True,
                        usecols=["Date", "Adj Close"],
                        na_values=["NaN"])
    # print(...) with one argument works on both Python 2 and Python 3.
    print(dfSPY.head())

    # Nothing has been joined yet, so df1 is still empty here.
    print(df1)

    # Default (left) join keeps every date in df1; dropna then removes the dates
    # for which SPY has no row. One-step alternative:
    #     df1 = df1.join(dfSPY, how="inner")
    df1 = df1.join(dfSPY)
    df1 = df1.dropna()
    print(df1)


if __name__ == "__main__":
    run()

             Adj Close
Date                  
2016-08-05  218.179993
2016-08-04  216.410004
2016-08-03  216.179993
2016-08-02  215.550003
2016-08-01  216.940002
Empty DataFrame
Columns: []
Index: [2010-01-22 00:00:00, 2010-01-23 00:00:00, 2010-01-24 00:00:00, 2010-01-25 00:00:00, 2010-01-26 00:00:00]
            Adj Close
2010-01-22  95.682624
2010-01-25  96.173257
2010-01-26  95.770236


#### read in more stocks

In [48]:
def run():
    """Combine adjusted closes of SPY, GOOG, IBM and GLD into one frame and print it.

    Each symbol is read from ``data/<symbol>.csv`` relative to the current
    working directory, and its "Adj Close" column is renamed to the symbol.
    """
    # Define the date range
    start_date = "2010-01-22"
    end_date = "2010-01-26"
    dates = pd.date_range(start_date, end_date)

    # Create an empty dataframe
    df1 = pd.DataFrame(index=dates)

    # Read in SPY data
    dfSPY = pd.read_csv("data/SPY.csv",
                        index_col="Date",
                        parse_dates=True,
                        usecols=["Date", "Adj Close"],
                        na_values=["nan"])

    # Rename "Adj Close" to "SPY" so each symbol gets its own column name
    dfSPY = dfSPY.rename(columns={"Adj Close": "SPY"})

    # The inner join keeps only the dates present in both df1 and SPY
    df1 = df1.join(dfSPY, how="inner")

    # Read in more stocks
    symbols = ["GOOG", "IBM", "GLD"]
    for symbol in symbols:
        df = pd.read_csv("data/{}.csv".format(symbol),
                         index_col="Date",
                         parse_dates=True,
                         usecols=["Date", "Adj Close"],
                         na_values=["nan"])

        df = df.rename(columns={"Adj Close": symbol})

        # Default how="left": the rows kept by the SPY join are preserved
        df1 = df1.join(df)

    # print(...) with one argument works on both Python 2 and Python 3.
    print(df1)


if __name__ == "__main__":
    run()
    

                  SPY        GOOG         IBM         GLD
2010-01-26  95.770236  270.939526  108.502085  107.559998
2010-01-25  96.173257  269.730740  108.821338  107.480003
2010-01-22  95.682624  274.730736  108.286375  107.169998


#### Utility function for reading data

In [54]:
import os
import pandas as pd

def symbol_to_path(symbol, base_dir="data"):
    """Return the path of the CSV file for ``symbol`` inside ``base_dir``.

    ``symbol`` is converted with ``str`` and given a ``.csv`` extension.
    """
    filename = str(symbol) + ".csv"
    return os.path.join(base_dir, filename)

def get_data(symbols, dates):
    """Return a DataFrame of "Adj Close" prices, one column per symbol.

    Args:
        symbols: iterable of ticker symbols. "SPY" is added in front if it is
            missing. The caller's sequence is not modified.
        dates: index for the result (e.g. from ``pd.date_range``).

    Returns:
        DataFrame indexed by the entries of ``dates`` for which SPY has a
        value, with one column per symbol named after the symbol.
    """
    # Work on a copy: inserting "SPY" directly into the argument would change
    # the caller's list as a side effect.
    symbols = list(symbols)
    if "SPY" not in symbols:
        symbols.insert(0, "SPY")

    df = pd.DataFrame(index=dates)

    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol),
                              index_col="Date",
                              parse_dates=True,
                              usecols=["Date", "Adj Close"],
                              na_values=["nan"])

        # Name the price column after the symbol so joined columns do not clash
        df_temp = df_temp.rename(columns={"Adj Close": symbol})

        df = df.join(df_temp)  # default how="left" keeps every row of df

        if symbol == "SPY":
            # SPY is the reference: dates without an SPY value are dropped
            df = df.dropna(subset=["SPY"])

    return df

def run():
    """Print the combined price frame for GOOG, IBM and GLD over a short date range."""
    dates = pd.date_range("2010-01-22", "2010-01-26")

    symbols = ["GOOG", "IBM", "GLD"]

    df = get_data(symbols, dates)
    # print(...) with one argument works on both Python 2 and Python 3;
    # the bare ``print df`` statement fails on Python 3.
    print(df)


if __name__ == "__main__":
    run()
        

                  SPY        GOOG         IBM         GLD
2010-01-22  95.682624  274.730736  108.286375  107.169998
2010-01-25  96.173257  269.730740  108.821338  107.480003
2010-01-26  95.770236  270.939526  108.502085  107.559998


#### data frame slicing

In [57]:
import os
import pandas as pd

def symbol_to_path(symbol, base_dir="data"):
    """Return the path of the CSV file for ``symbol`` inside ``base_dir``.

    ``symbol`` is converted with ``str`` and given a ``.csv`` extension.
    """
    filename = str(symbol) + ".csv"
    return os.path.join(base_dir, filename)

def get_data(symbols, dates):
    """Return a DataFrame of "Adj Close" prices, one column per symbol.

    Args:
        symbols: iterable of ticker symbols. "SPY" is added in front if it is
            missing. The caller's sequence is not modified.
        dates: index for the result (e.g. from ``pd.date_range``).

    Returns:
        DataFrame indexed by the entries of ``dates`` for which SPY has a
        value, with one column per symbol named after the symbol.
    """
    # Work on a copy: inserting "SPY" directly into the argument would change
    # the caller's list as a side effect.
    symbols = list(symbols)
    if "SPY" not in symbols:
        symbols.insert(0, "SPY")

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol),
                              index_col="Date",
                              parse_dates=True,
                              usecols=["Date", "Adj Close"],
                              na_values=["nan"])
        # Name the price column after the symbol so joined columns do not clash
        df_temp = df_temp.rename(columns={"Adj Close": symbol})
        df = df.join(df_temp)  # default how="left" keeps every row of df
        if symbol == "SPY":
            # SPY is the reference: dates without an SPY value are dropped
            df = df.dropna(subset=["SPY"])
    return df

def run():
    """Show label-based row, column, and combined row/column slicing on 2010 prices."""
    dates = pd.date_range("2010-01-01", "2010-12-31")
    symbols = ["GOOG", "IBM", "GLD"]
    df = get_data(symbols, dates)

    # .loc replaces the .ix indexer used originally; .ix was deprecated in
    # pandas 0.20 and removed in 1.0. Both slice by label here.

    # row slicing:
    print(df.loc["2010-01-01":"2010-01-31"])  # the month of January

    # column slicing:
    print(df[["GOOG"]])
    print(df[["IBM", "GLD"]])

    # row and column slicing
    print(df.loc["2010-03-10":"2010-03-15", ["SPY", "IBM"]])


if __name__ == "__main__":
    run()

                   SPY        GOOG         IBM         GLD
2010-01-04   99.292299  313.062468  114.283108  109.800003
2010-01-05   99.555135  311.683844  112.902572  109.699997
2010-01-06   99.625228  303.826685  112.169153  111.510002
2010-01-07  100.045775  296.753749  111.780878  110.820000
2010-01-08  100.378704  300.709808  112.902572  111.370003
2010-01-11  100.518888  300.255255  111.720473  112.849998
2010-01-12   99.581425  294.945572  112.609197  110.489998
2010-01-13  100.422513  293.252243  112.367603  111.540001
2010-01-14  100.694113  294.630868  114.162311  112.029999
2010-01-15   99.563899  289.710772  113.705007  110.860001
2010-01-19  100.808008  293.516976  115.741309  111.519997
2010-01-20   99.782932  289.915587  112.384863  108.940002
2010-01-21   97.864196  291.199286  111.306313  107.370003
2010-01-22   95.682624  274.730736  108.286375  107.169998
2010-01-25   96.173257  269.730740  108.821338  107.480003
2010-01-26   95.770236  270.939526  108.502085  107.559998

#### Plotting multiple stocks

In [60]:
import os
import pandas as pd
import matplotlib.pyplot as plt

def symbol_to_path(symbol, base_dir="data"):
    """Return the path of the CSV file for ``symbol`` inside ``base_dir``.

    ``symbol`` is converted with ``str`` and given a ``.csv`` extension.
    """
    filename = str(symbol) + ".csv"
    return os.path.join(base_dir, filename)

def get_data(symbols, dates):
    """Return a DataFrame of "Adj Close" prices, one column per symbol.

    Args:
        symbols: iterable of ticker symbols. "SPY" is added in front if it is
            missing. The caller's sequence is not modified.
        dates: index for the result (e.g. from ``pd.date_range``).

    Returns:
        DataFrame indexed by the entries of ``dates`` for which SPY has a
        value, with one column per symbol named after the symbol.
    """
    # Work on a copy: inserting "SPY" directly into the argument would change
    # the caller's list as a side effect.
    symbols = list(symbols)
    if "SPY" not in symbols:
        symbols.insert(0, "SPY")

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol),
                              index_col="Date",
                              parse_dates=True,
                              usecols=["Date", "Adj Close"],
                              na_values=["nan"])
        # Name the price column after the symbol so joined columns do not clash
        df_temp = df_temp.rename(columns={"Adj Close": symbol})
        df = df.join(df_temp)  # default how="left" keeps every row of df
        if symbol == "SPY":
            # SPY is the reference: dates without an SPY value are dropped
            df = df.dropna(subset=["SPY"])
    return df

def plot_data(df, title="Stock Prices"):
    """Plot ``df`` with the given ``title`` and display the figure.

    The x axis is labelled "Date" and the y axis "Price".
    """
    axes = df.plot(title=title)
    axes.set_ylabel("Price")
    axes.set_xlabel("Date")
    plt.show()

def run():
    """Load 2010 prices for GOOG, IBM and GLD via ``get_data`` and plot them."""
    year_2010 = pd.date_range("2010-01-01", "2010-12-31")
    prices = get_data(["GOOG", "IBM", "GLD"], year_2010)

    # plot stock prices
    plot_data(prices)


if __name__ == "__main__":
    run()

#### normalizing

In [62]:
import os
import pandas as pd
import matplotlib.pyplot as plt

def symbol_to_path(symbol, base_dir="data"):
    """Return the path of the CSV file for ``symbol`` inside ``base_dir``.

    ``symbol`` is converted with ``str`` and given a ``.csv`` extension.
    """
    filename = str(symbol) + ".csv"
    return os.path.join(base_dir, filename)

def get_data(symbols, dates):
    """Return a DataFrame of "Adj Close" prices, one column per symbol.

    Args:
        symbols: iterable of ticker symbols. "SPY" is added in front if it is
            missing. The caller's sequence is not modified.
        dates: index for the result (e.g. from ``pd.date_range``).

    Returns:
        DataFrame indexed by the entries of ``dates`` for which SPY has a
        value, with one column per symbol named after the symbol.
    """
    # Work on a copy: inserting "SPY" directly into the argument would change
    # the caller's list as a side effect.
    symbols = list(symbols)
    if "SPY" not in symbols:
        symbols.insert(0, "SPY")

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol),
                              index_col="Date",
                              parse_dates=True,
                              usecols=["Date", "Adj Close"],
                              na_values=["nan"])
        # Name the price column after the symbol so joined columns do not clash
        df_temp = df_temp.rename(columns={"Adj Close": symbol})
        df = df.join(df_temp)  # default how="left" keeps every row of df
        if symbol == "SPY":
            # SPY is the reference: dates without an SPY value are dropped
            df = df.dropna(subset=["SPY"])
    return df

def plot_data(df, title="Stock Prices"):
    """Plot ``df`` with the given ``title`` and display the figure.

    The x axis is labelled "Date" and the y axis "Price".
    """
    axes = df.plot(title=title)
    axes.set_ylabel("Price")
    axes.set_xlabel("Date")
    plt.show()
    
def normalize_data(df):
    """Return ``df`` with every column divided by that column's first-row value.

    After normalisation each column starts at 1.0. The input frame is not
    modified.
    """
    # .iloc[0] selects the first row by position. The original df.ix[0, :]
    # used the .ix indexer, which was removed in pandas 1.0.
    return df / df.iloc[0]

def run():
    """Load 2010 prices for GOOG, IBM and GLD, normalize them, and plot the result."""
    year_2010 = pd.date_range("2010-01-01", "2010-12-31")
    prices = get_data(["GOOG", "IBM", "GLD"], year_2010)
    normalized = normalize_data(prices)

    # plot stock prices
    plot_data(normalized, title="Normalized Prices")


if __name__ == "__main__":
    run()

### 1.3 Numpy

In [63]:
import numpy as np

#### creating numpy arrays

In [64]:
import numpy as np

def run():
    """Print a 1-D and a 2-D NumPy array built from Python lists."""
    # print(...) with one argument works on both Python 2 and Python 3;
    # the bare ``print np.array(...)`` statement fails on Python 3.

    # 1d array
    print(np.array([2, 3, 4]))

    # 2d array
    print(np.array([[2, 3, 4], [4, 5, 6]]))


if __name__ == "__main__":
    run()

[2 3 4]
[[2 3 4]
 [4 5 6]]


#### arrays with initial values

In [74]:
def run():
    """Print uninitialised arrays and arrays of ones with float and integer dtypes."""
    # np.empty allocates without initialising, so the float values printed are
    # whatever happened to be in memory, not zeros.
    print(np.empty(5))  # 1d
    print(np.empty((5, 4)))  # 2d

    # array of 1s
    print(np.ones((5, 4)))  # elements are floats by default
    # The builtin int replaces the np.int alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24.
    print(np.ones((5, 4), dtype=int))


if __name__ == "__main__":
    run()

[  0.  23.  46.  69.  92.]
[[  4.94065646e-324   4.94065646e-324   4.94065646e-324   4.94065646e-324]
 [  4.94065646e-324   4.94065646e-324   4.94065646e-324   4.94065646e-324]
 [  4.94065646e-324   4.94065646e-324   4.94065646e-324   4.94065646e-324]
 [  4.94065646e-324   4.94065646e-324   4.94065646e-324   4.94065646e-324]
 [  4.94065646e-324   4.94065646e-324   4.94065646e-324   4.94065646e-324]]
[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]]
[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]
