In [198]:
import csv
import os.path 
import json 
import numpy as np
import datetime

import matplotlib.pyplot as plt
import plotly
import plotly.plotly as py
from plotly.graph_objs import Scatter, Layout, Bar


pathPrefix = ''
prefix = pathPrefix + 'chart_cluster_data_'
ending = '.csv'


def loadStatsFromOneFile(clusterStr): 
    fName = ''
    
    if(len(clusterStr)==2) and ('x' not in clusterStr):
        fName = prefix + clusterStr[0] +"_"+ clusterStr[1] + ending   
    #print fName
    
    pub_year_dict = {}
    appl_year_dict = {}
    years_to_publish_dict = {}
    
    if(fName == ''):
        return [pub_year_dict, appl_year_dict, years_to_publish_dict]
    
    with open(fName) as csvfile:
        spamreader = csv.reader(csvfile, delimiter= ' ', quotechar='|')
        rowCount = 0 
        for row in spamreader:
            rowCount += 1
            jsonStr = str(row).replace("['",'').replace("']",'')
            jsons = json.loads(jsonStr)
            for j in jsons:  
                if (rowCount == 1):
                    if j['pub_year'] in pub_year_dict:
                        pub_year_dict[j['pub_year']] += int(j['count'])
                    else:
                        pub_year_dict[j['pub_year']] = int(j['count'])

                if (rowCount == 2):
                    if j['appl_year'] in appl_year_dict:
                        appl_year_dict[j['appl_year']] += int(j['count'])
                    else:
                        appl_year_dict[j['appl_year']] = int(j['count'])

                if (rowCount == 3):
                    if j['years_to_publish'] in years_to_publish_dict:
                        years_to_publish_dict[j['years_to_publish']] += int(j['count'])
                    else:
                        years_to_publish_dict[j['years_to_publish']] = int(j['count'])

    return [pub_year_dict, appl_year_dict, years_to_publish_dict]

def mergeDicts(dict_full, dict_temp):
    for k,v in dict_temp.items():
        if k in dict_full:
            dict_full[k] += v
        else:
            dict_full[k] = v
            
def loadStatsFromFiles(clusterStr): 
    clusters = ['0','1','2','3','4','5','6','7','8','9']
    files = []
    pub_year_dict = {}
    appl_year_dict = {}
    years_to_publish_dict = {}
    
    if clusterStr[0] == 'x':
        for x in clusters: 
            if(clusterStr[1] == 'x'):
                for xx in clusters:
                    files.append(x+xx)
            else:
                files.append(x+clusterStr[1])
    else:
        if(clusterStr[1] == 'x'):
            for xx in clusters:
                files.append(clusterStr[0]+xx)
        else:
            files.append(clusterStr[0]+clusterStr[1])

    for f in files: 
        temp = loadStatsFromFile(f)
        mergeDicts(pub_year_dict, temp[0])
        mergeDicts(appl_year_dict, temp[1])
        mergeDicts(years_to_publish_dict, temp[2])

    return [pub_year_dict, appl_year_dict, years_to_publish_dict]
    
    
def plotDict(data_dict, yearlyData, fileName, offline):
    py.sign_in('yanglinfang', 'F5jaY29GDfD8TIenMz9p')
    date = plt.figure()
    x = []
    y = []
    for k, v in data_dict.items():
        if(yearlyData == True):
            x.append(datetime.datetime(k, 1, 1))
        else:
            x.append(k)
        y.append(v)
        
    layout = Layout(
        title=fileName,
        xaxis=dict(
            autotick=True,
            ticks='outside',
            tickcolor='#000'
        ),
        yaxis=dict(
            autotick=True,
            ticks='outside',
            tickcolor='#000'
        )
    )
    
    if(yearlyData == True):
        layout.xaxis=dict(range=[datetime.datetime(2000, 1, 1), datetime.datetime(2018, 1, 1)]) 
    else:
        layout.xaxis=dict(range=[0, 15])
    
    if offline == True: 
        plotly.offline.init_notebook_mode(connected=True)
        plotly.offline.iplot({
            "data": [Bar(x=x, y=y)],
            "layout": layout
        })
    else:
        print ("only supporting offline for now")

def saveToCsv(clusterStr, data_dict, header):
    fileToSave = prefix + clusterStr + '_' + header[0] + ending
    if os.path.isfile(fileToSave) == False:
        with open(fileToSave, 'w') as csvfile:
            fieldnames = header
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for key in data_dict: 
                writer.writerow({header[0]: key, header[1]: data_dict[key]})
    return fileToSave

def saveStatsSummary(clusterStr):
    [pub_year_dict, appl_year_dict, years_to_publish_dict] = loadStatsFromFiles(clusterStr)
    f1 = saveToCsv(clusterStr, pub_year_dict, ["pub_year","count"])
    f2 = saveToCsv(clusterStr, appl_year_dict, ["appl_year","count"])
    f3 = saveToCsv(clusterStr, years_to_publish_dict, ["years_to_publish","count"])
    return [f1, f2, f3]

In [199]:
saveStatsSummary("1x")

chart_cluster_data_1_0.csv
chart_cluster_data_1_1.csv
chart_cluster_data_1_2.csv
chart_cluster_data_1_3.csv
chart_cluster_data_1_4.csv
chart_cluster_data_1_5.csv
chart_cluster_data_1_6.csv
chart_cluster_data_1_7.csv
chart_cluster_data_1_8.csv
chart_cluster_data_1_9.csv


['chart_cluster_data_1x_pub_year.csv',
 'chart_cluster_data_1x_appl_year.csv',
 'chart_cluster_data_1x_years_to_publish.csv']

In [200]:
[pub_year_dict, appl_year_dict, years_to_publish_dict] = loadStatsFromFiles("1x")
plotDict(pub_year_dict, True, 'pub_year_dict', True)
plotDict(appl_year_dict, True, 'appl_year_dict', True)
plotDict(years_to_publish_dict, False, 'years_to_publish_dict', True)

chart_cluster_data_1_0.csv
chart_cluster_data_1_1.csv
chart_cluster_data_1_2.csv
chart_cluster_data_1_3.csv
chart_cluster_data_1_4.csv
chart_cluster_data_1_5.csv
chart_cluster_data_1_6.csv
chart_cluster_data_1_7.csv
chart_cluster_data_1_8.csv
chart_cluster_data_1_9.csv


<matplotlib.figure.Figure at 0x14ddecc0>

<matplotlib.figure.Figure at 0x111b57f0>

<matplotlib.figure.Figure at 0x1607b128>

In [164]:
py.sign_in('yanglinfang', 'F5jaY29GDfD8TIenMz9p')

def results(symbol, trend1, trend2):
    data = web.DataReader(symbol, data_source='yahoo')
    data['Trend 1'] = data['Adj Close'].rolling(int(trend1)).mean()
    data['Trend 2'] = data['Adj Close'].rolling(int(trend2)).mean()
    #print data
    url = data[['Adj Close', 'Trend 1', 'Trend 2']].iplot(asUrl=True)
    print url 
    table = data.tail().to_html()

results('AAPL', 20, 100)

<plotly.tools.PlotlyDisplay object>
