# Group 2 Project 1 Sector TOP DOGS

In [1]:
import pandas as pd
import numpy as np
import datetime
import os.path
import csv
from pathlib import Path
%matplotlib inline
import matplotlib.pyplot as plt

## Function: Get tickers for years

In [2]:
def get_tickers_for_years(years,sector):
    """pass me a list of years [2013,2015,...], and right, ZERO parameter or error checking"""
    tech_csv = Path(f"./{sector}SectorTickers-between-2015-2018.csv")
    #print(type(years))
    tech_tickers_df = pd.read_csv(tech_csv)
    #print(tech_tickers_df.head())
    all_tickers = []
    
    for year in years:
        #print(type(year))
        ticker_list = tech_tickers_df[year].tolist()
        ticker_list = [x for x in ticker_list if str(x) != "nan"]
        all_tickers.extend(ticker_list)
    
    return np.array(all_tickers)

## Function: Calculate the top performing tickers in range

In [3]:
def combined_df_for_timeframe(all_prices_df,ticker_list,day_start,day_end,sector):
    """Explain what I do here ;) """
    top_dogs = None
    
    print(f"\n\nRunning combined_df_for_timeframe wtih start: {day_start} and end: {day_end}\n\n")
    
    range_df = all_prices_df[(all_prices_df['Date'] >= day_start) & (all_prices_df['Date'] <= day_end)]

    # Create empty dataframe with dates
    prices_df = range_df[range_df['Ticker']=='A']
    prices_df = prices_df[['Date','Adj Close']].copy()
    prices_df = prices_df.drop(columns=["Adj Close"])
    prices_df = prices_df.set_index('Date')
    
    for ticker in ticker_list:
        if ticker in ticker_list:
            # I can't say I understand this bit... [Will]
            ticker_df = range_df[range_df['Ticker']==ticker]
            ticker_df = ticker_df[['Date', 'Adj Close']].copy()
            ticker_df = ticker_df.rename(columns={'Adj Close':ticker})
            ticker_df = ticker_df.set_index('Date')
            prices_df = pd.concat([prices_df, ticker_df], axis="columns", sort=True)
            
    return prices_df

## Function: Calc percentage Change, and return TOP 10 best performing TICKERS as DataFrame

In [4]:
def top_10_df(df):
    pct_change = df.pct_change()
    cumulative_returns = (1+pct_change).cumprod() -1
    cum_returns = cumulative_returns.tail(1).T
    column = cum_returns.columns[-1]
    top = cum_returns.sort_values(by=column,ascending=False)
    top = top.drop_duplicates()
    top = top.head(10)
    top = top.reset_index()
    return top

## Function: Return the returns of the TOP DOG Portfolio as DataFrame

In [8]:
def top_10_returns(df,sector,year):
    weights = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
    pct_change = df.dot(weights)
    # cumulative_returns = (1 + portfolio_returns).cumprod()
    # Removed the -1, per above example: https://rice.bootcampcontent.com/Rice-Coding-Bootcamp/RU-HOU-FIN-PT-07-2019-U-C/blob/master/class/04-Pandas/1/Activities/09-Stu_Portfolio_Planner_Part_II/Solved/portfolio_planner_part_2.ipynb
    my_portfolio_one = (1+ pct_change).cumprod()
#     plt.plot(my_portfolio_one)
#     plt.xlabel('Time')
#     plt.title(f"{sector} 'Top Dog' returns for {year}")
#     plt.ylabel('Percent Change')
#     plt.savefig(f"./out/{sector}_top_dog_returns{year}.png")
    print(my_portfolio_one)
    my_portfolio_tail = my_portfolio_one.tail(1)
    my_portfolio_tail = pd.DataFrame(my_portfolio_tail)
    my_portfolio_tail = my_portfolio_tail.reset_index(drop=True)
    my_portfolio_tail = my_portfolio_tail.rename(columns={0:'Annual Return'})
    my_portfolio_tail['Sector'] = sector
    my_portfolio_tail['Year'] = year
    my_portfolio_tail['Portfolio']= f"{sector} 'Top Dog' for year ending {year}"
    IT_csv = Path(f"./out/top_{sector}.csv")
    my_portfolio_tail.to_csv(path_or_buf=IT_csv, index=False)
    return my_portfolio_one

# Run the code

## grab all ticker data for all sectors

In [9]:
monthly_csv = Path("./aggregatedata.csv")
all_prices_df = pd.read_csv(monthly_csv, parse_dates=True, infer_datetime_format=True)

## Now Loop through sectors

In [10]:
sectors = ['Technology','Financials','HealthCare']
start = '2015-01-01'
end   = '2017-12-31'
year_end,month,day = end.split('-')

top_start = '2018-01-01'
top_end   = '2018-12-31'

# Loop through sectors, to find top dogs by sector
for sector in sectors:
    print(f"Calculating Top Portfolio for {sector} for year end {year_end}")
    tickers = get_tickers_for_years(['2015','2016','2017'], sector)
    results_df = combined_df_for_timeframe(all_prices_df,tickers,start,end,sector)
    top10 = top_10_df(results_df)
    top_tickers = []
    top_tickers = top10['index'].tolist()
    top_dog_portfolio = combined_df_for_timeframe(all_prices_df,top_tickers,top_start,top_end,sector)
    #print(top_dog_portfolio)
    #print(top_dog_portfolio.plot( title=f"{sector} Portfolio Performance for {year_end}" )   )
    top_10_returns(top_dog_portfolio,sector,year_end)

Calculating Top Portfolio for Technology for year end 2017


Running combined_df_for_timeframe wtih start: 2015-01-01 and end: 2017-12-31




Running combined_df_for_timeframe wtih start: 2018-01-01 and end: 2018-12-31


Date
2018-01-02    8.675146e+01
2018-02-01    7.717731e+03
2018-03-01    6.268052e+05
2018-04-02    4.703495e+07
2018-05-01    3.560401e+09
2018-06-01    2.913115e+11
2018-07-02    2.340388e+13
2018-08-01    1.940739e+15
2018-09-04    1.810680e+17
2018-10-01    1.624530e+19
2018-11-01    1.181742e+21
2018-12-03    7.774477e+22
dtype: float64
Calculating Top Portfolio for Financials for year end 2017


Running combined_df_for_timeframe wtih start: 2015-01-01 and end: 2017-12-31




Running combined_df_for_timeframe wtih start: 2018-01-01 and end: 2018-12-31


Date
2018-01-02    8.363038e+01
2018-02-01    6.637953e+03
2018-03-01    5.056737e+05
2018-04-02    3.731003e+07
2018-05-01    2.586722e+09
2018-06-01    1.807141e+11
2018-07-02    1.064559e+13
2018-08-01    6.2163