In [None]:
import pandas as pd 
import os 
import matplotlib.pyplot as plt 
import numpy as np 
import seaborn as sns
from IPython.display import display
import sys
from statistics import mean
import datetime
from datetime import timedelta
import time
# Global plot styling: the seaborn "poster" context makes text larger.
sns.set_context("poster")

# Location of the net-worth-over-time CSV exported from mint.com.
path = "trends.csv"

In [None]:
def _parse_dollars(s):
    """Convert a currency string from the Mint export to a float.

    Dollar signs and thousands separators are removed, and an opening
    parenthesis is turned into a minus sign so that accounting-style
    negatives such as '($1,234.56)' become -1234.56.
    """
    return float(s.replace('$', '').replace(',', '').replace('(', '-').replace(')', ''))


# Load the Mint export, converting currency strings to floats and dates to
# timestamps while reading.
dfraw = pd.read_csv(
    path,
    converters={
        'Assets': _parse_dollars,
        'Debts': _parse_dollars,
        'NET': _parse_dollars,
        # Shift each parsed date forward by 27 days; really should be last day of month.
        'DATES': lambda s: pd.to_datetime(s) + timedelta(days=27),
    },
)

# In 2022, Mint changed its trends.csv saved file. Change everything back to
# the way it has always been so the rest of the notebook keeps working.
dfraw['Debts'] = -dfraw['Debts']  # flip the sign of the exported Debts column
dfraw = dfraw.rename(columns={'DATES': 'Dates', 'NET': 'Net'})  # we don't need all caps

df = dfraw.copy()  # a copy for editing; dfraw stays untouched

# Deal with corrupted data from Mint: duplicate rows. When a date appears more
# than once, only the LAST row for that date is kept. This is done by value
# rather than by dropping the label `index - 1`, which would remove an
# unrelated row whenever the duplicates are not adjacent. It also runs before
# the last row's date is adjusted below, so a duplicated final month is still
# recognised as a duplicate.
df = df.drop_duplicates(subset='Dates', keep='last').reset_index(drop=True)

# Mint exports the current data for this month, so after the 27-day shift the
# final date can lie in the future; clamp it to the present moment.
now = pd.Timestamp.now()
if not df.empty and df.at[df.index[-1], 'Dates'] > now:
    df.at[df.index[-1], 'Dates'] = now

display(df.head())
display(df.tail())

In [None]:
# Net worth on a linear axis, with the area between the curve and zero shaded
# wherever net worth is at or above zero.
fig, ax = plt.subplots()

dates = df['Dates'].values
net = df['Net'].values
baseline = 0

ax.plot(df['Dates'], df['Net'])
ax.set_ylabel('Net worth/$')
ax.set_title("Net worth over time")
plt.xticks(rotation=-40, ha="left")

ax.fill_between(
    dates,
    baseline,
    net,
    where=net >= baseline,
    facecolor='lightblue',
    interpolate=True,
);

In [None]:
# The same net-worth series, drawn on a logarithmic y-axis.
fig, ax = plt.subplots()
ax.plot(df['Dates'], df['Net'])

ax.set_ylabel('Net worth/$')
ax.set_yscale('log')
ax.set_title("Net worth over time: log plot")

plt.xticks(rotation=-40, ha="left");

In [None]:
# Row-to-row change in net worth, one marker per row of the export.
fig, ax = plt.subplots()

net_change = df.set_index('Dates')['Net'].diff()
ax.plot(net_change, 'o', alpha=.5)

ax.set_ylabel("Dollars gained or lost")
ax.set_title('Difference per month')
plt.xticks(rotation=-40, ha="left");

In [None]:
# Debts over time, plotted with the sign of the stored Debts column reversed.
fig, ax = plt.subplots()
ax.plot(df['Dates'], -df['Debts'], 'k', label="All debts")

ax.set_ylabel('Debt/$')
ax.set_title("Debts over time")
plt.xticks(rotation=-40, ha="left");

In [None]:
#!pip install yfinance --upgrade --no-cache-dir

In [None]:
#import sys
#sys.path.append(r'C:\users\vhorowit\p3\lib');
#sys.path.append(r'C:\users\vhorowit\p3\lib\site-packages')
#sys.path.append(r'C:\users\vhorowit\anaconda3\lib\site-packages')
#!pip show yfinance

In [None]:
import yfinance as yf

# First month of index history to download.
earliestdate = '2012-01-01'

# "^GSPC" is the Yahoo Finance symbol for the S&P 500 index.
GSPC = yf.Ticker("^GSPC")

# Monthly price history from `earliestdate` up to today.
# NOTE: the frame keeps its historical name `DJIpd` because a later cell reads
# it, but the data is the S&P 500, not the Dow Jones; the plot labels below
# say so.
DJIpd = GSPC.history(start=earliestdate, end=datetime.date.today(), interval="1mo")

display(DJIpd.tail())

# Net worth and the index close on one log-scaled axis, so that equal
# percentage moves appear as equal vertical distances for both series.
fig, ax = plt.subplots()
ax.plot(df['Dates'], df['Net'], label='Net worth')
ax.plot(DJIpd.index, DJIpd['Close'], label='S&P 500')

ax.set_ylabel('Net worth/$ and S&P 500')
ax.set_yscale('log')
ax.set_title("Net worth over time vs S&P 500")
ax.legend()
plt.xticks(rotation=-90, ha="left")

plt.show()

In [None]:
# Compare row-to-row changes in net worth (scaled down by 100) with
# row-to-row changes in the index close stored in `DJIpd`. That frame holds
# S&P 500 ("^GSPC") data despite its name, so the legend says S&P 500.
months = 5  # window length of the rolling average, in rows

net_change = df.set_index('Dates')['Net'].diff() / 100
index_change = DJIpd['Close'].diff()

fig, ax = plt.subplots(figsize=(15, 8))

# Net worth: individual changes plus their rolling average.
ax.plot(net_change, '.', alpha=.4, color='blue', label="Net worth changes")
ax.minorticks_on()
ax.plot(net_change.rolling(months).mean(), color='blue',
        label="Net worth changes rolling avg, " + str(months) + ' months')

# Remember the right-hand limit while only the net-worth series is drawn, so
# the index data plotted next cannot extend the x-range past it.
_, xmax = ax.get_xlim()

# Index: individual changes plus their rolling average.
ax.plot(index_change, '.', alpha=.4, color='orange', label="S&P 500 changes")
ax.plot(index_change.rolling(months).mean(), color='orange', alpha=.8,
        label="S&P 500 changes rolling avg, " + str(months) + ' months')

# Start the x-axis where the downloaded index history starts.
ax.set_xlim(left=pd.to_datetime(earliestdate), right=xmax)

ax.axhline(y=0, color='grey', linestyle='-')
ax.set_ylabel("Dollars gained or lost / 100")
ax.set_title('Difference per month')
plt.xticks(rotation=-40, ha="left")
ax.legend();
