In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.style as style
import seaborn as sns
%matplotlib inline

# Read in data from Excel workbook
# Only four columns are kept (positions 0, 12, 13, 15): announcement date,
# state of incorporation, aggregated amount raised and round number.
# The first 7 rows of the sheet are skipped; the next row supplies the header.
# `sheet_name` replaces the `sheetname` keyword, which newer pandas no longer accepts.
ps5_data = pd.read_excel('all_prvtplace.xlsx', sheet_name="Sheet1", header=0, skiprows=7,
                         usecols=[0, 12, 13, 15])
ps5_data.head(n=5)

Unnamed: 0,All Transactions Announced Date,State of Incorporation [Target/Issuer],"Aggregated Amount Raised ($USDmm, Historical rate)",Round Number
0,2006-12-31,California,60.0,3
1,2006-12-31,Ohio,1.0,1
2,2006-12-31,Delaware,296.3,3
3,2006-12-31,Delaware,0.38,1
4,2006-12-31,Delaware,2.0,1


In [2]:
# Map the verbose workbook headers onto the short snake_case names
# used by every later cell.
column_aliases = {
    'All Transactions Announced Date': 'date_announce',
    'State of Incorporation [Target/Issuer]': 'issuer_state',
    'Aggregated Amount Raised ($USDmm, Historical rate)': 'raised_amt',
    'Round Number': 'round_num',
}
ps5_data = ps5_data.rename(columns=column_aliases)

In [9]:
# Show the dtype of every column in ps5_data.
print(ps5_data.dtypes)

date_announce    datetime64[ns]
issuer_state             object
raised_amt              float64
round_num                 int32
year                      int64
dtype: object


In [8]:
# Store the round number as an integer column.
ps5_data = ps5_data.astype({'round_num': int})

In [10]:
# Preview the first five rows of ps5_data.
ps5_data.head(5)

Unnamed: 0,date_announce,issuer_state,raised_amt,round_num,year
0,2006-12-31,California,60.0,3,2006
1,2006-12-31,Ohio,1.0,1,2006
2,2006-12-31,Delaware,296.3,3,2006
3,2006-12-31,Delaware,0.38,1,2006
4,2006-12-31,Delaware,2.0,1,2006


In [4]:
# Sample selection and variable definition
# Force the amount and round columns to numeric; anything unparseable becomes NaN.
ps5_data['raised_amt'] = pd.to_numeric(ps5_data['raised_amt'], errors='coerce')
ps5_data['round_num'] = pd.to_numeric(ps5_data['round_num'], errors='coerce')
# Calendar year of the announcement date, used as the grouping key later on.
ps5_data['year'] = ps5_data['date_announce'].dt.year
# Keep strictly positive amounts only. NaN > 0 is False, so rows whose amount
# could not be parsed are dropped here as well.
# .copy() makes the result an independent frame, so later column assignments
# do not write into a slice of the unfiltered data (SettingWithCopyWarning).
ps5_data = ps5_data[ps5_data['raised_amt'] > 0].copy()

  result = getattr(x, name)(y)


TypeError: invalid type comparison

In [None]:
# compute year level data
# One groupby, two built-in aggregations: total amount raised per year and
# the number of placements with a non-missing amount per year.
# This replaces two separate groupby().apply() passes over Python lambdas
# and yields the same values.
amt_by_year = ps5_data.groupby('year')['raised_amt']
year_ps5 = pd.DataFrame({'raised_amt': amt_by_year.sum(),
                         'count': amt_by_year.count()})

In [None]:
# Preview the first five rows of the year-level table.
year_ps5.head(5)

In [None]:
# Show the dtype of every column in the year-level table.
print(year_ps5.dtypes)

In [None]:
# Display the second column of year_ps5, selected by position.
# NOTE(review): which column sits at position 1 depends on how pandas ordered
# the dict keys when year_ps5 was built -- confirm, or select the column by name.
year_ps5.iloc[:, 1]

In [None]:
# Plot figure 1
def _draw_yearly_series(axis, values, color, ylabel):
    """Draw one yearly series on `axis` and color its y-axis to match the line.

    Horizontal grid lines are switched off and only the vertical (x-axis)
    grid is kept. The x values are always the index of `year_ps5`.
    """
    axis.grid(False)
    axis.xaxis.grid(True)
    axis.plot(year_ps5.index, values, color)
    # Make the y-axis label, ticks and tick labels match the line color.
    axis.set_ylabel(ylabel, color=color)
    axis.tick_params('y', colors=color)


plt.style.use('ggplot')
fig1, ax1 = plt.subplots()
# Second y-axis sharing the same x-axis, used for the count series.
ax2 = ax1.twinx()

# Title, x label and reference lines live on the primary axis.
ax1.set(title='Private Placement Across Years', xlabel='Year')
ax1.axhline(y=0, color='black', linewidth=1.3, alpha=.7)
ax1.axvline(x=1996, color='k', linestyle='--')
ax1.annotate(
    'This line\n denotes\n year 1996\n when NSMIA\n is effective',
    xy=(1996, 80000),
    arrowprops=dict(facecolor='black', shrink=0.05),
    xytext=(1990, 70000),
)

# Amount raised in blue on the left axis, number of placements in red on the right.
_draw_yearly_series(ax1, year_ps5['raised_amt'], 'b', 'Million Dollars')
_draw_yearly_series(ax2, year_ps5['count'], 'r', 'Number')

plt.show()

In [None]:
# Display round_num cast to int. The result is not assigned, so ps5_data
# itself is left unchanged by this cell.
ps5_data['round_num'].astype(int)

In [None]:
# Figure 2
# Restrict the sample to placements whose round number is at most 3,
# so the early rounds can be compared across years.
early_round_mask = ps5_data['round_num'].le(3)
rd_ps5 = ps5_data.loc[early_round_mask].copy()

In [None]:
# Preview the first five rows of the round-restricted sample.
rd_ps5.head(5)

In [None]:
# Drop rows whose round number is missing.
# The mask is built from rd_ps5 itself; the earlier version built it from
# ps5_data, a different frame than the one being indexed.
# Coercing to numeric turns a '-' placeholder (or any other non-numeric
# entry) into NaN, so notna() works whether or not the column has already
# been converted.
has_round_num = pd.to_numeric(rd_ps5['round_num'], errors='coerce').notna()
rd_ps5 = rd_ps5[has_round_num].copy()

In [None]:
# Store the round number as an integer column. The cast was previously
# evaluated without being assigned, so it had no effect on rd_ps5.
rd_ps5 = rd_ps5.astype({'round_num': int})

In [None]:
# Total amount raised and number of placements (non-missing amounts) for
# every (year, round_num) pair. One groupby with built-in aggregations
# replaces two groupby().apply() passes over Python lambdas and yields the same values.
amt_by_year_round = rd_ps5.groupby(['year', 'round_num'])['raised_amt']
year_rd_ps5 = pd.DataFrame({'raised_amt': amt_by_year_round.sum(),
                            'count': amt_by_year_round.count()})

In [None]:
# Copy both index levels into ordinary columns (appended after the existing
# ones), then replace the MultiIndex with a plain 0..n-1 index.
year_rd_ps5 = (
    year_rd_ps5
    .assign(
        year=lambda agg: agg.index.get_level_values('year'),
        round_num=lambda agg: agg.index.get_level_values('round_num'),
    )
    .reset_index(drop=True)
)

In [None]:
# Grouped bar chart: one group of bars per year, one bar per round number,
# bar height = number of placements.
year_rd_ps5.pivot(index = 'year', columns = 'round_num', values='count').plot(kind="bar")
plt.xlabel('Year')
plt.ylabel('Number')
plt.title('Number of Private Placements for each round by Year')
# plt.show must be called; the bare name only echoed the function object.
plt.show()

In [None]:
# Figure 3
# Drop observations with missing state (recorded as '-' in the data).
# .copy() makes the filtered frame independent, so adding new columns to it
# afterwards does not raise SettingWithCopyWarning.
ps5_data = ps5_data[ps5_data['issuer_state'] != '-'].copy()

In [None]:
# dummy = 1 when the issuer's state of incorporation is one of the states
# listed below, 0 otherwise.
flagged_states = [
    'Arizona', 'California', 'Florida', 'Georgia', 'Illinois', 'Louisiana',
    'New York', 'North Dakota', 'Ohio', 'Tennessee', 'Texas',
]
ps5_data['dummy'] = ps5_data['issuer_state'].isin(flagged_states).astype(int)

In [None]:
# Total amount raised and number of placements (non-missing amounts) for
# every (year, dummy) pair. One groupby with built-in aggregations replaces
# two groupby().apply() passes over Python lambdas and yields the same values.
amt_by_year_dummy = ps5_data.groupby(['year', 'dummy'])['raised_amt']
dummy_ps5 = pd.DataFrame({'raised_amt': amt_by_year_dummy.sum(),
                          'count': amt_by_year_dummy.count()})

In [None]:
# Copy both index levels into ordinary columns (appended after the existing
# ones), then replace the MultiIndex with a plain 0..n-1 index.
dummy_ps5 = (
    dummy_ps5
    .assign(
        year=lambda agg: agg.index.get_level_values('year'),
        dummy=lambda agg: agg.index.get_level_values('dummy'),
    )
    .reset_index(drop=True)
)

In [None]:
# Grouped bar chart: one group of bars per year, one bar per value of the
# state dummy (0 / 1), bar height = number of placements.
dummy_ps5.pivot(index = 'year', columns = 'dummy', values='count').plot(kind="bar")
plt.xlabel('Year')
plt.ylabel('Number')
# The bars are split by the state dummy, not by round number, so the title
# copied from the previous figure was wrong.
plt.title('Number of Private Placements by State Group and Year')
# plt.show must be called; the bare name only echoed the function object.
plt.show()

In [None]:
# Create state level data
# Total amount raised and number of placements (non-missing amounts) per
# state of incorporation. One groupby with built-in aggregations replaces
# two groupby().apply() passes over Python lambdas and yields the same values.
amt_by_state = ps5_data.groupby('issuer_state')['raised_amt']
state_ps5 = pd.DataFrame({'raised_amt': amt_by_state.sum(),
                          'count': amt_by_state.count()})

In [None]:
# Display the full state-level table.
state_ps5

In [None]:
# Delaware has by far the largest count, many times that of the runner-up.
# Plotted on the raw scale, the other states would be nearly indistinguishable
# on the map, so the count is replaced by log(1 + count).
# First move the state name out of the index into a regular column and
# switch to a plain 0..n-1 index.
state_ps5 = (
    state_ps5
    .assign(issuer_state=lambda agg: agg.index.get_level_values('issuer_state'))
    .reset_index(drop=True)
)
state_ps5['count'] = np.log(state_ps5['count'] + 1)

In [None]:
import matplotlib as mpl
from mpl_toolkits.basemap import Basemap as Basemap
from matplotlib.colors import rgb2hex
from matplotlib.patches import Polygon
from matplotlib.colors import Normalize
from matplotlib.colorbar import ColorbarBase

# Lambert Conformal map of U.S. states
m = Basemap(llcrnrlon=-121, llcrnrlat=20, urcrnrlon=-62, urcrnrlat=51,
            projection='lcc', lat_1=32, lat_2=45, lon_0=-95)

# draw state boundaries.
# data from U.S Census Bureau
# https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2010&layergroup=States+%28and+equivalent%29
shp_info = m.readshapefile('data/tl_2010_us_state00', name='states', drawbounds=True)

# Regions that appear in the shapefile but are never filled.
skipped_regions = ('District of Columbia', 'Puerto Rico')

# Choose a color for each state from its (log-scaled) private-placement count.
cmap = plt.cm.Reds  # use 'Reds' colormap
data_min = state_ps5['count'].min()
data_max = state_ps5['count'].max()
vmin = data_min * 0.95
vmax = data_max * 1.05  # pad the range slightly on both sides
# A single normalisation is shared by the state fills and the colorbar so the
# two always agree. clip=True pins out-of-range values to the ends of the
# colormap, and Normalize also copes with vmin == vmax.
norm = Normalize(vmin=vmin, vmax=vmax, clip=True)

# Build the state -> value lookup once instead of filtering the DataFrame
# inside the loop for every shape.
count_by_state = dict(zip(state_ps5['issuer_state'], state_ps5['count']))

colors = {}
statenames = []
for shapedict in m.states_info:
    statename = shapedict['NAME00']
    if statename not in skipped_regions:
        # A state with no rows in state_ps5 has zero placements, i.e.
        # log(1 + 0) = 0.0 on the plotted scale. Previously such a state
        # made float() fail on an empty array.
        count = float(count_by_state.get(statename, 0.0))
        # Calling the colormap with a value between 0 and 1 returns an RGBA
        # tuple; keep only the RGB part.
        colors[statename] = cmap(norm(count))[:3]
    # Names are recorded for every shape (skipped ones included) so that
    # statenames stays aligned with m.states.
    statenames.append(statename)

# cycle through state names, color each one.
ax = plt.gca()  # get current axes instance
fig = plt.gcf()
for shape_name, seg in zip(statenames, m.states):
    if shape_name in skipped_regions:
        continue
    # Offset Alaska and Hawaii to the lower-left corner.
    if shape_name == 'Alaska':
        # Alaska is too big. Scale it down to 35% first, then translate it.
        seg = [(0.35 * pt[0] + 1100000, 0.35 * pt[1] - 1300000) for pt in seg]
    if shape_name == 'Hawaii':
        seg = [(pt[0] + 5200000, pt[1] - 1400000) for pt in seg]
    color = rgb2hex(colors[shape_name])
    poly = Polygon(seg, facecolor=color, edgecolor=color)
    ax.add_patch(poly)

plt.title('Private Placement Across States')
# construct custom colorbar using the same normalisation as the state fills
cax = fig.add_axes([0.17, 0.01, 0.7, 0.05])
cb = ColorbarBase(cax, cmap=cmap, norm=norm, orientation='horizontal')
# The plotted values are log(1 + count), not raw counts, so label them as such.
cb.ax.set_xlabel('log(1 + Number of Private Placements)')
plt.show()