In [13]:
import numpy as np
import pandas as pd
import snap
import matplotlib.pyplot as plt

In [14]:
#Load edge list into TTable
context = snap.TTableContext()
edgefilename = "./data/InvInvPairs_top_reversed.csv"
nodefilename = "./data/Investor.csv"
edgeschema = snap.Schema()
edgeschema.Add(snap.TStrTAttrPr("srcNID", snap.atInt))
edgeschema.Add(snap.TStrTAttrPr("dstNID", snap.atInt))
edgeschema.Add(snap.TStrTAttrPr("weight", snap.atFlt))

nodeschema = snap.Schema()
nodeschema.Add(snap.TStrTAttrPr("investorID", snap.atStr))
nodeschema.Add(snap.TStrTAttrPr("investorname", snap.atStr))

edge_table = snap.TTable.LoadSS(edgeschema, edgefilename, context, "\t", snap.TBool(False))
node_table = snap.TTable.LoadSS(nodeschema, nodefilename, context, "\t", snap.TBool(False))

In [15]:
#Convert TTable into Directed graph
edgeattrv = snap.TStrV()
edgeattrv.Add("weight")

nodeattrv = snap.TStrV()
nodeattrv.Add("investorname")
net = snap.ToNetwork(snap.PNEANet, edge_table, "srcNID", "dstNID", edgeattrv, node_table, "investorID", nodeattrv, snap.aaFirst)

In [16]:
PRankH = snap.TIntFltH()
snap.GetPageRank(net, PRankH)

PRank = {}
for item in PRankH:
    PRank[item] = PRankH[item]
sorted_PRank = sorted(PRank.items(), key = lambda kv: -kv[1])

NameV = snap.TStrV()

df_investors = pd.read_csv('./data/Investor.csv', header=None, sep = '\t')
df_investors = df_investors.rename(columns = {0:'investorID', 1:'investorname'})

for i in range(10):
    NId = sorted_PRank[i][0]
    score = sorted_PRank[i][1]
    net.AttrNameNI(NId, NameV)
    investorID = 0
    investorname = df_investors[df_investors['investorID']==NId].investorname.values[0] 
    indeg = net.GetNI(NId).GetInDeg()
    outdeg = net.GetNI(NId).GetOutDeg()
    print(' nodeID=',NId, ' PageRank=',score, ' name=',investorname, ' indeg=', indeg, ' outdeg=',outdeg)

 nodeID= 5021  PageRank= 0.0025983297409260865  name= SUMMIT ASSET MANAGEMENT LLC  indeg= 119  outdeg= 15
 nodeID= 233  PageRank= 0.002334896916866191  name= AMERICAN ECONOMIC PLANNING GROUP INC  indeg= 148  outdeg= 15
 nodeID= 1824  PageRank= 0.0022715717852605536  name= FAMILY LEGACY INC  indeg= 238  outdeg= 15
 nodeID= 3166  PageRank= 0.0021610855116964544  name= M KRAUS CO  indeg= 124  outdeg= 15
 nodeID= 3889  PageRank= 0.002127253594124111  name= PARAGON CAPITAL MANAGEMENT LLC  indeg= 97  outdeg= 15
 nodeID= 1418  PageRank= 0.00211472770412623  name= CTC LLC  indeg= 135  outdeg= 15
 nodeID= 2544  PageRank= 0.002052837656317131  name= HT PARTNERS LLC  indeg= 87  outdeg= 15
 nodeID= 1118  PageRank= 0.00202524192017762  name= CHEVIOT VALUE MANAGEMENT LLC  indeg= 131  outdeg= 15
 nodeID= 2209  PageRank= 0.0019732200322021584  name= GOLDSTEIN MUNGER ASSOCIATES  indeg= 92  outdeg= 15
 nodeID= 2066  PageRank= 0.001893751481675757  name= FUNDSMITH LLP  indeg= 92  outdeg= 15


In [10]:
#PageRanks of the value investors 
valueinvestor = pd.read_csv('./data/valueinvestorname.txt', names = {'investorname'})
valueinvestor = valueinvestor.append(df_investors[df_investors['investorname'].str.contains('VALUE')][['investorname']], ignore_index = True)
valueinvestor = valueinvestor.merge(df_investors, on = ['investorname'], how = 'left')
valueinvestor = valueinvestor[valueinvestor.investorID.notnull()]
valueinvestor = valueinvestor.astype({'investorID':'uint16'})
PRanks = pd.DataFrame(sorted_PRank, columns = ['investorID', 'PRank'])
valueinvestor = valueinvestor.merge(PRanks, on = ['investorID'], how = 'left')
valueinvestor.sort_values(by = ['PRank'], ascending=False)

Unnamed: 0,investorname,investorID,PRank
40,CHEVIOT VALUE MANAGEMENT LLC,1118,0.000978
35,COHO PARTNERS LTD,1229,0.000856
25,RIVERPARK ADVISORS LLC,4374,0.000634
0,BERKSHIRE HATHAWAY INC,648,0.000516
67,VULCAN VALUE PARTNERS LLC,5518,0.000478
16,THIRD POINT LLC,5168,0.000451
66,VALUEWORKS LLC,5429,0.000444
10,VALUEWORKS LLC,5429,0.000444
3,ABRAMS CAPITAL MANAGEMENT LP,33,0.000403
41,DECCAN VALUE INVESTORS LP,1500,0.000372


In [12]:
PRanks.to_csv("./data/PRanks.csv", header=True, index=False, sep="\t")