## Dependencies

In [138]:
!pip install  lxml pandas
from urllib.request import urlopen
from lxml import html, etree
import pandas as pd
import numpy as np
import json

[33mYou are using pip version 9.0.3, however version 10.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


## Fund Portfolio
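Configurable elements: edit the list below to set each fund's display name, its Bloomberg quote URL, and its relative allocation (weights are normalised later, so any positive units work).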

In [139]:
# Portfolio definition: one dict per fund.
#   Name       - label used in the result tables
#   URL        - quote page the holdings are scraped from
#   Allocation - relative share of the portfolio (normalised later)
funds = [
    {
        'Name': 'Janus Henderson Global Technology Fund',
        'URL': 'https://www.bloomberg.com/quote/JAGTX:US',
        'Allocation': 4,
    },
    {
        'Name': 'iShares Healthcare Innovation UCITS ETF',
        'URL': 'https://www.bloomberg.com/quote/HEAL:SW',
        'Allocation': 2,
    },
    {
        'Name': 'Lyxor MSCI World Information Technology',
        'URL': 'https://www.bloomberg.com/quote/TNOW:IM',
        'Allocation': 2,
    },
    {
        'Name': 'Lyxor MSCI World Consumer Discretionary',
        'URL': 'https://www.bloomberg.com/quote/DISW:LN',
        'Allocation': 4,
    },
]


## Portfolio Analysis

In [140]:
def extractText(ele):
    """Return the whitespace-stripped ``text`` of an element.

    Parameters
    ----------
    ele : object
        Anything exposing a ``text`` attribute (e.g. a parsed HTML element).

    Returns
    -------
    str
        ``ele.text`` without leading/trailing whitespace, or ``''`` when the
        element has no text.
    """
    text = ele.text
    # An element whose content starts with a child tag (or is empty) has
    # text == None; calling .strip() on it would raise AttributeError.
    if text is None:
        return ''
    return text.strip()

In [141]:
def extractHoldingsFromStock(url):
    """Download a fund quote page and return its holdings as a DataFrame.

    Parameters
    ----------
    url : str
        Address of the quote page to scrape; it is printed as a progress log.

    Returns
    -------
    pandas.DataFrame
        Columns ``FullName``, ``Abbreviation`` and ``Percentage``. Every cell
        is the stripped text of the matching HTML element (see ``extractText``).

    Raises
    ------
    IndexError
        If the page contains no ``<div class="holding">`` or no ``<tbody>``.
    """
    print(url)
    # Use the response as a context manager so the HTTP connection is closed
    # as soon as the body has been read, even if decoding/parsing fails.
    with urlopen(url) as page:
        tree = html.fromstring(page.read().decode('utf-8'))

    # These two lookups fail fast (IndexError) when the page layout changes.
    holdingDiv = tree.xpath('//div[@class="holding"]')[0]
    tbody = holdingDiv.xpath('//tbody')[0]

    # NOTE(review): every expression below starts with '//', so it is
    # evaluated against the whole document rather than relative to `tbody`.
    # Kept as-is because the published results were produced this way;
    # switch to './/...' if the search is meant to be scoped to the table.
    columns = {
        'FullName': tbody.xpath('//div[@data-type="full"]'),
        'Abbreviation': tbody.xpath('//div[@data-type="abbreviation"]'),
        'Percentage': tbody.xpath('//td[@data-type="percent"]')
    }
    elements = pd.DataFrame(data=columns)
    # Replace each HTML element with its stripped text, column by column.
    return elements.apply(lambda s: s.apply(extractText))

In [142]:
# Tabulate the configured funds, one row per fund, with a fixed column order.
df = pd.DataFrame(
    data=funds,
    columns=['Name', 'URL', 'Allocation'],
)
df

Unnamed: 0,Name,URL,Allocation
0,Janus Henderson Global Technology Fund,https://www.bloomberg.com/quote/JAGTX:US,4
1,iShares Healthcare Innovation UCITS ETF,https://www.bloomberg.com/quote/HEAL:SW,2
2,Lyxor MSCI World Information Technology,https://www.bloomberg.com/quote/TNOW:IM,2
3,Lyxor MSCI World Consumer Discretionary,https://www.bloomberg.com/quote/DISW:LN,4


In [143]:
# Scrape each fund's quote page; every cell of 'Holdings' is itself a DataFrame.
df['Holdings'] = df['URL'].apply(extractHoldingsFromStock)

https://www.bloomberg.com/quote/JAGTX:US
https://www.bloomberg.com/quote/HEAL:SW
https://www.bloomberg.com/quote/TNOW:IM
https://www.bloomberg.com/quote/DISW:LN


In [144]:
# Normalise the allocations so that each fund gets its share of the portfolio.
# The total is stored under its own name: binding it to `sum` would shadow the
# builtin for every later cell in the kernel session.
total_allocation = df['Allocation'].sum()
# Vectorised equivalent of rounding allocation / total to 2 decimals per fund.
weights = (df['Allocation'] / total_allocation).round(2)
df['OverallWeight'] = weights
df[['Name', 'OverallWeight']]

Unnamed: 0,Name,OverallWeight
0,Janus Henderson Global Technology Fund,0.33
1,iShares Healthcare Innovation UCITS ETF,0.17
2,Lyxor MSCI World Information Technology,0.17
3,Lyxor MSCI World Consumer Discretionary,0.33


In [145]:
# Show the fund table, now including the 'Holdings' and 'OverallWeight' columns.
df

Unnamed: 0,Name,URL,Allocation,Holdings,OverallWeight
0,Janus Henderson Global Technology Fund,https://www.bloomberg.com/quote/JAGTX:US,4,FullName Abbreviation...,0.33
1,iShares Healthcare Innovation UCITS ETF,https://www.bloomberg.com/quote/HEAL:SW,2,FullName Abbreviation...,0.17
2,Lyxor MSCI World Information Technology,https://www.bloomberg.com/quote/TNOW:IM,2,FullName Abbreviation Percentage 0...,0.17
3,Lyxor MSCI World Consumer Discretionary,https://www.bloomberg.com/quote/DISW:LN,4,FullName Abbreviation...,0.33


In [146]:
def factorInWeights(row):
    """Add an ``OverallPercentage`` column to the row's holdings table, in place.

    Parameters
    ----------
    row : object
        Must expose ``Holdings`` (a DataFrame with a ``Percentage`` column whose
        values are convertible with ``float``) and ``OverallWeight`` (a number).

    Returns
    -------
    None
        The function is called for its side effect on ``row.Holdings``.
    """
    holdings = row.Holdings
    fund_weight = row.OverallWeight
    # Scale every holding's percentage by the weight of the fund that owns it.
    holdings['OverallPercentage'] = holdings.Percentage.map(float) * fund_weight
    return None

# Run row by row for the side effect on each fund's holdings table; the
# displayed Series of None is just the function's (absent) return value.
df.apply(func=factorInWeights, axis='columns')

0    None
1    None
2    None
3    None
dtype: object

In [147]:
def appendFundName(row):
    """Tag every line of the row's holdings table with the fund's name, in place.

    Parameters
    ----------
    row : object
        Must expose ``Holdings`` (a DataFrame) and ``Name`` (the fund's label).

    Returns
    -------
    None
        The function is called for its side effect on ``row.Holdings``.
    """
    holdings = row.Holdings
    # A scalar assignment broadcasts the same fund name to every holding.
    holdings['FundName'] = row.Name
    return None
# Run row by row for the side effect on each fund's holdings table; the
# displayed Series of None is just the function's (absent) return value.
df.apply(func=appendFundName, axis='columns')

0    None
1    None
2    None
3    None
dtype: object

In [148]:
# Stack the per-fund holdings tables into one long table (original row labels
# are kept, so index values repeat across funds).
per_fund_tables = df['Holdings'].tolist()
allHoldings = pd.concat(per_fund_tables)
# allHoldings

In [149]:
# Collapse the stacked holdings to one row per ticker ('Abbreviation').
# Passing a list of functions for 'OverallPercentage' makes the result's
# columns two-level: (source column, aggregation name).
grouped=allHoldings.groupby(
    ['Abbreviation']
).agg({
    # distinct company names seen for the ticker
    'FullName':np.unique,
    # total weighted exposure, and how many entries are non-zero
    # (presumably the number of funds holding the ticker - confirm)
    'OverallPercentage': [np.sum,np.count_nonzero],
    # distinct funds in which the ticker appears
    'FundName':np.unique
}).rename(
    # Friendlier labels; the mapping mixes source-column names and
    # aggregation names, which are the labels found in the two column levels.
    columns={
        'OverallPercentage':'Overall',
        'count_nonzero':'count',
        'unique':'values'
    }).reset_index()  # turn the 'Abbreviation' group key back into a column
# grouped
# grouped

In [150]:
# Rank tickers by total portfolio exposure, breaking ties by the number of
# funds that contribute to it; both keys are sorted in descending order.
# (The stray mid-cell `final.columns` expression was a no-op and is removed.)
final = grouped.sort_values(
    [('Overall', 'sum'), ('Overall', 'count')],
    ascending=False,
)
final[['Abbreviation', 'FullName', 'FundName', 'Overall']]


Unnamed: 0_level_0,Abbreviation,FullName,FundName,Overall,Overall
Unnamed: 0_level_1,Unnamed: 1_level_1,values,values,sum,count
12,AMZN:US,Amazon.com Inc,"[Janus Henderson Global Technology Fund, Lyxor...",5.1546,2.0
30,MSFT:US,Microsoft Corp,"[Janus Henderson Global Technology Fund, Lyxor...",3.4806,2.0
22,GOOG:US,Alphabet Inc,"[Janus Henderson Global Technology Fund, Lyxor...",2.5672,2.0
9,AAPL:US,Apple Inc,Lyxor MSCI World Information Technology,2.2151,1.0
24,HD:US,Home Depot Inc/The,Lyxor MSCI World Consumer Discretionary,1.3926,1.0
7,700:HK,Tencent Holdings Ltd,Janus Henderson Global Technology Fund,1.3827,1.0
17,CRM:US,salesforce.com Inc,Janus Henderson Global Technology Fund,1.2606,1.0
8,7203:JP,Toyota Motor Corp,Lyxor MSCI World Consumer Discretionary,1.0989,1.0
14,BABA:US,Alibaba Group Holding Ltd,Janus Henderson Global Technology Fund,1.0692,1.0
21,FB:US,Facebook Inc,Lyxor MSCI World Information Technology,1.0166,1.0


## Git Commit

In [153]:
# Stage every notebook in the current directory.
!git add *.ipynb
# Commit the staged files plus all modified tracked files with the message "Final".
!git commit -am "Final"
# Publish the commit to the configured remote.
!git push

[binder 9ca0982] Final
 1 file changed, 980 insertions(+)
 create mode 100644 notebook/ExampleProject.ipynb
Counting objects: 4, done.
Delta compression using up to 32 threads.
Compressing objects: 100% (4/4), done.
Writing objects: 100% (4/4), 5.01 KiB | 5.01 MiB/s, done.
Total 4 (delta 0), reused 0 (delta 0)
To https://github.com/yoyu777/jupyter_notebooks
   adb05b1..9ca0982  binder -> binder
