In [6]:
import gzip
from tqdm import tqdm
import json
import pandas as pd
import numpy as np
import networkx as nx
from networkx.algorithms import bipartite, community
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import datetime


In [2]:
# Load the first review dump: every line of the file is parsed as its own JSON
# document, producing a list with one parsed object per line.
with open('all_data.json' , 'r') as f:
    data = list(map(json.loads, f))

In [3]:
# Load the second review dump the same way: one JSON document per line.
with open('all_data2.json' , 'r') as f:
    data2 = list(map(json.loads, f))

### convert into dataframe

In [4]:
def to_df(data):
    """Flatten an iterable of raw review records into a DataFrame.

    Each record must provide the keys 'overall', 'verified', 'reviewerID',
    'asin', 'unixReviewTime' and 'category'. 'reviewText' and 'summary' are
    optional and become NaN when absent. The last two characters of the
    'category' value are dropped.

    Returns a DataFrame with the columns rating, verified, user, asin, review,
    summary, time and category (in that order), one row per record.
    """
    records = list(data)

    def required(key):
        # Mandatory field: a missing key raises KeyError.
        return [rec[key] for rec in records]

    def optional(key):
        # Optional field: fall back to NaN when the key is absent.
        return [rec[key] if key in rec else np.nan for rec in records]

    columns = {
        'rating': required('overall'),
        'verified': required('verified'),
        'user': required('reviewerID'),
        'asin': required('asin'),
        'review': optional('reviewText'),
        'summary': optional('summary'),
        'time': required('unixReviewTime'),
        'category': [rec['category'][:-2] for rec in records],
    }
    return pd.DataFrame(columns)
df = to_df(data)
df2 = to_df(data2)

In [None]:
df[df['review'].isnull() | df['summary'].isnull()]

In [None]:
df.sort_values(by = 'time')

In [None]:
item_in_cat = df.groupby('category')['asin'].agg(set)
item_in_cat[:4]

In [None]:
len(item_in_cat)

In [None]:
item_in_cat

### Check whether any item belongs to multiple categories

In [None]:
item_cat = df.groupby('asin')['category'].agg(set)
item_cat[:4]

In [None]:
sum(item_cat.apply(lambda x: len(x)) > 1)

In [None]:
# Collapse to a single category per item. Recomputing from df with max() is
# deterministic and can be re-run safely; popping from the grouped sets mutated
# them in place, picked an arbitrary element, and failed on a second run
# because item_cat then held strings instead of sets.
item_cat = df.groupby('asin')['category'].max()
item_cat

In [None]:
df.sort_values(by = 'time').iloc[-1]['time']

In [5]:
class Simulation:
    """User-item review graph that is grown from a review DataFrame by time window.

    Users and items (asin) are the two node sets; every review becomes an edge
    user -> item carrying the review's 'time' and 'rating' as edge attributes.

    Attributes:
        model: the networkx graph (DiGraph when ``di`` is true, Graph otherwise).
        df: the input frame sorted by 'time'.
        item_cat: Series indexed by asin giving one category per item
            (the maximum category string when an item has several).
    """

    # Timestamp subtracted from edge times before scaling them to a plot height.
    _T_ORIGIN = 1451692800
    # Divisor turning elapsed seconds into plot height.
    # NOTE(review): 864000 s is 10 days, so one height unit is 50 days -
    # confirm that 86400 (one day) was not intended.
    _HEIGHT_SCALE = 5 * 864000

    # Horizontal band (low, high) in which each node kind is scattered.
    _USER_X_RANGE = (0, 1)
    _ITEM_X_RANGE = (4, 5)

    _USER_COLOR = 'xkcd:blue'
    # Node colour per item category.
    _CATEGORY_COLORS = {
        'AMAZON_FASHION': 'xkcd:red',
        'All_Beauty': 'xkcd:coral',
        'Appliances': 'xkcd:crimson',
        'Arts_Crafts_and_Sewing': 'xkcd:cyan',
        'Automotive': 'xkcd:beige',
        'Cell_Phones_and_Accessories': 'xkcd:brown',
        'Clothing_Shoes_and_Jewelry': 'xkcd:chartreuse',
        'Digital_Music': 'xkcd:chocolate',
        'Electronics': 'xkcd:darkgreen',
        'Gift_Cards': 'xkcd:fuchsia',
        'Grocery_and_Gourmet_Food': 'xkcd:gold',
        'Home_and_Kitchen': 'xkcd:green',
        'Industrial_and_Scientific': 'xkcd:grey',
        'Luxury_Beauty': 'xkcd:khaki',
        'Magazine_Subscriptions': 'xkcd:lavender',
        'Musical_Instruments': 'xkcd:lime',
        'Office_Products': 'xkcd:olive',
        'Patio_Lawn_and_Garden': 'xkcd:orange',
        'Pet_Supplies': 'xkcd:orchid',
        'Prime_Pantry': 'xkcd:purple',
        'Software': 'xkcd:teal',
        'Sports_and_Outdoors': 'xkcd:wheat',
        'Tools_and_Home_Improvement': 'xkcd:yellow',
        'Toys_and_Games': 'xkcd:aquamarine',
        'Video_Games': 'xkcd:goldenrod',
    }

    def __init__(self, df, di = False):
        """Create an empty graph and keep a time-sorted copy of ``df``.

        Args:
            df: DataFrame with at least the columns 'user', 'asin', 'time',
                'rating' and 'category'.
            di: build a directed graph (edges user -> item) when true.
        """
        self.model = nx.DiGraph() if di else nx.Graph()
        self.df = df.sort_values(by = 'time')
        self.item_cat = df.groupby('asin')['category'].max()

    #min t: 1451692800 max t: 1538524800
    def add_node(self, t_start: int, t_end: int):
        """Add every review with t_start <= time <= t_end to the graph.

        Each review becomes an edge (user, asin) with attributes 'time' and
        'rating'. Afterwards every node in the graph is tagged with 'category'
        (the item's category, or 'user') and 'bipartite' (1 for items, 0 for
        users). An empty window adds nothing.
        """
        in_window = (self.df['time'] >= t_start) & (self.df['time'] <= t_end)
        # Select only the needed columns and build the edge tuples directly;
        # assigning a helper column onto the filtered slice triggered pandas'
        # SettingWithCopyWarning and was fragile for an empty window.
        df_part = self.df.loc[in_window, ['user', 'asin', 'time', 'rating']]
        edges = [
            (user, asin, {'time': time, 'rating': rating})
            for user, asin, time, rating in df_part.itertuples(index=False, name=None)
        ]
        self.model.add_edges_from(edges)

        item_ids = self.item_cat.index
        for node in self.model:
            attrs = self.model.nodes[node]
            if node in item_ids:
                attrs['category'] = self.item_cat[node]
                attrs['bipartite'] = 1
            else:
                attrs['category'] = 'user'
                attrs['bipartite'] = 0

    def get_sets(self) -> tuple:
        """Return the two node sets as (users, items).

        Users are the nodes whose 'bipartite' attribute is 0.
        """
        top_nodes = {n for n, d in self.model.nodes(data=True) if d["bipartite"] == 0}
        return bipartite.sets(self.model, top_nodes)

    def get_color_pos(self):
        """Compute a colour list and a position dict for drawing the graph.

        Nodes are visited in graph order. Users are placed in the left band and
        items in the right band, with a random horizontal offset inside the
        band. The vertical coordinate is the scaled time of the edge between an
        item and the most recently visited user; it is carried over unchanged
        to following nodes until another such edge is found.

        Returns:
            (color_map, pos): colours in node order, and node -> (x, y).

        Raises:
            ValueError: if an item has a category without a configured colour.
        """
        color_map = []
        pos = {}
        height = 0
        last = None  # most recently visited user node, if any

        # Always read this instance's graph; reading a notebook-level global
        # would plot a different Simulation's data.
        graph = self.model
        for node in graph:
            cat = graph.nodes[node]['category']

            if cat == 'user':
                last = node
                color = self._USER_COLOR
                low, high = self._USER_X_RANGE
            else:
                if cat not in self._CATEGORY_COLORS:
                    raise ValueError("class missing: {!r}".format(cat))
                if last is not None and node in graph[last]:
                    elapsed = graph[last][node]['time'] - self._T_ORIGIN
                    height = elapsed / self._HEIGHT_SCALE
                color = self._CATEGORY_COLORS[cat]
                low, high = self._ITEM_X_RANGE

            color_map.append(color)
            pos[node] = (np.random.uniform(low, high), height)

        return color_map, pos

    def draw(self, fig_size = (12,30), node_size = 2, width = 0.09, save = False):
        """Draw the graph with the layout from ``get_color_pos``.

        When ``save`` is true the figure is titled with the date of the latest
        review in ``self.df`` and written to ./plots/Amazon<date>.png.
        """
        plt.figure(figsize = fig_size)
        color_map, pos = self.get_color_pos()
        nx.draw(self.model, node_size = node_size, pos = pos, width = width, node_color=color_map)
        if save:
            import os

            # NOTE(review): this is the latest timestamp of the whole frame,
            # not of the window passed to add_node - confirm that is intended.
            latest = int(self.df.iloc[-1]['time'])

            dt = datetime.date.fromtimestamp(latest)
            plt.title("Until: {}".format(dt))
            # Make sure the output directory exists before saving.
            os.makedirs('./plots', exist_ok=True)
            plt.savefig('./plots/Amazon{}.png'.format(dt), bbox_inches='tight')

    def get_projection(self, s: int) -> "nx.Graph":
        """Project the bipartite graph onto one node set.

        Args:
            s: index into ``get_sets()``; 0 projects onto users, 1 onto items.
        """
        bi_sets = self.get_sets()
        return bipartite.projected_graph(self.model, bi_sets[s])
        
    

In [None]:
type(s.model)

In [6]:

s = Simulation(df[['rating','user','asin','time','category']], di = False)
s.model.nodes()

NodeView(())

In [7]:
s.add_node(0, 1698524800)

In [8]:

s2 = Simulation(df2[['rating','user','asin','time','category']], di = False)
s2.model.nodes()
s2.add_node(0, 1698524800)

## Get separate part

In [None]:
s.model.edges()

In [None]:
# Rows for item B000HCLLMM among the first 10000 reviews; the slice is built
# once and filtered with a callable mask.
df[['rating','user','asin','time','category']][:10000].loc[lambda part: part['asin'] == 'B000HCLLMM']


## Clustering Coefficient Problems

The standard (unipartite) definition of the clustering coefficient cannot be applied directly to bipartite networks: nodes of the same type are never connected, so the neighbors of a node are never linked to each other and triangles cannot exist.
# $c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)}$
Here $T(u)$ is the number of links between the neighbors of node $u$, i.e. the number of triangles through $u$ mentioned above.

To deal with this problem, we turn to two possible solutions:
1. Measure the clustering coefficient of projections separately. #####picture \
However, there are also some problems with projection.

### Projection Problems
Information in the bipartite structure may disappear after projection. \
1. For instance, the fact that two items were bought by multiple people is lost: in the projected graph the two items are
simply linked together. However, we could probably solve this problem with a weighted projection (increase the edge weight as more people buy both items). 
The fact that many different bipartite graphs lead to the same projection also illustrates this loss of information.
2. The projection cannot capture anything other than the shared node. For example, B and C are linked because they both connect to 5, but the information about the links 4-C and 2-B is not captured. \
And we will show below that this information loss makes the unipartite analysis of the clustering coefficient even less informative when the relation is sparse, as in our dataset.

Therefore, we need to find out another way of measuring clustering coefficient.

Latapy et al. (2008) propose extending the local clustering
coefficient to bipartite networks with a more abstract definition of overlapping neighborhoods. The overlap equals the fraction of joint neighbors in both neighborhoods. (In simple words, the overlap is represented by the similarity $c_{uv}$ between two sets )

 # $c_u = \frac{\sum_{v \in N(N(u))} c_{uv} }{|N(N(u))|}$
 where $N(N(u))$ is the set of second-order neighbors of $u$ in $G$, excluding $u$ itself, and $c_{uv}$ is the pairwise clustering coefficient between nodes $u$ and $v$. \
 The mode selects the function for $c_{uv}$ which can be:

# $c_{uv}=\frac{|N(u)\cap N(v)|}{|N(u) \cup N(v)|}$ 
Jaccard similarity (`mode='dot'` in NetworkX, the mode used in the cells below)

# $c_{uv}=\frac{|N(u)\cap N(v)|}{min(|N(u)|,|N(v)|)}$
Min (`mode='min'`), which is similar to the definition given on lecture slides 9b, page 65

# $c_{uv}=\frac{|N(u)\cap N(v)|}{max(|N(u)|,|N(v)|)}$
Max (`mode='max'`)

In [None]:
# Item-side analysis of the first graph: index 1 of get_sets() is the second
# (non-user) node set, i.e. the items.
# item_g is the projection onto that set; bipartite_clusterCo maps each of those
# nodes to its bipartite clustering coefficient.
# mode='dot' selects the intersection-over-union pairwise coefficient.
item_g = s.get_projection(1)
bipartite_clusterCo = bipartite.clustering(s.model, s.get_sets()[1], mode = 'dot')

In [None]:
# Same computation for the second graph (s2). This rebinds item_g and
# bipartite_clusterCo, so the cells below describe s2, not s.
item_g = s2.get_projection(1)
bipartite_clusterCo = bipartite.clustering(s2.model, s2.get_sets()[1], mode = 'dot')

In [None]:
df_2 = pd.DataFrame({'ssn':bipartite_clusterCo.keys(), 'coef': bipartite_clusterCo.values() })
df_2.plot(kind='hist', y = 'coef')
clusterCo = nx.clustering(item_g)
df_3 = pd.DataFrame({'ssn':clusterCo.keys(), 'coef': clusterCo.values() })
df_3.plot(kind='hist', y = 'coef')

In [None]:
df_2

In [None]:

sorted(bipartite_clusterCo.items(), key=lambda item: item[1], reverse = True)[10000]


In [None]:
df2[df2['asin'] == 'B00XRDH9QK']

In [None]:
df2[df2['user'] == 'AXIY5ZEG2SJ53']

In [None]:
df2[df2['asin'] == 'B00XUYWYU2']

## Hubs and Authorities

In [7]:
s3 = Simulation(df2[['rating','user','asin','time','category']], di = True)
s3.model.nodes()
s3.add_node(0, 1698524800)

In [8]:
h, a = nx.hits(s3.model)

  A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float)


In [9]:
h = dict(sorted(h.items(), key=lambda item: item[1], reverse = True))
h

{'A1ZKJOISGOBVI4': 0.0008354493336628768,
 'A2UEL6KQBZNV2K': 0.0008237000260819074,
 'A3SJ9XS2JP8X0L': 0.0008112231781956696,
 'A23MCJE4JUG8GK': 0.0008102857761228791,
 'ALY1ZJ944F9ZZ': 0.0008097851146565849,
 'A1BB1IR0WLNL2N': 0.0008097851146565849,
 'A1L2I5Z31JIYX2': 0.0008096152058620826,
 'A23EPR9H73EUFA': 0.0008092194678442637,
 'A2EIK5SM7W4571': 0.000809186235110868,
 'A18FT4SOVZ4CCW': 0.0008091365014478684,
 'AUFMSA7LB7UC3': 0.0008091274041300168,
 'A2A2DOJ1GW009C': 0.0008091247719017626,
 'A2T708F2UC9OUO': 0.0008091185542067089,
 'A3310092NOC1F3': 0.0008091101217308896,
 'A2IUEJVB8V9DVS': 0.0008090836793890387,
 'A4JI0CFOUAXBR': 0.0008090255962086027,
 'A32NAQNNDOEZRB': 0.0008090161913882589,
 'A3TP1UMGCTIO2U': 0.0008090151448106344,
 'A1KZFUDOY4LEKL': 0.0008090052151769546,
 'A3V598Q5FOKH4F': 0.00080899749087925,
 'A1IW5YRXXO0FPM': 0.0008089888406962673,
 'A3DIWONF6S6RT1': 0.0008089876899337571,
 'A1L7PLOCYB3FIT': 0.0008089875715172139,
 'A1ALCYG31VJB3U': 0.0008089845303008229

In [15]:
len(a)

129388

In [16]:
a = dict(sorted(a.items(), key=lambda item: item[1], reverse = True))
a

{'B00W1UJRK6': 0.9098207346940382,
 'B0096TXQNE': 0.002526160410041589,
 'B00DU76A6I': 0.0016888119586737829,
 'B00SIFO53M': 0.001518224620233473,
 'B01DLZHM10': 0.0010521091345138383,
 'B00VV5KRD8': 0.0010147017369174082,
 'B000FKBDLU': 0.0009587205787142922,
 'B0109ZA5C8': 0.0009484804644906582,
 'B00DEQDEUK': 0.000945517578239511,
 'B016YTTGC8': 0.0009385188222768903,
 'B01BDEUCIG': 0.0009290270669062269,
 'B01GAGM62M': 0.0008992630633491185,
 'B0002YTFWI': 0.0008338835293109437,
 'B0000ZFF7S': 0.0008232972844259898,
 'B005H3AU1Y': 0.0008221192367206508,
 'B008YDVYKK': 0.0008109422519503741,
 'B00C7OTIV4': 0.0008022476353519879,
 'B01E0LJD0W': 0.0007925108246521341,
 'B01890QD8K': 0.0007912155044553625,
 'B01FS81D68': 0.0007910822125392762,
 'B017OPE860': 0.0007876589614525105,
 'B00LU78TSC': 0.0007863271043222459,
 'B018JA6FGU': 0.0007813632547922454,
 'B00GBQ8P2C': 0.0007797043591638816,
 'B00PBSZ6E6': 0.0007755710050674034,
 'B0174FD6HC': 0.0007737384167716921,
 'B000FXSIRO': 0.0

In [13]:
for (i, j) in a.items():
    if i[0] == 'A':
        print(i,j)

AFH5NKKWX6KA8 -0.0
A1M04W8INV3KFT -0.0
A3AKM4O7X02C8P -0.0
A3EYTLVQOP59BN -0.0
A14NX9Y4UD3YNV -0.0
AX0WFC2E6HTDD -0.0
A38A0QN1VXIDFJ -0.0
A23YTIYZ2A0ZI7 -0.0
A3CG0FARYYRN3H -0.0
AIFEKE0RDBLNY -0.0
A2VQHN3S2NXSOL -0.0
ALEKK58YGEVJW -0.0
A1YYT9PM8PNFI8 -0.0
A3F5MB8COEPWWX -0.0
A33PB6N1R71P7F -0.0
A22BL0TBQLM9OY -0.0
A1Q35E76J6NX9O -0.0
A34EWNAE33U74Z -0.0
A2W82EQ9HXNTLJ -0.0
A4DS6FNGHECFF -0.0
ABJI0DXM4Q2NI -0.0
APS9TI8DFKEBC -0.0
A2285KF0AW4AO0 -0.0
A3BG58MJJERJ30 -0.0
A2FE14R4ILLNDX -0.0
A13OFOB1394G31 -0.0
A2P6M8ZLZ4Z1DD -0.0
A21BQ70T32VF17 -0.0
A3A13J4I8T5X58 -0.0
ACYFFSOH3Y40M -0.0
A35VLF5RWN0MFM -0.0
A1DO9842MTF0S1 -0.0
A1RQRBR1B7S281 -0.0
ATSNUA8S0PMVF -0.0
A1GH6T8X82Z426 -0.0
A1QEQVC4RTS3AN -0.0
A5UF7TXPL1BX0 -0.0
A1KY9I3NVOWWGC -0.0
A2Y8LDD88R8OYK -0.0
A25TNBMJ4ZO7RD -0.0
A27YP12Z7AO9F8 -0.0
A3DX91XZZE9IKP -0.0
A3V8S9BOXJ9EOX -0.0
A1320FN3TU5ZX9 -0.0
A32VL8QFCYQV5M -0.0
A3GZJT1USXJYHF -0.0
A2VYH2Q0SCYO4D -0.0
A1HYRZQZM1DVWX -0.0
APQ3M2RBGLKG1 -0.0
A39GCZJDJUU7KA -0.0
A3UK8H2NPFY

In [None]:
h['A1ZKJOISGOBVI4']

In [None]:
a = dict(sorted(a.items(), key=lambda item: item[1], reverse = True))
a

In [14]:
df2[df2['user'] == 'A1ZKJOISGOBVI4']

Unnamed: 0,rating,verified,user,asin,review,summary,time,category
8841,5.0,True,A1ZKJOISGOBVI4,B0027A7U2C,I wear a 7.5 in the Kilty moccasins so I sized...,Stretches to fit my calves,1476835200,Clothing_Shoes_and_Jewelry
12173,5.0,True,A1ZKJOISGOBVI4,B007EV702E,These are great! Perfect fit every time & qual...,Perfect as always from Minnetonka,1476835200,Clothing_Shoes_and_Jewelry
13819,5.0,True,A1ZKJOISGOBVI4,B00BJH4NXS,"Great costume, held up well after several wear...",Held up well,1478476800,Clothing_Shoes_and_Jewelry
16683,5.0,True,A1ZKJOISGOBVI4,B00I31XLG8,My daughter and I love to wear Minnetonka moca...,Princess Mocs - glittery and girly!,1460246400,Clothing_Shoes_and_Jewelry
16905,3.0,True,A1ZKJOISGOBVI4,B00IK5A8U4,This slip is really long and too long for the ...,"The fit is very good, and is soft on the skin",1481328000,Clothing_Shoes_and_Jewelry
18173,3.0,True,A1ZKJOISGOBVI4,B00K1NTMMI,My daughter gets painful blisters in between h...,"Very fun and cute, but causes blisters.",1460246400,Clothing_Shoes_and_Jewelry
18482,5.0,True,A1ZKJOISGOBVI4,B00KRYL7CE,"I purchased a size small - I am 5'5"" & 110 lbs...",Great fit,1483315200,Clothing_Shoes_and_Jewelry
20136,5.0,True,A1ZKJOISGOBVI4,B00O5AHXVA,Bought a size 7 for my 6 year old. Fit perfect...,Darling dress with beautiful detail & nice qua...,1457308800,Clothing_Shoes_and_Jewelry
21877,5.0,True,A1ZKJOISGOBVI4,B00SIFO53M,True to size 8. Very comfortable to walk in. N...,No blisters,1523664000,Clothing_Shoes_and_Jewelry
23630,5.0,True,A1ZKJOISGOBVI4,B00W1UJRK6,True to size 8. Very comfortable to walk in. N...,No blisters,1523664000,Clothing_Shoes_and_Jewelry


In [18]:
df2[df2['asin'] == 'B0096TXQNE']

Unnamed: 0,rating,verified,user,asin,review,summary,time,category
51020,5.0,True,A79KHI0GCA9X7,B0096TXQNE,This is just right for me. Mixing wet ingredie...,perfect,1529366400,Home_and_Kitchen
51021,5.0,True,A22Q7MT9PHIUV6,B0096TXQNE,love this mixer!,great addition to your kitchen,1529107200,Home_and_Kitchen
51022,5.0,True,A25JABZNAS3NP0,B0096TXQNE,LOVE LOVE LOVE IT,Five Stars,1528934400,Home_and_Kitchen
51023,5.0,True,A2CPQYOJCJ2ES7,B0096TXQNE,Needed a new hand mixer. This mixer fits my ne...,Great hand mixer!,1528848000,Home_and_Kitchen
51024,4.0,True,A3JEI9VAAN9EF8,B0096TXQNE,I'm writing this pre-use and will update. I j...,I'm writing this pre-use and will update. I ju...,1528761600,Home_and_Kitchen
...,...,...,...,...,...,...,...,...
51884,3.0,True,AANZXJJ51MIUX,B0096TXQNE,The highest setting is more like the Medium se...,Disappointed for a KitchenAid,1451779200,Home_and_Kitchen
51885,4.0,True,A21NM4RKXVEMCK,B0096TXQNE,WIFE LOVES IT.,MIXER,1451779200,Home_and_Kitchen
51886,5.0,True,A8OQX5KE5CYHU,B0096TXQNE,It's great! Two different mixing utensils and ...,It's great! Two different mixing utensils and ...,1451692800,Home_and_Kitchen
65847,3.0,True,A2312ZO7VATTAN,B0096TXQNE,Not powerful at all. Plus is it is lightweight,Three Stars,1530662400,Home_and_Kitchen


In [None]:
df.groupby('asin').count().sort_values(by = 'rating')

In [None]:
temp = np.array(list(h.values()))*1e20

In [None]:
plt.hist(x = temp[temp > 0] , bins = 100)

In [None]:
s.df

## k-clique communities

In [None]:
# Maximal cliques of the item projection. find_cliques is reached through the
# nx namespace; the bare name is never imported in this notebook.
clique_g = nx.find_cliques(s.get_projection(1))


In [None]:
# Iterate the cliques bound to clique_g by the preceding cell; the name
# `cliques` is never assigned in the visible notebook.
for i in clique_g:
    print(i)

In [None]:
# k-clique communities (k = 20) among the items of the second graph.
# k_clique_communities is reached through the `community` module imported at
# the top of the notebook, so no mid-notebook import is needed.
items = s2.get_projection(1)
clique_g = nx.find_cliques(items)
list(community.k_clique_communities(items, 20, clique_g))

In [None]:
temp_df = df[df['asin'].isin(['B00006IBA2',
            'B00006JSUE',
            'B000JXR0NS',
            'B0015QV5S4',
            'B001HTOXHY',
            'B001L97SQS',
            'B001N82KLA',
            'B0027Y0E1W',
            'B002GWM9N6',
            'B003D6EGS0',
            'B0046EBYPQ',
            'B004LLD8G2',
            'B004QM8W26',
            'B004W7GRG8',
            'B005596XIE',
            'B005EX6N2W',
            'B0065P2WB4',
            'B006NFJEH6',
            'B0073MP6GQ',
            'B0078ZP974',
            'B008X3KQ46',
            'B009CCOI9Q',
            'B009ZRTNYI',
            'B00BBOD8US',
            'B00C6M4NP8',
            'B00C774OGK',
            'B00D096BRY',
            'B00DBWU2WU',
            'B00DV44QL6',
            'B00L7H5IAC',
            'B00M9MGUQ0',
            'B00O9GV906',
            'B00ORHLYWK',
            'B00T49G9LC',
            'B00U48ERIY',
            'B00V0I02AE',
            'B00VRUHDXE',
            'B00VS3X7G2',
            'B00XN14SW4',
            'B01B50DXEI',
            'B01CSY33ZE',
            'B01GHCQGAC'])][['asin','review', 'category']] 


In [None]:
temp_df['category'].value_counts()

In [None]:
temp_df.groupby(['category', 'asin']).count()

In [None]:
items.order()

In [None]:
s.model['A3N9FDER4SGFQU']['B00HVMFAPK']

In [None]:
#s.model.nodes(data = True)

In [None]:
s.model['B0068AD39Q']

In [None]:
#s.draw(fig_size = (20,100), width = 0.03, save = True)

In [None]:
np.random.normal(5 * 2,2,1)

In [None]:
a = df[['user','asin']]
a.itertuples(index=False, name=None)

In [None]:
B = nx.Graph()
# Add nodes with the node attribute "bipartite"
B.add_nodes_from(set(df['user']), bipartite=0)
B.add_nodes_from(set(df['asin']), bipartite=1)
# Add edges only between nodes of opposite node sets
B.add_weighted_edges_from(df[['user','asin', 'rating']].itertuples(index=False, name=None))
nx.set_node_attributes(B, 0, "grain")

In [None]:
#B.degree()

Piepenbrink, Anke and Ajai Gaur (2013). Methodological Advances in the Analysis of
Bipartite Networks: An Illustration Using Board Interlocks in Indian Firms. SSRN
Scholarly Paper ID 2199111. Rochester, NY: Social Science Research Network. url:
http://papers.ssrn.com/abstract=2199111.

Latapy, Matthieu, Clémence Magnien, and Nathalie Vecchio (2008). “Basic notions for
the analysis of large two-mode networks”. In: Social Networks 30.1, pp. 31–48. issn:
03788733. doi: 10.1016/j.socnet.2007.04.006.