-
Notifications
You must be signed in to change notification settings - Fork 106
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7 from srikris/howto-80-chars
Made everything 80 chars and consistent with format.
- Loading branch information
Showing
8 changed files
with
52 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,18 @@ | ||
# Title: Convert a date string column to a UNIX timestamp | ||
# Requires you to 'pip install python-dateutil==1.5' | ||
import graphlab as gl | ||
from datetime import datetime | ||
# Requires you to 'pip install python-dateutil==1.5' | ||
from dateutil import parser | ||
|
||
def str_to_timestamp(the_str):
    """Convert a date string to seconds since the UNIX epoch.

    The string is parsed with ``dateutil.parser.parse``. A result without
    timezone information is measured directly against the naive epoch
    ``datetime(1970, 1, 1)``; a result that carries a UTC offset is first
    shifted to UTC so the subtraction below is well-defined.

    Args:
        the_str: The date string to convert.

    Returns:
        The number of seconds since January 1, 1970 as a float, or ``None``
        when the value cannot be parsed as a date.
    """
    try:
        dt = parser.parse(the_str)
    except Exception:
        # Best-effort conversion: anything unparseable becomes a missing
        # value. Catching Exception (rather than a bare except) still lets
        # KeyboardInterrupt and SystemExit propagate.
        return None

    # Subtracting a naive datetime from an aware one raises TypeError, so
    # normalize aware results to naive UTC before comparing with the epoch.
    if dt.tzinfo is not None:
        offset = dt.utcoffset()
        dt = dt.replace(tzinfo=None)
        if offset is not None:
            dt = dt - offset

    # UNIX epoch is January 1, 1970
    return (dt - datetime(1970, 1, 1)).total_seconds()
|
||
# 02/29/2001 is invalid, so should be 'None' in output | ||
sf = gl.SFrame({'date':['2000-08-21','2013-06-08 17:25:00.12753','02/29/2001'],'id':[1,2,3]}) | ||
sf = gl.SFrame({ | ||
'date':['2000-08-21','2013-06-08 17:25:00.12753','02/29/2001'], | ||
'id':[1,2,3]}) | ||
sf['date'] = sf['date'].apply(str_to_timestamp) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,26 @@ | ||
import graphlab | ||
import graphlab as gl | ||
|
||
business = graphlab.SFrame.read_csv('yelp_academic_dataset_business.json', header=False, delimiter='\n', column_type_hints=dict) | ||
checkin = graphlab.SFrame.read_csv('yelp_academic_dataset_checkin.json', header=False, delimiter='\n', column_type_hints=dict) | ||
review = graphlab.SFrame.read_csv('yelp_academic_dataset_review.json', header=False, delimiter='\n', column_type_hints=dict) | ||
user = graphlab.SFrame.read_csv('yelp_academic_dataset_user.json', header=False, delimiter='\n', column_type_hints=dict) | ||
tip = graphlab.SFrame.read_csv('yelp_academic_dataset_tip.json', header=False, delimiter='\n', column_type_hints=dict) | ||
# Data available to download from | ||
# https://www.yelp.com/academic_dataset | ||
business = gl.SFrame.read_csv('yelp_academic_dataset_business.json', | ||
header=False, delimiter='\n', column_type_hints=dict) | ||
checkin = gl.SFrame.read_csv('yelp_academic_dataset_checkin.json', | ||
header=False, delimiter='\n', column_type_hints=dict) | ||
review = gl.SFrame.read_csv('yelp_academic_dataset_review.json', | ||
header=False, delimiter='\n', column_type_hints=dict) | ||
user = gl.SFrame.read_csv('yelp_academic_dataset_user.json', | ||
header=False, delimiter='\n', column_type_hints=dict) | ||
tip = gl.SFrame.read_csv('yelp_academic_dataset_tip.json', | ||
header=False, delimiter='\n', column_type_hints=dict) | ||
|
||
# Changing JSON into tables, i.e. SFrames | ||
reviews = review.unpack('X1', column_name_prefix='') | ||
businesses = business.unpack('X1', column_name_prefix='', limit=['business_id', 'name', 'latitude', 'longitude', 'stars']) | ||
businesses = business.unpack('X1', column_name_prefix='', | ||
limit=['business_id', 'name', 'latitude', 'longitude', 'stars']) | ||
|
||
# Build a recommender system | ||
m = graphlab.recommender.create(reviews, 'user_id', 'business_id') | ||
m = gl.recommender.create(reviews, 'user_id', 'business_id') | ||
|
||
# Find businesses that are similar based on users in common | ||
m.get_similar_items(['BVxlrYWgmi-8TPGMe6CTpg']).join(businesses, on={'similar_item':'business_id'}) | ||
similar_items = m.get_similar_items(['BVxlrYWgmi-8TPGMe6CTpg']) | ||
print similar_items.join(businesses, on={'similar_item':'business_id'}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,19 @@ | ||
# Title: Remove duplicate edges from SGraph | ||
import graphlab as gl | ||
|
||
vertices = gl.SFrame({'id':[1,2,3,4,5]}) | ||
edges = gl.SFrame({'src':[1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4], | ||
'dst':[2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5]}) | ||
edges['edata'] = edges['src'] + edges['dst'] | ||
|
||
# Create a graph (as an example) | ||
g = gl.SGraph(vertices, edges, vid_field='id', src_field='src', dst_field='dst') | ||
print g.summary() | ||
print g.vertices | ||
print g.edges | ||
|
||
g2 = gl.SGraph(g.vertices, g.edges.groupby(['__src_id', '__dst_id'], {'data': gl.aggregate.SELECT_ONE('edata')})) | ||
# Remove duplicates | ||
g2 = gl.SGraph(g.vertices, g.edges.groupby(['__src_id', '__dst_id'], | ||
{'data': gl.aggregate.SELECT_ONE('edata')})) | ||
print g2.summary() | ||
print g2.vertices | ||
print g2.edges |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,23 @@ | ||
import graphlab as gl | ||
|
||
# Add some example edges -- replace with your own graph | ||
sg = gl.SGraph() | ||
# add some example edges -- replace with your own graph | ||
sg = sg.add_edges([gl.Edge(i, i+1) for i in range(10)]) | ||
|
||
import networkx as nx | ||
g = nx.Graph() | ||
|
||
# put the nodes and edges from the SGraph into a NetworkX graph | ||
# Put the nodes and edges from the SGraph into a NetworkX graph | ||
g.add_nodes_from(list(sg.vertices['__id'])) | ||
g.add_edges_from([(e['__src_id'], e['__dst_id']) for e in sg.edges]) | ||
|
||
# create the layout with NetworkX and convert to regular Python types | ||
# you can substitute any of the layout algorithms here for circular_layout: | ||
# Create the layout with NetworkX and convert to regular Python types | ||
# You can substitute any of the layout algorithms here for circular_layout: | ||
# http://networkx.github.io/documentation/latest/reference/drawing.html#module-networkx.drawing.layout | ||
layout = nx.circular_layout(g) | ||
layout = {k: map(float, list(v)) for k,v in layout.iteritems()} | ||
|
||
# show the SGraph in Canvas with that layout | ||
# Show the SGraph in Canvas with that layout | ||
sg.vertices['x'] = sg.vertices.apply(lambda v: layout[v['__id']][0]) | ||
sg.vertices['y'] = sg.vertices.apply(lambda v: layout[v['__id']][1]) | ||
sg.show(vertex_positions=('x', 'y')) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters