## Set up the working environment

In [40]:
# Install the required packages into the environment of the running Jupyter kernel.
# Calling pip through sys.executable uses the kernel's own Python interpreter
# rather than whichever `pip` happens to be first on the shell PATH.
import sys
!{sys.executable} -m pip install kaggle ijson pyTigerGraph autoTigerGraph

Collecting autoTigerGraph
  Downloading autoTigerGraph-0.0.1-py3-none-any.whl (3.7 kB)
Installing collected packages: autoTigerGraph
Successfully installed autoTigerGraph-0.0.1


In [None]:
## Launch a TigerGraph Developer instance on AWS
Use the following command in a shell to connect to it, forwarding ports 14240 and 8888 to your local machine:

`ssh -L14240:localhost:14240 -L 8888:localhost:8888 -i your_aws_key.pem ubuntu@your-server-ip`

Check the connection by opening the link printed below. It should open TigerGraph GraphStudio in a browser window.

In [3]:
# Base URL of the TigerGraph server; localhost because the ports are forwarded over SSH.
server = 'http://localhost'
# Print the address on port 14240 so it can be opened from the cell output.
print('{}:14240'.format(server))

http://localhost:14240


In [22]:
import pyTigerGraph as tg

# Connection object for the 'yelp' graph on the server defined earlier.
conn = tg.TigerGraphConnection(graphname='yelp', host=server)

# GSQL shell bound to that connection, created with certNeeded=False.
shell = tg.Gsql(conn, certNeeded=False)
# NOTE(review): machine-specific path into a TigerGraph 3.0.0 install; presumably
# where the GSQL client jar lives -- adjust for other installations.
shell.jarLocation = '/home/ubuntu/tigergraph/app/3.0.0/dev/gdk/gsql/lib'

In [23]:
# Run GSQL `ls` to list the catalog (vertex/edge types, graphs, jobs) and
# confirm that the shell can reach the server.
catalog_listing = shell.gsql('ls', options=[])
print(catalog_listing)

Connecting to localhost:14240
If there is any relative path, it is relative to <System.AppRoot>/dev/gdk/gsql
---- Global vertices, edges, and all graphs
Vertex Types: 
Edge Types: 

Graphs: 
Jobs: 


JSON API version: v2
Syntax version: v1




## Load Yelp Dataset from Kaggle

See https://www.kaggle.com/docs/api for how to create the `kaggle.json` API token that is uploaded below.

In [24]:
from ipywidgets import FileUpload
# Upload widget used to hand the Kaggle API token (kaggle.json) to the notebook;
# a later cell reads the uploaded bytes from `upload`.
upload = FileUpload()
# Bare expression so the widget is rendered as the cell output.
upload

FileUpload(value={}, description='Upload')

In [26]:
import os

# Store the uploaded Kaggle API token where the kaggle CLI looks for it.
kaggle_dir = os.path.expanduser("~/.kaggle")
# exist_ok=True keeps the cell re-runnable when the directory is already there.
os.makedirs(kaggle_dir, exist_ok=True)

# Fail with an actionable message instead of a bare IndexError on upload.data[0].
if not upload.data:
    raise RuntimeError(
        "No file uploaded yet: select kaggle.json in the upload widget above, "
        "then re-run this cell."
    )

kaggle_json = os.path.join(kaggle_dir, "kaggle.json")
# Create the file with owner-only permissions from the start, so the token is
# never readable by other users, not even briefly.
token_fd = os.open(kaggle_json, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(token_fd, "wb") as token_file:
    token_file.write(upload.data[0])

# The mode passed to os.open only applies to newly created files; tighten a
# pre-existing file as well.
os.chmod(kaggle_json, 0o600)

mkdir: cannot create directory ‘/home/ubuntu/.kaggle’: File exists


In [27]:
!kaggle datasets download yelp-dataset/yelp-dataset
!unzip yelp-dataset

Downloading yelp-dataset.zip to /home/ubuntu/autoTigerGraph/notebooks
100%|██████████████████████████████████████| 4.48G/4.48G [01:43<00:00, 82.9MB/s]
100%|██████████████████████████████████████| 4.48G/4.48G [01:43<00:00, 46.5MB/s]
Archive:  yelp-dataset.zip
  inflating: Dataset_Agreement.pdf   
  inflating: yelp_academic_dataset_business.json  
  inflating: yelp_academic_dataset_checkin.json  
  inflating: yelp_academic_dataset_review.json  
  inflating: yelp_academic_dataset_tip.json  
  inflating: yelp_academic_dataset_user.json  


In [28]:
# List the working directory to confirm the archive was extracted.
# NOTE: a bare `ls` (no `!` or `%`) only works through IPython's automagic/alias support.
ls -l

total 14911452
-rw-rw-r-- 1 ubuntu ubuntu      41776 Mar 26 01:18 Dataset_Agreement.pdf
-rw-rw-r-- 1 ubuntu ubuntu      22045 Jul  1 03:34 YelpGraph.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 4809540040 Jul  1 03:33 [0m[01;31myelp-dataset.zip[0m
-rw-rw-r-- 1 ubuntu ubuntu  152898689 Mar 26 01:18 yelp_academic_dataset_business.json
-rw-rw-r-- 1 ubuntu ubuntu  449663480 Mar 26 01:18 yelp_academic_dataset_checkin.json
-rw-rw-r-- 1 ubuntu ubuntu 6325565224 Mar 26 01:19 yelp_academic_dataset_review.json
-rw-rw-r-- 1 ubuntu ubuntu  263489322 Mar 26 01:31 yelp_academic_dataset_tip.json
-rw-rw-r-- 1 ubuntu ubuntu 3268069927 Mar 26 01:32 yelp_academic_dataset_user.json


In [29]:
# Delete the downloaded archive (about 4.8 GB in the listing above); the extracted
# JSON files are what the rest of the notebook reads.
!rm yelp-dataset.zip

## Create schema and load data

In [41]:
import importlib

import autoTigerGraph as atg

# Reload the module so edits to an already-imported autoTigerGraph are picked up
# without restarting the kernel; as the last expression it also displays the
# module and the path it was loaded from.
importlib.reload(atg)

<module 'autoTigerGraph' from '/home/ubuntu/anaconda3/envs/tensorflow2_latest_p37/lib/python3.7/site-packages/autoTigerGraph/__init__.py'>

In [42]:
# Look at the first record of the business file, together with the field list
# that get_first returns alongside it ((index, name, type-name) entries in the
# output below).
filename = 'yelp_academic_dataset_business.json'
(business, fields) = atg.get_first(filename)
(business, fields)

({'business_id': 'f9NumwFMBDn751xgFiRbNA',
  'name': 'The Range At Lake Norman',
  'address': '10913 Bailey Rd',
  'city': 'Cornelius',
  'state': 'NC',
  'postal_code': '28031',
  'latitude': 35.4627242,
  'longitude': -80.8526119,
  'stars': 3.5,
  'review_count': 36,
  'is_open': 1,
  'attributes': {'BusinessAcceptsCreditCards': 'True',
   'BikeParking': 'True',
   'GoodForKids': 'False',
   'BusinessParking': "{'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}",
   'ByAppointmentOnly': 'False',
   'RestaurantsPriceRange2': '3'},
  'categories': 'Active Life, Gun/Rifle Ranges, Guns & Ammo, Shopping',
  'hours': {'Monday': '10:0-18:0',
   'Tuesday': '11:0-20:0',
   'Wednesday': '10:0-18:0',
   'Thursday': '11:0-20:0',
   'Friday': '11:0-20:0',
   'Saturday': '11:0-20:0',
   'Sunday': '13:0-18:0'}},
 [(0, 'business_id', 'str'),
  (1, 'name', 'str'),
  (2, 'address', 'str'),
  (3, 'city', 'str'),
  (4, 'state', 'str'),
  (5, 'postal_code', 'str'),
  (6,

In [43]:
# Create the 'yelp' graph; in GSQL, `(*)` includes every vertex and edge type
# that currently exists in the global catalog.
create_graph_output = shell.gsql('create graph yelp (*)', options=[])
print(create_graph_output)

Connecting to localhost:14240
If there is any relative path, it is relative to <System.AppRoot>/dev/gdk/gsql
Stopping GPE GSE RESTPP
Successfully stopped GPE GSE RESTPP in 11.925 seconds
Starting GPE GSE RESTPP
Successfully started GPE GSE RESTPP in 0.076 seconds
The graph yelp is created.



In [44]:
# Preview the CREATE VERTEX statement that autoTigerGraph derives from the
# sample business record (displayed as the cell output).
atg.guess_vertex(
    vertex_name='Business',
    json_object=business,
)

'CREATE VERTEX Business (PRIMARY_ID business_id STRING, name STRING, address STRING, city STRING, state STRING, postal_code STRING, latitude DOUBLE, longitude DOUBLE, stars DOUBLE, review_count INT, is_open INT, attributes STRING, categories STRING, hours STRING)'

In [45]:
# Settings for the Business vertex; the upsert cell that follows reads these names.
filename, vertex_name, primary_id = (
    'yelp_academic_dataset_business.json',
    'Business',
    'business_id',
)

# Use the first record of the file as the template for the vertex schema.
json_object, _ = atg.get_first(filename)
# Judging by the output below, this drops the graph, creates the vertex type,
# and then re-creates the graph.
atg.create_vertex(
    shell=shell,
    graph_name='yelp',
    vertex_name=vertex_name,
    json_object=json_object,
)


Connecting to localhost:14240
If there is any relative path, it is relative to <System.AppRoot>/dev/gdk/gsql
The graph yelp is dropped.
The vertex type Business could not be found.
The vertex type Business is created.
The graph yelp is created.
---- Graph yelp
Vertex Types: 
  - VERTEX Business(PRIMARY_ID business_id STRING, name STRING, address STRING, city STRING, state STRING, postal_code STRING, latitude DOUBLE, longitude DOUBLE, stars DOUBLE, review_count INT, is_open INT, attributes STRING, categories STRING, hours STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE"
Edge Types: 

Graphs: 
  - Graph yelp(Business:v)
Jobs: 
Queries: 







In [46]:
# Load the business records into the graph, using the filename / vertex_name /
# primary_id set in the previous cell.
# NOTE(review): n=10000 is presumably a batch size rather than a row limit (the
# returned count below is larger) -- confirm against autoTigerGraph.
atg.upsert_json(
    conn=conn,
    filename=filename,
    vertex_name=vertex_name,
    primary_id=primary_id,
    n=10000,
)

209393

In [47]:
# Settings for the User vertex.
filename, vertex_name, primary_id = (
    'yelp_academic_dataset_user.json',
    'User',
    'user_id',
)

# The first record of the file serves as the template for the vertex schema.
json_object, _ = atg.get_first(filename)
atg.create_vertex(
    shell=shell,
    graph_name='yelp',
    vertex_name=vertex_name,
    json_object=json_object,
)
# Load the user records; the returned count is displayed as the cell output.
# NOTE(review): n=10000 is presumably a batch size -- confirm against autoTigerGraph.
atg.upsert_json(
    conn=conn,
    filename=filename,
    vertex_name=vertex_name,
    primary_id=primary_id,
    n=10000,
)

Connecting to localhost:14240
If there is any relative path, it is relative to <System.AppRoot>/dev/gdk/gsql
The graph yelp is dropped.
The vertex type User could not be found.
The vertex type User is created.
The graph yelp is created.
---- Graph yelp
Vertex Types: 
  - VERTEX Business(PRIMARY_ID business_id STRING, name STRING, address STRING, city STRING, state STRING, postal_code STRING, latitude DOUBLE, longitude DOUBLE, stars DOUBLE, review_count INT, is_open INT, attributes STRING, categories STRING, hours STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE"
  - VERTEX User(PRIMARY_ID user_id STRING, name STRING, review_count INT, yelping_since STRING, useful INT, funny INT, cool INT, elite STRING, friends STRING, fans INT, average_stars DOUBLE, compliment_hot INT, compliment_more INT, compliment_profile INT, compliment_cute INT, compliment_list INT, compliment_note INT, compliment_plain INT, compliment_cool INT, compliment_funny INT, compliment_writer INT, compliment_photos INT) WITH STA

1968703

In [48]:
# Settings for the Review vertex; the upsert cell that follows reads these names.
filename, vertex_name, primary_id = (
    'yelp_academic_dataset_review.json',
    'Review',
    'review_id',
)

# The first record of the file serves as the template for the vertex schema.
json_object, _ = atg.get_first(filename)
atg.create_vertex(
    shell=shell,
    graph_name='yelp',
    vertex_name=vertex_name,
    json_object=json_object,
)

Connecting to localhost:14240
If there is any relative path, it is relative to <System.AppRoot>/dev/gdk/gsql
The graph yelp is dropped.
The vertex type Review could not be found.
The vertex type Review is created.
The graph yelp is created.
---- Graph yelp
Vertex Types: 
  - VERTEX Business(PRIMARY_ID business_id STRING, name STRING, address STRING, city STRING, state STRING, postal_code STRING, latitude DOUBLE, longitude DOUBLE, stars DOUBLE, review_count INT, is_open INT, attributes STRING, categories STRING, hours STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE"
  - VERTEX User(PRIMARY_ID user_id STRING, name STRING, review_count INT, yelping_since STRING, useful INT, funny INT, cool INT, elite STRING, friends STRING, fans INT, average_stars DOUBLE, compliment_hot INT, compliment_more INT, compliment_profile INT, compliment_cute INT, compliment_list INT, compliment_note INT, compliment_plain INT, compliment_cool INT, compliment_funny INT, compliment_writer INT, compliment_photos INT) WITH

In [None]:
# Load the review records, using the filename / vertex_name / primary_id set in
# the previous cell (run that cell first).
# NOTE(review): n=10000 is presumably a batch size -- confirm against autoTigerGraph.
atg.upsert_json(
    conn=conn,
    filename=filename,
    vertex_name=vertex_name,
    primary_id=primary_id,
    n=10000,
)