In [None]:
from rdflib import URIRef, BNode, Literal, Namespace
from rdflib.namespace import RDF, SDO, RDFS
from rdflib import Graph

#14	Nintendo shop - https://store.nintendo.nl/nl
#Zijian Dong -i6226172	Anqi Fan -i6272224	Qi Cui -i6288444	Tianyu Wei -i6231911	Zhefan Cheng -i6267501


In [None]:
# Create the empty RDF graph and the namespace rooted at the Dutch Nintendo store.
g = Graph()
NS = Namespace('https://store.nintendo.nl/nl/')
nintendo = Literal("Nintendo")

# ---- Top-level categories ---------------------------------------------------
# `merchandise` covers the merchandise / fan goods section of the shop.
# `franchise` covers the exclusive IPs; its items can overlap with other categories.
console, games, merchandise, franchise, new_product = (
    NS[slug] for slug in ('consoles', 'games', 'merchandise', 'franchises', 'new')
)

# ---- Sub-categories of consoles ---------------------------------------------
# `switch` and `switch_bundle` are console products; `accessory` holds the
# accessories that belong to the Switch.
switch, switch_bundle, accessory = (
    NS['consoles/' + slug]
    for slug in ('nintendo-switch-consoles', 'nintendo-switch-bundles', 'accessories')
)

# ---- Sub-categories of merchandise ------------------------------------------
apparel, home_and_gifts = (
    NS['merchandise/' + slug] for slug in ('apparel', 'home-and-gifts')
)


In [None]:
# -------- Schema-level triples ------------------------------------------------
# Every triple is given as (subject, predicate, object).

# Type each main-page category with the `nintendo` literal.
for _category in (console, games, merchandise, franchise, new_product):
    g.add((_category, RDF.type, nintendo))

# The two console sub-categories are subclasses of the console category ...
for _child in (switch, switch_bundle):
    g.add((_child, RDFS.subClassOf, console))
# ... while accessories are linked to consoles as accessory / spare part.
g.add((accessory, SDO.isAccessoryOrSparePartFor, console))

# Sub-categories of merchandise.
for _child in (apparel, home_and_gifts):
    g.add((_child, RDFS.subClassOf, merchandise))


In [None]:
import pandas as pd

# Load the scraped webshop items and add one group of product triples per CSV row.
df = pd.read_csv("group14_ken3140_webshop.csv")

# Names allowed in the CSV "Category" column, mapped to the category nodes of
# the graph. An explicit mapping replaces the former `locals()[category]`
# lookup, which resolved arbitrary variable names coming from external data.
CATEGORY_NODES = {
    "console": console,
    "games": games,
    "merchandise": merchandise,
    "franchise": franchise,
    "new_product": new_product,
    "switch": switch,
    "switch_bundle": switch_bundle,
    "accessory": accessory,
    "apparel": apparel,
    "home_and_gifts": home_and_gifts,
}

# CSV value column -> predicate under which that value is stored for the item.
VALUE_PROPERTIES = (
    ("Value 1", SDO.name),
    ("Value 2", SDO.description),
    ("Value 3", SDO.sku),
    ("Value 4", SDO.price),
)

for row_number, row in df.iterrows():
    # Rows without an item URI do not describe a product; skip them.
    if pd.isna(row["Item URI"]):
        continue

    # NOTE(review): the guard above reads "Item URI" while the node is built
    # from "Item URL" -- confirm that both columns exist in the CSV.
    item = NS[row["Item URL"].replace("https://store.nintendo.nl/nl/", "").replace(" ", "")]

    # A row may list several comma-separated categories; surrounding spaces
    # (e.g. "games, franchise") are tolerated.
    # NOTE(review): `schema:product` does not look like a schema.org term; it
    # is kept because the SPARQL queries in this notebook match on it.
    for category in row['Category'].split(","):
        category_name = category.strip()
        if category_name not in CATEGORY_NODES:
            raise KeyError(
                f"Unknown category {category_name!r} in CSV row {row_number}; "
                f"expected one of {sorted(CATEGORY_NODES)}"
            )
        g.add((item, SDO.product, CATEGORY_NODES[category_name]))

    # Store the literal values, leaving out cells that are empty in the CSV so
    # that no NaN literals end up in the graph.
    for column, predicate in VALUE_PROPERTIES:
        value = row[column]
        if not pd.isna(value):
            g.add((item, predicate, Literal(value)))

    # Every item in this shop is given the same brand.
    g.add((item, SDO.brand, Literal("Nintendo")))


In [None]:
# Register a readable prefix for the store namespace in the Turtle output.
g.bind('NS', NS)

# Write the graph to disk. When a destination is given, `serialize` writes the
# file and does not return the Turtle text, so its result is not printed.
g.serialize(destination='Nintendo.ttl', format='turtle')
print(f"Serialized {len(g)} triples to Nintendo.ttl")

## SPARQL:

In [None]:
from rdflib import Graph

# Read the serialized Turtle file back into a separate graph; all SPARQL
# queries in the following cells run against this `graph`.
graph = Graph()
graph.parse(source="Nintendo.ttl", format="turtle")

In [None]:
'''
Example query: look up the price of one item by its name.
'''
item_info_query = """
SELECT ?price WHERE {
  ?s schema:name 'Splatoon 3'.
  ?s schema:price ?price.
}
"""

results = graph.query(item_info_query)
# Each result row has a single column: the price.
for (price,) in results:
    print(price)

Questions:

A. For a given item (select a random item from your RDF Graph), provide all its categories and subcategories, and its brand.

B. Provide items from different subcategories that have the same brand.

C. Group products by brand and show the average price or rating for each brand.

D. Sort products in one category according to average brand price or rating.

E. Use an external service point, provide a description of 5 facts about the top brand from part D (e.g. https://query.wikidata.org/), e.g. location of headquarters. You may return images as one of your facts.

F. Recommend an item which is similar to the item using your linked RDF graph (i.e., shared properties and categories).

G. Write your own question about the webshop in plain English, then translate it to the corresponding SPARQL query, and run it on the graph. Provide a rationale for why this query would be valuable in a webshop setting, such as for semantic search or other applications.

In [None]:
'''
Query A
 For a given item (select a random item from your RDF Graph), provide all its categories and subcategories, and its brand.

The random item is chosen in a subquery, so LIMIT 1 applies to the item and
not to its (item, category) rows: an item that is filed under several
categories is reported with all of them.
'''
item_info_query = """
  SELECT ?name ?subcategory ?category ?brand
  WHERE {
    {
      # Pick one random item among the items that have a price.
      SELECT ?item
      WHERE {
        ?item schema:price ?price .
      }
      ORDER BY RAND()
      LIMIT 1
    }

    ?item schema:brand ?brand .                   # Get the brand
    ?item schema:name ?name .                     # Get the item name
    ?item schema:product ?subcategory .           # Get every category the item is filed under

    OPTIONAL { ?subcategory rdfs:subClassOf ?category }    # Parent category, when there is one
  }
"""

results = graph.query(item_info_query)
rows = list(results)
if rows:
    # All rows describe the same item, so name and brand are printed once.
    print('Name: ', rows[0].name)
    for row in rows:
        if row.category is None:
            # The item is filed directly under a top-level category.
            print('Category: ', row.subcategory)
        else:
            print('Category: ', row.category)
            print('Subcategory: ', row.subcategory)
    print('Brand: ', rows[0].brand)

In [None]:
'''
Query B
 Provide items from different subcategories that have the same brand.

The inner SELECT samples one item per subcategory for the brand 'Nintendo';
the outer pattern then fetches the name of each sampled item.
'''
item_info_query = """
  SELECT ?name ?subcategory
  WHERE {

                {
                    #SAMPLE: We choose 1 item from each subcategory
                    SELECT DISTINCT ?subcategory (SAMPLE(?item) AS ?sampleItem)
                    WHERE {
                      ?item schema:brand 'Nintendo' . #choose same brand
                      ?item schema:name ?name .
                      ?item schema:product ?subcategory .
                    }
                    GROUP BY ?subcategory


                 }
    ?sampleItem schema:brand 'Nintendo' .
    ?sampleItem schema:name ?name .
    ?sampleItem schema:product ?subcategory .
  }


ORDER BY RAND()
LIMIT 10
"""

results = graph.query(item_info_query)
# Result columns follow the SELECT clause: item name, then subcategory.
for name, subcategory in results:
    print('Name: ', name)
    print('subCategory: ', subcategory)

In [None]:
'''
Query C
 Group products by brand and show the average price for each brand.
'''
item_info_query = """
      SELECT ?brand (AVG(?price) AS ?averagePrice)
        WHERE {
          ?item schema:brand ?brand .
          ?item schema:price ?price .
        }
        GROUP BY ?brand
"""

results = graph.query(item_info_query)
# One row per brand: the brand and the average of its item prices.
for brand, average_price in results:
    print('Brand: ', brand)
    print('Average Price: ', average_price)

In [None]:
'''
Query D
 Sort products in one category according to average brand price or rating.

A random category is selected in the subquery; the items of that category are
then listed in ascending order of price.
'''
item_info_query = """
      SELECT ?item ?category ?price
        WHERE {


            {
              #Here we choose a random category
              SELECT ?category
                WHERE {
                  ?item schema:product ?category .
                }
                ORDER BY RAND()
                LIMIT 1
            }

        ?item schema:product ?category.    #Get all items within that category
        ?item schema:price ?price .

        }
        ORDER BY ?price   #sort by price
"""

results = graph.query(item_info_query)
# Result columns follow the SELECT clause: item, category, price.
for item, category, price in results:
    print('category: ', category)
    print('item: ', item)
    print('price: ', price)

In [None]:
'''
Query E
E. Use an external service point, provide a description of 5 facts about the top brand from part D (e.g. https://query.wikidata.org/), e.g. location of headquarters. You may return images as one of your facts.
'''

In [None]:
'''
Query E
E. Use an external service point, provide a description of 5 facts about the top brand from part D (e.g. https://query.wikidata.org/), e.g. location of headquarters. You may return images as one of your facts.

This cell was labelled "Query F", but it asks Wikidata for facts about the
brand, which is question E.
'''
from SPARQLWrapper import SPARQLWrapper, JSON

# Public SPARQL endpoint of the Wikidata Query Service.
wikidata_endpoint_url = "https://query.wikidata.org/sparql"

# Client bound to that endpoint.
sparql = SPARQLWrapper(wikidata_endpoint_url)

# The five facts are listed as property *entities* (wd:P...). The label
# service labels entities, not the wdt: predicate IRIs, so each property entity
# is mapped to its direct-claim predicate with wikibase:directClaim; this way
# ?propertyLabel is the readable property name.
# wd:Q8093 is the queried brand entity (presumably Nintendo -- TODO confirm the Q-id).
query = """
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?property ?propertyLabel (SAMPLE(?value) as ?sampleValue) (SAMPLE(?valueLabel) as ?sampleValueLabel)
WHERE {
  #154: logo image
  #159: headquarters location
  #452: industry
  #1448: official name
  #169: chief executive officer
  VALUES ?property { wd:P154 wd:P159 wd:P452 wd:P1448 wd:P169 }

  ?property wikibase:directClaim ?claim .
  wd:Q8093 ?claim ?value .

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?property ?propertyLabel
"""

# Send the query and ask for a JSON result.
sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# Execute the query and convert the response into Python dicts/lists.
results = sparql.query().convert()

# Print one line per fact. A binding may lack a label; fall back to the raw
# sampled value, and to "N/A" when neither is present.
for result in results["results"]["bindings"]:
    property_label = result.get("propertyLabel", {}).get("value", "N/A")
    value_binding = result.get("sampleValueLabel") or result.get("sampleValue") or {}
    sample_value_label = value_binding.get("value", "N/A")
    print(f"Property: {property_label}, Value: {sample_value_label}")


In [None]:
'''
Query G
Write your own question about the webshop in plain English, then translate it to the corresponding SPARQL query, and run it on the graph. Provide a rationale for why this query would be valuable in a webshop setting, such as for semantic search or other applications.

Question: What are the prices of the Switch consoles? List them from low to high.
'''

item_info_query = """

      SELECT ?name  ?price
        WHERE {
          ?switch     schema:product <https://store.nintendo.nl/nl/consoles/nintendo-switch-consoles>.
          ?switch schema:name ?name .
          ?switch schema:price ?price .

        }
       ORDER BY ?price   #sort by price

"""

results = graph.query(item_info_query)
# Result columns follow the SELECT clause: console name, then price.
for name, price in results:
    print('Name: ', name)
    print('Price: ', price)

In [None]:
# Debug helper: list every statement in the graph. The output is very long, so
# do not run this cell routinely -- only when the graph needs to be inspected.
query = """
SELECT * WHERE {
  ?s ?p ?o
}
"""
results = graph.query(query)
for triple in results:
    print(triple)