In [45]:
from neo4j import GraphDatabase
import pandas as pd

In [46]:
pd.set_option('display.max_rows', 5)

In [47]:
URI = "neo4j://localhost:7999"
AUTH = ("neo4j", "password")

In [48]:
dataset = pd.read_csv("./BestBooksEverClean.csv")
dataset

Unnamed: 0,title,series,author,genres,pages,publishDate,publishYear,rating,likedPercent,price
0,Manhood Of Humanity - The Science And Art Of H...,standalone,Alfred Korkzybski,['Science'],280,April 4th 2010,2010,5.00,100.0,22.40
1,CIRCUS RIDER,standalone,Peter Breschard,['Historical Fiction'],330,December 15th 2010,2010,5.00,100.0,19.59
...,...,...,...,...,...,...,...,...,...,...
998,Konin: A Quest,standalone,Theo Richmond,"['Holocaust', 'History', 'Nonfiction', 'Jewish...",448,August 29th 1995,1995,4.46,99.0,2.77
999,The Black Flamingo,standalone,Dean Atta,"['Poetry', 'Young Adult', 'LGBT', 'Contemporar...",360,August 8th 2019,2019,4.46,99.0,12.06


#### From String list, return the list of genres

In [49]:
def genres_to_list(genres: str) -> list:
  list_of_genres = genres.removeprefix('[').removesuffix(']').split(', ')
  return list(map(lambda s: s[1:][:-1], list_of_genres))

In [50]:
dataset["genres"] = dataset["genres"].apply(genres_to_list)
type(dataset['genres'].iloc[0])

list

#### Enable Connection

In [51]:
driver = GraphDatabase.driver(URI, auth=AUTH)
session = driver.session()

In [82]:
def insert_user(name: str) -> bool:
  # Insertar usuario
  result = session.run(f"MATCH (u:User) WHERE u.name = $name RETURN u.name as name", {"name": name})
  if len(result.data()) == 0:
    try:
      session.run("CREATE (u:User {name: $name})", {"name": name})
      return True
    except:
      pass
  return False

def show_users():
  # Mostrar usuarios
  print("Usuarios:")
  result = session.run("MATCH (u:User) RETURN u.name AS name")
  for record in result:
    print(f" - {record["name"]}")

def show_connections():
  # Mostrar vinculos
  print("\nVinculos:")
  result = session.run("MATCH (u:User)-[r:FRIEND]->(u2:User) RETURN u.name AS name, u2.name AS name2")
  for record in result:
    print(f" - {record['name']} es amigo de {record['name2']}")

def insert_author(author: str) -> bool:
  # Insertar autor
  result = session.run("MATCH (a:Author) WHERE a.name = $name RETURN a.name as name", {"name": author})
  if len(result.data()) == 0:
    try:
      session.run("CREATE (a:Author {name: $name})", {"name": author})
      return True
    except:
      pass
  return False

def show_authors():
  # Mostrar autores
  print("Autores:")
  result = session.run("MATCH (a:Author) RETURN a.name AS name")
  for record in result:
    print(f" - {record["name"]}")

def insert_genre(genre: str) -> bool:
  # Insertar Genero
  result = session.run("MATCH (g:Genre) WHERE g.name = $name RETURN g.name as name", {"name": genre})
  if len(result.data()) == 0:
    try:
      session.run("CREATE (g:Genre {name: $name})", {"name": genre})
      return True
    except:
      pass
  return False

def show_genres():
  # Mostrar generos
  print("Generos:")
  result = session.run("MATCH (g:Genre) RETURN g.name AS name")
  for record in result:
    print(f" - {record["name"]}")

def insert_books(books: pd.DataFrame) -> bool:
  # Insertar libros
  query = """
    UNWIND $books AS book
    MERGE (b:Book {title: book.title})
    ON CREATE SET b.author = book.author, 
                  b.pages = book.pages, b.rating = book.rating, b.likedPercent = book.likedPercent, 
                  b.numRating = book.numRating, b.price = book.price, b.publishDate = book.publishDate, 
                  b.publishYear = book.publishYear
    """
  
  try:
    books_dict = books.to_dict('records')
    session.run(query, {"books": books_dict})
    return True
  except:
    pass
  return False

def get_random_books(n: int):
  query = f"""
    MATCH (b:Book)
    RETURN b.title AS title, b.author AS author, b.pages AS pages,
           b.rating AS rating, b.likedPercent AS likedPercent, b.numRating AS numRating,
           b.price AS price, b.publishDate AS publishDate, b.publishYear AS publishYear
    ORDER BY rand()
    LIMIT {n}
    """
  result = session.run(query)
  books = result.data()
  return books

def make_relation(a_name: str, a_type: str, b_name: str, b_type: str, relation: str) -> bool:
  # Crear una relacion
  query = "MATCH (a:" + a_type + " {name: $a_name}),"\
        +       "(b:" + b_type + " {name: $b_name})"\
        + "CREATE (a)-[:" + relation + "]->(b)"
  attr = {
    "a_name": a_name, 
    "b_name": b_name
    }
  
  session.run(query, attr)
  # try:
  #   return True
  # except:
  #   pass
  # return False

#### Insert Users

In [53]:
users = [
  'Victor',
  'Badre',
  'Daniel',
  'Adrian',
  'Manuel',
  'Angel',
  'Felix',
  'Fernando'
]

In [54]:
for user in users:
  insert_user(user)

In [93]:
show_users()

Usuarios:
 - Victor
 - Badre
 - Daniel
 - Adrian
 - Manuel
 - Angel
 - Felix
 - Fernando


#### Insert authors

In [56]:
for author in dataset['author'].unique():
  insert_author(author)

In [57]:
show_authors()

Autores:
 - Alfred Korkzybski
 - Peter Breschard
 - Ocelot Emerson
 - Tina M. Randolph
 - Richard Atwood
 - Jerry Seguin
 - Violet's Vegan Comics
 - Ki Longfellow
 - Ruth Watson-Morris
 - Steve Julien
 - Ali Marsman
 - Hudith F. Dolkart
 - Brae Wyckoff
 - Sita Bennett
 - Jacob Lasher
 - Bernie Morris
 - Rohith S. Katbamna
 - Martha A. Cheves
 - Jo-Anne McArthur
 - Bill Watterson
 - Francis Shenstone
 - J.D. Estrada
 - Aimee Lewis
 - Brandon Sanderson
 - William Shakespeare
 - Victoria Botkin
 - Anonymous
 - Rachna Khemchandani
 - Janet G. Travell
 - Pythia Peay
 - Banani Ray
 - J.K. Rowling
 - Laurick Ingram
 - G.B. Hope
 - George Herriman
 - George Mendoza
 - Lawrence Hole
 - Francine Rivers
 - Neil Gaiman
 - Fletcher McHale
 - Marika Germanis
 - Amit Ray
 - Georg Trakl
 - Andrew Peterson
 - Rich Okun
 - S.F. Mazhar
 - Gary Collins
 - Rien Poortvliet
 - Nick Brandt
 - Sue Lloyd-Roberts
 - Teric Darken
 - Sarah J. Maas
 - Michelle R. Eastman
 - Karen Kingsbury
 - Amanda Barratt
 - Lisa

#### Insert Genres

In [58]:
set_of_genres = set()
for genres in dataset['genres'].values:
  for genre in genres:
    set_of_genres.add(genre)
set_of_genres

{'14th Century',
 '16th Century',
 '18th Century',
 '19th Century',
 '1st Grade',
 '20th Century',
 '21st Century',
 '40k',
 'Abuse',
 'Academic',
 'Action',
 'Activism',
 'Adult',
 'Adult Fiction',
 'Adventure',
 'Aeroplanes',
 'Africa',
 'African American',
 'African American Literature',
 'African American Romance',
 'Agriculture',
 'Alchemy',
 'Aliens',
 'Alternate History',
 'American',
 'American Civil War',
 'American Fiction',
 'American History',
 'Americana',
 'Ancient',
 'Ancient History',
 'Angels',
 'Animal Fiction',
 'Animals',
 'Anime',
 'Anthologies',
 'Anthropology',
 'Anthropomorphic',
 'Anti Racist',
 'Architecture',
 'Art',
 'Art History',
 'Art and Photography',
 'Asia',
 'Asian Literature',
 'Astrology',
 'Astronomy',
 'Audiobook',
 'Australia',
 'Autobiography',
 'Aviation',
 'BDSM',
 'Baha I',
 'Basketball',
 'Batman',
 'Biblical',
 'Biblical Fiction',
 'Biography',
 'Biography Memoir',
 'Biology',
 'Birds',
 'Boarding School',
 'Book Club',
 'Books About Books'

In [59]:
len(set_of_genres)

456

In [60]:
for genre in set_of_genres:
  insert_genre(genre)

In [61]:
show_genres()

Generos:
 - Catholic
 - Self Help
 - 14th Century
 - Fostering
 - Classic Literature
 - Fairies
 - Personal Development
 - Young Adult Romance
 - Zen
 - Novella
 - The United States Of America
 - Fairy Tales
 - Gaming
 - Realistic Fiction
 - Military Fiction
 - Activism
 - International
 - Nobel Prize
 - Christian Non Fiction
 - 20th Century
 - Greek Mythology
 - Steampunk
 - Spanish Literature
 - Computers
 - Aviation
 - Research
 - Wildlife
 - Scripture
 - Short Stories
 - Basketball
 - Abuse
 - Halloween
 - Horror
 - Food
 - Short Story Collection
 - Witchcraft
 - Martial Arts
 - Students
 - Dragons
 - Erotic Paranormal Romance
 - Ornithology
 - Asian Literature
 - Sword and Sorcery
 - Outdoors
 - Russian Literature
 - American History
 - Storytime
 - Photography
 - Physics
 - Ancient
 - Writing
 - Forgotten Realms
 - Ireland
 - LGBT
 - Lds Non Fiction
 - African American Romance
 - Conspiracy Theories
 - Field Guides
 - Graphic Novels Comics
 - Social Issues
 - Art and Photography


#### Insert books

In [65]:
insert_books(dataset)

True

In [70]:
books = get_random_books(5)
[f"{b["title"]} - {b["author"]}" for b in books]

['Five Complete Hercule Poirot Novels: ABC Murders / Cards on the Table / Death on the Nile / Murder on the Orient Express / Thirteen at Dinner - Agatha Christie',
 'Pharmako/Poeia: Plant Powers, Poisons, and Herbcraft - Dale Pendell',
 'The Abhorsen Trilogy Box Set - Garth Nix',
 'Wildfire - Ilona Andrews',
 'J.R.R. Tolkien 4-Book Boxed Set: The Hobbit and The Lord of the Rings - J.R.R. Tolkien']

#### Friend relationship

In [105]:
user1 = "Victor"
user2 = "Angel"
make_relation(user1, "User", user2, "User", "FRIEND")
make_relation(user2, "User", user1, "User", "FRIEND")

In [98]:
# Mostrar vinculos
print("\nVinculos:")
result = session.run("MATCH (u:User)-[r:FRIEND]->(u2:User) RETURN u.name AS name, u2.name AS name2")
for record in result:
  print(f" - {record['name']} es amigo de {record['name2']}")


Vinculos:
 - Badre es amigo de Victor
 - Victor es amigo de Badre
 - Daniel es amigo de Victor
 - Victor es amigo de Daniel


#### Genre relationship

In [112]:
for _, row in dataset.iterrows():
  print(row["title"])
  for genre in row['genres']:
    make_relation(row['title'], "Book", genre, "Genre", "BELONG_TO")

Manhood Of Humanity - The Science And Art Of Human Engineering


BufferError: Existing exports of data: object cannot be re-sized

In [111]:
print("\nVinculos:")
result = session.run("MATCH (b:Book)-[r:BELONG_TO]->(g:Genre) RETURN b.name AS name, g.name AS name2")
for record in result:
  print(f" - {record['name']} pertenece a {record['name2']}")


Vinculos:


BufferError: Existing exports of data: object cannot be re-sized

#### Close Connection

In [64]:
# session.close()
# driver.close()