In [393]:
import psycopg2
from psycopg2 import OperationalError
from psycopg2.extensions import register_adapter, AsIs
import numpy as np
# Register AsIs as the psycopg2 adapter for NumPy int64 and bool_ scalars, so
# such values are rendered into SQL through their plain string form.
register_adapter(np.int64, AsIs)
register_adapter(np.bool_, AsIs)
import json
import pandas as pd

In [394]:
# Read the database connection settings from the local JSON credentials file.
with open("postgre_credentials.json") as credentials_file:
    data = json.loads(credentials_file.read())

In [395]:
# Open the PostgreSQL connection, mapping each psycopg2 keyword to the
# corresponding entry of the loaded credentials.
connection = psycopg2.connect(
    **{
        keyword: data[credential_key]
        for keyword, credential_key in (
            ("database", "db_name"),
            ("user", "db_user"),
            ("password", "db_password"),
            ("host", "db_host"),
            ("port", "db_port"),
        )
    }
)


In [396]:
# Cursor on `connection`; the cells below use it to execute every statement.
cur = connection.cursor()

In [397]:
# Start from a clean anime table. IF EXISTS keeps this cell from failing on a
# database where the table has not been created yet, so the notebook can be run
# top to bottom on a fresh database. CASCADE also removes objects that depend
# on the table (such as foreign-key constraints held by other tables).
# NOTE(review): the dependent tables are created with IF NOT EXISTS further
# down, so on a re-run they keep existing without those constraints - confirm
# whether they should be dropped here as well.
query = "DROP TABLE IF EXISTS anime CASCADE"
cur.execute(query)
connection.commit()

In [398]:
# DDL for the anime table, keyed by anime_id. This cell only builds the
# statement; it is executed and committed by the cells that follow.
query="""CREATE TABLE IF NOT EXISTS anime (
anime_id INTEGER PRIMARY KEY,
eng_name TEXT,
type TEXT,
source TEXT,
episode_count INTEGER,
maturity_rating TEXT,
MAL_score DECIMAL,
scored_count INTEGER,
MAL_rank INTEGER,
MAL_popularity INTEGER,
favorite_count INTEGER,
studio_name TEXT,
licensor TEXT,
air_status TEXT,
is_airing BOOLEAN,
year INTEGER,
season TEXT
)
"""

In [399]:
# Run the statement currently stored in `query`.
cur.execute(query)


In [400]:
# Commit the current transaction on `connection`.
connection.commit()

In [401]:
# Create the date table (if missing): an auto-numbered date_id plus integer
# year, month and day columns.
query="""CREATE TABLE IF NOT EXISTS date (
date_id SERIAL PRIMARY KEY,
year INTEGER,
month INTEGER,
day INTEGER
)
"""
cur.execute(query)
connection.commit()

In [402]:
# Create the show_name table (if missing): one row per alternate title, each
# pointing at its anime.
# anime_id is INTEGER so that it matches anime.anime_id; a TEXT column cannot
# carry a foreign key to an INTEGER column, and PostgreSQL rejects the
# CREATE TABLE with an "incompatible types" error.
query="""CREATE TABLE IF NOT EXISTS show_name (
alternate_name TEXT PRIMARY KEY,
anime_id INTEGER REFERENCES anime (anime_id)
)
"""
cur.execute(query)
connection.commit()

In [403]:
# Create the genre table (if missing): an auto-numbered genre_id, the genre
# name, and the anime_id it belongs to (foreign key to anime).
query="""CREATE TABLE IF NOT EXISTS genre (
genre_id SERIAL PRIMARY KEY,
genre_name TEXT,
anime_id INTEGER REFERENCES anime (anime_id)
)
"""
cur.execute(query)
connection.commit()

In [404]:
# Create the google_trends table (if missing): a popularity score per show
# name and date, with foreign keys to show_name(alternate_name) and
# date(date_id).
query="""CREATE TABLE IF NOT EXISTS google_trends (
google_trends_id SERIAL PRIMARY KEY,
show_name TEXT REFERENCES show_name(alternate_name),
date INTEGER REFERENCES date(date_id),
popularity_score INT
)
"""
cur.execute(query)
connection.commit()

In [405]:
# Plan: loop through the rows of the MAL CSV file and, for each show:
#   1. insert it into the anime table first;
#   2. split its alternative names and insert each one as a show name;
#   3. split its genres and insert each genre.

In [406]:
def split_column(row, col):
    """Split the comma-separated cell at ``mal_df.loc[row, col]`` into items.

    Args:
        row: Row label in the module-level ``mal_df`` frame.
        col: Column label in ``mal_df``.

    Returns:
        list[str]: The items with surrounding whitespace removed and empty
        items dropped. An empty list when the cell is not a string (for
        example a missing value).
    """
    cell = mal_df.loc[row, col]
    if not isinstance(cell, str):
        # Missing cells are not strings and have nothing to split.
        return []
    # Trim each item rather than deleting every space in the cell, so that
    # multi-word entries keep their inner spaces.
    trimmed_items = (item.strip() for item in cell.split(","))
    return [item for item in trimmed_items if item]

In [407]:
def insert_genre(split_row, anime_id):
    """Insert one ``genre`` row per genre name for the given anime.

    Uses the module-level ``cur`` and ``connection``. All rows are committed
    together after the loop; if any insert fails, the transaction is rolled
    back and the error is re-raised.

    Args:
        split_row: Iterable of genre names.
        anime_id: Id of the anime the genres belong to.
    """
    try:
        for genre in split_row:
            # genre_id is not listed: the statement previously named three
            # columns but supplied only two values, which the database
            # rejects. The genre table declares genre_id as SERIAL, so it is
            # generated automatically.
            cur.execute(
                """INSERT INTO genre(genre_name, anime_id)
                VALUES (%s,%s)""",
                (genre, anime_id),
            )
    except Exception:
        # Leave the connection usable for the next statement.
        connection.rollback()
        raise
    connection.commit()

In [413]:
def insert_anime_info(row):
    """Insert one MAL row into the ``anime`` table if a year and season can be derived.

    The year and season are taken from ``row["premiered"]`` when it holds a
    usable "<season> <year>" string. Otherwise they are derived from the
    first date in ``row["aired_string"]`` ("<Mon> <day>, <year>"), mapping
    the month through the module-level ``seasons`` dict. Rows for which
    neither column yields a year and season are skipped.

    Uses the module-level ``cur`` and ``connection``. The insert is committed
    on success; on a database error the transaction is rolled back and the
    error is re-raised, so the connection stays usable.

    Args:
        row: Mapping-like row (for example a pandas Series) providing the MAL
            columns read below.
    """
    year_and_season = _year_season_from_premiered(row["premiered"])
    if year_and_season is None:
        year_and_season = _year_season_from_aired(row["aired_string"])
    if year_and_season is None:
        return  # no usable start date in either column: skip this row
    year, season = year_and_season

    try:
        cur.execute("""INSERT INTO anime(anime_id, eng_name, type, source,episode_count,
                    maturity_rating, MAL_score, scored_count, MAL_rank, MAL_popularity,
                    favorite_count, studio_name, licensor, air_status, is_airing, year, season)
                    VALUES (%s, %s, %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",
                    (row["anime_id"], row["title_english"], row["type"], row["source"], row["episodes"],
                     row["rating"], row["score"], row["scored_by"], row["rank"], row["popularity"],
                     row["favorites"], row["studio"], row["licensor"], row["status"], row["airing"],
                     year, season))
    except Exception:
        # A failed statement aborts the open transaction; roll back so later
        # inserts on this connection are not rejected as well.
        connection.rollback()
        raise
    connection.commit()


def _checked_tokens(value, allowed_lengths):
    """Split ``value`` on single spaces; return the tokens, or None if unusable.

    Unusable means: not a string (e.g. a missing value), first token "Not",
    any empty token, or a token count outside ``allowed_lengths``.
    """
    if not isinstance(value, str):
        return None
    tokens = value.split(" ")
    if tokens[0] == "Not" or "" in tokens or len(tokens) not in allowed_lengths:
        return None
    return tokens


def _year_season_from_premiered(premiered):
    """Return ``(year, season)`` parsed from the premiered value, or None."""
    # Only the first two tokens (season, year) are read. The earlier check
    # demanded exactly three tokens, so a two-token "<season> <year>" value
    # (presumably the usual form - confirm against the CSV) was silently
    # skipped. Three tokens stay accepted for backward compatibility.
    tokens = _checked_tokens(premiered, (2, 3))
    if tokens is None:
        return None
    try:
        return int(tokens[1]), tokens[0]
    except ValueError:
        return None


def _year_season_from_aired(aired_string):
    """Return ``(year, season)`` from the first date of the aired string, or None."""
    if not isinstance(aired_string, str):
        return None
    # For a range ("<start> to <end>") the text before "to" ends with a space;
    # strip it so the start date is not rejected for containing an empty token.
    first_date = aired_string.split("to")[0].strip()
    tokens = _checked_tokens(first_date, (3,))
    if tokens is None or tokens[0] not in seasons:
        return None
    try:
        return int(tokens[2]), seasons[tokens[0]]
    except ValueError:
        return None

        

    


In [409]:
# Load the MAL CSV; its location is stored in a local text file.
with open("Anime_List_Path.txt") as file:
    # Strip surrounding whitespace: a trailing newline in the text file would
    # otherwise become part of the path and make read_csv fail.
    path = file.read().strip()

mal_df = pd.read_csv(path)

In [410]:
# Fill missing values: the text columns get the placeholder "NA", rank gets 0.
for column_name, fill_value in (
    ("producer", "NA"),
    ("licensor", "NA"),
    ("title_english", "NA"),
    ("rank", 0),
):
    mal_df.loc[:, column_name] = mal_df.loc[:, column_name].fillna(fill_value)

In [411]:
# Month abbreviation -> season name, listed in calendar order (Jan..Dec).
# NOTE(review): this groups Dec-Feb as Winter, Mar-May as Spring, and so on;
# confirm it matches the season convention of the data it is combined with.
seasons = {
    month: season
    for season, months in (
        ("Winter", ("Jan", "Feb")),
        ("Spring", ("Mar", "Apr", "May")),
        ("Summer", ("Jun", "Jul", "Aug")),
        ("Fall", ("Sep", "Oct", "Nov")),
        ("Winter", ("Dec",)),
    )
    for month in months
}

In [412]:
# Insert every row of the MAL frame into the anime table.
# iterrows() already yields each row, so it is passed on directly. The previous
# mal_df.iloc[idx] lookup treated the index *label* as a position, which is
# only correct while the frame has a default 0..n-1 index.
for _idx, row in mal_df.iterrows():
    insert_anime_info(row)

['Jun', '2,', '2018']
['Oct', '2,', '2006']
['Feb', '25,', '1985']
['Jan', '12,', '2017']
['Oct', '21,', '1985']
['Feb', '26,', '2015']
['Feb', '12,', '2014']
['Mar', '28,', '2012']
['Apr', '7,', '2000']
['Feb', '10,', '2016']
['Sep', '6,', '2001']
['Mar', '2,', '2013']
['Aug', '1,', '2014']
['Jul', '12,', '1986']
['Jan', '10,', '1997']
['Jun', '4,', '2017']
['Mar', '16,', '2016']
['Mar', '12,', '2005']
['Jun', '27,', '1990']
['Oct', '14,', '2010']
['Dec', '22,', '2010']
['Feb', '3,', '2012']
['May', '20,', '2006']
['Jun', '12,', '1990']
['Sep', '25,', '2013']
['Aug', '14,', '1993']
['Dec', '28,', '2013']
['Oct', '26,', '2018']
['Mar', '13,', '1989']
['May', '10,', '2014']
['Sep', '13,', '2003']
['Dec', '4,', '1999']
['Dec', '1,', '2017']
['Jan', '1,', '1980']
['Mar', '11,', '1978']
['Mar', '1,', '1996']
['Jul', '29,', '1995']
['Aug', '6,', '2009']
['Aug', '25,', '1994']
['Mar', '28,', '2010']
['Dec', '25,', '2014']
['Feb', '25,', '2014']
['Nov', '9,', '2018']
['Mar', '12,', '1988']
['

In [237]:
# Ad-hoc inspection: show the row(s) of mal_df whose anime_id is 34152.
mal_df.loc[mal_df["anime_id"]==34152,:]

Unnamed: 0,anime_id,title,title_english,title_japanese,title_synonyms,image_url,type,source,episodes,status,...,background,premiered,broadcast,related,producer,licensor,studio,genre,opening_theme,ending_theme
60,34152,Super Danganronpa 2.5: Komaeda Nagito to Sekai...,,スーパーダンガンロンパ2.5 狛枝凪斗と世界の破壊者,Super Danganronpa 2.5: Nagito Komaeda and the ...,https://myanimelist.cdn-dena.com/images/anime/...,OVA,Original,1,Finished Airing,...,,,,"{'Sequel': [{'mal_id': 32189, 'type': 'anime',...",,,Lerche,"Action, Mystery, Horror, Psychological",[],[]


nan

In [392]:
# Close the database connection once all work is done.
connection.close()