# Import Dependencies

In [3]:
import pandas as pd
import psycopg2
import os
from sqlalchemy import create_engine
from config import db_password
from config import db_name

# Read Video Games Data from CSV

In [6]:
# Set CSV file path
path = os.path.join('Output', 'Transformed_video_games_data.csv')

In [7]:
# Read games data from CSV file into Dataframe
transformed_df = pd.read_csv(path)
transformed_df

Unnamed: 0,rank,game_name,console,publisher,developer,vgchartz_score,critic_score,user_score,total_shipped,global_sales,na_sales,pal_sales,japan_sales,other_sales,release_date,genre,release_year
0,1,God of War,Series,Sony Computer Entertainment,SIE Santa Monica Studio,,,,51.00,,,,,,2005-03-22,Action,2005.0
1,2,Warriors,Series,KOEI,Omega Force,,,,45.26,,,,,,1997-06-30,Action,1997.0
2,3,Devil May Cry,Series,Capcom,Capcom,,,,22.00,,,,,,2001-10-16,Action,2001.0
3,4,Dynasty Warriors,Series,Unknown,Omega Force,,,,21.00,,,,,,,Action,
4,5,Grand Theft Auto V,PS3,Rockstar Games,Rockstar North,,9.4,,,20.32,6.37,9.85,0.99,3.12,2013-09-17,Action,2013.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22097,232,"Nora, Princess, and Stray Cat",NS,Harukaze,Harukaze,,,,,0.00,,,0.00,,2018-10-25,Visual Novel,2018.0
22098,233,Memories Off: Innocent File,NS,5pb,5pb. Games,,,,,0.00,,,0.00,,2018-10-25,Visual Novel,2018.0
22099,234,Enkan no Memoria: Kakera Tomoshi,PSV,Dramatic Create,A'sRing,,,,,0.00,,,0.00,,2018-03-29,Visual Novel,2018.0
22100,235,Disorder 6,X360,5pb,5pb. Games,,,,,0.00,,,0.00,,2013-08-22,Visual Novel,2013.0


# Create Genre dataframe

In [8]:
# Generate list of unique genres
genre_list = transformed_df['genre'].unique()
genre_list

array(['Action', 'Action-Adventure', 'Adventure', 'Board Game',
       'Education', 'Fighting', 'Misc', 'MMO', 'Music', 'Party',
       'Platform', 'Puzzle', 'Racing', 'Role-Playing', 'Sandbox',
       'Shooter', 'Simulation', 'Sports', 'Strategy', 'Visual Novel'],
      dtype=object)

In [9]:
# Generate list of genre ids
genre_id_list = list(range(0, len(genre_list)))
genre_id_list

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [10]:
# Create genre dataframe
genre_df = pd.DataFrame({
    'genre_id': genre_id_list,
    'genre': genre_list
})
genre_df

Unnamed: 0,genre_id,genre
0,0,Action
1,1,Action-Adventure
2,2,Adventure
3,3,Board Game
4,4,Education
5,5,Fighting
6,6,Misc
7,7,MMO
8,8,Music
9,9,Party


# Create Developer dataframe

In [11]:
# Generate list of unique developers
developer_list = transformed_df['developer'].dropna().unique()
developer_list

array(['SIE Santa Monica Studio', 'Omega Force', 'Capcom', ..., 'ADELTA',
       'girls dynamics', "A'sRing"], dtype=object)

In [12]:
# Generate list of developer ids
developer_id_list = list(range(0, len(developer_list)))
developer_id_list

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [13]:
# Create developer dataframe
developer_df = pd.DataFrame({
    'developer_id': developer_id_list,
    'developer': developer_list
})
developer_df

Unnamed: 0,developer_id,developer
0,0,SIE Santa Monica Studio
1,1,Omega Force
2,2,Capcom
3,3,Rockstar North
4,4,Konami
...,...,...
3541,3541,Tenco
3542,3542,Gesen 18
3543,3543,ADELTA
3544,3544,girls dynamics


In [14]:
developer_df.dtypes

developer_id     int64
developer       object
dtype: object

# Create Console dataframe

In [15]:
# Generate list of unique consoles
console_list = transformed_df['console'].unique()
console_list

array(['Series', 'PS3', 'PS4', 'PS2', 'X360', 'PC', 'XOne', 'PS', 'PSP',
       'Wii', 'DS', 'All', '3DS', 'NES', 'GC', 'WiiU', 'XB', 'NS', 'N64',
       'GEN', '2600', 'GBA', 'GB', 'PSV', 'SNES', 'DC', 'SAT', 'XBL',
       'PSN', 'GBC', 'PCE', '3DO', 'NG', 'VC', 'WW', 'SCD', 'Mob', 'GG',
       'Amig', 'WS', 'PCFX', 'OSX'], dtype=object)

In [16]:
# Generate list of consoles ids
console_id_list = list(range(0, len(console_list)))
console_id_list

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41]

In [17]:
# Create console dataframe
console_df = pd.DataFrame({
    'console_id': console_id_list,
    'console': console_list
})
console_df

Unnamed: 0,console_id,console
0,0,Series
1,1,PS3
2,2,PS4
3,3,PS2
4,4,X360
5,5,PC
6,6,XOne
7,7,PS
8,8,PSP
9,9,Wii


In [19]:
console_df.dtypes

console_id     int64
console       object
dtype: object

# Create Publisher dataframe

In [20]:
# Generate list of unique publishers
publisher_list = transformed_df['publisher'].unique()
publisher_list

array(['Sony Computer Entertainment', 'KOEI', 'Capcom', ..., 'Digiturbo',
       'MAGES', 'Stack'], dtype=object)

In [21]:
# Generate list of publisher ids
publisher_id_list = list(range(0, len(publisher_list)))
publisher_id_list

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [None]:
# Create publisher dataframe
publisher_df = pd.DataFrame){}