In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from statistics import mean

# Archive #7

In [None]:
# Load the raw CSV into a single frame and display it.
# NOTE(review): the sections below each select a different column group from this
# one frame and drop the NaN rows, so the file presumably stacks several unrelated
# datasets — confirm against the data source.
df = pd.read_csv("archive_7.csv")
df

## Ikea Data Frame

In [None]:
# Columns that make up the IKEA portion of the combined frame.
ikea_cat = [
    'item_id', 'name', 'category', 'price', 'old_price', 'sellable_online',
    'link', 'other_colors', 'short_description', 'designer',
    'depth', 'height', 'width',
]
# Keep only those columns, then discard every row that has no product link.
ikea_df = df[ikea_cat].dropna(subset=['link'])

In [None]:
# Preview the first rows of the IKEA frame.
ikea_df.head()

#### Schema Model

In [None]:
# List the column labels of the IKEA frame.
ikea_df.columns

Column names: item_id, name, category, price, old_price, sellable_online, link, other_colors, short_description, designer, depth, height, width

In [None]:
# Print each column's dtype and non-null count before any conversion.
ikea_df.info()

In [None]:
# Convert Object to String

def objectToString(df, col):
    """Cast ``df[col]`` to pandas' ``'string'`` dtype and store it back on ``df``.

    The frame passed in is modified in place; nothing is returned.
    """
    as_string = df[col].astype('string')
    df[col] = as_string

In [None]:
# IKEA columns to cast to the 'string' dtype, one at a time, in place.
str_cols = [
    'name', 'category', 'old_price', 'sellable_online',
    'link', 'other_colors', 'short_description', 'designer',
]
for col in str_cols:
    objectToString(ikea_df, col)

# Show the dtypes again now that the conversion has run.
ikea_df.info()

Our Ikea Data Frame is split between float64 types for numerical values and string types for descriptors

#### Sample Queries

Average Prices and Sizes for each Furniture Category

In [None]:
# Mean of every numeric column per category; item_id is dropped first so the
# identifier is not averaged.
(
    ikea_df
    .drop(columns=['item_id'])
    .groupby('category')
    .mean(numeric_only=True)
)

IKEA URLs for each item; let's widen the column display so the full link is shown

Some of these links lead to working product pages, while others do not

In [None]:
def print_full(x):
    """Print ``x`` without truncating long cell values.

    Temporarily lifts pandas' ``display.max_colwidth`` limit so long strings
    (for example URLs) are printed whole instead of being cut off with ``...``.

    Parameters
    ----------
    x : pandas.Series or pandas.DataFrame
        Object to print.

    Notes
    -----
    Only the column-width limit is lifted; pandas' row truncation
    (``display.max_rows``) still applies to long objects.
    """
    # The width limit has nothing to do with the number of rows, so use None
    # (no limit) rather than len(x), which truncated values on short inputs.
    # option_context restores the caller's previous setting on exit, even if
    # print() raises.
    with pd.option_context('display.max_colwidth', None):
        print(x)

In [None]:
# Print the product links with print_full.
print_full(ikea_df['link'])

## Pokemon Data Frame

In [None]:
# Columns that make up the Pokemon card portion of the combined frame.
pokemon_cat = [
    'Pokemon',
    'Card Type',
    'Generation',
    'Card Number',
    'Price',
]
# Select those columns and drop every row with a missing value in any of them.
pokemon_df = df[pokemon_cat].dropna()

In [None]:
# Preview the first 15 rows of the Pokemon frame.
pokemon_df.head(15)

#### Schema Model

In [None]:
# List the column labels of the Pokemon frame.
pokemon_df.columns

Column names: Pokemon, Card Type, Generation, Card Number, Price

In [None]:
# Print each column's dtype and non-null count before any conversion.
pokemon_df.info()

In [None]:
# Pokemon columns to cast to the 'string' dtype; 'Price' is not in the list.
str_cols = [
    'Pokemon',
    'Card Type',
    'Generation',
    'Card Number',
]
for col in str_cols:
    objectToString(pokemon_df, col)
# Show the dtypes again now that the conversion has run.
pokemon_df.info()

#### Sample Queries

Pokemon Cards with a price greater than $2.50

In [None]:
# Rows whose Price is strictly greater than 2.50.
pokemon_df.loc[pokemon_df['Price'].gt(2.50)]

The reverse holo cards in our dataset

In [None]:
# Rows whose Card Type is exactly 'REVERSE HOLO'.
pokemon_df.loc[pokemon_df['Card Type'].eq('REVERSE HOLO')]

## Books Data Frame

In [None]:
# Columns that make up the books portion of the combined frame.
books_cat = [
    "ranks", "title", "no_of_reviews", "ratings",
    "author", "cover_type", "year", "genre",
]
# Select those columns and keep only the rows that have an author.
book_df = df[books_cat].dropna(subset=["author"])

In [None]:
# Display the books frame.
book_df

#### Schema model

In [None]:
# List the column labels of the books frame.
book_df.columns

#### Sample Queries

In [None]:
# Fiction paperbacks with a rating above 4.0.
query_one = book_df.loc[
    book_df['genre'].eq('Fiction')
    & book_df['ratings'].gt(4.0)
    & book_df['cover_type'].eq('Paperback')
]
query_one

In [None]:
# Books whose author string is longer than 25 characters and whose rank is even.
query_two = book_df.loc[
    book_df['author'].str.len().gt(25)
    & book_df["ranks"].mod(2).eq(0)
]
query_two

In [None]:
# Columns that make up the food-price portion of the combined frame.
food_cat = [
    'price',
    'date',
    'product',
    'market',
    'size',
]
# Select those columns and drop every row with a missing value in any of them.
food_df = df[food_cat].dropna()

In [None]:
# Display the food frame.
food_df

In [None]:
# Call info() to print the dtype / non-null summary; the bare attribute only
# echoes the bound method's repr.
food_df.info()

# Query One: Yogurt products from the Orgil market that cost more than 2,000

In [None]:
# Rows from the 'Orgil' market priced above 2000 whose product is 'Yogurt'.
query_one = food_df.loc[
    food_df['market'].eq('Orgil')
    & food_df['price'].gt(2000)
    & food_df['product'].eq('Yogurt')
]
query_one

# Query Two: food from the Bumbugur market that costs less than 1,500

In [None]:
# Rows from the 'Bumbugur' market priced below 1500.
query_two = food_df.loc[
    food_df['market'].eq('Bumbugur')
    & food_df['price'].lt(1500)
]
query_two