# Pandas Tutorial
# https://towardsdatascience.com/my-python-pandas-cheat-sheet-746b11e44368

In [1]:
import pandas as pd
import numpy as np

## Importing

In [5]:
# Build a small example data frame from literal values, one entry per column
df = pd.DataFrame({
    'id': [1, 2, 3],
    'name': ['Bob', 'Sally', 'Scott'],
    'occupation': ['Builder', 'Baker', 'Candle Stick Maker'],
})

In [7]:
# Display the data frame: a bare expression on the last line of a cell is
# rendered as that cell's output
df

Unnamed: 0,id,name,occupation
0,1,Bob,Builder
1,2,Sally,Baker
2,3,Scott,Candle Stick Maker


In [None]:
# Copy a data frame. deep=True copies the data too, so later changes to the
# copy are not reflected in `df`. The result is not assigned to a name here;
# it is only shown as the cell output.
df.copy(deep=True)

In [3]:
# Load a CSV file into a data frame named `anime`.
# The path is relative, so the 'anime-recommendations-database' folder must
# sit in the directory the notebook is run from.
anime = pd.read_csv('anime-recommendations-database/anime.csv')

## Exporting

In [10]:
# Save the data frame to a CSV file named 'tmp.csv' (relative path)
# index controls whether the row labels (index) are written;
# index=False leaves them out of the file
df.to_csv('tmp.csv', index=False)

## Viewing and Inspecting

In [12]:
# Get top n records (here n=2, i.e. the first two rows of the frame)
anime.head(2)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175
12293,26081,Yasuji no Pornorama: Yacchimae!!,Hentai,Movie,1,5.46,142


In [13]:
# Get bottom n records (here n=2, i.e. the last two rows of the frame)
anime.tail(2)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175
12293,26081,Yasuji no Pornorama: Yacchimae!!,Hentai,Movie,1,5.46,142


In [16]:
# Report the number of rows, then the number of distinct entries in 'type'.
# dropna=False keeps a missing value counted as its own entry, which matches
# what len(Series.unique()) reports.
print('Total Rows: ', anime.shape[0])
print('Unique Rows (Type Col): ', anime['type'].nunique(dropna=False))

Total Rows:  12294
Unique Rows (Type Col):  7


In [17]:
# Get data frame info: prints the index range, each column's non-null count
# and dtype, and the frame's memory usage
anime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
anime_id    12294 non-null int64
name        12294 non-null object
genre       12232 non-null object
type        12269 non-null object
episodes    12294 non-null object
rating      12064 non-null float64
members     12294 non-null int64
dtypes: float64(1), int64(2), object(4)
memory usage: 672.4+ KB


In [18]:
# Get summary statistics (count, mean, std, min, quartiles, max).
# With default arguments only the numeric columns are summarised.
anime.describe()

Unnamed: 0,anime_id,rating,members
count,12294.0,12064.0,12294.0
mean,14058.221653,6.473902,18071.34
std,11455.294701,1.026746,54820.68
min,1.0,1.67,5.0
25%,3484.25,5.88,225.0
50%,10260.5,6.57,1550.0
75%,24794.5,7.18,9437.0
max,34527.0,10.0,1013917.0


In [22]:
# Count how many rows carry each value of the 'type' column.
# Bracket access is used because the column name is also a Python builtin name.
anime['type'].value_counts()

TV         3787
OVA        3311
Movie      2348
Special    1676
ONA         659
Music       488
Name: type, dtype: int64

## Selecting

In [None]:
# Get the values of one column as a plain Python list
# (anime['type'] on its own is the Series; .tolist() converts it)
anime['type'].tolist()

In [29]:
# Get the column names of the frame as a plain Python list
list(anime.columns)

['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members']

## Adding / Dropping

In [32]:
# Append a new column named 'train set' with the same value (False) on every row.
# This modifies `anime` in place, so cells run afterwards see the extra column.
anime['train set'] = False

In [None]:
# Build a new data frame holding only the 'name' and 'rating' columns
# (all rows, selected columns)
anime.loc[:, ['name', 'rating']]

In [None]:
# Drop specified columns
