## Introduction to Pandas

Creating a DataFrame:

In [1]:
import pandas as pd

In [2]:
# Cell values: one inner list per row, one entry per column
data = [
    [4, 2, 1],
    [3, 2, 2],
    [5, 0, 8],
]

# Labels for the two axes: fruits across the columns, weekdays down the rows
column = ["Apples", "Bananas", "Oranges"]
row = ["Monday", "Tuesday", "Wednesday"]

# Pass the labels by keyword so it is explicit which list is the index
df = pd.DataFrame(data=data, index=row, columns=column)
print(df)

           Apples  Bananas  Oranges
Monday          4        2        1
Tuesday         3        2        2
Wednesday       5        0        8


## Exploratory Analysis

In [3]:
# Location of the wine-ratings CSV (raw file hosted on GitHub)
csv_url = "https://raw.githubusercontent.com/xinconggg/ML-Ops/refs/heads/main/Python%20Essentials%20for%20MLOps/datasets/wine-ratings.csv"
# Download and parse the file; index_col=0 uses the first CSV column as the row index
df = pd.read_csv(filepath_or_buffer=csv_url, index_col=0)

In [4]:
# Preview the first five rows of the dataset
df.head(n=5)

Unnamed: 0,name,grape,region,variety,rating,notes
0,1000 Stories Bourbon Barrel Aged Batch Blue Ca...,,"Mendocino, California",Red Wine,91.0,"This is a very special, limited release of 100..."
1,1000 Stories Bourbon Barrel Aged Gold Rush Red...,,California,Red Wine,89.0,The California Gold Rush was a period of coura...
2,1000 Stories Bourbon Barrel Aged Gold Rush Red...,,California,Red Wine,90.0,The California Gold Rush was a period of coura...
3,1000 Stories Bourbon Barrel Aged Zinfandel 2013,,"North Coast, California",Red Wine,91.0,"The wine has a deep, rich purple color. An int..."
4,1000 Stories Bourbon Barrel Aged Zinfandel 2014,,California,Red Wine,90.0,Batch #004 is the first release of the 2014 vi...


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) of the dataset.
# The count row shows how many non-null values each summarised column has.
df.describe()

Unnamed: 0,grape,rating
count,0.0,32780.0
mean,,91.186608
std,,2.190391
min,,85.0
25%,,90.0
50%,,91.0
75%,,92.0
max,,99.0


In [6]:
# Order the wines from highest to lowest "rating" and show the top five rows
(
    df
    .sort_values("rating", ascending=False)
    .head(5)
)

Unnamed: 0,name,grape,region,variety,rating,notes
9986,Chateau Angelus (Futures Pre-Sale) 2019,,"St. Emilion, Bordeaux, France",Red Wine,99.0,"This 2019 vintage, made while the estate was u..."
21597,Espectacle Espectacle del Montsant 2012,,Spain,Red Wine,99.0,Its color is surprisingly intense compared to ...
12857,Chateau Pavie (1.5 Liter Futures Pre-Sale) 2019,,"St. Emilion, Bordeaux, France",Red Wine,99.0,"Blend: 50% Merlot, 32% Cabernet Franc, 18% Cab..."
25936,Guigal La Turque Cote Rotie 2010,,"Cote Rotie, Rhone, France",Red Wine,99.0,La Turque displays deep ruby red color with da...
12856,Chateau Pavie (1.5 Liter Futures Pre-Sale) 2018,,"St. Emilion, Bordeaux, France",Red Wine,99.0,"Blend: 60% Merlot, 22% Cabernet Franc and 18% ..."


In [7]:
# Clean line breaks out of the text: carriage returns are deleted first,
# then every newline is turned into a single space
df = (
    df
    .replace({"\r": ""}, regex=True)
    .replace({"\n": " "}, regex=True)
)
df.head()

Unnamed: 0,name,grape,region,variety,rating,notes
0,1000 Stories Bourbon Barrel Aged Batch Blue Ca...,,"Mendocino, California",Red Wine,91.0,"This is a very special, limited release of 100..."
1,1000 Stories Bourbon Barrel Aged Gold Rush Red...,,California,Red Wine,89.0,The California Gold Rush was a period of coura...
2,1000 Stories Bourbon Barrel Aged Gold Rush Red...,,California,Red Wine,90.0,The California Gold Rush was a period of coura...
3,1000 Stories Bourbon Barrel Aged Zinfandel 2013,,"North Coast, California",Red Wine,91.0,"The wine has a deep, rich purple color. An int..."
4,1000 Stories Bourbon Barrel Aged Zinfandel 2014,,California,Red Wine,90.0,Batch #004 is the first release of the 2014 vi...


In [8]:
# Every value in the "grape" column is NaN, so the column carries no information.
# Rebind `df` rather than dropping in place, and ignore a missing label, so that
# re-running this cell after "grape" is already gone does not raise a KeyError.
df = df.drop(columns=["grape"], errors="ignore")
df.head()

Unnamed: 0,name,region,variety,rating,notes
0,1000 Stories Bourbon Barrel Aged Batch Blue Ca...,"Mendocino, California",Red Wine,91.0,"This is a very special, limited release of 100..."
1,1000 Stories Bourbon Barrel Aged Gold Rush Red...,California,Red Wine,89.0,The California Gold Rush was a period of coura...
2,1000 Stories Bourbon Barrel Aged Gold Rush Red...,California,Red Wine,90.0,The California Gold Rush was a period of coura...
3,1000 Stories Bourbon Barrel Aged Zinfandel 2013,"North Coast, California",Red Wine,91.0,"The wine has a deep, rich purple color. An int..."
4,1000 Stories Bourbon Barrel Aged Zinfandel 2014,California,Red Wine,90.0,Batch #004 is the first release of the 2014 vi...


## Manipulating Text

In [9]:
# Abbreviate the variety: "Red Wine" becomes "R" and "White Wine" becomes "W";
# values that are not keys of the mapping are left unchanged
variety_codes = {"Red Wine": "R", "White Wine": "W"}
df["variety_short"] = df["variety"].replace(to_replace=variety_codes)
df.head()

Unnamed: 0,name,region,variety,rating,notes,variety_short
0,1000 Stories Bourbon Barrel Aged Batch Blue Ca...,"Mendocino, California",Red Wine,91.0,"This is a very special, limited release of 100...",R
1,1000 Stories Bourbon Barrel Aged Gold Rush Red...,California,Red Wine,89.0,The California Gold Rush was a period of coura...,R
2,1000 Stories Bourbon Barrel Aged Gold Rush Red...,California,Red Wine,90.0,The California Gold Rush was a period of coura...,R
3,1000 Stories Bourbon Barrel Aged Zinfandel 2013,"North Coast, California",Red Wine,91.0,"The wine has a deep, rich purple color. An int...",R
4,1000 Stories Bourbon Barrel Aged Zinfandel 2014,California,Red Wine,90.0,Batch #004 is the first release of the 2014 vi...,R


In [10]:
# Keep only the last comma-separated part of the region,
# e.g. "Mendocino, California" -> "California".
# Split on the final comma instead of on whitespace so that a multi-word last
# part (for example a name like "New Zealand") stays whole, then strip the
# space that follows the comma.
df["region_short"] = df["region"].str.rsplit(",", n=1).str.get(-1).str.strip()
df.head()

Unnamed: 0,name,region,variety,rating,notes,variety_short,region_short
0,1000 Stories Bourbon Barrel Aged Batch Blue Ca...,"Mendocino, California",Red Wine,91.0,"This is a very special, limited release of 100...",R,California
1,1000 Stories Bourbon Barrel Aged Gold Rush Red...,California,Red Wine,89.0,The California Gold Rush was a period of coura...,R,California
2,1000 Stories Bourbon Barrel Aged Gold Rush Red...,California,Red Wine,90.0,The California Gold Rush was a period of coura...,R,California
3,1000 Stories Bourbon Barrel Aged Zinfandel 2013,"North Coast, California",Red Wine,91.0,"The wine has a deep, rich purple color. An int...",R,California
4,1000 Stories Bourbon Barrel Aged Zinfandel 2014,California,Red Wine,90.0,Batch #004 is the first release of the 2014 vi...,R,California


## Applying Functions

In [11]:
def good_wine(value, threshold=94):
    """Return True when a rating is strictly greater than ``threshold``.

    Args:
        value: Numeric rating to check.
        threshold: Cut-off the rating has to exceed. Defaults to 94, the
            value that was previously hard-coded.

    Returns:
        bool: True if ``value > threshold``, otherwise False. A NaN rating
        compares as False.
    """
    # bool() keeps the result a plain Python bool, as the if/else version returned
    return bool(value > threshold)

# Apply good_wine to every rating and store the True/False result in a new column
df["good"] = df["rating"].apply(good_wine)
# good_wine returns True/False, so the column can be used directly as a
# boolean mask (no "== True" comparison needed); show the first matching rows
df[df["good"]].head()

Unnamed: 0,name,region,variety,rating,notes,variety_short,region_short,good
36,3 Rings Reserve Shiraz 2004,"Barossa Valley, Barossa, South Australia, Aust...",Red Wine,96.0,Vintage Comments : Classic Barossa vintage con...,R,Australia,True
110,Aalto 2005,"Ribera del Duero, Spain",Red Wine,95.0,The grapes come exclusively from old vineyards...,R,Spain,True
111,Aalto 2007,"Ribera del Duero, Spain",Red Wine,95.0,It is ironic that one of the youngest properti...,R,Spain,True
113,Aalto 2008,"Ribera del Duero, Spain",Red Wine,95.0,This wine's elegance and fruitful presence per...,R,Spain,True
121,Aalto 2016,"Ribera del Duero, Spain",Red Wine,95.0,"Deep purple red color, as corresponds to old T...",R,Spain,True
