### Import data and check the datatypes

In [1]:
import pandas as pd

In [2]:
# Load the raw coffee-shop transactions, then summarise column dtypes and non-null counts.
df = pd.read_csv(filepath_or_buffer="Coffee Shop Sales.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149116 entries, 0 to 149115
Data columns (total 11 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   transaction_id    149116 non-null  int64  
 1   transaction_date  149116 non-null  object 
 2   transaction_time  149116 non-null  object 
 3   transaction_qty   149116 non-null  int64  
 4   store_id          149116 non-null  int64  
 5   store_location    149116 non-null  object 
 6   product_id        149116 non-null  int64  
 7   unit_price        149116 non-null  float64
 8   product_category  149116 non-null  object 
 9   product_type      149116 non-null  object 
 10  product_detail    149116 non-null  object 
dtypes: float64(1), int64(4), object(6)
memory usage: 12.5+ MB


In [3]:
# Preview the first five rows to sanity-check the loaded values.
df.head(n=5)

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
0,1,2023-01-01,07:06:11,2,5,Lower Manhattan,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg
1,2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
2,3,2023-01-01,07:14:04,2,5,Lower Manhattan,59,4.5,Drinking Chocolate,Hot chocolate,Dark chocolate Lg
3,4,2023-01-01,07:20:24,1,5,Lower Manhattan,22,2.0,Coffee,Drip coffee,Our Old Time Diner Blend Sm
4,5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg


In [4]:
# Parse the date strings into pandas datetimes so the `.dt` accessor can be used on them.
date_strings = df["transaction_date"]
df["transaction_date"] = pd.to_datetime(date_strings)

# Parse the clock strings with an explicit hours:minutes:seconds format, then keep only
# the time-of-day part. `.dt.time` yields plain `datetime.time` objects, so this column's
# dtype stays `object` (pandas has no dedicated time-of-day dtype).
parsed_times = pd.to_datetime(df["transaction_time"], format="%H:%M:%S")
df["transaction_time"] = parsed_times.dt.time

# Re-check the dtypes after conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149116 entries, 0 to 149115
Data columns (total 11 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   transaction_id    149116 non-null  int64         
 1   transaction_date  149116 non-null  datetime64[ns]
 2   transaction_time  149116 non-null  object        
 3   transaction_qty   149116 non-null  int64         
 4   store_id          149116 non-null  int64         
 5   store_location    149116 non-null  object        
 6   product_id        149116 non-null  int64         
 7   unit_price        149116 non-null  float64       
 8   product_category  149116 non-null  object        
 9   product_type      149116 non-null  object        
 10  product_detail    149116 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(4), object(5)
memory usage: 12.5+ MB


### Create a Month Column

Check which year(s) and month(s) the dataset covers.

In [7]:
# Distinct calendar years that appear in the transaction dates.
unique_years = pd.unique(df["transaction_date"].dt.year)
unique_years

array([2023])

Since the data covers only one year (2023), the month number alone identifies each period, so we only need to add a month column.

In [8]:
# Distinct month numbers (1 = January) that appear in the transaction dates.
unique_months = pd.unique(df["transaction_date"].dt.month)
unique_months

array([1, 2, 3, 4, 5, 6])

In [9]:
# Derive the numeric month of each transaction and store it as its own column.
month_numbers = df["transaction_date"].dt.month
df["transaction_month"] = month_numbers

In [10]:
# Show a single row to confirm that `transaction_month` is now part of the frame.
df.head(n=1)

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail,transaction_month
0,1,2023-01-01,07:06:11,2,5,Lower Manhattan,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,1


### Export new dataframe

In [11]:
# Persist the cleaned frame. `index=False` keeps the auto-generated RangeIndex out of the
# file; with the default (`index=True`) it is written as an unnamed first column, which
# comes back as a spurious "Unnamed: 0" column when the CSV is read again.
df.to_csv("clean_coffee_shop_sales.csv", index=False)