In [None]:
# 1 Customer Personality Analysis


In [6]:
#✅ Step 3.1: Dataset Load Karna & Column Check Karna

import pandas as pd
from google.colab import files

# CSV file upload karo
uploaded = files.upload()

# File ka naam daalo jo aapne upload kiya
df = pd.read_csv('marketing_campaign.csv', sep='\t')  # Note: sep='\t' because it's tab-separated

# Pehle 5 rows dekho
print(df.head())

# Column names check karo
print(df.columns)


Saving marketing_campaign.csv to marketing_campaign (2).csv
     ID  Year_Birth   Education Marital_Status   Income  Kidhome  Teenhome  \
0  5524        1957  Graduation         Single  58138.0        0         0   
1  2174        1954  Graduation         Single  46344.0        1         1   
2  4141        1965  Graduation       Together  71613.0        0         0   
3  6182        1984  Graduation       Together  26646.0        1         0   
4  5324        1981         PhD        Married  58293.0        1         0   

  Dt_Customer  Recency  MntWines  ...  NumWebVisitsMonth  AcceptedCmp3  \
0  04-09-2012       58       635  ...                  7             0   
1  08-03-2014       38        11  ...                  5             0   
2  21-08-2013       26       426  ...                  4             0   
3  10-02-2014       26        11  ...                  6             0   
4  19-01-2014       94       173  ...                  5             0   

   AcceptedCmp4  AcceptedC

In [7]:
#✅ Step 3.2: Missing Values Handle Karna
# Har column me missing values count karo
print(df.isnull().sum())


ID                      0
Year_Birth              0
Education               0
Marital_Status          0
Income                 24
Kidhome                 0
Teenhome                0
Dt_Customer             0
Recency                 0
MntWines                0
MntFruits               0
MntMeatProducts         0
MntFishProducts         0
MntSweetProducts        0
MntGoldProds            0
NumDealsPurchases       0
NumWebPurchases         0
NumCatalogPurchases     0
NumStorePurchases       0
NumWebVisitsMonth       0
AcceptedCmp3            0
AcceptedCmp4            0
AcceptedCmp5            0
AcceptedCmp1            0
AcceptedCmp2            0
Complain                0
Z_CostContact           0
Z_Revenue               0
Response                0
dtype: int64


In [None]:
#Missing Values Ka Treatment


In [8]:
#➤ Option A: Remove rows with missing values
df = df.dropna()


In [9]:
#➤ Option B: Fill missing values
if 'Income' in df.columns:
    df['Income'] = df['Income'].fillna(df['Income'].mean())


In [10]:
print(df.isnull().sum())


ID                     0
Year_Birth             0
Education              0
Marital_Status         0
Income                 0
Kidhome                0
Teenhome               0
Dt_Customer            0
Recency                0
MntWines               0
MntFruits              0
MntMeatProducts        0
MntFishProducts        0
MntSweetProducts       0
MntGoldProds           0
NumDealsPurchases      0
NumWebPurchases        0
NumCatalogPurchases    0
NumStorePurchases      0
NumWebVisitsMonth      0
AcceptedCmp3           0
AcceptedCmp4           0
AcceptedCmp5           0
AcceptedCmp1           0
AcceptedCmp2           0
Complain               0
Z_CostContact          0
Z_Revenue              0
Response               0
dtype: int64


In [None]:
#✅ Step 3.3: Duplicate Rows Ko Check Aur Remove Karna


In [11]:
#🔎 Step 1: Check for Duplicate Rows
# Duplicate rows kitne hain
duplicate_count = df.duplicated().sum()
print(f"Duplicate rows found: {duplicate_count}")


Duplicate rows found: 0


In [12]:
#🧹 Step 2: Remove Duplicate Rows
# Duplicate rows hatao
df = df.drop_duplicates()


In [13]:
#🔁 Optional: Re-check after removing
print(f"Duplicate rows after cleaning: {df.duplicated().sum()}")


Duplicate rows after cleaning: 0


In [14]:
#✅ Step 3.4: Column Names Cleaning
# Column names ko clean karo: lowercase + underscores + strip
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")


In [15]:
print(df.columns)


Index(['id', 'year_birth', 'education', 'marital_status', 'income', 'kidhome',
       'teenhome', 'dt_customer', 'recency', 'mntwines', 'mntfruits',
       'mntmeatproducts', 'mntfishproducts', 'mntsweetproducts',
       'mntgoldprods', 'numdealspurchases', 'numwebpurchases',
       'numcatalogpurchases', 'numstorepurchases', 'numwebvisitsmonth',
       'acceptedcmp3', 'acceptedcmp4', 'acceptedcmp5', 'acceptedcmp1',
       'acceptedcmp2', 'complain', 'z_costcontact', 'z_revenue', 'response'],
      dtype='object')


In [None]:
#✅ Step 3.5: Date Column Ko Fix Karna (Datetime Format)


In [16]:
#🛠 Step 1: Convert Date Format
# 'dt_customer' ko datetime format me convert karo
df['dt_customer'] = pd.to_datetime(df['dt_customer'], errors='coerce')


In [17]:
#🔍 Step 2: Check Conversion Successful Hai Ya Nahi
print(df['dt_customer'].dtypes)
print(df['dt_customer'].head())


datetime64[ns]
0   2012-04-09
1   2014-08-03
2          NaT
3   2014-10-02
4          NaT
Name: dt_customer, dtype: datetime64[ns]


In [None]:
#✅ Step 3.6: Data Types Fixing

In [18]:
#🔍 Step 1: Dekho Abhi Ke Data Types
print(df.dtypes)


id                              int64
year_birth                      int64
education                      object
marital_status                 object
income                        float64
kidhome                         int64
teenhome                        int64
dt_customer            datetime64[ns]
recency                         int64
mntwines                        int64
mntfruits                       int64
mntmeatproducts                 int64
mntfishproducts                 int64
mntsweetproducts                int64
mntgoldprods                    int64
numdealspurchases               int64
numwebpurchases                 int64
numcatalogpurchases             int64
numstorepurchases               int64
numwebvisitsmonth               int64
acceptedcmp3                    int64
acceptedcmp4                    int64
acceptedcmp5                    int64
acceptedcmp1                    int64
acceptedcmp2                    int64
complain                        int64
z_costcontac

In [20]:
#🛠 Step 2: Data Type Fix Karna
# year_birth should be integer
df['year_birth'] = df['year_birth'].astype(int)

# income should be float
df['income'] = df['income'].astype(float)

# education and marital_status should be category type
df['education'] = df['education'].astype('category')
df['marital_status'] = df['marital_status'].astype('category')

# complain and response should be boolean (optional)
df['complain'] = df['complain'].astype(bool)
df['response'] = df['response'].astype(bool)


In [21]:
#✅ Verify Final Data Types:
print(df.dtypes)


id                              int64
year_birth                      int64
education                    category
marital_status               category
income                        float64
kidhome                         int64
teenhome                        int64
dt_customer            datetime64[ns]
recency                         int64
mntwines                        int64
mntfruits                       int64
mntmeatproducts                 int64
mntfishproducts                 int64
mntsweetproducts                int64
mntgoldprods                    int64
numdealspurchases               int64
numwebpurchases                 int64
numcatalogpurchases             int64
numstorepurchases               int64
numwebvisitsmonth               int64
acceptedcmp3                    int64
acceptedcmp4                    int64
acceptedcmp5                    int64
acceptedcmp1                    int64
acceptedcmp2                    int64
complain                         bool
z_costcontac

In [None]:
#✅ Step 4: Save and Download Cleaned Dataset
#Now that your data is fully cleaned, you need to:

#Save the cleaned data to a CSV file

#Download that file from Colab to your computer



In [22]:
#📝 Step 1: Save as CSV in Colab
df.to_csv('cleaned_marketing_campaign.csv', index=False)


In [23]:
#⬇️ Step 2: Download the File
from google.colab import files
files.download('cleaned_marketing_campaign.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>