In [54]:
import pandas as pd
import numpy as np

In [55]:
from sklearn.preprocessing import OneHotEncoder

In [56]:
data = pd.read_csv("../data/test.csv")
data.head()

Unnamed: 0,id,Name,Gender,Age,City,Working Professional or Student,Profession,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness
0,140700,Shivam,Male,53.0,Visakhapatnam,Working Professional,Judge,,2.0,,,5.0,Less than 5 hours,Moderate,LLB,No,9.0,3.0,Yes
1,140701,Sanya,Female,58.0,Kolkata,Working Professional,Educational Consultant,,2.0,,,4.0,Less than 5 hours,Moderate,B.Ed,No,6.0,4.0,No
2,140702,Yash,Male,53.0,Jaipur,Working Professional,Teacher,,4.0,,,1.0,7-8 hours,Moderate,B.Arch,Yes,12.0,4.0,No
3,140703,Nalini,Female,23.0,Rajkot,Student,,5.0,,6.84,1.0,,More than 8 hours,Moderate,BSc,Yes,10.0,4.0,No
4,140704,Shaurya,Male,47.0,Kalyan,Working Professional,Teacher,,5.0,,,5.0,7-8 hours,Moderate,BCA,Yes,3.0,4.0,No


In [57]:
data.shape

(93800, 19)

In [58]:
data = data.drop(["id", "Name"], axis=1)

In [59]:
def return_value_counts(col):
    print("Count of unique values in " + col + " column are:\n", data[col].value_counts())

def return_null_values(col):
    print(str("Null values in " + col + " column are ="), data[col].isnull().sum())

In [60]:
one_hot_encoder = OneHotEncoder(sparse_output=False)

# Fit and transform the Gender column
gender_encoded = one_hot_encoder.fit_transform(data[['Gender']])
df_encoded = pd.DataFrame(
    gender_encoded, columns=one_hot_encoder.get_feature_names_out(["Gender"])
)
data = pd.concat([data, df_encoded], axis=1)
data = data.drop(["Gender"], axis=1)

In [61]:
data.isnull().sum()

Age                                          0
City                                         0
Working Professional or Student              0
Profession                               24632
Academic Pressure                        75033
Work Pressure                            18778
CGPA                                     75034
Study Satisfaction                       75033
Job Satisfaction                         18774
Sleep Duration                               0
Dietary Habits                               5
Degree                                       2
Have you ever had suicidal thoughts ?        0
Work/Study Hours                             0
Financial Stress                             0
Family History of Mental Illness             0
Gender_Female                                0
Gender_Male                                  0
dtype: int64

In [62]:
def cat_age(age):
    if age >= 18 and age <= 26.4:
        return 0
    elif age > 26.4 and age <= 34.8:
        return 1
    elif age > 34.8 and age <= 43.2:
        return 2
    elif age > 43.2 and age <= 51.6:
        return 3
    return 4

data["Age"] = data["Age"].apply(cat_age)

In [63]:
data["Student"] = data[
    "Working Professional or Student"
].map({"Working Professional": 0, "Student": 1})

In [64]:
data.drop("Working Professional or Student", axis=1, inplace=True)

In [65]:
data["Work Pressure"].fillna(data["Academic Pressure"], inplace=True)

In [66]:
data["Work Pressure"].fillna(data["Work Pressure"].median(), inplace=True)

In [67]:
data = data.drop(["Academic Pressure"], axis=1)

In [68]:
data["Job Satisfaction"].fillna(data["Study Satisfaction"], inplace=True)
median_satisfaction = data["Job Satisfaction"].median()
data["Job Satisfaction"].fillna(median_satisfaction, inplace=True)
data = data.drop(["Study Satisfaction"], axis=1)

In [69]:
data["CGPA"].fillna(-1, inplace=True)
data["CGPA"] = np.round(data["CGPA"])

In [70]:
suicidal_thoughts = {"No": 0, "Yes": 1}
data["suicidal_thoughts"] = data["Have you ever had suicidal thoughts ?"].map(
    suicidal_thoughts
)
data.drop(["Have you ever had suicidal thoughts ?"], axis=1, inplace=True)

In [71]:
family_history = {"No": 0, "Yes": 1}
data["family_history"] = data["Family History of Mental Illness"].map(
    family_history
)
data.drop(["Family History of Mental Illness"], axis=1, inplace=True)

In [72]:
return_value_counts("Financial Stress")

Count of unique values in Financial Stress column are:
 Financial Stress
2.0    21151
5.0    18694
4.0    18453
1.0    18341
3.0    17161
Name: count, dtype: int64


In [73]:
return_value_counts("Work/Study Hours")

Count of unique values in Work/Study Hours column are:
 Work/Study Hours
10.0    9450
11.0    8555
9.0     8315
0.0     8132
12.0    7657
2.0     7083
6.0     7029
7.0     6646
1.0     6525
3.0     6261
5.0     6118
4.0     6079
8.0     5950
Name: count, dtype: int64


Work on city, profession, sleep, dietary, degree

In [74]:
data["City"].unique()

array(['Visakhapatnam', 'Kolkata', 'Jaipur', 'Rajkot', 'Kalyan', 'Mumbai',
       'Surat', 'Srinagar', 'Delhi', 'Lucknow', 'Thane', 'Meerut',
       'Nagpur', 'Ghaziabad', 'Chennai', 'Varanasi', 'Indore', 'Pune',
       'Hyderabad', 'Kanpur', 'Nashik', 'Bhopal', 'Faridabad',
       'Bangalore', 'Vasai-Virar', 'Ludhiana', 'Patna', 'Vadodara',
       'Ahmedabad', 'Agra', 'Malyan', 'Pratyush', 'Vidya',
       'Less than 5 hours', 'Aditi', 'Keshav', 'Nalini', 'Mhopal', 'Avni',
       'Ira', 'Vaishnavi', 'Bhavna', 'Lawyer', 'Thani', 'Hrithik', 'City',
       'Unaly', 'Is Kanpur', 'Golkata', 'Less Delhi', 'Sara', 'Saurav',
       'Vikram', 'Parth', 'Siddhesh', 'Vaikot', 'Leela', 'Chemist',
       'San Vasai-Virar', 'No', 'More Delhi', 'Saanvi', 'Pratham',
       'Vidhi', 'Abhinav', 'Rolkata', 'Ghopal', 'No.12'], dtype=object)

In [75]:
data.loc[data["City"] == "Malyan", "City"] = "Kalyan"
data.loc[data["City"] == "Mhopal", "City"] = "Bhopal"
data.loc[data["City"] == "Thani", "City"] = "Thane"
data.loc[data["City"] == "Is Kanpur", "City"] = "Kanpur"
data.loc[data["City"] == "Golkata", "City"] = "Kolkata"
data.loc[data["City"] == "Less Delhi", "City"] = "Delhi"
data.loc[data["City"] == "San Vasai-Virar", "City"] = "Vasai-Virar"
data.loc[data["City"] == "More Delhi", "City"] = "Delhi"
data.loc[data["City"] == "Rolkata", "City"] = "Kolkata"
data.loc[data["City"] == "Ghopal", "City"] = "Bhopal"

others = [
    "Pratyush",
    "Vidya",
    "Less than 5 hours",
    "Aditi",
    "Keshav",
    "Nalini",
    "Avni",
    "Ira",
    "Vaishnavi",
    "Bhavna",
    "Lawyer",
    "Hrithik",
    "City",
    "Unaly",
    "Sara",
    "Saurav",
    "Vikram",
    "Parth",
    "Siddhesh",
    "Vaikot",
    "Leela",
    "Chemist",
    "No",
    "Saanvi",
    "Pratham",
    "Vidhi",
    "Abhinav",
    "No.12",
]
for name in others:
    data.loc[data["City"] == name, "City"] = "Others"

In [76]:
data["City"].unique()

array(['Visakhapatnam', 'Kolkata', 'Jaipur', 'Rajkot', 'Kalyan', 'Mumbai',
       'Surat', 'Srinagar', 'Delhi', 'Lucknow', 'Thane', 'Meerut',
       'Nagpur', 'Ghaziabad', 'Chennai', 'Varanasi', 'Indore', 'Pune',
       'Hyderabad', 'Kanpur', 'Nashik', 'Bhopal', 'Faridabad',
       'Bangalore', 'Vasai-Virar', 'Ludhiana', 'Patna', 'Vadodara',
       'Ahmedabad', 'Agra', 'Others'], dtype=object)

In [77]:
train = pd.read_csv("../data/train.csv")

In [78]:
names_to_change = [
    "Vidhi",
    "Ayush",
    "Krishna",
    "Aishwarya",
    "Keshav",
    "Harsha",
    "Nalini",
    "Aditya",
    "Malyansh",
    "Raghavendra",
    "Saanvi",
    "M.Tech",
    "Bhavna",
    "Nandini",
    "M.Com",
    "Plata",
    "Atharv",
    "Pratyush",
    "City",
    "3.0",
    "MCA",
    "Mira",
    "Moreadhyay",
    "Morena",
    "Ishkarsh",
    "Kashk",
    "Mihir",
    "Vidya",
    "Anvi",
    "Krinda",
    "Ayansh",
    "Shrey",
    "Ivaan",
    "Vaanya",
    "Gaurav",
    "Harsh",
    "Reyansh",
    "Kashish",
    "Kibara",
    "Vaishnavi",
    "Chhavi",
    "Parth",
    "Mahi",
    "Tushar",
    "MSc",
    "No",
    "Rashi",
    "ME",
    "Researcher",
    "Kagan",
    "Armaan",
    "Ithal",
    "Nalyan",
    "Dhruv",
    "Galesabad",
    "Itheg",
    "Aaradhya",
    "Pooja",
    "Khushi",
    "Jhanvi",
    "Unirar",
]
wrong_spelling = ["Less Delhi", "Less than 5 Kalyan", "Tolkata", "Molkata", "Khaziabad"]


def name_to_others(lst):
    for name in lst:
        train.loc[train["City"] == name, "City"] = "Others"


name_to_others(names_to_change)
for name in wrong_spelling:
    if name == "Less Delhi":
        train.loc[train["City"] == name, "City"] = "Delhi"
    elif name == "Less than 5 Kalyan":
        train.loc[train["City"] == name, "City"] = "Kalyan"
    elif name == "Tolkata" or name == "Molkata":
        train.loc[train["City"] == name, "City"] = "Kolkata"
    else:
        train.loc[train["City"] == name, "City"] = "Ghaziabad"
train.loc[train["City"] == "Ishanabad", "City"] = "Others"
train.loc[train["City"] == "Gurgaon", "City"] = "Others"
city_mean_encoded = train.groupby("City")["Depression"].mean()
city_mean_encoded

City
Agra             0.192357
Ahmedabad        0.217887
Bangalore        0.175600
Bhopal           0.232518
Chennai          0.194362
Delhi            0.191708
Faridabad        0.146879
Ghaziabad        0.195250
Hyderabad        0.275133
Indore           0.160920
Jaipur           0.181377
Kalyan           0.199636
Kanpur           0.125739
Kolkata          0.173577
Lucknow          0.214019
Ludhiana         0.192690
Meerut           0.134045
Mumbai           0.131293
Nagpur           0.144928
Nashik           0.158715
Others           0.195652
Patna            0.163234
Pune             0.161036
Rajkot           0.173228
Srinagar         0.211667
Surat            0.201898
Thane            0.242248
Vadodara         0.167688
Varanasi         0.142206
Vasai-Virar      0.197918
Visakhapatnam    0.159196
Name: Depression, dtype: float64

In [79]:
data["City_encoded"] = data["City"].map(city_mean_encoded)

In [84]:
data.drop(["City"], axis=1, inplace=True)

In [86]:
return_value_counts("Profession")

Count of unique values in Profession column are:
 Profession
Teacher           16385
Content Writer     5187
Architect          2982
Consultant         2920
Pharmacist         2656
                  ...  
Manvi                 1
24th                  1
ME                    1
3M                    1
M.Pharm               1
Name: count, Length: 64, dtype: int64


In [88]:
data[data["Profession"].isnull()][["Student"]].value_counts()

Student
1          18746
0           5886
Name: count, dtype: int64

In [89]:
data.loc[
    (data["Profession"].isnull()) & (data["Student"] == 1),
    "Profession",
] = "Student"
data.loc[data["Profession"].isnull(), "Profession"] = "Unemployed"

In [90]:
data["Profession"].unique()

array(['Judge', 'Educational Consultant', 'Teacher', 'Student',
       'Customer Support', 'Unemployed', 'Chemist', 'Content Writer',
       'Consultant', 'HR Manager', 'Research Analyst', 'Digital Marketer',
       'Electrician', 'Marketing Manager', 'Plumber', 'Pharmacist',
       'Lawyer', 'Pilot', 'Architect', 'Chef', 'Graphic Designer',
       'Entrepreneur', 'Manager', 'Mechanical Engineer',
       'Software Engineer', 'Travel Consultant', 'Finanancial Analyst',
       'Financial Analyst', 'Doctor', 'Business Analyst',
       'UX/UI Designer', 'Sales Executive', 'Data Scientist',
       'Accountant', 'Researcher', 'Civil Engineer', 'Investment Banker',
       'Unhealthy', 'B.Ed', 'Working Professional', '3M', 'ME', 'B.Pharm',
       '24th', 'Manvi', 'Yogesh', 'Samar', 'Surat', 'PhD', 'M.Ed', 'MD',
       'Name', 'MCA', 'Simran', 'Analyst', 'Profession', 'BBA', 'M.Tech',
       'LLM', 'Surgeon', 'No', 'Unveil', 'City Consultant', 'M.Pharm'],
      dtype=object)

In [91]:
others = [
    "Unhealthy",
    "B.Ed",
    "Working Professional",
    "3M",
    "ME",
    "B.Pharm",
    "24th",
    "Manvi",
    "Yogesh",
    "Samar",
    "Surat",
    "PhD",
    "M.Ed",
    "MD",
    "Name",
    "MCA",
    "Simran",
    "Profession",
    "BBA",
    "M.Tech",
    "LLM",
    "No",
    "Unveil",
    "City Consultant",
    "M.Pharm",
]
data.loc[data["Profession"].isin(others), "Profession"] = "Others"
data.loc[data["Profession"] == "Analyst", "Profession"] = "Business Analyst"
data.loc[data["Profession"] == "Surgeon", "Profession"] = "Doctor"

In [92]:
data["Profession"].unique()

array(['Judge', 'Educational Consultant', 'Teacher', 'Student',
       'Customer Support', 'Unemployed', 'Chemist', 'Content Writer',
       'Consultant', 'HR Manager', 'Research Analyst', 'Digital Marketer',
       'Electrician', 'Marketing Manager', 'Plumber', 'Pharmacist',
       'Lawyer', 'Pilot', 'Architect', 'Chef', 'Graphic Designer',
       'Entrepreneur', 'Manager', 'Mechanical Engineer',
       'Software Engineer', 'Travel Consultant', 'Finanancial Analyst',
       'Financial Analyst', 'Doctor', 'Business Analyst',
       'UX/UI Designer', 'Sales Executive', 'Data Scientist',
       'Accountant', 'Researcher', 'Civil Engineer', 'Investment Banker',
       'Others'], dtype=object)

In [94]:
train["Student"] = train["Working Professional or Student"].map(
    {"Working Professional": 0, "Student": 1}
)
train.loc[
    (train["Profession"].isnull()) & (train["Student"] == 1),
    "Profession",
] = "Student"
train.loc[train["Profession"].isnull(), "Profession"] = "Unemployed"
train.loc[train["Profession"] == "Finanancial Analyst", "Profession"] = (
    "Financial Analyst"
)
train.loc[train["Profession"] == "Dev", "Profession"] = "Software Engineer"
train.loc[train["Profession"] == "City Manager", "Profession"] = "Manager"
train.loc[train["Profession"] == "Analyst", "Profession"] = "Business Analyst"
train.loc[train["Profession"] == "Medical Doctor", "Profession"] = "Doctor"
profession_to_change = [
    "B.Com",
    "BE",
    "Yogesh",
    "MBA",
    "LLM",
    "BCA",
    "Academic",
    "Profession",
    "FamilyVirar",
    "BBA",
    "Working Professional",
    "MBBS",
    "Patna",
    "Unveil",
    "B.Ed",
    "Nagpur",
    "Moderate",
    "M.Ed",
    "Pranav",
    "Visakhapatnam",
    "PhD",
    "Yuvraj",
]
train.loc[train["Profession"].isin(profession_to_change), "Profession"] = "Others"
train.loc[train["Profession"] == "Family Consultant", "Profession"] = "Others"
profession_mean_encoded = train.groupby("Profession")["Depression"].mean()
profession_mean_encoded

Profession
Accountant                0.064227
Architect                 0.099085
Business Analyst          0.056610
Chef                      0.048567
Chemist                   0.028311
Civil Engineer            0.080952
Consultant                0.047056
Content Writer            0.018684
Customer Support          0.045255
Data Scientist            0.077824
Digital Marketer          0.045190
Doctor                    0.052826
Educational Consultant    0.074684
Electrician               0.041719
Entrepreneur              0.020889
Financial Analyst         0.064072
Graphic Designer          0.185727
HR Manager                0.106912
Investment Banker         0.071247
Judge                     0.108645
Lawyer                    0.075045
Manager                   0.081703
Marketing Manager         0.050607
Mechanical Engineer       0.101105
Others                    0.083333
Pharmacist                0.026715
Pilot                     0.051228
Plumber                   0.064073
Research 

In [95]:
data["Profession_encoded"] = data["Profession"].map(profession_mean_encoded)
data.drop(["Profession"], axis=1, inplace=True)

In [97]:
data["Profession_encoded"].fillna(data["Profession_encoded"].median(), inplace=True)

In [100]:
data["Sleep Duration"].unique()

array(['Less than 5 hours', '7-8 hours', 'More than 8 hours', '5-6 hours',
       '0', 'Meerut', '9-5 hours', '6-7 hours', '60-65 hours', 'Vivan',
       '3-4 hours', '1-6 hours', '9-5', 'Unhealthy', '8-9 hours',
       '4-5 hours', 'than 5 hours', '9-6 hours', '1-2 hours',
       '8-89 hours', 'Have_you_ever_had_suicidal_thoughts', '20-21 hours',
       '10-6 hours', '1-3 hours', '6 hours', '50-75 hours', '4-6 hours',
       '2-3 hours', '9-11 hours', '9-10 hours', '3-6 hours'], dtype=object)

In [101]:
sleep_duration = {
    "Less than 5 hours": 5,
    "7-8 hours": 8,
    "More than 8 hours": 8,
    "60-65 hours": 8,
    "1-6 hours": 5,
    "5-6 hours": 6,
    "9-5": 7,
    "9-5 hours": 7,
    "6-7 hours": 7,
    "3-4 hours": 5,
    "8-9 hours": 8,
    "4-5 hours": 5,
    "9-6 hours": 8,
    "1-2 hours": 5,
    "8-89 hours": 8,
    "6-8 hours": 7,
    "4-6 hours": 5,
    "than 5 hours": 5,
    "20-21 hours": 5,
    "10-6 hours": 8,
    "1-3 hours": 5,
    "6 hours": 6,
    "50-75 hours": 8,
    "2-3 hours": 5,
    "9-11 hours": 8,
    "9-10 hours": 8,
    "3-6 hours": 5,
}
data["Sleep Duration"] = data["Sleep Duration"].map(sleep_duration)

In [103]:
data["Sleep Duration"].fillna(8, inplace=True)

In [106]:
data["Dietary Habits"].unique()

array(['Moderate', 'Healthy', 'Unhealthy', 'More Healthy', 'No', 'Indoor',
       'Prachi', nan, 'Male', 'Less Healthy', 'Mealy', 'Resistant', 'MCA',
       '5 Healthy', 'Academic', 'Educational', 'Soham', '5 Unhealthy',
       'Vivaan', 'Raghav', '1.0', 'Naina', 'Kolkata'], dtype=object)

In [107]:
dietary_habits = {
    "Unhealthy": 2,
    "Moderate": 1,
    "Healthy": 0,
    "More Healthy": 0,
    "No Healthy": 2,
    "Less Healthy": 2,
    "Less than Healthy": 1,
    "No": 2,
    "5 Healthy": 0,
    "5 Unhealthy": 2,
}
data["Dietary Habits"] = data["Dietary Habits"].map(dietary_habits)

In [108]:
data["Dietary Habits"].fillna(1, inplace=True)

In [111]:
data["Degree"].unique()

array(['LLB', 'B.Ed', 'B.Arch', 'BSc', 'BCA', 'B.Com', 'MA', 'BA', 'BBA',
       'Class 12', 'MD', 'MBA', 'M.Ed', 'M.Pharm', 'BHM', 'LLM', 'PhD',
       'M.Com', 'BE', 'MBBS', 'B.Tech', 'ME', 'MCA', 'B.Pharm', 'MHM',
       'M.Tech', 'BTech', 'MSc', 'BArch', 'B. Gender', 'B.Study_Hours',
       'Advait', 'M.Arch', 'A.Ed', 'Mechanical Engineer', 'B.H', 'B.Sc',
       'B', 'M.UI', 'Vibha', 'B BCA', 'B.Press', 'BPharm', 'Gagan',
       'MPharm', 'Travel Consultant', '5.65', 'Business Analyst',
       'Eshita', 'B_Com', 'Navya', 'B._Pharm', 'Pune', 'Bian', 'B.M.Com',
       'Kavya', 'M.M.Ed', 'S.Pharm', 'Vrinda', 'M', 'E.Ed', '3.0',
       'Moham', 'B.BA', nan, 'I.Ed', 'Degree', 'Magan', 'B B.Tech',
       'M.B.Ed', 'Bhopal', 'B Financial Analyst', 'GCA', 'G.Ed', 'Rupak',
       'RCA', 'B.CA', 'PCA', 'J.Ed', 'BH', 'BEd', '8.95', 'Aadhya', '20',
       'Banchal', 'M.', 'K.Ed', 'BHCA'], dtype=object)

In [112]:
bachelors = data[data["Degree"].str.startswith("B", na=False)]["Degree"].unique()
masters = data[data["Degree"].str.startswith("M", na=False)]["Degree"].unique()
data["Degree"] = data["Degree"].replace(bachelors, "bachelors")
data["Degree"] = data["Degree"].replace(masters, "masters")

In [113]:
data["Degree"].unique()

array(['LLB', 'bachelors', 'masters', 'Class 12', 'LLM', 'PhD', 'Advait',
       'A.Ed', 'Vibha', 'Gagan', 'Travel Consultant', '5.65', 'Eshita',
       'Navya', 'Pune', 'Kavya', 'S.Pharm', 'Vrinda', 'E.Ed', '3.0', nan,
       'I.Ed', 'Degree', 'GCA', 'G.Ed', 'Rupak', 'RCA', 'PCA', 'J.Ed',
       '8.95', 'Aadhya', '20', 'K.Ed'], dtype=object)

In [114]:
degrees = {
    "bachelors": "bachelors",
    "masters": "masters",
    "PhD": "PhD",
    "LLB": "bachelors",
    "Class 12": "high_school",
    "LLM": "masters",
    "A.Ed": "bachelors",
    "S.Pharm": "bachelors",
    "I.Ed": "bachelors",
    "GCA": "bachelors",
    "G.Ed": "bachelors",
    "RCA": "bachelors",
    "PCA": "bachelors",
    "J.Ed": "bachelors",
    "K.Ed": "bachelors",
}
data["Degree"] = data["Degree"].map(degrees)

In [116]:
data["Degree"].fillna("Others", inplace=True)

In [117]:
degree_map = {"others": 0, "high_school": 1, "bachelors": 2, "masters": 3, "PhD": 4}
data["Degree"] = data["Degree"].map(degree_map)

In [120]:
data["Degree"].fillna(1, inplace=True)

In [121]:
data.head(2)

Unnamed: 0,Age,Work Pressure,CGPA,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Work/Study Hours,Financial Stress,Gender_Female,Gender_Male,Student,suicidal_thoughts,family_history,City_encoded,Profession_encoded
0,4,2.0,-1.0,5.0,5.0,1.0,2.0,9.0,3.0,0.0,1.0,0,0,1,0.159196,0.108645
1,4,2.0,-1.0,4.0,5.0,1.0,2.0,6.0,4.0,1.0,0.0,0,0,0,0.173577,0.074684


In [122]:
data.isnull().sum()

Age                   0
Work Pressure         0
CGPA                  0
Job Satisfaction      0
Sleep Duration        0
Dietary Habits        0
Degree                0
Work/Study Hours      0
Financial Stress      0
Gender_Female         0
Gender_Male           0
Student               0
suicidal_thoughts     0
family_history        0
City_encoded          0
Profession_encoded    0
dtype: int64

In [123]:
data.to_csv("../data/test_cleaned_v1.csv", index=False)