# Data anonymization using the concept of masking

In [1]:
# Importing packages
import pandas as pd
from faker import Faker

In [2]:
# Creating Faker instance
fake = Faker()

# Seed Faker's shared random generator so that Restart & Run All regenerates
# the same fake values on every run. Change (or remove) the seed to get a
# different draw of fake data.
Faker.seed(42)

In [3]:
# Load the user records from 'users.csv' (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer='users.csv')

In [4]:
# Display the DataFrame as loaded from the CSV, before any faked columns are added
df

Unnamed: 0,first_name,last_name,gender,email,city,phone,job_title,university,linkedin_skill,ip_address
0,Priscella,Pollett,Agender,ppollett0@foxnews.com,Sesheke,222-398-8279,Internal Auditor,University of Zambia,Lifestyle,129.60.144.253
1,Ana,Onge,Male,aonge1@fda.gov,Querecotillo,650-786-5808,Compensation Analyst,Universidad Nacional de San Antonio Abad,Healthcare Industry,100.2.67.41
2,Butch,Rawls,Male,brawls2@slideshare.net,Licupis,325-412-0143,Health Coach II,Universidad Nacional de San Martín,Visual SVN,48.161.197.47
3,Ambros,Fairrie,Male,afairrie3@reuters.com,Yonghe,664-380-9221,General Manager,Fuzhou University,Nanofabrication,36.188.226.116
4,Gaylene,Titcomb,Female,gtitcomb4@dyndns.org,Wang Yang,477-429-2626,Internal Auditor,Mahachulalongkorn Buddhist University,AQL,181.24.84.57
5,Cleveland,Lindegard,Female,clindegard5@ezinearticles.com,Tarauacá,179-490-8666,Mechanical Systems Engineer,Universidade Vale do Rio Doce,Occupational Therapists,221.193.207.249
6,Fredric,Gascard,Female,fgascard6@bbb.org,Filabusi,669-932-9006,Geologist II,Bindura University of Science Education,Eagle PCB,114.190.66.122
7,Afton,Prandi,Male,aprandi7@feedburner.com,Pak Phanang,735-377-4981,Electrical Engineer,Lampang College of Commerce and Technology,Yeast two-hybrid,161.3.154.93
8,Yehudit,Fleisch,Male,yfleisch8@php.net,Palopat,599-268-1866,Nurse Practicioner,Universitas Islam Sultan Agung,Business Journalism,237.7.164.62
9,Loretta,Daybell,Male,ldaybell9@google.es,Hot,743-857-4116,Help Desk Operator,"University of Korca ""Fan Noli""",Geomatics,15.186.211.54


In [5]:
# Define helper functions that generate fake replacement values with Faker
def mask_first_name(email):
    """Return a fake first name produced by the notebook-level ``fake`` instance.

    Parameters
    ----------
    email
        The original value being masked. Despite its name, this is whatever
        the caller passes in (this notebook applies the function to the
        ``first_name`` column). The value is ignored: the replacement does
        not depend on it.

    Returns
    -------
    The result of ``fake.first_name()``.
    """
    replacement = fake.first_name()
    return replacement

def mask_last_name(email):
    """Return a fake last name produced by the notebook-level ``fake`` instance.

    Parameters
    ----------
    email
        The original value being masked. Despite its name, this is whatever
        the caller passes in (this notebook applies the function to the
        ``last_name`` column). The value is ignored: the replacement does
        not depend on it.

    Returns
    -------
    The result of ``fake.last_name()``.
    """
    replacement = fake.last_name()
    return replacement

def mask_email(email):
    """Return a fake email address produced by the notebook-level ``fake`` instance.

    Parameters
    ----------
    email
        The original email value being masked. The value is ignored: the
        replacement does not depend on it.

    Returns
    -------
    The result of ``fake.email()``.
    """
    replacement = fake.email()
    return replacement

def mask_ip_address(email):
    """Return a fake IPv4 address produced by the notebook-level ``fake`` instance.

    Parameters
    ----------
    email
        The original value being masked. Despite its name, this is whatever
        the caller passes in (this notebook applies the function to the
        ``ip_address`` column). The value is ignored: the replacement does
        not depend on it.

    Returns
    -------
    The result of ``fake.ipv4()``.
    """
    replacement = fake.ipv4()
    return replacement

In [6]:
# Add a 'faked_<column>' companion for each sensitive column
# (first name, last name, email, IP address); the original columns are kept.
column_maskers = [
    ('first_name', mask_first_name),
    ('last_name', mask_last_name),
    ('email', mask_email),
    ('ip_address', mask_ip_address),
]

# Columns are processed in the order listed above, one full column at a time.
for source_column, masker in column_maskers:
    df['faked_' + source_column] = df[source_column].apply(masker)

In [7]:
# Column order for the comparison view: each original column is
# immediately followed by its faked counterpart.
selected_columns = [
    'first_name',
    'faked_first_name',
    'last_name',
    'faked_last_name',
    'email',
    'faked_email',
    'ip_address',
    'faked_ip_address',
]

In [8]:
# Build the comparison frame: all rows, restricted to the columns named in
# selected_columns and in that order.
newdf = df.loc[:, selected_columns]

In [9]:
# Display the comparison frame (each original column next to its faked counterpart)
newdf

Unnamed: 0,first_name,faked_first_name,last_name,faked_last_name,email,faked_email,ip_address,faked_ip_address
0,Priscella,Bryce,Pollett,Hart,ppollett0@foxnews.com,randy12@hotmail.com,129.60.144.253,192.30.38.89
1,Ana,Rachel,Onge,Mccarthy,aonge1@fda.gov,johnwise@gmail.com,100.2.67.41,192.0.43.198
2,Butch,Kathy,Rawls,Wilson,brawls2@slideshare.net,ewilliams@crawford-reese.com,48.161.197.47,198.38.12.194
3,Ambros,Jessica,Fairrie,Barnett,afairrie3@reuters.com,yallen@bell.biz,36.188.226.116,192.31.192.159
4,Gaylene,Joshua,Titcomb,Parrish,gtitcomb4@dyndns.org,patricia18@gmail.com,181.24.84.57,192.62.187.146
5,Cleveland,Ashley,Lindegard,Thompson,clindegard5@ezinearticles.com,rebecca82@foster.net,221.193.207.249,169.247.244.15
6,Fredric,Colleen,Gascard,Johnston,fgascard6@bbb.org,yreyes@lee.biz,114.190.66.122,168.227.44.75
7,Afton,Sheri,Prandi,Ruiz,aprandi7@feedburner.com,vpetty@yahoo.com,161.3.154.93,192.57.24.66
8,Yehudit,Douglas,Fleisch,Long,yfleisch8@php.net,glovermichael@tanner-bradshaw.net,237.7.164.62,192.164.14.67
9,Loretta,Brian,Daybell,Perry,ldaybell9@google.es,xanderson@yahoo.com,15.186.211.54,192.175.50.159
