# Hướng dẫn Python Pandas (Phần 5): Cập nhật hàng và cột - Sửa đổi dữ liệu trong DataFrames

# Part 1

##### Khai báo thư viện

In [1]:
import pandas as pd

##### Tạo từ điển people

In [2]:
# Sample records stored column-wise: each key is a column name and each list
# holds that column's values in row order.
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=[
        "CoreyMSchafer@gmail.com",
        "JaneDoe@gmail.com",
        "JohnDoe@gmail.com",
    ],
)

##### Tạo dataframe từ từ điển people

In [3]:
df = pd.DataFrame(people)

##### Hiển thị dataframe vừa tạo

In [4]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


##### In ra mảng các cột trong dữ liệu

In [5]:
df.columns.values

array(['first', 'last', 'email'], dtype=object)

##### Thay thế tên cột

In [6]:
df.columns = ['first name', 'last name', 'email']

##### Cột của dữ liệu sau khi thay thế

In [7]:
df.columns

Index(['first name', 'last name', 'email'], dtype='object')

##### Hiển thị dữ liệu sau khi thay thế tên cột

In [8]:
df

Unnamed: 0,first name,last name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


##### In hoa tên cột

In [9]:
df.columns = [x.upper() for x in df.columns]

##### Hiển thị dữ liệu sau khi in hoa tên cột

In [10]:
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


##### Thêm dấu _ cho tên cột

In [11]:
df.columns = df.columns.str.replace(' ', '_')

##### Hiển thị dữ liệu sau khi thêm dấu _ cho tên cột

In [12]:
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


##### In thường tên cột

In [13]:
df.columns = [x.lower() for x in df.columns]

##### In ra dữ liệu sau khi in thường tên cột

In [14]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


##### Đổi lại tên cột về ban đầu

In [15]:
df.rename(columns={'first_name':'first', 'last_name':'last'}, inplace=True)

##### Hiển thị dữ liệu sau khi thay đổi

In [16]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


##### Lấy ra hàng số 2, đổi last thành 'Smith' và email thành 'JohnSmith@gmail.com' (3 cách)

In [17]:
# Three ways to set the last name and email of the row labelled 2.
# Every variant indexes the DataFrame in a single .loc call. The chained form
# df.loc[2][1:] = ... assigns into an intermediate Series, which may be a copy,
# so the update can silently fail to reach df.

# 1) Row by label, every column from the second one onward.
df.loc[2, df.columns[1:]] = ['Smith', 'JohnSmith@gmail.com']
# 2) Overwrite the whole row; the list must supply one value per column.
df.loc[2] = ['John', 'Smith', 'JohnSmith@gmail.com']
# 3) Row by label, explicitly named columns.
df.loc[2, ['last', 'email']] = ['Smith', 'JohnSmith@gmail.com']

##### In ra dữ liệu sau khi đổi

In [18]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Smith,JohnSmith@gmail.com


##### df.at[row_label, column_label] dùng để truy cập hoặc gán giá trị cho 1 ô trong dataframe khi biết nhãn cụ thể của dòng và cột

In [19]:
df.at[2, 'last'] = 'Doe'

##### In dữ liệu sau khi đổi

In [20]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnSmith@gmail.com


In [21]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnSmith@gmail.com


##### Lấy ra người có họ tên là John Doe, sửa họ thành Smith (dùng bộ lọc)

In [22]:
# Boolean mask: True for rows whose first name is 'John' and last name is 'Doe'.
# Each comparison is parenthesised because & binds more tightly than ==.
filt = (df['first']=='John') & (df['last']=='Doe')
# Assign through one .loc call (row mask, column label) so the matching rows
# are updated in df itself.
df.loc[filt, 'last'] = 'Smith'

##### In ra dữ liệu sau khi thay đổi

In [23]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Smith,JohnSmith@gmail.com


##### Đưa tất cả email về chữ thường (2 cách)

In [24]:
# Two interchangeable ways to lower-case the 'email' column. Running both is
# harmless: the second leaves the already lower-cased values unchanged.
df['email'] = [x.lower() for x in df['email']]  # element by element, in Python
df['email'] = df['email'].str.lower()  # pandas .str accessor on the whole column

##### Hiển thị dữ liệu sau thay đổi

In [25]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@gmail.com
2,John,Smith,johnsmith@gmail.com


# apply(), map(), applymap(), replace()

## >>> df.apply là một phương thức được sử dụng để áp dụng một hàm cho mỗi hàng hoặc cột của DataFrame và trả về kết quả

##### Xem độ dài của tất cả các email

In [26]:
len_email = {x:len(x) for x in df['email']}
len_email

{'coreymschafer@gmail.com': 23,
 'janedoe@gmail.com': 17,
 'johnsmith@gmail.com': 19}

##### Xem độ dài của tất cả email (2 cách)

In [27]:
df['email'].str.len()
df['email'].apply(len)

0    23
1    17
2    19
Name: email, dtype: int64

##### Hàm trả về email in hoa

In [28]:
def upper_email(email):
    """Return ``email`` converted to upper case."""
    uppercased = email.upper()
    return uppercased

##### Áp dụng hàm trên trong phương thức apply

In [29]:
df['email'].apply(upper_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@GMAIL.COM
2        JOHNSMITH@GMAIL.COM
Name: email, dtype: object

##### Cách khác để in hoa email

In [30]:
df['email'] = [x.upper() for x in df['email']]

##### In ra thay đổi

In [31]:
df['email']

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@GMAIL.COM
2        JOHNSMITH@GMAIL.COM
Name: email, dtype: object

##### In thường email với hàm ẩn danh lambda

In [32]:
df['email'] = df['email'].apply(lambda x: x.lower())

##### In ra thay đổi

In [33]:
df['email']

0    coreymschafer@gmail.com
1          janedoe@gmail.com
2        johnsmith@gmail.com
Name: email, dtype: object

##### Đưa ra chuỗi tối thiểu của mỗi cột

In [34]:
df.apply(pd.Series.min)

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [35]:
df.apply(lambda x: x.min())

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

##### Đưa ra độ dài

In [36]:
df.apply(len, axis='rows')

first    3
last     3
email    3
dtype: int64

In [37]:
df.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

In [38]:
df.apply(len)

first    3
last     3
email    3
dtype: int64

##### Đưa ra độ dài các phần tử trong cả dataframe

## >>> df.applymap là một phương thức được sử dụng để áp dụng một hàm cho từng phần tử của DataFrame và trả về một DataFrame mới với các giá trị được biến đổi theo hàm đã được chỉ định.

In [39]:
# applymap(len) already returns a new DataFrame holding the length of every
# cell, so no pd.DataFrame(...) wrapper is needed.
# NOTE: applymap is deprecated since pandas 2.1 in favour of DataFrame.map;
# switch once the installed pandas version is confirmed to provide it.
result = df.applymap(len)

In [40]:
result.applymap(lambda x: x**2)

Unnamed: 0,first,last,email
0,25,49,529
1,16,9,289
2,16,25,361


In [41]:
df = df.applymap(str.lower)

## >>> df['column'].map là một phương thức áp dụng cho Series (một cột hoặc một dòng của DataFrame) để ánh xạ các giá trị từ một loạt giá trị này sang một loạt giá trị khác, hoặc từ một hàm hoặc ánh xạ được chỉ định.

In [42]:
df['first'].map(lambda x: x.upper())

0    COREY
1     JANE
2     JOHN
Name: first, dtype: object

In [43]:
df['last'].map({'schafer':'name', 'doe':'your', 'smith':'one'})

0    name
1    your
2     one
Name: last, dtype: object

## >>> Phương thức replace trong pandas được sử dụng để thay thế các giá trị trong DataFrame hoặc Series (một cột) bằng các giá trị mới; những giá trị không có trong ánh xạ được giữ nguyên.

In [44]:
df.replace({'doe':3, 'corey':4, 'janedoe@gmail.com':5})

Unnamed: 0,first,last,email
0,4,schafer,coreymschafer@gmail.com
1,jane,3,5
2,john,smith,johnsmith@gmail.com


In [45]:
df['last'].replace({'smith':6})

0    schafer
1        doe
2          6
Name: last, dtype: object

# Part 2

##### Đọc dữ liệu và dữ liệu mô tả các thuộc tính

In [46]:
# Load the survey responses and the schema file that describes each column.
# Forward slashes keep these relative paths valid on Windows, macOS and Linux;
# the previous ".\data\..." form only resolves on Windows.
df = pd.read_csv("./data/survey_results_public.csv")
# NOTE(review): "schedma_df" looks like a typo for "schema_df"; the name is
# kept so any later cell that refers to it keeps working.
schedma_df = pd.read_csv("./data/survey_results_schema.csv")

  df = pd.read_csv(r".\data\survey_results_public.csv")


##### Cài đặt cho dữ liệu hiển thị đủ 129 cột và dữ liệu mô tả hiển thị đủ 129 hàng

In [47]:
pd.set_option("display.max_columns", 129)
pd.set_option("display.max_rows", 129)

##### Hiển thị dữ liệu

In [48]:
df.head(10)

Unnamed: 0,Respondent,Hobby,OpenSource,Country,Student,Employment,FormalEducation,UndergradMajor,CompanySize,DevType,YearsCoding,YearsCodingProf,JobSatisfaction,CareerSatisfaction,HopeFiveYears,JobSearchStatus,LastNewJob,AssessJob1,AssessJob2,AssessJob3,AssessJob4,AssessJob5,AssessJob6,AssessJob7,AssessJob8,AssessJob9,AssessJob10,AssessBenefits1,AssessBenefits2,AssessBenefits3,AssessBenefits4,AssessBenefits5,AssessBenefits6,AssessBenefits7,AssessBenefits8,AssessBenefits9,AssessBenefits10,AssessBenefits11,JobContactPriorities1,JobContactPriorities2,JobContactPriorities3,JobContactPriorities4,JobContactPriorities5,JobEmailPriorities1,JobEmailPriorities2,JobEmailPriorities3,JobEmailPriorities4,JobEmailPriorities5,JobEmailPriorities6,JobEmailPriorities7,UpdateCV,Currency,Salary,SalaryType,ConvertedSalary,CurrencySymbol,CommunicationTools,TimeFullyProductive,EducationTypes,SelfTaughtTypes,TimeAfterBootcamp,HackathonReasons,AgreeDisagree1,AgreeDisagree2,AgreeDisagree3,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,FrameworkWorkedWith,FrameworkDesireNextYear,IDE,OperatingSystem,NumberMonitors,Methodology,VersionControl,CheckInCode,AdBlocker,AdBlockerDisable,AdBlockerReasons,AdsAgreeDisagree1,AdsAgreeDisagree2,AdsAgreeDisagree3,AdsActions,AdsPriorities1,AdsPriorities2,AdsPriorities3,AdsPriorities4,AdsPriorities5,AdsPriorities6,AdsPriorities7,AIDangerous,AIInteresting,AIResponsible,AIFuture,EthicsChoice,EthicsReport,EthicsResponsible,EthicalImplications,StackOverflowRecommend,StackOverflowVisit,StackOverflowHasAccount,StackOverflowParticipate,StackOverflowJobs,StackOverflowDevStory,StackOverflowJobsRecommend,StackOverflowConsiderMember,HypotheticalTools1,HypotheticalTools2,HypotheticalTools3,HypotheticalTools4,HypotheticalTools5,WakeTime,HoursComputer,HoursOutside,SkipMeals,ErgonomicDevices,Exercise,Gender,SexualOrientation,EducationParents,RaceEthnicity,Age,Dependents,MilitaryUS,SurveyTooLong,Sur
veyEasy
0,1,Yes,No,Kenya,No,Employed part-time,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Mathematics or statistics,20 to 99 employees,Full-stack developer,3-5 years,3-5 years,Extremely satisfied,Extremely satisfied,Working as a founder or co-founder of my own c...,"I’m not actively looking, but I am open to new...",Less than a year ago,10.0,7.0,8.0,1.0,2.0,5.0,3.0,4.0,9.0,6.0,,,,,,,,,,,,3.0,1.0,4.0,2.0,5.0,5.0,6.0,7.0,2.0,1.0,4.0,3.0,My job status or other personal status changed,,,Monthly,,KES,Slack,One to three months,"Taught yourself a new language, framework, or ...",The official documentation and/or standards fo...,,To build my professional network,Strongly agree,Strongly agree,Neither Agree nor Disagree,JavaScript;Python;HTML;CSS,JavaScript;Python;HTML;CSS,Redis;SQL Server;MySQL;PostgreSQL;Amazon RDS/A...,Redis;SQL Server;MySQL;PostgreSQL;Amazon RDS/A...,AWS;Azure;Linux;Firebase,AWS;Azure;Linux;Firebase,Django;React,Django;React,Komodo;Vim;Visual Studio Code,Linux-based,1.0,Agile;Scrum,Git,Multiple times per day,Yes,No,,Strongly agree,Strongly agree,Strongly agree,Saw an online advertisement and then researche...,1.0,5.0,4.0,7.0,2.0,6.0,3.0,Artificial intelligence surpassing human intel...,Algorithms making important decisions,The developers or the people creating the AI,I'm excited about the possibilities more than ...,No,"Yes, and publicly",Upper management at the company/organization,Yes,10 (Very Likely),Multiple times per day,Yes,I have never participated in Q&A on Stack Over...,"No, I knew that Stack Overflow had a jobs boar...",Yes,,Yes,Extremely interested,Extremely interested,Extremely interested,Extremely interested,Extremely interested,Between 5:00 - 6:00 AM,9 - 12 hours,1 - 2 hours,Never,Standing desk,3 - 4 times per week,Male,Straight or heterosexual,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Black or of African descent,25 - 34 years old,Yes,,The survey was an appropriate length,Very easy
1,3,Yes,Yes,United Kingdom,No,Employed full-time,"Bachelor’s degree (BA, BS, B.Eng., etc.)","A natural science (ex. biology, chemistry, phy...","10,000 or more employees",Database administrator;DevOps specialist;Full-...,30 or more years,18-20 years,Moderately dissatisfied,Neither satisfied nor dissatisfied,Working in a different or more specialized tec...,I am actively looking for a job,More than 4 years ago,1.0,7.0,10.0,8.0,2.0,5.0,4.0,3.0,6.0,9.0,1.0,5.0,3.0,7.0,10.0,4.0,11.0,9.0,6.0,2.0,8.0,3.0,1.0,5.0,2.0,4.0,1.0,3.0,4.0,5.0,2.0,6.0,7.0,I saw an employer’s advertisement,British pounds sterling (£),51000.0,Yearly,70841.0,GBP,Confluence;Office / productivity suite (Micros...,One to three months,"Taught yourself a new language, framework, or ...",The official documentation and/or standards fo...,,,Agree,Agree,Neither Agree nor Disagree,JavaScript;Python;Bash/Shell,Go;Python,Redis;PostgreSQL;Memcached,PostgreSQL,Linux,Linux,Django,React,IPython / Jupyter;Sublime Text;Vim,Linux-based,2.0,,Git;Subversion,A few times per week,Yes,Yes,The website I was visiting asked me to disable it,Somewhat agree,Neither agree nor disagree,Neither agree nor disagree,,3.0,5.0,1.0,4.0,6.0,7.0,2.0,Increasing automation of jobs,Increasing automation of jobs,The developers or the people creating the AI,I'm excited about the possibilities more than ...,Depends on what it is,Depends on what it is,Upper management at the company/organization,Yes,10 (Very Likely),A few times per month or weekly,Yes,A few times per month or weekly,Yes,"No, I have one but it's out of date",7.0,Yes,A little bit interested,A little bit interested,A little bit interested,A little bit interested,A little bit interested,Between 6:01 - 7:00 AM,5 - 8 hours,30 - 59 minutes,Never,Ergonomic keyboard or mouse,Daily or almost every day,Male,Straight or heterosexual,"Bachelor’s degree (BA, BS, B.Eng., etc.)",White or of European descent,35 - 44 years old,Yes,,The survey was an appropriate length,Somewhat easy
2,4,Yes,Yes,United States,No,Employed full-time,Associate degree,"Computer science, computer engineering, or sof...",20 to 99 employees,Engineering manager;Full-stack developer,24-26 years,6-8 years,Moderately satisfied,Moderately satisfied,Working as a founder or co-founder of my own c...,"I’m not actively looking, but I am open to new...",Less than a year ago,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,5,No,No,United States,No,Employed full-time,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",100 to 499 employees,Full-stack developer,18-20 years,12-14 years,Neither satisfied nor dissatisfied,Slightly dissatisfied,Working as a founder or co-founder of my own c...,"I’m not actively looking, but I am open to new...",Less than a year ago,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,A recruiter contacted me,U.S. dollars ($),,,,,,Three to six months,Completed an industry certification program (e...,The official documentation and/or standards fo...,,,Disagree,Disagree,Strongly disagree,C#;JavaScript;SQL;TypeScript;HTML;CSS;Bash/Shell,C#;JavaScript;SQL;TypeScript;HTML;CSS;Bash/Shell,"SQL Server;Microsoft Azure (Tables, CosmosDB, ...","SQL Server;Microsoft Azure (Tables, CosmosDB, ...",Azure,Azure,,Angular;.NET Core;React,Visual Studio;Visual Studio Code,Windows,2.0,Agile;Kanban;Scrum,Git,Multiple times per day,Yes,Yes,The ad-blocking software was causing display i...,Neither agree nor disagree,Somewhat agree,Somewhat agree,Stopped going to a website because of their ad...,,,,,,,,Artificial intelligence surpassing human intel...,Artificial intelligence surpassing human intel...,A governmental or other regulatory body,"I don't care about it, or I haven't thought ab...",No,"Yes, but only within the company",Upper management at the company/organization,Yes,10 (Very Likely),A few times per week,Yes,A few times per month or weekly,Yes,"No, I have one but it's out of date",8.0,Yes,Somewhat interested,Somewhat interested,Somewhat interested,Somewhat interested,Somewhat interested,Between 6:01 - 7:00 AM,9 - 12 hours,Less than 30 minutes,3 - 4 times per week,,I don't typically exercise,Male,Straight or heterosexual,Some college/university study without earning ...,White or of European descent,35 - 44 years old,No,No,The survey was an appropriate length,Somewhat easy
4,7,Yes,No,South Africa,"Yes, part-time",Employed full-time,Some college/university study without earning ...,"Computer science, computer engineering, or sof...","10,000 or more employees",Data or business analyst;Desktop or enterprise...,6-8 years,0-2 years,Slightly satisfied,Moderately satisfied,Working in a different or more specialized tec...,"I’m not actively looking, but I am open to new...",Between 1 and 2 years ago,8.0,5.0,7.0,1.0,2.0,6.0,4.0,3.0,10.0,9.0,1.0,10.0,2.0,4.0,8.0,3.0,11.0,7.0,5.0,9.0,6.0,2.0,1.0,4.0,5.0,3.0,7.0,3.0,6.0,2.0,1.0,4.0,5.0,My job status or other personal status changed,South African rands (R),260000.0,Yearly,21426.0,ZAR,"Office / productivity suite (Microsoft Office,...",Three to six months,Taken a part-time in-person course in programm...,The official documentation and/or standards fo...,,,Strongly agree,Agree,Strongly disagree,C;C++;Java;Matlab;R;SQL;Bash/Shell,Assembly;C;C++;Matlab;SQL;Bash/Shell,SQL Server;PostgreSQL;Oracle;IBM Db2,PostgreSQL;Oracle;IBM Db2,Arduino;Windows Desktop or Server,Arduino;Windows Desktop or Server,,,Notepad++;Visual Studio;Visual Studio Code,Windows,2.0,Evidence-based software engineering;Formal sta...,Zip file back-ups,Weekly or a few times per month,No,,,Somewhat agree,Somewhat agree,Somewhat disagree,Clicked on an online advertisement;Saw an onli...,2.0,3.0,4.0,6.0,1.0,7.0,5.0,Algorithms making important decisions,Algorithms making important decisions,The developers or the people creating the AI,I'm excited about the possibilities more than ...,No,"Yes, but only within the company",Upper management at the company/organization,Yes,10 (Very Likely),Daily or almost daily,Yes,Less than once per month or monthly,"No, I knew that Stack Overflow had a jobs boar...","No, I know what it is but I don't have one",,Yes,Extremely interested,Extremely interested,Extremely interested,Extremely interested,Extremely interested,Before 5:00 AM,Over 12 hours,1 - 2 hours,Never,,3 - 4 times per week,Male,Straight or 
heterosexual,Some college/university study without earning ...,White or of European descent,18 - 24 years old,Yes,,The survey was an appropriate length,Somewhat easy
5,8,Yes,No,United Kingdom,No,Employed full-time,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",10 to 19 employees,Back-end developer;Database administrator;Fron...,6-8 years,3-5 years,Moderately satisfied,Slightly satisfied,Working in a different or more specialized tec...,I am actively looking for a job,Between 2 and 4 years ago,8.0,5.0,4.0,9.0,1.0,3.0,6.0,2.0,10.0,7.0,1.0,3.0,4.0,10.0,9.0,2.0,6.0,5.0,11.0,7.0,8.0,4.0,2.0,5.0,1.0,3.0,2.0,6.0,7.0,3.0,1.0,5.0,4.0,I did not receive an expected change in compen...,British pounds sterling (£),30000.0,,41671.0,GBP,Confluence;Jira;Office / productivity suite (M...,Less than a month,Received on-the-job training in software devel...,The official documentation and/or standards fo...,,To improve my general technical skills or prog...,Disagree,Neither Agree nor Disagree,Strongly disagree,Java;JavaScript;Python;TypeScript;HTML;CSS,C#;Go;Java;JavaScript;Python;SQL;TypeScript;HT...,MongoDB,PostgreSQL,Linux,Linux,Angular;Node.js,Node.js,IntelliJ;PyCharm;Visual Studio Code,Linux-based,2.0,Agile,Git,A few times per week,Yes,Yes,I wanted to support the website I was visiting...,Somewhat agree,Somewhat agree,Somewhat disagree,Saw an online advertisement and then researche...,1.0,3.0,4.0,2.0,7.0,5.0,6.0,Increasing automation of jobs,Algorithms making important decisions,A governmental or other regulatory body,I'm excited about the possibilities more than ...,Depends on what it is,Depends on what it is,Upper management at the company/organization,Unsure / I don't know,7,A few times per month or weekly,Yes,Less than once per month or monthly,Yes,"No, I have one but it's out of date",8.0,No,A little bit interested,Not at all interested,Very interested,Very interested,Extremely interested,Between 7:01 - 8:00 AM,9 - 12 hours,30 - 59 minutes,1 - 2 times per week,,1 - 2 times per week,Male,Straight or heterosexual,"Secondary school (e.g. 
American high school, G...",White or of European descent,18 - 24 years old,No,,The survey was an appropriate length,Somewhat easy
6,9,Yes,Yes,United States,No,Employed full-time,Some college/university study without earning ...,"Computer science, computer engineering, or sof...","10,000 or more employees",Back-end developer;Front-end developer;Full-st...,9-11 years,0-2 years,Slightly satisfied,Moderately satisfied,Working as a founder or co-founder of my own c...,"I’m not actively looking, but I am open to new...",Less than a year ago,5.0,3.0,9.0,4.0,1.0,8.0,2.0,7.0,10.0,6.0,1.0,3.0,2.0,9.0,11.0,4.0,8.0,6.0,7.0,10.0,5.0,3.0,1.0,5.0,4.0,2.0,1.0,5.0,3.0,4.0,2.0,6.0,7.0,My job status or other personal status changed,U.S. dollars ($),120000.0,Yearly,120000.0,USD,Confluence;Office / productivity suite (Micros...,Six to nine months,Received on-the-job training in software devel...,The official documentation and/or standards fo...,,,Disagree,Agree,Strongly disagree,JavaScript;HTML;CSS,C;Go;JavaScript;Python;HTML;CSS,MongoDB,,Linux,Linux,Node.js;React,React;TensorFlow,Atom;Visual Studio Code,MacOS,2.0,Agile;Scrum,Git,Multiple times per day,Yes,Yes,The ad-blocking software was causing display i...,Somewhat disagree,Neither agree nor disagree,Somewhat disagree,Clicked on an online advertisement;Stopped goi...,1.0,4.0,2.0,5.0,3.0,7.0,6.0,Algorithms making important decisions,Artificial intelligence surpassing human intel...,The developers or the people creating the AI,I'm excited about the possibilities more than ...,Depends on what it is,"Yes, but only within the company",Upper management at the company/organization,Yes,10 (Very Likely),Multiple times per day,Yes,I have never participated in Q&A on Stack Over...,Yes,"No, I have one but it's out of date",7.0,No,Very interested,A little bit interested,Extremely interested,Very interested,Very interested,Between 9:01 - 10:00 AM,Over 12 hours,Less than 30 minutes,1 - 2 times per week,,I don't typically exercise,Male,Straight or heterosexual,"Master’s degree (MA, MS, M.Eng., MBA, etc.)",White or of European descent,18 - 24 years old,No,No,The survey was an 
appropriate length,Somewhat easy
7,10,Yes,Yes,Nigeria,No,Employed full-time,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",10 to 19 employees,Designer;Front-end developer;QA or test developer,0-2 years,3-5 years,Slightly satisfied,Moderately satisfied,Working as a founder or co-founder of my own c...,"I’m not actively looking, but I am open to new...",Less than a year ago,6.0,5.0,4.0,2.0,7.0,8.0,10.0,1.0,9.0,3.0,1.0,3.0,5.0,7.0,6.0,2.0,11.0,9.0,4.0,10.0,8.0,1.0,3.0,2.0,4.0,5.0,2.0,6.0,1.0,3.0,7.0,5.0,4.0,I saw an employer’s advertisement,,,,,,Facebook;Google Hangouts/Chat;Office / product...,One to three months,Taken an online course in programming or softw...,,Immediately after graduating,To improve my general technical skills or prog...,Strongly agree,Strongly disagree,Neither Agree nor Disagree,JavaScript;TypeScript;HTML;CSS,Matlab;SQL;Kotlin;Bash/Shell,"MongoDB;MySQL;Microsoft Azure (Tables, CosmosD...",,Azure;Heroku,Amazon Echo;Android;Apple Watch or Apple TV;AW...,Angular;Node.js,.NET Core;Django,Atom;Notepad++;Sublime Text;Visual Studio Code,Windows,1.0,Agile;Extreme programming (XP);Scrum,Git,Multiple times per day,Yes,No,,Strongly agree,Neither agree nor disagree,Strongly disagree,Saw an online advertisement and then researche...,,,,,,,,Artificial intelligence surpassing human intel...,"Evolving definitions of ""fairness"" in algorith...",,I'm excited about the possibilities more than ...,Depends on what it is,"Yes, but only within the company",The developer who wrote it,Yes,10 (Very Likely),Daily or almost daily,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a jobs boar...","No, and I don't know what that is",,Yes,Very interested,Very interested,Very interested,A little bit interested,Extremely interested,I do not have a set schedule,Over 12 hours,1 - 2 hours,Daily or almost every day,,1 - 2 times per week,Female,,Primary/elementary school,Black or of African descent,25 - 34 years old,No,,The survey was too 
long,Somewhat difficult
8,11,Yes,Yes,United States,No,Employed full-time,Some college/university study without earning ...,Fine arts or performing arts (ex. graphic desi...,100 to 499 employees,"Back-end developer;C-suite executive (CEO, CTO...",30 or more years,21-23 years,Moderately satisfied,Moderately satisfied,Doing the same work,"I’m not actively looking, but I am open to new...",Between 2 and 4 years ago,6.0,3.0,7.0,4.0,1.0,5.0,10.0,8.0,9.0,2.0,1.0,3.0,2.0,9.0,11.0,5.0,8.0,4.0,10.0,7.0,6.0,5.0,1.0,2.0,3.0,4.0,3.0,7.0,2.0,4.0,1.0,6.0,5.0,My job status or other personal status changed,U.S. dollars ($),250000.0,Yearly,250000.0,USD,Confluence;HipChat;Jira;Office / productivity ...,Three to six months,Taken an online course in programming or softw...,The official documentation and/or standards fo...,,Because I find it enjoyable,Strongly agree,Strongly disagree,Strongly disagree,Assembly;CoffeeScript;Erlang;Go;JavaScript;Lua...,Erlang;Go;Python;Rust;SQL,Redis;PostgreSQL;Amazon DynamoDB;Apache HBase;...,Redis;PostgreSQL;Amazon DynamoDB;Apache Hive;A...,Amazon Echo;AWS;iOS;Linux;Mac OS;Serverless,AWS;Linux;Mac OS;Serverless,Hadoop;Node.js;React;Spark,,IntelliJ;PyCharm;Sublime Text;Vim,MacOS,1.0,Agile;Evidence-based software engineering;Extr...,Git,Multiple times per day,No,,,Neither agree nor disagree,Somewhat agree,Neither agree nor disagree,Clicked on an online advertisement;Saw an onli...,1.0,3.0,5.0,4.0,2.0,7.0,6.0,Algorithms making important decisions,Artificial intelligence surpassing human intel...,The developers or the people creating the AI,I'm worried about the dangers more than I'm ex...,No,"Yes, and publicly",The person who came up with the idea,Yes,7,A few times per month or weekly,Yes,Less than once per month or monthly,Yes,Yes,7.0,No,Not at all interested,Not at all interested,Not at all interested,Not at all interested,Not at all interested,Between 8:01 - 9:00 AM,9 - 12 hours,Less than 30 minutes,1 - 2 times per week,Standing desk;Fatigue-relieving floor mat,I don't 
typically exercise,Male,Straight or heterosexual,Some college/university study without earning ...,White or of European descent,35 - 44 years old,Yes,No,The survey was an appropriate length,Very easy
9,16,No,Yes,India,No,Employed full-time,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",500 to 999 employees,Designer,0-2 years,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


##### Đổi tên cột 'ConvertedSalary' thành 'ConvertedUSD'

In [49]:
df.rename(columns={'ConvertedSalary':'ConvertedUSD'}, inplace=True)

In [50]:
df['ConvertedUSD']

0            NaN
1        70841.0
2            NaN
3            NaN
4        21426.0
          ...   
98850        NaN
98851        NaN
98852        NaN
98853        NaN
98854        NaN
Name: ConvertedUSD, Length: 98855, dtype: float64

##### Mã hóa cột 'Hobby'

In [52]:
df['Hobby'].value_counts()

Hobby
Yes    79897
No     18958
Name: count, dtype: int64

In [59]:
# Two ways to encode 'No'/'Yes' as 0/1. Neither result is assigned back to
# df['Hobby'], and a notebook cell only displays its last expression.
# map: any value missing from the dict becomes NaN.
df['Hobby'].map({'No':0, 'Yes':1})
# replace: any value missing from the dict is left unchanged.
df['Hobby'].replace({'No':0, 'Yes':1})

0        1
1        1
2        1
3        0
4        1
        ..
98850    1
98851    0
98852    1
98853    1
98854    1
Name: Hobby, Length: 98855, dtype: int64

In [63]:
def convert_hobby(x):
    """Encode a Hobby answer as an integer: 1 for "Yes", 0 for anything else."""
    if x == "Yes":
        return 1
    return 0

In [64]:
df['Hobby'].apply(convert_hobby)

0        1
1        1
2        1
3        0
4        1
        ..
98850    1
98851    0
98852    1
98853    1
98854    1
Name: Hobby, Length: 98855, dtype: int64