In [1]:
import pandas as pd

# Load the survey responses from a CSV file in the working directory.
df = pd.read_csv('survey.csv')
df.shape  # (rows, columns) tuple

(64461, 61)

In [2]:
df.info()  # per-column summary: non-null count and dtype (object, int64, float64, ...)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64461 entries, 0 to 64460
Data columns (total 61 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Respondent                    64461 non-null  int64  
 1   MainBranch                    64162 non-null  object 
 2   Hobbyist                      64416 non-null  object 
 3   Age                           45446 non-null  float64
 4   Age1stCode                    57900 non-null  object 
 5   CompFreq                      40069 non-null  object 
 6   CompTotal                     34826 non-null  float64
 7   ConvertedComp                 34756 non-null  float64
 8   Country                       64072 non-null  object 
 9   CurrencyDesc                  45472 non-null  object 
 10  CurrencySymbol                45472 non-null  object 
 11  DatabaseDesireNextYear        44070 non-null  object 
 12  DatabaseWorkedWith            49537 non-null  object 
 13  D

In [3]:
# Now that we know the frame's size, widen the display limit so that
# df.head() shows every column instead of truncating the middle with "...".
# The limit is derived from the frame itself rather than hard-coding 61,
# so it stays correct if the CSV gains or loses columns.
pd.set_option('display.max_columns', df.shape[1])
df.head(1)  # first row, with all columns visible

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment,Ethnicity,Gender,JobFactors,JobSat,JobSeek,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWCollabToolsDesireNextYear,NEWCollabToolsWorkedWith,NEWDevOps,NEWDevOpsImpt,NEWEdImpt,NEWJobHunt,NEWJobHuntResearch,NEWLearn,NEWOffTopic,NEWOnboardGood,NEWOtherComms,NEWOvertime,NEWPurchaseResearch,NEWPurpleLink,NEWSOSites,NEWStuck,OpSys,OrgSize,PlatformDesireNextYear,PlatformWorkedWith,PurchaseWhat,Sexuality,SOAccount,SOComm,SOPartFreq,SOVisitFreq,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,EUR,Microsoft SQL Server,Elasticsearch;Microsoft SQL Server;Oracle,"Developer, desktop or enterprise applications;...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em...",White or of European descent,Man,"Languages, frameworks, and other technologies ...",Slightly satisfied,I am not interested in new job opportunities,C#;HTML/CSS;JavaScript,C#;HTML/CSS;JavaScript,.NET Core;Xamarin,.NET;.NET Core,Microsoft Teams;Microsoft Azure;Trello,Confluence;Jira;Slack;Microsoft Azure;Trello,No,Somewhat important,Fairly important,,,Once a year,Not sure,,No,Often: 1-2 days per week or more,Start a free trial;Ask developers I know/work ...,Amused,Stack Overflow (public Q&A for anyone who codes),Visit Stack Overflow;Go for a walk or other ph...,Windows,2 to 9 employees,Android;iOS;Kubernetes;Microsoft Azure;Windows,Windows,,Straight / Heterosexual,No,"No, not at all",,Multiple times per day,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27


In [4]:
# A DataFrame can be built from plain Python structures. Here a dict maps
# each column name to the list of values for that column.
people = dict(
    name=['Power', 'Rangers', 'Ritta'],
    last_name=['Zordon', 'Hulk', 'SikiTa'],
    age=[22, 11, 33],
)
# Hand the dict to the DataFrame constructor: the keys become column names.
new_df = pd.DataFrame(data=people)
new_df

Unnamed: 0,name,last_name,age
0,Power,Zordon,22
1,Rangers,Hulk,11
2,Ritta,SikiTa,33


In [5]:
# Selecting one column yields a Series; the whole table is a DataFrame:
print(type(new_df['name']), type(new_df))

<class 'pandas.core.series.Series'> <class 'pandas.core.frame.DataFrame'>


In [6]:
# Series: a 1-D array of data; like a Python list, but with much more functionality.
# DataFrame: a 2-D table of rows and columns; think of it as a container
# holding multiple Series objects (one per column).

In [7]:
new_df[['name', 'age']] # passing a list of names selects several columns at once

Unnamed: 0,name,age
0,Power,22
1,Rangers,11
2,Ritta,33


In [8]:
# LIST THE COLUMNS:
# .columns returns the Index of column names (not a column's data);
# use it to look up the exact name before selecting a column:
new_df.columns

Index(['name', 'last_name', 'age'], dtype='object')

In [9]:
# GET A ROW:
# use .loc[] or .iloc[] to grab a row
# iloc[] - accesses rows by INTEGER position, hence the name: integer location.
# To access the 1st row with iloc we use:
new_df.iloc[0]
# (not displayed: a notebook cell only shows its last expression)
# we can also access several rows by passing a list of positions:
new_df.iloc[[0, 1]]

Unnamed: 0,name,last_name,age
0,Power,Zordon,22
1,Rangers,Hulk,11


### What is more, we can also select data by **<u>ROWS and COLUMNS</u>** together with iloc and loc:

In [10]:
# iloc also accepts a second argument that selects columns, again by position:
new_df.iloc[[0, 1], [1, 2]] # iloc takes INTEGERS ONLY! new_df.iloc[[0, 1], [2]]
# also works, for a single column and several rows (with loc we would pass the column name instead)

Unnamed: 0,last_name,age
0,Zordon,22
1,Hulk,11


In [11]:
# loc - selects rows (and columns as 2nd arg) by LABEL this time, not by integer position.
# Don't get confused: right now the "numbers" on the very left of new_df
# are in fact labels too; we will cover that in the index section further down.

In [12]:
new_df.loc[[0, 1], ['name', 'age']] # NOTICE: columns are referenced by label
# and of course a single column works too: .loc[[0, 1], ['name']]

Unnamed: 0,name,age
0,Power,22
1,Rangers,11


In [108]:
# Load the schema file; per the output below it maps each survey column to its question text.
schema = pd.read_csv('schema.csv')
schema.head(1)

Unnamed: 0,Column,QuestionText
0,Respondent,Randomized respondent ID number (not in order ...


In [14]:
df['Hobbyist'].value_counts() # a quick taste: counts how often each value (Yes / No) occurs in this column

Yes    50388
No     14028
Name: Hobbyist, dtype: int64

In [15]:
# row label 0 and column 'Hobbyist', both given as lists:
df.loc[[0], ['Hobbyist']]

Unnamed: 0,Hobbyist
0,Yes


In [16]:
df.loc[0] # every answer given by the respondent at index label 0 (Respondent 1):

Respondent                                                  1
MainBranch                     I am a developer by profession
Hobbyist                                                  Yes
Age                                                       NaN
Age1stCode                                                 13
                                       ...                   
WebframeWorkedWith                       ASP.NET;ASP.NET Core
WelcomeChange         Just as welcome now as I felt last year
WorkWeekHrs                                              50.0
YearsCode                                                  36
YearsCodePro                                               27
Name: 0, Length: 61, dtype: object

In [18]:
# IMPORTANT: when passing a slice, e.g. df.loc[0:2, ['COL_NAME']], the slice is written without a list's square brackets,
# and .loc slices are INCLUSIVE of the end label, so 0:2 returns 3 rows (labels 0, 1 and 2):
df.loc[0:2, ['Hobbyist']]

Unnamed: 0,Hobbyist
0,Yes
1,No
2,Yes


In [19]:
# INCLUSIVE label slices work on columns too:
df.loc[0:2, 'Hobbyist':'Employment'] # note the column slice; you can always check the column names with df.columns

Unnamed: 0,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment
0,Yes,,13,Monthly,,,Germany,European Euro,EUR,Microsoft SQL Server,Elasticsearch;Microsoft SQL Server;Oracle,"Developer, desktop or enterprise applications;...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em..."
1,No,,19,,,,United Kingdom,Pound sterling,GBP,,,"Developer, full-stack;Developer, mobile","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time
2,Yes,,15,,,,Russian Federation,,,,,,,


---
## Let's talk deeper about INDEXES (set, reset, use indexes):

In [26]:
# Small frame for the index demos: column name -> list of values.
people2 = dict(
    name=['Power', 'Rangers', 'Ritta'],
    email=['Zordon@gf.gg', 'Hulk@wp.gg', 'SikiTa@gg.gg'],
    age=[22, 11, 33],
)
new_df2 = pd.DataFrame(data=people2)
new_df2  # built from the people2 dict above

Unnamed: 0,name,email,age
0,Power,Zordon@gf.gg,22
1,Rangers,Hulk@wp.gg,11
2,Ritta,SikiTa@gg.gg,33


In [24]:
# Index labels should be unique, and the email addresses are, so they make a good index:
new_df2['email']

0    Zordon@gf.gg
1      Hulk@wp.gg
2    SikiTa@gg.gg
Name: email, dtype: object

In [28]:
new_df2.set_index('email') # NOTICE: this does NOT change the original DataFrame; it returns a new one (the next cell still shows the original state)

Unnamed: 0_level_0,name,age
email,Unnamed: 1_level_1,Unnamed: 2_level_1
Zordon@gf.gg,Power,22
Hulk@wp.gg,Rangers,11
SikiTa@gg.gg,Ritta,33


In [30]:
new_df2 # see: the index of the original is unchanged
# To modify the DataFrame itself, add the argument inplace=True:
# new_df2.set_index('email', inplace=True) # modifies new_df2 in place; shown in action below

Unnamed: 0,name,email,age
0,Power,Zordon@gf.gg,22
1,Rangers,Hulk@wp.gg,11
2,Ritta,SikiTa@gg.gg,33


In [32]:
new_df2.index # the index before the change:

RangeIndex(start=0, stop=3, step=1)

In [37]:
# and now make the change stick:
new_df2.set_index('email', inplace=True) # inplace=True modifies new_df2 itself instead of returning a copy
# NOTE: re-running this cell raises KeyError, because 'email' is no longer a column afterwards.
new_df2.index # the index AFTER the change:

Index(['Zordon@gf.gg', 'Hulk@wp.gg', 'SikiTa@gg.gg'], dtype='object', name='email')

In [46]:
# now that the index holds strings, compare .iloc and .loc again:
new_df2.iloc[0] # .iloc[] - by integer position (first row)

name    Power
age        22
Name: Zordon@gf.gg, dtype: object

In [49]:
new_df2.loc['Zordon@gf.gg'] # .loc[] - by index label

name    Power
age        22
Name: Zordon@gf.gg, dtype: object

In [50]:
new_df2  # the frame is now indexed by email

Unnamed: 0_level_0,name,age
email,Unnamed: 1_level_1,Unnamed: 2_level_1
Zordon@gf.gg,Power,22
Hulk@wp.gg,Rangers,11
SikiTa@gg.gg,Ritta,33


In [55]:
# Back to the survey DataFrame df; time for some clean-up, starting with the index.
# The Respondent column looks like a unique id, so let's use it as the index:
df.head(2)

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment,Ethnicity,Gender,JobFactors,JobSat,JobSeek,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWCollabToolsDesireNextYear,NEWCollabToolsWorkedWith,NEWDevOps,NEWDevOpsImpt,NEWEdImpt,NEWJobHunt,NEWJobHuntResearch,NEWLearn,NEWOffTopic,NEWOnboardGood,NEWOtherComms,NEWOvertime,NEWPurchaseResearch,NEWPurpleLink,NEWSOSites,NEWStuck,OpSys,OrgSize,PlatformDesireNextYear,PlatformWorkedWith,PurchaseWhat,Sexuality,SOAccount,SOComm,SOPartFreq,SOVisitFreq,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,EUR,Microsoft SQL Server,Elasticsearch;Microsoft SQL Server;Oracle,"Developer, desktop or enterprise applications;...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em...",White or of European descent,Man,"Languages, frameworks, and other technologies ...",Slightly satisfied,I am not interested in new job opportunities,C#;HTML/CSS;JavaScript,C#;HTML/CSS;JavaScript,.NET Core;Xamarin,.NET;.NET Core,Microsoft Teams;Microsoft Azure;Trello,Confluence;Jira;Slack;Microsoft Azure;Trello,No,Somewhat important,Fairly important,,,Once a year,Not sure,,No,Often: 1-2 days per week or more,Start a free trial;Ask developers I know/work ...,Amused,Stack Overflow (public Q&A for anyone who codes),Visit Stack Overflow;Go for a walk or other ph...,Windows,2 to 9 employees,Android;iOS;Kubernetes;Microsoft Azure;Windows,Windows,,Straight / Heterosexual,No,"No, not at all",,Multiple times per day,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
1,2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,GBP,,,"Developer, full-stack;Developer, mobile","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,,,,Very dissatisfied,I am not interested in new job opportunities,Python;Swift,JavaScript;Swift,React Native;TensorFlow;Unity 3D,React Native,Github;Slack,Confluence;Jira;Github;Gitlab;Slack,,,Fairly important,,,Once a year,Not sure,,No,,,Amused,Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,MacOS,"1,000 to 4,999 employees",iOS;Kubernetes;Linux;MacOS,iOS,I have little or no influence,,Yes,"Yes, definitely",Less than once per month or monthly,Multiple times per day,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4


In [57]:
# Promote Respondent from a regular column to the index, modifying df in place.
df.set_index('Respondent', inplace=True)

In [59]:
# DONE:
df.head(2) # the index is now Respondent; the labels are probably ints, let's check the type

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment,Ethnicity,Gender,JobFactors,JobSat,JobSeek,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWCollabToolsDesireNextYear,NEWCollabToolsWorkedWith,NEWDevOps,NEWDevOpsImpt,NEWEdImpt,NEWJobHunt,NEWJobHuntResearch,NEWLearn,NEWOffTopic,NEWOnboardGood,NEWOtherComms,NEWOvertime,NEWPurchaseResearch,NEWPurpleLink,NEWSOSites,NEWStuck,OpSys,OrgSize,PlatformDesireNextYear,PlatformWorkedWith,PurchaseWhat,Sexuality,SOAccount,SOComm,SOPartFreq,SOVisitFreq,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1
1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,EUR,Microsoft SQL Server,Elasticsearch;Microsoft SQL Server;Oracle,"Developer, desktop or enterprise applications;...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em...",White or of European descent,Man,"Languages, frameworks, and other technologies ...",Slightly satisfied,I am not interested in new job opportunities,C#;HTML/CSS;JavaScript,C#;HTML/CSS;JavaScript,.NET Core;Xamarin,.NET;.NET Core,Microsoft Teams;Microsoft Azure;Trello,Confluence;Jira;Slack;Microsoft Azure;Trello,No,Somewhat important,Fairly important,,,Once a year,Not sure,,No,Often: 1-2 days per week or more,Start a free trial;Ask developers I know/work ...,Amused,Stack Overflow (public Q&A for anyone who codes),Visit Stack Overflow;Go for a walk or other ph...,Windows,2 to 9 employees,Android;iOS;Kubernetes;Microsoft Azure;Windows,Windows,,Straight / Heterosexual,No,"No, not at all",,Multiple times per day,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,GBP,,,"Developer, full-stack;Developer, mobile","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,,,,Very dissatisfied,I am not interested in new job opportunities,Python;Swift,JavaScript;Swift,React Native;TensorFlow;Unity 3D,React Native,Github;Slack,Confluence;Jira;Github;Gitlab;Slack,,,Fairly important,,,Once a year,Not sure,,No,,,Amused,Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,MacOS,"1,000 to 4,999 employees",iOS;Kubernetes;Linux;MacOS,iOS,I have little or no influence,,Yes,"Yes, definitely",Less than once per month or monthly,Multiple times per day,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4


In [63]:
df.index # as suspected, the labels are ints (dtype int64)

Int64Index([    1,     2,     3,     4,     5,     6,     7,     8,     9,
               10,
            ...
            64104, 64236, 64330, 64480, 64567, 64858, 64867, 64898, 64925,
            65112],
           dtype='int64', name='Respondent', length=64461)

In [106]:
df.iloc[1, 0]  # single value at row position 1, column position 0

'I am a developer by profession'

In [111]:
# Index the schema by column name, modifying schema in place.
schema.set_index('Column', inplace=True)

In [114]:
schema.head(2)  # the Column values are now the index labels

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
Respondent,Randomized respondent ID number (not in order ...
MainBranch,Which of the following options best describes ...


In [117]:
# once a custom index is set, you can sort by it, for example:
schema.sort_index().head(4) # use .sort_index(ascending=False) for reverse order

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
Age,What is your age (in years)? If you prefer not...
Age1stCode,At what age did you write your first line of c...
CompFreq,"Is that compensation weekly, monthly, or yearly?"
CompTotal,What is your current total compensation (salar...


In [118]:
# AND REMEMBER: to make such a change PERMANENT, pass inplace=True,
# e.g. schema.sort_index(inplace=True, ascending=...) sorts the DataFrame itself instead of returning a sorted copy.

---
## Filtering - Using Conditionals to Filter Rows and Columns

In [9]:
import pandas as pd
# Tiny frame for the filtering demos: column name -> list of values.
ppl = dict(
    name=['John', 'Maxim', 'Julian'],
    last=['Doe', 'Doe', 'Cesar'],
    email=['jnDoe@ff.gg', 'maxim@aa.aa', 'rome4lyfe@gov.it'],
)
df2 = pd.DataFrame(data=ppl)
df2

Unnamed: 0,name,last,email
0,John,Doe,jnDoe@ff.gg
1,Maxim,Doe,maxim@aa.aa
2,Julian,Cesar,rome4lyfe@gov.it


In [10]:
df2['last'] == 'Doe' # a comparison returns a boolean Series (a mask); the mask can be passed to df2[...] or df2.loc[...],
# see below:

0     True
1     True
2    False
Name: last, dtype: bool

In [11]:
df2[df2['last'] == 'Doe'] # way 1: mask written inline; or:
fltr = df2['last'] == 'Doe'
df2[fltr] # way 2: mask stored in a variable; or:
df2.loc[fltr] # way 3: stored mask with .loc; or:
df2.loc[df2['last'] == 'Doe'] # way 4: inline mask with .loc. Passing the mask to .loc has an advantage: we can
# still SELECT COLUMNS as the second argument :)

Unnamed: 0,name,last,email
0,John,Doe,jnDoe@ff.gg
1,Maxim,Doe,maxim@aa.aa


In [13]:
# for example, to get only the email column for the matching rows:
df2.loc[df2['last'] == 'Doe', ['email']] 

Unnamed: 0,email
0,jnDoe@ff.gg
1,maxim@aa.aa


##### AND and OR operators - Python's built-in `and` / `or` cannot be used in filters :(

In [21]:
# in filters we use & for AND and | for OR:
flt = (df2['last'] == 'Doe') & (df2['name'] == 'John') # note the parentheses: & binds tighter than ==, so omitting them raises an error
df2.loc[flt, ['email']]
# you can also NEGATE a filter with a tilde ~ in front of it: df2.loc[~flt, 'email']

Unnamed: 0,email
0,jnDoe@ff.gg


In [2]:
import pandas as pd

df = pd.read_csv('survey.csv', index_col='Respondent') # index_col sets the index while loading, so no separate set_index call is needed
schema = pd.read_csv('schema.csv', index_col="Column")
df.head(2)

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,EUR,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,GBP,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4


In [4]:
# boolean mask: respondents whose ConvertedComp is at least 70000
salary = (df['ConvertedComp'] >= 70000)
df.loc[salary, ['Country', 'LanguageWorkedWith', 'ConvertedComp']]

Unnamed: 0_level_0,Country,LanguageWorkedWith,ConvertedComp
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
8,United States,Python;SQL,116000.0
16,United Kingdom,Bash/Shell/PowerShell;HTML/CSS;Java;JavaScript...,108576.0
17,United States,C#;HTML/CSS;JavaScript;Python;SQL;VBA,79000.0
18,United States,Bash/Shell/PowerShell;HTML/CSS;Perl,1260000.0
19,United States,Bash/Shell/PowerShell;C#;HTML/CSS;JavaScript;S...,83400.0
...,...,...,...
65589,United States,,150000.0
65596,United States,JavaScript;Python;TypeScript,70000.0
65602,United States,,140000.0
65604,United States,,150000.0


In [9]:
# neat trick with filters: .isin() keeps the rows whose value appears in the given list
countries = ['United States', 'Canada', 'United Kingdom', 'Germany', 'India']
flt2 = df['Country'].isin(countries) # alternative: flt2 = (df['Country'].isin(['Poland'])) & (df['ConvertedComp'] > 50000)
# NOTE(review): the saved output below lists only Poland rows, so it appears to come from the
# commented alternative rather than from the countries list - re-run the cell to refresh it.
df.loc[flt2, ['Country', 'LanguageWorkedWith', 'ConvertedComp']]

Unnamed: 0_level_0,Country,LanguageWorkedWith,ConvertedComp
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1505,Poland,Bash/Shell/PowerShell;Java;JavaScript;SQL,212625.0
1778,Poland,Bash/Shell/PowerShell;C++;Python;Rust,50124.0
1837,Poland,C;C#;HTML/CSS;JavaScript;PHP;SQL,69864.0
2379,Poland,HTML/CSS;JavaScript;TypeScript,110000.0
2445,Poland,Bash/Shell/PowerShell;C#;C++;HTML/CSS;Java;Jav...,58320.0
...,...,...,...
61126,Poland,Haskell;Java,63281.0
61353,Poland,Bash/Shell/PowerShell;HTML/CSS;Java,150000.0
61385,Poland,Bash/Shell/PowerShell;JavaScript,53940.0
61612,Poland,HTML/CSS;JavaScript;PHP;SQL,66828.0


In [10]:
# rows whose language list contains 'Python'; na=False treats missing values as "no match"
flt3 = df['LanguageWorkedWith'].str.contains('Python', na=False)
df.loc[flt3, ['Country', 'LanguageWorkedWith', 'ConvertedComp']]

Unnamed: 0_level_0,Country,LanguageWorkedWith,ConvertedComp
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,Russian Federation,Objective-C;Python;Swift,
8,United States,Python;SQL,116000.0
10,United Kingdom,HTML/CSS;Java;JavaScript;Python;SQL,32315.0
13,Netherlands,C;JavaScript;Python,38916.0
15,France,Bash/Shell/PowerShell;C;HTML/CSS;Java;Python;SQL,
...,...,...,...
61561,France,Bash/Shell/PowerShell;HTML/CSS;JavaScript;Perl...,
62391,Morocco,C++;HTML/CSS;JavaScript;Python;Ruby;TypeScript,
63077,United States,C++;HTML/CSS;Java;JavaScript;Python;SQL,
63640,Australia,Bash/Shell/PowerShell;C;C#;C++;HTML/CSS;Java;J...,


---
## Updating Rows and Columns - Modifying Data Within DataFrames
##### It's trivial. Once you have a `DataFrame` assigned to, say, the `adf` variable, just assign a list of new names to its `.columns` attribute: `adf.columns = [COL_NAME, COL2_NAME, ...]`:

In [1]:
import pandas as pd

# Column name -> list of values for that column.
animals = dict(
    name=['Moouse', 'Dogo', 'Mouse'],
    age=[1, 2, 3],
    salary=[60000, 30000, 12000],
    country=['CA', 'USA', 'UK'],
    is_newbe=[True, False, True],
)
adf = pd.DataFrame(data=animals)
# Rename every column at once by assigning a full list of new names.
adf.columns = [
    'animal_name',
    'animal_age',
    'animal_salary',
    'a_country',
    'a_is_newbe',
]
adf.columns # proof

Index(['animal_name', 'animal_age', 'animal_salary', 'a_country',
       'a_is_newbe'],
      dtype='object')

In [2]:
# want to see the column names in capital letters? Print them (nothing is modified here):
for i in adf.columns:
    print(i.upper())

ANIMAL_NAME
ANIMAL_AGE
ANIMAL_SALARY
A_COUNTRY
A_IS_NEWBE


In [3]:
# Make the capital-letter names permanent.
# First restore the short names: without this line the frame still carries the
# 'animal_*' / 'a_*' names assigned earlier, so the result would be
# 'ANIMAL_NAME', ... and the later cells that use adf['SALARY'], adf['COUNTRY']
# etc. would raise KeyError on a fresh Restart & Run All.
adf.columns = ['name', 'age', 'salary', 'country', 'is_newbe']
adf.columns = [i.upper() for i in adf.columns] # list comprehension instead of a full for loop
adf.columns

Index(['NAME', 'AGE', 'SALARY', 'COUNTRY', 'IS_NEWBE'], dtype='object')

---
### SOME FILTERS REMINDER HERE (and back to topic below):

In [4]:
# my_flt = (adf['SALARY'] < 20000)                                     # 1) filter: plain comparison
# my_flt = (adf['SALARY'] < 20000) & (adf['NAME'].str.contains('use')) # 2) filter: .str.contains(ARG)
# countries = ['UK', 'CA']                                             # 3) filter: .isin(ARG)
# my_flt = adf['COUNTRY'].isin(countries)                              # still 3) filter: .isin(ARG)
# adf['IS_NEWBE'].value_counts()                                       # 4) not a filter: .value_counts() counts each distinct value
#                                                                      #    (here: True x2 and False x1)
# adf.columns = adf.columns.str.replace(' ', '_')                      # 5) .str.replace(): replace spaces in COLUMN NAMES with underscores
# adf.rename(columns={'NAME' : 'first_name', ...}, inplace=True)       # 6) DataFrame.rename(columns={'old': 'new'}) renames SPECIFIC COLUMNS,
                                                                       # e.g. NAME to first_name; inplace=True is optional (modifies the frame itself)
my_flt = (adf['COUNTRY'].isin(['CA'])) & (adf['NAME'].str.contains('use'))
adf.loc[my_flt, ['NAME', 'SALARY', 'COUNTRY', 'IS_NEWBE']]

Unnamed: 0,NAME,SALARY,COUNTRY,IS_NEWBE
0,Moouse,60000,CA,True


---
### back to topic now:

In [5]:
# Let's see how to update values - grab the row of Dogo, whose name we will change to Dog:
adf.loc[[1]]

Unnamed: 0,NAME,AGE,SALARY,COUNTRY,IS_NEWBE
1,Dogo,2,30000,USA,False


In [6]:
adf.loc[[1]] = ['Dog', 2, 30000, 'USA', False] # overwrites the whole row with label 1, one value per column

In [7]:
adf  # the row with label 1 now holds the new values

Unnamed: 0,NAME,AGE,SALARY,COUNTRY,IS_NEWBE
0,Moouse,1,60000,CA,True
1,Dog,2,30000,USA,False
2,Mouse,3,12000,UK,True


In [8]:
# With many columns (say 61) rewriting a whole row is impractical,
# so address the single cell by row label and column name instead:
adf.loc[1, 'NAME'] = 'Ddoogg' # several specific columns can be updated the same way
adf

Unnamed: 0,NAME,AGE,SALARY,COUNTRY,IS_NEWBE
0,Moouse,1,60000,CA,True
1,Ddoogg,2,30000,USA,False
2,Mouse,3,12000,UK,True


In [9]:
# say we want the values of a column in lower case; use .str.lower():
adf['COUNTRY'].str.lower() # returns a new Series and leaves adf unchanged, so assign it back:
adf['COUNTRY'] = adf['COUNTRY'].str.lower() # now the column itself is changed
adf

Unnamed: 0,NAME,AGE,SALARY,COUNTRY,IS_NEWBE
0,Moouse,1,60000,ca,True
1,Ddoogg,2,30000,usa,False
2,Mouse,3,12000,uk,True


#### Time for methods `apply`, `map`, `applymap`, `replace`:
##### 1. `apply` - works on either a `DataFrame` or a `Series` object, and behaves differently on each. First let's look at how apply works on a `Series`.

In [10]:
# The cells from here on use lower-case column names, so convert them now
# (idempotent: re-running the cell leaves lower-case names unchanged).
adf.columns = [col.lower() for col in adf.columns]

# apply on a SERIES - the function is applied to EVERY VALUE. Say we want the length of each name
# (.str.len() would normally be used for that, but FOR THE SAKE OF DEMONSTRATION WE USE OUR OWN FUNCTION):
def show_length(name):
    """Return ``len(name)``, i.e. the number of characters when ``name`` is a string."""
    length = len(name)
    return length

adf['name'].apply(show_length) # NOTICE: we pass the function itself (show_length); we do NOT call it (show_length())

# EXAMPLE NO. 2: upper-case names (the ready-made .str.upper() is simpler, but for this example we use our own function):
def make_upper(name):
    """Return ``name`` converted to upper case via its ``upper()`` method."""
    uppercased = name.upper()
    return uppercased

adf['name'].apply(make_upper) # returns a new Series; to keep the result, assign it back:
adf['name'] = adf['name'].apply(make_upper)
adf # proof

Unnamed: 0,name,age,salary,country,is_newbe
0,MOOUSE,1,60000,ca,True
1,DDOOGG,2,30000,usa,False
2,MOUSE,3,12000,uk,True


#### `apply` for `DataFrame`s - the function is called once per column (each column is passed in as a `Series`), not once per value. For instance, if we apply `len`:

In [11]:
adf.apply(len)  # len of each column, i.e. the number of rows it holds

name        3
age         3
salary      3
country     3
is_newbe    3
dtype: int64

In [12]:
# the result above means every column holds 3 values (rows). It is the same as calling:
len(adf['name']) # for example

3

In [13]:
# with the extra argument axis='columns' the function is applied to each ROW instead:
adf.apply(len, axis='columns') # 5 values per row (one per column); the default axis works down each column instead

0    5
1    5
2    5
dtype: int64

---
##### 2. `applymap` - works only on `DataFrame` objects; it applies the function to every single value.

In [148]:
# my data frame has some int data, so adf.applymap(len) will not work, because len() needs strings. If every value were a string,
# it would return a table with the length of each value, cell by cell.
# Another good example is lower-casing everything - assuming the data holds strings only - which would be:
# adf.applymap(str.lower)

In [20]:
# To see applymap in action on this frame, first convert every value to a string:
# len() raises TypeError on the values of the int and bool columns (age, salary, is_newbe).
# astype(str) returns a converted copy, so adf itself keeps its original dtypes.
# NOTE: pandas 2.1+ deprecates DataFrame.applymap in favour of DataFrame.map.
adf.astype(str).applymap(len) # length of every value, cell by cell

Unnamed: 0,name,age,salary,country,is_newbe
0,6,1,5,2,4
1,6,1,5,3,5
2,5,1,5,2,4


In [23]:
adf  # NOTE(review): the saved output below shows upper-case country codes, unlike the lower-cased values displayed earlier - it likely comes from a different kernel session; re-run to refresh

Unnamed: 0,name,age,salary,country,is_newbe
0,MOOUSE,1,60000,CA,True
1,DDOOGG,2,30000,USA,False
2,MOUSE,3,12000,UK,True


---
##### 3. `map` - works only on `Series` objects. It substitutes each value in a Series with another value. Say we want to substitute a couple of our names:

In [24]:
adf['name'].map({'DDOOGG' : 'Dog', 'MOOUSE': 'Mcause'}) # notice: values WITHOUT an entry in the mapping become NaN

0    Mcause
1       Dog
2       NaN
Name: name, dtype: object

##### But what if we want to substitute only specific values and leave all the others untouched? Just use
##### 4. `replace`

In [25]:
adf['name'].replace({'DDOOGG' : 'Dog', 'MOOUSE': 'Mcause'}) # returns a new Series; to make the change permanent, assign it back to the column

0    Mcause
1       Dog
2     MOUSE
Name: name, dtype: object

---
### Dummy Data for Sandbox begins:

<b>We will find here some drills like:</b>
<ol>
    <li>Make DataFrame from dict</li>
    <li>Get emails from COL</li>
    <li>Make COL names SMALL LETTERS - 2 options: OPTION A and OPTION B</li>
    <li><b>Replace/Change</b> spaces to underscore in COL</li>
    <li><b>Rename</b> specific COL names</li>
    <li>Get data from COLUMNS and then from 2 COLS</li>
    <li><b>Create new COL</b></li>
    <li>Count how many Yes and No are in COL 'hobbyist'</li>
    <li><b>Delete/Drop</b> COL 'full_name'</li>
    <li>Rule of Thumb: ROW-COL first always</li>
    <li>Get data from ROW, then from ROW-COL (.iloc and .loc)</li>
    <li>Make INDEXES UNIQUE, LIKE MAKE COL EMAILS as INDEX!</li>
    <li>Sort by indexes</li>
    <li>Filtering</li>
</ol>

In [28]:
import pandas as pd

# Column-oriented sample data: every key is a column label and every list
# holds that column's values, one entry per row.
dummy_dat = {
    'FIRST NAME': ['John', 'Thomas', 'Bjork', 'Zelenda'],
    'LAST NAME': ['Doe', 'Troll', 'Bawler', 'DDoe'],
    'AGE': [43, 32, 27, 17],
    'SALARY': [90000, 82000, 50000, 12000],
    'EMAIL': ['hailMary@gg.gg', 'locked@go.off', 'this@that.gg', 'stfu@ggs.com'],
    'HOBBYIST': ['Yes', 'No', 'Yes', 'No'],
}

# Build the dummy data frame [ddf] straight from the dict and display it:
ddf = pd.DataFrame(data=dummy_dat)
ddf

Unnamed: 0,FIRST NAME,LAST NAME,AGE,SALARY,EMAIL,HOBBYIST
0,John,Doe,43,90000,hailMary@gg.gg,Yes
1,Thomas,Troll,32,82000,locked@go.off,No
2,Bjork,Bawler,27,50000,this@that.gg,Yes
3,Zelenda,DDoe,17,12000,stfu@ggs.com,No


In [37]:
# Get emails: collect the EMAIL column into a plain Python list
emails = list(ddf['EMAIL'])

In [38]:
# Make col names small letters OPTION A - plain Python: run str.lower over every column label
ddf.columns = list(map(str.lower, ddf.columns))
ddf

Unnamed: 0,first name,last name,age,salary,email,hobbyist
0,John,Doe,43,90000,hailMary@gg.gg,Yes
1,Thomas,Troll,32,82000,locked@go.off,No
2,Bjork,Bawler,27,50000,this@that.gg,Yes
3,Zelenda,DDoe,17,12000,stfu@ggs.com,No


In [42]:
# Make col names small letters OPTION B - pandas way: the .str accessor lower-cases all labels at once.
# The labels are already lower case after OPTION A, so the displayed result is the same.
ddf.columns = ddf.columns.str.lower()
ddf

Unnamed: 0,first name,last name,age,salary,email,hobbyist
0,John,Doe,43,90000,hailMary@gg.gg,Yes
1,Thomas,Troll,32,82000,locked@go.off,No
2,Bjork,Bawler,27,50000,this@that.gg,Yes
3,Zelenda,DDoe,17,12000,stfu@ggs.com,No


In [44]:
# Replace/change spaces to underscores in the column names
# (regex=False: the space is matched as plain text, not as a pattern):
ddf.columns = ddf.columns.str.replace(' ', '_', regex=False)
ddf

Unnamed: 0,first_name,last_name,age,salary,email,hobbyist
0,John,Doe,43,90000,hailMary@gg.gg,Yes
1,Thomas,Troll,32,82000,locked@go.off,No
2,Bjork,Bawler,27,50000,this@that.gg,Yes
3,Zelenda,DDoe,17,12000,stfu@ggs.com,No


In [45]:
# Rename only the listed columns (first_name -> name, last_name -> surname);
# inplace=True changes ddf itself instead of returning a renamed copy.
ddf.rename(
    columns={
        'first_name': 'name',
        'last_name': 'surname',
    },
    inplace=True,
)
ddf

Unnamed: 0,name,surname,age,salary,email,hobbyist
0,John,Doe,43,90000,hailMary@gg.gg,Yes
1,Thomas,Troll,32,82000,locked@go.off,No
2,Bjork,Bawler,27,50000,this@that.gg,Yes
3,Zelenda,DDoe,17,12000,stfu@ggs.com,No


In [50]:
# Accessing data from one column and then from two columns.
# A list of labels inside the brackets returns a DataFrame.
ddf[['salary']] # get data from one col - not displayed, because a cell only shows its last expression
ddf[['name', 'email']] # get data from a few cols

Unnamed: 0,name,email
0,John,hailMary@gg.gg
1,Thomas,locked@go.off
2,Bjork,this@that.gg
3,Zelenda,stfu@ggs.com


In [51]:
# Let's calculate, for fun, how much these people earn TOGETHER (sum of the whole 'salary' column):
ddf['salary'].sum()

234000

In [52]:
# Let's create a new COL 'full_name' from first + last name.
# Adding string columns concatenates them row by row; ' ' puts a space in between.
ddf['full_name'] = ddf['name'] + ' ' + ddf['surname']
ddf

Unnamed: 0,name,surname,age,salary,email,hobbyist,full_name
0,John,Doe,43,90000,hailMary@gg.gg,Yes,John Doe
1,Thomas,Troll,32,82000,locked@go.off,No,Thomas Troll
2,Bjork,Bawler,27,50000,this@that.gg,Yes,Bjork Bawler
3,Zelenda,DDoe,17,12000,stfu@ggs.com,No,Zelenda DDoe


In [55]:
# Count how many 'Yes' and 'No' values are in COL 'hobbyist':
ddf['hobbyist'].value_counts()

Yes    2
No     2
Name: hobbyist, dtype: int64

In [57]:
# Delete/Drop COL 'full_name' (a single label is accepted as well as a list of labels):
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop.html#pandas.DataFrame.drop
ddf.drop(columns='full_name', inplace=True) # COL full_name is gone
ddf

Unnamed: 0,name,surname,age,salary,email,hobbyist
0,John,Doe,43,90000,hailMary@gg.gg,Yes
1,Thomas,Troll,32,82000,locked@go.off,No
2,Bjork,Bawler,27,50000,this@that.gg,Yes
3,Zelenda,DDoe,17,12000,stfu@ggs.com,No


In [58]:
# Rule of thumb: ROW first, then COL - 2-D data is always read/addressed as [ROW, COL];

In [65]:
# Get data from ROW, then from ROW-COL:
# .iloc[] - for integer-position coordinates or
# .loc[] - for label coordinates
# Get data from ROW - .iloc[]:
ddf.iloc[[0]] # gets the 1st row; the list [0] keeps the result a DataFrame

Unnamed: 0,name,surname,age,salary,email,hobbyist
0,John,Doe,43,90000,hailMary@gg.gg,Yes


In [66]:
# Get data from ROW - .loc[]:
# Label 0 works here because the row labels are still the default integers 0..n.
# The index is switched to strings further below; first, let's see how to get data from ROW-COL.
ddf.loc[[0]]

Unnamed: 0,name,surname,age,salary,email,hobbyist
0,John,Doe,43,90000,hailMary@gg.gg,Yes


In [69]:
# Get data from ROW-COL - .iloc[]: row position 0, column position 0
ddf.iloc[[0], [0]]

Unnamed: 0,name
0,John


In [70]:
# Get data from ROW-COL - .loc[]: row label 0, column label 'name'
ddf.loc[[0], ['name']]

Unnamed: 0,name
0,John


In [77]:
# Get data from ROW-COL with slices - NOTICE: plain slices instead of lists, and .loc slices are INCLUSIVE
# (rows 0 through 2 and columns 'name' through 'salary' are all part of the result):
ddf.loc[0:2, 'name':'salary']

Unnamed: 0,name,surname,age,salary
0,John,Doe,43,90000
1,Thomas,Troll,32,82000
2,Bjork,Bawler,27,50000


In [78]:
# Make INDEXES UNIQUE, e.g. use the COL with emails as the INDEX:
# ddf.index # before the change this returns RangeIndex(start=0, stop=4, step=1)
# NOTE: 'email' stops being a regular column afterwards, so running this cell a second time raises a KeyError.
ddf.set_index('email', inplace=True) # make email the index and permanently change the DataFrame
ddf.index # proof

Index(['hailMary@gg.gg', 'locked@go.off', 'this@that.gg', 'stfu@ggs.com'], dtype='object', name='email')

In [79]:
ddf # proof 2: the emails are now the row labels

Unnamed: 0_level_0,name,surname,age,salary,hobbyist
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
hailMary@gg.gg,John,Doe,43,90000,Yes
locked@go.off,Thomas,Troll,32,82000,No
this@that.gg,Bjork,Bawler,27,50000,Yes
stfu@ggs.com,Zelenda,DDoe,17,12000,No


In [80]:
# You can also take a shortcut and set the index while loading the data: pd.read_csv accepts index_col='NAME_OF_UNIQUE_DATA_COL_THAT_WILL_BECOME_AN_INDEX'
# so for example: df = pd.read_csv('survey.csv', index_col='Respondent'). NOTE: pd.DataFrame(SOME_DICT) has no index_col argument - call .set_index(...) on the result instead

In [82]:
# SORTING INDEXES - .sort_index():
# returns a copy sorted by index label; the result is not assigned, so ddf itself keeps its order
ddf.sort_index()

Unnamed: 0_level_0,name,surname,age,salary,hobbyist
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
hailMary@gg.gg,John,Doe,43,90000,Yes
locked@go.off,Thomas,Troll,32,82000,No
stfu@ggs.com,Zelenda,DDoe,17,12000,No
this@that.gg,Bjork,Bawler,27,50000,Yes


In [88]:
# Filtering: build a boolean mask (surname contains 'Doe' AND age above 33),
# then pass it to .loc to keep only the rows where the mask is True.
fltr = ddf['surname'].str.contains('Doe') & ddf['age'].gt(33)
ddf.loc[fltr]
# to show only the surname column of the matching rows:
# ddf.loc[fltr, ['surname']]

Unnamed: 0_level_0,name,surname,age,salary,hobbyist
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
hailMary@gg.gg,John,Doe,43,90000,Yes


# END OF SANDBOX
---

### How to split data in COL to other new COLs:

In [95]:
# Starting from some dummy data in rdf:
# 1) glue 'colors' and 'adj' together into a single 'full_name' column,
rdf['full_name'] = rdf['colors'] + ' ' + rdf['adj']
# 2) delete the two columns that 'full_name' was built from,
rdf.drop(columns=['colors', 'adj'], inplace=True)
# 3) KEY STEP: split 'full_name' on whitespace and store the pieces in 2 new columns.
#    expand=True makes str.split return a DataFrame (one column per piece).
rdf[['name', 'desc']] = rdf['full_name'].str.split(expand=True)
# NOTICE the LIST IN LIST notation on the left-hand side: rdf[['name', 'desc']].
# Writing the targets separately, like rdf['name'], rdf['desc'] = ..., did not work.

Unnamed: 0_level_0,rare,full_name,name,desc
count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
222,True,yellow stones,yellow,stones
37,False,blue sky,blue,sky
897,False,red blood,red,blood
22,True,green bones,green,bones


### ADD ROWS of Data:

In [98]:
# Currently we have this data (note that 'count' is the index here):
rdf

Unnamed: 0_level_0,rare,full_name,name,desc
count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
222,True,yellow stones,yellow,stones
37,False,blue sky,blue,sky
897,False,red blood,red,blood
22,True,green bones,green,bones


In [106]:
# Let's add a ROW. DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
# so wrap the new row (a dict of column -> value) in a one-row DataFrame and use pd.concat.
# ignore_index=True discards the existing index labels and renumbers the rows 0..n-1;
# columns that the new row does not provide are filled with NaN.
pd.concat([rdf, pd.DataFrame([{'desc': 'diamonds'}])], ignore_index=True)

Unnamed: 0,rare,full_name,name,desc
0,1.0,yellow stones,yellow,stones
1,0.0,blue sky,blue,sky
2,0.0,red blood,red,blood
3,1.0,green bones,green,bones
4,,,,diamonds


In [110]:
# A whole DataFrame can be added as new ROWs too. Say we have this dict:
smth = {
    'name': ['zuchini', 'crytal'],
    'desc': ['lon', 'castle'],
    'count': [15, 22],
}
rdf222 = pd.DataFrame(data=smth)
# NOTICE: this new DataFrame has a different COL order and different indexes,
# but we would still like to add it to our rdf DF.
rdf222

Unnamed: 0,name,desc,count
0,zuchini,lon,15
1,crytal,castle,22


In [113]:
# Add the rows of rdf222 below the rows of rdf. pd.concat is used because DataFrame.append
# was deprecated in pandas 1.4 and removed in 2.0.
# NOTE: there is no inplace=True option here - a new DataFrame is returned - so to keep
# the result we just assign it back to the same variable (like any other var).
pd.concat([rdf, rdf222], ignore_index=True, sort=False)

Unnamed: 0,rare,full_name,name,desc,count
0,True,yellow stones,yellow,stones,
1,False,blue sky,blue,sky,
2,False,red blood,red,blood,
3,True,green bones,green,bones,
4,,,zuchini,lon,15.0
5,,,crytal,castle,22.0


In [115]:
# so perma change: store the combined rows back in rdf
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
rdf = pd.concat([rdf, rdf222], ignore_index=True, sort=False)
rdf

Unnamed: 0,rare,full_name,name,desc,count
0,True,yellow stones,yellow,stones,
1,False,blue sky,blue,sky,
2,False,red blood,red,blood,
3,True,green bones,green,bones,
4,,,zuchini,lon,15.0
5,,,crytal,castle,22.0


In [117]:
# DROPPING ROWs (INDEXES in fact):
# same drill as dropping a COL, but this time the keyword is index and the value is the row label:
rdf.drop(index=5, inplace=True) # crystal castle is gone
rdf # proof

Unnamed: 0,rare,full_name,name,desc,count
0,True,yellow stones,yellow,stones,
1,False,blue sky,blue,sky,
2,False,red blood,red,blood,
3,True,green bones,green,bones,
4,,,zuchini,lon,15.0


In [122]:
# Say we want to remove several ROWS that share the same name, like 'zuchini'.
# First, quickly add another row with that name. The dict is wrapped in a one-row DataFrame
# and added with pd.concat, because DataFrame.append was removed in pandas 2.0.
rdf = pd.concat([rdf, pd.DataFrame([{'name': 'zuchini', 'desc': 'DELETE US!'}])], ignore_index=True)

In [123]:
# once we have 2x zuchini lets delete them with our flt combo:

Unnamed: 0,rare,full_name,name,desc,count
0,True,yellow stones,yellow,stones,
1,False,blue sky,blue,sky,
2,False,red blood,red,blood,
3,True,green bones,green,bones,
4,,,zuchini,lon,15.0
5,,,zuchini,DELETE US!,


In [125]:
# Boolean mask that marks every row whose name is 'zuchini'.
flt = rdf['name'].eq('zuchini')
# NOTICE .index - IT IS VERY IMPORTANT: it turns the matching rows into their index labels,
# which is what the index= keyword of drop expects. Take a look at the data:
rdf.drop(index=rdf.loc[flt].index)

Unnamed: 0,rare,full_name,name,desc,count
0,True,yellow stones,yellow,stones,
1,False,blue sky,blue,sky,
2,False,red blood,red,blood,
3,True,green bones,green,bones,


---
## Sorting Data