In [1]:
import pandas as pd

In [2]:
bond = pd.read_csv("jamesbond.csv")
bond.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


# The .set_index() and .reset_index() Methods

In [3]:
bond.set_index("Film", inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [4]:
bond.reset_index(drop=False, inplace = True)
bond.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [5]:
bond.set_index("Film", inplace=True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [6]:
#bond.set_index("Year") #it wipes out Film column!
## we need to first convert Film column back to column and after that we can set a Year as Index
bond.reset_index(inplace = True)
bond.set_index("Year", inplace = True)  #Recommendation - do not method chain here!
bond.head(3)                            #keep the code clean

Unnamed: 0_level_0,Film,Actor,Director,Box Office,Budget,Bond Actor Salary
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1962,Dr. No,Sean Connery,Terence Young,448.8,7.0,0.6
1963,From Russia with Love,Sean Connery,Terence Young,543.8,12.6,1.6
1964,Goldfinger,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [7]:
#5.3

# Retrieve Rows by Index Label with .loc()

In [8]:
bond = pd.read_csv("jamesbond.csv", index_col = "Film") #tweak 1: use ndex_col
bond.sort_index() #tweak 2: use sort_index
                  #when index is sorted, pandas works faster
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [9]:
#NOTE: .loc method uses bracket parenthesis
bond.loc["Goldfinger"]
bond.loc["GoldenEye"]

Year                            1995
Actor                 Pierce Brosnan
Director             Martin Campbell
Box Office                     518.5
Budget                          76.9
Bond Actor Salary                5.1
Name: GoldenEye, dtype: object

In [10]:
bond.loc["Sacred Bond"] #if item does not exist --> .loc method returns error

KeyError: 'the label [Sacred Bond] is not in the [index]'

In [None]:
bond.loc["Casino Royale"]  #if more than 1 item exist --> .loc returns all that match

In [None]:
bond.loc["Diamonds Are Forever" : "Moonraker"]  #extract values in a row
                                                #last value is inclusive

In [None]:
bond.loc["GoldenEye" :]  #extract every movie after GoldenEye

In [None]:
bond.loc[["Spectre", "Moonraker", "Octopussy"]]  #extract multiple non-sequencial values

In [None]:
bond.loc[["For Your Eyes Only", "Live and Let Die", "Gold Bond"]]  #be careful when extracting multiple non-sequencial values
                                                # "Gold Bond" does not exist -->we get NaN values

In [None]:
"Gold Bond" in bond.index # False means value does not exist

# Retrieve Row(s) by Index Position with .iloc()

In [None]:
bond = pd.read_csv("jamesbond.csv")
bond.head(3)

In [None]:
bond.loc[15]  #here both methods produce same result
bond.iloc[15]

bond.iloc[[15,20]]
bond.iloc[:4]  #4 is not included
bond.iloc[4:8]
bond.iloc[20:]

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col="Film")
bond.sort_index()  #each of these items receives index position (that is currently not seen)
                   #we can still use index_position operations
                   #it does NOT matter which column we use as index
bond.head(3)

In [None]:
bond.iloc[0]

In [None]:
bond.iloc[:8]  #8 is excluded from list

# The Catch-All .ix[] Method

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col="Film")
bond.sort_index() 
bond.head(3)

In [12]:
bond.ix["GoldenEye"] #.ix is deprecated. Please use
                     #.loc for label based indexing or
                     #.iloc for positional indexing
bond.ix["A View to a Kill" : "The World Is Not Enough"] #"The World Is Not Enough" IS included

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  after removing the cwd from sys.path.


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
The Living Daylights,1987,Timothy Dalton,John Glen,313.5,68.8,5.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Tomorrow Never Dies,1997,Pierce Brosnan,Roger Spottiswoode,463.2,133.9,10.0
The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5


In [None]:
# bond.ix["Sacred Bond"]  #produces error
#bond.ix["Spectre", "Sacred Bond"] #produces error
"Spectre" in bond.index  #NOTE: way to check if item exists
"Sacred Bond" in bond.index

In [16]:
bond.ix[10]
bond.ix[5:10]  #10 IS NOT included
bond.ix[[8,16,24]]
#bond.ix[100]  #produces error, 101th item does not exist

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
The Living Daylights,1987,Timothy Dalton,John Glen,313.5,68.8,5.2
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5


# Second Arguments to .loc[], .iloc[], and .ix[] Methods

In [18]:
bond = pd.read_csv("jamesbond.csv", index_col = "Film") 
bond.sort_index() 
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [34]:
#simple extractions
bond.loc["Moonraker","Actor"]
bond.loc["Moonraker","Director":"Budget"]
bond.loc["Moonraker",["Actor","Budget","Year"]]

bond.iloc[14]  #pulls out all info for row 14
bond.iloc[14,2]  #pulls out info for row 14 and column 2
bond.iloc[14,2 : 5]  # extract sequential values
bond.iloc[14,[5, 3, 2]]  # extract non-sequential values

bond.ix[20]
bond.ix[20, "Budget"]  #mix and match - strings and number
bond.ix["The Man with the Golden Gun", 2]
bond.ix["The Man with the Golden Gun", :4]
bond.ix[5,3]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  # This is added back by InteractiveShellApp.init_path()
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  if sys.path[0] == '':
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  del sys.path[0]
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for posi

514.2

# Set New Values for a Specific Cell or Row

In [35]:
bond = pd.read_csv("jamesbond.csv", index_col = "Film") 
bond.sort_index() 
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [36]:
bond.ix["Dr. No"]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Year                          1962
Actor                 Sean Connery
Director             Terence Young
Box Office                   448.8
Budget                           7
Bond Actor Salary              0.6
Name: Dr. No, dtype: object

In [39]:
# assign new value into column
bond.ix["Dr. No","Actor"]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


'Sir Sean Connery'

In [38]:
bond.ix["Dr. No","Actor"] = "Sir Sean Connery"  #replacing with new value

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


In [40]:
bond.ix["Dr. No"]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Year                             1962
Actor                Sir Sean Connery
Director                Terence Young
Box Office                      448.8
Budget                              7
Bond Actor Salary                 0.6
Name: Dr. No, dtype: object

In [43]:
bond.ix["Dr. No", ["Box Office", "Budget", "Bond Actor Salary"]]  

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Box Office           4.488e+08
Budget                   7e+06
Bond Actor Salary       600000
Name: Dr. No, dtype: object

In [42]:
bond.ix["Dr. No", ["Box Office", "Budget", "Bond Actor Salary"]] =[448800000, 7000000,600000]  #replacing with PY list  multiple new values

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


In [44]:
bond.ix["Dr. No", "Box Office"]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


448800000.0

# Set Multiple Values in DataFrame

In [45]:
bond = pd.read_csv("jamesbond.csv", index_col = "Film") 
bond.sort_index() 
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [51]:
#why is this way incorrect?
#we get error --> A value is trying to be set on a copy of a slice from a DataFrame.

mask = bond["Actor"] == "Sean Connery"  #we get NEW DataFrame
bond[mask]["Actor"] = "Sir Sean Connery"  #we do not work with original bond dataframe!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [55]:
#example on df2 of explanation on df2
df2 =  bond[mask]
df2["Actor"] = "Sir Sean Connery"
df2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sir Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sir Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sir Sean Connery,Guy Hamilton,820.4,18.6,3.2
Thunderball,1965,Sir Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sir Sean Connery,Lewis Gilbert,514.2,59.9,4.4
Diamonds Are Forever,1971,Sir Sean Connery,Guy Hamilton,442.5,34.7,5.8
Never Say Never Again,1983,Sir Sean Connery,Irvin Kershner,380.0,86.0,


In [56]:
bond

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,


In [57]:
#how to change data in original DataFrame

In [60]:
bond.ix[mask]  #this is NOT COPY
               #these are original rows in our DataFrame
bond.ix[mask,"Actor"]
bond.ix[mask,"Actor"] = "Sir Sean Connery"

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  This is separate from the ipykernel package so we can avoid doing imports until
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  after removing the cwd from sys.path.


In [61]:
bond

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sir Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sir Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sir Sean Connery,Guy Hamilton,820.4,18.6,3.2
Thunderball,1965,Sir Sean Connery,Terence Young,848.1,41.9,4.7
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
You Only Live Twice,1967,Sir Sean Connery,Lewis Gilbert,514.2,59.9,4.4
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
Diamonds Are Forever,1971,Sir Sean Connery,Guy Hamilton,442.5,34.7,5.8
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,


# Rename Index Labels or Columns in a DataFrame

In [81]:
bond = pd.read_csv("jamesbond.csv", index_col = "Film") 
bond.sort_index() 
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [63]:
bond.rename(columns = {"Year" : "Release Date", "Box Office" : "Revenue"},inplace=True)  #{}  uses python dictionary key:value pair
                                                                       #to modify original DataFrame use inplace=True

In [65]:
bond.head(1)

Unnamed: 0_level_0,Release Date,Actor,Director,Revenue,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6


In [67]:
#way 1: here we can pick and choose which title to change
bond.rename(index = { "Dr. No" : "Dr No",
                      "GoldenEye" : "Golden Eye",
                       "The World Is Not Enough" : "Best Bond Movie Ever"},inplace=True)

In [68]:
bond.ix["Best Bond Movie Ever"]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Release Date                   1999
Actor                Pierce Brosnan
Director              Michael Apted
Revenue                       439.5
Budget                        158.3
Bond Actor Salary              13.5
Name: Best Bond Movie Ever, dtype: object

In [69]:
#way 2: here we set a list and change all at once
bond.columns = ["Year of Release","Actor","Director","Gross","Cost","Salary"]

In [70]:
bond.head(3)

Unnamed: 0_level_0,Year of Release,Actor,Director,Gross,Cost,Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


# Delete Rows or Columns from a DataFrame

In [85]:
bond = pd.read_csv("jamesbond.csv", index_col = "Film") 
bond.sort_index(inplace = True) 
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [74]:
bond.drop("A View to a Kill") #axis=0 at deafult removes the rows
     #if the index column is string, we insert string value
     #if the index column is number, we insert numbert value
#bond.drop(["A View to a Kill", "Casino Royale"],inplace=True)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9


In [77]:
#to remove columns, change axis = "columns" OR axis = 1
bond.drop("Box Office", axis = "columns")
bond.drop(labels = ["Box Office", "Bond Actor Salary", "Actor"], axis = "columns") #inplace = True

Unnamed: 0_level_0,Year,Director,Budget
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A View to a Kill,1985,John Glen,54.5
Casino Royale,2006,Martin Campbell,145.3
Casino Royale,1967,Ken Hughes,85.0
Diamonds Are Forever,1971,Guy Hamilton,34.7
Die Another Day,2002,Lee Tamahori,154.2
Dr. No,1962,Terence Young,7.0
For Your Eyes Only,1981,John Glen,60.2
From Russia with Love,1963,Terence Young,12.6
GoldenEye,1995,Martin Campbell,76.9
Goldfinger,1964,Guy Hamilton,18.6


In [82]:
#bond.pop("Actor")  #removes Series from DataFrame 
actor = bond.pop("Actor")  #and also returns it and can store inside variable 
                           #think as cut and paste data from one df into another

In [83]:
actor 

Film
Dr. No                               Sean Connery
From Russia with Love                Sean Connery
Goldfinger                           Sean Connery
Thunderball                          Sean Connery
Casino Royale                         David Niven
You Only Live Twice                  Sean Connery
On Her Majesty's Secret Service    George Lazenby
Diamonds Are Forever                 Sean Connery
Live and Let Die                      Roger Moore
The Man with the Golden Gun           Roger Moore
The Spy Who Loved Me                  Roger Moore
Moonraker                             Roger Moore
For Your Eyes Only                    Roger Moore
Never Say Never Again                Sean Connery
Octopussy                             Roger Moore
A View to a Kill                      Roger Moore
The Living Daylights               Timothy Dalton
Licence to Kill                    Timothy Dalton
GoldenEye                          Pierce Brosnan
Tomorrow Never Dies                Pierce Bro

In [88]:
del bond["Director"]  # deletes columns from DataFrame

In [89]:
bond

Unnamed: 0_level_0,Year,Actor,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,1985,Roger Moore,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,581.5,145.3,3.3
Casino Royale,1967,David Niven,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,465.4,154.2,17.9
Dr. No,1962,Sean Connery,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,449.4,60.2,
From Russia with Love,1963,Sean Connery,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,820.4,18.6,3.2


# Create Random Sample

In [91]:
bond = pd.read_csv("jamesbond.csv", index_col = "Film") 
bond.sort_index(inplace = True) 
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [96]:
bond.sample()  #extract one row at random
bond.sample(n = 5)  #we make a NEW df from original DataFrame
bond.sample(frac  = .25)  #frac is short for fractal -pulls percentage of original DataFrame

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Tomorrow Never Dies,1997,Pierce Brosnan,Roger Spottiswoode,463.2,133.9,10.0


In [99]:
bond.sample(n = 3)  #axis = None means axis = 0 OR axis = "rows"

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6


In [100]:
bond.sample(n = 3, axis = 1)  # means axis = "columns"

Unnamed: 0_level_0,Bond Actor Salary,Actor,Year
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A View to a Kill,9.1,Roger Moore,1985
Casino Royale,3.3,Daniel Craig,2006
Casino Royale,,David Niven,1967
Diamonds Are Forever,5.8,Sean Connery,1971
Die Another Day,17.9,Pierce Brosnan,2002
Dr. No,0.6,Sean Connery,1962
For Your Eyes Only,,Roger Moore,1981
From Russia with Love,1.6,Sean Connery,1963
GoldenEye,5.1,Pierce Brosnan,1995
Goldfinger,3.2,Sean Connery,1964


# The .nsmallest() and .nlargest() Methods

In [102]:
bond = pd.read_csv("jamesbond.csv", index_col = "Film") 
bond.sort_index(inplace = True) 
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [104]:
bond.sort_values("Box Office", ascending = False).head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [106]:
bond.nlargest(3, columns = "Box Office")  #number of largest

bond.nsmallest (n = 3, columns = "Box Office")  #number of smallest

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6


In [108]:
bond.nlargest(3, columns = "Budget")
bond.nsmallest (n = 6, columns = "Bond Actor Salary")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [109]:
bond["Box Office"].nlargest(8)

Film
Skyfall                  943.5
Thunderball              848.1
Goldfinger               820.4
Spectre                  726.7
Casino Royale            581.5
From Russia with Love    543.8
Moonraker                535.0
The Spy Who Loved Me     533.0
Name: Box Office, dtype: float64

# Filtering with the .where() Method

In [113]:
mask = bond["Actor"] == "Sean Connery"  #mask returns boolean DataFrame
bond[mask]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [115]:
bond.where(mask)  #we need to insert to where method a Boolean Series
                  #pandas returns full DataFrame but we only get values where condition "Sean Connery" is met

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,1971.0,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,,,,,,
Dr. No,1962.0,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,,,,,,
From Russia with Love,1963.0,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [116]:
bond.where(bond["Box Office"] > 800)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,,,,,,
Die Another Day,,,,,,
Dr. No,,,,,,
For Your Eyes Only,,,,,,
From Russia with Love,,,,,,
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [117]:
#multiple conditions can be feed into where method
#ANDand OR can be used
mask2 = bond["Box Office"] > 800

bond.where(mask & mask2)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,,,,,,
Die Another Day,,,,,,
Dr. No,,,,,,
For Your Eyes Only,,,,,,
From Russia with Love,,,,,,
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


# The .query() Method
#### NOTE: method works if column name have NO spaces

In [118]:
bond = pd.read_csv("jamesbond.csv", index_col = "Film") 
bond.sort_index(inplace = True) 
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [122]:
bond.columns  #possible problem with Box Office and Bond Actor Salary
#pure python code - we fill empty spaces with underscores
bond.columns = [column_name.replace(" ","_") for column_name in bond.columns]
bond.head(1)

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


In [126]:
bond.query('Actor == "Sean Connery" ')
bond.query("Director == 'Terence Young'")
bond.query('Actor != "Roger Moore" ')  # not equal

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,


In [127]:
bond.query("Box_Office > 600")

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [129]:
bond.query("Actor == 'Roger Moore' and Director == 'John Glen'")  # and and or operator are written as string!
bond.query("Actor == 'Roger Moore' or Director == 'John Glen'") 

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
The Living Daylights,1987,Timothy Dalton,John Glen,313.5,68.8,5.2
The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,
The Spy Who Loved Me,1977,Roger Moore,Lewis Gilbert,533.0,45.1,


In [131]:
bond.query ("Actor in ['Timothy Dalton', 'Georde Lazenby']")

bond.query ("Actor not in ['Sean Connery', 'Roger Moore']")

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
The Living Daylights,1987,Timothy Dalton,John Glen,313.5,68.8,5.2


# A Review of the .apply() Method on Single Columns

In [132]:
bond = pd.read_csv("jamesbond.csv", index_col = "Film") 
bond.sort_index(inplace = True) 
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [137]:
def convert_to_string_and_add_millions(number):
    return str(number) + "  MILLIONS!"

bond["Box Office"] = bond["Box Office"].apply(convert_to_string_and_add_millions)

In [140]:
bond["Budget"].apply(convert_to_string_and_add_millions)
bond["Budget"] = bond["Budget"].apply(convert_to_string_and_add_millions)