In [167]:
import pandas as pd
import numpy as np

In [168]:
# Column-oriented character records: each key becomes a DataFrame column.
data = {
    'Character': ['Goku', 'Vegeta', 'Frieza', 'Piccolo'],
    'Power Level': [1_500_000_000, 1_400_000_000, 1_200_000_000, 1_000_000_000],
    'Species': ['Saiyan', 'Saiyan', 'Frieza Race', 'Namekian'],
}

# Custom row labels; note that 'hero' repeats, so the index is not unique.
archetypes = ['hero', 'hero', 'villain', 'hero']
df = pd.DataFrame(data, index=archetypes)
print(df)

        Character  Power Level      Species
hero         Goku   1500000000       Saiyan
hero       Vegeta   1400000000       Saiyan
villain    Frieza   1200000000  Frieza Race
hero      Piccolo   1000000000     Namekian


By default, `DataFrame.index` is a `RangeIndex` object, which represents the integer labels from 0 to n − 1, where n is the total number of rows in the dataframe. Printing a `RangeIndex` only shows its start, stop, and step, so to see the individual labels you have to index it, slice it, or convert it to a list.

But if the index is non-default, meaning it holds custom labels, `DataFrame.index` is an `Index` object. In that case you can see the label values directly by printing `DataFrame.index` itself.

In [169]:
# Alternative that prints a plain list of labels (works for default and custom indexes):
# print([i for i in df.index])
print(df.index) # with custom labels, the Index repr shows the values directly

Index(['hero', 'hero', 'villain', 'hero'], dtype='object')


We can reset the index and move the old index labels into a column like this:

In [170]:
# Move the current index into a regular column (it arrives named 'index'),
# then give that column a descriptive name, all in one chained expression.
df = df.reset_index().rename(columns={'index': 'archetype'})
print(df)

  archetype Character  Power Level      Species
0      hero      Goku   1500000000       Saiyan
1      hero    Vegeta   1400000000       Saiyan
2   villain    Frieza   1200000000  Frieza Race
3      hero   Piccolo   1000000000     Namekian


In [171]:
# Shift every index label up by 2. Index arithmetic is vectorized, so no
# Python-level loop or intermediate NumPy array is needed.
# Note: this reads the current index, so re-running the cell shifts the labels again.
df = df.set_index(df.index + 2)

print(df)

  archetype Character  Power Level      Species
2      hero      Goku   1500000000       Saiyan
3      hero    Vegeta   1400000000       Saiyan
4   villain    Frieza   1200000000  Frieza Race
5      hero   Piccolo   1000000000     Namekian


In [172]:
# Promote the 'archetype' column back to the index, then clear the index name so
# no extra header line is printed for it. Rebinding `df` to the chained result
# avoids inplace=True and direct mutation of the index object.
df = df.set_index('archetype').rename_axis(None)
print(df)

        Character  Power Level      Species
hero         Goku   1500000000       Saiyan
hero       Vegeta   1400000000       Saiyan
villain    Frieza   1200000000  Frieza Race
hero      Piccolo   1000000000     Namekian


In [173]:
# Assign integer labels in a deliberately scrambled order so that sorting by
# index produces a visibly different row order.
df.index = [4, 2, 3, 1]
print(f'unsorted\n{df}\n')
print(f'sorted\n{df.sort_index()}\n')
# The outer string is double-quoted because reusing the enclosing quote character
# inside an f-string replacement field is a SyntaxError before Python 3.12.
print(f"by power level\n{df.sort_values(by='Power Level', ascending=False)}")

unsorted
  Character  Power Level      Species
4      Goku   1500000000       Saiyan
2    Vegeta   1400000000       Saiyan
3    Frieza   1200000000  Frieza Race
1   Piccolo   1000000000     Namekian

sorted
  Character  Power Level      Species
1   Piccolo   1000000000     Namekian
2    Vegeta   1400000000       Saiyan
3    Frieza   1200000000  Frieza Race
4      Goku   1500000000       Saiyan

by power level
  Character  Power Level      Species
4      Goku   1500000000       Saiyan
2    Vegeta   1400000000       Saiyan
3    Frieza   1200000000  Frieza Race
1   Piccolo   1000000000     Namekian


In [174]:
# Filter rows with a query expression. The backticks quote the column name
# 'Power Level', which contains a space; `and` combines the two conditions.
df.query('`Power Level` > 1100000000 and Species == "Saiyan"')

Unnamed: 0,Character,Power Level,Species
4,Goku,1500000000,Saiyan
2,Vegeta,1400000000,Saiyan


In [175]:
# Boolean-mask filter: keep rows whose Species is 'Saiyan' and whose
# Power Level exceeds 1100000000.
df.loc[(df['Species'] == 'Saiyan') & (df['Power Level'] > 1100000000)]

Unnamed: 0,Character,Power Level,Species
4,Goku,1500000000,Saiyan
2,Vegeta,1400000000,Saiyan


In [176]:
# 3x3 float array with three missing entries.
arr = np.array([
    [1, np.nan, 2],
    [np.nan, np.nan, 5],
    [9, 3, 4],
])
df = pd.DataFrame(arr)

# Each view is printed as "title, blank line, frame", with one blank line
# separating consecutive views.
views = (
    ('Original Array', df),
    ('Null Array', df.isna()),
    ('Not Null Array', df.notna()),
)
print('\n\n'.join(f'{title}\n\n{frame}' for title, frame in views))

Original Array

     0    1    2
0  1.0  NaN  2.0
1  NaN  NaN  5.0
2  9.0  3.0  4.0

Null Array

       0      1      2
0  False   True  False
1   True   True  False
2  False  False  False

Not Null Array

       0      1     2
0   True  False  True
1  False  False  True
2   True   True  True


In [177]:
# Replace every NaN with a placeholder string. The filled frame is only
# printed, not assigned, so `df` itself still contains its NaN values.
print(df.fillna('ding!'))

       0      1    2
0    1.0  ding!  2.0
1  ding!  ding!  5.0
2    9.0    3.0  4.0


In [185]:
print('drops all rows that have nan values')
# ignore_index=False keeps the surviving rows' original labels (row 2 stays
# labelled 2); ignore_index=True would relabel the result 0..n-1 instead.
print(df.dropna(ignore_index=False))
print()
print('drops all columns that have nan values')
# axis=1 switches from dropping rows to dropping columns that contain any NaN.
print(df.dropna(axis=1))

drops all rows that have nan values
     0    1    2
2  9.0  3.0  4.0

drops all columns that have nan values
     2
0  2.0
1  5.0
2  4.0
