# Pandas Display Options
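When pandas displays a large DataFrame, you’ll see a row or column of ellipses (...) somewhere in the middle, indicating that part of the data has been hidden from the output (not that it is missing from the DataFrame).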

`display.max_rows` and `display.max_columns` set the maximum number of rows and columns displayed when a frame is pretty-printed. Truncated lines are replaced by an ellipsis. Setting either option to `None` removes the limit.

In [1]:
import pandas as pd

# Show at most 100 rows before pandas truncates the output with an ellipsis.
pd.options.display.max_rows = 100
# None removes the column limit, so every column of a wide frame is shown.
pd.options.display.max_columns = None

In [11]:
# Record the pandas version this notebook was run with.
pd.__version__

'2.0.3'

In [2]:
# Display floats with 2 digits after the decimal point (display only; the
# stored values are not rounded).
pd.options.display.precision = 2

# View data using `.head()` and `.tail()`

In [3]:
# Load the penguins dataset from a CSV file; the path is relative to the
# current working directory.
df = pd.read_csv('../data/penguins.csv')

You can view the first few or last few rows of a DataFrame using the `.head()` or `.tail()` methods, respectively. You can specify the number of rows through the `n` argument (the default value is 5).

In [4]:
# Preview the first rows; with no argument, head() returns 5 rows.
df.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE


In [5]:
# Show only the last 3 rows.
df.tail(n=3)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
341,Gentoo,Biscoe,50.4,15.7,222.0,5750.0,MALE
342,Gentoo,Biscoe,45.2,14.8,212.0,5200.0,FEMALE
343,Gentoo,Biscoe,49.9,16.1,213.0,5400.0,MALE


In [6]:
# Inspect the row index; here it is a RangeIndex whose `stop` is exclusive,
# so the labels run from 0 to 343.
df.index

RangeIndex(start=0, stop=344, step=1)

In [7]:
# List the dtype of every column (text columns are reported as `object`).
df.dtypes

species               object
island                object
bill_length_mm       float64
bill_depth_mm        float64
flipper_length_mm    float64
body_mass_g          float64
sex                   object
dtype: object

# Understanding data using .describe()

In [12]:
# Summary statistics for the numeric columns: count, mean, std, min,
# quartiles and max.
df.describe()

Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g
count,342.0,342.0,342.0,342.0
mean,43.92,17.15,200.92,4201.75
std,5.46,1.97,14.06,801.95
min,32.1,13.1,172.0,2700.0
25%,39.23,15.6,190.0,3550.0
50%,44.45,17.3,197.0,4050.0
75%,48.5,18.7,213.0,4750.0
max,59.6,21.5,231.0,6300.0


In [9]:
# Replace the default 25%/50%/75% rows with custom percentiles.
df.describe(percentiles=[0.3, 0.5, 0.7])

Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g
count,342.0,342.0,342.0,342.0
mean,43.92,17.15,200.92,4201.75
std,5.46,1.97,14.06,801.95
min,32.1,13.1,172.0,2700.0
30%,40.2,15.93,191.0,3650.0
50%,44.45,17.3,197.0,4050.0
70%,47.37,18.5,210.0,4650.0
max,59.6,21.5,231.0,6300.0


In [10]:
# include='all' summarises every column; statistics that do not apply to a
# column's dtype are shown as NaN.
df.describe(include='all')

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
count,344,344,342.0,342.0,342.0,342.0,333
unique,3,3,,,,,2
top,Adelie,Biscoe,,,,,MALE
freq,152,168,,,,,168
mean,,,43.92,17.15,200.92,4201.75,
std,,,5.46,1.97,14.06,801.95,
min,,,32.1,13.1,172.0,2700.0,
25%,,,39.23,15.6,190.0,3550.0,
50%,,,44.45,17.3,197.0,4050.0,
75%,,,48.5,18.7,213.0,4750.0,


In [11]:
# Restrict the summary to object (text) columns: count, unique, top, freq.
df.describe(include='object')

Unnamed: 0,species,island,sex
count,344,344,333
unique,3,3,2
top,Adelie,Biscoe,MALE
freq,152,168,168


In [12]:
# Transpose the summary so each variable is a row and each statistic a column.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
bill_length_mm,342.0,43.92,5.46,32.1,39.23,44.45,48.5,59.6
bill_depth_mm,342.0,17.15,1.97,13.1,15.6,17.3,18.7,21.5
flipper_length_mm,342.0,200.92,14.06,172.0,190.0,197.0,213.0,231.0
body_mass_g,342.0,4201.75,801.95,2700.0,3550.0,4050.0,4750.0,6300.0


# Understanding data using .info()


In [13]:
# Concise overview: index range, per-column non-null counts and dtypes, and
# memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   species            344 non-null    object 
 1   island             344 non-null    object 
 2   bill_length_mm     342 non-null    float64
 3   bill_depth_mm      342 non-null    float64
 4   flipper_length_mm  342 non-null    float64
 5   body_mass_g        342 non-null    float64
 6   sex                333 non-null    object 
dtypes: float64(4), object(3)
memory usage: 18.9+ KB


In [14]:
# Re-display the first rows for reference: row 3 has missing measurements.
df.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE


In [15]:
# isna() returns a boolean frame of the same shape; True marks a missing value.
df.isna().head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False
3,False,False,True,True,True,True,True
4,False,False,False,False,False,False,False


In [16]:
# Summing the boolean mask column-wise counts the missing values per column
# (each True counts as 1).
df.isna().sum()

species               0
island                0
bill_length_mm        2
bill_depth_mm         2
flipper_length_mm     2
body_mass_g           2
sex                  11
dtype: int64

In [17]:
# Sum the per-column counts to get the total number of missing cells.
df.isna().sum().sum()

19

# Understanding your data using .shape


In [18]:
df.shape # Get the number of rows and columns as a (rows, columns) tuple

(344, 7)

In [19]:
df.shape[0] # Get the number of rows only (first element of the shape tuple)

344

In [20]:
df.shape[1] # Get the number of columns only (second element of the shape tuple)

7

# Get all columns and column names


In [21]:
# Column labels, returned as a pandas Index object.
df.columns

Index(['species', 'island', 'bill_length_mm', 'bill_depth_mm',
       'flipper_length_mm', 'body_mass_g', 'sex'],
      dtype='object')

In [22]:
# Confirm that df.columns is an Index rather than a plain list.
type(df.columns)

pandas.core.indexes.base.Index

In [23]:
# Convert the Index to a plain Python list of column names.
list(df.columns)

['species',
 'island',
 'bill_length_mm',
 'bill_depth_mm',
 'flipper_length_mm',
 'body_mass_g',
 'sex']

# How often specific values occur in a column

The `value_counts()` method returns a Series whose index holds the distinct values of the column and whose values are how often each one occurs, sorted from most to least frequent.

In [24]:
# Count how many rows belong to each island, most frequent first.
df.island.value_counts()

island
Biscoe       168
Dream        124
Torgersen     52
Name: count, dtype: int64

In [25]:
# The result is indexed by the distinct values, so a label lookup returns
# that value's count.
df.island.value_counts()['Biscoe']

168

In [26]:
# Positional lookup: .iloc[0] returns the count of the first (most frequent)
# entry. A bare [0] on this string-labelled Series only works through an
# integer-to-position fallback that newer pandas releases deprecate, so the
# position is requested explicitly.
df.island.value_counts().iloc[0]

168

In [27]:
# value_counts() returns a pandas Series.
type(df.island.value_counts())

pandas.core.series.Series

In [28]:
# The index of the resulting Series holds the distinct island names.
df.island.value_counts().index

Index(['Biscoe', 'Dream', 'Torgersen'], dtype='object', name='island')

In [29]:
# Same frequency count, this time for the species column.
df.species.value_counts()

species
Adelie       152
Gentoo       124
Chinstrap     68
Name: count, dtype: int64

In [30]:
df.island.value_counts(normalize=True) # normalize=True returns proportions instead of raw counts; chain .sort_index() to order by label rather than frequency

island
Biscoe       0.49
Dream        0.36
Torgersen    0.15
Name: proportion, dtype: float64

# Check a list of unique values 
To see the distinct values in a column, use the `unique()` method, which returns them as a NumPy array:
    

In [31]:
# Distinct species values, returned as an array.
df.species.unique()

array(['Adelie', 'Chinstrap', 'Gentoo'], dtype=object)

In [32]:
# Distinct island values, returned as an array.
df.island.unique()

array(['Torgersen', 'Biscoe', 'Dream'], dtype=object)

In [33]:
# Missing values are reported too: nan appears alongside MALE and FEMALE.
df.sex.unique()

array(['MALE', 'FEMALE', nan], dtype=object)

# How to Sort Data in a Pandas DataFrame

In [34]:
# Show a single row as a reminder of the column layout before sorting.
df.head(1)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE


In [35]:
# sort_values() returns a new frame ordered by body mass (ascending by
# default); df itself is left unchanged.
# NOTE(review): the name `sorted` shadows Python's built-in sorted() for the
# rest of the kernel session. Later cells read this name, so renaming it
# (e.g. to `by_mass`) must be done in every cell that uses it.
sorted = df.sort_values(by='body_mass_g')
sorted.head(4)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
190,Chinstrap,Dream,46.9,16.6,192.0,2700.0,FEMALE
64,Adelie,Biscoe,36.4,17.1,184.0,2850.0,FEMALE
58,Adelie,Biscoe,36.5,16.6,181.0,2850.0,FEMALE
116,Adelie,Torgersen,38.6,17.0,188.0,2900.0,FEMALE


In [36]:
# Rows whose sort key is NaN are placed last by default, so the frame ends
# with the rows that have no body mass.
sorted.tail(4)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
253,Gentoo,Biscoe,59.6,17.0,230.0,6050.0,MALE
237,Gentoo,Biscoe,49.2,15.2,221.0,6300.0,MALE
3,Adelie,Torgersen,,,,,
339,Gentoo,Biscoe,,,,,


###### Sorting Multiple Pandas DataFrame Columns


In [37]:
# Sort by bill length first; bill depth only breaks ties between rows with
# the same bill length. This rebinds `sorted` to the new ordering.
sorted = df.sort_values(by=['bill_length_mm', 'bill_depth_mm'])
sorted.head(n=4)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
142,Adelie,Dream,32.1,15.5,188.0,3050.0,FEMALE
98,Adelie,Dream,33.1,16.1,178.0,2900.0,FEMALE
70,Adelie,Torgersen,33.5,19.0,190.0,3600.0,FEMALE
92,Adelie,Dream,34.0,17.1,185.0,3400.0,FEMALE


In [38]:
# As before, rows with NaN in the sort keys are placed at the end.
sorted.tail(4)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
169,Chinstrap,Dream,58.0,17.8,181.0,3700.0,FEMALE
253,Gentoo,Biscoe,59.6,17.0,230.0,6050.0,MALE
3,Adelie,Torgersen,,,,,
339,Gentoo,Biscoe,,,,,


###### Change Sort Order in Pandas sort_values

In [39]:
# ascending=False reverses the order, so the longest flippers come first.
# This rebinds `sorted` to the new ordering.
sorted = df.sort_values(by='flipper_length_mm', ascending=False)
sorted.head(n=5)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
283,Gentoo,Biscoe,54.3,15.7,231.0,5650.0,MALE
333,Gentoo,Biscoe,51.5,16.3,230.0,5500.0,MALE
335,Gentoo,Biscoe,55.1,16.0,230.0,5850.0,MALE
285,Gentoo,Biscoe,49.8,16.8,230.0,5700.0,MALE
295,Gentoo,Biscoe,48.6,16.0,230.0,5800.0,MALE


###### Sorting a Pandas DataFrame In Place
`sort_values()` accepts an `inplace=` parameter. This parameter defaults to `False`; setting it to `True` makes the operation modify the DataFrame in place instead of returning a new one. Let’s see what this looks like:

In [40]:
# inplace=True reorders df itself and returns None, so this cell displays
# nothing and every later cell sees the re-sorted frame.
df.sort_values(
    by='flipper_length_mm',
    ascending=False,
    inplace=True,
)

In [41]:
# df itself is now sorted; the original row labels (283, 333, ...) travel
# with their rows.
df.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
283,Gentoo,Biscoe,54.3,15.7,231.0,5650.0,MALE
333,Gentoo,Biscoe,51.5,16.3,230.0,5500.0,MALE
335,Gentoo,Biscoe,55.1,16.0,230.0,5850.0,MALE
285,Gentoo,Biscoe,49.8,16.8,230.0,5700.0,MALE
295,Gentoo,Biscoe,48.6,16.0,230.0,5800.0,MALE


In [42]:
# Sort ascending in place; ignore_index=True discards the old row labels and
# renumbers the sorted rows from 0.
df.sort_values(
    by='flipper_length_mm',
    inplace=True,
    ignore_index=True,
)

df.head(n=5)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Biscoe,37.9,18.6,172.0,3150.0,FEMALE
1,Adelie,Biscoe,37.8,18.3,174.0,3400.0,FEMALE
2,Adelie,Torgersen,40.2,17.0,176.0,3450.0,FEMALE
3,Adelie,Dream,33.1,16.1,178.0,2900.0,FEMALE
4,Adelie,Dream,37.2,18.1,178.0,3900.0,MALE
