In [13]:
import pandas as pd
import numpy as np

## Membuat DataFrame

In [14]:
# Build a three-row DataFrame from a dict that maps each column name to
# the list of values for that column.
df = pd.DataFrame(
    {
        "Nama": ["Ahmad", "Joko", "Adi"],
        "Umur": [12, 13, 15],
        "Kelas": [6, 7, 8],
    }
)

In [15]:
# Show the DataFrame created above.
df

Unnamed: 0,Nama,Umur,Kelas
0,Ahmad,12,6
1,Joko,13,7
2,Adi,15,8


In [16]:
# List the column labels; they are returned as a pandas Index.
df.columns

Index(['Nama', 'Umur', 'Kelas'], dtype='object')

In [17]:
# Rename the first column through the public API. Assigning into
# `df.columns.values` writes to the Index's backing array directly; an Index
# is meant to be immutable, so that can leave label lookups out of sync with
# the displayed names. `rename` returns a new frame with the updated label.
df = df.rename(columns={"Nama": "Nama Singkat"})

In [18]:
# Inspect the column labels again after the first label was changed.
df.columns

Index(['Nama Singkat', 'Umur', 'Kelas'], dtype='object')

In [19]:
# Column-oriented input: each key becomes a column name and each list
# supplies that column's values, one per row.
data = {
    "calories": [420, 380, 390],
    "duration": [50, 40, 45],
    "pulse": [120, 173, 221],
}

In [20]:
# Build a new DataFrame from `data`; this rebinds `df`, replacing the earlier frame.
df = pd.DataFrame(data)

In [21]:
# Show the new DataFrame.
df

Unnamed: 0,calories,duration,pulse
0,420,50,120
1,380,40,173
2,390,45,221


## Mengakses elemen pada DataFrame

#### ILOC: Positional indexing

In [22]:
# Positional selection: rows at positions 0-2 (the stop value 3 is excluded)
# and the columns at positions 0 and 1.
df.iloc[0:3, [0, 1]]

Unnamed: 0,calories,duration
0,420,50
1,380,40
2,390,45


In [23]:
# Same columns, but only rows at positions 0 and 1 (the stop value 2 is excluded).
df.iloc[0:2, [0, 1]]

Unnamed: 0,calories,duration
0,420,50
1,380,40


In [26]:
# Rows at positions 0-2 with the columns at positions 0 and 2; the column
# positions in the list do not have to be adjacent.
df.iloc[0:3, [0, 2]]

Unnamed: 0,calories,pulse
0,420,120
1,380,173
2,390,221


In [27]:
# Bare slices on both axes select every row and every column.
df.iloc[:, :]

Unnamed: 0,calories,duration,pulse
0,420,50,120
1,380,40,173
2,390,45,221


#### LOC: Label indexing

In [28]:
# Label-based selection: rows labelled 0 through 2 (the end label is included,
# unlike iloc) and every column up to and including 'calories'.
df.loc[0:2, :'calories']

Unnamed: 0,calories
0,420
1,380
2,390


In [32]:
# Rows labelled 0 through 2 and the single column 'pulse', passed as a
# one-element list of labels.
df.loc[0:2, ['pulse']]

Unnamed: 0,pulse
0,120
1,173
2,221


In [34]:
# Rows labelled 0 through 2 and two columns chosen by label.
df.loc[0:2, ['calories', 'duration']]

Unnamed: 0,calories,duration
0,420,50
1,380,40
2,390,45


In [35]:
# Single-bracket access with one column label returns that column.
df['calories']

0    420
1    380
2    390
Name: calories, dtype: int64

In [36]:
# A single column selected by label is a pandas Series.
type(df['calories'])

pandas.core.series.Series

In [37]:
# Selecting with a list of labels (double brackets) gives a DataFrame instead.
type(df[['calories']])

pandas.core.frame.DataFrame

In [38]:
# Read one cell with a single .loc lookup (row label 0, column 'calories')
# rather than chaining df[...][...], which performs two separate indexing
# operations and is the pattern pandas warns about for assignments.
df.loc[0, 'calories']

420

In [39]:
# Select rows labelled 0 and 1 of the 'calories' column in one .loc call;
# passing the column as a list keeps the result a DataFrame, as before,
# without chaining two indexing operations.
df.loc[[0, 1], ['calories']]

Unnamed: 0,calories
0,420
1,380


### Memberi nama pada index

In [40]:
# Column-oriented input for the next example: each key is a column name and
# each list holds that column's values, one per row.
data = {
    "calories": [420, 380, 390],
    "duration": [50, 40, 45],
}

In [42]:
# Supply explicit row labels instead of the default 0..n-1 integer index.
df = pd.DataFrame(data, index=['data1', 'data2', 'data3'])

In [43]:
# Show the frame; the row labels are now data1..data3.
df

Unnamed: 0,calories,duration
data1,420,50
data2,380,40
data3,390,45


In [44]:
# iloc stays purely positional even with a labelled index: this is the
# first row only (the stop value 1 is excluded).
df.iloc[0:1]

Unnamed: 0,calories,duration
data1,420,50


In [45]:
# Positions 0-2, i.e. all three rows of this frame.
df.iloc[0:3]

Unnamed: 0,calories,duration
data1,420,50
data2,380,40
data3,390,45


## Membaca File CSV dengan DataFrame

In [46]:
# Load the CSV file (relative path) into a DataFrame; this rebinds `df`.
df = pd.read_csv('assets/data.csv')

In [47]:
# Confirm that read_csv returned a DataFrame.
type(df)

pandas.core.frame.DataFrame

### Melihat DataFrame

In [48]:
# Preview the first rows; with no argument, five rows appear in the output.
df.head()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
0,60,110,130,409.1
1,60,117,145,479.0
2,60,103,135,340.0
3,45,109,175,282.4
4,45,117,148,406.0


In [49]:
# Preview the last rows; with no argument, five rows appear in the output.
df.tail()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
164,60,105,140,290.8
165,60,110,145,300.0
166,60,115,145,310.2
167,75,120,150,320.4
168,75,125,150,330.4


In [50]:
# Pass a row count to head() to preview the first 10 rows.
df.head(10)

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
0,60,110,130,409.1
1,60,117,145,479.0
2,60,103,135,340.0
3,45,109,175,282.4
4,45,117,148,406.0
5,60,102,127,300.0
6,60,110,136,374.0
7,45,104,134,253.3
8,30,109,133,195.1
9,60,98,124,269.0


In [51]:
# Explicit row count for tail(); the output matches the argument-free
# tail() call above.
df.tail(5)

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
164,60,105,140,290.8
165,60,110,145,300.0
166,60,115,145,310.2
167,75,120,150,320.4
168,75,125,150,330.4


In [52]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(169, 4)

In [53]:
# Data type of each column.
df.dtypes

Duration      int64
Pulse         int64
Maxpulse      int64
Calories    float64
dtype: object

### Membaca Informasi pada DataFrame

In [54]:
# Print a summary of the index, columns, non-null counts, dtypes and memory
# usage. In the recorded output, Calories has 164 non-null values out of
# 169 rows, so that column contains missing entries.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169 entries, 0 to 168
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Duration  169 non-null    int64  
 1   Pulse     169 non-null    int64  
 2   Maxpulse  169 non-null    int64  
 3   Calories  164 non-null    float64
dtypes: float64(1), int64(3)
memory usage: 5.4 KB


In [55]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
count,169.0,169.0,169.0,164.0
mean,63.846154,107.461538,134.047337,375.790244
std,42.299949,14.510259,16.450434,266.379919
min,15.0,80.0,100.0,50.3
25%,45.0,100.0,124.0,250.925
50%,60.0,105.0,131.0,318.6
75%,60.0,111.0,141.0,387.6
max,300.0,159.0,184.0,1860.4


In [57]:
# Mean of the Calories column; the value matches the mean row of describe().
df['Calories'].mean()

375.79024390243904

In [58]:
# Median of the Calories column; the value matches the 50% row of describe().
df['Calories'].median()

318.6

In [60]:
# mode() returns a Series because several values can tie for most frequent;
# [0] takes its first entry.
df['Calories'].mode()[0]

300.0

In [61]:
# Pairwise correlation between the columns, returned as a square table.
df.corr()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
Duration,1.0,-0.155408,0.009403,0.922717
Pulse,-0.155408,1.0,0.786535,0.025121
Maxpulse,0.009403,0.786535,1.0,0.203813
Calories,0.922717,0.025121,0.203813,1.0
