# Pandas Basics in a Notebook

In [1]:
#Release: 1.2109.1901

In [2]:
import pandas as pd
import numpy as np

# Series

Pandas series adalah kumpulan object satu dimensi yang mirip dengan array

## Creating Series

Deklarasi pandas series menggunakan list

In [3]:
# Build a Series from a list of mixed-type values. The displayed result shows
# a default integer index (0..4) and the generic `object` dtype.
mixed_values = [1, 'Hello', 3.14, 'World!', True]
s = pd.Series(data=mixed_values)
s

0         1
1     Hello
2      3.14
3    World!
4      True
dtype: object

Deklarasi pandas series menggunakan dictionary

In [4]:
# Build a Series from a dictionary: the keys become the index labels and the
# values become the data.
f = dict(apples=10, bananas=20, cherries=50, pears=30)
fs = pd.Series(data=f)
fs

apples      10
bananas     20
cherries    50
pears       30
dtype: int64

## Series Operation

In [5]:
# Look up a single item by its index label.
print(fs.loc['bananas'])

20


In [6]:
# Positional slice: the first two items of the Series.
print(fs.iloc[:2])


apples     10
bananas    20
dtype: int64


In [7]:
# Update one item in place through its label.
# Note: every re-run of this cell adds another 5 to 'cherries'.
fs['cherries'] = fs['cherries'] + 5
print(fs)



apples      10
bananas     20
cherries    55
pears       30
dtype: int64


In [8]:
# Membership check with `in`, once for a label that is absent and once for a
# label that is present in the Series.
for label in ('grapes', 'pears'):
    print(label in fs)

False
True


In [9]:
# Element-wise arithmetic: divide every item by 10 (the result is float).
print(fs.div(10))


apples      1.0
bananas     2.0
cherries    5.5
pears       3.0
dtype: float64


# DataFrames

Pandas DataFrame adalah kumpulan object berbentuk tabular yang memiliki struktur kolom dan baris, mirip pada database

## Creating DataFrames

### Create from series

In [10]:
# DataFrame from Series: each Series becomes one column of the frame.
# Both Series are built from dictionaries that use the same fruit names as keys.
fruits = {"apples": 10, "bananas": 20, "cherries": 50, "pears": 30}
prices = {"apples": 1000, "bananas": 500, "cherries": 750, "pears": 900}

quantity = pd.Series(fruits)
price = pd.Series(prices)

df = pd.DataFrame(dict(quantity=quantity, price=price))

In [11]:
# tampilkan df

### Create from dictionary

In [12]:
# DataFrame from a dictionary of equal-length lists: every key becomes a
# column and rows get a default integer index.
f = {
    'name': ["apples", "bananas", "cherries", "pears", "pears"],
    'quantity': [20, 20, 50, 30, 40],
    'price': [1000, 500, 750, 900, 950],
}

df = pd.DataFrame(data=f)

In [13]:
# tampilkan df

### Create from csv file

Fungsi **read_csv** dapat digunakan untuk membaca file csv ke dalam pandas dataframe

In [14]:
# Load the California housing CSV file from sample_data.
# NOTE(review): `__` is an exercise placeholder to be replaced with a
# pd.read_csv(...) call; the exact file path under sample_data is not shown
# in this notebook, so confirm it before filling this in.
df_housing = __

In [15]:
# tampilkan df_housing


### Create from JSON file

Fungsi **read_json** dapat digunakan untuk membaca file JSON ke dalam pandas dataframe

In [16]:
# Load Anscombe's quartet data from sample_data.
# NOTE(review): `__` is an exercise placeholder to be replaced with a
# pd.read_json(...) call; the exact file path under sample_data is not shown
# in this notebook, so confirm it before filling this in.
df_anscombe = __

In [17]:
# tampilkan df_anscombe

## Accessing DataFrame

### DataFrame quick checking

Some useful commands to quickly check a DataFrame: `info(), head(), tail(), sample(), describe()`

In [18]:
# tampilkan struktur df menggunakan fungsi info


In [19]:
# tampilkan 10 record pertama dari df menggunakan fungsi head


In [20]:
# tampilkan 10 record terakhir df menggunakan fungsi tail


In [21]:
# tampilkan sample records secara acak menggunakan fungsi sample


In [22]:
# tampilkan nilai statistik df dengan fungsi describe


In [23]:
# Transposing the summary puts one row per column and one column per
# statistic, which is easier to read.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
quantity,5.0,32.0,13.038405,20.0,20.0,30.0,40.0,50.0
price,5.0,820.0,201.866292,500.0,750.0,900.0,950.0,1000.0


### Selecting rows or columns

In [24]:
# Select rows by integer position with a slice [start:stop]; the stop is
# exclusive, so this returns the rows at positions 2 and 3.
df.iloc[2:4]

Unnamed: 0,name,quantity,price
2,cherries,50,750
3,pears,30,900


In [25]:
# A column can also be accessed as an attribute of the DataFrame
df.price

0    1000
1     500
2     750
3     900
4     950
Name: price, dtype: int64

In [26]:
# Select a column by name. Passing a list of names returns a DataFrame
# (here with a single column).
df.loc[:, ["price"]]

Unnamed: 0,price
0,1000
1,500
2,750
3,900
4,950


In [27]:
# Select more than one column by listing their names.
df.loc[:, ['name', 'price']]

Unnamed: 0,name,price
0,apples,1000
1,bananas,500
2,cherries,750
3,pears,900
4,pears,950


In [28]:
# Filter rows with a boolean mask: keep rows whose quantity is greater than 20.
df.loc[df['quantity'].gt(20)]

Unnamed: 0,name,quantity,price
2,cherries,50,750
3,pears,30,900
4,pears,40,950


In [29]:
# Filter rows where name == 'pears' OR price > 900 (masks combined with `|`).
df.loc[df['name'].eq('pears') | df['price'].gt(900)]

Unnamed: 0,name,quantity,price
0,apples,20,1000
3,pears,30,900
4,pears,40,950


In [30]:
# Filter rows whose price lies strictly between 900 and 1000
# (both bounds are excluded).
df.query("900 < price < 1000")

Unnamed: 0,name,quantity,price
4,pears,40,950


In [31]:
# tampilkan data dengan kolom name dan price yang memiliki harga > 750


### Select unique values

In [32]:
# Show the distinct values of the name column, in order of first appearance.
pd.unique(df['name'])

array(['apples', 'bananas', 'cherries', 'pears'], dtype=object)

### Sorting rows

In [33]:
# Sort rows by quantity, smallest first (ascending is the default order).
df.sort_values(by="quantity", ascending=True)

Unnamed: 0,name,quantity,price
0,apples,20,1000
1,bananas,20,500
3,pears,30,900
4,pears,40,950
2,cherries,50,750


In [34]:
# Sort from largest to smallest by setting ascending=False.
df.sort_values(by="quantity", ascending=False)

Unnamed: 0,name,quantity,price
2,cherries,50,750
4,pears,40,950
3,pears,30,900
0,apples,20,1000
1,bananas,20,500


In [35]:
# Sort by several columns: quantity first, then price to break ties.
df.sort_values(by=["quantity", "price"])

Unnamed: 0,name,quantity,price
1,bananas,20,500
0,apples,20,1000
3,pears,30,900
4,pears,40,950
2,cherries,50,750


In [36]:
# Sort by quantity ascending, then by price descending; one flag per column.
df.sort_values(by=["quantity", "price"], ascending=[True, False])

Unnamed: 0,name,quantity,price
0,apples,20,1000
1,bananas,20,500
3,pears,30,900
4,pears,40,950
2,cherries,50,750


## Updating a DataFrame

### Adding rows

In [37]:
# Append a single row given as a dictionary.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# the record is wrapped in a one-row DataFrame and concatenated instead.
# ignore_index=True renumbers the rows 0..n-1 in the result.

new_rec = {'name': "grapes",
        'quantity': 30,
        'price':1200}
df1 = pd.concat([df, pd.DataFrame([new_rec])], ignore_index=True)
df1

Unnamed: 0,name,quantity,price
0,apples,20,1000
1,bananas,20,500
2,cherries,50,750
3,pears,30,900
4,pears,40,950
5,grapes,30,1200


Append multiple rows --> combine dataframes

In [38]:
# Several new records as a dictionary of lists, turned into a DataFrame with
# explicit index labels 6, 7 and 8.
new_rec = {
    'name': ["apples", "grapes", "oranges"],
    'quantity': [25, 30, 20],
    'price': [800, 1200, 700],
}

dfnew = pd.DataFrame(data=new_rec, index=list(range(6, 9)))

In [39]:
# Display the new rows; they carry the explicit index labels 6-8.
dfnew

Unnamed: 0,name,quantity,price
6,apples,25,800
7,grapes,30,1200
8,oranges,20,700


In [40]:
# Combine two DataFrames row-wise.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported replacement. Each frame keeps its own index
# labels (0-5 from df1, 6-8 from dfnew).
df2 = pd.concat([df1, dfnew])
df2

Unnamed: 0,name,quantity,price
0,apples,20,1000
1,bananas,20,500
2,cherries,50,750
3,pears,30,900
4,pears,40,950
5,grapes,30,1200
6,apples,25,800
7,grapes,30,1200
8,oranges,20,700


### Adding columns

In [41]:
# Add a derived column: total = price * quantity, computed row by row.
df2['total'] = df2['price'].mul(df2['quantity'])
df2

Unnamed: 0,name,quantity,price,total
0,apples,20,1000,20000
1,bananas,20,500,10000
2,cherries,50,750,37500
3,pears,30,900,27000
4,pears,40,950,38000
5,grapes,30,1200,36000
6,apples,25,800,20000
7,grapes,30,1200,36000
8,oranges,20,700,14000


### Deleting columns or rows

**Deleting a column**

In [42]:
# Drop a column. `columns=` is equivalent to passing the label with axis=1.
# This returns a new DataFrame and leaves df2 itself untouched.
df2.drop(columns='total')

Unnamed: 0,name,quantity,price
0,apples,20,1000
1,bananas,20,500
2,cherries,50,750
3,pears,30,900
4,pears,40,950
5,grapes,30,1200
6,apples,25,800
7,grapes,30,1200
8,oranges,20,700


In [43]:
# df2 is unchanged: the 'total' column is still present, because drop()
# without inplace=True does not modify the original frame.
df2

Unnamed: 0,name,quantity,price,total
0,apples,20,1000,20000
1,bananas,20,500,10000
2,cherries,50,750,37500
3,pears,30,900,27000
4,pears,40,950,38000
5,grapes,30,1200,36000
6,apples,25,800,20000
7,grapes,30,1200,36000
8,oranges,20,700,14000


To delete column in the original dataframe, set `inplace = True`


In [44]:
# Drop the column from df2 itself by passing inplace=True.
# Re-running this cell fails once 'total' has already been removed.
df2.drop(columns=['total'], inplace=True)
df2

Unnamed: 0,name,quantity,price
0,apples,20,1000
1,bananas,20,500
2,cherries,50,750
3,pears,30,900
4,pears,40,950
5,grapes,30,1200
6,apples,25,800
7,grapes,30,1200
8,oranges,20,700


Deleting a row by index

In [45]:
# Drop the row whose index label is 4; returns a new DataFrame.
df2.drop(index=4)

Unnamed: 0,name,quantity,price
0,apples,20,1000
1,bananas,20,500
2,cherries,50,750
3,pears,30,900
5,grapes,30,1200
6,apples,25,800
7,grapes,30,1200
8,oranges,20,700


In [46]:
# df2 still contains row 4: the previous drop() was not in place.
df2

Unnamed: 0,name,quantity,price
0,apples,20,1000
1,bananas,20,500
2,cherries,50,750
3,pears,30,900
4,pears,40,950
5,grapes,30,1200
6,apples,25,800
7,grapes,30,1200
8,oranges,20,700


Deleting rows by condition

In [47]:
# Drop rows by condition: collect the index labels of rows with
# quantity < 30, then drop those labels. Returns a new DataFrame.
df2.drop(index=df2.loc[df2['quantity'].lt(30)].index)

Unnamed: 0,name,quantity,price
2,cherries,50,750
3,pears,30,900
4,pears,40,950
5,grapes,30,1200
7,grapes,30,1200


### Updating a column

In [48]:
# Overwrite a whole column with a transformed version of itself.
# Note: every re-run of this cell multiplies quantity by 1000 again.
df2['quantity'] = df2['quantity'].mul(1000)
df2

Unnamed: 0,name,quantity,price
0,apples,20000,1000
1,bananas,20000,500
2,cherries,50000,750
3,pears,30000,900
4,pears,40000,950
5,grapes,30000,1200
6,apples,25000,800
7,grapes,30000,1200
8,oranges,20000,700


### Updating rows

In [49]:
# Update only the rows matching a condition: .loc takes a boolean row mask
# and the column to assign to.
df2.loc[df2['name'].eq('pears'), 'quantity'] = 60000

df2

Unnamed: 0,name,quantity,price
0,apples,20000,1000
1,bananas,20000,500
2,cherries,50000,750
3,pears,60000,900
4,pears,60000,950
5,grapes,30000,1200
6,apples,25000,800
7,grapes,30000,1200
8,oranges,20000,700


## Combining DataFrame - Join by columns

In [50]:
# Lookup table mapping each fruit name to a color, built from (name, color)
# records. 'plum' has no counterpart among the fruit names used earlier.
colors = pd.DataFrame(
    [
        ("apples", 'red'),
        ("bananas", 'yellow'),
        ("pears", 'green'),
        ("grapes", 'purple'),
        ("cherries", 'red'),
        ("plum", 'purple'),
    ],
    columns=['name', 'color'],
)
colors

Unnamed: 0,name,color
0,apples,red
1,bananas,yellow
2,pears,green
3,grapes,purple
4,cherries,red
5,plum,purple


In [51]:
# Inner join on 'name': keeps only the fruits present in both frames.
df2.merge(colors, how='inner', on='name')

Unnamed: 0,name,quantity,price,color
0,apples,20000,1000,red
1,apples,25000,800,red
2,bananas,20000,500,yellow
3,cherries,50000,750,red
4,pears,60000,900,green
5,pears,60000,950,green
6,grapes,30000,1200,purple
7,grapes,30000,1200,purple


In [52]:
# Outer join on 'name': keeps every fruit from either frame; values that are
# missing on one side show up as NaN in the result.
df3 = df2.merge(colors, how='outer', on='name')
df3

Unnamed: 0,name,quantity,price,color
0,apples,20000.0,1000.0,red
1,apples,25000.0,800.0,red
2,bananas,20000.0,500.0,yellow
3,cherries,50000.0,750.0,red
4,pears,60000.0,900.0,green
5,pears,60000.0,950.0,green
6,grapes,30000.0,1200.0,purple
7,grapes,30000.0,1200.0,purple
8,oranges,20000.0,700.0,
9,plum,,,purple


# Revision History


Release: 1.2109.1901

* Code cleanup