# DataFrames

In [18]:
# Data sets in Pandas are usually multi-dimensional tables, called DataFrames.
# Series is like a column, a DataFrame is the whole table.
# Create a DataFrame from two Series:
# Pandas DataFrame is a 2 dimensional data structure, like a 2 dimensional array, or a table with rows and columns.

import pandas as pd

data = {
  "walk": [30, 38, 27],
  "duration": [2, 1, 1]
}
df = pd.DataFrame(data)
print(df)

   walk  duration
0    30         2
1    38         1
2    27         1


In [22]:
# Pandas use the loc attribute to return one or more specified rows
print(df.loc[1]) # This is returns a Pandas Series

walk        38
duration     1
Name: 1, dtype: int64


In [23]:
# Note: When using [], the result is a Pandas DataFrame.
print(df.loc[[1,2]]) #Return row 1 and 2

   walk  duration
1    38         1
2    27         1


# Named Indexes

In [28]:
# With the index argument, you can name your own indexes.
df = pd.DataFrame(data, index=['day1', 'day2', 'day3'])
print(df)

      walk  duration
day1    30         2
day2    38         1
day3    27         1


In [30]:
print(df.loc['day3']) # refer to the named index:

walk        27
duration     1
Name: day3, dtype: int64


# csv file data importing

In [81]:
df = pd.read_csv("weather_data.csv")
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2022,30,5,sunny
1,2/1/2022,24,5,rain
2,3/1/2022,25,8,sunny
3,4/1/2022,26,8,rain
4,5/1/2022,27,8,sunny
5,6/1/2022,25,5,rain
6,7/1/2022,31,9,rain
7,8/1/2022,25,9,sunny
8,9/1/2022,26,5,rain
9,10/1/2022,28,5,snow


In [68]:
print(df.info) # Information About the Data

<bound method DataFrame.info of           day  temperature  windspeed  event
0    1/1/2022           30          5  sunny
1    2/1/2022           24          5   rain
2    3/1/2022           25          8  sunny
3    4/1/2022           26          8   rain
4    5/1/2022           27          8  sunny
5    6/1/2022           25          5   rain
6    7/1/2022           31          9   rain
7    8/1/2022           25          9  sunny
8    9/1/2022           26          5   rain
9   10/1/2022           28          5   snow
10  11/1/2022           29          7   snow
11  12/1/2022           32          7   snow
12  13/1/2022           24          7   rain
13  14/1/2022           28          5   rain
14  15/1/2022           27          6   rain>


In [37]:
print(df.shape)
rows, columns = df.shape
print(rows)
print(columns)

(15, 4)
15
4


In [38]:
df.head() # first few rows

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2022,30,5,sunny
1,2/1/2022,24,5,rain
2,3/1/2022,25,8,sunny
3,4/1/2022,26,8,rain
4,5/1/2022,27,8,sunny


In [39]:
df.head(2) # first 2 rows

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2022,30,5,sunny
1,2/1/2022,24,5,rain


In [40]:
df.tail() # last 5 rows

Unnamed: 0,day,temperature,windspeed,event
10,11/1/2022,29,7,snow
11,12/1/2022,32,7,snow
12,13/1/2022,24,7,rain
13,14/1/2022,28,5,rain
14,15/1/2022,27,6,rain


In [41]:
df.tail(2) # last two rows

Unnamed: 0,day,temperature,windspeed,event
13,14/1/2022,28,5,rain
14,15/1/2022,27,6,rain


In [43]:
df[2:6] # row number 2 to 5 (not include 6)

Unnamed: 0,day,temperature,windspeed,event
2,3/1/2022,25,8,sunny
3,4/1/2022,26,8,rain
4,5/1/2022,27,8,sunny
5,6/1/2022,25,5,rain


In [45]:
df.columns # first heading column get

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [48]:
# df['day']  
df.day # Note: Both are same result return

0      1/1/2022
1      2/1/2022
2      3/1/2022
3      4/1/2022
4      5/1/2022
5      6/1/2022
6      7/1/2022
7      8/1/2022
8      9/1/2022
9     10/1/2022
10    11/1/2022
11    12/1/2022
12    13/1/2022
13    14/1/2022
14    15/1/2022
Name: day, dtype: object

In [49]:
df.event

0     sunny
1      rain
2     sunny
3      rain
4     sunny
5      rain
6      rain
7     sunny
8      rain
9      snow
10     snow
11     snow
12     rain
13     rain
14     rain
Name: event, dtype: object

In [50]:
type(df.event)

pandas.core.series.Series

In [51]:
df[['day', 'event']]

Unnamed: 0,day,event
0,1/1/2022,sunny
1,2/1/2022,rain
2,3/1/2022,sunny
3,4/1/2022,rain
4,5/1/2022,sunny
5,6/1/2022,rain
6,7/1/2022,rain
7,8/1/2022,sunny
8,9/1/2022,rain
9,10/1/2022,snow


In [52]:
df['temperature'].max() 

32

In [54]:
df['temperature'].min()

24

In [56]:
df['temperature'].mean() # average

27.133333333333333

In [58]:
df['temperature'].std() # standered daviation

2.503331114069145

In [60]:
df.describe() # quickly statistics value print

Unnamed: 0,temperature,windspeed
count,15.0,15.0
mean,27.133333,6.6
std,2.503331,1.549193
min,24.0,5.0
25%,25.0,5.0
50%,27.0,7.0
75%,28.5,8.0
max,32.0,9.0


# Conditional data get

In [61]:
df[df.temperature > 30] # which rows temperature is getter then 30

Unnamed: 0,day,temperature,windspeed,event
6,7/1/2022,31,9,rain
11,12/1/2022,32,7,snow


In [64]:
df[df.temperature == df.temperature.max()]  # conditionaly max data get or enething conditionaly get
# df[df.temperature == df['temperature'].max()] # both are same

Unnamed: 0,day,temperature,windspeed,event
11,12/1/2022,32,7,snow


In [69]:
# get only selected 2 column where temperature is max
df[['day', 'temperature']][df.temperature == df.temperature.max()] 

Unnamed: 0,day,temperature
11,12/1/2022,32


# Indexing dataset

In [70]:
df.index

RangeIndex(start=0, stop=15, step=1)

In [82]:
df.set_index('day') # then original data set not change
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2022,30,5,sunny
1,2/1/2022,24,5,rain
2,3/1/2022,25,8,sunny
3,4/1/2022,26,8,rain
4,5/1/2022,27,8,sunny
5,6/1/2022,25,5,rain
6,7/1/2022,31,9,rain
7,8/1/2022,25,9,sunny
8,9/1/2022,26,5,rain
9,10/1/2022,28,5,snow


In [83]:
# if want to change original dataset then use inplace=True
df.set_index('day', inplace=True)
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2022,30,5,sunny
2/1/2022,24,5,rain
3/1/2022,25,8,sunny
4/1/2022,26,8,rain
5/1/2022,27,8,sunny
6/1/2022,25,5,rain
7/1/2022,31,9,rain
8/1/2022,25,9,sunny
9/1/2022,26,5,rain
10/1/2022,28,5,snow


In [86]:
df.reset_index(inplace=True) # Note: that means indes set was work but index colunm hiddenly work
df

Unnamed: 0,index,day,temperature,windspeed,event
0,0,1/1/2022,30,5,sunny
1,1,2/1/2022,24,5,rain
2,2,3/1/2022,25,8,sunny
3,3,4/1/2022,26,8,rain
4,4,5/1/2022,27,8,sunny
5,5,6/1/2022,25,5,rain
6,6,7/1/2022,31,9,rain
7,7,8/1/2022,25,9,sunny
8,8,9/1/2022,26,5,rain
9,9,10/1/2022,28,5,snow


In [87]:
df.set_index('event', inplace=True)

In [88]:
df

Unnamed: 0_level_0,index,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
sunny,0,1/1/2022,30,5
rain,1,2/1/2022,24,5
sunny,2,3/1/2022,25,8
rain,3,4/1/2022,26,8
sunny,4,5/1/2022,27,8
rain,5,6/1/2022,25,5
rain,6,7/1/2022,31,9
sunny,7,8/1/2022,25,9
rain,8,9/1/2022,26,5
snow,9,10/1/2022,28,5


In [91]:
df.loc['rain']

Unnamed: 0_level_0,index,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rain,1,2/1/2022,24,5
rain,3,4/1/2022,26,8
rain,5,6/1/2022,25,5
rain,6,7/1/2022,31,9
rain,8,9/1/2022,26,5
rain,12,13/1/2022,24,7
rain,13,14/1/2022,28,5
rain,14,15/1/2022,27,6


# Study about pandas series more functions

In [None]:
# https://pandas.pydata.org/docs/reference/api/pandas.Series.html