In [2]:
import os
import pandas as pd
import numpy as np

# Assemble the CSV location from its components so the path separator
# matches the host operating system.
file_path = os.path.join('resources', 'candidate.csv')

# The first column of the file (position 0) is used as the row labels.
df = pd.read_csv(
    file_path,
    index_col=0,
)

print(df)

       name         city  age  py-score
101  Xavier  Mexico City   41      88.0
102     Ann      Toronto   28      79.0
103    Jana       Prague   33      81.0
104      Yi     Shanghai   34      80.0
105   Robin   Manchester   38      68.0
106    Amal        Cairo   31      61.0
107    Nori        Osaka   37      84.0


### .items() (formerly also .iteritems(), removed in pandas 2.0)

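Use `.items()` to iterate over the columns. Each iteration yields a pair of the column label and the column itself as a `Series`.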

In [6]:
# Walk the frame column by column: each step gives the column label and
# the column's values as a Series.
for col_label, col in df.items():
    print(col_label)
    # Two trailing newlines leave a blank line between consecutive columns.
    print(col, end='\n\n')
    
    

name
101    Xavier
102       Ann
103      Jana
104        Yi
105     Robin
106      Amal
107      Nori
Name: name, dtype: object

city
101    Mexico City
102        Toronto
103         Prague
104       Shanghai
105     Manchester
106          Cairo
107          Osaka
Name: city, dtype: object

age
101    41
102    28
103    33
104    34
105    38
106    31
107    37
Name: age, dtype: int64

py-score
101    88.0
102    79.0
103    81.0
104    80.0
105    68.0
106    61.0
107    84.0
Name: py-score, dtype: float64



### .iterrows()

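Use `.iterrows()` to iterate over the rows. Each iteration yields a pair of the row label and the row itself as a `Series`.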

In [7]:
# Walk the frame row by row: each step gives the row label and the row's
# values as a Series indexed by column name.
for row_label, row in df.iterrows():
    # Label on one line, the row below it, then a blank separator line.
    print(f"{row_label}\n{row}\n")

101
name             Xavier
city        Mexico City
age                  41
py-score           88.0
Name: 101, dtype: object

102
name            Ann
city        Toronto
age              28
py-score       79.0
Name: 102, dtype: object

103
name          Jana
city        Prague
age             33
py-score      81.0
Name: 103, dtype: object

104
name              Yi
city        Shanghai
age               34
py-score        80.0
Name: 104, dtype: object

105
name             Robin
city        Manchester
age                 38
py-score          68.0
Name: 105, dtype: object

106
name         Amal
city        Cairo
age            31
py-score     61.0
Name: 106, dtype: object

107
name         Nori
city        Osaka
age            37
py-score     84.0
Name: 107, dtype: object



### .itertuples()

`.itertuples()` iterates over the rows and, in each iteration, yields a named tuple containing the row's data and (optionally) its index:

In [10]:
# Restrict the frame to the columns of interest, then emit one named tuple
# per row (the row label is exposed as the `Index` field).
selected_columns = ['name', 'city', 'age']
for row in df.loc[:, selected_columns].itertuples():
    print(row)

Pandas(Index=101, name='Xavier', city='Mexico City', age=41)
Pandas(Index=102, name='Ann', city='Toronto', age=28)
Pandas(Index=103, name='Jana', city='Prague', age=33)
Pandas(Index=104, name='Yi', city='Shanghai', age=34)
Pandas(Index=105, name='Robin', city='Manchester', age=38)
Pandas(Index=106, name='Amal', city='Cairo', age=31)
Pandas(Index=107, name='Nori', city='Osaka', age=37)


## Creating DataFrames With Time-Series Labels

We’ll create a pandas DataFrame using the hourly temperature data from a single day.

We can start by creating a list of data values:

```python
# 24 temperature readings, one per hour of the day.
temp_c = [
    8.0, 7.1, 6.8, 6.4, 6.0, 5.4, 4.8, 5.0,
    9.1, 12.8, 15.3, 19.1, 21.2, 22.1, 22.4, 23.1,
    21.0, 17.9, 15.5, 14.4, 11.9, 11.0, 10.2, 9.1,
]
```
Now we have the variable ``temp_c``, which refers to the list of temperature values.


The next step is to create a sequence of dates and times. pandas provides a very convenient function, ``date_range()``, for this purpose:

```python
# Lowercase 'h' is the hourly frequency alias; the uppercase 'H' spelling is
# deprecated in recent pandas releases and triggers a FutureWarning.
dt = pd.date_range(start='2019-10-27 00:00:00.0', periods=24, freq='h')
```

Now that we have the temperature values and the corresponding dates and times, we can create the DataFrame. In many cases, it’s convenient to use date-time values as the row labels:

```python
# One column of readings, labelled row by row with the timestamps in `dt`.
temp = pd.DataFrame({'temp_c': temp_c}, index=dt)
```

In [16]:
# 24 temperature readings, one per hour of the day.
temp_c = [
    8.0, 7.1, 6.8, 6.4, 6.0, 5.4, 4.8, 5.0,
    9.1, 12.8, 15.3, 19.1, 21.2, 22.1, 22.4, 23.1,
    21.0, 17.9, 15.5, 14.4, 11.9, 11.0, 10.2, 9.1,
]

# 24 hourly timestamps starting at midnight on 2019-10-27.
# Lowercase 'h' is the hourly frequency alias (the same spelling the
# resampling cell uses in '6h'); the uppercase 'H' spelling is deprecated in
# recent pandas releases and triggers a FutureWarning.
dt = pd.date_range(start='2019-10-27 00:00:00.0', periods=24, freq='h')

# Variant 1: timestamps stored as an ordinary column; rows keep the default
# integer labels 0..23.
# NOTE: this rebinds `df`, which earlier cells used for the candidate table.
df = pd.DataFrame(data={'temp_c': temp_c, 'date': dt})

print('DataFrame with two columns - temp_c and date')
print(df)

print()

# Variant 2: timestamps used as the row labels, which is what time-based
# operations such as .resample() operate on.
df1 = pd.DataFrame(data={'temp_c': temp_c}, index=dt)

print('DataFrame with 1 columns - temp_c and row_label as date')

print(df1)



DataFrame with two columns - temp_c and date
    temp_c                date
0      8.0 2019-10-27 00:00:00
1      7.1 2019-10-27 01:00:00
2      6.8 2019-10-27 02:00:00
3      6.4 2019-10-27 03:00:00
4      6.0 2019-10-27 04:00:00
5      5.4 2019-10-27 05:00:00
6      4.8 2019-10-27 06:00:00
7      5.0 2019-10-27 07:00:00
8      9.1 2019-10-27 08:00:00
9     12.8 2019-10-27 09:00:00
10    15.3 2019-10-27 10:00:00
11    19.1 2019-10-27 11:00:00
12    21.2 2019-10-27 12:00:00
13    22.1 2019-10-27 13:00:00
14    22.4 2019-10-27 14:00:00
15    23.1 2019-10-27 15:00:00
16    21.0 2019-10-27 16:00:00
17    17.9 2019-10-27 17:00:00
18    15.5 2019-10-27 18:00:00
19    14.4 2019-10-27 19:00:00
20    11.9 2019-10-27 20:00:00
21    11.0 2019-10-27 21:00:00
22    10.2 2019-10-27 22:00:00
23     9.1 2019-10-27 23:00:00

DataFrame with 1 columns - temp_c and row_label as date
                     temp_c
2019-10-27 00:00:00     8.0
2019-10-27 01:00:00     7.1
2019-10-27 02:00:00     6.8
2019-10-27 

  dt = pd.date_range(start='2019-10-27 00:00:00.0', periods=24,freq='H')


### Resampling and Rolling

If we want to split a day into four six-hour intervals and get the mean temperature for each interval, pandas provides the ``.resample()`` method.

**Rolling-window analysis** involves calculating a statistic over a specified number of consecutive rows, known as the “window,” and then shifting this window one row at a time to perform the calculation again. Starting from the first row, the window moves down the DataFrame row by row, repeating the calculation until it reaches the end.

Pandas provides the ``.rolling()`` method to perform rolling-window calculations.



In [18]:
# Group the rows into consecutive six-hour bins and average each bin.
df2 = df1.resample('6h').mean()

# Average over a sliding window of three consecutive rows; rows that do not
# yet have three values behind them come out as NaN.
df3 = df1.rolling(3).mean()

print("Calculate Mean of Sample at 6H Interval", df2, sep='\n')
print("Calculate Means at rolling window of 3H", df3, sep='\n')


Calculate Mean of Sample at 6H Interval
                        temp_c
2019-10-27 00:00:00   6.616667
2019-10-27 06:00:00  11.016667
2019-10-27 12:00:00  21.283333
2019-10-27 18:00:00  12.016667
Calculate Means at rolling window of 3H
                        temp_c
2019-10-27 00:00:00        NaN
2019-10-27 01:00:00        NaN
2019-10-27 02:00:00   7.300000
2019-10-27 03:00:00   6.766667
2019-10-27 04:00:00   6.400000
2019-10-27 05:00:00   5.933333
2019-10-27 06:00:00   5.400000
2019-10-27 07:00:00   5.066667
2019-10-27 08:00:00   6.300000
2019-10-27 09:00:00   8.966667
2019-10-27 10:00:00  12.400000
2019-10-27 11:00:00  15.733333
2019-10-27 12:00:00  18.533333
2019-10-27 13:00:00  20.800000
2019-10-27 14:00:00  21.900000
2019-10-27 15:00:00  22.533333
2019-10-27 16:00:00  22.166667
2019-10-27 17:00:00  20.666667
2019-10-27 18:00:00  18.133333
2019-10-27 19:00:00  15.933333
2019-10-27 20:00:00  13.933333
2019-10-27 21:00:00  12.433333
2019-10-27 22:00:00  11.033333
2019-10-27 23:00:00  