In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
# NOTE: the from-import on the second line rebinds the name `datetime` to the
# datetime class, so after this cell `datetime` no longer refers to the module.
import datetime
from datetime import datetime, date

In [6]:
# Display configuration: plain-text reprs instead of HTML tables, and compact
# limits on the columns, rows and line width that pandas prints.
pd.options.display.notebook_repr_html = False
pd.options.display.max_columns = 7
pd.options.display.max_rows = 10
pd.options.display.width = 60

In [7]:
import matplotlib.pyplot as plt
%matplotlib inline

In [10]:
# A Series built from a plain list gets a default integer index (0..3).
s = pd.Series(data=[1, 2, 3, 4])
s

0    1
1    2
2    3
3    4
dtype: int64

In [11]:
# Label lookup: with the default integer index, label 1 is the second element.
s.loc[1]

2

In [12]:
# A list of labels returns a Series holding just those entries.
s.loc[[1, 3]]

1    2
3    4
dtype: int64

In [15]:
# Same values, now labelled with explicit string labels instead of 0..3.
s = pd.Series(data=[1, 2, 3, 4], index=list("abcd"))
s

a    1
b    2
c    3
d    4
dtype: int64

In [16]:
# Select several entries by their string labels.
s.loc[["a", "b"]]

a    1
b    2
dtype: int64

In [17]:
# Select the second and third elements by position. `.iloc` is used because
# integer keys passed to `[]` on a non-integer index rely on a positional
# fallback that pandas has deprecated.
s.iloc[[1, 2]]

b    2
c    3
dtype: int64

In [18]:
# The index object holds the labels of the Series.
s.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [20]:
# Six consecutive calendar days; both endpoints are included.
dates = pd.date_range(start="2023-03-01", end="2023-03-06", freq="D")
dates

DatetimeIndex(['2023-03-01', '2023-03-02', '2023-03-03',
               '2023-03-04', '2023-03-05', '2023-03-06'],
              dtype='datetime64[ns]', freq='D')

In [30]:
# One value per day, labelled by the `dates` index.
temps1 = pd.Series(data=[80, 82, 85, 90, 83, 87], index=dates)
temps1

2023-03-01    80
2023-03-02    82
2023-03-03    85
2023-03-04    90
2023-03-05    83
2023-03-06    87
Freq: D, dtype: int64

In [31]:
# A date string works as a label on a DatetimeIndex.
temps1.loc["2023-03-04"]

90

In [34]:
# A second Series over the same `dates` index.
temps2 = pd.Series(data=[70, 75, 69, 83, 79, 77], index=dates)
temps2

2023-03-01    70
2023-03-02    75
2023-03-03    69
2023-03-04    83
2023-03-05    79
2023-03-06    77
Freq: D, dtype: int64

In [35]:
# Element-wise difference; the two Series are aligned on their date labels.
temp_diffs = temps1.sub(temps2)
temp_diffs

2023-03-01    10
2023-03-02     7
2023-03-03    16
2023-03-04     7
2023-03-05     4
2023-03-06    10
Freq: D, dtype: int64

In [36]:
# Third element by position. `.iloc` is required for positional access here:
# the index holds dates, and treating an integer key passed to `[]` as a
# position is a fallback that pandas has deprecated.
temp_diffs.iloc[2]

16

In [37]:
# Arithmetic mean of the six daily differences.
temp_diffs.mean()

9.0

In [40]:
# Combine the two Series into a DataFrame: each dict key becomes a column name
# and the shared date index becomes the row index.
temp_df = pd.DataFrame({"Missoula":temps1, "Philadelphia":temps2})
temp_df

            Missoula  Philadelphia
2023-03-01        80            70
2023-03-02        82            75
2023-03-03        85            69
2023-03-04        90            83
2023-03-05        83            79
2023-03-06        87            77

In [41]:
# Bracket access with a column name returns that column as a Series.
temp_df["Missoula"]

2023-03-01    80
2023-03-02    82
2023-03-03    85
2023-03-04    90
2023-03-05    83
2023-03-06    87
Freq: D, Name: Missoula, dtype: int64

In [42]:
# Select the Philadelphia column as a Series.
temp_df["Philadelphia"]

2023-03-01    70
2023-03-02    75
2023-03-03    69
2023-03-04    83
2023-03-05    79
2023-03-06    77
Freq: D, Name: Philadelphia, dtype: int64

In [43]:
# A list of column names returns a DataFrame with those columns, in the
# order listed.
temp_df[["Missoula", "Philadelphia"]]

            Missoula  Philadelphia
2023-03-01        80            70
2023-03-02        82            75
2023-03-03        85            69
2023-03-04        90            83
2023-03-05        83            79
2023-03-06        87            77

In [45]:
# Attribute access to a column; gives the same Series as temp_df["Missoula"].
temp_df.Missoula

2023-03-01    80
2023-03-02    82
2023-03-03    85
2023-03-04    90
2023-03-05    83
2023-03-06    87
Freq: D, Name: Missoula, dtype: int64

In [46]:
# Column arithmetic via attribute access: element-wise difference of the two
# columns, row by row.
temp_df.Missoula - temp_df.Philadelphia

2023-03-01    10
2023-03-02     7
2023-03-03    16
2023-03-04     7
2023-03-05     4
2023-03-06    10
Freq: D, dtype: int64

In [48]:
# Assigning to a column name that does not exist yet adds that column to
# temp_df in place.
temp_df["Difference"] = temp_diffs
temp_df

            Missoula  Philadelphia  Difference
2023-03-01        80            70          10
2023-03-02        82            75           7
2023-03-03        85            69          16
2023-03-04        90            83           7
2023-03-05        83            79           4
2023-03-06        87            77          10

In [49]:
# Row labels of the DataFrame (the dates).
temp_df.index

DatetimeIndex(['2023-03-01', '2023-03-02', '2023-03-03',
               '2023-03-04', '2023-03-05', '2023-03-06'],
              dtype='datetime64[ns]', freq='D')

In [50]:
# Column labels of the DataFrame.
temp_df.columns

Index(['Missoula', 'Philadelphia', 'Difference'], dtype='object')

In [54]:
# Positional slice of the Difference column: positions 1, 2 and 3 (the end
# position 4 is excluded).
temp_df["Difference"].iloc[1:4]

2023-03-02     7
2023-03-03    16
2023-03-04     7
Freq: D, Name: Difference, dtype: int64

In [55]:
# .iloc selects by integer position; a single row comes back as a Series
# whose labels are the column names.
temp_df.iloc[1]

Missoula        82
Philadelphia    75
Difference       7
Name: 2023-03-02 00:00:00, dtype: int64

In [56]:
# The index of a row Series is the DataFrame's column labels.
temp_df.iloc[1].index

Index(['Missoula', 'Philadelphia', 'Difference'], dtype='object')

In [57]:
# .loc selects by label; here a date string picks the row for that day.
temp_df.loc["2023-03-05"]

Missoula        83
Philadelphia    79
Difference       4
Name: 2023-03-05 00:00:00, dtype: int64

In [64]:
# Pick the rows at positions 1, 3 and 5, then take their Difference column.
temp_df.iloc[[1, 3, 5]].Difference

2023-03-02     7
2023-03-04     7
2023-03-06    10
Freq: 2D, Name: Difference, dtype: int64

In [65]:
# Element-wise "greater than 82" test on one column; yields a boolean Series.
temp_df["Missoula"].gt(82)

2023-03-01    False
2023-03-02    False
2023-03-03     True
2023-03-04     True
2023-03-05     True
2023-03-06     True
Freq: D, Name: Missoula, dtype: bool

In [66]:
# The same comparison applied to every cell; yields a boolean DataFrame.
temp_df.gt(82)

            Missoula  Philadelphia  Difference
2023-03-01     False         False       False
2023-03-02     False         False       False
2023-03-03      True         False       False
2023-03-04      True          True       False
2023-03-05      True         False       False
2023-03-06      True         False       False

In [68]:
# Philadelphia values on the rows where the Missoula value exceeds 82:
# the boolean mask selects the rows, the label selects the column.
temp_df.loc[temp_df["Missoula"] > 82, "Philadelphia"]

2023-03-03    69
2023-03-04    83
2023-03-05    79
2023-03-06    77
Freq: D, Name: Philadelphia, dtype: int64

In [76]:
# Location of the CSV file. Set the GOOG_CSV environment variable to read it
# from somewhere else; otherwise the original local path is used, so the
# notebook behaves exactly as before on the machine it was written on.
goog_csv_path = os.environ.get("GOOG_CSV", "C:/Users/ASUS/data/goog.csv")

# Parse the "Date" column as datetimes and use it as the row index.
df = pd.read_csv(goog_csv_path, parse_dates=["Date"], index_col="Date")
df

                  Open        High         Low  \
Date                                             
2016-12-19  790.219971  797.659973  786.270020   
2016-12-20  796.760010  798.650024  793.270020   
2016-12-21  795.840027  796.676025  787.099976   
2016-12-22  792.359985  793.320007  788.580017   
2016-12-23  790.900024  792.739990  787.280029   
...                ...         ...         ...   
2017-03-13  844.000000  848.684998  843.250000   
2017-03-14  843.640015  847.239990  840.799988   
2017-03-15  847.590027  848.630005  840.770020   
2017-03-16  849.030029  850.849976  846.130005   
2017-03-17  851.609985  853.400024  847.109985   

                 Close   Volume  
Date                             
2016-12-19  794.200012  1225900  
2016-12-20  796.419983   925100  
2016-12-21  794.559998  1208700  
2016-12-22  791.260010   969100  
2016-12-23  789.909973   623400  
...                ...      ...  
2017-03-13  845.539978  1149500  
2017-03-14  845.619995   779900  
2017-03-1