In [1]:
from pandas import Series
import numpy as np
import math


In [14]:
g = np.random.default_rng(0)
# Generating a data series of temperatures with a mean of 20, and std dev of 5
# with a repeating index from Sun - Sat
days = "Sun Mon Tue Wed Thu Fri Sat".split()
# create the series, and make it rounded to the nearest integer
# if I had used `astype(np.int8)` instead it would have truncated
s = Series(g.normal(20, 5, 28), index=days * 4).round(0).astype(np.int8)
s.describe()

count    28.000000
mean     19.357143
std       4.373441
min       8.000000
25%      16.750000
50%      19.000000
75%      22.000000
max      27.000000
dtype: float64

In [22]:
# print out the entire series for later checking
s

Sun    21
Mon    19
Tue    23
Wed    21
Thu    17
Fri    22
Sat    27
Sun    25
Mon    16
Tue    14
Wed    17
Thu    20
Fri     8
Sat    19
Sun    14
Mon    16
Tue    17
Wed    18
Thu    22
Fri    25
Sat    19
Sun    27
Mon    17
Tue    22
Wed    25
Thu    20
Fri    16
Sat    15
dtype: int8

In [None]:
# Now let's find the mean of all Mondays in the series
# We can use the index masking method, providing a list to the mask
# of all indices we want (in this case just "Mon")
# could be s.loc[["Mon"]] or the single value as below
s.loc["Mon"].mean()

np.float64(17.0)

In [19]:
# Extension questions
# 1. What was the average weekend temperature?
s.loc[["Sat", "Sun"]].mean()

np.float64(20.875)

In [None]:
# 2. How many times is the change in temperature from the previous day greater than 2 degrees?
# Create a series that is shifted two elements to the left (previous days)
sprev = s.shift(1)
# mask where the adjacent value difference is more than 2
mask = (s - sprev) > 2
# shifting will result in some NaN values, so drop those
mask = mask.dropna()
s.loc[mask]

Tue    23
Fri    22
Sat    27
Wed    17
Thu    20
Sat    19
Thu    22
Fri    25
Sun    27
Tue    22
Wed    25
dtype: int8

In [None]:
# 3. What are the two most common temperatures in the data set and how often does each appear?
# Apparently this is the convoluted way to do this and there's a better way (follows)
# top2 = s.sort_values(ascending=False).drop_duplicates().head(2)
top2 = s.drop_duplicates().nlargest(2)
# apparently we can't just use `[s == top2]` because they don't share a (presumably complete)
# index, so using `.isin` checks if the value is in the top2 series
s.loc[s.isin(top2)].value_counts()

25    3
27    2
Name: count, dtype: int64