In [28]:
from pandas import Series
import numpy as np


In [29]:
# Seeded generator so the same "random" scores come out on every run
g = np.random.default_rng(0)

months = ["Sep", "Oct", "Nov", "Dec", "Jan", "Feb", "Mar", "Apr", "May", "Jun"]
# Ten integer scores, one per month; the upper bound (101) is exclusive
s = Series(g.integers(low=70, high=101, size=10), index=months)

# A plain slice can be taken straight off the series before aggregating
mean_by_slice = s[:5].mean()
print(f"Average: {mean_by_slice}")

# .iloc selects by numeric position (end of the slice excluded)
mean_by_position = s.iloc[:5].mean()
print(f"Average (iloc): {mean_by_position}")

# .loc selects by index label and INCLUDES the end label,
# so 'Sep':'Jan' covers positions 0 through 4 here
sep_to_jan = s.loc['Sep':'Jan']
print(f"Average (loc): {sep_to_jan.mean()}")
print(f".loc slice from Sep to Jan: {sep_to_jan}")


Average: 85.4
Average (iloc): 85.4
Average (loc): 85.4
.loc slice from Sep to Jan: Sep    96
Oct    89
Nov    85
Dec    78
Jan    79
dtype: int64


In [30]:
# .head(n) returns the first n items of the series (n defaults to 5)
first_five = s.head(5)
print(f"First 5: {first_five}")
# .tail(n) is the counterpart: the last n items (n also defaults to 5)
last_five = s.tail(5)
print(f"Last 5: {last_five}")


First 5: Sep    96
Oct    89
Nov    85
Dec    78
Jan    79
dtype: int64
Last 5: Feb    71
Mar    72
Apr    70
May    75
Jun    95
dtype: int64


In [31]:
# Compare the mean score of the first five months with the mean of the rest
m1, m2 = (half.mean() for half in (s.iloc[:5], s.iloc[5:]))
print(f"First half: {m1}")
print(f"Second half: {m2}")
# A negative value means the second half averaged lower than the first
print(f"Improvement: {m2 - m1:.2f}")


First half: 85.4
Second half: 76.6
Improvement: -8.80


In [32]:
# Additional exercises
# 1. Which month did the student get the highest score?
#    (the exercise asks for 3 ways; two are shown here)
top_score = s.max()

# Way 1: idxmax() hands back the index label of the maximum directly,
# which is the natural choice with a string index
print(f"The month with the highest score was {s.idxmax()} and that score was {top_score}")

# Way 2: boolean mask. Comparing the series with its max keeps only the
# matching entries (possibly several if there is a tie); index[0] takes
# the label of the first match.
is_top = s == top_score
print(f"The month with the max ({top_score}) score was {s[is_top].index[0]}")


The month with the highest score was Sep and that score was 96
The month with the max (96) score was Sep


In [41]:
# What were the student's five highest scores?
# nlargest(5) returns the five largest values ordered from highest to lowest
# without sorting the whole series first; to_numpy() drops the month labels
# so only the bare scores are printed.
print(f"The five highest scores were:\n{s.nlargest(5).to_numpy()}")


The five highest scores were:
[96 95 89 85 79]


In [43]:
# Round the student's scores to the nearest 10
# round(-1) rounds at the tens place. Ties follow "round half to even"
# (banker's rounding), so 75 becomes 80 but 85 also becomes 80.
print(f"Scores before rounding:\n{s.values}")
# Values move to the NEAREST multiple of 10, up as well as down (e.g. 96 -> 100),
# so the label must not say "rounded down".
print(f"Scores rounded to the nearest 10:\n{s.round(-1).values}")


Scores before rounding:
[96 89 85 78 79 71 72 70 75 95]
Scores rounded down to the nearest 10:
[100  90  80  80  80  70  70  70  80 100]
