In [1]:
import math
import collections

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
pd.options.display.max_rows = 16

In [59]:
read_file = 'nobels.csv'
column_names=['year', 'discipline', 'nobelist']
nobels = pd.read_csv(read_file, names=column_names)

nobel_year = 'year'
nobel_discipline = 'discipline'
nobel_nobelist = 'nobelist'

In [60]:
nobels.index

RangeIndex(start=0, stop=950, step=1)

***This line of code nobels_by_year = nobels.set_index('year') sets the 'year' column as the index of the DataFrame nobels, creating a new DataFrame named nobels_by_year.***

***Setting the index to the 'year' column allows you to easily access Nobel laureates by their respective years, making it convenient for time-series analysis or for quickly retrieving laureates from a specific year.***

In [61]:
nobels_by_year = nobels.set_index(nobel_year)

In [62]:
nobels_by_year

Unnamed: 0_level_0,discipline,nobelist
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Literature,Sully Prudhomme
1901,Medicine,Emil Adolf von Behring
1901,Peace,Frédéric Passy
1901,Peace,Henry Dunant
...,...,...
2019,Medicine,William Kaelin Jr.
2019,Peace,Abiy Ahmed
2019,Physics,Didier Queloz
2019,Physics,James Peebles


In [63]:
nobels_by_year.index

Index([1901, 1901, 1901, 1901, 1901, 1901, 1902, 1902, 1902, 1902,
       ...
       2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019],
      dtype='int64', name='year', length=950)

***Using nobels_by_year.loc[1901], you're accessing the rows in the DataFrame nobels_by_year that have the index value 1901, which corresponds to Nobel laureates awarded in the year 1901.***

***This code will return all Nobel laureates from the year 1901, as indexed by the 'year' column. It allows you to easily retrieve data specific to a particular year.***


In [64]:
nobels_by_year.loc[1901]

Unnamed: 0_level_0,discipline,nobelist
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Literature,Sully Prudhomme
1901,Medicine,Emil Adolf von Behring
1901,Peace,Frédéric Passy
1901,Peace,Henry Dunant
1901,Physics,Wilhelm Röntgen


***This code will return the Nobel laureates awarded in the year 1901 from the 'nobelist' column of the nobels_by_year DataFrame.***

In [65]:
nobels_by_year.loc[1901, nobel_nobelist]

year
1901    Jacobus Henricus van 't Hoff
1901                 Sully Prudhomme
1901          Emil Adolf von Behring
1901                  Frédéric Passy
1901                    Henry Dunant
1901                 Wilhelm Röntgen
Name: nobelist, dtype: object

***This code will return all Nobel laureates from the years 1914 to 1918, inclusive, as indexed by the 'year' column in the nobels_by_year DataFrame.***

In [66]:
nobels_by_year.loc[1914:1918]

Unnamed: 0_level_0,discipline,nobelist
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1914,Chemistry,Theodore William Richards
1914,Medicine,Robert Bárány
1914,Physics,Max von Laue
1915,Chemistry,Richard Willstätter
1915,Literature,Romain Rolland
1915,Physics,William Henry Bragg
1915,Physics,William Lawrence Bragg
1916,Literature,Verner von Heidenstam
1917,Literature,Henrik Pontoppidan
1917,Literature,Karl Adolph Gjellerup


***Sort the 'discipline' column by ASC***

In [67]:
nobels_by_discipline = nobels.set_index(nobel_discipline).sort_index()

In [68]:
nobels_by_discipline

Unnamed: 0_level_0,year,nobelist
discipline,Unnamed: 1_level_1,Unnamed: 2_level_1
Chemistry,1901,Jacobus Henricus van 't Hoff
Chemistry,1988,Robert Huber
Chemistry,1932,Irving Langmuir
Chemistry,1988,Johann Deisenhofer
Chemistry,1988,Hartmut Michel
...,...,...
Physics,1999,Gerard 't Hooft
Physics,1922,Niels Bohr
Physics,1998,Robert B. Laughlin
Physics,1921,Albert Einstein


In [69]:
nobels_by_discipline.loc['Physics']

Unnamed: 0_level_0,year,nobelist
discipline,Unnamed: 1_level_1,Unnamed: 2_level_1
Physics,1951,Ernest Walton
Physics,2018,Arthur Ashkin
Physics,1957,Chen Ning Yang
Physics,1906,J. J. Thomson
Physics,1953,Frits Zernike
...,...,...
Physics,1999,Gerard 't Hooft
Physics,1922,Niels Bohr
Physics,1998,Robert B. Laughlin
Physics,1921,Albert Einstein


In [75]:
nobels_by_discipline.loc['Medicine':'Peace']

Unnamed: 0_level_0,year,nobelist
discipline,Unnamed: 1_level_1,Unnamed: 2_level_1
Medicine,1995,Christiane Nüsslein-Volhard
Medicine,1993,Phillip Allen Sharp
Medicine,1927,Julius Wagner-Jauregg
Medicine,1994,Alfred G. Gilman
Medicine,1993,Richard J. Roberts
...,...,...
Peace,1973,Henry Kissinger
Peace,1995,Pugwash Conferences on Science and World Affairs
Peace,1911,Alfred Hermann Fried
Peace,1973,Le Duc Tho


In [73]:
nobels_by_year.iloc[0:10]

Unnamed: 0_level_0,discipline,nobelist
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Literature,Sully Prudhomme
1901,Medicine,Emil Adolf von Behring
1901,Peace,Frédéric Passy
1901,Peace,Henry Dunant
1901,Physics,Wilhelm Röntgen
1902,Chemistry,Hermann Emil Fischer
1902,Literature,Theodor Mommsen
1902,Medicine,Ronald Ross
1902,Peace,Charles Albert Gobat


***It appears you're creating a multi-index DataFrame named nobels_multi by setting the index to a combination of the 'year' and 'discipline' columns from your original nobels DataFrame. This can be useful for organizing and accessing data in a hierarchical manner, particularly for more complex datasets where you want to group data by multiple criteria.***

***This will set a multi-index on the DataFrame nobels using the 'year' and 'discipline' columns. This means that each row in the DataFrame will be uniquely identified by a combination of year and discipline.***

In [78]:
nobels_multi = nobels.set_index([nobel_year, nobel_discipline])

In [77]:
nobels_multi

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist
year,discipline,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Literature,Sully Prudhomme
1901,Medicine,Emil Adolf von Behring
1901,Peace,Frédéric Passy
1901,Peace,Henry Dunant
...,...,...
2019,Medicine,William Kaelin Jr.
2019,Peace,Abiy Ahmed
2019,Physics,Didier Queloz
2019,Physics,James Peebles


In [84]:
nobels_multi.index.sort_values()

MultiIndex([(1901,  'Chemistry'),
            (1901, 'Literature'),
            (1901,   'Medicine'),
            (1901,      'Peace'),
            (1901,      'Peace'),
            (1901,    'Physics'),
            (1902,  'Chemistry'),
            (1902, 'Literature'),
            (1902,   'Medicine'),
            (1902,      'Peace'),
            ...
            (2019,  'Economics'),
            (2019,  'Economics'),
            (2019, 'Literature'),
            (2019,   'Medicine'),
            (2019,   'Medicine'),
            (2019,   'Medicine'),
            (2019,      'Peace'),
            (2019,    'Physics'),
            (2019,    'Physics'),
            (2019,    'Physics')],
           names=['year', 'discipline'], length=950)

In [80]:
nobels_multi.index.get_level_values(0)

Index([1901, 1901, 1901, 1901, 1901, 1901, 1902, 1902, 1902, 1902,
       ...
       2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019],
      dtype='int64', name='year', length=950)

In [87]:
nobels_multi.index.get_level_values(1)

Index(['Chemistry', 'Literature', 'Medicine', 'Peace', 'Peace', 'Physics',
       'Chemistry', 'Literature', 'Medicine', 'Peace',
       ...
       'Economics', 'Economics', 'Literature', 'Medicine', 'Medicine',
       'Medicine', 'Peace', 'Physics', 'Physics', 'Physics'],
      dtype='object', name='discipline', length=950)

In [82]:
nobels_multi.loc[(2017, 'Physics')]

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist
year,discipline,Unnamed: 2_level_1
2017,Physics,Barry Barish
2017,Physics,Kip Thorne
2017,Physics,Rainer Weiss


In [82]:
nobels_multi.loc[(2017, 'Physics')]

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist
year,discipline,Unnamed: 2_level_1
2017,Physics,Barry Barish
2017,Physics,Kip Thorne
2017,Physics,Rainer Weiss


***If you execute nobels_multi.loc[(1901:1910, 'Chemistry')] without correcting the code, you will encounter a SyntaxError.***

In [89]:
nobels_multi.loc[(1901:1910, 'Chemistry')]

SyntaxError: invalid syntax (2556996078.py, line 1)

***If you don't correct the code and execute nobels_multi.loc[(slice(1901,1910), 'Chemistry')], you will likely encounter a TypeError. This error occurs because the .loc indexer expects both a row indexer and a column indexer, but you're only providing a row indexer (the tuple (slice(1901,1910), 'Chemistry')).***

***To resolve this error, you need to provide a colon : after the tuple to indicate that you want to select all columns. Without the colon, the code is incomplete, leading to a TypeError.***

In [102]:
nobels_multi.loc[(slice(1901,1910), 'Chemistry')]

KeyError: 'Chemistry'

***the .loc indexer with a multi-index DataFrame to select specific rows based on a slice of years (1901 to 1910, inclusive) and the 'Chemistry' discipline.***

***This code selects all rows where the year falls within the range 1901 to 1910 (inclusive) and the discipline is 'Chemistry'. The slice() function is used to create a slice object for the range of years.***

In [95]:
nobels_multi.loc[(slice(1901,1910), 'Chemistry'), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist
year,discipline,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1902,Chemistry,Hermann Emil Fischer
1903,Chemistry,Svante Arrhenius
1904,Chemistry,William Ramsay
1905,Chemistry,Adolf von Baeyer
1906,Chemistry,Henri Moissan
1907,Chemistry,Eduard Buchner
1908,Chemistry,Ernest Rutherford
1909,Chemistry,Wilhelm Ostwald
1910,Chemistry,Otto Wallach


***This code nobels_multi.loc[(slice(None), ['Chemistry', 'Physics']), :] selects all rows from the nobels_multi DataFrame where the discipline is either 'Chemistry' or 'Physics', regardless of the year.***

***Here's a breakdown of what each part of the code does:***

1. slice(None): This part indicates that we want to include all values along the 'year' axis. It essentially means "all years".
2. ['Chemistry', 'Physics']: This specifies the disciplines we're interested in. We're selecting rows where the discipline is either 'Chemistry' or 'Physics'.
3. :: This selects all columns in the DataFrame.

***So, when you put it all together, you're essentially saying: "Give me all rows where the discipline is either 'Chemistry' or 'Physics', regardless of the year."***

In [96]:
nobels_multi.loc[(slice(None), ['Chemistry', 'Physics']), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist
year,discipline,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Physics,Wilhelm Röntgen
1902,Chemistry,Hermann Emil Fischer
1902,Physics,Hendrik Lorentz
1902,Physics,Pieter Zeeman
...,...,...
2019,Chemistry,John B. Goodenough
2019,Chemistry,M. Stanley Whittingham
2019,Physics,Didier Queloz
2019,Physics,James Peebles


In [99]:
nobels[(nobels.year >= 1901) & (nobels.year <= 1910) & (nobels.discipline == 'Chemistry')]

Unnamed: 0,year,discipline,nobelist
0,1901,Chemistry,Jacobus Henricus van 't Hoff
6,1902,Chemistry,Hermann Emil Fischer
13,1903,Chemistry,Svante Arrhenius
20,1904,Chemistry,William Ramsay
26,1905,Chemistry,Adolf von Baeyer
31,1906,Chemistry,Henri Moissan
37,1907,Chemistry,Eduard Buchner
43,1908,Chemistry,Ernest Rutherford
50,1909,Chemistry,Wilhelm Ostwald
57,1910,Chemistry,Otto Wallach


In [100]:
nobels.query('year >= 1901 and year <= 1910 and discipline == "Chemistry"')

Unnamed: 0,year,discipline,nobelist
0,1901,Chemistry,Jacobus Henricus van 't Hoff
6,1902,Chemistry,Hermann Emil Fischer
13,1903,Chemistry,Svante Arrhenius
20,1904,Chemistry,William Ramsay
26,1905,Chemistry,Adolf von Baeyer
31,1906,Chemistry,Henri Moissan
37,1907,Chemistry,Eduard Buchner
43,1908,Chemistry,Ernest Rutherford
50,1909,Chemistry,Wilhelm Ostwald
57,1910,Chemistry,Otto Wallach
