In [1]:
import math
import collections

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

***Setting pd.options.display.max_rows to 16 in pandas will adjust the maximum number of rows displayed when you output a DataFrame or Series in a Jupyter Notebook or a similar environment that uses pandas for tabular data display. With this setting, if a DataFrame or Series has more than 16 rows, only the first 16 rows and the last few rows will be displayed, with a "..." indicating that some rows have been omitted in between.***

In [3]:
pd.options.display.max_rows = 16

***Reading a CSV file named 'nobels.csv' into a pandas DataFrame called nobels with three columns: 'year', 'discipline', and 'nobelist'. By using the names parameter, you're specifying custom column names for the DataFrame. This can be useful if the CSV file doesn't have a header row, or if you want to override the existing column names.***

In [23]:
nobels = pd.read_csv('nobels.csv', names=['year', 'discipline', 'nobelist'])

In [24]:
nobels.shape

(950, 3)

In [25]:
nobels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 950 entries, 0 to 949
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   year        950 non-null    int64 
 1   discipline  950 non-null    object
 2   nobelist    950 non-null    object
dtypes: int64(1), object(2)
memory usage: 22.4+ KB


In [26]:
nobels.head()

Unnamed: 0,year,discipline,nobelist
0,1901,Chemistry,Jacobus Henricus van 't Hoff
1,1901,Literature,Sully Prudhomme
2,1901,Medicine,Emil Adolf von Behring
3,1901,Peace,Frédéric Passy
4,1901,Peace,Henry Dunant


In [27]:
nobels.tail()

Unnamed: 0,year,discipline,nobelist
945,2019,Medicine,William Kaelin Jr.
946,2019,Peace,Abiy Ahmed
947,2019,Physics,Didier Queloz
948,2019,Physics,James Peebles
949,2019,Physics,Michel Mayor


In [28]:
len(nobels)

950

In [29]:
nobels.columns

Index(['year', 'discipline', 'nobelist'], dtype='object')

In [30]:
nobels.dtypes

year           int64
discipline    object
nobelist      object
dtype: object

In [31]:
nobels.index

RangeIndex(start=0, stop=950, step=1)

In [32]:
nobels['discipline']

0       Chemistry
1      Literature
2        Medicine
3           Peace
4           Peace
          ...    
945      Medicine
946         Peace
947       Physics
948       Physics
949       Physics
Name: discipline, Length: 950, dtype: object

In [33]:
nobels.nobelist

0      Jacobus Henricus van 't Hoff
1                   Sully Prudhomme
2            Emil Adolf von Behring
3                    Frédéric Passy
4                      Henry Dunant
                   ...             
945              William Kaelin Jr.
946                      Abiy Ahmed
947                   Didier Queloz
948                   James Peebles
949                    Michel Mayor
Name: nobelist, Length: 950, dtype: object

***This code nobels.discipline.values[:50] is accessing the 'discipline' column of the DataFrame nobels and returning the values of the first 50 entries in that column as a NumPy array. It's a way to quickly inspect the disciplines of the Nobel laureates in the dataset.***

In [35]:
nobels.discipline.values[:50]

array(['Chemistry', 'Literature', 'Medicine', 'Peace', 'Peace', 'Physics',
       'Chemistry', 'Literature', 'Medicine', 'Peace', 'Peace', 'Physics',
       'Physics', 'Chemistry', 'Literature', 'Medicine', 'Peace',
       'Physics', 'Physics', 'Physics', 'Chemistry', 'Literature',
       'Literature', 'Medicine', 'Peace', 'Physics', 'Chemistry',
       'Literature', 'Medicine', 'Peace', 'Physics', 'Chemistry',
       'Literature', 'Medicine', 'Medicine', 'Peace', 'Physics',
       'Chemistry', 'Literature', 'Medicine', 'Peace', 'Peace', 'Physics',
       'Chemistry', 'Literature', 'Medicine', 'Medicine', 'Peace',
       'Peace', 'Physics'], dtype=object)

***This code nobels.discipline.unique() will return an array containing the unique values present in the 'discipline' column of the nobels DataFrame. It's useful for quickly understanding the different disciplines for which Nobel Prizes have been awarded in your dataset. You can use this to get an overview of the variety of disciplines represented among the Nobel laureates. If you run this code in your Python environment, you'll see the unique disciplines listed.***

In [36]:
nobels.discipline.unique()

array(['Chemistry', 'Literature', 'Medicine', 'Peace', 'Physics',
       'Economics'], dtype=object)

***This code nobels.nobelist.value_counts() will return a Series containing the count of occurrences of each unique value in the 'nobelist' column of the nobels DataFrame. It essentially shows how many Nobel Prizes each laureate has won according to your dataset.***
***
Running this code will give you a breakdown of the number of Nobel Prizes won by each Nobel laureate recorded in your dataset. It's useful for understanding which laureates have been most prolific in terms of winning Nobel Prize***s.

In [37]:
nobels.nobelist.value_counts()

nobelist
International Committee of the Red Cross         3
John Bardeen                                     2
Frederick Sanger                                 2
Linus Pauling                                    2
United Nations High Commissioner for Refugees    2
                                                ..
Peter Medawar                                    1
Albert Lutuli                                    1
Donald A. Glaser                                 1
Melvin Calvin                                    1
Michel Mayor                                     1
Name: count, Length: 943, dtype: int64

***Return all rows that contain the 'Physics' discipline***

In [39]:
nobels[nobels.discipline == 'Physics']

Unnamed: 0,year,discipline,nobelist
5,1901,Physics,Wilhelm Röntgen
11,1902,Physics,Hendrik Lorentz
12,1902,Physics,Pieter Zeeman
17,1903,Physics,Henri Becquerel
18,1903,Physics,Marie Curie
...,...,...,...
934,2018,Physics,Donna Strickland
935,2018,Physics,Gérard Mourou
947,2019,Physics,Didier Queloz
948,2019,Physics,James Peebles


***Return all rows when the discipline is equal to "Chemestry"***

In [41]:
nobels.query('discipline == "Chemistry"')

Unnamed: 0,year,discipline,nobelist
0,1901,Chemistry,Jacobus Henricus van 't Hoff
6,1902,Chemistry,Hermann Emil Fischer
13,1903,Chemistry,Svante Arrhenius
20,1904,Chemistry,William Ramsay
26,1905,Chemistry,Adolf von Baeyer
...,...,...,...
924,2018,Chemistry,George P. Smith
925,2018,Chemistry,Greg Winter
936,2019,Chemistry,Akira Yoshino
937,2019,Chemistry,John B. Goodenough


In [42]:
nobels['Curie' in nobels.nobelist]

KeyError: False

***Return all rows that contain the string "Curie"***

In [43]:
nobels[nobels.nobelist.str.contains('Curie')]

Unnamed: 0,year,discipline,nobelist
18,1903,Physics,Marie Curie
19,1903,Physics,Pierre Curie
62,1911,Chemistry,Marie Curie
178,1935,Chemistry,Frédéric Joliot-Curie
179,1935,Chemistry,Irène Joliot-Curie


In [44]:
disco = np.load('discography.npy')

In [45]:
disco

array([('David Bowie', '1969-11-14', 17),
       ('The Man Who Sold the World', '1970-11-04',  3),
       ('Hunky Dory', '1971-12-17',  5),
       ('Ziggy Stardust', '1972-06-16',  1),
       ('Aladdin Sane', '1973-04-13',  1), ('Pin Ups', '1973-10-19',  1),
       ('Diamond Dogs', '1974-05-24',  1),
       ('Young Americans', '1975-03-07',  2),
       ('Station To Station', '1976-01-23',  5),
       ('Low', '1977-01-14',  2), ('Heroes', '1977-10-14',  3),
       ('Lodger', '1979-05-18',  4)],
      dtype=[('title', '<U32'), ('release', '<M8[D]'), ('toprank', '<i8')])

In [46]:
disco_df = pd.DataFrame(disco)

In [47]:
disco_df

Unnamed: 0,title,release,toprank
0,David Bowie,1969-11-14,17
1,The Man Who Sold the World,1970-11-04,3
2,Hunky Dory,1971-12-17,5
3,Ziggy Stardust,1972-06-16,1
4,Aladdin Sane,1973-04-13,1
5,Pin Ups,1973-10-19,1
6,Diamond Dogs,1974-05-24,1
7,Young Americans,1975-03-07,2
8,Station To Station,1976-01-23,5
9,Low,1977-01-14,2


In [48]:
disco_df.shape

(12, 3)

In [49]:
disco_df.dtypes

title             object
release    datetime64[s]
toprank            int64
dtype: object

In [50]:
pd.DataFrame([{'title': 'David Bowie', 'year': 1969},
             {'title': 'The Man Who Sold the World', 'year': 1970},
             {'title': 'Hunky Dory', 'year': 1971},])

Unnamed: 0,title,year
0,David Bowie,1969
1,The Man Who Sold the World,1970
2,Hunky Dory,1971


In [51]:
pd.DataFrame([('Ziqqy Stardust', 1), ('Aladdin Sane', 1), ('Pin Ups', 1)],
            columns=['title', 'toprank'])

Unnamed: 0,title,toprank
0,Ziqqy Stardust,1
1,Aladdin Sane,1
2,Pin Ups,1


In [54]:
pd.DataFrame({'title': ['David Bowie', 'The Man Who Sold the World', 'Hunky Dory',
                        'Ziggy Stardust', 'Aladin Sane', 'Pin Ups', 'Diamond Dogs',
                        'Young Americans', 'Station to Station', 'Low', 'Heroes', 'Lodger'],
              'release':['1969-11-14', '1970-11-04', '1971-12-17', '1972-06-16',
                         '1973-01-23', '1973-10-19', '1974-05-24', '1975-03-07',
                         '1976-01-23', '1977-01-14', '1977-10-14', '1979-05-18']})

Unnamed: 0,title,release
0,David Bowie,1969-11-14
1,The Man Who Sold the World,1970-11-04
2,Hunky Dory,1971-12-17
3,Ziggy Stardust,1972-06-16
4,Aladin Sane,1973-01-23
5,Pin Ups,1973-10-19
6,Diamond Dogs,1974-05-24
7,Young Americans,1975-03-07
8,Station to Station,1976-01-23
9,Low,1977-01-14
