# 1. Import libraries and load the DataFrames

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Read both CSV files into DataFrames; `lastdate` in the COVID file is
# parsed as a datetime column at load time.
nls97 = pd.read_csv('data/nls97.csv')
covid_totals = pd.read_csv(
    'data/covidtotals.csv',
    parse_dates=['lastdate'],
)

# 2. Set and show the index and the size of the `nls97` data

Also check to see whether the index values are unique:

In [4]:
# Promote `personid` from a column to the row index, then display the index.
# Reassigning (rather than using inplace=True) keeps the step explicit, and
# the guard lets this cell be re-run without a KeyError once `personid` is
# no longer a column.
if nls97.index.name != 'personid':
    nls97 = nls97.set_index('personid')
nls97.index

Index([100061, 100139, 100284, 100292, 100583, 100833, 100931, 101089, 101122,
       101132,
       ...
       998997, 999031, 999053, 999087, 999103, 999291, 999406, 999543, 999698,
       999963],
      dtype='int64', name='personid', length=8984)

In [9]:
# Dimensions of the frame as (rows, columns).
nls97.shape

(8984, 88)

In [10]:
# Number of distinct index values; the index is unique when this equals the row count.
nls97.index.nunique()

8984

# 3. Show the data types and `non-null` value counts:

In [11]:
# Per-column dtype and non-null count.
nls97.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8984 entries, 100061 to 999963
Data columns (total 88 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   gender                 8984 non-null   object 
 1   birthmonth             8984 non-null   int64  
 2   birthyear              8984 non-null   int64  
 3   highestgradecompleted  6663 non-null   float64
 4   maritalstatus          6672 non-null   object 
 5   childathome            4791 non-null   float64
 6   childnotathome         4791 non-null   float64
 7   wageincome             5091 non-null   float64
 8   weeklyhrscomputer      5792 non-null   object 
 9   weeklyhrstv            6711 non-null   object 
 10  nightlyhrssleep        6706 non-null   float64
 11  satverbal              1406 non-null   float64
 12  satmath                1407 non-null   float64
 13  gpaoverall             6004 non-null   float64
 14  gpaenglish             5798 non-null   float64
 15  gp

# 4. Show the first two rows of the `nls97` data.

Transpose the result so that more of the columns are visible in the output:

In [12]:
# Take the first two rows and transpose so each column is listed as a row.
nls97.head(2).T

personid,100061,100139
gender,Female,Male
birthmonth,5,9
birthyear,1980,1983
highestgradecompleted,13.0,12.0
maritalstatus,Married,Married
...,...,...
colenroct15,1. Not enrolled,1. Not enrolled
colenrfeb16,1. Not enrolled,1. Not enrolled
colenroct16,1. Not enrolled,1. Not enrolled
colenrfeb17,1. Not enrolled,1. Not enrolled


# 5. Set and show the index and size for the COVID data.

Also check to see whether the index values are unique:

In [15]:
# A column can serve as a unique key only when its number of distinct
# values matches the number of rows in the frame.
total_rows = covid_totals.shape[0]
distinct_counts = covid_totals.nunique()
unique_columns = distinct_counts[distinct_counts == total_rows].index.tolist()

unique_columns

['iso_code', 'location', 'total_cases', 'total_cases_pm', 'population']

In [16]:
# Promote `iso_code` from a column to the row index, then display the index.
# Reassigning (rather than using inplace=True) keeps the step explicit, and
# the guard lets this cell be re-run without a KeyError once `iso_code` is
# no longer a column.
if covid_totals.index.name != 'iso_code':
    covid_totals = covid_totals.set_index('iso_code')
covid_totals.index

Index(['AFG', 'ALB', 'DZA', 'ASM', 'AND', 'AGO', 'AIA', 'ATG', 'ARG', 'ARM',
       ...
       'URY', 'UZB', 'VUT', 'VAT', 'VEN', 'VNM', 'WLF', 'YEM', 'ZMB', 'ZWE'],
      dtype='object', name='iso_code', length=231)

In [17]:
# Dimensions of the frame as (rows, columns).
covid_totals.shape

(231, 16)

In [18]:
# Number of distinct index values; the index is unique when this equals the row count.
covid_totals.index.nunique()

231

# 6. Show the data types and `non-null` value counts:

In [19]:
# Per-column dtype and non-null count.
covid_totals.info()

<class 'pandas.core.frame.DataFrame'>
Index: 231 entries, AFG to ZWE
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   lastdate         231 non-null    datetime64[ns]
 1   location         231 non-null    object        
 2   total_cases      231 non-null    float64       
 3   total_deaths     231 non-null    float64       
 4   total_cases_pm   231 non-null    float64       
 5   total_deaths_pm  231 non-null    float64       
 6   population       231 non-null    int64         
 7   pop_density      209 non-null    float64       
 8   median_age       194 non-null    float64       
 9   gdp_per_capita   191 non-null    float64       
 10  hosp_beds        170 non-null    float64       
 11  vac_per_hund     13 non-null     float64       
 12  aged_65_older    188 non-null    float64       
 13  life_expectancy  227 non-null    float64       
 14  hum_dev_ind      187 non-null    float64     

# 7. Show a sample of a few rows of the COVID case data:

In [25]:
# Draw two rows with a fixed random_state so the same rows are picked on every run,
# then transpose so each column is listed as a row.
covid_totals.sample(2, random_state=1).T

iso_code,GHA,NIU
lastdate,2023-12-03 00:00:00,2023-12-31 00:00:00
location,Ghana,Niue
total_cases,171834.0,993.0
total_deaths,1462.0,0.0
total_cases_pm,5133.07,508709.016
total_deaths_pm,43.673,0.0
population,33475870,1952
pop_density,126.719,
median_age,21.1,
gdp_per_capita,4227.63,
