### Run this code to get your list of Physician-Scientists

In [1]:
import pandas as pd

def get_physician_scientists_by_year(file_path, induction_year, num_physician_scientists=None):
    """Return the physician-scientists inducted in a given year.

    Reads the CSV at ``file_path`` (decoded as ISO-8859-1), keeps the rows
    whose ``year`` column equals ``induction_year`` and, if requested,
    limits the result to the first ``num_physician_scientists`` of those
    rows in file order.

    Args:
        file_path: Path of the CSV file to read. It must contain a
            ``year`` column.
        induction_year: Value the ``year`` column is compared against.
        num_physician_scientists: Maximum number of rows to return.
            ``None`` (the default) returns every matching row; ``0``
            returns an empty DataFrame.

    Returns:
        A new DataFrame holding the selected rows, re-indexed from 0.

    Raises:
        ValueError: If ``num_physician_scientists`` is negative or larger
            than the number of rows available for ``induction_year``.
    """
    limit = num_physician_scientists

    # Reject negative limits up front: DataFrame.head(-n) would otherwise
    # silently return "all but the last n" rows.
    if limit is not None and limit < 0:
        raise ValueError(
            f"num_physician_scientists must be non-negative, got {limit}."
        )

    # Read CSV file into a dataframe
    df = pd.read_csv(file_path, encoding="ISO-8859-1")

    # Keep only the rows for the requested induction year
    filtered_df = df[df['year'] == induction_year]

    # Compare against None explicitly so that a limit of 0 is honoured
    # instead of being mistaken for "no limit".
    if limit is not None:
        total = len(filtered_df)
        if limit > total:
            raise ValueError(f"Requested {num_physician_scientists} physician-scientists, but only {total} available for the year {induction_year}.")
        filtered_df = filtered_df.head(limit)

    # Filtering leaves gaps in the row labels; hand back a new frame with
    # a fresh 0-based index rather than mutating the filtered slice in place.
    return filtered_df.reset_index(drop=True)

ModuleNotFoundError: No module named 'pandas'

In [19]:
# First 25 physician-scientists inducted in 2000
first_set = get_physician_scientists_by_year(
    file_path='asci_aap_data/original_names.csv',
    induction_year=2000,
    num_physician_scientists=25,
)

# First 25 physician-scientists inducted in 2010
second_set = get_physician_scientists_by_year(
    file_path='asci_aap_data/original_names.csv',
    induction_year=2010,
    num_physician_scientists=25,
)

# Stack both sets into a single 50-row dataframe.
# ignore_index=True gives the combined frame a fresh 0..49 index; without
# it pandas keeps each input's own row labels, so 0..24 would appear twice.
fifty_physcian_scientists_df = pd.concat(
    [first_set, second_set],
    ignore_index=True,
)

In [20]:
# Print dataframe
# A bare expression is only rendered by the notebook when it is the last
# statement of a cell, so print explicitly to make sure it is shown.
print(fifty_physcian_scientists_df)

# Save the DataFrame to a CSV file
# index=False leaves the row indices out of the written file
fifty_physcian_scientists_df.to_csv('physician_scientists.csv', index=False)