# 1. Demonstrate basic usage of `Analyst` and `Respondent` classes

December 21, 2024

This notebook explores the basic usage of two core classes:

- `Analyst`: handles analysis tasks including computation and stats
- `Respondent`: handles data storage for each individual census respondent

## Set working directory, import classes

In [1]:
import os

# Set working directory to the root of the project.
# If the kernel was started inside the `notebooks` folder, step up one level.
# os.path.abspath('') resolves to the current working directory, so its
# dirname is the parent folder. This must run before the `src` imports below
# (presumably so the `src` package is found from the project root -- TODO confirm).
current_folder = os.path.basename(os.getcwd())
if current_folder == 'notebooks':
    root_path = os.path.dirname(os.path.abspath(''))
    os.chdir(root_path)

# Uncomment to check the resulting working directory:
# print(f'Directory set to: {os.getcwd()}')

from pprint import pprint
from src.respondent import Respondent
from src.analyst import Analyst
import src.utils as utils

from IPython import get_ipython

# Automatically reload all modules every time a line of code is executed
# (equivalent to the `%load_ext autoreload` / `%autoreload 2` line magics)
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')


## Using `Analyst`

In [2]:
# Initialize the Analyst, then load the data and build the list of
# respondents. Later cells read `analyst.respondents_list` and call the
# filter/summary methods on this same instance, so this cell must run first.
analyst = Analyst()
analyst.load_data()
analyst.build_respondents_list()

In [3]:
# Called with no filter arguments, `filter_respondents_on` returns the
# unfiltered list. Each respondent is an instance of the Respondent class.
resp_all = analyst.filter_respondents_on()

# Count the number of respondents
print(f"Total number of respondents: {len(resp_all)}")

Total number of respondents: 1008


In [20]:
# Keep only respondents matching all four criteria: is_working=True,
# is_completed_all_questions=True, country 'United States', gender 'Female'
resp_filtered = analyst.filter_respondents_on(
    is_working=True,
    is_completed_all_questions=True,
    country='United States',
    gender='Female',
)

# Report how many respondents matched the filter
print(f"Filtered of respondents: {len(resp_filtered)}")

Filtered of respondents: 63


In [5]:
# Filter for people who answered the company questions (currently working or
# previously working)
resp_work = analyst.filter_for_working()

# Count the number of respondents
# NOTE(review): the label reads "Filtered of respondents" -- probably meant
# "Number of filtered respondents"; left as-is to match the recorded output.
print(f"Filtered of respondents: {len(resp_work)}")

Filtered of respondents: 665


In [6]:
# Compute the summary statistics dictionary and list the keys it provides
res_stats = analyst.summarize_stats()
print(res_stats.keys())

# Show only the entries whose key mentions 'median'; skip everything else
for key, value in res_stats.items():
    if 'median' not in key:
        continue
    print(f"{key}: {value}")

dict_keys(['num_total', 'num_working', 'num_working_and_completed_all_questions', 'num_unemployed', 'num_unemployed_and_completed_all_questions', 'num_student', 'num_student_and_completed_all_questions', 'mins_working_median', 'mins_working_completed_median', 'mins_student_median', 'mins_student_completed_median', 'mins_unemployed_median', 'mins_unemployed_completed_median', 'mins_working_list', 'mins_working_completed_list', 'mins_student_list', 'mins_student_completed_list', 'mins_unemployed_list', 'mins_unemployed_completed_list', 'response_by_time_datetime', 'response_by_time_num'])
mins_working_median: 4.258333333333333
mins_working_completed_median: 14.4
mins_student_median: 5.6
mins_student_completed_median: 7.75
mins_unemployed_median: 3.7666666666666666
mins_unemployed_completed_median: 14.45


## Using `Respondent`

In [13]:
# Pick one respondent from the full list by a fixed index (not a random draw)
resp_test = resp_all[617]

# Display the summary as defined by `Respondent.__repr__`
resp_test

Token:     fwg1ksl5ktlvskt6fwg1kuhx056997gi
Submitted: 2024-10-31 08:55:26
Duration:  9.65 minutes
Status:    Student

In [14]:
# This respondent is a student. Access the census data, which is a dict
resp_test.census

{'sentiment': {'keys': ["I feel good about what I'm working on",
   'I feel good about my career path',
   'I feel good about my work-life balance',
   'I feel valued by those around me',
   'I see opportunities for career growth'],
  'values': array([3., 2., 3., 3., 3.])},
 'skills_demand': 'Previous hands-on experience (especially in manufacturing); network; scientific/technical rigour',
 'skills_value_chain': ['Recycling',
  'Cell production',
  'Component/precursor production'],
 'education': 'Doctorate',
 'degree': nan,
 'country': 'United Kingdom',
 'zip': 'Ox2 6du',
 'state': None,
 'income': np.float64(43500.0),
 'hours_worked': np.float64(38.0),
 'age': np.float64(35.0),
 'ethnicity': ['White'],
 'gender': 'Male',
 'citizenship': 'Non-citizen (Other)',
 'military_status': 'No',
 'employment_status': "I'm in school or in training (e.g., a student or postdoc)",
 'to_complete_industry_questions': nan,
 'to_complete_student_questions': True,
 'to_complete_unemployed_questions': na

In [None]:
# The student-specific answers are stored separately under `student`
resp_test.student

{'student_sentiment': {'keys': ['After graduating, I know what role(s) to apply to',
   'After graduating, I will find a job',
   'By the time I graduate, I will have learned the skills needed to find a job',
   'I am optimistic about the future of the battery industry'],
  'values': array([3., 5., 4., 4.])},
 'ideal_job_title': 'Principal scientist',
 'ideal_value_chain': ['Component/precursor production',
  'Cell production',
  'Consulting'],
 'ideal_job_aspects': ['Competitive salary and benefits',
  'Challenge and innovation',
  'Work-life balance',
  'Autonomy and independence',
  'Learning and skills development',
  'Career growth opportunities',
  'Desirable work location',
  'Sustainability and impact',
  'Supportive team and environment'],
 'ideal_salary': np.float64(70000.0),
 'num_internships': '5+',
 'internship_value_chain': [],
 'internship_role': ['Engineering and design',
  'Research and development',
  'Managing projects or programs',
  'Legal'],
 'internship_top_skill

In [17]:
# This respondent is a student, so accessing the company data yields only
# NaN/empty values
resp_test.company

{'company_satisfaction': {'keys': ['I am satisfied with my compensation',
   'I am being underpaid compared to similar roles',
   'I am satisfied with the raises and/or bonuses I have been receiving'],
  'values': array([nan, nan, nan])},
 'salary_base': np.float64(nan),
 'salary_comp_types': [],
 'salary_num_raises': nan,
 'salary_num_bonuses': nan,
 'company_years_with': np.float64(nan),
 'company_value_chain': [],
 'company_stage': nan,
 'company_country': nan,
 'company_state': nan,
 'company_days_in_office': np.float64(nan),
 'company_headcount': nan,
 'company_team_count': nan,
 'role_title': nan,
 'role_role': [],
 'role_level': nan,
 'role_why_choose': [],
 'role_prev_industries': nan,
 'role_prev_role': nan,
 'skills_preparedness': {'keys': ['After working for 1 week?',
   'After working for 1 month?',
   'After working for 3 months?',
   'Last week?'],
  'values': array([nan, nan, nan, nan])},
 'skills_how_was_trained': [],
 'skills_how_to_improve': [],
 'skills_num_internshi

In [21]:
# Pick another respondent by fixed index, this time from the filtered list
# (rebinds `resp_test`, replacing the student selected earlier)
resp_test = resp_filtered[10]
resp_test

Token:     5cq52oj70v25tzghsxp6d5cq52ola5s1
Submitted: 2024-10-04 20:45:35
Duration:  17.63 minutes
Status:    Working

In [22]:
# Example of company survey results for a working respondent
resp_test.company

{'company_satisfaction': {'keys': ['I am satisfied with my compensation',
   'I am being underpaid compared to similar roles',
   'I am satisfied with the raises and/or bonuses I have been receiving'],
  'values': array([5., 1., 5.])},
 'salary_base': np.float64(150000.0),
 'salary_comp_types': ['Bonuses (based on company performance)',
  'Education or tuition reimbursement',
  'Health and wellness stipends',
  'Equity (stock',
  'options',
  'etc.)'],
 'salary_num_raises': '2',
 'salary_num_bonuses': '1',
 'company_years_with': np.float64(6.0),
 'company_value_chain': ['Consulting'],
 'company_stage': 'Mid-sized (stable growth)',
 'company_country': 'United States',
 'company_state': 'Maryland',
 'company_days_in_office': np.float64(0.0),
 'company_headcount': '51-100',
 'company_team_count': '1-10',
 'role_title': 'Energy market specialist',
 'role_role': ['market research'],
 'role_level': 'Senior',
 'role_why_choose': ['Hybrid/remote work',
  'Supportive team and environment',
  'F

## Access key class properties

In [None]:
# Number of `Respondent` objects stored on the analyst
pprint(len(analyst.respondents_list))

1008


The `respondents_list` attribute holds a list of `Respondent` objects, each of which holds the data for one particular response.

In [None]:
# Take the first respondent in the list and display its summary
resp = analyst.respondents_list[0]
resp

Token:     fkqh2mecyeh4ille74ccdfkqh2d4hld5
Submitted: 2024-10-04 20:36:11
Duration:  18.72 minutes
Status:    Working

In [None]:
# Pretty-print the census data; pprint sorts dict keys alphabetically
pprint(resp.census)

{'age': np.float64(35.0),
 'citizenship': 'Non-citizen (TN visa)',
 'country': 'United States',
 'degree': 'Chemistry',
 'education': 'Doctorate',
 'employment_status': "I'm working professionally (e.g., at a company, "
                      'national lab)',
 'ethnicity': ['Asian'],
 'gender': 'Male',
 'hours_worked': np.float64(40.0),
 'income': np.float64(120000.0),
 'military_status': 'No',
 'sentiment': {'keys': ["I feel good about what I'm working on",
                        'I feel good about my career path',
                        'I feel good about my work-life balance',
                        'I feel valued by those around me',
                        'I see opportunities for career growth'],
               'values': array([4., 4., 4., 4., 4.])},
 'skills_demand': 'Scale up, the ability to keep up with and foresee '
                  'research/industry trends and directions, language abilities '
                  '(Chinese, Korean, Japanese) to learn from the bigger '
     

In [None]:
# Pretty-print the company data for this working respondent
pprint(resp.company)

{'benefits_entitlements': [],
 'benefits_parental_leave_weeks': np.float64(nan),
 'benefits_priorities': {'keys': ['Mental health support',
                                  'Work-life balance initiatives',
                                  'Financial wellness programs',
                                  'Career development opportunities'],
                         'values': array([nan, nan, nan, nan])},
 'benefits_pto_weeks': np.float64(nan),
 'benefits_sick_leave_days': np.float64(nan),
 'benefits_unique': nan,
 'company_country': 'United States',
 'company_days_in_office': np.float64(4.0),
 'company_headcount': '11-50',
 'company_satisfaction': {'keys': ['I am satisfied with my compensation',
                                   'I am being underpaid compared to similar '
                                   'roles',
                                   'I am satisfied with the raises and/or '
                                   'bonuses I have been receiving'],
                          '

In [None]:
# Pretty-print the student data; this respondent is not a student, so the
# fields come back as NaN/empty
pprint(resp.student)

{'ideal_job_aspects': [],
 'ideal_job_title': nan,
 'ideal_salary': np.float64(nan),
 'ideal_value_chain': [],
 'internship_hourly_pay': np.float64(nan),
 'internship_hours_per_week': np.float64(nan),
 'internship_role': [],
 'internship_skills_unprepared': nan,
 'internship_skills_wish_learned': nan,
 'internship_top_skills': nan,
 'internship_value_chain': [],
 'num_internships': nan,
 'student_sentiment': {'keys': ['After graduating, I know what role(s) to '
                                'apply to',
                                'After graduating, I will find a job',
                                'By the time I graduate, I will have learned '
                                'the skills needed to find a job',
                                'I am optimistic about the future of the '
                                'battery industry'],
                       'values': array([nan, nan, nan, nan])}}


In [None]:
# Boolean flag; the same name is accepted as a filter keyword by
# `analyst.filter_respondents_on`
resp.is_completed_all_questions

True

In [None]:
# Boolean status flag -- presumably True only for respondents whose status
# is 'Student'
resp.is_student

False

In [None]:
# Boolean status flag; the same name is accepted as a filter keyword by
# `analyst.filter_respondents_on`
resp.is_working

True

In [None]:
# Boolean status flag -- presumably True only for respondents who reported
# being unemployed
resp.is_unemployed

False