In [6]:
import pandas as pd
import numpy as np

# Setting a seed for reproducibility
np.random.seed(42)

# Generating patient records
n_patients = 5000
patient_ids = np.arange(1, n_patients + 1)
full_names = ['Patient_' + str(i) for i in range(1, n_patients + 1)]
birth_dates = pd.date_range(start='1930-01-01', end='2012-12-31', periods=n_patients).date
genders = np.random.choice(['Male', 'Female'], n_patients)
phone_numbers = ['+1' + ''.join(np.random.choice(list('1234567890'), 10)) for _ in range(n_patients)]

# Creating the DataFrame
patient_records_df = pd.DataFrame({
    'PatientID': patient_ids,
    'FullName': full_names,
    'BirthDate': birth_dates,
    'Gender': genders,
    'PhoneNumber': phone_numbers
})

# Saving the DataFrame to a CSV file
patient_records_df.to_csv('patient_records.csv', index=False)


In [16]:
# Appointment records
n_appointments = 10000
patient_ids_appointments = np.random.choice(patient_records_df['PatientID'], n_appointments, replace=True)

# Generating appointment dates
start_date = np.datetime64('2023-01-01')
appointment_dates = [start_date + np.timedelta64(np.random.randint(0, 365), 'D') for _ in range(n_appointments)]
appointment_dates.sort()  # Sorting to maintain chronological order
appointment_dates = np.array(appointment_dates).astype('datetime64[s]')

doctor_names = ['Doctor_' + str(i) for i in np.random.randint(1, 51, n_appointments)]
departments = np.random.choice(['Cardiology', 'General Practice', 'Pediatrics', 'Orthopedics', 'Dermatology'], n_appointments)
visit_reasons = ['Reason_' + str(i) for i in np.random.randint(1, 101, n_appointments)]
billing_amounts = np.random.uniform(50, 500, n_appointments).round(2)

# Creating the DataFrame
appointment_records_df = pd.DataFrame({
    'AppointmentID': np.arange(1, n_appointments + 1),
    'PatientID': patient_ids_appointments,
    'AppointmentDate': appointment_dates,
    'DoctorName': doctor_names,
    'Department': departments,
    'VisitReason': visit_reasons,
    'BillingAmount': billing_amounts
})

# Saving the DataFrame to a CSV file
appointment_records_df.to_csv('appointment_records.csv', index=False)


## Решение

In [50]:
patient_records_df.head()

Unnamed: 0,PatientID,FullName,BirthDate,Gender,PhoneNumber
0,1,Patient_1,1930-01-01,Male,13473984111
1,2,Patient_2,1930-01-07,Female,11836357266
2,3,Patient_3,1930-01-13,Male,13600529838
3,4,Patient_4,1930-01-19,Male,19332801877
4,5,Patient_5,1930-01-25,Male,13149515885


In [48]:
appointment_records_df.head()

Unnamed: 0,AppointmentID,PatientID,AppointmentDate,DoctorName,Department,VisitReason,BillingAmount
0,1,3636,2023-01-01,Doctor_45,Dermatology,Reason_22,77.28
1,2,4824,2023-01-01,Doctor_32,Pediatrics,Reason_35,374.6
2,3,4150,2023-01-01,Doctor_13,Cardiology,Reason_86,230.44
3,4,4623,2023-01-01,Doctor_22,Pediatrics,Reason_16,216.59
4,5,509,2023-01-01,Doctor_22,General Practice,Reason_100,209.41


In [100]:
patient_records_df = pd.read_csv('patient_records.csv')
appointment_records_df = pd.read_csv('appointment_records.csv')

In [102]:
merged_df = pd.merge(appointment_records_df, patient_records_df, how='left', on='PatientID')

In [103]:
merged_df['BillingAmount'] = merged_df['BillingAmount'].astype(float)
merged_df['BirthDate'] = pd.to_datetime(merged_df['BirthDate'])
merged_df['AppointmentDate'] = pd.to_datetime(merged_df['AppointmentDate'])

In [124]:
merged_df['AppointmentDate'].dt.year - merged_df['BirthDate'].dt.year

0       33
1       13
2       25
3       17
4       85
        ..
9995    24
9996    55
9997    88
9998    37
9999    49
Length: 10000, dtype: int32

In [104]:
merged_df['AgeAtAppointment'] = merged_df['AppointmentDate'].dt.year - merged_df['BirthDate'].dt.year

In [105]:
merged_df.head(5)

Unnamed: 0,AppointmentID,PatientID,AppointmentDate,DoctorName,Department,VisitReason,BillingAmount,FullName,BirthDate,Gender,PhoneNumber,AgeAtAppointment
0,1,3636,2023-01-01,Doctor_45,Dermatology,Reason_22,77.28,Patient_3636,1990-05-09,Female,13433044646,33
1,2,4824,2023-01-01,Doctor_32,Pediatrics,Reason_35,374.6,Patient_4824,2010-01-28,Female,16669907375,13
2,3,4150,2023-01-01,Doctor_13,Cardiology,Reason_86,230.44,Patient_4150,1998-11-20,Female,15184513221,25
3,4,4623,2023-01-01,Doctor_22,Pediatrics,Reason_16,216.59,Patient_4623,2006-09-27,Male,19618064621,17
4,5,509,2023-01-01,Doctor_22,General Practice,Reason_100,209.41,Patient_509,1938-06-08,Female,11745051692,85


In [106]:
cardiology_appointments = merged_df[(merged_df['Department'] == 'Cardiology') & (merged_df['BillingAmount'] > 200)]

In [108]:
sorted_appointments = cardiology_appointments.sort_values(by='AgeAtAppointment', ascending=False)

In [109]:
departments_per_patient = merged_df.groupby('PatientID')['Department'].nunique()
patients_multidepartments = departments_per_patient[departments_per_patient >= 3].index
multidepartments_records = merged_df[merged_df['PatientID'].isin(patients_multidepartments)]

In [87]:
appointments_count_per_patient = merged_df.groupby('PatientID').size()
frequent_patients = appointments_count_per_patient[appointments_count_per_patient > 5].index
frequent_patients_records = merged_df[merged_df['PatientID'].isin(frequent_patients)]


In [88]:
average_billing_task8 = high_billing_new_patients['BillingAmount'].mean()
sum_billing_task10 = 0  # Assuming sum_billing_task10 is calculated from the hypothetical frequent visits records
unique_patients_task9 = multidept_records['PatientID'].nunique()

new_checksum = int((average_billing_task8 + sum_billing_task10) * unique_patients_task9) % 100000
print(new_checksum)


17851


In [95]:
Для финального числа возьмем количество уникальных пациентов из шага 6, 
умножим на общее количество визитов из шага 7, 
и добавим медианный возраст пациентов из отсортированного датафрейма шага 5.
В качестве финального числа возьмем последние четыре цифры этой суммы.

SyntaxError: invalid syntax (3872556958.py, line 1)

In [120]:
final_number = (multidepartments_records['PatientID'].nunique() *
                frequent_patients_records.shape[0] +
                sorted_appointments['AgeAtAppointment'].median())

In [121]:
final_number

460175.0

In [119]:
final_number

175.0