diff --git a/fetch_physicians.py b/fetch_physicians.py index 8620e98..2d355e4 100644 --- a/fetch_physicians.py +++ b/fetch_physicians.py @@ -1,7 +1,7 @@ # %% import camelot -import pandas as pd import numpy as np +import pandas as pd url = "https://cpsa.ca/MedicalDirectory/Alphabetical%20Listing.pdf" tables = camelot.read_pdf(url, pages="all", flavor="stream") @@ -28,13 +28,10 @@ def process(tdf): df = df[~df.apply(lambda row: row.isna().sum() == 4, axis=1)] df = df.reset_index(drop=True) for ind in df[df.apply(lambda row: row.isna().sum() == 3, axis=1)].index: - if df.loc[ind]["CITY"] is not np.nan: - df.loc[ind - 1]["CITY"] += f' {df.loc[ind]["CITY"]}' if df.loc[ind]["NAME"] is not np.nan: - df.loc[ind - 1]["NAME"] += f' {df.loc[ind]["NAME"]}' df = df[~df.apply(lambda row: row.isna().sum() == 3, axis=1)] return df diff --git a/fetch_ratemds.py b/fetch_ratemds.py index 04099bd..cdce1bc 100644 --- a/fetch_ratemds.py +++ b/fetch_ratemds.py @@ -7,7 +7,6 @@ from seleniumbase import SB from tqdm import tqdm - df = pd.read_csv("physicians.csv") diff --git a/streamlit_app.py b/streamlit_app.py index 7f40730..99516f1 100644 --- a/streamlit_app.py +++ b/streamlit_app.py @@ -4,7 +4,6 @@ import plotly.express as px import streamlit as st - st.set_page_config(page_title="ab-physicians", page_icon=":mask:") _, center, _ = st.columns([2, 1, 2]) with center: