In [1]:
from pathlib import Path
import pandas as pd
from pptx import Presentation
# Top-level folder to scan; the (currently commented-out) call to
# find_ppts_by_subfolder further down passes this value as `root`.
ROOT = Path("/Users/ylin/My Drive/Cohost/** Properties ** -- Valta/")  # <-- EDIT THIS

def find_ppts_by_subfolder(root: Path) -> pd.DataFrame:
    """Index every PowerPoint file found under each immediate subfolder of ``root``.

    Each non-hidden directory directly under ``root`` is searched recursively
    for ``*.pptx`` and ``*.ppt`` files. Files whose name starts with ``~$``
    (Office lock/temp files) are skipped. Files sitting directly in ``root``
    are not reported, because only subfolders are searched.

    Parameters
    ----------
    root : Path
        Directory whose immediate subfolders are scanned.

    Returns
    -------
    pd.DataFrame
        One row per deck with columns ``property_folder`` (name of the
        immediate subfolder), ``ppt_name`` (file name) and ``ppt_path``
        (full path as a string), sorted by folder then file name. When no
        deck is found, an empty frame with these three columns is returned.
    """
    columns = ["property_folder", "ppt_name", "ppt_path"]
    rows = []

    # Only immediate subfolders under root (one level); dot-folders are skipped.
    subfolders = sorted(
        p for p in root.iterdir() if p.is_dir() and not p.name.startswith(".")
    )
    for sub in subfolders:
        # Find ppt/pptx recursively within the subfolder
        ppts = list(sub.rglob("*.pptx")) + list(sub.rglob("*.ppt"))

        for ppt in sorted(ppts):
            if ppt.name.startswith("~$"):  # ignore Office temp files
                continue
            rows.append({
                "property_folder": sub.name,
                "ppt_name": ppt.name,
                "ppt_path": str(ppt),
            })

    # Passing `columns` keeps the schema stable when `rows` is empty; without it
    # the frame has no columns and sort_values raises KeyError.
    return (
        pd.DataFrame(rows, columns=columns)
        .sort_values(["property_folder", "ppt_name"])
        .reset_index(drop=True)
    )



In [None]:
#ppt_df = find_ppts_by_subfolder(ROOT)
#ppt_df.to_csv("/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/Cohost_Property_PPTs_Locations.csv", index=False)

In [None]:
# Load the deck-location workbook. Its displayed output includes a `Listing`
# column in addition to the three columns find_ppts_by_subfolder produces.
ppts_loc_path = "/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/Cohost_Property_PPTs_Locations.xlsx"
ppts_loc = pd.read_excel(ppts_loc_path)
ppts_loc

Unnamed: 0,Listing,property_folder,ppt_name,ppt_path
0,Bainbridge 11143,Bainbridge 2025 - 11143 Rolling Bay Walk 98110...,Bainbridge 2025 - 11143 Rolling Bay Walk NE Ma...,/Users/ylin/My Drive/Cohost/** Properties ** -...
1,Bellevue 14615,Bellevue 2021 - 14615 NE 32nd St #D303 Bellevu...,"14615 NE 32nd St D303 Bellevue, WA 98007 Maint...",/Users/ylin/My Drive/Cohost/** Properties ** -...
2,Bellevue 14620,"Bellevue 2022 - 14620 NE 31st St #E205, 98007,...",14620 NE 31st St #E205 Maintenance Overview - ...,/Users/ylin/My Drive/Cohost/** Properties ** -...
3,Bellevue 1621,"Bellevue 2022 - 1621 107th Ave SE, Bellevue","Bellevue, 1621 107th Ave SE - Maintenance Over...",/Users/ylin/My Drive/Cohost/** Properties ** -...
4,Bellevue 514,"Bellevue 2022 - 514 142nd Ave SE APT 97, 98007",Bellevue 514 142nd Ave SE APT 97 Maintenance O...,/Users/ylin/My Drive/Cohost/** Properties ** -...
...,...,...,...,...
92,Seattle 10057 Upper,Seattle 2025-10057 17th Ave SW 98146 -Quinnlan...,Seattle 10057-UPPER 17th Ave SW 98146 Mainte...,/Users/ylin/My Drive/Cohost/** Properties ** -...
93,Seattle 10057 Lower,Seattle 2025-10057 17th Ave SW 98146 -Quinnlan...,Copy of Seattle 10057-LOWER 17th Ave SW 98146...,/Users/ylin/My Drive/Cohost/** Properties ** -...
94,Seattle 10057 Whole,Seattle 2025-10057 17th Ave SW 98146 -Quinnlan...,Seattle 10057 17th Ave SW 98146 Maintenance Ov...,/Users/ylin/My Drive/Cohost/** Properties ** -...
95,Shelton 250,"Shelton 2022 - 250 SE Dogwood Acres Rd,98584 - Ni",Shelton - 250 SE Dogwood Acres Rd Maintenance ...,/Users/ylin/My Drive/Cohost/** Properties ** -...


In [None]:
# Collect every table found on the first two slides of each deck listed in
# `ppts_loc`, as one DataFrame per table tagged with the deck's `Listing`.
tables = []

for _, row in ppts_loc.iterrows():
    ppt_path = row["ppt_path"]
    print(f"Processing: {ppt_path}")
    prs = Presentation(ppt_path)
    n = min(2, len(prs.slides))  # first 2 slides, or fewer if deck is short
    for slide_idx in range(n):
        slide = prs.slides[slide_idx]   # integer indexing (works)
        for shape in slide.shapes:
            # Shapes without a table (text boxes, pictures, ...) are skipped.
            if not getattr(shape, "has_table", False):
                continue

            # `table_row` is named distinctly so it cannot be confused with the
            # outer `row` (the ppts_loc record) used for the Listing below.
            data = [
                [cell.text.strip() for cell in table_row.cells]
                for table_row in shape.table.rows
            ]

            # First table row is the header; the rest are the records.
            df = pd.DataFrame(data)
            df.columns = df.iloc[0]
            df = df[1:].reset_index(drop=True)

            if slide_idx == 0:
                # Tables on the first slide get a fixed category.
                df["Category"] = "Main"
            else:
                # On the second slide the third column is renamed to
                # "Description" and blank Category cells inherit the value above.
                # NOTE(review): assumes these tables have >= 3 columns and a
                # "Category" header - confirm against the deck template.
                df = df.rename(columns={df.columns[2]: "Description"})
                df["Category"] = df["Category"].replace("", pd.NA).ffill()

            # Tag and store inside the has_table branch, once per table.
            # At slide level, a slide without a table re-appended (and
            # re-labelled) the previous frame, or raised NameError on the very
            # first slide, and only the last table of a slide was kept.
            df["Listing"] = row["Listing"]
            tables.append(df)

In [None]:
#tables = pd.concat(tables, ignore_index=True, sort=False)

Unnamed: 0,Resource,Description,Category,Listing
0,Owner,"Margaret Dufresne: 2064913917, margduf@gmail.c...",Main,Bainbridge 11143
1,Cohost,Crystal,Main,Bainbridge 11143
2,Airbnb Titles,Rolling Bay Villa: Waterfront Haven in Bainbridge,Main,Bainbridge 11143
3,Listing link,"rollingbayvilla.com login: MargaretDuFresne, R...",Main,Bainbridge 11143
4,3D link,,Main,Bainbridge 11143
...,...,...,...,...
2855,Cleaning Person,"Ali, adjust thermostat before and after guest,...",Service,Shelton 310
2856,Landscaper,Mario (owner),Service,Shelton 310
2857,STR License/HOA,No need,Legal,Shelton 310
2858,Insurance,,Legal,Shelton 310


In [None]:
import re

# Control characters (everything below 0x20 except tab, line feed and carriage
# return) that get replaced before the data is written to Excel.
_illegal_excel_re = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F]")


def clean_for_excel(x):
    """Return ``x`` with each matched control character replaced by a space.

    Non-string values are returned unchanged, so the function can be applied
    element-wise to a DataFrame with mixed cell types.
    """
    if not isinstance(x, str):
        return x
    return _illegal_excel_re.sub(" ", x)

In [None]:
# Export all extracted tables to a single-sheet workbook.
#
# `tables` is built as a list of per-table frames by the extraction loop; the
# concat cell is commented out, so combine the list here (without rebinding
# `tables`, which keeps this cell safe to re-run). A DataFrame is accepted
# as-is in case the concat was already done manually.
if isinstance(tables, list):
    all_tables = (
        pd.concat(tables, ignore_index=True, sort=False) if tables else pd.DataFrame()
    )
else:
    all_tables = tables

out = {
    "All_Tables": all_tables,
}
with pd.ExcelWriter(
    "/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/Property_ppt_tables.xlsx",
    engine="openpyxl"
) as writer:
    for name, df in out.items():
        # DataFrame.map replaced the deprecated applymap in pandas 2.1;
        # fall back to applymap on older versions.
        elementwise = df.map if hasattr(df, "map") else df.applymap
        df = elementwise(clean_for_excel)
        df.to_excel(writer, sheet_name=name[:31], index=False)  # Excel sheet name max 31 chars

In [2]:
# merge ppt_df with master file info
#
# Three workbooks are loaded here: the property list, the tables exported from
# the decks, and the master sheet. A few names are then rewritten in the
# `Listing` / `Property` columns that merged_dfs joins on.

# NOTE: `property` shadows the Python builtin; the name is kept because the
# other cells pass it as `property=property`.
property = pd.read_excel("/Users/ylin/My Drive/Cohost/Cohost Cleaner Compensation/Working/Data/Property_Cohost.xlsx")

ppt = "/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/Property_ppt_tables.xlsx"
ppt_df = pd.read_excel(ppt)
osbr_rows = ppt_df["Listing"] == "OSBR"
ppt_df.loc[osbr_rows, "Listing"] = "Cottages All OSBR"
ppt_df = ppt_df.copy()

master_df = pd.read_excel("/Users/ylin/My Drive/Cohost/** Properties ** -- Valta/Listings, Team & Vendor Master Sheet.xlsx")
# (old name, new name) pairs applied in order to the master sheet.
for old_name, new_name in (
    ("Cottage 12(Caregiver)", "Cottage 12"),
    ("OSBR", "Cottages All OSBR"),
):
    master_df.loc[master_df["Property"] == old_name, "Property"] = new_name
master_df = master_df.copy()

In [3]:
def merged_dfs(ppt: pd.DataFrame, ppt_var: list,
               master: pd.DataFrame, master_var: list,
               property: pd.DataFrame) -> pd.DataFrame:
    """Left-join deck rows and master-sheet columns onto the active listings.

    Parameters
    ----------
    ppt : pd.DataFrame
        Rows extracted from the decks (typically pre-filtered by ``Resource``).
    ppt_var : list
        Columns of ``ppt`` to keep; must include ``"Listing"`` (the join key).
    master : pd.DataFrame
        Master sheet.
    master_var : list
        Columns of ``master`` to keep; must include ``"Property"`` (the join key).
    property : pd.DataFrame
        Property list with ``Listing``, ``Listings``, ``Status`` and ``Type``
        columns. (The parameter name shadows the builtin inside this function;
        it is kept so existing keyword calls keep working.)

    Returns
    -------
    pd.DataFrame
        One row per match for every property with ``Status == "Active"``,
        ``Type`` in ``{"LTR", "STR"}`` and a non-null ``Listings`` value.
        Columns are ``Listing``, ``Type``, then the remaining ``ppt_var`` and
        ``master_var`` columns. ``Resource`` and ``Property`` are dropped when
        present.
    """
    ppt = ppt[ppt_var]
    master = master[master_var]

    is_active = property["Status"] == "Active"
    is_rental = property["Type"].isin(["LTR", "STR"])
    # NOTE(review): the filter reads "Listings" while the join key is "Listing";
    # presumably two distinct columns of the property sheet - confirm.
    has_listings = property["Listings"].notna()
    base = property.loc[is_active & is_rental & has_listings, ["Listing", "Type"]]

    merged = (
        base
        .merge(ppt, on="Listing", how="left")
        .merge(master, left_on="Listing", right_on="Property", how="left")
    )
    # errors="ignore": callers may leave "Resource" out of ppt_var; previously
    # that raised KeyError here.
    return merged.drop(columns=["Resource", "Property"], errors="ignore")



In [4]:
#listing_door_code
#check_in_instruction
# Deck rows for the access-related resources, joined with the master sheet's
# access columns.
# NOTE(review): 'Accss & Keys info' is matched exactly as spelled here; verify
# it against the labels actually present in ppt_df["Resource"].
access_labels = ["Access", 'Accss & Keys info']
access_rows = ppt_df.loc[ppt_df["Resource"].isin(access_labels)]

combined_access = merged_dfs(
    ppt=access_rows,
    ppt_var=["Listing", "Resource", "Description"],
    master=master_df,
    master_var=["Property", "Access", "Backup Access (default lockbox: 3012)"],
    property=property,
)

# Positional rename of the merged columns.
combined_access.columns = [
    "Listing",
    "Type",
    "Access_ppt",
    "Access_masterfile",
    "Backup Access_masterfile",
]

In [None]:
# Only the last expression of a cell is displayed, so the `head()` result on
# the next line is computed but not shown; the cell's output is `master_df.columns`.
master_df.head()
master_df.columns

Index(['Unnamed: 0', 'Property', 'Address:', 'City', 'Status', 'Co host',
       'Airbnb', 'Airbnb rating as of 10/29', 'Other badge',
       'Primary host as of 10/29', 'Bed room', 'No of Bed types', 'King',
       'Queen', 'Twin', 'Full', 'Sofa Bed ', 'Trundle bed ', 'Air Matress',
       'Bunk beds (twin)', 'Bunk beds (full)', 'Bath room', 'Max guests',
       'No of Fee types', 'Cleaing', 'Pet', 'Extra Guest Applied After',
       'Extra Guest Fee', 'Access & Camera', 'Access',
       'Backup Access (default lockbox: 3012)', 'Camera', 'Trash Day',
       'Sq Feet', 'No of Links', 'Airbnb.1', 'Airbnb Custom Link', 'VRBO',
       'Booking.com', 'Marriott', 'guestybookings.com', 'Trip.com', 'Hopper',
       'Whimstay', 'Google.com', 'Blueground', 'Expedia', 'Showmojo', 'Zillow',
       'Furnished Finder', 'Bnb Finder', 'MLS ID', '3D Tour Link',
       'House Manual Links', 'Valta link',
       'Note (google drive to share,anyone with link can view)',
       'Suggested rent May-Sep', '

In [25]:
# Distinct `Resource` labels present in ppt_df - the values that the `isin`
# filters in the other cells are matched against.
set(ppt_df["Resource"])

{'3D Link',
 'ADU Sewer sump Pump Monitor',
 'ADU sump pump monthly maintenance',
 'Access',
 'Accommodation tax registration number',
 'Airbnb Titles',
 'Backflow',
 'Bbq grill',
 'Blind setup',
 'Bulbs, batteries, filter',
 'Circuit Breaker',
 'Circuit Breaker/Water Valve',
 'Cleaning Person',
 'Cleaning with rate/Landscaper/handyman',
 'Cohost',
 'Common issues',
 'Electricity',
 'Fire alarm/extinguisher/bulbs',
 'Fireplace',
 'Furnace/Filter change/vacuum',
 'Garbage(recommend the biggest size to avoid overflow)',
 'Gas/Electricity',
 'Handyman',
 'Heat and AC',
 'Hot tub',
 'Insurance/W9',
 'Landscaper',
 'Landscaper/handyman',
 'Layout',
 'Leasing Agreement',
 'Links/Account',
 'Listing Channels',
 'Listing desc',
 'Lockbox',
 'Mailbox',
 'Move in/out checklist',
 'Other Maintenance',
 'Owner',
 'Owner closet location for supplies. How supplies are managed',
 'Parking',
 'Pest Control',
 'Plumbing /indoor gas leak',
 'Plumbing/electrician',
 'Restrictions',
 'Restrictions/HOA',
 

In [13]:
## listing_parking_instructions
# Deck rows labelled "Parking"; only the join key is taken from the master sheet.
parking_rows = ppt_df.loc[ppt_df["Resource"].isin(["Parking"])]

combined = merged_dfs(
    ppt=parking_rows,
    ppt_var=["Listing", "Resource", "Description"],
    master=master_df,
    master_var=["Property"],
    property=property,
)
combined.columns = ["Listing", "Type", "Parking_ppt"]  # positional rename

# Keep an independent copy; `combined` is reassigned by the other cells.
combined_parking = combined.copy()

In [12]:
#listing_wifi_name
#listing_wifi_password

# Deck rows labelled "Wifi & Internet"; only the join key is taken from the master sheet.
wifi_rows = ppt_df.loc[ppt_df["Resource"].isin(["Wifi & Internet"])]

combined = merged_dfs(
    ppt=wifi_rows,
    ppt_var=["Listing", "Resource", "Description"],
    master=master_df,
    master_var=["Property"],
    property=property,
)
combined.columns = ["Listing", "Type", "Wifi_ppt"]  # positional rename

# TODO: split Wifi_ppt into separate wifi name / password fields (not implemented).
# Keep an independent copy; `combined` is reassigned by the other cells.
combined_wifi = combined.copy()

In [7]:
#listing_trash_collected_on
# Deck rows for the garbage resource, joined with the master sheet's "Trash Day".
trash_rows = ppt_df.loc[
    ppt_df["Resource"].isin(["Garbage(recommend the biggest size to avoid overflow)"])
]

combined = merged_dfs(
    ppt=trash_rows,
    ppt_var=["Listing", "Resource", "Description"],
    master=master_df,
    master_var=["Property", "Trash Day"],
    property=property,
)
combined.columns = ["Listing", "Type", "Trash_ppt", "Trash_master"]  # positional rename

# Keep an independent copy; `combined` is reassigned by the other cells.
combined_trash = combined.copy()

In [10]:
#critical_warning
# Deck rows labelled "Restrictions"; only the join key is taken from the master sheet.
restriction_rows = ppt_df.loc[ppt_df["Resource"].isin(['Restrictions'])]

combined = merged_dfs(
    ppt=restriction_rows,
    ppt_var=["Listing", "Resource", "Description"],
    master=master_df,
    master_var=["Property"],
    property=property,
)
combined.columns = ["Listing", "Type", "Restrictions_ppt"]  # positional rename

# Keep an independent copy; `combined` is reassigned by the other cells.
combined_warnings = combined.copy()

In [9]:
#pet_rule

# Deck rows labelled "Restrictions/Pets", joined with the master sheet's "Pet" column.
pet_rows = ppt_df.loc[ppt_df["Resource"].isin(['Restrictions/Pets'])]

combined = merged_dfs(
    ppt=pet_rows,
    ppt_var=["Listing", "Resource", "Description"],
    master=master_df,
    master_var=["Property", "Pet"],
    property=property,
)
combined.columns = ["Listing", "Type", "pets_ppt", "pets_master"]  # positional rename

# Keep an independent copy; `combined` is reassigned by the other cells.
combined_pets = combined.copy()

In [None]:
# Write each comparison frame to its own sheet of one workbook.
# Note: `combined` is rebound here from a DataFrame to a dict of sheet name -> frame.
combined = dict(
    listing_wifi=combined_wifi,
    listing_door_code=combined_access,
    listing_parking_instructions=combined_parking,
    listing_trash_collected_on=combined_trash,
    critical_warning=combined_warnings,
    pet_rule=combined_pets,
)

combined_workbook = "/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/PPT_Masterfile_Combined_Info.xlsx"
with pd.ExcelWriter(combined_workbook, engine="openpyxl") as writer:
    for sheet_name, sheet_df in combined.items():
        # Excel sheet name max 31 chars
        sheet_df.to_excel(writer, sheet_name=sheet_name[:31], index=False)

