In [None]:
from pathlib import Path
import pandas as pd
from pptx import Presentation
# Folder passed to find_ppts_by_subfolder below; each immediate subfolder is scanned for decks.
ROOT = Path("/Users/ylin/My Drive/Cohost/** Properties ** -- Valta/")  # <-- EDIT THIS

def find_ppts_by_subfolder(root: Path) -> pd.DataFrame:
    """List the PowerPoint files found under each immediate subfolder of ``root``.

    Every non-hidden directory directly inside ``root`` is searched recursively
    for ``*.pptx`` and ``*.ppt`` files. Files whose name starts with ``~$``
    (Office temp/lock files) are skipped. Files sitting directly in ``root``
    are not reported, because only subfolders are searched.

    Parameters
    ----------
    root : Path
        Directory whose immediate subfolders are scanned.

    Returns
    -------
    pd.DataFrame
        One row per deck with the columns ``property_folder`` (name of the
        immediate subfolder), ``ppt_name`` (file name) and ``ppt_path``
        (full path as a string), sorted by ``property_folder`` then
        ``ppt_name``. The frame is empty, but still has these three columns,
        when nothing is found.
    """
    columns = ["property_folder", "ppt_name", "ppt_path"]
    rows = []

    # Only immediate subfolders under root (one level); dot-folders are ignored.
    subfolders = sorted(
        p for p in root.iterdir() if p.is_dir() and not p.name.startswith(".")
    )
    for sub in subfolders:
        # Find ppt/pptx recursively within the subfolder
        ppts = list(sub.rglob("*.pptx")) + list(sub.rglob("*.ppt"))

        for ppt in sorted(ppts):
            if ppt.name.startswith("~$"):  # ignore Office temp files
                continue
            rows.append({
                "property_folder": sub.name,
                "ppt_name": ppt.name,
                "ppt_path": str(ppt),
            })

    # Passing the columns explicitly keeps the sort keys present even when
    # `rows` is empty; otherwise sort_values raises KeyError on a column-less frame.
    found = pd.DataFrame(rows, columns=columns)
    return found.sort_values(["property_folder", "ppt_name"]).reset_index(drop=True)



In [None]:
# Build the deck inventory for ROOT and save it as a CSV (row index omitted).
ppt_df = find_ppts_by_subfolder(ROOT)
ppt_df.to_csv(
    "/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/Cohost_Property_PPTs.csv",
    index=False,
)

In [None]:
# Load the workbook that maps each listing to its deck; later cells read its
# "ppt_path" and "Listing" columns.
ppts_loc = pd.read_excel(
    "/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/Cohost_Property_PPT.xlsx"
)
ppts_loc

Unnamed: 0,Listing,property_folder,ppt_name,ppt_path
0,Bainbridge 11143,Bainbridge 2025 - 11143 Rolling Bay Walk 98110...,Bainbridge 2025 - 11143 Rolling Bay Walk NE Ma...,/Users/ylin/My Drive/Cohost/** Properties ** -...
1,Bellevue 14615,Bellevue 2021 - 14615 NE 32nd St #D303 Bellevu...,"14615 NE 32nd St D303 Bellevue, WA 98007 Maint...",/Users/ylin/My Drive/Cohost/** Properties ** -...
2,Bellevue 14620,"Bellevue 2022 - 14620 NE 31st St #E205, 98007,...",14620 NE 31st St #E205 Maintenance Overview - ...,/Users/ylin/My Drive/Cohost/** Properties ** -...
3,Bellevue 1621,"Bellevue 2022 - 1621 107th Ave SE, Bellevue","Bellevue, 1621 107th Ave SE - Maintenance Over...",/Users/ylin/My Drive/Cohost/** Properties ** -...
4,Bellevue 514,"Bellevue 2022 - 514 142nd Ave SE APT 97, 98007",Bellevue 514 142nd Ave SE APT 97 Maintenance O...,/Users/ylin/My Drive/Cohost/** Properties ** -...
...,...,...,...,...
92,Seattle 10057 Upper,Seattle 2025-10057 17th Ave SW 98146 -Quinnlan...,Seattle 10057-UPPER 17th Ave SW 98146 Mainte...,/Users/ylin/My Drive/Cohost/** Properties ** -...
93,Seattle 10057 Lower,Seattle 2025-10057 17th Ave SW 98146 -Quinnlan...,Copy of Seattle 10057-LOWER 17th Ave SW 98146...,/Users/ylin/My Drive/Cohost/** Properties ** -...
94,Seattle 10057 Whole,Seattle 2025-10057 17th Ave SW 98146 -Quinnlan...,Seattle 10057 17th Ave SW 98146 Maintenance Ov...,/Users/ylin/My Drive/Cohost/** Properties ** -...
95,Shelton 250,"Shelton 2022 - 250 SE Dogwood Acres Rd,98584 - Ni",Shelton - 250 SE Dogwood Acres Rd Maintenance ...,/Users/ylin/My Drive/Cohost/** Properties ** -...


In [None]:
# Extract every table on the first two slides of each deck listed in ppts_loc.
# Each table becomes one DataFrame (first table row used as the header) tagged
# with the listing it belongs to; the frames are collected in `tables`.
tables = []

for _, row in ppts_loc.iterrows():
    ppt_path = row["ppt_path"]
    print(f"Processing: {ppt_path}")
    prs = Presentation(ppt_path)
    n = min(2, len(prs.slides))  # first 2 slides, or fewer if deck is short
    for slide_idx in range(n):
        slide = prs.slides[slide_idx]   # integer indexing (works)
        for shape in slide.shapes:
            if not getattr(shape, "has_table", False):
                continue

            # `table_row` (not `row`) so the outer ppts_loc row is not shadowed.
            data = [
                [cell.text.strip() for cell in table_row.cells]
                for table_row in shape.table.rows
            ]
            if not data:
                # No rows means no header row to promote; nothing to extract.
                continue

            df = pd.DataFrame(data)
            df.columns = df.iloc[0]
            df = df[1:].reset_index(drop=True)

            if slide_idx == 0:
                # Tables on the first slide carry no category column; label them "Main".
                df["Category"] = "Main"
            else:
                # On the second slide the third column is renamed to "Description",
                # and blank Category cells inherit the nearest non-blank value above.
                df = df.rename(columns={df.columns[2]: "Description"})
                df["Category"] = df["Category"].replace("", pd.NA).ffill()

            # Tag and collect inside the table branch: doing this once per slide
            # re-appended a stale frame (or raised NameError) for slides without
            # a table, and dropped all but the last table on a slide with several.
            df["Listing"] = row["Listing"]
            tables.append(df)

Processing: /Users/ylin/My Drive/Cohost/** Properties ** -- Valta/Bainbridge 2025 - 11143 Rolling Bay Walk 98110 - Margaret/Bainbridge 2025 - 11143 Rolling Bay Walk NE Maintenance Overview - Cohost.pptx
Processing: /Users/ylin/My Drive/Cohost/** Properties ** -- Valta/Bellevue 2021 - 14615 NE 32nd St #D303 Bellevue, WA 98007/14615 NE 32nd St D303 Bellevue, WA 98007 Maintenance Overview - Cohost.pptx
Processing: /Users/ylin/My Drive/Cohost/** Properties ** -- Valta/Bellevue 2022 - 14620 NE 31st St #E205, 98007, Bellevue Highland/14620 NE 31st St #E205 Maintenance Overview - Cohost.pptx
Processing: /Users/ylin/My Drive/Cohost/** Properties ** -- Valta/Bellevue 2022 - 1621 107th Ave SE, Bellevue/Bellevue, 1621 107th Ave SE - Maintenance Overview - Cohost.pptx
Processing: /Users/ylin/My Drive/Cohost/** Properties ** -- Valta/Bellevue 2022 - 514 142nd Ave SE APT 97, 98007/Bellevue 514 142nd Ave SE APT 97 Maintenance Overview - Cohost.pptx
Processing: /Users/ylin/My Drive/Cohost/** Propertie

In [45]:
# Stack the per-table frames into a single DataFrame. `tables` is rebound from
# a list to a DataFrame here, so the guard keeps the cell safe to re-run:
# calling pd.concat on the already-combined DataFrame would raise TypeError.
if not isinstance(tables, pd.DataFrame):
    tables = pd.concat(tables, ignore_index=True, sort=False)
tables

Unnamed: 0,Resource,Description,Category,Listing
0,Owner,"Margaret Dufresne: 2064913917, margduf@gmail.c...",Main,Bainbridge 11143
1,Cohost,Crystal,Main,Bainbridge 11143
2,Airbnb Titles,Rolling Bay Villa: Waterfront Haven in Bainbridge,Main,Bainbridge 11143
3,Listing link,"rollingbayvilla.com login: MargaretDuFresne, R...",Main,Bainbridge 11143
4,3D link,,Main,Bainbridge 11143
...,...,...,...,...
2855,Cleaning Person,"Ali, adjust thermostat before and after guest,...",Service,Shelton 310
2856,Landscaper,Mario (owner),Service,Shelton 310
2857,STR License/HOA,No need,Legal,Shelton 310
2858,Insurance,,Legal,Shelton 310


In [None]:
import re

# Control characters \x00-\x08, \x0B, \x0C and \x0E-\x1F; tab, newline and
# carriage return are deliberately outside the pattern and are left alone.
_illegal_excel_re = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F]")


def clean_for_excel(x):
    """Return ``x`` with each matched control character replaced by a space.

    Only ``str`` values are altered; any other value is returned unchanged.
    """
    if not isinstance(x, str):
        return x
    return _illegal_excel_re.sub(" ", x)

  tables = tables.applymap(clean_for_excel)


In [60]:
# Sheets to export, keyed by sheet name.
out = {
    "All_Tables": tables,
}
with pd.ExcelWriter(
    "/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/Property_ppt_tables.xlsx",
    engine="openpyxl"
) as writer:
    for name, df in out.items():
        # DataFrame.applymap is deprecated in favour of DataFrame.map (pandas 2.1+)
        # and emits a FutureWarning; fall back to applymap on older pandas.
        elementwise = df.map if hasattr(df, "map") else df.applymap
        df = elementwise(clean_for_excel)
        df.to_excel(writer, sheet_name=name[:31], index=False)  # Excel sheet name max 31 chars

  df = df.applymap(clean_for_excel)
