In [1]:
from pathlib import Path
import pandas as pd
from pptx import Presentation
# Folder whose immediate subfolders are scanned for PowerPoint decks by
# find_ppts_by_subfolder below. This is an absolute, machine-specific path.
ROOT = Path("/Users/ylin/My Drive/Cohost/** Properties ** -- Valta/")  # <-- EDIT THIS

def find_ppts_by_subfolder(root: Path) -> pd.DataFrame:
    """List the PowerPoint files found under each immediate subfolder of ``root``.

    Only the first level of ``root`` is treated as a grouping folder; hidden
    folders (names starting with ``.``) are skipped. Inside each of those
    folders the search is recursive and picks up ``*.pptx`` and ``*.ppt``
    files, ignoring Office lock files (names starting with ``~$``). Files that
    sit directly in ``root`` are not reported.

    Parameters
    ----------
    root : Path
        Directory whose immediate subfolders are scanned.

    Returns
    -------
    pd.DataFrame
        One row per file with columns ``property_folder`` (subfolder name),
        ``ppt_name`` (file name) and ``ppt_path`` (full path as a string),
        sorted by folder then file name. When nothing is found, an empty frame
        with those three columns is returned.

    Raises
    ------
    OSError
        Propagated from ``Path.iterdir`` when ``root`` is missing or is not a
        directory.
    """
    columns = ["property_folder", "ppt_name", "ppt_path"]
    rows = []

    # Only immediate subfolders under root (one level)
    subfolders = sorted(
        p for p in root.iterdir() if p.is_dir() and not p.name.startswith(".")
    )
    for sub in subfolders:
        # Find ppt/pptx recursively within the subfolder
        ppts = list(sub.rglob("*.pptx")) + list(sub.rglob("*.ppt"))

        for ppt in sorted(ppts):
            if ppt.name.startswith("~$"):  # ignore Office temp files
                continue
            rows.append({
                "property_folder": sub.name,
                "ppt_name": ppt.name,
                "ppt_path": str(ppt),
            })

    # Declaring the columns up front keeps sort_values from raising KeyError
    # when no presentation was found (an empty frame has no columns otherwise).
    return (
        pd.DataFrame(rows, columns=columns)
        .sort_values(["property_folder", "ppt_name"])
        .reset_index(drop=True)
    )



In [None]:
# Scan ROOT for decks and save the inventory as a CSV.
# NOTE(review): the following cell loads Cohost_Property_PPTs.xlsx, which has an
# extra "Listing" column -- presumably this CSV is annotated by hand and
# re-saved as .xlsx outside the notebook. TODO confirm and document that step.
ppt_df = find_ppts_by_subfolder(ROOT)
ppt_df.to_csv(
    "/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/Cohost_Property_PPTs.csv",
    index=False,
)

In [5]:
# Load the PPT inventory workbook; the table-extraction loop further down reads
# its "ppt_path" and "Listing" columns.
ppts_loc = pd.read_excel(
    "/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/Cohost_Property_PPTs.xlsx"
)
ppts_loc

Unnamed: 0,Listing,property_folder,ppt_name,ppt_path
0,Bainbridge 11143,Bainbridge 2025 - 11143 Rolling Bay Walk 98110...,Bainbridge 2025 - 11143 Rolling Bay Walk NE Ma...,/Users/ylin/My Drive/Cohost/** Properties ** -...
1,Bellevue 14615,Bellevue 2021 - 14615 NE 32nd St #D303 Bellevu...,"14615 NE 32nd St D303 Bellevue, WA 98007 Maint...",/Users/ylin/My Drive/Cohost/** Properties ** -...
2,Bellevue 14620,"Bellevue 2022 - 14620 NE 31st St #E205, 98007,...",14620 NE 31st St #E205 Maintenance Overview - ...,/Users/ylin/My Drive/Cohost/** Properties ** -...
3,Bellevue 1621,"Bellevue 2022 - 1621 107th Ave SE, Bellevue","Bellevue, 1621 107th Ave SE - Maintenance Over...",/Users/ylin/My Drive/Cohost/** Properties ** -...
4,Bellevue 514,"Bellevue 2022 - 514 142nd Ave SE APT 97, 98007",Bellevue 514 142nd Ave SE APT 97 Maintenance O...,/Users/ylin/My Drive/Cohost/** Properties ** -...
...,...,...,...,...
92,Seattle 10057 Upper,Seattle 2025-10057 17th Ave SW 98146 -Quinnlan...,Seattle 10057-UPPER 17th Ave SW 98146 Mainte...,/Users/ylin/My Drive/Cohost/** Properties ** -...
93,Seattle 10057 Lower,Seattle 2025-10057 17th Ave SW 98146 -Quinnlan...,Copy of Seattle 10057-LOWER 17th Ave SW 98146...,/Users/ylin/My Drive/Cohost/** Properties ** -...
94,Seattle 10057 Whole,Seattle 2025-10057 17th Ave SW 98146 -Quinnlan...,Seattle 10057 17th Ave SW 98146 Maintenance Ov...,/Users/ylin/My Drive/Cohost/** Properties ** -...
95,Shelton 250,"Shelton 2022 - 250 SE Dogwood Acres Rd,98584 - Ni",Shelton - 250 SE Dogwood Acres Rd Maintenance ...,/Users/ylin/My Drive/Cohost/** Properties ** -...


In [None]:
# Collect every table found on the first two slides of each deck as its own
# DataFrame, tagged with the listing it belongs to.
tables = []

for _, row in ppts_loc.iterrows():
    ppt_path = row["ppt_path"]
    print(f"Processing: {ppt_path}")
    prs = Presentation(ppt_path)
    n = min(2, len(prs.slides))  # first 2 slides, or fewer if deck is short
    for slide_idx in range(n):
        slide = prs.slides[slide_idx]   # integer indexing (works)
        for shape in slide.shapes:
            if not getattr(shape, "has_table", False):
                continue

            # Distinct loop name so the outer `row` (the inventory row) is not
            # visually shadowed by the table rows.
            data = [
                [cell.text.strip() for cell in table_row.cells]
                for table_row in shape.table.rows
            ]
            if not data:
                continue  # table without any row: nothing to use as a header

            # First table row is the header, the remainder is the body.
            df = pd.DataFrame(data[1:], columns=data[0])

            if slide_idx == 0:
                df["Category"] = "Main"
            else:
                # assumes second-slide tables have at least three columns and a
                # "Category" header -- TODO confirm against the deck template
                df = df.rename(columns={df.columns[2]: "Description"})
                # Blank category cells inherit the last non-blank value above.
                df["Category"] = df["Category"].replace("", pd.NA).ffill()

            # Tag and store each table where it is built. Doing this after the
            # shape loop re-appended a stale frame for slides without a table
            # (overwriting its Listing) and kept only the last table of a slide
            # that has several.
            df["Listing"] = row["Listing"]
            tables.append(df)

In [45]:
# Stack the per-table frames into one long frame. The name `tables` is reused
# for the result, so the guard keeps this cell re-runnable: pd.concat rejects a
# DataFrame argument, which is what `tables` already is on a second run.
if not isinstance(tables, pd.DataFrame):
    tables = pd.concat(tables, ignore_index=True, sort=False)
tables

Unnamed: 0,Resource,Description,Category,Listing
0,Owner,"Margaret Dufresne: 2064913917, margduf@gmail.c...",Main,Bainbridge 11143
1,Cohost,Crystal,Main,Bainbridge 11143
2,Airbnb Titles,Rolling Bay Villa: Waterfront Haven in Bainbridge,Main,Bainbridge 11143
3,Listing link,"rollingbayvilla.com login: MargaretDuFresne, R...",Main,Bainbridge 11143
4,3D link,,Main,Bainbridge 11143
...,...,...,...,...
2855,Cleaning Person,"Ali, adjust thermostat before and after guest,...",Service,Shelton 310
2856,Landscaper,Mario (owner),Service,Shelton 310
2857,STR License/HOA,No need,Legal,Shelton 310
2858,Insurance,,Legal,Shelton 310


In [None]:
import re

# Control characters (everything below 0x20 except tab, line feed and carriage
# return) that clean_for_excel strips before the frame is written to Excel.
_illegal_excel_re = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F]")


def clean_for_excel(x):
    """Return ``x`` with Excel-illegal control characters replaced by spaces.

    Non-string values are returned unchanged, so the function can be mapped
    over every cell of a mixed-type DataFrame.
    """
    if not isinstance(x, str):
        return x
    return _illegal_excel_re.sub(" ", x)

In [None]:
# Write each frame in `out` to its own sheet after scrubbing characters that
# Excel cannot store.
out = {
    "All_Tables": tables,
}
with pd.ExcelWriter(
    "/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/Property_ppt_tables.xlsx",
    engine="openpyxl"
) as writer:
    for sheet_name, sheet_df in out.items():
        # DataFrame.applymap is deprecated since pandas 2.1 in favour of
        # DataFrame.map; fall back to applymap on older pandas. The check is on
        # the class so a column that happens to be named "map" cannot fool it.
        if hasattr(pd.DataFrame, "map"):
            cleaned = sheet_df.map(clean_for_excel)
        else:
            cleaned = sheet_df.applymap(clean_for_excel)
        cleaned.to_excel(writer, sheet_name=sheet_name[:31], index=False)  # Excel sheet name max 31 chars

In [2]:
# merge ppt_df with master file info
# NOTE: `property` shadows the Python builtin of the same name; the name is kept
# because later cells reference it.
property = pd.read_excel("/Users/ylin/My Drive/Cohost/Cohost Cleaner Compensation/Working/Data/Property_Cohost.xlsx")
ppt = "/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/Property_ppt_tables.xlsx"
ppt_df = pd.read_excel(ppt)

# Keep only the access-related rows of the extracted deck tables. The labels
# are matched literally -- presumably they mirror the spellings used in the
# decks, including the "Accss" variant -- so they must not be "corrected" here.
# .copy() detaches the result from the frame it was sliced from, so the .loc
# assignments in the next cell do not raise SettingWithCopyWarning.
ppt_df = ppt_df.loc[
    ppt_df["Resource"].isin(["Access", 'Accss & Keys info',
                             "Access\n(front door manual; basement lock manual )\nSchlage+smartthings+hub_aurmur"]),
    ["Listing", "Resource", "Description"],
].copy()

master_df = pd.read_excel("/Users/ylin/My Drive/Cohost/** Properties ** -- Valta/Listings, Team & Vendor Master Sheet.xlsx")
# Same reason for .copy(): this column subset is modified in place later on.
master_df = master_df[["Property", "Access", "Backup Access (default lockbox: 3012)"]].copy()


In [3]:
# Take independent copies BEFORE assigning: both frames were produced by
# slicing, and copying only after the .loc writes is too late to avoid
# SettingWithCopyWarning.
master_df = master_df.copy()
ppt_df = ppt_df.copy()

# Rename the entries listed below so both sources use the same listing names
# before the merge in the next cell.
master_df.loc[master_df["Property"] == "Cottage 12(Caregiver)", "Property"] = "Cottage 12"
master_df.loc[master_df["Property"] == "OSBR", "Property"] = "Cottages All OSBR"

ppt_df.loc[ppt_df["Listing"] == "OSBR", "Listing"] = "Cottages All OSBR"

property

Unnamed: 0,Listing,Property,Type,Status,furnished,Cohost,Listings,Entity,Address,Apt_Unit,...,Bunk_full,Bunk_twin,Sofa.Bed,Trundle.bed,Air.Matress,Pet,TYPE.OF.BEDS,sheets.wt,blanket.wt,Set
0,Bainbridge 11143,Bainbridge 11143,STR,Active,Yes,Crystal,SF-Bainbridge 11143,Margaret Dufresne,"11143 Rolling Bay Walk Northeast, Bainbridge I...",,...,2.0,,,,,No,"QUEEN_BED, BUNK_BED",2.566667,21.72,Remote 3+B Lux
1,Beachwood 1,Beachwood,STR,Active,Yes,VA,Condo-Beachwood 1,Valta Beachwood LLC,"4027 Beach Dr SW, Seattle, WA 98116, USA",1,...,,,,,,No,"KING_BED, SINGLE_BED",2.700000,15.16,Beachwood 2B
2,Beachwood 10,Beachwood,LTR,Active,Yes,VA,Condo-Beachwood 10,Valta Beachwood LLC,"4027 Beach Dr SW, Seattle, WA 98116, USA",10,...,,,,,,100,,3.300000,4.58,Beachwood 1B
3,Beachwood 2,Beachwood,STR,Active,Yes,VA,Condo-Beachwood 2,Valta Beachwood LLC,"4027 Beach Dr SW, Seattle, WA 98116, USA",2,...,,2.0,,,,No,"BUNK_BED, QUEEN_BED",2.420000,17.14,Beachwood 2B
4,Beachwood 3,Beachwood,LTR,Active,Yes,VA,Condo-Beachwood 3,Valta Beachwood LLC,"4027 Beach Dr SW, Seattle, WA 98116, USA",3,...,,,,,,100,,3.300000,4.58,Beachwood 1B
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129,Valta Realty,Valta Realty,,,,,,,,,...,,,,,,,,,,
130,Bellevue 1960,Bellevue 1960,STR,Inactive,Yes,,,,,,...,,,,,,,,,,
131,Bellevue 14507,Bellevue 14507,LTR,Active,No,Zexi,,,,,...,,,,,,,,,,
132,Beachwood,Beachwood,Mixed,Active,Mixed,,,,,,...,,,,,,,,,,


In [4]:
# Side-by-side view of access information: one row per active LTR/STR listing
# that has a "Listings" value, with the deck's access text and the master
# sheet's access columns attached via left joins (unmatched listings get NaN).
# NOTE: the displayed frame includes door and lockbox codes -- clear the cell
# output before sharing the notebook.
combined = (
    property
    .loc[
        lambda p: p["Status"].eq("Active")
        & p["Type"].isin(["LTR", "STR"])
        & p["Listings"].notna(),
        ["Listing", "Type"],
    ]
    .merge(ppt_df, on="Listing", how="left")
    .merge(master_df, left_on="Listing", right_on="Property", how="left")
    .rename(columns={
        "Description": "Access_ppt",
        "Access": "Access_masterfile",
        "Backup Access (default lockbox: 3012)": "Backup Access_masterfile",
    })
)
combined

Unnamed: 0,Listing,Type,Resource,Access_ppt,Property,Access_masterfile,Backup Access_masterfile
0,Bainbridge 11143,STR,Access,"Smart lock code 0113(both main entry, and gara...",Bainbridge 11143,Lockbox,Digital keypad: 402701 & physical spare key in...
1,Beachwood 1,STR,Access,"Building code:3219, door master code: 402701, ...",Beachwood 1,"PointCentral, 023081 backup code",Digital keypad: 402701 & physical spare key in...
2,Beachwood 10,LTR,Access,"Building code:3219, door master code: 402701, ...",Beachwood 10,"ShowMojo, 240610",Physical spare key in Unit 3
3,Beachwood 2,STR,Access,"3219 = main building, digital access code (Sho...",Beachwood 2,"Building 3219, main floor storage 3012 keybox ...",Physical spare key in Unit 3
4,Beachwood 3,LTR,Access,"Building code:3219, Unit lockbox: 3012,",Beachwood 3,"ShowMojo, 240703",Physical key in lockbox on first floor handrai...
...,...,...,...,...,...,...,...
94,Seattle 9021,STR,Access,Point Central lock. Master code for the lock 1...,Seattle 9021,"PointCentral, backup 626500","Backup key in lockbox on metal gate, code 3012."
95,Seattle 906 Lower,STR,Access,Upstairs: follow the left walkway and find the...,Seattle 906 Lower,"Backup code 070712, (code 7529 to open the si...","Access from garage using app: install myQ app,..."
96,Seattle 906 Upper,LTR,Access,Upstairs: follow the left walkway and find the...,Seattle 906 Upper,Backup code 906101,
97,Shelton 250,STR,Access,Gate & padlock code ; 1421 Front Door Kwikse...,Shelton 250,Code 2250,Key in lockbox on front of house to the right....


In [5]:
# Drop the helper columns (the deck's Resource label and the master sheet's
# Property join key) before exporting. errors="ignore" keeps the cell
# re-runnable: `combined` is rebound here, so on a second run the columns are
# already gone and a plain drop would raise KeyError.
combined = combined.drop(columns=["Resource", "Property"], errors="ignore")
combined.to_excel("/Users/ylin/My Drive/Cohost/Data and Reporting/10-OnboardingTemplate/Combined_Access_Info.xlsx", index=False)