In [11]:

    """
    Function to pull diversion records from Utah Department of Water Resources website 

    Parameters
    ----------
    
    dst_dir : str
        relative path location to directory to save downloaded data
        (default is "utdwr_raw_data")

    sites_ifp : str
        relative path location to csv file containing all UCRB diversion sites.
        This function only attempts to pull data for records with "dataSource" attribute of "UTDWR"
        (default is "ucrb_diversion_master_table.csv")

    sp_df : pandas dataframe
        dataframe containing one record per day within period of interest, used for combining automatically
        retrieved data with manually retrieved data located in hst_dir directory

    hst_dir : str
        relative path location to directory containing manually-retrieved historical records
        (default is "utdwr_historical_data")

    comb_dir : str
        relative path location to directory where combined automatically-retrieved and manually-retrieved
        records will be saved
        (default is "utdwr_combined_data")

    Exports
    ----------
    CSV file "utdwr_diversion_sites.csv", written to the parent directory of dst_dir, containing
    site information of every site for which daily records were pulled

    1 additional CSV file for each site (e.g. "cms_ut_caineville_canal.csv") containing daily diversion
    records of that site, written to both dst_dir (downloaded records) and comb_dir (combined records)


    Returns
    ----------
    None

    See Also
    --------

    Notes
    -----

    Examples
    --------
    Need to add    
    """    
    # Overview of the steps below:
    #   1. make sure the output directories exist (existing contents are kept)
    #   2. for every UTDWR site with a station ID, request the station page, find the
    #      "Daily comma delimited" download link, and save the daily series to dst_dir
    #   3. if the site is flagged with historicalRecord == "y", merge the download with the
    #      manually-retrieved historical file; the result (or the plain download) goes to comb_dir
    #   4. write a lookup table describing every successfully processed site
    print("downloading UTDWR diversion record data to directory {0}".format(dst_dir))
    os.makedirs(dst_dir, exist_ok=True)

    print("combining UTDWR diversion records into directory {0}".format(comb_dir))
    os.makedirs(comb_dir, exist_ok=True)

    # import table of UTDWR diversion sites
    sites = pd.read_csv(sites_ifp)
    sites = sites.loc[sites["dataSource"] == "UTDWR"].copy()

    url_base = "https://www.waterrights.utah.gov"
    # the station page is requested with RECORD_YEAR set to the current calendar year
    record_year = date.today().strftime("%Y")
    # seconds to wait on the server before giving up on a site instead of hanging forever
    request_timeout = 60

    # one dict per successfully processed site; building each record in a single step keeps
    # the lookup table consistent even if a site fails partway through
    lookup_columns = ["siteID", "siteName", "siteUse", "siteLat", "siteLong",
                      "siteSource", "siteFile", "startDate", "endDate",
                      "noFillYears", "shortID", "destinationCode", "destinationFlag"]
    lookup_rows = []

    # retrieve UT DWR data
    for _, site in sites.loc[sites["utdwrID"].notnull()].iterrows():
        site_file = "{0}.csv".format(site.siteName)
        landing_url = (
            f"{url_base}/cgi-bin/dvrtview.exe?STATION_ID={site.utdwrID}"
            f"&RECORD_YEAR={record_year}&Modinfo=Daily_Comma"
        )

        try:
            landing = requests.get(landing_url, timeout=request_timeout)
            landing.raise_for_status()

            # The download link is looked up fresh for every site. Carrying it over from the
            # previous iteration would silently save another site's data under this site's name.
            link_parts = None
            for line in StringIO(landing.text):
                if line.startswith("Daily comma delimited"):
                    link_parts = re.findall('"([^"]*)"', line)
            if not link_parts:
                raise ValueError("no daily comma-delimited download link found at {0}".format(landing_url))

            data_url = "{0}{1}".format(url_base, "".join(link_parts))
            download = requests.get(data_url, timeout=request_timeout)
            download.raise_for_status()

            df = pd.read_csv(StringIO(download.text))
            df.columns = ["year", "month", "day", "discharge_cfs"]
            df["date"] = pd.to_datetime(df[["year", "month", "day"]])
            # keep only the discharge column, indexed by date
            df = df.set_index("date")[["discharge_cfs"]]

            df.to_csv(os.path.join(dst_dir, site_file))

            if site.historicalRecord == "y":
                try:
                    hist = pd.read_csv(os.path.join(hst_dir, site_file))
                except Exception:
                    # NOTE(review): hst_dir1 is not defined in the visible code; confirm it is a
                    # parameter or module-level name, otherwise this fallback raises NameError
                    # and the site is reported as failed below.
                    hist = pd.read_csv(os.path.join(hst_dir1, site_file))

                # plain column assignment so the parsed dates get a datetime dtype
                hist["date"] = pd.to_datetime(hist["date"])
                hist = hist.set_index("date")
                hist = sp_df.join(hist, how="left")

                # spread each reported monthly total evenly over the days of that month
                for _, month_row in hist.loc[hist["monthly_cfsd"].notnull()].iterrows():
                    in_month = (hist["year"] == month_row["year"]) & (hist["month"] == month_row["month"])
                    days_in_month = monthrange(int(month_row["year"]), int(month_row["month"]))[1]
                    hist.loc[in_month, "discharge_cfs"] = month_row["monthly_cfsd"] / days_in_month

                # historical values take precedence; downloaded values only fill the gaps
                combined = hist.join(df.rename(columns={"discharge_cfs": "auto_cfs"}), how="left")
                combined["discharge_cfs"] = combined["discharge_cfs"].fillna(combined["auto_cfs"])
                # label the index so the exported CSV header reads "date"
                combined = combined.rename_axis("date")

                combined[["discharge_cfs"]].to_csv(os.path.join(comb_dir, site_file))
            else:
                df.to_csv(os.path.join(comb_dir, site_file))

            print(site.siteName)
            lookup_rows.append({
                "siteID": site.utdwrID,
                "siteName": site.siteName,
                "siteUse": site.siteUse,
                "siteLat": site.decLat,
                "siteLong": site.decLong,
                "siteSource": site.dataSource,
                "siteFile": site_file,
                "startDate": site.startDate,
                "endDate": site.endDate,
                "noFillYears": site.no_fill_years,
                "shortID": site.shortID,
                "destinationCode": site.destinationCode,
                "destinationFlag": site.destinationFlag,
            })

        except Exception as err:
            # best effort: report the failing site and continue with the next one
            # (Ctrl-C / SystemExit are deliberately not caught)
            print("could not download or process data from UTDWR diversion site: {0}".format(site.siteName))
            print("    reason: {0!r}".format(err))

    # build and export diversion site lookup table for use in build_diversion_tabfiles()
    df = pd.DataFrame(lookup_rows, columns=lookup_columns)

    df["siteFolder"] = os.path.basename(comb_dir)
    df_out = format_sites_df(df)
    df_out.to_csv(os.path.join(dst_dir, "..", "utdwr_diversion_sites.csv"))


    def get_wy_diversion_data(dst_dir=os.path.join("..", "output", "wyseo_raw_data"),
                          sp_df=None,
                          sites_ifp=os.path.join("..", "input", "ucrb_diversion_master_table.csv"),
                          hst_dir=os.path.join("..", "input", "wyseo_historical_data"),
                          comb_dir=os.path.join("..", "output", "wyseo_combined_data")):

SyntaxError: incomplete input (2506649436.py, line 177)