### Code to get all reservoir data in the output format, so that each station's fields (i.e., content, elevation, station ID, etc.)
### are next to each other in columns of the output CSV file, and to create the site-formatted metadata

In [None]:
import pandas as pd
import re
from sqlalchemy import create_engine
import urllib

# --- SQL SERVER CONNECTION SETUP ---
# `urllib.parse` is imported explicitly: a bare `import urllib` does not
# guarantee that the `parse` submodule has been loaded.
import os
import urllib.parse

# Connection settings. Each value can be overridden with an environment
# variable so real credentials never have to be written into this file; the
# literals below are only the fallbacks.
server = os.environ.get("DVRT_DB_SERVER", 'wrt-sql-prod')
database = os.environ.get("DVRT_DB_NAME", 'dvrtDB')
username = os.environ.get("DVRT_DB_USER", 'wrtsqlq')
password = os.environ.get("DVRT_DB_PASSWORD", '*******')

# Percent-encode the whole ODBC connection string so it can be handed to
# SQLAlchemy through the `odbc_connect` URL query parameter.
params = urllib.parse.quote_plus(
    f"DRIVER={{ODBC Driver 17 for SQL Server}};"
    f"SERVER={server};DATABASE={database};UID={username};PWD={password}"
)
# Module-level engine; query_station_metadata() reads it as a global.
engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

# --- STEP 1: SQL METADATA QUERY ---
def query_station_metadata():
    """Query dvrtDB for reservoir station metadata and return it as a DataFrame.

    The statement selects from STATION_MASTER, LEFT JOINs COLLECTION_SYSTEMS,
    COLLECTION_STATIONS and DAILY_RECORDS, and INNER JOINs UNITS_MASTER (so a
    station without a matching UNITS_ID row is dropped). Text columns are
    trimmed with LTRIM(RTRIM(...)).

    Rows are kept when STATUS is 'A', DatasetType is 'Observational',
    DataEntryMethod is neither NULL nor 'Manual', LAT is greater than 0, and
    the master or the collection station name contains 'Reservoir'.

    The GROUP BY collapses the joined DAILY_RECORDS rows to one row per
    distinct combination of the grouped columns and reports COUNT, MIN and
    MAX of RECORD_YEAR as NoOfYears, StartYr and EndYr. Results are ordered
    by STATION_ID.

    NOTE(review): ``([LON] > '-115' OR [LON] < '36')`` - assuming LON is a
    numeric column, every non-NULL value satisfies at least one side of this
    OR, so the predicate appears to exclude only NULL longitudes. The bounds
    look like they were meant to form a bounding box; confirm the intent.

    NOTE(review): COLLECTION_SYSTEMS is joined on
    ``collection_sys_id = STATION_ID``; confirm that a system id is really
    meant to be matched against a station id.

    NOTE(review): NoOfYears is a row count of RECORD_YEAR; it equals a number
    of years only if DAILY_RECORDS holds one row per station and year -
    verify against the table definition.

    Returns:
        The DataFrame produced by ``pd.read_sql`` for this statement, run on
        the module-level ``engine``.
    """
    sql_query = """
    SELECT  
        LTRIM(RTRIM([COLLECTION_SYSTEM])) AS COLLECTION_SYSTEM,
        LTRIM(RTRIM([collection_sys_description])) AS collection_sys_description,
        [STATION_MASTER].[STATION_ID] AS MasterStationID,
        LTRIM(RTRIM([STATION_MASTER].[STATION_NAME])) AS MasterStationName,
        LTRIM(RTRIM([COLLECTION_STATIONS].[STATION_NAME])) AS CollectionStationName,
        LTRIM(RTRIM([COMMENTS])) AS COMMENTS,
        LTRIM(RTRIM([SiteType])) AS SiteType,
        LTRIM(RTRIM([ANALOG_CHANNEL])) AS ANALOG_CHANNEL,
        LTRIM(RTRIM([SYSTEM_NAME])) AS SYSTEM_NAME,
        LTRIM(RTRIM([DatasetType])) AS DatasetType,
        LTRIM(RTRIM([MEASURING_DEVICE])) AS MEASURING_DEVICE,
        LTRIM(RTRIM([DEVICE_TYPE])) AS DEVICE_TYPE,
        LTRIM(RTRIM([STATUS])) AS STATUS,
        [LAT], [LON],
        LTRIM(RTRIM([DataEntryMethod])) AS DataEntryMethod,
        LTRIM(RTRIM([Telemetry])) AS Telemetry,
        CONCAT('https://waterrights.utah.gov/cgi-bin/dvrtview.exe?Modinfo=StationView&STATION_ID=', [STATION_MASTER].[STATION_ID]) AS StationPage,
        [UNITS_MASTER].[UNITS_ID],
        LTRIM(RTRIM([UNITS_MASTER].[RECORD_TYPE])) AS RECORD_TYPE,
        LTRIM(RTRIM([UNITS_MASTER].[UNITS_DESC_BASE])) AS UNITS_DESC_BASE,
        LTRIM(RTRIM([UNITS_MASTER].[UNITS_DESC_ENTRY])) AS UNITS_DESC_ENTRY,
        [UNITS_MASTER].[UNITS_MULTIPLIER],
        LTRIM(RTRIM([UNITS_MASTER].[UNITS_DESC_REALTIME])) AS UNITS_DESC_REALTIME,
        COUNT([RECORD_YEAR]) AS NoOfYears, 
        MIN([RECORD_YEAR]) AS StartYr, 
        MAX([RECORD_YEAR]) AS EndYr
    FROM [dvrtDB].[dbo].[STATION_MASTER]
    LEFT JOIN [dvrtDB].[dbo].[COLLECTION_SYSTEMS] 
        ON [COLLECTION_SYSTEMS].[collection_sys_id] = [STATION_MASTER].[STATION_ID]
    LEFT JOIN [dvrtDB].[dbo].[COLLECTION_STATIONS] 
        ON [STATION_MASTER].[CAPTURE_SEQ_NO] = [COLLECTION_STATIONS].[SEQ_NO]
    JOIN [dvrtDB].[dbo].[UNITS_MASTER] 
        ON [STATION_MASTER].[UNITS_ID] = [UNITS_MASTER].[UNITS_ID]
    LEFT JOIN [dvrtDB].[dbo].[DAILY_RECORDS] 
        ON [STATION_MASTER].[STATION_ID] = [DAILY_RECORDS].[STATION_ID]
    WHERE 
        [STATUS] = 'A' AND
        [DatasetType] = 'Observational' AND
        [DataEntryMethod] != 'Manual' AND
        [DataEntryMethod] IS NOT NULL AND
        ([LAT] IS NOT NULL OR [LON] IS NOT NULL) AND
        ([LON] > '-115' OR [LON] < '36') AND
        [LAT] > 0 AND
        (
            [STATION_MASTER].[STATION_NAME] LIKE '%Reservoir%' OR 
            [COLLECTION_STATIONS].[STATION_NAME] LIKE '%Reservoir%'
        )
    GROUP BY
        [COLLECTION_SYSTEM], [collection_sys_description], [STATION_MASTER].[STATION_ID],
        [STATION_MASTER].[STATION_NAME], [COLLECTION_STATIONS].[STATION_NAME],
        [COMMENTS], [SiteType], [ANALOG_CHANNEL], [SYSTEM_NAME], [DatasetType],
        [MEASURING_DEVICE], [DEVICE_TYPE], [STATUS], [LAT], [LON],
        [DataEntryMethod], [Telemetry],
        [UNITS_MASTER].[UNITS_ID], [UNITS_MASTER].[RECORD_TYPE],
        [UNITS_MASTER].[UNITS_DESC_BASE], [UNITS_MASTER].[UNITS_DESC_ENTRY],
        [UNITS_MASTER].[UNITS_MULTIPLIER], [UNITS_MASTER].[UNITS_DESC_REALTIME]
    ORDER BY [STATION_MASTER].[STATION_ID]
    """
    # The query text is static (no user input is interpolated into it).
    return pd.read_sql(sql_query, engine)

# --- STEP 2: FINAL EXPORT LOGIC WITH TRANSPOSE + CHECK/COMMENT ---
def generate_final_export(
    df,
    output_path="Reservior_Station_Datastream_Metadata_Transposed_20250513.csv",
):
    """Group reservoir stations into sites, widen to one row per site, save as CSV.

    Steps:
      1. Keep rows whose ``SiteType`` is "Reservoir" or "Reservoir Release".
      2. Flag a station as a release when ``MasterStationName`` contains
         "RELEASE" (case-insensitive); a missing name counts as non-release.
      3. Derive ``ReservoirRootName``: upper-case the name, remove the agency
         and measurement keywords and any punctuation, then keep the first
         remaining word ("Unknown" when nothing is left or the name is
         missing).
      4. Assign ``SiteID (New)``: non-release stations share one "PDnnn" id
         per root name; every release station gets its own id, numbered after
         the non-release ids.
      5. Build ``NewSiteName`` from the root name and the title-cased
         ``SYSTEM_NAME`` ("Unknown" when the system name is missing).
      6. Number the stations inside each (NewSiteName, SiteID) group and
         spread their fields over numbered columns (``<field>1``,
         ``<field>2``, ...), one output row per site.
      7. Append empty ``Check`` and ``Comment`` columns and write the CSV.

    Every expanded field gets the same set of numbered columns, including
    slots that hold no value for any site. Integer-typed fields are written
    with a nullable integer dtype so that padded ids appear as ``101`` rather
    than ``101.0``.

    NOTE(review): because only the first remaining word of the name is kept,
    two different reservoirs whose names start with the same word end up
    with the same root name and therefore the same non-release site id.

    NOTE(review): ``DataStreamID`` is assigned per station but is not one of
    the fields carried into the transposed output.

    Args:
        df: Station metadata containing at least the columns ``SiteType``,
            ``MasterStationName``, ``MasterStationID``, ``SYSTEM_NAME``,
            ``UNITS_DESC_ENTRY``, ``CollectionStationName`` and
            ``COLLECTION_SYSTEM``.
        output_path: Destination of the CSV file. Defaults to the file name
            this notebook has always written.

    Returns:
        The transposed DataFrame that was written to ``output_path``.
    """
    df = df[df["SiteType"].isin(["Reservoir", "Reservoir Release"])].copy()
    # na=False: a missing station name must give False, not NaN, otherwise the
    # column cannot be used as a boolean mask below.
    df["IsRelease"] = (
        df["MasterStationName"]
        .str.upper()
        .str.contains("RELEASE", na=False)
        .astype(bool)
    )

    keyword_pattern = re.compile(
        r'\b(USBR|USGS|RESERVOIR|RELEASE|CONTENTS|ELEVATION|STORAGE|POOL|EVAPORATION|LEVEL|GAGE HEIGHT)\b'
    )

    def extract_base_name(name):
        """Return the first word left after stripping keywords and punctuation."""
        if pd.isna(name):
            return "Unknown"
        cleaned = keyword_pattern.sub('', str(name).upper())
        cleaned = re.sub(r'[^A-Z0-9 ]+', '', cleaned)
        words = cleaned.split()
        return words[0].title() if words else "Unknown"

    df["ReservoirRootName"] = [
        extract_base_name(name) for name in df["MasterStationName"]
    ]

    release_df = df[df["IsRelease"]].copy()
    nonrelease_df = df[~df["IsRelease"]].copy()

    # One shared id per reservoir root name for the non-release stations.
    nonrelease_group_map = {
        name: f"PD{number:03d}"
        for number, name in enumerate(
            nonrelease_df["ReservoirRootName"].unique(), start=1
        )
    }
    nonrelease_df["SiteID (New)"] = nonrelease_df["ReservoirRootName"].map(
        nonrelease_group_map
    )

    # Each release station is its own site; numbering continues after the
    # last non-release id.
    first_release_number = len(nonrelease_group_map) + 1
    release_df["SiteID (New)"] = [
        f"PD{number:03d}"
        for number in range(
            first_release_number, first_release_number + len(release_df)
        )
    ]

    final_df = pd.concat([nonrelease_df, release_df], ignore_index=True)

    def make_site_name(base, system, is_release):
        """Compose the display name of a site."""
        system = "Unknown" if pd.isna(system) else str(system).strip().title()
        if is_release:
            return f"{base} River Below {base} Reservoir, {system}, Near {base}"
        return f"{base} Reservoir, {system}, Near {base}"

    # Built with a comprehension rather than DataFrame.apply(axis=1), which
    # returns a DataFrame (not a Series) when the frame has no rows.
    final_df["NewSiteName"] = [
        make_site_name(base, system, is_release)
        for base, system, is_release in zip(
            final_df["ReservoirRootName"],
            final_df["SYSTEM_NAME"],
            final_df["IsRelease"],
        )
    ]
    final_df = final_df.sort_values(
        by=["ReservoirRootName", "IsRelease", "MasterStationName"]
    ).reset_index(drop=True)
    final_df["DataStreamID"] = [
        f"DS{number:02d}" for number in range(1, len(final_df) + 1)
    ]

    export = final_df[[
        "NewSiteName", "SiteID (New)", "DataStreamID",
        "MasterStationName", "MasterStationID", "UNITS_DESC_ENTRY",
        "CollectionStationName", "COLLECTION_SYSTEM"
    ]].rename(columns={
        "MasterStationName": "DIVERT_STATION_NAME (old)",
        "MasterStationID": "Station_ID (old)",
        "COLLECTION_SYSTEM": "CollectionSystemName"
    })

    id_cols = ["NewSiteName", "SiteID (New)"]
    cols_to_expand = [
        "DIVERT_STATION_NAME (old)", "Station_ID (old)",
        "UNITS_DESC_ENTRY", "CollectionStationName", "CollectionSystemName"
    ]
    # 1-based position of each station inside its site.
    export["RecordNum"] = export.groupby(id_cols).cumcount() + 1

    # The left frame fixes the output row order (the sort order above).
    transposed = export[id_cols].drop_duplicates().copy()
    if not export.empty:
        # (site, RecordNum) is unique by construction, so unstack is safe.
        keyed = export.set_index(id_cols + ["RecordNum"])
        for col in cols_to_expand:
            wide = keyed[col].unstack("RecordNum")
            if pd.api.types.is_integer_dtype(export[col]):
                # Padding with NaN turned the ids into floats; restore integers.
                wide = wide.astype("Int64")
            wide.columns = [f"{col}{i}" for i in wide.columns]
            transposed = transposed.merge(
                wide.reset_index(), on=id_cols, how="left"
            )

    # Blank columns for the reviewer to fill in.
    transposed["Check"] = ""
    transposed["Comment"] = ""

    transposed.to_csv(output_path, index=False)
    print(f"✅ Saved: {output_path}")
    return transposed

# --- MAIN ---
def main():
    """Run the pipeline: query the station metadata, then build and save the export."""
    print("🔎 Querying metadata...")
    stations = query_station_metadata()
    row_count = len(stations)
    print(f"📦 {row_count} rows retrieved.")

    print("🧠 Structuring export with release/non-release logic and transposing...")
    generate_final_export(stations)
    print("✅ Done.")


# Run the pipeline only when executed directly, not when imported.
if __name__ == "__main__":
    main()

### Code to get all reservoir data in the output format, so that each station's fields (i.e., content, elevation, etc.)
### are next to each other in columns of the output file, and to create the site-formatted metadata.
### Same code as above, but this version writes an .xlsx file whose "Check" column has a drop-down
### of "Approved" and "Not Approved"

In [None]:
import pandas as pd
import re
from sqlalchemy import create_engine
import urllib
from openpyxl import load_workbook
from openpyxl.worksheet.datavalidation import DataValidation

# --- SQL SERVER CONNECTION SETUP ---
# `urllib.parse` is imported explicitly: a bare `import urllib` does not
# guarantee that the `parse` submodule has been loaded.
import os
import urllib.parse

# Connection settings. Each value can be overridden with an environment
# variable so real credentials never have to be written into this file; the
# literals below are only the fallbacks.
server = os.environ.get("DVRT_DB_SERVER", 'wrt-sql-prod')
database = os.environ.get("DVRT_DB_NAME", 'dvrtDB')
username = os.environ.get("DVRT_DB_USER", 'wrtsqlq')
password = os.environ.get("DVRT_DB_PASSWORD", '*****')

# Percent-encode the whole ODBC connection string so it can be handed to
# SQLAlchemy through the `odbc_connect` URL query parameter.
params = urllib.parse.quote_plus(
    f"DRIVER={{ODBC Driver 17 for SQL Server}};"
    f"SERVER={server};DATABASE={database};UID={username};PWD={password}"
)
# Module-level engine; query_station_metadata() reads it as a global.
engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

# --- STEP 1: SQL METADATA QUERY ---
def query_station_metadata():
    """Query dvrtDB for reservoir station metadata and return it as a DataFrame.

    The statement selects from STATION_MASTER, LEFT JOINs COLLECTION_SYSTEMS,
    COLLECTION_STATIONS and DAILY_RECORDS, and INNER JOINs UNITS_MASTER (so a
    station without a matching UNITS_ID row is dropped). Text columns are
    trimmed with LTRIM(RTRIM(...)).

    Rows are kept when STATUS is 'A', DatasetType is 'Observational',
    DataEntryMethod is neither NULL nor 'Manual', LAT is greater than 0, and
    the master or the collection station name contains 'Reservoir'.

    The GROUP BY collapses the joined DAILY_RECORDS rows to one row per
    distinct combination of the grouped columns and reports COUNT, MIN and
    MAX of RECORD_YEAR as NoOfYears, StartYr and EndYr. Results are ordered
    by STATION_ID.

    NOTE(review): ``([LON] > '-115' OR [LON] < '36')`` - assuming LON is a
    numeric column, every non-NULL value satisfies at least one side of this
    OR, so the predicate appears to exclude only NULL longitudes. The bounds
    look like they were meant to form a bounding box; confirm the intent.

    NOTE(review): COLLECTION_SYSTEMS is joined on
    ``collection_sys_id = STATION_ID``; confirm that a system id is really
    meant to be matched against a station id.

    NOTE(review): NoOfYears is a row count of RECORD_YEAR; it equals a number
    of years only if DAILY_RECORDS holds one row per station and year -
    verify against the table definition.

    Returns:
        The DataFrame produced by ``pd.read_sql`` for this statement, run on
        the module-level ``engine``.
    """
    sql_query = """
    SELECT  
        LTRIM(RTRIM([COLLECTION_SYSTEM])) AS COLLECTION_SYSTEM,
        LTRIM(RTRIM([collection_sys_description])) AS collection_sys_description,
        [STATION_MASTER].[STATION_ID] AS MasterStationID,
        LTRIM(RTRIM([STATION_MASTER].[STATION_NAME])) AS MasterStationName,
        LTRIM(RTRIM([COLLECTION_STATIONS].[STATION_NAME])) AS CollectionStationName,
        LTRIM(RTRIM([COMMENTS])) AS COMMENTS,
        LTRIM(RTRIM([SiteType])) AS SiteType,
        LTRIM(RTRIM([ANALOG_CHANNEL])) AS ANALOG_CHANNEL,
        LTRIM(RTRIM([SYSTEM_NAME])) AS SYSTEM_NAME,
        LTRIM(RTRIM([DatasetType])) AS DatasetType,
        LTRIM(RTRIM([MEASURING_DEVICE])) AS MEASURING_DEVICE,
        LTRIM(RTRIM([DEVICE_TYPE])) AS DEVICE_TYPE,
        LTRIM(RTRIM([STATUS])) AS STATUS,
        [LAT], [LON],
        LTRIM(RTRIM([DataEntryMethod])) AS DataEntryMethod,
        LTRIM(RTRIM([Telemetry])) AS Telemetry,
        CONCAT('https://waterrights.utah.gov/cgi-bin/dvrtview.exe?Modinfo=StationView&STATION_ID=', [STATION_MASTER].[STATION_ID]) AS StationPage,
        [UNITS_MASTER].[UNITS_ID],
        LTRIM(RTRIM([UNITS_MASTER].[RECORD_TYPE])) AS RECORD_TYPE,
        LTRIM(RTRIM([UNITS_MASTER].[UNITS_DESC_BASE])) AS UNITS_DESC_BASE,
        LTRIM(RTRIM([UNITS_MASTER].[UNITS_DESC_ENTRY])) AS UNITS_DESC_ENTRY,
        [UNITS_MASTER].[UNITS_MULTIPLIER],
        LTRIM(RTRIM([UNITS_MASTER].[UNITS_DESC_REALTIME])) AS UNITS_DESC_REALTIME,
        COUNT([RECORD_YEAR]) AS NoOfYears, 
        MIN([RECORD_YEAR]) AS StartYr, 
        MAX([RECORD_YEAR]) AS EndYr
    FROM [dvrtDB].[dbo].[STATION_MASTER]
    LEFT JOIN [dvrtDB].[dbo].[COLLECTION_SYSTEMS] 
        ON [COLLECTION_SYSTEMS].[collection_sys_id] = [STATION_MASTER].[STATION_ID]
    LEFT JOIN [dvrtDB].[dbo].[COLLECTION_STATIONS] 
        ON [STATION_MASTER].[CAPTURE_SEQ_NO] = [COLLECTION_STATIONS].[SEQ_NO]
    JOIN [dvrtDB].[dbo].[UNITS_MASTER] 
        ON [STATION_MASTER].[UNITS_ID] = [UNITS_MASTER].[UNITS_ID]
    LEFT JOIN [dvrtDB].[dbo].[DAILY_RECORDS] 
        ON [STATION_MASTER].[STATION_ID] = [DAILY_RECORDS].[STATION_ID]
    WHERE 
        [STATUS] = 'A' AND
        [DatasetType] = 'Observational' AND
        [DataEntryMethod] != 'Manual' AND
        [DataEntryMethod] IS NOT NULL AND
        ([LAT] IS NOT NULL OR [LON] IS NOT NULL) AND
        ([LON] > '-115' OR [LON] < '36') AND
        [LAT] > 0 AND
        (
            [STATION_MASTER].[STATION_NAME] LIKE '%Reservoir%' OR 
            [COLLECTION_STATIONS].[STATION_NAME] LIKE '%Reservoir%'
        )
    GROUP BY
        [COLLECTION_SYSTEM], [collection_sys_description], [STATION_MASTER].[STATION_ID],
        [STATION_MASTER].[STATION_NAME], [COLLECTION_STATIONS].[STATION_NAME],
        [COMMENTS], [SiteType], [ANALOG_CHANNEL], [SYSTEM_NAME], [DatasetType],
        [MEASURING_DEVICE], [DEVICE_TYPE], [STATUS], [LAT], [LON],
        [DataEntryMethod], [Telemetry],
        [UNITS_MASTER].[UNITS_ID], [UNITS_MASTER].[RECORD_TYPE],
        [UNITS_MASTER].[UNITS_DESC_BASE], [UNITS_MASTER].[UNITS_DESC_ENTRY],
        [UNITS_MASTER].[UNITS_MULTIPLIER], [UNITS_MASTER].[UNITS_DESC_REALTIME]
    ORDER BY [STATION_MASTER].[STATION_ID]
    """
    # The query text is static (no user input is interpolated into it).
    return pd.read_sql(sql_query, engine)

# --- STEP 2: FINAL EXPORT LOGIC ---
def generate_final_export(
    df,
    output_path="Reservior_Station_Datastream_Metadata_Transposed_2.0_20250513.xlsx",
):
    """Group reservoir stations into sites, widen to one row per site, save as .xlsx.

    Steps:
      1. Keep rows whose ``SiteType`` is "Reservoir" or "Reservoir Release".
      2. Flag a station as a release when ``MasterStationName`` contains
         "RELEASE" (case-insensitive); a missing name counts as non-release.
      3. Derive ``ReservoirRootName``: upper-case the name, remove the agency
         and measurement keywords and any punctuation, then keep the first
         remaining word ("Unknown" when nothing is left or the name is
         missing).
      4. Assign ``SiteID (New)``: non-release stations share one "PDnnn" id
         per root name; every release station gets its own id, numbered after
         the non-release ids.
      5. Build ``NewSiteName`` from the root name and the title-cased
         ``SYSTEM_NAME`` ("Unknown" when the system name is missing).
      6. Number the stations inside each (NewSiteName, SiteID) group and
         spread their fields over numbered columns (``<field>1``,
         ``<field>2``, ...), one output row per site.
      7. Append empty ``Check`` and ``Comment`` columns, write the workbook
         and pass it to ``add_dropdown_to_excel``.

    Every expanded field gets the same set of numbered columns, including
    slots that hold no value for any site. Integer-typed fields are kept as
    nullable integers instead of being turned into floats by the padding.

    NOTE(review): because only the first remaining word of the name is kept,
    two different reservoirs whose names start with the same word end up
    with the same root name and therefore the same non-release site id.

    NOTE(review): ``DataStreamID`` is assigned per station but is not one of
    the fields carried into the transposed output.

    Args:
        df: Station metadata containing at least the columns ``SiteType``,
            ``MasterStationName``, ``MasterStationID``, ``SYSTEM_NAME``,
            ``UNITS_DESC_ENTRY``, ``CollectionStationName`` and
            ``COLLECTION_SYSTEM``.
        output_path: Destination of the .xlsx file. Defaults to the file name
            this notebook has always written.

    Returns:
        The transposed DataFrame that was written to ``output_path``.
    """
    df = df[df["SiteType"].isin(["Reservoir", "Reservoir Release"])].copy()
    # na=False: a missing station name must give False, not NaN, otherwise the
    # column cannot be used as a boolean mask below.
    df["IsRelease"] = (
        df["MasterStationName"]
        .str.upper()
        .str.contains("RELEASE", na=False)
        .astype(bool)
    )

    keyword_pattern = re.compile(
        r'\b(USBR|USGS|RESERVOIR|RELEASE|CONTENTS|ELEVATION|STORAGE|POOL|EVAPORATION|LEVEL|GAGE HEIGHT)\b'
    )

    def extract_base_name(name):
        """Return the first word left after stripping keywords and punctuation."""
        if pd.isna(name):
            return "Unknown"
        cleaned = keyword_pattern.sub('', str(name).upper())
        cleaned = re.sub(r'[^A-Z0-9 ]+', '', cleaned)
        words = cleaned.split()
        return words[0].title() if words else "Unknown"

    df["ReservoirRootName"] = [
        extract_base_name(name) for name in df["MasterStationName"]
    ]

    release_df = df[df["IsRelease"]].copy()
    nonrelease_df = df[~df["IsRelease"]].copy()

    # One shared id per reservoir root name for the non-release stations.
    nonrelease_group_map = {
        name: f"PD{number:03d}"
        for number, name in enumerate(
            nonrelease_df["ReservoirRootName"].unique(), start=1
        )
    }
    nonrelease_df["SiteID (New)"] = nonrelease_df["ReservoirRootName"].map(
        nonrelease_group_map
    )

    # Each release station is its own site; numbering continues after the
    # last non-release id.
    first_release_number = len(nonrelease_group_map) + 1
    release_df["SiteID (New)"] = [
        f"PD{number:03d}"
        for number in range(
            first_release_number, first_release_number + len(release_df)
        )
    ]

    final_df = pd.concat([nonrelease_df, release_df], ignore_index=True)

    def make_site_name(base, system, is_release):
        """Compose the display name of a site."""
        system = "Unknown" if pd.isna(system) else str(system).strip().title()
        if is_release:
            return f"{base} River Below {base} Reservoir, {system}, Near {base}"
        return f"{base} Reservoir, {system}, Near {base}"

    # Built with a comprehension rather than DataFrame.apply(axis=1), which
    # returns a DataFrame (not a Series) when the frame has no rows.
    final_df["NewSiteName"] = [
        make_site_name(base, system, is_release)
        for base, system, is_release in zip(
            final_df["ReservoirRootName"],
            final_df["SYSTEM_NAME"],
            final_df["IsRelease"],
        )
    ]
    final_df = final_df.sort_values(
        by=["ReservoirRootName", "IsRelease", "MasterStationName"]
    ).reset_index(drop=True)
    final_df["DataStreamID"] = [
        f"DS{number:02d}" for number in range(1, len(final_df) + 1)
    ]

    export = final_df[[
        "NewSiteName", "SiteID (New)", "DataStreamID",
        "MasterStationName", "MasterStationID", "UNITS_DESC_ENTRY",
        "CollectionStationName", "COLLECTION_SYSTEM"
    ]].rename(columns={
        "MasterStationName": "DIVERT_STATION_NAME (old)",
        "MasterStationID": "Station_ID (old)",
        "COLLECTION_SYSTEM": "CollectionSystemName"
    })

    id_cols = ["NewSiteName", "SiteID (New)"]
    cols_to_expand = [
        "DIVERT_STATION_NAME (old)", "Station_ID (old)",
        "UNITS_DESC_ENTRY", "CollectionStationName", "CollectionSystemName"
    ]
    # 1-based position of each station inside its site.
    export["RecordNum"] = export.groupby(id_cols).cumcount() + 1

    # The left frame fixes the output row order (the sort order above).
    transposed = export[id_cols].drop_duplicates().copy()
    if not export.empty:
        # (site, RecordNum) is unique by construction, so unstack is safe.
        keyed = export.set_index(id_cols + ["RecordNum"])
        for col in cols_to_expand:
            wide = keyed[col].unstack("RecordNum")
            if pd.api.types.is_integer_dtype(export[col]):
                # Padding with NaN turned the ids into floats; restore integers.
                wide = wide.astype("Int64")
            wide.columns = [f"{col}{i}" for i in wide.columns]
            transposed = transposed.merge(
                wide.reset_index(), on=id_cols, how="left"
            )

    # Blank columns for the reviewer; "Check" receives the drop-down below.
    transposed["Check"] = ""
    transposed["Comment"] = ""

    transposed.to_excel(output_path, index=False)
    add_dropdown_to_excel(output_path)
    return transposed

# --- STEP 3: ADD DROPDOWN TO CHECK COLUMN ---
def add_dropdown_to_excel(file_path, sheet_name="Sheet1"):
    """Add an "Approved" / "Not Approved" drop-down to the "Check" column.

    The workbook at ``file_path`` is opened, the column whose first-row cell
    is exactly "Check" is located, a list validation is attached to that
    column from row 2 down to the sheet's last used row, and the workbook is
    saved back to the same path.

    If the sheet has no "Check" header, nothing is added and the file is left
    untouched.

    Args:
        file_path: Path of the .xlsx workbook to modify in place.
        sheet_name: Name of the worksheet holding the table.
    """
    wb = load_workbook(file_path)
    ws = wb[sheet_name]

    # Find the letter of the column whose header cell (row 1) is "Check".
    check_letter = None
    for col in ws.iter_cols(1, ws.max_column):
        if col[0].value == "Check":
            check_letter = col[0].column_letter
            break

    if check_letter is None:
        # Without a target column there is no range to validate; registering
        # the rule anyway would save a validation that applies to no cells.
        print(f"⚠️ No 'Check' column found in sheet '{sheet_name}'; no dropdown added.")
        return

    # A header-only sheet has max_row == 1, which would give the reversed
    # range "X2:X1"; always cover at least the first data row.
    last_row = max(ws.max_row, 2)

    dv = DataValidation(type="list", formula1='"Approved,Not Approved"', allow_blank=True)
    dv.ranges.add(f"{check_letter}2:{check_letter}{last_row}")
    ws.add_data_validation(dv)
    wb.save(file_path)
    print("✅ Dropdown added to 'Check' column.")

# --- MAIN ---
def main():
    """Run the pipeline: query the station metadata, then build and save the export."""
    print("🔎 Querying metadata...")
    stations = query_station_metadata()
    row_count = len(stations)
    print(f"📦 {row_count} rows retrieved.")

    print("🧠 Structuring export with release/non-release logic and transposing...")
    generate_final_export(stations)
    print("✅ Done.")


# Run the pipeline only when executed directly, not when imported.
if __name__ == "__main__":
    main()