In [1]:
import requests
import pandas as pd

def get_usgs_sites(state_code='UT'):
    """
    Fetches USGS gage stations and reservoir levels in a given state.
    :param state_code: The US state abbreviation (default: 'UT' for Utah)
    :return: A list of site names and site numbers
    """
    url = "https://waterservices.usgs.gov/nwis/dv/"
    params = {
        'format': 'json',
        'stateCd': state_code,
        'siteStatus': 'all',
    }
    
    response = requests.get(url, params=params)
    response.raise_for_status()
    
    data = response.json()
    sites = []
    
    for site in data.get('value', {}).get('timeSeries', []):
        site_info = site.get('sourceInfo', {})
        site_name = site_info.get('siteName', 'Unknown')
        site_code = site_info.get('siteCode', [{}])[0].get('value', 'Unknown')
        sites.append((site_code, site_name))
    
    return sites

# Fetch USGS gage stations and reservoir levels in Utah
usgs_sites = get_usgs_sites()

# Convert to DataFrame and export to CSV
df = pd.DataFrame(usgs_sites, columns=['Site Code', 'Site Name'])
df.to_csv('usgs_utah_sites.csv', index=False)

print("Data exported to usgs_utah_sites.csv")



Data exported to usgs_utah_sites.csv


In [2]:
import requests
import pandas as pd

def get_usgs_sites(state_code='UT'):
    """
    Fetches unique USGS gage stations and reservoir levels in a given state.
    :param state_code: The US state abbreviation (default: 'UT' for Utah)
    :return: A list of unique site names and site numbers
    """
    url = "https://waterservices.usgs.gov/nwis/dv/"
    params = {
        'format': 'json',
        'stateCd': state_code,
        'siteStatus': 'all',
    }
    
    response = requests.get(url, params=params)
    response.raise_for_status()
    
    data = response.json()
    sites = set()  # Using a set to store unique (site_code, site_name) tuples
    
    for site in data.get('value', {}).get('timeSeries', []):
        site_info = site.get('sourceInfo', {})
        site_name = site_info.get('siteName', 'Unknown')
        site_code = site_info.get('siteCode', [{}])[0].get('value', 'Unknown')
        sites.add((site_code, site_name))  # Sets automatically remove duplicates
    
    return list(sites)  # Convert back to a list for DataFrame

# Fetch unique USGS gage stations and reservoir levels in Utah
usgs_sites = get_usgs_sites()

# Convert to DataFrame and export to CSV
df = pd.DataFrame(usgs_sites, columns=['Site Code', 'Site Name'])
df.to_csv('usgs_utah_sites_removed_copy_Id.csv', index=False)

print("Data exported to usgs_utah_sites.csv")

Data exported to usgs_utah_sites.csv


In [3]:
import requests
import pandas as pd

def get_usgs_sites(state_code='UT'):
    """
    Fetches unique USGS gage stations and reservoir levels in a given state, 
    filtering out site codes with more than 8 digits.
    
    :param state_code: The US state abbreviation (default: 'UT' for Utah)
    :return: A list of unique site names and site numbers with valid site codes
    """
    url = "https://waterservices.usgs.gov/nwis/dv/"
    params = {
        'format': 'json',
        'stateCd': state_code,
        'siteStatus': 'all',
    }
    
    response = requests.get(url, params=params)
    response.raise_for_status()
    
    data = response.json()
    sites = set()  # Using a set to store unique (site_code, site_name) tuples
    
    for site in data.get('value', {}).get('timeSeries', []):
        site_info = site.get('sourceInfo', {})
        site_name = site_info.get('siteName', 'Unknown')
        site_code = site_info.get('siteCode', [{}])[0].get('value', 'Unknown')
        
        # Ensure site_code is numeric and does not exceed 8 digits
        if site_code.isdigit() and len(site_code) <= 8:
            sites.add((site_code, site_name))  # Sets automatically remove duplicates
    
    return list(sites)  # Convert back to a list for DataFrame

# Fetch unique USGS gage stations and reservoir levels in Utah
usgs_sites = get_usgs_sites()

# Convert to DataFrame and export to CSV
df = pd.DataFrame(usgs_sites, columns=['Site Code', 'Site Name'])
df.to_csv('usgs_utah_sites_removed_8_digits.csv', index=False)

print("Data exported to usgs_utah_sites.csv")

Data exported to usgs_utah_sites.csv


In [4]:
import requests
import pandas as pd
import pyodbc

# Database connection parameters
server = 'wrt-sql-prod'
database = 'dvrtDB'
username = 'wrtsqlq'
password = 'guest'

# Function to fetch USGS sites
def get_usgs_sites(state_code='UT'):
    url = "https://waterservices.usgs.gov/nwis/dv/"
    params = {'format': 'json', 'stateCd': state_code, 'siteStatus': 'all'}

    response = requests.get(url, params=params)
    response.raise_for_status()
    data = response.json()

    sites = set()  # Store unique (site_code, site_name) pairs
    for site in data.get('value', {}).get('timeSeries', []):
        site_info = site.get('sourceInfo', {})
        site_name = site_info.get('siteName', 'Unknown')
        site_code = site_info.get('siteCode', [{}])[0].get('value', 'Unknown')

        # Filter: Only numeric site codes with max 8 digits
        if site_code.isdigit() and len(site_code) <= 8:
            sites.add((site_code, site_name))

    return pd.DataFrame(list(sites), columns=['Site Code', 'Site Name'])

# Function to fetch SQL data
def get_sql_data():
    query = """
    SELECT  
        [COLLECTION_SYSTEM], [collection_sys_description], 
        [STATION_MASTER].[STATION_ID] AS MasterStationID,
        [STATION_MASTER].[STATION_NAME] AS MasterStationName,
        [COLLECTION_STATIONS].[STATION_NAME] AS CollectionStationName, 
        [RETRIES], [SEQ_NO], [COMMENTS], [SiteType], [COMMON_DESC], 
        [DIVERTING_WORKS], [CAPTURE_SEQ_NO], [ANALOG_CHANNEL], 
        [STATION_ID], [SYSTEM_NAME], DatasetType, [MEASURING_DEVICE], 
        [DEVICE_TYPE], [OWNER_PHONE], [REALTIME_INCLUDE], [LAT], 
        [STATUS], [LON], [LOW_FLOW], [HIGH_FLOW], [SiteState], 
        [UNITS_DESC_BASE], CAPTURE_SEQ_NO, DataEntryMethod, 
        [Telemetry], [CORRECTED_DATA], [SeriesVerifiedBy], 
        [SeriesVerifiedDate], 
        CONCAT('https://waterrights.utah.gov/cgi-bin/dvrtview.exe?Modinfo=StationView&STATION_ID=', 
        STATION_MASTER.STATION_ID) AS StationPage, 
        COUNT([RECORD_YEAR]) AS NoOfYears, MIN([RECORD_YEAR]) AS StartYr, 
        MAX([RECORD_YEAR]) AS EndYr 
    FROM [dvrtDB].[dbo].[COLLECTION_STATIONS]
    LEFT JOIN [dvrtDB].[dbo].[COLLECTION_SYSTEMS] ON 
        [COLLECTION_SYSTEMS].[collection_sys_id] = [COLLECTION_STATIONS].[collection_sys_id]
    LEFT JOIN [dvrtDB].[dbo].[STATION_MASTER] ON 
        [STATION_MASTER].[CAPTURE_SEQ_NO] = [COLLECTION_STATIONS].[SEQ_NO]
    JOIN [dvrtDB].[dbo].[UNITS_MASTER] ON 
        [STATION_MASTER].[UNITS_ID] = [UNITS_MASTER].[UNITS_ID]
    GROUP BY 
        [COLLECTION_SYSTEM], [collection_sys_description], 
        [STATION_MASTER].[STATION_ID], [STATION_MASTER].[STATION_NAME], 
        [COLLECTION_STATIONS].[STATION_NAME], [RETRIES], [SEQ_NO], 
        [COMMENTS], [SiteType], [COMMON_DESC], [DIVERTING_WORKS], 
        [CAPTURE_SEQ_NO], [ANALOG_CHANNEL], [STATION_ID], [SYSTEM_NAME], 
        DatasetType, [MEASURING_DEVICE], [DEVICE_TYPE], [OWNER_PHONE], 
        [REALTIME_INCLUDE], [LAT], [STATUS], [LON], [LOW_FLOW], 
        [HIGH_FLOW], [SiteState], [UNITS_DESC_BASE], CAPTURE_SEQ_NO, 
        DataEntryMethod, [Telemetry], [CORRECTED_DATA], [SeriesVerifiedBy], 
        [SeriesVerifiedDate], STATION_MASTER.STATION_ID
    """

    # Establish SQL connection
    with pyodbc.connect(
        f"DRIVER={{ODBC Driver 17 for SQL Server}};"
        f"SERVER={server};"
        f"DATABASE={database};"
        f"UID={username};"
        f"PWD={password}"
    ) as conn:
        df_sql = pd.read_sql(query, conn)

    return df_sql

# Fetch data
df_usgs = get_usgs_sites()
df_sql = get_sql_data()

# Merge data by matching "Site Code" with "CollectionStationName" from SQL data
df_merged = df_usgs.merge(df_sql, left_on='Site Code', right_on='CollectionStationName', how='left')

# Export to CSV
df_merged.to_csv('usgs_utah_sites_sql_query.csv', index=False)

print("Data successfully exported to usgs_utah_sites.csv")

  df_sql = pd.read_sql(query, conn)


DatabaseError: Execution failed on sql '
    SELECT  
        [COLLECTION_SYSTEM], [collection_sys_description], 
        [STATION_MASTER].[STATION_ID] AS MasterStationID,
        [STATION_MASTER].[STATION_NAME] AS MasterStationName,
        [COLLECTION_STATIONS].[STATION_NAME] AS CollectionStationName, 
        [RETRIES], [SEQ_NO], [COMMENTS], [SiteType], [COMMON_DESC], 
        [DIVERTING_WORKS], [CAPTURE_SEQ_NO], [ANALOG_CHANNEL], 
        [STATION_ID], [SYSTEM_NAME], DatasetType, [MEASURING_DEVICE], 
        [DEVICE_TYPE], [OWNER_PHONE], [REALTIME_INCLUDE], [LAT], 
        [STATUS], [LON], [LOW_FLOW], [HIGH_FLOW], [SiteState], 
        [UNITS_DESC_BASE], CAPTURE_SEQ_NO, DataEntryMethod, 
        [Telemetry], [CORRECTED_DATA], [SeriesVerifiedBy], 
        [SeriesVerifiedDate], 
        CONCAT('https://waterrights.utah.gov/cgi-bin/dvrtview.exe?Modinfo=StationView&STATION_ID=', 
        STATION_MASTER.STATION_ID) AS StationPage, 
        COUNT([RECORD_YEAR]) AS NoOfYears, MIN([RECORD_YEAR]) AS StartYr, 
        MAX([RECORD_YEAR]) AS EndYr 
    FROM [dvrtDB].[dbo].[COLLECTION_STATIONS]
    LEFT JOIN [dvrtDB].[dbo].[COLLECTION_SYSTEMS] ON 
        [COLLECTION_SYSTEMS].[collection_sys_id] = [COLLECTION_STATIONS].[collection_sys_id]
    LEFT JOIN [dvrtDB].[dbo].[STATION_MASTER] ON 
        [STATION_MASTER].[CAPTURE_SEQ_NO] = [COLLECTION_STATIONS].[SEQ_NO]
    JOIN [dvrtDB].[dbo].[UNITS_MASTER] ON 
        [STATION_MASTER].[UNITS_ID] = [UNITS_MASTER].[UNITS_ID]
    GROUP BY 
        [COLLECTION_SYSTEM], [collection_sys_description], 
        [STATION_MASTER].[STATION_ID], [STATION_MASTER].[STATION_NAME], 
        [COLLECTION_STATIONS].[STATION_NAME], [RETRIES], [SEQ_NO], 
        [COMMENTS], [SiteType], [COMMON_DESC], [DIVERTING_WORKS], 
        [CAPTURE_SEQ_NO], [ANALOG_CHANNEL], [STATION_ID], [SYSTEM_NAME], 
        DatasetType, [MEASURING_DEVICE], [DEVICE_TYPE], [OWNER_PHONE], 
        [REALTIME_INCLUDE], [LAT], [STATUS], [LON], [LOW_FLOW], 
        [HIGH_FLOW], [SiteState], [UNITS_DESC_BASE], CAPTURE_SEQ_NO, 
        DataEntryMethod, [Telemetry], [CORRECTED_DATA], [SeriesVerifiedBy], 
        [SeriesVerifiedDate], STATION_MASTER.STATION_ID
    ': ('42S22', "[42S22] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Invalid column name 'RECORD_YEAR'. (207) (SQLExecDirectW); [42S22] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Invalid column name 'RECORD_YEAR'. (207); [42S22] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Invalid column name 'RECORD_YEAR'. (207)")

In [5]:
import requests
import pandas as pd
import pyodbc

# Fetch USGS Sites
def get_usgs_sites(state_code='UT'):
    url = "https://waterservices.usgs.gov/nwis/dv/"
    params = {'format': 'json', 'stateCd': state_code, 'siteStatus': 'all'}
    
    response = requests.get(url, params=params)
    response.raise_for_status()
    
    data = response.json()
    sites = []
    
    for site in data.get('value', {}).get('timeSeries', []):
        site_info = site.get('sourceInfo', {})
        site_name = site_info.get('siteName', 'Unknown')
        site_code = site_info.get('siteCode', [{}])[0].get('value', 'Unknown')
        
        # Only include site codes with 8 or fewer digits
        if site_code.isdigit() and len(site_code) <= 8:
            sites.append((site_code, site_name))

    return pd.DataFrame(sites, columns=['Site Code', 'Site Name']).drop_duplicates(subset=['Site Code'])

# Fetch SQL Data
def get_sql_data():
    server = 'wrt-sql-prod'
    database = 'dvrtDB'
    username = 'wrtsqlq'
    password = 'guest'

    query = """
    SELECT  
        [COLLECTION_SYSTEM], [collection_sys_description], 
        [STATION_MASTER].[STATION_ID] AS MasterStationID,
        [STATION_MASTER].[STATION_NAME] AS MasterStationName,
        [COLLECTION_STATIONS].[STATION_NAME] AS CollectionStationName, 
        [RETRIES], [SEQ_NO], [COMMENTS], [SiteType], [COMMON_DESC], 
        [DIVERTING_WORKS], [CAPTURE_SEQ_NO], [ANALOG_CHANNEL], 
        [STATION_ID], [SYSTEM_NAME], DatasetType, [MEASURING_DEVICE], 
        [DEVICE_TYPE], [OWNER_PHONE], [REALTIME_INCLUDE], [LAT], 
        [STATUS], [LON], [LOW_FLOW], [HIGH_FLOW], [SiteState], 
        [UNITS_DESC_BASE], CAPTURE_SEQ_NO, DataEntryMethod, 
        [Telemetry], [CORRECTED_DATA], [SeriesVerifiedBy], 
        [SeriesVerifiedDate], 
        CONCAT('https://waterrights.utah.gov/cgi-bin/dvrtview.exe?Modinfo=StationView&STATION_ID=', 
        STATION_MASTER.STATION_ID) AS StationPage
    FROM [dvrtDB].[dbo].[COLLECTION_STATIONS]
    LEFT JOIN [dvrtDB].[dbo].[COLLECTION_SYSTEMS] ON 
        [COLLECTION_SYSTEMS].[collection_sys_id] = [COLLECTION_STATIONS].[collection_sys_id]
    LEFT JOIN [dvrtDB].[dbo].[STATION_MASTER] ON 
        [STATION_MASTER].[CAPTURE_SEQ_NO] = [COLLECTION_STATIONS].[SEQ_NO]
    JOIN [dvrtDB].[dbo].[UNITS_MASTER] ON 
        [STATION_MASTER].[UNITS_ID] = [UNITS_MASTER].[UNITS_ID]
    """

    with pyodbc.connect(
        f"DRIVER={{ODBC Driver 17 for SQL Server}};"
        f"SERVER={server};"
        f"DATABASE={database};"
        f"UID={username};"
        f"PWD={password}"
    ) as conn:
        df_sql = pd.read_sql(query, conn)

    return df_sql

# Fetch data
df_usgs = get_usgs_sites()
df_sql = get_sql_data()

# Merge data by matching "Site Code" with "CollectionStationName" from SQL data
df_merged = df_usgs.merge(df_sql, left_on='Site Code', right_on='CollectionStationName', how='left')

# Export to CSV
df_merged.to_csv('usgs_utah_sites_with_sql_data.csv', index=False)
print("Data exported to usgs_utah_sites_with_sql_data.csv")

  df_sql = pd.read_sql(query, conn)


Data exported to usgs_utah_sites_with_sql_data.csv


In [6]:
import requests
import pandas as pd
from sqlalchemy import create_engine

# Fetch USGS Sites
def get_usgs_sites(state_code='UT'):
    url = "https://waterservices.usgs.gov/nwis/dv/"
    params = {'format': 'json', 'stateCd': state_code, 'siteStatus': 'all'}
    
    response = requests.get(url, params=params)
    response.raise_for_status()
    
    data = response.json()
    sites = []
    
    for site in data.get('value', {}).get('timeSeries', []):
        site_info = site.get('sourceInfo', {})
        site_name = site_info.get('siteName', 'Unknown')
        site_code = site_info.get('siteCode', [{}])[0].get('value', 'Unknown')
        
        # Only include site codes with 8 or fewer digits
        if site_code.isdigit() and len(site_code) <= 8:
            sites.append((site_code, site_name))

    return pd.DataFrame(sites, columns=['Site Code', 'Site Name']).drop_duplicates(subset=['Site Code'])

# Fetch SQL Data using SQLAlchemy
def get_sql_data():
    server = 'wrt-sql-prod'
    database = 'dvrtDB'
    username = 'wrtsqlq'
    password = 'guest'
    
    # Create SQLAlchemy engine
    conn_str = f"mssql+pyodbc://{username}:{password}@{server}/{database}?driver=ODBC+Driver+17+for+SQL+Server"
    engine = create_engine(conn_str)

    query = """
    SELECT  
        [COLLECTION_SYSTEM], [collection_sys_description], 
        [STATION_MASTER].[STATION_ID] AS MasterStationID,
        [STATION_MASTER].[STATION_NAME] AS MasterStationName,
        [COLLECTION_STATIONS].[STATION_NAME] AS CollectionStationName, 
        [RETRIES], [SEQ_NO], [COMMENTS], [SiteType], [COMMON_DESC], 
        [DIVERTING_WORKS], [CAPTURE_SEQ_NO], [ANALOG_CHANNEL], 
        [STATION_ID], [SYSTEM_NAME], DatasetType, [MEASURING_DEVICE], 
        [DEVICE_TYPE], [OWNER_PHONE], [REALTIME_INCLUDE], [LAT], 
        [STATUS], [LON], [LOW_FLOW], [HIGH_FLOW], [SiteState], 
        [UNITS_DESC_BASE], CAPTURE_SEQ_NO, DataEntryMethod, 
        [Telemetry], [CORRECTED_DATA], [SeriesVerifiedBy], 
        [SeriesVerifiedDate], 
        CONCAT('https://waterrights.utah.gov/cgi-bin/dvrtview.exe?Modinfo=StationView&STATION_ID=', 
        STATION_MASTER.STATION_ID) AS StationPage
    FROM [dvrtDB].[dbo].[COLLECTION_STATIONS]
    LEFT JOIN [dvrtDB].[dbo].[COLLECTION_SYSTEMS] ON 
        [COLLECTION_SYSTEMS].[collection_sys_id] = [COLLECTION_STATIONS].[collection_sys_id]
    LEFT JOIN [dvrtDB].[dbo].[STATION_MASTER] ON 
        [STATION_MASTER].[CAPTURE_SEQ_NO] = [COLLECTION_STATIONS].[SEQ_NO]
    JOIN [dvrtDB].[dbo].[UNITS_MASTER] ON 
        [STATION_MASTER].[UNITS_ID] = [UNITS_MASTER].[UNITS_ID]
    """

    # Read SQL data
    df_sql = pd.read_sql(query, engine)

    return df_sql

# Fetch data
df_usgs = get_usgs_sites()
df_sql = get_sql_data()

# Check if SQL data is empty before merging
if df_sql.empty:
    print("SQL query returned no data.")
else:
    print(f"SQL data loaded: {df_sql.shape[0]} rows")

# Merge data by matching "Site Code" with "CollectionStationName" from SQL data
df_merged = df_usgs.merge(df_sql, left_on='Site Code', right_on='CollectionStationName', how='left')

# Export to CSV
df_merged.to_csv('usgs_utah_sites_with_sql_data.csv', index=False)
print("Data exported to usgs_utah_sites_with_sql_data.csv")

SQL data loaded: 1129 rows


PermissionError: [Errno 13] Permission denied: 'usgs_utah_sites_with_sql_data.csv'

In [1]:
import requests
import pandas as pd
from sqlalchemy import create_engine

# Fetch USGS Sites
def get_usgs_sites(state_code='UT'):
    url = "https://waterservices.usgs.gov/nwis/dv/"
    params = {'format': 'json', 'stateCd': state_code, 'siteStatus': 'all'}
    
    response = requests.get(url, params=params)
    response.raise_for_status()
    
    data = response.json()
    sites = []
    
    for site in data.get('value', {}).get('timeSeries', []):
        site_info = site.get('sourceInfo', {})
        site_name = site_info.get('siteName', 'Unknown')
        site_code = site_info.get('siteCode', [{}])[0].get('value', 'Unknown')
        
        # Only include site codes with 8 or fewer digits
        if site_code.isdigit() and len(site_code) <= 8:
            sites.append((site_code, site_name))

    return pd.DataFrame(sites, columns=['Site Code', 'Site Name']).drop_duplicates(subset=['Site Code'])

# Fetch SQL Data using SQLAlchemy
def get_sql_data():
    server = 'wrt-sql-prod'
    database = 'dvrtDB'
    username = 'wrtsqlq'
    password = 'guest'
    
    # Create SQLAlchemy engine
    conn_str = f"mssql+pyodbc://{username}:{password}@{server}/{database}?driver=ODBC+Driver+17+for+SQL+Server"
    engine = create_engine(conn_str)

    query = """
    SELECT  
        [COLLECTION_SYSTEM], [collection_sys_description], 
        [STATION_MASTER].[STATION_ID] AS MasterStationID,
        [STATION_MASTER].[STATION_NAME] AS MasterStationName,
        [COLLECTION_STATIONS].[STATION_NAME] AS CollectionStationName, 
        [RETRIES], [SEQ_NO], [COMMENTS], [SiteType], [COMMON_DESC], 
        [DIVERTING_WORKS], [CAPTURE_SEQ_NO], [ANALOG_CHANNEL], 
        [STATION_ID], [SYSTEM_NAME], DatasetType, [MEASURING_DEVICE], 
        [DEVICE_TYPE], [OWNER_PHONE], [REALTIME_INCLUDE], [LAT], 
        [STATUS], [LON], [LOW_FLOW], [HIGH_FLOW], [SiteState], 
        [UNITS_DESC_BASE], CAPTURE_SEQ_NO, DataEntryMethod, 
        [Telemetry], [CORRECTED_DATA], [SeriesVerifiedBy], 
        [SeriesVerifiedDate], 
        CONCAT('https://waterrights.utah.gov/cgi-bin/dvrtview.exe?Modinfo=StationView&STATION_ID=', 
        STATION_MASTER.STATION_ID) AS StationPage
    FROM [dvrtDB].[dbo].[COLLECTION_STATIONS]
    LEFT JOIN [dvrtDB].[dbo].[COLLECTION_SYSTEMS] ON 
        [COLLECTION_SYSTEMS].[collection_sys_id] = [COLLECTION_STATIONS].[collection_sys_id]
    LEFT JOIN [dvrtDB].[dbo].[STATION_MASTER] ON 
        [STATION_MASTER].[CAPTURE_SEQ_NO] = [COLLECTION_STATIONS].[SEQ_NO]
    JOIN [dvrtDB].[dbo].[UNITS_MASTER] ON 
        [STATION_MASTER].[UNITS_ID] = [UNITS_MASTER].[UNITS_ID]
    """

    # Read SQL data
    df_sql = pd.read_sql(query, engine)

    return df_sql

# Fetch data
df_usgs = get_usgs_sites()
df_sql = get_sql_data()

# Check if SQL data is empty before merging
if df_sql.empty:
    print("SQL query returned no data.")
else:
    print(f"SQL data loaded: {df_sql.shape[0]} rows")

# Merge data by matching "Site Code" with "CollectionStationName" from SQL data
df_merged = df_usgs.merge(df_sql, left_on='Site Code', right_on='CollectionStationName', how='left')

# Export to CSV
df_merged.to_csv('usgs_utah_sites_with_sql_data.csv', index=False)
print("Data exported to usgs_utah_sites_with_sql_data.csv")

SQL data loaded: 1131 rows
Data exported to usgs_utah_sites_with_sql_data.csv


In [2]:
import requests
import pandas as pd
from sqlalchemy import create_engine

# Fetch USGS Sites
def get_usgs_sites(state_code='UT'):
    url = "https://waterservices.usgs.gov/nwis/dv/"
    params = {'format': 'json', 'stateCd': state_code, 'siteStatus': 'all'}
    
    response = requests.get(url, params=params)
    response.raise_for_status()
    
    data = response.json()
    sites = []
    
    for site in data.get('value', {}).get('timeSeries', []):
        site_info = site.get('sourceInfo', {})
        site_name = site_info.get('siteName', 'Unknown')
        site_code = site_info.get('siteCode', [{}])[0].get('value', 'Unknown')
        
        # Only include site codes with 8 or fewer digits
        if site_code.isdigit() and len(site_code) <= 8:
            sites.append((site_code.strip(), site_name.strip()))

    return pd.DataFrame(sites, columns=['Site Code', 'Site Name']).drop_duplicates(subset=['Site Code'])

# Fetch SQL Data using SQLAlchemy
def get_sql_data():
    server = 'wrt-sql-prod'
    database = 'dvrtDB'
    username = 'wrtsqlq'
    password = 'guest'
    
    conn_str = f"mssql+pyodbc://{username}:{password}@{server}/{database}?driver=ODBC+Driver+17+for+SQL+Server"
    engine = create_engine(conn_str)

    query = """
    SELECT  
        [COLLECTION_STATIONS].[STATION_NAME] AS CollectionStationName,
        [STATION_MASTER].[STATION_ID] AS MasterStationID,
        [STATION_MASTER].[STATION_NAME] AS MasterStationName,
        [LAT], [LON], [SiteState], [Status]
    FROM [dvrtDB].[dbo].[COLLECTION_STATIONS]
    LEFT JOIN [dvrtDB].[dbo].[STATION_MASTER] ON 
        [STATION_MASTER].[CAPTURE_SEQ_NO] = [COLLECTION_STATIONS].[SEQ_NO]
    """

    df_sql = pd.read_sql(query, engine)
    return df_sql

# Load data
df_usgs = get_usgs_sites()
df_sql = get_sql_data()

# Prepare columns for merging (strip whitespaces, lower case, and convert to string)
df_usgs['Site Code'] = df_usgs['Site Code'].astype(str).str.strip().str.lower()
df_sql['CollectionStationName'] = df_sql['CollectionStationName'].astype(str).str.strip().str.lower()

# Check unmatched site codes before merging
unmatched = df_usgs[~df_usgs['Site Code'].isin(df_sql['CollectionStationName'])]
if not unmatched.empty:
    print("Unmatched USGS Site Codes:")
    print(unmatched[['Site Code', 'Site Name']])

# Merge DataFrames
df_merged = df_usgs.merge(df_sql, left_on='Site Code', right_on='CollectionStationName', how='left')

# Export to CSV
df_merged.to_csv('usgs_utah_sites_with_sql_data.csv', index=False)
print("Data exported to usgs_utah_sites_with_sql_data.csv")

Unmatched USGS Site Codes:
     Site Code                                       Site Name
0     09163675       COTTONWOOD WASH AT I-70, NEAR CISCO, UTAH
1     09180000                    DOLORES RIVER NEAR CISCO, UT
13    09180500                   COLORADO RIVER NEAR CISCO, UT
33    09180920    ONION CREEK ABOVE ONION C BRIDGE NR MOAB, UT
34    09180970  ONION CREEK BELOW ONION CRK BRIDGE NR MOAB, UT
...        ...                                             ...
1257  10241800             ASHDOWN CREEK NEAR CEDAR CITY, UTAH
1259  10242300                PINTO CREEK NEAR NEWCASTLE, UTAH
1260  10242430              GRASSY CREEK NEAR ENTERPRISE, UTAH
1261  13077700                    GEORGE CREEK NEAR YOST, UTAH
1262  13079000                     CLEAR CREEK NEAR NAF, IDAHO

[666 rows x 2 columns]
Data exported to usgs_utah_sites_with_sql_data.csv


In [3]:
import requests
import pandas as pd
import pyodbc

def get_usgs_sites(state_code='UT'):
    url = "https://waterservices.usgs.gov/nwis/dv/"
    params = {'format': 'json', 'stateCd': state_code, 'siteStatus': 'all'}
    response = requests.get(url, params=params)
    response.raise_for_status()
    data = response.json()
    sites = set()
    for site in data.get('value', {}).get('timeSeries', []):
        site_info = site.get('sourceInfo', {})
        site_name = site_info.get('siteName', 'Unknown')
        site_code = site_info.get('siteCode', [{}])[0].get('value', 'Unknown')
        if len(site_code) == 8:
            sites.add((site_code, site_name))
    return list(sites)

def get_sql_data():
    server = 'wrt-sql-prod'
    database = 'dvrtDB'
    username = 'wrtsqlq'
    password = 'guest'
    query = """[YOUR SQL QUERY WITHOUT RECORD_YEAR]"""
    
    with pyodbc.connect(
        f"DRIVER={{ODBC Driver 17 for SQL Server}};"
        f"SERVER={server};"
        f"DATABASE={database};"
        f"UID={username};"
        f"PWD={password}"
    ) as conn:
        df_sql = pd.read_sql(query, conn)
    
    # Ensure CollectionStationName is zero-padded to 8 digits
    df_sql['CollectionStationName'] = df_sql['CollectionStationName'].astype(str).str.zfill(8)
    return df_sql

# Fetch and prepare data
df_usgs = pd.DataFrame(get_usgs_sites(), columns=['Site Code', 'Site Name'])
df_sql = get_sql_data()

# Merge data on Site Code and CollectionStationName
df_merged = df_usgs.merge(df_sql, left_on='Site Code', right_on='CollectionStationName', how='left')

# Export to CSV
df_merged.to_csv('usgs_utah_sites.csv', index=False)

print("Data exported to usgs_utah_sites.csv")

  df_sql = pd.read_sql(query, conn)


DatabaseError: Execution failed on sql '[YOUR SQL QUERY WITHOUT RECORD_YEAR]': ('42000', "[42000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Could not find stored procedure 'YOUR SQL QUERY WITHOUT RECORD_YEAR'. (2812) (SQLExecDirectW)")

In [1]:
import requests
import pandas as pd
import pyodbc

def get_usgs_sites(state_code='UT'):
    url = "https://waterservices.usgs.gov/nwis/dv/"
    params = {'format': 'json', 'stateCd': state_code, 'siteStatus': 'all'}
    response = requests.get(url, params=params)
    response.raise_for_status()
    data = response.json()
    sites = set()
    for site in data.get('value', {}).get('timeSeries', []):
        site_info = site.get('sourceInfo', {})
        site_name = site_info.get('siteName', 'Unknown')
        site_code = site_info.get('siteCode', [{}])[0].get('value', 'Unknown')
        if len(site_code) == 8:
            sites.add((site_code, site_name))
    return list(sites)

def get_sql_data():
    server = 'wrt-sql-prod'
    database = 'dvrtDB'
    username = 'wrtsqlq'
    password = 'guest'
    
    query = """
    SELECT  
        [COLLECTION_SYSTEM], [collection_sys_description], 
        [STATION_MASTER].[STATION_ID] AS MasterStationID,
        [STATION_MASTER].[STATION_NAME] AS MasterStationName,
        [COLLECTION_STATIONS].[STATION_NAME] AS CollectionStationName, 
        [RETRIES], [SEQ_NO], [COMMENTS], [SiteType], [COMMON_DESC], 
        [DIVERTING_WORKS], [CAPTURE_SEQ_NO], [ANALOG_CHANNEL], 
        [STATION_ID], [SYSTEM_NAME], DatasetType, [MEASURING_DEVICE], 
        [DEVICE_TYPE], [OWNER_PHONE], [REALTIME_INCLUDE], [LAT], 
        [STATUS], [LON], [LOW_FLOW], [HIGH_FLOW], [SiteState], 
        [UNITS_DESC_BASE], CAPTURE_SEQ_NO, DataEntryMethod, 
        [Telemetry], [CORRECTED_DATA], [SeriesVerifiedBy], 
        [SeriesVerifiedDate], 
        CONCAT('https://waterrights.utah.gov/cgi-bin/dvrtview.exe?Modinfo=StationView&STATION_ID=', 
        STATION_MASTER.STATION_ID) AS StationPage
    FROM [dvrtDB].[dbo].[COLLECTION_STATIONS]
    LEFT JOIN [dvrtDB].[dbo].[COLLECTION_SYSTEMS] ON 
        [COLLECTION_SYSTEMS].[collection_sys_id] = [COLLECTION_STATIONS].[collection_sys_id]
    LEFT JOIN [dvrtDB].[dbo].[STATION_MASTER] ON 
        [STATION_MASTER].[CAPTURE_SEQ_NO] = [COLLECTION_STATIONS].[SEQ_NO]
    JOIN [dvrtDB].[dbo].[UNITS_MASTER] ON 
        [STATION_MASTER].[UNITS_ID] = [UNITS_MASTER].[UNITS_ID]
    """

    with pyodbc.connect(
        f"DRIVER={{ODBC Driver 17 for SQL Server}};"
        f"SERVER={server};"
        f"DATABASE={database};"
        f"UID={username};"
        f"PWD={password}"
    ) as conn:
        df_sql = pd.read_sql(query, conn)
    
    # Ensure CollectionStationName is zero-padded to 8 digits
    df_sql['CollectionStationName'] = df_sql['CollectionStationName'].astype(str).str.zfill(8)
    return df_sql

# Fetch and prepare data
df_usgs = pd.DataFrame(get_usgs_sites(), columns=['Site Code', 'Site Name'])
df_sql = get_sql_data()

# Merge data on Site Code and CollectionStationName
df_merged = df_usgs.merge(df_sql, left_on='Site Code', right_on='CollectionStationName', how='left')

# Export to CSV
df_merged.to_csv('usgs_utah_sites.csv', index=False)

print("Data exported to usgs_utah_sites.csv")

  df_sql = pd.read_sql(query, conn)


Data exported to usgs_utah_sites.csv


In [2]:
import requests
import pandas as pd
from sqlalchemy import create_engine

def get_usgs_sites(state_code='UT'):
    url = "https://waterservices.usgs.gov/nwis/dv/"
    params = {'format': 'json', 'stateCd': state_code, 'siteStatus': 'all'}
    response = requests.get(url, params=params)
    response.raise_for_status()
    data = response.json()
    sites = set()
    for site in data.get('value', {}).get('timeSeries', []):
        site_info = site.get('sourceInfo', {})
        site_name = site_info.get('siteName', 'Unknown')
        site_code = site_info.get('siteCode', [{}])[0].get('value', 'Unknown')
        if len(site_code) == 8:
            sites.add((site_code, site_name))
    return list(sites)

def get_sql_data():
    server = 'wrt-sql-prod'
    database = 'dvrtDB'
    username = 'wrtsqlq'
    password = 'guest'
    
    # Create SQLAlchemy connection string
    connection_string = (
        f"mssql+pyodbc://{username}:{password}@{server}/{database}"
        "?driver=ODBC+Driver+17+for+SQL+Server"
    )
    
    # Create the SQLAlchemy engine
    engine = create_engine(connection_string)
    
    query = """
    SELECT  
        [COLLECTION_SYSTEM], [collection_sys_description], 
        [STATION_MASTER].[STATION_ID] AS MasterStationID,
        [STATION_MASTER].[STATION_NAME] AS MasterStationName,
        [COLLECTION_STATIONS].[STATION_NAME] AS CollectionStationName, 
        [RETRIES], [SEQ_NO], [COMMENTS], [SiteType], [COMMON_DESC], 
        [DIVERTING_WORKS], [CAPTURE_SEQ_NO], [ANALOG_CHANNEL], 
        [STATION_ID], [SYSTEM_NAME], DatasetType, [MEASURING_DEVICE], 
        [DEVICE_TYPE], [OWNER_PHONE], [REALTIME_INCLUDE], [LAT], 
        [STATUS], [LON], [LOW_FLOW], [HIGH_FLOW], [SiteState], 
        [UNITS_DESC_BASE], CAPTURE_SEQ_NO, DataEntryMethod, 
        [Telemetry], [CORRECTED_DATA], [SeriesVerifiedBy], 
        [SeriesVerifiedDate], 
        CONCAT('https://waterrights.utah.gov/cgi-bin/dvrtview.exe?Modinfo=StationView&STATION_ID=', 
        STATION_MASTER.STATION_ID) AS StationPage
    FROM [dvrtDB].[dbo].[COLLECTION_STATIONS]
    LEFT JOIN [dvrtDB].[dbo].[COLLECTION_SYSTEMS] ON 
        [COLLECTION_SYSTEMS].[collection_sys_id] = [COLLECTION_STATIONS].[collection_sys_id]
    LEFT JOIN [dvrtDB].[dbo].[STATION_MASTER] ON 
        [STATION_MASTER].[CAPTURE_SEQ_NO] = [COLLECTION_STATIONS].[SEQ_NO]
    JOIN [dvrtDB].[dbo].[UNITS_MASTER] ON 
        [STATION_MASTER].[UNITS_ID] = [UNITS_MASTER].[UNITS_ID]
    """
    
    df_sql = pd.read_sql(query, engine)
    
    # Ensure CollectionStationName is zero-padded to 8 digits
    df_sql['CollectionStationName'] = df_sql['CollectionStationName'].astype(str).str.zfill(8)
    return df_sql

# Fetch and prepare data
df_usgs = pd.DataFrame(get_usgs_sites(), columns=['Site Code', 'Site Name'])
df_sql = get_sql_data()

# Merge data on Site Code and CollectionStationName
df_merged = df_usgs.merge(df_sql, left_on='Site Code', right_on='CollectionStationName', how='left')

# Export to CSV
df_merged.to_csv('usgs_utah_sites.csv', index=False)

print("Data exported to usgs_utah_sites.csv")

Data exported to usgs_utah_sites.csv


In [3]:
def get_sql_data():
    server = 'wrt-sql-prod'
    database = 'dvrtDB'
    username = 'wrtsqlq'
    password = 'guest'
    
    connection_string = (
        f"mssql+pyodbc://{username}:{password}@{server}/{database}"
        "?driver=ODBC+Driver+17+for+SQL+Server"
    )
    
    engine = create_engine(connection_string)
    
    query = """
    SELECT [COLLECTION_STATIONS].[STATION_NAME] AS CollectionStationName
    FROM [dvrtDB].[dbo].[COLLECTION_STATIONS]
    """
    
    try:
        df_sql = pd.read_sql(query, engine)
        print(df_sql.head())  # Display the first few rows
    except Exception as e:
        print("Query failed:", e)

get_sql_data()

                      CollectionStationName
0  10128500                                
1  AAh                                     
2  8768                                    
3  Thayns Diversion                        
4  Bear R. At Soda Spri                    


In [4]:
df_sql['CollectionStationName'] = df_sql['CollectionStationName'].astype(str).str.zfill(8)
print(df_sql.head())

      COLLECTION_SYSTEM collection_sys_description  MasterStationID  \
0  LBEAR                        Bear River (Cache)             1721   
1  LBEAR                        Bear River (Cache)             1753   
2  LBEAR                        Bear River (Cache)             1728   
3  LBEAR                        Bear River (Cache)             1725   
4  LBEAR                        Bear River (Cache)             1777   

                                   MasterStationName  \
0  22  LARRY FALSLEV                             ...   
1  52  WAYNE GIBBS - WHEELER RANCH LLC           ...   
2  29  JACKSON SMITH                             ...   
3  26  WAYNE GIBBS - WHEELER RANCH               ...   
4  T15 ALLEN                                     ...   

                      CollectionStationName  RETRIES  SEQ_NO  \
0  AO                                             12       3   
1  Ac                                             40       7   
2  Ac                                       