In [1]:

import pandas as pd
import requests
from io import StringIO
from datetime import datetime
import argparse
import os
from site_identifier import identify_sites

In [57]:
"""Fetch Google Sheets data"""
# NOTE(review): GOOGLE_SHEETS_URL_2 is not defined in any cell visible in this
# notebook, so this cell depends on hidden kernel state — define it in a config
# cell near the imports so Restart & Run All works.
print("Fetching data from Google Sheets...")
response = requests.get(GOOGLE_SHEETS_URL_2, timeout=30)
# Fail loudly on an HTTP error status instead of parsing an error page as CSV.
response.raise_for_status()
# header=None keeps the sheet's first row as data row 0, which get_all_dates()
# reads as the date header row.
df = pd.read_csv(StringIO(response.text), header=None, low_memory=False)

Fetching data from Google Sheets...


In [58]:
response.status_code

200

In [59]:
sites = identify_sites(df)

Identifying sites dynamically...
Strategy: Finding 'INV. SETTING' labels and extracting site names

  ✓ Row 4: Chukchansi | Coarsegold
  ✓ Row 51: Yokut | OLD Lemoore
  ✓ Row 99: Yokut 2
  ✓ Row 142: Coyote Valley | Redwood
  ✓ Row 177: Robinson | Pomo Pumps
  ✓ Row 212: Diamond Mt. | Susanville
  ✓ Row 247: Feather / Falls Upper
  ✓ Row 282: Feather Falls | Lower
  ✓ Row 329: Berry Creek | Maidu Mart
  ✓ Row 372: Hidden Oaks | Covelo
  ✓ Row 407: Pit River | Tamarack
  ✓ Row 442: Chimney Rock | Alturas
  ✓ Row 477: Shingle Springs | Express
  ✓ Row 512: Cedarville | Rabbit Traxx
  ✓ Row 548: Bear Creek | Tuolumne
  ✓ Row 591: Middletown | Uncle Buddies
  ✓ Row 626: Rolling Hills | Paskenta
  ✓ Row 661: Colusa
  ✓ Row 704: Enterprise | Hard Rock
  ✓ Row 739: Buena Vista
  ✓ Row 775: Big Valley
  ✓ Row 826: Jackson Rancheria

✓ Total sites identified: 22


In [21]:
def get_all_dates(df, start_col=6, date_format='%b-%d-%y'):
    """Extract the date header columns from the sheet (up to today only).

    Row 0 of ``df`` is read from ``start_col`` onward. Each non-null cell is
    parsed with ``date_format``; cells that fail to parse and dates later than
    today are skipped.

    Args:
        df: DataFrame whose first row holds the date labels.
        start_col: Positional index of the first column to inspect.
        date_format: ``strptime`` format of the date labels
            (default matches labels such as ``'Mar-01-24'``).

    Returns:
        List of ``(column_index, pandas.Timestamp)`` tuples in column order.
    """
    print("\nExtracting all dates...")

    # Normalising to midnight means today's own column is still included.
    today = pd.Timestamp.now().normalize()

    date_data = []
    for col_idx, date_val in enumerate(df.iloc[0, start_col:], start=start_col):
        if pd.isna(date_val):
            continue
        # errors='coerce' turns unparseable labels into NaT instead of raising,
        # so no try/except is needed; NaT is filtered out explicitly below.
        parsed = pd.to_datetime(str(date_val), format=date_format, errors='coerce')
        if pd.notna(parsed) and parsed <= today:
            date_data.append((col_idx, parsed))

    print(f"✓ Found {len(date_data)} dates (up to today)")
    if date_data:
        print(f"  Date range: {date_data[0][1].date()} to {date_data[-1][1].date()}")
    return date_data



In [22]:
def extract_site_readings(df, site_row, site_name, date_columns):
    """Extract tank readings for a single site.

    Searches up to 20 rows from ``site_row`` for a column-3 label containing
    "READINGS". The rows after that label, up to the next ULLAGE / LOADS /
    CARRIER / NOTES label (searched over 15 rows; falls back to 10 rows), are
    scanned for product names in column 4. Rows sharing the same product name
    are treated as tank 1, tank 2, ... of that product.

    Args:
        df: Raw sheet as a DataFrame with positional columns.
        site_row: Row index at which the site's block starts.
        site_name: Name written into every record.
        date_columns: Iterable of ``(column_index, date)`` pairs.

    Returns:
        One dict per (date, product) with keys ``Date``, ``Site``, ``Product``
        and ``Tank_<n>_Reading`` (float, or None when the cell is empty or not
        numeric). Empty list when no READINGS section is found.
    """
    print(f"  Extracting READINGS for {site_name}...")

    product_keys = ('87', '88', '91', 'dsl', 'racing', 'red')
    end_keywords = ('ULLAGE', 'LOADS', 'CARRIER', 'NOTES')
    n_rows = len(df)

    def section_label(row_idx):
        """Upper-cased, stripped text of column 3, or '' for a missing cell."""
        cell = df.iloc[row_idx, 3]
        return str(cell).strip().upper() if pd.notna(cell) else ""

    def to_float(value):
        """Parse a cell such as '1,234' to float; None if missing or non-numeric."""
        if pd.isna(value):
            return None
        text = str(value).replace(',', '').strip()
        try:
            return float(text) if text else None
        except ValueError:
            # Non-numeric text in a reading cell is treated as "no reading".
            return None

    # Find READINGS section
    reading_start_row = None
    for row_idx in range(site_row, min(site_row + 20, n_rows)):
        if "READINGS" in section_label(row_idx):
            reading_start_row = row_idx + 1
            break

    if reading_start_row is None:
        return []

    # Find end of READINGS section; default to 10 rows when no terminator is seen
    reading_end_row = reading_start_row + 10
    for row_idx in range(reading_start_row, min(reading_start_row + 15, n_rows)):
        label = section_label(row_idx)
        if any(keyword in label for keyword in end_keywords):
            reading_end_row = row_idx
            break

    # Map each product name to the rows (tanks) that carry it.
    # The keyword match is case-sensitive, as in the sheet's own labels.
    products_found = {}
    for row_idx in range(reading_start_row, min(reading_end_row, n_rows)):
        product_cell = df.iloc[row_idx, 4]
        if pd.isna(product_cell):
            continue
        product = str(product_cell).strip()
        if any(key in product for key in product_keys):
            products_found.setdefault(product, []).append(row_idx)

    # Extract readings for each date
    records = []
    for col_idx, date in date_columns:
        for product, row_indices in products_found.items():
            record = {
                'Date': date.strftime('%Y-%m-%d'),
                'Site': site_name,
                'Product': product
            }
            for tank_num, row_idx in enumerate(row_indices, start=1):
                record[f'Tank_{tank_num}_Reading'] = to_float(df.iloc[row_idx, col_idx])
            records.append(record)

    return records


In [9]:
def get_three_week_avg(df, site_row, site_name, all_dates):
    """Get 3-week average sales for a site.

    Searches up to 200 rows from ``site_row`` for the first column-4 cell
    containing "3 WK AVG". From the row after that label up to the next
    column-3 label containing "ACTUAL" (searched over 200 rows; falls back to
    100 rows), each scanned row ``r`` takes its product name from column 4 of
    row ``r - 2`` and its values from row ``r - 1``.

    NOTE(review): the offsets imply a layout where each product-name row is
    directly followed by its "3 WK AVG" row — confirm against the sheet.

    Args:
        df: Raw sheet as a DataFrame with positional columns.
        site_row: Row index at which the site's block starts.
        site_name: Name written into every record.
        all_dates: Iterable of ``(column_index, date)`` pairs.

    Returns:
        One dict per (date, product) with keys ``Date``, ``Site``, ``Product``
        and ``Tank_<n>_3_Week_Avg`` (float, or None when the cell is empty or
        not numeric). Empty list when no "3 WK AVG" label is found.
    """
    print(f" Getting 3-week average sales for {site_name}...")

    product_keys = ('87', '88', '91', 'dsl', 'racing', 'red')
    n_rows = len(df)

    def cell_text(row_idx, col_idx):
        """Stripped text of a cell, or '' for a missing cell."""
        cell = df.iloc[row_idx, col_idx]
        return str(cell).strip() if pd.notna(cell) else ""

    def to_float(value):
        """Parse a cell such as '1,234' to float; None if missing or non-numeric."""
        if pd.isna(value):
            return None
        text = str(value).replace(',', '').strip()
        try:
            return float(text) if text else None
        except ValueError:
            return None

    # Find the first "3 WK AVG" label in column 4
    avg_start_row = None
    for row_idx in range(site_row, min(site_row + 200, n_rows)):
        if "3 WK AVG" in cell_text(row_idx, 4).upper():
            avg_start_row = row_idx + 1
            break

    if avg_start_row is None:
        return []

    # Find end of Avg Section; default to 100 rows when no terminator is seen
    avg_end_row = avg_start_row + 100
    for row_idx in range(avg_start_row, min(avg_start_row + 200, n_rows)):
        if 'ACTUAL' in cell_text(row_idx, 3).upper():
            avg_end_row = row_idx
            break

    # Stored row indices are one past the value row; the product name sits two
    # rows above the stored index.
    products_found = {}
    for row_idx in range(avg_start_row, min(avg_end_row, n_rows)):
        product_cell = df.iloc[row_idx - 2, 4]
        if pd.isna(product_cell):
            continue
        product = str(product_cell).strip()
        if any(keyword in product for keyword in product_keys):
            products_found.setdefault(product, []).append(row_idx)

    records = []
    for col_idx, date in all_dates:
        for product, row_indices in products_found.items():
            record = {
                'Date': date.strftime('%Y-%m-%d'),
                'Site': site_name,
                'Product': product
            }
            for tank_num, row_idx in enumerate(row_indices, start=1):
                record[f'Tank_{tank_num}_3_Week_Avg'] = to_float(df.iloc[row_idx - 1, col_idx])
            records.append(record)

    return records


In [10]:
def get_2_month_avg(df, site_row, site_name, all_dates):
    """Get 2-month average sales for a site.

    Searches up to 200 rows from ``site_row`` for the first column-4 cell
    containing "2 MO AVG". From the row after that label up to the next
    column-3 label containing "ACTUAL" (searched over 200 rows; falls back to
    100 rows), each scanned row ``r`` takes its product name from column 4 of
    row ``r - 3`` and its values from row ``r - 1``.

    NOTE(review): the offsets imply a layout where each product-name row is
    followed by one other row and then its "2 MO AVG" row — confirm against
    the sheet.

    Args:
        df: Raw sheet as a DataFrame with positional columns.
        site_row: Row index at which the site's block starts.
        site_name: Name written into every record.
        all_dates: Iterable of ``(column_index, date)`` pairs.

    Returns:
        One dict per (date, product) with keys ``Date``, ``Site``, ``Product``
        and ``Tank_<n>_2_Month_Avg`` (float, or None when the cell is empty or
        not numeric). Empty list when no "2 MO AVG" label is found.
    """
    print(f" Getting 2-month average sales for {site_name}...")

    product_keys = ('87', '88', '91', 'dsl', 'racing', 'red')
    n_rows = len(df)

    def cell_text(row_idx, col_idx):
        """Stripped text of a cell, or '' for a missing cell."""
        cell = df.iloc[row_idx, col_idx]
        return str(cell).strip() if pd.notna(cell) else ""

    def to_float(value):
        """Parse a cell such as '1,234' to float; None if missing or non-numeric."""
        if pd.isna(value):
            return None
        text = str(value).replace(',', '').strip()
        try:
            return float(text) if text else None
        except ValueError:
            return None

    # Find the first "2 MO AVG" label in column 4
    avg_start_row = None
    for row_idx in range(site_row, min(site_row + 200, n_rows)):
        if "2 MO AVG" in cell_text(row_idx, 4).upper():
            avg_start_row = row_idx + 1
            break

    if avg_start_row is None:
        return []

    # Find end of Avg Section; default to 100 rows when no terminator is seen
    avg_end_row = avg_start_row + 100
    for row_idx in range(avg_start_row, min(avg_start_row + 200, n_rows)):
        if 'ACTUAL' in cell_text(row_idx, 3).upper():
            avg_end_row = row_idx
            break

    # Stored row indices are one past the value row; the product name sits
    # three rows above the stored index.
    products_found = {}
    for row_idx in range(avg_start_row, min(avg_end_row, n_rows)):
        product_cell = df.iloc[row_idx - 3, 4]
        if pd.isna(product_cell):
            continue
        product = str(product_cell).strip()
        if any(key in product for key in product_keys):
            products_found.setdefault(product, []).append(row_idx)

    records = []
    for col_idx, date in all_dates:
        for product, row_indices in products_found.items():
            record = {
                'Date': date.strftime('%Y-%m-%d'),
                'Site': site_name,
                'Product': product
            }
            for tank_num, row_idx in enumerate(row_indices, start=1):
                record[f'Tank_{tank_num}_2_Month_Avg'] = to_float(df.iloc[row_idx - 1, col_idx])
            records.append(record)

    return records


In [11]:
def extract_site_loads(df, site_row, site_name, date_columns):
    """Extract loads (fuel deliveries) for a single site.

    Locates a column-3 "ULLAGE" label within 30 rows of ``site_row``, then a
    column-3 "LOADS" label within the following 19 rows. Product rows are read
    from the LOADS row itself up to the next SALES / CARRIER / NOTES label in
    column 3 (or SALES / CARRIER in column 1), searched over 14 rows with a
    10-row fallback.

    Args:
        df: Raw sheet as a DataFrame with positional columns.
        site_row: Row index at which the site's block starts.
        site_name: Name written into every record.
        date_columns: Iterable of ``(column_index, date)`` pairs.

    Returns:
        One dict per (date, product) that has a numeric value, with keys
        ``Date``, ``Site``, ``Product`` and ``Load_Total``. Empty or
        non-numeric cells produce no record. Empty list when the ULLAGE or
        LOADS label is not found.
    """
    print(f"  Extracting LOADS for {site_name}...")

    product_keys = ('87', '88', '91', 'dsl', 'racing', 'red')
    n_rows = len(df)

    def label(row_idx, col_idx):
        """Upper-cased, stripped text of a cell, or '' for a missing cell."""
        cell = df.iloc[row_idx, col_idx]
        return str(cell).strip().upper() if pd.notna(cell) else ""

    def to_float(value):
        """Parse a cell such as '8,000' to float; None if missing or non-numeric."""
        if pd.isna(value):
            return None
        text = str(value).replace(',', '').strip()
        try:
            return float(text) if text else None
        except ValueError:
            return None

    # First, find ULLAGE section
    ullage_row = None
    for row_idx in range(site_row, min(site_row + 30, n_rows)):
        if "ULLAGE" in label(row_idx, 3):
            ullage_row = row_idx
            break

    if ullage_row is None:
        return []

    # Now find LOADS section AFTER ullage
    loads_start_row = None
    for row_idx in range(ullage_row + 1, min(ullage_row + 20, n_rows)):
        if "LOADS" in label(row_idx, 3):
            loads_start_row = row_idx
            break

    if loads_start_row is None:
        return []

    # Find end of LOADS section; default to 10 rows when no terminator is seen
    loads_end_row = loads_start_row + 10
    for row_idx in range(loads_start_row + 1, min(loads_start_row + 15, n_rows)):
        section_label = label(row_idx, 3)
        col1_label = label(row_idx, 1)
        if any(keyword in section_label for keyword in ('SALES', 'CARRIER', 'NOTES')) or \
           any(keyword in col1_label for keyword in ('SALES', 'CARRIER')):
            loads_end_row = row_idx
            break

    # Scan LOADS section - one row per distinct product string.
    # NOTE(review): the key is the full product string, so "87" and "87 total"
    # are kept as separate products. For a repeated name the first row wins,
    # unless the name contains "total", in which case the last row wins.
    # Confirm whether totals were meant to replace their base product.
    products_found = {}
    for row_idx in range(loads_start_row, min(loads_end_row, n_rows)):
        product_cell = df.iloc[row_idx, 4]
        if pd.isna(product_cell):
            continue
        product = str(product_cell).strip()
        if not any(keyword in product for keyword in product_keys):
            continue
        is_total = "total" in product.lower()
        if product not in products_found or is_total:
            products_found[product] = row_idx

    # Extract loads for each date, skipping cells with no numeric value
    records = []
    for col_idx, date in date_columns:
        for product, row_idx in products_found.items():
            load_val = to_float(df.iloc[row_idx, col_idx])
            if load_val is not None:
                records.append({
                    'Date': date.strftime('%Y-%m-%d'),
                    'Site': site_name,
                    'Product': product,
                    'Load_Total': load_val
                })

    return records


In [50]:

def extract_site_tank_sizes(df, site_row, site_name):
    """Extract tank sizes for a single site.

    Locates a column-1 "TANK SIZE" label within 40 rows of ``site_row`` and
    scans up to 19 following rows, stopping at a row whose column 1 or column
    3 contains "SALES". Each row with a positive numeric size in column 1 and
    a recognised product name in column 4 is collected. Rows whose product
    name contains "total" are only used when that product has no individual
    tank rows.

    Args:
        df: Raw sheet as a DataFrame with positional columns.
        site_row: Row index at which the site's block starts.
        site_name: Name written into every record.

    Returns:
        List of dicts with keys ``Site``, ``Product``, ``Tank_Number`` and
        ``Tank_Size``. Empty list when no "TANK SIZE" label is found.
    """
    print(f"  Extracting TANK SIZES for {site_name}...")

    product_keys = ('87', '88', '91', 'dsl', 'racing', 'red')
    n_rows = len(df)

    def cell_text(row_idx, col_idx):
        """Upper-cased, stripped text of a cell, or '' for a missing cell."""
        cell = df.iloc[row_idx, col_idx]
        return str(cell).strip().upper() if pd.notna(cell) else ""

    # Find TANK SIZE label
    tank_size_row = None
    for row_idx in range(site_row, min(site_row + 40, n_rows)):
        if "TANK SIZE" in cell_text(row_idx, 1):
            tank_size_row = row_idx
            break

    if tank_size_row is None:
        return []

    # First pass: collect rows as
    # {base_product: {'total': (row_idx, value), 'singles': [(row_idx, value), ...]}}
    products_data = {}
    for row_idx in range(tank_size_row + 1, min(tank_size_row + 20, n_rows)):
        if "SALES" in cell_text(row_idx, 1) or "SALES" in cell_text(row_idx, 3):
            break

        tank_size = df.iloc[row_idx, 1]
        product_cell = df.iloc[row_idx, 4]
        if pd.isna(tank_size) or pd.isna(product_cell):
            continue

        product = str(product_cell).strip()
        if not any(keyword in product for keyword in product_keys):
            continue

        is_total = "total" in product.lower()
        # NOTE(review): total rows are lower-cased when "total" is stripped,
        # while single-tank rows keep their original case.
        base_product = product.lower().replace("total", "").strip() if is_total else product

        clean_val = str(tank_size).replace(',', '').strip()
        try:
            size_val = float(clean_val) if clean_val else None
        except ValueError:
            # Non-numeric text in the size column: not a tank row.
            continue

        if size_val is None or not size_val > 0:
            continue

        entry = products_data.setdefault(base_product, {'total': None, 'singles': []})
        if is_total:
            entry['total'] = (row_idx, size_val)
        else:
            entry['singles'].append((row_idx, size_val))

    # Second pass: create records - use singles if they exist, otherwise use total
    records = []
    for base_product, data in products_data.items():
        if data['singles']:
            for tank_num, (_, size_val) in enumerate(data['singles'], start=1):
                records.append({
                    'Site': site_name,
                    'Product': base_product,
                    'Tank_Number': tank_num,
                    'Tank_Size': size_val
                })
        elif data['total']:
            _, size_val = data['total']
            records.append({
                'Site': site_name,
                'Product': base_product,
                'Tank_Number': 1,
                'Tank_Size': size_val
            })

    return records


In [41]:

def extract_site_inv_settings(df, site_row, site_name):
    """Extract inventory settings for a single site.

    Locates a column-1 label containing "INV. SETTING" or "INV SETTING" within
    20 rows of ``site_row`` and scans up to 19 following rows, stopping at a
    row whose column 1 contains "TANK SIZE". Each row with a positive numeric
    level in column 1 and a recognised product name in column 4 is collected.
    Rows whose product name contains "total" are only used when that product
    has no individual tank rows.

    Args:
        df: Raw sheet as a DataFrame with positional columns.
        site_row: Row index at which the site's block starts.
        site_name: Name written into every record.

    Returns:
        List of dicts with keys ``Site``, ``Product``, ``Tank_Number`` and
        ``Desired_Level``. Empty list when no INV SETTING label is found.
    """
    print(f"  Extracting INV SETTINGS for {site_name}...")

    product_keys = ('87', '88', '91', 'dsl', 'racing', 'red')
    n_rows = len(df)

    def col1_text(row_idx):
        """Upper-cased, stripped text of column 1, or '' for a missing cell."""
        cell = df.iloc[row_idx, 1]
        return str(cell).strip().upper() if pd.notna(cell) else ""

    # Find INV SETTING label
    inv_setting_row = None
    for row_idx in range(site_row, min(site_row + 20, n_rows)):
        label = col1_text(row_idx)
        if "INV. SETTING" in label or "INV SETTING" in label:
            inv_setting_row = row_idx
            break

    if inv_setting_row is None:
        return []

    # First pass: collect rows as
    # {base_product: {'total': (row_idx, value), 'singles': [(row_idx, value), ...]}}
    products_data = {}
    for row_idx in range(inv_setting_row + 1, min(inv_setting_row + 20, n_rows)):
        if "TANK SIZE" in col1_text(row_idx):
            break

        desired_level = df.iloc[row_idx, 1]
        product_cell = df.iloc[row_idx, 4]
        if pd.isna(desired_level) or pd.isna(product_cell):
            continue

        product = str(product_cell).strip()
        if not any(keyword in product for keyword in product_keys):
            continue

        is_total = "total" in product.lower()
        # Get base product name (without "total"); total rows are lower-cased here
        base_product = product.lower().replace("total", "").strip() if is_total else product

        clean_val = str(desired_level).replace(',', '').strip()
        try:
            level_val = float(clean_val) if clean_val else None
        except ValueError:
            # Non-numeric text in the level column: not a setting row.
            continue

        if level_val is None or not level_val > 0:
            continue

        entry = products_data.setdefault(base_product, {'total': None, 'singles': []})
        if is_total:
            entry['total'] = (row_idx, level_val)
        else:
            entry['singles'].append((row_idx, level_val))

    # Second pass: create records - use singles if they exist, otherwise use total
    records = []
    for base_product, data in products_data.items():
        if data['singles']:
            for tank_num, (_, level_val) in enumerate(data['singles'], start=1):
                records.append({
                    'Site': site_name,
                    'Product': base_product,
                    'Tank_Number': tank_num,
                    'Desired_Level': level_val
                })
        elif data['total']:
            _, level_val = data['total']
            records.append({
                'Site': site_name,
                'Product': base_product,
                'Tank_Number': 1,
                'Desired_Level': level_val
            })

    return records


In [14]:
len(sites)

26

In [23]:
# Date header columns as (column_index, Timestamp) pairs, limited to dates up to today.
all_dates = get_all_dates(df)
len(all_dates)


Extracting all dates...
✓ Found 615 dates (up to today)
  Date range: 2024-03-01 to 2025-11-05


615

In [51]:
# Debug filter: only the site with this exact name is processed.
# Set to None to process every entry in `sites`.
SITE_FILTER = "NEW Morongo Site #2"

all_inv_settings = []
all_readings = []
all_three_week_avgs = []
all_two_month_avgs = []
all_tank_sizes = []

# NOTE(review): extract_site_loads() is defined in this notebook but is not
# called in this loop — confirm whether loads should be collected here too.
for site_row, site_name in sites:
    if SITE_FILTER is not None and site_name != SITE_FILTER:
        continue
    readings = extract_site_readings(df, site_row, site_name, all_dates)
    all_readings.extend(readings)
    inv_settings = extract_site_inv_settings(df, site_row, site_name)
    all_inv_settings.extend(inv_settings)
    three_week_avgs = get_three_week_avg(df, site_row, site_name, all_dates)
    all_three_week_avgs.extend(three_week_avgs)
    two_month_avgs = get_2_month_avg(df, site_row, site_name, all_dates)
    all_two_month_avgs.extend(two_month_avgs)
    tank_sizes = extract_site_tank_sizes(df, site_row, site_name)
    all_tank_sizes.extend(tank_sizes)

  Extracting READINGS for NEW Morongo Site #2...
  Extracting INV SETTINGS for NEW Morongo Site #2...
 Getting 3-week average sales for NEW Morongo Site #2...
 Getting 2-month average sales for NEW Morongo Site #2...
  Extracting TANK SIZES for NEW Morongo Site #2...


In [53]:
# Turn each list of record dicts collected by the site loop into a DataFrame
# (one row per record; an empty list yields an empty frame).
df_readings = pd.DataFrame(all_readings)
df_inv_settings = pd.DataFrame(all_inv_settings)
df_three_week_avgs = pd.DataFrame(all_three_week_avgs)
df_two_month_avgs = pd.DataFrame(all_two_month_avgs)
df_tank_sizes = pd.DataFrame(all_tank_sizes)

In [None]:
df_tank_sizes


Unnamed: 0,Site,Product,Tank_Number,Desired_Level
0,NEW Morongo Site #2,87,1,20000.0
1,NEW Morongo Site #2,87,2,20000.0
2,NEW Morongo Site #2,91,1,12000.0
3,NEW Morongo Site #2,dsl,1,21000.0
4,NEW Morongo Site #2,dsl,2,21000.0
