In [1]:

import pandas as pd
import requests
from io import StringIO
from datetime import datetime
import argparse
import os
from site_identifier import identify_sites

In [3]:
# Configuration
# Published-to-web CSV export of the source Google Sheet. The URL is assembled
# from adjacent string literals purely for readability; the resulting value is
# a single string.
GOOGLE_SHEET_URL = (
    "https://docs.google.com/spreadsheets/d/e/"
    "2PACX-1vRpva-TXUaQR_6tJoXX2vnSN2ertC5GNxAgssqmXvIhqHBNrscDxSxtiSWbCiiHqAoSHb3SzXDQw_VX"
    "/pub?gid=1048590026&single=true&output=csv"
)

In [4]:
"""Fetch Google Sheets data"""
print("Fetching data from Google Sheets...")
response = requests.get(GOOGLE_SHEET_URL, timeout=30)
# Fail fast on HTTP errors (4xx/5xx) so an error page is never parsed as if it
# were sheet data.
response.raise_for_status()
# header=None keeps the sheet's first row as data row 0 instead of consuming it
# as column names; columns are therefore addressed by position.
df = pd.read_csv(StringIO(response.text), header=None, low_memory=False)

Fetching data from Google Sheets...


In [6]:
# identify_sites is the project-local helper imported from site_identifier.
# Its result is iterated further down as (site_row, site_name) pairs.
# NOTE(review): presumably site_row is a positional row index into df - confirm
# against site_identifier.
sites = identify_sites(df)

Identifying sites dynamically...
Strategy: Finding 'INV. SETTING' labels and extracting site names

  ✓ Row 4: OLD Morongo | Cabazon
  ✓ Row 43: NEW Morongo Site #2
  ✓ Row 93: Fort Independence
  ✓ Row 127: Campo | Golden Acorn
  ✓ Row 168: Bishop | Pauite Palace
  ✓ Row 209: Bishop Pauite 2 Nobi
  ✓ Row 248: Pechanga | Temecula
  ✓ Row 283: Salton Sea | Red Earth
  ✓ Row 350: Cahuilla | Anza
  ✓ Row 385: La Jolla Trading Post
  ✓ Row 420: Eagle Feather | Tule River
  ✓ Row 455: Sycuan | El Cajon
  ✓ Row 490: Rincon | Valley Center
  ✓ Row 525: Santa Rosa Pit Stop
  ✓ Row 560: Fort Mojave Smoke Shop
  ✓ Row 599: Thalypo | Fort Mojave
  ✓ Row 666: Pala
  ✓ Row 701: Shivwits | Utah
  ✓ Row 762: San Pasqual | Valley View
  ✓ Row 905: Barona
  ✓ Row 944: Kanosh | Pahvant Travel
  ✓ Row 995: Santa Ysabel
  ✓ Row 1029: Palms | Coachella
  ✓ Row 1112: Kaibab | Red Cliffs
  ✓ Row 1147: Viejas | Alpine
  ✓ Row 1197: Chumash

✓ Total sites identified: 26


In [7]:
def get_all_dates(df, start_col=6):
    """Extract all date columns from the sheet (up to today only).

    Row 0 of ``df`` is scanned from ``start_col`` to the last column. Every
    non-empty cell is parsed with the ``'%b-%d-%y'`` format (for example
    ``Mar-01-24``). Cells that do not parse, and dates later than today, are
    skipped. Progress and the resulting date range are printed.

    Args:
        df: DataFrame whose first row (position 0) holds the date labels.
        start_col: Positional index of the first column to scan.

    Returns:
        A list of ``(col_idx, parsed)`` tuples in column order, where
        ``col_idx`` is the positional column index in ``df`` and ``parsed`` is
        the corresponding ``pd.Timestamp``. Empty when ``df`` has no rows or
        no usable dates.
    """
    print("\nExtracting all dates...")

    today = pd.Timestamp.now().normalize()  # Today's date at midnight

    date_data = []
    if len(df) > 0:  # an empty frame has no row 0 to read
        dates_row = df.iloc[0, start_col:]
        for col_idx, date_val in enumerate(dates_row, start=start_col):
            if pd.isna(date_val):
                continue
            try:
                # Strip so a label with stray surrounding whitespace still parses.
                parsed = pd.to_datetime(
                    str(date_val).strip(), format='%b-%d-%y', errors='coerce'
                )
            except (ValueError, TypeError):
                # Best effort: an unparseable header cell is simply not a date.
                continue
            # errors='coerce' yields NaT for unparseable text; test for it
            # explicitly instead of relying on NaT's truthiness.
            if pd.isna(parsed) or parsed > today:
                continue
            date_data.append((col_idx, parsed))

    print(f"✓ Found {len(date_data)} dates (up to today)")
    if date_data:
        # Use min/max so the reported range is right even if the columns are
        # not in chronological order.
        parsed_dates = [parsed for _, parsed in date_data]
        print(f"  Date range: {min(parsed_dates).date()} to {max(parsed_dates).date()}")
    return date_data

In [8]:
# (column index, Timestamp) pairs for every date column in row 0 of df that is
# not later than today, scanned from the default start column (6).
all_dates = get_all_dates(df)


Extracting all dates...
✓ Found 609 dates (up to today)
  Date range: 2024-03-01 to 2025-10-30


In [9]:
# Five independent, initially empty accumulator lists (one per data category).
# The generator yields a fresh list for every name, so none of them alias.
(
    all_readings,
    all_loads,
    all_tank_sizes,
    all_inv_settings,
    all_sales_actual,
) = ([] for _ in range(5))

In [None]:
# NOTE(review): `base` is not referenced anywhere in this cell; this looks like
# an accidental auto-import. Kept in case another cell relies on it - confirm
# and remove.
from sklearn import base


# For each identified site: locate its "SALES (actual)" section, map the
# product rows in that section, then print the per-date values.
for site_row, site_name in sites:
    print(f"\n{site_name}:")

    print(f"  Extracting SALES (actual) for {site_name}...")
    sales_start_row = None

    # Scan up to 40 rows from the site's row for the one labelled "SALES" in
    # column 1 and "ACTUAL" in column 3 (case-insensitive).
    for offset in range(40):
        row_idx = site_row + offset
        if row_idx >= len(df):
            break

        col1_label = str(df.iloc[row_idx, 1]).strip() if pd.notna(df.iloc[row_idx, 1]) else ""
        col3_label = str(df.iloc[row_idx, 3]).strip() if pd.notna(df.iloc[row_idx, 3]) else ""

        if "SALES" in col1_label.upper() and "ACTUAL" in col3_label.upper():
            sales_start_row = row_idx
            break

    if sales_start_row is None:
        # Skip only this site; the remaining sites are still processed.
        print(f"    ⚠️ SALES (actual) row not found for {site_name}, skipping...")
        continue
    print(f"    Found SALES (actual) row at index {sales_start_row}")

    # NOTE(review): nothing in this cell fills `records` yet.
    records = []
    products_found = {}

    # Product labels sit in column 4, starting on the SALES row itself. Look at
    # most 10 rows down and stop early at the first "READING" label.
    for row_idx in range(sales_start_row, min(sales_start_row + 10, len(df))):
        product_cell = df.iloc[row_idx, 4]
        if pd.isna(product_cell):
            continue

        product = str(product_cell).strip()
        if "READING" in product.upper():
            break

        product_lower = product.lower()
        # Only the 87, 91 and diesel ("dsl") rows are of interest.
        if "87" in product or "91" in product or "dsl" in product_lower:
            products_found[product] = {
                'row_idx': row_idx,
                'base_product': product,
                # Case-insensitive check against the lower-cased label.
                'is_total': "total" in product_lower,
            }

    print(f"    Products found so far: {products_found}")

    for col_idx, date in all_dates:
        for product_info in products_found.values():
            row_idx = product_info['row_idx']
            value = df.iloc[row_idx, col_idx]

            print(f"      Date: {date.date()}, Product: {product_info['base_product']}, Value: {value}, Col: {col_idx}, Row: {row_idx}")
    # NOTE(review): debugging limit - stops after the first site that has a
    # SALES row. Remove this break (and the per-value print above) to process
    # every site.
    break



OLD Morongo | Cabazon:
  Extracting SALES (actual) for OLD Morongo | Cabazon...
    Found SALES (actual) row at index 37
    Products found so far: {'87': {'row_idx': 37, 'base_product': '87', 'is_total': False}, '91': {'row_idx': 38, 'base_product': '91', 'is_total': False}, 'dsl': {'row_idx': 39, 'base_product': 'dsl', 'is_total': False}}
      Date: 2024-03-01, Product: 87, Value: 32,348, Col: 6, Row: 37
      Date: 2024-03-01, Product: 91, Value: 5,808, Col: 6, Row: 38
      Date: 2024-03-01, Product: dsl, Value: 4,426, Col: 6, Row: 39
      Date: 2024-03-02, Product: 87, Value: 24,622, Col: 7, Row: 37
      Date: 2024-03-02, Product: 91, Value: 5,954, Col: 7, Row: 38
      Date: 2024-03-02, Product: dsl, Value: 4,019, Col: 7, Row: 39
      Date: 2024-03-03, Product: 87, Value: 29,009, Col: 8, Row: 37
      Date: 2024-03-03, Product: 91, Value: 6,536, Col: 8, Row: 38
      Date: 2024-03-03, Product: dsl, Value: 3,856, Col: 8, Row: 39
      Date: 2024-03-04, Product: 87, Value: 26,