In [None]:
import pandas as pd
from pathlib import Path

# Directory holding the parquet fixtures (created by 02_process.ipynb).
# The path is relative, so it is resolved against the working directory.
fixtures_path = Path("..") / "fixtures"

In [None]:
# Discover the parquet fixtures for each data family by filename pattern.
# Glob matches come back in no guaranteed order, so sort them: any later
# "first N files" selection then picks the same files on every run/machine.
ngl_files = sorted(fixtures_path.glob("*ngl*.parquet"))
conv_files = sorted(fixtures_path.glob("*conv*.parquet"))

print(f"Available NGL files: {len(ngl_files)}")
print(f"Available Conv files: {len(conv_files)}")


In [None]:
# Basic data validation function
def validate_dataframe(df, name):
    """Run basic sanity checks on a dataframe and describe any problems found.

    Two checks are performed:

    * If a ``WellID`` column exists, count the rows where it is null.
    * For every integer or float column whose name contains ``Production``
      or ``Volume`` (case-sensitive), count the negative values.

    Args:
        df: The pandas DataFrame to inspect. It is not modified.
        name: Label for the dataframe (e.g. the source file name). Currently
            unused; kept so existing callers continue to work.

    Returns:
        list[str]: One human-readable message per problem found, in the order
        the checks ran. Empty when no problems were detected.
    """
    issues = []

    # Check for null values in key columns
    if 'WellID' in df.columns:
        null_wells = int(df['WellID'].isnull().sum())
        if null_wells > 0:
            issues.append(f"{null_wells} rows with null WellID")

    # Check for negative production values
    prod_cols = [col for col in df.columns if 'Production' in col or 'Volume' in col]
    for col in prod_cols:
        series = df[col]
        # Accept every integer/float width (int32, float32, nullable Int64, ...),
        # not just int64/float64, so narrower numeric columns are not skipped.
        # Bool and object columns are deliberately excluded.
        if pd.api.types.is_integer_dtype(series) or pd.api.types.is_float_dtype(series):
            negative_count = int((series < 0).sum())
            if negative_count > 0:
                issues.append(f"{negative_count} negative values in {col}")

    return issues


In [None]:
# Validate available files.
# Only the first few files of each family are read, to keep this cell quick.
MAX_FILES_PER_GROUP = 2

# One loop serves both data families; each entry is (label used in the
# heading, list of parquet paths discovered earlier in the notebook).
for group_label, group_files in (("NGL", ngl_files), ("Conventional", conv_files)):
    if not group_files:
        # Nothing discovered for this family: print no heading at all.
        continue

    print(f"\nValidating {group_label} data:")
    for file_path in group_files[:MAX_FILES_PER_GROUP]:
        df = pd.read_parquet(file_path)
        issues = validate_dataframe(df, file_path.name)
        print(f"  {file_path.name}: {len(df):,} rows")
        if issues:
            for issue in issues:
                print(f"    - {issue}")
        else:
            print("    - No issues found")


In [None]:
# Closing status line for the notebook run. It is printed unconditionally,
# so it does not reflect what the validation cells reported.
completion_message = "\nValidation complete. Data is ready for forecasting."
print(completion_message)
