### Field Tables

In [None]:
import pandas as pd

def load_daily_field_data(file_path, prod_type):
    """Load a daily field-definition CSV and tag it with its production type.

    The file is read as ';'-separated text without a header; its first row is
    then used as the column names. Columns whose name contains ``desc`` or
    ``name_on_file`` (case-insensitive) are dropped.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_csv``.
        prod_type: Production type label. ``'GAS_PROD'`` sets
            ``conversion_factor`` to the fixed m3 -> ft3 factor;
            ``'OIL_PROD'`` parses the file's own ``conversion_factor`` column
            (decimal comma) into floats. Any other value leaves
            ``conversion_factor`` untouched.

    Returns:
        A new DataFrame with the remaining columns plus ``prod_type``.

    Raises:
        KeyError: If ``prod_type`` is ``'OIL_PROD'`` and the file has no
            ``conversion_factor`` column.
    """
    m3_to_ft3 = 35.314666721  # same factor the production tables use for m3 -> ft3

    raw = pd.read_csv(file_path, sep=';', skiprows=0, header=None)

    # Promote the first row to the header and keep the remaining rows as data.
    header_row = raw.iloc[0]
    daily_df = raw.iloc[1:].reset_index(drop=True)
    daily_df.columns = header_row.tolist()

    # na=False: a blank header cell is NaN; without it the mask contains NaN
    # and the `~` negation below raises TypeError.
    drop_mask = daily_df.columns.str.contains('desc|name_on_file', case=False, na=False)
    # Explicit copy so the column assignments below never write into a slice.
    daily_df = daily_df.loc[:, ~drop_mask].copy()

    if prod_type == 'GAS_PROD':
        daily_df['conversion_factor'] = m3_to_ft3
    elif prod_type == 'OIL_PROD':
        # regex=False: ',' is a literal decimal comma, not a pattern.
        daily_df['conversion_factor'] = (
            daily_df['conversion_factor']
            .str.replace(',', '.', regex=False)
            .astype(float)
        )
    daily_df['prod_type'] = prod_type
    return daily_df

# Daily gas field definitions.
daily_gas_file_path = "data/formatted/csv/def_daily_gas_field.csv"
def_daily_gas_field = load_daily_field_data(
    file_path=daily_gas_file_path,
    prod_type='GAS_PROD',
)

# Daily oil field definitions.
daily_oil_file_path = "data/formatted/csv/def_daily_oil_field.csv"
def_daily_oil_field = load_daily_field_data(
    file_path=daily_oil_file_path,
    prod_type='OIL_PROD',
)

In [251]:
def load_plan_field_data(file_path, prod_type):
    """Load a plan field-definition CSV and tag it with its production type.

    Only the second and third columns of the ';'-separated file are read
    (the first line is the header), and rows that are empty in both are
    discarded.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_csv``.
        prod_type: ``'GAS_PLAN'`` adds unit ``'tr.m3'`` with the fixed
            conversion factor; ``'OIL_PLAN'`` adds unit ``'tr.tons'`` with no
            conversion factor. Any other value adds neither column.

    Returns:
        The loaded DataFrame with a ``prod_type`` column appended.
    """
    # Extra columns to attach for each recognised plan type, in output order.
    extras_by_type = {
        'GAS_PLAN': (('unit', 'tr.m3'), ('conversion_factor', 35.314666721)),
        'OIL_PLAN': (('unit', 'tr.tons'), ('conversion_factor', None)),
    }

    table = pd.read_csv(
        file_path,
        sep=';',
        skiprows=0,
        header=0,
        usecols=[1, 2],
    )
    table.dropna(how='all', inplace=True)

    for column_name, constant in extras_by_type.get(prod_type, ()):
        table[column_name] = constant

    table['prod_type'] = prod_type
    return table

# Planned gas field definitions.
plan_gas_file_path = "data/formatted/csv/def_plan_gas_field.csv"
def_plan_gas_field = load_plan_field_data(
    file_path=plan_gas_file_path,
    prod_type='GAS_PLAN',
)

# Planned oil field definitions.
plan_oil_file_path = "data/formatted/csv/def_plan_oil_field.csv"
def_plan_oil_field = load_plan_field_data(
    file_path=plan_oil_file_path,
    prod_type='OIL_PLAN',
)

In [None]:
# Stack the four field-definition tables (daily gas/oil, plan gas/oil) into
# the main definition dataframe, then export it for the SQL load.
field_definition_df = pd.concat(
    [
        def_daily_gas_field,
        def_daily_oil_field,
        def_plan_gas_field,
        def_plan_oil_field,
    ],
    ignore_index=True,
)
field_definition_df.to_csv(
    "data/formatted/csv/to_sql_daily_fields.csv",
    index=False,
)

  field_definition_df = pd.concat([def_daily_gas_field, def_daily_oil_field,


### Planning Data Tables

In [278]:
def load_planning_data(file_path, plan_type):
    """Load a wide planning CSV and reshape it to one row per (date, field).

    The ';'-separated file is read without a header, its first column is
    dropped, and the table is transposed; the first transposed row supplies
    the column names. The ``field_cluster_id`` column holds the report dates
    (``dd/mm/YYYY``); every other column is one field whose values use '.' as
    thousands separator and ',' as decimal mark.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_csv``.
        plan_type: Plan label copied to every output row. ``'KHQTGAS'`` and
            ``'KHSLCPGiaoGas'`` fill ``prod_m3``/``prod_ft3``; ``'KHQTOIL'``
            and ``'KHSLCPGiaoOil'`` fill ``prod_ton``. Any other value leaves
            all production columns empty.

    Returns:
        DataFrame with columns ``report_date``, ``field_id``, ``plan_type``,
        ``prod_ton``, ``prod_bbls``, ``prod_m3``, ``prod_ft3``; rows are
        grouped field by field in file order. Unparseable dates become NaN.

    Raises:
        ValueError: If there is no ``field_cluster_id`` column, or a value
            cannot be converted to float.
    """
    m3_to_ft3 = 35.314666721
    # Float NaN (rather than None) keeps the unused unit columns numeric, so
    # later concatenation does not mix object and float columns.
    missing = float('nan')
    out_columns = ['report_date', 'field_id', 'plan_type',
                   'prod_ton', 'prod_bbls', 'prod_m3', 'prod_ft3']

    raw = pd.read_csv(file_path, sep=';', skiprows=0, header=None)
    wide = raw.drop(columns=[0]).transpose()
    header = wide.iloc[0].tolist()
    # Copy so the per-column conversions below do not write into a slice.
    wide = wide.iloc[1:].copy()
    wide.columns = header

    fields = list(header)
    fields.remove('field_cluster_id')  # ValueError when the date column is absent

    # Convert by name (every column except the date one) instead of by position.
    for field in fields:
        wide[field] = (
            wide[field]
            .str.replace('.', '', regex=False)   # literal thousands separator
            .str.replace(',', '.', regex=False)  # decimal comma -> point
            .astype(float)
        )

    # The dates are the same for every field: normalise them once.
    report_dates = (
        pd.to_datetime(wide['field_cluster_id'], format='%d/%m/%Y', errors='coerce')
        .dt.strftime('%d/%m/%Y')
        .to_numpy()
    )

    is_gas = plan_type in ('KHQTGAS', 'KHSLCPGiaoGas')
    is_oil = plan_type in ('KHQTOIL', 'KHSLCPGiaoOil')

    frames = []
    for field in fields:
        values = wide[field].to_numpy()
        frames.append(pd.DataFrame({
            'report_date': report_dates,
            'field_id': field,
            'plan_type': plan_type,
            'prod_ton': values if is_oil else missing,
            'prod_bbls': missing,
            'prod_m3': values if is_gas else missing,
            'prod_ft3': values * m3_to_ft3 if is_gas else missing,
        }, columns=out_columns))

    if not frames:
        # pd.concat rejects an empty list; still return the expected schema.
        return pd.DataFrame(columns=out_columns)
    # Single concat at the end instead of growing the result inside the loop.
    return pd.concat(frames, ignore_index=True)

# Gas plan, type KHQTGAS.
KHQTGAS_path = "data/formatted/csv/KHQTGAS.csv"
KHQTGAS_df = load_planning_data(
    file_path=KHQTGAS_path,
    plan_type='KHQTGAS',
)

# Oil plan, type KHQTOIL.
KHQTOIL_path = "data/formatted/csv/KHQTOIL.csv"
KHQTOIL_df = load_planning_data(
    file_path=KHQTOIL_path,
    plan_type='KHQTOIL',
)

# Gas plan, type KHSLCPGiaoGas.
KHSLCPGiaoGas_path = "data/formatted/csv/KHSLCPGiaoGas.csv"
KHSLCPGiaoGas_df = load_planning_data(
    file_path=KHSLCPGiaoGas_path,
    plan_type='KHSLCPGiaoGas',
)

# Oil plan, type KHSLCPGiaoOil.
KHSLCPGiaoOil_path = "data/formatted/csv/KHSLCPGiaoOil.csv"
KHSLCPGiaoOil_df = load_planning_data(
    file_path=KHSLCPGiaoOil_path,
    plan_type='KHSLCPGiaoOil',
)

In [None]:
# Stack the four planning tables and export the result for the SQL load.
planning_df = pd.concat(
    [
        KHQTGAS_df,
        KHQTOIL_df,
        KHSLCPGiaoGas_df,
        KHSLCPGiaoOil_df,
    ],
    ignore_index=True,
)
planning_df.to_csv(
    "data/formatted/csv/to_sql_planning_prod.csv",
    index=False,
)

  planning_df = pd.concat([KHQTGAS_df, KHQTOIL_df, KHSLCPGiaoGas_df, KHSLCPGiaoOil_df], ignore_index=True)


### Daily Production Tables

In [280]:
import pandas as pd
def load_data(file_path):
    """Load a daily production CSV into a numeric, date-normalised DataFrame.

    The first line of the ';'-separated file is skipped and the next one is
    used as the header. The ``DATE`` column is parsed as ``mm/dd/YYYY`` and
    rewritten as ``dd/mm/YYYY`` text (unparseable dates become NaN). Every
    other column is converted to float, treating '.' as thousands separator
    and ',' as decimal mark.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        DataFrame with the ``DATE`` column as text and all remaining columns
        as floats.

    Raises:
        KeyError: If the file has no ``DATE`` column.
        ValueError: If a value cannot be converted to float.
    """
    raw = pd.read_csv(file_path, sep=';', skiprows=1, header=None)

    # Promote the first remaining row to the header; reset_index returns a
    # fresh frame, so the assignments below do not write into a slice.
    daily_prod = raw.iloc[1:].reset_index(drop=True)
    daily_prod.columns = raw.iloc[0].tolist()

    parsed_dates = pd.to_datetime(daily_prod['DATE'], format='%m/%d/%Y', errors='coerce')
    daily_prod['DATE'] = parsed_dates.dt.strftime('%d/%m/%Y')

    # Select value columns by name rather than assuming DATE comes first.
    for col in daily_prod.columns:
        if col == 'DATE':
            continue
        daily_prod[col] = (
            daily_prod[col]
            .str.replace('.', '', regex=False)   # literal thousands separator
            .str.replace(',', '.', regex=False)  # decimal comma -> point
            .astype(float)
        )
    return daily_prod

def get_daily_data(daily_prod:pd.DataFrame, field_df:pd.DataFrame, prod_type:str) -> pd.DataFrame:
    """Reshape wide daily production into one row per (date, field).

    For every column of ``daily_prod`` other than ``DATE`` the field's unit
    is looked up in ``field_df`` (rows where ``prod_type`` and ``short_name``
    match) and the values are written to the matching unit column, with the
    companion unit derived through the conversion factor:

    * ``GAS_PROD`` / ``tr.m3``  -> ``prod_m3`` as given, ``prod_ft3`` = value * 35.314666721
    * ``GAS_PROD`` / ``tr.ft3`` -> ``prod_ft3`` as given, ``prod_m3`` = value / 35.314666721
    * ``OIL_PROD`` / ``ton``    -> ``prod_ton`` as given, ``prod_bbls`` = value * conversion_factor
    * ``OIL_PROD`` / ``bbls``   -> ``prod_bbls`` as given, ``prod_ton`` = value / conversion_factor

    Fields with no definition, or with any other unit, keep all four
    production columns empty (NaN).

    Args:
        daily_prod: Wide table with a ``DATE`` column and one numeric column
            per field.
        field_df: Field definitions with ``prod_type``, ``short_name``,
            ``unit`` and ``conversion_factor`` columns.
        prod_type: Production type label, copied to every output row.

    Returns:
        DataFrame with columns ``report_date``, ``field_id``, ``prod_type``,
        ``prod_ton``, ``prod_bbls``, ``prod_m3``, ``prod_ft3``.

    Raises:
        ValueError: If ``daily_prod`` has no ``DATE`` column, or a field has
            more than one definition for ``prod_type``.
    """
    m3_to_ft3 = 35.314666721
    missing = float('nan')
    out_columns = ['report_date', 'field_id', 'prod_type',
                   'prod_ton', 'prod_bbls', 'prod_m3', 'prod_ft3']

    fields = daily_prod.columns.tolist()
    fields.remove('DATE')

    report_dates = daily_prod['DATE'].to_numpy()
    # Filter the definitions for this production type once, outside the loop.
    type_definitions = field_df.loc[field_df['prod_type'] == prod_type]

    frames = []
    for field in fields:
        definition = type_definitions.loc[type_definitions['short_name'] == field]
        if len(definition) > 1:
            raise ValueError(
                f"ambiguous field definition: {len(definition)} rows for "
                f"short_name={field!r}, prod_type={prod_type!r}"
            )
        # Work with a scalar unit: comparing a zero- or multi-element array in
        # an `if` is ambiguous and its handling varies across NumPy versions.
        unit = definition['unit'].iloc[0] if len(definition) == 1 else None

        values = daily_prod[field].to_numpy()
        prod_ton = prod_bbls = prod_m3 = prod_ft3 = missing

        if prod_type == 'GAS_PROD' and unit == 'tr.m3':
            prod_m3 = values
            prod_ft3 = values * m3_to_ft3
        elif prod_type == 'GAS_PROD' and unit == 'tr.ft3':
            prod_m3 = values / m3_to_ft3
            prod_ft3 = values
        elif prod_type == 'OIL_PROD' and unit in ('ton', 'bbls'):
            # A missing factor becomes NaN, so the derived column is NaN.
            conversion_factor = pd.to_numeric(
                definition['conversion_factor'], errors='coerce'
            ).iloc[0]
            if unit == 'ton':
                prod_ton = values
                prod_bbls = values * conversion_factor
            else:
                prod_ton = values / conversion_factor
                prod_bbls = values

        frames.append(pd.DataFrame({
            'report_date': report_dates,
            'field_id': field,
            'prod_type': prod_type,
            'prod_ton': prod_ton,
            'prod_bbls': prod_bbls,
            'prod_m3': prod_m3,
            'prod_ft3': prod_ft3,
        }, columns=out_columns))

    if not frames:
        # pd.concat rejects an empty list; still return the expected schema.
        return pd.DataFrame(columns=out_columns)
    # Single concat at the end instead of growing the result inside the loop.
    return pd.concat(frames, ignore_index=True)

In [281]:
# Daily oil production: load the raw table, then reshape it per field.
oil_file_path = "data/formatted/csv/daily_oil_prod.csv"
daily_oil_prod = load_data(file_path=oil_file_path)
daily_oil_prod_formatted = get_daily_data(
    daily_prod=daily_oil_prod,
    field_df=field_definition_df,
    prod_type='OIL_PROD',
)

# Daily gas production: same two steps.
gas_file_path = "data/formatted/csv/daily_gas_prod.csv"
daily_gas_prod = load_data(file_path=gas_file_path)
daily_gas_prod_formatted = get_daily_data(
    daily_prod=daily_gas_prod,
    field_df=field_definition_df,
    prod_type='GAS_PROD',
)

# Gas rows first, then oil rows; export the combined table for the SQL load.
full_daily_prod = pd.concat(
    [daily_gas_prod_formatted, daily_oil_prod_formatted],
    ignore_index=True,
)
full_daily_prod.to_csv(
    "data/formatted/csv/to_sql_daily_prod.csv",
    index=False,
)

  full_daily_prod = pd.concat([daily_gas_prod_formatted, daily_oil_prod_formatted], ignore_index=True)
