In [1]:
import calendar
import os
import pandas as pd
from python.save_csv import save_df_to_csv
from datetime import timedelta, datetime

In [2]:
# temp read from temp files

DATA_DIR = r'K:/DOP/OED/METHOD&TOOLS/3 - PROJECTS/2 - ON GOING/PYTHON/Scripts/OEE2/python/data'

def read_csv(filename):
    path = os.path.join(DATA_DIR, filename)
    return pd.read_csv(path)

df_circ_data = read_csv('df_circ_data.csv')
df_meters = read_csv('df_meters.csv')
df_operations = read_csv('df_operations.csv')
df_osi_operations = read_csv('df_osi_operations.csv')
df_rigs = read_csv('df_rigs.csv')

In [3]:
# Преобразуем Starttime и Endtime в datetime
if df_osi_operations is not None:
    df_osi_operations['starttime'] = pd.to_datetime(df_osi_operations['starttime'], errors='coerce')
    df_osi_operations['endtime'] = pd.to_datetime(df_osi_operations['endtime'], errors='coerce')
    
mask2 = df_osi_operations['starttime'] > '2025-03-31'
df_osi_operations = df_osi_operations[mask2]

In [4]:
df_meters['ENDDATE'] = pd.to_datetime(df_meters['ENDDATE'])
mask1 = df_meters['ENDDATE'] > '2025.03.31'
df_meters = df_meters[mask1]


In [5]:
# Функция для преобразования имён колонок:
def clean_column_names(columns):
    return [col.replace('_', ' ').title() for col in columns]

# Очищаем и переименовываем колонки сразу после чтения
if df_circ_data is not None:
    df_circ_data.columns = clean_column_names(df_circ_data.columns)

if df_meters is not None:
    df_meters.columns = clean_column_names(df_meters.columns)
    
if df_operations is not None:   
    df_operations.columns = clean_column_names(df_operations.columns)
    
if df_osi_operations is not None:  
    df_osi_operations.columns = clean_column_names(df_osi_operations.columns)
    
if df_rigs is not None:
    df_rigs.columns = clean_column_names(df_rigs.columns)

In [6]:
def add_difference_column(df, col1, col2):
    """
    Возвращает Series: разница между двумя колонками с заменой NaN на 0.
    """
    return (df[col1].fillna(0) - df[col2].fillna(0)).clip(lower=0)


def add_ratio_column(df, numerator_col, denominator_col):
    """
    Возвращает Series: отношение numerator_col / denominator_col
    с защитой от деления на 0, заменой NaN на 0 и ограничением [0, 1].
    """
    num = df[numerator_col].fillna(0)
    denom = df[denominator_col].replace(0, pd.NA).fillna(pd.NA)
    ratio = num / denom
    return ratio.fillna(0).clip(0, 1)

In [7]:
df_osi_operations

Unnamed: 0,Equipment,Event,Starttime,Endtime
5840,Development without Rig,SB_WAITING_FOR_WORK,2025-06-12 09:00:00,2025-06-21 09:00:00
5843,Development without Rig,SB_WAITING_FOR_WORK,2025-04-10 09:00:00,2025-05-25 09:00:00
6098,Development without Rig,CV_LONG_STOP,2025-06-11 09:00:00,2025-06-12 09:00:00
6114,Development without Rig,SB_WAITING_FOR_WORK,2025-04-05 09:00:00,2025-04-08 09:00:00
6115,Development without Rig,SB_WAITING_FOR_WORK,2025-06-06 09:00:00,2025-06-11 09:00:00
...,...,...,...,...
567122,ПБУ ЗМО-1500ПС №6,DV_DEVELOPMENT,2025-07-05 10:00:00,2025-07-05 13:00:00
567123,ПБУ ЗМО-1500ПС №6,DV_DEVELOPMENT,2025-07-05 14:00:00,2025-07-05 21:00:00
567124,ПБУ ЗМО-1500ПС №6,DV_DEVELOPMENT,2025-07-05 22:00:00,2025-07-06 09:00:00
567125,ПБУ ЗМО-1500ПС №6,DV_DEVELOPMENT,2025-07-06 10:00:00,2025-07-06 13:00:00


In [8]:
# Работа с df_operations
df_operations = df_operations[['Event', 'Category']] # pyright: ignore[reportOptionalSubscript]
df_operations = df_operations[df_operations['Category'].notna()].reset_index(drop=True)

In [9]:
if df_rigs is not None:
    df_rigs['Rig-Osidem'] = df_rigs['Rig-Osidem'].str.strip().str.lower()

In [10]:
if df_osi_operations is not None:
    df_osi_operations['Equipment'] = df_osi_operations['Equipment'].str.strip().str.lower()

In [11]:
# Копия твоего исходного DataFrame
df = df_osi_operations.copy()

# Убедимся, что Starttime и Endtime — это datetime
df['Starttime'] = pd.to_datetime(df['Starttime'])
df['Endtime'] = pd.to_datetime(df['Endtime'])

# Вычисляем общее Duration в часах
df['Duration'] = (df['Endtime'] - df['Starttime']).dt.total_seconds() / 3600

# Функция для разбиения на месяцы
def split_row_by_month(row):
    start = row['Starttime']
    end = row['Endtime']
    duration_total = row['Duration']

    total_seconds = (end - start).total_seconds()
    if total_seconds <= 0:
        return []

    segments = []
    current = start

    while current < end:
        # Начало следующего месяца
        next_month = (current.replace(day=1) + timedelta(days=32)).replace(day=1)
        segment_end = min(end, next_month)
        segment_seconds = (segment_end - current).total_seconds()

        part = row.copy()
        part['Year-Month'] = current.strftime('%Y-%m')  # Год-месяц, как ты просил
        part['Duration'] = duration_total * (segment_seconds / total_seconds)
        segments.append(part)

        current = segment_end

    return segments

# Применяем разбиение
split_rows = []
for _, row in df.iterrows():
    split_rows.extend(split_row_by_month(row))

df_split = pd.DataFrame(split_rows)

# При желании округлим Duration:
df_split['Duration'] = df_split['Duration'].round(2)



In [12]:
df_osi_operations

Unnamed: 0,Equipment,Event,Starttime,Endtime
5840,development without rig,SB_WAITING_FOR_WORK,2025-06-12 09:00:00,2025-06-21 09:00:00
5843,development without rig,SB_WAITING_FOR_WORK,2025-04-10 09:00:00,2025-05-25 09:00:00
6098,development without rig,CV_LONG_STOP,2025-06-11 09:00:00,2025-06-12 09:00:00
6114,development without rig,SB_WAITING_FOR_WORK,2025-04-05 09:00:00,2025-04-08 09:00:00
6115,development without rig,SB_WAITING_FOR_WORK,2025-06-06 09:00:00,2025-06-11 09:00:00
...,...,...,...,...
567122,пбу змо-1500пс №6,DV_DEVELOPMENT,2025-07-05 10:00:00,2025-07-05 13:00:00
567123,пбу змо-1500пс №6,DV_DEVELOPMENT,2025-07-05 14:00:00,2025-07-05 21:00:00
567124,пбу змо-1500пс №6,DV_DEVELOPMENT,2025-07-05 22:00:00,2025-07-06 09:00:00
567125,пбу змо-1500пс №6,DV_DEVELOPMENT,2025-07-06 10:00:00,2025-07-06 13:00:00


In [13]:
df_osi_operations = df_split

In [14]:
df_osi_operations.to_excel('output.xlsx', index=False)

In [15]:
# Слияние
if df_osi_operations is not None and df_rigs is not None:
    df_merged = df_osi_operations.merge( # pyright: ignore[reportOptionalMemberAccess]
        df_rigs,
        left_on='Equipment',
        right_on='Rig-Osidem',
        how='left'
    )

In [16]:
df_merged = df_merged[df_merged['Rig-Acquire'].notna()].reset_index(drop=True)
df_temp = df_merged.drop(columns=['Equipment', 'Rig-Osidem'])

In [17]:
# Очистка колонок event
df_temp['Event'] = df_temp['Event'].str.strip().str.lower()
df_operations['Event'] = df_operations['Event'].str.strip().str.lower()

In [18]:
# Слияние с категориями
df_operations_total = df_temp.merge(df_operations, on='Event', how='left')
df_operations_total['Category'] = df_operations_total['Category'].fillna('Standard Work')

In [19]:
df_operations_total


Unnamed: 0,Event,Starttime,Endtime,Duration,Year-Month,Drillcompany,Rig-Acquire,Tipe Of Circulation,Category
0,rp_repair,2025-04-05 22:00:00,2025-05-05 21:00:00,624.0,2025-04,KATCO,PRAKLA_01-KAT,RC,Unplanned_downtime_losses
1,rp_repair,2025-04-05 22:00:00,2025-05-05 21:00:00,95.0,2025-05,KATCO,PRAKLA_01-KAT,RC,Unplanned_downtime_losses
2,rp_repair,2025-05-05 22:00:00,2025-05-19 09:00:00,323.0,2025-05,KATCO,PRAKLA_01-KAT,RC,Unplanned_downtime_losses
3,rp_repair,2025-06-06 09:00:00,2025-06-23 09:00:00,408.0,2025-06,KATCO,PRAKLA_01-KAT,RC,Unplanned_downtime_losses
4,wt_ppr,2025-03-31 09:00:00,2025-03-31 10:00:00,1.0,2025-03,KATCO,PRAKLA_01-KAT,RC,Planned_downtime
...,...,...,...,...,...,...,...,...,...
27458,dv_development,2025-07-05 10:00:00,2025-07-05 13:00:00,3.0,2025-07,KATCO,ZMO1500-6-KAT,Direct,Standard Work
27459,dv_development,2025-07-05 14:00:00,2025-07-05 21:00:00,7.0,2025-07,KATCO,ZMO1500-6-KAT,Direct,Standard Work
27460,dv_development,2025-07-05 22:00:00,2025-07-06 09:00:00,11.0,2025-07,KATCO,ZMO1500-6-KAT,Direct,Standard Work
27461,dv_development,2025-07-06 10:00:00,2025-07-06 13:00:00,3.0,2025-07,KATCO,ZMO1500-6-KAT,Direct,Standard Work


In [20]:
# Добавляем year и month
df_operations_total['Year-Month'] = df_operations_total['Starttime'].dt.strftime('%Y-%m')

In [21]:
# Сводная таблица pivot
pivot_df = df_operations_total.pivot_table(
    index=['Drillcompany', 'Rig-Acquire', 'Year-Month', 'Tipe Of Circulation'],
    columns='Category',
    values='Duration',
    aggfunc='sum',
    fill_value=0
).reset_index()

In [22]:
pivot_df

Category,Drillcompany,Rig-Acquire,Year-Month,Tipe Of Circulation,Planned_downtime,Standard Work,Unplanned_downtime_losses
0,BurGeoProekt,ZIF1200_01-BGP,2025-03,Direct,4.0,22.0,1.0
1,BurGeoProekt,ZIF1200_01-BGP,2025-04,Direct,101.0,566.0,47.0
2,BurGeoProekt,ZIF1200_01-BGP,2025-05,Direct,105.0,599.0,46.0
3,BurGeoProekt,ZIF1200_01-BGP,2025-06,Direct,95.0,586.0,39.0
4,BurGeoProekt,ZIF1200_01-BGP,2025-07,Direct,23.0,84.0,13.0
...,...,...,...,...,...,...,...
197,TechnoService-Eng,ZIF1200_14-TSE,2025-07,Direct,24.0,121.0,8.0
198,TechnoService-Eng,ZIF1200_15-TSE,2025-03,Direct,1.0,11.0,15.0
199,TechnoService-Eng,ZIF1200_15-TSE,2025-04,Direct,81.0,515.0,112.0
200,TechnoService-Eng,ZIF1200_15-TSE,2025-05,Direct,74.0,455.0,107.0


In [23]:
pivot_df = pivot_df.sort_values(
    by=['Drillcompany', 'Rig-Acquire', 'Year-Month'],
    ascending=[True, True, True]
).reset_index(drop=True)

# Функция для подсчёта часов в месяце
def hours_in_month(row):
    year_month_str = row['Year-Month']  # предполагается, что у вас есть колонка с таким именем
    year, month = map(int, year_month_str.split('-'))
    days = calendar.monthrange(year, month)[1]
    return days * 24


pivot_df['H In Month'] = pivot_df.apply(hours_in_month, axis=1)


# Преобразование ENDDATE в datetime
if df_meters is not None and df_rigs is not None:
    df_meters['Enddate'] = pd.to_datetime(df_meters['Enddate'], errors='coerce')
    df_meters.dropna(subset=['Enddate'], inplace=True)
    df_meters['Year-Month'] = df_meters['Enddate'].dt.strftime('%Y-%m')




if df_meters is not None:
    pivot2 = pd.pivot_table(
        df_meters,
        index=['Year-Month', 'Drillrig'],
        columns='Holestatus',
        values='Holeid',
        aggfunc='count',
        fill_value=0
    )


if df_meters is not None:
    depth_sum = df_meters.groupby(['Year-Month', 'Drillrig'])['Depth'].sum()
    
    
pivot2['Depth'] = depth_sum
meters_pivot = pivot2.reset_index()

# Приведение к верхнему регистру и очистка пробелов
pivot_df['Rig-Acquire'] = pivot_df['Rig-Acquire'].str.strip().str.upper()
meters_pivot['Drillrig'] = meters_pivot['Drillrig'].str.strip().str.upper()

total_merged_df = pd.merge(
    pivot_df,
    meters_pivot,
    left_on=['Rig-Acquire', 'Year-Month'],
    right_on=['Drillrig', 'Year-Month'],
    how='left'
)


total_merged_df.drop(columns='Rig-Acquire', inplace=True)


# Расчёты Planned Production Time и Planned Factor с ограничениями
total_merged_df['Planned Production Time'] = add_difference_column(total_merged_df, 'H In Month', 'Planned_downtime')


total_merged_df['Planned Factor'] = add_ratio_column(
    total_merged_df, 'Planned Production Time', 'H In Month'
)

# Gross Operating Time (GOT) и Availability
total_merged_df['Gross Operating Time'] = add_difference_column(
    total_merged_df, 'Planned Production Time', 'Unplanned_downtime_losses'
)


total_merged_df['Availability'] = add_ratio_column(
    total_merged_df, 'Gross Operating Time', 'Planned Production Time'
)

# Назначаем коэффициенты по типу циркуляции
if df_circ_data is not None and 'Circ' in df_circ_data.columns and 'Standard Avarage Drilling, M/H' in df_circ_data.columns:
    circ_avg_drilling = df_circ_data.set_index('Circ')['Standard Avarage Drilling, M/H'].to_dict()
else:
    circ_avg_drilling = {}
    
    
if df_circ_data is not None and 'Circ' in df_circ_data.columns and 'Time To Well Drill, H' in df_circ_data.columns:
    time_to_well_drill = df_circ_data.set_index('Circ')['Time To Well Drill, H'].to_dict()
else:
    time_to_well_drill = {}


# Записываем два столбца в df
total_merged_df['Circulation Coeff'] = total_merged_df['Tipe Of Circulation'].map(circ_avg_drilling)
total_merged_df['Well Drill Coef'] = total_merged_df['Tipe Of Circulation'].map(time_to_well_drill)


# Потенциальная глубина бурения
total_merged_df['Potential Depth'] = (
    total_merged_df['Gross Operating Time'] * total_merged_df['Circulation Coeff']
)


total_merged_df['Net Operating Time'] = total_merged_df['Depth'].div(
    total_merged_df['Circulation Coeff']
).fillna(0)


total_merged_df['Speed Losses'] = add_difference_column(
    total_merged_df, 'Gross Operating Time', 'Net Operating Time'
)


total_merged_df['Performance'] = add_ratio_column(total_merged_df, 'Net Operating Time', 'Gross Operating Time')

total_merged_df['Quality Losses'] =  total_merged_df.get('Well Drill Coef', 0) * total_merged_df.get('LIQUID', 0)

# Valuable Operating Time
total_merged_df['Valuable Operating Time'] = add_difference_column(
    total_merged_df, 'Net Operating Time', 'Quality Losses'
    )



total_merged_df['Quality'] = add_ratio_column(
    total_merged_df, 'Valuable Operating Time', 'Net Operating Time'
    )



total_merged_df['OEE'] = (
    total_merged_df['Availability'] *
    total_merged_df['Performance'] *
    total_merged_df['Quality']
)


total_merged_df['TRS'] = total_merged_df['OEE'] * total_merged_df['Planned Factor']


total_merged_df.columns = clean_column_names(total_merged_df.columns)


total_merged_df = total_merged_df.rename(columns={'Drillrig': 'Rig'})


df_losses = pd.melt(
    total_merged_df,
    id_vars=[
        'Drillcompany',
        'Rig',
        'Year-Month',
        'Tipe Of Circulation'
    ],
    value_vars=[
        'Planned Downtime',
        'Unplanned Downtime Losses',
        'Speed Losses',
        'Quality Losses'
    ],
    var_name='Loss Type',
    value_name='Loss Value'
)


df_productivity = pd.melt(
    total_merged_df,
    id_vars=[
        'Drillcompany',
        'Rig',
        'Year-Month',
        'Tipe Of Circulation'
    ],
    value_vars=[
        'Planned Factor',
        'Availability',
        'Performance',
        'Quality',
        'Oee',
        'Trs'
    ],
    var_name='Productivity Type',
    value_name='Productivity Value'
)


df_operations_total['Category'] = df_operations_total['Category'].replace({
    'Planned_downtime': 'Planned Downtime',
    'Unplanned_downtime_losses': 'Unplanned Downtime Losses'
})


df_operations_total = df_operations_total.rename(columns={'Rig-Acquire': 'Rig'})


df_operations_total['Event Category'] = df_operations_total['Event'].str.split('_').str[0].str.upper()
df_operations_total['Event'] = df_operations_total['Event'].str.split('_', n=1).str[1].str.capitalize()


df_events_duration = df_operations_total[
    [
        'Year-Month',
        "Category",
        'Drillcompany',
        'Rig',
        'Tipe Of Circulation',
        "Event Category",
        "Event",
        "Duration"
    ]
].sort_values(
    by=['Year-Month', 'Rig', 'Category', 'Duration'],
    ascending=[True, True, True, False],
)


if df_meters is not None:
    df_meters.dropna(subset=['Drillcompany', 'Drillrig'], inplace=True)
    
    df_meters.columns = [
    'Hole ID',
    'Drilling Company',
    'Rig',
    'Purpose',
    'Status',
    'End_Date',
    'Depth_m',
    'Year-Month'
]

    df_meters = df_meters.drop(columns=['End_Date'])
    
    
    
unique_dates_series = pd.Series(
    sorted(df_productivity['Year-Month'].dropna().unique()),
    name='Year-Month'
)

unique_companies_series = pd.Series(
    sorted(df_productivity['Drillcompany'].dropna().unique()),
    name='Companies'
)

unique_rigs_series = pd.Series(
    sorted(df_productivity['Rig'].dropna().unique()),
    name='Rig'
)

unique_circ_series = pd.Series(
    sorted(df_productivity['Tipe Of Circulation'].dropna().unique()),
    name='Circ Type'
)

unique_product_type_series = pd.Series(
    sorted(df_productivity['Productivity Type'].dropna().unique()),
    name='Productivity Type'
)

unique_losses_series = pd.Series(
    sorted(df_losses['Loss Type'].dropna().unique()),
    name='Loss Type'
)



folder = r'k:/DOP/OED/METHOD&TOOLS/3 - PROJECTS/2 - ON GOING/2 - OE/2502 DIGITAL PROJECTS YEVGENIY/TRS/extr_csv_files/'

save_df_to_csv(df_meters, 'df_meters.csv', folder)
save_df_to_csv(df_events_duration, 'df_events_duration.csv', folder)
save_df_to_csv(df_productivity, 'df_productivity.csv', folder)
save_df_to_csv(df_losses, 'df_losses.csv', folder)
save_df_to_csv(unique_dates_series, 'df_dates.csv', folder)



  return ratio.fillna(0).clip(0, 1)
  return ratio.fillna(0).clip(0, 1)


Файл сохранён: k:/DOP/OED/METHOD&TOOLS/3 - PROJECTS/2 - ON GOING/2 - OE/2502 DIGITAL PROJECTS YEVGENIY/TRS/extr_csv_files/df_meters.csv
Файл сохранён: k:/DOP/OED/METHOD&TOOLS/3 - PROJECTS/2 - ON GOING/2 - OE/2502 DIGITAL PROJECTS YEVGENIY/TRS/extr_csv_files/df_events_duration.csv
Файл сохранён: k:/DOP/OED/METHOD&TOOLS/3 - PROJECTS/2 - ON GOING/2 - OE/2502 DIGITAL PROJECTS YEVGENIY/TRS/extr_csv_files/df_productivity.csv
Файл сохранён: k:/DOP/OED/METHOD&TOOLS/3 - PROJECTS/2 - ON GOING/2 - OE/2502 DIGITAL PROJECTS YEVGENIY/TRS/extr_csv_files/df_losses.csv
Файл сохранён: k:/DOP/OED/METHOD&TOOLS/3 - PROJECTS/2 - ON GOING/2 - OE/2502 DIGITAL PROJECTS YEVGENIY/TRS/extr_csv_files/df_dates.csv
