# Compact Orthogonal Distance Regression

### Notebook settings

In [1]:
# Enable the autoreload extension so edits to imported modules are picked up
# without restarting the kernel (mode 2: reload modules before running a cell)
%load_ext autoreload
%autoreload 2

In [2]:
# Configure plots
# Request SVG as the figure format of the inline backend
%config InlineBackend.figure_formats = 'svg'
# Show matplotlib figures inline in the notebook output
%matplotlib inline

### Load dataframe

In [None]:
import pandas as pd

# Pickled dataframe holding the fracture-toughness results.
# NOTE: unpickling can execute arbitrary code -- only load this file from a
# trusted source.
FULL_DF_PICKLE = "df_with_fracture_toughness_final_incl_bendingstiffness_final3.pkl"
full_df = pd.read_pickle(FULL_DF_PICKLE)

In [9]:
# Preview the first rows of the loaded frame
full_df.head()

Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,exp_id,date,datetime_x,fieldsite,slope_incl,h_wl_notch,...,GIc_manual_default_min_error,GIIc_manual_default_min_error,Gc_manual_default_min_error,GIIG_manual_default_min_error,slope_incl_min_error,surface_lineload_min_error,GIc_video_seriesopt_min_error,GIIc_video_seriesopt_min_error,Gc_video_seriesopt_min_error,GIIG_video_seriesopt_min_error
0,0,0,0,0,2023_02_15_1,2023-02-15,2023-02-15,2,0.0+/-2.0,13.5,...,0.51+/-0.05,0.0089+/-0.0028,0.51+/-0.06,0.017+/-0.005,0.0+/-2.0,0.000454+/-0.000005,0.283+/-0.013,0.0140+/-0.0018,0.297+/-0.012,0.047+/-0.007
1,1,1,1,1,2023_02_15_2,2023-02-15,2023-02-15,2,-46.0+/-2.0,12.5,...,0.35+/-0.08,0.350+/-0.032,0.70+/-0.09,0.50+/-0.06,-46.0+/-2.0,0.000889+/-0.000009,0.27+/-0.05,0.253+/-0.006,0.52+/-0.05,0.48+/-0.05
2,2,2,2,2,2023_02_15_3,2023-02-15,2023-02-15,2,-56.0+/-2.0,13.0,...,0.19+/-0.07,0.51+/-0.05,0.70+/-0.09,0.73+/-0.07,-56.0+/-2.0,0.000889+/-0.000009,0.32+/-0.09,0.471+/-0.018,0.80+/-0.10,0.59+/-0.06
3,3,3,3,3,2023_02_15_4,2023-02-15,2023-02-15,2,-36.0+/-2.0,13.0,...,0.36+/-0.06,0.213+/-0.021,0.57+/-0.07,0.37+/-0.05,-36.0+/-2.0,0.000889+/-0.000009,0.37+/-0.05,0.205+/-0.005,0.57+/-0.05,0.357+/-0.034
4,4,4,4,4,2023_02_15_5,2023-02-15,2023-02-15,2,-25.0+/-2.0,12.5,...,0.34+/-0.05,0.097+/-0.012,0.44+/-0.05,0.224+/-0.034,-25.0+/-2.0,0.000889+/-0.000009,0.175+/-0.022,0.062+/-0.004,0.237+/-0.019,0.260+/-0.035


### Extract data relevant for fits

In [None]:
import pandas as pd

# Series label -> (start, end) bounds of the date window it covers.
# A bound of None leaves that side of the window open.
date_ranges = {
    "1": (None, "2023-02-16"),
    "2": ("2023-02-27", "2023-03-03"),
    "3": ("2023-03-06", "2023-03-08"),
}

# Timestamps used to assign each row to a series; values that cannot be
# parsed are coerced to NaT instead of raising
date = pd.to_datetime(full_df["datetime_x"], errors="coerce")


def mask_range(s, start, end):
    """Flag the non-missing entries of ``s`` lying in the closed interval [start, end].

    Parameters
    ----------
    s : pandas.Series
        Values to test (presumably datetimes, as produced by ``pd.to_datetime``).
    start, end : str, timestamp-like or None
        Inclusive bounds, converted with ``pd.Timestamp``. ``None`` leaves the
        corresponding side unbounded.

    Returns
    -------
    pandas.Series
        Boolean mask aligned with ``s``; missing entries are always ``False``.

    Notes
    -----
    A date-only bound such as ``"2023-02-16"`` is converted to midnight of that
    day, so values later on the end day do not satisfy ``<= end``.
    """
    after_start = True if start is None else s >= pd.Timestamp(start)
    before_end = True if end is None else s <= pd.Timestamp(end)
    return s.notna() & after_start & before_end


# Start with every row unlabelled, then write each label into the rows whose
# date lies inside that label's window (a later window overwrites an earlier one)
series = pd.Series(pd.NA, index=full_df.index, dtype="string")
for name, (start, end) in date_ranges.items():
    in_window = mask_range(date, start, end)
    series = series.where(~in_window, name)

# For each measurement source, the pair of columns holding (GIc, GIIc)
data_cols = {
    "manual": ("GIc_manual_default_min_error", "GIIc_manual_default_min_error"),
    "video": ("GIc_video_seriesopt_min_error", "GIIc_video_seriesopt_min_error"),
}

# One frame per source, with the source-specific columns renamed to the common
# names GIc/GIIc and tagged with the source name and the per-row series label
frames = []
for source, (gic_col, giic_col) in data_cols.items():
    frames.append(
        full_df[[gic_col, giic_col]]
        .rename(columns={gic_col: "GIc", giic_col: "GIIc"})
        .assign(source=source, series=series)
    )

# Stack the sources, discard rows without a series label, and index the
# result by (source, series)
stacked = pd.concat(frames, ignore_index=True)
df = (
    stacked.dropna(subset=["series"])
    .set_index(["source", "series"])
    .sort_index()
)

In [12]:
# Display the assembled frame (index: source, series; columns: GIc, GIIc)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,GIc,GIIc
source,series,Unnamed: 2_level_1,Unnamed: 3_level_1
manual,1,0.51+/-0.05,0.0089+/-0.0028
manual,1,0.35+/-0.08,0.350+/-0.032
manual,1,0.19+/-0.07,0.51+/-0.05
manual,1,0.36+/-0.06,0.213+/-0.021
manual,1,0.34+/-0.05,0.097+/-0.012
...,...,...,...
video,3,0.237+/-0.034,0.126+/-0.004
video,3,0.33+/-0.06,0.272+/-0.013
video,3,0.461+/-0.030,0.0533+/-0.0028
video,3,0.347+/-0.025,0.0348+/-0.0023
