# core

> All core functions and classes in ecfintools

In [None]:
#| default_exp core

In [None]:
#| hide
from eccore.ipython import nb_setup, pandas_nrows_ncols
from nbdev.showdoc import show_doc

In [None]:
#| hide
# Run the notebook setup helper imported from eccore (the cell output reports that autoreload mode is set)
nb_setup()

Set autoreload mode


In [None]:
#| export
import re
from datetime import datetime
from pathlib import Path
from typing import Optional

import pandas as pd

## Functions transferred from `finutilities`

In [None]:
# Show the current time rendered in the 'YYYY-MM-DD HH:MM:SS' layout
datetime.now().strftime('%Y-%m-%d %H:%M:%S')

'2024-12-15 22:14:09'

In [None]:
#| export
def safe_date(
    dt:str|datetime # date time to validate or convert
    ) -> datetime:  # datetime type object
    """Return a datetime object from a datetime or string

    Accepted formats:

    - 'YYYY-MM-DD', 'YYYY/MM/DD', 'YYYY.MM.DD'
    - 'HH:MM:SS' can be added to the above formats
    """
    if isinstance(dt, datetime): return dt

    elif isinstance(dt, str):
        pattern = re.compile(r'\d{4}(?P<sep>[-\/\.])\d{2}(?P=sep)\d{2}(?P<t>\s?\d{2}:\d{2}:\d{2})?')
        match = pattern.search(dt)
        if match:
            sep = match.group('sep')
            t = match.group('t')
        else:
            msg = f"dt must be in format 'YYYY<sep>MM<sep>DD hh:mm:ss' where <sep> is one of '-/.'"
            raise AttributeError(msg)

        str_format = f"%Y{sep}%m{sep}%d{' %H:%M:%S' if t else ''}"
        return datetime.strptime(dt, str_format)

This function converts date-time strings in the most common formats into a `datetime` object:
- '2024-12-15 21:02:17'
- '2024/12/15 21:02:17'
- '2024.12.15 21:02:17'
- '2024-12-15'
- '2024/12/15'
- '2024.12.15'

The function can also be used to validate date-time values in general, since it accepts a `datetime` object as argument and returns it unchanged.

In [None]:
# Every accepted string layout, as well as an actual datetime, must come back as a datetime
assert all(
    isinstance(safe_date(sample), datetime)
    for sample in (
        '2024-12-15 21:02:17',
        '2024/12/15 21:02:17',
        '2024.12.15 21:02:17',
        '2024-12-15',
        '2024/12/15',
        '2024.12.15',
        datetime.now(),
    )
)

In [None]:
#| export
def df_safe_sampling(
    df:pd.DataFrame,                        # df that will be sampled
    first:Optional[str|datetime] = None,    # first date to be included
    last:Optional[str|datetime] = None      # last date to be included
    ) -> pd.DataFrame:                      # sampled df
    """Sample a DataFrame from first to last, or the closest dates available in df.index."""

    if not isinstance(df.index, pd.DatetimeIndex):
        raise AttributeError("df.index must be a DatetimeIndex")

    earliest, latest = df.index[0], df.index[-1]
    if first is None: first = earliest
    if last is None:  last = latest
    first, last = safe_date(first), safe_date(last)
    if first > last:
        msg = f"first ({first:%Y-%m-%d}) not before last ({last:%Y-%m-%d})"
        raise AttributeError(msg)

    sampled = df.loc[max(first, earliest):min(last, latest), :].copy()
    return sampled

With this function, even if we set dates that are not in the DataFrame's `DatetimeIndex`, we will still be able to get the closest sampled rows.

In the example below, the DataFrame misses a few days, e.g.:
- 2018-10-27 and 2018-10-28
- 2018-11-03 and 2018-11-04

If we pick one of these dates for `first` and `last` respectively, we still get a sampled DataFrame bounded by the closest existing dates.

In [None]:
# Load the sample OHLCV file used to demonstrate `df_safe_sampling`
p2csv = Path('data-dev/ohlcv-data.csv')
assert p2csv.is_file()
# The file is read without a header row; columns 0 and 1 are parsed together
# as one datetime column, which becomes the index
# NOTE(review): nested-list `parse_dates` appears to be deprecated in recent pandas (2.2+) - confirm before upgrading
df = pd.read_csv(p2csv, header=None, parse_dates=[[0,1]], index_col=0)
# Name the five remaining columns and the index
df.columns = 'o h l c v'.split()
df.index.name = 'dt'
# Show at most the first 15 rows
display(df.iloc[:15, :])

Unnamed: 0_level_0,o,h,l,c,v
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-10-22,2759.02,2779.27,2747.27,2754.48,26562
2018-10-23,2753.11,2755.36,2690.69,2743.45,38777
2018-10-24,2744.83,2748.58,2651.23,2672.8,41777
2018-10-25,2670.8,2722.9,2657.93,2680.71,39034
2018-10-26,2675.59,2692.34,2627.59,2663.57,61436
2018-10-29,2667.7,2707.0,2603.33,2639.17,44960
2018-10-30,2639.55,2689.5,2633.05,2688.5,52786
2018-10-31,2688.88,2736.76,2681.25,2704.75,32374
2018-11-01,2707.13,2741.58,2706.88,2731.9,29565
2018-11-02,2725.28,2766.28,2699.96,2723.76,41892


In [None]:
# Neither bound appears among the rows displayed for `df`; the result spans the existing dates in between
df_safe_sampling(df, '2018-10-27', '2018-11-04')

Unnamed: 0_level_0,o,h,l,c,v
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-10-29,2667.7,2707.0,2603.33,2639.17,44960
2018-10-30,2639.55,2689.5,2633.05,2688.5,52786
2018-10-31,2688.88,2736.76,2681.25,2704.75,32374
2018-11-01,2707.13,2741.58,2706.88,2731.9,29565
2018-11-02,2725.28,2766.28,2699.96,2723.76,41892


In [None]:
#| hide
# Run nbdev's export step for the project
import nbdev; nbdev.nbdev_export()