In [59]:
import json
import typing
from datetime import date, datetime
from typing import List

import pandas as pd
import requests
from pydantic import BaseModel, Field, TypeAdapter, validator
from pydantic.dataclasses import dataclass
from pydantic.functional_validators import AfterValidator
from pydantic_extra_types.country import CountryAlpha2

### Defining the goals

1. Define a pydantic date type that ensures the date format is 'YYYY-MM-DD', i.e. '%Y-%m-%d'.
2. Complete the *get_holiday_calendar(country, year)* function, which requests the public holidays for a country and year. Annotate the function parameters (year and country) with pydantic types.
3. Define *HolidayCalendar* which inherits from pydantic *BaseModel*
4. Use functional programming paradigm for the following:
   - *concatenate_dataframes(df1, df2)* which concatenates 2 dataframes of the same pydantic type
   - *filter_country()* and *filter_year()*
   - *get_new_holiday()* [ to define ]

More:
- Verify that the date is not before 1980.

### Define pydantic dates

In [58]:
def validate_yyyymmdd_dateformat(v: str):
    """Check that ``v`` is a real calendar date written strictly as ``YYYY-MM-DD``.

    Args:
        v: Candidate date string.

    Returns:
        ``v`` unchanged when it is valid.

    Raises:
        ValueError: If ``v`` cannot be parsed with ``'%Y-%m-%d'`` or is not
            zero-padded (for example ``'2024-1-5'``).
    """
    error_msg = f"{v} format is not YYYY-MM-DD"
    try:
        parsed = datetime.strptime(v, '%Y-%m-%d')
    except ValueError as exc:
        # strptime raises by itself on a mismatch, so an ``assert`` around it
        # never gets to show its message; re-raise with the intended text.
        raise ValueError(error_msg) from exc
    # strptime tolerates missing leading zeros ('2024-1-5'); comparing with the
    # ISO rendering enforces the exact zero-padded layout.
    if parsed.date().isoformat() != v:
        raise ValueError(error_msg)
    return v

def validate_dateformat(v: str, date_format: str = '%Y-%m-%d'):
    """Check that ``v`` can be parsed with the given ``strptime`` format.

    Args:
        v: Candidate date string.
        date_format: ``datetime.strptime`` format the string must match.

    Returns:
        ``v`` unchanged when it parses.

    Raises:
        ValueError: If ``v`` does not match ``date_format``.
    """
    try:
        datetime.strptime(v, date_format)
    except ValueError as exc:
        # strptime raises before any surrounding ``assert`` could report its
        # message, so translate the error into the intended wording here.
        raise ValueError(f"{v} format is not {date_format}") from exc
    return v

# Reusable annotated string types: each attaches one of the validators above
# to ``str`` through pydantic's AfterValidator.
yyyymmdd_date_format = typing.Annotated[
    str,
    AfterValidator(validate_yyyymmdd_dateformat),
]
# Year-only variant: the generic validator pinned to the '%Y' format.
calendar_date_format = typing.Annotated[
    str,
    AfterValidator(lambda value: validate_dateformat(value, date_format='%Y')),
]

class YearDateModel(BaseModel):
    """Model with a single ``date`` field checked by ``calendar_date_format`` (the '%Y' validator)."""
    date: calendar_date_format

In [56]:
# Smoke test: '2024' matches the '%Y' format, so the model is built and printed.
print(YearDateModel(date='2024')) 

date='2024'


### Requesting data

In [51]:
def get_holiday_calendar(country: CountryAlpha2, year: YearDateModel | int | str, timeout: float = 10.0):
    """Download the public holidays of one country for one year.

    Args:
        country: Country code used as the last URL path segment (e.g. ``'US'``).
        year: Year to query, either a plain ``int``/``str`` such as ``2024`` or
            a ``YearDateModel`` (its ``date`` field is used).
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        A ``pandas.DataFrame`` built with ``pd.json_normalize`` from the JSON
        response body.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out, or if the response status code is not 200.
    """
    HOLIDAY_URL = "https://date.nager.at/api/v2/publicholidays"
    # A YearDateModel formats as "date='2024'", which would corrupt the URL,
    # so unwrap the model to its underlying year string first.
    year_value = year.date if isinstance(year, YearDateModel) else year
    url = f"{HOLIDAY_URL}/{year_value}/{country}"
    # Without a timeout an unresponsive server would block the cell forever.
    resp = requests.get(url, timeout=timeout)

    if resp.status_code != 200:
        raise requests.exceptions.RequestException(
            f"An error has occurred when requesting {url} (HTTP {resp.status_code})"
        )

    return pd.json_normalize(resp.json())

In [52]:
# Example call: fetch the 2024 holiday calendar for the US.
t = get_holiday_calendar(country='US', year=2024)

In [54]:
# Preview the first rows of the downloaded calendar.
t.head()

Unnamed: 0,date,localName,name,countryCode,fixed,global,counties,launchYear,type
0,2024-01-01,New Year's Day,New Year's Day,US,False,True,,,0
1,2024-01-15,"Martin Luther King, Jr. Day","Martin Luther King, Jr. Day",US,False,True,,,0
2,2024-02-19,Washington's Birthday,Presidents Day,US,False,True,,,0
3,2024-03-29,Good Friday,Good Friday,US,False,False,"[US-CT, US-DE, US-HI, US-IN, US-KY, US-LA, US-...",,0
4,2024-03-29,Good Friday,Good Friday,US,False,False,[US-TX],,0


### Define dataclass for Holiday

In [79]:
def validate_county_format(v: str):
    """Check that ``v`` is exactly two alphabetic characters.

    Args:
        v: Candidate county code, e.g. ``'CT'``.

    Returns:
        ``v`` unchanged when it is valid.

    Raises:
        AssertionError: If ``v`` is not two letters.
    """
    # NOTE(review): the notebook also targets countries such as AU, whose
    # subdivision codes may be longer than two letters (e.g. 'AU-NSW') -
    # confirm whether the two-letter rule is really intended.
    # Explicit raise instead of ``assert``: assert statements are removed when
    # Python runs with -O, which would silently disable this validation.
    if not (len(v) == 2 and v.isalpha()):
        raise AssertionError("County needs to be a 2-letter symbols")
    return v

# String type that runs validate_county_format through pydantic's AfterValidator.
county_format = typing.Annotated[str, AfterValidator(validate_county_format)]

class CountyFormatModel(BaseModel):
    """Model with a single ``county`` field checked by ``county_format``."""
    county: county_format

In [80]:
# Smoke test: 'CT' is two letters, so the model is built and printed.
print(CountyFormatModel(county='CT'))

county='CT'


In [96]:
# Built once at import time; validates a plain string as a CountryAlpha2 code.
_COUNTRY_ALPHA2_ADAPTER = TypeAdapter(CountryAlpha2)


def validate_country_county_format(v: str):
    """Check that ``v`` looks like ``<COUNTRY>-<COUNTY>``, e.g. ``'US-CT'``.

    The country part must validate as ``CountryAlpha2`` and the county part
    must be accepted by ``CountyFormatModel``.

    Args:
        v: Candidate string.

    Returns:
        ``v`` unchanged when both parts are valid.

    Raises:
        ValueError: If ``v`` does not contain exactly one ``'-'`` or if either
            part fails its validation.
    """
    ERROR_MSG = f"{v} needs to be formatted <COUNTRY>-<COUNTY> ex: 'US-CT'"
    tsplit = v.split('-')
    if len(tsplit) != 2:
        raise ValueError(ERROR_MSG)

    country, county = tsplit
    try:
        # Calling CountryAlpha2(country) directly only builds a str subclass
        # and checks nothing; going through pydantic runs the real validation.
        _COUNTRY_ALPHA2_ADAPTER.validate_python(country)
        CountyFormatModel(county=county)
    except ValueError as exc:
        # pydantic's ValidationError is a ValueError; report the intended,
        # format-oriented message instead of the nested validation error.
        raise ValueError(ERROR_MSG) from exc
    return v

# String type that runs validate_country_county_format through pydantic's AfterValidator.
country_county_format = typing.Annotated[str, AfterValidator(validate_country_county_format)]

class CountryCountyFormat(BaseModel):
    """Model with a single ``country_county`` field checked by ``country_county_format``."""
    country_county: country_county_format

In [97]:
# Smoke test: 'US-CT' has the <COUNTRY>-<COUNTY> shape, so the model is built and printed.
print(CountryCountyFormat(country_county='US-CT'))

country_county='US-CT'


In [100]:
class LaunchYearFormat(BaseModel):
    """Model with a ``launch_year`` field that is either ``None`` or a 'YYYY-MM-DD' string."""
    # NOTE(review): the field is named "year" but is validated as a full
    # YYYY-MM-DD date - confirm against the values the API actually returns.
    launch_year: yyyymmdd_date_format | None

In [103]:
# Smoke tests: both None and a well-formed date string are accepted.
print(LaunchYearFormat(launch_year=None))
print(LaunchYearFormat(launch_year='2024-10-12'))

launch_year=None
launch_year='2024-10-12'


In [None]:
class Holiday(BaseModel):
    """One public-holiday record, mirroring the columns of the holiday calendar.

    Inherits from ``BaseModel`` so that the field annotations are actually
    enforced when a record is built.
    """

    date: yyyymmdd_date_format
    localName: str
    name: str
    countryCode: CountryAlpha2
    fixed: bool
    # ``global`` is a reserved Python keyword and cannot be an attribute name
    # (it is a SyntaxError), so the value is stored as ``global_`` and read
    # from the "global" key of the input through an alias.
    global_: bool = Field(alias="global")
    # The calendar holds plain 'US-CT'-style strings here, and nothing at all
    # for nationwide holidays, hence a list of validated strings or None.
    counties: List[country_county_format] | None = None
    

### Concatenate Dataframes

In [None]:
# ISO 3166-1 alpha-2 codes of the countries to download; the United Kingdom's
# code is 'GB' ('UK' is not an alpha-2 code and would fail CountryAlpha2).
countries = ['US', 'CA', 'GB', 'AU']

In [82]:
# Scratch check: split a sample 'US-CT' value into its country and county parts.
country, county = 'US-CT'.split('-')

In [84]:
# Scratch check: evaluates to False (see the output below) - a plain str is not
# a CountryAlpha2 instance, so isinstance cannot be used to validate a code.
isinstance('US', CountryAlpha2)

False

In [86]:
# Scratch check: also False - a plain str is not a CountyFormatModel instance.
isinstance('CT', CountyFormatModel)

False