In [10]:
import typing as T

import pandas as pd
import numpy as np


def create_features_container_from_dataframe(
    df: pd.DataFrame,
    class_name: str = "Features",
) -> str:
    out = f"""import typing as T
import numpy as np
from pydantic import BaseModel
from pydantic.types import conint, confloat, constr

class FeatureIsNoneError(Exception):
    pass
    
class {class_name}(BaseModel):
"""

    # loop through the dataframe and create constrained fields
    for col, dtype in df.dtypes.items():
        confunc = "confloat"
        if np.issubdtype(dtype, (np.integer, int)):
            confunc = "conint"
        out += f"    {col}: T.Optional[{confunc}(strict=True, ge={df[col].min()}, le={df[col].max()})] = None\n"
            
    out += """
    class Config:
        validate_assignment = True
        
    def set_categorical_features(
        self, 
        prefix: str, 
        positive_category: T.Union[str, int], 
        sep: str = "_"
    ):
        found = False
        for field in self.__annotations__:
            if field == f"{prefix}{sep}{positive_category}":
                setattr(self, field, 1)
                found = True
                continue
            if prefix in field:
                setattr(self, field, 0)
        if not found:
            raise ValueError(
                f"Could not find the positive category {prefix}{sep}{positive_category}"
            )
    
    def set_bulk_features(self, mapping: T.Dict):
        for field, value in mapping.items():
            setattr(self, field, value)
    
    @property
    def numpy_array(self) -> np.ndarray:
        vals = []
        for field in self.__annotations__:
            attr = getattr(self, field)
            if attr is None:
                raise FeatureIsNoneError(f"{field} value cannot be None")
            vals.append(attr)
        return np.array([vals])
"""
    return out

## load dataframe

In [11]:
df = pd.read_csv('./AirPassengers.csv')

In [12]:
df.head()

Unnamed: 0,Month,#Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121


In [13]:
print(create_features_container_from_dataframe(df))

import typing as T
import numpy as np
from pydantic import BaseModel
from pydantic.types import conint, confloat

class FeatureIsNoneError(Exception):
    pass
    
class Features(BaseModel):
    Month: T.Optional[confloat(strict=True, ge=1949-01, le=1960-12)] = None
    #Passengers: T.Optional[conint(strict=True, ge=104, le=622)] = None

    class Config:
        validate_assignment = True
        
    def set_categorical_features(
        self, 
        prefix: str, 
        positive_category: T.Union[str, int], 
        sep: str = "_"
    ):
        found = False
        for field in self.__annotations__:
            if field == f"{prefix}{sep}{positive_category}":
                setattr(self, field, 1)
                found = True
                continue
            if prefix in field:
                setattr(self, field, 0)
        if not found:
            raise ValueError(
                f"Could not find the positive category {prefix}{sep}{positive_category}"
            )
  