In [None]:
# missing_data_mixin.py

from typing import Any, Dict, Optional

import pandas as pd


class MissingDataMixin:
    """Mixin with helpers for inspecting and filling missing values.

    None of the methods read instance state; they operate only on the
    DataFrame passed in.
    """

    def count_missing_values(self, df: pd.DataFrame) -> pd.Series:
        """Return the number of missing values in each column of *df*."""
        return df.isna().sum()

    def missing_report(self, df: pd.DataFrame) -> pd.DataFrame:
        """Build a per-column report of missing values.

        Returns:
            A DataFrame indexed by column name with two columns,
            ``missing_count`` and ``missing_percent``. Only columns that
            have at least one missing value are included, sorted by
            ``missing_percent`` in descending order.
        """
        total_missing = df.isna().sum()
        # Reuse the counts instead of scanning the frame a second time.
        percent_missing = (total_missing / len(df)) * 100

        report = pd.DataFrame({
            "missing_count": total_missing,
            "missing_percent": percent_missing
        })

        report = report[report["missing_count"] > 0]
        return report.sort_values(by="missing_percent", ascending=False)

    def fill_missing(
        self,
        df: pd.DataFrame,
        strategy: str = "mean",
        columns: Optional[list] = None,
        constant_value: Optional[Dict[str, Any]] = None
    ) -> pd.DataFrame:
        """Return a copy of *df* with missing values filled.

        Args:
            df: Input frame; it is copied and never modified.
            strategy: One of ``"mean"``, ``"median"``, ``"mode"`` or
                ``"constant"``.
            columns: Columns to process. Defaults to every column of *df*.
            constant_value: Per-column fill values for the ``"constant"``
                strategy. Columns without an entry are filled with ``0``.

        Returns:
            A new DataFrame. With ``"mean"`` / ``"median"`` non-numeric
            columns are left untouched; with ``"mode"`` a column that has
            no non-missing values is left untouched.

        Raises:
            ValueError: If *strategy* is not one of the supported names.
        """
        # Validate up front so a bad strategy is reported even when the data
        # happens to contain no missing values.
        if strategy not in ("mean", "median", "mode", "constant"):
            raise ValueError("Неизвестная стратегия заполнения")

        df = df.copy()

        if columns is None:
            columns = df.columns

        for col in columns:
            series = df[col]
            if not series.isna().any():
                continue

            if strategy in ("mean", "median"):
                if not pd.api.types.is_numeric_dtype(series):
                    continue
                fill_value = series.mean() if strategy == "mean" else series.median()
            elif strategy == "mode":
                modes = series.mode()
                # mode() is empty when every value is missing; nothing to fill with.
                if modes.empty:
                    continue
                fill_value = modes.iloc[0]
            else:  # "constant"
                if constant_value and col in constant_value:
                    fill_value = constant_value[col]
                else:
                    fill_value = 0

            # Assign the result back instead of calling fillna(inplace=True) on
            # df[col]: the in-place form acts on a temporary object and does not
            # update df when pandas Copy-on-Write is active.
            df[col] = series.fillna(fill_value)

        return df

In [None]:
# data_loader.py

import pandas as pd
import requests
from missing_data_mixin import MissingDataMixin


class DataLoader(MissingDataMixin):
    """Load tabular data from CSV files, JSON files or an HTTP endpoint."""

    def load_csv(self, file_path: str, delimiter: str = ",") -> pd.DataFrame:
        """Read a CSV file into a DataFrame using the given delimiter."""
        return pd.read_csv(file_path, delimiter=delimiter)

    def load_json(self, file_path: str) -> pd.DataFrame:
        """Read a JSON file into a DataFrame."""
        return pd.read_json(file_path)

    def load_from_api(
        self,
        url: str,
        headers=None,
        params=None,
        timeout: float = 30.0,
    ) -> pd.DataFrame:
        """Fetch JSON from *url* with a GET request and wrap it in a DataFrame.

        Args:
            url: Endpoint to request.
            headers: Optional HTTP headers passed through to ``requests.get``.
            params: Optional query parameters passed through to ``requests.get``.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests`` waits indefinitely on an unresponsive
                server.

        Raises:
            requests.HTTPError: If the response status signals an error
                (raised by ``raise_for_status``).
        """
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()
        return pd.DataFrame(response.json())

In [None]:
# data_processing.py

import pandas as pd
from missing_data_mixin import MissingDataMixin


class DataProcessor(MissingDataMixin):
    """Basic cleaning and scaling operations on a DataFrame."""

    def remove_duplicates(self, df: pd.DataFrame, subset=None) -> pd.DataFrame:
        """Return *df* without duplicate rows, optionally comparing only *subset* columns."""
        return df.drop_duplicates(subset=subset)

    def normalize_column(self, df: pd.DataFrame, column: str) -> pd.DataFrame:
        """Return a copy of *df* with *column* min-max scaled.

        Values are mapped to ``(x - min) / (max - min)``. When every value in
        the column is identical the range is zero; in that case the values are
        mapped to 0 instead of dividing by zero and producing NaN.
        """
        df = df.copy()
        lowest = df[column].min()
        value_range = df[column].max() - lowest
        # A zero range means a constant column: divide by 1 so the shifted
        # values (all zero) come through rather than 0/0 = NaN.
        divisor = value_range if value_range != 0 else 1
        df[column] = (df[column] - lowest) / divisor
        return df

In [None]:
# data_validation.py

import pandas as pd
from missing_data_mixin import MissingDataMixin


class DataValidator(MissingDataMixin):
    """Simple boolean checks on a DataFrame."""

    def validate_no_missing(self, df: pd.DataFrame) -> bool:
        """Return True when *df* contains no missing values at all."""
        # `not` yields a plain Python bool, matching the annotation; the
        # previous `sum().sum() == 0` comparison produced a numpy.bool_.
        return not df.isna().to_numpy().any()

    def validate_numeric_columns(self, df: pd.DataFrame, columns: list) -> bool:
        """Return True when every column named in *columns* has a numeric dtype."""
        return all(pd.api.types.is_numeric_dtype(df[col]) for col in columns)

In [None]:
# main.py

from data_loader import DataLoader
from data_processing import DataProcessor

loader = DataLoader()
processor = DataProcessor()

df = loader.load_csv(file_path="data.csv")

# Step 1: number of missing values per column.
print("Количество пропусков:")
print(loader.count_missing_values(df=df))

# Step 2: report limited to columns that have missing values.
print("\nОтчёт по пропущенным значениям:")
print(loader.missing_report(df=df))

# Each call below starts again from the original `df`, so `df_filled` holds
# only the result of the most recent call.

# Step 3: fill with the column mean.
df_filled = processor.fill_missing(df=df, strategy="mean")

# Step 4: fill with the median, restricted to specific columns.
df_filled = processor.fill_missing(
    df=df,
    strategy="median",
    columns=["price", "quantity"],
)

# Step 5: fill with a constant value.
df_filled = processor.fill_missing(df=df, strategy="constant", constant_value={"category": "Unknown"})

: 