In [1]:
import os,sys
## go to the main project directory
# `os.chdir("..")` is relative to the *current* directory, so re-running this
# cell would climb one more level each time. The sentinel below limits the move
# to a single execution per kernel (a kernel restart resets both the working
# directory and the sentinel).
if "_PROJECT_ROOT" not in globals():
    os.chdir("..")
    _PROJECT_ROOT = os.getcwd()

In [2]:
from dataclasses import dataclass, field
from pathlib import Path
from box import ConfigBox
import zipfile
from src.textsummarization.logger import Logger
from src.textsummarization.exception import TSException



# Shared logger instance used by the classes and the pipeline cell in this notebook.
logger = Logger()

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage.

    Instances are built by ``ConfigurationManager.get_data_ingestion_config``
    from the ``data_ingestion`` section of the loaded configuration.
    ``frozen=True`` makes attribute assignment after construction an error.
    """
    # Stage directory; handed to ``create_directories`` when the config is built.
    root_dir: Path
    # Directory containing the dataset file; archives are extracted beneath it.
    source_dir: Path
    # Name of the dataset file inside ``source_dir`` (e.g. a ``.zip`` archive).
    file_name: str

In [3]:
from src.textsummarization.constants import *
from src.textsummarization.utils import read_yaml, create_directories, get_size

In [4]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects."""

    def __init__(self, config_file_path: Path = CONFIG_FILE_PATH, params_file_path: Path = PARAMS_FILE_PATH):
        """Read the config and params files and prepare the artifacts root.

        Args:
            config_file_path: Location of the main configuration YAML.
            params_file_path: Location of the parameters YAML.
        """
        # NOTE(review): read_yaml(..., return_configbox=True) presumably returns
        # a ConfigBox, which is what allows the attribute-style access below.
        self.config = read_yaml(config_file_path, return_configbox=True)
        self.params = read_yaml(params_file_path, return_configbox=True)
        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self)->DataIngestionConfig:
        """Build the data-ingestion settings from the ``data_ingestion`` section.

        The stage's ``root_dir`` and ``source_dir`` are passed to
        ``create_directories`` before the config object is returned.

        Returns:
            DataIngestionConfig: Settings whose directory fields are real
            ``Path`` objects, matching the dataclass annotations.
        """
        config = self.config.data_ingestion
        create_directories([config.root_dir, config.source_dir])
        logger.info(f"<<<< {config.source_dir} intialized to work with datasets... >>>>")
        return DataIngestionConfig(
            # The raw config values are presumably plain strings; coerce them so
            # the fields honour their declared ``Path`` type.
            root_dir = Path(config.root_dir),
            source_dir = Path(config.source_dir),
            file_name = config.file_name
        )
    
    

In [5]:
class DataIngestion:
    """Unpacks the dataset archive described by a ``DataIngestionConfig``."""

    def __init__(self, config:DataIngestionConfig):
        """Keep the ingestion settings; ``config.source_dir`` is where archives are looked up."""
        self.config = config

    def extract_zip_file(self, file_name:str):
        """Extract ``<source_dir>/<file_name>`` into a folder named after the archive.

        The destination is ``<source_dir>/<file_name without its extension>``
        and is created if it does not exist yet.

        Args:
            file_name: Name of the zip archive located in ``config.source_dir``.

        Raises:
            TSException: Wraps any error raised while opening or extracting
                the archive (missing file, corrupt archive, ...).
        """
        try:
            # Open the archive itself; the extension-less path is only the
            # extraction target, never a readable zip file.
            archive_path = os.path.join(self.config.source_dir, file_name)
            # splitext removes just the final extension, so names that contain
            # ".zip" earlier in the string are not truncated.
            unzip_path = os.path.join(self.config.source_dir, os.path.splitext(file_name)[0])
            with zipfile.ZipFile(archive_path, 'r') as handle:
                os.makedirs(unzip_path, exist_ok=True)
                handle.extractall(unzip_path)
                logger.info(f"{file_name} has been unzipped in path {unzip_path}")
        except Exception as e:
            # Failures are errors, not informational messages.
            logger.error(TSException(e,sys))
            raise TSException(e,sys) from e

In [11]:
## Pipeline
# Build the ingestion settings, then unpack the dataset when it ships as a zip.
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    dataset_file_name = data_ingestion_config.file_name
    # Validate first: calling .endswith() on a missing (None) name would raise
    # AttributeError and hide the actionable message below.
    if not dataset_file_name or not dataset_file_name.strip():
        raise FileNotFoundError('Please ensure dataset file_name is provided in config.yml and dataset itself is available in data/datasets directory')
    # Only zip archives need unpacking; any other non-empty name is left untouched.
    if dataset_file_name.endswith('.zip'):
        DataIngestion(data_ingestion_config).extract_zip_file(dataset_file_name)
except TSException as e:
    # Already wrapped by the failing step; log it once more here and re-raise
    # unchanged instead of nesting one TSException inside another.
    logger.error(e)
    raise
except Exception as e:
    logger.error(TSException(e,sys))
    raise TSException(e,sys) from e



[ 2024-12-27 16:43:45,374 ] 18 TextSummarizer - INFO - config.yml has been loaded successfully.
[ 2024-12-27 16:43:45,378 ] 18 TextSummarizer - INFO - params.yml has been loaded successfully.
[ 2024-12-27 16:43:45,381 ] 11 TextSummarizer - INFO - <<<< data/datasets intialized to work with datasets... >>>>
[ 2024-12-27 16:43:45,384 ] 10 TextSummarizer - ERROR - Error occured in python script name C:\Users\ict-tyson\AppData\Local\Temp\ipykernel_2632\593951052.py line number 8 error message Please ensure dataset file_name is provided in config.yml and dataset itself is available in data/datasets directory


TSException: Error occured in python script name C:\Users\ict-tyson\AppData\Local\Temp\ipykernel_2632\593951052.py line number 8 error message Please ensure dataset file_name is provided in config.yml and dataset itself is available in data/datasets directory