In [1]:
import os

In [2]:
%pwd

'd:\\Projects\\DeepFake Voice Recognition\\audio-deepfake-detection\\research'

In [3]:
# The kernel starts in the 'research' folder (see the %pwd output above); step up
# one level so the working directory is the project root. The guard keeps the
# cell idempotent: re-running it after the move is a no-op instead of climbing
# one directory higher on every execution.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
%pwd

'd:\\Projects\\DeepFake Voice Recognition\\audio-deepfake-detection'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of settings for the data-ingestion stage.

    ``frozen=True`` makes instances read-only after construction.

    NOTE(review): dataclasses do not validate annotations, so the runtime type
    of each field is whatever the caller passes in; confirm that callers
    really supply ``Path`` objects for the ``Path``-annotated fields.
    """
    # Directory associated with this stage (created by ConfigManager.__init__).
    root_dir: Path
    # URL the archive is downloaded from (passed to urlretrieve as `url`).
    source_URL: str
    # Local path the downloaded archive is written to and later opened as a zip.
    local_data_file: Path
    # Directory the archive is extracted into.
    unzip_dir: Path

In [6]:
from audioClassifier.constants import *
from audioClassifier.utils.common import open_yaml_file, create_directories

In [7]:
class ConfigManager:
    """Loads the project's YAML settings and builds per-stage config objects.

    The loaded config is accessed by attribute (``config.artifacts_root``,
    ``config.data_ingestion.*``), so ``open_yaml_file`` is expected to return
    an object that supports attribute access.
    """

    def __init__(self, config_file = CONFIG_PATH, params_file = PARAMS_PATH):
        """Read both YAML files and make sure the artifact directories exist.

        Args:
            config_file: Path of the main configuration YAML.
            params_file: Path of the parameters YAML.
        """
        self.config = open_yaml_file(config_file)
        self.params = open_yaml_file(params_file)

        # Create the artifacts root and the ingestion stage directory up front
        # so later steps can write into them.
        create_directories([self.config.artifacts_root])
        create_directories([self.config.data_ingestion.root_dir])

    def read_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion settings from the ``data_ingestion`` section.

        Returns:
            DataIngestionConfig whose path fields are ``pathlib.Path`` objects,
            matching the dataclass annotations. ``source_URL`` is passed
            through unchanged.
        """
        section = self.config.data_ingestion

        # The YAML values are plain strings; wrap the filesystem entries so the
        # dataclass actually holds what its annotations promise.
        return DataIngestionConfig(
            root_dir=Path(section.root_dir),
            source_URL=section.source_URL,
            local_data_file=Path(section.local_data_file),
            unzip_dir=Path(section.unzip_dir),
        )

In [8]:
import os
import urllib.request as request
import zipfile
from audioClassifier import logger
from audioClassifier.utils.common import get_file_size

In [9]:
class DataIngestion:
    """Downloads the dataset archive and unpacks it, driven by a config object.

    The config is read through three attributes: ``source_URL``,
    ``local_data_file`` and ``unzip_dir``.
    """

    # The annotation is quoted so defining this class does not require
    # DataIngestionConfig to be resolvable at definition time.
    def __init__(self, config: "DataIngestionConfig"):
        """Store the ingestion settings; no I/O happens here."""
        self.config = config

    def download_file(self):
        """Download the archive unless ``config.local_data_file`` already exists.

        When the file is already present only its size is logged.

        Raises:
            Exception: any failure is logged and then re-raised, so callers do
                not continue to extraction with a missing archive.
        """
        try:
            if not os.path.exists(self.config.local_data_file):
                self._retrieve_file()
            else:
                logger.info(f"File already exists of size: {get_file_size(Path(self.config.local_data_file))}")
        except Exception as e:
            logger.error(f"Error occurred during file download: {e}")
            raise

    def extract_file(self):
        """Create ``config.unzip_dir`` if needed and extract the archive into it.

        Raises:
            Exception: any failure (missing or corrupt archive, filesystem
                error) is logged and then re-raised.
        """
        try:
            path = self.config.unzip_dir
            os.makedirs(path, exist_ok=True)
            self._extract_zip_file(path)
        except Exception as e:
            logger.error(f"Error occurred during file extraction: {e}")
            raise

    def _retrieve_file(self):
        """Fetch ``config.source_URL`` into ``config.local_data_file``.

        The download is written to a ``.part`` sibling first and only moved
        into place once it has completed. Otherwise an interrupted transfer
        would leave a truncated file that the exists-check in
        ``download_file`` would treat as a finished download on the next run.
        """
        target = self.config.local_data_file
        partial = f"{target}.part"
        try:
            _, headers = request.urlretrieve(
                url=self.config.source_URL,
                filename=partial
            )
            os.replace(partial, target)
        finally:
            # After a successful move there is nothing left to delete; on any
            # failure this removes the incomplete download.
            if os.path.exists(partial):
                os.remove(partial)
        logger.info(f"Downloaded {target} with the following info:\n{headers}")

    def _extract_zip_file(self, path: Path):
        """Extract every member of the downloaded archive into ``path``."""
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_file:
            zip_file.extractall(path)
            logger.info(f"Extracted file '{self.config.local_data_file}' to {path}")

In [10]:
# Run the ingestion stage end to end: load the configuration, download the
# archive, then unpack it. No try/except wrapper is needed here: catching an
# exception only to re-raise it adds nothing, and any exception raised by these
# calls already propagates and stops the cell.
config = ConfigManager()
data_ingestion_config = config.read_data_ingestion_config()
data_ingestion = DataIngestion(config = data_ingestion_config)
data_ingestion.download_file()
data_ingestion.extract_file()

2024-04-26 15:45:55,662 - common.py - INFO - YAML file 'config\config.yaml' was loaded successfully.
2024-04-26 15:45:55,664 - common.py - INFO - YAML file 'params.yaml' was loaded successfully.
2024-04-26 15:45:55,664 - common.py - INFO - Directory 'artifacts' created successfully or already exists.
2024-04-26 15:45:55,666 - common.py - INFO - Directory 'artifacts/data_ingestion' created successfully or already exists.
2024-04-26 15:47:50,567 - 1264272460.py - INFO - Downloaded artifacts/data_ingestion/data.zip with the following info:
X-GUploader-UploadID: ABPtcPpc5gMF9Rw8SyRi0r2X42RrEewX--HaDbgSxES_ffZX77ibEXbF08mZ_56c1n58_MPx3gC0kzLhCQ
Expires: Fri, 26 Apr 2024 22:45:55 GMT
Date: Fri, 26 Apr 2024 22:45:55 GMT
Cache-Control: private, max-age=0
Last-Modified: Thu, 24 Aug 2023 13:24:58 GMT
ETag: "4963755b04520b3b4931da5c3cab6c49"
x-goog-generation: 1692883498228830
x-goog-metageneration: 1
x-goog-stored-content-encoding: identity
x-goog-stored-content-length: 3954601429
Content-Type: 