In [27]:
import os
import sys
import zipfile
from src.audio.constants import *
from src.audio.logger import logging
from src.audio.exception import CustomException
from src.audio.entity.config_entity import DataIngestionConfig
from src.audio.entity.artifact_entity import DataIngestionArtifacts
from src.audio.cloud_storage.s3_operations import S3Sync
   

In [None]:
class DataIngestion:
    """Fetch the raw dataset archive from S3 and unpack it for later stages.

    All paths are read from the supplied config object, which must expose
    ``download_dir``, ``zip_data_path``, ``unzip_data_dir`` and
    ``data_ingestion_artifact_dir``. Every failure is re-raised as
    ``CustomException``.
    """

    def __init__(self, data_ingestion_config: "DataIngestionConfig"):
        """Store the config and create the S3 sync helper.

        Args:
            data_ingestion_config: object exposing the path attributes listed
                in the class docstring.

        Raises:
            CustomException: if reading the config or building ``S3Sync`` fails.
        """
        try:
            self.data_ingestion_config = data_ingestion_config
            self.s3_sync = S3Sync()
            # Root directory for this stage's artifacts; created on demand by
            # initiate_data_ingestion().
            self.data_ingestion_artifact = self.data_ingestion_config.data_ingestion_artifact_dir
        except Exception as e:
            raise CustomException(e, sys)

    def get_data_from_cloud(self):
        """Sync ``S3_BUCKET_DATA_URI`` into ``download_dir``.

        The download is skipped when a file already exists at
        ``zip_data_path``.

        Raises:
            CustomException: if the directory cannot be created or the sync fails.
        """
        try:
            logging.info("Initiating data download from s3 bucket")
            print("Initiating data download from s3 bucket")
            download_dir = self.data_ingestion_config.download_dir
            zip_file_path = self.data_ingestion_config.zip_data_path

            if os.path.exists(zip_file_path):
                logging.info(f"Data already exists at {zip_file_path}. Skipping download")
                print(f"Data already exists at {zip_file_path}. Skipping download")
            else:
                os.makedirs(download_dir, exist_ok=True)
                logging.info(f"Data downloading from {S3_BUCKET_DATA_URI} to {download_dir}")
                print(f"Data downloading from {S3_BUCKET_DATA_URI} to {download_dir}")
                self.s3_sync.sync_folder_from_s3(folder=download_dir, aws_bucket_uri=S3_BUCKET_DATA_URI)
                logging.info("Data downloaded from s3")
        except Exception as e:
            logging.error("Download Failed")
            raise CustomException(e, sys)

    def unzip_data(self):
        """Extract the archive at ``zip_data_path`` into ``<unzip_data_dir>/data``.

        Extraction is skipped when that ``data`` directory already exists.

        Raises:
            CustomException: if the archive is missing, unreadable, or cannot
                be extracted.
        """
        try:
            logging.info('Unzipping downloaded file')
            print('Unzipping downloaded file')
            raw_zip_path = self.data_ingestion_config.zip_data_path
            unzip_dir = self.data_ingestion_config.unzip_data_dir
            data_dir = os.path.join(unzip_dir, "data")
            if os.path.isdir(data_dir):
                logging.info(f"unzipped folder already exists at {data_dir}")
                print(f"unzipped folder already exists at {data_dir}")
            else:
                # Open the archive BEFORE creating the target directory: the
                # directory's existence is what marks the data as "already
                # unzipped", so a missing or corrupt archive must not leave an
                # empty directory behind that makes later runs skip extraction.
                with zipfile.ZipFile(raw_zip_path, "r") as archive:
                    os.makedirs(data_dir, exist_ok=True)
                    archive.extractall(data_dir)
                logging.info(f"Unzipping complete. Extracted to {data_dir}")
        except Exception as e:
            logging.error("Failed to unzip")
            raise CustomException(e, sys)

    def initiate_data_ingestion(self) -> "DataIngestionArtifacts":
        """Run the full ingestion stage: create the artifact directory, download, unzip.

        Returns:
            DataIngestionArtifacts whose ``data_folder_path`` is the configured
            ``unzip_data_dir``.

        Raises:
            CustomException: if any step fails.
        """
        try:
            logging.info("Initiating Data Ingestion")
            os.makedirs(self.data_ingestion_artifact, exist_ok=True)
            self.get_data_from_cloud()
            self.unzip_data()
            # NOTE(review): files are extracted to <unzip_data_dir>/data while the
            # artifact records <unzip_data_dir> -- confirm consumers expect the parent.
            data_ingestion_artifact = DataIngestionArtifacts(
                data_folder_path=self.data_ingestion_config.unzip_data_dir
            )
            logging.info("Data ingestion complete")
            return data_ingestion_artifact
        except Exception as e:
            logging.error("Ingestion failed")
            raise CustomException(e, sys)



In [35]:
# Pass a config *instance*, as the constructor's type hint asks for; handing
# over the class itself only works while every setting is a class-level default.
ingest = DataIngestion(DataIngestionConfig())

In [36]:
# Run only the extraction step; the recorded output below shows the branch
# taken when the unzipped data folder already exists.
ingest.unzip_data()

Unzipping downloaded file
unzipped folder already exists at C:\Users\Vijay\Audio-Classification\artifacts\data_ingestion\unzip\data


In [19]:
import os
from getpass import getpass

# SECURITY: an AWS access key pair used to be hardcoded in this cell. It has
# been exposed through the notebook file and must be deactivated and rotated
# in IAM. Secrets must never be written into the notebook.
#
# Credentials are taken from the environment; if one is missing, prompt for it
# without echoing so the value is not stored in the cell source or its output.
for _aws_var in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
    if not os.environ.get(_aws_var):
        os.environ[_aws_var] = getpass(f"{_aws_var}: ")