In [7]:

 
# Example of valid IPv4 address
print(ip_address(u'127.0.0.1'))

127.0.0.1


In [117]:
import os
import pandas as pd
import pymysql
from sqlalchemy import create_engine
import yaml
from dataclasses import dataclass
from pathlib import Path
import ipaddress
from videogamesforecasting.constants import *
from videogamesforecasting.utils.common import read_yaml, create_directories

@dataclass(frozen=True)
class DataIngestionConfig:
    s_type: str
    host: ipaddress.IPv4Address
    port: int
    database: str
    username: str
    password: str
    query: str
    d_type: str
    path: Path
    include_column_names: str





In [118]:
print(PARAMS_FILE_PATH)

params.yaml


In [119]:
class ConfigurationManager:
    def __init__(
        self,
        # from constants
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        (create_directories([self.config.artifacts_root]))

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        config = self.config.data_ingestion

        create_directories([os.path.split(config.destination.path)[0]])

        data_ingestion_config = DataIngestionConfig(
            s_type= config.source.type,
            host= config.source.host,
            port= config.source.port,
            database= config.source.database,
            username= config.source.username,
            password= config.source.password,
            query= config.source.query,
            d_type=config.destination.type,
            path= config.destination.path,
            include_column_names= config.destination.include_column_names
        )

        return data_ingestion_config

In [120]:
ConfigurationManager().get_data_ingestion_config()

[2024-02-03 21:49:18,380 - INFO - common - yaml file: config\config.yaml loaded successfully]
[2024-02-03 21:49:18,383 - INFO - common - yaml file: params.yaml loaded successfully]
[2024-02-03 21:49:18,385 - INFO - common - yaml file: schema.yaml loaded successfully]
[2024-02-03 21:49:18,387 - INFO - common - created directory at: artifacts]
[2024-02-03 21:49:18,389 - INFO - common - created directory at: artifacts/data]


DataIngestionConfig(s_type='mysql', host='127.0.0.1', port=3306, database='sales', username='gamesales', password='ubaid123', query='SELECT * FROM videogames;', d_type='csv', path='artifacts/data/video_games_sales.csv', include_column_names=True)

In [121]:
from videogamesforecasting.logging import logger
class DataIngestion:
    def __init__(self, config_path="config/config.yaml"):
        with open(config_path, "r") as config_file:
            self.config = yaml.safe_load(config_file)["data_ingestion"]

        self.source_config = self.config["source"]
        self.destination_config = self.config["destination"]

    def connect_to_mysql(self):
        engine = create_engine(
            f"mysql+pymysql://{self.source_config['username']}:{self.source_config['password']}@{self.source_config['host']}:{self.source_config['port']}/{self.source_config['database']}"
        )
        return engine.connect()

    def fetch_data_from_mysql(self):
        connection = self.connect_to_mysql()
        query = self.source_config["query"]
        data = pd.read_sql(query, connection)
        connection.close()
        return data

    def save_to_csv(self, data):
        path = self.destination_config["path"]
        include_column_names = self.destination_config.get("include_column_names", False)

        data.to_csv(path, index=False, header=include_column_names)

    def run_ingestion(self):
        data = self.fetch_data_from_mysql()
        self.save_to_csv(data)
        path = self.destination_config["path"]
        file=os.path.split(path)[1]

        logger.info(f"{file} is saved")
        



In [122]:
config_manager=ConfigurationManager()
config_manager.get_data_ingestion_config()
data_ingestion = DataIngestion()
data_ingestion.run_ingestion()

[2024-02-03 21:49:21,916 - INFO - common - yaml file: config\config.yaml loaded successfully]
[2024-02-03 21:49:21,919 - INFO - common - yaml file: params.yaml loaded successfully]
[2024-02-03 21:49:21,921 - INFO - common - yaml file: schema.yaml loaded successfully]
[2024-02-03 21:49:21,922 - INFO - common - created directory at: artifacts]
[2024-02-03 21:49:21,924 - INFO - common - created directory at: artifacts/data]
[2024-02-03 21:49:22,877 - INFO - 2186890764 - video_games_sales.csv is saved]


In [102]:

class DataIngestion:
    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def load_config(self,config_file):
        with open(config_file, 'r') as stream:
            return yaml.safe_load(stream)

    def fetch_data_from_mysql(config):
        # Create a MySQL connection
        config=config['data_ingestion']
        engine = create_engine(f"mysql+pymysql://{config['source']['username']}:{config['source']['password']}@{config['source']['host']}:{config['source']['port']}/{config['source']['database']}")

        # Execute the SQL query
        query = config['source']['query']
        df = pd.read_sql_query(query, engine)

        return df

    def save_to_csv(df, config):
        # Save DataFrame to CSV with column headings
        config=config['data_ingestion']
        df.to_csv(config['destination']['path'], index=False,mode='w')

    def main():
        config = load_config('config/config.yaml')

        # Fetch data from MySQL
        data_frame = fetch_data_from_mysql(config)

        # Save data to CSV
        save_to_csv(data_frame, config)

    if __name__ == "__main__":
        main()


In [12]:
import os
import pymysql
import zipfile
from videogamesforecasting.logging import logger

from videogamesforecasting.utils.common import get_size



In [77]:
class DataIngestion:
    def __init__(self, config: DataIngestionConfig):
        self.config = config


    
    def download_file(self):
        if not os.path.exists(self.config.local_data_file):
            filename, headers = request.urlretrieve(
                url = self.config.source_URL,
                filename = self.config.local_data_file
            )
            logger.info(f"{filename} download! with following info: \n{headers}")
        else:
            logger.info(f"File already exists of size: {get_size(Path(self.config.local_data_file))}")



    
    def extract_zip_file(self):
        """
        zip_file_path: str
        Extracts the zip file into the data directory
        Function returns None
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)

In [19]:
import os
os.chdir('../')

In [78]:
ConfigurationManager().get_data_ingestion_config()

[2024-02-03 19:17:08,426 - INFO - common - yaml file: config\config.yaml loaded successfully]
[2024-02-03 19:17:08,428 - INFO - common - yaml file: params.yaml loaded successfully]
[2024-02-03 19:17:08,430 - INFO - common - yaml file: schema.yaml loaded successfully]
[2024-02-03 19:17:08,432 - INFO - common - created directory at: artifacts]
[2024-02-03 19:17:08,433 - INFO - common - created directory at: artifacts/data]


DataIngestionConfig(s_type='mysql', host='127.0.0.1', port=3306, database='sales', username='gamesales', password='ubaid123', query='SELECT * FROM videogames;', d_type='csv', path='artifacts/data/video_games_sales.csv')

In [50]:
import os
os.path.split('artifacts/data/video_games_sales.csv')[0]

'artifacts/data'

In [45]:
dir

'artifacts/data'

In [76]:
config_filepath = CONFIG_FILE_PATH
config = read_yaml(config_filepath).data_ingestion

DataIngestionConfig(
            s_type= config.source.type,
            host= config.source.host,
            port= config.source.port,
            database= config.source.database,
            username= config.source.username,
            password= config.source.password,
            query= config.source.query,
            d_type=config.destination.type,
            path= config.destination.path
        )

[2024-02-03 19:16:47,952 - INFO - common - yaml file: config\config.yaml loaded successfully]


DataIngestionConfig(s_type='mysql', host='127.0.0.1', port=3306, database='sales', username='gamesales', password='ubaid123', query='SELECT * FROM videogames;', d_type='csv', path='artifacts/data/video_games_sales.csv')

In [69]:
config

'mysql'

In [81]:
import yaml
import pymysql
import pandas as pd

def load_config(file_path=config_filepath):
    with open(file_path, 'r') as config_file:
        config_data = yaml.safe_load(config_file)
    return config_data

def connect_to_mysql(config):
    connection = pymysql.connect(
        host=config['data_ingestion']['source']['host'],
        port=config['data_ingestion']['source']['port'],
        user=config['data_ingestion']['source']['username'],
        password=config['data_ingestion']['source']['password'],
        database=config['data_ingestion']['source']['database']
    )
    return connection

def execute_query(connection, query):
    with connection.cursor() as cursor:
        cursor.execute(query)
        result = cursor.fetchall()
    return result

def save_to_csv(data, file_path):
    df = pd.DataFrame(data)
    df.to_csv(file_path, index=False)

def main():
    config = load_config('config/config.yaml')

    mysql_connection = connect_to_mysql(config)
    query = config['data_ingestion']['source']['query']
    result = execute_query(mysql_connection, query)

    destination_path = config['data_ingestion']['destination']['path']
    save_to_csv(result, destination_path)

    mysql_connection.close()

if __name__ == "__main__":
    main()
    


In [87]:
os.makedirs('mango/1.txt')

In [103]:
def new(x,y):
    return x+y
    return x*y

In [104]:
new(3,6)

9