In [None]:
!pip install boto3

Collecting boto3
  Downloading boto3-1.28.1-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.7/135.7 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting botocore<1.32.0,>=1.31.1 (from boto3)
  Downloading botocore-1.31.1-py3-none-any.whl (11.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.0/11.0 MB[0m [31m33.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3)
  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Collecting s3transfer<0.7.0,>=0.6.0 (from boto3)
  Downloading s3transfer-0.6.1-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jmespath, botocore, s3transfer, boto3
Successfully installed boto3-1.28.1 botocore-1.31.1 jmespath-1.0.1 s3transfer-0.6.1


In [None]:
import datetime
import json
import logging
import os
import time
from os.path import join, dirname

import boto3
import pandas as pd
import requests
import yfinance as yf
from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError
from dotenv import load_dotenv

# Load environment variables
# `__file__` is not defined when this code runs as a notebook cell, so fall
# back to the current working directory when locating the .env file.
try:
    _env_dir = dirname(__file__)
except NameError:
    _env_dir = os.getcwd()
dotenv_path = join(_env_dir, ".env")
load_dotenv(dotenv_path)

# Configure logging
logging.basicConfig(filename='data_ingestion.log', level=logging.INFO,
                    format='%(asctime)s:%(levelname)s:%(message)s')


def _env_list(name):
    """Return the comma-separated environment variable `name` as a list.

    Items are stripped of surrounding whitespace and empty items are dropped.
    An unset variable yields an empty list instead of raising AttributeError
    on `None.split`.
    """
    return [item.strip() for item in os.getenv(name, '').split(',') if item.strip()]


# Define the Alpha Vantage API and API key
api_key = os.getenv('API_KEY')
base_url = 'https://www.alphavantage.co/query?'

# Define Bank Ticker Symbols that you want to pull
symbols = _env_list('SYMBOLS')

# Define the API functions to retrieve data
functions = _env_list('FUNCTIONS')

if not symbols or not functions:
    # Make the misconfiguration visible in the log; with either list empty
    # the Alpha Vantage loop in main() has nothing to request.
    logging.warning("SYMBOLS and/or FUNCTIONS is not set; no Alpha Vantage data will be requested")

# Define FDIC institutions list dataset URL
fdic_url = os.getenv('FDIC_URL')

# Initialize the S3 client
s3 = boto3.client('s3')

# Initialize requests Session
session = requests.Session()

def upload_to_aws(data, bucket, s3_file):
    """Serialise `data` to JSON and store it in S3 under `bucket`/`s3_file`.

    Uses the module-level `s3` client.

    Args:
        data: JSON-serialisable object. Values that `json` cannot encode
            natively (e.g. dates or timestamps) are written as `str(value)`
            instead of raising TypeError.
        bucket: Name of the destination bucket.
        s3_file: Object key to write.

    Returns:
        True when the object was stored, False when serialisation or the
        upload failed. Failures are logged rather than raised so that one
        bad upload does not abort the rest of the ingestion run.
    """
    try:
        body = json.dumps(data, default=str)
    except (TypeError, ValueError) as exc:
        # Still possible with default=str, e.g. non-string dict keys or
        # circular references.
        logging.error(f"Could not serialise payload for {s3_file}: {exc}")
        return False

    try:
        s3.put_object(Body=body, Bucket=bucket, Key=s3_file)
    except NoCredentialsError:
        logging.error("No AWS Credentials provided")
        return False
    except (BotoCoreError, ClientError) as exc:
        # Any other client-side or service-side failure raised by botocore.
        logging.error(f"Upload failed for {s3_file}: {exc}")
        return False

    logging.info(f"Upload Successful for {s3_file}")
    return True

def get_data(base_url, api_key, function, symbol, timeout=30):
    """Fetch one Alpha Vantage payload for `symbol` using API `function`.

    The request goes through the module-level `session`.

    Args:
        base_url: Query endpoint of the API.
        api_key: API key sent as the `apikey` query parameter.
        function: Value of the `function` query parameter.
        symbol: Ticker symbol to query.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the whole run.

    Returns:
        The decoded JSON payload, or None when the request fails, the status
        is not 200, the body is not valid JSON, or the payload is an API
        error/throttling notice. Every failure is logged.
    """
    params = {
        'function': function,
        'symbol': symbol,
        'apikey': api_key,
    }

    try:
        response = session.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        logging.error(f"Failed to fetch data for {symbol} using {function}: {exc}")
        return None

    if response.status_code != 200:
        logging.error(f"Failed to fetch data for {symbol} using {function}")
        return None

    try:
        data = response.json()
    except ValueError as exc:
        logging.error(f"Invalid JSON for {symbol} using {function}: {exc}")
        return None

    # NOTE(review): Alpha Vantage appears to report invalid calls and rate
    # limiting with HTTP 200 and one of these top-level keys instead of an
    # error status - confirm against the API plan in use. Such payloads are
    # not market data and must not be uploaded as if they were.
    if isinstance(data, dict):
        for notice_key in ('Error Message', 'Note', 'Information'):
            if notice_key in data:
                logging.error(
                    f"API returned '{notice_key}' for {symbol} using {function}: {data[notice_key]}"
                )
                return None

    print(f"Data for {symbol} using {function}:")
    print(json.dumps(data, indent=4))  # Print the JSON response
    return data

def fetch_historical_stock_data_yf(symbols, start="2017-01-01", end="2022-03-31"):
    """Download price history for each symbol via yfinance.

    Args:
        symbols: Iterable of ticker symbols.
        start: First date of the history window, passed to `Ticker.history`.
        end: End date of the history window, passed to `Ticker.history`.

    Returns:
        Dict mapping each symbol that returned data to a list of row
        records. Each record includes the frame's index (the row date) as a
        regular field; datetime values are rendered as ISO-8601 strings so
        the result can be passed straight to `json.dumps`. Symbols with no
        data are logged and omitted.
    """
    stocks_data = {}
    for symbol in symbols:
        stock = yf.Ticker(symbol)
        hist_data = stock.history(start=start, end=end)

        if hist_data.empty:
            logging.error(f"No historical data available for {symbol}")
            continue

        # to_dict(orient='records') discards the index, which is where the
        # row date lives; promote it to a column first so it is kept.
        frame = hist_data.reset_index()
        for column in frame.columns:
            if pd.api.types.is_datetime64_any_dtype(frame[column]):
                frame[column] = frame[column].map(lambda ts: ts.isoformat())

        stocks_data[symbol] = frame.to_dict(orient='records')

    return stocks_data

def fetch_fdic_dataset(fdic_url):
    """Read the FDIC CSV at `fdic_url` and return its rows as a list of dicts.

    Any exception raised while reading or converting the data is logged and
    None is returned instead.
    """
    try:
        institutions = pd.read_csv(fdic_url)
        records = institutions.to_dict(orient='records')
    except Exception as err:
        logging.error(f"Failed to fetch FDIC dataset: {err}")
        return None
    else:
        return records

def _timestamped_key(prefix):
    """Return `<prefix>_<YYYY-MM-DD-HH-MM-SS>.json` using the current local time."""
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    return f"{prefix}_{timestamp}.json"


def main(bucket_name='your_bucket_name', batch_size=5, pause_seconds=60):
    """Run the full ingestion: Alpha Vantage, yfinance history, FDIC dataset.

    Reads the module-level `symbols`, `functions`, `base_url`, `api_key` and
    `fdic_url`, and uploads every non-empty result to S3 as a timestamped
    JSON object.

    Args:
        bucket_name: Destination S3 bucket (replace the placeholder default
            with your bucket name).
        batch_size: Number of symbols processed between pauses.
        pause_seconds: Seconds to wait between consecutive symbol batches.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # NOTE(review): each batch issues batch_size * len(functions) requests
    # before pausing; if the API quota is counted per request, lower
    # batch_size accordingly.
    for i in range(0, len(symbols), batch_size):  # Iterate over symbols in batches
        if i:
            # Pause only between batches; waiting after the final batch
            # would delay the remaining steps for no benefit.
            time.sleep(pause_seconds)

        batch_symbols = symbols[i:i + batch_size]

        for function in functions:
            for symbol in batch_symbols:
                data = get_data(base_url, api_key, function, symbol)
                if data is not None:
                    upload_to_aws(data, bucket_name, _timestamped_key(f"{symbol}_{function}"))

    # Fetch historical stock data
    stocks_data = fetch_historical_stock_data_yf(symbols)
    if stocks_data:
        upload_to_aws(stocks_data, bucket_name, _timestamped_key("Historical_Stock_Data"))

    # Fetch FDIC data
    fdic_data = fetch_fdic_dataset(fdic_url)
    if fdic_data:
        upload_to_aws(fdic_data, bucket_name, _timestamped_key("FDIC_Dataset"))

# Entry point: run the whole ingestion pipeline when this code is executed
# directly (module name "__main__") rather than imported.
if __name__ == "__main__":
    main()

ModuleNotFoundError: ignored