In [0]:

# Text widget named "api_key" (empty by default); the ingestion class reads its value.
dbutils.widgets.text(name="api_key", defaultValue="")




In [0]:
from pyspark.sql import SparkSession
import requests
import json
import datetime

class Ingest:
    """Fetch the latest Fixer.io rates (USD, AUD, JPY, CNY) and land the raw JSON in DBFS.

    The API key is read from the ``api_key`` notebook widget. All failures are
    reported with ``print`` and signalled through return values rather than
    raised, so the notebook keeps running.
    """

    # Upper bound (seconds) on the HTTP call so a stalled connection cannot hang the cell.
    REQUEST_TIMEOUT_SECONDS = 10

    def __init__(self):
        """Create the raw-data directory and build the request URL from the widget value."""
        self.base_data_dir = "/mnt/raw_data"  # Databricks FS path without /dbfs prefix
        dbutils.fs.mkdirs(f"dbfs:{self.base_data_dir}/")  # Ensure directory exists

        # Pasted keys often carry a trailing space/newline that would corrupt the URL.
        self.API_KEY = dbutils.widgets.get("api_key").strip()

        self.BASE_URL = f"http://data.fixer.io/api/latest?access_key={self.API_KEY}&symbols=USD,AUD,JPY,CNY"

    def fetch_api_data(self):
        """Fetch raw data from the API.

        Returns:
            The decoded JSON payload on success, or ``None`` when the request
            fails, the status is not 200, the body is not JSON, or the payload
            itself reports ``"success": false``.
        """
        try:
            response = requests.get(self.BASE_URL, timeout=self.REQUEST_TIMEOUT_SECONDS)
        except requests.exceptions.RequestException as exc:
            # Only the exception type is printed: the full message can embed the
            # request URL, which contains the API key.
            print(f"Error fetching data: {type(exc).__name__}")
            return None

        if response.status_code != 200:
            print(f"Error fetching data: {response.status_code}")
            return None

        try:
            payload = response.json()
        except ValueError:
            print("Error fetching data: response body is not valid JSON")
            return None

        # Per the Fixer API docs, errors (bad key, quota, ...) arrive as HTTP 200
        # with "success": false and an "error" object; treat those as failures.
        if isinstance(payload, dict) and not payload.get("success", True):
            error = payload.get("error")
            details = error if isinstance(error, dict) else {}
            print(
                f"Error fetching data: API error {details.get('code')} - "
                f"{details.get('info') or details.get('type')}"
            )
            return None

        return payload

    def save_raw_data(self, data):
        """Save raw JSON data as ``<YYYY-MM-DD>.json``, dated from the payload's timestamp (UTC).

        Args:
            data: Decoded API payload; must contain a numeric ``timestamp``.

        Returns:
            The DBFS path (without the ``dbfs:`` scheme) that was written, or
            ``None`` if nothing was saved.
        """
        if not data:
            print("No data to save.")
            return None

        # Extract timestamp and format it as YYYY-MM-DD
        timestamp = data.get("timestamp", None)
        if not timestamp:
            print("Missing timestamp in API response.")
            return None

        try:
            # Timezone-aware replacement for the deprecated utcfromtimestamp().
            snapshot_time = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            print(f"Invalid timestamp in API response: {timestamp!r}")
            return None

        date_str = snapshot_time.strftime('%Y-%m-%d')
        file_path = f"{self.base_data_dir}/{date_str}.json"

        # Save JSON data using Databricks utilities
        dbutils.fs.put(f"dbfs:{file_path}", json.dumps(data), overwrite=True)

        print(f"Raw data saved to {file_path}")
        return file_path

    def raw_ingest(self):
        """Fetch data from the API and store the raw response in DBFS.

        Returns:
            ``True`` if a file was written, ``False`` otherwise.
        """
        print("\tFetching raw data from API...", end='')
        data = self.fetch_api_data()
        saved_path = self.save_raw_data(data) if data else None
        # Report the real outcome instead of an unconditional "Done".
        print("Done" if saved_path else "Failed")
        return saved_path is not None

# Get (or create) the Spark session registered under the "FixerDataIngestion" app name
spark = (
    SparkSession.builder
    .appName("FixerDataIngestion")
    .getOrCreate()
)

# Build the ingester and pull the raw API response into DBFS
ingest_instance = Ingest()
ingest_instance.raw_ingest()


	Fetching raw data from API...Wrote 159 bytes.
Raw data saved to /mnt/raw_data/2025-02-18.json
Done


In [0]:
# List the raw-data directory to confirm what has been ingested
display(dbutils.fs.ls("/mnt/raw_data/"))


path,name,size,modificationTime
dbfs:/mnt/raw_data/2025-02-18.json,2025-02-18.json,159,1739849402000


In [0]:
class TestRawInjection:
    """Check that an ingested raw file is present in the raw-data directory."""

    # File checked when runTests() is called without an explicit filename.
    DEFAULT_FILENAME = "2025-02-18.json"

    def __init__(self):
        """Point the checker at the directory the ingestion step writes to."""
        self.base_data_dir = "/mnt/raw_data"

    def file_exist(self, filename):
        """Report whether ``filename`` is listed in the raw-data directory.

        Args:
            filename: Bare file name (no directory), e.g. ``"2025-02-18.json"``.

        Returns:
            ``True`` if the file is listed, ``False`` if it is missing or the
            directory could not be listed.
        """
        print("\tStarting search...", end='')
        found = False
        try:
            # Get the names of the files in the directory
            files = {file.name for file in dbutils.fs.ls(self.base_data_dir)}
            found = filename in files

            if found:
                print(f"\n✅ File '{filename}' has been ingested.")
            else:
                print(f"\n❌ File '{filename}' not found.")

        except Exception as e:
            # A listing failure (e.g. missing directory) counts as "not found";
            # it is reported here and surfaced to the caller via the return value.
            print(f"\n❌ Error while checking file: {e}")

        print("\nDone.")
        return found

    def runTests(self, filename=None):
        """Run the file-presence check and fail loudly if it does not pass.

        Args:
            filename: File to look for; defaults to ``DEFAULT_FILENAME``. Pass
                the date-stamped name of the snapshot that was just ingested.

        Returns:
            ``True`` when the check passes.

        Raises:
            AssertionError: If the file is missing or the directory cannot be listed.
        """
        target = self.DEFAULT_FILENAME if filename is None else filename
        if not self.file_exist(target):
            # Raised explicitly (not via `assert`) so the check survives `python -O`.
            raise AssertionError(f"Raw file '{target}' not found in {self.base_data_dir}")
        print("\nTest completed.\n")
        return True

# Run the test: build the checker and execute its file-presence check
# against the raw-data directory.
test_instance = TestRawInjection()
test_instance.runTests()


	Starting search...
✅ File '2025-02-18.json' has been ingested.

Done.

Test completed.

