In [0]:
import requests
import json
from pyspark.sql.types import (
    StructType,
    StructField,
    StringType,
    IntegerType,
    DoubleType,
    BooleanType
)

# Step 1: Authenticate and get token
import os  # local import: only needed for the credential overrides just below

# Without an explicit timeout, requests will wait on a stalled server indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

auth_url = "https://dummyjson.com/auth/login"
# The defaults are the demo credentials this cell originally hard-coded.
# Set DUMMYJSON_USERNAME / DUMMYJSON_PASSWORD to use a different account
# without editing (and committing) the notebook.
credentials = {
    "username": os.environ.get("DUMMYJSON_USERNAME", "kminchelle"),
    "password": os.environ.get("DUMMYJSON_PASSWORD", "0lelplR"),
}
auth_response = requests.post(
    auth_url, json=credentials, timeout=REQUEST_TIMEOUT_SECONDS
)
# Stop on a rejected login instead of carrying an error payload forward.
auth_response.raise_for_status()
auth_payload = auth_response.json()
# NOTE(review): the original read only "token"; DummyJSON's current docs
# appear to name the field "accessToken" - confirm. Both are accepted here.
access_token = auth_payload.get("token") or auth_payload.get("accessToken")
if not access_token:
    # Previously a missing token silently produced the header "Bearer None".
    raise RuntimeError("Login response did not contain an access token")
# The token and the Authorization header are intentionally not printed:
# cell output is stored in the notebook file and shared along with it.

# Step 2: Fetch protected data
headers = {"Authorization": f"Bearer {access_token}"}
data_url = "https://dummyjson.com/products"
data_response = requests.get(
    data_url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
)
data_response.raise_for_status()
data_json = data_response.json()["products"]

# Step 3: Define schema to resolve type conflicts
schema = StructType([
    StructField("id", IntegerType(), True),
    StructField("title", StringType(), True),
    StructField("description", StringType(), True),
    # DoubleType rather than IntegerType: an integer column cannot hold a
    # fractional price, and schema verification rejects a float for it.
    # NOTE(review): assumes the API can return prices such as 9.99 - confirm.
    StructField("price", DoubleType(), True),
    StructField("discountPercentage", DoubleType(), True),
    StructField("rating", DoubleType(), True),
    StructField("stock", IntegerType(), True),
    StructField("brand", StringType(), True),
    StructField("category", StringType(), True),
    StructField("thumbnail", StringType(), True),
    # Kept as StringType, as in the original cell (ArrayType is not imported).
    StructField("images", StringType(), True),
])

# Step 4: Load into PySpark DataFrame with schema
# createDataFrame verifies every value against the schema by default and does
# not accept a Python int for a DoubleType field, so whole-number values in
# those columns are widened to float first. data_json itself is left untouched.
double_columns = [
    field.name for field in schema.fields if isinstance(field.dataType, DoubleType)
]
product_rows = []
for product in data_json:
    row = dict(product)
    for column in double_columns:
        value = row.get(column)
        # bool is a subclass of int; leave it alone so bad data still fails loudly.
        if isinstance(value, int) and not isinstance(value, bool):
            row[column] = float(value)
    product_rows.append(row)

df = spark.createDataFrame(product_rows, schema=schema)
display(df)

In [0]:
# main.py
import os

import requests
from pyspark.sql import SparkSession
from pyspark.sql.types import (
    StructType, StructField, StringType,
    IntegerType, DoubleType
)

class APIClient:
    """Small client for a JSON API that uses bearer-token authentication.

    The client logs in by POSTing ``credentials`` to ``auth_url`` and then
    sends the returned token in an ``Authorization: Bearer ...`` header when
    requesting ``data_url``.

    Args:
        config: Mapping with the keys ``"auth_url"``, ``"credentials"`` (sent
            as the JSON body of the login request) and ``"data_url"``. An
            optional ``"timeout"`` key gives the per-request timeout in
            seconds; it defaults to 30.
    """

    # Response fields that may carry the token, in lookup order.
    # NOTE(review): the original read only "token"; DummyJSON's current docs
    # appear to name the field "accessToken" - confirm.
    _TOKEN_KEYS = ("token", "accessToken")
    # Without a timeout, requests will wait on a stalled server indefinitely.
    _DEFAULT_TIMEOUT = 30

    def __init__(self, config):
        self.auth_url = config["auth_url"]
        self.credentials = config["credentials"]
        self.data_url = config["data_url"]
        self.timeout = config.get("timeout", self._DEFAULT_TIMEOUT)
        self.token = None

    def authenticate(self):
        """Authenticate and store the access token.

        Returns:
            The access token, which is also stored on ``self.token``.

        Raises:
            requests.HTTPError: If the login request returns an error status.
            RuntimeError: If the login response carries no token.
        """
        response = requests.post(
            self.auth_url, json=self.credentials, timeout=self.timeout
        )
        # Same error handling as fetch_data: a rejected login must not be
        # mistaken for a successful one.
        response.raise_for_status()
        payload = response.json()
        token = next(
            (payload[key] for key in self._TOKEN_KEYS if payload.get(key)), None
        )
        if not token:
            # Previously this case stored None and later sent "Bearer None".
            raise RuntimeError("Login response did not contain an access token")
        self.token = token
        return self.token

    def fetch_data(self):
        """Fetch data from the API using the stored token.

        Authenticates first when no token has been stored yet.

        Returns:
            The value of the ``"products"`` key of the JSON response, or an
            empty list when that key is absent.

        Raises:
            requests.HTTPError: If either request returns an error status.
            RuntimeError: If authentication yields no token.
        """
        if not self.token:
            self.authenticate()
        headers = {"Authorization": f"Bearer {self.token}"}
        response = requests.get(
            self.data_url, headers=headers, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json().get("products", [])
# Connection settings consumed by APIClient.
# The credential defaults are the demo values this file originally hard-coded;
# set DUMMYJSON_USERNAME / DUMMYJSON_PASSWORD in the environment to use a
# different account without editing (and committing) the source.
CONFIG = {
    "auth_url": "https://dummyjson.com/auth/login",
    "credentials": {
        "username": os.environ.get("DUMMYJSON_USERNAME", "kminchelle"),
        "password": os.environ.get("DUMMYJSON_PASSWORD", "0lelplR"),
    },
    "data_url": "https://dummyjson.com/products",
}

# Explicit schema for the product records loaded below.
SCHEMA = StructType([
    StructField("id", IntegerType(), True),
    StructField("title", StringType(), True),
    StructField("description", StringType(), True),
    # DoubleType rather than IntegerType: an integer column cannot hold a
    # fractional price, and schema verification rejects a float for it.
    # NOTE(review): assumes the API can return prices such as 9.99 - confirm.
    StructField("price", DoubleType(), True),
    StructField("discountPercentage", DoubleType(), True),
    StructField("rating", DoubleType(), True),
    StructField("stock", IntegerType(), True),
    StructField("brand", StringType(), True),
    StructField("category", StringType(), True),
    StructField("thumbnail", StringType(), True),
    StructField("images", StringType(), True)  # simplified
])


def _widen_ints_for_double_fields(records, schema):
    """Return copies of ``records`` with ints in DoubleType fields cast to float.

    createDataFrame verifies every value against the schema by default and
    does not accept a Python int for a DoubleType field, so a whole-number
    value such as ``4`` has to become ``4.0`` before loading.

    Args:
        records: Iterable of dicts keyed by column name.
        schema: StructType whose DoubleType fields select the columns to widen.

    Returns:
        A new list of new dicts; the input records are not modified.
    """
    double_columns = [
        field.name for field in schema.fields if isinstance(field.dataType, DoubleType)
    ]
    widened = []
    for record in records:
        row = dict(record)
        for column in double_columns:
            value = row.get(column)
            # bool is a subclass of int; leave it alone so bad data still fails loudly.
            if isinstance(value, int) and not isinstance(value, bool):
                row[column] = float(value)
        widened.append(row)
    return widened


# Spark session
spark = SparkSession.builder.appName("API Data Loader").getOrCreate()

# Create API client and fetch data
client = APIClient(CONFIG)
products_data = client.fetch_data()

# Load into DataFrame
df = spark.createDataFrame(
    _widen_ints_for_double_fields(products_data, SCHEMA), schema=SCHEMA
)
# NOTE(review): DataFrame.display() is presumably supplied by the notebook
# runtime (e.g. Databricks) rather than open-source PySpark - confirm before
# running this as a plain script.
df.display()