In [4]:
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
import sys
import requests
import json

APP_NAME = "api_call_example"  # Define the application name

# Check if SparkContext already exists
if SparkContext._active_spark_context:
    sc = SparkContext._active_spark_context  # Reuse existing context
    if sc.appName != APP_NAME:
        print(f"Warning: Existing SparkContext found with different name ({sc.appName}).")
else:
    # Create a new SparkContext with the desired name
    sc = SparkContext(master="local[*]", appName=APP_NAME)

# Initialize GlueContext
glueContext = GlueContext(sc)
spark = glueContext.spark_session

# Verify the application name
print("Application Name:", spark.sparkContext.appName)


Application Name: api_call_example




In [24]:
# Step 2: Call the API
api_url = "https://randomuser.me/api/?results=5"  # Fetch 5 random users
response = requests.get(api_url)
print(response.json())
# Step 3: Convert API response to JSON
if response.status_code == 200:
    data = response.json()["results"]  # Extract the 'results' field
else:
    print("API Request Failed!")
    data = []

# Step 4: Convert JSON Data to Spark DataFrame
df = spark.createDataFrame(data)

{'results': [{'gender': 'female', 'name': {'title': 'Miss', 'first': 'بهاره', 'last': 'كامياران'}, 'location': {'street': {'number': 9694, 'name': 'شهید کبیری طامه'}, 'city': 'قرچک', 'state': 'آذربایجان غربی', 'country': 'Iran', 'postcode': 13596, 'coordinates': {'latitude': '15.1191', 'longitude': '102.7996'}, 'timezone': {'offset': '+2:00', 'description': 'Kaliningrad, South Africa'}}, 'email': 'bhrh.kmyrn@example.com', 'login': {'uuid': '4848f8a0-014e-40b6-b19b-f3a7c2f465f0', 'username': 'sadzebra304', 'password': 'mercury1', 'salt': 'ft5cTA9g', 'md5': '4065df7d82408b64273347e25e3389a6', 'sha1': 'd724c34a3e0478dbc28820d149e7e271b0d22cc9', 'sha256': '77d4b8d37d943f208b5cc036f904ac8ac0f15416c9c8f809362b16373570b497'}, 'dob': {'date': '1983-02-22T07:44:27.561Z', 'age': 42}, 'registered': {'date': '2011-04-22T04:00:33.854Z', 'age': 13}, 'phone': '019-24390319', 'cell': '0940-714-3211', 'id': {'name': '', 'value': None}, 'picture': {'large': 'https://randomuser.me/api/portraits/women/76.

In [11]:
df.count()

200

In [12]:
df.printSchema()

root
 |-- cell: string (nullable = true)
 |-- dob: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
 |-- email: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- id: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
 |-- location: map (nullable = true)
 |    |-- key: string
 |    |-- value: map (valueContainsNull = true)
 |    |    |-- key: string
 |    |    |-- value: long (valueContainsNull = true)
 |-- login: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
 |-- name: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
 |-- nat: string (nullable = true)
 |-- phone: string (nullable = true)
 |-- picture: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
 |-- registered: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueCo

In [22]:
df.select("name").printSchema()

root
 |-- name: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



In [17]:
df.select("name").show(truncate=False)

+-------------------------------------------------+
|name                                             |
+-------------------------------------------------+
|{last -> Budding, title -> Mr, first -> Bareld}  |
|{last -> Kinnunen, title -> Mr, first -> Rasmus} |
|{last -> Lo, title -> Mr, first -> Antoine}      |
|{last -> Ross, title -> Ms, first -> Inna}       |
|{last -> Clement, title -> Madame, first -> Anja}|
|{last -> Dufour, title -> Miss, first -> Romane} |
|{last -> Hanka, title -> Mr, first -> Miro}      |
|{last -> Gies, title -> Ms, first -> Käte}       |
|{last -> Gagné, title -> Miss, first -> Amelia}  |
|{last -> Knight, title -> Mr, first -> Ritthy}   |
|{last -> Beugelink, title -> Mr, first -> Faizel}|
|{last -> Walker, title -> Ms, first -> Natalie}  |
|{last -> Velasco, title -> Miss, first -> Mar}   |
|{last -> White, title -> Mr, first -> Maurice}   |
|{last -> Moore, title -> Mr, first -> Liam}      |
|{last -> Stöcker, title -> Mr, first -> Gunther} |
|{last -> Jo