##### Task 3: Create a child notebook "4_child_nb_dataload" and write code to load data.

In [0]:
    
import requests
from pyspark.sql import SparkSession

# Obtain the SparkSession for this notebook (getOrCreate hands back an
# existing session when there is one instead of building a second one).
spark = (
    SparkSession.builder
    .appName("CSV_API_to_Volume")
    .getOrCreate()
)

# 1. Obtain CSV data via API
# The response body is kept as text in `csv_data` for the write step below.
api_url = "https://public.tableau.com/app/sample-data/mobile_os_usage.csv"
try:
    # Bound the request: without a timeout, requests waits indefinitely on an
    # unresponsive server and the notebook would hang.
    response = requests.get(api_url, timeout=30)
    # Turn 4xx/5xx responses into exceptions so an error page is never
    # treated as CSV content.
    response.raise_for_status()
    csv_data = response.text
except requests.exceptions.RequestException as e:
    print(f"Error fetching data from API: {e}")
    # Re-raise instead of calling exit(): exit() is the interactive-shell
    # helper and is not meant for programs, whereas re-raising stops the
    # run and preserves the original error and traceback.
    raise

# 2. Write the CSV data to a Databricks Unity Catalog volume
# The target file path is assembled from the catalog, schema, volume and
# file name below.
catalog_name = "firstcatalog"
schema_name = "default"
volume_name = "usage_metrics"
file_name = "mobile_os_usage.csv"

volume_path = f"/Volumes/{catalog_name}/{schema_name}/{volume_name}/{file_name}"

# NOTE(review): no visible cell creates this volume; confirm that
# firstcatalog.default.usage_metrics exists before running.
try:
    # overwrite=True replaces any file left at this path by an earlier run.
    dbutils.fs.put(volume_path, csv_data, overwrite=True)
    print(f"Successfully wrote data to volume: {volume_path}")
except Exception as e:
    print(f"Error writing data to volume: {e}")
    # Halt on failure: otherwise the verification read that follows could
    # display a file left over from an earlier run as if this load succeeded.
    raise

# Optional sanity check: load the file back from the volume and display it.
# Any failure here is only reported; it does not stop the notebook.
try:
    df = (
        spark.read
        .options(header=True, inferSchema=True)
        .csv(volume_path)
    )
    df.show()
except Exception as e:
    print(f"Error reading data from volume: {e}")



###### Loading a CSV file a different way

In [0]:
%sql
-- Create the volume workspace.default.wimbledons_champions; IF NOT EXISTS makes this a no-op on re-runs.
CREATE VOLUME IF NOT EXISTS workspace.default.wimbledons_champions;


In [0]:
import requests
# Download the sample CSV and store it in the volume.
# The timeout keeps an unresponsive server from hanging the notebook, and
# raise_for_status() stops on 4xx/5xx responses so an HTTP error page is
# never written to the volume as if it were CSV data.
response = requests.get("https://public.tableau.com/app/sample-data/wimbledons_champions.csv", timeout=30)
response.raise_for_status()
dbutils.fs.put("/Volumes/workspace/default/wimbledons_champions/wimbledons_champions.csv", response.text, overwrite=True)


In [0]:
%fs ls /Volumes/workspace/default/wimbledons_champions/wimbledons_champions.csv

In [0]:
%fs head /Volumes/workspace/default/wimbledons_champions/wimbledons_champions.csv

In [0]:
%sql
-- Remove any previous copy of the table before it is recreated below.
-- IF EXISTS keeps this cell from failing on a first run, when the table has not been created yet.
DROP TABLE IF EXISTS wimbledons_champions

In [0]:
%python
# Load the CSV from the volume: first row is the header, column types are
# inferred, and samplingRatio=0.5 is passed to the reader for that inference.
df = (
    spark.read
    .options(header=True, inferSchema=True, samplingRatio=0.5)
    .csv("/Volumes/workspace/default/wimbledons_champions/wimbledons_champions.csv")
)

# Clean up the column names in a single pass per name:
#   space, hyphen, dot               -> underscore
#   parentheses and apostrophes      -> removed
# str.maketrans(from, to, delete) builds exactly that character mapping.
df = df.toDF(*(
    name.translate(str.maketrans(" -.", "___", "()'"))
    for name in df.columns
))

# Persist the cleaned DataFrame as the table "wimbledons_champions".
# No save mode is set, so the writer's default applies.
# NOTE(review): the default mode is expected to raise if the table already
# exists, which would explain the DROP TABLE cell before this one; confirm.
df.write.saveAsTable("wimbledons_champions")

In [0]:
%sql
-- Display every row and column of the saved table.
SELECT *
FROM wimbledons_champions