In [0]:
%pip install geopy

Python interpreter will be restarted.
Collecting geopy
  Downloading geopy-2.3.0-py3-none-any.whl (119 kB)
Collecting geographiclib<3,>=1.52
  Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-2.3.0
Python interpreter will be restarted.


In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, DoubleType, StringType
import pandas as pd
import numpy as np
import io
import requests
from geopy.geocoders import Nominatim

In [0]:
# Cities to geocode and download data for; the download loop visits them in this order.
cities = [
    "Singapore", "New York", "Los Angeles", "London", "Sydney",
    "Toronto", "Moscow", "Tokyo", "Hong Kong", "Dubai",
    "Cape Town", "Rabat", "Rio de Janeiro", "Mumbai", "Beijing",
    "Paris", "Berlin", "Rome", "Seoul", "Bangkok",
    "Kuala Lumpur", "Manila", "Hanoi", "Taipei", "Buenos Aires",
    "Santiago", "Lima", "Bogota", "Mexico City", "Jakarta",
    "Cairo", "Johannesburg", "Nairobi", "Lagos", "Casablanca",
    "Istanbul", "Madrid", "Barcelona", "Athens", "Amsterdam",
    "Brussels", "Vienna", "Zurich", "Stockholm", "Oslo",
    "Helsinki", "Dublin", "Warsaw", "Prague", "Budapest",
]

In [0]:
# NASA POWER daily point endpoint; the download loop queries it once per city.
url = "https://power.larc.nasa.gov/api/temporal/daily/point"

# Parameter codes requested from the API (comma-joined when the query string is built).
parameters = [
    "T10M",
    "CLOUD_AMT",
    "QV10M",
    "PW",
    "PS",
    "GLOBAL_ILLUMINANCE",
    "WS10M",
    "EVLAND",
]

# Requested period and user community, passed verbatim as query arguments.
# NOTE(review): confirm the endpoint accepts bare years for start/end rather than full dates.
start, end = "1981", "2021"
community = "ag"

In [0]:
def get_long_lat(city : str):
    """Geocode a city name with Nominatim and return its coordinates.

    Args:
        city: Free-text place name passed to the geocoder.

    Returns:
        A ``(longitude, latitude)`` tuple -- note the order: longitude first.

    Raises:
        ValueError: If the geocoder returns no match for ``city``.
    """
    geolocator = Nominatim(user_agent="MyApp")
    location = geolocator.geocode(city)
    # geocode() yields None when nothing matches; fail with a message that
    # names the city instead of an AttributeError on None.
    if location is None:
        raise ValueError(f"Could not geocode city: {city!r}")
    return location.longitude, location.latitude

In [0]:
# Local Spark session using all available cores ("local[*]"), registered under the app name "project".
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("project")
    .getOrCreate()
)

In [0]:
# Column layout of the combined dataset: the two date columns, one column per
# requested parameter, then the three columns this notebook appends per city.
schema = StructType([
    StructField('YEAR', IntegerType(), True),
    StructField('DOY', IntegerType(), True),
    StructField('T10M', DoubleType(), True),
    StructField('CLOUD_AMT', DoubleType(), True),
    StructField('QV10M', DoubleType(), True),
    StructField('PW', DoubleType(), True),
    StructField('PS', DoubleType(), True),
    StructField('GLOBAL_ILLUMINANCE', DoubleType(), True),
    StructField('WS10M', DoubleType(), True),
    StructField('EVLAND', DoubleType(), True),
    StructField('latitude', DoubleType(), True),
    StructField('longitude', DoubleType(), True),
    StructField('City', StringType(), True)
])
# Empty frame that supplies the column names; each city's rows are unioned onto it.
final_sdf = spark.createDataFrame([], schema)

# The parameter list is the same for every city, so build it once.
param_str = ",".join(parameters)
# Upper bound (seconds) on each HTTP call so a stalled connection cannot hang the cell.
request_timeout_s = 120

for city in cities:
    long, lat = get_long_lat(city)
    query = f"{url}?parameters={param_str}&community={community}&longitude={long}&latitude={lat}&start={start}&end={end}&header=false&format=CSV"

    try:
        response = requests.get(query, timeout=request_timeout_s)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status: report and move on to the next city.
        print(f"Error: request failed for {city}: {exc}")
        continue

    if response.status_code == 200:
        data = pd.read_csv(io.StringIO(response.content.decode("utf-8")))
        data['latitude'] = lat
        data['longitude'] = long
        data['City'] = city  # same spelling as the schema field

        sdf = spark.createDataFrame(data)
        # union() matches columns by position, so the CSV column order must
        # follow the schema above; the result keeps final_sdf's column names.
        final_sdf = final_sdf.union(sdf)
    else:
        print(f"Error: {response.status_code} ({city})")

In [0]:
# Order the combined rows chronologically: by year, then by day of year.
sorted_final_sdf = final_sdf.orderBy("YEAR", "DOY")

In [0]:
from pyspark.sql.functions import mean, col, when

# Per-column mean over all rows, ignoring entries equal to -999.0 (the value
# this notebook treats as "missing"). All columns are aggregated in a single
# pass instead of launching one Spark job per column: when() without
# otherwise() yields NULL for excluded entries, and mean() skips NULLs.
mean_dict = sorted_final_sdf.agg(
    *[
        mean(when(col(c) != -999.0, col(c))).alias(c)
        for c in sorted_final_sdf.columns
    ]
).first().asDict()

In [0]:
from pyspark.sql.functions import col, when

# Columns that identify a row (date, location, city) rather than carry a
# measurement; these are never imputed.
non_measurement_cols = ["YEAR", "DOY", "latitude", "longitude", "City"]

# Replace every -999.0 entry in a measurement column with that column's mean
# from mean_dict; all other values pass through unchanged.
for col_name in sorted_final_sdf.columns:
    if col_name not in non_measurement_cols:
        sorted_final_sdf = sorted_final_sdf.withColumn(
            col_name,
            when(col(col_name) == -999.0, mean_dict[col_name]).otherwise(col(col_name)),
        )

In [0]:
# Print the first 52 rows of the cleaned, date-ordered frame.
sorted_final_sdf.show(n=52)

+----+---+-----+------------------+-----+----------------+------+------------------+-----+------+------------------+-----------------+--------------+
|YEAR|DOY| T10M|         CLOUD_AMT|QV10M|              PW|    PS|GLOBAL_ILLUMINANCE|WS10M|EVLAND|          latitude|        longitude|          City|
+----+---+-----+------------------+-----+----------------+------+------------------+-----+------+------------------+-----------------+--------------+
|1981|  1|25.33|61.397109077810676|17.52|2.42996198174706|100.62| 34868.50815731391| 3.45| 41.21|          1.357107|      103.8194992|     Singapore|
|1981|  1| 0.34|61.397109077810676| 3.48|2.42996198174706| 99.45| 34868.50815731391| 5.73|  4.08|       47.48138955|19.14609412691246|      Budapest|
|1981|  1| 1.33|61.397109077810676| 3.54|2.42996198174706| 96.84| 34868.50815731391|10.81|  10.7|        50.0874654|       14.4212535|        Prague|
|1981|  1| 1.48|61.397109077810676| 3.72|2.42996198174706| 98.28| 34868.50815731391|11.64|  9.87|   