In [1]:
import pickle

from pyspark.sql.types import StructType,StructField, StringType, IntegerType, FloatType
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

### CONCAT THE CSV FILES

In [69]:
spark = SparkSession.builder.master("local").appName("hdfs_test").getOrCreate()

In [178]:
spark.read.option('header', 'true').csv("/home/yasser/Desktop/hadoop/data/data_wheather", inferSchema=True).write.csv("hdfs://localhost:9000/user/input/all_data.csv")

                                                                                

In [186]:
df_concat = spark.read.option('header', 'true').csv("/home/yasser/Desktop/hadoop/data/data_wheather")
df_concat = df_concat.select(['date_time', 'maxtempC', 'mintempC', 'humidity', 'windspeedKmph', 'totalSnow_cm', 'location'])
df_concat.show()

+----------+--------+--------+--------+-------------+------------+------------+
| date_time|maxtempC|mintempC|humidity|windspeedKmph|totalSnow_cm|    location|
+----------+--------+--------+--------+-------------+------------+------------+
|2009-01-01|      25|      16|      83|           10|         0.0|Johannesburg|
|2009-01-02|      28|      18|      76|            9|         0.0|Johannesburg|
|2009-01-03|      29|      17|      71|           11|         0.0|Johannesburg|
|2009-01-04|      33|      17|      58|            9|         0.0|Johannesburg|
|2009-01-05|      26|      16|      73|           13|         0.0|Johannesburg|
|2009-01-06|      28|      16|      72|           10|         0.0|Johannesburg|
|2009-01-07|      31|      18|      64|            6|         0.0|Johannesburg|
|2009-01-08|      30|      20|      63|            7|         0.0|Johannesburg|
|2009-01-09|      29|      19|      66|            7|         0.0|Johannesburg|
|2009-01-10|      23|      18|      89| 

In [191]:
df_concat.write.option('header', 'true').csv("hdfs://localhost:9000/user/input/all_data.csv")

                                                                                

## PYSPARK

### PRE PROCESSING

In [106]:
df = spark.read.option('header', 'true').csv("hdfs://localhost:9000/user/input/all_data.csv", inferSchema=True)
df.show()

+-------------------+--------+--------+--------+-------------+------------+------------+
|          date_time|maxtempC|mintempC|humidity|windspeedKmph|totalSnow_cm|    location|
+-------------------+--------+--------+--------+-------------+------------+------------+
|2009-01-01 00:00:00|      25|      16|      83|           10|         0.0|Johannesburg|
|2009-01-02 00:00:00|      28|      18|      76|            9|         0.0|Johannesburg|
|2009-01-03 00:00:00|      29|      17|      71|           11|         0.0|Johannesburg|
|2009-01-04 00:00:00|      33|      17|      58|            9|         0.0|Johannesburg|
|2009-01-05 00:00:00|      26|      16|      73|           13|         0.0|Johannesburg|
|2009-01-06 00:00:00|      28|      16|      72|           10|         0.0|Johannesburg|
|2009-01-07 00:00:00|      31|      18|      64|            6|         0.0|Johannesburg|
|2009-01-08 00:00:00|      30|      20|      63|            7|         0.0|Johannesburg|
|2009-01-09 00:00:00|

                                                                                

In [127]:
df.filter((df.location == 'Paris') & (df.date_time == '2009-03-07 00:00:00')).show()

+-------------------+--------+--------+--------+-------------+------------+--------+
|          date_time|maxtempC|mintempC|humidity|windspeedKmph|totalSnow_cm|location|
+-------------------+--------+--------+--------+-------------+------------+--------+
|2009-03-07 00:00:00|      10|       1|      84|           10|         0.0|   Paris|
+-------------------+--------+--------+--------+-------------+------------+--------+



                                                                                

In [149]:
df.selectExpr("month(date_time) AS month", "year(date_time) AS year", 'maxtempC', "mintempC", "location",)\
    .filter((col('year') == '2019') ).groupBy(col('month')).mean().sort(col('month')).show()

    # .groupBy(['month']).mean().select(['month', 'location', 'avg(maxtempC)', 'avg(mintempC)']).orderBy(col('avg(mintempC)').asc()).show()

+-----+----------+---------+------------------+------------------+
|month|avg(month)|avg(year)|     avg(maxtempC)|     avg(mintempC)|
+-----+----------+---------+------------------+------------------+
|    1|       1.0|   2019.0|16.893830253679926|11.491700595051675|
|    2|       2.0|   2019.0|18.630374479889042|12.550970873786408|
|    3|       3.0|   2019.0|20.739743188224242|14.225806451612904|
|    4|       4.0|   2019.0|22.888025889967636|16.133656957928803|
|    5|       5.0|   2019.0|25.068274350140932| 18.26777325399311|
|    6|       6.0|   2019.0|28.179611650485437|21.002912621359222|
|    7|       7.0|   2019.0| 28.54400250548074| 21.38396492326965|
|    8|       8.0|   2019.0|28.953961791418728| 21.72690259943627|
|    9|       9.0|   2019.0|26.851132686084142|19.915210355987053|
|   10|      10.0|   2019.0|24.585342937676167| 18.29314124647667|
|   11|      11.0|   2019.0|20.803559870550163| 15.20906148867314|
|   12|      12.0|   2019.0| 18.17256498590667|13.003758221108

In [131]:
df.count()

504597

In [73]:
df.printSchema()

root
 |-- date_time: timestamp (nullable = true)
 |-- maxtempC: integer (nullable = true)
 |-- mintempC: integer (nullable = true)
 |-- humidity: integer (nullable = true)
 |-- windspeedKmph: integer (nullable = true)
 |-- totalSnow_cm: double (nullable = true)
 |-- location: string (nullable = true)



In [74]:
# CHECK FOR NaN VALUES

df.filter(df.maxtempC.isNull()).show()
df.filter(df.mintempC.isNull()).show()
df.filter(df.location.isNull()).show()

+---------+--------+--------+--------+-------------+------------+--------+
|date_time|maxtempC|mintempC|humidity|windspeedKmph|totalSnow_cm|location|
+---------+--------+--------+--------+-------------+------------+--------+
+---------+--------+--------+--------+-------------+------------+--------+

+---------+--------+--------+--------+-------------+------------+--------+
|date_time|maxtempC|mintempC|humidity|windspeedKmph|totalSnow_cm|location|
+---------+--------+--------+--------+-------------+------------+--------+
+---------+--------+--------+--------+-------------+------------+--------+

+---------+--------+--------+--------+-------------+------------+--------+
|date_time|maxtempC|mintempC|humidity|windspeedKmph|totalSnow_cm|location|
+---------+--------+--------+--------+-------------+------------+--------+
+---------+--------+--------+--------+-------------+------------+--------+



                                                                                

### HOTTEST CITIES

In [103]:
df.orderBy(df.maxtempC.desc()).show()

+-------------------+--------+--------+--------+-------------+------------+--------+
|          date_time|maxtempC|mintempC|humidity|windspeedKmph|totalSnow_cm|location|
+-------------------+--------+--------+--------+-------------+------------+--------+
|2010-06-14 00:00:00|      53|      32|      18|            9|         0.0|    Agra|
|2009-05-21 00:00:00|      52|      33|      16|            5|         0.0|   Delhi|
|2009-05-19 00:00:00|      52|      35|      14|            5|         0.0|    Agra|
|2009-05-20 00:00:00|      52|      36|      14|            3|         0.0|   Delhi|
|2009-05-21 00:00:00|      52|      34|      16|            8|         0.0|    Agra|
|2009-05-22 00:00:00|      52|      37|      17|           10|         0.0|    Agra|
|2010-06-20 00:00:00|      52|      33|      17|            4|         0.0|    Agra|
|2010-05-22 00:00:00|      52|      31|      11|            6|         0.0|   Delhi|
|2009-05-19 00:00:00|      52|      34|      16|            4|   

                                                                                

In [76]:
df.groupBy("location").mean().orderBy(col('avg(maxtempC)').desc()).show()

+---------+------------------+------------------+------------------+------------------+-----------------+
| location|     avg(maxtempC)|     avg(mintempC)|     avg(humidity)|avg(windspeedKmph)|avg(totalSnow_cm)|
+---------+------------------+------------------+------------------+------------------+-----------------+
|     Agra|34.491120636864665|23.867115737905696| 40.61359461114513| 9.527658705858338|              0.0|
|    Macau| 34.43253725250051| 22.63768115942029| 62.84670340885895|16.468258828332313|              0.0|
|    Delhi|34.316391100224536|23.901408450704224| 39.58032251479894| 9.053684425392937|              0.0|
|  Kolkata| 33.68952847519902|24.509695856297203| 60.42620943049602|11.143294549908145|              0.0|
|   Jaipur| 33.35456215554195|22.507450500102063| 37.06552357624005| 10.89140640947132|              0.0|
|  Bangkok| 33.26209430496019|26.617064707083077| 65.08920187793427|11.105531741171667|              0.0|
|   Riyadh| 32.68320065319453|20.6501326801388

### COLDEST CITIES

In [77]:
df.groupBy("location").mean().orderBy(col('avg(mintempC)').asc()).show()

+------------+------------------+------------------+-----------------+------------------+--------------------+
|    location|     avg(maxtempC)|     avg(mintempC)|    avg(humidity)|avg(windspeedKmph)|   avg(totalSnow_cm)|
+------------+------------------+------------------+-----------------+------------------+--------------------+
|        Bern|14.047764849969381|2.4419269238620127|80.65380689936721| 7.436619718309859| 0.33278220044907136|
|       Sofia|16.444988773219023| 4.509083486425801|73.39212084098796|  7.56582976117575| 0.32478056746274714|
|      Munich|13.432741375790977| 4.774852010614411| 81.4996938150643|10.107368850785875|   0.358011839150847|
|      krakow|12.772810777709736| 5.515411308430292|80.93059808124107|14.248009797917943| 0.11916717697489292|
|      Prague|13.534803021024699| 5.687487242294345|76.36844253929374|11.365788936517657| 0.23641559501939144|
|      Warsaw|13.063686466625843|  5.85241886099204|75.88324147785262|12.329454990814451|  0.2173708920187794|
|

In [78]:
df.selectExpr("month(date_time) AS month", 'maxtempC', "mintempC", "location", 'totalSnow_cm').groupBy(['month', 'location', 'totalSnow_cm']).mean().select(['month', 'location', 'avg(maxtempC)', 'avg(mintempC)', 'avg(totalSnow_cm)']).orderBy(col('avg(mintempC)').asc()).show()

[Stage 249:>                                                        (0 + 1) / 1]

+-----+---------+-------------+-------------+-----------------+
|month| location|avg(maxtempC)|avg(mintempC)|avg(totalSnow_cm)|
+-----+---------+-------------+-------------+-----------------+
|   12|     Bern|         -2.0|        -19.0|              8.7|
|   12|     Bern|         -1.0|        -18.0|             11.0|
|   12|     Bern|         -1.5|        -17.5|              5.3|
|    2|     Bern|         -2.0|        -17.0|              2.3|
|    3|  Toronto|        -11.0|        -16.0|              1.9|
|   12|   Prague|         -4.0|        -16.0|              2.2|
|    2|     Bern|         -2.0|        -16.0|              1.2|
|    1|   Prague|         -6.0|        -16.0|              5.7|
|   12|   Prague|         -5.0|        -16.0|              6.7|
|    1|   Edirne|         -4.0|        -15.0|             23.0|
|   12|     Bern|          1.0|        -15.0|              0.6|
|   12|  Beijing|         -3.0|        -15.0|              3.1|
|    1|Vancouver|         -5.0|        -

                                                                                

## Snowiest cities

In [79]:
df.selectExpr("month(date_time) AS month", "totalSnow_cm", "location").groupBy(['month', 'location']).mean().orderBy(col('avg(totalSnow_cm)').desc()).show()

                                                                                

+-----+---------+----------+------------------+
|month| location|avg(month)| avg(totalSnow_cm)|
+-----+---------+----------+------------------+
|    1|   Munich|       1.0|  1.26036866359447|
|    1|    Sofia|       1.0|1.0525345622119815|
|    2|   Munich|       2.0|0.9870886075949368|
|    3|    Sofia|       3.0|0.9170506912442397|
|    2|     Bern|       2.0|0.8772151898734182|
|   12|     Bern|      12.0| 0.873697270471464|
|    1|   Prague|       1.0|0.8117511520737337|
|   12|   Munich|      12.0|0.8062034739454094|
|    1|   Warsaw|       1.0| 0.790783410138249|
|    1|     Bern|       1.0|0.7834101382488481|
|    2|    Sofia|       2.0|0.7729113924050636|
|    1|     malt|       1.0|0.6942396313364054|
|    1|Bucharest|       1.0| 0.683410138248848|
|    2|   Prague|       2.0|0.6554430379746836|
|    2|   Warsaw|       2.0|0.6207594936708859|
|    3|     Bern|       3.0|0.6096774193548387|
|   12|    Sofia|      12.0|0.5985111662531017|
|    2|Vancouver|       2.0| 0.588607594

## Cities with the highest windspeed where we can install wind turbines

In [80]:
df.groupBy("location").mean().orderBy(col('avg(windspeedKmph)').desc()).select(['location', 'avg(maxtempC)', 'avg(mintempC)', 'avg(windspeedKmph)']).show()

+----------+------------------+------------------+------------------+
|  location|     avg(maxtempC)|     avg(mintempC)|avg(windspeedKmph)|
+----------+------------------+------------------+------------------+
|  Barbados|28.022045315370484| 26.75831802408655|  25.3417023882425|
|    Azores|18.701775872627067|17.005307205552153| 23.63849765258216|
|      Fiji| 28.62727087160645|27.533578281281894|22.329046744233516|
|     Dakar|25.408042457644417| 23.26801388038375|21.922229026331905|
|Guadeloupe|27.824862216778936|  26.1924882629108|21.373341498264953|
|      Baku|18.408450704225352| 14.49397836293121|21.240048989589713|
|    Dublin|12.072259644825474| 7.896917738313942| 20.39354970402123|
|    Djerba|22.424780567462747| 19.68789548887528| 20.20575627679118|
|    Manama| 29.19861196162482|26.329250867523985|18.756889161053277|
| marseille|  18.7542355582772|13.328434374362114|18.326597264747907|
|Copenhagen|11.284139620330679| 7.687283119003879|18.161257399469278|
|  Istanbul|18.49683

In [81]:
df.groupBy("location").max().orderBy(col('max(windspeedKmph)').desc()).select(['location', 'max(maxtempC)', 'max(mintempC)', 'max(windspeedKmph)']).show()

+----------+-------------+-------------+------------------+
|  location|max(maxtempC)|max(mintempC)|max(windspeedKmph)|
+----------+-------------+-------------+------------------+
|     Miami|           35|           31|                86|
|    Taipei|           39|           32|                72|
|Guadeloupe|           34|           29|                70|
|    Dublin|           23|           17|                67|
|    Zhuhai|           36|           31|                65|
|      Baku|           40|           32|                64|
|    Djerba|           37|           32|                64|
|    Azores|           26|           24|                64|
|      Fiji|           31|           30|                63|
| marseille|           35|           29|                60|
|    Athens|           43|           33|                58|
|  Istanbul|           40|           29|                57|
|     crete|           36|           29|                57|
|    Muscat|           43|           36|

### CITIES STATS

In [82]:
month_names = {
    1 : 'JANUARY',
    2 : 'FEBRUARY',
    3 : 'MARCH',
    4 : 'APRIL',
    5 : 'MAY',
    6 : 'JUNE',
    7 : 'JULY',
    8 : 'AUGUST',
    9 : 'SEPTEMBER',
    10 : 'OCTOBER',
    11 : 'NOVEMBER',
    12 : 'DECEMBER'
}

In [83]:
def stats():

  # INPUTS VERIFICATION ==============================
  month_ = input('PLEASE CHOOSE A MONTH')
  try:
      month_ = int(month_)
  except ValueError:
      return ("PLEASE CHOOSE AN INTEGER FOR THE MONTH")

  if month_ not in month_names.keys():
      return ("PLEASE CHOOSE A MONTH BETWEEN 1 AND 12")


  city = input('PLEASE CHOOSE A CITY')

  # ===================================================

  max_month = df.selectExpr("month(date_time) AS month", "maxtempC", "location").groupBy(['month', 'location']).max().orderBy(col('max(maxtempC)').desc())
  max_month = max_month.filter((max_month.month == month_) & (max_month['location'] == city)).select(['month', 'location', 'max(maxtempC)'])
  if max_month.count() == 0:
      return "SORRY WE DON'T HAVE DATA FOR YOUR INPUT"

  max_month = max_month.select(["max(maxtempC)"]).first()[0]

  max_year = df.groupBy("location").max().orderBy(col('max(maxtempC)'))
  max_year = max_year.filter((max_year.location == city)).select(["max(maxtempC)"]).first()[0]


  min_month = df.selectExpr("month(date_time) AS month", "mintempC", "location").groupBy(['month', 'location']).min().orderBy(col('min(mintempC)').desc())
  min_month = min_month.filter((min_month.month == month_) & (min_month['location'] == city)).select(['month', 'location', 'min(mintempC)'])
  if min_month.count() == 0:
      print(f"SORRY WE DON'T HAVE DATA FOR YOUR INPUT")

  min_month = min_month.select(["min(mintempC)"]).first()[0]

  min_year = df.groupBy("location").min().orderBy(col('min(mintempC)'))
  min_year = min_year.filter((min_year.location == city)).select(["min(mintempC)"]).first()[0]

  var = df.groupBy("location").agg({'maxtempC': 'variance', 'mintempC': 'variance'})
  var_max = var.filter((var.location == city)).select(["variance(maxtempC)"]).first()[0]
  var_min = var.filter((var.location == city)).select(["variance(mintempC)"]).first()[0]


  data = [
      (month_,city, max_month, min_month, max_year, min_year, var_max, var_min)
    ]
    
  schema = StructType([ \
      StructField("month",IntegerType(),True), \
      StructField("city",StringType(),True), \
      StructField("max_month",IntegerType(),True), \
      StructField("min_month", IntegerType(), True), \
      StructField("max_year",IntegerType(),True), \
      StructField("min_year", IntegerType(), True), \
      StructField("variance_max", FloatType(), True), \
      StructField("variance_min", FloatType(), True) \
    ])


  spark.createDataFrame(data=data,schema=schema).show()

In [151]:
stats()

                                                                                

+-----+-----+---------+---------+--------+--------+------------+------------+
|month| city|max_month|min_month|max_year|min_year|variance_max|variance_min|
+-----+-----+---------+---------+--------+--------+------------+------------+
|    9|Paris|       36|        5|      41|     -10|   58.553497|    33.93195|
+-----+-----+---------+---------+--------+--------+------------+------------+



In [125]:
df.filter(df.location == 'Paris').describe().show()

+-------+------------------+-----------------+----------------+------------------+-------------------+--------+
|summary|          maxtempC|         mintempC|        humidity|     windspeedKmph|       totalSnow_cm|location|
+-------+------------------+-----------------+----------------+------------------+-------------------+--------+
|  count|              4899|             4899|            4899|              4899|               4899|    4899|
|   mean|15.908552765870585|8.553582363747704|76.4633598693611|12.386405388854868|0.04198816084915288|    null|
| stddev| 7.652025723983453|5.825113619402974|11.4733537682678|  5.36350229989391| 0.4388245359119745|    null|
|    min|                -4|              -10|              31|                 2|                0.0|   Paris|
|    max|                41|               28|             100|                42|               11.9|   Paris|
+-------+------------------+-----------------+----------------+------------------+-------------------+--

### EARTH TEMPERATURE

In [88]:
# MEAN TEMPERATURE FOR EACH YEAR

df.selectExpr("year(date_time) AS year", "maxtempC").groupBy("year").mean().select(['year', 'avg(maxtempC)']).sort('year').show()

+----+------------------+
|year|     avg(maxtempC)|
+----+------------------+
|2009|22.715121691714323|
|2010| 22.44684133528395|
|2011|22.451762202420536|
|2012| 22.47424266539339|
|2013|22.616730948264397|
|2014|22.920388349514564|
|2015|23.255379704747973|
|2016|23.113825667144145|
|2017| 23.27743050937625|
|2018|23.560074477989094|
|2019|23.383588243117437|
|2020| 22.96686826887368|
|2021| 22.93879505253358|
|2022|21.198932681797725|
+----+------------------+



### CHOOSE A DESTINATION DEPENDING ON TEMPERATURE AND DATE INPUTS

In [89]:
def vacation_city():

    # INPUTS VERIFICATION ==============================
    month_ = input('PLEASE CHOOSE A MONTH')
    try:
        month_ = int(month_)
    except ValueError:
        return ("PLEASE CHOOSE AN INTEGER FOR THE MONTH")

    if month_ not in month_names.keys():
        return ("PLEASE CHOOSE A MONTH BETWEEN 1 AND 12")


    city = input('PLEASE CHOOSE A CITY')

    # ===================================================

    # REQUESTS ==========================================

    print(f"... traveling to {city} in {month_names[month_]}")
    tmp = df.selectExpr("month(date_time) AS month", "maxtempC", "location").groupBy(['month', 'location']).mean().orderBy(col('avg(maxtempC)').desc())
    tmp = tmp.filter((tmp.month == month_) & (tmp['location'] == city)).select(['month', 'location', 'avg(maxtempC)'])
    if tmp.count() == 0:
        print(f"SORRY WE DON'T HAVE DATA FOR YOUR INPUT")
    else:
        tmp.show()


In [109]:
def vacation_temperature():

    # INPUTS VERIFICATION ==============================
    month_ = input('PLEASE CHOOSE A MONTH')
    try:
        month_ = int(month_)
    except ValueError:
        return ("PLEASE CHOOSE AN INTEGER FOR THE MONTH")

    if month_ not in month_names.keys():
        return ("PLEASE CHOOSE A MONTH BETWEEN 1 AND 12")


    temperature = input('PLEASE CHOOSE A TEMPERATURE')
    try:
        temperature = int(temperature)
    except ValueError:
        return ("PLEASE CHOOSE AN INTEGER FOR TEMPERATURE")
        
    # ===================================================

    # REQUESTS ==========================================
    print(f"... searching for cities with {temperature} degrees in {month_names[month_]}")
    tmp = df.selectExpr("month(date_time) AS month", "maxtempC", "location").groupBy(['month', 'location']).mean().orderBy(col('avg(maxtempC)').desc())
    tmp = tmp.filter((tmp.month == month_) & (tmp['avg(maxtempC)'] < (temperature + 1)) & (tmp['avg(maxtempC)'] > (temperature))).select(['month', 'location', 'avg(maxtempC)'])
    if tmp.count() == 0:
        print(f"SORRY WE DON'T HAVE DATA FOR {temperature} DEGREES PLEASE TRY ANOTHER VALUE")
    else:
        tmp.show()

In [152]:
vacation_city()

... traveling to Barcelona in JUNE
+-----+---------+-------------+
|month| location|avg(maxtempC)|
+-----+---------+-------------+
|    6|Barcelona|         25.3|
+-----+---------+-------------+



In [153]:
vacation_temperature()

... searching for cities with 25 degrees in SEPTEMBER


                                                                                

+-----+---------+------------------+
|month| location|     avg(maxtempC)|
+-----+---------+------------------+
|    9|    Milan|25.817948717948717|
|    9|     Baku| 25.70769230769231|
|    9|Barcelona|25.646153846153847|
|    9|  Nairobi| 25.47179487179487|
|    9| Bordeaux|25.125641025641027|
|    9| Istanbul| 25.08205128205128|
|    9|     malt| 25.05128205128205|
+-----+---------+------------------+



                                                                                

### PREDICTIONS

In [93]:
month_days = {
    1 : '31',
    2 : '28',
    3 : '31',
    4 : '30',
    5 : '31',
    6 : '30',
    7 : '31',
    8 : '31',
    9 : '30',
    10 : '31',
    11 : '30',
    12 : '31'
}

In [94]:
with open('/home/yasser/Desktop/hadoop/prediction/prediction_pickle', 'rb') as f:
    model = pickle.load(f)

In [95]:
def barcelona_prediction():

    # INPUTS VERIFICATION ==============================
    month_ = input('PLEASE CHOOSE A MONTH')
    try:
        month_ = int(month_)
    except ValueError:
        return ("PLEASE CHOOSE AN INTEGER FOR THE MONTH")

    if month_ not in month_names.keys():
        return ("PLEASE CHOOSE A MONTH BETWEEN 1 AND 12")

    year = input('PLEASE CHOOSE A YEAR BETWEEN 2015 AND 2025')
    try:
        year = int(year)
    except ValueError:
        return ("PLEASE CHOOSE AN INTEGER FOR THE YEAR")

    if year < 2015 or year > 2025:
        return ("PLEASE CHOOSE A YEAR BETWEEN 2015 AND 2025")

    return f'{str(year)}-{str(month_)}-{month_days[month_]}'
    # model.predict(f"'{year}-{month_}-{month_days[month_]}'")

In [96]:
date_ = barcelona_prediction()

In [97]:
model.predict(date_)

2020-09-30    29.171461
Freq: M, dtype: float64

In [154]:
model.predict('2025-09-30')

2025-09-30    33.6556
Freq: M, dtype: float64