In [1]:
import pandas as pd
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import json
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
from geojson import dump
warnings.filterwarnings('ignore')

# Récupération des données

In [2]:
# Load the three temperature tables (per city, per country, global) from UTF-8 CSV files.
# The files are read in the same order as the names on the left-hand side.
temperatureByCity, temperatureByCountry, temperatureEarth = (
    pd.read_csv(csv_name, encoding="utf-8")
    for csv_name in (
        "GlobalLandTemperaturesByCity.csv",
        "GlobalLandTemperaturesByCountry.csv",
        "GlobalTemperatures.csv",
    )
)


In [3]:
# Preview the first rows of the global temperature table (one row per monthly date in `dt`).
temperatureEarth.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1750-01-01,3.034,3.574,,,,,,
1,1750-02-01,3.083,3.702,,,,,,
2,1750-03-01,5.626,3.076,,,,,,
3,1750-04-01,8.49,2.451,,,,,,
4,1750-05-01,11.573,2.072,,,,,,


In [4]:
def remove_outliers(df, col, lower=0.01, upper=0.99):
    """Return the rows of ``df`` whose ``col`` value lies strictly between two quantiles.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    col : str
        Name of the numeric column used to compute the quantiles and to filter.
    lower, upper : float, optional
        Quantile levels in ``[0, 1]`` used as the exclusive lower / upper bounds.
        The defaults (0.01 and 0.99) trim roughly the bottom and top 1 % of values.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the retained rows, with the original index labels.

    Notes
    -----
    Both bounds are exclusive, so rows equal to either quantile are removed.
    Rows where ``col`` is NaN are removed too, because NaN compares false on
    both sides of the test.
    """
    values = df[col]
    q_low = values.quantile(lower)
    q_hi = values.quantile(upper)
    keep = (values > q_low) & (values < q_hi)
    # .copy() so callers can add or overwrite columns on the result without
    # triggering pandas' chained-assignment (SettingWithCopy) warning.
    return df[keep].copy()

# Température moyenne par an dans le monde

In [5]:
# Calendar year = first four characters of the "YYYY-MM-DD" date string (kept as text).
temperatureEarth['year'] = temperatureEarth['dt'].str[:4]
# numeric_only=True: the frame still holds the text column 'dt'. pandas >= 2.0 raises
# TypeError when asked to average it, whereas older versions silently dropped it.
temperatureEarth = temperatureEarth.groupby('year').mean(numeric_only=True).reset_index()
# Trim the extreme years before exporting and plotting.
temperatureEarth = remove_outliers(temperatureEarth,"LandAverageTemperature")
temperatureEarth.to_csv('output_data/temperatureEarth.csv')

In [6]:
# One point per year for the global land average temperature; both axis titles are blank.
fig = px.scatter(
    temperatureEarth,
    x='year',
    y='LandAverageTemperature',
).update_layout(
    title='<b>Evolution de la température moyenne par an</b>',
    xaxis_title='',
    yaxis_title='',
)
fig.show()

# Température moyenne par an en France

In [7]:
# Preview the first rows of the per-country temperature table.
temperatureByCountry.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [8]:
# Keep only the French rows. .copy() detaches the result from temperatureByCountry so the
# new column below is not written into a slice of the original frame.
temperatureFrance = temperatureByCountry.query("Country=='France'").copy()
# Calendar year = first four characters of the "YYYY-MM-DD" date string (kept as text).
temperatureFrance['year'] = temperatureFrance['dt'].str[:4]
# numeric_only=True: 'dt' and 'Country' are text columns; pandas >= 2.0 raises TypeError
# when asked to average them, whereas older versions silently dropped them.
temperatureFranceYear = temperatureFrance.groupby('year').mean(numeric_only=True).reset_index()

In [9]:
# Trim the extreme years, export the yearly French series, then plot it.
temperatureFranceYear = remove_outliers(temperatureFranceYear, "AverageTemperature")
temperatureFranceYear.to_csv('output_data/temperatureFranceYear.csv')

fig = px.scatter(
    temperatureFranceYear,
    x='year',
    y='AverageTemperature',
).update_layout(
    title='<b>Evolution de la température moyenne par an en France</b>',
    xaxis_title='',
    yaxis_title='',
)
fig.show()

# Température moyenne par an en France (mois)

In [10]:
# Month number ("01".."12") = characters 5-6 of the "YYYY-MM-DD" date string.
# assign() builds a new frame rather than writing into the filtered France slice.
temperatureFrance = temperatureFrance.assign(month=temperatureFrance['dt'].str[5:7])
month_name = {'01':'Janvier','02':'Février','03':"Mars",'04':"Avril",'05':"Mai",'06':"Juin",'07':"Juillet",'08':"Aout",'09':"Septembre",'10':"Octobre",'11':"Novembre",'12':"Décembre"}
# numeric_only=True: 'dt' and 'Country' are text columns; pandas >= 2.0 raises TypeError
# when asked to average them, whereas older versions silently dropped them.
temperatureFranceMonth = temperatureFrance.groupby(['year','month']).mean(numeric_only=True).reset_index()
# NOTE(review): the 1 % / 99 % quantiles are computed over all months pooled together, so
# the rows removed here are the coldest and warmest (year, month) pairs overall rather
# than per-month outliers - confirm this is intended.
temperatureFranceMonth = remove_outliers(temperatureFranceMonth,"AverageTemperature")
# Replace the month number by its French name without assigning into a filtered slice.
temperatureFranceMonth = temperatureFranceMonth.assign(
    month=temperatureFranceMonth['month'].map(month_name)
)


In [11]:
# Display the monthly France table (the rendered output is truncated to its first and last rows).
temperatureFranceMonth

Unnamed: 0,year,month,AverageTemperature,AverageTemperatureUncertainty
0,1743,Novembre,10.203,2.030
5,1744,Avril,13.190,2.280
6,1744,Mai,14.133,1.891
7,1744,Juin,17.349,1.926
8,1744,Juillet,18.782,1.994
...,...,...,...,...
3232,2013,Mars,9.041,0.308
3233,2013,Avril,12.320,0.240
3234,2013,Mai,13.763,0.269
3235,2013,Juin,17.750,0.295


In [12]:
# January, March and May only: one colour per month, one point per year.
temperatureFranceMonth1 = temperatureFranceMonth.query(
    "month == 'Janvier' or month == 'Mars' or month == 'Mai'"
)

fig = px.scatter(
    temperatureFranceMonth1,
    x='year',
    y='AverageTemperature',
    color='month',
).update_layout(
    title='<b>Evolution de la température moyenne par an en France</b>',
    xaxis_title='',
    yaxis_title='',
)
fig.show()

In [13]:
# July, September and November only: one colour per month, one point per year.
temperatureFranceMonth2 = temperatureFranceMonth.query(
    "month == 'Juillet' or month == 'Septembre' or month == 'Novembre'"
)

fig = px.scatter(
    temperatureFranceMonth2,
    x='year',
    y='AverageTemperature',
    color='month',
).update_layout(
    title='<b>Evolution de la température moyenne par an en France</b>',
    xaxis_title='',
    yaxis_title='',
)
fig.show()

In [14]:
# February, April and June only: one colour per month, one point per year.
temperatureFranceMonth3 = temperatureFranceMonth.query(
    "month == 'Février' or month == 'Avril' or month == 'Juin'"
)

fig = px.scatter(
    temperatureFranceMonth3,
    x='year',
    y='AverageTemperature',
    color='month',
).update_layout(
    title='<b>Evolution de la température moyenne par an en France</b>',
    xaxis_title='',
    yaxis_title='',
)
fig.show()

In [15]:
# August, October and December only: one colour per month, one point per year.
temperatureFranceMonth4 = temperatureFranceMonth.query(
    "month == 'Aout' or month == 'Octobre' or month == 'Décembre'"
)

fig = px.scatter(
    temperatureFranceMonth4,
    x='year',
    y='AverageTemperature',
    color='month',
).update_layout(
    title='<b>Evolution de la température moyenne par an en France</b>',
    xaxis_title='',
    yaxis_title='',
)
fig.show()

In [16]:
# Per-month warming in France: mean of 1990-2013 minus mean of 1850-1900 (both inclusive).
#
# Computed with two grouped means instead of a Python loop that appended one row at a time.
# The former bare `except:` is gone on purpose: it turned any failure into a fake
# "0 degrees change"; a month with no data in either period now shows up as NaN.
month_order = ["Janvier","Février","Mars","Avril","Mai","Juin","Juillet","Aout","Septembre","Octobre","Novembre","Décembre"]

# 'year' may be stored as text upstream; compare it numerically.
years = temperatureFranceMonth['year'].astype('int32')

mean_val_1850_1900 = (
    temperatureFranceMonth[years.between(1850, 1900)]
    .groupby('month')['AverageTemperature']
    .mean()
)
mean_val_1990_2013 = (
    temperatureFranceMonth[years.between(1990, 2013)]
    .groupby('month')['AverageTemperature']
    .mean()
)

# Subtraction aligns on the month name; reindex puts the months in calendar order.
df_changement_temp_month = (
    (mean_val_1990_2013 - mean_val_1850_1900)
    .reindex(month_order)
    .rename_axis('month')
    .rename('diff')
    .reset_index()
)

fig = px.line(
        df_changement_temp_month,x='month', y='diff'
        )

# NOTE(review): the title is an empty bold tag - consider giving this figure a real title.
fig.update_layout(title='<b></b>',xaxis_title='', yaxis_title='')
fig.show()

# Température moyenne par an à Rennes

In [17]:
# Preview the first rows of the per-city temperature table.
temperatureByCity.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1743-11-01,6.068,1.737,Århus,Denmark,57.05N,10.33E
1,1743-12-01,,,Århus,Denmark,57.05N,10.33E
2,1744-01-01,,,Århus,Denmark,57.05N,10.33E
3,1744-02-01,,,Århus,Denmark,57.05N,10.33E
4,1744-03-01,,,Århus,Denmark,57.05N,10.33E


In [18]:
# Keep only the rows for Rennes. .copy() detaches the result from temperatureByCity so the
# new column below is not written into a slice of the original frame.
temperatureRennes = temperatureByCity.query("City=='Rennes'").copy()
# Calendar year = first four characters of the "YYYY-MM-DD" date string (kept as text).
temperatureRennes['year'] = temperatureRennes['dt'].str[:4]
# numeric_only=True: the city table carries text columns (dt, City, Country, Latitude,
# Longitude); pandas >= 2.0 raises TypeError when asked to average them, whereas older
# versions silently dropped them.
temperatureRennes = temperatureRennes.groupby('year').mean(numeric_only=True).reset_index()

In [19]:
# Trim the extreme years, export the yearly Rennes series, then plot it.
temperatureRennes = remove_outliers(temperatureRennes, "AverageTemperature")
temperatureRennes.to_csv('output_data/temperatureRennes.csv')

fig = px.scatter(
    temperatureRennes,
    x='year',
    y='AverageTemperature',
).update_layout(
    title='<b>Evolution de la température moyenne par an à Rennes</b>',
    xaxis_title='',
    yaxis_title='',
)
fig.show()

# Température moyenne par pays

In [20]:
# Country outlines. JSON text is UTF-8 by specification, so decode it explicitly instead
# of relying on the platform default encoding (which would garble or reject non-ASCII
# country names on some systems).
with open('countries.geojson', 'r', encoding='utf-8') as geojson_file:
    boundaries_courties = json.load(geojson_file)

# Mean over every available record, per country.
# numeric_only=True: 'dt' is a text column; pandas >= 2.0 raises TypeError when asked to
# average it, whereas older versions silently dropped it.
avgTemperatureCountry = temperatureByCountry.groupby('Country').mean(numeric_only=True).reset_index()
avgTemperatureCountry.head()

Unnamed: 0,Country,AverageTemperature,AverageTemperatureUncertainty
0,Afghanistan,14.045007,0.930162
1,Africa,24.074203,0.395564
2,Albania,12.610646,1.524574
3,Algeria,22.985112,1.176241
4,American Samoa,26.611965,0.541258


In [21]:
# Copy each feature's ADMIN property into its top-level 'id' field
# (presumably so a choropleth's locations='Country' can be matched against it - verify).
geo_features = boundaries_courties["features"]
size_json = len(geo_features)
for geo_feature in geo_features:
    geo_feature['id'] = geo_feature['properties']['ADMIN']


In [22]:
# Disabled map: the choropleth code below is wrapped in a triple-quoted string, so this
# cell draws nothing - it only evaluates to that string, which is echoed as the output.
# The wrapped code would colour each country by its mean temperature.
# NOTE(review): the map centre (lat 37.0902, lon -95.7129) looks copied from a
# US-centred example - confirm before re-enabling.
'''fig = px.choropleth_mapbox(avgTemperatureCountry, geojson=boundaries_courties, 
                           locations='Country',
                           color='AverageTemperature',
                           color_continuous_scale="balance",
                           range_color=(avgTemperatureCountry['AverageTemperature'].min(), avgTemperatureCountry['AverageTemperature'].max()),
                           mapbox_style="carto-positron",
                           zoom=3, center = {"lat": 37.0902, "lon": -95.7129},
                           opacity=0.5,
                           labels={'AverageTemperature':'Temp moyenne'}
                          )
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()'''


'fig = px.choropleth_mapbox(avgTemperatureCountry, geojson=boundaries_courties, \n                           locations=\'Country\',\n                           color=\'AverageTemperature\',\n                           color_continuous_scale="balance",\n                           range_color=(avgTemperatureCountry[\'AverageTemperature\'].min(), avgTemperatureCountry[\'AverageTemperature\'].max()),\n                           mapbox_style="carto-positron",\n                           zoom=3, center = {"lat": 37.0902, "lon": -95.7129},\n                           opacity=0.5,\n                           labels={\'AverageTemperature\':\'Temp moyenne\'}\n                          )\nfig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})\nfig.show()'

# Changement de température par pays

In [23]:
# Integer calendar year = first four characters of the "YYYY-MM-DD" date string.
temperatureByCountry = temperatureByCountry.assign(
    year=temperatureByCountry['dt'].str[:4].astype('int32')
)

# Yearly mean per country, keeping 1850 onwards.
# numeric_only=True: 'dt' is a text column; pandas >= 2.0 raises TypeError when asked to
# average it, whereas older versions silently dropped it.
avgYearCountry = (
    temperatureByCountry
    .groupby(['Country', 'year'])
    .mean(numeric_only=True)
    .reset_index()
    .query("year >= 1850")
)
avgYearCountry.head()

Unnamed: 0,Country,year,AverageTemperature,AverageTemperatureUncertainty
12,Afghanistan,1850,13.326083,1.961917
13,Afghanistan,1851,13.605667,2.0485
14,Afghanistan,1852,13.541167,2.253833
15,Afghanistan,1853,13.455833,2.112833
16,Afghanistan,1854,13.60575,2.213333


In [24]:
# Per-country warming: mean of the yearly means over 1990-2013 minus the same over
# 1850-1900 (both ranges inclusive).
#
# Computed with two grouped means instead of a loop that:
#   * built a query string by concatenating the country name (breaks on a name
#     containing a double quote),
#   * hid every error behind a bare `except:` and reported a fake "0 degrees change",
#   * grew the result frame one row at a time.
# A country with no data in either period now gets NaN rather than 0.
countries = avgYearCountry['Country'].unique()

in_1850_1900 = avgYearCountry['year'].between(1850, 1900)
in_1990_2013 = avgYearCountry['year'].between(1990, 2013)

mean_val_1850_1900 = (
    avgYearCountry[in_1850_1900].groupby('Country')['AverageTemperature'].mean()
)
mean_val_1990_2013 = (
    avgYearCountry[in_1990_2013].groupby('Country')['AverageTemperature'].mean()
)

# Subtraction aligns on the country name; reindex restores one row per country, in the
# order in which countries appear in avgYearCountry.
df_changement_temp = (
    (mean_val_1990_2013 - mean_val_1850_1900)
    .reindex(countries)
    .rename_axis('Country')
    .rename('diff')
    .reset_index()
)



In [26]:
# Disabled map: the choropleth code below is wrapped in a triple-quoted string, so this
# cell draws nothing - it only evaluates to that string, which is echoed as the output.
# The wrapped code would colour each country by its 'diff' value from df_changement_temp.
# NOTE(review): the map centre (lat 37.0902, lon -95.7129) looks copied from a
# US-centred example - confirm before re-enabling.
'''fig = px.choropleth_mapbox(df_changement_temp, geojson=boundaries_courties, 
                           locations='Country',
                           color='diff',
                           color_continuous_scale="balance",
                           range_color=(df_changement_temp['diff'].min(), df_changement_temp['diff'].max()),
                           mapbox_style="carto-positron",
                           zoom=3, center = {"lat": 37.0902, "lon": -95.7129},
                           opacity=0.5,
                           labels={'diff':'Difference de temp.'}
                          )
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()'''


'fig = px.choropleth_mapbox(df_changement_temp, geojson=boundaries_courties, \n                           locations=\'Country\',\n                           color=\'diff\',\n                           color_continuous_scale="balance",\n                           range_color=(df_changement_temp[\'diff\'].min(), df_changement_temp[\'diff\'].max()),\n                           mapbox_style="carto-positron",\n                           zoom=3, center = {"lat": 37.0902, "lon": -95.7129},\n                           opacity=0.5,\n                           labels={\'diff\':\'Difference de temp.\'}\n                          )\nfig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})\nfig.show()'