#Complete Guide to Data Visualization with Python

In [0]:
from IPython.display import Image

In [0]:
#Image(url= "https://frenzy86.s3.eu-west-2.amazonaws.com/fav/xxxxxxxxx.PNG", width=400, height=300)

In [0]:
# Remote CSV that this notebook reads into `df` with pd.read_csv
temporal = 'https://frenzy86.s3.eu-west-2.amazonaws.com/fav/visualization/temporal.csv'

In [0]:
#https://towardsdatascience.com/complete-guide-to-data-visualization-with-python-2dd74df12b5e

In [0]:
import pandas as pd
# Load the CSV at the URL stored in `temporal` into a DataFrame
df = pd.read_csv(temporal)
df.head(10) #View first 10 data rows

Unnamed: 0,Mes,data science,machine learning,deep learning,categorical
0,2004-01-01,12,18,4,1
1,2004-02-01,12,21,2,1
2,2004-03-01,9,21,2,1
3,2004-04-01,10,16,4,1
4,2004-05-01,7,14,3,1
5,2004-06-01,9,17,3,1
6,2004-07-01,9,16,3,1
7,2004-08-01,7,14,3,1
8,2004-09-01,10,17,4,1
9,2004-10-01,8,17,4,1


In [0]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

Unnamed: 0,data science,machine learning,deep learning,categorical
count,194.0,194.0,194.0,194.0
mean,20.953608,27.396907,24.231959,0.257732
std,23.951006,28.09149,34.476887,0.438517
min,4.0,7.0,1.0,0.0
25%,6.0,9.0,2.0,0.0
50%,8.0,13.0,3.0,0.0
75%,26.75,31.5,34.0,1.0
max,100.0,100.0,100.0,1.0


In [0]:
# Column names, non-null counts, dtypes and memory usage of the frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 194 entries, 0 to 193
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Mes               194 non-null    object
 1   data science      194 non-null    int64 
 2   machine learning  194 non-null    int64 
 3   deep learning     194 non-null    int64 
 4   categorical       194 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 7.7+ KB


In [0]:
# Widen pandas' display limits so large tables are not truncated in cell output
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

In [0]:
# Parse 'Mes' into datetimes so the '{:%m-%Y}' date format below can be applied to it
df['Mes'] = pd.to_datetime(df['Mes'])

# Per-column display formats: currency, month-year date and percentage
format_dict = {
    'data science': '${0:,.2f}',
    'Mes': '{:%m-%Y}',
    'machine learning': '{:.2%}',
}

# Show the first rows rendered through the Styler with those formats
df.head().style.format(format_dict)

Unnamed: 0,Mes,data science,machine learning,deep learning,categorical
0,01-2004,$12.00,1800.00%,4,1
1,02-2004,$12.00,2100.00%,2,1
2,03-2004,$9.00,2100.00%,2,1
3,04-2004,$10.00,1600.00%,4,1
4,05-2004,$7.00,1400.00%,3,1


In [0]:
# Simplified format dictionary: keep only the date format, which is the one
# that makes sense for this data
format_dict = {'Mes': '{:%m-%Y}'}

# Highlight maxima in dark green and minima in red
(
    df.head()
    .style
    .format(format_dict)
    .highlight_max(color='darkgreen')
    .highlight_min(color='#ff0000')
)

Unnamed: 0,Mes,data science,machine learning,deep learning,categorical
0,01-2004,12,18,4,1
1,02-2004,12,21,2,1
2,03-2004,9,21,2,1
3,04-2004,10,16,4,1
4,05-2004,7,14,3,1


In [0]:
# Shade the 'data science' and 'machine learning' columns with the BuGn colormap
(
    df.head(10)
    .style
    .format(format_dict)
    .background_gradient(
        subset=['data science', 'machine learning'],
        cmap='BuGn',
    )
)

Unnamed: 0,Mes,data science,machine learning,deep learning,categorical
0,01-2004,12,18,4,1
1,02-2004,12,21,2,1
2,03-2004,9,21,2,1
3,04-2004,10,16,4,1
4,05-2004,7,14,3,1
5,06-2004,9,17,3,1
6,07-2004,9,16,3,1
7,08-2004,7,14,3,1
8,09-2004,10,17,4,1
9,10-2004,8,17,4,1


In [0]:
# Draw in-cell red bars for the 'data science' and 'deep learning' columns
(
    df.head()
    .style
    .format(format_dict)
    .bar(
        color='red',
        subset=['data science', 'deep learning'],
    )
)

Unnamed: 0,Mes,data science,machine learning,deep learning,categorical
0,01-2004,12,18,4,1
1,02-2004,12,21,2,1
2,03-2004,9,21,2,1
3,04-2004,10,16,4,1
4,05-2004,7,14,3,1


In [0]:
# Combine the BuGn gradient on two columns with a yellow highlight of the maxima
(
    df.head(10)
    .style
    .format(format_dict)
    .background_gradient(
        subset=['data science', 'machine learning'],
        cmap='BuGn',
    )
    .highlight_max(color='yellow')
)

Unnamed: 0,Mes,data science,machine learning,deep learning,categorical
0,01-2004,12,18,4,1
1,02-2004,12,21,2,1
2,03-2004,9,21,2,1
3,04-2004,10,16,4,1
4,05-2004,7,14,3,1
5,06-2004,9,17,3,1
6,07-2004,9,16,3,1
7,08-2004,7,14,3,1
8,09-2004,10,17,4,1
9,10-2004,8,17,4,1


#Pandas Profiling
Pandas Profiling is a library that generates interactive reports from our data. In the report we can see the distribution of the data, the data types, and possible problems the data might have. It is very easy to use: with only 3 lines we can generate a report that we can send to anyone, and it can be used even by someone who does not know programming.

In [0]:
from pandas_profiling import ProfileReport
# Build a profiling report object from the whole DataFrame.
# NOTE(review): the recorded output of this cell is "TypeError: ignored", so this
# call failed when the notebook was last run; presumably a version mismatch
# between pandas_profiling and pandas. Confirm and pin versions.
prof = ProfileReport(df)
# Disabled: export of the report to a standalone HTML file
#prof.to_file(output_file='informe.html')

TypeError: ignored

In [0]:
# Bare string, so the link is echoed as the cell result.
# Presumably a hosted copy of the 'informe.html' profiling report; confirm.
'https://frenzy86.s3.eu-west-2.amazonaws.com/fav/visualization/informe.html'

'https://frenzy86.s3.eu-west-2.amazonaws.com/fav/visualization/informe.html'

#Bokeh
Bokeh is a library that allows you to generate interactive graphics. We can export them to an HTML document that we can share with anyone who has a web browser.

In [0]:
from bokeh.plotting import figure, output_file, save
# Set the HTML file that a later save() call writes the plot to
output_file('data_science_popularity.html')

In [0]:
# Line chart of the 'data science' series against 'Mes', saved to the HTML file
# configured with output_file().
# x_axis_type='datetime' makes the x axis render the datetime values of 'Mes'
# as dates instead of raw numbers.
p = figure(
    title='data science',
    x_axis_label='Mes',
    y_axis_label='data science',
    x_axis_type='datetime',
)
# `legend_label=` replaces the deprecated `legend=` keyword for a fixed legend text
p.line(df['Mes'], df['data science'], legend_label='popularity', line_width=2)
save(p)



'/content/data_science_popularity.html'

#Folium

In [0]:
# Bare coordinate pair (evaluates to a tuple and is not stored anywhere).
# Presumably the lat/lon that the first folium map rounds to [44.48, 11.37]; confirm.
44.4807035,11.3712528

In [0]:
import folium
# Map centred on [44.48, 11.37] and written to a standalone HTML file.
# zoom_start was 150, which is not a meaningful tile zoom level (the other maps
# in this notebook use values between 3 and 16).
# NOTE(review): 15 is presumably what was intended; confirm.
m1 = folium.Map(location=[44.48, 11.37], tiles='openstreetmap', zoom_start=15)
m1.save('map1.html')

In [0]:
m1

In [0]:
# Map with two nearby markers whose popups contain HTML markup
m2 = folium.Map(
    location=[41.38, 2.17],
    tiles='openstreetmap',
    zoom_start=16,
)
folium.Marker(
    [41.38, 2.176],
    popup='<i>You can use whatever HTML code you want</i>',
    tooltip='click here',
).add_to(m2)
folium.Marker(
    [41.38, 2.174],
    popup='<b>You can use whatever HTML code you want</b>',
    tooltip='dont click here',
).add_to(m2)
m2.save('map2.html')

In [0]:
m2

In [0]:
# Remote CSV that is read into `df2` with pd.read_csv before geocoding
mapa = 'https://frenzy86.s3.eu-west-2.amazonaws.com/fav/visualization/mapa.csv'
!pip install geopandas

Collecting geopandas
[?25l  Downloading https://files.pythonhosted.org/packages/83/c5/3cf9cdc39a6f2552922f79915f36b45a95b71fd343cfc51170a5b6ddb6e8/geopandas-0.7.0-py2.py3-none-any.whl (928kB)
[K     |████████████████████████████████| 931kB 4.8MB/s 
[?25hCollecting pyproj>=2.2.0
[?25l  Downloading https://files.pythonhosted.org/packages/77/15/d93f446d253d26b91553f86cf21049183e9b0f51f8c8e6cb2cff081bcc02/pyproj-2.5.0-cp36-cp36m-manylinux2010_x86_64.whl (10.4MB)
[K     |████████████████████████████████| 10.4MB 45.8MB/s 
Collecting fiona
[?25l  Downloading https://files.pythonhosted.org/packages/ec/20/4e63bc5c6e62df889297b382c3ccd4a7a488b00946aaaf81a118158c6f09/Fiona-1.8.13.post1-cp36-cp36m-manylinux1_x86_64.whl (14.7MB)
[K     |████████████████████████████████| 14.7MB 46.0MB/s 
Collecting cligj>=0.5
  Downloading https://files.pythonhosted.org/packages/e4/be/30a58b4b0733850280d01f8bd132591b4668ed5c7046761098d665ac2174/cligj-0.5.0-py3-none-any.whl
Collecting click-plugins>=1.0
  Down

In [0]:

from geopandas.tools import geocode

# Load the table and drop, in place, every row that has a missing value
df2 = pd.read_csv(mapa)
df2.dropna(axis=0, inplace=True)

# Geocode the 'País' column with the 'nominatim' provider.
# It may take a while because it downloads a lot of data.
df2['geometry'] = geocode(df2['País'], provider='nominatim')['geometry']

# Split each geometry into coordinate columns: y -> Latitude, x -> Longitude
df2['Latitude'] = df2['geometry'].apply(lambda point: point.y)
df2['Longitude'] = df2['geometry'].apply(lambda point: point.x)



In [0]:
# Wide-area base map (zoom level 3) that the per-row circles are added to
m3 = folium.Map(
    location=[39.326234,-4.838065],
    tiles='openstreetmap',
    zoom_start=3,
)
def color_producer(val):
    """Return 'red' when ``val`` is at most 50, otherwise 'green'."""
    return 'red' if val <= 50 else 'green'
# Add one circle per row of df2: the radius scales with the 'data science' value
# and the colour comes from color_producer(). Iterating the three columns in
# lockstep avoids building a temporary row Series with df2.iloc[i] for every
# single value lookup.
for lat, lon, score in zip(df2['Latitude'], df2['Longitude'], df2['data science']):
    folium.Circle(
        location=[lat, lon],
        radius=5000 * score,
        color=color_producer(score),
    ).add_to(m3)
m3.save('map3.html')

In [0]:
m3

In [0]:
# Map created with only a location; every other option is left at folium's default
m = folium.Map(location=[45.5236, -122.6750])
m

In [0]:
# Same location with the 'Stamen Toner' tile set at zoom level 13.
# The map is not assigned to a name; it is only shown as the cell result.
folium.Map(
    location=[45.5236, -122.6750],
    tiles='Stamen Toner',
    zoom_start=13)

## Markers

In [0]:
# Hover text shared by both markers
tooltip = 'Click me!'

# Terrain-tile map with two markers whose popups use HTML markup
m = folium.Map(
    location=[45.372, -121.6972],
    tiles='Stamen Terrain',
    zoom_start=12,
)

folium.Marker(
    [45.3288, -121.6625],
    popup='<i>Mt. Hood Meadows</i>',
    tooltip=tooltip,
).add_to(m)
folium.Marker(
    [45.3311, -121.7113],
    popup='<b>Timberline Lodge</b>',
    tooltip=tooltip,
).add_to(m)

m

## Color and Icons

In [0]:
# color and icon
m = folium.Map(
    location=[45.372, -121.6972],
    zoom_start=12,
    tiles='Stamen Terrain'
)

folium.Marker(
    location=[45.3288, -121.6625],
    popup='Mt. Hood Meadows',
    icon=folium.Icon(icon='cloud')
).add_to(m)

folium.Marker(
    location=[45.3311, -121.7113],
    popup='Timberline Lodge',
    icon=folium.Icon(color='green')
).add_to(m)

folium.Marker(
    location=[45.3300, -121.6823],
    popup='Some Other Location',
    icon=folium.Icon(color='red', icon='info-sign')
).add_to(m)

m

## Polylines

In [0]:
# Map sized to 80% width and height, with a line drawn between two coordinates
m = folium.Map(
    width='80%', 
    height='80%',
    location=[45.33, -121.69],
    zoom_start=12,
    tiles='Stamen Terrain'
)
# PolyLine takes a list of [lat, lon] points; here a single two-point segment
folium.PolyLine([[45.3288, -121.6625],[45.3311, -121.7113]]).add_to(m)

# Display the map as the cell result
m

## Choropleth maps


In [0]:
# Choropleth of the 'Unemployment' column per US state, joined to the state
# shapes through the GeoJSON feature id.
base = 'https://github.com/python-visualization/folium/raw/master/examples/data/'
state_geo = base + 'us-states.json'

state_unemployment = base + 'US_Unemployment_Oct2012.csv'
state_data = pd.read_csv(state_unemployment)

m = folium.Map(location=[48, -102], zoom_start=3)

# Map.choropleth() is deprecated and no longer available in newer folium
# releases; the Choropleth class takes the same keyword arguments and is
# attached to the map with add_to().
folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=state_data,
    columns=['State', 'Unemployment'],  # [key column, value column]
    key_on='feature.id',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Unemployment Rate (%)'
).add_to(m)

# Layer toggle so the named 'choropleth' layer can be switched on and off
folium.LayerControl().add_to(m)

m



In [3]:
from folium import plugins

# Dual map whose two panes are reachable as m.m1 and m.m2. tiles=None because
# each pane receives its own tile layer further down.
m = plugins.DualMap(
    location=(35.7243645,139.5105908),
    tiles=None,
    zoom_start=16,
)

# One feature group attached to the dual map itself and one to each pane
fg_both = folium.FeatureGroup(name='markers_both').add_to(m)
fg_1 = folium.FeatureGroup(name='markers_1').add_to(m.m1)
fg_2 = folium.FeatureGroup(name='markers_2').add_to(m.m2)

# A different base tile layer for each pane
folium.TileLayer('openstreetmap').add_to(m.m1)
folium.TileLayer('cartodbpositron').add_to(m.m2)

# The red marker goes into the shared group; the default markers go into the per-pane groups
icon_red = folium.Icon(color='red')
folium.Marker(
    (35.7243645,139.5105908),
    tooltip='both',
    icon=icon_red,
).add_to(fg_both)
folium.Marker(
    (35.7243645,139.5116918),
    tooltip='1',
).add_to(fg_1)
folium.Marker(
    (35.7243645,139.5105938),
    tooltip='2',
).add_to(fg_2)

# Always-expanded layer control, then write the map to disk and display it
folium.LayerControl(collapsed=False).add_to(m)
m.save('sheePark2.html')
m