# Plot.ly Tutorial

**Based on data from the World University Rankings dataset (https://www.kaggle.com/mylesoneill/world-university-rankings#timesData.csv)**

In this kernel, we will learn how to use plotly, a visualization library for Python that makes interactive, publication-quality graphs online.

In [33]:
#Import data
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

In [34]:
# Path to the rankings CSV (timesData.csv from the Kaggle World University Rankings dataset).
file = "timesData.csv"
# Parse the whole file into a DataFrame; later cells slice and plot from `data`.
data = pd.read_csv(filepath_or_buffer=file)

In [35]:
# Preview the first rows to check the column names and how the values are formatted.
data.head()

Unnamed: 0,world_rank,university_name,country,teaching,international,research,citations,income,total_score,num_students,student_staff_ratio,international_students,female_male_ratio,year
0,1,Harvard University,United States of America,99.7,72.4,98.7,98.8,34.5,96.1,20152,8.9,25%,,2011
1,2,California Institute of Technology,United States of America,97.7,54.6,98.0,99.9,83.7,96.0,2243,6.9,27%,33 : 67,2011
2,3,Massachusetts Institute of Technology,United States of America,97.8,82.3,91.4,99.9,87.5,95.6,11074,9.0,33%,37 : 63,2011
3,4,Stanford University,United States of America,98.3,29.5,98.1,99.2,64.3,94.3,15596,7.8,22%,42 : 58,2011
4,5,Princeton University,United States of America,90.9,70.3,95.4,99.9,-,94.2,7929,8.4,27%,45 : 55,2011


In [36]:
# Summarise the dtype and non-null count of every column.
# NOTE: per the output below, world_rank, international, income, total_score and
# num_students load as object dtype (not numeric), so they need cleaning before
# being used as numbers.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2603 entries, 0 to 2602
Data columns (total 14 columns):
world_rank                2603 non-null object
university_name           2603 non-null object
country                   2603 non-null object
teaching                  2603 non-null float64
international             2603 non-null object
research                  2603 non-null float64
citations                 2603 non-null float64
income                    2603 non-null object
total_score               2603 non-null object
num_students              2544 non-null object
student_staff_ratio       2544 non-null float64
international_students    2536 non-null object
female_male_ratio         2370 non-null object
year                      2603 non-null int64
dtypes: float64(4), int64(1), object(9)
memory usage: 284.8+ KB


# Line Charts
Line Charts Example: Citation and Research vs World Rank of Top 100 Universities

In [37]:
import plotly.graph_objs as go
import plotly.offline as py
# Set up plotly's offline notebook mode before the py.iplot(...) calls in later cells.
# NOTE(review): connected=True is understood to load plotly.js from the CDN rather than
# embedding it in the notebook — confirm against the plotly version in use.
py.init_notebook_mode(connected=True)

In [38]:
# Keep only the first TOP_N rows of the rankings table for the line chart.
# NOTE(review): the preview shows year 2011 ranked 1, 2, 3, ... at the top of the file,
# so this is presumably the 2011 top 100 — confirm the CSV ordering.
TOP_N = 100
df = data.iloc[:TOP_N]
df.head()

Unnamed: 0,world_rank,university_name,country,teaching,international,research,citations,income,total_score,num_students,student_staff_ratio,international_students,female_male_ratio,year
0,1,Harvard University,United States of America,99.7,72.4,98.7,98.8,34.5,96.1,20152,8.9,25%,,2011
1,2,California Institute of Technology,United States of America,97.7,54.6,98.0,99.9,83.7,96.0,2243,6.9,27%,33 : 67,2011
2,3,Massachusetts Institute of Technology,United States of America,97.8,82.3,91.4,99.9,87.5,95.6,11074,9.0,33%,37 : 63,2011
3,4,Stanford University,United States of America,98.3,29.5,98.1,99.2,64.3,94.3,15596,7.8,22%,42 : 58,2011
4,5,Princeton University,United States of America,90.9,70.3,95.4,99.9,-,94.2,7929,8.4,27%,45 : 55,2011


In [39]:
# Line chart: citations and research scores against world rank for the rows in `df`.
# Both traces share the same x values and carry the university name as point text.
chart1 = go.Scatter(
    x=df.world_rank,
    y=df.citations,
    mode="lines+markers",
    name="citations",
    marker=dict(color='rgba(16, 112, 2, 0.8)'),
    text=df.university_name,
)
chart2 = go.Scatter(
    x=df.world_rank,
    y=df.research,
    mode="lines+markers",
    name="research",
    marker=dict(color='rgba(86, 26, 80, 0.8)'),
    text=df.university_name,
)
data1 = [chart1, chart2]
layout = dict(
    title='Citation and Research vs World Rank of Top 100 Universities',
    xaxis=dict(title='World Rank', ticklen=5, zeroline=False),
    # Label the y axis too, so the figure can be read on its own; the scatter
    # chart's layout in this notebook already labels both axes.
    yaxis=dict(title='Score', ticklen=5, zeroline=False),
)
# Plain dict figure (traces + layout), rendered inline by plotly offline.
fig = dict(data=data1, layout=layout)
py.iplot(fig)

In [40]:
# Scatter chart: citation score against world rank, one trace per ranking year.
# For each year, keep the first 100 rows of that year's slice of `data`.
df2011 = data[data.year == 2011].iloc[:100]
df2014 = data[data.year == 2014].iloc[:100]
# trace for 2011
trace1 = go.Scatter(
    x=df2011.world_rank,
    y=df2011.citations,
    mode="markers",
    name="2011",
    marker=dict(color='rgba(255, 128, 255, 0.8)'),
    text=df2011.university_name,
)
# trace for 2014
trace2 = go.Scatter(
    x=df2014.world_rank,
    y=df2014.citations,
    mode="markers",
    name="2014",
    marker=dict(color='rgba(255, 128, 2, 0.8)'),
    text=df2014.university_name,
)
data2 = [trace1, trace2]
layout = dict(
    # The title names the years actually plotted (2011 and 2014); it previously
    # claimed 2014, 2015 and 2016, which did not match the traces.
    title='Citation vs world rank of top 100 universities in 2011 and 2014',
    xaxis=dict(title='World Rank', ticklen=5, zeroline=False),
    yaxis=dict(title='Citation', ticklen=5, zeroline=False),
)
fig = dict(data=data2, layout=layout)
py.iplot(fig)

# Pie Chart

In [41]:
# data preparation: first seven rows of the 2016 rankings
df2016 = data[data.year == 2016].iloc[:7]
pie1 = df2016.num_students
# num_students is an object (string) column that uses ',' as a thousands separator
# (Harvard has 20152 students), so drop the comma before converting:
# "20,152" -> "20152" -> 20152.0. Replacing it with '.' would give 20.152 instead.
# NOTE(review): assumes none of these rows has a missing num_students value.
pie1_list = [float(each.replace(',', '')) for each in pie1]
labels = df2016.university_name
# figure: a single donut-style pie ("hole") placed in the left half of the canvas
fig = {
  "data": [
    {
      "values": pie1_list,
      "labels": labels,
      "domain": {"x": [0, .5]},
      "name": "Number Of Students Rates",
      "hoverinfo":"label+percent+name",
      "hole": .3,
      "type": "pie"
    },],
  "layout": {
        "title":"Universities Number of Students rates",
        "annotations": [
            { "font": { "size": 20},
              "showarrow": False,
              "text": "Number of Students",
                "x": 0.20,
                "y": 1
            },
        ]
    }
}
py.iplot(fig)