# **Python para análise de dados (Pandas)** - *Fernanda Santos*
Complementado por vichShir

### Carregar dados

In [1]:
#importando a biblioteca pandas
import pandas as pd

In [14]:
# Load the Gapminder dataset (semicolon-separated CSV) directly from GitHub.
DATA_URL = "https://raw.githubusercontent.com/vichShir/dio-cognizant-analise-dados-2021/master/Dados/Gapminder.csv"

# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0, where
# it raises TypeError. `on_bad_lines="warn"` is the documented replacement and
# keeps the old behaviour: malformed rows are skipped and a warning is emitted.
df = pd.read_csv(DATA_URL, sep=";", on_bad_lines="warn")

In [15]:
# Show the first 5 rows
df.head()

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303
2,Afghanistan,Asia,1962,31.997,10267083,853.10071
3,Afghanistan,Asia,1967,34.02,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106


### Tratar dados

In [17]:
# Rename the original English column labels to Portuguese ones.
# "PIB" receives the source column `gdpPercap`, i.e. it is a per-capita value.
# NOTE: "continente" is lowercase unlike the other labels; later cells index
# df["continente"], so this exact spelling must be kept.
column_names = {
    "country": "Pais",
    "continent": "continente",
    "year": "Ano",
    "lifeExp": "Expectativa de vida",
    "pop": "Pop Total",
    "gdpPercap": "PIB",
}
# Reassign instead of using `inplace=True`: the result is the same for `df`,
# but the cell stays chainable and safe to re-run.
df = df.rename(columns=column_names)

In [18]:
# Show the first 10 rows to check the column labels after renaming
df.head(10)

Unnamed: 0,Pais,continente,Ano,Expectativa de vida,Pop Total,PIB
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303
2,Afghanistan,Asia,1962,31.997,10267083,853.10071
3,Afghanistan,Asia,1967,34.02,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106
5,Afghanistan,Asia,1977,38.438,14880372,786.11336
6,Afghanistan,Asia,1982,39.854,12881816,978.011439
7,Afghanistan,Asia,1987,40.822,13867957,852.395945
8,Afghanistan,Asia,1992,41.674,16317921,649.341395
9,Afghanistan,Asia,1997,41.763,22227415,635.341351


### Análise exploratória dos dados

In [19]:
# Total number of rows and columns
df.shape

(3312, 6)

In [20]:
# List the column labels
df.columns

Index(['Pais', 'continente', 'Ano', 'Expectativa de vida', 'Pop Total', 'PIB'], dtype='object')

In [21]:
# Inspect the dtype pandas inferred for each column
df.dtypes

Pais                    object
continente              object
Ano                      int64
Expectativa de vida    float64
Pop Total                int64
PIB                    float64
dtype: object

In [22]:
# Show the last 15 rows
df.tail(15)

Unnamed: 0,Pais,continente,Ano,Expectativa de vida,Pop Total,PIB
3297,Zambia,Africa,1997,40.238,9417789,1071.353818
3298,Zambia,Africa,2002,39.193,10595811,1071.613938
3299,Zambia,Africa,2007,42.384,11746035,1271.211593
3300,Zimbabwe,Africa,1952,48.451,3080907,406.884115
3301,Zimbabwe,Africa,1957,50.469,3646340,518.764268
3302,Zimbabwe,Africa,1962,52.358,4277736,527.272182
3303,Zimbabwe,Africa,1967,53.995,4995432,569.795071
3304,Zimbabwe,Africa,1972,55.635,5861135,799.362176
3305,Zimbabwe,Africa,1977,57.674,6642107,685.587682
3306,Zimbabwe,Africa,1982,60.363,7636524,788.855041


In [23]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,Ano,Expectativa de vida,Pop Total,PIB
count,3312.0,3312.0,3312.0,3312.0
mean,1980.30163,65.246871,31614890.0,11317.115805
std,16.927294,11.768412,104119300.0,11369.142739
min,1950.0,23.599,59412.0,241.165877
25%,1967.0,58.3375,2678572.0,2514.625266
50%,1982.0,69.61,7557218.0,7838.505123
75%,1996.0,73.65775,19585220.0,17357.87917
max,2007.0,82.67,1318683000.0,113523.1329


#### Continente Asia

In [0]:
# Distinct values of the continent column; the result includes NaN,
# so some rows have no continent assigned.
df["continente"].unique()

array(['Asia', 'Europe', 'Africa', 'Americas', nan, 'FSU', 'Oceania'],
      dtype=object)

In [25]:
# Build a boolean mask for the rows whose continent is "Asia",
# then use it to select the Asian subset of the data.
asia_mask = df["continente"] == "Asia"
asia = df[asia_mask]
asia.head()

Unnamed: 0,Pais,continente,Ano,Expectativa de vida,Pop Total,PIB
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303
2,Afghanistan,Asia,1962,31.997,10267083,853.10071
3,Afghanistan,Asia,1967,34.02,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106


In [26]:
# Sanity check: the filtered frame should contain only the value "Asia"
asia["continente"].unique()

array(['Asia'], dtype=object)

In [36]:
# Mean of the "PIB" column over all Asian rows, printed with two decimals.
asia_pib_mean = asia["PIB"].mean()
print("PIB Asia: ${:.2f}".format(asia_pib_mean))

PIB Asia: $10070.91


#### Agrupamentos

In [28]:
# Number of distinct countries per continent.
# groupby drops NaN keys by default, so rows without a continent are not counted.
df.groupby("continente")["Pais"].nunique()

continente
Africa      51
Americas    25
Asia        41
Europe      35
FSU          6
Oceania      3
Name: Pais, dtype: int64

In [30]:
# Mean life expectancy per year, averaged over every row recorded for that year.
# NOTE(review): the output dips every five years (1952, 1957, 1962, ...) —
# presumably more countries report in those years, changing the mix being
# averaged; confirm before reading the series as a trend.
df.groupby("Ano")["Expectativa de vida"].mean()

Ano
1950    62.002568
1951    65.904167
1952    49.206867
1953    66.674563
1954    67.459817
1955    67.806757
1956    67.950637
1957    51.614590
1958    68.815936
1959    68.226579
1960    68.470837
1961    68.862480
1962    54.035234
1963    69.595735
1964    70.063105
1965    70.259881
1966    70.447526
1967    56.263629
1968    70.689081
1969    70.653896
1970    70.961141
1971    71.103976
1972    58.474481
1973    71.500338
1974    71.778504
1975    71.939218
1976    72.158050
1977    60.429090
1978    72.717567
1979    73.018717
1980    73.064524
1981    73.337399
1982    62.365871
1983    73.787778
1984    74.100741
1985    74.112222
1986    74.452222
1987    63.984860
1988    74.760000
1989    74.924444
1990    74.283437
1991    74.374848
1992    65.008443
1993    74.324545
1994    74.456667
1995    74.552727
1996    75.029394
1997    65.873799
1998    75.569697
1999    75.703636
2000    76.026364
2001    76.257879
2002    66.835695
2003    76.586667
2004    76.921563
2005  

In [45]:
# Top 20 countries by mean "PIB" across all their recorded years,
# ordered from highest to lowest.
(
    df.groupby("Pais")["PIB"]
    .mean()
    .sort_values(ascending=False)
    .head(20)
)

Pais
Kuwait                  65332.910472
Qatar                   60997.795481
Brunei                  54716.019545
Luxembourg              34439.387820
United Arab Emirates    33541.697014
New Caledonia           29859.301112
Switzerland             26733.025627
Norway                  25848.089348
United States           25494.615240
Germany                 24047.168802
Macao, China            22862.097579
Canada                  21782.593493
Netherlands             21295.802522
Denmark                 21159.164825
French Polynesia        20807.513248
Saudi Arabia            20261.743635
Iceland                 20030.522171
Bahamas                 19702.277569
Austria                 19585.143617
Sweden                  19574.051549
Name: PIB, dtype: float64

In [39]:
# Number of non-null observations in the "PIB" column
pib_count = df["PIB"].count()
print("{} observações de PIB".format(pib_count))

3312 observações de PIB


In [40]:
# Mean of the "PIB" column over every row
pib_mean = df["PIB"].mean()
print("PIB: ${:.2f}".format(pib_mean))

PIB: $11317.12


In [41]:
# Sum of the "PIB" column over every row.
# NOTE(review): "PIB" comes from the source column `gdpPercap`, so this adds up
# per-capita values across countries and years — confirm the figure is
# meaningful before reporting it as a total.
pib_total = df["PIB"].sum()
print("Soma PIB: ${:.2f}".format(pib_total))

Soma PIB: $37482287.55
