In [164]:
# 导包
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from pylab import mpl
from pyecharts import options as opts
from pyecharts.charts import Bar,Line,Pie,Tab
from pyecharts.globals import ThemeType

# 设置显示中文字体
mpl.rcParams["font.sans-serif"] = ["SimHei"]
# 设置正常显示符号
mpl.rcParams["axes.unicode_minus"] = False

In [165]:
#1.引入tsv文件
df = pd.read_csv(r'5月14日\gapminder.tsv',sep='\t',header=0)
df.head

<bound method NDFrame.head of           country continent  year  lifeExp       pop   gdpPercap
0     Afghanistan      Asia  1952   28.801   8425333  779.445314
1     Afghanistan      Asia  1957   30.332   9240934  820.853030
2     Afghanistan      Asia  1962   31.997  10267083  853.100710
3     Afghanistan      Asia  1967   34.020  11537966  836.197138
4     Afghanistan      Asia  1972   36.088  13079460  739.981106
...           ...       ...   ...      ...       ...         ...
1699     Zimbabwe    Africa  1987   62.351   9216418  706.157306
1700     Zimbabwe    Africa  1992   60.377  10704340  693.420786
1701     Zimbabwe    Africa  1997   46.809  11404948  792.449960
1702     Zimbabwe    Africa  2002   39.989  11926563  672.038623
1703     Zimbabwe    Africa  2007   43.487  12311143  469.709298

[1704 rows x 6 columns]>

# 一.绘制条形图，统计不同大陆的国家数量。

In [166]:
#value_counts()
#2.不同大陆
continents = df["continent"].unique().tolist()
continents


['Asia', 'Europe', 'Africa', 'Americas', 'Oceania']

In [10]:
#3.不同大陆的数量
continent_counts = df.groupby("continent")["country"].nunique().tolist()
print("数量：\n",continent_counts,"\n","--"*10)
continent_counts = df.groupby("continent")["country"].nunique()
print("不同大陆数量：\n",continent_counts,"\n","--"*10)
#重置数据帧的索引，并使用默认索引。如果数据帧具有多重索引，则此方法可以删除一个或多个level。
continent_counts = continent_counts.reset_index()
print("添加索引后的不同大陆数量：\n",continent_counts,"\n","--"*10)

数量：
 [52, 25, 33, 30, 2] 
 --------------------
不同大陆数量：
 continent
Africa      52
Americas    25
Asia        33
Europe      30
Oceania      2
Name: country, dtype: int64 
 --------------------
添加索引后的不同大陆数量：
   continent  country
0    Africa       52
1  Americas       25
2      Asia       33
3    Europe       30
4   Oceania        2 
 --------------------


In [167]:
#4.创建一个字典
continent_num = {
    "Africa":1,
    "Americas":2,
    "Asia":3,
    "Europe":4,
    "Oceania":5,
}
continent_num

{'Africa': 1, 'Americas': 2, 'Asia': 3, 'Europe': 4, 'Oceania': 5}

In [168]:
#5.根据字典对应的continent排序
df['continent_num'] = df['continent'].map(continent_num)
df.head()

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,continent_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,3
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,3
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,3
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,3
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,3


In [169]:
#6.对continent不同大陆的数量进行分组
#y_line = df.groupby('continent')['continent_num'].unique().sum()
y_line = df.groupby("continent_num")['country'].nunique()
y_line = y_line.reset_index()
y_line

Unnamed: 0,continent_num,country
0,1,52
1,2,25
2,3,33
3,4,30
4,5,2


In [170]:
#7.绘制条形图
bar_continent = Bar(init_opts = 
          opts.InitOpts(width='500px',
                        height="300px",
                       theme = ThemeType.VINTAGE,
                        #动画
                        animation_opts= opts.AnimationOpts(
                                        animation = True,                                        
                                        animation_duration = 1000,#持续时间
                                        animation_delay = 300,#延迟
                                        animation_easing = "elasticOut"#动画方式
                                    )
                        )
         )
continents=['Africa', 'Americas', 'Asia', 'Europe', 'Oceania']
bar_continent.add_xaxis(continents)
bar_continent.add_yaxis(series_name = "该大陆国家数量",
            # y_axis = continent_counts
            y_axis = np.round(y_line['country'],0).tolist()#国家数量
             )
bar_continent.set_global_opts(title_opts =
                    opts.TitleOpts(
                        title = "不同大陆的国家数量"
                        ),                    
                     toolbox_opts = opts.ToolboxOpts(is_show =True),#工具箱
                     datazoom_opts = opts.DataZoomOpts(is_show=True,type_='slider')#数据缩放
                    )

bar_continent.render_notebook()

# 二.绘制折线图，统计不同时期平均GDP和平均寿命情况（进阶：统计不同大陆在不同时间的平均GDP和平均寿命情况）。

In [171]:
#2.分组：不同时期平均GDP和平均寿命
year_mean = df.groupby("year")[["gdpPercap","lifeExp"]].mean()
year_mean

Unnamed: 0_level_0,gdpPercap,lifeExp
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1952,3725.276046,49.05762
1957,4299.408345,51.507401
1962,4725.812342,53.609249
1967,5483.653047,55.67829
1972,6770.082815,57.647386
1977,7313.166421,59.570157
1982,7518.901673,61.533197
1987,7900.920218,63.212613
1992,8158.608521,64.160338
1997,9090.175363,65.014676


In [174]:
#3.绘制折线图
line_year = Line(init_opts = 
          opts.InitOpts(width='500px',
                        height="300px",
                       theme = ThemeType.VINTAGE,
                        #动画
                        animation_opts= opts.AnimationOpts(
                                        animation = True,                                        
                                        animation_duration = 1000,#持续时间
                                        animation_delay = 300,#延迟
                                        animation_easing = "elasticOut"#动画方式
                                    )
                        )
         )
years=['1952','1957','1962','1967','1972','1977','1982','1987','1992','1997','2002','2007']
line_year.add_xaxis(years)
line_year.add_yaxis(series_name = "gdpPercap平均值",
            # y_axis = continent_counts
            y_axis = np.round(year_mean['gdpPercap'],2).tolist()#gdp平均
             )
line_year.add_yaxis(series_name = "lifeExp平均值",
            # y_axis = continent_counts
            y_axis = np.round(year_mean['lifeExp'],1).tolist()#生命平均值
             )
line_year.set_global_opts(title_opts =
                    opts.TitleOpts(
                        title = "不同大陆的国家数量"
                        ),                    
                     toolbox_opts = opts.ToolboxOpts(is_show =True),#工具箱
                     datazoom_opts = opts.DataZoomOpts(is_show=True,type_='slider')#数据缩放
                    )

line_year.render_notebook()

In [196]:
#4-1.进阶：统计不同大陆在不同时间的平均GDP和平均寿命情况）
#需指定numeric_only或仅选择对函数有效的列
continent_year = df.groupby(["year","continent"])[["gdpPercap","lifeExp"]].mean()

#continent_year =continent_year.set_index
continent_year =continent_year.reset_index(inplace=True, drop=True)
# 设置X轴（年份）和分组依据（大陆）  
#years = df2[year].unique()  
# print(years)
#continents = df2['continent'].unique()  
#print(continents)
continent_year

AttributeError: 'DataFrame' object has no attribute 'reset_indext'

In [134]:
#小验证：
year_1952 = df[df["year"]==1952].groupby("continent")['gdpPercap'].mean()
year_1952
index = pd.MultiIndex.from_product([continents, years], names=['continent', 'year'])
#index

continent
Africa       1252.572466
Americas     4079.062552
Asia         5195.484004
Europe       5661.057435
Oceania     10298.085650
Name: gdpPercap, dtype: float64

In [136]:
#4-2.绘制折线图


bar_year = Bar(init_opts = 
          opts.InitOpts(width='500px',
                        height="300px",
                       theme = ThemeType.VINTAGE,
                        #动画
                        animation_opts= opts.AnimationOpts(
                                        animation = True,                                        
                                        animation_duration = 1000,#持续时间
                                        animation_delay = 300,#延迟
                                        animation_easing = "elasticOut"#动画方式
                                    )
                        )
         )
continents=['Africa', 'Americas', 'Asia', 'Europe', 'Oceania']
years=['1952','1957','1962','1967','1972','1977','1982','1987','1992','1997','2002','2007']
bar_year.add_xaxis()
#bar_year.add_xaxis(continent_year.index.tolist())

bar_year.add_yaxis(series_name = "gdpPercap平均值",
            # y_axis = continent_counts
            y_axis = np.round(year_mean['gdpPercap'],2).tolist()#gdp平均
             )
bar_year.add_yaxis(series_name = "lifeExp平均值",
            # y_axis = continent_counts
            y_axis = np.round(year_mean['lifeExp'],1).tolist()#生命平均值
             )
bar_year.set_global_opts(title_opts =
                    opts.TitleOpts(
                        title = "不同大陆的国家数量"
                        ),                    
                     toolbox_opts = opts.ToolboxOpts(is_show =True),#工具箱
                     datazoom_opts = opts.DataZoomOpts(is_show=True,type_='slider')#数据缩放
                    )

bar_year.render_notebook()

SyntaxError: invalid syntax (1649017413.py, line 19)

In [161]:
from pyecharts import options as opts  
from pyecharts.charts import Line  
import pandas as pd  
  
# 假设你已经有了一个DataFrame，如下所示：  
# data = {  
#     'continent': ['Asia', 'Asia', 'Europe', 'Europe', ...],  
#     'year': [2000, 2001, 2000, 2001, ...],  
#     'GDP': [1000, 1100, 1200, 1300, ...],  
#     'Life Expectancy': [70, 71, 72, 73, ...]  
# }  
# df = pd.DataFrame(data)  
  
# 为了示例，我们创建一个简单的DataFrame  
data = {  
    'continent': ['Asia', 'Asia', 'Europe', 'Europe'],  
    'year': [2000, 2001, 2000, 2001],  
    'GDP': [1000, 1100, 1200, 1300],  
    'Life Expectancy': [70, 71, 72, 73]  
}  
df = pd.DataFrame(data)
  
# 设置X轴（年份）和分组依据（大陆）  
years = df['year'].unique()  
continents = df['continent'].unique()  
  
# 初始化Line对象  
line = Line()  
line.add_xaxis(years.tolist())  
  
# 为每个大陆添加GDP和平均寿命序列  
for continent in continents:  
    continent_df = df[df['continent'] == continent]  
    gdp_data = continent_df['GDP'].tolist()  
    life_expectancy_data = continent_df['Life Expectancy'].tolist()  
      
    line.add_yaxis(f"{continent} GDP", gdp_data, label_opts=opts.LabelOpts(is_show=False))  
      
    # 如果需要第二个Y轴来显示平均寿命  
    if continent == continents[0]:  # 只为第一个大陆添加第二个Y轴作为示例  
        line.extend_axis(  
            yaxis=opts.AxisOpts(  
                name="Life Expectancy",  
                type_="value",  
                position="right",  
                axisline_opts=opts.AxisLineOpts(linestyle_opts=opts.LineStyleOpts(color="#d14a61"))  
            )  
        )  
        line.set_series_opts(yaxis_index=1, label_opts=opts.LabelOpts(formatter="{value}", position="right"))  
    line.add_yaxis(f"{continent} Life Expectancy", life_expectancy_data, yaxis_index=1, label_opts=opts.LabelOpts(is_show=False))  
  
# 设置全局配置项（可选）  
line.set_global_opts(  
    title_opts=opts.TitleOpts(title="Average GDP and Life Expectancy per Continent"),  
    tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),  
    legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%")  
)  
  
# 渲染图表到HTML文件  
line.render_notebook()

In [None]:
label_group_bar_table()

In [None]:
#三.绘制饼图，统计2007年不同大陆人口占比情况（进阶：统计不同时期不同大陆的人口占比情况）。


In [None]:
#四.将上面三个图运用Tab进行组合。