# 脚本介绍
下载全球实时碳数据的脚本

In [1]:
import requests

from bs4 import BeautifulSoup
from itertools import chain
import re
import pandas as pd
import datetime
from itertools import chain

import os
import shutil
import time
import random

from tqdm import tqdm

import plotly.graph_objects as go


def _parse_carbon_script(script_text):
    """Convert the chart-definition JavaScript of a data page into a wide table.

    The text is scanned for three kinds of fragments: `"name":<year>`,
    `"data":[v1,v2,...]` and `text:<title><br />`.  The n-th year and the n-th
    data list are treated as one series.  Each chart title is repeated once per
    distinct year, i.e. the code assumes the series are ordered chart by chart
    and that every chart carries one series per year.

    :param script_text: content of the <script> element holding the charts
    :return: DataFrame with a `date` column plus one column per chart title
    :raises RuntimeError: if no usable series can be extracted
    """
    # Raw strings: the previous non-raw patterns contained invalid escape
    # sequences such as "\:"; these match exactly the same text.
    all_year = re.findall(r'"name":(\w+)', script_text)
    all_value = re.findall(r'"data":\[(.*?)\]', script_text)
    titles = re.findall(r'text:(.*?)<br />', script_text)

    # Chart titles are quoted JavaScript strings; drop the quotes and leading blanks.
    type_names = [title.replace("'", "").replace('"', "").lstrip() for title in titles]
    year_count = len(set(all_year))
    series_types = list(chain.from_iterable([name] * year_count for name in type_names))

    if not all_value:
        raise RuntimeError("页面脚本中没有解析到任何数据序列")
    if len(all_year) < len(all_value) or len(series_types) < len(all_value):
        raise RuntimeError("解析到的年份或类别数量少于数据序列数量")

    frames = []
    # zip stops at the shortest input, which is all_value after the check above.
    for year, raw_values, series_type in zip(all_year, all_value, series_types):
        if not raw_values.strip():
            # An empty `"data":[]` list contributes no rows.
            continue
        values = [float(token) for token in raw_values.split(',')]
        # The k-th value of a series is dated k days after 1 January of its year.
        first_day = datetime.date(year=int(year), month=1, day=1)
        frames.append(pd.DataFrame({
            'value': values,
            'type': series_type,
            'date': [first_day + datetime.timedelta(days=offset)
                     for offset in range(len(values))],
        }))

    if not frames:
        raise RuntimeError("页面脚本中没有解析到任何数据序列")

    wide = pd.concat(frames).pivot_table(index=['date'], columns=['type'])
    # pivot_table yields ('value', <type>) column pairs; keep only the type name.
    wide.columns = [column[1] for column in wide.columns.tolist()]
    return wide.reset_index()


def get_carbon_data(by: str = 'China', timeout: float = 30) -> pd.DataFrame:
    """Download the daily carbon series of one region as a DataFrame.

    The data is scraped from the chart scripts embedded in
    https://carbonmonitor.org.cn/user/data.php?by=<by>.

    :param by: region code; must be one of the entries of `location_list` below
    :param timeout: seconds to wait for the web server before giving up
    :return: DataFrame with a `date` column plus one column per chart on the page
    :raises ValueError: if `by` is not a supported region code
    :raises RuntimeError: if the page does not contain the expected chart script
    """
    location_list = ['WORLD', 'China', 'India', 'US', 'EU27', 'Russia', 'Japan',
                     'Brazil', 'UK', 'France', 'Italy', 'Germany', 'Spain', 'ROW']

    if by not in location_list:
        raise ValueError(f"你输入的 by 参数应该在这个列表内: {', '.join(location_list)}")

    web = requests.get(url=f"https://carbonmonitor.org.cn/user/data.php?by={by}",
                       timeout=timeout)
    # Fail here on an HTTP error instead of trying to parse an error page.
    web.raise_for_status()

    soup = BeautifulSoup(web.content, 'lxml')
    scripts = soup.find_all(name='script', attrs={'type': 'text/javascript'})
    # The chart definitions are read from the second-to-last matching <script>.
    if len(scripts) < 2 or scripts[-2].string is None:
        raise RuntimeError("未能在页面中找到包含数据的 script 标签，网站结构可能已发生变化")

    return _parse_carbon_script(scripts[-2].string)


# Example usage: fetch the series for the 'WORLD' region; the returned DataFrame is shown below.
get_carbon_data(by='WORLD')

Unnamed: 0,date,Domestic Aviation,Ground Transport,Industry,International Aviation,Power,Residential,全球
0,2019-01-01,0.84,14.16,22.78,1.48,36.38,17.80,95.00
1,2019-01-02,0.97,15.69,24.67,1.62,39.65,18.52,102.98
2,2019-01-03,1.01,17.35,25.22,1.67,40.63,18.38,106.13
3,2019-01-04,1.00,17.39,25.45,1.69,41.07,18.08,106.61
4,2019-01-05,0.92,16.18,25.03,1.74,40.14,17.16,102.98
...,...,...,...,...,...,...,...,...
1030,2021-10-27,0.90,18.69,26.02,1.06,33.91,8.45,90.87
1031,2021-10-28,0.93,18.42,26.82,1.12,34.81,8.42,92.37
1032,2021-10-29,0.96,18.70,26.39,1.16,34.20,8.44,91.75
1033,2021-10-30,0.84,17.30,25.93,1.20,33.33,8.31,88.85


## 批量下载数据

In [2]:

dir_name = "all_region_data"

# Always start from an empty output directory: anything left by a previous
# run is removed before the directory is (re)created.
if os.path.exists(path=dir_name):
    shutil.rmtree(path=dir_name)
os.makedirs(name=dir_name)

# Region codes to download; the same codes get_carbon_data accepts.
region_list = [
    'WORLD', 'China', 'India', 'US', 'EU27', 'Russia', 'Japan',
    'Brazil', 'UK', 'France', 'Italy', 'Germany', 'Spain', 'ROW',
]

# Chinese display names, in the same order as region_list.  The plotting cells
# use them both as legend labels and as column names of the per-region tables.
region_chinese_list = [
    '全球', '中国', '印度', '美国', '欧洲（欧盟及英国）', '俄罗斯', '日本',
    '巴西', '英国', '法国', '意大利', '德国', '西班牙', '其它',
]

# Save one CSV per region, named after the region code.
for region_code in tqdm(region_list):
    # Random pause of 0-4 whole seconds before each request
    # (presumably to go easy on the server).
    pause_seconds = random.randint(0, 4)
    time.sleep(pause_seconds)
    region_table = get_carbon_data(by=region_code)
    region_table.to_csv(f"{dir_name}/{region_code}.csv", index=False)



100%|██████████| 14/14 [00:34<00:00,  2.47s/it]


## 可视化

In [3]:
## Download the data of every region
def download_region(by):
    """Return the table of region `by` with an added `region_name` column.

    :param by: region code, passed straight to get_carbon_data
    :return: the region's DataFrame; every row carries `by` in `region_name`
    """
    return get_carbon_data(by=by).assign(region_name=by)


# One labelled table per region, in region_list order (tqdm shows the progress).
all_data_list = list(map(download_region, tqdm(region_list)))

100%|██████████| 14/14 [00:10<00:00,  1.33it/s]


In [4]:
# Union of the column names found across all downloaded tables.
set(chain.from_iterable(frame.columns.tolist() for frame in all_data_list))

{'Domestic Aviation',
 'Ground Transport',
 'Industry',
 'International Aviation',
 'Power',
 'Residential',
 'date',
 'region_name',
 '中国',
 '俄罗斯',
 '全球',
 '其它',
 '印度',
 '巴西',
 '德国',
 '意大利',
 '日本',
 '欧洲（欧盟及英国）',
 '法国',
 '美国',
 '英国',
 '西班牙'}

### 对比各地区数据

In [5]:
# Draw one line per region, using the column that is named after the region
# itself (e.g. '全球' in the WORLD table), and save the chart as an HTML file.
fig = go.Figure()
for position, region in enumerate(tqdm(region_chinese_list)):
    region_frame = all_data_list[position]
    # China is the only trace drawn with markers on top of the line.
    trace_mode = 'lines+markers' if region == '中国' else 'lines'
    fig.add_trace(go.Scatter(x=region_frame['date'],
                             y=region_frame[region],
                             name=region,
                             mode=trace_mode))

fig.update_layout(
    template='simple_white',
    xaxis={
        # Quick range buttons above the plot.
        "rangeselector": {
            "buttons": [
                {"count": 1, "label": "1m", "step": "month", "stepmode": "backward"},
                {"count": 6, "label": "6m", "step": "month", "stepmode": "backward"},
                {"count": 1, "label": "YTD", "step": "year", "stepmode": "todate"},
                {"count": 1, "label": "1y", "step": "year", "stepmode": "backward"},
                {"step": "all"},
            ],
        },
        # Range slider underneath the plot.
        "rangeslider": {"visible": True},
        "type": "date",
    },
    title="全球各地区实时碳数据",
    xaxis_title="date",
    yaxis_title="MtCO_2 per day",
    legend_title="地区",
    font={
        "family": "Courier New, monospace",
        "size": 18,
        "color": "RebeccaPurple",
    },
)
# The figure is only written to disk; call fig.show() to display it inline.
fig.write_html('全球碳数据.html')

100%|██████████| 14/14 [00:00<00:00, 89.64it/s]


### 各地区的各行业对比

In [6]:
def plot_hangye(hangye):
    """Plot the daily emissions of one sector for every region and save it as HTML.

    One trace is drawn per entry of `region_chinese_list`, taking the `hangye`
    column of the table at the same position in `all_data_list`.

    :param hangye: sector column to plot; one of 'Domestic Aviation',
        'Ground Transport', 'Industry', 'Power', 'Residential'
    :return: None; the figure is written to '<hangye>.html'
    :raises ValueError: if `hangye` is not one of the supported sectors
    """
    # 'International Aviation' is intentionally not offered here.
    # NOTE(review): presumably because not every region reports it - confirm.
    hangye_list = ['Domestic Aviation',
                   'Ground Transport',
                   'Industry',
                   'Power',
                   'Residential']

    if hangye not in hangye_list:
        raise ValueError(f"输入的:{hangye} 不在列表里面")

    fig = go.Figure()
    for index, region_name in enumerate(tqdm(region_chinese_list)):
        region_frame = all_data_list[index]
        if hangye not in region_frame.columns:
            # A region without this sector has nothing to draw.
            continue
        # Every region plots the requested sector column.  Previously all
        # regions except China plotted the column named after the region,
        # so the chart did not compare the sector at all.
        fig.add_trace(go.Scatter(x=region_frame['date'],
                                 y=region_frame[hangye],
                                 name=region_name,
                                 # China is highlighted with markers.
                                 mode='lines+markers' if region_name == '中国' else 'lines'))

    fig.update_layout(template='simple_white',
                      xaxis=dict(
                          # Quick range buttons above the plot.
                          rangeselector=dict(
                              buttons=[
                                  dict(count=1, label="1m", step="month", stepmode="backward"),
                                  dict(count=6, label="6m", step="month", stepmode="backward"),
                                  dict(count=1, label="YTD", step="year", stepmode="todate"),
                                  dict(count=1, label="1y", step="year", stepmode="backward"),
                                  dict(step="all"),
                              ]
                          ),
                          rangeslider=dict(visible=True),
                          type="date"
                      ),
                      title=f"行业: {hangye} 实时碳数据",
                      xaxis_title="date",
                      yaxis_title="MtCO_2 per day",
                      legend_title="地区",
                      font=dict(
                          family="Courier New, monospace",
                          size=18,
                          color="RebeccaPurple"
                      ))
    # The figure is only written to disk; call fig.show() to display it inline.
    fig.write_html(f'{hangye}.html')

# Example: chart the 'Industry' sector across regions; plot_hangye saves it as 'Industry.html'.
plot_hangye(hangye='Industry')

100%|██████████| 14/14 [00:00<00:00, 88.86it/s]


In [7]:
# Sectors to chart, one HTML file per sector.  Each sector is listed exactly
# once; 'Industry' used to appear twice, so its chart was rendered and written
# twice.  'International Aviation' is not charted.
hangye_list = ['Domestic Aviation',
               'Industry',
               'Ground Transport',
               'Power',
               'Residential']

for hangye_name in hangye_list:
    plot_hangye(hangye_name)

100%|██████████| 14/14 [00:00<00:00, 95.39it/s]
100%|██████████| 14/14 [00:00<00:00, 87.26it/s]
100%|██████████| 14/14 [00:00<00:00, 81.88it/s]
100%|██████████| 14/14 [00:00<00:00, 73.57it/s]
100%|██████████| 14/14 [00:00<00:00, 79.81it/s]
100%|██████████| 14/14 [00:00<00:00, 77.42it/s]
