# 脚本介绍
下载全球实时碳数据的脚本

In [1]:
import requests

from bs4 import BeautifulSoup
from itertools import chain
import re
import pandas as pd
import datetime


def get_carbon_data(by='China'):
    """
    by: 输入对应的区域，具体的可以参考这个网站
    https://carbonmonitor.org.cn/user/data.php?by=WORLD
    :return 返回数据框
    """
    location_list = ['WORLD', 'China', 'India', 'US', 'EU27', 'Russia', 'Japan',
                     'Brazil', 'UK', 'France', 'Italy', 'Germany', 'Spain', 'ROW']

    if by not in location_list:
        raise ValueError(f"你输入的 by 参数应该在这个列表内: {' ,'.join(location_list)}")
    web = requests.get(url=f"https://carbonmonitor.org.cn/user/data.php?by={by}")

    soup = BeautifulSoup(web.content, 'lxml')

    target_str = soup.find_all(name='script', attrs={'type': 'text/javascript'})[-2].string

    all_year = re.findall(pattern='''\"name\"\:(\w+)''', string=target_str)
    all_value = re.findall(pattern='''\"data\"\:\[(.*?)\]''', string=target_str)

    # type_list = ['全国', '电力', '地面运输', '工业', '居民消费', '国内航空']
    type_list = re.findall(pattern='''text\:(.*?)\<br \/\>(.*?)''', string=target_str)
    type_list = [''.join(i).replace("'", "").replace('"', "").lstrip() for i in type_list]
    type_list = list(chain(*[[i] * len(set(all_year)) for i in type_list]))
    # print(type_list)

    def generate_pd(i):
        temp_data = pd.DataFrame({'value': [float(i) for i in all_value[i].split(',')]})
        # temp_data['year'] = int(all_year[i])
        temp_data['type'] = type_list[i]
        temp_data['date'] = [datetime.date(year=int(all_year[i]), month=1, day=1) + datetime.timedelta(days=index) for
                             index in range(0, temp_data.shape[0])]
        return temp_data

    allresult = pd.concat([generate_pd(i) for i in range(0, len(all_value))])
    allresult = allresult.pivot_table(index=['date'], columns=['type'])
    allresult.columns = [i[1] for i in allresult.columns.tolist()]
    allresult = allresult.reset_index()
    return allresult



# 使用脚本
get_carbon_data(by='WORLD')

Unnamed: 0,date,Domestic Aviation,Ground Transport,Industry,International Aviation,Power,Residential,全球
0,2019-01-01,0.84,14.16,22.78,1.48,36.38,17.80,95.00
1,2019-01-02,0.97,15.69,24.67,1.62,39.65,18.52,102.98
2,2019-01-03,1.01,17.35,25.22,1.67,40.63,18.38,106.13
3,2019-01-04,1.00,17.39,25.45,1.69,41.07,18.08,106.61
4,2019-01-05,0.92,16.18,25.03,1.74,40.14,17.16,102.98
...,...,...,...,...,...,...,...,...
1030,2021-10-27,0.90,18.69,26.02,1.06,33.91,8.45,90.87
1031,2021-10-28,0.93,18.42,26.82,1.12,34.81,8.42,92.37
1032,2021-10-29,0.96,18.70,26.39,1.16,34.20,8.44,91.75
1033,2021-10-30,0.84,17.30,25.93,1.20,33.33,8.31,88.85


## 批量下载数据

In [4]:
import os
import shutil
import time
import random

from tqdm import tqdm
dir_name = "all_region_data"


if os.path.exists(path=dir_name):
    shutil.rmtree(path=dir_name)
    os.makedirs(name=dir_name)

else:
    os.makedirs(name=dir_name)

region_list = ['WORLD', 'China', 'India', 'US', 'EU27', 'Russia', 'Japan',
                     'Brazil', 'UK', 'France', 'Italy', 'Germany', 'Spain', 'ROW']


for temp_reion in tqdm(region_list):
    time.sleep(random.random(0, 4))
    tempdata = get_carbon_data(by=temp_reion)
    tempdata.to_csv(f"{dir_name}/{temp_reion}.csv", index=False)



100%|██████████| 14/14 [00:09<00:00,  1.43it/s]
