# 企业洞察

- 近 10 年全球企业 OpenRank 演变图
  - 绘制 Bump Charts
- 近 10 年中国企业 OpenRank 演变图
  - 绘制 Bump Charts
- 中国企业在 GitHub/Gitee 平台上的 OpenRank 占比
  - 我不太会，交给志成


In [2]:
import sys
from typing import List
import time

import requests
from pydantic import BaseModel


class OpenRankEntryItem(BaseModel):
    """Entity that a single leaderboard row refers to."""

    # Display name of the company, e.g. "Baidu".
    name: str


class OpenRankEntry(BaseModel):
    """One row of an OpenRank company leaderboard."""

    # The ranked entity (carries the company name).
    item: OpenRankEntryItem
    # Position in the leaderboard for the period.
    rank: int
    # OpenRank score for the period.
    value: float
    # Presumably the change in rank versus the previous period -- TODO confirm
    # against the data source.
    rankDelta: int
    # Presumably the change in value versus the previous period -- TODO confirm
    # against the data source.
    valueDelta: float


class OpenRank(BaseModel):
    """Top-level payload of one leaderboard JSON file."""

    # Kind of leaderboard; the possible values are not visible in this
    # notebook -- NOTE(review): confirm against the data source.
    type: str
    # Period identifier; appears to be the year (it is stringified and used
    # as the chart's date column).
    time: int
    # Leaderboard rows. Assumed to be ordered best-first, because the notebook
    # takes the first 15 rows as the "top 15" -- TODO confirm.
    data: List[OpenRankEntry]


# In practice the xlab-open-source.oss-cn-beijing bucket only has data from
# 2015 onwards.
year_range = range(2015, 2024)

# One validated OpenRank payload per successfully fetched year, in year order.
chinese_list: List[OpenRank] = []

# Loop invariants, built once instead of on every iteration.
region = "chinese"  # chinese / global
request_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"
}

for year in year_range:
    url = f"http://xlab-open-source.oss-cn-beijing.aliyuncs.com/open_leaderboard/open_rank/company/{region}/{year}.json"

    try:
        # Without a timeout a stalled connection would hang the notebook
        # indefinitely.
        response = requests.get(url, headers=request_headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        # Only network/HTTP failures are handled here; programming errors are
        # left to surface with their own traceback.
        print(f"Error fetching data for year {year}: {e}")
        # Non-zero status so the failure is not reported as a clean exit.
        sys.exit(1)

    if response.status_code == 200:
        raw_dict = response.json()
        chinese_list.append(OpenRank.model_validate(raw_dict))
    else:
        # raise_for_status() only rejects 4xx/5xx, so another success code
        # (e.g. 204) can still reach this branch. Skip the year instead of
        # storing None: every consumer of chinese_list dereferences `.data`.
        print(f"Skipping year {year}: unexpected HTTP status {response.status_code}")

    # Be polite to the server between requests.
    time.sleep(0.5)

# TODO: cache the downloaded data

In [3]:
# Peek at the first row of the first fetched year, dumped as a plain dict.
chinese_list[0].data[0].model_dump()

{'item': {'name': 'Baidu'},
 'rank': 1,
 'value': 2877.23,
 'rankDelta': 0,
 'valueDelta': 2877.23}

In [4]:
# Take the names of the first 15 rows of the last fetched year; these
# companies make up final_list, the set of series drawn in the chart.
final_list = [row.item.name for row in chinese_list[-1].data[:15]]
# `it` stays bound to the same list object so the cell still displays it.
it = final_list
it

['Huawei',
 'Alibaba',
 'Baidu',
 'Ant group',
 'ByteDance',
 'Tencent',
 'PingCAP',
 'ESPRESSIF',
 'Fit2Cloud',
 'StarRocks',
 'Zilliz',
 'EMQ',
 'Deepin',
 'JD',
 'TAOS Data']

In [5]:
import pandas as pd


class BumpChartItem(BaseModel):
    """One data point of the bump chart: one series' value in one period."""

    # Period label; filled with the stringified OpenRank.time.
    date: str
    # Series name; filled with the company name.
    symbol: str
    # Quantity the chart ranks by; filled with the OpenRank value.
    price: float


# Rows for the bump chart. Each row is a plain dict rather than a
# BumpChartItem: the model is used only to validate/coerce the fields and is
# dumped straight away so that pandas can build columns from the dict keys.
chinese_bump_chart_list: List[dict] = []

# Set membership is O(1) per row and is built once, outside the loops.
final_names = set(final_list)

for year_entry in chinese_list:
    # The fetch cell may leave a None placeholder for a year whose response
    # was not HTTP 200; skip it instead of failing on `.data`.
    if year_entry is None:
        continue
    for entry in year_entry.data:
        # Drop companies that are not part of final_list.
        if entry.item.name not in final_names:
            continue
        chinese_bump_chart_list.append(
            BumpChartItem(
                date=str(year_entry.time),
                symbol=entry.item.name,
                price=entry.value,
            ).model_dump()
        )


chinese_bump_chart_df = pd.DataFrame(chinese_bump_chart_list)

chinese_bump_chart_df[:5]

Unnamed: 0,date,symbol,price
0,2015,Baidu,2877.23
1,2015,Alibaba,2015.52
2,2015,Ant group,360.34
3,2015,PingCAP,124.26
4,2015,Tencent,55.81


In [9]:
import altair as alt

# Fixed palette of 15 colors, one per company in final_list (which is built
# from the top 15 rows). NOTE(review): these look like the first 15 colors of
# the "category20" scheme -- confirm if that matters.
series_palette = [
    "#1f77b4",
    "#aec7e8",
    "#ff7f0e",
    "#ffbb78",
    "#2ca02c",
    "#98df8a",
    "#d62728",
    "#ff9896",
    "#9467bd",
    "#c5b0d5",
    "#8c564b",
    "#c49c94",
    "#e377c2",
    "#f7b6d2",
    "#7f7f7f",
]

# Pair each company name with its palette color, position by position.
color_scheme = alt.Scale(domain=final_list, range=series_palette)

# Within each date, rank the rows by descending price; the resulting "rank"
# field is what the y axis plots.
rank_sort = [alt.SortField("price", order="descending")]

# Axis cosmetics: light dashed grid lines, no visible ticks or domain line.
axis_style = dict(
    grid=True,
    gridCap="round",
    gridColor="#e1e1e1",
    gridDash=[4, 2],  # dashed grid lines
    # labelColor="#e1e1e1",
    tickColor="#e1e1e1",
    tickWidth=0,
    domainColor="#e1e1e1",
    domain=False,
    domainDash=[4, 2],
)

chinese_bump_chart = (
    alt.Chart(chinese_bump_chart_df)
    .mark_line(point=True, radius=2, opacity=0.5, interpolate="monotone")
    .encode(
        x=alt.X("date:O").timeUnit("year").title("date"),
        y="rank:O",
        color=alt.Color("symbol:N", scale=color_scheme),
    )
    .transform_window(rank="rank()", sort=rank_sort, groupby=["date"])
    .properties(
        title="中国企业 2015~2023 OpenRank 排名变化",
        width=600,
        height=400,
    )
    # White, 1px stroke around the chart view (this styles the view border,
    # not the background fill).
    .configure_view(strokeWidth=1, stroke="white")
    .configure_axis(**axis_style)
)

# Last expression of the cell, so the notebook renders the chart.
chinese_bump_chart

## 关于如何用 Python 绘制 Bump Chart

[Altair 示例库：Bump Chart](https://altair-viz.github.io/gallery/bump_chart.html)

用这个画图工具非常简单，测试一下：


In [45]:
import altair as alt
import pandas as pd
from vega_datasets import data

# Load the sample stocks table from vega_datasets; the displayed output has
# the columns symbol, date and price.
stocks = data.stocks()

# Last expression of the cell, so the notebook displays the table.
stocks

Unnamed: 0,symbol,date,price
0,MSFT,2000-01-01,39.81
1,MSFT,2000-02-01,36.35
2,MSFT,2000-03-01,43.22
3,MSFT,2000-04-01,28.37
4,MSFT,2000-05-01,25.45
...,...,...,...
555,AAPL,2009-11-01,199.91
556,AAPL,2009-12-01,210.73
557,AAPL,2010-01-01,192.06
558,AAPL,2010-02-01,204.62


In [46]:
# Bucket the rows into 6-month bins on the "date" column, then average within
# each (bin, symbol) pair.
half_year_bins = pd.Grouper(key="date", freq="6M")
per_symbol_groups = stocks.groupby([half_year_bins, "symbol"])
source = per_symbol_groups.mean().reset_index()

# In short: the chart needs a flat table in which every row is one
# {date, symbol, price} record.

source[:5]

Unnamed: 0,date,symbol,price
0,2000-01-31,AAPL,25.94
1,2000-01-31,AMZN,64.56
2,2000-01-31,IBM,100.52
3,2000-01-31,MSFT,39.81
4,2000-07-31,AAPL,27.703333


In [47]:
# Within each date, rank the symbols by descending price; the resulting
# "rank" field is what the y axis plots.
price_rank_sort = [alt.SortField("price", order="descending")]

stocks_bump_chart = (
    alt.Chart(source)
    .mark_line(point=True)
    .encode(
        x=alt.X("date:O").timeUnit("yearmonth").title("date"),
        y="rank:O",
        color=alt.Color("symbol:N"),
    )
    .transform_window(rank="rank()", sort=price_rank_sort, groupby=["date"])
    .properties(
        title="Bump Chart for Stock Prices",
        width=600,
        height=350,
    )
)

# Last expression of the cell, so the notebook renders the chart.
stocks_bump_chart