# 企业洞察

- 近 10 年全球企业 OpenRank 演变图
  - 绘制 Bump Charts
- 近 10 年中国企业 OpenRank 演变图
  - 绘制 Bump Charts
- 中国企业在 GitHub/Gitee 平台上的 OpenRank 占比
  - 我不太会，交给志成


In [13]:
import sys
from typing import List



import requests
from pydantic import BaseModel


class OpenRankEntryItem(BaseModel):
    """Identity of one ranked entry in an OpenRank leaderboard payload."""

    # Name of the ranked company, e.g. "Baidu" or "Alibaba" in the loaded data.
    name: str


class OpenRankEntry(BaseModel):
    """One row of an OpenRank leaderboard: a ranked item and its score.

    Field names are camelCase because the raw JSON dict is validated directly
    against this model without aliases.
    """

    item: OpenRankEntryItem  # the ranked entity (carries its name)
    rank: int  # leaderboard position; the loaded data starts at 1
    value: float  # OpenRank score of the item for the period
    rankDelta: int  # presumably rank change vs. the previous period — TODO confirm
    valueDelta: float  # presumably score change vs. the previous period — TODO confirm


class OpenRank(BaseModel):
    """Top-level payload of one OpenRank leaderboard JSON file."""

    type: str  # leaderboard kind, e.g. "Company_China_Year"
    time: int  # period covered by the file; a year such as 2015 in the data used here
    data: List[OpenRankEntry]  # the leaderboard rows


# In practice, xlab-open-source.oss-cn-beijing only has data from 2015 onwards.
year_range = range(2015, 2024)

# One validated leaderboard per successfully downloaded year, in year order.
chinese_list: List[OpenRank] = []

region = "chinese"  # chinese / global

for year in year_range:
    url = f"https://xlab-open-source.oss-cn-beijing.aliyuncs.com/open_leaderboard/open_rank/company/{region}/{year}.json"
    try:
        # A timeout keeps the cell from hanging forever on a stalled connection.
        response = requests.get(url, timeout=30)
    except requests.RequestException as e:
        print(f"Error: Failed to retrieve data from the URL. {e}")
        # Non-zero status so a scripted run reports the failure.
        sys.exit(1)

    if response.status_code == 200:
        raw_dict = response.json()
        chinese_list.append(OpenRank.model_validate(raw_dict))
    else:
        # Skip the year instead of appending None: the list is typed as
        # List[OpenRank] and later cells read `.data` on every element.
        print(f"Warning: no data for {year} (HTTP {response.status_code}); skipped.")

[OpenRank(type='Company_China_Year', time=2015, data=[OpenRankEntry(item=OpenRankEntryItem(name='Baidu'), rank=1, value=2880.08, rankDelta=0, valueDelta=2880.08), OpenRankEntry(item=OpenRankEntryItem(name='Alibaba'), rank=2, value=2017.48, rankDelta=0, valueDelta=2017.48), OpenRankEntry(item=OpenRankEntryItem(name='Ant group'), rank=3, value=361.47, rankDelta=0, valueDelta=361.47), OpenRankEntry(item=OpenRankEntryItem(name='Linux China'), rank=4, value=352.44, rankDelta=0, valueDelta=352.44), OpenRankEntry(item=OpenRankEntryItem(name='Bilibili'), rank=5, value=348.37, rankDelta=0, valueDelta=348.37), OpenRankEntry(item=OpenRankEntryItem(name='Xiaomi'), rank=6, value=256.01, rankDelta=0, valueDelta=256.01), OpenRankEntry(item=OpenRankEntryItem(name='Netease'), rank=7, value=163.17, rankDelta=0, valueDelta=163.17), OpenRankEntry(item=OpenRankEntryItem(name='360'), rank=8, value=158.67, rankDelta=0, valueDelta=158.67), OpenRankEntry(item=OpenRankEntryItem(name='Meituan'), rank=9, value=12

In [23]:
# Inspect the first loaded leaderboard as a plain dict.
chinese_list[0].model_dump()

{'type': 'Company_China_Year',
 'time': 2015,
 'data': [{'item': {'name': 'Baidu'},
   'rank': 1,
   'value': 2880.08,
   'rankDelta': 0,
   'valueDelta': 2880.08},
  {'item': {'name': 'Alibaba'},
   'rank': 2,
   'value': 2017.48,
   'rankDelta': 0,
   'valueDelta': 2017.48},
  {'item': {'name': 'Ant group'},
   'rank': 3,
   'value': 361.47,
   'rankDelta': 0,
   'valueDelta': 361.47},
  {'item': {'name': 'Linux China'},
   'rank': 4,
   'value': 352.44,
   'rankDelta': 0,
   'valueDelta': 352.44},
  {'item': {'name': 'Bilibili'},
   'rank': 5,
   'value': 348.37,
   'rankDelta': 0,
   'valueDelta': 348.37},
  {'item': {'name': 'Xiaomi'},
   'rank': 6,
   'value': 256.01,
   'rankDelta': 0,
   'valueDelta': 256.01},
  {'item': {'name': 'Netease'},
   'rank': 7,
   'value': 163.17,
   'rankDelta': 0,
   'valueDelta': 163.17},
  {'item': {'name': '360'},
   'rank': 8,
   'value': 158.67,
   'rankDelta': 0,
   'valueDelta': 158.67},
  {'item': {'name': 'Meituan'},
   'rank': 9,
   'valu

In [27]:
import pandas as pd


class BumpChartItem(BaseModel):
    """One data point of a bump chart.

    The field names follow the {date, symbol, price} record layout of the
    stocks example used for the Altair bump chart.
    """

    date: str  # x-axis bucket; filled with the leaderboard year as a string
    symbol: str  # series identifier; filled with the company name
    price: float  # quantity ranked within each date; filled with the OpenRank value


# Flatten the per-year leaderboards into one {date, symbol, price} record per
# (year, company) pair. The list holds plain dicts, not BumpChartItem objects,
# because every item is dumped right after validation.
chinese_bump_chart_list: List[dict] = []

for year_entry in chinese_list:
    if year_entry is None:
        # A year whose download failed may be stored as None; nothing to plot.
        continue
    for entry in year_entry.data:
        chinese_bump_chart_list.append(
            BumpChartItem(
                date=str(year_entry.time),
                symbol=entry.item.name,
                price=entry.value,
            ).model_dump()  # Looks odd, but the DataFrame below is built from dicts.
        )


chinese_bump_chart_df = pd.DataFrame(chinese_bump_chart_list)

chinese_bump_chart_df


Unnamed: 0,date,symbol,price
0,2015,Baidu,2880.08
1,2015,Alibaba,2017.48
2,2015,Ant group,361.47
3,2015,Linux China,352.44
4,2015,Bilibili,348.37
...,...,...,...
435,2023,Qunar,5.76
436,2023,Vipshop,3.90
437,2023,Oushu,0.65
438,2023,GAAS,0.38


In [30]:
import altair as alt

# Bump chart of Chinese companies: within each year the companies are ranked
# by OpenRank value (highest first) and one line per company traces that rank.
# `date` already holds year strings, so it is used as a plain ordinal field;
# no timeUnit is applied, which avoids parsing the year strings as dates.
alt.Chart(chinese_bump_chart_df).mark_line(point=True).encode(
    x=alt.X("date:O").title("date"),
    y="rank:O",
    color=alt.Color("symbol:N"),
).transform_window(
    rank="rank()", sort=[alt.SortField("price", order="descending")], groupby=["date"]
).properties(
    title="Bump Chart for Chinese Company OpenRank",
    width=800,
    height=2000,
)


## 如何用 Python 绘制 bump chart

[Altair 官方示例：Bump Chart](https://altair-viz.github.io/gallery/bump_chart.html)

用这个画图工具非常简单，测试一下：


In [17]:
import altair as alt
from vega_datasets import data
import pandas as pd

# Load the example stocks table from vega_datasets; the preview below shows
# its symbol / date / price columns.
stocks = data.stocks()

stocks


Unnamed: 0,symbol,date,price
0,MSFT,2000-01-01,39.81
1,MSFT,2000-02-01,36.35
2,MSFT,2000-03-01,43.22
3,MSFT,2000-04-01,28.37
4,MSFT,2000-05-01,25.45
...,...,...,...
555,AAPL,2009-11-01,199.91
556,AAPL,2009-12-01,210.73
557,AAPL,2010-01-01,192.06
558,AAPL,2010-02-01,204.62


In [18]:
# In short, the chart needs an array whose members are each
# {date, symbol, price}: average the price per symbol over 6-month bins.
# NOTE(review): newer pandas releases may prefer the "6ME" alias over "6M" —
# confirm against the installed version.
source = (
    stocks
    .groupby([pd.Grouper(key="date", freq="6M"), "symbol"])
    .mean()
    .reset_index()
)

source


Unnamed: 0,date,symbol,price
0,2000-01-31,AAPL,25.940000
1,2000-01-31,AMZN,64.560000
2,2000-01-31,IBM,100.520000
3,2000-01-31,MSFT,39.810000
4,2000-07-31,AAPL,27.703333
...,...,...,...
95,2010-07-31,AAPL,213.820000
96,2010-07-31,AMZN,123.610000
97,2010-07-31,GOOG,543.495000
98,2010-07-31,IBM,126.355000


In [31]:
# Bump chart of the resampled stock prices: within each date the symbols are
# ranked by price (highest first) and one line per symbol traces that rank.
(
    alt.Chart(source)
    .mark_line(point=True)
    .encode(
        x=alt.X("date:O").timeUnit("yearmonth").title("date"),
        y=alt.Y("rank:O"),
        color="symbol:N",
    )
    .transform_window(
        rank="rank()",
        sort=[alt.SortField("price", order="descending")],
        groupby=["date"],
    )
    .properties(title="Bump Chart for Stock Prices", width=600, height=350)
)