In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Configure fonts so Chinese text renders in figures: matplotlib tries the
# listed sans-serif families in order and uses the first one it can find.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Arial Unicode MS', 'DejaVu Sans']
# Draw the minus sign as an ASCII hyphen; the Unicode minus glyph may be
# missing from the CJK fonts selected above.
plt.rcParams['axes.unicode_minus'] = False

# Load the dataset (path is relative to the notebook's working directory)
df = pd.read_csv('data/Pillbox_-_Archived_Data.csv')
print("数据读取完成，共", len(df), "条记录")

# Preview the first rows. Leaving the frame as the cell's last expression lets
# Jupyter use its rich table display instead of the plain-text repr from print().
df.head()

In [None]:
# 1. Bar chart of the pill shape distribution

# Maximum number of most frequent shapes to plot. The title below is derived
# from the number of bars actually drawn, so it stays correct if the column
# holds fewer distinct shapes than this limit or if the limit is changed.
top_n = 12

# Count how often each shape occurs and keep the most frequent ones
shape_counts = df['splshape_text'].value_counts().head(top_n)
n_shapes = len(shape_counts)
positions = np.arange(n_shapes)

fig, ax = plt.subplots(figsize=(14, 8))

# Draw one bar per shape, coloured with evenly spaced samples of the Set3 colormap
bars = ax.bar(positions, shape_counts.values,
              color=plt.cm.Set3(np.linspace(0, 1, n_shapes)))

# Title and axis labels
ax.set_title(f'药物形状分布 (前{n_shapes}种)', fontsize=18, fontweight='bold', pad=20)
ax.set_xlabel('药物形状', fontsize=14)
ax.set_ylabel('数量', fontsize=14)
ax.set_xticks(positions)
ax.set_xticklabels(list(shape_counts.index), rotation=45, ha='right')

# Annotate each bar with its count, placed 1% of the bar height above the top
for bar in bars:
    height = float(bar.get_height())
    ax.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
            f'{int(height):,}', ha='center', va='bottom', fontweight='bold')

# Light horizontal grid lines to make bar heights easier to compare
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()
plt.show()