In [None]:
import itertools
from collections import Counter

import networkx as nx
import pandas as pd

# Build a weighted, undirected co-occurrence graph from the " | "-separated
# names found in one column of the spreadsheet, then export it as a weighted
# adjacency matrix.

# Column whose cells hold the names, separated by " | ".
APPLICANT_COLUMN = "[标]原始申请(专利权)人"

# Minimum number of co-occurrences a pair needs to become an edge.
# NOTE(review): the original comment said "only pairs with more than 2
# collaborations", but the code accepted every pair (count > 0). That behaviour
# is kept here (1 admits all pairs); set this to 3 to apply the documented filter.
MIN_COLLABORATIONS = 1

df = pd.read_excel(r'../new_agriculture_data.xlsx')

# One list of names per row of df (row order preserved).
inventors = []
for cell in df[APPLICANT_COLUMN]:
    if pd.isna(cell):
        # Empty cells come back as NaN, which has no .split(); keep a
        # placeholder so `inventors` stays aligned with the rows of df.
        inventors.append([])
        continue
    cleaned = (name.strip() for name in str(cell).split(" | "))
    # dict.fromkeys drops repeated names while keeping first-seen order, so a
    # name listed twice in one cell can no longer form a pair with itself
    # (which would have become a self-loop in the graph).
    inventors.append(list(dict.fromkeys(name for name in cleaned if name)))

# Number of rows in which each unordered pair of names appears together.
# Pairs are sorted so that (A, B) and (B, A) share a single key.
collaboration_counts = Counter(
    tuple(sorted(pair))
    for names in inventors
    for pair in itertools.combinations(names, 2)
)

# Only names that take part in at least one retained pair become nodes.
G = nx.Graph()
for (left, right), count in collaboration_counts.items():
    if count >= MIN_COLLABORATIONS:
        G.add_edge(left, right, weight=count)

# Weighted adjacency matrix: entry (a, b) is the co-occurrence count of a and b.
adj_matrix = nx.to_pandas_adjacency(G, weight='weight')
adj_matrix.shape

In [None]:
# Write the weighted adjacency matrix to "matrix.xlsx" (a relative path).
# Depends on `adj_matrix` having been built by an earlier cell.
adj_matrix.to_excel("matrix.xlsx")

In [None]:
# Rank pairs of names by the number of distinct patents they share and list,
# for each of the top pairs, the publication numbers and titles involved.

# The three assignments below are kept from the original cell for backward
# compatibility; nothing in this cell reads them.
inventors = []
patent_numbers = df['公开(公告)号'].tolist()
titles = df['标题'].tolist()

# Column whose " | "-separated values are paired up.
# NOTE(review): this is the applicant *country* column, while the result column
# is labelled '专利权人' and the graph cell pairs "[标]原始申请(专利权)人".
# Confirm which column is intended.
PAIR_SOURCE_COLUMN = "原始申请(专利权)人国家"

# Number of top-ranked pairs to keep (the original comment said 10, the code used 20).
TOP_N_PAIRS = 20

# pair -> list of (publication number, title) records for that pair.
pair_patent_info = {}

for _, row in df.iterrows():
    raw_names = row[PAIR_SOURCE_COLUMN]
    if pd.isna(raw_names):
        # Empty cells are NaN and cannot be split; such rows contribute no pairs.
        continue
    # Strip whitespace, drop empty fragments and repeated names (first-seen
    # order kept) so a value listed twice in one row never pairs with itself.
    names = list(dict.fromkeys(
        name.strip() for name in str(raw_names).split(" | ") if name.strip()
    ))
    pub_number = row["公开(公告)号"]
    title = row["标题"]

    # Sort each pair so that (A, B) and (B, A) share a single key.
    for edge in itertools.combinations(names, 2):
        sorted_edge = tuple(sorted(edge))
        pair_patent_info.setdefault(sorted_edge, []).append((pub_number, title))

# Keep one record per publication number (first title wins, first-seen order).
# Deduplicating numbers and titles together keeps the two output lists the same
# length and positionally aligned; the original deduplicated only the numbers,
# via a set with arbitrary order.
for pair, records in pair_patent_info.items():
    first_title_by_pub = {}
    for pub, title in records:
        first_title_by_pub.setdefault(pub, title)
    pair_patent_info[pair] = list(first_title_by_pub.items())

# Collaboration count = number of distinct publication numbers shared by the pair.
collaboration_counts = {pair: len(pubs) for pair, pubs in pair_patent_info.items()}

# Explicit `columns` keeps the frame well-formed (and sortable) when no pair exists.
collaboration_df = (
    pd.DataFrame(
        [
            {
                '专利权人': pair,
                '合作次数': collaboration_counts[pair],
                '公开(公告)号': [pub for pub, _ in patents],
                '标题': [title for _, title in patents],
            }
            for pair, patents in pair_patent_info.items()
        ],
        columns=['专利权人', '合作次数', '公开(公告)号', '标题'],
    )
    # A stable sort makes the order of tied pairs deterministic across runs.
    .sort_values(by='合作次数', ascending=False, kind='stable')
    .head(TOP_N_PAIRS)
)

# Display the top pairs with their shared publication numbers and titles.
collaboration_df