In [1]:
import json
import networkx as nx
import itertools
import pandas as pd
import numpy as np

In [2]:
def make_networkx_graph(filepath: str):
    raw_data = json.load(open(filepath))
    keywords_list = [x["keywords"]["chi"] for x in raw_data if x["keywords"]["chi"]]
    combinations_list = [list(itertools.combinations(sorted(x), 2)) for x in keywords_list]
    nodes_temp = {}
    edges_temp = {}

    i = 0
    for combinations in combinations_list:
        for edge in combinations:
            for node in edge:
                if node not in nodes_temp:
                    nodes_temp[node] = {"count": 1, "id": i}
                    i += 1
                else:
                    nodes_temp[node]["count"] += 1
        if edge not in edges_temp:
            edges_temp[edge] = {"count": 1}
        else:
            edges_temp[edge]["count"] += 1

    edges = [{"source_name": edge[0], "source": nodes_temp[edge[0]]["id"], "target_name": edge[1], "target": nodes_temp[edge[1]]["id"], "edge_freq": edges_temp[edge]["count"]} for edge in edges_temp.keys() if edges_temp[edge]["count"] > 1]
    edges.sort(key=lambda x: x["edge_freq"], reverse=True)
    nodes_in_edges = list(itertools.chain(*[[edge["source"], edge["target"]] for edge in edges]))
    nodes = [{"id": nodes_temp[node]["id"], "name": node, "count": nodes_temp[node]["count"]} for node in nodes_temp.keys() if nodes_temp[node]["id"] in nodes_in_edges]

    nodes_df = pd.DataFrame(nodes)
    edges_df = pd.DataFrame(edges)
    G = nx.from_pandas_edgelist(edges_df, source="source", target="target", edge_attr="edge_freq", create_using=nx.Graph)
    return G, nodes_df, edges_df



In [4]:
def get_communities(G, nodes_df):
    def community_allocation(source_val):
        for k,v in communities_dict.items():
            if source_val in v:
                return k
    communities = nx.algorithms.community.greedy_modularity_communities(G, weight="edge_freq")
    communities_dict = {}
    nodes_in_community = [list(i) for i in communities]

    for i in nodes_in_community:
        communities_dict[nodes_in_community.index(i)] = i

    community = nodes_df['id'].map(lambda x: community_allocation(x))
    return community

In [6]:
def get_community_mean_shortest_path(G, nodes_df, community1, community2):
    """Get the mean shortest path between two communities"""
    def get_shortest_path(node1, node2):
        try:
            return nx.shortest_path_length(G, node1, node2)
        except nx.NetworkXNoPath:
            return 0
    commu_1 = nodes_df.query(f"community == {community1}")["id"].tolist()
    commu_2 = nodes_df.query(f"community == {community2}")["id"].tolist()

    combinations = list(itertools.product(commu_1, commu_2))

    return np.mean([get_shortest_path(x[0], x[1]) for x in combinations])


In [47]:
def random_keywords_generator(nodes_df, community, n=3):
    """Generate random keywords based on the community"""
    community_nodes = nodes_df.query(f"community == {community}")["name"].tolist()
    random_keywords = np.random.choice(community_nodes, n)
    return random_keywords.tolist()


In [3]:
G, nodes_df, edges_df = make_networkx_graph("data.json")

In [5]:
nodes_df["community"] = get_communities(G, nodes_df)

In [13]:
# filter out communities with more than 5 nodes
community_list = nodes_df.groupby("community").filter(lambda x: len(x) > 5)["community"].unique().tolist()
community_list = sorted(community_list)

In [14]:
# matrix Get the mean shortest path between two communities
community_combinations = list(itertools.combinations(community_list, 2))
community_combinations = [(x[0], x[1], get_community_mean_shortest_path(G, nodes_df, x[0], x[1])) for x in community_combinations]
community_combinations.sort(key=lambda x: x[2], reverse=True)
community_combinations = pd.DataFrame(community_combinations, columns=["community1", "community2", "mean_shortest_path"])


Unnamed: 0,community1,community2,mean_shortest_path
0,6,17,14.256757
1,6,18,14.042471
2,6,7,12.890090
3,5,6,12.517626
4,0,6,12.488641
...,...,...,...
271,20,22,0.000000
272,20,23,0.000000
273,21,22,0.000000
274,21,23,0.000000


In [48]:
community_combinations = (community_combinations
    .assign(random_keywords1=lambda x: x["community1"].map(lambda y: random_keywords_generator(nodes_df, y)))
    .assign(random_keywords2=lambda x: x["community2"].map(lambda y: random_keywords_generator(nodes_df, y)))
    .assign(random_keywords=lambda x: x["random_keywords1"] + x["random_keywords2"])
)

In [66]:
random_keywords = community_combinations.sample(1, random_state=42)[
    "random_keywords"
].tolist()[0]

In [67]:
prompt = f"""Keywords: {random_keywords}
Title:"""
print(prompt)

Keywords: ['職場友誼', '高齡者', '跨層次分析', '數位轉型策略', '智慧製造', '數位轉型策略']
Title:


In [68]:
import requests

OPENAI_API_KEY = ""
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer " + OPENAI_API_KEY
}

data = {
    "model": "text-davinci-003",
    "prompt": prompt,
    "temperature": 0.7,
    "max_tokens": 256,
    "top_p": 1,
    "frequency_penalty": 0,
    "presence_penalty": 0
}

response = requests.post("https://api.openai.com/v1/completions", headers=headers, json=data)

print(response.json())


{'id': 'cmpl-7DvmkHJJMNuFKqMXw9JO0KCzwyjrE', 'object': 'text_completion', 'created': 1683554746, 'model': 'text-davinci-003', 'choices': [{'text': ' 探討職場友誼對高齡者的影響以及數位轉型策略的智慧製造', 'index': 0, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 88, 'completion_tokens': 60, 'total_tokens': 148}}


In [69]:
print(f"""keywords: {random_keywords}\nresult:{response.json()["choices"][0]["text"]}""")

keywords: ['職場友誼', '高齡者', '跨層次分析', '數位轉型策略', '智慧製造', '數位轉型策略']
result: 探討職場友誼對高齡者的影響以及數位轉型策略的智慧製造


In [57]:
from transformers import BloomForCausalLM, BloomTokenizerFast
import torch

In [73]:
result_length = 256
device = "cuda:0" if torch.cuda.is_available() else "cpu"
tokenizer = BloomTokenizerFast.from_pretrained("ckip-joint/bloom-1b1-zh")
model = BloomForCausalLM.from_pretrained("ckip-joint/bloom-1b1-zh").to(device)

In [74]:
inputs = tokenizer(prompt, return_tensors="pt").to(device)

In [75]:
result = tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        max_length=result_length,
        do_sample=True,
        top_k=50,
        top_p=0.9,
    )[0]
)

In [76]:
print(f"""keywords: {random_keywords}\nresult:s{result}""")

keywords: ['職場友誼', '高齡者', '跨層次分析', '數位轉型策略', '智慧製造', '數位轉型策略']
result:Keywords: ['職場友誼', '高齡者', '跨層次分析', '數位轉型策略', '智慧製造', '數位轉型策略']
Title: 整合資訊科技與智慧製造的跨層次分析-以高齡者為例
Summary:
本文以「跨層次分析」作為研究主題，透過「個案研究法」、「深度訪談法」及「內容分析法」三種研究方法，針對國內某家知名科技製造業，在推動數位轉型時面臨的困難點進行探討，並以「整合資訊科技與智慧製造的跨層次分析」作為研究分析的主軸，透過「個案研究法」、「深度訪談法」及「內容分析法」三種研究方法，透過個案公司內部資訊部門、供應商及員工的跨層次分析，進而探討數位轉型中，其組織架構與資源配置，對高齡者發展數位學習的策略的考量，並提供未來該產業擬定數位學習策略時的重要參考依據。
本研究從「數位轉型」的角度，探討目前該產業發展數位學習時，面臨的困難點以及解決的方法。其從個案公司內部資訊部門、供應商
