<div class="alert alert-block alert-success">
    <h1>
        Example notebook - Supply chain
    </h1>
    <p>
        Link to dataset : <a href="https://eto.tech/dataset-docs/chipexplorer/">dataset documentation link</a>
    </p>
</div>

# Import modules and functions

In [1]:
import os
import pandas as pd
import glob
import re
import numpy as np

from turingdb_examples.graph import (
    create_graph_from_df,
    build_create_command_from_networkx,
)
from turingdb_examples.llm import natural_language_to_cypher
from turingdb_examples.utils import get_return_statements, escape_for_cypher

# Check data files are available

In [2]:
example_name = "supply_chain_eto-chip-explorer"
path_data = f"{os.getcwd()}/data/{example_name}"
if not os.path.exists(path_data):
    raise ValueError(f"{path_data} does not exists")

list_csv_files = sorted(os.listdir(path_data))
if not list_csv_files == [
    "inputs.csv",
    "providers.csv",
    "provision.csv",
    "sequence.csv",
    "stages.csv",
]:
    raise ValueError(
        f"At least one of the {len(list_csv_files)} csv files is not available in {path_data}"
    )

# Import and format data

In [3]:
dict_df = {}

for path_csv in glob.glob(f"{path_data}/*.csv"):
    print(100 * "-")
    key_name = re.sub(".csv", "", os.path.basename(path_csv))
    dict_df[key_name] = pd.read_csv(path_csv)

    print(f"--- {key_name}")

    display(dict_df[key_name])

print(100 * "-")

----------------------------------------------------------------------------------------------------
--- providers


Unnamed: 0,provider_name,alias,provider_id,provider_type,country
0,USA,United States,P1,country,
1,CHN,China,P2,country,
2,KOR,South Korea,P4,country,
3,Various countries,,P5,country,
4,JPN,Japan,P7,country,
...,...,...,...,...,...
392,Shenzhen Naso Tech Co.,,P407,organization,CHN
393,Intevac,,P408,organization,USA
394,Palomar Technologies,,P409,organization,USA
395,PacTech,,P410,organization,JPN


----------------------------------------------------------------------------------------------------
--- inputs


Unnamed: 0,input_id,input_name,type,stage_name,stage_id,description,year,market_share_chart_global_market_size_info,market_share_chart_caption,market_share_chart_source
0,N0,Chip design,process,Design,S1,"Chip design involves specification, logic desi...",2022.0,$574.1 billion (2022),,Worldwide semiconductor sales. [World Semicond...
1,N6,Logic chip design,design_resource,,,Logic chip design is the design of integrated ...,2022.0,$255.7 billion (2022),,Worldwide semiconductor sales for logic and mi...
2,N1,Advanced CPUs,design_resource,,,"Central processing units (""CPUs"") are the domi...",2019.0,$56.2 billion (microprocessors) (2019),,[CSET](https://cset.georgetown.edu/publication...
3,N2,Discrete GPUs,design_resource,,,"Discrete graphics processing units (""GPUs"") ha...",2019.0,$11.9 billion (2019),,[CSET](https://cset.georgetown.edu/publication...
4,N3,FPGAs,design_resource,,,"Field-programmable gate arrays (""FPGAs""), unli...",2019.0,$5.7 billion (2019),,[CSET](https://cset.georgetown.edu/publication...
...,...,...,...,...,...,...,...,...,...,...
121,N126,Process-specific fabrication materials,material_resource,,,Process-specific fabrication materials are hig...,,,,
122,N127,Lithography tools (adv. pkg.),tool_resource,,,These tools are lithography tools designed and...,2024.0,$275.6 million (2024),,CSET analysis of TechInsights data (2024). Dat...
123,N128,Deposition tools (adv. pkg.),tool_resource,,,These tools are deposition tools designed and ...,2024.0,$2.6 billion (2024),,CSET analysis of TechInsights data (2024). Dat...
124,N129,Etch and clean tools (adv. pkg.),tool_resource,,,These tools are etch and clean tools designed ...,2024.0,$1.2 billion (2024),,CSET analysis of TechInsights data (2024). Dat...


----------------------------------------------------------------------------------------------------
--- stages


Unnamed: 0,stage_id,stage_name,description,market_share_chart_global_market_size_info,market_share_chart_caption,market_share_chart_source
0,S1,Design,"Semiconductor design involves specification, d...",$574 billion (2022),Chart shows market shares for the overall glob...,[Semiconductor Industry Association (SIA)](htt...
1,S2,Fabrication,Fabrication turns designs into chips. Semicond...,,Chart shows share of global fabrication capaci...,[Semiconductor Industry Association (SIA)](htt...
2,S3,"Assembly, testing, and packaging (ATP)","At the end of the fabrication process, the fin...",$95 billion (2022),Chart shows ATP site capacity by country.,[Semiconductor Industry Association (SIA)](htt...


----------------------------------------------------------------------------------------------------
--- provision


Unnamed: 0,provider_name,provider_id,provided_name,provided_id,share_provided,year,source
0,ACM Research,P313,Wet etching and cleaning tools,N49,11.4,2024,CSET analysis of TechInsights data (2024). Dat...
1,ACM Research,P313,Etch and clean tools,N55,3.0,2024,CSET analysis of TechInsights data (2024). Dat...
2,AMEC,P123,Dry etching tools (adv. pkg.),N101,18.1,2024,CSET analysis of TechInsights data (2024). Dat...
3,AMEC,P123,Dry etch tools,N103,5.5,2024,CSET analysis of TechInsights data (2024). Dat...
4,AMEC,P123,Fabrication tools (for advanced packaging),N109,2.5,2024,CSET analysis of TechInsights data (2024). Dat...
...,...,...,...,...,...,...,...
1300,Zhonghuan,P231,Wafer,N26,,2019,[CSET](https://cset.georgetown.edu/publication...
1301,Zhongwei,P295,Ceramic packages,N95,,2019,
1302,Zhuhai Yueya,P301,Substrates,N96,,2019,
1303,ZingSEMI,P237,Wafer,N26,,2019,[CSET](https://cset.georgetown.edu/publication...


----------------------------------------------------------------------------------------------------
--- sequence


Unnamed: 0,input_name,input_id,goes_into_name,goes_into_id,is_type_of_name,is_type_of_id
0,Crystal growing furnaces,N8,Wafer,N26,,
1,Crystal machining tools,N9,Wafer,N26,,
2,EUV lithography tools,N20,,,Lithography tools,N19
3,ArF dry (DUV) lithography tools,N21,,,Lithography tools,N19
4,ArF immersion (DUV) lithography tools,N22,,,Lithography tools,N19
...,...,...,...,...,...,...
134,Auto ball bonders for IC,N134,,,Wire bonding tools,N74
135,Auto ball bonders for non-IC,N135,,,Wire bonding tools,N74
136,Automatic wedge bonders,N136,,,Wire bonding tools,N74
137,Wafer level stud bonders,N137,,,Wire bonding tools,N74


----------------------------------------------------------------------------------------------------


## Format `df_provision` and `df_providers` dataframes

In [4]:
df_provision = dict_df["provision"].copy()
df_provision = df_provision.rename(
    columns={"year": "year_share_provided", "source": "source_provider_provided"}
)
df_provision

Unnamed: 0,provider_name,provider_id,provided_name,provided_id,share_provided,year_share_provided,source_provider_provided
0,ACM Research,P313,Wet etching and cleaning tools,N49,11.4,2024,CSET analysis of TechInsights data (2024). Dat...
1,ACM Research,P313,Etch and clean tools,N55,3.0,2024,CSET analysis of TechInsights data (2024). Dat...
2,AMEC,P123,Dry etching tools (adv. pkg.),N101,18.1,2024,CSET analysis of TechInsights data (2024). Dat...
3,AMEC,P123,Dry etch tools,N103,5.5,2024,CSET analysis of TechInsights data (2024). Dat...
4,AMEC,P123,Fabrication tools (for advanced packaging),N109,2.5,2024,CSET analysis of TechInsights data (2024). Dat...
...,...,...,...,...,...,...,...
1300,Zhonghuan,P231,Wafer,N26,,2019,[CSET](https://cset.georgetown.edu/publication...
1301,Zhongwei,P295,Ceramic packages,N95,,2019,
1302,Zhuhai Yueya,P301,Substrates,N96,,2019,
1303,ZingSEMI,P237,Wafer,N26,,2019,[CSET](https://cset.georgetown.edu/publication...


In [5]:
df_providers = dict_df["providers"].copy()
df_providers = df_providers.rename(
    columns={"alias": "provider_alias", "country": "provider_country"}
)
df_providers

Unnamed: 0,provider_name,provider_alias,provider_id,provider_type,provider_country
0,USA,United States,P1,country,
1,CHN,China,P2,country,
2,KOR,South Korea,P4,country,
3,Various countries,,P5,country,
4,JPN,Japan,P7,country,
...,...,...,...,...,...
392,Shenzhen Naso Tech Co.,,P407,organization,CHN
393,Intevac,,P408,organization,USA
394,Palomar Technologies,,P409,organization,USA
395,PacTech,,P410,organization,JPN


In [6]:
df_provision = pd.merge(
    df_provision, df_providers, on="provider_id", how="left", suffixes=("", "_y")
)
df_provision = df_provision.drop(df_provision.filter(regex="_y$").columns, axis=1)
df_provision = df_provision[
    [
        "provided_name",
        "provided_id",
        "provider_name",
        "provider_id",
        "provider_alias",
        "provider_type",
        "provider_country",
        "share_provided",
        "year_share_provided",
        "source_provider_provided",
    ]
]
df_provision

Unnamed: 0,provided_name,provided_id,provider_name,provider_id,provider_alias,provider_type,provider_country,share_provided,year_share_provided,source_provider_provided
0,Wet etching and cleaning tools,N49,ACM Research,P313,,organization,USA,11.4,2024,CSET analysis of TechInsights data (2024). Dat...
1,Etch and clean tools,N55,ACM Research,P313,,organization,USA,3.0,2024,CSET analysis of TechInsights data (2024). Dat...
2,Dry etching tools (adv. pkg.),N101,AMEC,P123,Advanced Micro-Fabrication Equipment Inc. China,organization,CHN,18.1,2024,CSET analysis of TechInsights data (2024). Dat...
3,Dry etch tools,N103,AMEC,P123,Advanced Micro-Fabrication Equipment Inc. China,organization,CHN,5.5,2024,CSET analysis of TechInsights data (2024). Dat...
4,Fabrication tools (for advanced packaging),N109,AMEC,P123,Advanced Micro-Fabrication Equipment Inc. China,organization,CHN,2.5,2024,CSET analysis of TechInsights data (2024). Dat...
...,...,...,...,...,...,...,...,...,...,...
1328,Wafer,N26,Zhonghuan,P231,,organization,CHN,,2019,[CSET](https://cset.georgetown.edu/publication...
1329,Ceramic packages,N95,Zhongwei,P295,,organization,CHN,,2019,
1330,Substrates,N96,Zhuhai Yueya,P301,,organization,CHN,,2019,
1331,Wafer,N26,ZingSEMI,P237,,organization,CHN,,2019,[CSET](https://cset.georgetown.edu/publication...


## Format `df_stages` dataframe

In [7]:
# Apply to your dataframe
df_inputs = dict_df["inputs"].copy()
df_inputs = df_inputs.dropna(axis=1, how="all")

# Clean dataframe before creating the graph
for col in df_inputs.select_dtypes(include=["object"]).columns:
    df_inputs[col] = df_inputs[col].apply(escape_for_cypher)

df_inputs

Unnamed: 0,input_id,input_name,type,stage_name,stage_id,description,year,market_share_chart_global_market_size_info,market_share_chart_source
0,N0,Chip design,process,Design,S1,"Chip design involves specification, logic desi...",2022.0,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...
1,N6,Logic chip design,design_resource,,,Logic chip design is the design of integrated ...,2022.0,$255.7 billion (2022),Worldwide semiconductor sales for logic and mi...
2,N1,Advanced CPUs,design_resource,,,"Central processing units (\""CPUs\"") are the do...",2019.0,$56.2 billion (microprocessors) (2019),[CSET](https://cset.georgetown.edu/publication...
3,N2,Discrete GPUs,design_resource,,,"Discrete graphics processing units (\""GPUs\"") ...",2019.0,$11.9 billion (2019),[CSET](https://cset.georgetown.edu/publication...
4,N3,FPGAs,design_resource,,,"Field-programmable gate arrays (\""FPGAs\""), un...",2019.0,$5.7 billion (2019),[CSET](https://cset.georgetown.edu/publication...
...,...,...,...,...,...,...,...,...,...
121,N126,Process-specific fabrication materials,material_resource,,,Process-specific fabrication materials are hig...,,,
122,N127,Lithography tools (adv. pkg.),tool_resource,,,These tools are lithography tools designed and...,2024.0,$275.6 million (2024),CSET analysis of TechInsights data (2024). Dat...
123,N128,Deposition tools (adv. pkg.),tool_resource,,,These tools are deposition tools designed and ...,2024.0,$2.6 billion (2024),CSET analysis of TechInsights data (2024). Dat...
124,N129,Etch and clean tools (adv. pkg.),tool_resource,,,These tools are etch and clean tools designed ...,2024.0,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...


In [8]:
df_stages = dict_df["stages"].copy()
df_stages = df_stages.rename(columns={"description": "stage_description"})
df_stages = df_stages.drop(
    ["market_share_chart_global_market_size_info", "market_share_chart_source"], axis=1
)
df_stages

Unnamed: 0,stage_id,stage_name,stage_description,market_share_chart_caption
0,S1,Design,"Semiconductor design involves specification, d...",Chart shows market shares for the overall glob...
1,S2,Fabrication,Fabrication turns designs into chips. Semicond...,Chart shows share of global fabrication capaci...
2,S3,"Assembly, testing, and packaging (ATP)","At the end of the fabrication process, the fin...",Chart shows ATP site capacity by country.


## Format `df_inputs` dataframe

In [9]:
df_inputs = pd.merge(
    df_inputs, df_stages, on="stage_id", how="left", suffixes=("", "_y")
)
df_inputs = df_inputs.drop(df_inputs.filter(regex="_y$").columns, axis=1)

df_inputs = df_inputs[
    [
        "input_id",
        "input_name",
        "type",
        "description",
        "stage_name",
        "stage_id",
        "stage_description",
        "year",
        "market_share_chart_caption",
        "market_share_chart_global_market_size_info",
        "market_share_chart_source",
    ]
]

df_inputs

Unnamed: 0,input_id,input_name,type,description,stage_name,stage_id,stage_description,year,market_share_chart_caption,market_share_chart_global_market_size_info,market_share_chart_source
0,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...
1,N6,Logic chip design,design_resource,Logic chip design is the design of integrated ...,,,,2022.0,,$255.7 billion (2022),Worldwide semiconductor sales for logic and mi...
2,N1,Advanced CPUs,design_resource,"Central processing units (\""CPUs\"") are the do...",,,,2019.0,,$56.2 billion (microprocessors) (2019),[CSET](https://cset.georgetown.edu/publication...
3,N2,Discrete GPUs,design_resource,"Discrete graphics processing units (\""GPUs\"") ...",,,,2019.0,,$11.9 billion (2019),[CSET](https://cset.georgetown.edu/publication...
4,N3,FPGAs,design_resource,"Field-programmable gate arrays (\""FPGAs\""), un...",,,,2019.0,,$5.7 billion (2019),[CSET](https://cset.georgetown.edu/publication...
...,...,...,...,...,...,...,...,...,...,...,...
121,N126,Process-specific fabrication materials,material_resource,Process-specific fabrication materials are hig...,,,,,,,
122,N127,Lithography tools (adv. pkg.),tool_resource,These tools are lithography tools designed and...,,,,2024.0,,$275.6 million (2024),CSET analysis of TechInsights data (2024). Dat...
123,N128,Deposition tools (adv. pkg.),tool_resource,These tools are deposition tools designed and ...,,,,2024.0,,$2.6 billion (2024),CSET analysis of TechInsights data (2024). Dat...
124,N129,Etch and clean tools (adv. pkg.),tool_resource,These tools are etch and clean tools designed ...,,,,2024.0,,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...


In [10]:
df_inputs = pd.merge(
    df_inputs,
    df_provision,
    left_on="input_id",
    right_on="provided_id",
    how="left",
    suffixes=("", "_y"),
)
df_inputs = df_inputs.drop(["provided_name", "provided_id"], axis=1)
df_inputs

Unnamed: 0,input_id,input_name,type,description,stage_name,stage_id,stage_description,year,market_share_chart_caption,market_share_chart_global_market_size_info,market_share_chart_source,provider_name,provider_id,provider_alias,provider_type,provider_country,share_provided,year_share_provided,source_provider_provided
0,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,CHN,P2,China,country,,5.0,2022.0,Worldwide semiconductor sales. [World Semicond...
1,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,EUR,P312,Europe,country,EUR,9.0,2022.0,Worldwide semiconductor sales. [World Semicond...
2,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,JPN,P7,Japan,country,,4.0,2022.0,Worldwide semiconductor sales. [World Semicond...
3,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,KOR,P4,South Korea,country,,3.0,2022.0,Worldwide semiconductor sales. [World Semicond...
4,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,TWN,P8,Taiwan,country,,11.0,2022.0,Worldwide semiconductor sales. [World Semicond...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1315,N129,Etch and clean tools (adv. pkg.),tool_resource,These tools are etch and clean tools designed ...,,,,2024.0,,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...,USA,P1,United States,country,,56.4,2024.0,CSET analysis of TechInsights data (2024). Dat...
1316,N129,Etch and clean tools (adv. pkg.),tool_resource,These tools are etch and clean tools designed ...,,,,2024.0,,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...,Various companies,P370,,organization,Various countries,0.8,2024.0,CSET analysis of TechInsights data (2024). Dat...
1317,N129,Etch and clean tools (adv. pkg.),tool_resource,These tools are etch and clean tools designed ...,,,,2024.0,,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...,Veeco,P121,,organization,USA,3.9,2024.0,CSET analysis of TechInsights data (2024). Dat...
1318,N130,Direct write systems (adv. pkg.),tool_resource,These tools are direct write systems designed ...,,,,2024.0,,$48.1 million (2024),CSET analysis of TechInsights data (2024). Dat...,Applied Materials,P81,,organization,USA,100.0,2024.0,CSET analysis of TechInsights data (2024). Dat...


## Format `df_sequence` dataframe

In [11]:
df_sequence = dict_df["sequence"].copy()
df_sequence["output_name"] = df_sequence["goes_into_name"].combine_first(
    df_sequence["is_type_of_name"]
)
df_sequence["output_id"] = df_sequence["goes_into_id"].combine_first(
    df_sequence["is_type_of_id"]
)
df_sequence["type_link"] = np.where(
    df_sequence["goes_into_id"].notna(), "goes_into_id", "is_type_of_id"
)
df_sequence["type_link"] = df_sequence["type_link"].str.replace("_id", "")
df_sequence = df_sequence.drop(
    ["goes_into_name", "goes_into_id", "is_type_of_name", "is_type_of_id"], axis=1
)
df_sequence

Unnamed: 0,input_name,input_id,output_name,output_id,type_link
0,Crystal growing furnaces,N8,Wafer,N26,goes_into
1,Crystal machining tools,N9,Wafer,N26,goes_into
2,EUV lithography tools,N20,Lithography tools,N19,is_type_of
3,ArF dry (DUV) lithography tools,N21,Lithography tools,N19,is_type_of
4,ArF immersion (DUV) lithography tools,N22,Lithography tools,N19,is_type_of
...,...,...,...,...,...
134,Auto ball bonders for IC,N134,Wire bonding tools,N74,is_type_of
135,Auto ball bonders for non-IC,N135,Wire bonding tools,N74,is_type_of
136,Automatic wedge bonders,N136,Wire bonding tools,N74,is_type_of
137,Wafer level stud bonders,N137,Wire bonding tools,N74,is_type_of


# Create graph from dataframe

In [12]:
G = create_graph_from_df(
    df_sequence,
    source_node_col={"id": "input_id", "displayName": "input_name"},
    target_node_col={"id": "output_id", "displayName": "output_name"},
    node_attributes_df=df_inputs,
    node_attributes_key_col="input_id",
    edge_col="type_link",
    edge_col_label="type",
)
print(f"Resulting graph : {G}")

Resulting graph : DiGraph with 126 nodes and 139 edges


In [13]:
n_first = 5

print("NODES :")
for node in list(G.nodes(data=True))[:n_first]:
    print(node[0])
    for key, val in node[1].items():
        print(f"   {key} : {val}")

    print()
print()

print("EDGES :")
for edge in list(G.edges(data=True))[:n_first]:
    print(edge)

NODES :
N8
   displayName : Crystal growing furnaces
   input_name : Crystal growing furnaces
   type : tool_resource
   description : Crystal growing furnaces and machining tools are necessary to produce all wafers-thin, disc-shaped materials fabs used to produce chips. The furnace forms a cylindrical ingot of silicon from polycrystalline raw silicon; machining equipment then cuts the ingot into wafers used for chip fabrication. These tools have relatively low value and complexity relative to other semiconductor manufacturing equipment. Japan, Germany, and Switzerland are the main producers.
   stage_name : 
   stage_id : 
   stage_description : nan
   year : 2022.0
   market_share_chart_caption : nan
   market_share_chart_global_market_size_info : $44 million (2022)
   market_share_chart_source : CSET analysis of TechInsights data (2022). Data used for analysis were published by TechInsights on May 5, 2025.
   provider_name : DEU
   provider_id : P32
   provider_alias : Germany
   pr

# Create graph using `turingdb` python package

<div class="alert alert-block alert-info">
    <h2>
        See <a href="https://docs.turingdb.ai/quickstart">TuringDB Get started documentation</a> for the important steps to follow :
    </h2>
    <h4>
        <ul>
            <li>Create your TuringDB account</li>
            <li>Create your instance in the <a href="https://console.turingdb.ai/auth">TuringDB Cloud UI</a></li>
            <li>Copy your Instance ID from the Database Instances management page</li>
            <li>Get API Key from the Settings in UI</li>
        </ul>
        Remember to have your instance active while working in this notebook !
    </h4>
</div>

In [14]:
from turingdb import TuringDB

# Create TuringDB client
client = TuringDB(
    host="http://localhost:6666"  # Remove this parameter and set the two parameters below
    # instance_id="...",  # Replace by your instance id
    # auth_token="...",  # Replace by your API token
)

In [15]:
# Get list of available graphs
list_graphs = client.query("LIST GRAPH").loc[:, 0].tolist()

In [16]:
# Set graph name
graph_name_prefix = example_name
graph_name_nb_suffix = str(
    max(
        [
            int(re.sub(graph_name_prefix, "", g))
            for g in list_graphs
            if g.startswith(graph_name_prefix)
            and re.sub(graph_name_prefix, "", g).isdigit()
        ]
        + [0]
    )
    + 1
)
graph_name = graph_name_prefix + graph_name_nb_suffix
graph_name = re.sub("-", "_", graph_name)
graph_name

'supply_chain_eto_chip_explorer1'

In [17]:
%%time

# Create a new graph
client.query(f"CREATE GRAPH {graph_name}")
client.set_graph(graph_name)

# Create a new change on the graph
change = client.query("CHANGE NEW").loc[0, 0]

# Checkout into the change
client.checkout(change=change)

CPU times: user 1.47 ms, sys: 1.96 ms, total: 3.44 ms
Wall time: 2.39 ms


In [18]:
# Build CREATE command from networkx object
create_command = build_create_command_from_networkx(G)
print(f"Cypher CREATE command :\n\n{100 * '*'}\n{create_command}\n{100 * '*'}")

Cypher CREATE command :

****************************************************************************************************
CREATE (n0:ToolResource {"id":"N8", "displayName":"Crystal growing furnaces", "input_name":"Crystal growing furnaces", "type":"tool_resource", "description":"Crystal growing furnaces and machining tools are necessary to produce all wafers-thin, disc-shaped materials fabs used to produce chips. The furnace forms a cylindrical ingot of silicon from polycrystalline raw silicon; machining equipment then cuts the ingot into wafers used for chip fabrication. These tools have relatively low value and complexity relative to other semiconductor manufacturing equipment. Japan, Germany, and Switzerland are the main producers.", "stage_name":"", "stage_id":"", "stage_description":"nan", "year":"2022.0", "market_share_chart_caption":"nan", "market_share_chart_global_market_size_info":"$44 million (2022)", "market_share_chart_source":"CSET analysis of TechInsights data (2022)

In [19]:
%%time

# Run CREATE command
client.query(create_command)

# Commit the change
client.query("COMMIT")
client.query("CHANGE SUBMIT")

# Checkout into main
client.checkout()

CPU times: user 4.06 ms, sys: 64 μs, total: 4.13 ms
Wall time: 207 ms


<div class="alert alert-block alert-info">
    <h2>
        Visualize your graph in TuringDB Graph Visualizer ! Now that your instance is running:
    </h2>
    <h3>
        <ul>
            <li>Go to <a href="https://console.turingdb.ai/databases">TuringDB Console - Database Instances</a></li>
            <li>In your current instance panel, click on "Open Visualizer" button</li>
            <li>Visualizer opens, now you can choose your graph in the dropdown menu at the top-right corner</li>
        </ul>
        You can then play with your graph and visualize the nodes you want !
    </h3>
</div>

# Query TuringDB

## Use metaqueries to have insight on graph overall structure

<h3>
    To learn more about 📮 Metaqueries, please check TuringDB documentation on this <a href="https://turingdb.mintlify.app/query/cypher_subset#%F0%9F%93%AE-metaqueries">link</a>
</h3>

In [20]:
%%time

# CALL LABELS () - returns a column of all the different node labels
command = """
CALL LABELS()
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = ["Node_type_ID", "Node_type"]
    display(df)

Unnamed: 0,Node_type_ID,Node_type
0,0,Tool_resource
1,1,Material_resource
2,2,Process
3,3,Ultimate_output
4,4,Design_resource
5,5,ToolResource
6,6,MaterialResource
7,7,UltimateOutput
8,8,DesignResource


CPU times: user 8.35 ms, sys: 83 μs, total: 8.44 ms
Wall time: 7.34 ms


In [21]:
%%time

# CALL EDGETYPES() - returns a column of all the different edge types (edge equivalent of node labels)
command = """
CALL EDGETYPES()
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = ["Edge_type_ID", "Edge_type"]
    display(df)

Unnamed: 0,Edge_type_ID,Edge_type
0,0,GOES_INTO
1,1,IS_TYPE_OF


CPU times: user 5.79 ms, sys: 1 ms, total: 6.79 ms
Wall time: 5.51 ms


## Simple queries

In [22]:
%%time

# Match all edges and return them
command = "MATCH (n)-[e]-(m) RETURN n.displayName, e, m.displayName"
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n.displayName,e,m.displayName
0,Overlay metrology tools,0,Wafer inspection tools
1,Dry stripping tools,1,Dry etching and cleaning tools
2,Ion milling tools,2,Dry etching and cleaning tools
3,Misc. dry etch tools,3,Dry etch tools
4,Dry clean tools,4,Dry etching and cleaning tools
...,...,...,...
273,Advanced CPUs,273,Logic chip design
274,Logic chip design,274,Chip design
275,Discrete GPUs,275,Logic chip design
276,FPGAs,276,Logic chip design


CPU times: user 11.6 ms, sys: 38 μs, total: 11.6 ms
Wall time: 10.1 ms


In [24]:
%%time

# Find all materials
command = """
MATCH (n:MaterialResource)
RETURN n, n.displayName, n.type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n,n.displayName,n.type
0,227,Wafer,material_resource
1,228,Process-specific fabrication materials,material_resource
2,229,ATP materials,material_resource
3,230,Die attach materials,material_resource
4,231,Photomasks,material_resource
5,232,Photoresists,material_resource
6,233,Encapsulation resins,material_resource
7,234,Core intellectual property,material_resource
8,235,Packaging materials,material_resource
9,236,Lead frames,material_resource


CPU times: user 5.53 ms, sys: 1.04 ms, total: 6.57 ms
Wall time: 5.25 ms


In [26]:
%%time

# Find all Tool Resources
command = """
MATCH (n:ToolResource) RETURN n.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n.displayName
0,Misc. dry etch tools
1,Dry clean tools
2,Plasma modification tools
3,Direct write systems
4,i-line lithography tools
...,...
85,Photomask inspection and repair tools
86,Process control tools
87,Wafer inspection tools
88,Wafer bonding and aligning tools


CPU times: user 4.7 ms, sys: 1.89 ms, total: 6.59 ms
Wall time: 5.44 ms


In [27]:
%%time

# Find all links between FPGAs and other nodes
command = """
MATCH (n{displayName: "FPGAs"})-[e]-(m)
RETURN n.displayName, e, m.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n.displayName,e,m.displayName
0,FPGAs,135,Logic chip design
1,FPGAs,276,Logic chip design


CPU times: user 5.51 ms, sys: 3.79 ms, total: 9.29 ms
Wall time: 7.86 ms


In [28]:
%%time

# Find all relationships between nodes whose descriptions contain "FPGA" using string approximation
command = """
MATCH (n{description ~= "FPGA"})-[e]-(m)
RETURN n.displayName, n.description, e, m.displayName, m.description
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n.displayName,n.description,e,m.displayName,m.description
0,AI ASICs,Application-specific integrated circuits for a...,134,Logic chip design,Logic chip design is the design of integrated ...
1,FPGAs,"Field-programmable gate arrays (\ FPGAs\ ), un...",135,Logic chip design,Logic chip design is the design of integrated ...
2,AI ASICs,Application-specific integrated circuits for a...,277,Logic chip design,Logic chip design is the design of integrated ...
3,FPGAs,"Field-programmable gate arrays (\ FPGAs\ ), un...",276,Logic chip design,Logic chip design is the design of integrated ...


CPU times: user 11.1 ms, sys: 0 ns, total: 11.1 ms
Wall time: 9.64 ms


In [29]:
%%time

# Find all relationships between Tool_Resource nodes
command = """
MATCH (i1:ToolResource)--(i2:ToolResource)
RETURN i1.displayName, i1.type, i1, i2.displayName, i2.type, i2
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,i1.displayName,i1.type,i1,i2.displayName,i2.type,i2
0,Misc. dry etch tools,tool_resource,137,Dry etch tools,tool_resource,187
1,Dry clean tools,tool_resource,138,Dry etching and cleaning tools,tool_resource,191
2,Plasma modification tools,tool_resource,139,Deposition tools,tool_resource,201
3,Direct write systems,tool_resource,140,Maskless lithography tools,tool_resource,205
4,i-line lithography tools,tool_resource,141,Lithography tools,tool_resource,208
...,...,...,...,...,...,...
69,Process monitoring tools,tool_resource,220,Process control tools,tool_resource,223
70,Inspection tools (adv. pkg.),tool_resource,221,Fabrication tools (for advanced packaging),tool_resource,226
71,Photomask inspection and repair tools,tool_resource,222,Process control tools,tool_resource,223
72,Wafer inspection tools,tool_resource,224,Process control tools,tool_resource,223


CPU times: user 11.3 ms, sys: 1.9 ms, total: 13.2 ms
Wall time: 11.9 ms


In [30]:
%%time

# Find all relationships between Tool_Resource nodes linked by a IS_TYPE_OF edge
command = """
MATCH (i1:ToolResource)-[e:IS_TYPE_OF]-(i2:ToolResource)
RETURN i1.displayName, i1.description, i2.displayName, i2.description
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,i1.displayName,i1.description,i2.displayName,i2.description
0,Misc. dry etch tools,Miscellaneous dry etch tools are specialized s...,Dry etch tools,The main types of dry etching tools are used e...
1,Dry clean tools,Dry clean tools use plasma to remove films or ...,Dry etching and cleaning tools,Etching and cleaning tools have two main types...
2,Plasma modification tools,Plasma modification tools are used to alter ma...,Deposition tools,Deposition tools are used to deposit thin film...
3,Direct write systems,Direct write systems are lithography tools tha...,Maskless lithography tools,Maskless lithography equipment draws patterns ...
4,i-line lithography tools,i-line lithography tools are ultraviolet (UV) ...,Lithography tools,"The Netherlands, Japan, and a small number of ..."
...,...,...,...,...
67,Process monitoring tools,"Process monitoring tools, such as curve tracer...",Process control tools,"In semiconductor fabrication, process control ..."
68,Inspection tools (adv. pkg.),These tools inspect parts of wafers during the...,Fabrication tools (for advanced packaging),Advanced packaging tools are specialized semic...
69,Photomask inspection and repair tools,Photomask inspection and repair tools are simi...,Process control tools,"In semiconductor fabrication, process control ..."
70,Wafer inspection tools,Even the tiniest imperfections can cause serio...,Process control tools,"In semiconductor fabrication, process control ..."


CPU times: user 11.2 ms, sys: 1.94 ms, total: 13.2 ms
Wall time: 11.5 ms


In [31]:
%%time

# Find all relationships between nodes at the Fabrication stage
command = """
MATCH (i1 {"stage_name": "Fabrication"})--(i2 {"stage_name": "Fabrication"})
RETURN i1.displayName, i2.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,i1.displayName,i2.displayName
0,Ion implantation,Chemical mechanical planarization
1,Photolithography,Etch and clean
2,Etch and clean,Ion implantation
3,Process control and handling,Photolithography
4,Process control and handling,Ion implantation
5,Process control and handling,Deposition
6,Process control and handling,Etch and clean
7,Process control and handling,Chemical mechanical planarization
8,Deposition,Photolithography
9,Fabrication materials,Deposition


CPU times: user 9.43 ms, sys: 1.04 ms, total: 10.5 ms
Wall time: 9.05 ms


In [33]:
%%time

# Find all nodes of type Tool_Resource provided by ASML provider
command = """
MATCH (n:ToolResource{provider_name:"ASML"})
RETURN n, n.displayName, n.description, n.provider_name, n.provider_country
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n,n.displayName,n.description,n.provider_name,n.provider_country
0,141,i-line lithography tools,i-line lithography tools are ultraviolet (UV) ...,ASML,NLD
1,143,KrF (DUV) lithography tools,KrF lithography tools are deep ultraviolet (DU...,ASML,NLD
2,145,Overlay metrology tools,Overlay metrology tools are used to measure an...,ASML,NLD
3,181,E-beam metrology tools,E-beam metrology tools use focused electron be...,ASML,NLD
4,206,ArF immersion (DUV) lithography tools,ArF immersion lithography scanners are advance...,ASML,NLD
5,207,ArF dry (DUV) lithography tools,ArF dry lithography scanners are advanced deep...,ASML,NLD
6,208,Lithography tools,"The Netherlands, Japan, and a small number of ...",ASML,NLD
7,209,EUV lithography tools,EUV lithography tools are the most advanced ph...,ASML,NLD
8,223,Process control tools,"In semiconductor fabrication, process control ...",ASML,NLD


CPU times: user 7.75 ms, sys: 1.07 ms, total: 8.82 ms
Wall time: 7.53 ms


In [34]:
%%time

# Find direct relationships between tools
command = """
MATCH (n1:ToolResource)-[e]-(n2:ToolResource) RETURN n1, n1.displayName, e, n2, n2.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n1,n1.displayName,e,n2,n2.displayName
0,137,Misc. dry etch tools,158,187,Dry etch tools
1,138,Dry clean tools,159,191,Dry etching and cleaning tools
2,139,Plasma modification tools,160,201,Deposition tools
3,140,Direct write systems,161,205,Maskless lithography tools
4,141,i-line lithography tools,162,208,Lithography tools
...,...,...,...,...,...
69,220,Process monitoring tools,241,223,Process control tools
70,221,Inspection tools (adv. pkg.),242,226,Fabrication tools (for advanced packaging)
71,222,Photomask inspection and repair tools,243,223,Process control tools
72,224,Wafer inspection tools,245,223,Process control tools


CPU times: user 10.1 ms, sys: 3.01 ms, total: 13.2 ms
Wall time: 11.6 ms


In [35]:
%%time

# Find what goes into Photolithography process
command = """
MATCH (n:ToolResource)-[e:GOES_INTO]-(p:Process{displayName:"Photolithography"}) RETURN n, n.displayName, e, p, p.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n,n.displayName,e,p,p.displayName
0,202,Resist processing tools,223,133,Photolithography
1,208,Lithography tools,229,133,Photolithography


CPU times: user 11.2 ms, sys: 18 μs, total: 11.2 ms
Wall time: 9.62 ms


In [36]:
%%time

# Find EUV lithography and its connections
command = """
MATCH (n{displayName:"EUV lithography tools"})-[e]-(m) RETURN n, n.provider_name, n.share_provided, e, m, m.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n,n.provider_name,n.share_provided,e,m,m.displayName
0,73,ASML,100.0,73,72,Lithography tools
1,209,ASML,100.0,230,208,Lithography tools


CPU times: user 8.67 ms, sys: 1.98 ms, total: 10.7 ms
Wall time: 9.34 ms


In [37]:
%%time

# Find all ASML products
command = """
MATCH (n{provider_name:"ASML"})
RETURN n, n.provider_name, n.displayName, n.share_provided, n.year_share_provided
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n,n.provider_name,n.displayName,n.share_provided,n.year_share_provided
0,0,ASML,Overlay metrology tools,45.1,2024.0
1,7,ASML,i-line lithography tools,35.7,2024.0
2,8,ASML,KrF (DUV) lithography tools,79.2,2024.0
3,49,ASML,E-beam metrology tools,37.6,2024.0
4,70,ASML,ArF immersion (DUV) lithography tools,98.7,2024.0
5,71,ASML,ArF dry (DUV) lithography tools,94.3,2024.0
6,72,ASML,Lithography tools,78.5,2024.0
7,73,ASML,EUV lithography tools,100.0,2024.0
8,89,ASML,Process control tools,5.2,2024.0
9,141,ASML,i-line lithography tools,35.7,2024.0


CPU times: user 12 ms, sys: 81 μs, total: 12 ms
Wall time: 10.9 ms


In [38]:
%%time

# Find all 2-hop paths from Crystal growing furnaces to any processes
command = """
MATCH (start:ToolResource{displayName:"Crystal growing furnaces"})-[e1]-(n1)-[e2]-(end:Process)
RETURN start, start.displayName, start.type, e1, n1, n1.displayName, n1.type, e2, end, end.displayName, end.type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,start,start.displayName,start.type,e1,n1,n1.displayName,n1.type,e2,end,end.displayName,end.type
0,211,Crystal growing furnaces,tool_resource,232,227,Wafer,material_resource,248,128,Fabrication materials,process


CPU times: user 12.4 ms, sys: 1.11 ms, total: 13.5 ms
Wall time: 12 ms


In [39]:
%%time

# Find all tools that go into Fabrication stage processes
command = """
MATCH (t:ToolResource)-[e:GOES_INTO]-(p:Process{stage_id:"S2"})
RETURN t, t.displayName, t.provider_name, e, p, p.displayName, p.stage_name, p.stage_id
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,t,t.displayName,t.provider_name,e,p,p.displayName,p.stage_name,p.stage_id
0,171,Ion implanters,Applied Materials,192,132,Ion implantation,Fabrication,S2
1,183,Wafer and photomask handlers,Daifuku,204,126,Process control and handling,Fabrication,S2
2,185,CMP tools,Applied Materials,206,131,Chemical mechanical planarization,Fabrication,S2
3,190,Etch and clean tools,ACM Research,211,130,Etch and clean,Fabrication,S2
4,201,Deposition tools,ASM International,222,127,Deposition,Fabrication,S2
5,202,Resist processing tools,CHN,223,133,Photolithography,Fabrication,S2
6,208,Lithography tools,ASML,229,133,Photolithography,Fabrication,S2
7,223,Process control tools,ASML,244,126,Process control and handling,Fabrication,S2


CPU times: user 10.9 ms, sys: 2.01 ms, total: 12.9 ms
Wall time: 11.5 ms


In [40]:
%%time

# Find suppliers and their market share for lithography tools
command = """
MATCH (n:ToolResource{input_name:"Lithography tools"})
RETURN n, n.provider_name, n.provider_country, n.provider_type, n.share_provided, n.year_share_provided
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n,n.provider_name,n.provider_country,n.provider_type,n.share_provided,n.year_share_provided
0,208,ASML,NLD,organization,78.5,2024.0


CPU times: user 7.43 ms, sys: 1.04 ms, total: 8.48 ms
Wall time: 6.96 ms


In [41]:
%%time

# Find the supply chain path from wafers to fabrication using 2 hops
command = """
MATCH (w:MaterialResource{displayName:"Wafer"})-[e1:GOES_INTO]-(f1)-[e2:GOES_INTO]-(p:Process)
RETURN w, w.displayName, e1, f1, f1.displayName, e2, p, p.displayName, p.stage_name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,w,w.displayName,e1,f1,f1.displayName,e2,p,p.displayName,p.stage_name
0,227,Wafer,248,128,Fabrication materials,145,133,Photolithography,Fabrication
1,227,Wafer,248,128,Fabrication materials,146,127,Deposition,Fabrication
2,227,Wafer,248,128,Fabrication materials,147,132,Ion implantation,Fabrication
3,227,Wafer,248,128,Fabrication materials,148,131,Chemical mechanical planarization,Fabrication
4,227,Wafer,248,128,Fabrication materials,149,130,Etch and clean,Fabrication


CPU times: user 9.81 ms, sys: 2.98 ms, total: 12.8 ms
Wall time: 11.3 ms


In [None]:
%%time

# Find all deposition tools and their types
command = """
MATCH (specific)-[e:IS_TYPE_OF]-(general:Tool_Resource{displayName:"Deposition tools"})
RETURN specific, specific.displayName, specific.provider_name, e, general, general.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

In [42]:
%%time

# Find all deposition tools and their types using string approximation
command = """
MATCH (specific)-[e:IS_TYPE_OF]-(general:ToolResource{displayName ~= "Deposition"})
RETURN specific, specific.displayName, specific.provider_name, e, general, general.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,specific,specific.displayName,specific.provider_name,e,general,general.displayName
0,139,Plasma modification tools,Applied Materials,160,201,Deposition tools
1,157,Other deposition tools (non-IC),ASM International,178,201,Deposition tools
2,172,ECP tools (adv. pkg.),ASM Pacific,193,175,Deposition tools (adv. pkg.)
3,173,Spin-on deposition tools (adv. pkg.),Various companies,194,175,Deposition tools (adv. pkg.)
4,174,PVD tools (adv. pkg.),ASM Pacific,195,175,Deposition tools (adv. pkg.)
5,176,CVD tools (adv. pkg.),ASM International,197,175,Deposition tools (adv. pkg.)
6,192,Electrochemical plating tools,Lam Research,213,201,Deposition tools
7,193,Tube-based diffusion and deposition tools,ASM International,214,201,Deposition tools
8,194,Rapid thermal processing tools,AP Systems,215,201,Deposition tools
9,195,Physical vapor deposition tools,Applied Materials,216,201,Deposition tools


CPU times: user 9.76 ms, sys: 2.9 ms, total: 12.7 ms
Wall time: 11.1 ms


In [43]:
%%time

# Find complete hierarchy of CVD tools
command = """
MATCH (specific)-[e1:IS_TYPE_OF]-(intermediate)-[e2:IS_TYPE_OF]-(general)
RETURN specific, specific.displayName, e1, intermediate, intermediate.displayName, e2, general, general.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,specific,specific.displayName,e1,intermediate,intermediate.displayName,e2,general,general.displayName
0,0,Overlay metrology tools,0,88,Wafer inspection tools,88,89,Process control tools
1,3,Misc. dry etch tools,3,51,Dry etch tools,51,55,Dry etching and cleaning tools
2,6,Direct write systems,6,69,Maskless lithography tools,69,72,Lithography tools
3,18,Other interconnect tools,18,21,Bonding tools,21,23,Assembly tools
4,19,Wire bonding tools,19,21,Bonding tools,21,23,Assembly tools
...,...,...,...,...,...,...,...,...
61,204,Mask exposure systems - e-beam,225,205,Maskless lithography tools,226,208,Lithography tools
62,212,Die attaching tools,233,213,Bonding tools,234,215,Assembly tools
63,217,Defect inspection tools,238,224,Wafer inspection tools,245,223,Process control tools
64,218,Critical dimensions metrology tools,239,224,Wafer inspection tools,245,223,Process control tools


CPU times: user 12.6 ms, sys: 987 μs, total: 13.5 ms
Wall time: 12.4 ms


In [44]:
%%time

# Find all Chinese providers and what they supply
command = """
MATCH (n{provider_country:"CHN"})
RETURN n, n.provider_name, n.displayName, n.share_provided, n.type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n,n.provider_name,n.displayName,n.share_provided,n.type
0,12,AccoTEST,Linear and discrete testing tools,36.9,tool_resource
1,14,AccoTEST,Test tools,1.9,tool_resource
2,45,AMEC,Etch and clean tools (adv. pkg.),12.2,tool_resource
3,46,AMEC,Dry etching tools (adv. pkg.),18.1,tool_resource
4,50,AMEC,Insulator etching tools,9.7,tool_resource
5,51,AMEC,Dry etch tools,5.5,tool_resource
6,52,AMEC,Conductor etching tools,1.4,tool_resource
7,55,AMEC,Dry etching and cleaning tools,5.3,tool_resource
8,62,AMEC,Low-pressure CVD tools,1.2,tool_resource
9,81,AMEC,Fabrication tools (for advanced packaging),2.5,tool_resource


CPU times: user 10.7 ms, sys: 1.97 ms, total: 12.6 ms
Wall time: 11.1 ms


In [46]:
%%time

# Find assembly tools and their connections
command = """
MATCH (n:ToolResource{displayName:"Assembly tools"})-[e]-(m)
RETURN n, n.displayName, e, m, m.displayName, m.type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n,n.displayName,e,m,m.displayName,m.type
0,215,Assembly tools,236,134,Assembly and packaging,process


CPU times: user 4.57 ms, sys: 1.05 ms, total: 5.62 ms
Wall time: 4.83 ms


In [47]:
%%time

# Find complete path from design to finished chip (3 hops)
command = """
MATCH (start:Process{displayName:"Memory chip design"})-[e1]-(s1)-[e2]-(s2)-[e3]-(end) RETURN start, start.displayName, e1, s1, s1.displayName, e2, s2, s2.displayName, e3, end, end.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

No result found
CPU times: user 1.94 ms, sys: 1.88 ms, total: 3.82 ms
Wall time: 2.87 ms


## String approximation

In [49]:
%%time

# Find deposition-related tools with fuzzy matching
command = """
MATCH (n:ToolResource{displayName~="deposition"})
RETURN n, n.displayName, n.description
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n,n.displayName,n.description
0,157,Other deposition tools (non-IC),Deposition tools for non-IC applications inclu...
1,173,Spin-on deposition tools (adv. pkg.),Spin-on deposition tools for advanced packagin...
2,175,Deposition tools (adv. pkg.),These tools are deposition tools designed and ...
3,193,Tube-based diffusion and deposition tools,Tube-based diffusion and deposition systems ar...
4,195,Physical vapor deposition tools,Physical vapor deposition (PVD) tools vaporize...
5,196,Atomic layer deposition tools,Atomic layer deposition (ALD) tools are partic...
6,199,Chemical vapor deposition tools,Chemical vapor deposition (CVD) tools create a...
7,201,Deposition tools,Deposition tools are used to deposit thin film...


CPU times: user 6.19 ms, sys: 1.02 ms, total: 7.21 ms
Wall time: 5.91 ms


In [50]:
%%time

# Find semiconductor with misspellings in descriptions
command = """
MATCH (n{description~="semico"}) RETURN n, n.displayName, n.description
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,n,n.displayName,n.description
0,39,Ion implanters,Ion implanters embed dopant substances into si...
1,25,Other deposition tools (non-IC),Deposition tools for non-IC applications inclu...
2,28,Auto ball bonders for non-IC,Automatic ball bonders for non-integrated circ...
3,31,Die attaching tools for non-IC,Die attaching tools for non-integrated circuit...
4,119,DAO chip design,"Discrete, Analog, and Other (DAO) semiconducto..."
...,...,...,...
73,228,Process-specific fabrication materials,Process-specific fabrication materials are hig...
74,169,Lithography tools (adv. pkg.),These tools are lithography tools designed and...
75,137,Misc. dry etch tools,Miscellaneous dry etch tools are specialized s...
76,201,Deposition tools,Deposition tools are used to deposit thin film...


CPU times: user 9.46 ms, sys: 2.12 ms, total: 11.6 ms
Wall time: 10.1 ms


## Get maximum length chain for each process (set process at start node)

In [51]:
%%time

# Find all Process nodes
command = """
MATCH (n:Process)
RETURN n, n.displayName, n.description
"""
df_processes = client.query(command)
df_processes.columns = get_return_statements(command)
display(df_processes)
list_processes = list(df_processes["n.displayName"])
print(list_processes)

Unnamed: 0,n,n.displayName,n.description
0,107,Ion implantation,Ion implanters embed dopant substances into si...
1,108,Photolithography,"In photolithography, light is used to draw pat..."
2,109,Chemical mechanical planarization,"After etching and cleaning, the wafer surface ..."
3,110,Etch and clean,After photolithography creates a pattern in th...
4,111,Assembly and packaging,"At the end of the fabrication process, the fin..."
5,112,Process control and handling,"Process control: In semiconductor fabrication,..."
6,113,Testing,Chips undergo tests requiring a range of speci...
7,114,Deposition,"In the deposition process, specialized tools a..."
8,115,EDA and Core IP,Electronic design automation (EDA) software: U...
9,116,Chip design,"Chip design involves specification, logic desi..."


['Ion implantation', 'Photolithography', 'Chemical mechanical planarization', 'Etch and clean', 'Assembly and packaging', 'Process control and handling', 'Testing', 'Deposition', 'EDA and Core IP', 'Chip design', 'Fabrication materials', 'Process control and handling', 'Deposition', 'Fabrication materials', 'EDA and Core IP', 'Etch and clean', 'Chemical mechanical planarization', 'Ion implantation', 'Photolithography', 'Assembly and packaging', 'Testing', 'Chip design']
CPU times: user 9.72 ms, sys: 1.06 ms, total: 10.8 ms
Wall time: 9.29 ms


In [52]:
def build_query_process_chain(process_name: str, hop_count: int) -> str:
    """Build a query to find chains starting from a specific process"""
    query = "MATCH "
    query += f'(start:Process{{displayName:"{process_name}"}})'

    for k in range(1, hop_count + 1):
        query += f"-[e{k}]-(s{k})"

    query += " RETURN start, start.id, start.displayName, "
    for k in range(1, hop_count + 1):
        if k > 1:
            query += ", "
        query += f"e{k}, s{k}, s{k}.id, s{k}.displayName"

    return query

In [53]:
%%time

# Maximum number of hops to try
max_hops = 12

# Save all dictionaries during the process
list_longest_df = []

# Get maximum length chain for each process (set process at start node)
for process in list_processes:
    print(100 * "*")
    print(f"Process: {process}")

    found_path = False
    longest_df = None
    longest_hop = 0

    # Try increasing hop counts until we find the longest path
    for hop_count in range(1, max_hops + 1):
        command = build_query_process_chain(process, hop_count)
        df = client.query(command)

        if not df.empty:
            longest_df = df
            longest_hop = hop_count
            found_path = True
        else:
            longest_df.columns = get_return_statements(
                build_query_process_chain(process, hop_count - 1)
            )
            # If we get an empty result, we've reached the maximum path length
            break

    if found_path:
        print(f"Longest path found: {longest_hop} hops")
        display(longest_df)
        list_longest_df.append(longest_df)
    else:
        print("No paths found")

print(100 * "*")

****************************************************************************************************
Process: Ion implantation
Longest path found: 4 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,s2.displayName,e3,s3,s3.id,s3.displayName,e4,s4,s4.id,s4.displayName
0,107,N16,Ion implantation,113,109,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,132,N16,Ion implantation,153,131,N57,Chemical mechanical planarization,152,134,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Photolithography
Longest path found: 6 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s4.id,s4.displayName,e5,s5,s5.id,s5.displayName,e6,s6,s6.id,s6.displayName
0,108,N25,Photolithography,114,110,N46,Etch and clean,116,107,N16,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,133,N25,Photolithography,154,130,N46,Etch and clean,151,132,N16,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Chemical mechanical planarization
Longest path found: 3 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,s2.displayName,e3,s3,s3.id,s3.displayName
0,109,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,131,N57,Chemical mechanical planarization,152,134,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Etch and clean
Longest path found: 5 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s3.id,s3.displayName,e4,s4,s4.id,s4.displayName,e5,s5,s5.id,s5.displayName
0,110,N46,Etch and clean,116,107,N16,Ion implantation,113,109,N57,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,130,N46,Etch and clean,151,132,N16,Ion implantation,153,131,N57,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Assembly and packaging
Longest path found: 2 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,s2.displayName
0,111,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,134,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Process control and handling
Longest path found: 8 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s6.id,s6.displayName,e7,s7,s7.id,s7.displayName,e8,s8,s8.id,s8.displayName
0,112,N118,Process control and handling,120,114,N35,Deposition,124,108,N25,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,126,N118,Process control and handling,143,127,N35,Deposition,144,133,N25,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Testing
Longest path found: 1 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName
0,113,N78,Testing,123,118,N99,Finished logic chip
1,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Deposition
Longest path found: 7 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s5.id,s5.displayName,e6,s6,s6.id,s6.displayName,e7,s7,s7.id,s7.displayName
0,114,N35,Deposition,124,108,N25,Photolithography,114,110,N46,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,127,N35,Deposition,144,133,N25,Photolithography,154,130,N46,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: EDA and Core IP
Longest path found: 9 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s7.id,s7.displayName,e8,s8,s8.id,s8.displayName,e9,s9,s9.id,s9.displayName
0,115,N7,EDA and Core IP,125,116,N0,Chip design,126,114,N35,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,129,N7,EDA and Core IP,150,136,N0,Chip design,157,127,N35,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Chip design
Longest path found: 8 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s6.id,s6.displayName,e7,s7,s7.id,s7.displayName,e8,s8,s8.id,s8.displayName
0,116,N0,Chip design,126,114,N35,Deposition,124,108,N25,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,136,N0,Chip design,157,127,N35,Deposition,144,133,N25,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Fabrication materials
Longest path found: 8 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s6.id,s6.displayName,e7,s7,s7.id,s7.displayName,e8,s8,s8.id,s8.displayName
0,117,N117,Fabrication materials,127,114,N35,Deposition,124,108,N25,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,128,N117,Fabrication materials,146,127,N35,Deposition,144,133,N25,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Process control and handling
Longest path found: 8 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s6.id,s6.displayName,e7,s7,s7.id,s7.displayName,e8,s8,s8.id,s8.displayName
0,112,N118,Process control and handling,120,114,N35,Deposition,124,108,N25,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,126,N118,Process control and handling,143,127,N35,Deposition,144,133,N25,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Deposition
Longest path found: 7 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s5.id,s5.displayName,e6,s6,s6.id,s6.displayName,e7,s7,s7.id,s7.displayName
0,114,N35,Deposition,124,108,N25,Photolithography,114,110,N46,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,127,N35,Deposition,144,133,N25,Photolithography,154,130,N46,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Fabrication materials
Longest path found: 8 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s6.id,s6.displayName,e7,s7,s7.id,s7.displayName,e8,s8,s8.id,s8.displayName
0,117,N117,Fabrication materials,127,114,N35,Deposition,124,108,N25,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,128,N117,Fabrication materials,146,127,N35,Deposition,144,133,N25,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: EDA and Core IP
Longest path found: 9 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s7.id,s7.displayName,e8,s8,s8.id,s8.displayName,e9,s9,s9.id,s9.displayName
0,115,N7,EDA and Core IP,125,116,N0,Chip design,126,114,N35,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,129,N7,EDA and Core IP,150,136,N0,Chip design,157,127,N35,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Etch and clean
Longest path found: 5 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s3.id,s3.displayName,e4,s4,s4.id,s4.displayName,e5,s5,s5.id,s5.displayName
0,110,N46,Etch and clean,116,107,N16,Ion implantation,113,109,N57,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,130,N46,Etch and clean,151,132,N16,Ion implantation,153,131,N57,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Chemical mechanical planarization
Longest path found: 3 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,s2.displayName,e3,s3,s3.id,s3.displayName
0,109,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,131,N57,Chemical mechanical planarization,152,134,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Ion implantation
Longest path found: 4 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,s2.displayName,e3,s3,s3.id,s3.displayName,e4,s4,s4.id,s4.displayName
0,107,N16,Ion implantation,113,109,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,132,N16,Ion implantation,153,131,N57,Chemical mechanical planarization,152,134,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Photolithography
Longest path found: 6 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s4.id,s4.displayName,e5,s5,s5.id,s5.displayName,e6,s6,s6.id,s6.displayName
0,108,N25,Photolithography,114,110,N46,Etch and clean,116,107,N16,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,133,N25,Photolithography,154,130,N46,Etch and clean,151,132,N16,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Assembly and packaging
Longest path found: 2 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,s2.displayName
0,111,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,134,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Testing
Longest path found: 1 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName
0,113,N78,Testing,123,118,N99,Finished logic chip
1,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
Process: Chip design
Longest path found: 8 hops


Unnamed: 0,start,start.id,start.displayName,e1,s1,s1.id,s1.displayName,e2,s2,s2.id,...,s6.id,s6.displayName,e7,s7,s7.id,s7.displayName,e8,s8,s8.id,s8.displayName
0,116,N0,Chip design,126,114,N35,Deposition,124,108,N25,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,136,N0,Chip design,157,127,N35,Deposition,144,133,N25,...,N69,Assembly and packaging,155,135,N78,Testing,156,244,N99,Finished logic chip


****************************************************************************************************
CPU times: user 308 ms, sys: 13.5 ms, total: 321 ms
Wall time: 311 ms


# Create subgraph to visualise

In [54]:
# Get subgraph
list_subset_nodes = []
for longest_df in list_longest_df:
    subset_nodes = longest_df.filter(regex="id$", axis=1).iloc[0].values.tolist()
    list_subset_nodes.extend(subset_nodes)

subG = G.subgraph(list_subset_nodes).copy()
print(subG)

# Build CREATE command from subgraph
create_command_subG = build_create_command_from_networkx(subG)
print(f"Cypher CREATE command :\n\n{100 * '*'}\n{create_command_subG}\n{100 * '*'}")

DiGraph with 12 nodes and 19 edges
Cypher CREATE command :

****************************************************************************************************
CREATE (n0:Process {"id":"N117", "displayName":"Fabrication materials", "input_name":"Fabrication materials", "type":"process", "description":"Fabrication materials are a critical input in the fabrication of chips. Fabrication materials include high-purity silicon wafers, chemicals, and gases.", "stage_name":"Fabrication", "stage_id":"S2", "stage_description":"Fabrication turns designs into chips. Semiconductor fabrication facilities (\ fabs\ ) make chips in these wafers in two steps: forming transistors and other electrical devices in material layers within silicon wafers; and forming metal interconnects between the electrical devices in insulating layers above the silicon. There are two business models for fabs: (1) fabs owned by integrated device manufacturers (\ IDMs\ ), which manufacture chips based on their own designs; a

In [55]:
subgraph_name = f"{graph_name}_subgraph"
subgraph_name

'supply_chain_eto_chip_explorer1_subgraph'

In [56]:
%%time

# Create new graph
client.query(f"CREATE GRAPH {subgraph_name}")
client.set_graph(subgraph_name)

# Create a new change on the graph
change = client.query("CHANGE NEW").loc[0, 0]

# Checkout into the change
client.checkout(change=change)

# Run CREATE command
client.query(create_command_subG)

# Commit the change
client.query("COMMIT")
client.query("CHANGE SUBMIT")

# Checkout into main
client.checkout()

CPU times: user 8.24 ms, sys: 1.94 ms, total: 10.2 ms
Wall time: 116 ms


<div class="alert alert-block alert-info">
    <h2>
        You can visualise the subgraph directly in the notebook below. For more details on nodes and edges, you can go to TuringDB visualizer (running on your instance)
    </h2>
</div>

<div class="alert alert-block alert-info">
    <h2>
        Visualize your graph in TuringDB Graph Visualizer ! Now that your instance is running:
    </h2>
    <h3>
        <ul>
            <li>Go to <a href="https://console.turingdb.ai/databases">TuringDB Console - Database Instances</a></li>
            <li>In your current instance panel, click on "Open Visualizer" button</li>
            <li>Visualizer opens, now you can choose your graph in the dropdown menu at the top-right corner</li>
        </ul>
        You can then play with your graph and visualize the nodes you want !
    </h3>
</div>

In [57]:
from pyvis.network import Network

net = Network(
    height="750px",
    width="100%",
    notebook=True,
    bgcolor="#f8f9fa",
    font_color="#212529",
    directed=True,
)

# Node type colors
type_colors = {
    "tool_resource": "#3498db",  # Blue for tools
    "material_resource": "#e74c3c",  # Red for materials
    "process": "#2ecc71",  # Green for processes
    "design_resource": "#9b59b6",  # Purple for design
    "ultimate_output": "#f39c12",  # Orange for output
}

for node, data in subG.nodes(data=True):
    node_type = data.get("type", "Unknown")
    color = type_colors.get(node_type, "#7f8c8d")

    label = data.get("displayName", str(node))

    # Build title with key information
    title_parts = [f"<b>{label}</b>", f"Type: {node_type}"]

    if data.get("provider_name"):
        title_parts.append(f"Provider: {data.get('provider_name')}")
    if data.get("share_provided"):
        title_parts.append(f"Market Share: {data.get('share_provided')}%")
    if data.get("stage_name") and data.get("stage_name") != "nan":
        title_parts.append(f"Stage: {data.get('stage_name')}")

    title = "<br>".join(title_parts)

    net.add_node(node, label=label, color=color, title=title, size=25)

# Edge type colors
edge_colors = {"GOES_INTO": "#27ae60", "IS_TYPE_OF": "#e67e22"}

for source, target, data in subG.edges(data=True):
    edge_type = list(data.keys())[0] if data else "CONNECTED"
    color = edge_colors.get(edge_type, "#95a5a6")
    net.add_edge(source, target, title=edge_type, color=color, width=2)

net.toggle_physics(True)
net.show(f"{example_name}_subgraph.html")

supply_chain_eto-chip-explorer_subgraph.html


# Use LLM to generate Cypher query

Before running this section, create a `.env` file in the project root with your API keys:

```env
ANTHROPIC_API_KEY=your_key_here
OPENAI_API_KEY=your_key_here
MISTRAL_API_KEY=your_key_here

In [58]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv(override=True)

True

In [59]:
api_keys = {
    "Anthropic": os.getenv("ANTHROPIC_API_KEY"),
    "Mistral": os.getenv("MISTRAL_API_KEY"),
    "OpenAI": os.getenv("OPENAI_API_KEY"),
}

In [60]:
"""Build system prompt with TuringDB schema and examples"""

turingdb_cypher_system_prompt = """
You are an expert at converting natural language questions into TuringDB queries.

Your task is to generate syntactically correct TuringDB queries based on natural language input.

VERY IMPORTANT - TuringDB Syntax Guidelines:
1. Return ONLY the TuringDB query, no explanations or markdown formatting
2. Use MATCH or CREATE operations only
3. Nodes: (n:Label{property="value"}) or (n:Label{property:value})
4. Edges: Use UNDIRECTED syntax with - (NOT ->)
5. Pattern matching: MATCH (n)-[e]-(m)
6. Property matching: Use = or : operators for exact matching
7. String approximation: Use ~= for approximate string matching
8. Node ID injection: Use @ operator or AT keyword: (n @ 1) or (n AT 1)
9. Multiple constraints: (n:Person,Engineer{name="John", age=30})
10. Return all matched entities: RETURN n, e, m or use RETURN * for all

VERY IMPORTANT - FORBIDDEN in TuringDB:
- Do NOT use directed edges (-> or <-)
- Do NOT use AS aliases
- Do NOT use LIMIT, SKIP clauses
- Do NOT use WHERE clauses
- Do NOT use WITH clauses
- Do NOT use CALL (except for metaqueries)
- Do NOT use toLower() or other functions

Supported TuringDB Operations:
- MATCH queries: MATCH (n:Label)-[e:Type]-(m) RETURN n, m
- CREATE queries: CREATE (n:Label{property="value"})-[e:Type]-(m:Label)
- Metaqueries: CALL PROPERTIES(), CALL LABELS(), CALL EDGETYPES(), CALL LABELSETS()
- Property types: String ("text" or `text`), Boolean (true/false), Integer (20), Unsigned (20u), Double (20.5)

Examples for few-shot learning:
- Find all persons: MATCH (n:Person) RETURN n
- Find connections: MATCH (n:Person)-[e]-(m:Person) RETURN n, e, m
- Create person: CREATE (n:Person{name="John", age=30})
- String approximation: MATCH (n{name~="John"}) RETURN n
- Node by ID: MATCH (n @ 1) RETURN n
- Multiple IDs: MATCH (n:Person @ 1, 2, 3) RETURN n
- Path with 1 hop between Station Paddington and Blackfriars:  MATCH (start:Station{displayName:"Paddington"})-[e1:CONNECTED]-(end:Station{displayName="Blackfriars"}) RETURN start, start.displayName, start.Note, e1.Line, end, end.displayName, end.Note
- Path with 2 hops between Station Paddington and Blackfriars: MATCH (start:Station{displayName:"Paddington"})-[e1:CONNECTED]-(s1:Station)-[e2:CONNECTED]-(end:Station{displayName="Blackfriars"}) RETURN start, start.displayName, start.Note, e1.Line, s1, s1.displayName, s1.Note, e2.Line, end, end.displayName, end.Note
- Path with 8 hops between Station Paddington and Blackfriars: MATCH (start:Station{displayName:"Paddington"})-[e1:CONNECTED]-(s1:Station)-[e2:CONNECTED]-(s2:Station)-[e3:CONNECTED]-(s3:Station)-[e4:CONNECTED]-(s4:Station)-[e5:CONNECTED]-(s5:Station)-[e6:CONNECTED]-(s6:Station)-[e7:CONNECTED]-(s7:Station)-[e8:CONNECTED]-(end:Station{displayName="Blackfriars"}) RETURN start, start.displayName, start.Note, e1.Line, s1, s1.displayName, s1.Note, e2.Line, s2, s2.displayName, s2.Note, e3.Line, s3, s3.displayName, s3.Note, e4.Line, s4, s4.displayName, s4.Note, e5.Line, s5, s5.displayName, s5.Note, e6.Line, s6, s6.displayName, s6.Note, e7.Line, s7, s7.displayName, s7.Note, e8.Line, end, end.displayName, end.Note
- Find all Chinese providers and what they supply: MATCH (n{provider_country:"CHN"}) RETURN n, n.provider_name, n.displayName, n.share_provided, n.type
- Find all deposition tools and their types: MATCH (specific)-[e:IS_TYPE_OF]-(general:Tool_Resource{displayName:"Deposition tools"}) RETURN specific, specific.displayName, specific.provider_name, e, general, general.displayName
"""

In [61]:
# Get subset of CREATE command to avoid exceeding context window
create_command_subset = create_command.split("\n")[:5] + create_command.split("\n")[-5:]

# Create system_prompt
system_prompt = f"""
TuringDB Cypher prompt :
{turingdb_cypher_system_prompt}

Here is a subset of the CREATE command used to create the graph, this way you know graph structure.
Only a subset is passed because the whole command is to long :
{create_command_subset}

Here is also the output of "CALL LABELS ()" command, showing the different node types of the graph :
{client.query("CALL LABELS ()")}

Here is also the output of "CALL EDGETYPES ()" command, showing the different edge types of the graph :
{client.query("CALL EDGETYPES ()")}

Very important :
- You MUST follow current TuringDB Syntax Guidelines
- You MUST NOT USE what is FORBIDDEN in TuringDB
- By default, RETURN ALL THE MATCHED NODES AND EDGES AND THEIR PROPERTIES in the RETURN section (except contrary demand from user)
- Use the correct node and edge properties name in the MATCH section.
- Use the correct node and edge properties name in the RETURN section.
- Pay attention to which properties come from nodes or edges, to create a functioning query
- Pay attention to lower and uppercases in properties
- If some properties contain spaces, be careful to wrap them

Give me the query FOLLOWING TURINGDB GUIDELINES AND NOT USING WHAT IS FORBIDDEN for this specific question :
"""

In [62]:
## Find all deposition tools and their types
# command = """
# MATCH (specific)-[e:IS_TYPE_OF]-(general:Tool_Resource{displayName:"Deposition tools"}) RETURN specific, specific.displayName, specific.provider_name, e, general, general.displayName
# """
# client.query(command)

# Set natural language query
question = """
Find all deposition tools and their types
"""

In [63]:
## Find all Chinese providers and what they supply
# command = """
# MATCH (n{provider_country:"CHN"}) RETURN n, n.provider_name, n.displayName, n.share_provided, n.type
# """
# client.query(command)

# Set natural language query
question = """
Find all Chinese providers and what they supply
"""

In [64]:
## Find assembly tools and their connections
# command = """
# MATCH (n:Tool_Resource{displayName:"Assembly tools"})-[e]-(m) RETURN n, n.displayName, e, m, m.displayName, m.type
# """
# client.query(command)

# Set natural language query
question = """
Find assembly tools and their connections
"""

In [65]:
question = """
What are the other providers linked to ASML specifically for lithography tools in the supply chain ?
"""

In [66]:
%%time

provider = "Anthropic"

cypher_query = natural_language_to_cypher(
    question=question,
    system_prompt=system_prompt,
    provider=provider,
    api_key=api_keys[provider],
    temperature=0.0,
    model="claude-3-haiku-20240307",
)
print(f"cypher_query : {cypher_query}")

cypher_query : MATCH (n:ToolResource{provider_name:"ASML"})-[e:IS_TYPE_OF]-(m:ToolResource) 
RETURN n, n.displayName, n.provider_name, n.provider_country, e, m, m.displayName, m.provider_name, m.provider_country
CPU times: user 194 ms, sys: 39.9 ms, total: 233 ms
Wall time: 1.35 s


In [67]:
%%time

# Set original graph
client.set_graph(graph_name)

df_path = client.query(cypher_query)
df_path.columns = get_return_statements(cypher_query)
if df_path.empty:
    print("--> No result found\n")
else:
    display(df_path)

Unnamed: 0,n,n.displayName,n.provider_name,n.provider_country,e,m,m.displayName,m.provider_name,m.provider_country
0,141,i-line lithography tools,ASML,NLD,162,208,Lithography tools,ASML,NLD
1,143,KrF (DUV) lithography tools,ASML,NLD,164,208,Lithography tools,ASML,NLD
2,145,Overlay metrology tools,ASML,NLD,166,224,Wafer inspection tools,CHN,
3,181,E-beam metrology tools,ASML,NLD,202,224,Wafer inspection tools,CHN,
4,206,ArF immersion (DUV) lithography tools,ASML,NLD,227,208,Lithography tools,ASML,NLD
5,207,ArF dry (DUV) lithography tools,ASML,NLD,228,208,Lithography tools,ASML,NLD
6,209,EUV lithography tools,ASML,NLD,230,208,Lithography tools,ASML,NLD


CPU times: user 12.8 ms, sys: 65 μs, total: 12.8 ms
Wall time: 11.7 ms


In [68]:
print("Notebook finished !")

Notebook finished !
