<div class="alert alert-block alert-success">
    <h1>
        Example notebook - Supply chain
    </h1>
    <p>
        Link to dataset : <a href="https://eto.tech/dataset-docs/chipexplorer/">dataset documentation link</a>
    </p>
</div>

# Import modules and functions

In [35]:
import os
import pandas as pd
import glob
import re
import numpy as np
import time
from tqdm.auto import tqdm

from turingdb_examples.graph import (
    create_graph_from_df,
    build_create_command_from_networkx,
    split_cypher_commands
)
from turingdb_examples.llm import natural_language_to_cypher
from turingdb_examples.utils import escape_for_cypher

In [2]:
%load_ext autoreload
%autoreload 2

# Check data files are available

In [5]:
example_name = "supply_chain_eto-chip-explorer"
path_data = f"{os.getcwd()}/data/{example_name}"
if not os.path.exists(path_data):
    raise ValueError(f"{path_data} does not exists")

list_csv_files = sorted(os.listdir(path_data))
if not all([file in list_csv_files for file in [
    "inputs.csv",
    "providers.csv",
    "provision.csv",
    "sequence.csv",
    "stages.csv",
]]):
    raise ValueError(
        f"At least one of the {len(list_csv_files)} csv files is not available in {path_data}"
    )

# Import and format data

In [6]:
dict_df = {}

for path_csv in glob.glob(f"{path_data}/*.csv"):
    print(100 * "-")
    key_name = re.sub(".csv", "", os.path.basename(path_csv))
    dict_df[key_name] = pd.read_csv(path_csv)

    print(f"--- {key_name}")

    display(dict_df[key_name])

print(100 * "-")

----------------------------------------------------------------------------------------------------
--- providers


Unnamed: 0,provider_name,alias,provider_id,provider_type,country
0,USA,United States,P1,country,
1,CHN,China,P2,country,
2,KOR,South Korea,P4,country,
3,Various countries,,P5,country,
4,JPN,Japan,P7,country,
...,...,...,...,...,...
392,Shenzhen Naso Tech Co.,,P407,organization,CHN
393,Intevac,,P408,organization,USA
394,Palomar Technologies,,P409,organization,USA
395,PacTech,,P410,organization,JPN


----------------------------------------------------------------------------------------------------
--- inputs


Unnamed: 0,input_id,input_name,type,stage_name,stage_id,description,year,market_share_chart_global_market_size_info,market_share_chart_caption,market_share_chart_source
0,N0,Chip design,process,Design,S1,"Chip design involves specification, logic desi...",2022.0,$574.1 billion (2022),,Worldwide semiconductor sales. [World Semicond...
1,N6,Logic chip design,design_resource,,,Logic chip design is the design of integrated ...,2022.0,$255.7 billion (2022),,Worldwide semiconductor sales for logic and mi...
2,N1,Advanced CPUs,design_resource,,,"Central processing units (""CPUs"") are the domi...",2019.0,$56.2 billion (microprocessors) (2019),,[CSET](https://cset.georgetown.edu/publication...
3,N2,Discrete GPUs,design_resource,,,"Discrete graphics processing units (""GPUs"") ha...",2019.0,$11.9 billion (2019),,[CSET](https://cset.georgetown.edu/publication...
4,N3,FPGAs,design_resource,,,"Field-programmable gate arrays (""FPGAs""), unli...",2019.0,$5.7 billion (2019),,[CSET](https://cset.georgetown.edu/publication...
...,...,...,...,...,...,...,...,...,...,...
121,N126,Process-specific fabrication materials,material_resource,,,Process-specific fabrication materials are hig...,,,,
122,N127,Lithography tools (adv. pkg.),tool_resource,,,These tools are lithography tools designed and...,2024.0,$275.6 million (2024),,CSET analysis of TechInsights data (2024). Dat...
123,N128,Deposition tools (adv. pkg.),tool_resource,,,These tools are deposition tools designed and ...,2024.0,$2.6 billion (2024),,CSET analysis of TechInsights data (2024). Dat...
124,N129,Etch and clean tools (adv. pkg.),tool_resource,,,These tools are etch and clean tools designed ...,2024.0,$1.2 billion (2024),,CSET analysis of TechInsights data (2024). Dat...


----------------------------------------------------------------------------------------------------
--- stages


Unnamed: 0,stage_id,stage_name,description,market_share_chart_global_market_size_info,market_share_chart_caption,market_share_chart_source
0,S1,Design,"Semiconductor design involves specification, d...",$574 billion (2022),Chart shows market shares for the overall glob...,[Semiconductor Industry Association (SIA)](htt...
1,S2,Fabrication,Fabrication turns designs into chips. Semicond...,,Chart shows share of global fabrication capaci...,[Semiconductor Industry Association (SIA)](htt...
2,S3,"Assembly, testing, and packaging (ATP)","At the end of the fabrication process, the fin...",$95 billion (2022),Chart shows ATP site capacity by country.,[Semiconductor Industry Association (SIA)](htt...


----------------------------------------------------------------------------------------------------
--- provision


Unnamed: 0,provider_name,provider_id,provided_name,provided_id,share_provided,year,source
0,ACM Research,P313,Wet etching and cleaning tools,N49,11.4,2024,CSET analysis of TechInsights data (2024). Dat...
1,ACM Research,P313,Etch and clean tools,N55,3.0,2024,CSET analysis of TechInsights data (2024). Dat...
2,AMEC,P123,Dry etching tools (adv. pkg.),N101,18.1,2024,CSET analysis of TechInsights data (2024). Dat...
3,AMEC,P123,Dry etch tools,N103,5.5,2024,CSET analysis of TechInsights data (2024). Dat...
4,AMEC,P123,Fabrication tools (for advanced packaging),N109,2.5,2024,CSET analysis of TechInsights data (2024). Dat...
...,...,...,...,...,...,...,...
1300,Zhonghuan,P231,Wafer,N26,,2019,[CSET](https://cset.georgetown.edu/publication...
1301,Zhongwei,P295,Ceramic packages,N95,,2019,
1302,Zhuhai Yueya,P301,Substrates,N96,,2019,
1303,ZingSEMI,P237,Wafer,N26,,2019,[CSET](https://cset.georgetown.edu/publication...


----------------------------------------------------------------------------------------------------
--- sequence


Unnamed: 0,input_name,input_id,goes_into_name,goes_into_id,is_type_of_name,is_type_of_id
0,Crystal growing furnaces,N8,Wafer,N26,,
1,Crystal machining tools,N9,Wafer,N26,,
2,EUV lithography tools,N20,,,Lithography tools,N19
3,ArF dry (DUV) lithography tools,N21,,,Lithography tools,N19
4,ArF immersion (DUV) lithography tools,N22,,,Lithography tools,N19
...,...,...,...,...,...,...
134,Auto ball bonders for IC,N134,,,Wire bonding tools,N74
135,Auto ball bonders for non-IC,N135,,,Wire bonding tools,N74
136,Automatic wedge bonders,N136,,,Wire bonding tools,N74
137,Wafer level stud bonders,N137,,,Wire bonding tools,N74


----------------------------------------------------------------------------------------------------


## Format `df_provision` and `df_providers` dataframes

In [7]:
df_provision = dict_df["provision"].copy()
df_provision = df_provision.rename(
    columns={"year": "year_share_provided", "source": "source_provider_provided"}
)
df_provision

Unnamed: 0,provider_name,provider_id,provided_name,provided_id,share_provided,year_share_provided,source_provider_provided
0,ACM Research,P313,Wet etching and cleaning tools,N49,11.4,2024,CSET analysis of TechInsights data (2024). Dat...
1,ACM Research,P313,Etch and clean tools,N55,3.0,2024,CSET analysis of TechInsights data (2024). Dat...
2,AMEC,P123,Dry etching tools (adv. pkg.),N101,18.1,2024,CSET analysis of TechInsights data (2024). Dat...
3,AMEC,P123,Dry etch tools,N103,5.5,2024,CSET analysis of TechInsights data (2024). Dat...
4,AMEC,P123,Fabrication tools (for advanced packaging),N109,2.5,2024,CSET analysis of TechInsights data (2024). Dat...
...,...,...,...,...,...,...,...
1300,Zhonghuan,P231,Wafer,N26,,2019,[CSET](https://cset.georgetown.edu/publication...
1301,Zhongwei,P295,Ceramic packages,N95,,2019,
1302,Zhuhai Yueya,P301,Substrates,N96,,2019,
1303,ZingSEMI,P237,Wafer,N26,,2019,[CSET](https://cset.georgetown.edu/publication...


In [8]:
df_providers = dict_df["providers"].copy()
df_providers = df_providers.rename(
    columns={"alias": "provider_alias", "country": "provider_country"}
)
df_providers

Unnamed: 0,provider_name,provider_alias,provider_id,provider_type,provider_country
0,USA,United States,P1,country,
1,CHN,China,P2,country,
2,KOR,South Korea,P4,country,
3,Various countries,,P5,country,
4,JPN,Japan,P7,country,
...,...,...,...,...,...
392,Shenzhen Naso Tech Co.,,P407,organization,CHN
393,Intevac,,P408,organization,USA
394,Palomar Technologies,,P409,organization,USA
395,PacTech,,P410,organization,JPN


In [9]:
df_provision = pd.merge(
    df_provision, df_providers, on="provider_id", how="left", suffixes=("", "_y")
)
df_provision = df_provision.drop(df_provision.filter(regex="_y$").columns, axis=1)
df_provision = df_provision[
    [
        "provided_name",
        "provided_id",
        "provider_name",
        "provider_id",
        "provider_alias",
        "provider_type",
        "provider_country",
        "share_provided",
        "year_share_provided",
        "source_provider_provided",
    ]
]
df_provision

Unnamed: 0,provided_name,provided_id,provider_name,provider_id,provider_alias,provider_type,provider_country,share_provided,year_share_provided,source_provider_provided
0,Wet etching and cleaning tools,N49,ACM Research,P313,,organization,USA,11.4,2024,CSET analysis of TechInsights data (2024). Dat...
1,Etch and clean tools,N55,ACM Research,P313,,organization,USA,3.0,2024,CSET analysis of TechInsights data (2024). Dat...
2,Dry etching tools (adv. pkg.),N101,AMEC,P123,Advanced Micro-Fabrication Equipment Inc. China,organization,CHN,18.1,2024,CSET analysis of TechInsights data (2024). Dat...
3,Dry etch tools,N103,AMEC,P123,Advanced Micro-Fabrication Equipment Inc. China,organization,CHN,5.5,2024,CSET analysis of TechInsights data (2024). Dat...
4,Fabrication tools (for advanced packaging),N109,AMEC,P123,Advanced Micro-Fabrication Equipment Inc. China,organization,CHN,2.5,2024,CSET analysis of TechInsights data (2024). Dat...
...,...,...,...,...,...,...,...,...,...,...
1328,Wafer,N26,Zhonghuan,P231,,organization,CHN,,2019,[CSET](https://cset.georgetown.edu/publication...
1329,Ceramic packages,N95,Zhongwei,P295,,organization,CHN,,2019,
1330,Substrates,N96,Zhuhai Yueya,P301,,organization,CHN,,2019,
1331,Wafer,N26,ZingSEMI,P237,,organization,CHN,,2019,[CSET](https://cset.georgetown.edu/publication...


## Format `df_stages` dataframe

In [10]:
# Apply to your dataframe
df_inputs = dict_df["inputs"].copy()
df_inputs = df_inputs.dropna(axis=1, how="all")

# Clean dataframe before creating the graph
for col in df_inputs.select_dtypes(include=["object"]).columns:
    df_inputs[col] = df_inputs[col].apply(escape_for_cypher)

df_inputs

Unnamed: 0,input_id,input_name,type,stage_name,stage_id,description,year,market_share_chart_global_market_size_info,market_share_chart_source
0,N0,Chip design,process,Design,S1,"Chip design involves specification, logic desi...",2022.0,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...
1,N6,Logic chip design,design_resource,,,Logic chip design is the design of integrated ...,2022.0,$255.7 billion (2022),Worldwide semiconductor sales for logic and mi...
2,N1,Advanced CPUs,design_resource,,,"Central processing units (\""CPUs\"") are the do...",2019.0,$56.2 billion (microprocessors) (2019),[CSET](https://cset.georgetown.edu/publication...
3,N2,Discrete GPUs,design_resource,,,"Discrete graphics processing units (\""GPUs\"") ...",2019.0,$11.9 billion (2019),[CSET](https://cset.georgetown.edu/publication...
4,N3,FPGAs,design_resource,,,"Field-programmable gate arrays (\""FPGAs\""), un...",2019.0,$5.7 billion (2019),[CSET](https://cset.georgetown.edu/publication...
...,...,...,...,...,...,...,...,...,...
121,N126,Process-specific fabrication materials,material_resource,,,Process-specific fabrication materials are hig...,,,
122,N127,Lithography tools (adv. pkg.),tool_resource,,,These tools are lithography tools designed and...,2024.0,$275.6 million (2024),CSET analysis of TechInsights data (2024). Dat...
123,N128,Deposition tools (adv. pkg.),tool_resource,,,These tools are deposition tools designed and ...,2024.0,$2.6 billion (2024),CSET analysis of TechInsights data (2024). Dat...
124,N129,Etch and clean tools (adv. pkg.),tool_resource,,,These tools are etch and clean tools designed ...,2024.0,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...


In [11]:
df_stages = dict_df["stages"].copy()
df_stages = df_stages.rename(columns={"description": "stage_description"})
df_stages = df_stages.drop(
    ["market_share_chart_global_market_size_info", "market_share_chart_source"], axis=1
)
df_stages

Unnamed: 0,stage_id,stage_name,stage_description,market_share_chart_caption
0,S1,Design,"Semiconductor design involves specification, d...",Chart shows market shares for the overall glob...
1,S2,Fabrication,Fabrication turns designs into chips. Semicond...,Chart shows share of global fabrication capaci...
2,S3,"Assembly, testing, and packaging (ATP)","At the end of the fabrication process, the fin...",Chart shows ATP site capacity by country.


## Format `df_inputs` dataframe

In [12]:
df_inputs = pd.merge(
    df_inputs, df_stages, on="stage_id", how="left", suffixes=("", "_y")
)
df_inputs = df_inputs.drop(df_inputs.filter(regex="_y$").columns, axis=1)

df_inputs = df_inputs[
    [
        "input_id",
        "input_name",
        "type",
        "description",
        "stage_name",
        "stage_id",
        "stage_description",
        "year",
        "market_share_chart_caption",
        "market_share_chart_global_market_size_info",
        "market_share_chart_source",
    ]
]

df_inputs

Unnamed: 0,input_id,input_name,type,description,stage_name,stage_id,stage_description,year,market_share_chart_caption,market_share_chart_global_market_size_info,market_share_chart_source
0,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...
1,N6,Logic chip design,design_resource,Logic chip design is the design of integrated ...,,,,2022.0,,$255.7 billion (2022),Worldwide semiconductor sales for logic and mi...
2,N1,Advanced CPUs,design_resource,"Central processing units (\""CPUs\"") are the do...",,,,2019.0,,$56.2 billion (microprocessors) (2019),[CSET](https://cset.georgetown.edu/publication...
3,N2,Discrete GPUs,design_resource,"Discrete graphics processing units (\""GPUs\"") ...",,,,2019.0,,$11.9 billion (2019),[CSET](https://cset.georgetown.edu/publication...
4,N3,FPGAs,design_resource,"Field-programmable gate arrays (\""FPGAs\""), un...",,,,2019.0,,$5.7 billion (2019),[CSET](https://cset.georgetown.edu/publication...
...,...,...,...,...,...,...,...,...,...,...,...
121,N126,Process-specific fabrication materials,material_resource,Process-specific fabrication materials are hig...,,,,,,,
122,N127,Lithography tools (adv. pkg.),tool_resource,These tools are lithography tools designed and...,,,,2024.0,,$275.6 million (2024),CSET analysis of TechInsights data (2024). Dat...
123,N128,Deposition tools (adv. pkg.),tool_resource,These tools are deposition tools designed and ...,,,,2024.0,,$2.6 billion (2024),CSET analysis of TechInsights data (2024). Dat...
124,N129,Etch and clean tools (adv. pkg.),tool_resource,These tools are etch and clean tools designed ...,,,,2024.0,,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...


In [13]:
df_inputs = pd.merge(
    df_inputs,
    df_provision,
    left_on="input_id",
    right_on="provided_id",
    how="left",
    suffixes=("", "_y"),
)
df_inputs = df_inputs.drop(["provided_name", "provided_id"], axis=1)
df_inputs

Unnamed: 0,input_id,input_name,type,description,stage_name,stage_id,stage_description,year,market_share_chart_caption,market_share_chart_global_market_size_info,market_share_chart_source,provider_name,provider_id,provider_alias,provider_type,provider_country,share_provided,year_share_provided,source_provider_provided
0,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,CHN,P2,China,country,,5.0,2022.0,Worldwide semiconductor sales. [World Semicond...
1,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,EUR,P312,Europe,country,EUR,9.0,2022.0,Worldwide semiconductor sales. [World Semicond...
2,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,JPN,P7,Japan,country,,4.0,2022.0,Worldwide semiconductor sales. [World Semicond...
3,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,KOR,P4,South Korea,country,,3.0,2022.0,Worldwide semiconductor sales. [World Semicond...
4,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,TWN,P8,Taiwan,country,,11.0,2022.0,Worldwide semiconductor sales. [World Semicond...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1315,N129,Etch and clean tools (adv. pkg.),tool_resource,These tools are etch and clean tools designed ...,,,,2024.0,,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...,USA,P1,United States,country,,56.4,2024.0,CSET analysis of TechInsights data (2024). Dat...
1316,N129,Etch and clean tools (adv. pkg.),tool_resource,These tools are etch and clean tools designed ...,,,,2024.0,,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...,Various companies,P370,,organization,Various countries,0.8,2024.0,CSET analysis of TechInsights data (2024). Dat...
1317,N129,Etch and clean tools (adv. pkg.),tool_resource,These tools are etch and clean tools designed ...,,,,2024.0,,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...,Veeco,P121,,organization,USA,3.9,2024.0,CSET analysis of TechInsights data (2024). Dat...
1318,N130,Direct write systems (adv. pkg.),tool_resource,These tools are direct write systems designed ...,,,,2024.0,,$48.1 million (2024),CSET analysis of TechInsights data (2024). Dat...,Applied Materials,P81,,organization,USA,100.0,2024.0,CSET analysis of TechInsights data (2024). Dat...


## Format `df_sequence` dataframe

In [14]:
df_sequence = dict_df["sequence"].copy()
df_sequence["output_name"] = df_sequence["goes_into_name"].combine_first(
    df_sequence["is_type_of_name"]
)
df_sequence["output_id"] = df_sequence["goes_into_id"].combine_first(
    df_sequence["is_type_of_id"]
)
df_sequence["type_link"] = np.where(
    df_sequence["goes_into_id"].notna(), "goes_into_id", "is_type_of_id"
)
df_sequence["type_link"] = df_sequence["type_link"].str.replace("_id", "")
df_sequence = df_sequence.drop(
    ["goes_into_name", "goes_into_id", "is_type_of_name", "is_type_of_id"], axis=1
)
df_sequence

Unnamed: 0,input_name,input_id,output_name,output_id,type_link
0,Crystal growing furnaces,N8,Wafer,N26,goes_into
1,Crystal machining tools,N9,Wafer,N26,goes_into
2,EUV lithography tools,N20,Lithography tools,N19,is_type_of
3,ArF dry (DUV) lithography tools,N21,Lithography tools,N19,is_type_of
4,ArF immersion (DUV) lithography tools,N22,Lithography tools,N19,is_type_of
...,...,...,...,...,...
134,Auto ball bonders for IC,N134,Wire bonding tools,N74,is_type_of
135,Auto ball bonders for non-IC,N135,Wire bonding tools,N74,is_type_of
136,Automatic wedge bonders,N136,Wire bonding tools,N74,is_type_of
137,Wafer level stud bonders,N137,Wire bonding tools,N74,is_type_of


# Create graph from dataframe

In [15]:
G = create_graph_from_df(
    df_sequence,
    source_node_col={"id": "input_id", "displayName": "input_name"},
    target_node_col={"id": "output_id", "displayName": "output_name"},
    node_attributes_df=df_inputs,
    node_attributes_key_col="input_id",
    edge_col="type_link",
    edge_col_label="type",
)
print(f"Resulting graph : {G}")

Resulting graph : DiGraph with 126 nodes and 139 edges


In [16]:
n_first = 5

print("NODES :")
for node in list(G.nodes(data=True))[:n_first]:
    print(node[0])
    for key, val in node[1].items():
        print(f"   {key} : {val}")

    print()
print()

print("EDGES :")
for edge in list(G.edges(data=True))[:n_first]:
    print(edge)

NODES :
N8
   displayName : Crystal growing furnaces
   input_name : Crystal growing furnaces
   type : tool_resource
   description : Crystal growing furnaces and machining tools are necessary to produce all wafers-thin, disc-shaped materials fabs used to produce chips. The furnace forms a cylindrical ingot of silicon from polycrystalline raw silicon; machining equipment then cuts the ingot into wafers used for chip fabrication. These tools have relatively low value and complexity relative to other semiconductor manufacturing equipment. Japan, Germany, and Switzerland are the main producers.
   stage_name : 
   stage_id : 
   stage_description : nan
   year : 2022.0
   market_share_chart_caption : nan
   market_share_chart_global_market_size_info : $44 million (2022)
   market_share_chart_source : CSET analysis of TechInsights data (2022). Data used for analysis were published by TechInsights on May 5, 2025.
   provider_name : DEU
   provider_id : P32
   provider_alias : Germany
   pr

# Graph Creation in TuringDB

## Build Cypher CREATE Commands

In [71]:
# Build CREATE command from networkx object
graph_CREATE_command = build_create_command_from_networkx(G, node_type_key="type")

print(f"""
Cypher CREATE command :
* size: {len(graph_CREATE_command.encode('utf-8'))/1024/1000:.4f} MB\n
{100 * '*'}
{graph_CREATE_command if len(graph_CREATE_command.split("\n")) < 10000 else "\n".join(graph_CREATE_command.split('\n')[:5]) + "\n...\n" + "\n".join(graph_CREATE_command.split('\n')[-5:])}
{100 * '*'}
""")

Cypher query will create graph with 126 nodes and 139 edges

Cypher CREATE command :
* size: 0.1569 MB

****************************************************************************************************
CREATE (:ToolResource {id: "N8", displayName: "Crystal growing furnaces", input_name: "Crystal growing furnaces", type: "tool_resource", description: "Crystal growing furnaces and machining tools are necessary to produce all wafers-thin, disc-shaped materials fabs used to produce chips. The furnace forms a cylindrical ingot of silicon from polycrystalline raw silicon; machining equipment then cuts the ingot into wafers used for chip fabrication. These tools have relatively low value and complexity relative to other semiconductor manufacturing equipment. Japan, Germany, and Switzerland are the main producers.", stage_name: "", stage_id: "", stage_description: "nan", year: 2022.0, market_share_chart_caption: "nan", market_share_chart_global_market_size_info: "$44 million (2022)", market

## Split command into chunks

In [72]:
%%time

chunks = split_cypher_commands(graph_CREATE_command, max_size_mb=1)

print(f"✓ Split into {len(chunks['node_chunks'])} node chunk(s) and {len(chunks['edge_chunks'])} edge chunk(s)")

print("\nNode chunks:")
for i, chunk in enumerate(chunks['node_chunks']):
    print(f"  Node chunk {i+1}: {len(chunk.encode('utf-8'))/1024:.1f} KB")
    if i == 10:
        print("  ...")
        break

print("\nEdge chunks:")
for i, chunk in enumerate(chunks['edge_chunks']):
    print(f"  Edge chunk {i+1}: {len(chunk.encode('utf-8'))/1024:.1f} KB")
    if i == 10:
        print("  ...")
        break

✓ Split into 1 node chunk(s) and 139 edge chunk(s)

Node chunks:
  Node chunk 1: 144.8 KB

Edge chunks:
  Edge chunk 1: 0.1 KB
  Edge chunk 2: 0.1 KB
  Edge chunk 3: 0.1 KB
  Edge chunk 4: 0.1 KB
  Edge chunk 5: 0.1 KB
  Edge chunk 6: 0.1 KB
  Edge chunk 7: 0.1 KB
  Edge chunk 8: 0.1 KB
  Edge chunk 9: 0.1 KB
  Edge chunk 10: 0.1 KB
  Edge chunk 11: 0.1 KB
  ...
CPU times: user 21.3 ms, sys: 867 μs, total: 22.2 ms
Wall time: 21.1 ms


# Create graph using `turingdb` python package

<div class="alert alert-block alert-info">
    <h2>
        See <a href="https://docs.turingdb.ai/quickstart">TuringDB Get started documentation</a> for the important steps to follow :
    </h2>
    <h4>
        <ul>
            <li>Create your TuringDB account</li>
            <li>Create your instance in the <a href="https://console.turingdb.ai/auth">TuringDB Cloud UI</a></li>
            <li>Copy your Instance ID from the Database Instances management page</li>
            <li>Get API Key from the Settings in UI</li>
        </ul>
        Remember to have your instance active while working in this notebook !
    </h4>
</div>

In [73]:
from turingdb import TuringDB

# Create TuringDB client
# set host parameter to the URL (as string) on which TuringDB is running,
# default "http://localhost:6666"
client = TuringDB(host="http://localhost:6666")
try:
    client.warmup()
except Exception as e:
    print(f"TuringDB not started, please run `uv run turingdb` in your terminal")

In [74]:
# Get list of available graphs
list_graphs = client.list_available_graphs()

In [75]:
client.list_loaded_graphs()

['default']

In [76]:
# Set graph name
graph_name_prefix = example_name
graph_name_nb_suffix = str(
    max(
        [
            int(re.sub(graph_name_prefix, "", g))
            for g in list_graphs
            if g.startswith(graph_name_prefix)
            and re.sub(graph_name_prefix, "", g).isdigit()
        ]
        + [0]
    )
    + 1
)
graph_name = graph_name_prefix + graph_name_nb_suffix
graph_name = re.sub("-", "_", graph_name)
graph_name

'supply_chain_eto_chip_explorer1'

In [77]:
from turingdb.exceptions import TuringDBException

In [78]:
%%time

# Set graph
try:
    client.create_graph(graph_name)
except TuringDBException as e:
    print(e)

# Set working graph
client.set_graph(graph_name)

CPU times: user 3.46 ms, sys: 850 μs, total: 4.31 ms
Wall time: 20.6 ms


In [79]:
%%time

# Create a new change on the graph
client.checkout()
change = client.new_change()
print(f"Current change {change}")

# Checkout into the change
client.checkout(change=change)

Current change 0
CPU times: user 2.61 ms, sys: 891 μs, total: 3.5 ms
Wall time: 3.13 ms


In [80]:
%%time

# Run CREATE command
print("\nExecuting query on TuringDB...")
start_time = time.time()

print(f"✓ Split into {len(chunks['node_chunks'])} node chunk(s) and {len(chunks['edge_chunks'])} edge chunk(s)")

# CREATE nodes
print("\nNode chunks:")
for i, chunk in enumerate(tqdm(chunks['node_chunks'])):
    result = client.query(chunk)
# Commit the change
client.query("COMMIT")
print(f"✓ {len(chunks['node_chunks'])} node chunks done")

# CREATE edges
print("\nEdge chunks:")
for i, chunk in enumerate(tqdm(chunks['edge_chunks'])):
    result = client.query(chunk)
# Commit the change
client.query("COMMIT")
print(f"✓ {len(chunks['edge_chunks'])} edge chunks done")

execution_time = time.time() - start_time
print(f"\n✓ Graph created successfully in {execution_time:.2f} seconds")

# Submit changes
start_time = time.time()
client.query("CHANGE SUBMIT")
execution_time = time.time() - start_time
print(f"\n✓ Changes successfully submitted in {execution_time:.2f} seconds")

# Checkout into main
client.checkout()


Executing query on TuringDB...
✓ Split into 1 node chunk(s) and 139 edge chunk(s)

Node chunks:


  0%|          | 0/1 [00:00<?, ?it/s]

✓ 1 node chunks done

Edge chunks:


  0%|          | 0/139 [00:00<?, ?it/s]

✓ 139 edge chunks done

✓ Graph created successfully in 0.32 seconds

✓ Changes successfully submitted in 0.08 seconds
CPU times: user 131 ms, sys: 14 ms, total: 145 ms
Wall time: 398 ms


In [81]:
# Returns the commit history
client.query("CALL db.history()")

Unnamed: 0,commit,nodeCount,edgeCount,partCount
0,e39049098531d32d,0,0,0
1,cfb0e77ecde4c857,126,0,1
2,e0549bf77a189832,0,139,1
3,1d70d1bcb578c6a6,0,0,0


<div class="alert alert-block alert-info">
    <h2>
        Visualize your graph in TuringDB Graph Visualizer ! Now that your instance is running:
    </h2>
    <h3>
        <ul>
            <li>Go to <a href="https://console.turingdb.ai/databases">TuringDB Console - Database Instances</a></li>
            <li>In your current instance panel, click on "Open Visualizer" button</li>
            <li>Visualizer opens, now you can choose your graph in the dropdown menu at the top-right corner</li>
        </ul>
        You can then play with your graph and visualize the nodes you want !
    </h3>
</div>

# Query TuringDB

## Use metaqueries to have insight on graph overall structure

<h3>
    To learn more about 📮 Metaqueries, please check TuringDB documentation on this <a href="https://turingdb.mintlify.app/query/cypher_subset#%F0%9F%93%AE-metaqueries">link</a>
</h3>

In [82]:
%%time

# CALL propertyTypes() - returns a column of all the different node and edge properties and their types in the database
command = """
CALL db.propertyTypes()
"""
df_propertyTypes = client.query(command)
if df_propertyTypes.empty:
    print("No result found")
else:
    display(df_propertyTypes)

Unnamed: 0,id,propertyType,valueType
0,0,source_provider_provided,String
1,1,year_share_provided,Double
2,2,share_provided,Double
3,3,provider_type,String
4,4,displayName,String
5,5,input_name,String
6,6,type,String
7,7,stage_name,String
8,8,stage_description,String
9,9,market_share_chart_source,String


CPU times: user 10.1 ms, sys: 726 μs, total: 10.8 ms
Wall time: 9.77 ms


In [83]:
# Get node properties
nodes_properties = df_propertyTypes["propertyType"].values.tolist()
print(f"Node properties: {nodes_properties}")

Node properties: ['source_provider_provided', 'year_share_provided', 'share_provided', 'provider_type', 'displayName', 'input_name', 'type', 'stage_name', 'stage_description', 'market_share_chart_source', 'id', 'stage_id', 'description', 'year', 'provider_alias', 'provider_id', 'provider_name', 'provider_country', 'market_share_chart_caption', 'market_share_chart_global_market_size_info']


In [84]:
%%time

# CALL labels () - returns a column of all the different node labels
command = """
CALL db.labels()
"""
df_labels = client.query(command)
if df_labels.empty:
    print("No result found")
else:
    display(df_labels)

Unnamed: 0,id,label
0,0,ToolResource
1,1,MaterialResource
2,2,Process
3,3,UltimateOutput
4,4,DesignResource


CPU times: user 7.14 ms, sys: 1.75 ms, total: 8.89 ms
Wall time: 7.9 ms


In [85]:
%%time

# CALL edgeTypes() - returns a column of all the different edge types (edge equivalent of node labels)
command = """
CALL db.edgeTypes()
"""
df_edgeTypes = client.query(command)
if df_edgeTypes.empty:
    print("No result found")
else:
    display(df_edgeTypes)

Unnamed: 0,id,edgeType
0,0,GOES_INTO
1,1,IS_TYPE_OF


CPU times: user 8.1 ms, sys: 13 μs, total: 8.11 ms
Wall time: 7.41 ms


## Counts

In [86]:
%%time

# Find number of nodes and number of edges in the graph
n_nodes = len(client.query("MATCH (n) RETURN n"))
n_edges = len(client.query("MATCH (n)-->(m) RETURN n, m"))
print(f"Graph: {n_nodes:,} nodes and {n_edges:,} edges\n")

Graph: 126 nodes and 139 edges

CPU times: user 6 ms, sys: 0 ns, total: 6 ms
Wall time: 5.54 ms


In [87]:
%%time

# Count all nodes
command = """
MATCH (n)
RETURN COUNT(n)
"""
df_count_nodes = client.query(command)
display(df_count_nodes)

# Count all edges
command = """
MATCH (n)-->()
RETURN COUNT(n)
"""
df_count_edges = client.query(command)
display(df_count_edges)

# Find number of nodes and number of edges in the graph
n_nodes = int(df_count_nodes.loc[0, "COUNT(n)"])
n_edges = int(df_count_edges.loc[0, "COUNT(n)"])
print(f"Graph: {n_nodes:,} nodes and {n_edges:,} edges\n")

Unnamed: 0,COUNT(n)
0,126


Unnamed: 0,COUNT(n)
0,139


Graph: 126 nodes and 139 edges

CPU times: user 14.1 ms, sys: 2.48 ms, total: 16.6 ms
Wall time: 15.2 ms


In [88]:
# Count number of nodes for each label
for label in df_labels["label"]:
    print(100 * '-')
    print(f"label: {label}")
    df_curr_label = client.query(f"""
    MATCH (n:{label})
    RETURN n.displayName
    """)
    df_curr_label_count = client.query(f"""
    MATCH (n:{label})
    RETURN count(n)
    """)
    display(df_curr_label)
    display(df_curr_label_count)
    
    print()
print(100 * '-')

----------------------------------------------------------------------------------------------------
label: ToolResource


Unnamed: 0,n.displayName
0,Overlay metrology tools
1,Dry stripping tools
2,Ion milling tools
3,Misc. dry etch tools
4,Dry clean tools
...,...
85,"Film, stack, and shape metrology tools"
86,Critical dimensions metrology tools
87,Defect inspection tools
88,Wafer inspection tools


Unnamed: 0,count(n)
0,90



----------------------------------------------------------------------------------------------------
label: MaterialResource


Unnamed: 0,n.displayName
0,Photoresists
1,ATP materials
2,Process-specific fabrication materials
3,Electronic gases
4,Wet chemicals
5,Photomasks
6,Core intellectual property
7,CMP materials
8,Deposition materials
9,Lead frames


Unnamed: 0,count(n)
0,17



----------------------------------------------------------------------------------------------------
label: Process


Unnamed: 0,n.displayName
0,Ion implantation
1,Photolithography
2,Chemical mechanical planarization
3,Etch and clean
4,Assembly and packaging
5,Process control and handling
6,Testing
7,Deposition
8,EDA and Core IP
9,Chip design


Unnamed: 0,count(n)
0,11



----------------------------------------------------------------------------------------------------
label: UltimateOutput


Unnamed: 0,n.displayName
0,Finished logic chip


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: DesignResource


Unnamed: 0,n.displayName
0,DAO chip design
1,Memory chip design
2,AI ASICs
3,FPGAs
4,Discrete GPUs
5,Logic chip design
6,Advanced CPUs


Unnamed: 0,count(n)
0,7



----------------------------------------------------------------------------------------------------


## Queries

In [89]:
%%time

# Match all edges and return them
command = """
MATCH (n)-[e]->(m)
RETURN n.displayName, e, m.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n.displayName,e,m.displayName
0,Overlay metrology tools,0,Wafer inspection tools
1,Dry stripping tools,1,Dry etching and cleaning tools
2,Ion milling tools,2,Dry etching and cleaning tools
3,Misc. dry etch tools,3,Dry etch tools
4,Dry clean tools,4,Dry etching and cleaning tools
...,...,...,...
134,AI ASICs,134,Logic chip design
135,FPGAs,135,Logic chip design
136,Discrete GPUs,136,Logic chip design
137,Logic chip design,137,Chip design


CPU times: user 7.35 ms, sys: 801 μs, total: 8.16 ms
Wall time: 7.45 ms


In [90]:
%%time

# Find all materials
command = """
MATCH (n:MaterialResource)
RETURN n, n.displayName, n.type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n,n.displayName,n.type
0,90,Photoresists,material_resource
1,91,ATP materials,material_resource
2,92,Process-specific fabrication materials,material_resource
3,93,Electronic gases,material_resource
4,94,Wet chemicals,material_resource
5,95,Photomasks,material_resource
6,96,Core intellectual property,material_resource
7,97,CMP materials,material_resource
8,98,Deposition materials,material_resource
9,99,Lead frames,material_resource


CPU times: user 10.5 ms, sys: 549 μs, total: 11.1 ms
Wall time: 10.1 ms


In [92]:
%%time

# Find all Tool Resources
command = """
MATCH (n:ToolResource)
RETURN n.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n.displayName
0,Overlay metrology tools
1,Dry stripping tools
2,Ion milling tools
3,Misc. dry etch tools
4,Dry clean tools
...,...
85,"Film, stack, and shape metrology tools"
86,Critical dimensions metrology tools
87,Defect inspection tools
88,Wafer inspection tools


CPU times: user 9.04 ms, sys: 1 ms, total: 10 ms
Wall time: 9.02 ms


In [96]:
%%time

# Find all links between FPGAs and other nodes
command = """
MATCH (n)-[e]->(m)
WHERE n.displayName = "FPGAs"
RETURN n.displayName, e, m.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n.displayName,e,m.displayName
0,FPGAs,135,Logic chip design


CPU times: user 8.72 ms, sys: 907 μs, total: 9.63 ms
Wall time: 8.74 ms


In [98]:
%%time

# Find all relationships between Tool_Resource nodes
command = """
MATCH (i1:ToolResource)-->(i2:ToolResource)
RETURN i1.displayName, i1.type, i1, i2.displayName, i2.type, i2
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,i1.displayName,i1.type,i1,i2.displayName,i2.type,i2
0,Overlay metrology tools,tool_resource,0,Wafer inspection tools,tool_resource,88
1,Dry stripping tools,tool_resource,1,Dry etching and cleaning tools,tool_resource,55
2,Ion milling tools,tool_resource,2,Dry etching and cleaning tools,tool_resource,55
3,Misc. dry etch tools,tool_resource,3,Dry etch tools,tool_resource,51
4,Dry clean tools,tool_resource,4,Dry etching and cleaning tools,tool_resource,55
...,...,...,...,...,...,...
69,Process monitoring tools,tool_resource,84,Process control tools,tool_resource,89
70,"Film, stack, and shape metrology tools",tool_resource,85,Wafer inspection tools,tool_resource,88
71,Critical dimensions metrology tools,tool_resource,86,Wafer inspection tools,tool_resource,88
72,Defect inspection tools,tool_resource,87,Wafer inspection tools,tool_resource,88


CPU times: user 7.06 ms, sys: 5 μs, total: 7.07 ms
Wall time: 6.52 ms


In [99]:
%%time

# Find all relationships between Tool_Resource nodes linked by a IS_TYPE_OF edge
command = """
MATCH (i1:ToolResource)-[e:IS_TYPE_OF]->(i2:ToolResource)
RETURN i1.displayName, i1.description, i2.displayName, i2.description
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,i1.displayName,i1.description,i2.displayName,i2.description
0,Overlay metrology tools,Overlay metrology tools are used to measure an...,Wafer inspection tools,Even the tiniest imperfections can cause serio...
1,Dry stripping tools,Dry stripping tools use plasma to remove photo...,Dry etching and cleaning tools,Etching and cleaning tools have two main types...
2,Ion milling tools,Ion milling tools are advanced semiconductor m...,Dry etching and cleaning tools,Etching and cleaning tools have two main types...
3,Misc. dry etch tools,Miscellaneous dry etch tools are specialized s...,Dry etch tools,The main types of dry etching tools are used e...
4,Dry clean tools,Dry clean tools use plasma to remove films or ...,Dry etching and cleaning tools,Etching and cleaning tools have two main types...
...,...,...,...,...
67,Process monitoring tools,"Process monitoring tools, such as curve tracer...",Process control tools,"In semiconductor fabrication, process control ..."
68,"Film, stack, and shape metrology tools",Film and wafer measuring tools include tools t...,Wafer inspection tools,Even the tiniest imperfections can cause serio...
69,Critical dimensions metrology tools,Critical dimensions measuring tools (including...,Wafer inspection tools,Even the tiniest imperfections can cause serio...
70,Defect inspection tools,Defect inspection tools include brightfield in...,Wafer inspection tools,Even the tiniest imperfections can cause serio...


CPU times: user 12.2 ms, sys: 2.04 ms, total: 14.2 ms
Wall time: 13.1 ms


In [105]:
%%time

# Find all relationships between nodes at the Fabrication stage
command = """
MATCH (i1:Process)-->(i2:Process)
WHERE i1.stage_name = 'Fabrication'
AND i2.stage_name = 'Fabrication'
RETURN i1.displayName, i2.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,i1.displayName,i2.displayName
0,Ion implantation,Chemical mechanical planarization
1,Photolithography,Etch and clean
2,Etch and clean,Ion implantation
3,Process control and handling,Photolithography
4,Process control and handling,Ion implantation
5,Process control and handling,Deposition
6,Process control and handling,Etch and clean
7,Process control and handling,Chemical mechanical planarization
8,Deposition,Photolithography
9,Fabrication materials,Deposition


CPU times: user 8.52 ms, sys: 1.15 ms, total: 9.67 ms
Wall time: 8.91 ms


In [107]:
%%time

# Find all nodes of type Tool_Resource provided by ASML provider
command = """
MATCH (n:ToolResource)
WHERE n.provider_name = 'ASML'
RETURN n, n.displayName, n.description, n.provider_name, n.provider_country
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n,n.displayName,n.description,n.provider_name,n.provider_country
0,0,Overlay metrology tools,Overlay metrology tools are used to measure an...,ASML,NLD
1,7,i-line lithography tools,i-line lithography tools are ultraviolet (UV) ...,ASML,NLD
2,8,KrF (DUV) lithography tools,KrF lithography tools are deep ultraviolet (DU...,ASML,NLD
3,49,E-beam metrology tools,E-beam metrology tools use focused electron be...,ASML,NLD
4,70,ArF immersion (DUV) lithography tools,ArF immersion lithography scanners are advance...,ASML,NLD
5,71,ArF dry (DUV) lithography tools,ArF dry lithography scanners are advanced deep...,ASML,NLD
6,72,Lithography tools,"The Netherlands, Japan, and a small number of ...",ASML,NLD
7,73,EUV lithography tools,EUV lithography tools are the most advanced ph...,ASML,NLD
8,89,Process control tools,"In semiconductor fabrication, process control ...",ASML,NLD


CPU times: user 5.01 ms, sys: 1e+03 μs, total: 6.01 ms
Wall time: 5.54 ms


In [110]:
%%time

# Find direct relationships between tools
command = """
MATCH (n1:ToolResource)-[e]->(n2:ToolResource)
RETURN n1, n1.displayName, e, n2, n2.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n1,n1.displayName,e,n2,n2.displayName
0,0,Overlay metrology tools,0,88,Wafer inspection tools
1,1,Dry stripping tools,1,55,Dry etching and cleaning tools
2,2,Ion milling tools,2,55,Dry etching and cleaning tools
3,3,Misc. dry etch tools,3,51,Dry etch tools
4,4,Dry clean tools,4,55,Dry etching and cleaning tools
...,...,...,...,...,...
69,84,Process monitoring tools,84,89,Process control tools
70,85,"Film, stack, and shape metrology tools",85,88,Wafer inspection tools
71,86,Critical dimensions metrology tools,86,88,Wafer inspection tools
72,87,Defect inspection tools,87,88,Wafer inspection tools


CPU times: user 10.3 ms, sys: 82 μs, total: 10.4 ms
Wall time: 9.69 ms


In [112]:
%%time

# Find what goes into Photolithography process
command = """
MATCH (n:ToolResource)-[e:GOES_INTO]->(p:Process)
WHERE p.displayName = 'Photolithography'
RETURN n, n.displayName, e, p, p.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n,n.displayName,e,p,p.displayName
0,66,Resist processing tools,66,108,Photolithography
1,72,Lithography tools,72,108,Photolithography


CPU times: user 10.2 ms, sys: 2.84 ms, total: 13.1 ms
Wall time: 11.7 ms


In [113]:
%%time

# Find EUV lithography and its connections
command = """
MATCH (n)-[e]->(m)
WHERE n.displayName = 'EUV lithography tools'
RETURN n, n.provider_name, n.share_provided, e, m, m.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n,n.provider_name,n.share_provided,e,m,m.displayName
0,73,ASML,100.0,73,72,Lithography tools


CPU times: user 10.9 ms, sys: 2.98 ms, total: 13.9 ms
Wall time: 12.9 ms


In [115]:
%%time

# Find all ASML products
command = """
MATCH (n)
WHERE n.provider_name = 'ASML'
RETURN n, n.provider_name, n.displayName, n.share_provided, n.year_share_provided
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n,n.provider_name,n.displayName,n.share_provided,n.year_share_provided
0,0,ASML,Overlay metrology tools,45.1,2024.0
1,7,ASML,i-line lithography tools,35.7,2024.0
2,8,ASML,KrF (DUV) lithography tools,79.2,2024.0
3,49,ASML,E-beam metrology tools,37.6,2024.0
4,70,ASML,ArF immersion (DUV) lithography tools,98.7,2024.0
5,71,ASML,ArF dry (DUV) lithography tools,94.3,2024.0
6,72,ASML,Lithography tools,78.5,2024.0
7,73,ASML,EUV lithography tools,100.0,2024.0
8,89,ASML,Process control tools,5.2,2024.0


CPU times: user 10.1 ms, sys: 3 μs, total: 10.1 ms
Wall time: 9.42 ms


In [118]:
%%time

# Find all 2-hop paths from Crystal growing furnaces to any processes
command = """
MATCH (first:ToolResource)-[e1]->(n1)-[e2]->(last:Process)
WHERE first.displayName = 'Crystal growing furnaces'
RETURN first, first.displayName, first.type, e1, n1, n1.displayName, n1.type, e2, last, last.displayName, last.type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,first,first.displayName,first.type,e1,n1,n1.displayName,n1.type,e2,last,last.displayName,last.type
0,75,Crystal growing furnaces,tool_resource,75,103,Wafer,material_resource,109,117,Fabrication materials,process


CPU times: user 16.2 ms, sys: 1.33 ms, total: 17.5 ms
Wall time: 16.5 ms


In [120]:
%%time

# Find all tools that go into Fabrication stage processes
command = """
MATCH (t:ToolResource)-[e:GOES_INTO]-(p:Process)
WHERE p.stage_id = 'S2'
RETURN t, t.displayName, t.provider_name, e, p, p.displayName, p.stage_name, p.stage_id
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,t,t.displayName,t.provider_name,e,p,p.displayName,p.stage_name,p.stage_id
0,39,Ion implanters,Applied Materials,39,107,Ion implantation,Fabrication,S2
1,54,Etch and clean tools,ACM Research,54,110,Etch and clean,Fabrication,S2
2,65,Deposition tools,ASM International,65,114,Deposition,Fabrication,S2
3,66,Resist processing tools,CHN,66,108,Photolithography,Fabrication,S2
4,72,Lithography tools,ASML,72,108,Photolithography,Fabrication,S2
5,77,Wafer and photomask handlers,Daifuku,77,112,Process control and handling,Fabrication,S2
6,78,CMP tools,Applied Materials,78,109,Chemical mechanical planarization,Fabrication,S2
7,89,Process control tools,ASML,89,112,Process control and handling,Fabrication,S2


CPU times: user 11.9 ms, sys: 253 μs, total: 12.2 ms
Wall time: 11.2 ms


In [122]:
%%time

# Find suppliers and their market share for lithography tools
command = """
MATCH (n:ToolResource)
WHERE n.input_name = 'Lithography tools'
RETURN n, n.provider_name, n.provider_country, n.provider_type, n.share_provided, n.year_share_provided
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n,n.provider_name,n.provider_country,n.provider_type,n.share_provided,n.year_share_provided
0,72,ASML,NLD,organization,78.5,2024.0


CPU times: user 6.02 ms, sys: 0 ns, total: 6.02 ms
Wall time: 5.47 ms


In [124]:
%%time

# Find the supply chain path from wafers to fabrication using 2 hops
command = """
MATCH (w:MaterialResource)-[e1:GOES_INTO]-(f1)-[e2:GOES_INTO]-(p:Process)
WHERE w.displayName = 'Wafer'
RETURN w, w.displayName, e1, f1, f1.displayName, e2, p, p.displayName, p.stage_name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,w,w.displayName,e1,f1,f1.displayName,e2,p,p.displayName,p.stage_name
0,103,Wafer,109,117,Fabrication materials,127,114,Deposition,Fabrication
1,103,Wafer,109,117,Fabrication materials,128,107,Ion implantation,Fabrication
2,103,Wafer,109,117,Fabrication materials,129,109,Chemical mechanical planarization,Fabrication
3,103,Wafer,109,117,Fabrication materials,130,110,Etch and clean,Fabrication
4,103,Wafer,109,117,Fabrication materials,131,108,Photolithography,Fabrication


CPU times: user 7.32 ms, sys: 83 μs, total: 7.41 ms
Wall time: 6.86 ms


In [126]:
%%time

# Find all deposition tools and their types
command = """
MATCH (specific)-[e:IS_TYPE_OF]-(general:ToolResource)
WHERE general.displayName = 'Deposition tools'
RETURN specific, specific.displayName, specific.provider_name, e, general, general.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,specific,specific.displayName,specific.provider_name,e,general,general.displayName
0,5,Plasma modification tools,Applied Materials,5,65,Deposition tools
1,25,Other deposition tools (non-IC),ASM International,25,65,Deposition tools
2,56,Electrochemical plating tools,Lam Research,56,65,Deposition tools
3,57,Tube-based diffusion and deposition tools,ASM International,57,65,Deposition tools
4,58,Rapid thermal processing tools,AP Systems,58,65,Deposition tools
5,59,Physical vapor deposition tools,Applied Materials,59,65,Deposition tools
6,60,Atomic layer deposition tools,ASM International,60,65,Deposition tools
7,63,Chemical vapor deposition tools,ASM International,63,65,Deposition tools


CPU times: user 12.4 ms, sys: 800 μs, total: 13.2 ms
Wall time: 12.4 ms


In [127]:
%%time

# Find complete hierarchy of CVD tools
command = """
MATCH (specific)-[e1:IS_TYPE_OF]-(intermediate)-[e2:IS_TYPE_OF]-(general)
RETURN specific, specific.displayName, e1, intermediate, intermediate.displayName, e2, general, general.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,specific,specific.displayName,e1,intermediate,intermediate.displayName,e2,general,general.displayName
0,0,Overlay metrology tools,0,88,Wafer inspection tools,88,89,Process control tools
1,0,Overlay metrology tools,0,88,Wafer inspection tools,85,85,"Film, stack, and shape metrology tools"
2,0,Overlay metrology tools,0,88,Wafer inspection tools,86,86,Critical dimensions metrology tools
3,0,Overlay metrology tools,0,88,Wafer inspection tools,87,87,Defect inspection tools
4,0,Overlay metrology tools,0,88,Wafer inspection tools,49,49,E-beam metrology tools
...,...,...,...,...,...,...,...,...
565,124,Logic chip design,138,125,Advanced CPUs,138,124,Logic chip design
566,125,Advanced CPUs,138,124,Logic chip design,134,121,AI ASICs
567,125,Advanced CPUs,138,124,Logic chip design,135,122,FPGAs
568,125,Advanced CPUs,138,124,Logic chip design,136,123,Discrete GPUs


CPU times: user 17.5 ms, sys: 2.82 ms, total: 20.3 ms
Wall time: 19.7 ms


In [129]:
%%time

# Find all Chinese providers and what they supply
command = """
MATCH (n)
WHERE n.provider_country = 'CHN'
RETURN n, n.provider_name, n.displayName, n.share_provided, n.type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n,n.provider_name,n.displayName,n.share_provided,n.type
0,12,AccoTEST,Linear and discrete testing tools,36.9,tool_resource
1,14,AccoTEST,Test tools,1.9,tool_resource
2,45,AMEC,Etch and clean tools (adv. pkg.),12.2,tool_resource
3,46,AMEC,Dry etching tools (adv. pkg.),18.1,tool_resource
4,50,AMEC,Insulator etching tools,9.7,tool_resource
5,51,AMEC,Dry etch tools,5.5,tool_resource
6,52,AMEC,Conductor etching tools,1.4,tool_resource
7,55,AMEC,Dry etching and cleaning tools,5.3,tool_resource
8,62,AMEC,Low-pressure CVD tools,1.2,tool_resource
9,81,AMEC,Fabrication tools (for advanced packaging),2.5,tool_resource


CPU times: user 9.67 ms, sys: 0 ns, total: 9.67 ms
Wall time: 9 ms


In [130]:
%%time

# Find assembly tools and their connections
command = """
MATCH (n:ToolResource)-[e]->(m)
WHERE n.displayName = 'Assembly tools'
RETURN n, n.displayName, e, m, m.displayName, m.type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n,n.displayName,e,m,m.displayName,m.type
0,23,Assembly tools,23,111,Assembly and packaging,process


CPU times: user 13 ms, sys: 994 μs, total: 14 ms
Wall time: 12.9 ms


In [134]:
%%time

# Find complete path from design to finished chip (3 hops)
command = """
MATCH (first:Process)-[e1]->(s1)-[e2]->(s2)-[e3]->(last)
WHERE first.displayName = 'Memory chip design'
RETURN first, first.displayName, e1, s1, s1.displayName, e2, s2, s2.displayName, e3, last, last.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

No result found
CPU times: user 8.39 ms, sys: 102 μs, total: 8.49 ms
Wall time: 7.63 ms


## Get maximum length chain for each process (set process at start node)

In [136]:
%%time

# Find all Process nodes
command = """
MATCH (n:Process)
RETURN n, n.displayName, n.description
"""
df_processes = client.query(command)
display(df_processes)
list_processes = list(df_processes["n.displayName"])
print(list_processes)

Unnamed: 0,n,n.displayName,n.description
0,107,Ion implantation,Ion implanters embed dopant substances into si...
1,108,Photolithography,"In photolithography, light is used to draw pat..."
2,109,Chemical mechanical planarization,"After etching and cleaning, the wafer surface ..."
3,110,Etch and clean,After photolithography creates a pattern in th...
4,111,Assembly and packaging,"At the end of the fabrication process, the fin..."
5,112,Process control and handling,"Process control: In semiconductor fabrication,..."
6,113,Testing,Chips undergo tests requiring a range of speci...
7,114,Deposition,"In the deposition process, specialized tools a..."
8,115,EDA and Core IP,Electronic design automation (EDA) software: U...
9,116,Chip design,"Chip design involves specification, logic desi..."


['Ion implantation', 'Photolithography', 'Chemical mechanical planarization', 'Etch and clean', 'Assembly and packaging', 'Process control and handling', 'Testing', 'Deposition', 'EDA and Core IP', 'Chip design', 'Fabrication materials']
CPU times: user 5.29 ms, sys: 1.08 ms, total: 6.37 ms
Wall time: 5.53 ms


In [139]:
def build_query_process_chain(process_name: str, hop_count: int) -> str:
    """Build a query to find chains starting from a specific process"""
    query = "MATCH "
    query += f'(first:Process{{displayName:"{process_name}"}})'

    for k in range(1, hop_count + 1):
        query += f"-[e{k}]-(n{k})"

    query += " RETURN first, first.id, first.displayName, "
    for k in range(1, hop_count + 1):
        if k > 1:
            query += ", "
        query += f"e{k}, n{k}, n{k}.id, n{k}.displayName"

    return query

In [142]:
%%time

# Maximum number of hops to try
max_hops = 7

# Save all dictionaries during the process
list_longest_df = []

# Get maximum length chain for each process (set process at start node)
for process in list_processes:
    print(100 * "*")
    print(f"Process: {process}")

    found_path = False
    longest_df = None
    longest_hop = 0

    # Try increasing hop counts until we find the longest path
    for hop_count in tqdm(range(1, max_hops + 1)):
        command = build_query_process_chain(process, hop_count)
        df = client.query(command)

        if not df.empty:
            longest_df = df
            longest_hop = hop_count
            found_path = True
        else:
            # If we get an empty result, we've reached the maximum path length
            break

    if found_path:
        print(f"Longest path found: {longest_hop} hops")
        display(longest_df)
        list_longest_df.append(longest_df)
    else:
        print("No paths found")

print(100 * "*")

****************************************************************************************************
Process: Ion implantation


  0%|          | 0/7 [00:00<?, ?it/s]

Longest path found: 7 hops


Unnamed: 0,first,first.id,first.displayName,e1,n1,n1.id,n1.displayName,e2,n2,n2.id,...,n5.id,n5.displayName,e6,n6,n6.id,n6.displayName,e7,n7,n7.id,n7.displayName
0,107,N16,Ion implantation,113,109,N57,Chemical mechanical planarization,115,111,N69,...,N78,Testing,123,118,N99,Finished logic chip,123,113,N78,Testing
1,107,N16,Ion implantation,113,109,N57,Chemical mechanical planarization,115,111,N69,...,N78,Testing,14,14,N120,Test tools,14,113,N78,Testing
2,107,N16,Ion implantation,113,109,N57,Chemical mechanical planarization,115,111,N69,...,N78,Testing,14,14,N120,Test tools,47,47,N79,Memory test tools
3,107,N16,Ion implantation,113,109,N57,Chemical mechanical planarization,115,111,N69,...,N78,Testing,14,14,N120,Test tools,15,15,N80,SoC test tools
4,107,N16,Ion implantation,113,109,N57,Chemical mechanical planarization,115,111,N69,...,N78,Testing,14,14,N120,Test tools,13,13,N81,Burn-in test tools
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88060,107,N16,Ion implantation,119,112,N118,Process control and handling,77,77,N59,...,N11,Wafer handlers,76,77,N59,Wafer and photomask handlers,76,76,N11,Wafer handlers
88061,107,N16,Ion implantation,119,112,N118,Process control and handling,77,77,N59,...,N11,Wafer handlers,76,77,N59,Wafer and photomask handlers,79,79,N12,Photomask handlers
88062,107,N16,Ion implantation,119,112,N118,Process control and handling,77,77,N59,...,N12,Photomask handlers,79,77,N59,Wafer and photomask handlers,77,112,N118,Process control and handling
88063,107,N16,Ion implantation,119,112,N118,Process control and handling,77,77,N59,...,N12,Photomask handlers,79,77,N59,Wafer and photomask handlers,76,76,N11,Wafer handlers


****************************************************************************************************
Process: Photolithography


  0%|          | 0/7 [00:00<?, ?it/s]

Longest path found: 7 hops


Unnamed: 0,first,first.id,first.displayName,e1,n1,n1.id,n1.displayName,e2,n2,n2.id,...,n5.id,n5.displayName,e6,n6,n6.id,n6.displayName,e7,n7,n7.id,n7.displayName
0,108,N25,Photolithography,114,110,N46,Etch and clean,116,107,N16,...,N78,Testing,123,118,N99,Finished logic chip,123,113,N78,Testing
1,108,N25,Photolithography,114,110,N46,Etch and clean,116,107,N16,...,N78,Testing,14,14,N120,Test tools,14,113,N78,Testing
2,108,N25,Photolithography,114,110,N46,Etch and clean,116,107,N16,...,N78,Testing,14,14,N120,Test tools,47,47,N79,Memory test tools
3,108,N25,Photolithography,114,110,N46,Etch and clean,116,107,N16,...,N78,Testing,14,14,N120,Test tools,15,15,N80,SoC test tools
4,108,N25,Photolithography,114,110,N46,Etch and clean,116,107,N16,...,N78,Testing,14,14,N120,Test tools,13,13,N81,Burn-in test tools
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112522,108,N25,Photolithography,66,66,N32,Resist processing tools,66,108,N25,...,N32,Resist processing tools,66,108,N25,Photolithography,91,90,N31,Photoresists
112523,108,N25,Photolithography,66,66,N32,Resist processing tools,66,108,N25,...,N32,Resist processing tools,66,108,N25,Photolithography,118,112,N118,Process control and handling
112524,108,N25,Photolithography,66,66,N32,Resist processing tools,66,108,N25,...,N32,Resist processing tools,66,108,N25,Photolithography,72,72,N19,Lithography tools
112525,108,N25,Photolithography,66,66,N32,Resist processing tools,66,108,N25,...,N32,Resist processing tools,66,108,N25,Photolithography,131,117,N117,Fabrication materials


****************************************************************************************************
Process: Chemical mechanical planarization


  0%|          | 0/7 [00:00<?, ?it/s]

Longest path found: 7 hops


Unnamed: 0,first,first.id,first.displayName,e1,n1,n1.id,n1.displayName,e2,n2,n2.id,...,n5.id,n5.displayName,e6,n6,n6.id,n6.displayName,e7,n7,n7.id,n7.displayName
0,109,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,117,113,N78,...,N99,Finished logic chip,123,113,N78,Testing,123,118,N99,Finished logic chip
1,109,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,117,113,N78,...,N99,Finished logic chip,123,113,N78,Testing,14,14,N120,Test tools
2,109,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,117,113,N78,...,N99,Finished logic chip,123,113,N78,Testing,11,11,N83,Handlers and probers
3,109,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,117,113,N78,...,N99,Finished logic chip,123,113,N78,Testing,92,91,N121,ATP materials
4,109,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,117,113,N78,...,N99,Finished logic chip,123,113,N78,Testing,117,111,N69,Assembly and packaging
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82896,109,N57,Chemical mechanical planarization,78,78,N86,CMP tools,78,109,N57,...,N86,CMP tools,78,109,N57,Chemical mechanical planarization,122,112,N118,Process control and handling
82897,109,N57,Chemical mechanical planarization,78,78,N86,CMP tools,78,109,N57,...,N86,CMP tools,78,109,N57,Chemical mechanical planarization,113,107,N16,Ion implantation
82898,109,N57,Chemical mechanical planarization,78,78,N86,CMP tools,78,109,N57,...,N86,CMP tools,78,109,N57,Chemical mechanical planarization,102,97,N90,CMP materials
82899,109,N57,Chemical mechanical planarization,78,78,N86,CMP tools,78,109,N57,...,N86,CMP tools,78,109,N57,Chemical mechanical planarization,129,117,N117,Fabrication materials


****************************************************************************************************
Process: Etch and clean


  0%|          | 0/7 [00:00<?, ?it/s]

Longest path found: 7 hops


Unnamed: 0,first,first.id,first.displayName,e1,n1,n1.id,n1.displayName,e2,n2,n2.id,...,n5.id,n5.displayName,e6,n6,n6.id,n6.displayName,e7,n7,n7.id,n7.displayName
0,110,N46,Etch and clean,116,107,N16,Ion implantation,113,109,N57,...,N99,Finished logic chip,123,113,N78,Testing,123,118,N99,Finished logic chip
1,110,N46,Etch and clean,116,107,N16,Ion implantation,113,109,N57,...,N99,Finished logic chip,123,113,N78,Testing,14,14,N120,Test tools
2,110,N46,Etch and clean,116,107,N16,Ion implantation,113,109,N57,...,N99,Finished logic chip,123,113,N78,Testing,11,11,N83,Handlers and probers
3,110,N46,Etch and clean,116,107,N16,Ion implantation,113,109,N57,...,N99,Finished logic chip,123,113,N78,Testing,92,91,N121,ATP materials
4,110,N46,Etch and clean,116,107,N16,Ion implantation,113,109,N57,...,N99,Finished logic chip,123,113,N78,Testing,117,111,N69,Assembly and packaging
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93940,110,N46,Etch and clean,130,117,N117,Fabrication materials,109,103,N26,...,N8,Crystal growing furnaces,75,103,N26,Wafer,75,75,N8,Crystal growing furnaces
93941,110,N46,Etch and clean,130,117,N117,Fabrication materials,109,103,N26,...,N8,Crystal growing furnaces,75,103,N26,Wafer,74,74,N9,Crystal machining tools
93942,110,N46,Etch and clean,130,117,N117,Fabrication materials,109,103,N26,...,N9,Crystal machining tools,74,103,N26,Wafer,109,117,N117,Fabrication materials
93943,110,N46,Etch and clean,130,117,N117,Fabrication materials,109,103,N26,...,N9,Crystal machining tools,74,103,N26,Wafer,75,75,N8,Crystal growing furnaces


****************************************************************************************************
Process: Assembly and packaging


  0%|          | 0/7 [00:00<?, ?it/s]

Longest path found: 7 hops


Unnamed: 0,first,first.id,first.displayName,e1,n1,n1.id,n1.displayName,e2,n2,n2.id,...,n5.id,n5.displayName,e6,n6,n6.id,n6.displayName,e7,n7,n7.id,n7.displayName
0,111,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,...,N78,Testing,123,118,N99,Finished logic chip,123,113,N78,Testing
1,111,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,...,N78,Testing,14,14,N120,Test tools,14,113,N78,Testing
2,111,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,...,N78,Testing,14,14,N120,Test tools,47,47,N79,Memory test tools
3,111,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,...,N78,Testing,14,14,N120,Test tools,15,15,N80,SoC test tools
4,111,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,...,N78,Testing,14,14,N120,Test tools,13,13,N81,Burn-in test tools
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34287,111,N69,Assembly and packaging,93,91,N121,ATP materials,96,93,N91,...,N121,ATP materials,106,100,N100,Packaging materials,110,104,N98,Die attach materials
34288,111,N69,Assembly and packaging,93,91,N121,ATP materials,96,93,N91,...,N121,ATP materials,106,100,N100,Packaging materials,111,105,N97,Encapsulation resins
34289,111,N69,Assembly and packaging,93,91,N121,ATP materials,96,93,N91,...,N121,ATP materials,106,100,N100,Packaging materials,112,106,N96,Substrates
34290,111,N69,Assembly and packaging,93,91,N121,ATP materials,96,93,N91,...,N121,ATP materials,96,93,N91,Electronic gases,95,117,N117,Fabrication materials


****************************************************************************************************
Process: Process control and handling


  0%|          | 0/7 [00:00<?, ?it/s]

Longest path found: 7 hops


Unnamed: 0,first,first.id,first.displayName,e1,n1,n1.id,n1.displayName,e2,n2,n2.id,...,n5.id,n5.displayName,e6,n6,n6.id,n6.displayName,e7,n7,n7.id,n7.displayName
0,112,N118,Process control and handling,118,108,N25,Photolithography,114,110,N46,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,112,N118,Process control and handling,118,108,N25,Photolithography,114,110,N46,...,N69,Assembly and packaging,117,113,N78,Testing,14,14,N120,Test tools
2,112,N118,Process control and handling,118,108,N25,Photolithography,114,110,N46,...,N69,Assembly and packaging,117,113,N78,Testing,11,11,N83,Handlers and probers
3,112,N118,Process control and handling,118,108,N25,Photolithography,114,110,N46,...,N69,Assembly and packaging,117,113,N78,Testing,92,91,N121,ATP materials
4,112,N118,Process control and handling,118,108,N25,Photolithography,114,110,N46,...,N69,Assembly and packaging,117,113,N78,Testing,117,111,N69,Assembly and packaging
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109084,112,N118,Process control and handling,77,77,N59,Wafer and photomask handlers,79,79,N12,...,N59,Wafer and photomask handlers,77,112,N118,Process control and handling,122,109,N57,Chemical mechanical planarization
109085,112,N118,Process control and handling,77,77,N59,Wafer and photomask handlers,79,79,N12,...,N59,Wafer and photomask handlers,77,112,N118,Process control and handling,89,89,N60,Process control tools
109086,112,N118,Process control and handling,77,77,N59,Wafer and photomask handlers,79,79,N12,...,N59,Wafer and photomask handlers,77,112,N118,Process control and handling,77,77,N59,Wafer and photomask handlers
109087,112,N118,Process control and handling,77,77,N59,Wafer and photomask handlers,79,79,N12,...,N59,Wafer and photomask handlers,76,76,N11,Wafer handlers,76,77,N59,Wafer and photomask handlers


****************************************************************************************************
Process: Testing


  0%|          | 0/7 [00:00<?, ?it/s]

Longest path found: 7 hops


Unnamed: 0,first,first.id,first.displayName,e1,n1,n1.id,n1.displayName,e2,n2,n2.id,...,n5.id,n5.displayName,e6,n6,n6.id,n6.displayName,e7,n7,n7.id,n7.displayName
0,113,N78,Testing,123,118,N99,Finished logic chip,123,113,N78,...,N99,Finished logic chip,123,113,N78,Testing,123,118,N99,Finished logic chip
1,113,N78,Testing,123,118,N99,Finished logic chip,123,113,N78,...,N99,Finished logic chip,123,113,N78,Testing,14,14,N120,Test tools
2,113,N78,Testing,123,118,N99,Finished logic chip,123,113,N78,...,N99,Finished logic chip,123,113,N78,Testing,11,11,N83,Handlers and probers
3,113,N78,Testing,123,118,N99,Finished logic chip,123,113,N78,...,N99,Finished logic chip,123,113,N78,Testing,92,91,N121,ATP materials
4,113,N78,Testing,123,118,N99,Finished logic chip,123,113,N78,...,N99,Finished logic chip,123,113,N78,Testing,117,111,N69,Assembly and packaging
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16065,113,N78,Testing,117,111,N69,Assembly and packaging,93,91,N121,...,N91,Electronic gases,95,117,N117,Fabrication materials,109,103,N26,Wafer
16066,113,N78,Testing,117,111,N69,Assembly and packaging,93,91,N121,...,N91,Electronic gases,96,91,N121,ATP materials,92,113,N78,Testing
16067,113,N78,Testing,117,111,N69,Assembly and packaging,93,91,N121,...,N91,Electronic gases,96,91,N121,ATP materials,93,111,N69,Assembly and packaging
16068,113,N78,Testing,117,111,N69,Assembly and packaging,93,91,N121,...,N91,Electronic gases,96,91,N121,ATP materials,106,100,N100,Packaging materials


****************************************************************************************************
Process: Deposition


  0%|          | 0/7 [00:00<?, ?it/s]

Longest path found: 7 hops


Unnamed: 0,first,first.id,first.displayName,e1,n1,n1.id,n1.displayName,e2,n2,n2.id,...,n5.id,n5.displayName,e6,n6,n6.id,n6.displayName,e7,n7,n7.id,n7.displayName
0,114,N35,Deposition,124,108,N25,Photolithography,114,110,N46,...,N69,Assembly and packaging,117,113,N78,Testing,123,118,N99,Finished logic chip
1,114,N35,Deposition,124,108,N25,Photolithography,114,110,N46,...,N69,Assembly and packaging,117,113,N78,Testing,14,14,N120,Test tools
2,114,N35,Deposition,124,108,N25,Photolithography,114,110,N46,...,N69,Assembly and packaging,117,113,N78,Testing,11,11,N83,Handlers and probers
3,114,N35,Deposition,124,108,N25,Photolithography,114,110,N46,...,N69,Assembly and packaging,117,113,N78,Testing,92,91,N121,ATP materials
4,114,N35,Deposition,124,108,N25,Photolithography,114,110,N46,...,N69,Assembly and packaging,117,113,N78,Testing,117,111,N69,Assembly and packaging
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88429,114,N35,Deposition,103,98,N88,Deposition materials,104,92,N126,...,N32,Resist processing tools,66,108,N25,Photolithography,91,90,N31,Photoresists
88430,114,N35,Deposition,103,98,N88,Deposition materials,104,92,N126,...,N32,Resist processing tools,66,108,N25,Photolithography,118,112,N118,Process control and handling
88431,114,N35,Deposition,103,98,N88,Deposition materials,104,92,N126,...,N32,Resist processing tools,66,108,N25,Photolithography,72,72,N19,Lithography tools
88432,114,N35,Deposition,103,98,N88,Deposition materials,104,92,N126,...,N32,Resist processing tools,66,108,N25,Photolithography,131,117,N117,Fabrication materials


****************************************************************************************************
Process: EDA and Core IP


  0%|          | 0/7 [00:00<?, ?it/s]

Longest path found: 7 hops


Unnamed: 0,first,first.id,first.displayName,e1,n1,n1.id,n1.displayName,e2,n2,n2.id,...,n5.id,n5.displayName,e6,n6,n6.id,n6.displayName,e7,n7,n7.id,n7.displayName
0,115,N7,EDA and Core IP,125,116,N0,Chip design,126,114,N35,...,N16,Ion implantation,113,109,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging
1,115,N7,EDA and Core IP,125,116,N0,Chip design,126,114,N35,...,N16,Ion implantation,113,109,N57,Chemical mechanical planarization,122,112,N118,Process control and handling
2,115,N7,EDA and Core IP,125,116,N0,Chip design,126,114,N35,...,N16,Ion implantation,113,109,N57,Chemical mechanical planarization,113,107,N16,Ion implantation
3,115,N7,EDA and Core IP,125,116,N0,Chip design,126,114,N35,...,N16,Ion implantation,113,109,N57,Chemical mechanical planarization,102,97,N90,CMP materials
4,115,N7,EDA and Core IP,125,116,N0,Chip design,126,114,N35,...,N16,Ion implantation,113,109,N57,Chemical mechanical planarization,129,117,N117,Fabrication materials
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5592,115,N7,EDA and Core IP,100,96,N85,Core intellectual property,100,115,N7,...,N84,Electronic design automation software,10,115,N7,EDA and Core IP,10,10,N84,Electronic design automation software
5593,115,N7,EDA and Core IP,100,96,N85,Core intellectual property,100,115,N7,...,N84,Electronic design automation software,10,115,N7,EDA and Core IP,100,96,N85,Core intellectual property
5594,115,N7,EDA and Core IP,100,96,N85,Core intellectual property,100,115,N7,...,N85,Core intellectual property,100,115,N7,EDA and Core IP,125,116,N0,Chip design
5595,115,N7,EDA and Core IP,100,96,N85,Core intellectual property,100,115,N7,...,N85,Core intellectual property,100,115,N7,EDA and Core IP,10,10,N84,Electronic design automation software


****************************************************************************************************
Process: Chip design


  0%|          | 0/7 [00:00<?, ?it/s]

Longest path found: 7 hops


Unnamed: 0,first,first.id,first.displayName,e1,n1,n1.id,n1.displayName,e2,n2,n2.id,...,n5.id,n5.displayName,e6,n6,n6.id,n6.displayName,e7,n7,n7.id,n7.displayName
0,116,N0,Chip design,126,114,N35,Deposition,124,108,N25,...,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,117,113,N78,Testing
1,116,N0,Chip design,126,114,N35,Deposition,124,108,N25,...,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,115,109,N57,Chemical mechanical planarization
2,116,N0,Chip design,126,114,N35,Deposition,124,108,N25,...,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,23,23,N119,Assembly tools
3,116,N0,Chip design,126,114,N35,Deposition,124,108,N25,...,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,17,17,N76,Packaging tools
4,116,N0,Chip design,126,114,N35,Deposition,124,108,N25,...,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,81,81,N109,Fabrication tools (for advanced packaging)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24384,116,N0,Chip design,137,124,N6,Logic chip design,138,125,N1,...,N6,Logic chip design,137,116,N0,Chip design,137,124,N6,Logic chip design
24385,116,N0,Chip design,137,124,N6,Logic chip design,138,125,N1,...,N6,Logic chip design,134,121,N4,AI ASICs,134,124,N6,Logic chip design
24386,116,N0,Chip design,137,124,N6,Logic chip design,138,125,N1,...,N6,Logic chip design,135,122,N3,FPGAs,135,124,N6,Logic chip design
24387,116,N0,Chip design,137,124,N6,Logic chip design,138,125,N1,...,N6,Logic chip design,136,123,N2,Discrete GPUs,136,124,N6,Logic chip design


****************************************************************************************************
Process: Fabrication materials


  0%|          | 0/7 [00:00<?, ?it/s]

Longest path found: 7 hops


Unnamed: 0,first,first.id,first.displayName,e1,n1,n1.id,n1.displayName,e2,n2,n2.id,...,n5.id,n5.displayName,e6,n6,n6.id,n6.displayName,e7,n7,n7.id,n7.displayName
0,117,N117,Fabrication materials,127,114,N35,Deposition,124,108,N25,...,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,117,113,N78,Testing
1,117,N117,Fabrication materials,127,114,N35,Deposition,124,108,N25,...,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,115,109,N57,Chemical mechanical planarization
2,117,N117,Fabrication materials,127,114,N35,Deposition,124,108,N25,...,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,23,23,N119,Assembly tools
3,117,N117,Fabrication materials,127,114,N35,Deposition,124,108,N25,...,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,17,17,N76,Packaging tools
4,117,N117,Fabrication materials,127,114,N35,Deposition,124,108,N25,...,N57,Chemical mechanical planarization,115,111,N69,Assembly and packaging,81,81,N109,Fabrication tools (for advanced packaging)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126569,117,N117,Fabrication materials,109,103,N26,Wafer,74,74,N9,...,N26,Wafer,109,117,N117,Fabrication materials,95,93,N91,Electronic gases
126570,117,N117,Fabrication materials,109,103,N26,Wafer,74,74,N9,...,N26,Wafer,109,117,N117,Fabrication materials,94,92,N126,Process-specific fabrication materials
126571,117,N117,Fabrication materials,109,103,N26,Wafer,74,74,N9,...,N26,Wafer,109,117,N117,Fabrication materials,109,103,N26,Wafer
126572,117,N117,Fabrication materials,109,103,N26,Wafer,74,74,N9,...,N26,Wafer,75,75,N8,Crystal growing furnaces,75,103,N26,Wafer


****************************************************************************************************
CPU times: user 7.65 s, sys: 1.44 s, total: 9.09 s
Wall time: 9.94 s


# Create subgraph to visualise

In [144]:
# Get subgraph
list_subset_nodes = []
for longest_df in list_longest_df:
    subset_nodes = longest_df.filter(regex="id$", axis=1).iloc[0].values.tolist()
    list_subset_nodes.extend(subset_nodes)

subG = G.subgraph(list_subset_nodes).copy()
print(subG)

# Build CREATE command from subgraph
create_command_subG = build_create_command_from_networkx(subG)
print(f"""
Cypher CREATE command :
* size: {len(create_command_subG.encode('utf-8'))/1024/1000:.4f} MB\n
{100 * '*'}
{create_command_subG \
if len(create_command_subG.split("\n")) < 10000 \
else "\n".join(create_command_subG.split('\n')[:5]) + "\n...\n" + "\n".join(create_command_subG.split('\n')[-5:])}
{100 * '*'}
""")

DiGraph with 12 nodes and 19 edges
Cypher query will create graph with 12 nodes and 19 edges

Cypher CREATE command :
* size: 0.0250 MB

****************************************************************************************************
CREATE (:Process {id: "N117", displayName: "Fabrication materials", input_name: "Fabrication materials", type: "process", description: "Fabrication materials are a critical input in the fabrication of chips. Fabrication materials include high-purity silicon wafers, chemicals, and gases.", stage_name: "Fabrication", stage_id: "S2", stage_description: "Fabrication turns designs into chips. Semiconductor fabrication facilities ( fabs ) make chips in these wafers in two steps: forming transistors and other electrical devices in material layers within silicon wafers; and forming metal interconnects between the electrical devices in insulating layers above the silicon. There are two business models for fabs: (1) fabs owned by integrated device manufacturers 

In [145]:
subgraph_name = f"{graph_name}_subgraph"
subgraph_name

'supply_chain_eto_chip_explorer1_subgraph'

In [146]:
%%time

# Set graph
try:
    client.create_graph(subgraph_name)
except TuringDBException as e:
    print(e)

# Set working graph
client.set_graph(subgraph_name)

# Create a new change on the graph
client.checkout()
change = client.new_change()
print(f"Current change {change}")

# Checkout into the change
client.checkout(change=change)

Current change 0
CPU times: user 4.83 ms, sys: 671 μs, total: 5.5 ms
Wall time: 11.9 ms


In [147]:
%%time

chunks = split_cypher_commands(create_command_subG, max_size_mb=1)

print(f"✓ Split into {len(chunks['node_chunks'])} node chunk(s) and {len(chunks['edge_chunks'])} edge chunk(s)")

print("\nNode chunks:")
for i, chunk in enumerate(chunks['node_chunks']):
    print(f"  Node chunk {i+1}: {len(chunk.encode('utf-8'))/1024:.1f} KB")
    if i == 10:
        print("  ...")
        break

print("\nEdge chunks:")
for i, chunk in enumerate(chunks['edge_chunks']):
    print(f"  Edge chunk {i+1}: {len(chunk.encode('utf-8'))/1024:.1f} KB")
    if i == 10:
        print("  ...")
        break

✓ Split into 1 node chunk(s) and 19 edge chunk(s)

Node chunks:
  Node chunk 1: 23.4 KB

Edge chunks:
  Edge chunk 1: 0.1 KB
  Edge chunk 2: 0.1 KB
  Edge chunk 3: 0.1 KB
  Edge chunk 4: 0.1 KB
  Edge chunk 5: 0.1 KB
  Edge chunk 6: 0.1 KB
  Edge chunk 7: 0.1 KB
  Edge chunk 8: 0.1 KB
  Edge chunk 9: 0.1 KB
  Edge chunk 10: 0.1 KB
  Edge chunk 11: 0.1 KB
  ...
CPU times: user 4.27 ms, sys: 0 ns, total: 4.27 ms
Wall time: 4.17 ms


In [148]:
%%time

# Run CREATE command
print("\nExecuting query on TuringDB...")
start_time = time.time()

print(f"✓ Split into {len(chunks['node_chunks'])} node chunk(s) and {len(chunks['edge_chunks'])} edge chunk(s)")

# CREATE nodes
print("\nNode chunks:")
for i, chunk in enumerate(tqdm(chunks['node_chunks'])):
    result = client.query(chunk)
# Commit the change
client.query("COMMIT")
print(f"✓ {len(chunks['node_chunks'])} node chunks done")

# CREATE edges
print("\nEdge chunks:")
for i, chunk in enumerate(tqdm(chunks['edge_chunks'])):
    result = client.query(chunk)
# Commit the change
client.query("COMMIT")
print(f"✓ {len(chunks['edge_chunks'])} edge chunks done")

execution_time = time.time() - start_time
print(f"\n✓ Graph created successfully in {execution_time:.2f} seconds")

# Submit changes
start_time = time.time()
client.query("CHANGE SUBMIT")
execution_time = time.time() - start_time
print(f"\n✓ Changes successfully submitted in {execution_time:.2f} seconds")

# Checkout into main
client.checkout()


Executing query on TuringDB...
✓ Split into 1 node chunk(s) and 19 edge chunk(s)

Node chunks:


  0%|          | 0/1 [00:00<?, ?it/s]

✓ 1 node chunks done

Edge chunks:


  0%|          | 0/19 [00:00<?, ?it/s]

✓ 19 edge chunks done

✓ Graph created successfully in 0.15 seconds

✓ Changes successfully submitted in 0.08 seconds
CPU times: user 58.9 ms, sys: 4.29 ms, total: 63.2 ms
Wall time: 234 ms


<div class="alert alert-block alert-info">
    <h2>
        You can visualise the subgraph directly in the notebook below. For more details on nodes and edges, you can go to TuringDB visualizer (running on your instance)
    </h2>
</div>

<div class="alert alert-block alert-info">
    <h2>
        Visualize your graph in TuringDB Graph Visualizer ! Now that your instance is running:
    </h2>
    <h3>
        <ul>
            <li>Go to <a href="https://console.turingdb.ai/databases">TuringDB Console - Database Instances</a></li>
            <li>In your current instance panel, click on "Open Visualizer" button</li>
            <li>Visualizer opens, now you can choose your graph in the dropdown menu at the top-right corner</li>
        </ul>
        You can then play with your graph and visualize the nodes you want !
    </h3>
</div>

In [149]:
from pyvis.network import Network

net = Network(
    height="750px",
    width="100%",
    notebook=True,
    bgcolor="#f8f9fa",
    font_color="#212529",
    directed=True,
)

# Node type colors
type_colors = {
    "tool_resource": "#3498db",  # Blue for tools
    "material_resource": "#e74c3c",  # Red for materials
    "process": "#2ecc71",  # Green for processes
    "design_resource": "#9b59b6",  # Purple for design
    "ultimate_output": "#f39c12",  # Orange for output
}

for node, data in subG.nodes(data=True):
    node_type = data.get("type", "Unknown")
    color = type_colors.get(node_type, "#7f8c8d")

    label = data.get("displayName", str(node))

    # Build title with key information
    title_parts = [f"<b>{label}</b>", f"Type: {node_type}"]

    if data.get("provider_name"):
        title_parts.append(f"Provider: {data.get('provider_name')}")
    if data.get("share_provided"):
        title_parts.append(f"Market Share: {data.get('share_provided')}%")
    if data.get("stage_name") and data.get("stage_name") != "nan":
        title_parts.append(f"Stage: {data.get('stage_name')}")

    title = "<br>".join(title_parts)

    net.add_node(node, label=label, color=color, title=title, size=25)

# Edge type colors
edge_colors = {"GOES_INTO": "#27ae60", "IS_TYPE_OF": "#e67e22"}

for source, target, data in subG.edges(data=True):
    edge_type = list(data.keys())[0] if data else "CONNECTED"
    color = edge_colors.get(edge_type, "#95a5a6")
    net.add_edge(source, target, title=edge_type, color=color, width=2)

net.toggle_physics(True)
net.show(f"{example_name}_subgraph.html")

supply_chain_eto-chip-explorer_subgraph.html


# Use LLM to generate Cypher query

Before running this section, create a `.env` file in the project root with your API keys:

```env
ANTHROPIC_API_KEY=your_key_here
OPENAI_API_KEY=your_key_here
MISTRAL_API_KEY=your_key_here

In [150]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv(override=True)

True

In [151]:
api_keys = {
    "Anthropic": os.getenv("ANTHROPIC_API_KEY"),
    "Mistral": os.getenv("MISTRAL_API_KEY"),
    "OpenAI": os.getenv("OPENAI_API_KEY"),
}

In [152]:
"""Build system prompt with TuringDB schema and examples"""

turingdb_cypher_system_prompt = """
You are an expert at converting natural language questions into TuringDB queries.

Your task is to generate syntactically correct TuringDB queries based on natural language input.

VERY IMPORTANT - YOU MUST FOLLOW THESE REQUESTS - TuringDB Syntax Guidelines:
1. Return ONLY the TuringDB query, no explanations or markdown formatting
2. Use MATCH, CREATE and WHERE operations only
3. Nodes: (n:Label {property = "value"}) or (n:Label {property: value})
4. Edges: Use DIRECTED syntax with ->
5. Pattern matching: MATCH (n)-[e]->(m)
6. Property matching: Use = operator for exact matching
7. Multiple constraints: (n:Person:Engineer {name = "John", age = 30})
8. Return all matched entities: RETURN n, e, m or use RETURN * for all
9. Filter using WHERE clause: MATCH (n:Person) WHERE n.name = 'John' RETURN n.firstname, n.lastname

VERY IMPORTANT - YOU ARE NOT ALLOWED TO USE THE FOLLOWING - FORBIDDEN in TuringDB:
- Do NOT use AS aliases
- Do NOT use LIMIT, SKIP clauses
- Do NOT use WITH clauses
- Do NOT use CALL (except for metaqueries)
- Do NOT use toLower() or other functions
- Do NOT use wildcard character (*)
- Do NOT use multi-hops pattern for edges: e.g. `-[e:CONNECTED*1..10]->`
- Do NOT use "end" or "s3" variable name

Supported TuringDB Operations:
- MATCH queries: MATCH (n:Label)-[e:Type]->(m) RETURN n, m
- CREATE queries: CREATE (n:Label{property="value"})-[e:Type]->(m:Label)
- Metaqueries: CALL db.propertyTypes(), CALL db.labels(), CALL db.edgeTypes()
- Property types: String ("text" or `text`), Boolean (true/false), Integer (20), Double (20.5)

Examples for few-shot learning:
- Find all persons: MATCH (n:Person) RETURN n
- Find connections: MATCH (n:Person)-[e]->(m:Person) RETURN n, e, m
- Create person: CREATE (n:Person{name="John", age=30})
- Match person with specific name: MATCH (p:Person) WHERE p.name = "John" RETURN p
- Path with 1 hop between Station Paddington and Blackfriars:  MATCH (first:Station{displayName:"Paddington"})-[e1:CONNECTED]->(last:Station{displayName="Blackfriars"}) RETURN start, start.displayName, start.Note, e1.Line, last, last.displayName, last.Note
- Path with 2 hops between Station Paddington and Blackfriars: MATCH (first:Station{displayName:"Paddington"})-[e1:CONNECTED]->(s1:Station)-[e2:CONNECTED]->(last:Station{displayName="Blackfriars"}) RETURN start, start.displayName, start.Note, e1.Line, s1, s1.displayName, s1.Note, e2.Line, last, last.displayName, last.Note
- Path with 8 hops between Station Paddington and Blackfriars: MATCH (first:Station{displayName:"Paddington"})-[e1:CONNECTED]->(s1:Station)-[e2:CONNECTED]->(s2:Station)-[e3:CONNECTED]->(s3:Station)-[e4:CONNECTED]->(s4:Station)-[e5:CONNECTED]->(s5:Station)-[e6:CONNECTED]->(s6:Station)-[e7:CONNECTED]->(s7:Station)-[e8:CONNECTED]->(last:Station{displayName="Blackfriars"}) RETURN start, start.displayName, start.Note, e1.Line, s1, s1.displayName, s1.Note, e2.Line, s2, s2.displayName, s2.Note, e3.Line, s3, s3.displayName, s3.Note, e4.Line, s4, s4.displayName, s4.Note, e5.Line, s5, s5.displayName, s5.Note, e6.Line, s6, s6.displayName, s6.Note, e7.Line, s7, s7.displayName, s7.Note, e8.Line, last, last.displayName, last.Note
- Find all Chinese providers and what they supply: MATCH (n{provider_country:"CHN"}) RETURN n, n.provider_name, n.displayName, n.share_provided, n.type
- Find all deposition tools and their types: MATCH (specific)-[e:IS_TYPE_OF]->(general:Tool_Resource{displayName:"Deposition tools"}) RETURN specific, specific.displayName, specific.provider_name, e, general, general.displayName
"""

In [153]:
# Get subset of CREATE command to avoid exceeding context window
create_command_subset = create_command_subG.split("\n")[:5] + create_command_subG.split("\n")[-5:]

# Create system_prompt
system_prompt = f"""
TuringDB Cypher prompt :
{turingdb_cypher_system_prompt}

Here is a subset of the CREATE command used to create the graph, this way you know graph structure.
Only a subset is passed because the whole command is to long :
{create_command_subset}

Here is also the output of "CALL LABELS ()" command, showing the different node types of the graph :
{client.query("CALL db.labels()")}

Here is also the output of "CALL EDGETYPES ()" command, showing the different edge types of the graph :
{client.query("CALL db.edgeTypes()")}

Very important :
- You MUST follow current TuringDB Syntax Guidelines
- You MUST NOT USE what is FORBIDDEN in TuringDB
- By default, RETURN ALL THE MATCHED NODES AND EDGES AND THEIR PROPERTIES in the RETURN section (except contrary demand from user)
- Use the correct node and edge properties name in the MATCH section.
- Use the correct node and edge properties name in the RETURN section.
- Pay attention to which properties come from nodes or edges, to create a functioning query
- Pay attention to lower and uppercases in properties
- If some properties contain spaces, be careful to wrap them

Give me the query FOLLOWING TURINGDB GUIDELINES AND NOT USING WHAT IS FORBIDDEN for this specific question :
"""

In [154]:
## Find all deposition tools and their types
# command = """
# MATCH (specific)-[e:IS_TYPE_OF]-(general:Tool_Resource{displayName:"Deposition tools"}) RETURN specific, specific.displayName, specific.provider_name, e, general, general.displayName
# """
# client.query(command)

# Set natural language query
question = """
Find all deposition tools and their types
"""

In [155]:
## Find all Chinese providers and what they supply
# command = """
# MATCH (n{provider_country:"CHN"}) RETURN n, n.provider_name, n.displayName, n.share_provided, n.type
# """
# client.query(command)

# Set natural language query
question = """
Find all Chinese providers and what they supply
"""

In [156]:
## Find assembly tools and their connections
# command = """
# MATCH (n:Tool_Resource{displayName:"Assembly tools"})-[e]-(m) RETURN n, n.displayName, e, m, m.displayName, m.type
# """
# client.query(command)

# Set natural language query
question = """
Find assembly tools and their connections
"""

In [157]:
question = """
What are the other providers linked to ASML specifically for lithography tools in the supply chain ?
"""

In [158]:
%%time

provider = "Anthropic"

cypher_query = natural_language_to_cypher(
    question=question,
    system_prompt=system_prompt,
    provider=provider,
    api_key=api_keys[provider],
    temperature=0.0,
    #model="claude-3-haiku-20240307",
)
print(f"cypher_query : {cypher_query}")

cypher_query : MATCH (n:Process{displayName:"Lithography"})<-[e:GOES_INTO]-(provider) RETURN provider, provider.provider_name, provider.provider_country, e, n
CPU times: user 200 ms, sys: 19.8 ms, total: 219 ms
Wall time: 2.53 s


In [160]:
%%time

# Set original graph
client.set_graph(graph_name)

try:
    df_path = client.query(cypher_query)

    if df_path.empty:
        print("--> No result found\n")
    else:
        display(df_path)

except TuringDBException:
    print(f"Query generated by LLM not supported.")

--> No result found

CPU times: user 4.23 ms, sys: 1.1 ms, total: 5.33 ms
Wall time: 4.86 ms


In [161]:
print("Notebook finished !")

Notebook finished !
