<div class="alert alert-block alert-success">
    <h1>
        Example notebook - Supply chain
    </h1>
    <p>
        Dataset documentation: <a href="https://eto.tech/dataset-docs/chipexplorer/">ETO Chip Explorer</a>
    </p>
</div>

# Import modules and functions

In [1]:
import os
import pandas as pd
import glob
import re
import numpy as np

from turingdb_examples.utils import create_ID_column
from turingdb_examples.graph import create_graph_from_df, build_create_command_from_networkx
from turingdb_examples.llm import natural_language_to_cypher

# Check data files are available

In [2]:
# Locate the dataset folder relative to the current working directory.
folder_name = "supply_chain_eto-chip-explorer"
path_data = f"{os.getcwd()}/data/{folder_name}"
if not os.path.isdir(path_data):
    raise ValueError(f"{path_data} does not exist")

# Only CSV files are considered, so stray entries in the folder (hidden files,
# checkpoint directories, ...) no longer make the check fail, and the error
# names exactly which expected files are missing.
expected_csv_files = ['inputs.csv', 'providers.csv', 'provision.csv', 'sequence.csv', 'stages.csv']
list_csv_files = sorted(f for f in os.listdir(path_data) if f.endswith('.csv'))
missing_csv_files = sorted(set(expected_csv_files) - set(list_csv_files))
if missing_csv_files:
    raise ValueError(
        f"Missing csv file(s) in {path_data}: {', '.join(missing_csv_files)}"
    )

# Import and format data

In [273]:
# Load every CSV of the dataset into a dict keyed by the file name without its extension.
dict_df = {}

# sorted() makes the load/display order deterministic: glob returns matches in
# arbitrary (filesystem-dependent) order.
for path_csv in sorted(glob.glob(f"{path_data}/*.csv")):
    print(100 * '-')
    # splitext removes only the trailing extension. The previous
    # re.sub('.csv', '', ...) treated '.' as "any character" and replaced every
    # match anywhere in the name, not just the extension.
    key_name = os.path.splitext(os.path.basename(path_csv))[0]
    dict_df[key_name] = pd.read_csv(path_csv)

    print(f"--- {key_name}")

    display(dict_df[key_name])

print(100 * '-')

----------------------------------------------------------------------------------------------------
--- providers


Unnamed: 0,provider_name,alias,provider_id,provider_type,country
0,USA,United States,P1,country,
1,CHN,China,P2,country,
2,KOR,South Korea,P4,country,
3,Various countries,,P5,country,
4,JPN,Japan,P7,country,
...,...,...,...,...,...
392,Shenzhen Naso Tech Co.,,P407,organization,CHN
393,Intevac,,P408,organization,USA
394,Palomar Technologies,,P409,organization,USA
395,PacTech,,P410,organization,JPN


----------------------------------------------------------------------------------------------------
--- inputs


Unnamed: 0,input_id,input_name,type,stage_name,stage_id,description,year,market_share_chart_global_market_size_info,market_share_chart_caption,market_share_chart_source
0,N0,Chip design,process,Design,S1,"Chip design involves specification, logic desi...",2022.0,$574.1 billion (2022),,Worldwide semiconductor sales. [World Semicond...
1,N6,Logic chip design,design_resource,,,Logic chip design is the design of integrated ...,2022.0,$255.7 billion (2022),,Worldwide semiconductor sales for logic and mi...
2,N1,Advanced CPUs,design_resource,,,"Central processing units (""CPUs"") are the domi...",2019.0,$56.2 billion (microprocessors) (2019),,[CSET](https://cset.georgetown.edu/publication...
3,N2,Discrete GPUs,design_resource,,,"Discrete graphics processing units (""GPUs"") ha...",2019.0,$11.9 billion (2019),,[CSET](https://cset.georgetown.edu/publication...
4,N3,FPGAs,design_resource,,,"Field-programmable gate arrays (""FPGAs""), unli...",2019.0,$5.7 billion (2019),,[CSET](https://cset.georgetown.edu/publication...
...,...,...,...,...,...,...,...,...,...,...
121,N126,Process-specific fabrication materials,material_resource,,,Process-specific fabrication materials are hig...,,,,
122,N127,Lithography tools (adv. pkg.),tool_resource,,,These tools are lithography tools designed and...,2024.0,$275.6 million (2024),,CSET analysis of TechInsights data (2024). Dat...
123,N128,Deposition tools (adv. pkg.),tool_resource,,,These tools are deposition tools designed and ...,2024.0,$2.6 billion (2024),,CSET analysis of TechInsights data (2024). Dat...
124,N129,Etch and clean tools (adv. pkg.),tool_resource,,,These tools are etch and clean tools designed ...,2024.0,$1.2 billion (2024),,CSET analysis of TechInsights data (2024). Dat...


----------------------------------------------------------------------------------------------------
--- stages


Unnamed: 0,stage_id,stage_name,description,market_share_chart_global_market_size_info,market_share_chart_caption,market_share_chart_source
0,S1,Design,"Semiconductor design involves specification, d...",$574 billion (2022),Chart shows market shares for the overall glob...,[Semiconductor Industry Association (SIA)](htt...
1,S2,Fabrication,Fabrication turns designs into chips. Semicond...,,Chart shows share of global fabrication capaci...,[Semiconductor Industry Association (SIA)](htt...
2,S3,"Assembly, testing, and packaging (ATP)","At the end of the fabrication process, the fin...",$95 billion (2022),Chart shows ATP site capacity by country.,[Semiconductor Industry Association (SIA)](htt...


----------------------------------------------------------------------------------------------------
--- provision


Unnamed: 0,provider_name,provider_id,provided_name,provided_id,share_provided,year,source
0,ACM Research,P313,Wet etching and cleaning tools,N49,11.4,2024,CSET analysis of TechInsights data (2024). Dat...
1,ACM Research,P313,Etch and clean tools,N55,3.0,2024,CSET analysis of TechInsights data (2024). Dat...
2,AMEC,P123,Dry etching tools (adv. pkg.),N101,18.1,2024,CSET analysis of TechInsights data (2024). Dat...
3,AMEC,P123,Dry etch tools,N103,5.5,2024,CSET analysis of TechInsights data (2024). Dat...
4,AMEC,P123,Fabrication tools (for advanced packaging),N109,2.5,2024,CSET analysis of TechInsights data (2024). Dat...
...,...,...,...,...,...,...,...
1300,Zhonghuan,P231,Wafer,N26,,2019,[CSET](https://cset.georgetown.edu/publication...
1301,Zhongwei,P295,Ceramic packages,N95,,2019,
1302,Zhuhai Yueya,P301,Substrates,N96,,2019,
1303,ZingSEMI,P237,Wafer,N26,,2019,[CSET](https://cset.georgetown.edu/publication...


----------------------------------------------------------------------------------------------------
--- sequence


Unnamed: 0,input_name,input_id,goes_into_name,goes_into_id,is_type_of_name,is_type_of_id
0,Crystal growing furnaces,N8,Wafer,N26,,
1,Crystal machining tools,N9,Wafer,N26,,
2,EUV lithography tools,N20,,,Lithography tools,N19
3,ArF dry (DUV) lithography tools,N21,,,Lithography tools,N19
4,ArF immersion (DUV) lithography tools,N22,,,Lithography tools,N19
...,...,...,...,...,...,...
134,Auto ball bonders for IC,N134,,,Wire bonding tools,N74
135,Auto ball bonders for non-IC,N135,,,Wire bonding tools,N74
136,Automatic wedge bonders,N136,,,Wire bonding tools,N74
137,Wafer level stud bonders,N137,,,Wire bonding tools,N74


----------------------------------------------------------------------------------------------------


In [312]:
# Work on a copy of the raw provision table and give the generic
# 'year' / 'source' columns names that say what they qualify.
df_provision = (
    dict_df['provision']
    .copy()
    .rename(
        columns={
            'year': 'year_share_provided',
            'source': 'source_provider_provided',
        }
    )
)
df_provision

Unnamed: 0,provider_name,provider_id,provided_name,provided_id,share_provided,year_share_provided,source_provider_provided
0,ACM Research,P313,Wet etching and cleaning tools,N49,11.4,2024,CSET analysis of TechInsights data (2024). Dat...
1,ACM Research,P313,Etch and clean tools,N55,3.0,2024,CSET analysis of TechInsights data (2024). Dat...
2,AMEC,P123,Dry etching tools (adv. pkg.),N101,18.1,2024,CSET analysis of TechInsights data (2024). Dat...
3,AMEC,P123,Dry etch tools,N103,5.5,2024,CSET analysis of TechInsights data (2024). Dat...
4,AMEC,P123,Fabrication tools (for advanced packaging),N109,2.5,2024,CSET analysis of TechInsights data (2024). Dat...
...,...,...,...,...,...,...,...
1300,Zhonghuan,P231,Wafer,N26,,2019,[CSET](https://cset.georgetown.edu/publication...
1301,Zhongwei,P295,Ceramic packages,N95,,2019,
1302,Zhuhai Yueya,P301,Substrates,N96,,2019,
1303,ZingSEMI,P237,Wafer,N26,,2019,[CSET](https://cset.georgetown.edu/publication...


In [313]:
# Work on a copy of the raw providers table; prefix the generic column names
# with 'provider_' so they stay unambiguous once joined to other tables.
df_providers = (
    dict_df['providers']
    .copy()
    .rename(
        columns={
            'alias': 'provider_alias',
            'country': 'provider_country',
        }
    )
)
df_providers

Unnamed: 0,provider_name,provider_alias,provider_id,provider_type,provider_country
0,USA,United States,P1,country,
1,CHN,China,P2,country,
2,KOR,South Korea,P4,country,
3,Various countries,,P5,country,
4,JPN,Japan,P7,country,
...,...,...,...,...,...
392,Shenzhen Naso Tech Co.,,P407,organization,CHN
393,Intevac,,P408,organization,USA
394,Palomar Technologies,,P409,organization,USA
395,PacTech,,P410,organization,JPN


In [323]:
# Enrich each provision row with the provider's alias, type and country.
# Columns present on both sides get the '_y' suffix on the providers side and
# are discarded, so the provision values win; the final selection fixes the
# column order.
# NOTE(review): the displayed result has more rows than the provision table
# before the merge (index up to 1331 vs 1303), which suggests provider_id is
# not unique in df_providers - confirm and deduplicate if unintended.
# NOTE(review): the cell overwrites its own input (df_provision), so it is meant
# to be run once per kernel session.
df_provision = (
    df_provision
    .merge(df_providers, on='provider_id', how='left', suffixes=('', '_y'))
    .pipe(lambda merged: merged.drop(columns=merged.filter(regex='_y$').columns))
    .loc[:, [
        'provided_name', 'provided_id',
        'provider_name', 'provider_id', 'provider_alias', 'provider_type', 'provider_country',
        'share_provided', 'year_share_provided', 'source_provider_provided',
    ]]
)
df_provision

Unnamed: 0,provided_name,provided_id,provider_name,provider_id,provider_alias,provider_type,provider_country,share_provided,year_share_provided,source_provider_provided
0,Wet etching and cleaning tools,N49,ACM Research,P313,,organization,USA,11.4,2024,CSET analysis of TechInsights data (2024). Dat...
1,Etch and clean tools,N55,ACM Research,P313,,organization,USA,3.0,2024,CSET analysis of TechInsights data (2024). Dat...
2,Dry etching tools (adv. pkg.),N101,AMEC,P123,Advanced Micro-Fabrication Equipment Inc. China,organization,CHN,18.1,2024,CSET analysis of TechInsights data (2024). Dat...
3,Dry etch tools,N103,AMEC,P123,Advanced Micro-Fabrication Equipment Inc. China,organization,CHN,5.5,2024,CSET analysis of TechInsights data (2024). Dat...
4,Fabrication tools (for advanced packaging),N109,AMEC,P123,Advanced Micro-Fabrication Equipment Inc. China,organization,CHN,2.5,2024,CSET analysis of TechInsights data (2024). Dat...
...,...,...,...,...,...,...,...,...,...,...
1328,Wafer,N26,Zhonghuan,P231,,organization,CHN,,2019,[CSET](https://cset.georgetown.edu/publication...
1329,Ceramic packages,N95,Zhongwei,P295,,organization,CHN,,2019,
1330,Substrates,N96,Zhuhai Yueya,P301,,organization,CHN,,2019,
1331,Wafer,N26,ZingSEMI,P237,,organization,CHN,,2019,[CSET](https://cset.georgetown.edu/publication...


In [318]:
import json

# Function to properly escape strings for Cypher
def escape_for_cypher(value):
    """Escape the characters that would break a double-quoted Cypher string.

    Backslash, double quote, newline, carriage return and tab are each
    replaced by their two-character backslash escape. Anything that is not
    a ``str`` (numbers, NaN, None, ...) is returned unchanged.
    """
    if isinstance(value, str):
        # One pass, one character at a time: a backslash produced by an
        # escape is never escaped a second time.
        escapes = str.maketrans({
            '\\': '\\\\',
            '"': '\\"',
            '\n': '\\n',
            '\r': '\\r',
            '\t': '\\t',
        })
        return value.translate(escapes)

    return value

In [319]:
# Copy the raw inputs table and discard the columns that contain no value at all.
df_inputs = dict_df['inputs'].copy().dropna(axis=1, how='all')

# Escape text cells so they can later be embedded in the Cypher CREATE command;
# non-string cells (e.g. NaN) pass through escape_for_cypher unchanged.
object_columns = df_inputs.select_dtypes(include=['object']).columns
for col in object_columns:
    df_inputs[col] = df_inputs[col].map(escape_for_cypher)

df_inputs

Unnamed: 0,input_id,input_name,type,stage_name,stage_id,description,year,market_share_chart_global_market_size_info,market_share_chart_source
0,N0,Chip design,process,Design,S1,"Chip design involves specification, logic desi...",2022.0,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...
1,N6,Logic chip design,design_resource,,,Logic chip design is the design of integrated ...,2022.0,$255.7 billion (2022),Worldwide semiconductor sales for logic and mi...
2,N1,Advanced CPUs,design_resource,,,"Central processing units (\""CPUs\"") are the do...",2019.0,$56.2 billion (microprocessors) (2019),[CSET](https://cset.georgetown.edu/publication...
3,N2,Discrete GPUs,design_resource,,,"Discrete graphics processing units (\""GPUs\"") ...",2019.0,$11.9 billion (2019),[CSET](https://cset.georgetown.edu/publication...
4,N3,FPGAs,design_resource,,,"Field-programmable gate arrays (\""FPGAs\""), un...",2019.0,$5.7 billion (2019),[CSET](https://cset.georgetown.edu/publication...
...,...,...,...,...,...,...,...,...,...
121,N126,Process-specific fabrication materials,material_resource,,,Process-specific fabrication materials are hig...,,,
122,N127,Lithography tools (adv. pkg.),tool_resource,,,These tools are lithography tools designed and...,2024.0,$275.6 million (2024),CSET analysis of TechInsights data (2024). Dat...
123,N128,Deposition tools (adv. pkg.),tool_resource,,,These tools are deposition tools designed and ...,2024.0,$2.6 billion (2024),CSET analysis of TechInsights data (2024). Dat...
124,N129,Etch and clean tools (adv. pkg.),tool_resource,,,These tools are etch and clean tools designed ...,2024.0,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...


In [320]:
# Copy the raw stages table, rename its description so it cannot be confused
# with the input description, and drop the market-size / source columns.
df_stages = (
    dict_df['stages']
    .copy()
    .rename(columns={'description': 'stage_description'})
    .drop(columns=['market_share_chart_global_market_size_info', 'market_share_chart_source'])
)
df_stages

Unnamed: 0,stage_id,stage_name,stage_description,market_share_chart_caption
0,S1,Design,"Semiconductor design involves specification, d...",Chart shows market shares for the overall glob...
1,S2,Fabrication,Fabrication turns designs into chips. Semicond...,Chart shows share of global fabrication capaci...
2,S3,"Assembly, testing, and packaging (ATP)","At the end of the fabrication process, the fin...",Chart shows ATP site capacity by country.


In [321]:
# Attach the stage information to every input that carries a stage_id.
# Columns present on both sides get the '_y' suffix on the stages side and are
# discarded, so the values already on the inputs win.
# NOTE(review): the cell overwrites its own input (df_inputs), so it is meant to
# be run once per kernel session.
df_inputs = (
    df_inputs
    .merge(df_stages, on='stage_id', how='left', suffixes=('', '_y'))
    .pipe(lambda merged: merged.drop(columns=merged.filter(regex='_y$').columns))
)

# Fix the column order: identity, stage, year, then market-share details.
df_inputs = df_inputs.loc[:, [
    'input_id', 'input_name', 'type', 'description',
    'stage_name', 'stage_id', 'stage_description',
    'year',
    'market_share_chart_caption', 'market_share_chart_global_market_size_info',
    'market_share_chart_source',
]]

df_inputs

Unnamed: 0,input_id,input_name,type,description,stage_name,stage_id,stage_description,year,market_share_chart_caption,market_share_chart_global_market_size_info,market_share_chart_source
0,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...
1,N6,Logic chip design,design_resource,Logic chip design is the design of integrated ...,,,,2022.0,,$255.7 billion (2022),Worldwide semiconductor sales for logic and mi...
2,N1,Advanced CPUs,design_resource,"Central processing units (\""CPUs\"") are the do...",,,,2019.0,,$56.2 billion (microprocessors) (2019),[CSET](https://cset.georgetown.edu/publication...
3,N2,Discrete GPUs,design_resource,"Discrete graphics processing units (\""GPUs\"") ...",,,,2019.0,,$11.9 billion (2019),[CSET](https://cset.georgetown.edu/publication...
4,N3,FPGAs,design_resource,"Field-programmable gate arrays (\""FPGAs\""), un...",,,,2019.0,,$5.7 billion (2019),[CSET](https://cset.georgetown.edu/publication...
...,...,...,...,...,...,...,...,...,...,...,...
121,N126,Process-specific fabrication materials,material_resource,Process-specific fabrication materials are hig...,,,,,,,
122,N127,Lithography tools (adv. pkg.),tool_resource,These tools are lithography tools designed and...,,,,2024.0,,$275.6 million (2024),CSET analysis of TechInsights data (2024). Dat...
123,N128,Deposition tools (adv. pkg.),tool_resource,These tools are deposition tools designed and ...,,,,2024.0,,$2.6 billion (2024),CSET analysis of TechInsights data (2024). Dat...
124,N129,Etch and clean tools (adv. pkg.),tool_resource,These tools are etch and clean tools designed ...,,,,2024.0,,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...


In [325]:
# Display the provision table (already enriched with provider details) before joining it to the inputs.
df_provision

Unnamed: 0,provided_name,provided_id,provider_name,provider_id,provider_alias,provider_type,provider_country,share_provided,year_share_provided,source_provider_provided
0,Wet etching and cleaning tools,N49,ACM Research,P313,,organization,USA,11.4,2024,CSET analysis of TechInsights data (2024). Dat...
1,Etch and clean tools,N55,ACM Research,P313,,organization,USA,3.0,2024,CSET analysis of TechInsights data (2024). Dat...
2,Dry etching tools (adv. pkg.),N101,AMEC,P123,Advanced Micro-Fabrication Equipment Inc. China,organization,CHN,18.1,2024,CSET analysis of TechInsights data (2024). Dat...
3,Dry etch tools,N103,AMEC,P123,Advanced Micro-Fabrication Equipment Inc. China,organization,CHN,5.5,2024,CSET analysis of TechInsights data (2024). Dat...
4,Fabrication tools (for advanced packaging),N109,AMEC,P123,Advanced Micro-Fabrication Equipment Inc. China,organization,CHN,2.5,2024,CSET analysis of TechInsights data (2024). Dat...
...,...,...,...,...,...,...,...,...,...,...
1328,Wafer,N26,Zhonghuan,P231,,organization,CHN,,2019,[CSET](https://cset.georgetown.edu/publication...
1329,Ceramic packages,N95,Zhongwei,P295,,organization,CHN,,2019,
1330,Substrates,N96,Zhuhai Yueya,P301,,organization,CHN,,2019,
1331,Wafer,N26,ZingSEMI,P237,,organization,CHN,,2019,[CSET](https://cset.georgetown.edu/publication...


In [329]:
# Preview the columns obtained by joining the inputs with their providers.
# `df_inputs_provision` was not assigned anywhere before this cell, so the cell
# raised NameError on a fresh kernel; the preview frame is now built here.
# df_inputs and df_provision themselves are left untouched.
df_inputs_provision = pd.merge(
    df_inputs, df_provision, left_on='input_id', right_on='provided_id', how='left'
)
df_inputs_provision.columns

Index(['input_id', 'input_name', 'type', 'description', 'stage_name',
       'stage_id', 'stage_description', 'year', 'market_share_chart_caption',
       'market_share_chart_global_market_size_info',
       'market_share_chart_source', 'provided_name', 'provided_id',
       'provider_name', 'provider_id', 'provider_alias', 'provider_type',
       'provider_country', 'share_provided', 'year_share_provided',
       'source_provider_provided'],
      dtype='object')

In [337]:
# Join every input with the providers that supply it (input_id == provided_id),
# then drop the provision-side name/id columns, which duplicate
# input_name / input_id.
# The result holds one row per (input, provider) pair, so an input_id can appear
# on several rows.
# NOTE(review): this frame is later passed as node attributes keyed by input_id;
# how repeated keys are resolved depends on create_graph_from_df - confirm that
# keeping a single provider per node is intended.
# NOTE(review): the cell overwrites its own input (df_inputs), so it is meant to
# be run once per kernel session.
df_inputs = (
    df_inputs
    .merge(
        df_provision,
        left_on='input_id',
        right_on='provided_id',
        how='left',
        suffixes=('', '_y'),
    )
    .drop(columns=['provided_name', 'provided_id'])
)
df_inputs

Unnamed: 0,input_id,input_name,type,description,stage_name,stage_id,stage_description,year,market_share_chart_caption,market_share_chart_global_market_size_info,market_share_chart_source,provider_name,provider_id,provider_alias,provider_type,provider_country,share_provided,year_share_provided,source_provider_provided
0,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,CHN,P2,China,country,,5.0,2022.0,Worldwide semiconductor sales. [World Semicond...
1,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,EUR,P312,Europe,country,EUR,9.0,2022.0,Worldwide semiconductor sales. [World Semicond...
2,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,JPN,P7,Japan,country,,4.0,2022.0,Worldwide semiconductor sales. [World Semicond...
3,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,KOR,P4,South Korea,country,,3.0,2022.0,Worldwide semiconductor sales. [World Semicond...
4,N0,Chip design,process,"Chip design involves specification, logic desi...",Design,S1,"Semiconductor design involves specification, d...",2022.0,Chart shows market shares for the overall glob...,$574.1 billion (2022),Worldwide semiconductor sales. [World Semicond...,TWN,P8,Taiwan,country,,11.0,2022.0,Worldwide semiconductor sales. [World Semicond...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1315,N129,Etch and clean tools (adv. pkg.),tool_resource,These tools are etch and clean tools designed ...,,,,2024.0,,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...,USA,P1,United States,country,,56.4,2024.0,CSET analysis of TechInsights data (2024). Dat...
1316,N129,Etch and clean tools (adv. pkg.),tool_resource,These tools are etch and clean tools designed ...,,,,2024.0,,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...,Various companies,P370,,organization,Various countries,0.8,2024.0,CSET analysis of TechInsights data (2024). Dat...
1317,N129,Etch and clean tools (adv. pkg.),tool_resource,These tools are etch and clean tools designed ...,,,,2024.0,,$1.2 billion (2024),CSET analysis of TechInsights data (2024). Dat...,Veeco,P121,,organization,USA,3.9,2024.0,CSET analysis of TechInsights data (2024). Dat...
1318,N130,Direct write systems (adv. pkg.),tool_resource,These tools are direct write systems designed ...,,,,2024.0,,$48.1 million (2024),CSET analysis of TechInsights data (2024). Dat...,Applied Materials,P81,,organization,USA,100.0,2024.0,CSET analysis of TechInsights data (2024). Dat...


In [338]:
# Collapse the two mutually alternative link descriptions ('goes_into_*' and
# 'is_type_of_*') into a single output_name / output_id pair, plus a type_link
# column telling which kind of link the row describes.
# 'goes_into' takes precedence whenever goes_into_id is filled.
# NOTE(review): a row with both links filled would keep only 'goes_into' -
# confirm the dataset never contains such rows.
df_sequence = (
    dict_df['sequence']
    .copy()
    .assign(
        output_name=lambda d: d['goes_into_name'].combine_first(d['is_type_of_name']),
        output_id=lambda d: d['goes_into_id'].combine_first(d['is_type_of_id']),
        type_link=lambda d: np.where(d['goes_into_id'].notna(), 'goes_into_id', 'is_type_of_id'),
    )
    # Strip the '_id' part so the value can serve as the edge type.
    .assign(type_link=lambda d: d['type_link'].str.replace('_id', ''))
    .drop(columns=['goes_into_name', 'goes_into_id', 'is_type_of_name', 'is_type_of_id'])
)
df_sequence

Unnamed: 0,input_name,input_id,output_name,output_id,type_link
0,Crystal growing furnaces,N8,Wafer,N26,goes_into
1,Crystal machining tools,N9,Wafer,N26,goes_into
2,EUV lithography tools,N20,Lithography tools,N19,is_type_of
3,ArF dry (DUV) lithography tools,N21,Lithography tools,N19,is_type_of
4,ArF immersion (DUV) lithography tools,N22,Lithography tools,N19,is_type_of
...,...,...,...,...,...
134,Auto ball bonders for IC,N134,Wire bonding tools,N74,is_type_of
135,Auto ball bonders for non-IC,N135,Wire bonding tools,N74,is_type_of
136,Automatic wedge bonders,N136,Wire bonding tools,N74,is_type_of
137,Wafer level stud bonders,N137,Wire bonding tools,N74,is_type_of


# Create graph from dataframe

In [339]:
# Display the edge table (one row per input -> output link) that the graph is built from.
df_sequence

Unnamed: 0,input_name,input_id,output_name,output_id,type_link
0,Crystal growing furnaces,N8,Wafer,N26,goes_into
1,Crystal machining tools,N9,Wafer,N26,goes_into
2,EUV lithography tools,N20,Lithography tools,N19,is_type_of
3,ArF dry (DUV) lithography tools,N21,Lithography tools,N19,is_type_of
4,ArF immersion (DUV) lithography tools,N22,Lithography tools,N19,is_type_of
...,...,...,...,...,...
134,Auto ball bonders for IC,N134,Wire bonding tools,N74,is_type_of
135,Auto ball bonders for non-IC,N135,Wire bonding tools,N74,is_type_of
136,Automatic wedge bonders,N136,Wire bonding tools,N74,is_type_of
137,Wafer level stud bonders,N137,Wire bonding tools,N74,is_type_of


In [340]:
# Build the graph from the sequence table: each row links the node identified
# by input_id to the node identified by output_id, and type_link is passed as
# the edge column (exposed under the label "type").
# df_inputs is supplied as the node-attribute table, keyed by input_id.
# NOTE(review): the exact handling of these arguments lives in
# turingdb_examples.graph.create_graph_from_df - confirm there.
# NOTE(review): missing values in df_inputs show up as `nan` node attributes in
# the node preview and as "nan" strings in the generated CREATE command;
# consider cleaning NaN values before building the graph.
G = create_graph_from_df(
    df_sequence,
    source_node_col={"id": "input_id", "displayName": "input_name"},
    target_node_col={"id": "output_id", "displayName": "output_name"},
    node_attributes_df=df_inputs,
    node_attributes_key_col="input_id",
    edge_col="type_link",
    edge_col_label="type"
)
# Printing the graph gives its type plus node and edge counts.
print(f"Resulting graph : {G}")

Resulting graph : DiGraph with 126 nodes and 139 edges


In [341]:
# Number of nodes / edges to preview.
n_first = 10

# Each node is printed as its identifier followed by one indented
# "key : value" line per attribute.
print("NODES :")
for node_id, attributes in list(G.nodes(data=True))[:n_first]:
    print(node_id)
    for key, val in attributes.items():
        print(f"   {key} : {val}")

    print()
print()

# Each edge is printed as the raw (source, target, attributes) tuple.
print("EDGES :")
for edge in list(G.edges(data=True))[:n_first]:
    print(edge)

NODES :
N8
   displayName : Crystal growing furnaces
   input_name : Crystal growing furnaces
   type : tool_resource
   description : Crystal growing furnaces and machining tools are necessary to produce all wafers-thin, disc-shaped materials fabs used to produce chips. The furnace forms a cylindrical ingot of silicon from polycrystalline raw silicon; machining equipment then cuts the ingot into wafers used for chip fabrication. These tools have relatively low value and complexity relative to other semiconductor manufacturing equipment. Japan, Germany, and Switzerland are the main producers.
   stage_name : nan
   stage_id : nan
   stage_description : nan
   year : 2022.0
   market_share_chart_caption : nan
   market_share_chart_global_market_size_info : $44 million (2022)
   market_share_chart_source : CSET analysis of TechInsights data (2022). Data used for analysis were published by TechInsights on May 5, 2025.
   provider_name : DEU
   provider_id : P32
   provider_alias : Germany

# Create graph using `turingdb` python package

<div class="alert alert-block alert-info">
    <h2>
        See <a href="https://docs.turingdb.ai/quickstart">TuringDB Get started documentation</a> for the important steps to follow :
    </h2>
    <h4>
        <ul>
            <li>Create your TuringDB account</li>
            <li>Create your instance in the <a href="https://console.turingdb.ai/auth">TuringDB Cloud UI</a></li>
            <li>Copy your Instance ID from the Database Instances management page</li>
            <li>Get API Key from the Settings in UI</li>
        </ul>
        Remember to keep your instance running while you work in this notebook!
    </h4>
</div>

In [342]:
from turingdb import TuringDB

# Create TuringDB client.
# As written, the client targets a server at http://localhost:6666. The
# commented-out arguments are the alternative for an instance identified by
# its instance id and API token.
# The trailing comma after `host=` keeps the call valid when those lines are
# uncommented (without it, uncommenting them is a SyntaxError).
# If you use an API token, load it from an environment variable rather than
# pasting it into the notebook.
client = TuringDB(
    host="http://localhost:6666",
    # instance_id="...",  # Replace by your instance id
    # auth_token="...",  # Replace by your API token
)

In [343]:
# Ask the server which graphs exist and keep the "graph" column of the answer
# as a plain Python list.
graphs_listing = client.query("LIST GRAPH")
list_graphs = graphs_listing["graph"].tolist()

In [344]:
# Set graph name: <prefix><n>, where n is one more than the highest numeric
# suffix already used by an existing graph with this prefix (1 when none exists).
graph_name_prefix = "supply_chain_eto_chip_explorer"
# Only names made of the exact prefix followed by digits take part in the
# numbering. The previous int(re.sub(prefix, "", g)) raised ValueError as soon
# as a graph was named exactly like the prefix or carried a non-numeric suffix,
# and it used the prefix as an unescaped regular expression.
graph_name_pattern = re.compile(re.escape(graph_name_prefix) + r"(\d+)")
graph_name_nb_suffix = str(
    max(
        [
            int(m.group(1))
            for m in map(graph_name_pattern.fullmatch, list_graphs)
            if m
        ] + [0]
    )
    + 1
)
graph_name = graph_name_prefix + graph_name_nb_suffix
graph_name

'supply_chain_eto_chip_explorer8'

In [345]:
# Create a new graph under the freshly computed name and make it the graph
# the client works on
client.query(f"CREATE GRAPH {graph_name}")
client.set_graph(graph_name)

# Create a new change on the graph; the first value of the "Change ID" column
# of the answer identifies it
change = client.query("CHANGE NEW")["Change ID"][0]

# Checkout into the change so that the following queries run against it
client.checkout(change=change)

In [346]:
# Turn the networkx graph into a single Cypher CREATE statement and print it
# between two rows of asterisks.
create_command = build_create_command_from_networkx(G)
separator = 100 * '*'
print(f"Cypher CREATE command :\n\n{separator}\n{create_command}\n{separator}")

Cypher CREATE command :

****************************************************************************************************
CREATE (n0:Tool_Resource {"id":"N8", "displayName":"Crystal growing furnaces", "input_name":"Crystal growing furnaces", "type":"tool_resource", "description":"Crystal growing furnaces and machining tools are necessary to produce all wafers-thin, disc-shaped materials fabs used to produce chips. The furnace forms a cylindrical ingot of silicon from polycrystalline raw silicon; machining equipment then cuts the ingot into wafers used for chip fabrication. These tools have relatively low value and complexity relative to other semiconductor manufacturing equipment. Japan, Germany, and Switzerland are the main producers.", "stage_name":"nan", "stage_id":"nan", "stage_description":"nan", "year":"2022.0", "market_share_chart_caption":"nan", "market_share_chart_global_market_size_info":"$44 million (2022)", "market_share_chart_source":"CSET analysis of TechInsights data

In [347]:
# Run CREATE command (inside the change checked out earlier)
client.query(create_command)

# Commit the change, then submit it
client.query("COMMIT")
client.query("CHANGE SUBMIT")

# Checkout into main (checkout without arguments leaves the change)
client.checkout()

<div class="alert alert-block alert-info">
    <h2>
        Visualize your graph in TuringDB Graph Visualizer ! Now that your instance is running:
    </h2>
    <h3>
        <ul>
            <li>Go to <a href="https://console.turingdb.ai/databases">TuringDB Console - Database Instances</a></li>
            <li>In your current instance panel, click on "Open Visualizer" button</li>
            <li>Visualizer opens, now you can choose your graph in the dropdown menu at the top-right corner</li>
        </ul>
        You can then play with your graph and visualize the nodes you want !
    </h3>
</div>

# Query TuringDB

In [350]:
# Fetch every edge together with the display names of its two endpoints
# (the pattern does not constrain the edge direction).
command = "MATCH (n)-[e]-(m) RETURN n.displayName, e, m.displayName"
df = client.query(command)
if not df.empty:
    display(df)
else:
    print("No result found")

Unnamed: 0,n.displayName,e,m.displayName
0,Overlay metrology tools,0,Wafer inspection tools
1,Dry stripping tools,1,Dry etching and cleaning tools
2,Ion milling tools,2,Dry etching and cleaning tools
3,Misc. dry etch tools,3,Dry etch tools
4,Dry clean tools,4,Dry etching and cleaning tools
...,...,...,...
134,AI ASICs,134,Logic chip design
135,FPGAs,135,Logic chip design
136,Discrete GPUs,136,Logic chip design
137,Logic chip design,137,Chip design


In [349]:
# Fetch every edge attached to the node whose displayName is "FPGAs",
# together with the display name of the node at the other end.
command = """
MATCH (n{displayName: "FPGAs"})-[e]-(m)
RETURN n.displayName, e, m.displayName
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,n.displayName,e,m.displayName
0,FPGAs,135,Logic chip design


In [164]:
# Find pairs of Tool_Resource nodes connected by any edge and return, for each
# side, its display name, its type and the node itself.
command = """
MATCH (i1:Tool_Resource)--(i2:Tool_Resource)
RETURN i1.displayName, i1.type, i1, i2.displayName, i2.type, i2
"""
client.query(command)

Unnamed: 0,i1.displayName,i1.type,i1,i2.displayName,i2.type,i2
0,Overlay metrology tools,tool_resource,0,Wafer inspection tools,tool_resource,88
1,Dry stripping tools,tool_resource,1,Dry etching and cleaning tools,tool_resource,55
2,Ion milling tools,tool_resource,2,Dry etching and cleaning tools,tool_resource,55
3,Misc. dry etch tools,tool_resource,3,Dry etch tools,tool_resource,51
4,Dry clean tools,tool_resource,4,Dry etching and cleaning tools,tool_resource,55
...,...,...,...,...,...,...
69,Process monitoring tools,tool_resource,84,Process control tools,tool_resource,89
70,"Film, stack, and shape metrology tools",tool_resource,85,Wafer inspection tools,tool_resource,88
71,Critical dimensions metrology tools,tool_resource,86,Wafer inspection tools,tool_resource,88
72,Defect inspection tools,tool_resource,87,Wafer inspection tools,tool_resource,88


In [169]:
# Find pairs of Tool_Resource nodes linked by an IS_TYPE_OF edge and return
# the display name and description of both sides.
command = """
MATCH (i1:Tool_Resource)-[e:IS_TYPE_OF]-(i2:Tool_Resource)
RETURN i1.displayName, i1.description, i2.displayName, i2.description
"""
client.query(command)

Unnamed: 0,i1.displayName,i1.description,i2.displayName,i2.description
0,Overlay metrology tools,Overlay metrology tools are used to measure an...,Wafer inspection tools,Even the tiniest imperfections can cause serio...
1,Dry stripping tools,Dry stripping tools use plasma to remove photo...,Dry etching and cleaning tools,Etching and cleaning tools have two main types...
2,Ion milling tools,Ion milling tools are advanced semiconductor m...,Dry etching and cleaning tools,Etching and cleaning tools have two main types...
3,Misc. dry etch tools,Miscellaneous dry etch tools are specialized s...,Dry etch tools,The main types of dry etching tools are used e...
4,Dry clean tools,Dry clean tools use plasma to remove films or ...,Dry etching and cleaning tools,Etching and cleaning tools have two main types...
...,...,...,...,...
67,Process monitoring tools,"Process monitoring tools, such as curve tracer...",Process control tools,"In semiconductor fabrication, process control ..."
68,"Film, stack, and shape metrology tools",Film and wafer measuring tools include tools t...,Wafer inspection tools,Even the tiniest imperfections can cause serio...
69,Critical dimensions metrology tools,Critical dimensions measuring tools (including...,Wafer inspection tools,Even the tiniest imperfections can cause serio...
70,Defect inspection tools,Defect inspection tools include brightfield in...,Wafer inspection tools,Even the tiniest imperfections can cause serio...


In [171]:
# NOTE(review): scratch cell - prints the whole CREATE command again. Its
# execution count is lower than that of the cells above, so the saved output
# comes from an earlier state of the notebook; consider removing the cell.
print(create_command)

CREATE (n0:Tool_Resource {"id":"N8", "displayName":"Crystal growing furnaces", "input_name":"Crystal growing furnaces", "type":"tool_resource", "stage_name":"nan", "stage_id":"nan", "description":"Crystal growing furnaces and machining tools are necessary to produce all wafers-thin, disc-shaped materials fabs used to produce chips. The furnace forms a cylindrical ingot of silicon from polycrystalline raw silicon; machining equipment then cuts the ingot into wafers used for chip fabrication. These tools have relatively low value and complexity relative to other semiconductor manufacturing equipment. Japan, Germany, and Switzerland are the main producers.", "year":"2022.0", "market_share_chart_global_market_size_info":"$44 million (2022)", "market_share_chart_source":"CSET analysis of TechInsights data (2022). Data used for analysis were published by TechInsights on May 5, 2025."}),
(n1:Material_Resource {"id":"N26", "displayName":"Wafer", "input_name":"Wafer", "type":"material_resource"

In [177]:
# Display the inputs table (pandas shows the first and last rows of long frames).
df_inputs

Unnamed: 0,input_id,input_name,type,stage_name,stage_id,description,year,market_share_chart_global_market_size_info,market_share_chart_caption,market_share_chart_source
0,N0,Chip design,process,Design,S1,"Chip design involves specification, logic desi...",2022.0,$574.1 billion (2022),,Worldwide semiconductor sales. [World Semicond...
1,N6,Logic chip design,design_resource,,,Logic chip design is the design of integrated ...,2022.0,$255.7 billion (2022),,Worldwide semiconductor sales for logic and mi...
2,N1,Advanced CPUs,design_resource,,,"Central processing units (\""CPUs\"") are the do...",2019.0,$56.2 billion (microprocessors) (2019),,[CSET](https://cset.georgetown.edu/publication...
3,N2,Discrete GPUs,design_resource,,,"Discrete graphics processing units (\""GPUs\"") ...",2019.0,$11.9 billion (2019),,[CSET](https://cset.georgetown.edu/publication...
4,N3,FPGAs,design_resource,,,"Field-programmable gate arrays (\""FPGAs\""), un...",2019.0,$5.7 billion (2019),,[CSET](https://cset.georgetown.edu/publication...
...,...,...,...,...,...,...,...,...,...,...
121,N126,Process-specific fabrication materials,material_resource,,,Process-specific fabrication materials are hig...,,,,
122,N127,Lithography tools (adv. pkg.),tool_resource,,,These tools are lithography tools designed and...,2024.0,$275.6 million (2024),,CSET analysis of TechInsights data (2024). Dat...
123,N128,Deposition tools (adv. pkg.),tool_resource,,,These tools are deposition tools designed and ...,2024.0,$2.6 billion (2024),,CSET analysis of TechInsights data (2024). Dat...
124,N129,Etch and clean tools (adv. pkg.),tool_resource,,,These tools are etch and clean tools designed ...,2024.0,$1.2 billion (2024),,CSET analysis of TechInsights data (2024). Dat...


In [183]:
# Count how many inputs are attached to each stage; rows whose `stage_name`
# is missing are left out by `value_counts` (its default is dropna=True).
df_inputs.loc[:, 'stage_name'].value_counts()

stage_name
Fabrication                               7
Design                                    2
Assembly, testing, and packaging (ATP)    2
Name: count, dtype: int64

In [194]:
# Look for pairs of directly connected nodes that both carry
# stage_name == "Fabrication" and return their display names.
command = """
MATCH (i1 {"stage_name": "Fabrication"})--(i2 {"stage_name": "Fabrication"})
RETURN i1.displayName, i2.displayName
"""
df = client.query(command)

# Render the result table when there is one; otherwise say so explicitly.
if not df.empty:
    display(df)
else:
    print("No result found")

Unnamed: 0,i1.displayName,i2.displayName
0,Ion implantation,Chemical mechanical planarization
1,Photolithography,Etch and clean
2,Etch and clean,Ion implantation
3,Process control and handling,Photolithography
4,Process control and handling,Ion implantation
5,Process control and handling,Deposition
6,Process control and handling,Etch and clean
7,Process control and handling,Chemical mechanical planarization
8,Deposition,Photolithography
9,Fabrication materials,Deposition


In [196]:
# Keep only the rows of the sequence table whose link type is "goes_into".
is_goes_into = df_sequence['type_link'].eq('goes_into')
df_sequence.loc[is_goes_into]

Unnamed: 0,input_name,input_id,output_name,output_id,type_link
0,Crystal growing furnaces,N8,Wafer,N26,goes_into
1,Crystal machining tools,N9,Wafer,N26,goes_into
5,Lithography tools,N19,Photolithography,N25,goes_into
6,Ion implantation,N16,Chemical mechanical planarization,N57,goes_into
7,Etch and clean,N46,Ion implantation,N16,goes_into
11,Photomasks,N33,Photolithography,N25,goes_into
12,Photoresists,N31,Photolithography,N25,goes_into
13,Resist processing tools,N32,Photolithography,N25,goes_into
14,Deposition tools,N36,Deposition,N35,goes_into
23,Chip design,N0,Deposition,N35,goes_into
