In [2]:
import re
from types import SimpleNamespace

import pandas as pd
import requests

# Fetch every visible entry of the perovskite solar cell schema that references one
# publication DOI, page by page, and gather the requested quantities in `df`.
base_url = 'https://nomad-lab.eu/prod/v1/api/v1'

# Qualified schema name; it is also appended after '#' to the data.* quantity paths below.
SCHEMA = 'perovskite_solar_cell_database.schema.PerovskiteSolarCell'
# DOI of the publication whose entries are retrieved.
DOI = '10.1021/acsnano.7b02015'
# Seconds to wait on the server per request, so a stalled connection cannot hang the cell.
REQUEST_TIMEOUT_S = 30

json_body = {
  'owner': 'visible',
  'query': {
    'and': [
      {f'data.ref.DOI_number#{SCHEMA}:any': [DOI]},
      {'section_defs.definition_qualified_name:all': [SCHEMA]},
    ]
  },
  'pagination': {
    'page_size': 20
  },
  # Each quantity is listed exactly once (the original repeated the annealing temperature).
  'required': {
    'include': [
      f'data.perovskite_deposition.thermal_annealing_temperature#{SCHEMA}',
      f'data.perovskite_deposition.thermal_annealing_time#{SCHEMA}',
      f'data.perovskite_deposition.solvents#{SCHEMA}',
      f'data.perovskite_deposition.solvents_mixing_ratios#{SCHEMA}',
      f'data.perovskite.composition_long_form#{SCHEMA}',
      'results.properties.optoelectronic.solar_cell.efficiency',
    ]
  }
}

# One flattened DataFrame per page; concatenated once after the loop.
dfs = []

while True:
    response = requests.post(
        f'{base_url}/entries/query',
        json=json_body,
        timeout=REQUEST_TIMEOUT_S,
    )
    # Surface HTTP errors directly; otherwise an error payload without a 'data' key
    # would fail below with an unrelated-looking KeyError.
    response.raise_for_status()
    response_json = response.json()

    dfs.append(pd.json_normalize(response_json['data']))

    # The response's pagination block carries the cursor for the following page;
    # when it is absent (or empty) the last page has been read.
    next_value = response_json['pagination'].get('next_page_after_value')
    if not next_value:
        break

    json_body['pagination']['page_after_value'] = next_value

df = pd.concat(dfs, ignore_index=True)

df
# formulas = set()

# while len(formulas) < 100:
#     response = requests.post(f'{base_url}/entries/query', json=json_body)
#     response_json = response.json()

#     for data in response_json['data']:
#         formulas.add(data['results']['material']['chemical_formula_hill'])

#     next_value = response_json['pagination'].get('next_page_after_value')
#     if not next_value:
#         break
#     json_body['pagination']['page_after_value'] = next_value

# print(formulas)

Unnamed: 0,entry_id,results.properties.optoelectronic.solar_cell.efficiency,data.perovskite.composition_long_form,data.perovskite_deposition.solvents,data.perovskite_deposition.solvents_mixing_ratios,data.perovskite_deposition.thermal_annealing_temperature,data.perovskite_deposition.thermal_annealing_time
0,1_ZlrsTE0OtRMlrhY6gytmbZWEuV,20.28,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,400.0,0.066
1,26F7YEQoHRvd9Eedi8eHTecO7y87,17.59,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,400.0,0.066
2,2EF7w9auZaesTiyundRJ5-IYyH2-,17.75,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,400.0,0.066
3,2Tq4Qh9w-bygW272RCrs12kQGrJt,16.99,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,100.0,30.0
4,2bt-wZjYB3NIwxFA5_O91C1wHJDG,17.12,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,100.0,30.0
...,...,...,...,...,...,...,...
117,xGyFj670QxjIaX9de_9ObezUj7xQ,17.75,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,400.0,0.066
118,xTrjgupHt0Pq2O-_d5TzltZA1eGL,20.75,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,400.0,0.066
119,ynBFOpCz-GwQlddhu6S2wam3DcCQ,20.08,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,400.0,0.066
120,zNFh-4by95asH6bOYWJb1tbkIGzE,19.15,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,300.0,0.133


In [5]:
# Columns that are coerced to a numeric dtype and must be present in every kept row.
numeric_columns = [
  'data.perovskite_deposition.thermal_annealing_temperature',
  'data.perovskite_deposition.thermal_annealing_time',
  'results.properties.optoelectronic.solar_cell.efficiency',
]

# errors='coerce' turns values that cannot be parsed as numbers into NaN.
coerced = {
  column: pd.to_numeric(df[column], errors='coerce')
  for column in numeric_columns
}

# assign() returns a new frame, so `df` itself is left untouched.
df_clean = df.assign(**coerced)

# Discard rows where any of the numeric columns is missing, then renumber the index.
df_clean = df_clean.dropna(subset=numeric_columns)
df_clean = df_clean.reset_index(drop=True)

# Preview the first rows of the cleaned frame.
df_clean.head()


Unnamed: 0,entry_id,results.properties.optoelectronic.solar_cell.efficiency,data.perovskite.composition_long_form,data.perovskite_deposition.solvents,data.perovskite_deposition.solvents_mixing_ratios,data.perovskite_deposition.thermal_annealing_temperature,data.perovskite_deposition.thermal_annealing_time
0,1_ZlrsTE0OtRMlrhY6gytmbZWEuV,20.28,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,400.0,0.066
1,26F7YEQoHRvd9Eedi8eHTecO7y87,17.59,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,400.0,0.066
2,2EF7w9auZaesTiyundRJ5-IYyH2-,17.75,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,400.0,0.066
3,2Tq4Qh9w-bygW272RCrs12kQGrJt,16.99,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,100.0,30.0
4,2bt-wZjYB3NIwxFA5_O91C1wHJDG,17.12,FA0.85MA0.15PbBr0.45I2.55,DMF; DMSO,4; 1,100.0,30.0


In [4]:
import plotly.graph_objects as go

# A number (digits and dots, starting with a digit) that directly follows a letter,
# e.g. the "0.85" in "FA0.85". [^\W\d_] matches any single letter.
_SUBSCRIPT_PATTERN = re.compile(r'(?<=[^\W\d_])(\d[\d.]*)')


def format_composition_html(composition):
  """Return `composition` with numbers that follow a letter wrapped in <sub> tags.

  Example: 'PbBr0.45I2.55' -> 'PbBr<sub>0.45</sub>I<sub>2.55</sub>'.

  Values that are not strings (for instance NaN where an entry has no
  composition) are rendered as 'n/a' instead of raising a TypeError.
  """
  if not isinstance(composition, str):
    return 'n/a'
  return _SUBSCRIPT_PATTERN.sub(r'<sub>\1</sub>', composition)


# One HTML tooltip per row of df_clean, in row order.
# NOTE(review): the "hr" unit on the annealing time is not established anywhere in
# this notebook; values such as 30.0 at 100 °C look more like minutes — confirm
# against the schema before relying on the label.
hover_text = [
  (
    f"<b>Composition:</b> {format_composition_html(record['data.perovskite.composition_long_form'])}<br>"
    f"<b>Efficiency:</b> {record['results.properties.optoelectronic.solar_cell.efficiency']:.2f}%<br>"
    f"<b>Annealing Temp:</b> {record['data.perovskite_deposition.thermal_annealing_temperature']}°C<br>"
    f"<b>Annealing Time:</b> {record['data.perovskite_deposition.thermal_annealing_time']} hr<br>"
    f"<b>Solvents:</b> {record['data.perovskite_deposition.solvents']}<br>"
    f"<b>Solvent Ratios:</b> {record['data.perovskite_deposition.solvents_mixing_ratios']}"
  )
  for record in df_clean.to_dict('records')
]

# df_clean columns shown on the x, y and z axes respectively.
time_col = 'data.perovskite_deposition.thermal_annealing_time'
temperature_col = 'data.perovskite_deposition.thermal_annealing_temperature'
efficiency_col = 'results.properties.optoelectronic.solar_cell.efficiency'

# One marker per row of df_clean, coloured by efficiency; the tooltip comes from hover_text.
efficiency_markers = go.Scatter3d(
  x=df_clean[time_col],
  y=df_clean[temperature_col],
  z=df_clean[efficiency_col],
  mode='markers',
  marker={
    'size': 5,
    'opacity': 0.8,
    'color': df_clean[efficiency_col],
    'colorscale': 'Viridis',
    'colorbar': {'title': 'Efficiency (%)'},
  },
  text=hover_text,
  hoverinfo='text',
)

fig = go.Figure(data=[efficiency_markers])

# Axis titles plus a logarithmic x axis for the annealing time.
fig.update_layout(
  title='3D Scatter Plot of Annealing Time vs Temperature vs Efficiency',
  scene={
    'xaxis_title': 'Annealing Time (log scale)',
    'yaxis_title': 'Annealing Temperature (°C)',
    'zaxis_title': 'Efficiency (%)',
    'xaxis': {'type': 'log'},
  },
)

fig.show()