# 1. ALL

## Import and setup

In [44]:
import os
from dotenv import load_dotenv

load_dotenv(".env")

neo4j_url = os.getenv("NEO4J_URI")
# Remapping for Langchain Neo4j integration
os.environ["NEO4J_URL"] = neo4j_url
neo4j_username = os.getenv("NEO4J_USERNAME")
neo4j_password = os.getenv("NEO4J_PASSWORD")

ollama_base_url = os.getenv("OLLAMA_BASE_URL")

embedding_model_name = os.getenv("EMBEDDING_MODEL")

llm_model_name = os.getenv("LLM")

In [45]:
# %pip install -qU -r requirements.txt

In [46]:
import requests
from langchain_community.graphs import Neo4jGraph
# import streamlit as st
# from streamlit.logger import get_logger
# from chains import load_embedding_model
# from utils import create_constraints, create_vector_index
# from PIL import Image

# if Neo4j is local, you can go to http://localhost:7474/ to browse the database
#neo4j_graph = Neo4jGraph(url=neo4j_url, username=neo4j_username, password=neo4j_password)


In [47]:
# To działa

# from langchain_community.llms.ollama import Ollama

# llm = Ollama(model=llm_model_name, base_url=ollama_base_url)
# llm.invoke(input="Why is the sky blue?")

In [48]:
import requests
import json

from datetime import datetime
from typing import Any, List

class OllamaGenResponse:
    model: str
    created_at: datetime
    response: str
    done: bool
    done_reason: str
    context: List[int]
    total_duration: int
    load_duration: int
    prompt_eval_count: int
    prompt_eval_duration: int
    eval_count: int
    eval_duration: int

    def __init__(self, model: str, created_at: datetime, response: str, done: bool, done_reason: str, context: List[int], total_duration: int, load_duration: int, prompt_eval_count: int, prompt_eval_duration: int, eval_count: int, eval_duration: int) -> None:
        self.model = model
        self.created_at = created_at
        self.response = response
        self.done = done
        self.done_reason = done_reason
        self.context = context
        self.total_duration = total_duration
        self.load_duration = load_duration
        self.prompt_eval_count = prompt_eval_count
        self.prompt_eval_duration = prompt_eval_duration
        self.eval_count = eval_count
        self.eval_duration = eval_duration
        
    def __str__(self) -> str:
        return self.response
    
    def toJson(self) -> dict[str, Any]:
        return self.__dict__
    
    def toJsonFormattedString(self) -> str:
        json_str = "{\n"
        for key, value in self.toJson().items():
            json_str += f"  \"{key}\": \"{value}\",\n"
        json_str+= "}"

        return json_str
    
    def toShortJson(self) -> str:
        json_str = "{\n"
        for key, value in self.toJson().items():
            if (key == "context"): 
                continue
            if (value is str):
                json_str += f"  \"{key}\": \"{value}\",\n"
            else:
                json_str += f"  \"{key}\": {value},\n"
        json_str+= "}"

        return json_str
    


_url = f"{ollama_base_url}/api/generate"

_headers = {
    "Accept": "*/*",
    "User-Agent": "Python Requests Client",
    "Content-Type": "application/json" 
}

_body_template = {
        "model": "gemma2",
        "stream":False,
    }

def invokeLLM(prompt:str) -> OllamaGenResponse : 

    _body_template["prompt"] = prompt
    _body = json.dumps(_body_template)

    # response = requests.request("POST", _url, data=_body, headers=_headers)
    response = requests.post(url=_url, json=_body_template, headers=_headers)

    # print(response.text)
    _json = json.loads(response.text)
    
    ## https://stackoverflow.com/questions/6578986/how-to-convert-json-data-into-a-python-object 
    obj = OllamaGenResponse(**_json)
    return obj
    pass



In [49]:
# DZIAŁA

# resp = invokeLLM(prompt="hi")
# print("json:")
# print(resp.toJson())
# print("llm response:")
# print(resp)

json:
{'model': 'gemma2', 'created_at': '2024-07-01T19:14:59.04039Z', 'response': 'Hello! 👋  How can I help you today?', 'done': True, 'done_reason': 'stop', 'context': [106, 2425, 108, 544, 107, 235248, 108, 106, 2091, 108, 4521, 235341, 169692, 139, 2299, 798, 590, 1707, 692, 3646, 235336, 107, 235248, 108], 'total_duration': 6398146375, 'load_duration': 4071351917, 'prompt_eval_count': 11, 'prompt_eval_duration': 1088866000, 'eval_count': 12, 'eval_duration': 1234011000}
llm response:
Hello! 👋  How can I help you today?


In [50]:
# DZIAŁA
 
# print(f"json: {resp.toJson()}\n")
# print(f"llm response: {resp}\n")
# print(f"json formatted: \n{resp.toJsonFormattedString()}")

json: {'model': 'gemma2', 'created_at': '2024-07-01T19:14:59.04039Z', 'response': 'Hello! 👋  How can I help you today?', 'done': True, 'done_reason': 'stop', 'context': [106, 2425, 108, 544, 107, 235248, 108, 106, 2091, 108, 4521, 235341, 169692, 139, 2299, 798, 590, 1707, 692, 3646, 235336, 107, 235248, 108], 'total_duration': 6398146375, 'load_duration': 4071351917, 'prompt_eval_count': 11, 'prompt_eval_duration': 1088866000, 'eval_count': 12, 'eval_duration': 1234011000}

llm response: Hello! 👋  How can I help you today?

json formatted: 
{
  "model": "gemma2",
  "created_at": "2024-07-01T19:14:59.04039Z",
  "response": "Hello! 👋  How can I help you today?",
  "done": "True",
  "done_reason": "stop",
  "context": "[106, 2425, 108, 544, 107, 235248, 108, 106, 2091, 108, 4521, 235341, 169692, 139, 2299, 798, 590, 1707, 692, 3646, 235336, 107, 235248, 108]",
  "total_duration": "6398146375",
  "load_duration": "4071351917",
  "prompt_eval_count": "11",
  "prompt_eval_duration": "108886

## Gen KG - cars ontology

In [53]:
cars_ontology_rdf = ""
with open('ontology/cars.ttl', 'r') as file:
    cars_ontology_rdf = file.read()
"Done"

'Done'

In [54]:
csv = ""
with open('import/vehicles_dataset.csv', 'r') as file:
    csv = file.read()
"Done"

'Done'

### Lets ask llm for better prompt:


```md
You are helpful AI prompt researcher who helps with given tasks.
Please write prompt to LLM to create objects, properties and relations in Cypher for neo4j database, when you are given text content:
 - ontology in rdf format
 - cars data in csv format with column headers
 - descriptions of column headers:
    name: The full name of the vehicle, including make, model, and trim.
    description: A brief description of the vehicle, often including key features and selling points.
    make: The manufacturer of the vehicle (e.g., Ford, Toyota, BMW).
    model: The model name of the vehicle.
    type: The type of the vehicle, which is "New" for all entries in this dataset.
    year: The year the vehicle was manufactured.
    price: The price of the vehicle in USD.
    engine: Details about the engine, including type and specifications.
    cylinders: The number of cylinders in the vehicle's engine.
    fuel: The type of fuel used by the vehicle (e.g., Gasoline, Diesel, Electric).
    mileage: The mileage of the vehicle, typically in miles.
    transmission: The type of transmission (e.g., Automatic, Manual).
    trim: The trim level of the vehicle, indicating different feature sets or packages.
    body: The body style of the vehicle (e.g., SUV, Sedan, Pickup Truck).
    doors: The number of doors on the vehicle.
    exterior_color: The exterior color of the vehicle.
    interior_color: The interior color of the vehicle.
    drivetrain: The drivetrain of the vehicle (e.g., All-wheel Drive, Front-wheel Drive).
```

### Prompt for Generating Neo4j Cypher from RDF Ontology and CSV Data with Detailed Column Descriptions

---

<div class="alert alert-block alert-warning">
<b>[!] WAŻNE </b> uzupełniłem go o wklejenie treści plików
</div> 


**Objective:** Generate Neo4j Cypher statements to create a knowledge graph based on an RDF ontology and CSV data about cars, leveraging detailed column descriptions for accurate property mapping.

**Input:**

* **ontology.rdf:** Content of RDF file containing the ontology definition, including classes, properties, and relationships:
    ```
    {}
    ```

* **cars.csv:** Content of CSV file with car data and column headers:
    ```
    {}
    ```

* **column_descriptions.txt:** Content of text file containing detailed descriptions for each column header in the `cars.csv` file, linking them to relevant ontology properties:
    ```
    name: The full name of the vehicle, including make, model, and trim.
    description: A brief description of the vehicle, often including key features and selling points.
    make: The manufacturer of the vehicle (e.g., Ford, Toyota, BMW).
    model: The model name of the vehicle.
    type: The type of the vehicle, which is "New" for all entries in this dataset.
    year: The year the vehicle was manufactured.
    price: The price of the vehicle in USD.
    engine: Details about the engine, including type and specifications.
    cylinders: The number of cylinders in the vehicle's engine.
    fuel: The type of fuel used by the vehicle (e.g., Gasoline, Diesel, Electric).
    mileage: The mileage of the vehicle, typically in miles.
    transmission: The type of transmission (e.g., Automatic, Manual).
    trim: The trim level of the vehicle, indicating different feature sets or packages.
    body: The body style of the vehicle (e.g., SUV, Sedan, Pickup Truck).
    doors: The number of doors on the vehicle.
    exterior_color: The exterior color of the vehicle.
    interior_color: The interior color of the vehicle.
    drivetrain: The drivetrain of the vehicle (e.g., All-wheel Drive, Front-wheel Drive).
    ```

**Output:**

* A set of Cypher statements that:
    * Create nodes representing classes from the ontology.
    * Create nodes representing individual cars based on the CSV data.
    * Establish relationships between nodes according to the ontology's defined relationships.
    * Assign properties to nodes based on both ontology definitions and the CSV data, using the column descriptions for accurate mapping.

**Prompt:**

Given the `ontology.rdf` file, the `cars.csv` file with column headers, and the `column_descriptions.txt` file, generate Neo4j Cypher statements to build a knowledge graph representing the car data according to the ontology. Ensure that properties are mapped accurately based on the detailed descriptions provided in `column_descriptions.txt`.

**Instructions:**

1. **Analyze the `ontology.rdf` file:**
    * Identify the classes, properties, and relationships defined in the ontology.
    * Determine the data types for each property.
2. **Process the `cars.csv` file and `column_descriptions.txt`:**
    * Map each column header in the CSV file to a corresponding property defined in the ontology using the information provided 
in the `column_descriptions.txt` file.
    * Extract the values from each column in the CSV file to populate the properties of the car nodes.
1. **Generate Cypher statements:**
    * Create nodes for each class defined in the ontology.
    * Create nodes for each car based on the data in the CSV file.
    * Establish relationships between nodes based on the ontology's defined relationships.
    * Assign properties to nodes using the mapped column values from the CSV file and the ontology's defined properties.
    * Ensure that data types for properties are consistent with the ontology and CSV data.

**Example Output:**

The generated Cypher statements might look something like this (this is a simplified example):

```cypher
// Create nodes for classes
CREATE (Car:Car)
CREATE (Color:Color)

// Create nodes for cars
CREATE (car1:Car {make: "Toyota", model: "Camry", year: 2023, color: "Red"})

// Establish relationships
CREATE (car1)-[:HAS_COLOR]->(Color {name: "Red"})
```

**Note:** The specific Cypher statements generated will depend on the content of the `ontology.rdf` file, the `cars.csv` file, and the `column_descriptions.txt` file.

In [36]:
# cars_ontology_rdf = ""
# csv = ""
prompt = f"""**Objective:** Generate Neo4j Cypher statements to create a knowledge graph based on an RDF ontology and CSV data about cars, leveraging detailed column descriptions for accurate property mapping.

**Input:**

* **ontology.rdf:** Content of RDF file containing the ontology definition, including classes, properties, and relationships:
    ```
    {cars_ontology_rdf}
    ```

* **cars.csv:** Content of CSV file with car data and column headers:
    ```
    {csv}
    ```

* **column_descriptions.txt:** Content of text file containing detailed descriptions for each column header in the `cars.csv` file, linking them to relevant ontology properties:
    ```
    name: The full name of the vehicle, including make, model, and trim.
    description: A brief description of the vehicle, often including key features and selling points.
    make: The manufacturer of the vehicle (e.g., Ford, Toyota, BMW).
    model: The model name of the vehicle.
    type: The type of the vehicle, which is "New" for all entries in this dataset.
    year: The year the vehicle was manufactured.
    price: The price of the vehicle in USD.
    engine: Details about the engine, including type and specifications.
    cylinders: The number of cylinders in the vehicle's engine.
    fuel: The type of fuel used by the vehicle (e.g., Gasoline, Diesel, Electric).
    mileage: The mileage of the vehicle, typically in miles.
    transmission: The type of transmission (e.g., Automatic, Manual).
    trim: The trim level of the vehicle, indicating different feature sets or packages.
    body: The body style of the vehicle (e.g., SUV, Sedan, Pickup Truck).
    doors: The number of doors on the vehicle.
    exterior_color: The exterior color of the vehicle.
    interior_color: The interior color of the vehicle.
    drivetrain: The drivetrain of the vehicle (e.g., All-wheel Drive, Front-wheel Drive).
    ```

**Output:**

* A set of Cypher statements that:
    * Create nodes representing classes from the ontology.
    * Create nodes representing individual cars based on the CSV data.
    * Establish relationships between nodes according to the ontology's defined relationships.
    * Assign properties to nodes based on both ontology definitions and the CSV data, using the column descriptions for accurate mapping.

**Prompt:**

Given the `ontology.rdf` file, the `cars.csv` file with column headers, and the `column_descriptions.txt` file, generate Neo4j Cypher statements to build a knowledge graph representing the car data according to the ontology. Ensure that properties are mapped accurately based on the detailed descriptions provided in `column_descriptions.txt`.

**Instructions:**

1. **Analyze the `ontology.rdf` file:**
    * Identify the classes, properties, and relationships defined in the ontology.
    * Determine the data types for each property.
2. **Process the `cars.csv` file and `column_descriptions.txt`:**
    * Map each column header in the CSV file to a corresponding property defined in the ontology using the information provided 
in the `column_descriptions.txt` file.
    * Extract the values from each column in the CSV file to populate the properties of the car nodes.
1. **Generate Cypher statements:**
    * Create nodes for each class defined in the ontology.
    * Create nodes for each car based on the data in the CSV file.
    * Establish relationships between nodes based on the ontology's defined relationships.
    * Assign properties to nodes using the mapped column values from the CSV file and the ontology's defined properties.
    * Ensure that data types for properties are consistent with the ontology and CSV data.

**Example Output:**

The generated Cypher statements might look something like this (this is a simplified example):

```cypher
// Create nodes for classes
CREATE (Car:Car)
CREATE (Color:Color)

// Create nodes for cars
CREATE (car1:Car {{make: "Toyota", model: "Camry", year: 2023, color: "Red"}})

// Establish relationships
CREATE (car1)-[:HAS_COLOR]->(Color {{name: "Red"}})
```

**Note:** The specific Cypher statements generated will depend on the content of the `ontology.rdf` file, the `cars.csv` file, and the `column_descriptions.txt` file."""
prompt



In [38]:
resp = invokeLLM(prompt=prompt)
resp

In [None]:
print(resp.toJsonFormattedString())

{
  model: gemma2,
  created_at: 2024-07-01T18:26:37.322719Z,
  response: You're asking for a complex task that involves parsing RDF, CSV, and text files, understanding their relationships, and generating Neo4j Cypher code.  

Unfortunately, I can't directly execute code or access external files like those you've described. I'm a text-based language model, designed to process and generate text.

**However, I can guide you through the process and provide you with the conceptual framework and snippets of Cypher code that you can adapt to your specific files:**

**1. Understanding the Ontology (`ontology.rdf`)**

*   **Identify Classes:** These represent the main categories in your knowledge graph (e.g., `Car`, `Color`, `Engine`, `Transmission`).
*   **Identify Properties:** These describe attributes of the classes (e.g., `make`, `model`, `year`, `color`, `fuelType`).
*   **Identify Relationships:** These define connections between classes (e.g., `HAS_COLOR`, `HAS_ENGINE`, `HAS_TRANSMISSI

##### odp:

```md
You're asking for a complex task that involves parsing RDF, CSV, and text files, understanding their relationships, and generating Neo4j Cypher code.  

Unfortunately, I can't directly execute code or access external files like those you've described. I'm a text-based language model, designed to process and generate text.

**However, I can guide you through the process and provide you with the conceptual framework and snippets of Cypher code that you can adapt to your specific files:**

**1. Understanding the Ontology (`ontology.rdf`)**

*   **Identify Classes:** These represent the main categories in your knowledge graph (e.g., `Car`, `Color`, `Engine`, `Transmission`).
*   **Identify Properties:** These describe attributes of the classes (e.g., `make`, `model`, `year`, `color`, `fuelType`).
*   **Identify Relationships:** These define connections between classes (e.g., `HAS_COLOR`, `HAS_ENGINE`, `HAS_TRANSMISSION`).

**2. Processing the CSV Data (`cars.csv`)**

*   **Read the CSV:** Use a programming language (like Python) to read the CSV file and extract data into a structured format (e.g., a dictionary or list of dictionaries).
*   **Map Columns to Ontology Properties:** Use the `column_descriptions.txt` file to match each column header in the CSV to the corresponding property in your ontology.

**3. Generating Neo4j Cypher Statements**

*   **Create Nodes:** For each class in your ontology, create a node using the `CREATE` statement.
    ```cypher
    CREATE (Car:Car) 
    CREATE (Color:Color)
    ```
*   **Create Relationships:** Use the `CREATE` statement with the `-[:relationship_name]->` syntax to connect nodes based on the relationships defined in your ontology.
    ```cypher
    CREATE (car1)-[:HAS_COLOR]->(Color {name: "Red"})
    ```
*   **Set Node Properties:** Use curly braces `{}` to assign properties to nodes.
    ```cypher
    CREATE (car1:Car {make: "Toyota", model: "Camry", year: 2023, color: "Red"})
    ```

**Example Snippet (Simplified):**

```cypher
// Create nodes for classes
CREATE (Car:Car)
CREATE (Color:Color)

// Create a car node
CREATE (car1:Car {make: "Toyota", model: "Camry", year: 2023, color: "Red"})

// Create a color node
CREATE (redColor:Color {name: "Red"})

// Establish a relationship
CREATE (car1)-[:HAS_COLOR]->(redColor) 
```

**Important Considerations:**

*   **Data Types:** Ensure that the data types you assign to properties match the types defined in your ontology.
*   **Uniqueness:** Implement mechanisms to ensure that nodes have unique identifiers (e.g., using UUIDs).
*   **Scalability:** If you have a large dataset, consider using bulk import features in Neo4j to efficiently load the data.


```

### 2 podejscie
---

In [40]:
prompt = f"""**Objective:** Generate Neo4j Cypher statements to create a knowledge graph based on an RDF ontology and CSV data about cars, leveraging detailed column descriptions for accurate property mapping.

**Input:**

* **ontology:** ontology definition, including classes, properties, and relationships in RDF format:
    ```
    {cars_ontology_rdf}
    ```

* **cars:** car data and column headers in CSV format:
    ```
    {csv}
    ```

* **column descriptions:** detailed descriptions for each column header in the `cars`:
    ```
    name: The full name of the vehicle, including make, model, and trim.
    description: A brief description of the vehicle, often including key features and selling points.
    make: The manufacturer of the vehicle (e.g., Ford, Toyota, BMW).
    model: The model name of the vehicle.
    type: The type of the vehicle, which is "New" for all entries in this dataset.
    year: The year the vehicle was manufactured.
    price: The price of the vehicle in USD.
    engine: Details about the engine, including type and specifications.
    cylinders: The number of cylinders in the vehicle's engine.
    fuel: The type of fuel used by the vehicle (e.g., Gasoline, Diesel, Electric).
    mileage: The mileage of the vehicle, typically in miles.
    transmission: The type of transmission (e.g., Automatic, Manual).
    trim: The trim level of the vehicle, indicating different feature sets or packages.
    body: The body style of the vehicle (e.g., SUV, Sedan, Pickup Truck).
    doors: The number of doors on the vehicle.
    exterior_color: The exterior color of the vehicle.
    interior_color: The interior color of the vehicle.
    drivetrain: The drivetrain of the vehicle (e.g., All-wheel Drive, Front-wheel Drive).
    ```

**Output:**

* A set of Cypher statements that:
    * Create nodes representing classes from the ontology.
    * Create nodes representing individual cars based on the CSV data.
    * Establish relationships between nodes according to the ontology's defined relationships.
    * Assign properties to nodes based on both ontology definitions and the CSV data, using the column descriptions for accurate mapping.

**Prompt:**

Given the `ontology`, `cars`, and the `column descriptions`, generate Neo4j Cypher statements to build a knowledge graph representing the car data according to the ontology. Ensure that properties are mapped accurately based on the detailed descriptions provided in `column_descriptions`.

**Instructions:**

1. **Analyze the `ontology`:**
    * Identify the classes, properties, and relationships defined in the ontology.
    * Determine the data types for each property.
2. **Process the `cars` data and `column descriptions`:**
    * Map each column header in the CSV file to a corresponding property defined in the ontology using the information provided 
in the `column_descriptions` file.
    * Extract the values from each column in the CSV file to populate the properties of the car nodes.
1. **Generate Cypher statements:**
    * Create nodes for each class defined in the ontology.
    * Create nodes for each car based on the data in the CSV file.
    * Establish relationships between nodes based on the ontology's defined relationships.
    * Assign properties to nodes using the mapped column values from the CSV file and the ontology's defined properties.
    * Ensure that data types for properties are consistent with the ontology and CSV data.

**Example Output:**

The generated Cypher statements might look something like this (this is a simplified example):

```cypher
// Create nodes for classes
CREATE (Car:Car)
CREATE (Color:Color)

// Create nodes for cars
CREATE (car1:Car {{make: "Toyota", model: "Camry", year: 2023, color: "Red"}})

// Establish relationships
CREATE (car1)-[:HAS_COLOR]->(Color {{name: "Red"}})
```

**Note:** The specific Cypher statements generated will depend on the content of the `ontology.rdf` file, the `cars.csv` file, and the `column_descriptions.txt` file."""
prompt



In [41]:
resp = invokeLLM(prompt=prompt)


In [42]:
print(resp.toJsonFormattedString())

{
  "model": "gemma2",
  "created_at": "2024-07-01T19:08:45.595097Z",
  "response": "You're asking for a complex task that involves understanding RDF ontology definitions, parsing CSV data, and generating Neo4j Cypher statements.  

Unfortunately, I can't directly process and interpret files like `ontology.rdf`, `cars.csv`, and `column_descriptions.txt`. My capabilities lie in understanding and generating text.

**Here's a breakdown of the steps involved and how I can help:**

1. **Ontology Understanding:**
   - I can help you analyze the `ontology.rdf` file if you provide me with its content as plain text. I can identify classes, properties, and relationships within the ontology.

2. **CSV Data Processing:**
   - I can help you process the `cars.csv` data if you provide it as plain text. I can identify headers (column names) and values.

3. **Cypher Statement Generation:**
   - I can help you generate basic Cypher statements based on the ontology and CSV data you provide. However, the

In [43]:
print(resp.toShortJson())

{
  "model": gemma2,
  "created_at": 2024-07-01T19:08:45.595097Z,
  "response": You're asking for a complex task that involves understanding RDF ontology definitions, parsing CSV data, and generating Neo4j Cypher statements.  

Unfortunately, I can't directly process and interpret files like `ontology.rdf`, `cars.csv`, and `column_descriptions.txt`. My capabilities lie in understanding and generating text.

**Here's a breakdown of the steps involved and how I can help:**

1. **Ontology Understanding:**
   - I can help you analyze the `ontology.rdf` file if you provide me with its content as plain text. I can identify classes, properties, and relationships within the ontology.

2. **CSV Data Processing:**
   - I can help you process the `cars.csv` data if you provide it as plain text. I can identify headers (column names) and values.

3. **Cypher Statement Generation:**
   - I can help you generate basic Cypher statements based on the ontology and CSV data you provide. However, the mapp

##### odp:

```md
   You're asking for a complex task that involves understanding RDF ontology definitions, parsing CSV data, and generating Neo4j Cypher statements.  

   Unfortunately, I can't directly process and interpret files like `ontology.rdf`, `cars.csv`, and `column_descriptions.txt`. My capabilities lie in understanding and generating text.

   **Here's a breakdown of the steps involved and how I can help:**

   1. **Ontology Understanding:**
      - I can help you analyze the `ontology.rdf` file if you provide me with its content as plain text. I can identify classes, properties, and relationships within the ontology.

   2. **CSV Data Processing:**
      - I can help you process the `cars.csv` data if you provide it as plain text. I can identify headers (column names) and values.

   3. **Cypher Statement Generation:**
      - I can help you generate basic Cypher statements based on the ontology and CSV data you provide. However, the mapping between CSV columns and ontology properties will require your guidance.

   **To get started, please provide the following:**

   * **Content of `ontology.rdf`:** Paste the contents of the ontology file as plain text.
   * **`cars.csv` Data:** Paste the contents of the CSV file as plain text, separated by commas.
   * **`column_descriptions.txt`:** Paste the contents of the file as plain text.

   **Example:**

   Let's say your `ontology.rdf` file has a class named "Car" with properties like "make", "model", "year". Once you provide the ontology and CSV data, I can help you generate Cypher statements like:
```
```cypher
   CREATE (c:Car {make: "Toyota", model: "Camry", year: 2023});
```

### 3 podejscie
---

#### New prompt

##### Lets ask llm for better prompt:
```md
You are helpful AI prompt researcher who helps with given tasks.
Please write prompt to LLM to create Cypher code for neo4j database that will read csv file with headers and crete objects, properties and relations, when you are given text content:
 - ontology in rdf format
 - descriptions of csv column headers
```

###### descriptions of csv column headers:

```yaml
name: The full name of the vehicle, including make, model, and trim.
description: A brief description of the vehicle, often including key features and selling points.
make: The manufacturer of the vehicle (e.g., Ford, Toyota, BMW).
model: The model name of the vehicle.
type: The type of the vehicle, which is "New" for all entries in this dataset.
year: The year the vehicle was manufactured.
price: The price of the vehicle in USD.
engine: Details about the engine, including type and specifications.
cylinders: The number of cylinders in the vehicle's engine.
fuel: The type of fuel used by the vehicle (e.g., Gasoline, Diesel, Electric).
mileage: The mileage of the vehicle, typically in miles.
transmission: The type of transmission (e.g., Automatic, Manual).
trim: The trim level of the vehicle, indicating different feature sets or packages.
body: The body style of the vehicle (e.g., SUV, Sedan, Pickup Truck).
doors: The number of doors on the vehicle.
exterior_color: The exterior color of the vehicle.
interior_color: The interior color of the vehicle.
drivetrain: The drivetrain of the vehicle (e.g., All-wheel Drive, Front-wheel Drive).
```

In [51]:
resp = invokeLLM("""You are helpful AI prompt researcher who helps with given tasks.
Please write prompt to LLM to create Cypher code for neo4j database that will read csv file with headers and crete objects, properties and relations, when you are given text content:
 - ontology in rdf format
 - descriptions of csv column headers""")
resp.response

'## Prompt for LLM to Generate Neo4j Cypher Code\n\n**Context:**\n\nYou will be given two pieces of text:\n\n1. **Ontology in RDF format:** This defines the classes, properties, and relationships relevant to your data. \n2. **Descriptions of CSV column headers:** These describe the meaning and data type of each column in your CSV file.\n\n**Task:**\n\nGenerate Neo4j Cypher code that will:\n\n* Read a CSV file with headers.\n* Create nodes representing entities based on the ontology and column headers.\n* Assign properties to nodes based on column values and ontology definitions.\n* Establish relationships between nodes based on the ontology and column header relationships.\n\n**Example Input:**\n\n**Ontology (RDF):**\n\n```\n@prefix : <http://example.org/> .\n:Person a :NamedEntity ;\n    :name :Name .\n:Book a :NamedEntity ;\n    :author :Person ;\n    :title :String .\n```\n\n**CSV Column Headers Descriptions:**\n\n* **name:** String, represents the name of a person.\n* **title:** St

**Question:**

Given the name and description of column headers classify what cypher neo4j type they should be i.e. string, integer, float

```yaml
name: The full name of the vehicle, including make, model, and trim.
description: A brief description of the vehicle, often including key features and selling points.
make: The manufacturer of the vehicle (e.g., Ford, Toyota, BMW).
model: The model name of the vehicle.
type: The type of the vehicle, which is "New" for all entries in this dataset.
year: The year the vehicle was manufactured.
price: The price of the vehicle in USD.
engine: Details about the engine, including type and specifications.
cylinders: The number of cylinders in the vehicle's engine.
fuel: The type of fuel used by the vehicle (e.g., Gasoline, Diesel, Electric).
mileage: The mileage of the vehicle, typically in miles.
transmission: The type of transmission (e.g., Automatic, Manual).
trim: The trim level of the vehicle, indicating different feature sets or packages.
body: The body style of the vehicle (e.g., SUV, Sedan, Pickup Truck).
doors: The number of doors on the vehicle.
exterior_color: The exterior color of the vehicle.
interior_color: The interior color of the vehicle.
drivetrain: The drivetrain of the vehicle (e.g., All-wheel Drive, Front-wheel Drive).
```



**Response:**

Here's a breakdown of the Neo4j data types that would best suit each column header:

**String**

* **name:**  The full name of the vehicle will likely contain letters, spaces, and potentially special characters.
* **description:**  A text description.
* **make:**  Vehicle manufacturers are names (strings).
* **model:** Vehicle models are names (strings).
* **engine:**  Engine details often include text descriptions.
* **fuel:**  Fuel types are typically named (strings).
* **transmission:**  Transmission types are named (strings).
* **trim:** Trim levels are named (strings).
* **body:** Body styles are named (strings).
* **exterior_color:**  Colors are represented as strings.
* **interior_color:** Colors are represented as strings.
* **drivetrain:** Drivetrain types are named (strings).

**Integer**

* **year:**  Vehicle years are numerical values.
* **cylinders:**  The number of cylinders is a whole number.
* **doors:** The number of doors is a whole number.

**Float**

* **price:**  Vehicle prices often include decimals.
* **mileage:** Mileage can include decimals (e.g., 50,345 miles). 



**Important Notes:**

* **Data Validation:** Always validate your data to ensure it matches the expected type. Neo4j provides ways to enforce data 
types during ingestion.
* **Relationships:** Consider how these columns relate to each other. For example, you might have relationships between 
"Vehicle" nodes and "Engine" nodes, or "Vehicle" nodes and "User" nodes.


Let me know if you have any other questions about Neo4j data types or how to model your vehicle data!


In [55]:
headers_descr = """
name (String) : The full name of the vehicle, including make, model, and trim.
description (String) : A brief description of the vehicle, often including key features and selling points.
make (String) : The manufacturer of the vehicle (e.g., Ford, Toyota, BMW).
model (String) : The model name of the vehicle.
type (String) : The type of the vehicle, which is "New" for all entries in this dataset.
year (Integer) : The year the vehicle was manufactured.
price (Float) : The price of the vehicle in USD.
engine (String) : Details about the engine, including type and specifications.
cylinders (Integer) : The number of cylinders in the vehicle's engine.
fuel (String) : The type of fuel used by the vehicle (e.g., Gasoline, Diesel, Electric).
mileage (Float) : The mileage of the vehicle, typically in miles.
transmission (String) : The type of transmission (e.g., Automatic, Manual).
trim (String) : The trim level of the vehicle, indicating different feature sets or packages.
body (String) : The body style of the vehicle (e.g., SUV, Sedan, Pickup Truck).
doors (Integer) : The number of doors on the vehicle.
exterior_color (String) : The exterior color of the vehicle.
interior_color (String) : The interior color of the vehicle.
drivetrain (String) : The drivetrain of the vehicle (e.g., All-wheel Drive, Front-wheel Drive).
"""

In [56]:
prompt = f"""Generate Neo4j Cypher Code

Context:

You are given two pieces of text:
1. Ontology in RDF format: This defines the classes, properties, and relationships relevant to cars. 
2. Descriptions of CSV column headers: These describe the meaning and data type of each column in CSV file.

Task:

Generate Neo4j Cypher code that will:
* Read a CSV file with headers.
* Create nodes representing entities based on the ontology and column headers.
* Assign properties to nodes based on column values and ontology definitions.
* Establish relationships between nodes based on the ontology and column header relationships.

Ontology (RDF):
{cars_ontology_rdf}


CSV Column Headers Descriptions:
{headers_descr}


Expected Output:
Cypher code that reads a CSV file and creates nodes and relationships based on the provided ontology and column header descriptions.

Additional Notes:
* The LLM should handle different data types (string, integer, float) appropriately.
* The code should be efficient and consider potential performance optimizations.
* The LLM should provide comments explaining the generated code for clarity.
"""
# This prompt provides a clear framework for the LLM to generate Neo4j Cypher code based on the given input. It emphasizes the importance of understanding the ontology and column header descriptions to accurately represent the data in the Neo4j database.


In [57]:
resp = invokeLLM(prompt=prompt)

In [59]:
print(resp.toShortJson())

{
  "model": gemma2,
  "created_at": 2024-07-01T20:54:18.793825Z,
  "response": ```cypher
// Import the CSV file
LOAD CSV FROM 'path/to/your/vehicle_data.csv' USING  
    csvParserOptions = {
      'fieldSeparator': ',',
      'quoteCharacter': '"'
    } AS row

// Create nodes for each vehicle based on the provided schema
CREATE (v:Vehicle {
  name: row.name,
  description: row.description,
  make: row.make,
  model: row.model,
  type: row.type,
  year: row.year,
  price: row.price,
  engine: row.engine,
  cylinders: row.cylinders,
  fuel: row.fuel,
  mileage: row.mileage,
  transmission: row.transmission,
  trim: row.trim,
  body: row.body,
  doors: row.doors,
  exterior_color: row.exterior_color,
  interior_color: row.interior_color,
  drivetrain: row.drivetrain
})

// Add relationships based on the ontology
// Example: Relationship between Vehicle and Brand
MATCH (v:Vehicle) WHERE v.make IS NOT NULL
CREATE (b:Brand {name: v.make})
MERGE (v)-[:MANUFACTURES]->(b)

// Add more relatio

**Response:**

```cypher
// Import the CSV file
LOAD CSV FROM 'path/to/your/vehicle_data.csv' USING  
    csvParserOptions = {
      'fieldSeparator': ',',
      'quoteCharacter': '"'
    } AS row

// Create nodes for each vehicle based on the provided schema
CREATE (v:Vehicle {
  name: row.name,
  description: row.description,
  make: row.make,
  model: row.model,
  type: row.type,
  year: row.year,
  price: row.price,
  engine: row.engine,
  cylinders: row.cylinders,
  fuel: row.fuel,
  mileage: row.mileage,
  transmission: row.transmission,
  trim: row.trim,
  body: row.body,
  doors: row.doors,
  exterior_color: row.exterior_color,
  interior_color: row.interior_color,
  drivetrain: row.drivetrain
})

// Add relationships based on the ontology
// Example: Relationship between Vehicle and Brand
MATCH (v:Vehicle) WHERE v.make IS NOT NULL
CREATE (b:Brand {name: v.make})
MERGE (v)-[:MANUFACTURES]->(b)

// Add more relationships as needed based on your ontology



```

**Explanation:**

1. **Import CSV Data:**
   - The `LOAD CSV` statement imports the data from your CSV file.
   - `csvParserOptions` configures how the CSV data is parsed, handling delimiters and quotes.

2. **Create Vehicle Nodes:**
   - `CREATE (v:Vehicle { ... })` creates nodes representing vehicles.
   - Each node's properties are set using the values from the CSV row.

3. **Add Relationships:**
   - The code provides an example of a relationship between a `Vehicle` and a `Brand`.
   - `MATCH (v:Vehicle) WHERE v.make IS NOT NULL` finds vehicles with a defined make.
   - `CREATE (b:Brand {name: v.make})` creates a `Brand` node if it doesn't already exist using the vehicle's `make`.
   - `MERGE (v)-[:MANUFACTURES]->(b)` establishes a `MANUFACTURES` relationship between the vehicle and the brand.

4. **Expand Relationships:**
   - You'll need to add more `MATCH`, `CREATE`, and `MERGE` statements to create relationships based on your ontology's other properties and concepts (e.g., `hasEngine`, `usesFuel`, `hasTransmission`, etc.).



**Important Considerations:**

- **File Path:** Replace `'path/to/your/vehicle_data.csv'` with the actual path to your CSV file.
- **Ontology Mapping:** Carefully map your CSV column headers to the properties defined in your ontology.
- **Relationship Types:** Define appropriate relationship types based on the relationships described in your ontology.
- **Performance:** For large datasets, consider using batch loading and indexing to improve performance.


### Title....
---