In [1]:
from gliner_finetune.convert import convert
from gliner_finetune.train import train_model
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load your data
with open('bird-original.json', 'r') as file:
    data = json.load(file)

# Convert data
training_data = convert(data, project_path='', train_split=0.8, eval_split=0.2, test_split=0.0,
                        train_file='train.json', eval_file='eval.json', test_file='test.json', overwrite=True)

Data saved to assets/train.json
Data saved to assets/eval.json


In [3]:
train_model(model="urchade/gliner_small-v2.1", train_data="assets/train.json", 
            eval_data="assets/eval.json", project="")

step: 9 | epoch: 1 | loss: 130.55:  90%|█████████ | 9/10 [00:08<00:00,  1.15it/s]

Step=9
P: 54.72%	R: 32.22%	F1: 40.56%



step: 9 | epoch: 1 | loss: 130.55: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]


In [5]:
import json

def create_prompt(json_data):
    """
    Generate a prompt from a JSON string to guide the AI in generating a specific type of output.
    This function assumes the JSON string represents a structured description of an object.

    :param json_data: A JSON string containing structured data about an object.
    :return: A string that is a well-formed prompt for the AI.
    """
    try:
        data = json.loads(json_data)
    except json.JSONDecodeError:
        raise ValueError("Invalid JSON data provided.")

    # Construct the initial part of the prompt
    prompt = "Given the following JSON data, generate a list of 5 different comprehensive texts. This should only return valid JSON as a list of dictionaries. Do not say anything else. Each description should mimic the structure of the original input:\n\n"

    # Append the JSON data as a string directly into the prompt
    prompt += "JSON Data:\n" + json.dumps(data, indent=2) + "\n"

    # Instruct the AI to generate 5 different examples based on the data
    prompt += "\nGenerate 5 different examples in JSON format that follow the structure and content of the provided data."

    return prompt

# Example usage
example_data = {
    "text": "The Alpine Swift primarily consumes flying insects such as wasps, bees, and flies. It captures its prey mid-air while swiftly flying through the alpine skies. It nests in high, rocky mountain crevices where it uses feathers and small sticks to construct a simple yet secure nesting environment.",
    "generic_plant_food": [],
    "generic_animal_food": ["flying insects"],
    "plant_food": [],
    "specific_animal_food": ["wasps", "bees", "flies"],
    "location_nest": ["rocky mountain crevices"],
    "item_nest": ["feathers", "small sticks"]
}

# Convert example_data to JSON string
json_data = json.dumps(example_data)

# Create prompt
prompt = create_prompt(json_data)
print(prompt)


Given the following JSON data, generate a list of 5 different comprehensive texts. This should only return valid JSON as a list of dictionaries. Do not say anything else. Each description should mimic the structure of the original input:

JSON Data:
{
  "text": "The Alpine Swift primarily consumes flying insects such as wasps, bees, and flies. It captures its prey mid-air while swiftly flying through the alpine skies. It nests in high, rocky mountain crevices where it uses feathers and small sticks to construct a simple yet secure nesting environment.",
  "generic_plant_food": [],
  "generic_animal_food": [
    "flying insects"
  ],
  "plant_food": [],
  "specific_animal_food": [
    "wasps",
    "bees",
    "flies"
  ],
  "location_nest": [
    "rocky mountain crevices"
  ],
  "item_nest": [
    "feathers",
    "small sticks"
  ]
}

Generate 5 different examples in JSON format that follow the structure and content of the provided data.


In [34]:
import openai
import os
from dotenv import load_dotenv

def call_api_and_save(source_json, num_calls):
    # Load environment variables from .env file
    load_dotenv()
    api_key = os.getenv("OPENAI_API_KEY")
    if api_key is None:
        raise ValueError("API Key must be set in the environment variables.")
    client = openai.OpenAI(api_key=api_key)  # Initialize the OpenAI client
    results_list = []  # Store raw string data
    json_results_list = []  # Store converted JSON data

    for _ in range(num_calls):
        prompt = create_prompt(source_json)
        try:
            response = client.chat.completions.create(
                messages=[
                    {"role": "user", "content": prompt}
                ],
                model="gpt-4"  # Use the desired model
            )
            results_list.append(response.choices[0].message.content)
            
            # Attempt to convert string data to JSON and save
            try:
                json_data = json.loads(response.choices[0].message.content)
                json_results_list.append(json_data)
            except json.JSONDecodeError:
                print("Failed to convert response to JSON.")

            # Save the raw string results
            with open('raw_responses.json', 'w') as f:
                json.dump(results_list, f)
            
            # Save the JSON-parsed results
            with open('parsed_responses.json', 'w') as f:
                json.dump(json_results_list, f)

        except Exception as e:
            print(f"API call failed: {e}")
            break

    return results_list, json_results_list


num_calls = 3  # Number of times the user wants to send the prompt
results = call_api_and_save(json_data, num_calls)
print(results)

(['[\n  {\n    "text": "The Red Fox primarily feeds on small mammals such as rabbits, squirrels, and rats. It hunts its prey on the ground, using its keen senses and speed. It makes its home in burrows, where it uses leaves and plant fibres to create a cozy and safe environment.",\n    "generic_plant_food": [],\n    "generic_animal_food": [\n      "small mammals"\n    ],\n    "plant_food": [],\n    "specific_animal_food": [\n      "rabbits",\n      "squirrels",\n      "rats"\n    ],\n    "location_nest": [\n      "burrows"\n    ],\n    "item_nest": [\n      "leaves",\n      "plant fibres"\n    ]\n  },\n  {\n    "text": "The Kodiak Bear primarily feeds on huge mammals like elks or deer and also catches salmon in mid-air during water runoffs. It uses its robust claws to capture its prey and also to dig burrows in the woodland or mountain terrains for nesting, lined with grass for a comfortable habitat.",\n    "generic_plant_food": [],\n    "generic_animal_food": [\n      "huge mammals"\n

In [35]:
results

(['[\n  {\n    "text": "The Red Fox primarily feeds on small mammals such as rabbits, squirrels, and rats. It hunts its prey on the ground, using its keen senses and speed. It makes its home in burrows, where it uses leaves and plant fibres to create a cozy and safe environment.",\n    "generic_plant_food": [],\n    "generic_animal_food": [\n      "small mammals"\n    ],\n    "plant_food": [],\n    "specific_animal_food": [\n      "rabbits",\n      "squirrels",\n      "rats"\n    ],\n    "location_nest": [\n      "burrows"\n    ],\n    "item_nest": [\n      "leaves",\n      "plant fibres"\n    ]\n  },\n  {\n    "text": "The Kodiak Bear primarily feeds on huge mammals like elks or deer and also catches salmon in mid-air during water runoffs. It uses its robust claws to capture its prey and also to dig burrows in the woodland or mountain terrains for nesting, lined with grass for a comfortable habitat.",\n    "generic_plant_food": [],\n    "generic_animal_food": [\n      "huge mammals"\n

In [28]:
results[0]

ChatCompletion(id='chatcmpl-9DSs7Gluh6IYlnzugOVOPHp7Q33uN', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='[\n  {\n    "text": "The Osprey is a dominant bird of prey, feeding extensively on fish. It descends from the air, often quite dramatically, to snatch its prey from water bodies. It builds elaborate nests in tall trees and cliffs using twigs and seaweed.",\n    "generic_plant_food": [],\n    "generic_animal_food": [\n      "fish"\n    ],\n    "plant_food": [],\n    "specific_animal_food": [\n      "salmon",\n      "trout",\n      "catfish"\n    ],\n    "location_nest": [\n      "tall trees",\n      "cliffs"\n    ],\n    "item_nest": [\n      "twigs",\n      "seaweed"\n    ]\n  },\n  {\n    "text": "The Nighthawk feeds on small insects and moths, which it catches during its nocturnal hunts. It constructs a simple nest on the ground, deriving protection by remaining inconspicuous among stones and leaves.",\n    "generic_plant_foo

In [29]:
results[0].message

AttributeError: 'ChatCompletion' object has no attribute 'message'

In [30]:
# Assuming 'results' is your list of ChatCompletion objects
for completion in results:
    for choice in completion.choices:
        print(choice.message.content)

[
  {
    "text": "The Osprey is a dominant bird of prey, feeding extensively on fish. It descends from the air, often quite dramatically, to snatch its prey from water bodies. It builds elaborate nests in tall trees and cliffs using twigs and seaweed.",
    "generic_plant_food": [],
    "generic_animal_food": [
      "fish"
    ],
    "plant_food": [],
    "specific_animal_food": [
      "salmon",
      "trout",
      "catfish"
    ],
    "location_nest": [
      "tall trees",
      "cliffs"
    ],
    "item_nest": [
      "twigs",
      "seaweed"
    ]
  },
  {
    "text": "The Nighthawk feeds on small insects and moths, which it catches during its nocturnal hunts. It constructs a simple nest on the ground, deriving protection by remaining inconspicuous among stones and leaves.",
    "generic_plant_food": [],
    "generic_animal_food": [
      "small insects"
    ],
    "plant_food": [],
    "specific_animal_food": [
      "moths",
      "gnats",
      "mosquitoes"
    ],
    "locati