In [1]:
from openai import OpenAI
import base64
import json
import os
import glob

API_KEY = open('OPENAI_KEY').read().strip()
INSTRUCTIONS = open('ANALYZE.md').read().strip()
DIR = 'sample/affb/'
FILENAME = 'sample/affb/IMG_0FBB1752EA17-29.jpeg'

# Use key, instructions, and filename to generate a structured response from openai api

client = OpenAI(api_key=API_KEY)

def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

def analyze_single_file(filename):
    # Getting the Base64 string
    base64_image = encode_image(filename)

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    { "type": "text", "text": INSTRUCTIONS },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        response_format={
            "type": "json_object",
        }
    )

    content = completion.choices[0].message.content
    content = content.split('{', 1)[1]
    content = content.rsplit('}', 1)[0]
    content = '{' + content + '}'
    content = json.loads(content)

    record = dict(
        filename=filename.split('/')[-1],
        screenshot_type=content['screenshot_type'],
        transition_bar_event=content['transition_bar_transition_event'],
        transition_bar_position=content['transition_bar_before_during_after'],
        transition_bar_certainty=content['transition_bar_certainty'],
        content=content['content'],
        content_certainty=content['content'],
        future_scenario_tagline=content['future_scenario_tagline'],
        future_scenario_description=content['future_scenario_description'],
        future_scenario_topics=content['future_scenario_topics'],
        detected_language=content['detected_language'],
    )
    return record

def save_results(records):
    output_file = os.path.join(DIR, 'records.json')
    with open(output_file, 'w') as f:
        json.dump(records, f, indent=4)

def load_previous_results():
    output_file = os.path.join(DIR, 'records.json')
    if os.path.exists(output_file):
        with open(output_file, 'r') as f:
            return json.load(f)
    return []

def analyze_files(dirname):
    records = load_previous_results()
    existing_filenames = {record['filename'] for record in records}
    paths = glob.glob(os.path.join(dirname, '*.jpeg'))
    for i, path in enumerate(paths):
        filename = path.split('/')[-1]
        if filename in existing_filenames:
            print(f"Skipping {i + 1}/{len(paths)}: {path} (already analyzed)")
            continue
        print(f"Analyzing {i + 1}/{len(paths)}: {path}")
        try:
            record = analyze_single_file(path)
            records.append(record)
        except Exception as e:
            print(f"Error analyzing {path}: {e}")
        save_results(records)
    print(f"Analysis complete. Records saved to {os.path.join(DIR, 'records.json')}.")
    return records

records = analyze_files(DIR)


Analyzing 1/163: sample/affb/IMG_0FBB1752EA17-2.jpeg
Analyzing 2/163: sample/affb/IMG_0FBB1752EA17-85.jpeg
Analyzing 3/163: sample/affb/IMG_0FBB1752EA17-93.jpeg
Analyzing 4/163: sample/affb/IMG_0FBB1752EA17-136.jpeg
Analyzing 5/163: sample/affb/IMG_0FBB1752EA17-50.jpeg
Analyzing 6/163: sample/affb/IMG_0FBB1752EA17-161.jpeg
Analyzing 7/163: sample/affb/IMG_0FBB1752EA17-11.jpeg
Analyzing 8/163: sample/affb/IMG_0FBB1752EA17-120.jpeg
Analyzing 9/163: sample/affb/IMG_0FBB1752EA17-46.jpeg
Analyzing 10/163: sample/affb/IMG_0FBB1752EA17-157.jpeg
Analyzing 11/163: sample/affb/IMG_0FBB1752EA17-100.jpeg
Analyzing 12/163: sample/affb/IMG_0FBB1752EA17-66.jpeg
Analyzing 13/163: sample/affb/IMG_0FBB1752EA17-89.jpeg
Analyzing 14/163: sample/affb/IMG_0FBB1752EA17-116.jpeg
Analyzing 15/163: sample/affb/IMG_0FBB1752EA17-70.jpeg
Analyzing 16/163: sample/affb/IMG_0FBB1752EA17-141.jpeg
Analyzing 17/163: sample/affb/IMG_0FBB1752EA17-27.jpeg
Analyzing 18/163: sample/affb/IMG_0FBB1752EA17-140.jpeg
Analyzing 19

In [2]:
import requests
import base64
import dataflows as DF
import dataflows_airtable as DFA

AIRTABLE_API_KEY = open('AIRTABLE_KEY').read().strip()
AIRTABLE_BASE_ID = 'appdScgbXnseNeg1S'

records = load_previous_results()

DF.Flow(
    records,
    DF.update_resource(-1, name='records'),
    DFA.dump_to_airtable({
        (AIRTABLE_BASE_ID, 'Auto-extracted'): {
            'resource-name': 'records',
            'typecast': True,
        }
    }, apikey=AIRTABLE_API_KEY)
).process()

# re-read the records from airtable
existing = DF.Flow(
    DFA.load_from_airtable(AIRTABLE_BASE_ID, 'Auto-extracted', 'Grid view', apikey=AIRTABLE_API_KEY),
    DF.select_fields(['filename', DFA.AIRTABLE_ID_FIELD])
).results()[0][0]

key_for_filename = dict(
    (record['filename'], record[DFA.AIRTABLE_ID_FIELD]) for record in existing
)
for record in records:
    filename = record['filename']
    if filename in key_for_filename:
        record[DFA.AIRTABLE_ID_FIELD] = key_for_filename[filename]
    else:
        record[DFA.AIRTABLE_ID_FIELD] = None
save_results(records)

for record in existing:
    filename = record['filename']
    airtable_id = record[DFA.AIRTABLE_ID_FIELD]
    response = requests.post(
        f'https://content.airtable.com/v0/{AIRTABLE_BASE_ID}/{airtable_id}/image/uploadAttachment',
        json={
            'filename': filename,
            'contentType': 'image/jpeg',
            'file': base64.b64encode(open(os.path.join(DIR, filename), 'rb').read()).decode('utf-8')
        },
        headers={
            'Authorization': f'Bearer {AIRTABLE_API_KEY}',
        }
    )
    if response.status_code == 200:
        print(f"Uploaded {filename} to Airtable with ID {airtable_id}")
    else:
        print(f"Failed to upload {filename} to Airtable: {response.status_code} {response.text}")

Loading records for appdScgbXnseNeg1S/Auto-extracted...
Loaded 0 records for appdScgbXnseNeg1S/Auto-extracted
Loaded 325 records for appdScgbXnseNeg1S/Auto-extracted
Uploaded IMG_0FBB1752EA17-2.jpeg to Airtable with ID recJDjruIz4M1NvW4
Uploaded IMG_0FBB1752EA17-85.jpeg to Airtable with ID recy3pOOEibCxAsMH
Uploaded IMG_0FBB1752EA17-93.jpeg to Airtable with ID recHSO2ciuM9bMk5P
Uploaded IMG_0FBB1752EA17-136.jpeg to Airtable with ID recEpuUhZOmfByYwq
Uploaded IMG_0FBB1752EA17-50.jpeg to Airtable with ID recjMinDMRbnqOm3D
Uploaded IMG_0FBB1752EA17-161.jpeg to Airtable with ID recJ09eyx0F67SuVS
Uploaded IMG_0FBB1752EA17-11.jpeg to Airtable with ID recl3UHJu7mR7T4TJ
Uploaded IMG_0FBB1752EA17-120.jpeg to Airtable with ID recCmhKnHPauPO9Em
Uploaded IMG_0FBB1752EA17-46.jpeg to Airtable with ID rec0UhzJZRDk9y85x
Uploaded IMG_0FBB1752EA17-157.jpeg to Airtable with ID recvIZZtGI63yu61U
Uploaded IMG_0FBB1752EA17-100.jpeg to Airtable with ID recPzuIpOa66k8vzq
Uploaded IMG_0FBB1752EA17-66.jpeg to A

FileNotFoundError: [Errno 2] No such file or directory: 'sample/affb/IMG_0FBB1752EA17-130.jpeg'

In [2]:
from openai import OpenAI
import json

# load records from json file
def load_records():
    with open('sample/affb/records.json', 'r') as f:
        return json.load(f)
    
# Create a new OpenAI client
OPENAI_KEY = open('OPENAI_KEY').read().strip()
client = OpenAI(api_key=OPENAI_KEY)

# For each record, calculate embeddings for the 'future_scenario_description' field
def calculate_embeddings(records):
    for record in records:
        description = record['future_scenario_description']
        completion = client.embeddings.create(
            model="text-embedding-3-large",
            input=description
        )
        embedding = completion.data[0].embedding
        record['embedding'] = embedding
    return records

# Save the records with embeddings to a new json file
def save_records_with_embeddings(records):
    with open('sample/affb/records_with_embeddings.json', 'w') as f:
        json.dump(records, f, indent=4)

# Load the records from the json file
records = load_records()
# Calculate embeddings for the records
records_with_embeddings = calculate_embeddings(records)
# Save the records with embeddings to a new json file
save_records_with_embeddings(records_with_embeddings)