In [1]:
import openai

import os

import pandas as pd

import random

In [2]:
# Take the API key from the environment so it never appears in the notebook.
# If OPENAI_API_KEY is unset this assigns None.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [3]:
def query_openai(model, prompt, max_tokens, temperature):
    """Send one request to `openai.Completion.create` and return the generated text.

    Args:
        model: Model identifier forwarded as `model`.
        prompt: Prompt text forwarded as `prompt`.
        max_tokens: Forwarded as `max_tokens`.
        temperature: Forwarded as `temperature`.

    Returns:
        The `'text'` field of the first entry in the response's `'choices'`.
    """
    # The remaining sampling parameters are pinned to fixed values.
    request = dict(
        model=model,
        prompt=prompt,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    completion = openai.Completion.create(**request)

    first_choice = completion['choices'][0]
    return first_choice['text']

# Quick smoke test of the helper: request a 3-token continuation of a short prompt.
query_openai(
    model='text-davinci-003',
    prompt='Adobe, empowering the world through',
    max_tokens=3,
    temperature=0.7,
)

' digital experiences\n'

### Load data from the CFT paper, and group test cases into two categories: warm vs. cold cities

In [4]:
# Location of the cities test CSV, resolved relative to the current working directory.
_CITIES_DATA_PARTS = (
    'compositional-fine-tuning',
    'data',
    'testing',
    'cities_domain',
    'test_data_cities.csv',
)
CITIES_DATA = os.path.join(os.getcwd(), *_CITIES_DATA_PARTS)

In [5]:
# Read the cities table; the first CSV column is used as the row index.
df_cities = pd.read_csv(filepath_or_buffer=CITIES_DATA, index_col=0)

# Preview the first five rows.
df_cities.head(n=5)

Unnamed: 0,city,country,continent,avg_temp,pop
1,Belize City,Belize,North America,26.7(80.1),0.1
2,Hamilton,Bermuda,North America,22.3(72.1),0.1
3,Calgary,Canada,North America,4.4(39.9),1.2
5,Halifax,Canada,North America,7.5(45.5),0.4
7,Ottawa,Canada,North America,6.6(43.9),1.0


In [6]:
# Cut-offs applied to the leading number of `avg_temp`:
# a city is "warm" at or above WARM_THRESHOLD and "cold" at or below COLD_THRESHOLD.
# Cities strictly between the two thresholds belong to neither group.
WARM_THRESHOLD = 20
COLD_THRESHOLD = 10


def _avg_temp_value(raw):
    """Return the leading number of an `avg_temp` cell (e.g. '26.7(80.1)' -> 26.7).

    The text before the first '(' is parsed as a float. The Unicode minus sign
    (U+2212) is replaced by an ASCII hyphen first, because `float()` does not
    accept it.
    """
    return float(raw.split('(')[0].replace('−', '-'))


# Walk the frame once and keep a "City, Country" label next to its parsed temperature.
city_temps = [
    (f"{row['city']}, {row['country']}", _avg_temp_value(row['avg_temp']))
    for _, row in df_cities.iterrows()
]

# Filter with the named thresholds so changing the constants above takes effect.
warm_cities = [label for label, temp in city_temps if temp >= WARM_THRESHOLD]
cold_cities = [label for label, temp in city_temps if temp <= COLD_THRESHOLD]

len(warm_cities), len(cold_cities)

(112, 39)

In [7]:
# Downsample the warm group to the size of the cold group so both classes are balanced.
# A dedicated generator with a fixed seed makes the selection reproducible after
# Restart Kernel -> Run All; the seed value itself is arbitrary.
RANDOM_SEED = 42
warm_cities = random.Random(RANDOM_SEED).sample(warm_cities, len(cold_cities))

len(warm_cities), len(cold_cities)

(39, 39)

In [8]:
# Peek at the first ten sampled warm cities.
warm_cities[0:10]

['Tripoli, Libya',
 'Vientiane, Laos',
 'Suva, Fiji',
 'La Paz, Mexico',
 'Garissa, Kenya',
 'Ho Chi Minh City, Vietnam',
 'Brasília, Brazil',
 'Makassar, Indonesia',
 'Brisbane, Australia',
 'Hat Yai, Thailand']

### For each pivot city, generate 20 candidate cities with similar weather

In [9]:
# Completion settings shared by every candidate request.
CANDIDATE_MODEL = 'text-davinci-003'
CANDIDATE_MAX_TOKENS = 180
CANDIDATE_TEMPERATURE = 0.7


def _parse_candidates(response):
    """Extract the items of a numbered list ("1. Foo\\n2. Bar ...") from a completion.

    Each non-empty line is examined separately: if it starts with digits followed
    by '. ', the rest of the line is kept as one candidate. Only the first '. '
    on a line is treated as the separator, so names that themselves contain '. '
    (e.g. "St. Petersburg, Russia") stay intact. If no line looks numbered, all
    non-empty lines are returned unchanged so nothing is silently lost.

    NOTE(review): a completion cut off by the token limit may end with a partial
    last item; it is kept as-is.
    """
    lines = [line.strip() for line in response.splitlines() if line.strip()]

    numbered = []
    for line in lines:
        marker, sep, rest = line.partition('. ')
        if sep and marker.isdigit() and rest.strip():
            numbered.append(rest.strip())

    return numbered if numbered else lines


def _collect_candidates(cities, weather):
    """Query the model once per pivot city and return one record per city.

    Each record is a dict with keys 'weather', 'pivot' and 'candidates'
    (the parsed candidates joined with '; ').
    """
    # Print a progress dot roughly every tenth of the list; max(1, ...) avoids a
    # modulo-by-zero when there are fewer than 10 cities.
    step = max(1, len(cities) // 10)

    records = []
    for i, city in enumerate(cities):
        if i % step == 0:
            print('.', end='')

        candidate_prompt = f"Give me a list of 20 touristic cities with a similar weather to {city}"
        response = query_openai(
            CANDIDATE_MODEL, candidate_prompt, CANDIDATE_MAX_TOKENS, CANDIDATE_TEMPERATURE
        )
        candidates = _parse_candidates(response)

        records.append({'weather' : weather, 'pivot' : city, 'candidates' : '; '.join(candidates)})

    return records


candidate_tuples = _collect_candidates(warm_cities, 'warm')

print('\nDone with warm cities, starting cold cities.')

candidate_tuples += _collect_candidates(cold_cities, 'cold')

df_candidates = pd.DataFrame(candidate_tuples)
df_candidates.head()

.............Done with warm cities, starting cold cities.
.............

Unnamed: 0,weather,pivot,candidates
0,warm,"Tripoli, Libya","Tunis, Tunisia; Marrakech, Morocco; Algiers, A..."
1,warm,"Vientiane, Laos","Hanoi, Vietnam; Siem Reap, Cambodia; Bangkok, ..."
2,warm,"Suva, Fiji","Apia, Samoa; Port Vila, Vanuatu; Honiara, Solo..."
3,warm,"La Paz, Mexico","Cancun, Mexico; Cabo San Lucas, Mexico; Mazatl..."
4,warm,"Garissa, Kenya","Mombasa, Kenya; Lamu, Kenya; Wajir, Kenya; Kis..."


### Save for future use

In [10]:
# Persist the candidates table to the working directory.
df_candidates.to_csv(path_or_buf='candidates.csv')

In [11]:
# Shell escape: list the CSV files in the working directory to confirm the export was written.
! ls -lah *.csv

-rw-r--r--  1 vbursztyn  staff    32K May  3 23:28 candidates.csv
