# Imports

In [4]:
import openai
import json
import os
from sklearn.model_selection import train_test_split

In [5]:
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Workshop/Conference Part

In [6]:
my_file = open('events_with_instanceOf.json')

events_with_instanceOf = json.load(my_file)
my_file.close()

In [7]:
print(len(events_with_instanceOf))

3556


In [8]:
workshop = 0
conference = 0
for event in events_with_instanceOf:
  if event['instanceOf'] == 'http://www.wikidata.org/entity/Q40444998':
    workshop+=1
  elif event['instanceOf'] == 'http://www.wikidata.org/entity/Q2020153':
    conference+=1
  else:
    pass
print(f"No. of Workshops: ",workshop)
print(f"No. of Conferences: ",conference)




No. of Workshops:  2833
No. of Conferences:  659


In [9]:
workshop+conference

3492

In [10]:
my_list = []
for event in events_with_instanceOf:
  if event['instanceOf'] not in my_list:
    my_list.append(event['instanceOf'])
print(my_list)

['http://www.wikidata.org/entity/Q40444998', 'http://www.wikidata.org/entity/Q7935096', 'http://www.wikidata.org/entity/Q2020153', 'http://www.wikidata.org/entity/Q98381912', 'http://www.wikidata.org/entity/Q27968055', 'http://www.wikidata.org/entity/Q104418497', 'http://www.wikidata.org/entity/Q1143604']


In [11]:
len(my_list)

7

In [12]:
for event in events_with_instanceOf:
  if event['instanceOf'] == 'http://www.wikidata.org/entity/Q40444998' or event['instanceOf'] == 'http://www.wikidata.org/entity/Q98381912':
    event['instanceOf'] = 'Workshop'
  elif event['instanceOf'] == 'http://www.wikidata.org/entity/Q2020153':
    event['instanceOf'] = 'Conference'
  elif event['instanceOf'] == 'http://www.wikidata.org/entity/Q7935096':
    event['instanceOf'] = 'Virtual Event'
  elif event['instanceOf'] == 'http://www.wikidata.org/entity/Q27968055':
    event['instanceOf'] = 'Recurrent Event Edition'
  elif event['instanceOf'] == 'http://www.wikidata.org/entity/Q104418497':
    event['instanceOf'] = 'Online Event'
  elif event['instanceOf'] == 'http://www.wikidata.org/entity/Q1143604':
    event['instanceOf'] = 'Proceeding'

my_list = []
for event in events_with_instanceOf:
  if event['instanceOf'] not in my_list:
    my_list.append(event['instanceOf'])
print(my_list)

['Workshop', 'Virtual Event', 'Conference', 'Recurrent Event Edition', 'Online Event', 'Proceeding']


In [13]:
event_title_instanceOff = []
for event in events_with_instanceOf:
  if 'title' in event:
    my_dict = dict()
    my_dict['title'] = event['title']
    my_dict['Category'] = event['instanceOf']
    event_title_instanceOff.append(my_dict)

In [None]:
# event_title_instanceOff

In [14]:
train, test = train_test_split(event_title_instanceOff, test_size=0.1)

In [15]:
len(train)

3125

In [16]:
len(test)

348

In [17]:
import random

random_train = random.sample(train, 200)
random_test = random.sample(test, 50)

In [18]:
conversation = [
    {'role': 'system', 'content': 'You are a human'},

    {'role': 'user', 'content': 'In wikidata, there are about 3400 entries which are interesting to me. Lets call these as "events". These events can be categorized into a few categories such as a "conference", "workshop", "virtual event","recurrent event edition","online event" or a "proceeding"'},
    {'role': 'user', 'content': 'In wikidata, the category to which an event belongs to is sometimes missing, and my task is to deduct from the title of the event to which category it belongs to. For humans it is an easy task for sure, but no one wants to edit thousands of entries by hand. This is where you step in.'},
    {'role': 'user', 'content': 'I want you to deduct to which category an event belongs to. To help you out, i will provide titles of some random events, and their corresponding category to help you out with the pattern recognition. Then I will provide more events for you to find out the corresponding categories for them.'},
]

for count, item in enumerate(random_train, start=1):
    conversation.append({'role': 'user', 'content': 'Event ' + str(count) + " is named '" + item['title'] + "'"})
    conversation.append({'role': 'assistant', 'content': 'The Category for Event ' + str(count) + " is '" + item['Category'] + "'"})

In [19]:
len(conversation)

404

In [20]:
for line in conversation[4:16]:
    print(line)

{'role': 'user', 'content': "Event 1 is named 'Doctoral Symposium on Research on Online Databases in History'"}
{'role': 'assistant', 'content': "The Category for Event 1 is 'Workshop'"}
{'role': 'user', 'content': "Event 2 is named 'Workshop Papers of i-Know 2017'"}
{'role': 'assistant', 'content': "The Category for Event 2 is 'Workshop'"}
{'role': 'user', 'content': "Event 3 is named 'Workshop on Adapted intEraction with SociAl Robots'"}
{'role': 'assistant', 'content': "The Category for Event 3 is 'Workshop'"}
{'role': 'user', 'content': "Event 4 is named 'International Workshop on Affect, Meta-Affect, Data and Learning (AMADL 2015)'"}
{'role': 'assistant', 'content': "The Category for Event 4 is 'Workshop'"}
{'role': 'user', 'content': "Event 5 is named 'PEGOV 2013: Personalization in eGovernment Services and Applications'"}
{'role': 'assistant', 'content': "The Category for Event 5 is 'Workshop'"}
{'role': 'user', 'content': "Event 6 is named 'Models and Methods of Information Sys

In [21]:
deduced_category = []
for event in random_test:
    conversation.append({'role': 'user', 'content': "The event for you to find the category for is " + event['title']})

    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo-16k',
        messages=conversation,
        stop=None,
        temperature=0.7
    )

# Extract the deduced event series from the response
    deduced_category.append(response['choices'][0]['message']['content'])

# Add the assistant's response to the conversation history
    conversation.append({'role': 'assistant', 'content': response['choices'][0]['message']['content']})

In [22]:
print(deduced_category)

['The Category for the event "SWUI 2008 Exploring HCI Challenges" is \'Workshop\'.', 'The Category for the event "24th International Workshop on Concurrency, Specification and Programming" is \'Workshop\'.', 'The Category for the event "4th Geogames and Geoplay International Workshop" is \'Workshop\'.', 'The Category for the event "IRMLeS \'09 Inductive Reasoning and Machine Learning on the Semantic Web" is \'Workshop\'.', 'The Category for the event "27th International Conference on Inductive Logic Programming" is \'Conference\'.', 'The Category for the event "Third International Workshop on Behavior Change Support Systems" is \'Workshop\'.', 'The Category for the event "SNA 2005 Semantic Network Analysis" is \'Workshop\'.', 'The Category for the event "2nd Workshop on Human Decision Making in Recommender Systems" is \'Workshop\'.', 'The Category for the event "Dateso 2010 Annual International Workshop on DAtabases, TExts, Specifications and Objects" is \'Workshop\'.', 'The Category f

In [33]:
import re
predicted_category = []
pattern = r'The Category for the event "(.*?)" is \'(.*?)\''
event_category_predicted = []
for line in deduced_category:
    match = re.search(pattern, line)
    if match:
        event_name = match.group(1)
        category = match.group(2)
        my_dict = dict()
        my_dict[event_name] = category
        event_category_predicted.append(my_dict)
        predicted_category.append(category)
        print(f"Event: {event_name}, Category: {category}")
    else:
      print(line)

Event: SWUI 2008 Exploring HCI Challenges, Category: Workshop
Event: 24th International Workshop on Concurrency, Specification and Programming, Category: Workshop
Event: 4th Geogames and Geoplay International Workshop, Category: Workshop
Event: IRMLeS '09 Inductive Reasoning and Machine Learning on the Semantic Web, Category: Workshop
Event: 27th International Conference on Inductive Logic Programming, Category: Conference
Event: Third International Workshop on Behavior Change Support Systems, Category: Workshop
Event: SNA 2005 Semantic Network Analysis, Category: Workshop
Event: 2nd Workshop on Human Decision Making in Recommender Systems, Category: Workshop
Event: Dateso 2010 Annual International Workshop on DAtabases, TExts, Specifications and Objects, Category: Workshop
Event: MODELS'13 Invited Talks, Demonstration Session, Poster Session, and ACM Student Research Competition, Category: Conference
Event: Workshop on Methods & Tools for Computer Supported Collaborative Creativity Pr

In [34]:
len(event_category_predicted)

50

In [35]:
event_category_predicted

[{'SWUI 2008 Exploring HCI Challenges': 'Workshop'},
 {'24th International Workshop on Concurrency, Specification and Programming': 'Workshop'},
 {'4th Geogames and Geoplay International Workshop': 'Workshop'},
 {"IRMLeS '09 Inductive Reasoning and Machine Learning on the Semantic Web": 'Workshop'},
 {'27th International Conference on Inductive Logic Programming': 'Conference'},
 {'Third International Workshop on Behavior Change Support Systems': 'Workshop'},
 {'SNA 2005 Semantic Network Analysis': 'Workshop'},
 {'2nd Workshop on Human Decision Making in Recommender Systems': 'Workshop'},
 {'Dateso 2010 Annual International Workshop on DAtabases, TExts, Specifications and Objects': 'Workshop'},
 {"MODELS'13 Invited Talks, Demonstration Session, Poster Session, and ACM Student Research Competition": 'Conference'},
 {'Workshop on Methods & Tools for Computer Supported Collaborative Creativity Process: Linking Creativity & Informal Learning': 'Workshop'},
 {'3rd International Workshop on 

In [36]:
test_category = []
for event in random_test:
  test_category.append(event['Category'])

In [37]:
test_category

['Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Conference',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Conference',
 'Workshop',
 'Conference',
 'Conference',
 'Conference',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Conference',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Conference',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop']

In [38]:
predicted_category

['Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Conference',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Conference',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Conference',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Conference',
 'Workshop',
 'Conference',
 'Workshop',
 'Conference',
 'Workshop',
 'Workshop',
 'Workshop',
 'Conference',
 'Workshop',
 'Conference',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop',
 'Workshop']

In [39]:
sum = 0

for test,predicted in zip(test_category, predicted_category):
    if test == predicted:
        sum += 1

print("accuracy: ", sum/len(test_category))

accuracy:  0.9


In [40]:
sum

45