In [1]:
import json
import logging
import random
from pathlib import Path
from typing import Dict, List, Any

import pandas as pd


# Load the benchmark JSON from the user's home directory. The rest of the
# notebook indexes `data` by integer position, one entry per conversation sample.
# The encoding is pinned to UTF-8 so the parse does not depend on the platform's
# locale default.
LOCOMO_PATH = Path.home() / "locomo/data/locomo10.json"
with open(LOCOMO_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

In [2]:
# Reproducible conversation-level train/test split.
# The two knobs are named so they can be tuned in one place.
SPLIT_SEED = 42
N_TRAIN_CONVERSATIONS = 7

# Seed the global RNG, then shuffle the conversation positions in place.
random.seed(SPLIT_SEED)
conversations = list(range(len(data)))
random.shuffle(conversations)

# The first N shuffled positions go to train, the remainder to test.
train_indices = conversations[:N_TRAIN_CONVERSATIONS]
test_indices = conversations[N_TRAIN_CONVERSATIONS:]

train_data = [data[i] for i in train_indices]
test_data = [data[i] for i in test_indices]

In [18]:
# Tally the training questions, leaving out every QA entry whose category is 5.
total = 0
for i, dat in enumerate(train_data):
    # Show which top-level fields this sample carries, then its identifier.
    print(dat.keys())
    print(f"sample_id: {dat['sample_id']}")
    conv_counter = 0
    for qa in dat['qa']:
        if qa['category'] != 5:
            conv_counter += 1
    # Fold this conversation's count into the running grand total.
    total += conv_counter
    print(f"Training data size for conv {i + 1}: {conv_counter}")
print(f"Total training data size: {total}")

dict_keys(['qa', 'conversation', 'event_summary', 'observation', 'session_summary', 'sample_id'])
sample_id: conv-48
Training data size for conv 1: 191
dict_keys(['qa', 'conversation', 'event_summary', 'observation', 'session_summary', 'sample_id'])
sample_id: conv-42
Training data size for conv 2: 199
dict_keys(['qa', 'conversation', 'event_summary', 'observation', 'session_summary', 'sample_id'])
sample_id: conv-41
Training data size for conv 3: 152
dict_keys(['qa', 'conversation', 'event_summary', 'observation', 'session_summary', 'sample_id'])
sample_id: conv-49
Training data size for conv 4: 156
dict_keys(['qa', 'conversation', 'event_summary', 'observation', 'session_summary', 'sample_id'])
sample_id: conv-44
Training data size for conv 5: 123
dict_keys(['qa', 'conversation', 'event_summary', 'observation', 'session_summary', 'sample_id'])
sample_id: conv-47
Training data size for conv 6: 150
dict_keys(['qa', 'conversation', 'event_summary', 'observation', 'session_summary', 'sam

In [None]:
# Group the first training conversation's questions, answers and evidence ids
# by QA category, then print up to five samples per category.
qs = {}
ans = {}
evidences = {}
for qa in train_data[0]['qa']:
    # Category 5 is skipped. The category-5 record displayed later in this
    # notebook has an 'adversarial_answer' key and no 'answer' key, so reading
    # qa['answer'] on it would fail.
    if qa['category'] == 5:
        continue
    category = qa['category']
    # Append in place; the three dicts are always filled together, so they
    # share the same keys and the same per-key ordering.
    qs.setdefault(category, []).append(qa['question'])
    ans.setdefault(category, []).append(qa['answer'])
    evidences.setdefault(category, []).append(qa['evidence'])

# print first 5 question and the corresponding answer from each category
for category, questions in qs.items():
    print(f"Category: {category}")
    for question, answer, evidence in zip(questions[:5], ans[category][:5], evidences[category][:5]):
        print(f" - {question}")
        print(f"   Answer: {answer}")
        print(f"   Evidence: {evidence}")

# According to https://github.com/snap-research/locomo/blob/3eb6f2c585f5e1699204e3c3bdf7adc5c28cb376/task_eval/evaluation.py#L208
# Category 1 is multi-hop, 2 is single-hop, 3 is temporal, 4 is open-domain
# NOTE(review): the category-2 samples printed by this cell are mostly "When ..."
# questions with date answers, which looks temporal rather than single-hop.
# Confirm the 2/3/4 mapping against the linked source before relying on it.

Category: 2
 - What kind of project was Jolene working on in the beginning of January 2023?
   Answer: electricity engineering project
   Evidence: ['D1:2']
 - When did Deborah`s mother pass away?
   Answer: a few years before 2023
   Evidence: ['D1:5']
 - When did Jolene`s mother pass away?
   Answer: in 2022
   Evidence: ['D1:6']
 - When did Jolene's mom gift her a pendant?
   Answer: in 2010
   Evidence: ['D1:8']
 - Which country were Jolene and her mother visiting in 2010?
   Answer: France
   Evidence: ['D1:8']
Category: 1
 - Which of Deborah`s family and friends have passed away?
   Answer: mother, father, her friend Karlie
   Evidence: ['D1:5', 'D2:1', 'D6:4']
 - What symbolic gifts do Deborah and Jolene have from their mothers?
   Answer: pendants
   Evidence: ['D1:8', 'D1:9']
 - What helped Deborah find peace when grieving deaths of her loved ones?
   Answer: yoga, old photos, the roses and dahlias in a flower garden, nature
   Evidence: ['D1:15', 'D2:3', 'D6:4', 'D15:29']
 - 

In [25]:
# Display the last QA record of the first training conversation.
# It is indexed explicitly because the bare name `qa` is a leftover loop variable
# bound by more than one cell, so its value depends on which cell ran last.
train_data[0]['qa'][-1]

{'question': "What are the names of Deborah's snakes?",
 'evidence': ['D2:20', 'D2:22'],
 'category': 5,
 'adversarial_answer': 'Susie, Seraphim'}

In [9]:
# Display the 'conversation' field of the first training sample. The output
# shows the two speaker names plus, per session, a timestamp and a list of turns.
train_data[0]['conversation']

{'speaker_a': 'Deborah',
 'speaker_b': 'Jolene',
 'session_1_date_time': '4:06 pm on 23 January, 2023',
 'session_1': [{'speaker': 'Deborah',
   'dia_id': 'D1:1',
   'text': "Hey Jolene, nice to meet you! How's your week going? Anything fun happened?"},
  {'speaker': 'Jolene',
   'dia_id': 'D1:2',
   'text': "Hi Deb! Good to meet you! Yeah, my week's been busy. I finished an electrical engineering project last week - took a lot of work, but it's done now. Anything fun happening for you?"},
  {'speaker': 'Deborah',
   'dia_id': 'D1:3',
   'text': 'Congrats! Last week I visited a place that holds a lot of memories for me. It was my mother`s old house.'},
  {'speaker': 'Jolene',
   'dia_id': 'D1:4',
   'text': 'Why does it hold such special memories for you?'},
  {'speaker': 'Deborah',
   'img_url': ['https://jubileewriter.files.wordpress.com/2021/05/20171019_174608.jpg'],
   'blip_caption': 'a photo of a woman in a wheelchair hugging a woman in a wheelchair',
   'query': 'mother old home

In [10]:
# Display the 'event_summary' field of the first training sample. The output
# shows, per session, each speaker's list of events together with a date.
train_data[0]['event_summary']

{'events_session_1': {'Deborah': ["Deborah visits her mother's old home while reflecting on her life."],
  'Jolene': ['Jolene conquers a difficult electrical engineering project.'],
  'date': '23 January, 2023'},
 'events_session_2': {'Deborah': ["Deborah's father passes away suddenly, leaving her and her family shocked and in grief.",
   "Deborah's copes with her father's passing by looking at their family album.",
   'Deborah receives a letter from her community where they appreciate her dedication to teaching yoga.'],
  'Jolene': ['Jolene takes a break from her engineering studies by playing "Walking Dead" video game with her partner.'],
  'date': '27 January, 2023'},
 'events_session_3': {'Deborah': ['Deborah meets a new neighbor, Anna, while doing yoga in the park and befriends them.'],
  'Jolene': ['Jolene receives a challenging but fun robotics projects from her professor to work on.'],
  'date': '1 February, 2023'},
 'events_session_4': {'Deborah': ['Deborah further bonds with 

In [11]:
# Display the 'observation' field of the first training sample. The output
# shows, per session and speaker, [statement, dialogue-id] pairs.
train_data[0]['observation']

{'session_1_observation': {'Deborah': [['Deborah finished an electrical engineering project last week.',
    'D1:2'],
   ["Deborah visited her mother's old house last week which holds special memories as her mother passed away a few years ago.",
    'D1:3'],
   ['Deborah has a photo with her mother, which is their last photo together.',
    'D1:5'],
   ["Deborah's mother had a special bench near the window in her house where she used to sit every morning to take in the view.",
    'D1:7'],
   ['Deborah has a pendant that reminds her of her mother.', 'D1:9'],
   ["Deborah's goal is to keep teaching yoga and supporting her community to help people find peace and joy.",
    'D1:13'],
   ['Yoga helped Deborah find peace during a rough time, inspiring her to share it with others.',
    'D1:15']],
  'Jolene': [['Jolene finished an electrical engineering project last week.',
    'D1:2'],
   ["Jolene's mother passed away last year.", 'D1:6'],
   ["Jolene has a room in her mother's house where 

In [12]:
# Display the 'session_summary' field of the first training sample. The output
# shows one free-text summary per session.
train_data[0]['session_summary']

{'session_1_summary': "Deborah and Jolene met at 4:06 pm on 23 January, 2023. They discussed their busy weeks and memories of their deceased mothers. Deborah shared about her mother's old house, while Jolene mentioned her room in her mother's house. They talked about special items that reminded them of their mothers. Jolene discussed a pendant symbolizing freedom, inspiring her to pursue goals. Deborah expressed her goal to teach yoga and bring peace to her community. Yoga helped Deborah find peace during a difficult time. Jolene found Deborah's journey inspiring, and the conversation ended with them looking forward to the next chat.",
 'session_2_summary': "Deborah informed Jolene about her father's recent passing, recounting how she copes with grief by cherishing memories and spending time with family. Jolene expressed sympathy and inquired about how Deborah and her family were coping. They discussed Deborah's parents, her relationship, the impact of yoga on her life, and her special

In [13]:
# Display the identifier of the first training sample after the shuffle.
train_data[0]['sample_id']

'conv-48'

In [15]:
# List every sample's identifier in the file's original, unshuffled order.
for i, sample in enumerate(data):
    print(sample['sample_id'])

conv-26
conv-30
conv-41
conv-42
conv-43
conv-44
conv-47
conv-48
conv-49
conv-50
