In [1]:
%cd code

/opt/ml/project/team/code


In [2]:
import json
import regex as re
from attrdict import AttrDict
from string import Template

from collections import defaultdict
from dataclasses import dataclass
from typing import List, Optional, Union, Dict

from prepare import get_data
from data_utils import load_dataset, get_examples_from_dialogues, split_slot

In [3]:
# Settings passed to get_data() in the next cell.
# NOTE(review): data_dir and ontology_root are absolute paths, so the notebook
# only runs on a machine with this exact directory layout.
args = AttrDict(
    train_file_name='train_dials.json',
    data_dir= '/opt/ml/input/data/train_dataset',
    ontology_root='/opt/ml/input/data/train_dataset/edit_ontology_metro.json',
    use_convert_ont=False,
    use_domain_slot='basic',
    use_small_data=False,
)

In [4]:
# get_data() returns three objects; slot_meta is displayed later in the notebook
# as a list of 'domain-slot' name strings.
data, slot_meta, ontology = get_data(args)

using train: train_dials.json


In [5]:
# Convert the loaded dialogues into examples. The output of `examples[4]` further
# down shows that each element is a list of DSTInputExample objects, one per turn
# of a single dialogue.
examples = get_examples_from_dialogues(
        data, user_first=False, dialogue_level=True, which='all'
)

HBox(children=(FloatProgress(value=0.0, description='Getting all examples from dialogues', max=7000.0, style=P…




In [6]:
examples[4]

[DSTInputExample(guid='falling-king-2544:택시_1-0', context_turns=[], current_turn=['', '요청할게 있습니다. 인사동에서 모텔 킹까지 가는 고급택시를 찾고있습니다. '], label=['택시-출발지-인사동', '택시-도착지-모텔 킹', '택시-종류-고급'], before_label=[]),
 DSTInputExample(guid='falling-king-2544:택시_1-1', context_turns=['', '요청할게 있습니다. 인사동에서 모텔 킹까지 가는 고급택시를 찾고있습니다. '], current_turn=['안녕하세요. 원하시는 출발시간대와 도착시간대가 있으신가요? ', '13시 20분까지 도착만 하면 됩니다. 출발시간은 아무때나 괜찮습니다. '], label=['택시-출발 시간-dontcare', '택시-출발지-인사동', '택시-도착지-모텔 킹', '택시-도착 시간-13:20', '택시-종류-고급'], before_label=['택시-출발지-인사동', '택시-도착지-모텔 킹', '택시-종류-고급']),
 DSTInputExample(guid='falling-king-2544:택시_1-2', context_turns=['', '요청할게 있습니다. 인사동에서 모텔 킹까지 가는 고급택시를 찾고있습니다. ', '안녕하세요. 원하시는 출발시간대와 도착시간대가 있으신가요? ', '13시 20분까지 도착만 하면 됩니다. 출발시간은 아무때나 괜찮습니다. '], current_turn=['10000원으로 이용하실 수 있는 고급택시가 검색되었습니다. 전화번호는 06837405219 입니다. 더 궁금하신 사항 있으신가요? ', '아니요 감사합니다. '], label=['택시-출발 시간-dontcare', '택시-출발지-인사동', '택시-도착지-모텔 킹', '택시-도착 시간-13:20', '택시-종류-고급'], before_label=['택시-출발 시간-dontcare', '택시-출발지-인사동', '택시-

In [7]:
@dataclass
class TurnFeature:
    """One (turn, newly introduced slot value) pair used by the coverage checks.

    `current_turn` is indexed as [system utterance, user utterance] by __repr__;
    `before_states` maps slot name to the value held before this turn.
    """
    guid: str
    current_turn: List[str]
    value: str
    before_states: Dict[str, str]

    def __repr__(self):
        # Multi-line layout with a leading and a trailing newline so that a list
        # of features prints as readable, separated records.
        system_utterance = self.current_turn[0]
        user_utterance = self.current_turn[1]
        record = [
            '',
            f'{self.guid}',
            f'VAL: {self.value}',
            f'SYS: {system_utterance}',
            f'USR: {user_utterance}',
            f'before: {self.before_states}',
            '',
        ]
        return '\n'.join(record)

In [8]:
# Flat lookup table: guid -> example, across every dialogue in `examples`.
guid2examples = {
    example.guid: example
    for dialogue in examples
    for example in dialogue
}

In [9]:
def labels_to_dict(label):
    """Turn a list of label strings into a ``{slot: value}`` dict.

    Every item is passed through ``split_slot(item, True)`` and unpacked as a
    (slot, value) pair. If a slot occurs more than once, the last value wins.
    """
    pairs = (split_slot(item, True) for item in label)
    return {slot: value for slot, value in pairs}

In [10]:
# slot name -> TurnFeature for every turn where that slot's label is new,
# i.e. present in `label` but absent from `before_label`.
slot2guids = defaultdict(list)
for dialogue in examples:
    for turn_example in dialogue:
        previous_state = labels_to_dict(turn_example.before_label)
        fresh_labels = [
            labelled for labelled in turn_example.label
            if labelled not in turn_example.before_label
        ]
        for labelled in fresh_labels:
            slot, value = split_slot(labelled, True)
            feature = TurnFeature(
                turn_example.guid, turn_example.current_turn, value, previous_state
            )
            slot2guids[slot].append(feature)

In [11]:
def can_find(pat, turn, no_space):
    """Report whether `pat` can be found in `turn`.

    `pat` is handed to ``re.search`` as a regular expression. When the plain
    search fails and `no_space` is truthy, the search is repeated after all
    whitespace has been removed from both `pat` and `turn`.

    Returns:
        bool: True if either search succeeds.
    """
    direct_hit = re.search(pat, turn)
    if direct_hit is not None:
        return True
    if not no_space:
        return False
    compact_pat = ''.join(pat.split())
    compact_turn = ''.join(turn.split())
    return re.search(compact_pat, compact_turn) is not None

In [12]:
def show_example(example):
    """Print one example: guid, dialogue history, current turn and labels.

    Utterances are tagged by position within each list: even index -> SYS,
    odd index -> USR.

    Args:
        example: object with `guid`, `context_turns`, `current_turn`, `label`
            and `before_label` attributes.
    """
    def _print_turns(turns):
        # Speaker alternates, starting with the system at index 0.
        for idx, utterance in enumerate(turns):
            speaker = 'SYS' if idx % 2 == 0 else 'USR'
            print(f'{speaker}: {utterance}')

    print(f'guid: {example.guid}')
    print()
    print('------------ HISTORY ------------')
    _print_turns(example.context_turns)
    print()
    print('------------ CURRENT ------------')
    _print_turns(example.current_turn)

    # Fixed: the attribute is `label`; the former `labe1l` typo raised
    # AttributeError on every call.
    print(f'label: {example.label}')
    print(f'before_label: {example.before_label}')

In [13]:
slot_meta

['관광-경치 좋은',
 '관광-교육적',
 '관광-도보 가능',
 '관광-문화 예술',
 '관광-역사적',
 '관광-이름',
 '관광-종류',
 '관광-주차 가능',
 '관광-지역',
 '숙소-가격대',
 '숙소-도보 가능',
 '숙소-수영장 유무',
 '숙소-스파 유무',
 '숙소-예약 기간',
 '숙소-예약 명수',
 '숙소-예약 요일',
 '숙소-이름',
 '숙소-인터넷 가능',
 '숙소-조식 가능',
 '숙소-종류',
 '숙소-주차 가능',
 '숙소-지역',
 '숙소-헬스장 유무',
 '숙소-흡연 가능',
 '식당-가격대',
 '식당-도보 가능',
 '식당-야외석 유무',
 '식당-예약 명수',
 '식당-예약 시간',
 '식당-예약 요일',
 '식당-이름',
 '식당-인터넷 가능',
 '식당-종류',
 '식당-주류 판매',
 '식당-주차 가능',
 '식당-지역',
 '식당-흡연 가능',
 '지하철-도착지',
 '지하철-출발 시간',
 '지하철-출발지',
 '택시-도착 시간',
 '택시-도착지',
 '택시-종류',
 '택시-출발 시간',
 '택시-출발지']

## 메모

택시-출발지: 숙소에서 택시가 출발하는 경우는 아직 다 찾지 못함

## Normal

In [14]:
# Label values that the search loops below skip entirely.
special_values = {'dontcare'}

use_alternative = True
# For now this is aimed at the taxi departure slot.
# keyword seen in an utterance -> slots whose previous value the keyword may stand for
alternative_values = {
    keyword: ['숙소-이름'] for keyword in ('숙소', '모텔', '호텔')
}
alternative_values.update({
    '식당': ['식당-이름'],
    '관광': ['관광-이름'],
    # Open question: is it acceptable to use this one?
    '거기': ['숙소-이름', '관광-이름', '식당-이름'],
})

In [15]:
# Coverage check by literal search: a feature counts as found when its value
# appears in either utterance of the turn, or (optionally) when an alternative
# keyword appears and one of its mapped slots already held that value.
slot_to_check = '식당-예약 요일'
no_space = True

not_found = []
for feature in slot2guids[slot_to_check]:
    if feature.value in special_values:
        continue

    direct_hits = [
        can_find(feature.value, utterance, no_space)
        for utterance in feature.current_turn
    ]
    found = any(direct_hits)

    if use_alternative and not found:
        for utterance in feature.current_turn:
            for keyword, source_slots in alternative_values.items():
                if not can_find(keyword, utterance, no_space):
                    continue
                for source_slot in source_slots:
                    if feature.before_states.get(source_slot, '') == feature.value:
                        found = True

    if not found:
        not_found.append(feature)
print(f'{len(slot2guids[slot_to_check])} -> {len(not_found)}')

4027 -> 301


In [16]:
# show_example(guid2examples['patient-lab-8392:숙소_관광_택시_14-5'])

In [17]:
# Slot names that contain '시간' (time).
[slot_name for slot_name in slot_meta if '시간' in slot_name]

['식당-예약 시간', '지하철-출발 시간', '택시-도착 시간', '택시-출발 시간']

## Time

In [18]:
# 'HH:MM' written the same way as the label value.
full_time_formats = [Template('$hour:$mn')]
# Spoken forms whose hour/minute may be written with or without a leading zero.
zero_pad_okay_formats = [
    Template(spec)
    for spec in ('$prefix $hour 시 $mn 분', '$prefix $hour 시 $mn')
]

# On-the-hour and half-past forms.
zero_min_formats = [Template('$prefix $hour 시')]
half_min_formats = [Template('$prefix $hour 시 반')]

# When is '낮 1시' and when is '밤 1시'? If anything, '밤 1시' is 01 and '낮 1시'
# is 13 - this needs special-case handling.
am_formats = ['', '오전', '낮']
pm_formats = ['오후', '저녁', '밤']

# index -> hour word used before '시'
time2word = ['영', '한', '두', '세', '네', '다섯', '여섯', '일곱', '여덟', '아홉', '열', '열한', '열두']

# digit -> digit word used when spelling minutes
ten2word = ['영', '일', '이', '삼', '사', '오', '육', '칠', '팔', '구']

def min2word(mn):
    """Spell a two-character minute string with the digit words in `ten2word`.

    '00' gives '영'. Otherwise the result is the tens digit word (only for a
    tens digit of '2' or above), then '십' when the tens digit is '1' or above,
    then the ones digit word when the ones digit is '1' or above.
    """
    if mn == '00':
        return '영'

    pieces = []
    if mn[0] >= '2':
        pieces.append(ten2word[int(mn[0])])
    if mn[0] >= '1':
        pieces.append('십')
    if mn[1] >= '1':
        pieces.append(ten2word[int(mn[1])])
    return ''.join(pieces)

In [19]:
# Coverage check for one time-valued slot: for each turn that introduces a value,
# build written/spoken variants of the 'HH:MM' label from the Template lists and
# search for any of them in the turn's utterances. Features for which no variant
# is found are collected in not_found.
slot_to_check = '택시-출발 시간'
no_space = True

not_found = []
for x in slot2guids[slot_to_check]:
    if x.value in special_values:
        continue
        
    hour, mn = x.value.split(':')
    # (hour, minute) spellings with and without the leading zero on each part.
    zero_pad_list = [(hour, mn)]
    if hour[0] == '0':
        zero_pad_list.append((hour[1], mn))
    if mn[0] == '0' and mn != '00':
        zero_pad_list.append((hour, mn[1]))
    if hour[0] == '0' and mn[0] == '0' and mn != '00':
        zero_pad_list.append((hour[1], mn[1]))

    found = False
    for turn in x.current_turn:
        # 'HH:MM', plus 'H:MM' when the hour has a leading zero.
        for pat_format in full_time_formats:
            pat = pat_format.substitute(hour=hour, mn=mn)
            found |= can_find(pat, turn, no_space)
            
            if hour[0] == '0':
                pat = pat_format.substitute(hour=hour[1], mn=mn)
                found |= can_find(pat, turn, no_space)
            
        # 'H 시 M 분' style forms, for every zero-padding variant.
        for pat_format in zero_pad_okay_formats:
            for z_hour, z_mn in zero_pad_list:
                pat = pat_format.substitute(prefix='', hour=z_hour, mn=z_mn)
                found |= can_find(pat, turn, no_space)
                
                # Hours 0-11: try each AM-style prefix with digit/word hour and
                # digit/word minute combinations.
                if int(z_hour) <= 11:
                    for am in am_formats:
                        pat = pat_format.substitute(prefix=am, hour=z_hour, mn=z_mn)
                        found |= can_find(pat, turn, no_space)
                        
                        pat = pat_format.substitute(prefix=am, hour=time2word[int(hour)%12], mn=z_mn)
                        found |= can_find(pat, turn, no_space)
                        
                        wmn = min2word(mn)
                        pat = pat_format.substitute(prefix=am, hour=z_hour, mn=wmn)
                        found |= can_find(pat, turn, no_space)
                        
                        pat = pat_format.substitute(prefix=am, hour=time2word[int(hour)%12], mn=wmn)
                        found |= can_find(pat, turn, no_space)

                # Hours 13 and above: PM-style prefixes. Hour 12 enters neither branch.
                if int(z_hour) >= 13:
                    for pm in pm_formats:
                        pat = pat_format.substitute(prefix=pm, hour=int(z_hour)-12, mn=z_mn)
                        found |= can_find(pat, turn, no_space)
                        
                        wmn = min2word(mn)
                        # NOTE(review): this variant keeps the 24-hour z_hour after a PM
                        # prefix, while the one above subtracts 12 - confirm this is intended.
                        pat = pat_format.substitute(prefix=pm, hour=z_hour, mn=wmn)
                        found |= can_find(pat, turn, no_space)
                        
                        pat = pat_format.substitute(prefix=pm, hour=time2word[int(hour)%12], mn=wmn)
                        found |= can_find(pat, turn, no_space)
        
        # On-the-hour values may be spoken without any minute part.
        if mn == '00':
            for pat_format in zero_min_formats:
                pat = pat_format.substitute(prefix='',hour=hour, mn=mn)
                found |= can_find(pat, turn, no_space)
                
                if hour[0] == '0':
                    pat = pat_format.substitute(prefix='',hour=hour[1], mn=mn)
                    found |= can_find(pat, turn, no_space)
                
                if int(hour) <= 11:
                    for am in am_formats:
                        pat = pat_format.substitute(prefix=am, hour=hour, mn=mn)
                        found |= can_find(pat, turn, no_space)
                        
                        pat = pat_format.substitute(prefix=am, hour=time2word[int(hour)%12], mn=mn)
                        found |= can_find(pat, turn, no_space)
                if int(hour) >= 13:
                    for pm in pm_formats:
                        pat = pat_format.substitute(prefix=pm, hour=int(hour)-12, mn=mn)
                        found |= can_find(pat, turn, no_space)
                        
                        pat = pat_format.substitute(prefix=pm, hour=time2word[int(hour)%12], mn=mn)
                        found |= can_find(pat, turn, no_space)
            
        # Half-past values may be spoken as 'H 시 반'.
        if mn == '30':
            for pat_format in half_min_formats:
                pat = pat_format.substitute(prefix='',hour=hour, mn=mn)
                found |= can_find(pat, turn, no_space)
                
                if hour[0] == '0':
                    pat = pat_format.substitute(prefix='',hour=hour[1], mn=mn)
                    found |= can_find(pat, turn, no_space)
                    
                # (h-1)%12+1 maps 12 to index 12 and 13..23 to 1..11.
                pat = pat_format.substitute(prefix='',hour=time2word[(int(hour)-1)%12+1], mn=mn)
                found |= can_find(pat, turn, no_space)
                
                # NOTE(review): z_hour / z_mn below are not set in this branch; they are
                # whatever the last iteration over zero_pad_list left behind. The
                # half-hour template has no $mn placeholder, so z_mn is unused.
                if int(hour) <= 11:
                    for am in am_formats:
                        pat = pat_format.substitute(prefix=am, hour=z_hour, mn=z_mn)
                        found |= can_find(pat, turn, no_space)
                if int(hour) >= 13:
                    for pm in pm_formats:
                        pat = pat_format.substitute(prefix=pm, hour=int(z_hour)-12, mn=z_mn)
                        found |= can_find(pat, turn, no_space)
        
    if not found:
        not_found.append(x)
print(f'{len(slot2guids[slot_to_check])} -> {len(not_found)}')

2935 -> 21


# Filters

## Time Filter

In [434]:
def filter_no_str(matches):
    """Drop empty capture groups from a ``findall`` result.

    Args:
        matches: list returned by ``re.findall``.

    Returns:
        The input itself when it is empty; ``[matches]`` when its first element
        is not a tuple; otherwise one list per match holding the stripped,
        non-empty group strings in their original order.
    """
    if len(matches) == 0:
        return matches
    first = matches[0]
    if not isinstance(first, tuple):
        return [matches]

    cleaned = []
    for groups in matches:
        stripped = (piece.strip() for piece in groups)
        cleaned.append([piece for piece in stripped if len(piece) > 0])
    return cleaned

In [530]:
# index -> hour word used before '시', and the reverse lookup.
time2word = ['영', '한', '두', '세', '네', '다섯', '여섯', '일곱', '여덟', '아홉', '열', '열한', '열두']
word2index = {word: index for index, word in enumerate(time2word)}

# Part-of-day words that may precede an hour; joined into a regex alternation.
time_zone = ['오후', '오전', '저녁', '낮', '밤', '새벽']
time_zone_pat = '|'.join(time_zone)

def solve_time(time_zone_info, hour):
    """Convert an hour to 24-hour form using a part-of-day word.

    Args:
        time_zone_info: part-of-day word; surrounding whitespace is ignored.
        hour: integer hour as it appeared in the text.

    Returns:
        `hour` plus 12 when it is below the word's threshold (12 for
        '오후'/'저녁'/'밤', 5 for '낮'); unchanged for '오전' and '새벽'.

    Raises:
        NotImplementedError: for any other word (the word is printed first).
    """
    zone = time_zone_info.strip()
    # zone -> hours strictly below this bound get +12; None means never shift.
    shift_below = {
        '오후': 12,
        '오전': None,
        '저녁': 12,
        '낮': 5,
        '밤': 12,
        '새벽': None,
    }
    if zone not in shift_below:
        print('not found: ', zone)
        raise NotImplementedError()

    bound = shift_below[zone]
    if bound is not None and hour < bound:
        return hour + 12
    return hour

    
# digit -> digit word, and the reverse lookup.
ten2word = ['영', '일', '이', '삼', '사', '오', '육', '칠', '팔', '구']
ten2word_dict = {word: digit for digit, word in enumerate(ten2word)}

# Digit words allowed in front of '십' (the words for 0 through 6).
prefix_ten = '|'.join(ten2word[:7])


def word2min(word):
    """Convert a spelled-out minute word into an integer.

    The stripped word is read character by character: '십' counts as 10 and any
    other character is looked up in `ten2word_dict`. A value larger than the
    previous term multiplies that term; otherwise it starts a new term. The
    result is the sum of the terms.
    """
    terms = []
    for char in word.strip():
        amount = 10 if char == '십' else ten2word_dict[char]
        if terms and terms[-1] < amount:
            terms[-1] *= amount
        else:
            terms.append(amount)
    return sum(terms)
        
def min2word(mn):
    """Spell a two-character minute string with the digit words in `ten2word`.

    '00' gives '영'. Otherwise the result is the tens digit word (only for a
    tens digit of '2' or above), then '십' when the tens digit is '1' or above,
    then the ones digit word when the ones digit is '1' or above.
    """
    if mn == '00':
        return '영'

    pieces = []
    if mn[0] >= '2':
        pieces.append(ten2word[int(mn[0])])
    if mn[0] >= '1':
        pieces.append('십')
    if mn[1] >= '1':
        pieces.append(ten2word[int(mn[1])])
    return ''.join(pieces)
    

# Regex fragments shared by the pattern lists below.
# Fixed: the minute-word group used to interpolate `{ten2word}` directly, which
# inserts the list's repr and yields a character class that also matches quote,
# comma and space. The digit words are now joined into a proper alternation.
_hour_word_pat = '|'.join(time2word)
_digit_word_pat = '|'.join(ten2word)
_zone_group = fr'((?:{time_zone_pat})\s*)?'          # optional part-of-day word
_tens_pat = fr'(?:{prefix_ten})?십'                   # '십', '이십', ...
_minute_word_group = (
    fr'((?:{_tens_pat})(?:{_digit_word_pat})'         # tens + ones
    fr'|(?:{_tens_pat})'                              # tens only
    fr'|(?:{_digit_word_pat}))'                       # ones only
)

# Hour and minute both present. Each alternative keeps the same capture groups
# as before (zone, hour, minute), since callers rely on the non-empty group count.
patterns = [
    r'(\d\d?)\s*:\s*(\d\d)',          # 09:23, 2:23
    fr'{_zone_group}(\d\d?)\s*시\s*(\d\d?)\s*분?',   # 9시 23분, 09시     2분, 4시4
    fr'{_zone_group}({_hour_word_pat})\s*시\s*(\d\d?)\s*분?',
    fr'{_zone_group}(\d\d?)\s*시\s*{_minute_word_group}\s*분',
    fr'{_zone_group}({_hour_word_pat})\s*시\s*{_minute_word_group}\s*분',
]

# Half past the hour; both variants now accept whitespace between '30' and '분'.
half_patterns = [
    fr'{_zone_group}(\d\d?)\s*시\s*(?:반|30\s*분?)',
    fr'{_zone_group}({_hour_word_pat})\s*시\s*(?:반|30\s*분?)',
]

# Hour only.
hour_only_patterns = [
    fr'{_zone_group}(\d\d?)\s*시',
    fr'{_zone_group}({_hour_word_pat})\s*시',
]

pat = '|'.join(patterns)
half_pat = '|'.join(half_patterns)
hour_only_pat = '|'.join(hour_only_patterns)

In [550]:
slot_to_check = '식당-예약 시간'
no_space = True


# Sample system/user turn pair used to try out the filter functions in this cell.
sys_turn = '안녕하세요. 서울 송파구에 위치한 랍스터를 판매하는 누가또가재는 어떠세요?'
usr_turn = '안그래도 랍스터를 먹은지 진짜 오래됐는데.. 추천해주신김에 한번 방문해봐야겠어요. 금요일에 20시20분으로 2명 예약 가능할까요?'
# NOTE(review): the label says 06:12 while the user turn says 20시20분 -
# presumably a deliberate negative case; confirm.
slots_to_check = ['식당-예약 시간-06:12']

# slots_to_check = time slots only
def filter_time(sys_turn, usr_turn, slots_to_check, no_space=True):
    """Check that each slot value appears literally in one of the two turns.

    Args:
        sys_turn: system utterance.
        usr_turn: user utterance.
        slots_to_check: label strings; each is split with ``split_slot(x, True)``.
        no_space: forwarded to ``can_find``.

    Returns:
        ``(all_found, not_found)``: `not_found` lists the label strings whose
        value was located in neither turn. Values in `special_values` are
        skipped and never reported.
    """
    missing = []
    for slot_value in slots_to_check:
        _slot, value = split_slot(slot_value, True)
        if value in special_values:
            continue

        # Evaluate both turns (no short-circuit) before deciding.
        hits = [can_find(value, utterance, no_space) for utterance in (sys_turn, usr_turn)]
        if not any(hits):
            missing.append(slot_value)
    return not missing, missing
        
    
# Try filter_time on the sample turn. NOTE(review): more code follows in this
# cell, so the returned tuple is presumably not displayed.
filter_time(sys_turn, usr_turn, slots_to_check)


def filter_by_time(turn, value):
    """Report whether `turn` mentions the time `value` ('HH:MM') in any known form.

    Three scans run in order, using the module-level regexes `pat` (hour and
    minute), `half_pat` (half past) and `hour_only_pat` (hour only). Every match
    is normalised to 'HH:MM' and compared with `value`.

    Returns:
        bool: True on the first match equal to `value`; False when nothing
        matches or when `value` is in `special_values`.
    """
    if value in special_values:
        return False

    def _as_hour(token):
        # Hour words become their index; anything else passes through unchanged.
        return word2index[token] if token in word2index else token

    def _hour_scan(pattern, minute_text):
        # Shared scan for patterns that capture [zone, hour] or [hour] only;
        # the minute part of the candidate is fixed by the caller.
        for groups in filter_no_str(re.findall(pattern, turn, overlapped=False)):
            if len(groups) == 2:
                hour = solve_time(groups[0], int(_as_hour(groups[1])))
            else:
                hour = _as_hour(groups[0])
            if f'{hour:0>2}:{minute_text}' == value:
                return True
        return False

    # filter by h:m
    for groups in filter_no_str(re.findall(pat, turn, overlapped=False)):
        if len(groups) < 2:
            continue

        if len(groups) == 3:
            # [zone, hour, minute]
            hour = solve_time(groups[0], int(_as_hour(groups[1])))
            minute = groups[2]
        else:
            # [hour, minute]
            hour = _as_hour(groups[0])
            minute = groups[1]

        # A minute that does not start with a digit is a spelled-out word.
        if not ('0' <= minute.strip()[0] <= '9'):
            minute = word2min(minute)

        if f'{hour:0>2}:{minute:0>2}' == value:
            return True

    # filter by h:30, then by h:00
    return _hour_scan(half_pat, '30') or _hour_scan(hour_only_pat, '00')

## default filter

In [552]:
special_values = {'dontcare'}  # this one cannot be resolved

no_space = True

# Sample turn pair and the labels expected to be found in it.
sys_turn = '안녕하세요. 서울 송파구에 위치한 랍스터를 판매하는 누가또가재는 어떠세요?'
usr_turn = '안그래도 랍스터를 먹은지 진짜 오래됐는데.. 추천해주신김에 한번 방문해봐야겠어요. 금요일에 20시20분으로 2명 예약 가능할까요?'
slots_to_check = [
    '식당-예약 시간-20:20',
    '식당-예약 요일-금요일',
    '식당-이름-누가또가재',
]

# Slots whose values are times and therefore go through filter_by_time.
time_slots = {
    '식당-예약 시간',
    '지하철-출발 시간',
    '택시-도착 시간',
    '택시-출발 시간',
}
def filter_normal(sys_turn, usr_turn, slots_to_check, no_space=True):
    """Check that every slot value is mentioned in the system or user turn.

    Slots listed in `time_slots` are checked with ``filter_by_time``; all other
    slots use a literal search through ``can_find``.

    Args:
        sys_turn: system utterance.
        usr_turn: user utterance.
        slots_to_check: label strings; each is split with ``split_slot(x, True)``.
        no_space: forwarded to ``can_find`` for non-time slots.

    Returns:
        ``(all_found, not_found)``: `not_found` lists the label strings that
        could not be located. Values in `special_values` are skipped.
    """
    utterances = (sys_turn, usr_turn)
    missing = []
    for slot_value in slots_to_check:
        slot, value = split_slot(slot_value, True)
        if value in special_values:
            continue

        # Both turns are always evaluated before deciding.
        if slot in time_slots:
            hits = [filter_by_time(utterance, value) for utterance in utterances]
        else:
            hits = [can_find(value, utterance, no_space) for utterance in utterances]

        if not any(hits):
            missing.append(slot_value)
    return not missing, missing
        
    
# Run the combined filter on the sample turn; the recorded output is (True, []).
filter_normal(sys_turn, usr_turn, slots_to_check)

(True, [])

## Archive

In [542]:
def filter_no_str(matches):
    """Drop empty capture groups from a ``findall`` result (archived copy).

    Args:
        matches: list returned by ``re.findall``.

    Returns:
        The input itself when it is empty; ``[matches]`` when its first element
        is not a tuple; otherwise one list per match holding the stripped,
        non-empty group strings in their original order.
    """
    if len(matches) == 0:
        return matches
    first = matches[0]
    if not isinstance(first, tuple):
        return [matches]

    cleaned = []
    for groups in matches:
        stripped = (piece.strip() for piece in groups)
        cleaned.append([piece for piece in stripped if len(piece) > 0])
    return cleaned

In [None]:
# index -> hour word used before '시', and the reverse lookup.
time2word = ['영', '한', '두', '세', '네', '다섯', '여섯', '일곱', '여덟', '아홉', '열', '열한', '열두']
word2index = {word: index for index, word in enumerate(time2word)}

# Part-of-day words that may precede an hour; joined into a regex alternation.
time_zone = ['오후', '오전', '저녁', '낮', '밤', '새벽']
time_zone_pat = '|'.join(time_zone)

def solve_time(time_zone_info, hour):
    """Convert an hour to 24-hour form using a part-of-day word (archived copy).

    Args:
        time_zone_info: part-of-day word; surrounding whitespace is ignored.
        hour: integer hour as it appeared in the text.

    Returns:
        `hour` plus 12 when it is below the word's threshold (12 for
        '오후'/'저녁'/'밤', 5 for '낮'); unchanged for '오전' and '새벽'.

    Raises:
        NotImplementedError: for any other word (the word is printed first).
    """
    zone = time_zone_info.strip()
    # zone -> hours strictly below this bound get +12; None means never shift.
    shift_below = {
        '오후': 12,
        '오전': None,
        '저녁': 12,
        '낮': 5,
        '밤': 12,
        '새벽': None,
    }
    if zone not in shift_below:
        print('not found: ', zone)
        raise NotImplementedError()

    bound = shift_below[zone]
    if bound is not None and hour < bound:
        return hour + 12
    return hour

    
# digit -> digit word, and the reverse lookup.
ten2word = ['영', '일', '이', '삼', '사', '오', '육', '칠', '팔', '구']
ten2word_dict = {word: digit for digit, word in enumerate(ten2word)}

# Digit words allowed in front of '십' (the words for 0 through 6).
prefix_ten = '|'.join(ten2word[:7])


def word2min(word):
    """Convert a spelled-out minute word into an integer (archived copy).

    The stripped word is read character by character: '십' counts as 10 and any
    other character is looked up in `ten2word_dict`. A value larger than the
    previous term multiplies that term; otherwise it starts a new term. The
    result is the sum of the terms.
    """
    terms = []
    for char in word.strip():
        amount = 10 if char == '십' else ten2word_dict[char]
        if terms and terms[-1] < amount:
            terms[-1] *= amount
        else:
            terms.append(amount)
    return sum(terms)
        
def min2word(mn):
    """Spell a two-character minute string with the digit words in `ten2word`.

    '00' gives '영'. Otherwise the result is the tens digit word (only for a
    tens digit of '2' or above), then '십' when the tens digit is '1' or above,
    then the ones digit word when the ones digit is '1' or above.
    """
    if mn == '00':
        return '영'

    pieces = []
    if mn[0] >= '2':
        pieces.append(ten2word[int(mn[0])])
    if mn[0] >= '1':
        pieces.append('십')
    if mn[1] >= '1':
        pieces.append(ten2word[int(mn[1])])
    return ''.join(pieces)
    

# Regex fragments shared by the pattern lists below.
# Fixed: the minute-word group used to interpolate `{ten2word}` directly, which
# inserts the list's repr and yields a character class that also matches quote,
# comma and space. The digit words are now joined into a proper alternation.
_hour_word_pat = '|'.join(time2word)
_digit_word_pat = '|'.join(ten2word)
_zone_group = fr'((?:{time_zone_pat})\s*)?'          # optional part-of-day word
_tens_pat = fr'(?:{prefix_ten})?십'                   # '십', '이십', ...
_minute_word_group = (
    fr'((?:{_tens_pat})(?:{_digit_word_pat})'         # tens + ones
    fr'|(?:{_tens_pat})'                              # tens only
    fr'|(?:{_digit_word_pat}))'                       # ones only
)

# Hour and minute both present. Each alternative keeps the same capture groups
# as before (zone, hour, minute), since callers rely on the non-empty group count.
patterns = [
    r'(\d\d?)\s*:\s*(\d\d)',          # 09:23, 2:23
    fr'{_zone_group}(\d\d?)\s*시\s*(\d\d?)\s*분?',   # 9시 23분, 09시     2분, 4시4
    fr'{_zone_group}({_hour_word_pat})\s*시\s*(\d\d?)\s*분?',
    fr'{_zone_group}(\d\d?)\s*시\s*{_minute_word_group}\s*분',
    fr'{_zone_group}({_hour_word_pat})\s*시\s*{_minute_word_group}\s*분',
]

# Half past the hour; both variants now accept whitespace between '30' and '분'.
half_patterns = [
    fr'{_zone_group}(\d\d?)\s*시\s*(?:반|30\s*분?)',
    fr'{_zone_group}({_hour_word_pat})\s*시\s*(?:반|30\s*분?)',
]

# Hour only.
hour_only_patterns = [
    fr'{_zone_group}(\d\d?)\s*시',
    fr'{_zone_group}({_hour_word_pat})\s*시',
]

pat = '|'.join(patterns)
half_pat = '|'.join(half_patterns)
hour_only_pat = '|'.join(hour_only_patterns)

In [538]:
# Archived inline version of the regex-based time check: for every feature of the
# chosen slot, extract time mentions from each utterance with the module-level
# regexes and compare the normalised 'HH:MM' with the label value.
slot_to_check = '식당-예약 시간'
no_space = True

# Keep the previous result around; requires `not_found` from an earlier cell.
old_found = not_found
not_found = []
for x in slot2guids[slot_to_check]:
# for x in [use_found[3]]:
# for x in old_found:
    if x.value in special_values:
        continue
        
    found = False
    for turn in x.current_turn:
        # Hour-and-minute patterns.
        matches = re.findall(pat, turn, overlapped=False)

        matches = filter_no_str(matches)
        for match in matches:
            if len(match) < 2:
                continue
                
            if len(match) == 3:
                # [zone, hour, minute]: resolve the hour with the part-of-day
                # word, then shift the minute into position 1.
                if match[1] in word2index:
                    match[1] = word2index[match[1]]
                match[0] = solve_time(match[0], int(match[1]))
                match[1] = match[2]
            else:
                # [hour, minute]
                if match[0] in word2index:
                    match[0] = word2index[match[0]]
                    
            # A minute that does not start with a digit is a spelled-out word.
            if not ('0' <= match[1].strip()[0] <= '9'):
                match[1] = word2min(match[1])
                    
            found |= (f'{match[0]:0>2}:{match[1]:0>2}' == x.value)
        
        # Half-past patterns: candidate minute is fixed to 30.
        half_matches = re.findall(half_pat, turn, overlapped=False)
        half_matches = filter_no_str(half_matches)
        for match in half_matches:
            if len(match) == 2:
                if match[1] in word2index:
                    match[1] = word2index[match[1]]
                match[0] = solve_time(match[0], int(match[1]))
            else:
                if match[0] in word2index:
                    match[0] = word2index[match[0]]

            found |= (f'{match[0]:0>2}:30' == x.value)
            
        # Hour-only patterns: candidate minute is fixed to 00.
        hour_only_matches = re.findall(hour_only_pat, turn, overlapped=False)
        hour_only_matches = filter_no_str(hour_only_matches)
        for match in hour_only_matches:
            if len(match) == 2:
                if match[1] in word2index:
                    match[1] = word2index[match[1]]
                match[0] = solve_time(match[0], int(match[1]))
            else:
                if match[0] in word2index:
                    match[0] = word2index[match[0]]

            found |= (f'{match[0]:0>2}:00' == x.value)

    if not found:
        not_found.append(x)
        
print(f'{len(slot2guids[slot_to_check])} -> {len(not_found)}')

[
 autumn-mountain-9993:식당_관광_지하철_14-1
 VAL: 03:30
 SYS: 안녕하세요? 서울 광희동에 위치한 평점 4점의 어차피자라는 곳은 어떠신가요?
 USR: 피자집인거죠? 괜찮네요. 저 여기로 금요일 3시에 4명으로 예약 부탁드립니다.
 before: {'식당-가격대': '적당', '식당-지역': 'dontcare', '식당-종류': '양식당'},
 
 dark-resonance-0485:식당_택시_8-2
 VAL: 15:00
 SYS: 네 그러면 외계인의맛집과 만칠천무한리필을 추천 드리는데 어디로 예약 도와드릴까요?
 USR: 무한리필은 맛없으니깐 외계인의맛집으로 할께요. 예약 하고 식당에 10시까지 도착하는 택시도 같이 불러주세요.
 before: {'식당-가격대': 'dontcare', '식당-지역': '서울 중앙', '식당-종류': '한식당', '식당-흡연 가능': 'yes'},
 
 tight-sky-0988:숙소_식당_택시_10-4
 VAL: 23:20
 SYS: 혹시 서울 강서구에 위치한 스위소보루라는 곳은 어떠세요?
 USR: 음. 방금 사진을 찾아봤는데 굉장히 먹음직스럽네요. 화요일 23시30분에 여기로 4명 예약해주세요.
 before: {'숙소-가격대': 'dontcare', '숙소-종류': 'dontcare', '숙소-지역': '서울 중앙', '숙소-예약 요일': '일요일', '숙소-예약 명수': '4', '숙소-예약 기간': '3', '숙소-이름': '동키 게스트 하우스', '식당-가격대': '저렴', '식당-지역': 'dontcare', '식당-종류': '베이커리'},
 
 falling-king-5536:식당_숙소_택시_15-3
 VAL: 14:30
 SYS: 슈크림 빵입니다.
 USR: 그럼 화요일 2시 반에 한 명으로 예약해 주세요.
 before: {'식당-가격대': 'dontcare', '식당-지역': '서울 동쪽', '식당-종류': '베이커리'},
 
 withered-queen-5280:숙소

In [522]:
matches

[]