In [1]:
from nltk.parse.generate import generate
from nltk import CFG
from collections import OrderedDict, defaultdict
import re

In [2]:
data = {0: ["shelter/NN", "naik/VBI", "turun/VBI", "kampung/NN", "pantai/NN", "marina/NN", "pelangi/NN", "jalan/NN", "simpang/NN", "deket/JJ"],
        1: ["simpang/NN", "arus/NN", "menit/NN", "lintas/NN", "lalu/JJ", "tugu/NN", "muda/JJ", "parkir/NN", "kawasan/NN", "bawah/NN"],
        2: ["semarang/NN", "simpang/NN", "bhayangkara/NN", "lapangan/NN", "jateng/NN", "hut/NN", "daerah/NN", "gue/NN", "hari/NN", "rakyat/NN"],
        3: ["semarang/NN", "canyon/NN", "brown/NN", "simpang/NN", "pak/NN", "melalui/VBT", "lokasi/NN", "kota/NN", "sih/NN", "jasa/NN"],
        4: ["lawang/NN", "sewu/NN", "sama/JJ", "foto/NN", "jadi/JJ", "film/NN", "orang/NN", "nonton/NN", "resa/NN", "depan/NN"],
        5: ["lawang/NN", "sewu/NN", "semarang/NN", "jawa/NN", "tengah/NN", "indonesia/NN", "api/NN", "kereta/NN", "kantor/NN", "ada/VBI"],
        6: ["bawah/NN", "tempat/NN", "lantai/NN", "tanah/NN", "ruang/NN", "utama/JJ", "basement/NN", "air/NN", "guide/NN", "depan/NN"],
        7: ["sewu/NN", "lawang/NN", "dibangun/VBT", "tahun/NN", "bangunan/NN", "belanda/NN", "memiliki/VBT", "ning/NNP", "salah/JJ", "aku/NN"],
        8: ["kota/NN", "koridor/NN", "simpang/NN", "semarang/NN", "sewu/NN", "lawang/NN", "lama/JJ", "sam/NN", "kong/NN", "poo/NN"],
        9: ["simpang/NN", "kediri/NN", "gumul/NN", "kabupaten/NN", "budaya/NN", "pekan/NN", "jalan/NN", "pariwisata/NN", "radio/NN", "monumen/NN"],
        10: ["pecinan/NN", "kota/NN", "rumah/NN", "magelang/NN", "semanis/NN", "jam/NN", "aja/NN", "salah/JJ", "gak/NN", "keep/NN"]
       }

In [3]:
NP = ['NN', 'NNP', 'NNG', 'NN DT', 'DT NNP', 'DT NNG', 'NN JJP', 'NNP JJP', 'NP CC NP']
VP = ['VBI NP', 'VBI IN NP', 'VBI NP PP', 'VBT', 'VBT NP', 'PP', 'JJP']

In [4]:
def get_story_tag(dict_tag_words):
    result = []
    for key in dict_tag_words:
        result.append(key)
    return result

In [5]:
def create_rules_tag(list_tag):
    result = list_tag[0]
    for (index, elem) in list(enumerate(list_tag))[1:]:
        result = result + ' | ' + elem
    return result

def create_rules_word(list_tag):
    result = repr(str(list_tag[0]))
    for (index, elem) in list(enumerate(list_tag))[1:]:
        result = result + ' | ' + repr(str(elem))
    return result

In [6]:
def generate_base_grammar(list_tag):
    S = 'S -> NP VP'
    PP = 'PP -> IN NP'
    if 'JJ' in list_tag:
        NP = 'NP -> NP CC NP | ' + create_rules_tag(generate_NP(list_tag))
        if check_VP(list_tag):
            VP = 'VP -> PP | JJP | ' + create_rules_tag(generate_VP(list_tag))
        else:
            VP = 'VP -> PP | JJP'
        JJP = 'JJP -> JJ | JJ CC JJ'
        return '\n'.join([S, NP, VP, PP, JJP])
    else:
        NP = 'NP -> NP CC NP | ' + create_rules_tag(remove_JJP(generate_NP(list_tag)))
        if check_VP(list_tag):
            VP = 'VP -> PP | ' + create_rules_tag(remove_JJP(generate_VP(list_tag)))
        else:
            VP = 'VP -> PP'
        return '\n'.join([S, NP, VP, PP])

def check_VP(list_tag):
    for tag in list_tag:
        if 'V' in tag:
            return True
    return False
    
def generate_NP(list_tag):
    result = []
    for tag in list_tag:
        for words in NP:
            if re.search(r'\b' + tag + r'\b', words):
                result.append(words)
    return list(OrderedDict.fromkeys(result))

def generate_VP(list_tag):
    result = []
    for tag in list_tag:
        for words in VP:
            if re.search(r'\b' + tag + r'\b', words):
                result.append(words)
    return list(OrderedDict.fromkeys(result))

def remove_JJP(list_tag):
    result = []
    for tag in list_tag:
        if 'JJ' in tag:
            continue
        else:
            result.append(tag)
    return result

In [7]:
def generate_words_grammar(dict_topic):
    result = ''
    IN = "IN -> 'di' | 'ke' | 'dari'"
    DT = "DT -> 'ini' | 'itu'"
    CC = "CC -> 'dan' | 'atau'"
    for key, value in dict_topic.items():
        result = result + '\n' + key + ' -> ' + create_rules_word(value)
    return result + '\n' + '\n'.join([IN, DT, CC])

In [8]:
def organize_words_by_tag(data):
    story = defaultdict(list)
    i = []
    for s in data:
        word = s.split('/')[0]
        tag = s.split('/')[1]
        story[tag].append(word)
    return dict(story)

In [9]:
def create_grammar(data):
    dict_data = organize_words_by_tag(data)
    story_tag = get_story_tag(dict_data)
    return generate_base_grammar(story_tag) + generate_words_grammar(dict_data)

In [10]:
print(create_grammar(data[0]))

S -> NP VP
NP -> NP CC NP | NN | NN DT | NN JJP
VP -> PP | JJP | VBI NP | VBI IN NP | VBI NP PP
PP -> IN NP
JJP -> JJ | JJ CC JJ
NN -> 'shelter' | 'kampung' | 'pantai' | 'marina' | 'pelangi' | 'jalan' | 'simpang'
VBI -> 'naik' | 'turun'
JJ -> 'deket'
IN -> 'di' | 'ke' | 'dari'
DT -> 'ini' | 'itu'
CC -> 'dan' | 'atau'


In [11]:
grammar = CFG.fromstring(create_grammar(data[0]))
print(grammar)

Grammar with 30 productions (start state = S)
    S -> NP VP
    NP -> NP CC NP
    NP -> NN
    NP -> NN DT
    NP -> NN JJP
    VP -> PP
    VP -> JJP
    VP -> VBI NP
    VP -> VBI IN NP
    VP -> VBI NP PP
    PP -> IN NP
    JJP -> JJ
    JJP -> JJ CC JJ
    NN -> 'shelter'
    NN -> 'kampung'
    NN -> 'pantai'
    NN -> 'marina'
    NN -> 'pelangi'
    NN -> 'jalan'
    NN -> 'simpang'
    VBI -> 'naik'
    VBI -> 'turun'
    JJ -> 'deket'
    IN -> 'di'
    IN -> 'ke'
    IN -> 'dari'
    DT -> 'ini'
    DT -> 'itu'
    CC -> 'dan'
    CC -> 'atau'


In [12]:
for sentence in generate(grammar, n=5):
    print(' '.join(sentence))

AttributeError: 'RecursionError' object has no attribute 'message'

In [None]:
grammar.productions()

In [None]:
for sentence in generate(grammar, depth=5):
    print(' '.join(sentence))

In [None]:
for sentence in generate(grammar):
    print(' '.join(sentence))

In [None]:
a = ['JJP', 'PP']
for i in a:
    if 'JJ' in i:
        print(i)

In [4]:
import random

foo = ['a', 'b', 'c', 'd', 'e']
print(random.choice(foo))

d
