# Importing packages

In [1]:
import spacy
import json
import pandas as pd
import re

# Obtaining JSON file

In [2]:
# Read final.json as JSON Lines: every non-empty line is parsed as one JSON
# object and collected in `floors` (the cells below treat each object as one floor).
# The context manager guarantees the file handle is closed, even when a line
# fails to parse.
floors = []
with open('final.json', 'r') as json_file:
    for line in json_file:
        if line.strip():  # skip stray blank lines instead of crashing in json.loads
            floors.append(json.loads(line))

In [3]:
# Load the spaCy 'en_core_web_sm' pipeline once; later cells use it for
# sentence splitting (`.sents`) and for token tags (`pos_` / `tag_`).
nlp = spacy.load('en_core_web_sm')

# Getting connections from the annotations

In [4]:
# For each floor record, keep only the text of the annotations labelled
# 'CONNECTION'. The result has one list of strings per floor, in file order.
list_of_connections = [
    [
        entry['text']
        for entry in floor_record['annotation']
        if entry['label'] == 'CONNECTION'
    ]
    for floor_record in floors
]

# Reformatting list_of_connections: splitting each annotation into individual sentences

In [5]:
# Run every connection text through spaCy and flatten it into its sentences,
# so each floor ends up with a flat list of single-sentence strings.
new_list_connections = [
    [
        sentence.text
        for annotation_text in floor_texts
        for sentence in nlp(annotation_text).sents
    ]
    for floor_texts in list_of_connections
]

# Initializing a dictionary of 50 dataframes

In [6]:
# One empty links table per floor, keyed 'floor1' .. 'floor50'.
# Each table has the columns RoomType (source room) and Link (linked room).
floor_dictionary = {
    f'floor{number}': pd.DataFrame(columns=['RoomType', 'Link'])
    for number in range(1, 51)
}

# Checking whether a sentence matches the "rooms are connected" pattern

In [7]:
def check_pattern(sent):
    """Tell whether `sent` matches the "A, B and C are connected" pattern.

    The sentence is parsed with spaCy and searched with a regular expression.
    A regex hit only counts when its character range maps onto whole tokens,
    i.e. when `Doc.char_span` returns a span rather than None.

    Args:
        sent: Sentence text to test.

    Returns:
        1 if at least one token-aligned match is found, otherwise 0.
    """
    parsed = nlp(sent)
    connected_re = re.compile(r"\w+(\s*,\s*\w+)*\s*and\s*(\w+\s*are\s*connected)\.?")

    for hit in connected_re.finditer(parsed.text):
        # char_span yields None when the match cuts through a token.
        if parsed.char_span(*hit.span()) is not None:
            return 1
    return 0

# If rooms are connected, adding them to the corresponding dataframe

In [8]:
def add_pattern_connected(floor_df, sent):
    """Append a ring of links for the rooms named in a "connected" sentence.

    Tokens whose coarse POS is PROPN or NOUN, or whose fine-grained tag is
    'VB', are collected in sentence order. (The 'VB' case presumably catches
    room names the tagger mislabels as verbs — TODO confirm.) Every collected
    token is linked to the next one and the last is linked back to the first,
    so n tokens produce n rows.

    Args:
        floor_df: DataFrame with 'RoomType' and 'Link' columns to extend.
        sent: Sentence text describing the connected rooms.

    Returns:
        A new DataFrame holding the previous rows followed by the new links,
        re-indexed from 0. If no token qualifies, `floor_df` is returned as is.
    """
    doc = nlp(sent)
    rooms = [
        token.text
        for token in doc
        if token.pos_ in ('PROPN', 'NOUN') or token.tag_ == 'VB'
    ]
    if not rooms:
        return floor_df

    # Rotate the list by one so each room is paired with its successor and the
    # last room wraps around to the first.
    successors = rooms[1:] + rooms[:1]
    new_rows = pd.DataFrame({'RoomType': rooms, 'Link': successors})

    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # for every row; a single concat does the same job once.
    return pd.concat([floor_df, new_rows], ignore_index=True)

# If rooms are adjacent/next to each other, adding them to the corresponding dataframe

In [9]:
def add_pattern_adjacent(floor_df, sent):
    """Append star-shaped links for an "A is adjacent/next to B, C" sentence.

    The sentence must contain a token-aligned match of the "X is <word> to Y"
    regular expression. When it does, the tokens whose coarse POS is PROPN or
    NOUN, or whose fine-grained tag is 'VB', are collected in sentence order,
    and the first one is linked to each of the others.

    Args:
        floor_df: DataFrame with 'RoomType' and 'Link' columns to extend.
        sent: Sentence text describing the adjacency.

    Returns:
        A new DataFrame holding the previous rows followed by the new links,
        re-indexed from 0. `floor_df` is returned unchanged when the sentence
        does not match or names fewer than two rooms.
    """
    doc = nlp(sent)

    expression = r"\w+\s*is\s*\w+\s*to\s*\w+(\s*,\s*\w+)*\.?"
    # A regex hit only counts when it maps onto whole tokens (char_span is
    # None otherwise). The rows do not depend on which match was found, so they
    # are emitted once per sentence rather than once per match.
    has_aligned_match = any(
        doc.char_span(*match.span()) is not None
        for match in re.finditer(expression, doc.text)
    )
    if not has_aligned_match:
        return floor_df

    rooms = [
        token.text
        for token in doc
        if token.pos_ in ('PROPN', 'NOUN') or token.tag_ == 'VB'
    ]
    if len(rooms) < 2:
        return floor_df

    neighbours = rooms[1:]
    new_rows = pd.DataFrame({
        'RoomType': [rooms[0]] * len(neighbours),
        'Link': neighbours,
    })

    # DataFrame.append was removed in pandas 2.0; concatenate all rows at once.
    return pd.concat([floor_df, new_rows], ignore_index=True)

# The code that runs the functions

In [10]:
# Classify every sentence of every floor and add its links to that floor's
# dataframe. The loop walks the floors that were actually loaded, capped at the
# number of prepared dataframes, instead of a hard-coded range(50), which
# raised IndexError whenever fewer than 50 floors had been parsed.
# NOTE: re-running this cell without re-creating floor_dictionary adds the
# same rows a second time.
for floor_index, sentences in enumerate(new_list_connections[:len(floor_dictionary)]):
    floor_key = 'floor' + str(floor_index + 1)
    floor_df = floor_dictionary[floor_key]
    for sentence in sentences:
        if check_pattern(sentence):
            floor_df = add_pattern_connected(floor_df, sentence)
        else:
            floor_df = add_pattern_adjacent(floor_df, sentence)
    floor_dictionary[floor_key] = floor_df
            

### To cross-check the results, evaluate `new_list_connections[0]` and `floor_dictionary['floor1']` for the 1st floor, as shown below.

In [11]:
# Sentences extracted for the first floor (index 0).
new_list_connections[0]

['bedroom1, bedroom2, kitchen1, balcony1, washroom1, washroom2, balcony2 and livingroom1 are connected',
 'bedroom1 is adjacent to balcony1, washroom1',
 'bedroom2 is next to washroom2, balcony2',
 'kitchen1 is adjacent to balcony2',
 'washroom2 and washroom1 are connected']

In [12]:
# Links dataframe built for the first floor.
floor_dictionary['floor1']

Unnamed: 0,RoomType,Link
0,bedroom1,bedroom2
1,bedroom2,kitchen1
2,kitchen1,balcony1
3,balcony1,washroom1
4,washroom1,washroom2
5,washroom2,balcony2
6,balcony2,livingroom1
7,livingroom1,bedroom1
8,bedroom1,balcony1
9,bedroom1,washroom1
