# SYSC 5906:
## **Room Detection - Dataset Processor: PKL**

---
Script to process a subset of the MIT Indoor Scenes dataset. It generates a new dataset for training a room/environment classifier. This version includes X/Y coordinates for each object and covers only a subset of the room types.

This script **outputs the data as a .pkl file**, for use with Keras/TF.

## Step 1: Access Drive
Mount the Google Drive that contains the provided .zip file of code.

In [None]:
#Make Google Drive visible to this Colab runtime.
#force_remount=True re-mounts even when a previous mount is still present
from google.colab import drive

mount_point = '/gdrive'
drive.mount(mount_point, force_remount=True)

Mounted at /gdrive


In [None]:
#Install missing libraries
%%capture 
!pip install pandas_read_xml

## Step 2: Setup
Import the relevant libraries, point Colab at the original MIT dataset annotations (without the images), and collect the room folders inside.

In [None]:
import pandas as pd
import xml.etree.ElementTree as et
import os
import pickle
from pathlib import Path
import glob
import csv
import numpy as np
from collections import Counter
import pandas_read_xml as pdx
from pandas_read_xml import flatten, fully_flatten, auto_separate_tables

#Root folder of the annotation set: one sub-folder per room type, each holding .xml files.
#Paths are built by plain string concatenation, so the trailing '/' is required.
DATASET_DIRECTORY = '/gdrive/My Drive/Colab Notebooks/SYSC 5906/datasets/mit_indoors/indoorCVPR_09annotations/Annotations/'
#Folder the processed .pkl file is written to (also concatenated, keep the trailing '/').
PICKLE_DIRECTORY = '/gdrive/My Drive/Colab Notebooks/SYSC 5906/datasets/mit_indoors/processed/data_subset/'

In [None]:
#Room types (annotation folder names) kept in the generated subset of the MIT dataset.
#Larger candidate set, currently disabled:
#  bathroom, bedroom, children_room, dining_room, corridor, garage, livingroom,
#  kitchen, office, pantry, computerroom, staircase, closet
desiredRooms = [
    'bathroom',
    'bedroom',
    'dining_room',
    'corridor',
    'livingroom',
    'kitchen',
    'office',
]


In [None]:
#Collect the annotation folders that belong to the desired room subset.
#os.scandir yields entries in arbitrary order, so the names are sorted: the
#integer room labels are derived from the position of each folder in this
#list, and sorting keeps them identical from run to run and machine to machine.
#Only real directories are kept; a stray file that happens to carry a room
#name would otherwise be treated as a folder further down.
with os.scandir(DATASET_DIRECTORY) as entries:
    folderList = sorted(
        entry.name
        for entry in entries
        if entry.is_dir() and entry.name in desiredRooms
    )

#Generate dataframe with folder names (single column, one row per folder)
folderDF = pd.DataFrame(folderList)

## Step 3: Parse XML
Extract object data from the .xml annotation files in the MIT dataset.

In [None]:
#One record per parsed annotation file:
#  [room label, [object label, x_centre, y_centre], [object label, x_centre, y_centre], ...]
listOfAllObj = []
numFiles = 0

#Unique object names seen so far. objectMap assigns the integer labels:
#the label of a name is its position in objectMap.
uniqueObjs = set()
objectMap = []
#Reverse lookup (name -> label) so no linear objectMap.index() scan is needed per object
objectIndex = {}

#Unique room types (folders). roomMap assigns the integer room labels:
#the label of a room is its position in roomMap.
roomNames = set()
roomMap = []

#An empty folder dataframe has no column 0, so guard against that case
folderNames = folderDF.iloc[:,0] if folderDF.shape[1] else []

#Cycle through collected folders
for folder in folderNames:
    roomNames.add(folder) #Record all of the possible room names
    roomMap.append(folder)
    roomLabel = roomMap.index(folder)

    for file in os.listdir(DATASET_DIRECTORY+folder):
        #Skip everything that is not an annotation file, so no record is
        #created (or re-appended) for non-xml files
        if not file.endswith("xml"):
            continue
        numFiles += 1

        #Room label integer comes first, the objects follow
        fileObj = [roomLabel]

        #Read the xml; the with-block guarantees the handle is closed.
        #'replace' is the decode-side error handler ('xmlcharrefreplace' only
        #works when encoding and raises TypeError on a bad byte while reading)
        with open(DATASET_DIRECTORY+folder+"/"+file,"r",
                  encoding="UTF-8",errors="replace") as curXML:
            fileContents = curXML.read()

        #Create a element tree from the xml
        etree = et.fromstring(fileContents, parser=et.XMLParser())

        #Run through all of the object tags inside the element tree
        for obj in etree.findall(".//object"):
            objStr = obj.findall("name")[0].text
            pts = obj.findall("polygon")[0].findall("pt")

            #Extract X-Y coordinates of every polygon point
            x_vals = [int(pt.findall("x")[0].text) for pt in pts]
            y_vals = [int(pt.findall("y")[0].text) for pt in pts]
            if not x_vals:
                #A polygon without points has no position; skip the object
                continue

            #Centre of the bounding box: midpoint between the extremes
            #(max - min would only give half the box size, not a position)
            x_avg = (min(x_vals) + max(x_vals))/2
            y_avg = (min(y_vals) + max(y_vals))/2
            cleanObjStr = objStr.strip('\n')

            #Replace object string with integer, registering new names on first sight
            if cleanObjStr not in objectIndex:
                objectIndex[cleanObjStr] = len(objectMap)
                objectMap.append(cleanObjStr)
                uniqueObjs.add(cleanObjStr)

            #Store object label and position (centroid)
            fileObj.append([objectIndex[cleanObjStr],x_avg,y_avg])

        #Store list of all objects from the current file
        listOfAllObj.append(fileObj)

    print("Finished processing " + folder +" folder!")

Finished processing corridor folder!
Finished processing bedroom folder!
Finished processing livingroom folder!
Finished processing dining_room folder!
Finished processing kitchen folder!
Finished processing bathroom folder!
Finished processing office folder!


In [None]:
#Dump every parsed record for a visual sanity check.
#Each record is [room label, [object label, x, y], ...]; the output is very long.
print(listOfAllObj)

[[0, [0, 15.0, 41.5], [0, 5.5, 64.0], [0, 13.0, 66.0], [0, 17.0, 50.0], [0, 19.0, 61.0]], [0, [1, 98.5, 63.0], [2, 23.5, 110.5], [3, 96.0, 39.0], [4, 76.5, 127.5], [4, 31.5, 127.5], [5, 13.5, 11.0], [5, 8.0, 8.5], [5, 5.5, 4.5], [5, 3.5, 2.5], [5, 2.5, 1.5], [2, 11.5, 119.5], [2, 8.0, 58.5], [6, 3.0, 7.0], [7, 2.5, 31.5], [8, 11.5, 20.0], [2, 5.0, 67.5]], [0, [2, 7.5, 26.5], [2, 11.5, 104.0], [2, 9.5, 72.0], [2, 6.0, 31.0]], [0, [5, 4.5, 2.0], [5, 4.0, 1.0], [5, 3.0, 1.0], [5, 2.0, 1.0], [1, 45.0, 57.0], [9, 46.5, 110.0], [10, 16.5, 74.5], [11, 23.0, 102.0], [12, 30.0, 24.0], [2, 4.5, 64.5], [13, 5.5, 37.0], [14, 5.5, 20.5], [15, 14.5, 10.0], [15, 11.0, 14.5], [3, 82.5, 49.5], [16, 8.5, 9.0], [17, 9.0, 44.0], [18, 6.0, 16.0], [19, 3.5, 14.0], [20, 6.0, 9.0], [21, 21.5, 19.0], [22, 7.0, 14.5], [22, 2.5, 4.5], [4, 19.0, 16.5], [19, 6.0, 2.5], [4, 55.5, 123.5]], [0, [23, 178.5, 186.5], [24, 26.0, 136.0], [2, 19.5, 153.0], [25, 196.5, 103.5], [26, 21.5, 18.0], [27, 301.5, 91.5], [28, 78.5,

## Step 4: Pickle the data
Process extracted data into a Pickle to be used with Keras

The format looks like:\
[room label, [obj1 tuple], [obj2 tuple], ..., [objn tuple]]

Each tuple holds: object label (integer), x_avg, y_avg

In [None]:
#Pickle the data so we can access it later.
#The with-block closes the file even when pickle.dump raises.
with open(PICKLE_DIRECTORY+"listOfAllObjLoc.pkl","wb") as pckl:
    pickle.dump(listOfAllObj,pckl)

#NOTE(review): only listOfAllObj is written in this cell. objectMap and roomMap,
#which translate the integer labels back to names, are not saved here - confirm
#they are persisted elsewhere if the labels need to be decoded later.

#Test pickle
# with open(PICKLE_DIRECTORY+"listOfAllObjLoc.pkl","rb") as new_pckl:
#     new_list = pickle.load(new_pckl)