In [5]:
import json
import re
import time

In [6]:
fpath_small = 'sensordata_small.txt'
fpath_7 = 'sensordata_7days.txt'
fpath_365 = 'sensordata_365.txt'

In [7]:
class SensorData:
    '''
    Used for extracting the date of the collection of the data points.\n
    The data points consists of
    - Temperature (in degree Fahrenheit)
    - Humidity (in %)
    '''
    def __init__(self, filePath):
        self.__data_filePath = filePath
        self.__sensor_records = []
        
        # below are the regular expressions used in the parseData function
        
        self.__DATE_RE = re.compile(r"(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})")
        self.__VALUE_RE = re.compile(r"(?P<temperature>\d{4}|NNNN)(?P<humidity>\d{3}|NNN)")
        
        
    def parseData(self):
        with open(self.__data_filePath, 'r', encoding = 'utf-8') as file:
            for line in file: #going through each line one at a time
                tempDict = {}

                match = self.__DATE_RE.search(line) # extrcating the date from the line

                if match:                    
                    t = match.groupdict()
                    year, month, day = t['year'], t['month'], t['day']
                    tempDict['date'] = f'{year}-{month}-{day}'
                    # converting date in a universal format

                match = self.__VALUE_RE.finditer(line, len(match.group(0)))

                for m in match: # iterator for extracting each set of temp-humidity values
                    
                    for key, value in m.groupdict().items():
                        try:
                            value = int(value)
                            
                            if key == 'temperature':
                                value /= 10 
                            elif key == 'humidity':
                                value /= 100

                            # converted into the proper values
                        except: # if N is encounted
                            value = None
                        tempDict[key] = tempDict.get(key, []) + [value]
                    
                self.__sensor_records.append(tempDict)
   

    def __calcTime(function): # used for decorating the function for calculating the time taken
        def wrapper(self):
            start_time = time.time()
            function(self)
            end_time = time.time()
            return f'Time taken = {(end_time - start_time):0.6f} seconds'
        return wrapper
    
    
    def saveDataJSON(self):
        fileName = self.__data_filePath.split('/')[-1].split('.')[0] + '.json'
        
        with open(fileName, 'w', encoding = 'utf-8') as writeFile:
            json.dump(self.__sensor_records, writeFile, ensure_ascii=False)
        print('JSON file created')
    
    @__calcTime
    def main(self): # main function to be run in the class
        return self.parseData()

In [8]:
a = SensorData(fpath_small)
print(a.main())
a.saveDataJSON()

Time taken = 0.009648 seconds
JSON file created


In [9]:
b = SensorData(fpath_7)
print(b.main())
b.saveDataJSON()

Time taken = 0.266728 seconds
JSON file created


In [10]:
c = SensorData(fpath_365)
print(c.main())
c.saveDataJSON()

Time taken = 11.536819 seconds
JSON file created
