# In [313]:  (Jupyter notebook cell prompt left over from the export)
import json, re

class Solution:
    """Normalize comma-separated personal-information records into JSON.

    Each input line is expected to hold a name, a phone number, a color and a
    zip code, in one of these layouts (4 or 5 comma-separated fields):

        Lastname, Firstname, (703)-742-0996, Blue, 10013
        Firstname, Lastname, 10013, 646 111 0101, Green
        Firstname Lastname, Red, 11237, 703 955 0373
    """

    @staticmethod
    def _format_phone(digits):
        """Return a 10-digit string formatted as XXX-XXX-XXXX.

        Plain slicing is used so leading zeros survive; an int round-trip
        would silently drop them.
        """
        return '-'.join((digits[:3], digits[3:6], digits[6:]))

    @staticmethod
    def _parse_line(line):
        """Parse one raw input line.

        @Parameters:
        line: string, one line of the input file
        @Returns:
        (entry, problems): ``entry`` is a dict with the keys 'color',
        'firstname', 'lastname', 'phonenumber' and 'zipcode' when the line is
        valid, otherwise None; ``problems`` is the list of error messages
        collected for the line (empty when the line is valid).
        """
        items = [field.strip() for field in line.split(',')]

        # sanity check 1: only 4- and 5-field layouts are understood
        if len(items) not in (4, 5):
            return None, ['invalid entry']

        entry = {}
        problems = []
        # True  -> "(XXX)-XXX-XXXX" phone style, last name precedes first name
        # False -> "XXX XXX XXXX" phone style, first name precedes last name
        last_name_first = None

        # The last three fields are color / phone / zip in any order. They are
        # scanned right to left so error messages keep their historical order.
        for item in reversed(items[-3:]):
            if not item:
                # an empty field cannot be classified
                problems.append('invalid entry')
            elif item[0].isalpha():
                entry['color'] = item
            elif item[0] == '(' or len(item.split()) > 1:
                digits = re.sub('[^0-9]+', '', item)
                if len(digits) == 10:
                    entry['phonenumber'] = Solution._format_phone(digits)
                    last_name_first = item[0] == '('
                else:
                    problems.append('invalid phone number')
            else:
                digits = re.sub('[^0-9]+', '', item)
                if len(digits) == 5:
                    entry['zipcode'] = digits
                else:
                    problems.append('invalid zip code')

        # Whatever precedes the last three fields is the name: two fields in
        # the 5-field layouts, a single "First [Middle ...] Last" field in the
        # 4-field layout. Deciding by field count (not by counting tokens in
        # the first field) keeps names such as "Booker T." intact and never
        # borrows a color/zip/phone field as a last name.
        name_fields = items[:-3]
        if any(not part for part in name_fields) or not name_fields[0][0].isalpha():
            problems.append('invalid entry')
        elif len(name_fields) == 2:
            first, last = name_fields
            if last_name_first:
                first, last = last, first
            entry['firstname'] = first
            entry['lastname'] = last
        else:
            tokens = name_fields[0].split()
            if len(tokens) < 2:
                # a lone token cannot supply both first and last name
                problems.append('invalid entry')
            else:
                entry['firstname'] = ' '.join(tokens[:-1])
                entry['lastname'] = tokens[-1]

        # sanity check 2: every one of the five keys must have been filled in;
        # report the line instead of dropping it silently.
        if not problems and len(entry) != 5:
            problems.append('invalid entry')

        if problems:
            return None, problems
        return entry, []

    @staticmethod
    def infoToJSON(in_path='data.in', out_path='result.out'):
        """
        Read lines of personal information, normalize them and write the
        result as JSON.

        Valid lines become entries sorted by (last name, first name),
        case-insensitively. Every rejected line is reported under 'errors',
        keyed by its 0-based line index, with the list of problems found.

        @Parameters:
        in_path: string, input file to read (default 'data.in')
        out_path: string, JSON file to write (default 'result.out')
        @Returns:
        None
        """
        entries = []
        errors = {}

        # the context manager guarantees the input file is closed
        with open(in_path, 'r') as source:
            for line_idx, line in enumerate(source):
                entry, problems = Solution._parse_line(line)
                if entry is None:
                    errors[line_idx] = problems
                else:
                    entries.append(entry)

        # A tuple key compares last names first and only then first names;
        # concatenating the two strings mixes them up ("ab"+"c" vs "a"+"bd").
        entries.sort(key=lambda e: (e['lastname'].lower(), e['firstname'].lower()))

        # NOTE(review): 'entris' looks like a typo for 'entries'; it is kept
        # unchanged because it is part of the emitted file format - confirm
        # with the consumers of the output before renaming.
        out = {'entris': entries, 'errors': errors}

        with open(out_path, 'w') as result:
            json.dump(out, result, sort_keys=True, indent=2)

# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    Solution.infoToJSON()