# Account Merge

You are given a list of accounts, where each element accounts[i] is a list of
strings: the first element accounts[i][0] is a name, and the remaining elements
are the emails of that account.

Now, we would like to merge these accounts. Two accounts definitely belong to
the same person if there is some email that is common to both accounts.

After merging the accounts, return the accounts in the following format:
  - The first element of each account is the name.
  - The rest of the elements are emails in sorted order.
The accounts themselves can be returned in any order.

NOTE:
  1. Name is not a unique identifier. Different accounts may share the same name.
  2. Each account is only associated with one name.

EXAMPLE:
```
Input: accounts = [
    ["John", "johnsmith@mail.com", "john00@mail.com"],
    ["John", "johnnybravo@mail.com"],
    ["John", "johnsmith@mail.com", "john_newyork@mail.com"],
    ["Mary", "mary@mail.com"]]

Output: [
    ["John", 'john00@mail.com', 'john_newyork@mail.com', 'johnsmith@mail.com'],
    ["John", "johnnybravo@mail.com"],
    ["Mary", "mary@mail.com"]]
```

REFERENCE:
  - https://leetcode.com/problems/accounts-merge/ (Medium)
  - https://www.geeksforgeeks.org/find-same-contacts-in-a-list-of-contacts/


In [4]:
from collections import defaultdict
from typing import List


class Solution:
    """Merge accounts that share at least one email address."""

    def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
        """Merge accounts that are linked, directly or transitively, by a shared email.

        Accounts are identified by their position in ``accounts``. Linked
        accounts are grouped with a union-find structure whose representative
        is always the smallest index in the group, so each merged account
        takes the name of its earliest member.

        The input is never modified.

        Args:
            accounts: Each entry is ``[name, email, email, ...]``.

        Returns:
            One ``[name, *emails]`` list per merged account, with the emails
            de-duplicated and sorted. Merged accounts are ordered by the
            position of their earliest member in ``accounts``. Entries that
            carry no email at all do not appear in the result.
        """
        parent: List[int] = list(range(len(accounts)))

        def find(node: int) -> int:
            """Return the representative of ``node``'s group, compressing the path."""
            root = node
            while parent[root] != root:
                root = parent[root]
            # Point every node on the walked path straight at the root.
            while parent[node] != root:
                next_node = parent[node]
                parent[node] = root
                node = next_node
            return root

        def union(left: int, right: int) -> None:
            """Join two groups, keeping the smaller index as representative."""
            root_left, root_right = find(left), find(right)
            if root_left == root_right:
                return
            if root_left > root_right:
                root_left, root_right = root_right, root_left
            parent[root_right] = root_left

        # The first account seen with an email "owns" it; any later account
        # carrying the same email is linked to that owner's group.
        email_owner = {}   # {email: account_id}
        for account_id, account in enumerate(accounts):
            for email in account[1:]:
                if email in email_owner:
                    union(account_id, email_owner[email])
                else:
                    email_owner[email] = account_id

        # Dict keys are unique, so each email lands in exactly one group once.
        emails_by_root = defaultdict(list)
        for email, account_id in email_owner.items():
            emails_by_root[find(account_id)].append(email)

        return [[accounts[root][0], *sorted(emails_by_root[root])]
                for root in sorted(emails_by_root)]
    
    
def main():
    """Run the account merger on a few sample inputs and print each result."""

    # Sample inputs; the trailing comment gives the expected number of
    # accounts after merging.
    cases = [
        [["John", "johnsmith@mail.com", "john00@mail.com"],
         ["John", "johnnybravo@mail.com"],
         ["John", "johnsmith@mail.com", "john_newyork@mail.com"],
         ["Mary", "mary@mail.com"]],            # 3 accounts
        [['Alex', 'Alex5@m.co', 'Alex4@m.co', 'Alex0@m.co'],
            ['Ethan', 'Ethan3@m.co', 'Ethan3@m.co', 'Ethan0@m.co'],
            ['Kevin', 'Kevin4@m.co', 'Kevin2@m.co', 'Kevin2@m.co'],
            ['Gabe', 'Gabe0@m.co', 'Gabe3@m.co', 'Gabe2@m.co'],
            ['Gabe', 'Gabe3@m.co', 'Gabe4@m.co', 'Gabe2@m.co']],   # 4 accounts
        [["David", "David0@m.co", "David1@m.co"],
            ["David", "David3@m.co", "David4@m.co"],
            ["David", "David4@m.co", "David5@m.co"],
            ["David", "David2@m.co", "David3@m.co"],
            ["David", "David1@m.co", "David2@m.co"]]   # 1 account
    ]

    merger = Solution()
    for case in cases:
        # Show the input before merging, then one line per merged account.
        print("\n# Input: {}".format(case))
        merged = merger.accountsMerge(case)
        print("+ Output:")
        for name, *emails in merged:
            print(f"  - {name}: {emails}")


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


# Input: [['John', 'johnsmith@mail.com', 'john00@mail.com'], ['John', 'johnnybravo@mail.com'], ['John', 'johnsmith@mail.com', 'john_newyork@mail.com'], ['Mary', 'mary@mail.com']]
+ Output:
  - John: ['john00@mail.com', 'john_newyork@mail.com', 'johnsmith@mail.com']
  - John: ['johnnybravo@mail.com']
  - Mary: ['mary@mail.com']

# Input: [['Alex', 'Alex5@m.co', 'Alex4@m.co', 'Alex0@m.co'], ['Ethan', 'Ethan3@m.co', 'Ethan3@m.co', 'Ethan0@m.co'], ['Kevin', 'Kevin4@m.co', 'Kevin2@m.co', 'Kevin2@m.co'], ['Gabe', 'Gabe0@m.co', 'Gabe3@m.co', 'Gabe2@m.co'], ['Gabe', 'Gabe3@m.co', 'Gabe4@m.co', 'Gabe2@m.co']]
+ Output:
  - Alex: ['Alex0@m.co', 'Alex4@m.co', 'Alex5@m.co']
  - Ethan: ['Ethan0@m.co', 'Ethan3@m.co']
  - Kevin: ['Kevin2@m.co', 'Kevin4@m.co']
  - Gabe: ['Gabe0@m.co', 'Gabe2@m.co', 'Gabe3@m.co', 'Gabe4@m.co']

# Input: [['David', 'David0@m.co', 'David1@m.co'], ['David', 'David3@m.co', 'David4@m.co'], ['David', 'David4@m.co', 'David5@m.co'], ['David', 'David2@m.co', 'David3@m.co'], ['