# In [None]:
"""
Design an algorithm to encode a list of strings to a string. The encoded string
is then sent over the network and is decoded back to the original list of
strings.

Machine 1 (sender) has the function:

string encode(vector<string> strs) {
  // ... your code
  return encoded_string;
}

Machine 2 (receiver) has the function:
vector<string> decode(string s) {
  //... your code
  return strs;
}

Machine 1 does:
    string encoded_string = encode(strs);
Machine 2 does:
    vector<string> strs2 = decode(encoded_string);

strs2 in Machine 2 should be the same as strs in Machine 1. Implement the encode and decode methods. You are not allowed to solve the problem using any serialize methods (such as eval).


Example 1:
    Input: 
        dummy_input = ["Hello","World"]
    Output: 
        ["Hello","World"]
    Explanation:
        Machine 1:
        Codec encoder = new Codec();
        String msg = encoder.encode(strs);
        Machine 1 ---msg---> Machine 2

        Machine 2:
        Codec decoder = new Codec();
        String[] strs = decoder.decode(msg);

Example 2:
    Input:
        dummy_input = [""]
    Output:
        [""]

Constraints:
    1 <= strs.length <= 200
    0 <= strs[i].length <= 200
    strs[i] contains any possible characters out of 256 valid ASCII characters.

Follow up: Could you write a generalized algorithm to work on any possible set of characters?

-- TIP:
    - Use fixed-length encoding for the metadata, e.g.
      word_count | word_len_1 | ... | word_len_n | joined-words

https://leetcode.com/problems/encode-and-decode-strings/editorial/

    - The word count may not be needed: put a fixed-width length in front of
      each word instead, i.e. word_len | word | word_len | word | ...
"""

from typing import List

# Best
class Codec:
    """Length-prefixed codec with a 4-character binary length header.

    Every string is written as ``<header><string>``, where the header holds
    the string's length as a big-endian base-256 number, one character
    (code point 0-255) per digit. Because the decoder is driven purely by
    the lengths, the strings themselves may contain any characters.
    """

    def len_to_str(self, x):
        """Return the 4-character length header for the string ``x``.

        :type x: str
        :rtype: str
        """
        length = len(x)
        # Most significant digit first, so str_to_int can fold left to right.
        return ''.join(chr((length >> shift) & 0xff) for shift in (24, 16, 8, 0))

    def encode(self, strs):
        """Encodes a list of strings to a single string.

        :type strs: List[str]
        :rtype: str
        """
        # The payload must stay a str: appending x.encode('utf-8') (bytes) to
        # the str header raises TypeError on Python 3.
        return ''.join(self.len_to_str(x) + x for x in strs)

    def str_to_int(self, bytes_str):
        """Decode a big-endian base-256 header string back to an integer.

        :type bytes_str: str
        :rtype: int
        """
        result = 0
        for ch in bytes_str:
            result = result * 256 + ord(ch)
        return result

    def decode(self, s):
        """Decodes a single string to a list of strings.

        :type s: str
        :rtype: List[str]
        """
        i, n = 0, len(s)
        output = []
        while i < n:
            # Read the 4-character header, then slice out exactly that many
            # characters as the next string.
            length = self.str_to_int(s[i: i + 4])
            i += 4
            output.append(s[i: i + length])
            i += length
        return output

# Better
class Codec:
    """Length-prefixed codec with a fixed 3-digit decimal header per word.

    Layout: ``<len_1:03><word_1><len_2:03><word_2>...``. The decoder is
    driven by the lengths, so words may contain any characters (including
    digits).
    """

    # Number of decimal digits in each length header; this bounds the longest
    # word that can be represented (999 characters).
    _HEADER_WIDTH = 3

    def encode(self, strs: List[str]) -> str:
        """Encodes a list of strings to a single string.

        Raises:
            ValueError: if a word is too long for the fixed-width header.
                Without this check the header would silently grow to four
                digits and ``decode`` would misread it.
        """
        width = self._HEADER_WIDTH
        max_len = 10 ** width - 1
        chunks = []
        for word in strs:
            if len(word) > max_len:
                raise ValueError(
                    f'word of length {len(word)} exceeds the maximum '
                    f'encodable length {max_len}'
                )
            chunks.append(f'{len(word):0{width}}{word}')
        # A single join avoids the quadratic cost of repeated string +=.
        return ''.join(chunks)

    def decode(self, s: str) -> List[str]:
        """Decodes a single string to a list of strings.

        Raises:
            ValueError: if a length header is not a valid integer.
        """
        width = self._HEADER_WIDTH
        result = []
        start = 0
        while start < len(s):
            body = start + width
            word_len = int(s[start:body])
            result.append(s[body: body + word_len])
            start = body + word_len
        return result

class Codec:
    """Header-first codec: word count, then every length, then the words.

    Layout: ``<count:03><len_1:03>...<len_n:03><word_1>...<word_n>``.
    All metadata fields are 3-digit, zero-padded decimal numbers.
    """

    # Number of decimal digits in every metadata field; this bounds both the
    # number of words and the length of each word (999).
    _FIELD_WIDTH = 3

    def encode(self, strs: List[str]) -> str:
        """Encodes a list of strings to a single string.

        Raises:
            ValueError: if the word count or a word length is too large for
                a fixed-width field. Without this check the field would
                silently grow to four digits and ``decode`` would misread
                the whole header.
        """
        width = self._FIELD_WIDTH
        limit = 10 ** width - 1
        if len(strs) > limit:
            raise ValueError(
                f'cannot encode {len(strs)} words; the maximum is {limit}'
            )
        header = [f'{len(strs):0{width}}']
        for word in strs:
            if len(word) > limit:
                raise ValueError(
                    f'word of length {len(word)} exceeds the maximum '
                    f'encodable length {limit}'
                )
            header.append(f'{len(word):0{width}}')
        # Metadata first, then all words joined back to back.
        return ''.join(header) + ''.join(strs)

    def decode(self, s: str) -> List[str]:
        """Decodes a single string to a list of strings.

        Raises:
            ValueError: if a metadata field is not a valid integer (this
                includes an empty ``s``, which has no count field).
        """
        width = self._FIELD_WIDTH
        word_count = int(s[:width])
        result = []
        # The words start right after the count field and the length table.
        offset = width * (word_count + 1)
        for index in range(1, word_count + 1):
            field_start = width * index
            word_len = int(s[field_start: field_start + width])
            result.append(s[offset: offset + word_len])
            offset += word_len
        return result

# Runner code
# codec = Codec()
# codec.decode(codec.encode(['sample_str',]))