In [1]:
import re
import doctest
import functools

In [2]:
try:
	# Check if functiools.cache is available
	cache_decorator = functools.cache
except AttributeError:
	try:
		# Check if functools.lru_cache is available
		cache_decorator = functools.lru_cache(maxsize=None)
	except AttributeError:
		# If neither is available, use a dummy decorator
		def cache_decorator(func):
			return func

In [3]:
def tokenize_hostname(hostname):
    """
    Tokenize the hostname into a list of tokens.
    Tokens will be separated by symbols or numbers.

    Args:
        hostname (str): The hostname to tokenize.

    Returns:
        list: A list of tokens.

    Example:
        >>> tokenize_hostname('www.example.com')
        ('www', '.', 'example', '.', 'com')
        >>> tokenize_hostname('localhost')
        ('localhost',)
        >>> tokenize_hostname('Sub-S1')
        ('Sub', '-', 'S', '1')
        >>> tokenize_hostname('Sub-S10')
        ('Sub', '-', 'S', '10')
        >>> tokenize_hostname('Process-Client10-1')
        ('Process', '-', 'Client', '10', '-', '1')
        >>> tokenize_hostname('Process-C5-15')
        ('Process', '-', 'C', '5', '-', '15')
        >>> tokenize_hostname('192.168.1.1')
        ('192', '.', '168', '.', '1', '.', '1')
    """
    # Regular expression to match sequences of letters, digits, or symbols
    tokens = re.findall(r'[A-Za-z]+|\d+|[^A-Za-z0-9]', hostname)
    return tuple(tokens)


In [4]:
doctest.run_docstring_examples(tokenize_hostname, globals())

In [5]:
@cache_decorator
def hashTokens(tokens):
	"""
	Translate a list of tokens in string to a list of integers with positional information.

	Args:
		tokens (tuple): A tuple of tokens.

	Returns:
		list: A list of integers.

	Example:
		>>> tuple(hashTokens(('1')))
		(1,)
		>>> tuple(hashTokens(('1', '2')))
		(1, 2)
		>>> tuple(hashTokens(('1', '.', '2')))
		(1, -5047856122680242044, 2)
		>>> tuple(hashTokens(('Process', '-', 'C', '5', '-', '15')))
		(117396829274297939, 7549860403020794775, 8629208860073383633, 5, 7549860403020794775, 15)
		>>> tuple(hashTokens(('192', '.', '168', '.', '1', '.', '1')))
		(192, -5047856122680242044, 168, -5047856122680242044, 1, -5047856122680242044, 1)
	"""
	return tuple(int(token) if token.isdigit() else hash(token) for token in tokens)

	"""
	Translate a list of tokens in string to a list of integers with positional information.

	Args:
		tokens (tuple): A tuple of tokens.

	Returns:
		list: A list of integers.

	Example:
		>>> tuple(hashTokens(('1')))
		(1,)
		>>> tuple(hashTokens(('1', '2')))
		(1, 2)
		>>> tuple(hashTokens(('1', '.', '2')))
		(1, -5047856122680242044, 2)
		>>> tuple(hashTokens(('Process', '-', 'C', '5', '-', '15')))
		(117396829274297939, 7549860403020794775, 8629208860073383633, 5, 7549860403020794775, 15)
		>>> tuple(hashTokens(('192', '.', '168', '.', '1', '.', '1')))
		(192, -5047856122680242044, 168, -5047856122680242044, 1, -5047856122680242044, 1)
	"""
	return tuple(int(token) if token.isdigit() else hash(token) for token in tokens)


In [6]:
doctest.run_docstring_examples(hashTokens, globals())

**********************************************************************
File "__main__", line ?, in NoName
Failed example:
    tuple(hashTokens(('1', '.', '2')))
Expected:
    (1, -5047856122680242044, 2)
Got:
    (1, -3131373799587018230, 2)
**********************************************************************
File "__main__", line ?, in NoName
Failed example:
    tuple(hashTokens(('Process', '-', 'C', '5', '-', '15')))
Expected:
    (117396829274297939, 7549860403020794775, 8629208860073383633, 5, 7549860403020794775, 15)
Got:
    (-7711620328456648241, -1413118204010146307, -3547905447637715484, 5, -1413118204010146307, 15)
**********************************************************************
File "__main__", line ?, in NoName
Failed example:
    tuple(hashTokens(('192', '.', '168', '.', '1', '.', '1')))
Expected:
    (192, -5047856122680242044, 168, -5047856122680242044, 1, -5047856122680242044, 1)
Got:
    (192, -3131373799587018230, 168, -3131373799587018230, 1, -313137379958701

In [7]:
list(hashTokens(tokenize_hostname('pc-01-1')))

[-1500884940312033453, -1413118204010146307, 1, -1413118204010146307, 1]

In [8]:
tokenize_hostname('localhost')

('localhost',)

In [9]:
hashTokens(tokenize_hostname('2.1'))

(2, -3131373799587018230, 1)

In [10]:
sum(hashTokens(tokenize_hostname('2.1')))

-3131373799587018227

In [11]:
def getTokenDiff(token1, token2):
	"""
	Compute the sum of the difference between the hash of two lists of tokens.

	Args:
		token1 (tuple): A list of tokens.
		token2 (tuple): A list of tokens.

	Returns:
		Generator: A generator of the difference between the hash of the two lists of tokens.

	Example:
		>>> list(getTokenDiff(('1',), ('1',)))
		[0]
		>>> list(getTokenDiff(('1','2'), ('1', '1')))
		[0, 1]
		>>> list(getTokenDiff(('1','1'), ('1', '1', '1')))
		Traceback (most recent call last):
		...
		ValueError: The two lists must have the same length.
		>>> list(getTokenDiff(('192', '.', '168', '.', '2', '.', '1'), ('192', '.', '168', '.', '1', '.', '1')))
		[0, 0, 0, 0, 1, 0, 0]
		>>> list(getTokenDiff(('192', '.', '168', '.', '2', '.', '1'), ('192', '.', '168', '.', '1', '.', '2')))
		[0, 0, 0, 0, 1, 0, 1]
		>>> list(getTokenDiff(('Process', '-', 'C', '5', '-', '15'), ('Process', '-', 'C', '5', '-', '15')))
		[0, 0, 0, 0, 0, 0]
		>>> list(getTokenDiff(('Process', '-', 'C', '5', '-', '15'), ('Process', '-', 'C', '5', '-', '16')))
		[0, 0, 0, 0, 0, 1]
		>>> list(getTokenDiff(tokenize_hostname('nebulahost3'), tokenize_hostname('nebulaleaf3')))
		[4609286187965956597, 0]
		>>> list(getTokenDiff(tokenize_hostname('nebulaleaf3'), tokenize_hostname('nebulaleaf4')))
		[0, 1]
	"""
	if len(token1) != len(token2):
		raise ValueError('The two lists must have the same length.')
	return (abs(hash1 - hash2) for hash1, hash2 in zip(hashTokens(token1), hashTokens(token2)))

In [12]:
doctest.run_docstring_examples(getTokenDiff, globals())

**********************************************************************
File "__main__", line 29, in NoName
Failed example:
    list(getTokenDiff(tokenize_hostname('nebulahost3'), tokenize_hostname('nebulaleaf3')))
Expected:
    [4609286187965956597, 0]
Got:
    [15162001877972268878, 0]


In [13]:
list(getTokenDiff(tokenize_hostname('nebulahost3'), tokenize_hostname('nebulaleaf3')))

[15162001877972268878, 0]

In [14]:
def generateSumDic(Hostnames):
	"""
	Generate a dictionary of sums of tokens for a list of hostnames.

	Args:
		Hostnames (list): A list of hostnames.

	Example:
		>>> generateSumDic(['localhost'])
		{6564370170492138900: {('localhost',): {}}}
		>>> generateSumDic(['1', '2'])
		{1: {('1',): {}}, 2: {('2',): {}}}
		>>> generateSumDic(['1.1','1.2'])
		{3435203479547611399: {('1', '.', '1'): {}}, 3435203479547611400: {('1', '.', '2'): {}}}
		>>> generateSumDic(['1.2','2.1'])
		{3435203479547611400: {('1', '.', '2'): {}, ('2', '.', '1'): {}}}
	"""
	sumDic = {}
	for hostname in reversed(sorted(Hostnames)):
		tokens = tokenize_hostname(hostname)
		sumHash = sum(hashTokens(tokens))
		sumDic.setdefault(sumHash, {})[tokens] = {}
	return sumDic
		

In [15]:
testHosts = frozenset([f'PC{i}-{j}' for i in range(1, 4) for j in range(1, 6)] + ['3-3PC','nebulamaster'])
','.join(testHosts)

'PC3-2,PC1-5,PC2-2,PC2-3,PC3-3,nebulamaster,PC1-3,PC3-4,PC1-4,PC3-5,PC1-1,3-3PC,PC2-4,PC1-2,PC2-5,PC2-1,PC3-1'

In [16]:
def findDiffIndex(token1, token2):
	"""
	Find the index of the first difference between two lists of tokens.
	If there is more than one difference, return -1.

	Args:
		token1 (tuple): A list of tokens.
		token2 (tuple): A list of tokens.

	Returns:
		int: The index of the first difference between the two lists of tokens.

	Example:
		>>> findDiffIndex(('1',), ('1',))
		-1
		>>> findDiffIndex(('1','2'), ('1', '1'))
		1
		>>> findDiffIndex(('1','1'), ('1', '1', '1'))
		Traceback (most recent call last):
		...
		ValueError: The two lists must have the same length.
		>>> findDiffIndex(('192', '.', '168', '.', '2', '.', '1'), ('192', '.', '168', '.', '1', '.', '1'))
		4
		>>> findDiffIndex(('192', '.', '168', '.', '2', '.', '1'), ('192', '.', '168', '.', '1', '.', '2'))
		-1
		>>> findDiffIndex(('Process', '-', 'C', '5', '-', '15'), ('Process', '-', 'C', '5', '-', '15'))
		-1
		>>> findDiffIndex(('Process', '-', 'C', '5', '-', '15'), ('Process', '-', 'C', '5', '-', '16'))
		5
		>>> findDiffIndex(tokenize_hostname('nebulahost3'), tokenize_hostname('nebulaleaf3'))
		-1
		>>> findDiffIndex(tokenize_hostname('nebulaleaf3'), tokenize_hostname('nebulaleaf4'))
		1
	"""
	if len(token1) != len(token2):
		raise ValueError('The two lists must have the same length.')
	rtn = -1
	for i, (subToken1, subToken2) in enumerate(zip(token1, token2)):
		if subToken1 != subToken2:
			if rtn == -1 and subToken1.isdigit() and subToken2.isdigit():
				rtn = i
			else:
				return -1
	return rtn



In [17]:
sorted(testHosts)

['3-3PC',
 'PC1-1',
 'PC1-2',
 'PC1-3',
 'PC1-4',
 'PC1-5',
 'PC2-1',
 'PC2-2',
 'PC2-3',
 'PC2-4',
 'PC2-5',
 'PC3-1',
 'PC3-2',
 'PC3-3',
 'PC3-4',
 'PC3-5',
 'nebulamaster']

In [18]:
bigTestHosts = frozenset([f'PC{i}-{j}' for i in range(1, 100) for j in range(1, 100) if j != 88 if i != 35] + ['3-3PC','nebulamaster'])
','.join(bigTestHosts)

'PC57-1,PC5-74,PC50-75,PC72-94,PC19-81,PC32-45,PC46-20,PC11-25,PC70-65,PC97-28,PC20-64,PC39-14,PC83-33,PC65-12,PC13-14,PC44-44,PC83-82,PC19-3,PC86-38,PC23-53,PC56-83,PC37-89,PC46-75,PC85-98,PC58-37,PC32-33,PC76-85,PC91-46,PC75-15,PC92-83,PC54-45,PC27-34,PC55-82,PC11-97,PC43-77,PC63-35,PC23-35,PC76-43,PC88-92,PC29-92,PC70-69,PC40-71,PC6-1,PC16-12,PC77-14,PC51-94,PC53-18,PC60-15,PC61-98,PC75-31,PC80-73,PC98-36,PC32-61,PC68-48,PC69-69,PC91-86,PC8-94,PC54-5,PC66-42,PC28-75,PC50-5,PC31-17,PC56-65,PC42-15,PC97-21,PC37-51,PC16-18,PC31-5,PC34-18,PC70-91,PC96-76,PC77-37,PC19-84,PC13-57,PC17-78,PC83-4,PC15-79,PC21-81,PC5-95,PC92-40,PC65-49,PC50-62,PC48-68,PC80-85,PC88-87,PC23-14,PC83-2,PC73-59,PC68-35,PC26-79,PC64-83,PC86-77,PC4-55,PC15-39,PC7-99,PC1-10,PC24-94,PC8-97,PC33-20,PC10-86,PC51-7,PC93-75,PC75-19,PC41-89,PC26-45,PC16-62,PC48-43,PC46-39,PC46-95,PC73-68,PC12-49,PC94-54,PC96-57,PC10-3,PC29-91,PC34-82,PC39-32,PC55-57,PC5-55,PC7-54,PC23-25,PC8-71,PC41-42,PC40-67,PC55-51,PC4-94,PC9-79,PC23-4

In [19]:
bigZeroPaddedHosts = frozenset([f'PC{i:02d}-{j:02d}' for i in range(1, 100) for j in range(1, 3102)] + ['3-3PC','nebulamaster'])
next(iter(bigZeroPaddedHosts))

'PC07-1844'

In [1]:
hugeHosts = frozenset([f'PC{i:01d}-{j:03d}-{k:03d}' for i in range(1, 100) for j in range(1, 50) for k in range(1, 100) if j != 88 if i != 35] + ['3-3-3PC','nebulamaster'])
next(iter(hugeHosts))

'PC69-004-082'

In [9]:
for i in range(100):
	set(set(hugeHosts))

In [10]:
len(bigZeroPaddedHosts)

NameError: name 'bigZeroPaddedHosts' is not defined

In [3]:
len(hugeHosts)

475400

In [23]:
testHosts

frozenset({'3-3PC',
           'PC1-1',
           'PC1-2',
           'PC1-3',
           'PC1-4',
           'PC1-5',
           'PC2-1',
           'PC2-2',
           'PC2-3',
           'PC2-4',
           'PC2-5',
           'PC3-1',
           'PC3-2',
           'PC3-3',
           'PC3-4',
           'PC3-5',
           'nebulamaster'})

In [24]:
import multiSSH3

In [25]:
ipHosts = '192.168.1-5.1-64'

In [26]:
multiSSH3.expand_hostnames(ipHosts)

['192.168.1.1',
 '192.168.1.2',
 '192.168.1.3',
 '192.168.1.4',
 '192.168.1.5',
 '192.168.1.6',
 '192.168.1.7',
 '192.168.1.8',
 '192.168.1.9',
 '192.168.1.10',
 '192.168.1.11',
 '192.168.1.12',
 '192.168.1.13',
 '192.168.1.14',
 '192.168.1.15',
 '192.168.1.16',
 '192.168.1.17',
 '192.168.1.18',
 '192.168.1.19',
 '192.168.1.20',
 '192.168.1.21',
 '192.168.1.22',
 '192.168.1.23',
 '192.168.1.24',
 '192.168.1.25',
 '192.168.1.26',
 '192.168.1.27',
 '192.168.1.28',
 '192.168.1.29',
 '192.168.1.30',
 '192.168.1.31',
 '192.168.1.32',
 '192.168.1.33',
 '192.168.1.34',
 '192.168.1.35',
 '192.168.1.36',
 '192.168.1.37',
 '192.168.1.38',
 '192.168.1.39',
 '192.168.1.40',
 '192.168.1.41',
 '192.168.1.42',
 '192.168.1.43',
 '192.168.1.44',
 '192.168.1.45',
 '192.168.1.46',
 '192.168.1.47',
 '192.168.1.48',
 '192.168.1.49',
 '192.168.1.50',
 '192.168.1.51',
 '192.168.1.52',
 '192.168.1.53',
 '192.168.1.54',
 '192.168.1.55',
 '192.168.1.56',
 '192.168.1.57',
 '192.168.1.58',
 '192.168.1.59',
 '192.

In [27]:
testDic = generateSumDic(frozenset(testHosts))
testDic

{5696782753352216264: {('nebulamaster',): {}},
 3281370981791077619: {('PC', '3', '-', '5'): {}},
 3281370981791077618: {('PC', '3', '-', '4'): {}, ('PC', '2', '-', '5'): {}},
 3281370981791077617: {('PC', '3', '-', '3'): {},
  ('PC', '2', '-', '4'): {},
  ('PC', '1', '-', '5'): {},
  ('3', '-', '3', 'PC'): {}},
 3281370981791077616: {('PC', '3', '-', '2'): {},
  ('PC', '2', '-', '3'): {},
  ('PC', '1', '-', '4'): {}},
 3281370981791077615: {('PC', '3', '-', '1'): {},
  ('PC', '2', '-', '2'): {},
  ('PC', '1', '-', '3'): {}},
 3281370981791077614: {('PC', '2', '-', '1'): {}, ('PC', '1', '-', '2'): {}},
 3281370981791077613: {('PC', '1', '-', '1'): {}}}

In [28]:

doctest.run_docstring_examples(generateSumDic, globals())

**********************************************************************
File "__main__", line 9, in NoName
Failed example:
    generateSumDic(['localhost'])
Expected:
    {6564370170492138900: {('localhost',): {}}}
Got:
    {-530260796449068547: {('localhost',): {}}}
**********************************************************************
File "__main__", line 11, in NoName
Failed example:
    generateSumDic(['1', '2'])
Expected:
    {1: {('1',): {}}, 2: {('2',): {}}}
Got:
    {2: {('2',): {}}, 1: {('1',): {}}}
**********************************************************************
File "__main__", line 13, in NoName
Failed example:
    generateSumDic(['1.1','1.2'])
Expected:
    {3435203479547611399: {('1', '.', '1'): {}}, 3435203479547611400: {('1', '.', '2'): {}}}
Got:
    {-3131373799587018227: {('1', '.', '2'): {}}, -3131373799587018228: {('1', '.', '1'): {}}}
**********************************************************************
File "__main__", line 15, in NoName
Failed example:
   

In [29]:
def filterSumDic(sumDic):
	"""
	Filter the sumDic to do one order of grouping.

	Args:
		sumDic (dict): A dictionary of sums of tokens.

	Returns:
		dict: A filtered dictionary of sums of tokens.

	Example:
		>>> filterSumDic(generateSumDic(['server15', 'server16', 'server17']))
		{-6728831096159691241: {('server', '17'): {(1, 0): [15, 17]}}}
		>>> filterSumDic(generateSumDic(['server15', 'server16', 'server17', 'server18']))
		{-6728831096159691240: {('server', '18'): {(1, 0): [15, 18]}}}
		>>> filterSumDic(generateSumDic(['server-1', 'server-2', 'server-3']))
		{1441623239094376437: {('server', '-', '3'): {(2, 0): [1, 3]}}}
		>>> filterSumDic(generateSumDic(['server-1-2', 'server-1-1', 'server-2-1', 'server-2-2']))
		{9612077574348444129: {('server', '-', '1', '-', '2'): {(4, 0): [1, 2]}}, 9612077574348444130: {('server', '-', '2', '-', '2'): {(4, 0): [1, 2]}}}
		>>> filterSumDic(generateSumDic(['server-1-2', 'server-1-1', 'server-2-2']))
		{9612077574348444129: {('server', '-', '1', '-', '2'): {(4, 0): [1, 2]}}, 9612077574348444130: {('server', '-', '2', '-', '2'): {}}}
		>>> filterSumDic(generateSumDic(['test1-a', 'test2-a']))
		{12310874833182455839: {('test', '2', '-', 'a'): {(1, 0): [1, 2]}}}
		>>> filterSumDic(generateSumDic(['sub-s1', 'sub-s2']))
		{15455586825715425366: {('sub', '-', 's', '2'): {(3, 0): [1, 2]}}}
		>>> filterSumDic(generateSumDic(['s9', 's10', 's11']))
		{1169697225593811728: {('s', '11'): {(1, 0): [9, 11]}}}
		>>> filterSumDic(generateSumDic(['s99', 's98', 's100','s101']))
		{1169697225593811818: {('s', '101'): {(1, 0): [98, 101]}}}
		>>> filterSumDic(generateSumDic(['s08', 's09', 's10', 's11']))
		{1169697225593811728: {('s', '11'): {(1, 2): [8, 11]}}}
		>>> filterSumDic(generateSumDic(['s099', 's098', 's100','s101']))
		{1169697225593811818: {('s', '101'): {(1, 3): [98, 101]}}}
		>>> filterSumDic(generateSumDic(['server1', 'server2', 'server3','server04']))
		{-6728831096159691255: {('server', '3'): {(1, 0): [1, 3]}}, -6728831096159691254: {('server', '04'): {}}}
		>>> filterSumDic(generateSumDic(['server9', 'server09', 'server10','server10']))
		{-6728831096159691249: {('server', '09'): {}}, -6728831096159691248: {('server', '10'): {(1, 0): [9, 10]}}}
		>>> filterSumDic(generateSumDic(['server09', 'server9', 'server10']))
		{-6728831096159691249: {('server', '9'): {}}, -6728831096159691248: {('server', '10'): {(1, 2): [9, 10]}}}
	"""
	lastSumHash = None
	newSumDic = {}    
	for key, value in sumDic.items():
		newSumDic[key] = value.copy()
	sumDic = newSumDic
	newSumDic = {}
	for sumHash in sorted(sumDic):
		if lastSumHash is None:
			lastSumHash = sumHash
			newSumDic[sumHash] = sumDic[sumHash].copy()
			continue
		if sumHash - lastSumHash == 1:
			# this means the distence between these two group of hostnames is 1, thus we try to group them together
			for hostnameTokens in sumDic[sumHash]:
				added = False
				if lastSumHash in newSumDic and sumDic[lastSumHash]:
					for lastHostnameTokens in sumDic[lastSumHash].copy():
						# if the two hostnames are able to group, we group them together
						# the two hostnames are able to group if:
						# 1. the two hostnames have the same amount of tokens
						# 2. the last hostname is not already been grouped
						# 3. the two hostnames have the same tokens except for one token
						# 4. the two hostnames have the same token groups
						if len(hostnameTokens) == len(lastHostnameTokens) and \
							lastSumHash in newSumDic and lastHostnameTokens in newSumDic[lastSumHash] and \
							(diffIndex:=findDiffIndex(hostnameTokens, lastHostnameTokens)) != -1 and \
							sumDic[sumHash][hostnameTokens] == sumDic[lastSumHash][lastHostnameTokens]:
							# the sumDic[sumHash][hostnameTokens] will ba a dic of 2 element value lists with 2 element key representing:
							# (token position that got grouped, the amount of zero padding (length) ):
							#   [ the start int token, the end int token]
							# if we entered here, this means we are able to group the two hostnames together

							if not diffIndex:
								# should never happen, but just in case, we skip grouping
								continue
							tokenToGroup = hostnameTokens[diffIndex]
							try:
								tokenLength = len(tokenToGroup)
								tokenToGroup = int(tokenToGroup)
							except ValueError:
								# if the token is not an int, we skip grouping
								continue
							# group(09 , 10) -> (x, 2): [9, 10]
							# group(9 , 10) -> (x, 0): [9, 10]
							# group(9 , 010) -> not able to group
							# group(009 , 10) -> not able to group
							# group(08, 09) -> (x, 2): [8, 9]
							# group(08, 9) -> not able to group
							# group(8, 09) -> not able to group
							# group(0099, 0100) -> (x, 4): [99, 100]
							# group(0099, 100) -> not able to groups
							# group(099, 100) -> (x, 3): [99, 100]
							# group(99, 100) -> (x, 0): [99, 100]
							lastTokenToGroup = lastHostnameTokens[diffIndex]
							try:
								minimumTokenLength = 0
								lastTokenLength = len(lastTokenToGroup) 
								if lastTokenLength > tokenLength:
									raise ValueError('The last token is longer than the current token.')
								elif lastTokenLength < tokenLength:
									if tokenLength - lastTokenLength != 1:
										raise ValueError('The last token is not one less than the current token.')
									# if the last token is not made out of all 9s, we cannot group
									if any(c != '9' for c in lastTokenToGroup):
										raise ValueError('The last token is not made out of all 9s.')
								elif lastTokenToGroup[0] == '0' and lastTokenLength > 1:
									# we have encoutered a padded last token, will set this as the minimum token length
									minimumTokenLength = lastTokenLength
								lastTokenToGroup = int(lastTokenToGroup)
							except ValueError:
								# if the token is not an int, we skip grouping
								continue
							assert lastTokenToGroup + 1 == tokenToGroup, 'Error! The two tokens are not one apart.'

							# we take the last hostname tokens grouped dic out from the newSumDic
							hostnameGroupDic = newSumDic[lastSumHash][lastHostnameTokens].copy()
							if (diffIndex, minimumTokenLength) in hostnameGroupDic and hostnameGroupDic[(diffIndex, minimumTokenLength)][1] + 1 == tokenToGroup:
								# if the token is already grouped, we just update the end token
								hostnameGroupDic[(diffIndex, minimumTokenLength)][1] = tokenToGroup
							elif (diffIndex, tokenLength) in hostnameGroupDic and hostnameGroupDic[(diffIndex, tokenLength)][1] + 1 == tokenToGroup:
								# alternatively, there is already an exact length padded token grouped
								hostnameGroupDic[(diffIndex, tokenLength)][1] = tokenToGroup
							elif sumDic[lastSumHash][lastHostnameTokens] == newSumDic[lastSumHash][lastHostnameTokens]:
								# only when there are no new groups added to this token group this iter, we can add the new group
								hostnameGroupDic[(diffIndex, minimumTokenLength)] = [lastTokenToGroup, tokenToGroup]
							else:
								# skip grouping if there are new groups added to this token group this iter
								continue
							# move the grouped dic under the new hostname / sum hash
							del newSumDic[lastSumHash][lastHostnameTokens]
							del sumDic[lastSumHash][lastHostnameTokens]
							if not newSumDic[lastSumHash]:
								del newSumDic[lastSumHash]
							newSumDic.setdefault(sumHash, {})[hostnameTokens] = hostnameGroupDic
							# we add the new group to the newSumDic
							added = True
							break
				if not added:
					# if the two hostnames are not able to group, we just add the last group to the newSumDic
					newSumDic.setdefault(sumHash, {})[hostnameTokens] = sumDic[sumHash][hostnameTokens].copy()
		else:
			# this means the distence between these two group of hostnames is not 1, thus we just add the last group to the newSumDic
			newSumDic[sumHash] = sumDic[sumHash].copy()
		lastSumHash = sumHash
	return newSumDic


In [30]:
filterSumDic(generateSumDic(frozenset(['server-1-2', 'server-1-1', 'server-2-1', 'server-2-2'])))

{-6729468311056304989: {('server', '-', '2', '-', '1'): {(2, 0): [1, 2]}},
 -6729468311056304988: {('server', '-', '2', '-', '2'): {(2, 0): [1, 2]}}}

In [31]:
filterSumDic(generateSumDic(frozenset(['server15', 'server16', 'server17'])))

{-3903231903036012361: {('server', '17'): {(1, 0): [15, 17]}}}

In [32]:
filterSumDic(generateSumDic(frozenset(['server1', 'server2', 'server3','server01', 'server02', 'server03','server04'])))

{-3903231903036012375: {('server', '3'): {(1, 0): [1, 3]}},
 -3903231903036012374: {('server', '04'): {(1, 2): [1, 4]}}}

In [33]:
filterSumDic(generateSumDic(['server1', 'server2', 'server3','server04']))

{-3903231903036012375: {('server', '3'): {(1, 0): [1, 3]}},
 -3903231903036012374: {('server', '04'): {}}}

In [34]:
filterSumDic(generateSumDic(['server01', 'server2', 'server3','server04']))

{-3903231903036012377: {('server', '01'): {}},
 -3903231903036012375: {('server', '3'): {(1, 0): [2, 3]}},
 -3903231903036012374: {('server', '04'): {}}}

In [35]:
filterSumDic(generateSumDic(['server9', 'server09', 'server10','server10']))

{-3903231903036012369: {('server', '09'): {}},
 -3903231903036012368: {('server', '10'): {(1, 0): [9, 10]}}}

In [36]:
filterSumDic(generateSumDic(['server9', 'server09','server10']))

{-3903231903036012369: {('server', '09'): {}},
 -3903231903036012368: {('server', '10'): {(1, 0): [9, 10]}}}

In [37]:
doctest.run_docstring_examples(filterSumDic, globals())

**********************************************************************
File "__main__", line 12, in NoName
Failed example:
    filterSumDic(generateSumDic(['server15', 'server16', 'server17']))
Expected:
    {-6728831096159691241: {('server', '17'): {(1, 0): [15, 17]}}}
Got:
    {-3903231903036012361: {('server', '17'): {(1, 0): [15, 17]}}}
**********************************************************************
File "__main__", line 14, in NoName
Failed example:
    filterSumDic(generateSumDic(['server15', 'server16', 'server17', 'server18']))
Expected:
    {-6728831096159691240: {('server', '18'): {(1, 0): [15, 18]}}}
Got:
    {-3903231903036012360: {('server', '18'): {(1, 0): [15, 18]}}}
**********************************************************************
File "__main__", line 16, in NoName
Failed example:
    filterSumDic(generateSumDic(['server-1', 'server-2', 'server-3']))
Expected:
    {1441623239094376437: {('server', '-', '3'): {(2, 0): [1, 3]}}}
Got:
    {-5316350107046158682

In [38]:
testDic

{5696782753352216264: {('nebulamaster',): {}},
 3281370981791077619: {('PC', '3', '-', '5'): {}},
 3281370981791077618: {('PC', '3', '-', '4'): {}, ('PC', '2', '-', '5'): {}},
 3281370981791077617: {('PC', '3', '-', '3'): {},
  ('PC', '2', '-', '4'): {},
  ('PC', '1', '-', '5'): {},
  ('3', '-', '3', 'PC'): {}},
 3281370981791077616: {('PC', '3', '-', '2'): {},
  ('PC', '2', '-', '3'): {},
  ('PC', '1', '-', '4'): {}},
 3281370981791077615: {('PC', '3', '-', '1'): {},
  ('PC', '2', '-', '2'): {},
  ('PC', '1', '-', '3'): {}},
 3281370981791077614: {('PC', '2', '-', '1'): {}, ('PC', '1', '-', '2'): {}},
 3281370981791077613: {('PC', '1', '-', '1'): {}}}

In [39]:
filterSumDic(testDic)

{3281370981791077615: {('PC', '3', '-', '1'): {(1, 0): [1, 3]}},
 3281370981791077616: {('PC', '3', '-', '2'): {(1, 0): [1, 3]}},
 3281370981791077617: {('PC', '3', '-', '3'): {(1, 0): [1, 3]},
  ('3', '-', '3', 'PC'): {}},
 3281370981791077618: {('PC', '3', '-', '4'): {(1, 0): [1, 3]}},
 3281370981791077619: {('PC', '3', '-', '5'): {(1, 0): [1, 3]}},
 5696782753352216264: {('nebulamaster',): {}}}

In [40]:
testDic

{5696782753352216264: {('nebulamaster',): {}},
 3281370981791077619: {('PC', '3', '-', '5'): {}},
 3281370981791077618: {('PC', '3', '-', '4'): {}, ('PC', '2', '-', '5'): {}},
 3281370981791077617: {('PC', '3', '-', '3'): {},
  ('PC', '2', '-', '4'): {},
  ('PC', '1', '-', '5'): {},
  ('3', '-', '3', 'PC'): {}},
 3281370981791077616: {('PC', '3', '-', '2'): {},
  ('PC', '2', '-', '3'): {},
  ('PC', '1', '-', '4'): {}},
 3281370981791077615: {('PC', '3', '-', '1'): {},
  ('PC', '2', '-', '2'): {},
  ('PC', '1', '-', '3'): {}},
 3281370981791077614: {('PC', '2', '-', '1'): {}, ('PC', '1', '-', '2'): {}},
 3281370981791077613: {('PC', '1', '-', '1'): {}}}

In [41]:
filterSumDic(filterSumDic(testDic))

{3281370981791077617: {('3', '-', '3', 'PC'): {}},
 3281370981791077619: {('PC', '3', '-', '5'): {(1, 0): [1, 3],
   (3, 0): [1, 5]}},
 5696782753352216264: {('nebulamaster',): {}}}

In [42]:
filterSumDic(generateSumDic(bigTestHosts))

{3281370981791077617: {('3', '-', '3', 'PC'): {}},
 3281370981791077646: {('PC', '34', '-', '1'): {(1, 0): [1, 34]}},
 3281370981791077647: {('PC', '34', '-', '2'): {(1, 0): [1, 34]}},
 3281370981791077648: {('PC', '34', '-', '3'): {(1, 0): [1, 34]}},
 3281370981791077649: {('PC', '34', '-', '4'): {(1, 0): [1, 34]}},
 3281370981791077650: {('PC', '34', '-', '5'): {(1, 0): [1, 34]}},
 3281370981791077651: {('PC', '34', '-', '6'): {(1, 0): [1, 34]}},
 3281370981791077652: {('PC', '34', '-', '7'): {(1, 0): [1, 34]}},
 3281370981791077653: {('PC', '34', '-', '8'): {(1, 0): [1, 34]}},
 3281370981791077654: {('PC', '34', '-', '9'): {(1, 0): [1, 34]}},
 3281370981791077655: {('PC', '34', '-', '10'): {(1, 0): [1, 34]}},
 3281370981791077656: {('PC', '34', '-', '11'): {(1, 0): [1, 34]}},
 3281370981791077657: {('PC', '34', '-', '12'): {(1, 0): [1, 34]}},
 3281370981791077658: {('PC', '34', '-', '13'): {(1, 0): [1, 34]}},
 3281370981791077659: {('PC', '34', '-', '14'): {(1, 0): [1, 34]}},
 32813

In [43]:
filterSumDic(filterSumDic(generateSumDic(bigTestHosts)))

{3281370981791077617: {('3', '-', '3', 'PC'): {}},
 3281370981791077732: {('PC', '34', '-', '87'): {(1, 0): [1, 34],
   (3, 0): [1, 87]}},
 3281370981791077744: {('PC', '34', '-', '99'): {(1, 0): [1, 34],
   (3, 0): [89, 99]}},
 3281370981791077797: {('PC', '99', '-', '87'): {(1, 0): [36, 99],
   (3, 0): [1, 87]}},
 3281370981791077809: {('PC', '99', '-', '99'): {(1, 0): [36, 99],
   (3, 0): [89, 99]}},
 5696782753352216264: {('nebulamaster',): {}}}

In [44]:
generateSumDic(testHosts)

{5696782753352216264: {('nebulamaster',): {}},
 3281370981791077619: {('PC', '3', '-', '5'): {}},
 3281370981791077618: {('PC', '3', '-', '4'): {}, ('PC', '2', '-', '5'): {}},
 3281370981791077617: {('PC', '3', '-', '3'): {},
  ('PC', '2', '-', '4'): {},
  ('PC', '1', '-', '5'): {},
  ('3', '-', '3', 'PC'): {}},
 3281370981791077616: {('PC', '3', '-', '2'): {},
  ('PC', '2', '-', '3'): {},
  ('PC', '1', '-', '4'): {}},
 3281370981791077615: {('PC', '3', '-', '1'): {},
  ('PC', '2', '-', '2'): {},
  ('PC', '1', '-', '3'): {}},
 3281370981791077614: {('PC', '2', '-', '1'): {}, ('PC', '1', '-', '2'): {}},
 3281370981791077613: {('PC', '1', '-', '1'): {}}}

In [45]:
filterSumDic(filterSumDic(filterSumDic(generateSumDic(bigTestHosts))))

{3281370981791077617: {('3', '-', '3', 'PC'): {}},
 3281370981791077732: {('PC', '34', '-', '87'): {(1, 0): [1, 34],
   (3, 0): [1, 87]}},
 3281370981791077744: {('PC', '34', '-', '99'): {(1, 0): [1, 34],
   (3, 0): [89, 99]}},
 3281370981791077797: {('PC', '99', '-', '87'): {(1, 0): [36, 99],
   (3, 0): [1, 87]}},
 3281370981791077809: {('PC', '99', '-', '99'): {(1, 0): [36, 99],
   (3, 0): [89, 99]}},
 5696782753352216264: {('nebulamaster',): {}}}

In [46]:
filterSumDic(filterSumDic(filterSumDic(generateSumDic(bigZeroPaddedHosts))))

{3281370981791077617: {('3', '-', '3', 'PC'): {}},
 3281370981791077809: {('PC', '99', '-', '99'): {(1, 2): [1, 99],
   (3, 2): [1, 99]}},
 3281370981791080811: {('PC', '99', '-', '3101'): {(1, 2): [1, 99],
   (3, 0): [100, 3101]}},
 5696782753352216264: {('nebulamaster',): {}}}

In [47]:
def compact_hostnames(Hostnames):
	"""
	Compact a list of hostnames.
	Compact numeric numbers into ranges.

	Args:
		Hostnames (list): A list of hostnames.

	Returns:
		list: A list of comapcted hostname list.

	Example:
		>>> compact_hostnames(['server15', 'server16', 'server17'])
		['server[15-17]']
		>>> compact_hostnames(['server-1', 'server-2', 'server-3'])
		['server-[1-3]']
		>>> compact_hostnames(['server-1-2', 'server-1-1', 'server-2-1', 'server-2-2'])
		['server-[1-2]-[1-2]']
		>>> compact_hostnames(['server-1-2', 'server-1-1', 'server-2-2'])
		['server-1-[1-2]', 'server-2-2']
		>>> compact_hostnames(['test1-a', 'test2-a'])
		['test[1-2]-a']
		>>> compact_hostnames(['sub-s1', 'sub-s2'])
		['sub-s[1-2]']
	"""
	sumDic = generateSumDic(Hostnames)
	filteredSumDic = filterSumDic(sumDic)
	lastFilteredSumDicLen = len(filteredSumDic) + 1
	while lastFilteredSumDicLen > len(filteredSumDic):
		lastFilteredSumDicLen = len(filteredSumDic)
		filteredSumDic = filterSumDic(filteredSumDic)
	rtnList = []
	for sumHash in filteredSumDic:
		for hostnameTokens in filteredSumDic[sumHash]:
			hostnameGroupDic = filteredSumDic[sumHash][hostnameTokens]
			hostnameList = list(hostnameTokens)
			for tokenIndex, tokenLength in hostnameGroupDic:
				startToken, endToken = hostnameGroupDic[(tokenIndex, tokenLength)]
				if tokenLength:
					hostnameList[tokenIndex] = f'[{startToken:0{tokenLength}d}-{endToken:0{tokenLength}d}]'
				else:
					hostnameList[tokenIndex] = f'[{startToken}-{endToken}]'
			rtnList.append(''.join(hostnameList))
	return rtnList
			

In [48]:
compact_hostnames(['server15', 'server16', 'server17'])

['server[15-17]']

In [49]:
testHosts

frozenset({'3-3PC',
           'PC1-1',
           'PC1-2',
           'PC1-3',
           'PC1-4',
           'PC1-5',
           'PC2-1',
           'PC2-2',
           'PC2-3',
           'PC2-4',
           'PC2-5',
           'PC3-1',
           'PC3-2',
           'PC3-3',
           'PC3-4',
           'PC3-5',
           'nebulamaster'})

In [50]:
compact_hostnames(testHosts)

['3-3PC', 'PC[1-3]-[1-5]', 'nebulamaster']

In [51]:
compact_hostnames(multiSSH3.expand_hostnames(ipHosts))

['192.168.[1-5].[1-64]']

In [409]:
set(multiSSH3.expand_hostnames(frozenset(compact_hostnames(multiSSH3.expand_hostnames(ipHosts))))) == set(multiSSH3.expand_hostnames(ipHosts))

True

In [410]:
compact_hostnames(bigTestHosts)

['nebulamaster',
 '3-3PC',
 'PC[1-8]-[1-87]',
 'PC[1-8]-[89-99]',
 'PC[9-34]-[1-87]',
 'PC[9-34]-[89-99]',
 'PC[36-99]-[1-87]',
 'PC[36-99]-[89-99]']

In [411]:
compact_hostnames(bigZeroPaddedHosts)

['nebulamaster', '3-3PC', 'PC[01-99]-[01-99]', 'PC[01-99]-[100-3101]']

In [412]:
id('1')

140714934413272

In [235]:
#compact_hostnames(hugeHosts)

['nebulamaster',
 '3-3-3PC',
 'PC[1-8]-[001-049]-[001-099]',
 'PC[9-34]-[001-049]-[001-099]',
 'PC[36-99]-[001-049]-[001-099]']

In [247]:
0%10

0

In [255]:
any(c != '9' for c in '9999299')

True

In [263]:
newSumDic = {}

In [241]:
['1'][-1] == '9'

False

In [204]:
len((('PC', '1', '-', '1'))) == len((('PC', '1', '-', '2')))

True

In [238]:
next(((i , x) for i, x in enumerate(getTokenDiff(('PC', '100', '-', '1'), ('PC', '101', '-', '1'))) if x == 1), None)

(1, 1)

In [261]:
testDic[6127336150862740452]

{('PC', '1', '-', '2'): {}, ('PC', '2', '-', '1'): {}}

In [264]:
newSumDic

{}

In [223]:
not newSumDic[6127336150862740451]

True

In [224]:
del newSumDic[6127336150862740451]

In [192]:
testDic[-1632652661277869711]

{('nebulamaster',): []}

In [265]:
sorted(testDic)

[-1632652661277869711,
 6127336150862740451,
 6127336150862740452,
 6127336150862740453,
 6127336150862740454,
 6127336150862740455,
 6127336150862740456,
 6127336150862740457]

In [270]:
test1 = {(1,1):[2,2],(1,2):[1,1]}
test2 = {(1,1):[2,3],(1,2):[1,2]}
test1 == test2

False

In [55]:
ipRangeHosts = frozenset(multiSSH3.expand_hostnames('10.6-13.100-254.1-254') + multiSSH3.expand_hostnames('192.168.1.1-254'))
len(ipRangeHosts)

315214

In [57]:
print(f'len of ipRangeHosts: {len(ipRangeHosts)}')

','.join(compact_hostnames(ipRangeHosts))


len of ipRangeHosts: 315214


'10.[6-13].[100-254].[1-254],192.168.1.[1-254]'