In [2]:
from typing import List, Set, Tuple
from itertools import combinations

class SimiliarPhoneValidator:
    """Find groups of near-duplicate phone numbers.

    Two numbers of equal length are considered similar when their Hamming
    distance or their Levenshtein distance is within the configured limit.
    Similar pairs are then merged transitively into groups.

    The class name keeps its original spelling so existing callers keep working.
    """

    def __init__(
        self,
        max_levenshtein_distance: int = 2,
        max_hamming_differences: int = 4
    ):
        """Store the similarity thresholds.

        Args:
            max_levenshtein_distance: Largest edit distance still counted as similar.
            max_hamming_differences: Largest number of differing positions still
                counted as similar.
        """
        self.max_levenshtein_distance = max_levenshtein_distance
        self.max_hamming_differences = max_hamming_differences

    def find_similar_phones(self, phone_numbers: List[str]) -> List[Set[str]]:
        """Find groups of similar phone numbers using both Levenshtein and Hamming distance.

        Args:
            phone_numbers: Phone numbers to compare pairwise.

        Returns:
            A list of sets; each set holds numbers connected (directly or through
            other numbers) by a similar pair. Numbers with no similar partner
            are not reported.
        """
        similar_pairs: List[Tuple[str, str]] = []

        for phone1, phone2 in combinations(phone_numbers, 2):
            # Pairs of different length are never compared, so both metrics
            # only ever see equal-length inputs.
            if len(phone1) != len(phone2):
                continue

            # Either metric is enough; the linear-time Hamming check runs first
            # so the quadratic Levenshtein computation is skipped when possible.
            if (self.is_hamming_similar(phone1, phone2) or
                    self.is_levenshtein_similar(phone1, phone2)):
                similar_pairs.append((phone1, phone2))

        return self._group_similar_numbers(similar_pairs)

    def levenshtein_distance(self, s1: str, s2: str) -> int:
        """Calculate Levenshtein (edit) distance between two strings.

        Uses the two-row dynamic-programming formulation; the shorter string
        indexes the columns so a row has ``len(shorter) + 1`` entries.
        """
        if len(s1) < len(s2):
            s1, s2 = s2, s1

        if len(s2) == 0:
            return len(s1)

        previous_row = list(range(len(s2) + 1))
        for i, c1 in enumerate(s1):
            current_row = [i + 1]
            for j, c2 in enumerate(s2):
                insertions = previous_row[j + 1] + 1
                deletions = current_row[j] + 1
                substitutions = previous_row[j] + (c1 != c2)
                current_row.append(min(insertions, deletions, substitutions))
            previous_row = current_row

        return previous_row[-1]

    def hamming_distance(self, s1: str, s2: str) -> int:
        """Count positions at which the two strings differ.

        Intended for strings of equal length: ``zip`` stops at the shorter
        string, so surplus characters of the longer one are ignored.
        """
        return sum(c1 != c2 for c1, c2 in zip(s1, s2))

    def is_levenshtein_similar(self, phone1: str, phone2: str) -> bool:
        """Check if two numbers are similar based on Levenshtein distance."""
        return self.levenshtein_distance(phone1, phone2) <= self.max_levenshtein_distance

    def is_hamming_similar(self, phone1: str, phone2: str) -> bool:
        """Check if two numbers are similar based on Hamming distance."""
        return self.hamming_distance(phone1, phone2) <= self.max_hamming_differences

    def _group_similar_numbers(self, similar_pairs: List[Tuple[str, str]]) -> List[Set[str]]:
        """Group similar numbers into sets based on transitive relationships.

        Implemented as a union-find over the pairs so that a pair linking two
        previously separate groups merges them into one.

        Args:
            similar_pairs: Pairs of numbers already judged similar.

        Returns:
            Disjoint groups, ordered by the first appearance of any member
            in ``similar_pairs``.
        """
        if not similar_pairs:
            return []

        # Maps every seen number to its parent; a root is its own parent.
        parent = {}

        def find(item: str) -> str:
            parent.setdefault(item, item)
            root = item
            while parent[root] != root:
                root = parent[root]
            # Path compression: point every node on the walked path at the root.
            while parent[item] != root:
                next_item = parent[item]
                parent[item] = root
                item = next_item
            return root

        for phone1, phone2 in similar_pairs:
            root1, root2 = find(phone1), find(phone2)
            if root1 != root2:
                parent[root2] = root1

        # dict keeps insertion order, so groups come out in first-seen order.
        groups_by_root = {}
        for phone in list(parent):
            groups_by_root.setdefault(find(phone), set()).add(phone)

        return list(groups_by_root.values())

    def weighted_hamming_distance(self, s1: str, s2: str) -> float:
        """Hamming distance in which mismatches near the start cost more.

        Positions 0-3 weigh 2.0, position 4 weighs 1.5 and positions 5-14
        weigh 1.0. Only the first 15 positions are compared because ``zip``
        stops at the end of the weight list.
        """
        weights = [2.0] * 4 + [1.5] + [1.0] * 10  # Higher weight for prefix
        return sum(
            w * (c1 != c2)
            for c1, c2, w in zip(s1, s2, weights)
        )

    def has_pattern_similarity(self, s1: str, s2: str) -> bool:
        """Return True when both strings use exactly the same set of distinct characters.

        Character counts and positions are ignored.
        """
        return set(s1) == set(s2)

# Example usage
def test_phone_validator():
    """Run the validator over a small sample list and print each similar group."""
    sample_numbers = [
        "088127318675",  # Baseline number
        "088127578613",  # Same digits as the baseline, tail reordered
        "088127212675",  # Two digits changed relative to the baseline
        "088127212567",  # Changed digits and a reordered ending
        "089999999999",  # Unrelated number
    ]
    checker = SimiliarPhoneValidator(
        max_levenshtein_distance=2,
        max_hamming_differences=4,
    )

    clusters = checker.find_similar_phones(sample_numbers)

    print("Similar phone number groups:")
    for cluster in clusters:
        print(cluster)

# Entry point: run the demo only when this code executes as the main module.
if __name__ == "__main__":
    test_phone_validator()

Similar phone number groups:
{'088127578613', '088127318675', '088127212567', '088127212675'}


In [3]:
from typing import List, Set, Tuple
from collections import Counter
from itertools import combinations

class PhoneNumberValidator:
    """Find groups of similar phone numbers using reverse, positional and scramble checks."""

    def __init__(self, max_differences: int = 4):
        """Store the tolerance shared by the positional and scramble checks.

        Args:
            max_differences: Largest number of differing digits still counted
                as similar.
        """
        self.max_differences = max_differences

    def find_similar_phones(self, phone_numbers: List[str]) -> List[Set[str]]:
        """Find groups of similar phone numbers based on all criteria.

        Args:
            phone_numbers: List of phone numbers to check

        Returns:
            List of sets containing groups of similar numbers
        """
        similar_pairs: List[Tuple[str, str]] = []

        # Compare each pair of numbers
        for phone1, phone2 in combinations(phone_numbers, 2):
            if (
                self.is_reverse_match(phone1, phone2) or
                self.is_sequential_match(phone1, phone2) or
                self.is_partial_scramble_match(phone1, phone2)
            ):
                similar_pairs.append((phone1, phone2))

        # Group similar numbers
        return self._group_similar_numbers(similar_pairs)

    def is_reverse_match(self, phone1: str, phone2: str) -> bool:
        """Check if two numbers are reverse matches.

        Example:
            "088127318675" and "576813721880" -> True
        """
        if len(phone1) != len(phone2):
            return False

        # Compare original with reverse
        return phone1 == phone2[::-1]

    def is_sequential_match(self, phone1: str, phone2: str) -> bool:
        """Check if numbers are similar with n different digits in same positions.

        Example:
            "088127318675" and "088127212675" -> True (2 differences)
        """
        if len(phone1) != len(phone2):
            return False

        # Count positions where digits differ
        differences = sum(1 for a, b in zip(phone1, phone2) if a != b)
        return differences <= self.max_differences

    def is_partial_scramble_match(self, phone1: str, phone2: str) -> bool:
        """Check if numbers are similar with some different digits and some scrambled.

        The common prefix is stripped and the remaining digits are compared as
        multisets, so reordering alone contributes no difference.

        Example:
            "088127318675" and "088127212567" -> True
        """
        if len(phone1) != len(phone2):
            return False

        # Find common prefix
        prefix_length = 0
        for a, b in zip(phone1, phone2):
            if a == b:
                prefix_length += 1
            else:
                break

        # Compare rest of numbers
        suffix1 = phone1[prefix_length:]
        suffix2 = phone2[prefix_length:]

        # Count character differences
        counter1 = Counter(suffix1)
        counter2 = Counter(suffix2)

        # Calculate total different characters
        differences = sum((counter1 - counter2).values()) + sum((counter2 - counter1).values())

        return differences <= self.max_differences * 2  # Multiply by 2 because each difference affects both counters

    def _group_similar_numbers(self, similar_pairs: List[Tuple[str, str]]) -> List[Set[str]]:
        """Group similar numbers into sets based on transitive relationships.

        Implemented as a union-find over the pairs so that a pair linking two
        previously separate groups merges them into one.

        Args:
            similar_pairs: Pairs of numbers already judged similar.

        Returns:
            Disjoint groups, ordered by the first appearance of any member
            in ``similar_pairs``.
        """
        if not similar_pairs:
            return []

        # Maps every seen number to its parent; a root is its own parent.
        parent = {}

        def find(item: str) -> str:
            parent.setdefault(item, item)
            root = item
            while parent[root] != root:
                root = parent[root]
            # Path compression: point every node on the walked path at the root.
            while parent[item] != root:
                next_item = parent[item]
                parent[item] = root
                item = next_item
            return root

        for phone1, phone2 in similar_pairs:
            root1, root2 = find(phone1), find(phone2)
            if root1 != root2:
                parent[root2] = root1

        # dict keeps insertion order, so groups come out in first-seen order.
        groups_by_root = {}
        for phone in list(parent):
            groups_by_root.setdefault(find(phone), set()).add(phone)

        return list(groups_by_root.values())

    @staticmethod
    def is_scrambled_phone_number(phone1: str, phone2: str) -> bool:
        """Check if two phone numbers contain exactly the same digits in any order.

        Declared as a static method so it can be called on the class or on an
        instance; it reads no instance state.

        Args:
            phone1: First phone number string
            phone2: Second phone number string

        Returns:
            bool: True if numbers contain same digits in any order

        Examples:
            >>> PhoneNumberValidator.is_scrambled_phone_number("088127318675", "088127578613")
            True
            >>> PhoneNumberValidator.is_scrambled_phone_number("088127318675", "088127212675")
            False
        """
        # Early exit if lengths don't match
        if len(phone1) != len(phone2):
            return False

        # Convert strings to sorted lists of digits and compare
        return sorted(phone1) == sorted(phone2)

# Example usage: build a validator, run it over sample numbers, and print each group found.
validator = PhoneNumberValidator(max_differences=4)

phone_numbers = [
    "088127318675",  # baseline number
    "088127578613",  # same digits as the baseline, reordered (not an exact reversal)
    "088127212675",  # differs from the baseline in two positions
    "088127212567",  # changed digits plus a reordered ending
    "089999999999"   # different number
]

# Numbers that match no other number do not appear in any group.
similar_groups = validator.find_similar_phones(phone_numbers)
for group in similar_groups:
    print("Similar numbers found:", group)

Similar numbers found: {'088127578613', '088127318675', '088127212567', '088127212675'}
