<a href="https://colab.research.google.com/github/walkerjian/DailyCode/blob/main/Rabin_KarpStringatch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
class Model:
    """Model class implementing the Rabin-Karp algorithm for string matching.

    Strings are hashed as polynomials in ``base`` reduced modulo ``mod``; the
    search slides a window over the text and updates its hash incrementally.
    """

    def __init__(self, base=256, mod=10**9 + 7):
        """
        Initialize the model with base and mod values for hashing.

        Args:
        - base (int): Base for polynomial hashing.
        - mod (int): Modulus for polynomial hashing.
        """
        self.base = base
        self.mod = mod

    def compute_hash(self, s):
        """
        Compute the polynomial hash of the string s.

        Args:
        - s (str): The string to hash.

        Returns:
        - int: Hash of s; 0 for the empty string.
        """
        h = 0
        for char in s:
            h = (h * self.base + ord(char)) % self.mod
        return h

    def rabin_karp_search(self, text, pattern):
        """
        Search for pattern in text using the Rabin-Karp algorithm.

        Args:
        - text (str): The main text in which to search.
        - pattern (str): The pattern to search for.

        Returns:
        - int or False: Starting index of the first occurrence if the pattern
          is found, otherwise False. Index 0 is a valid match, so callers
          must test the result with ``is False`` rather than truthiness.
          An empty pattern matches at index 0.
        """
        n, k = len(text), len(pattern)
        if k > n:
            return False
        if k == 0:
            # An empty pattern matches at the start of any text. Returning
            # here avoids pow(base, -1, mod) below, which raises ValueError
            # when base is not invertible modulo mod.
            return 0

        # Calculate the initial hashes for the pattern and the first window of text.
        pattern_hash = self.compute_hash(pattern)
        window_hash = self.compute_hash(text[:k])

        # Weight of the window's leading character, used to remove it when sliding.
        highest_base_power = pow(self.base, k - 1, self.mod)

        for i in range(n - k + 1):
            # Equal hashes can be a collision, so confirm with a real comparison.
            if pattern_hash == window_hash and text[i:i + k] == pattern:
                return i  # Found a match.

            # If not at the end, update the hash value for the next window:
            # drop text[i], shift by one position, append text[i + k].
            if i + k < n:
                window_hash = (window_hash - ord(text[i]) * highest_base_power) * self.base + ord(text[i + k])
                # Python's % always yields a non-negative result here, so the
                # subtraction above cannot leave a negative hash.
                window_hash %= self.mod

        return False  # No match found.

class View:
    """Presentation layer that reports a search outcome on standard output."""

    @staticmethod
    def display(result, text, pattern):
        """
        Print a one-line summary of a string-matching result.

        Args:
        - result (int or False): Match index, or False when nothing matched.
          The check is by identity, so index 0 is reported as a match.
        - text (str): The text that was searched.
        - pattern (str): The pattern that was looked for.
        """
        # Handle the miss first so the hit case needs no else branch.
        if result is False:
            print(f"Pattern '{pattern}' not found in text '{text}'.")
            return
        print(f"Pattern '{pattern}' found in text '{text}' starting at index {result}.")

class Controller:
    """Controller that holds one search request and wires Model to View."""

    def __init__(self, text, pattern):
        """
        Store the search inputs and create the model used to run the search.

        Args:
        - text (str): The main text in which to search.
        - pattern (str): The pattern to search for.
        """
        self.text, self.pattern = text, pattern
        self.model = Model()

    def execute_search(self):
        """Run the model's Rabin-Karp search and pass the outcome to the view."""
        text, pattern = self.text, self.pattern
        outcome = self.model.rabin_karp_search(text, pattern)
        View.display(outcome, text, pattern)

def test():
    """Run the Controller-driven search over a fixed set of sample inputs.

    Each (text, pattern) pair is searched and its result printed; nothing is
    returned or asserted.
    """
    long_text = "a" * 100 + "b"
    samples = [
        ("hello world", "world"),
        ("hello world", "earth"),
        ("abcdefgh", "cde"),
        ("abcdefgh", "xyz"),
        (long_text, "b"),
        (long_text, "aa"),
        ("openai", "pena"),
        ("openai", "openai"),
        ("openai", "openaiz"),
        ("abracadabra", "abra"),
    ]

    for sample_text, sample_pattern in samples:
        Controller(sample_text, sample_pattern).execute_search()

# Run the demo when this cell executes; each sample case prints one result line.
test()


Pattern 'world' found in text 'hello world' starting at index 6.
Pattern 'earth' not found in text 'hello world'.
Pattern 'cde' found in text 'abcdefgh' starting at index 2.
Pattern 'xyz' not found in text 'abcdefgh'.
Pattern 'b' found in text 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab' starting at index 100.
Pattern 'aa' found in text 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab' starting at index 0.
Pattern 'pena' found in text 'openai' starting at index 1.
Pattern 'openai' found in text 'openai' starting at index 0.
Pattern 'openaiz' not found in text 'openai'.
Pattern 'abra' found in text 'abracadabra' starting at index 0.


In [2]:
def rabin_karp(text, pattern, base=256, mod=10**9 + 7):
    """
    Find the first occurrence of pattern in text with the Rabin-Karp algorithm.

    Args:
    - text (str): The main text in which to search.
    - pattern (str): The pattern to search for.
    - base (int): Base for polynomial hashing.
    - mod (int): Modulus for polynomial hashing.

    Returns:
    - int or False: Starting index of the first match, otherwise False.
      Index 0 is a valid match, so callers must test the result with
      ``is False`` rather than truthiness. An empty pattern matches at
      index 0.
    """
    n, k = len(text), len(pattern)
    if k > n:
        return False
    if k == 0:
        # An empty pattern matches at the start of any text. Returning here
        # avoids pow(base, -1, mod) below, which raises ValueError when base
        # is not invertible modulo mod.
        return 0

    # Compute initial hashes of the pattern and the first window in one pass.
    h, window = 0, 0
    for pattern_char, text_char in zip(pattern, text):
        h = (h * base + ord(pattern_char)) % mod
        window = (window * base + ord(text_char)) % mod

    # Weight of the window's leading character, used to remove it when sliding.
    highest_base_power = pow(base, k - 1, mod)

    for i in range(n - k + 1):
        # Equal hashes can be a collision, so confirm with a real comparison.
        if h == window and text[i:i + k] == pattern:
            return i

        # Slide the window: drop text[i], shift by one position, append text[i + k].
        if i + k < n:
            window = (window - ord(text[i]) * highest_base_power) * base + ord(text[i + k])
            # Python's % always yields a non-negative result here.
            window %= mod

    return False


In [3]:
def test_rabin_karp():
    """Run the standalone rabin_karp function over a fixed set of samples.

    Returns:
    - list of (text, pattern, result) tuples, one per sample, in order.
    """
    long_text = "a" * 100 + "b"
    samples = [
        ("hello world", "world"),
        ("hello world", "earth"),
        ("abcdefgh", "cde"),
        ("abcdefgh", "xyz"),
        (long_text, "b"),
        (long_text, "aa"),
        ("openai", "pena"),
        ("openai", "openai"),
        ("openai", "openaiz"),
        ("abracadabra", "abra"),
    ]

    return [
        (sample_text, sample_pattern, rabin_karp(sample_text, sample_pattern))
        for sample_text, sample_pattern in samples
    ]

# Collect the outcome of every sample case; the bare name on the last line
# makes the notebook show the list as this cell's output.
test_results = test_rabin_karp()
test_results


[('hello world', 'world', 6),
 ('hello world', 'earth', False),
 ('abcdefgh', 'cde', 2),
 ('abcdefgh', 'xyz', False),
 ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab',
  'b',
  100),
 ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab',
  'aa',
  0),
 ('openai', 'pena', 1),
 ('openai', 'openai', 0),
 ('openai', 'openaiz', False),
 ('abracadabra', 'abra', 0)]

In [4]:
def test_string_matching(matching_function):
    """Test function for string matching routines using a function pointer."""
    test_cases = [
        ("hello world", "world"),
        ("hello world", "earth"),
        ("abcdefgh", "cde"),
        ("abcdefgh", "xyz"),
        ("a" * 100 + "b", "b"),
        ("a" * 100 + "b", "aa"),
        ("openai", "pena"),
        ("openai", "openai"),
        ("openai", "openaiz"),
        ("abracadabra", "abra")
    ]

    results = []

    for text, pattern in test_cases:
        result = matching_function(text, pattern)
        results.append((text, pattern, result))

    return results

# Run the concise rabin_karp function through the generic harness, passing the
# function itself as the matcher; the bare name on the last line makes the
# notebook show the resulting list as this cell's output.
test_results_modified = test_string_matching(rabin_karp)
test_results_modified


[('hello world', 'world', 6),
 ('hello world', 'earth', False),
 ('abcdefgh', 'cde', 2),
 ('abcdefgh', 'xyz', False),
 ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab',
  'b',
  100),
 ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab',
  'aa',
  0),
 ('openai', 'pena', 1),
 ('openai', 'openai', 0),
 ('openai', 'openaiz', False),
 ('abracadabra', 'abra', 0)]