In [1]:
import numpy as np
from rapidfuzz import fuzz

# Sample inputs: two equal-length NumPy arrays of strings.
array1 = np.array(
    [
        "string one",
        "hello world",
        "fuzzy match",
    ]
)
array2 = np.array(
    [
        "string two",
        "hello world!",
        "fuzzy matching",
    ]
)

def compare_strings(str1, str2):
    """Return the RapidFuzz ``token_set_ratio`` similarity of two strings.

    Parameters
    ----------
    str1, str2 : str
        The two strings to compare.

    Returns
    -------
    The score produced by ``fuzz.token_set_ratio(str1, str2)``, unchanged.
    """
    similarity = fuzz.token_set_ratio(str1, str2)
    return similarity

# np.vectorize lets compare_strings be called on whole arrays: it is applied
# to array1[i] and array2[i] for every index i.
similarity_scores = np.vectorize(compare_strings)(array1, array2)
# Insert a length-1 axis at position 1, so a shape (n,) array becomes (n, 1).
similarity_scores_with_dimension = np.expand_dims(similarity_scores, axis=1)

print("Original Similarity Scores:", similarity_scores, sep="\n")
print(
    "\nSimilarity Scores with a New Dimension:",
    similarity_scores_with_dimension,
    sep="\n",
)


Original Similarity Scores:
[80.         95.65217391 88.        ]

Similarity Scores with a New Dimension:
[[80.        ]
 [95.65217391]
 [88.        ]]


In [1]:
import numpy as np
from rapidfuzz import process, fuzz

def apply_rapidfuzz_matching_numpy(bakong_cust_array, aba_cust_list, score_cutoff=95, scorer=None):
    """Fuzzy-match each Bakong customer string against the ABA customer choices.

    Every element of ``bakong_cust_array`` is coerced to ``str`` and looked up
    with ``process.extractOne``. Queries for which ``extractOne`` returns no
    match at or above ``score_cutoff`` are dropped, so the returned arrays may
    be shorter than the input and are NOT index-aligned with it.

    Parameters
    ----------
    bakong_cust_array : array-like
        Query values; converted with ``np.array(..., dtype=str)``.
    aba_cust_list : collection
        Candidate choices handed to ``process.extractOne``.
    score_cutoff : float, optional
        Minimum score passed through to ``process.extractOne``. Defaults to
        95, the value that was previously hard-coded.
    scorer : callable, optional
        Scoring function passed through to ``process.extractOne``. Defaults
        to ``fuzz.token_set_ratio`` (resolved at call time).

    Returns
    -------
    aba_cust_results : numpy.ndarray
        ``dtype=object`` array with the best-matching choice for each query
        that had a match, in input order.
    score_results : numpy.ndarray
        ``dtype=float`` array with the score of each returned match.
    """
    if scorer is None:
        # Resolved here rather than in the signature so the default tracks
        # the module-level ``fuzz`` name at call time.
        scorer = fuzz.token_set_ratio

    bakong_cust_array = np.array(bakong_cust_array, dtype=str)

    matched_customers = []
    matched_scores = []

    for query in bakong_cust_array:
        match = process.extractOne(
            query, aba_cust_list, scorer=scorer, score_cutoff=score_cutoff
        )
        if match is None:
            # No candidate reached the cutoff: skip instead of storing a
            # placeholder that would have to be masked out afterwards.
            continue
        aba_cust, score, _ = match
        matched_customers.append(aba_cust)
        matched_scores.append(score)

    # dtype=object keeps the matched choices exactly as extractOne returned them.
    aba_cust_results = np.array(matched_customers, dtype=object)
    score_results = np.array(matched_scores, dtype=float)

    return aba_cust_results, score_results

# Example usage: match three Bakong strings against three ABA choices.
aba_cust_list = [
    "string two",
    "hello world",
    "fuzzy match",
]
bakong_cust_array = [
    "string two",
    "hello world!",
    "fuzzy matching",
]

aba_cust_results, score_results = apply_rapidfuzz_matching_numpy(
    bakong_cust_array=bakong_cust_array,
    aba_cust_list=aba_cust_list,
)

print("Matching ABA Customers:", aba_cust_results, sep="\n")
print("\nMatching Scores:", score_results, sep="\n")


Matching ABA Customers:
['string two' 'hello world']

Matching Scores:
[100.          95.65217391]
