# Demo for "plagiarism as selection criteria"

In [1]:
import numpy as np

In [2]:
class Distances():

    @staticmethod
    def euclidean(piece_1:np.array, piece_2:np.array)->float:
        """
        Compute the distance between two binary-encoded pieces.

        Despite the name, the value returned is the sum of the absolute
        element-wise differences (the L1 distance). For binary (0/1) pieces
        this is the number of positions in which the pieces differ, which is
        also their squared Euclidean distance, so it ranks pairs of binary
        pieces in the same order as the Euclidean distance would.

        Make sure that both pieces are binary encoded.

        :param piece_1: first piece, binary encoded
        :param piece_2: second piece, binary encoded (must broadcast against piece_1)
        :return the summed absolute difference, as a float
        """
        # Cast to float before subtracting: the subtraction would otherwise be
        # carried out in the input dtype, where unsigned integers wrap around
        # (0 - 1 == 255 for uint8) and boolean arrays reject the `-` operator.
        first = np.asarray(piece_1, dtype=float)
        second = np.asarray(piece_2, dtype=float)
        return float(np.sum(np.abs(first - second)))

In [49]:
def find_max_plagiarism_for_one_piece(org:np.array, gens:np.array, distance_metric, org_index=None, self_test=False)->(np.array, float):
    """
    Search the generated content for the piece that is closest to one original piece.

    A smaller distance means stronger plagiarism, so the piece with the smallest
    distance to ``org`` wins. When several pieces share the smallest distance,
    the one that comes first in ``gens`` is kept.

    :param org: an original piece of content, binary encoded
    :param gens: an iterable of generated content, binary encoded
    :param distance_metric: callable invoked as distance_metric(org, gen); its result is compared with ``<``
    :param org_index: index in gens to skip when self_test is True
                      (presumably the position of org itself when a corpus is compared against itself)
    :param self_test: when True, the piece of gens at org_index is not scored
    :return (the closest piece of generated content, its distance to org);
            (None, np.inf) if no piece scored below infinity
    """
    best_piece, best_score = None, np.inf

    for position, candidate in enumerate(gens):
        # In self-test mode the entry at org_index is excluded from the search.
        if self_test and position == org_index:
            continue

        score = distance_metric(org, candidate)
        # Strict comparison: an equally good later candidate never replaces an earlier one.
        if score < best_score:
            best_piece, best_score = candidate, score

    return best_piece, best_score

In [50]:
def find_max_plagiarism_pair(orgs:np.array, gens:np.array, distance_metric, self_test=False)->(np.array, np.array, float):
    """
    Find the pair with the highest amount of plagiarism.

    Every original piece is matched with its closest generated piece via
    find_max_plagiarism_for_one_piece; the pair with the smallest distance
    across the whole corpus is returned. Ties go to the earliest original piece.

    :param orgs: a corpus of original content, binary encoded
    :param gens: a corpus of generated content, binary encoded
    :param distance_metric: callable forwarded to find_max_plagiarism_for_one_piece
                            to score an (original, generated) pair; smaller means more plagiarism
    :param self_test: forwarded to find_max_plagiarism_for_one_piece together with the
                      index of each original piece
    :return (a piece of original content, a piece of generated content, their distance);
            (None, None, np.inf) if no pair scored below infinity, e.g. when orgs or gens is empty
    """
    corpus_min_diff = np.inf
    # Start from an empty pair rather than None so that the final unpacking
    # still works when the loop never finds (or never sees) a candidate.
    highest_plagiarism_pair = (None, None)

    for i, org in enumerate(orgs):
        gen, min_diff = find_max_plagiarism_for_one_piece(org, gens, distance_metric=distance_metric, org_index=i, self_test=self_test)
        if min_diff < corpus_min_diff:
            highest_plagiarism_pair = (org, gen)
            corpus_min_diff = min_diff

    return (*highest_plagiarism_pair, corpus_min_diff)