In [22]:
# for tf-idf
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import matplotlib.pyplot as plt
from collections import OrderedDict
import numpy as np

# for LDA
import gensim
from gensim.utils import simple_preprocess
import gensim.corpora as corpora
from pprint import pprint
import pyLDAvis.gensim
import pickle 
import pyLDAvis
import os
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords


"""
problem and context sections of the Size The Organization organizational pattern and Surrogate Customer
"""
contexts = ["""within a larger organization, usually that of a sponsoring enterprise or company there need to be smaller organizations
               capable of creating large software systems (greater than twenty-five thousand lines of code) that meet competitive cost
               and schedule benchmarks. This pattern shows how the proper sizing of an organization is vital to the health of the project
               and the productivity of its people. Large software projects (greater than twenty-five thousand lines of code) are seldom 
               delivered on time and within budget when the development team is too large or too small. There are two arguments that have
               led us to this conclusion. There are limits to the size of software development teams that allow them to work effectively. 
               A team can handle a larger problem than an individual can. Adding people late to a project rarely helps complete that project
               on time and within budget. If a software development team is too large, you can reach a point of greatly diminishing returns. 
               We have found empirically that an organization’s size affects a deliverable non-linearly. Communication overhead goes up as the
               square of the size, which means that the organization becomes less cohesive as the square of the size while the “horsepower” of
               the organization goes up only linearly. In addition, if the organization is too small, the team won’t have critical mass and 
               productivity will suffer. Projects larger than 25KSLOC can rarely be done by a SOLO VIRTUOSO (4.2.5) and overly small organizations have inadequate inertia and can easily become
               unstable""", 
            """It is important to exchange ideas and clarify issues with customers. But a customer may not be available. There are several 
            reasons that a customer may be unavailable. If the project is new, there may be no customers yet. In fact, the product might 
            even create its own customers. Even in existing products, the organization may never have established relationships with customers,
            and now is not a propitious time to do so. In some cases, the customer might not have the time right now. They’re busy too. But you
            need answers immediately. Some corporate cultures are such that the developers are insulated from the customers; they just don’t talk. 
            We certainly aren’t recommending it, but it does happen. Whatever the cause, there is a temptation for developers to make their best 
            guess and go on. The problem is that developers are naturally biased by their own designs, and will assume customer behavior that conforms 
            to their design. There are always other ways to think about the application, some of which may not mesh with the developer’s view
            """] 

# One row per pattern description; the 'context' column supplies the row labels
# of the document-term table built below.
df = pd.DataFrame({'context': ['context1', 'context2'], 'text': contexts})

# norm=None keeps the raw tf-idf weights instead of normalising each row.
tfidf = TfidfVectorizer(norm=None, stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['text'])

# Dense document-term table: rows are the contexts, columns are the vocabulary terms.
df_dtm = pd.DataFrame(
    data=tfidf_matrix.toarray(),
    columns=tfidf.get_feature_names_out(),
    index=df['context'].values,
)

# NLTK English stop-word list, read by remove_stopwords() below.
stop_words = stopwords.words('english')

def sent_to_words(sentences):
    """Lazily tokenize every item of *sentences*.

    Each item is converted with ``str()`` and passed to
    ``gensim.utils.simple_preprocess`` with ``deacc=True``; one token list is
    yielded per input item, in input order.
    """
    for raw_text in map(str, sentences):
        yield gensim.utils.simple_preprocess(raw_text, deacc=True)
        
def remove_stopwords(texts):
    """Return the token lists of *texts* with stop words filtered out.

    Every document is converted with ``str()`` and tokenized again with
    ``simple_preprocess`` before filtering, so a document may be a raw string
    or an already tokenized list (whose ``str()`` form is re-tokenized).

    The module-level ``stop_words`` list is read at call time.
    """
    # Build the lookup set once per call: membership tests on the original
    # list are a linear scan for every single token.
    banned = set(stop_words)
    return [[word for word in simple_preprocess(str(doc))
             if word not in banned] for doc in texts]
        
# Tokenize both contexts, then build the dictionary from the stop-word-free tokens.
words = list(sent_to_words(contexts))
data_words = remove_stopwords(words)
id2word = corpora.Dictionary(data_words)

# NOTE(review): the corpus is built from the unfiltered `words`, not from
# `data_words` - confirm that this is intended.
texts = words
corpus = list(map(id2word.doc2bow, texts))

# One LDA topic per pattern description.
num_of_patterns = 2
lda_model = gensim.models.LdaMulticore(
    corpus=corpus,
    id2word=id2word,
    num_topics=num_of_patterns,
)


pyLDAvis.enable_notebook()
LDAvis_data_filepath = os.path.join('.ldavis_prepared_'+str(num_of_patterns))

# Prepare the visualisation data and cache it on disk ...
LDAvis_prepared = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)
with open(LDAvis_data_filepath, 'wb') as f:
    pickle.dump(LDAvis_prepared, f)

# ... then read the cached copy back (a file this script has just written)
# and export it as a standalone HTML page.
with open(LDAvis_data_filepath, 'rb') as f:
    LDAvis_prepared = pickle.load(f)
pyLDAvis.save_html(LDAvis_prepared, './ldavis_prepared_'+ str(num_of_patterns) +'.html')

"""
neural network is designed to receive tf-idf frequencies of the words in textual descriptions of the problem and context
section of organizational patterns. Trained neural network enables us to identify and distinguish those patterns which
descriptions have a same tf-idf frequency of the words and thus assign them to the class 1 or 0.

Frequencies for different tuple of the sections from pattern description can be used too. This example is set up for
problem and context sections.

Dependent variables:
Y = binary indicator, 1.0 if word frequencies are from pattern language 1, 0 if from the other
X1 = tf-idf of a word for the problem and context section of the pattern from pattern language 1
X2 = tf-idf of a word for the problem and context section of the pattern from pattern language 2
"""

class Module:
    """Minimal base class for the layers, losses and containers defined in this file.

    Sub-modules are stored in ``_modules`` (also reachable as ``modules``) and
    every ``numpy.ndarray`` assigned as an attribute is stored in
    ``_parameters``. Both are ordered dictionaries, so iteration follows
    insertion order.
    """

    def __init__(self):
        # OrderedDict is neither a Module nor an ndarray, so these assignments
        # fall through __setattr__ into the instance __dict__.
        self._modules = OrderedDict()
        # Public alias of the very same dictionary: add_module() and external
        # code read `modules`, while parameters(), __dir__(), __getattr__() and
        # __setattr__() read `_modules`. Sharing one object keeps them in sync.
        self.modules = self._modules
        self._parameters = OrderedDict()

    def add_module(self, module, name: str):
        """Register *module* under *name* (note the argument order).

        Raises:
            KeyError: if *name* is empty, contains a dot, or clashes with an
                existing attribute that is not a registered module.
        """
        if hasattr(self, name) and name not in self.modules:
            raise KeyError("attribute '{}' already exists".format(name))
        elif '.' in name:
            raise KeyError("module name can't contain \".\"")
        elif name == '':
            raise KeyError("module name can't be empty string \"\"")
        self.modules[name] = module

    def register_parameter(self, name, param):
        """Store *param* under *name* in the parameter registry.

        Raises:
            KeyError: if *name* is empty, contains a dot, or clashes with an
                existing attribute that is not a registered parameter.
        """
        if '.' in name:
            raise KeyError("parameter name can't contain \".\"")
        elif name == '':
            raise KeyError("parameter name can't be empty string \"\"")
        elif hasattr(self, name) and name not in self._parameters:
            raise KeyError("attribute '{}' already exists".format(name))
        else:
            self._parameters[name] = param

    def parameters(self, recurse=True):
        """Yield ``(name, parameter)`` pairs of this module and, optionally, its sub-modules.

        Parameters exposing a ``requires_grad`` attribute are skipped when it
        is falsy. Plain ``numpy.ndarray`` parameters have no such attribute and
        are always yielded.
        """
        for name, param in self._parameters.items():
            if getattr(param, 'requires_grad', True):
                yield name, param
        if recurse:
            for module in self._modules.values():
                for child_name, param in module.parameters(recurse):
                    if getattr(param, 'requires_grad', True):
                        yield child_name, param

    def __dir__(self):
        """List class attributes, instance attributes, module names and parameter names."""
        module_attrs = dir(self.__class__)
        attrs = list(self.__dict__.keys())
        modules = list(self._modules.keys())
        parameters = list(self._parameters.keys())
        keys = module_attrs + attrs + modules + parameters

        # Eliminate attrs that are not legal Python variable names
        keys = [key for key in keys if not key[0].isdigit()]

        return sorted(keys)

    def __getattr__(self, name: str):
        """Resolve registered modules and parameters as attributes.

        Python calls this only after normal attribute lookup has failed.
        """
        if '_modules' in self.__dict__:
            modules = self.__dict__['_modules']
            if name in modules:
                return modules[name]
        if '_parameters' in self.__dict__:
            parameters = self.__dict__['_parameters']
            if name in parameters:
                return parameters[name]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, name))

    def __setattr__(self, name, value):
        """Route Module values to the module registry and ndarrays to the parameter registry."""
        if isinstance(value, Module):
            self._modules[name] = value
        elif isinstance(value, np.ndarray):
            # Every ndarray attribute lands here, including cached inputs and
            # gradients that subclasses assign.
            self.register_parameter(name, value)
        else:
            object.__setattr__(self, name, value)

    def forward(self, *args, **kwargs) -> np.ndarray:
        """Placeholder for the forward pass; subclasses override it (returns None here)."""
        pass

    def backward(self, *args, **kwargs) -> np.ndarray:
        """Placeholder for the backward pass; subclasses override it (returns None here)."""
        pass

    def __call__(self, *args, **kwargs):
        """Calling a module runs its forward pass."""
        return self.forward(*args, **kwargs)

# Uses the latest version of the gradient_check function,
# last changed in commit https://github.com/hudeclukas/neural_networks_at_fiit_2022/commit/baeacfa5a8ef83c7c7979b2010a0f5a4d336bf10
def gradient_check(network: "Module", loss_function: "Module", X: np.ndarray, Y: np.ndarray, epsilon=1e-7):
    """Compare the stored ``dW`` gradients with central finite differences.

    For every entry of every ``W`` found in ``network.modules`` (layers
    without both ``W`` and ``dW`` are skipped) the weight is shifted by
    ``+epsilon`` and ``-epsilon``, the mean loss is evaluated for both, and
    ``(J_plus - J_minus) / (2 * epsilon)`` is compared with ``dW``. The
    ``dW`` values must already be populated by the caller. Only ``W`` is
    checked.

    Args:
        network: callable exposing a ``modules`` mapping of layers.
        loss_function: callable ``loss_function(prediction, Y)``; its result
            is averaged with ``np.mean``.
        X: input passed to ``network``.
        Y: target passed to ``loss_function``.
        epsilon: size of the finite-difference step.

    Returns:
        The relative difference
        ``||grad - approx|| / (||grad|| + ||approx||)``, or ``0.0`` when both
        vectors are all zeros (or no weights were found). A verdict is also
        printed.
    """
    # https: // datascience - enthusiast.com / DL / Improving_DeepNeural_Networks_Gradient_Checking.html
    gradapprox = []
    grad_backward = []

    for layer in network.modules.values():
        if not (hasattr(layer, "W") and hasattr(layer, "dW")):
            continue
        shape = layer.W.shape
        for i in range(shape[0]):
            for j in range(shape[1]):
                origin_W = np.copy(layer.W[i][j])
                try:
                    # Loss with the weight nudged upwards.
                    layer.W[i][j] = origin_W + epsilon
                    J_plus = np.mean(loss_function(network(X), Y))

                    # Loss with the weight nudged downwards.
                    layer.W[i][j] = origin_W - epsilon
                    J_minus = np.mean(loss_function(network(X), Y))
                finally:
                    # Always restore the weight, even if a forward pass raised.
                    layer.W[i][j] = origin_W

                gradapprox.append((J_plus - J_minus) / (2 * epsilon))
                grad_backward.append(layer.dW[i][j])

    # Compare gradapprox to backward propagation gradients by computing difference.
    gradapprox = np.reshape(gradapprox, (-1, 1))
    grad_backward = np.reshape(grad_backward, (-1, 1))

    numerator = np.linalg.norm(grad_backward - gradapprox)
    denominator = np.linalg.norm(grad_backward) + np.linalg.norm(gradapprox)
    # Both norms are zero only when both vectors are all zeros; avoid the
    # 0 / 0 division, which would produce NaN.
    difference = numerator / denominator if denominator else 0.0

    # As before, a difference of exactly zero or above 2e-7 counts as a
    # failure. NaN fails every comparison, so it is now reported as a failure
    # too instead of slipping through as a success.
    passed = 0 < difference <= 2e-7
    if not passed:
        print(
            "\033[91m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print(
            "\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")
    return difference
    

class Linear(Module):
    """Fully connected layer computing ``W @ input + b``.

    ``W`` has shape ``(out_features, in_features)`` and ``b`` has shape
    ``(out_features, 1)``. The second axis of the input is used as the sample
    count when the gradients are averaged.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        # Weights start from a standard normal draw, the bias from zeros.
        self.W = np.random.randn(out_features, in_features)
        self.dW = np.zeros_like(self.W)
        self.b = np.zeros((out_features, 1))  # column vector, broadcast over samples
        self.db = np.zeros_like(self.b)

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Cache the input and its sample count, then return ``W @ input + b``."""
        self.fw_inputs = input
        self.m = input.shape[1]
        return self.W @ input + self.b

    def backward(self, dz: np.ndarray) -> np.ndarray:
        """Store the averaged ``dW`` and ``db`` and return the gradient w.r.t. the input."""
        scale = 1 / self.m
        upstream = self.W.T @ dz
        self.dW = scale * np.matmul(dz, self.fw_inputs.T)
        self.db = scale * np.sum(dz, axis=1, keepdims=True)
        return upstream
        

class Sigmoid(Module):
    """Element-wise logistic sigmoid activation."""

    def __init__(self):
        super().__init__()

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Cache the input and return ``1 / (1 + exp(-input))``.

        The activation function in this form is given in the Lecture1_history
        presentation, slide 57.
        """
        self.fw_input = input
        return 1.0 / (1.0 + np.exp(-input))

    def backward(self, da) -> np.ndarray:
        """Return ``da * f(x) * (1 - f(x))`` for the cached input ``x``.

        The derivative of the activation is ``f(x) * (1 - f(x))``, where
        ``f`` is the sigmoid computed by ``forward``. The procedure is
        described in the Lecture2_NN_miniframework lecture, section 4.1
        (first formula from the top, above the red box), slide 122.
        """
        # The forward pass is used here to obtain the activation itself.
        activation = self.forward(self.fw_input)
        local_gradient = activation * (1 - activation)
        return da * local_gradient


class Tanh(Module):
    """Element-wise hyperbolic tangent activation."""

    def __init__(self):
        super(Tanh, self).__init__()

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Cache the input and return ``tanh(input)``.

        The formula is given in the Lecture1_history presentation, slide 57.
        """
        self.fw_input = input
        # np.tanh stays finite for large inputs, whereas the explicit
        # (exp(2x) - 1) / (exp(2x) + 1) form overflows to inf / inf = nan.
        return np.tanh(input)

    def backward(self, da) -> np.ndarray:
        """Return ``da * (1 - tanh(x)**2)`` for the cached input ``x``.

        Multiplies the derivative of the activation function by the activation
        gradient, see the Lecture2_NN_miniframework lecture, slide 122.
        """
        return np.multiply(da, 1 - np.square(np.tanh(self.fw_input)))

#------------------------------------------------------------------------------
#   RELUActivationFunction class
#------------------------------------------------------------------------------

class ReLU(Module):
    """Element-wise rectified linear unit activation."""

    def __init__(self):
        super().__init__()

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Cache the input and return ``max(input, 0)`` element-wise.

        The formula is given in the Lecture1_history presentation, slide 58.
        """
        self.fw_input = input
        return np.maximum(input, 0.0)

    def backward(self, da) -> np.ndarray:
        """Return ``da`` where the cached input was positive and zero elsewhere.

        Multiplies the derivative of the activation function (Lecture1_history,
        slide 58) by the activation gradient, see the
        Lecture2_NN_miniframework lecture, slide 122.
        """
        derivative = np.where(self.fw_input > 0, 1, 0)
        return da * derivative

class MSELoss(Module):
    """Mean squared error loss."""

    def __init__(self):
        super().__init__()

    def forward(self, input: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return the mean of the squared differences between target and input."""
        residual = np.subtract(target, input)
        return np.mean(np.square(residual))

    def backward(self, input: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return ``-2 * (target - input)`` element-wise (not divided by the element count)."""
        residual = target - input
        return -2 * residual


class BCELoss(Module):
    """Element-wise binary cross-entropy loss.

    Predictions are clipped to ``[eps, 1 - eps]`` before use, so a prediction
    of exactly 0 or 1 no longer produces ``log(0)`` in ``forward`` or a
    division by zero in ``backward``.

    Args:
        eps: clipping margin. The default assumes float64 predictions
            (TODO confirm if lower-precision inputs are ever used).
    """

    def __init__(self, eps=1e-12):
        super(BCELoss, self).__init__()
        self.eps = eps

    def _clip(self, input):
        """Keep predictions strictly inside the open interval (0, 1)."""
        return np.clip(input, self.eps, 1.0 - self.eps)

    def forward(self, input: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return ``-(t * log(p) + (1 - t) * log(1 - p))`` per element (no averaging)."""
        p = self._clip(input)
        return -(np.multiply(target, np.log(p)) + np.multiply((1 - target), np.log(1 - p)))

    def backward(self, input: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return the per-element derivative ``-t / p + (1 - t) / (1 - p)``."""
        p = self._clip(input)
        return -target / p + (1 - target) / (1 - p)
    

class Model(Module):
    """Sequential container that chains the modules registered in ``modules``."""

    def __init__(self):
        super().__init__()

    def forward(self, input) -> np.ndarray:
        """Feed *input* through every registered module in insertion order."""
        activation = input
        for layer in self.modules.values():
            activation = layer(activation)
        return activation

    def backward(self, z: np.ndarray):
        """Propagate *z* through the registered modules in reverse order."""
        gradient = z
        for layer in reversed(list(self.modules.values())):
            gradient = layer.backward(gradient)
        return gradient
    

def tf_idf_from_contexts():  # for two patterns: Size The Organization and Surrogate Customer organizational pattern
    """Return the hard-coded tf-idf features ``X`` (2 x 4) and labels ``Y`` (1 x 4).

    The four columns hold the frequencies of the words 'assurance',
    'benchmarks', 'blindsided' and 'budget'.
    """
    first_row = [0.0, 1.405465, 0.0, 1.405]
    second_row = [2.81, 0.0, 1.405, 0.0]
    features = np.array([first_row, second_row])

    # Two classes: 0 if Organization Design Patterns, 1 if Piecemeal Growth Pattern Language.
    # Per the original note, the first three words are from Organization Design
    # Patterns (STO) and the last one is from Piecemeal Growth Pattern Language (SC).
    # NOTE(review): confirm that these labels match the row layout of the features.
    labels = np.array([[0., 0., 0., 1.]])
    return features, labels

dataset_features_X, dataset_labels_Y = tf_idf_from_contexts()


# using BCE
# Six Linear layers, each followed by an activation. The tuple is evaluated in
# order, so the random weight draws happen in the same sequence as the layers.
mlp = Model()
for _layer_name, _layer in (
    ('first-hidden', Linear(2, 20)),
    ('activation-1', Sigmoid()),
    ('second-hidden', Linear(20, 20)),
    ('activation-2', Sigmoid()),
    ('third-hidden', Linear(20, 20)),
    ('activation-3', Tanh()),
    ('fourth-hidden', Linear(20, 20)),
    ('activation-4', ReLU()),
    ('fifth-hidden', Linear(20, 20)),
    ('activation-5', Sigmoid()),
    ('sixth-hidden', Linear(20, 1)),
    ('activation-6', Sigmoid()),
):
    mlp.add_module(_layer, _layer_name)

# Forward pass, loss, loss gradient, then backpropagation through the network.
y_hat = mlp(dataset_features_X)

bce = BCELoss()
loss = bce(y_hat, dataset_labels_Y)

back = bce.backward(y_hat, dataset_labels_Y)

output = mlp.backward(back)

# (gradient_check(mlp, bce, dataset_features_X, dataset_labels_Y))

"""
This visualization is a modifier version of the one designed by https://nlp.stanford.edu/events/illvi2014/papers/sievert-illvi2014.pdf

LDAvis consists from two interactive components, intertopic distance map on the left and bar chart on the right. Intertopic distance
map is a visualization of the topics in 2D space. Circles are plotted using a multi-dimensional scaling algorithm, text with many
dimensions is squeezed to a 2D space. Circles represent patterns descriptions. Circles close together mean descriptions of the patterns
are similar. Pattern description consists of many topics. Pattern description and the topic is not the same.

When you click on a topic in the intertopic distance map, bar chart changes to display 30 (by-default) most salient words included in the pattern
(circle) description. Most salient words are the most informative words across the whole dataset representing a specific topic. Ligth bars represent 
salient terms. Darker bars show frequency of those terms which are topic-specific.

Because dark bars for Size The Organization (any value of Lambda) almost totally eclipse light bars this means terms used in its description
nearly exclusively belong to this pattern.

Adjusting Lambda close to 0 highlights potentially rare and exclusive terms for a specific organizational pattern (and its topic).

Summary: visualization displays
"""
# LDAvis_prepared

  and should_run_async(code)
  self._context.run(self._callback, *self._args)
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\uzivatel\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


'\nThis visualization is a modifier version of the one designed by https://nlp.stanford.edu/events/illvi2014/papers/sievert-illvi2014.pdf\n\nLDAvis consists from two interactive components, intertopic distance map on the left and bar chart on the right. Intertopic distance\nmap is a visualization of the topics in 2D space. Circles are plotted using a multi-dimensional scaling algorithm, text with many\ndimensions is squeezed to a 2D space. Circles represent patterns descriptions. Circles close together mean descriptions of the patterns\nare similar. Pattern description consists of many topics. Pattern description and the topic is not the same.\n\nWhen you click on a topic in the intertopic distance map, bar chart changes to display 30 (by-default) most salient words included in the pattern\n(circle) description. Most salient words are the most informative words across the whole dataset representing a specific topic. Ligth bars represent \nsalient terms. Darker bars show frequency of th