In [1]:
""" loadTestModel.py
    
        Check that one can start predicting with just the files
        found in the "trained model" directory.
"""

import sys

import json
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras import models

2024-06-08 20:12:06.781007: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:

# Inputs: paths of the saved model, its metadata and its tokenizer.
# NOTE(review): paths are relative, so they presumably resolve against the
# kernel's working directory - confirm the notebook is started from the repo root.
trainedModelFile = 'training/trained_model_v1/doc_classify_model.h5'
trainedMetadataFile = 'training/trained_model_v1/doc_metadata.json'
trainedTokenizerFile = 'training/trained_model_v1/doc_tokenizer.json'

In [3]:
# Load tokenizer and metadata:
#   (in metadata, we'll need keys 'label_legend_inverted' and 'max_seq_length')
# Context managers close each file handle as soon as its content has been
# read, instead of leaving it open for the rest of the kernel session.
with open(trainedTokenizerFile, encoding='utf-8') as tokenizerFile:
    tokenizer = tokenizer_from_json(tokenizerFile.read())
with open(trainedMetadataFile, encoding='utf-8') as metadataFile:
    metadata = json.load(metadataFile)
# Load the model:
model = models.load_model(trainedModelFile)




In [4]:


# a function for testing:
def classifyDoc(text, classifyModel, pMaxSequence, pLabelLegendInverted, pTokenizer):
    """Classify one text and return its score for every label.

    Args:
        text: the document to classify, as a single string.
        classifyModel: object whose ``predict`` method receives the padded
            input and returns one row of scores per input text.
        pMaxSequence: value forwarded to ``pad_sequences`` as ``maxlen``.
        pLabelLegendInverted: mapping from the *string* form of an output
            index (``'0'``, ``'1'``, ...) to the corresponding label name.
        pTokenizer: object whose ``texts_to_sequences`` method converts a
            list of texts into a list of sequences.

    Returns:
        A dict mapping each label name to the score found at that label's
        index in the first row of the model output.
    """
    tokenSequences = pTokenizer.texts_to_sequences([text])
    paddedInput = pad_sequences(tokenSequences, maxlen=pMaxSequence)
    # A single text was submitted, so only the first output row is relevant.
    scores = classifyModel.predict(paddedInput)[0]
    labeledPredictions = {}
    for index, score in enumerate(scores):
        # Legend keys are strings, hence the str() conversion of the index.
        labeledPredictions[pLabelLegendInverted[str(index)]] = score
    return labeledPredictions


In [8]:
def chooseSampleTexts(customTexts, defaultTexts):
    """Return the texts to classify.

    Args:
        customTexts: user-supplied list of texts; may be empty or None.
        defaultTexts: list of texts used when no custom text is supplied.

    Returns:
        A new list holding ``customTexts`` when it is non-empty, otherwise
        a new list holding ``defaultTexts``.
    """
    if customTexts:
        return list(customTexts)
    return list(defaultTexts)


# Custom text(s) to classify.
input_texts = ["""ufthansa flies back to profit

German airline Lufthansa has returned to profit in 2004 after posting huge losses in 2003.

In a preliminary report, the airline announced net profits of 400m euros ($527.61m; £274.73m), compared with a loss of 984m euros in 2003. """]
# Comment out the next line to classify the custom text above instead of the
# built-in samples below.
input_texts = []

# texts for the test
defaultSampleTexts = [
    'Growth in Japan evaporated in the three months to September, sparking renewed concern about an economy not long out of a decade-long trough.',
    'Northern Ireland man James McIlroy is confident he can win his first major title at this weekends Spar European Indoor Championships in Madrid.',
]

# The script version of this code read the text from sys.argv; inside a
# notebook kernel sys.argv holds the kernel launcher arguments, so the custom
# texts are taken from input_texts instead.
sampleTexts = chooseSampleTexts(input_texts, defaultSampleTexts)

# simple test:
# Run every sample text through the loaded model and print, for each one,
# the text followed by its label -> score mapping.
print('\n\tMODEL TEST:')
print(20 * '=')
for st in sampleTexts:
    preds = classifyDoc(
        st,
        model,
        metadata['max_seq_length'],
        metadata['label_legend_inverted'],
        tokenizer,
    )
    print('TEXT       = %s' % (st,))
    print('PREDICTION = %s' % (str(preds),))
    print(20 * '*')


	MODEL TEST:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 777ms/step
TEXT       = Growth in Japan evaporated in the three months to September, sparking renewed concern about an economy not long out of a decade-long trough.
PREDICTION = {'technologie': 0.058473203, 'entertainment': 0.029268414, 'other': 0.031726066, 'business': 0.059240174, 'sport': 0.04151596, 'politics': 0.057238925, 'historical': 0.026255485, 'medical': 0.21293281, 'food': 0.16475269, 'space': 0.12502299, 'graphics': 0.1935733}
********************
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 622ms/step
TEXT       = Northern Ireland man James McIlroy is confident he can win his first major title at this weekends Spar European Indoor Championships in Madrid.
PREDICTION = {'technologie': 0.05507234, 'entertainment': 0.027799955, 'other': 0.031154277, 'business': 0.058582425, 'sport': 0.043955814, 'politics': 0.05505538, 'historical': 0.025594324, 'medical': 0.20245102, 'food': 0.18471