# Natural Language Processing - Text Preprocessing

## Libraries and settings

In [71]:
# Libraries
import os
import re
import string
import numpy as np
import pandas as pd
from pprint import pprint

#Natural Language Toolkit) is a leading platform for building Python programs to work with human language data.
import nltk 

# Import only once
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('averaged_perceptron_tagger')

from nltk.tag import pos_tag
from nltk.corpus import stopwords
from nltk.chunk import tree2conlltags
from nltk.chunk import conlltags2tree
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Current working directory
print('Current working directory:', os.getcwd())

Current working directory: /Users/john/School/Sem5/DA/data_analytics/Week_11


[nltk_data] Downloading package stopwords to /Users/john/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/john/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/john/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /Users/john/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/john/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


## Defining documents

In [72]:
# Defining documents (=sentenses)
d1 = 'The car is driven on the road.'
d2 = 'The truck is driven on the highway.'
d3 = 'The bicycle is driven on the bicycle path.'

corpus_01 = d1 + ' ' + d2 + ' ' + d3
corpus_01

'The car is driven on the road. The truck is driven on the highway. The bicycle is driven on the bicycle path.'

## Text preprocessing
#### Steps:
- Text to lowercase
- Removing punctuations
- Tokenization
- Removal of stop words
- Lemmatization

### Text to lowercase

In [73]:
# Text to lowercase function
def text_lowercase(text):
    return text.lower()

# Text to lowercase
corpus_02 = text_lowercase(corpus_01)
corpus_02

'the car is driven on the road. the truck is driven on the highway. the bicycle is driven on the bicycle path.'

### Removing punctuation ( !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ )

In [74]:
print("string.punctuation: " + string.punctuation);

# Remove punctuation function
def remove_punctuation(text):
    translator = str.maketrans('', '', string.punctuation)
    return text.translate(translator)

# Remove punctuation
corpus_03 = remove_punctuation(corpus_02)
corpus_03

string.punctuation: !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~


'the car is driven on the road the truck is driven on the highway the bicycle is driven on the bicycle path'

### Tokenize text & removal of stopwords

In [75]:
# Show english stopwords
eng_stopwords = set(stopwords.words('english'))
print("List of english stopwords:")
print(eng_stopwords)

List of english stopwords:
{'this', 'whom', 'a', 'have', 'by', 'didn', "isn't", "you're", 'some', 'myself', 'during', 'who', 'not', 'weren', 'only', 'i', "won't", 's', 'on', 'is', 'before', 'she', 'those', 'his', 'between', "hasn't", 'below', 'each', 'yours', 'don', "mightn't", 'd', 'its', 'with', 'shouldn', "didn't", 'y', 'does', 'you', 'couldn', "you'd", 're', 'it', 'has', 'through', 'theirs', "wouldn't", 'wasn', 'about', "doesn't", "you'll", 'ain', 'yourself', 'when', 'few', 'where', 'such', 'mustn', 'be', 'hadn', 'themselves', "needn't", 'aren', 'after', 'having', 'll', 'any', 'these', 'down', 'was', 'what', 'into', 'mightn', 'again', 'for', 'if', 've', 'been', 'but', 'o', 'over', 'the', 'm', 'are', 'just', 'both', 'no', 'same', 'too', 'did', 'of', 'against', 'doing', 'am', 'so', 'there', 'that', 'very', 'wouldn', 'haven', 'ours', 'himself', 'out', 'him', 'why', "mustn't", 'nor', "couldn't", 'or', 'which', 'in', 'while', 'here', 'how', "shan't", 'my', 'me', 'because', 'other', 'won

In [76]:
# Function for tokenization and the removal of stopwords
def remove_stopwords(text):
    stop_words = set(stopwords.words("english"))
    word_tokens = word_tokenize(text)
    filtered_text = [word for word in word_tokens if word not in stop_words]
    return filtered_text
 
# Remove stopwords
corpus_04 = remove_stopwords(corpus_03)
print(corpus_04, end="")

['car', 'driven', 'road', 'truck', 'driven', 'highway', 'bicycle', 'driven', 'bicycle', 'path']

### Lemmatization (Zusammenfassen verschiedener flektierter Formen desselben Wortes)

In [77]:
# Initialize Lemmatizer
lemmatizer = WordNetLemmatizer()

# Lemmatize string function
def lemmatize_word(text):
    word_tokens = word_tokenize(text)
    lemmas = [lemmatizer.lemmatize(word, pos ='v') for word in word_tokens]
    return lemmas

# Lemmatize
lem = []
for i in corpus_04:
    lem.append(lemmatize_word(i))

# Nested list to list
corpus_05 = [' '.join([str(x) for x in lst]) for lst in lem]

print('Before lemmatization:')
print(corpus_04, '\n')

print('After lemmatization:')
print(corpus_05, end="")

Before lemmatization:
['car', 'driven', 'road', 'truck', 'driven', 'highway', 'bicycle', 'driven', 'bicycle', 'path'] 

After lemmatization:
['car', 'drive', 'road', 'truck', 'drive', 'highway', 'bicycle', 'drive', 'bicycle', 'path']

## Redefine the text corpus (pre-processed)

In [78]:
# We will use the lemmatized words above to re-define our corpus 
corpus = ['car drive road', 
          'truck drive highway', 
          'bicycle drive bicycle path']

## Document-term matrix with ngram_range=(1,1)

In [79]:
# Vectorizer with ngram_range=(1,1)
vectorizer = CountVectorizer(min_df=0.0, ngram_range=(1,1))

# Transform 
count = vectorizer.fit_transform(corpus)
 
# Create dataframe
df_count = pd.DataFrame(count.toarray(),
                        columns=vectorizer.get_feature_names_out())

print('Document-term matrix')
print(df_count)

Document-term matrix
   bicycle  car  drive  highway  path  road  truck
0        0    1      1        0     0     1      0
1        0    0      1        1     0     0      1
2        2    0      1        0     1     0      0


## Document-term matrix with ngram_range=(2,2)

In [80]:
# Vectorizer with with ngram_range=(2,2)
vectorizer = CountVectorizer(min_df=0.0, ngram_range=(2,2))

# Transform 
count = vectorizer.fit_transform(corpus)
 
# Create dataframe
df_count = pd.DataFrame(count.toarray(),
                        columns=vectorizer.get_feature_names_out())

print('Document-term matrix')
print(df_count)

Document-term matrix
   bicycle drive  bicycle path  car drive  drive bicycle  drive highway  \
0              0             0          1              0              0   
1              0             0          0              0              1   
2              1             1          0              1              0   

   drive road  truck drive  
0           1            0  
1           0            1  
2           0            0  


## Term frequency-inverse document frequency (TF-IDF)
- For details see: https://www.learndatasci.com/glossary/tf-idf-term-frequency-inverse-document-frequency

### Term Frequency (TF)

In [81]:
# Compute Term Frequency (TF)
words_set = set()
for doc in corpus:
    words = doc.split(' ')
    words_set = words_set.union(set(words))
    
print('Number of words in the corpus:',len(words_set), '\n')
print('The words in the corpus: \n', words_set)

# Number of documents in the corpus
n_docs = len(corpus)

# Number of unique words in the corpus 
n_words_set = len(words_set)

df_tf = pd.DataFrame(np.zeros((n_docs, n_words_set)), 
                     columns=list(words_set))

print("\nTerm Frequency (TF):")
for i in range(n_docs):
    # Words in the document
    words = corpus[i].split(' ')
    for w in words:
        df_tf[w][i] = df_tf[w][i] + (1 / len(words))
        
print(df_tf.round(4))

Number of words in the corpus: 7 

The words in the corpus: 
 {'drive', 'highway', 'car', 'path', 'truck', 'road', 'bicycle'}

Term Frequency (TF):
    drive  highway     car  path   truck    road  bicycle
0  0.3333   0.0000  0.3333  0.00  0.0000  0.3333      0.0
1  0.3333   0.3333  0.0000  0.00  0.3333  0.0000      0.0
2  0.2500   0.0000  0.0000  0.25  0.0000  0.0000      0.5


### Inverse Document Frequency (IDF)

In [82]:
# Computing Inverse Document Frequency (IDF)
print("\nInverse Document Frequency (IDF):")

idf = {}

for w in words_set:
    
    # k = number of documents that contain this word
    k = 0
    
    for i in range(n_docs):
        if w in corpus[i].split():
            k += 1
            
    idf[w] =  np.log10(n_docs / k).round(4)
    
    print(f'{w:>15}: {idf[w]:>10}')


Inverse Document Frequency (IDF):
          drive:        0.0
        highway:     0.4771
            car:     0.4771
           path:     0.4771
          truck:     0.4771
           road:     0.4771
        bicycle:     0.4771


### Term Frequency - Inverse Document Frequency (TF-IDF)

In [83]:
# Computing TF-IDF
df_tf_idf = df_tf.copy()

for w in words_set:
    for i in range(n_docs):
        df_tf_idf[w][i] = df_tf[w][i] * idf[w]

print('\nTF-IDF:')
print(df_tf_idf.round(4))


TF-IDF:
   drive  highway    car    path  truck   road  bicycle
0    0.0    0.000  0.159  0.0000  0.000  0.159   0.0000
1    0.0    0.159  0.000  0.0000  0.159  0.000   0.0000
2    0.0    0.000  0.000  0.1193  0.000  0.000   0.2386


## Part-of-Speach (POS) tagging
For meaning of POS-tags see: https://pythonexamples.org/nltk-pos-tagging

In [84]:
text = '''European authorities fined Google a record $5.1 
          billion on Wednesday for abusing its power in the 
          mobile phone market.'''

def preprocess(sent):
    sent = nltk.word_tokenize(sent)
    sent = nltk.pos_tag(sent)
    return sent

sent = preprocess(text)
pattern = 'NP: {<DT>?<JJ>*<NN>}'

cp = nltk.RegexpParser(pattern)
cs = cp.parse(sent)

iob_tagged = tree2conlltags(cs)

# Print the POS-tags
pprint(iob_tagged)

[('European', 'JJ', 'O'),
 ('authorities', 'NNS', 'O'),
 ('fined', 'VBD', 'O'),
 ('Google', 'NNP', 'O'),
 ('a', 'DT', 'B-NP'),
 ('record', 'NN', 'I-NP'),
 ('$', '$', 'O'),
 ('5.1', 'CD', 'O'),
 ('billion', 'CD', 'O'),
 ('on', 'IN', 'O'),
 ('Wednesday', 'NNP', 'O'),
 ('for', 'IN', 'O'),
 ('abusing', 'VBG', 'O'),
 ('its', 'PRP$', 'O'),
 ('power', 'NN', 'B-NP'),
 ('in', 'IN', 'O'),
 ('the', 'DT', 'B-NP'),
 ('mobile', 'JJ', 'I-NP'),
 ('phone', 'NN', 'I-NP'),
 ('market', 'NN', 'B-NP'),
 ('.', '.', 'O')]


### SW11 - Task b - defining own documents

In [85]:
# Defining documents (=sentences)
e1 = 'I love to travel and explore new destinations.'
e2 = 'Flying over Japan was an incredible experience.'
e3 = 'Zurich is a charming city with a rich cultural heritage.'

corpus_10 = e1 + ' ' + e2 + ' ' + e3
print("corpus_10: " + corpus_10)
print()

# Text to lowercase function
def text_lowercase(text):
    return text.lower()

# Text to lowercase
print("1) text to lowercase")
corpus_11 = text_lowercase(corpus_10)
print("corpus_11: " + corpus_11)



#print("string.punctuation: " + string.punctuation);

# Remove punctuation function
def remove_punctuation(text):
    translator = str.maketrans('', '', string.punctuation)
    return text.translate(translator)

# Remove punctuation
print("2) remove punctuation")
corpus_13 = remove_punctuation(corpus_11)
print(corpus_13)


print("3) Stopwords")
# Show english stopwords
eng_stopwords = set(stopwords.words('english'))
print("List of english stopwords:", '\n')
print(eng_stopwords)


# Function for tokenization and the removal of stopwords
def remove_stopwords(text):
    stop_words = set(stopwords.words("english"))
    word_tokens = word_tokenize(text)
    filtered_text = [word for word in word_tokens if word not in stop_words]
    return filtered_text
 
# Remove stopwords
print("4) toenization and remoal of stopwords")
corpus_14 = remove_stopwords(corpus_13)
print(corpus_14, end="")




# Initialize Lemmatizer
lemmatizer = WordNetLemmatizer()

# Lemmatize string function
def lemmatize_word(text):
    word_tokens = word_tokenize(text)
    lemmas = [lemmatizer.lemmatize(word, pos ='v') for word in word_tokens]
    return lemmas

# Lemmatize
lem = []
for i in corpus_14:
    lem.append(lemmatize_word(i))

# Nested list to list
corpus_15 = [' '.join([str(x) for x in lst]) for lst in lem]

print('Before lemmatization:')
print(corpus_14, '\n')

print('After lemmatization:')
print(corpus_15, end="")




corpus_10: I love to travel and explore new destinations. Flying over Japan was an incredible experience. Zurich is a charming city with a rich cultural heritage.

1) text to lowercase
corpus_11: i love to travel and explore new destinations. flying over japan was an incredible experience. zurich is a charming city with a rich cultural heritage.
2) remove punctuation
i love to travel and explore new destinations flying over japan was an incredible experience zurich is a charming city with a rich cultural heritage
3) Stopwords
List of english stopwords: 

{'this', 'whom', 'a', 'have', 'by', 'didn', "isn't", "you're", 'some', 'myself', 'during', 'who', 'not', 'weren', 'only', 'i', "won't", 's', 'on', 'is', 'before', 'she', 'those', 'his', 'between', "hasn't", 'below', 'each', 'yours', 'don', "mightn't", 'd', 'its', 'with', 'shouldn', "didn't", 'y', 'does', 'you', 'couldn', "you'd", 're', 'it', 'has', 'through', 'theirs', "wouldn't", 'wasn', 'about', "doesn't", "you'll", 'ain', 'yourself'

### Task 1.c Redefine the text corpus

In [86]:
# We will use the lemmatized words above to re-define our corpus 
corpus2 = ['love travel explore new destination', 
          'fly japan incredible experience', 
          'zurich charm city rich cultural heritage']

### Task 3.d create a document-term matrix with ngram_range=(1,1)  and a document-term matrix with ngram_range=(2,2)

In [87]:
# Vectorizer with ngram_range=(1,1)
vectorizer = CountVectorizer(min_df=0.0, ngram_range=(1,1))

# Transform 
count = vectorizer.fit_transform(corpus)
 
# Create dataframe
df_count = pd.DataFrame(count.toarray(),
                        columns=vectorizer.get_feature_names_out())

print('Document-term matrix')
print(df_count)

Document-term matrix
   bicycle  car  drive  highway  path  road  truck
0        0    1      1        0     0     1      0
1        0    0      1        1     0     0      1
2        2    0      1        0     1     0      0


In [88]:
# Vectorizer with with ngram_range=(2,2)
vectorizer = CountVectorizer(min_df=0.0, ngram_range=(2,2))

# Transform 
count = vectorizer.fit_transform(corpus)
 
# Create dataframe
df_count = pd.DataFrame(count.toarray(),
                        columns=vectorizer.get_feature_names_out())

print('Document-term matrix')
print(df_count)

Document-term matrix
   bicycle drive  bicycle path  car drive  drive bicycle  drive highway  \
0              0             0          1              0              0   
1              0             0          0              0              1   
2              1             1          0              1              0   

   drive road  truck drive  
0           1            0  
1           0            1  
2           0            0  


### Task 1.e
Based on the 'corpus' from c) create a:
<p>-Term Frequency (TF) matrix</p>
<p>-Inverse Document Frequency (IDF) matrix</p>
<p>-Term Frequency - Inverse Documetn Frequency (TF-IDF) matrix</p>

In [89]:
# Compute Term Frequency (TF)
words_set = set()
for doc in corpus:
    words = doc.split(' ')
    words_set = words_set.union(set(words))
    
print('Number of words in the corpus:',len(words_set), '\n')
print('The words in the corpus: \n', words_set)

# Number of documents in the corpus
n_docs = len(corpus)

# Number of unique words in the corpus 
n_words_set = len(words_set)

df_tf = pd.DataFrame(np.zeros((n_docs, n_words_set)), 
                     columns=list(words_set))

print("\nTerm Frequency (TF):")
for i in range(n_docs):
    # Words in the document
    words = corpus[i].split(' ')
    for w in words:
        df_tf[w][i] = df_tf[w][i] + (1 / len(words))
        
print(df_tf.round(4))

Number of words in the corpus: 7 

The words in the corpus: 
 {'drive', 'highway', 'car', 'path', 'truck', 'road', 'bicycle'}

Term Frequency (TF):
    drive  highway     car  path   truck    road  bicycle
0  0.3333   0.0000  0.3333  0.00  0.0000  0.3333      0.0
1  0.3333   0.3333  0.0000  0.00  0.3333  0.0000      0.0
2  0.2500   0.0000  0.0000  0.25  0.0000  0.0000      0.5


In [90]:
# Computing Inverse Document Frequency (IDF)
print("\nInverse Document Frequency (IDF):")

idf = {}

for w in words_set:
    
    # k = number of documents that contain this word
    k = 0
    
    for i in range(n_docs):
        if w in corpus[i].split():
            k += 1
            
    idf[w] =  np.log10(n_docs / k).round(4)
    
    print(f'{w:>15}: {idf[w]:>10}')


Inverse Document Frequency (IDF):
          drive:        0.0
        highway:     0.4771
            car:     0.4771
           path:     0.4771
          truck:     0.4771
           road:     0.4771
        bicycle:     0.4771


In [91]:
# Computing TF-IDF
df_tf_idf = df_tf.copy()

for w in words_set:
    for i in range(n_docs):
        df_tf_idf[w][i] = df_tf[w][i] * idf[w]

print('\nTF-IDF:')
print(df_tf_idf.round(4))


TF-IDF:
   drive  highway    car    path  truck   road  bicycle
0    0.0    0.000  0.159  0.0000  0.000  0.159   0.0000
1    0.0    0.159  0.000  0.0000  0.159  0.000   0.0000
2    0.0    0.000  0.000  0.1193  0.000  0.000   0.2386


### Task 1.f (part-of_speach POS)

In [92]:
text = '''Apple Inc. unveiled its latest iPhone model on Tuesday, 
          creating a buzz in the tech community. The new device, 
          equipped with cutting-edge features, is expected to 
          dominate the smartphone market.'''

def preprocess(sent):
    sent = nltk.word_tokenize(sent)
    sent = nltk.pos_tag(sent)
    return sent

sent = preprocess(text)
pattern = 'NP: {<DT>?<JJ>*<NN>}'

cp = nltk.RegexpParser(pattern)
cs = cp.parse(sent)

iob_tagged = tree2conlltags(cs)

# Print the POS-tags
pprint(iob_tagged)


[('Apple', 'NNP', 'O'),
 ('Inc.', 'NNP', 'O'),
 ('unveiled', 'VBD', 'O'),
 ('its', 'PRP$', 'O'),
 ('latest', 'JJS', 'O'),
 ('iPhone', 'NN', 'B-NP'),
 ('model', 'NN', 'B-NP'),
 ('on', 'IN', 'O'),
 ('Tuesday', 'NNP', 'O'),
 (',', ',', 'O'),
 ('creating', 'VBG', 'O'),
 ('a', 'DT', 'B-NP'),
 ('buzz', 'NN', 'I-NP'),
 ('in', 'IN', 'O'),
 ('the', 'DT', 'B-NP'),
 ('tech', 'NN', 'I-NP'),
 ('community', 'NN', 'B-NP'),
 ('.', '.', 'O'),
 ('The', 'DT', 'B-NP'),
 ('new', 'JJ', 'I-NP'),
 ('device', 'NN', 'I-NP'),
 (',', ',', 'O'),
 ('equipped', 'VBN', 'O'),
 ('with', 'IN', 'O'),
 ('cutting-edge', 'NN', 'B-NP'),
 ('features', 'NNS', 'O'),
 (',', ',', 'O'),
 ('is', 'VBZ', 'O'),
 ('expected', 'VBN', 'O'),
 ('to', 'TO', 'O'),
 ('dominate', 'VB', 'O'),
 ('the', 'DT', 'B-NP'),
 ('smartphone', 'NN', 'I-NP'),
 ('market', 'NN', 'B-NP'),
 ('.', '.', 'O')]


<!-- Explanation of POS Tags -->

<p>('to', 'TO', 'O'): The word "to" is a word that often shows direction or intention. In this case, it is not part of a special group of words (Named Entity) in the sentence.</p>

<p>('dominate', 'VB', 'O'): The word "dominate" is an action word. It means to be in control. Here, it's not part of any special group of words in the sentence.</p>

<p>('the', 'DT', 'B-NP'): The word "the" is a word we use to talk about something specific. In this sentence, it's the beginning of a group of words (phrase) that is about something.</p>

<p>('smartphone', 'NN', 'I-NP'): The word "smartphone" is a technical word for a type of mobile phone. It is part of the same group of words (phrase) as "the," and it follows it.</p>

<p>('market', 'NN', 'B-NP'): The word "market" is another word for a place where things are bought and sold. It is part of a new group of words (phrase) that is about something different from the smartphone.</p>


<!-- The Parts Of Speech Tag List -->

<!-- In the above example, the output contained tags like NN, NNP, VBD, etc. Following is the complete list of such POS tags. -->

<p>CC Coordinating Conjunction</p>
<p>CD Cardinal Digit</p>
<p>DT Determiner</p>
<p>EX Existential There. Example: “there is” … think of it like “there exists”)</p>
<p>FW Foreign Word.</p>
<p>IN Preposition/Subordinating Conjunction.</p>
<p>JJ Adjective.</p>
<p>JJR Adjective, Comparative.</p>
<p>JJS Adjective, Superlative.</p>
<p>LS List Marker 1.</p>
<p>MD Modal.</p>
<p>NN Noun, Singular.</p>
<p>NNS Noun Plural.</p>
<p>NNP Proper Noun, Singular.</p>
<p>NNPS Proper Noun, Plural.</p>
<p>PDT Predeterminer.</p>
<p>POS Possessive Ending. Example: parent’s</p>
<p>PRP Personal Pronoun. Examples: I, he, she</p>
<p>PRP$ Possessive Pronoun. Examples: my, his, hers</p>
<p>RB Adverb. Examples: very, silently,</p>
<p>RBR Adverb, Comparative. Example: better</p>
<p>RBS Adverb, Superlative. Example: best</p>
<p>RP Particle. Example: give up</p>
<p>TO to. Example: go ‘to’ the store.</p>
<p>UH Interjection. Example: errrrrrrrm</p>
<p>VB Verb, Base Form. Example: take</p>
<p>VBD Verb, Past Tense. Example: took</p>
<p>VBG Verb, Gerund/Present Participle. Example: taking</p>
<p>VBN Verb, Past Participle. Example: taken</p>
<p>VBP Verb, Sing Present, non-3d take</p>
<p>VBZ Verb, 3rd person sing. present takes</p>
<p>WDT wh-determiner. Example: which</p>
<p>WP wh-pronoun. Example: who, what</p>
<p>WP$ possessive wh-pronoun. Example: whose</p>
<p>WRB wh-abverb. Example: where, when</p>


### Jupyter notebook --footer info-- (please always provide this at the end of each submitted notebook)

In [93]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

print('-----------------------------------')
print(os.name.upper())
print(platform.system(), '|', platform.release())
print('Datetime:', datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
print('Python Version:', python_version())
print('-----------------------------------')

-----------------------------------
POSIX
Darwin | 23.0.0
Datetime: 2023-12-08 17:06:18
Python Version: 3.9.6
-----------------------------------
