In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
text = """
  Morocco and Marrakech: A Tapestry of Tradition and Modernity** Morocco, located at the crossroads of Europe and Africa, is a country drenched in history, mystery, and cultural richness. A testament to the ancient civilizations that once flourished here, this North African kingdom boasts a unique blend of Arab, Berber, and European influences. At the heart of Morocco's rich tapestry lies Marrakech, one of its four imperial cities and a vibrant epicenter of tradition and modernity. **Geographical Significance** Morocco is bordered by the Atlantic Ocean to the west, the Mediterranean Sea to the north, Algeria to the east and southeast, and the vast Sahara desert to the south. Its strategic location has historically made it a sought-after territory and a melting pot of cultures, religions, and trade routes. **Marrakech: The Red City** Marrakech, often referred to as "The Red City" due to its distinctive red-hued buildings, stands against the backdrop of the snow-capped Atlas Mountains. Established in the 11th century, it has remained a crucial political, economic, and cultural center of Morocco. **Journey through the Medina** Marrakech's old town, the Medina, is a UNESCO World Heritage site and a labyrinthine maze of narrow alleys, bustling souks, and historical landmarks. The Djemaa el-Fna Square lies at the heart of the Medina and comes alive every evening with storytellers, musicians, snake charmers, and food stalls offering tantalizing Moroccan delicacies. **Palaces and Gardens** The city is also home to grand palaces like the Bahia Palace, showcasing intricate Islamic architecture, and the Saadian Tombs, remnants of the Saadian dynasty. The Majorelle Garden, restored by the fashion designer Yves Saint Laurent, is a tranquil oasis of cacti, palm trees, and cobalt blue accents. **Modern Marrakech** While tradition and history permeate Marrakech, the city is not averse to the modern world. Gueliz, the new town, is brimming with contemporary art galleries, stylish cafes, and chic boutiques, offering a stark contrast to the ancient Medina. **Moroccan Cuisine** No journey through Morocco and Marrakech would be complete without indulging in the local cuisine. Tagines, couscous, and pastilla are just a few of the many dishes that combine a plethora of flavors and spices like saffron, cumin, and mint. Paired with Moroccan mint tea, the culinary experience is truly unparalleled. **In Conclusion** Morocco, with Marrakech at its heart, offers travelers an unparalleled journey through time. The convergence of history, culture, architecture, and gastronomy makes it an enthralling destination for those seeking both adventure and reflection. As the Moroccan proverb goes, "He who does not travel does not know the value of men." In the case of Morocco and Marrakech, it's not just the value of men, but also the value of time, tradition, and tales that have spanned centuries.
"""

In [2]:
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
from gensim.models.phrases import Phraser, Phrases
from nltk.tokenize import word_tokenize
import nltk
import string

# downloading natural language toolkit
nltk.download('punkt')

# Preprocessing of data
def preprocess_text(text):
    # Tokenization
    tokens = word_tokenize(text)
    # Suppression de la ponctuation et passage en minuscules
    tokens = [word.lower() for word in tokens if word.isalpha()]
    return tokens

# tokens extraction
tokens = preprocess_text(text)

# Word2Vec model
model = Word2Vec(sentences=[tokens], vector_size=100, window=5, min_count=1, workers=4)

# saving the model
model.save("word2vec_model.model")

# model loading
model = Word2Vec.load("word2vec_model.model")

# 1. Extraction of vector representation of a sample word
word_vector = model.wv['morocco']
print("Représentation vectorielle de 'morocco':", word_vector)

# 2. Similarity between 2 words
similarity = model.wv.similarity('morocco', 'marrakech')
print("Similarité entre 'morocco' et 'marrakech':", similarity)

# 3. Contextual word extraction for a given word
context_words = model.wv.most_similar('morocco', topn=5)
print("Mots contextuels pour 'morocco':", context_words)


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Représentation vectorielle de 'morocco': [-8.7957839e-03  2.1959322e-03 -8.8990363e-04 -9.1539789e-03
 -9.4713178e-03 -1.6928079e-03  4.5555565e-03  4.0925676e-03
 -6.6823130e-03 -6.9897915e-03 -5.0572380e-03 -2.3918445e-03
 -7.3562418e-03 -9.5715420e-03 -2.7701894e-03 -8.4568216e-03
 -5.8633699e-03 -5.8504008e-03 -2.5100359e-03 -2.1361161e-03
 -9.0263411e-03 -6.7430036e-04  8.2260994e-03  7.5644916e-03
 -7.2873328e-03 -3.5321403e-03  2.9821973e-03 -9.6747344e-03
  1.3119276e-03  6.6303820e-03  5.8834767e-03 -8.5809538e-03
 -4.4334480e-03 -8.1285406e-03 -7.5148128e-05  9.5939832e-03
  5.9751058e-03  4.8761200e-03  4.9120560e-03 -3.4918855e-03
  9.5450245e-03 -7.6448591e-03 -7.2532296e-03 -2.2674464e-03
 -5.5674731e-04 -3.3034687e-03 -6.2701746e-04  7.4815522e-03
 -5.7221798e-04 -1.5060551e-03  2.8722344e-03 -8.4333606e-03
  7.8946482e-03  8.4465602e-03 -9.7461566e-03  2.7