In [52]:
import requests
from bs4 import BeautifulSoup

def get_metaphysicians_links():
    url = "https://en.wikipedia.org/wiki/List_of_metaphysicians"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    unordered_lists = soup.find_all('ul')
    for ul in unordered_lists:
        links = ul.find_all('a', href=True)
        for link in links:
            if 'wiki' in link['href']:
                yield link['href']
metaphysicians_links = list(get_metaphysicians_links())
print("Metaphysicians Links:")
for link in metaphysicians_links:
    print(link)

Metaphysicians Links:
/wiki/Main_Page
/wiki/Wikipedia:Contents
/wiki/Portal:Current_events
/wiki/Special:Random
/wiki/Wikipedia:About
//en.wikipedia.org/wiki/Wikipedia:Contact_us
https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en
/wiki/Help:Contents
/wiki/Help:Introduction
/wiki/Wikipedia:Community_portal
/wiki/Special:RecentChanges
/wiki/Wikipedia:File_upload_wizard
/wiki/Special:MyContributions
/wiki/Special:MyTalk
https://fa.wikipedia.org/wiki/%D9%81%D9%87%D8%B1%D8%B3%D8%AA_%D9%85%D8%AA%D8%A7%D9%81%DB%8C%D8%B2%DB%8C%DA%A9%E2%80%8C%D8%AF%D8%A7%D9%86%D8%A7%D9%86
https://ko.wikipedia.org/wiki/%ED%98%95%EC%9D%B4%EC%83%81%ED%95%99%EC%9E%90_%EB%AA%A9%EB%A1%9D
/wiki/List_of_metaphysicians
/wiki/Talk:List_of_metaphysicians
/wiki/List_of_metaphysicians
/wiki/List_of_metaphysicians
/wiki/Special:WhatLinksHere/List_of_metaphysicians
/wiki/Special:RecentChangesLinked/List_of_metaphysicians
/wiki/Wikipe

In [54]:
import requests

def get_page_content(links):
    URL = "https://en.wikipedia.org/w/api.php"
    content = {}
    for link in links:
        PARAMS = {
            "action": "query",
            "prop": "extracts",
            "exintro": "",
            "explaintext": "",
            "format": "json",
            "titles": link.split("/")[-1]
        }
        
        r = requests.get(url=URL, params=PARAMS)
        data = r.json()
        
        print(data)

        try:
            pages = data['query']['pages']
            for page_id, page_info in pages.items():
                content[page_info['title']] = page_info.get('extract', '')
        except KeyError:
            print("Error: Unexpected response structure.")
    return content

contents = get_page_content(metaphysicians_links)

{'batchcomplete': '', 'query': {'normalized': [{'from': 'Main_Page', 'to': 'Main Page'}], 'pages': {'15580374': {'pageid': 15580374, 'ns': 0, 'title': 'Main Page', 'extract': ''}}}}
{'batchcomplete': '', 'query': {'pages': {'40273': {'pageid': 40273, 'ns': 4, 'title': 'Wikipedia:Contents', 'extract': "Explore the vast knowledge of Wikipedia through these helpful resources. If you have a specific topic in mind, use Wikipedia's search box. If you don't know exactly what you are looking for or wish to explore broad areas, click on a link in the header menu at the top of this page, or begin your browsing below:"}}}}
{'batchcomplete': '', 'query': {'normalized': [{'from': 'Portal:Current_events', 'to': 'Portal:Current events'}], 'pages': {'5776237': {'pageid': 5776237, 'ns': 100, 'title': 'Portal:Current events', 'extract': ''}}}}
{'batchcomplete': '', 'query': {'pages': {'-1': {'ns': -1, 'title': 'Special:Random', 'special': ''}}}}
{'batchcomplete': '', 'query': {'pages': {'63948': {'pagei

In [55]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/robertbecker/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/robertbecker/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [86]:
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
import string
import nltk
import re

nltk.download('punkt')


nltk.download('stopwords')

from nltk.corpus import stopwords


def preprocess_text(text):

    text = text.lower()

    text = text.translate(str.maketrans('', '', string.punctuation))

    text = re.sub(r'\d+', '', text)

    tokens = word_tokenize(text)

    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]
    return tokens


preprocessed_contents = {name: preprocess_text(content) for name, content in contents.items()}

model = Word2Vec(sentences=preprocessed_contents.values(), vector_size=100, window=5, min_count=5, workers=4)


model.save("philosophers_word2vec.model")



[nltk_data] Downloading package punkt to
[nltk_data]     /Users/robertbecker/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/robertbecker/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [87]:

philosopher_names = ['plato', 'russell']  

similar_words_set = set()

for name in philosopher_names:

    name_vector = model.wv[name]

    similar_words = model.wv.most_similar(positive=[name_vector], topn=100)

    for word, _ in similar_words:
        similar_words_set.add(word)


similar_words_list = list(similar_words_set)
print("Words similar to philosopher names:")
for word in similar_words_list:
    print(word)

Words similar to philosopher names:
particular
italian
central
aristotle
two
various
known
concepts
metaphysics
part
objects
among
properties
later
school
mental
several
existence
considered
traditions
major
people
system
forms
influenced
reality
studies
entities
including
epistemology
influential
different
like
analytic
free
scientific
idealism
russell
tradition
called
schools
important
truth
science
reason
example
thomas
history
things
developed
made
death
view
within
early
life
many
contemporary
theory
term
philosopher
theologian
sometimes
natural
mind
american
information
whether
latin
medieval
metaphysical
form
language
well
born
western
meaning
concept
greek
action
philosophical
may
first
views
used
physics
thought
logic
–
study
german
one
often
moral
experience
new
author
matter
also
include
works
exist
physical
object
work
ancient
plato
published
islamic
ideas
nature
christian
philosophy
philosophers
human
universe
knowledge
world
time
kant
modern
according
theories
