In [2]:
import os

import requests
import numpy as np
import sklearn as sk
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from numpy.random import rand

# Read the newsdata.io key from the environment so the secret is never stored
# in the notebook source or its saved outputs.
API_KEY = os.environ.get("NEWSDATA_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the NEWSDATA_API_KEY environment variable to your newsdata.io API key."
    )

url = "https://newsdata.io/api/1/news"

# Query: English-language articles matching "Ukraine".
params = {
    "apikey": API_KEY,
    "q": "Ukraine",
    "language": "en"
}

# The timeout keeps a stalled connection from hanging the kernel indefinitely;
# raise_for_status() turns an HTTP error response into an exception here
# instead of letting an error payload be parsed as if it were results.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()

data = response.json()

print(data.keys())

dict_keys(['status', 'totalResults', 'results', 'nextPage'])


In [3]:
articles = data['results']

# One text document per article: "<title> <description>".
# Either field may be absent or None in the payload, and None + str raises
# TypeError, so only the parts that are actually present are joined. An article
# with neither field yields an empty string, which keeps new_data index-aligned
# with articles.
new_data = [
  " ".join(
    part
    for part in (article.get('title'), article.get('description'))
    if part
  )
  for article in articles
]

In [4]:
# TF-IDF weighting of the article texts, with scikit-learn's built-in English
# stop-word list removed from the vocabulary.
vectorizer = TfidfVectorizer(stop_words='english')
# Learn the vocabulary from new_data and produce the document-term matrix
# (one row per entry of new_data, one column per vocabulary term).
vec_data = vectorizer.fit_transform(new_data)

In [39]:
# Term-by-term Gram matrix of vec_data. vec_data is not mean-centred before
# this product, so this is an uncentred second-moment matrix rather than a
# covariance matrix in the strict sense.
# NOTE(review): `B` is a second name for the same object and is not used in
# the cells visible here.
cov_matrix = B = vec_data.T @ vec_data

def find_q_vector(cov_matrix, rand_vector, k, list_of_eigenvectors):
  """Run k steps of power iteration, deflated against known eigenvectors.

  Each step multiplies the current iterate by cov_matrix, rescales it to unit
  length, and then calls reorthogonalize to remove its components along every
  vector in list_of_eigenvectors.

  Parameters
  ----------
  cov_matrix : matrix supporting ``cov_matrix @ vector``
  rand_vector : 1-D array used as the starting iterate
  k : int
      Number of iterations. There is no convergence test; exactly k steps run.
  list_of_eigenvectors : list of 1-D arrays
      Previously found vectors to project out on every step.

  Returns
  -------
  The iterate after k steps (rand_vector itself, untouched, when k <= 0).

  Raises
  ------
  ValueError
      If ``cov_matrix @ iterate`` has zero or non-finite norm, since it cannot
      be normalised and would otherwise turn into a vector of NaNs.
  """
  q = rand_vector
  for _ in range(k):
    q = cov_matrix @ q
    norm = np.linalg.norm(q)
    # Dividing by a zero/NaN/inf norm would silently poison every later
    # iteration with NaNs, so fail loudly instead.
    if norm == 0 or not np.isfinite(norm):
      raise ValueError(
        "power iteration broke down: cov_matrix @ vector has zero or "
        "non-finite norm"
      )
    q = q / norm
    q = reorthogonalize(q, list_of_eigenvectors)
  return q

def reorthogonalize(vector_to_orth, list_of_vectors):
    """Project out the given directions from a vector and rescale it to unit length.

    The projections are removed one at a time, each computed from the already
    updated vector. The formula used assumes every entry of list_of_vectors has
    unit length.

    Parameters
    ----------
    vector_to_orth : 1-D array to be orthogonalised
    list_of_vectors : iterable of 1-D arrays (may be empty)

    Returns
    -------
    A new unit-norm array; the input array is not modified.

    Raises
    ------
    ValueError
        If nothing is left after the projections (the input lies in the span
        of list_of_vectors or is the zero vector), or the norm is not finite.
        Normalising in that case would return an array of NaNs.
    """
    for vector in list_of_vectors:
        vector_to_orth = vector_to_orth - (vector.T @ vector_to_orth) * vector

    norm = np.linalg.norm(vector_to_orth)
    if norm == 0 or not np.isfinite(norm):
        raise ValueError(
            "cannot normalise: vector has zero or non-finite norm after "
            "projecting out list_of_vectors"
        )

    return vector_to_orth / norm

def find_eigenvector(cov_matrix, k, n):
  """Approximate n eigenvectors of cov_matrix by power iteration with deflation.

  Vectors are found one after another. Each starts from a fresh random unit
  vector (drawn from NumPy's global random state, so seed it with
  np.random.seed for reproducible results), runs k power-iteration steps via
  find_q_vector while projecting out the vectors already found, and is
  orthogonalised once more before being stored.

  There is no convergence check: each vector is whatever the iteration has
  reached after exactly k steps.

  Parameters
  ----------
  cov_matrix : matrix supporting ``cov_matrix @ vector``; shape[1] sets the vector length
  k : int
      Power-iteration steps per eigenvector.
  n : int
      Number of eigenvectors to compute.

  Returns
  -------
  list of n unit-norm 1-D arrays, in the order they were found.

  Raises
  ------
  ValueError
      If n exceeds the matrix dimension. No more than that many mutually
      orthogonal vectors exist, so the extra ones would be normalised
      rounding noise.
  """
  dim = cov_matrix.shape[1]
  if n > dim:
    raise ValueError(
      f"cannot compute n={n} orthogonal eigenvectors of a {dim}-dimensional matrix"
    )

  # Eigenvectors found so far; later vectors are deflated against these
  list_of_eigenvectors = []

  for _ in range(n):
    # Random unit-length starting vector
    rand_vector = np.random.rand(dim)
    rand_vector = rand_vector / np.linalg.norm(rand_vector)

    # Power method
    lambda_vector = find_q_vector(cov_matrix, rand_vector, k, list_of_eigenvectors)

    # Orthogonalise once more before storing (this is the only
    # orthogonalisation that happens when k is 0)
    lambda_vector = reorthogonalize(lambda_vector, list_of_eigenvectors)

    list_of_eigenvectors.append(lambda_vector)

  return list_of_eigenvectors


In [40]:
# Positional arguments are k=100 power-iteration steps and n=20 eigenvectors.
# The bare expression makes the notebook display the full list of arrays.
find_eigenvector(cov_matrix, 100, 20)

[array([0.09395397, 0.09439399, 0.09713246, 0.09956711, 0.09985352,
        0.09994768, 0.10667648, 0.10156795, 0.10071246, 0.09420858,
        0.09558354, 0.10473306, 0.10081743, 0.09252667, 0.0978387 ,
        0.10134304, 0.09120827, 0.10680125, 0.10209263, 0.09501973,
        0.10461685, 0.09653915, 0.1064888 , 0.10618085, 0.09678497,
        0.10224906, 0.09247233, 0.098039  , 0.10341464, 0.09747597,
        0.09879978, 0.10029313, 0.09919764, 0.10041073, 0.09806075,
        0.10183627, 0.10179408, 0.1031294 , 0.10387458, 0.09832266,
        0.10636789, 0.09764251, 0.10190313, 0.10723133, 0.09766542,
        0.10279592, 0.0985289 , 0.10008219, 0.10500889, 0.09808926,
        0.09768698, 0.09713291, 0.09835535, 0.10402073, 0.10441679,
        0.10529733, 0.09631921, 0.10243197, 0.09946131, 0.10587892,
        0.09637042, 0.10034013, 0.10450296, 0.09421949, 0.10482607,
        0.09808984, 0.09839885, 0.09472097, 0.10326596, 0.09568159,
        0.09493822, 0.10958051, 0.09256517, 0.10

In [41]:
# ── Setup ─────────────────────────────────────────────────────────────────────
# Synthetic benchmark for find_eigenvector. Seeding the global NumPy RNG makes
# both the random data below and the random start vectors reproducible.

np.random.seed(42)

# WARNING: this rebinds vec_data (and cov_matrix below) to synthetic data,
# replacing whatever earlier cells stored under those names.
# Substitute your actual vec_data here to test on real data instead.
vec_data = np.random.rand(200, 100)

N_COMPONENTS = 5   # number of leading eigenvectors to compare
K_POWER_ITER = 50  # power-iteration steps per eigenvector

# 100 x 100 symmetric Gram matrix of the synthetic data
cov_matrix = vec_data.T @ vec_data

# ── Get your eigenvectors ─────────────────────────────────────────────────────

your_eigenvectors = find_eigenvector(cov_matrix, k=K_POWER_ITER, n=N_COMPONENTS)

# ── Get GROUND TRUTH from numpy ───────────────────────────────────────────────
# np.linalg.eigh is NumPy's solver for symmetric matrices (cov_matrix is
# symmetric); its result is used here as the reference answer.
# It returns eigenvalues in ascending order, so both outputs are reversed to
# put the largest first before keeping the top N_COMPONENTS.

true_eigenvalues, true_eigenvectors = np.linalg.eigh(cov_matrix)
true_eigenvalues  = true_eigenvalues[::-1][:N_COMPONENTS]
true_eigenvectors = true_eigenvectors[:, ::-1][:, :N_COMPONENTS].T  # shape: (N_COMPONENTS, n_features), one eigenvector per row

# Your eigenvalues via the Rayleigh quotient v^T A v (no division by v^T v, so
# this relies on each v having unit norm)
your_eigenvalues = np.array([v @ cov_matrix @ v for v in your_eigenvectors])


# ── TEST 1: Are they actual eigenvectors? ─────────────────────────────────────
# For an exact eigenpair (λ, v) we have cov_matrix @ v == λ * v, so the norm of
# the difference measures how far each computed vector is from being one.
# The thresholds below are absolute, not scaled by λ.

print("=" * 60)
print("  TEST 1: Eigenvector Residual")
print("  cov @ v - λv should be ~0 for a true eigenvector")
print("  < 0.001 = ✅   < 0.01 = ⚠️    > 0.01 = ❌")
print("=" * 60)

for i in range(N_COMPONENTS):
    v, lam = your_eigenvectors[i], your_eigenvalues[i]
    residual = np.linalg.norm(cov_matrix @ v - lam * v)

    # Grade the residual against the legend printed above
    if residual < 0.001:
        flag = "✅"
    elif residual < 0.01:
        flag = "⚠️ "
    else:
        flag = "❌"

    print(f"  v{i+1}  residual = {residual:.6f}  {flag}")


  TEST 1: Eigenvector Residual
  cov @ v - λv should be ~0 for a true eigenvector
  < 0.001 = ✅   < 0.01 = ⚠️    > 0.01 = ❌
  v1  residual = 0.000000  ✅
  v2  residual = 0.549440  ❌
  v3  residual = 0.677793  ❌
  v4  residual = 0.251662  ❌
  v5  residual = 0.416259  ❌
