In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten
from sklearn.preprocessing import LabelEncoder

# Step 1: Map every major name to an integer index.
# The list order below is for human readability only: LabelEncoder assigns
# indices by the sorted order of the distinct labels, not by list position.
majors = [
    "Accounting",
    "Actuarial Science",
    "Art and Creative Technology",
    "Biochemistry",
    "Biology - Cellular and Molecular Biology",
    "Biology - General",
    "Business Analytics - General",
    "Business Analytics - Global Supply Chain Analytics",
    "Business Analytics - Managerial Economics",
    "Business Analytics - Marketing Analytics",
    "Business Analytics - Sport Management",
    "Business Management - Criminal Justice",
    "Business Management - Esports Management",
    "Business Management - General",
    "Business Management - Human Resource Management",
    "Business Management - Sport Management",
    "Chemical and Biomolecular Engineering - Biomedical Engineering",
    "Chemical Engineering - General",
    "Chemistry - Computational And AI Chemistry",
    "Chemistry - Environmental and Sustainability",
    "Chemistry - General",
    "Chemistry - Pharmaceutical Chemistry",
    "Civil Engineering",
    "Communication - Film Production & Media Entrepreneurship",
    "Communication - Multi-Platform Journalism & Media",
    "Communication - Public Relations & Digital Media",
    "Computer Science - Cybersecurity",
    "Computer Science - Game Design and Development",
    "Computer Science - General",
    "Criminal Justice - Correctional Rehabilitation and Supervision",
    "Criminal Justice - Crime Analysis",
    "Criminal Justice - Crime Victim Services",
    "Criminal Justice - Investigative Services",
    "Criminal Justice - Juvenile Justice and Delinquency Prevention",
    "Criminal Justice - Police Science",
    "Cyber Threat Intelligence and Financial Crimes Investigations - Hybrid/Online",
    "Cybersecurity - General",
    "Dental Hygiene - AS",
    "Dental Hygiene - BS",
    "Electrical and Computer Engineering - Cybersecurity",
    "Electrical and Computer Engineering - General",
    "Emergency Management",
    "Engineering",
    "English - Literature",
    "English - Writing",
    "Esports and Gaming - Game Studies",
    "Esports and Gaming - General",
    "Exercise Science",
    "Exploratory Computing and Cybersecurity",
    "Finance and Financial Analytics",
    "Fire Protection Engineering",
    "Fire Science - AS",
    "Fire Science - Fire Administration",
    "Fire Science - Fire/Arson Investigation",
    "Forensic Science",
    "Game Design & Interactive Media",
    "Genetics and Biotechnology",
    "Graphic and Digital Design - BFA",
    "Health Sciences - General",
    "Health Sciences - Occupational Therapy",
    "Health Sciences - Speech-Language Pathology",
    "History",
    "Homeland Security",
    "Hospitality and Tourism Management - General",
    "Intelligence Analysis",
    "Interdisciplinary Studies",
    "Interior Design",
    "Interior Design - Pre-Architecture",
    "International Affairs",
    "International Business",
    "Legal Studies - Dispute Resolution",
    "Legal Studies - Paralegal Studies",
    "Legal Studies - Public Affairs",
    "Marine and Environmental Sciences - Marine Policy",
    "Marine and Environmental Sciences - Environmental Science",
    "Marine and Environmental Sciences - Marine Biology",
    "Marketing - General",
    "Marketing - Marketing Analytics",
    "Mathematics",
    "Mechanical Engineering",
    "Medical Laboratory Science",
    "Music",
    "Music and Sound Recording - BA",
    "Music Industry",
    "Music Technology and Innovation - Digital Systems",
    "Music Technology and Innovation - Electronics",
    "National Security - Intelligence Analysis",
    "Non-Degree",
    "Nutrition Sciences",
    "Paramedicine - AS",
    "Paramedicine - BS",
    "Political Science",
    "Psychology - Clinical",
    "Psychology - Community and Society",
    "Psychology - Forensic Psychology",
    "Psychology - General",
    "Public Health",
    "Security and Defense Policy",
    "Sport Management",
    "Undecided (Business)",
    "Undecided (Criminal Justice)",
    "Undecided (Engineering)",
    "Undeclared",
]

# Learn the label -> index mapping first, then encode the same list with it;
# major_indices[i] is the index assigned to majors[i].
label_encoder = LabelEncoder()
label_encoder.fit(majors)
major_indices = label_encoder.transform(majors)

# Step 2: Define the Model with an Embedding Layer
embedding_dim = 8  # You can adjust this

# Size the lookup table from the encoder's distinct classes rather than the
# raw list length, so a duplicated label can never leave unused rows behind.
num_majors = len(label_encoder.classes_)

model = Sequential([
    # `input_length` is intentionally omitted: it is deprecated in Keras 3, and
    # the sequence length (1) is supplied by model.build() below instead.
    Embedding(input_dim=num_majors, output_dim=embedding_dim, name="major_embedding"),
    Flatten(),
])

# Step 3: Build the Model
# Each sample is a single major index, hence the (batch, 1) input shape.
model.build((None, 1))  # Explicitly build the model to initialize weights

# Step 4: Retrieve the Embedding Matrix
# NOTE: the model has not been compiled or fitted, so these are the layer's
# initial (random) weights, not learned representations of the majors.
embedding_layer = model.get_layer("major_embedding")
embeddings = embedding_layer.get_weights()[0]  # one row of size embedding_dim per class

# Print embeddings: look up each major's row via its encoded index.
for major, idx in zip(majors, major_indices):
    print(f"{major}: {embeddings[idx]}")

Accounting: [ 0.03758958 -0.01945715 -0.01932325  0.01881024  0.02934719  0.02705062
  0.02881731 -0.03056144]
Actuarial Science: [-0.02017062 -0.01423011  0.00178474 -0.02530867  0.04418779  0.04743836
 -0.03914599 -0.01096316]
Art and Creative Technology: [ 0.02413226 -0.02710792 -0.04117175  0.04196514 -0.03777725  0.00032387
 -0.01099777  0.01970978]
Biochemistry: [ 0.02508483  0.04741111 -0.02076811  0.01276554  0.04201284 -0.00716417
 -0.04403131  0.02211649]
Biology - Cellular and Molecular Biology: [ 0.01794121 -0.04735456  0.01747093 -0.04706959 -0.03390472  0.04437241
  0.01854156 -0.02841098]
Biology - General: [-0.04274487  0.00773553 -0.02609232 -0.04388124 -0.01444527 -0.02827581
  0.03140049  0.04347625]
Business Analytics - General: [-0.03285588 -0.0059369  -0.00856715  0.03199231 -0.03549486 -0.01554282
  0.0075823  -0.01329324]
Business Analytics - Global Supply Chain Analytics: [ 0.0285846   0.02145914  0.00514618  0.0221119   0.01534984 -0.0045541
  0.01758785  0.03

