In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from keras.layers import Input, Embedding, Flatten, dot, Dense
from keras.models import Model
import numpy as np

# Toy catalogue of five coding problems. Each entry pairs a column name with
# its five values; the pair order fixes the DataFrame's column order.
data = dict([
    ('id', [1, 2, 3, 4, 5]),
    ('name', ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']),
    ('text', ['word1 word2 word3', 'text2 word1 word2 word3', 'text3', 'text4', 'text5']),
    ('accepted', [9, 17, 25, 38, 41]),
    ('submission', [10, 20, 30, 40, 50]),
    ('topics', ['arrays trees', 'linked list', 'trees dp', 'graphs trees', 'dp']),
    ('difficulty', ['Easy', 'Medium', 'Hard', 'Medium', 'Easy']),
    ('link', ['link1', 'link2', 'link3', 'link4', 'link5']),
    ('website', ['site1', 'site2', 'site3', 'site4', 'site5']),
    ('upvotes', [100, 200, 300, 400, 500]),
    ('comments', [10, 20, 30, 40, 50]),
])

# One row per problem, one column per key of `data`.
df = pd.DataFrame(data=data)

# Integer-encode the problem names so each one can index an embedding row.
label_encoder_name = LabelEncoder()
df['name_encoded'] = label_encoder_name.fit_transform(df['name'])

# Integer-encode the topic strings. Each full string (e.g. 'arrays trees') is
# encoded as one class; individual topics are not split out.
label_encoder_topics = LabelEncoder()
df['topics_encoded'] = label_encoder_topics.fit_transform(df['topics'])

# Regression target: fraction of submissions that were accepted.
df['popularity'] = df['accepted'] / df['submission']

# Encoded feature columns and the target series.
X = df[['name_encoded', 'topics_encoded']]
Y = df['popularity']

# Number of distinct encoded ids per feature. These must be plain integers:
# the embedding layers are sized with `n_names + 1` / `n_topics + 1`.
# `Series.nunique()` takes no `reverse` argument and already returns an int.
n_names = df['name_encoded'].nunique()
n_topics = df['topics_encoded'].nunique()


In [None]:
from keras.layers import Input, Embedding, Flatten, dot, Dense
from keras.models import Model


# Width of each embedding vector (shared by both branches).
n_latent_factors = 5

# --- Name branch: one integer id -> embedding vector -> flat vector ---------
name_input = Input(name='name', shape=[1])
name_embedding = Embedding(
    input_dim=n_names + 1,
    output_dim=n_latent_factors,
    name='name_embedding',
)(name_input)
name_vec = Flatten(name='flatten_name')(name_embedding)

# --- Topics branch: same structure as the name branch -----------------------
topics_input = Input(name='topics', shape=[1])
topics_embedding = Embedding(
    input_dim=n_topics + 1,
    output_dim=n_latent_factors,
    name='topics_embedding',
)(topics_input)
topics_vec = Flatten(name='flatten_topics')(topics_embedding)

# Combine the two flat vectors with a dot product along the feature axis.
dot_product = dot([name_vec, topics_vec], axes=1)

# Small regression head on top of the dot product: 10 ReLU units, then a
# single linear output unit.
dense_output = Dense(units=10, activation='relu')(dot_product)
output = Dense(units=1)(dense_output)

# Two-input model trained with Adam on mean squared error.
model = Model(
    inputs=[name_input, topics_input],
    outputs=output,
)
model.compile(loss='mean_squared_error', optimizer='adam')

# Print the layer-by-layer overview.
model.summary()


In [None]:
# Training inputs: one Series per model input, in the order name, then topics.
X_train = [df[column] for column in ('name_encoded', 'topics_encoded')]

# Fit for 100 epochs against the popularity target, holding out 20% of the
# rows for validation.
history = model.fit(
    x=X_train,
    y=Y,
    epochs=100,
    validation_split=0.2,
    verbose=1,
)
