In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ListingEmbeddingModel(nn.Module):
    """Fuse numerical, categorical, text and image inputs into a 32-dim vector.

    Args:
        num_numerical_features: width of the numerical input fed to `fc_num`.
        categorical_sizes: one vocabulary size per categorical column; each
            column gets its own `nn.Embedding` table.
        embedding_dim: output width of every categorical embedding table.
        text_embedding_dim: width of the text embedding passed to `forward`.
        image_embedding_dim: width of the image embedding passed to `forward`.
    """

    def __init__(self, num_numerical_features, categorical_sizes, embedding_dim, text_embedding_dim, image_embedding_dim):
        super().__init__()

        # One lookup table per categorical column, in column order.
        lookup_tables = []
        for vocab_size in categorical_sizes:
            lookup_tables.append(nn.Embedding(vocab_size, embedding_dim))
        self.categorical_embeddings = nn.ModuleList(lookup_tables)

        # Projection of the raw numerical features to 128 units.
        self.fc_num = nn.Linear(num_numerical_features, 128)

        # Width of the concatenated vector that `forward` feeds into `fc_final`.
        fused_width = (
            128
            + len(categorical_sizes) * embedding_dim
            + text_embedding_dim
            + image_embedding_dim
        )
        self.fc_final = nn.Linear(fused_width, 64)

        # Last projection: 64 hidden units -> 32-dim embedding.
        self.output_embedding = nn.Linear(64, 32)

    def forward(self, numerical_features, categorical_features, text_embeddings, image_embeddings):
        """Return the final embedding for a batch.

        Column `i` of `categorical_features` is looked up in embedding table
        `i`; all inputs are concatenated along dim 1, so they must share the
        same batch dimension.
        """
        # Look up each categorical column in its own table, then join them.
        per_column = [
            table(categorical_features[:, column])
            for column, table in enumerate(self.categorical_embeddings)
        ]
        categorical_part = torch.cat(per_column, dim=1)

        # Non-linear projection of the numerical features.
        numerical_part = F.relu(self.fc_num(numerical_features))

        # Order matters: it must match the width computed for `fc_final`.
        fused = torch.cat(
            (numerical_part, categorical_part, text_embeddings, image_embeddings),
            dim=1,
        )

        hidden = F.relu(self.fc_final(fused))
        return self.output_embedding(hidden)

# Example input sizes
num_numerical_features = 5  # Example: price, number of rooms, etc.
categorical_sizes = [10, 20, 30]  # Example: property type, amenity categories, etc.
embedding_dim = 8
text_embedding_dim = 256  # Example: BERT embeddings
image_embedding_dim = 512  # Example: CLIP embeddings

# Seed torch's global RNG so the random weights and the random example inputs
# below are the same on every Restart & Run All.
torch.manual_seed(0)

# Create model
model = ListingEmbeddingModel(num_numerical_features, categorical_sizes, embedding_dim, text_embedding_dim, image_embedding_dim)

# Example inputs
batch_size = 16
numerical_features = torch.rand(batch_size, num_numerical_features)  # batch_size x num_numerical_features
# One integer id column per categorical feature, each drawn from [0, vocabulary size).
# Built in a single expression so the name is never bound to a list first.
categorical_features = torch.stack(
    [torch.randint(0, categorical_size, (batch_size,)) for categorical_size in categorical_sizes],
    dim=1,
)  # batch_size x len(categorical_sizes)
text_embeddings = torch.rand(batch_size, text_embedding_dim)  # batch_size x text_embedding_dim
image_embeddings = torch.rand(batch_size, image_embedding_dim)  # batch_size x image_embedding_dim

# Get embedding output
embeddings = model(numerical_features, categorical_features, text_embeddings, image_embeddings)
# The model's last layer is nn.Linear(64, 32), so each listing maps to 32 values.
assert embeddings.shape == (batch_size, 32)
print(embeddings.shape)  # torch.Size([16, 32]): batch of 16 listings with 32-dim embeddings


torch.Size([16, 32])


In [75]:
import pandas as pd

# Toy search-session log written one event per row; it is transposed below into
# the column-oriented dict that the DataFrame is built from.
_session_columns = (
    'id',
    'user_id',
    'listing_id',
    'position_of_the_listing',
    'interaction_type',
    'source',
    'timestamp',
)
_session_events = (
    (2, 18, 26, 2, 'Click', 'Search feature', 1655121925),
    (3, 5, 18, 5, 'Book', 'Similar listing feature', 1655135257),
    (2, 18, 8, 3, 'impression', 'Similar listing feature', 1655121926),
    (3, 5, 9, 4, 'click', 'Search feature', 1655121927),
    (2, 18, 10, 6, 'book', 'Similar listing feature', 1655121928),
    (3, 5, 11, 7, 'impression', 'Similar listing feature', 1655121929),
)

# Column name -> list of values, in event order.
search_session = {
    column: list(values)
    for column, values in zip(_session_columns, zip(*_session_events))
}
df_search_session = pd.DataFrame(search_session)

# NOTE(review): the triple-quoted string below is never executed as code. As the
# last expression of this cell it is only echoed back as the cell output. It
# holds a sketch of a `pd_session` frame (clicked / slided / booked columns);
# consider deleting it if that sketch is no longer needed.
'''
pd_session = pd.DataFrame({
    'clicked': [1,2,3,4,5],
    'slided': [[2,3],[4,5],[1,2],[2,5],[3,4]],
    'booked':[2,4,1,2,3]
})
'''

"\npd_session = pd.DataFrame({\n    'clicked': [1,2,3,4,5],\n    'slided': [[2,3],[4,5],[1,2],[2,5],[3,4]],\n    'booked':[2,4,1,2,3]\n})\n"

In [76]:
# Decode the epoch-second `timestamp` column into a datetime column named `time`.
df_search_session['time'] = df_search_session['timestamp'].pipe(pd.to_datetime, unit='s')

In [77]:
# Show the session log, including the `time` column derived from `timestamp`.
df_search_session

Unnamed: 0,id,user_id,listing_id,position_of_the_listing,interaction_type,source,timestamp,time
0,2,18,26,2,Click,Search feature,1655121925,2022-06-13 12:05:25
1,3,5,18,5,Book,Similar listing feature,1655135257,2022-06-13 15:47:37
2,2,18,8,3,impression,Similar listing feature,1655121926,2022-06-13 12:05:26
3,3,5,9,4,click,Search feature,1655121927,2022-06-13 12:05:27
4,2,18,10,6,book,Similar listing feature,1655121928,2022-06-13 12:05:28
5,3,5,11,7,impression,Similar listing feature,1655121929,2022-06-13 12:05:29


In [99]:
# Empty per-session summary frame; rows are filled in by a later cell.
df_session = pd.DataFrame(
    data=[],
    columns=['click', 'impression', 'book'],
)

In [80]:
# Normalise the `source` labels to lower case.
df_search_session['source'] = df_search_session['source'].str.lower()

In [81]:
# Normalise `interaction_type` to lower case so that 'Click' / 'Book' match the
# lower-case comparisons ('click', 'impression', 'book') used further down.
df_search_session['interaction_type'] = df_search_session['interaction_type'].str.lower()

In [83]:
# Group the log by session `id`. The key is passed as a one-element list, so
# iterating the groups yields 1-tuple keys such as (2,).
df_search_session_groups = df_search_session.groupby(by=['id'])

In [100]:
def _first_or_none(ids):
    """Return the first value of the Series `ids`, or None when it is empty."""
    return ids.iloc[0] if len(ids) else None


# Summarise each search session as one record: the first clicked listing, the
# list of all impressed listings and the first booked listing.
# Records are collected in a list and the frame is built once afterwards, so
# re-running this cell rebuilds `df_session` instead of appending duplicates.
session_records = []
for key, df_search_session_group in df_search_session_groups:
    interaction = df_search_session_group.interaction_type
    listing_ids = df_search_session_group.listing_id

    click_ids = listing_ids[interaction == 'click']
    impression_ids = listing_ids[interaction == 'impression']
    book_ids = listing_ids[interaction == 'book']

    # A session may lack one of the interaction types; use None instead of
    # letting `.iloc[0]` raise IndexError on an empty selection.
    first_click = _first_or_none(click_ids)
    first_book = _first_or_none(book_ids)
    print(key, first_click, _first_or_none(impression_ids), first_book)

    session_records.append({
        'click': first_click,
        'impression': impression_ids.to_list(),
        'book': first_book,
    })

# dtype=object keeps listing ids as they are and lets a missing value stay None
# rather than turning the whole column into floats.
df_session = pd.DataFrame(session_records, columns=['click', 'impression', 'book'], dtype=object)

(2,) 26 8 10
(3,) 9 11 18


In [101]:
# Inspect the per-session summary (columns: click, impression, book).
df_session

Unnamed: 0,click,impression,book
0,26,[8],10
1,9,[11],18


In [102]:
import numpy as np
import random

def build_pair_frame(items, window, num_negatives, rng=random):
    """Build labelled (A, B, C) pairs from a sequence with a sliding window.

    For every centre index `i` whose window `[i - window // 2, i + window // 2]`
    fits before the last element, the following rows are emitted in order:

    * `(items[j], items[i], 1)` for each `j` left of the centre in the window,
    * `(items[i], items[j], 1)` for each `j` right of the centre in the window,
    * `(items[i], s, -1)` for `num_negatives` items `s` sampled without
      replacement from outside the window (the last element is never sampled),
    * `(items[i], items[-1], 1)` pairing the centre with the last element.

    NOTE(review): left-neighbour rows put the centre in column B while every
    other row puts it in column A. This ordering is kept exactly as it was;
    confirm whether it is intentional.

    Args:
        items: 1-D array-like that supports slicing and `np.concatenate`.
        window: nominal window size; `window // 2` items are taken per side.
        num_negatives: negatives sampled per centre; must not exceed the
            number of items outside the window (excluding the last element).
        rng: object exposing `sample(population, k)`; defaults to the `random`
            module. Pass a seeded `random.Random` for reproducible output.

    Returns:
        A DataFrame with columns `A`, `B`, `C` (empty when no centre fits).

    Raises:
        ValueError: from `rng.sample` if `num_negatives` is larger than the
            candidate pool.
    """
    total = len(items)
    half = window // 2
    rows = []
    for i in range(half, total - 1 - half):
        left, right = i - half, i + half
        rows.extend((items[j], items[i], 1) for j in range(left, i))
        rows.extend((items[i], items[j], 1) for j in range(i + 1, right + 1))
        # Candidates for negatives: everything outside the window, minus the last item.
        candidates = np.concatenate([items[:left], items[right + 1 : total - 1]], axis=0)
        rows.extend((items[i], s, -1) for s in rng.sample(candidates.tolist(), num_negatives))
        rows.append((items[i], items[total - 1], 1))
    # Build the frame once; appending row by row with df.loc[len(df)] is quadratic.
    return pd.DataFrame(rows, columns=['A', 'B', 'C'])


n = 100
x = np.arange(n)

k = 5
# Each centre excludes its 2 * (k // 2) + 1 window items and the last item, so
# n - 2 - 2 * (k // 2) candidates remain. This equals n - k - 1 for odd k; for
# even k the bound n - k - 1 is one too large and random.sample could raise.
ns = max(0, min(k - 1, n - 2 - 2 * (k // 2)))

# A dedicated seeded generator makes the sampled negatives reproducible without
# touching the global `random` state.
PAIR_SEED = 42
df = build_pair_frame(x, k, ns, rng=random.Random(PAIR_SEED))

In [103]:
# Preview the first ten generated pairs.
df.head(10)

Unnamed: 0,A,B,C
0,0,2,1
1,1,2,1
2,2,3,1
3,2,4,1
4,2,63,-1
5,2,51,-1
6,2,8,-1
7,2,76,-1
8,2,99,1
9,1,3,1
