In [3]:
import yaml
import json
import pe_modules
import dataloaders
import llms

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load the experiment configuration used by every later cell.
# The context manager closes the file handle deterministically (the bare
# open() call left that to the garbage collector), the encoding is pinned so
# parsing does not depend on the platform locale, and the redundant chained
# `config = config = ...` assignment is collapsed into one.
with open("./configs/david_base_config.yaml", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

In [5]:
from item_scorers.ce_scorer import CEScorer

# Build the item scorer from the loaded config. Later cells call
# scorer.score_items(aspect_str, items) on this instance.
scorer = CEScorer(config)

In [6]:
# Pick the dataloader class keyed by config['data']['data_loader_name'], then
# instantiate it on the configured data path to load the item data.
_data_cfg = config['data']
dataloader_class = dataloaders.DATALOADER_CLASSES[_data_cfg['data_loader_name']]
item_dataloader = dataloader_class(_data_cfg['data_path'], config)

In [7]:
from pe_modules.dt_pe_module import DTPEModule
# Construct the module from the config and the item dataloader. Later cells
# assign its `aspects` attribute and call `get_aspect(item_desc)` on it.
pe_module = DTPEModule(config, item_dataloader)

In [8]:
# Overwrite the module's list of past aspects with the pairs already
# produced for the truck description (see the results table below).
pe_module.aspects = [
    {'aspect_key': key, 'aspect_value': value}
    for key, value in (
        ('age', '30 years old'),
        ('fuel efficiency', 'guzzler'),
        ('price', 'cheap'),
    )
]
# To start from an empty history instead, use: pe_module.aspects = []

In [9]:
# Request one aspect for the truck description and print the returned dict.
item_desc = (
    "Bob's 30 year old truck. As cheap as you get. "
    "It's a bit of a guzzler but it wont need to go into the shop much"
)
aspect_dict = pe_module.get_aspect(item_desc)
print(aspect_dict)

{'aspect_key': 'maintenance', 'aspect_value': 'low maintenance'}


## Aspect Generation Results

### Note: Each table lists the aspect key-value pairs in the order they were generated. E.g. after (year, 1968) was generated, that pair was listed as a previously used aspect key-value pair for all subsequent queries with that description. The next pair was (color, cherry red), etc.

Description: 1968 Red Porsche 911. A classic 2-door sports car designed for the enthusiastic collector. Comes equipped with premium leather seats and a beautiful cherry red finish. Visit one of our dealerships to discuss pricing.

| Key | Value |
| --- | ----- |
| year | 1968 |
| color | cherry red |
| body type | 2-door sports car |
| features | premium leather seats |
| purpose | enthusiastic collector |
| dealership | visit one of our dealerships |
| model | Porsche 911 |
| type | classic |
| type | sports car |
| type | classic sports car |

Description: small red men's tshirt

| Key | Value |
| --- | ----- |
| size | small |
| color | red |
| gender | men's |
| style | tshirt |
| type | men's tshirt |

Description: Bob's 30 year old truck. As cheap as you get. It's a bit of a guzzler but it wont need to go into the shop much

| Key | Value |
| --- | ----- |
| age | 30 years old |
| fuel efficiency | guzzler |
| price | cheap |
| maintenance | low |



In [10]:
# Score four toy items (ids "1".."4") against a single aspect string.
# NOTE(review): in the recorded output the red cars score higher than the
# blue ones for "colour: not red" — negation may not be handled; confirm.
items = {
    str(item_id): {'description': text}
    for item_id, text in enumerate(
        ("Large Red Car", "Small Red Car", "Large Blue Car", "Small Blue Car"),
        start=1,
    )
}
aspect_str = "colour: not red"

like_probs = scorer.score_items(aspect_str, items)
print(like_probs)

{'1': 0.0052513983100652695, '2': 0.01000937819480896, '3': 4.5004278945270926e-05, '4': 5.5663029343122616e-05}


## MovieLens testing

In [28]:
# Load the MovieLens sample. JSON text is UTF-8 by specification, so the
# encoding is pinned rather than left to the platform's locale default.
with open("./data/movielens_2.json", "r", encoding="utf-8") as movie_file:
    movie_data = json.load(movie_file)
# Bare last expression: the notebook renders the loaded dict below the cell.
movie_data

{'1214': {'description': 'Horror, Sci-Fi, imdb top 250, aliens, sci-fi, suspense, thriller, tense, horror, atmospheric, outer space, space, space travel, suspenseful',
  'name': 'Alien (1979)'},
 '1240': {'description': 'Action, Sci-Fi, Thriller, Action, artificial intelligence, future, time travel, imdb top 250, arnold, Arnold Schwarzenegger, classic, cyborgs, dystopic future, highly quotable, robots',
  'name': 'Terminator, The (1984)'},
 '2571': {'description': 'Action, Sci-Fi, Thriller, alternate reality, artificial intelligence, cyberpunk, dystopia, philosophical, philosophy, sci-fi, virtual reality, Futuristic, post apocalyptic, surreal, thought-provoking',
  'name': 'Matrix, The (1999)'},
 '8874': {'description': 'Comedy, Horror, Simon Pegg, Very British and very funny!, zombies, black comedy, British, british comedy, comedy, dark comedy, dark humor, Edgar Wright, funny, hilarious',
  'name': 'Shaun of the Dead (2004)'},
 '46972': {'description': "Action, Comedy, Fantasy, IMAX, 

In [50]:
# Overwrite the module's list of past aspects with the pairs generated so far
# for the Alien description, in generation order. The repeated final entry is
# intentional: it records the pair the module keeps returning.
pe_module.aspects = [
    {'aspect_key': key, 'aspect_value': value}
    for key, value in (
        ('genre', 'sci-fi'),
        ('atmosphere', 'tense'),
        ('subgenre', 'horror/sci-fi'),
        ('subgenre', 'sci-fi/horror'),
        ('subgenre', 'sci-fi/thriller'),
        ('subgenre', 'sci-fi/horror/thriller'),
        ('imdb rating', 'top 250'),
        ('subgenre', 'sci-fi/horror/thriller'),  # gets stuck on this.
    )
]

# To start from an empty history instead, use: pe_module.aspects = []

In [51]:
# Request one aspect for item '1214' ("Alien (1979)" in movie_data) using its
# description text, then print the returned dict.
# Note: this rebinds item_desc and aspect_dict from the earlier truck example.
item_desc = movie_data['1214']['description']
aspect_dict = pe_module.get_aspect(item_desc)
print(aspect_dict)

{'aspect_key': 'subgenre', 'aspect_value': 'sci-fi/horror/thriller'}


### Alien Results

Description: Horror, Sci-Fi, imdb top 250, aliens, sci-fi, suspense, thriller, tense, horror, atmospheric, outer space, space, space travel, suspenseful

| Key | Value |
| --- | ----- |
| genre | sci-fi |
| atmosphere | tense |
| subgenre | horror/sci-fi |
| subgenre | sci-fi/horror |
| subgenre | sci-fi/thriller |
| subgenre | sci-fi/horror/thriller |
| imdb rating | top 250 |
| subgenre | sci-fi/horror/thriller |


In [52]:
# Score every entry of movie_data against a single genre aspect string.
# Note: this rebinds aspect_str and like_probs from the earlier toy-car cell.
aspect_str = "genre: comedy"

# The printed result is a dict keyed by the movie ids of movie_data.
like_probs = scorer.score_items(aspect_str, movie_data)
print(like_probs)

{'1214': 0.000979631207883358, '1240': 0.0001037165493471548, '2571': 0.0015366912120953202, '8874': 0.7904435992240906, '46972': 0.2049161195755005, '51255': 0.8564907312393188, '61132': 0.34490057826042175, '109487': 0.0001463112566852942}
