# Set Up Weaviate Embedded

In [1]:
!pip install weaviate_client > /dev/null

<h2> Note: this notebook will later be split into separate OpenAI, Cohere, and possibly GPT4All notebooks. </h2>

In [2]:
import os
from getpass import getpass

import weaviate

# Never commit API keys to a notebook: read the OpenAI key from the
# OPENAI_API_KEY environment variable, falling back to an interactive prompt.
# NOTE(review): any key that was previously committed in this cell must be
# treated as leaked and revoked.
openai_api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# Start an embedded Weaviate instance and forward the OpenAI key with every
# request so the OpenAI-backed modules can call the API.
client = weaviate.Client(
    embedded_options=weaviate.EmbeddedOptions(),
    additional_headers={"X-OpenAI-Api-Key": openai_api_key},
)

Binary /root/.cache/weaviate-embedded did not exist. Downloading binary from https://github.com/weaviate/weaviate/releases/download/v1.18.3/weaviate-v1.18.3-linux-amd64.tar.gz
Started /root/.cache/weaviate-embedded: process ID 1894


In [3]:
# Fetch the server metadata (hostname, enabled modules, version) to confirm
# that the embedded instance is reachable.
client.get_meta()

{'hostname': 'http://127.0.0.1:6666',
 'modules': {'generative-openai': {'documentationHref': 'https://beta.openai.com/docs/api-reference/completions',
   'name': 'Generative Search - OpenAI'},
  'qna-openai': {'documentationHref': 'https://beta.openai.com/docs/api-reference/completions',
   'name': 'OpenAI Question & Answering Module'},
  'ref2vec-centroid': {},
  'text2vec-cohere': {'documentationHref': 'https://docs.cohere.ai/embedding-wiki/',
   'name': 'Cohere Module'},
  'text2vec-huggingface': {'documentationHref': 'https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task',
   'name': 'Hugging Face Module'},
  'text2vec-openai': {'documentationHref': 'https://beta.openai.com/docs/guides/embeddings/what-are-embeddings',
   'name': 'OpenAI Module'}},
 'version': '1.18.3'}

# Load AirBnB Data

In [4]:
import pandas as pd
import numpy as np
import json

# Read the raw listings from the CSV file.
csv_file = 'AB_NYC_2019.csv'
raw_df = pd.read_csv(csv_file)

# Turn every missing / non-finite value into None so the records serialise to
# JSON nulls. This is done in two explicit steps instead of
# `replace([...], None)`: older pandas releases treat `value=None` as "use the
# pad fill method" rather than "replace with None", which silently
# forward-fills the gaps. Building a new frame (no `inplace=True`) also keeps
# the cell idempotent on re-run.
finite_df = raw_df.replace([np.inf, -np.inf], np.nan)
df = finite_df.astype(object).where(finite_df.notna(), None)

# Convert the DataFrame to a list of dictionaries (one per listing).
data_list = df.to_dict(orient='records')

In [5]:
# Peek at the first record to see which fields each listing carries.
data_list[0]

{'id': 2539,
 'name': 'Clean & quiet apt home by the park',
 'host_id': 2787,
 'host_name': 'John',
 'neighbourhood_group': 'Brooklyn',
 'neighbourhood': 'Kensington',
 'latitude': 40.64749,
 'longitude': -73.97237,
 'room_type': 'Private room',
 'price': 149,
 'minimum_nights': 1,
 'number_of_reviews': 9,
 'last_review': '2018-10-19',
 'reviews_per_month': 0.21,
 'calculated_host_listings_count': 6,
 'availability_365': 365}

In [6]:
def _unvectorized_string(name, description):
    """Return a ``string`` property definition that text2vec-openai skips.

    These properties are stored and retrievable but are configured with
    ``"skip": True`` so they do not contribute to the object's vector.
    """
    return {
        "name": name,
        "dataType": ["string"],
        "description": description,
        "moduleConfig": {
            "text2vec-openai": {
                "skip": True,
                "vectorizePropertyName": False,
                "vectorizeClassName": False,
            }
        },
    }


def _typed_property(name, data_type, description):
    """Return a property definition of the given data type with no module config."""
    return {"name": name, "dataType": [data_type], "description": description}


def _string_and_typed(name, typed_suffix, data_type, description):
    """Return the string property ``name`` plus its typed twin ``name_<suffix>``.

    Each raw CSV column is stored twice: once as a string (used for prompt
    templating) and once with its native type.
    """
    return [
        _unvectorized_string(name, description),
        _typed_property(f"{name}_{typed_suffix}", data_type, description),
    ]


# Property definitions for the Listing class. The descriptions that were
# copy-pasted from other properties (e.g. longitude described as latitude,
# price described as room type) now describe the property they belong to.
listing_property_defs = [
    {
        # The only vectorized property: the LLM-written description.
        "name": "description",
        "dataType": ["text"],
        "description": "The description of the apartment listing. Generally written by an LLM.",
        "moduleConfig": {
            "text2vec-openai": {
                "skip": False,
                "vectorizePropertyName": False,
                "vectorizeClassName": False,
            }
        },
    },
    _unvectorized_string("name", "A short description of the listing written by humans."),
    _unvectorized_string("host_name", "The name of the host of the apartment listing"),
    _unvectorized_string("neighbourhood", "The neighbourhood of the apartment listing"),
    _unvectorized_string("neighbourhood_group", "The neighbourhood group of the apartment listing"),
    *_string_and_typed("latitude", "number", "number",
                       "The latitude of the apartment listing"),
    *_string_and_typed("longitude", "number", "number",
                       "The longitude of the apartment listing"),
    _unvectorized_string("room_type", "The type of room of the apartment listing"),
    *_string_and_typed("price", "number", "number",
                       "The price of the apartment listing"),
    *_string_and_typed("minimum_nights", "int", "int",
                       "The minimum number of nights to stay at the apartment listing"),
    *_string_and_typed("number_of_reviews", "int", "int",
                       "The number of reviews of the apartment listing"),
    *_string_and_typed("last_review", "date", "date",
                       "The date of the last review of the apartment listing"),
    *_string_and_typed("reviews_per_month", "number", "number",
                       "The number of reviews per month of the apartment listing"),
    *_string_and_typed("calculated_host_listings_count", "int", "int",
                       "The number of listings of the host on Airbnb"),
    *_string_and_typed("availability_365", "int", "int",
                       "The availability of the apartment listing in a year"),
    _typed_property("org_id", "int",
                    "The id originally associated with this AirBnb listing."),
]

Listing_schema = {
    "classes": [
        {
            "class": "Listing",
            "description": "An AirBnb Listing.",
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizeClassName": False,
                    "vectorizePropertyName": False,
                }
            },
            "vectorIndexType": "hnsw",
            "vectorizer": "text2vec-openai",
            "properties": listing_property_defs,
        }
    ]
}

client.schema.create(Listing_schema)

In [7]:
from weaviate.util import generate_uuid5, get_valid_uuid
from uuid import uuid4

def _report_batch_errors(results):
    """Print every per-object error returned by a batch request.

    Without a callback, objects rejected by the server fail silently.
    """
    if not isinstance(results, list):
        return
    for result in results:
        errors = result.get("result", {}).get("errors")
        if errors:
            print(f"Batch import error: {errors}")


def _to_rfc3339(day):
    """Convert a 'YYYY-MM-DD' string into an RFC 3339 timestamp (midnight UTC).

    Weaviate ``date`` properties expect RFC 3339 timestamps. Anything that is
    not a string (missing value) is returned as None.
    """
    if not isinstance(day, str):
        return None
    return f"{day}T00:00:00Z"


def _build_listing_properties(data_obj):
    """Map one CSV record onto the Listing properties.

    Every numeric/date column is stored twice: as a string (for prompt
    templating) and in its native type.
    """
    props = {
        "name": data_obj["name"],
        "host_name": data_obj["host_name"],
        "neighbourhood": data_obj["neighbourhood"],
        "neighbourhood_group": data_obj["neighbourhood_group"],
        "room_type": data_obj["room_type"],
        "org_id": data_obj["id"],
    }
    typed_columns = [
        ("latitude", "number"),
        ("longitude", "number"),
        ("price", "number"),
        ("minimum_nights", "int"),
        ("number_of_reviews", "int"),
        ("reviews_per_month", "number"),
        ("calculated_host_listings_count", "int"),
        ("availability_365", "int"),
    ]
    for column, suffix in typed_columns:
        props[column] = str(data_obj[column])
        props[f"{column}_{suffix}"] = data_obj[column]

    props["last_review"] = str(data_obj["last_review"])
    # The raw 'YYYY-MM-DD' value is not a valid Weaviate date. Sending it as-is
    # makes the server reject every listing that has a review, which is the
    # likely cause of the earlier observation that number_of_reviews was
    # "a lot of 0s": only review-less listings were being stored.
    props["last_review_date"] = _to_rfc3339(data_obj["last_review"])
    return props


client.batch.configure(
    batch_size=16,
    dynamic=True,
    timeout_retries=3,
    callback=_report_batch_errors,
)

limit = 100

# The context manager flushes the final, partially filled batch on exit;
# without it the last `limit % batch_size` objects are never sent.
with client.batch as batch:
    for data_obj in data_list[:limit]:
        batch.add_data_object(
            _build_listing_properties(data_obj),
            "Listing",
            get_valid_uuid(uuid4()),
        )

In [8]:
# Spot-check the import by fetching the string-typed price of one Listing.
(
    client.query
    .get("Listing", ["price"])
    .with_limit(1)
    .do()
)

{'data': {'Get': {'Listing': [{'price': '150'}]}}}

In [9]:
# The Ad class stores generated advertisement text in a single text property.
ad_content_property = {
    "name": "content",
    "dataType": ["text"],
    "description": "The advertisement text.",
}

ad_schema = {
    "classes": [
        {
            "class": "Ad",
            "description": "An advertisement for AirBnb.",
            "properties": [ad_content_property],
        }
    ]
}
client.schema.create(ad_schema)

# Cross-reference property on Listing that points at Ad objects.
listing_hasAd_cref = dict(
    name="hasAd",
    dataType=["Ad"],
    description="The ad for an AirBnb Listing",
)
client.schema.property.create("Listing", listing_hasAd_cref)

# Generate a Description

In [10]:
# Prompt template; the {placeholders} are filled from each Listing's properties.
generatePrompt = """
Please write a description for the following AirBnb Listing in english: 
Name: {name}
Neighbourhood: {neighbourhood}
Neighbourhood Group: {neighbourhood_group}
Latitude: {latitude}
Longitude: {longitude}
Room Type: {room_type}
Price: {price}
Minimum Nights: {minimum_nights}
Number of Reviews: {number_of_reviews}
Last Review: {last_review}
Reviews per Month: {reviews_per_month}
Calculated Host Listings Count: {calculated_host_listings_count}
Availability_365: {availability_365}

Please do not make up any information about the property in your description.
"""

# Properties that must be fetched so the prompt placeholders can be resolved.
generate_properties = ["name","neighbourhood","neighbourhood_group",
                       "latitude","longitude","room_type",
                       "price","minimum_nights","number_of_reviews",
                       "last_review","reviews_per_month",
                       "calculated_host_listings_count","availability_365"]

# Run the generative query for up to five listings, one completion per listing.
descriptions = (
    client.query
    .get("Listing", generate_properties)
    .with_generate(single_prompt=generatePrompt)
    .with_additional(["id"])
    .with_limit(5)
    .do()["data"]["Get"]["Listing"]
)

# Write each generated description back onto its Listing.
for description in descriptions:
    listing_uuid = description["_additional"]["id"]
    generated = description["_additional"].get("generate") or {}
    generated_text = generated.get("singleResult")
    # The generate result carries an `error` field; skip failed generations
    # instead of overwriting the description with an empty value.
    if generated.get("error") or not generated_text:
        print(f"Skipping listing {listing_uuid}: generation failed ({generated.get('error')})")
        continue
    client.data_object.update(
        {"description": generated_text},
        class_name="Listing",
        uuid=listing_uuid,
    )

In [11]:
# Build a new list instead of inserting into `generate_properties`, so that
# re-running this cell does not keep adding duplicate "description" entries.
properties_with_description = ["description", *generate_properties]
client.query.get("Listing", properties_with_description).with_limit(1).do()

{'data': {'Get': {'Listing': [{'availability_365': '365',
     'calculated_host_listings_count': '1',
     'description': 'This private room is located in the Flatbush neighborhood of Brooklyn, offering a unique country space in the heart of the city. With a price of $150 per night and a minimum stay of just one night, this listing is perfect for travelers looking for a cozy and affordable place to stay. Although there are currently no reviews for this property, guests can expect a peaceful and comfortable stay. The host has only one listing, ensuring that guests receive personalized attention and care. With availability 365 days a year, this country space in the city is ready to welcome guests at any time.',
     'last_review': 'None',
     'latitude': '40.63702',
     'longitude': '-73.96327',
     'minimum_nights': '1',
     'name': 'Country space in the city',
     'neighbourhood': 'Flatbush',
     'neighbourhood_group': 'Brooklyn',
     'number_of_reviews': '0',
     'price': '150

In [12]:
# Semantic search over the descriptions: return the single closest Listing
# together with its vector distance to the query concept.
near_text = {"concepts": ["AirBnB near a place to walk my dog"]}
(
    client.query
    .get("Listing", "description")
    .with_near_text(near_text)
    .with_additional("distance")
    .with_limit(1)
    .do()
)

{'data': {'Get': {'Listing': [{'_additional': {'distance': 0.18852222},
     'description': "This private room in the Bedford-Stuyvesant neighbourhood of Brooklyn is the perfect spot for a long-term stay. The space is clean and quiet, providing a peaceful retreat from the hustle and bustle of the city. With a minimum stay of 60 nights, you'll have plenty of time to explore all that Brooklyn has to offer. The location is convenient, with easy access to public transportation and a variety of shops and restaurants nearby. Although there are no reviews yet, you can rest assured that the host is experienced and dedicated to providing a comfortable and enjoyable stay. With availability 365 days a year, this listing is a great option for anyone looking for a long-term stay in Brooklyn."}]}}}

# Generate an Ad

In [13]:
# Prompt template for turning a listing description into an advertisement.
generatePrompt = (
    "\n"
    "Please write an engaging advertisement for the following AirBnb Listing: \n"
    "Description: {description}\n"
    "Please write the advertisement for this listing.\n"
)

# Generate one ad per listing for up to five listings.
ads = (
    client.query
    .get("Listing", ["description"])
    .with_generate(single_prompt=generatePrompt)
    .with_additional(["id"])
    .with_limit(5)
    .do()["data"]["Get"]["Listing"]
)

# Display the first result.
ads[0]

{'_additional': {'generate': {'error': None,
   'singleResult': "Escape the hustle and bustle of the city and step into your own private country oasis in the heart of Brooklyn's Flatbush neighborhood. This cozy and affordable private room is the perfect retreat for travelers looking for a peaceful and comfortable stay. With a price of just $150 per night and a minimum stay of only one night, you can enjoy all the comforts of home without breaking the bank.\n\nAlthough there are currently no reviews for this property, rest assured that the host is dedicated to providing personalized attention and care to each and every guest. With only one listing, you can expect a level of service and attention to detail that is unmatched by larger, more impersonal accommodations.\n\nWhether you're in town for business or pleasure, this country space in the city is the perfect home base for exploring all that Brooklyn has to offer. And with availability 365 days a year, you can book your stay at any ti

In [14]:
# Store every generated ad as an Ad object and link it from its Listing.
for ad in ads:
  listing_uuid = ad["_additional"]["id"]
  generated = ad["_additional"].get("generate") or {}
  ad_text = generated.get("singleResult")
  # The generate result carries an `error` field; do not create empty Ad
  # objects for listings whose generation failed.
  if generated.get("error") or not ad_text:
    print(f"Skipping listing {listing_uuid}: generation failed ({generated.get('error')})")
    continue

  new_ad_id = get_valid_uuid(uuid4())
  client.data_object.create(
      data_object = {"content": ad_text},
      class_name = "Ad",
      uuid = new_ad_id
  )
  # Listing --hasAd--> Ad
  client.data_object.reference.add(
      from_uuid = listing_uuid,
      from_property_name = "hasAd",
      to_uuid = new_ad_id
  )

In [15]:
# Raw GraphQL query: fetch one Listing that passes the filter
# `hasAd >= 1` and return its description together with the `content` of the
# Ad objects referenced through `hasAd`.
# NOTE(review): the integer comparison on a reference property presumably
# filters on the number of references -- confirm against the Weaviate docs.
weaviate_query_str = """
{
  Get {
    Listing (
      where: {
        path: ["hasAd"],
        operator: GreaterThanEqual,
        valueInt: 1
      },
      limit: 1
    ) {
      description
      hasAd {
        ... on Ad {
          content
        }
      }
    }
  }
}
"""

# Send the query string as-is; the result is displayed as the cell output.
client.query.raw(weaviate_query_str)

{'data': {'Get': {'Listing': [{'description': 'This private room is located in the Flatbush neighborhood of Brooklyn, offering a unique country space in the heart of the city. With a price of $150 per night and a minimum stay of just one night, this listing is perfect for travelers looking for a cozy and affordable place to stay. Although there are currently no reviews for this property, guests can expect a peaceful and comfortable stay. The host has only one listing, ensuring that guests receive personalized attention and care. With availability 365 days a year, this country space in the city is ready to welcome guests at any time.',
     'hasAd': [{'content': "Escape the hustle and bustle of the city and step into your own private country oasis in the heart of Brooklyn's Flatbush neighborhood. This cozy and affordable private room is the perfect retreat for travelers looking for a peaceful and comfortable stay. With a price of just $150 per night and a minimum stay of only one night,

# Generic Targeting

In [16]:
# Add a free-text "target" property to the existing Ad class.
target_property = dict(
    name="target",
    dataType=["text"],
    description="High-level audience target for this ad.",
)

client.schema.property.create("Ad", target_property)

In [17]:
# Audience segments to write one ad per listing for.
targets = ["young couples", "elderly couples", "single travelers"]

for target in targets:
  # The audience is appended to the end of the prompt template.
  generatePrompt = """
  Please write an engaging advertisement for the following AirBnb Listing: 
  Description: {description}
  Please write the advertisement for this listing to target 
  """ + target

  ads = (
      client.query
      .get("Listing", ["description"])
      .with_generate(single_prompt=generatePrompt)
      .with_additional(["id"])
      .with_limit(5)
      .do()["data"]["Get"]["Listing"]
  )

  for ad in ads:
    listing_uuid = ad["_additional"]["id"]
    generated = ad["_additional"].get("generate") or {}
    ad_text = generated.get("singleResult")
    # The generate result carries an `error` field; do not create empty Ad
    # objects for listings whose generation failed.
    if generated.get("error") or not ad_text:
      print(f"Skipping listing {listing_uuid} for target '{target}': "
            f"generation failed ({generated.get('error')})")
      continue

    new_ad_id = get_valid_uuid(uuid4())
    client.data_object.create(
        data_object = {"content": ad_text, "target": target},
        class_name = "Ad",
        uuid = new_ad_id
    )
    # Listing --hasAd--> Ad
    client.data_object.reference.add(
        from_uuid = listing_uuid,
        from_property_name = "hasAd",
        to_uuid = new_ad_id
    )


In [18]:
# Keep only the ads whose "target" equals "single travelers".
where_filter = dict(
    path=["target"],
    operator="Equal",
    valueText="single travelers",
)

ads = (
    client.query
    .get("Ad", ["content", "target"])
    .with_where(where_filter)
    .with_limit(5)
    .do()["data"]["Get"]["Ad"]
)

# Display the first matching ad.
ads[0]

{'content': "Attention all solo travelers! Are you looking for a cozy and affordable place to stay in the heart of Brooklyn? Look no further than this private room in the Flatbush neighborhood. For just $150 per night, you can enjoy a unique country space in the city. \n\nAlthough there are currently no reviews for this property, rest assured that you will have a peaceful and comfortable stay. With the host only having one listing, you can expect personalized attention and care during your stay. \n\nAnd the best part? You only have to stay for one night! Perfect for those on a quick trip or just passing through. Plus, with availability 365 days a year, you can book your stay at any time. \n\nDon't miss out on this opportunity to experience a little slice of country life in the city. Book your stay now and make your solo trip to Brooklyn unforgettable.",
 'target': 'single travelers'}

# Personalized Targeting

In [19]:
# Schema for the User class: a name plus a short free-text biography that is
# later appended to the ad-generation prompt.
user_schema = {
   "classes": [
       {
           "class": "User",
           # Previously copy-pasted from the Ad class ("An advertisement for AirBnb.").
           "description": "A user that advertisements can be personalized for.",
           "properties": [
               {
                   "dataType": ["text"],
                   "name": "biography",
                   "description": "Short description of this user."
               },
               {
                   "dataType": ["text"],
                   "name": "name",
                   "description": "This person's name."
               }
           ]
       }
   ]
}

client.schema.create(user_schema)

In [20]:
# First example user.
new_user_id = get_valid_uuid(uuid4())
user_properties = dict(
    biography="Connor often travels with a golden doodle named Bowen.",
    name="Connor",
)
client.data_object.create(
    data_object=user_properties,
    class_name="User",
    uuid=new_user_id,
)

# Second example user; the names above are deliberately reused.
new_user_id = get_valid_uuid(uuid4())
user_properties = dict(
    biography="Bob is a prolific weightlifter who will get upset if he doesn't have a good gym to workout in.",
    name="Bob",
)
# Kept as the cell's final expression so its return value is displayed.
client.data_object.create(
    data_object=user_properties,
    class_name="User",
    uuid=new_user_id,
)


'66c6c237-4b1b-43c2-b4ff-bad04e6d42d4'

In [21]:
# Fetch up to five users with their biography and object id.
users = (
    client.query
    .get("User", ["biography"])
    .with_additional(["id"])
    .with_limit(5)
    .do()["data"]["Get"]["User"]
)

users

[{'_additional': {'id': '66c6c237-4b1b-43c2-b4ff-bad04e6d42d4'},
  'biography': "Bob is a prolific weightlifter who will get upset if he doesn't have a good gym to workout in."},
 {'_additional': {'id': '9cb40fbd-b227-45a9-be14-3a1c974ca9f9'},
  'biography': 'Connor often travels with a golden doodle named Bowen.'}]

In [22]:
# Cross-reference property on Ad that points at the User it was written for.
user_hasAdTarget_cref = dict(
    name="hasUserTarget",
    dataType=["User"],
    description="The ad for an AirBnb Listing targeted to a particular user.",
)

client.schema.property.create("Ad", user_hasAdTarget_cref)

In [23]:
# Fetch the users to personalize ads for.
users = (
    client.query
    .get("User", ["biography"])
    .with_additional(["id"])
    .with_limit(5)
    .do()["data"]["Get"]["User"]
)

for user in users:
  # The user's biography is appended to the end of the prompt template.
  generatePrompt = """
  Please write an engaging advertisement for the following AirBnb Listing: 
  Description: {description}
  Please write the advertisement for this listing to target the following user:
  """ + user["biography"]

  ads = (
      client.query
      .get("Listing", ["description"])
      .with_generate(single_prompt=generatePrompt)
      .with_additional(["id"])
      .with_limit(5)
      .do()["data"]["Get"]["Listing"]
  )

  for ad in ads:
    listing_uuid = ad["_additional"]["id"]
    generated = ad["_additional"].get("generate") or {}
    ad_text = generated.get("singleResult")
    # The generate result carries an `error` field; do not create empty Ad
    # objects for listings whose generation failed.
    if generated.get("error") or not ad_text:
      print(f"Skipping listing {listing_uuid}: generation failed ({generated.get('error')})")
      continue

    new_ad_id = get_valid_uuid(uuid4())
    client.data_object.create(
        data_object = {"content": ad_text},
        class_name = "Ad",
        uuid = new_ad_id
    )
    # Ad --hasUserTarget--> User
    client.data_object.reference.add(
        from_uuid = new_ad_id,
        from_property_name = "hasUserTarget",
        to_uuid = user["_additional"]["id"]
    )
    # Listing --hasAd--> Ad: link the ad to the listing it advertises, as the
    # other ad-creating cells do; otherwise a personalized ad cannot be traced
    # back to its listing.
    client.data_object.reference.add(
        from_uuid = listing_uuid,
        from_property_name = "hasAd",
        to_uuid = new_ad_id
    )

In [24]:
# Raw GraphQL query: fetch the Ad objects that pass the filter
# `hasUserTarget >= 1` and return each ad's content together with the `name`
# of the User objects referenced through `hasUserTarget`.
# NOTE(review): the integer comparison on a reference property presumably
# filters on the number of references -- confirm against the Weaviate docs.
weaviate_query_str = """
{
  Get {
    Ad (
      where: {
        path: ["hasUserTarget"],
        operator: GreaterThanEqual,
        valueInt: 1
      }
    ) {
      content
      hasUserTarget {
        ... on User {
          name
        }
      }
    }
  }
}
"""

# Send the query string as-is; the result is displayed as the cell output.
client.query.raw(weaviate_query_str)

{'data': {'Get': {'Ad': [{'content': "Attention all weightlifters! Are you tired of sacrificing your workout routine while traveling? Look no further than this private room in the Flatbush neighborhood of Brooklyn. For just $150 per night, you can have a cozy and affordable place to stay while still having access to a top-notch gym. With availability 365 days a year, you can maintain your fitness goals no matter when you travel. Plus, with personalized attention from the host, you can rest assured that your needs will be met. Don't let your fitness routine suffer while on the road - book this country space in the city today!",
     'hasUserTarget': [{'name': 'Bob'}]},
    {'content': "Attention all weightlifters! Are you planning a trip to New York City and need a place to stay that won't compromise your workout routine? Look no further than THE VILLAGE OF HARLEM! This private room is located in the vibrant neighbourhood of Harlem in Manhattan and is priced at $150 per night with a min

<h1> Thank you so much for reading! </h1>