In [1]:
import weaviate

client = weaviate.Client(
  url = "http://localhost:8080",
  additional_headers = {
    "X-Cohere-Api-Key": "<THE-KEY>"  # Replace with your API key
  }
)

In [2]:
import pandas as pd
import numpy as np
import json

# Read CSV file
csv_file = 'AB_NYC_2019.csv'
df = pd.read_csv(csv_file)

df.replace([np.inf, -np.inf, np.nan], None, inplace=True)

# Convert DataFrame to a list of dictionaries
data_list = df.to_dict(orient='records')

In [3]:
Listing_schema = {
    "classes": [
        {
            "class": "Listing",
            "description": "An AirBnb Listing.",
            "moduleConfig": {
                "text2vec-transformers": {
                    "skip": False,
                    "vectorizeClassName": False,
                    "vectorizePropertyName": False
                }
            },
            "vectorIndexType": "hnsw",
            "vectorizer": "text2vec-transformers",
            "properties": [
               {
                   "name": "description",
                   "dataType": ["text"],
                   "description": "The description of the apartment listing. Generally written by an LLM.",
                   "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": False,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                "name": "name",
                "dataType": ["string"],
                "description": "A short description of the listing written by humans.",
                "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                }
               },
               {
                "name": "host_name",
                "dataType": ["string"],
                "description": "The name of the host of the apartment listing",
                "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                }
               },
               {
                   "name": "neighbourhood",
                   "dataType": ["string"],
                   "description": "The neighbourhood group of the apartment listing",
                   "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                   "name": "neighbourhood_group",
                   "dataType": ["string"],
                   "description": "The neighbourhood group of the apartment listing",
                   "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                   "name": "latitude",
                   "dataType": ["string"],
                   "description": "The latitude of the apartment listing",
                    "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                   "name": "latitude_number",
                   "dataType": ["number"],
                   "description": "The latitude of the apartment listing"
               },
               {
                   "name": "longitude",
                   "dataType": ["string"],
                   "description": "The latitude of the apartment listing",
                    "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                   "name": "longitude_number",
                   "dataType": ["number"],
                   "description": "The longitude of the apartment listing"
               },
               {
                   "name": "room_type",
                   "dataType": ["string"],
                   "description": "The type of room of the apartment listing",
                   "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                   "name": "price",
                   "dataType": ["string"],
                   "description": "The type of room of the apartment listing",
                   "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                   "name": "price_number",
                   "dataType": ["number"],
                   "description": "The price of the apartment listing"
               },
               {
                   "name": "minimum_nights",
                   "dataType": ["string"],
                   "description": "The type of room of the apartment listing",
                   "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                   "name": "minimum_nights_int",
                   "dataType": ["int"],
                   "description": "The minimum number of nights to stay at the apartment listing"
               },
               {
                   "name": "number_of_reviews",
                   "dataType": ["string"],
                   "description": "The type of room of the apartment listing",
                   "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                   "name": "number_of_reviews_int",
                   "dataType": ["int"],
                   "description": "The number of reviews of the apartment listing"
               },
               {
                   "name": "last_review",
                   "dataType": ["string"],
                   "description": "The type of room of the apartment listing",
                   "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                   "name": "last_review_date",
                   "dataType": ["date"],
                   "description": "The date of the last review of the apartment listing"
               },
               {
                   "name": "reviews_per_month",
                   "dataType": ["string"],
                   "description": "The type of room of the apartment listing",
                   "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                   "name": "reviews_per_month_number",
                   "dataType": ["number"],
                   "description": "The number of reviews per month of the apartment listing"
               },
               {
                   "name": "calculated_host_listings_count",
                   "dataType": ["string"],
                   "description": "The type of room of the apartment listing",
                   "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                   "name": "calculated_host_listings_count_int",
                   "dataType": ["int"],
                   "description": "The number of listings of the host on Airbnb"
               },
               {
                   "name": "availability_365",
                   "dataType": ["string"],
                   "description": "The type of room of the apartment listing",
                   "moduleConfig": {
                    "text2vec-transformers": {
                        "skip": True,
                        "vectorizePropertyName": False,
                        "vectorizeClassName": False
                    }
                   }
               },
               {
                   "name": "availability_365_int",
                   "dataType": ["int"],
                   "description": "The availability of the apartment listing in a year"
               },
               {
                   "name": "org_id",
                   "dataType": ["int"],
                   "description": "The id originally associated with this AirBnb listing."
               }
           ]
        }
    ]
}

client.schema.create(Listing_schema)

In [4]:
from weaviate.util import generate_uuid5, get_valid_uuid
from uuid import uuid4

client.batch.configure(
    batch_size=16,
    dynamic=True,
    timeout_retries=3,
    callback=None,
)

limit = 100

for data_obj in data_list[:limit]:
    data_properties = {}
    data_properties["name"] = data_obj["name"]
    data_properties["host_name"] = data_obj["host_name"]
    data_properties["neighbourhood"] = data_obj["neighbourhood"]
    data_properties["neighbourhood_group"] = data_obj["neighbourhood_group"]
    data_properties["latitude"] = str(data_obj["latitude"])
    data_properties["latitude_number"] = data_obj["latitude"]
    data_properties["longitude"] = str(data_obj["longitude"])
    data_properties["longitude_number"] = data_obj["longitude"]
    data_properties["room_type"] = data_obj["room_type"]
    data_properties["price"] = str(data_obj["price"])
    data_properties["price_number"] = data_obj["price"]
    data_properties["minimum_nights"] = str(data_obj["minimum_nights"])
    data_properties["minimum_nights_int"] = data_obj["minimum_nights"]
    data_properties["number_of_reviews"] = str(data_obj["number_of_reviews"])

    data_properties["number_of_reviews_int"] = data_obj["number_of_reviews"]
    data_properties["last_review"] = str(data_obj["last_review"])
    data_properties["last_review_date"] = data_obj["last_review"]
    data_properties["reviews_per_month"] = str(data_obj["reviews_per_month"])
    data_properties["reviews_per_month_number"] = data_obj["reviews_per_month"]
    data_properties["calculated_host_listings_count"] = str(data_obj["calculated_host_listings_count"])
    data_properties["calculated_host_listings_count_int"] = data_obj["calculated_host_listings_count"]
    data_properties["availability_365"] = str(data_obj["availability_365"])
    data_properties["availability_365_int"] = data_obj["availability_365"]
    data_properties["org_id"] = data_obj["id"]

    id = get_valid_uuid(uuid4())
    client.batch.add_data_object(data_properties, "Listing", id)

In [5]:
client.query.get("Listing", ["price"]).with_limit(1).do()

{'data': {'Get': {'Listing': [{'price': '190'}]}}}

In [6]:
ad_schema = {
   "classes": [
       {
           "class": "Ad",
           "description": "An advertisement for AirBnb.",
           "properties": [
               {
                   "dataType": ["text"],
                   "name": "content",
                   "description": "The advertisement text."
               }
           ]
       }
   ] 
}

client.schema.create(ad_schema)

listing_hasAd_cref = {
    "dataType": ["Ad"],
    "description": "The ad for an AirBnb Listing",
    "name": "hasAd"
}

client.schema.property.create("Listing", listing_hasAd_cref)

In [7]:
generatePrompt = "Please write a description for the following AirBnB Listing in English. NAME: {name} HOST_NAME {host_name} NEIGHBOURHOOD {neighbourhood} NEIGHBOURHOOD_GROUP {neighbourhood_group} PRICE {price}. Please do not make up any information about the property in your description."

generate_properties = ["name","neighbourhood","neighbourhood_group",
                       "latitude","longitude","room_type",
                       "price","minimum_nights","number_of_reviews",
                       "last_review","reviews_per_month",
                       "calculated_host_listings_count","availability_365"]

descriptions = client.query\
            .get("Listing", generate_properties)\
            .with_generate(single_prompt=generatePrompt)\
            .with_additional(["id"])\
            .with_limit(1)\
            .do()["data"]["Get"]["Listing"]

for description in descriptions:
    new_property = {
      "description": description["_additional"]["generate"]["singleResult"]
    }
    id = description["_additional"]["id"]
    client.data_object.update(
      new_property,
      class_name = "Listing",
      uuid=id
    )

In [8]:
generate_properties.insert(0, "description")
client.query.get("Listing", generate_properties).with_limit(1).do()

{'data': {'Get': {'Listing': [{'availability_365': '249',
     'calculated_host_listings_count': '2',
     'description': '\n\nHuge 2 BR Upper East  Cental Park\n\nHost Sing\n\nEast Harlem\nManhattan\n\nPrice: $190\n\nThis huge 2 bedroom is located in the heart of Manhattan, steps away from Central Park. It is newly renovated with stainless steel appliances, exposed brick, and beautiful hardwood floors. The apartment is located in a very safe and convenient area, with a variety of restaurants and stores nearby. The subway is also very close, making it easy to get around the city.\n\nThis is the perfect place for anyone looking to explore New York City.',
     'last_review': 'None',
     'latitude': '40.79685',
     'longitude': '-73.94872',
     'minimum_nights': '7',
     'name': 'Huge 2 BR Upper East  Cental Park',
     'neighbourhood': 'East Harlem',
     'neighbourhood_group': 'Manhattan',
     'number_of_reviews': '0',
     'price': '190',
     'reviews_per_month': 'None',
     'r

In [9]:
client.query.get("Listing", "description").with_near_text({
    "concepts": ["AirBnB near a place to walk my dog"]
}).with_additional("distance").with_limit(1).do()

{'data': {'Get': {'Listing': [{'_additional': {'distance': 0.64614415},
     'description': '\n\nHuge 2 BR Upper East  Cental Park\n\nHost Sing\n\nEast Harlem\nManhattan\n\nPrice: $190\n\nThis huge 2 bedroom is located in the heart of Manhattan, steps away from Central Park. It is newly renovated with stainless steel appliances, exposed brick, and beautiful hardwood floors. The apartment is located in a very safe and convenient area, with a variety of restaurants and stores nearby. The subway is also very close, making it easy to get around the city.\n\nThis is the perfect place for anyone looking to explore New York City.'}]}}}

In [11]:
generatePrompt = "Please write an engaging advertisement for the following AirBnb Listing: Description: {description} Please write the advertisement for this listing."

ads = client.query\
            .get("Listing", ["description"])\
            .with_generate(single_prompt=generatePrompt)\
            .with_additional(["id"])\
            .with_limit(1)\
            .do()["data"]["Get"]["Listing"]

ads[0]

{'_additional': {'generate': {'error': None,
   'singleResult': "\n\nWelcome to your home away from home in the heart of Manhattan! This huge 2 bedroom is located just steps away from Central Park and is newly renovated with stainless steel appliances, exposed brick, and beautiful hardwood floors. The apartment is located in a very safe and convenient area, with a variety of restaurants and stores nearby. The subway is also very close, making it easy to get around the city.\n\nThis is the perfect place for anyone looking to explore New York City. Whether you're a solo traveler or a group of friends, this apartment is the perfect base for your adventures. Don't miss out on the opportunity to stay in this amazing location and book your stay today!"},
  'id': '03ac2ffc-6c89-48ab-8b69-e0a8db1538cb'},
 'description': '\n\nHuge 2 BR Upper East  Cental Park\n\nHost Sing\n\nEast Harlem\nManhattan\n\nPrice: $190\n\nThis huge 2 bedroom is located in the heart of Manhattan, steps away from Centra

In [12]:
for ad in ads:
    new_ad_properties = {
      "content": ad["_additional"]["generate"]["singleResult"]
    }
    new_ad_id = get_valid_uuid(uuid4())
    client.data_object.create(
      data_object = new_ad_properties,
      class_name = "Ad",
      uuid = new_ad_id
    )
    client.data_object.reference.add(
      from_uuid = ad["_additional"]["id"],
      from_property_name = "hasAd",
      to_uuid = new_ad_id
    )



In [13]:
weaviate_query_str = """
{
  Get {
    Listing (
      where: {
        path: ["hasAd"],
        operator: GreaterThanEqual,
        valueInt: 1
      },
      limit: 1
    ) {
      description
      hasAd {
        ... on Ad {
          content
        }
      }
    }
  }
}
"""

client.query.raw(weaviate_query_str)

{'data': {'Get': {'Listing': [{'description': '\n\nHuge 2 BR Upper East  Cental Park\n\nHost Sing\n\nEast Harlem\nManhattan\n\nPrice: $190\n\nThis huge 2 bedroom is located in the heart of Manhattan, steps away from Central Park. It is newly renovated with stainless steel appliances, exposed brick, and beautiful hardwood floors. The apartment is located in a very safe and convenient area, with a variety of restaurants and stores nearby. The subway is also very close, making it easy to get around the city.\n\nThis is the perfect place for anyone looking to explore New York City.',
     'hasAd': [{'content': "\n\nWelcome to your home away from home in the heart of Manhattan! This huge 2 bedroom is located just steps away from Central Park and is newly renovated with stainless steel appliances, exposed brick, and beautiful hardwood floors. The apartment is located in a very safe and convenient area, with a variety of restaurants and stores nearby. The subway is also very close, making it 

In [14]:
target_property = {
    "dataType": [
        "text"
    ],
    "name": "target",
    "description": "High-level audience target for this ad."
}

client.schema.property.create("Ad", target_property)

In [15]:
targets = ["young couples", "elderly couples", "single travelers"]

for target in targets:
    generatePrompt = "Please write an engaging advertisement for the following AirBnb Listing: Description: {description}Please write the advertisement for this listing to target "
    generatePrompt += target

    ads = client.query\
              .get("Listing", ["description"])\
              .with_generate(single_prompt=generatePrompt)\
              .with_additional(["id"])\
              .with_limit(5)\
              .do()["data"]["Get"]["Listing"]
  
    for ad in ads:
        new_ad_properties = {
            "content": ad["_additional"]["generate"]["singleResult"],
            "target": target
        }
        new_ad_id = get_valid_uuid(uuid4())
        client.data_object.create(
            data_object = new_ad_properties,
            class_name = "Ad",
            uuid = new_ad_id
        )
        client.data_object.reference.add(
            from_uuid = ad["_additional"]["id"],
            from_property_name = "hasAd",
            to_uuid = new_ad_id
        )

In [16]:
where_filter = {
  "path": ["target"],
  "operator": "Equal",
  "valueText": "single travelers"
}

ads = client.query\
            .get("Ad", ["content", "target"])\
            .with_where(where_filter)\
            .with_limit(5)\
            .do()["data"]["Get"]["Ad"]

ads[0]

{'content': "\n\nWelcome to your home away from home in the heart of Manhattan! This huge 2 bedroom is located just steps away from Central Park and all the best that New York City has to offer.\n\nThe apartment has been newly renovated with stainless steel appliances, exposed brick, and beautiful hardwood floors. You'll love the convenience of being located in a very safe and convenient area, with a variety of restaurants and stores nearby. The subway is also very close, making it easy to get around the city.\n\nWhether you're here for business or pleasure, this is the perfect place to explore New York City. Don't miss out on this opportunity to make memories in the Big Apple!",
 'target': 'single travelers'}

In [17]:
user_schema = {
   "classes": [
       {
           "class": "User",
           "description": "An advertisement for AirBnb.",
           "properties": [
               {
                   "dataType": ["text"],
                   "name": "biography",
                   "description": "Short description of this user."
               },
               {
                   "dataType": ["text"],
                   "name": "name",
                   "description": "This person's name."
               }
           ]
       }
   ]
}

client.schema.create(user_schema)

In [18]:
new_user_id = get_valid_uuid(uuid4())

user_properties = {
    "biography": "Connor often travels with a golden doodle named Bowen.",
    "name": "Connor"
}

client.data_object.create(
  data_object = user_properties,
  class_name = "User",
  uuid = new_user_id
)

new_user_id = get_valid_uuid(uuid4())

user_properties = {
    "biography": "Bob is a prolific weightlifter who will get upset if he doesn't have a good gym to workout in.",
    "name": "Bob"
}

client.data_object.create(
  data_object = user_properties,
  class_name = "User",
  uuid = new_user_id
)


'd7adb2f5-56dc-4303-97d6-c77d91fc3eab'

In [19]:
users = client.query\
              .get("User", ["biography"])\
              .with_additional(["id"])\
              .with_limit(5)\
              .do()["data"]["Get"]["User"]

users

[{'_additional': {'id': '5719adbc-f79e-4cf2-b971-a27934c06dba'},
  'biography': 'Connor often travels with a golden doodle named Bowen.'},
 {'_additional': {'id': 'd7adb2f5-56dc-4303-97d6-c77d91fc3eab'},
  'biography': "Bob is a prolific weightlifter who will get upset if he doesn't have a good gym to workout in."}]

In [20]:
user_hasAdTarget_cref = {
    "dataType": ["User"],
    "description": "The ad for an AirBnb Listing targeted to a particular user.",
    "name": "hasUserTarget"
}

client.schema.property.create("Ad", user_hasAdTarget_cref)

In [21]:
users = client.query\
              .get("User", ["biography"])\
              .with_additional(["id"])\
              .with_limit(5)\
              .do()["data"]["Get"]["User"]

for user in users:
    generatePrompt = "Please write an engaging advertisement for the following AirBnb Listing: Description: {description} Please write the advertisement for this listing to target the following user:"
    generatePrompt += user["biography"]

    ads = client.query\
              .get("Listing", ["description"])\
              .with_generate(single_prompt=generatePrompt)\
              .with_additional(["id"])\
              .with_limit(5)\
              .do()["data"]["Get"]["Listing"]
  
    for ad in ads:
        new_ad_properties = {
            "content": ad["_additional"]["generate"]["singleResult"],
        }
        new_ad_id = get_valid_uuid(uuid4())
        client.data_object.create(
            data_object = new_ad_properties,
            class_name = "Ad",
            uuid = new_ad_id
        )
        client.data_object.reference.add(
            from_uuid = new_ad_id,
            from_property_name = "hasUserTarget",
            to_uuid = user["_additional"]["id"]
        )

In [22]:
weaviate_query_str = """
{
  Get {
    Ad (
      where: {
        path: ["hasUserTarget"],
        operator: GreaterThanEqual,
        valueInt: 1
      }
    ) {
      content
      hasUserTarget {
        ... on User {
          name
        }
      }
    }
  }
}
"""

client.query.raw(weaviate_query_str)

{'data': {'Get': {'Ad': [{'content': '\n\nWelcome to Huge 2 BR Upper East  Cental Park\n\nThis huge 2 bedroom is located in the heart of Manhattan, steps away from Central Park. It is newly renovated with stainless steel appliances, exposed brick, and beautiful hardwood floors. The apartment is located in a very safe and convenient area, with a variety of restaurants and stores nearby. The subway is also very close, making it easy to get around the city.\n\nThis is the perfect place for anyone looking to explore New York City.',
     'hasUserTarget': [{'name': 'Connor'}]},
    {'content': None, 'hasUserTarget': [{'name': 'Connor'}]},
    {'content': None, 'hasUserTarget': [{'name': 'Connor'}]},
    {'content': None, 'hasUserTarget': [{'name': 'Connor'}]},
    {'content': None, 'hasUserTarget': [{'name': 'Connor'}]},
    {'content': '\nHello, weightlifters! Are you looking for a place to stay in New York City that will allow you to pursue your fitness goals? Look no further! This huge 2