In [1]:
!pip install --upgrade openai --quiet

In [2]:
# Read the API key from the local `key` file. Surrounding whitespace is stripped
# so that a trailing newline in the file does not become part of the credential.
with open('key', 'r') as file:
    openai_key = file.read().strip()

In [3]:
import openai
# Create the API client used by the cells below, authenticated with the key
# that was read from the `key` file.
client = openai.OpenAI(api_key=openai_key)

In [4]:
from openai.types import Model, ModelDeleted

In [5]:
# Request the model listing from the API; the next cell iterates over it to
# show the ids that contain "gpt".
models = client.models.list()

In [6]:
# Print the id of every listed model whose id contains "gpt"; all other
# entries are skipped.
for k in models:
    if "gpt" not in k.id:
        continue
    print(k.id)

gpt-4-vision-preview
gpt-3.5-turbo-0613
gpt-3.5-turbo-instruct-0914
gpt-4-1106-preview
gpt-3.5-turbo-0125
gpt-3.5-turbo
gpt-3.5-turbo-0301
gpt-3.5-turbo-16k
gpt-3.5-turbo-instruct
gpt-4-0125-preview
gpt-4-turbo-preview
gpt-3.5-turbo-1106
gpt-4
gpt-4-0613
gpt-3.5-turbo-16k-0613


In [7]:
# Chat model id used as the default by get_completion below.
model = "gpt-4-turbo-preview"

In [8]:
def get_completion(prompt, model=model, temperature=0):
    """Send `prompt` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the `content` of the one user message.
        model: Chat model id passed to the API. The default is the value the
            notebook-level `model` variable had when this cell was executed;
            re-run this cell after changing `model` to pick up a new default.
        temperature: Degree of randomness of the model's output, forwarded
            to the API unchanged. Defaults to 0, the value that was
            previously hard-coded.

    Returns:
        The `content` of the message in the first choice of the response.
    """
    messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    return response.choices[0].message.content

In [9]:
# Partially written panorama-stitching code (with `...` placeholders) that is
# embedded in the prompt sent to the model. It is kept as a plain string rather
# than an f-string so that any `{` or `}` appearing in the embedded code is
# taken literally instead of being parsed as a replacement field.
text = """
# this function is used to blend the images together
# lets allow the user to choose between 2 different blending methods:
# 1. use an avergae value per-pixel to blend the images together
# 2. use the distance-transform mask to blend the images together
def stitch_images(imgs_Ts_and_masks, blend_method=0):
    num_imgs = len(imgs_Ts_and_masks)

    # get the center image and its (potential) mask
    img_center = imgs_Ts_and_masks[0][0]
    img_center_mask = imgs_Ts_and_masks[0][2]

    # create a "big" image to show all the images on top of each other
    # the big image should have the same height as the center image
    # the big image should have the width of the center image multiplied by the number of images
    # the big image should have 3 channels (RGB, np.uint8), filled with 0s
    # the big image should be a 4D array (e.g. `np.zeros((num_imgs, img_center.shape[0], img_center.shape[1]*3, 3), dtype=np.uint8)`)
    # we will use a 4th dimension (the 0/first dimension) to store all the images, so that we can blend them later
    big_img = ...
    # create a big mask to use for the blending
    # the big mask should have the same height as the center image
    # the big mask should have the width of the center image multiplied by the number of images
    # the big mask should have 1 channel (np.float32), filled with 0s
    # the big mask should be a 3D array (e.g. `np.zeros(big_img.shape[:-1], dtype=np.float32)`)
    big_mask = ...

    # add the center image to the big image (in the img_center.shape[1]:img_center.shape[1]*2 x-axis range, in the 0/first place)
    big_img[...] = img_center
    
    # add the center image mask to the big mask
    # create a mask for the center image
    if blend_method == 0:
        # use a simple mask with 1s, in the img_center.shape[1]:img_center.shape[1]*2 x-axis range
        big_mask[...] = 1
    if blend_method == 1:
        # use a distance transform mask, using the function `create_distance_transform_mask` defined above
        # and place it in the img_center.shape[1]:img_center.shape[1]*2 x-axis range
        mask = create_distance_transform_mask(img_center)
        big_mask[...] = mask
    
    # if the center image has a mask, multiply the mask with the big mask
    if img_center_mask is not None:
        # multiply the mask with the big mask in the img_center.shape[1]:img_center.shape[1]*2 x-axis range
        # (use the *= operator) if the center image has a mask
        big_mask[...] *= img_center_mask

    # add the other images to the big image
    # for each image-and-transform tuple (im, H, im_mask) in imgs_Ts_and_masks[1:] (skip the first one, which is the center image)
    #   apply the homography H to the image im
    #   add the image to the big image in the img_center.shape[1]*l:img_center.shape[1]*(l+1) x-axis range
    #   (use the l variable to keep track of the current image index)
    #   apply the homography H to the image mask im_mask
    #   add the image mask to the big mask in the img_center.shape[1]*l:img_center.shape[1]*(l+1) x-axis range
    #   if the image has a mask, multiply the mask with the big mask

    for l, (im, H, im_mask) in enumerate(imgs_Ts_and_masks[1:]):
        # we need to shift the homography to the right by img_center.shape[1]*l
        # use the np.matmul function to multiply the homography with a simple translation matrix
        # use the np.array function to create the translation matrix
        # the translation matrix should be of shape (3,3) with 1s on the diagonal
        # and -img_center.shape[1]*l in the last row, first column (for a translation in the x-axis)
        H = ...

        # create a mask for the image, same size as the image, similar to the code above
        # use the blend_method variable (0 or 1) to determine which mask to use: 0 = simple mask, 1 = distance transform mask
        if blend_method == 0:
            # use a simple mask with 1s (np.ones) of the same size as the image (im.shape), dtype=np.float32
            mask = ...
        elif blend_method == 1:
            # use a distance transform mask, using the function `create_distance_transform_mask` defined above
            mask = ...

        # add the mask to the big mask, same as above
        if im_mask is not None:
            mask *= im_mask

        # apply the homography to the mask by using the cv2.warpPerspective function
        # the shape of the output should be (big_mask.shape[1], big_mask.shape[2])
        # the output should be the big_mask[l+1] (the l+1-th image in "big mask")
        # remember to use the borderMode=cv2.BORDER_TRANSPARENT parameter to avoid black borders
        big_mask[l+1] = ...

        # apply the homography to the image by using the cv2.warpPerspective function
        # the shape of the output should be (big_img.shape[1], big_img.shape[2])
        big_img[l+1] = ...
    
    # save the original big mask (before normalization) for visualization later
    big_mask_orig = big_mask.copy()

    # normalize the mask (divide by the sum of the mask)
    big_mask_sum = np.sum(big_mask, axis=0)
    for i in range(num_imgs):
        # divide the big_mask[i] by the sum of the mask (big_mask_sum), but only for the pixels that are > 0 (use the big_mask[i] > 0 condition)
        big_mask[i][...] = ...

    # blend the images using the mask
    big_img_out = np.zeros(big_img.shape[1:], dtype=np.float32)
    for i in range(num_imgs):
        # multiply the image (big_img[i]) with the mask (big_mask[i]) and add it to the big_img_out
        # use the np.repeat function to repeat the mask 3 times (for the 3 color channels, RGB)
        # the trick is to use big_mask[i][...,None] and axis=2 for np.repeat to get the right shape
        # use the np.float32 type for the big_mask[i] and the big_img[i]
        big_img_out += ...

    # convert the big_img_out to uint8 (with `.astype(np.uint8)`)
    return big_img_out.astype(np.uint8), big_mask, big_mask_orig


# run the function with the images and the transforms and inliers from the previous cell
# lets use imgs[1] as the center image, and imgs[0] as the left image, imgs[2] as the right image, and imgs[3] as the last image
# imgs[1] is the center image, so we don't need to apply any transform to it, so we use the identity matrix (np.eye(3))
# imgs[0] is the left image, so we need to apply the transform transforms_and_inliers[0][1][0] (0->1) to it
# imgs[2] is the right image, so we need to apply the inverse of the transform transforms_and_inliers[1][2][0] to it (np.linalg.inv, to get 2->1 transform)
# imgs[3] is the last image, so we need to apply the inverse of the transform transforms_and_inliers[1][2][0] (2->1) and transforms_and_inliers[2][3][0] (3->2) to it
#   we can use the np.matmul function to multiply the two transforms and then use np.linalg.inv to get the overall inverse (3->2->1 = 3->1)
big_img, big_mask, big_mask_orig = stitch_images([
        (imgs[1], ...),
        (imgs[0], ...),
        (imgs[2], ...),
        (imgs[3], ...),
    ],
    blend_method=1
    )
"""

In [10]:
# Build the review request: a description of the intended panorama algorithm
# followed by the contents of `text`, wrapped in a ``` fence.
prompt = f"""
I'm coding in python and using the OpenCV library. I want a python script to build panoramas using the following simple algorithm
step 1: extract keypoints and descriptors from all images
step 2: find matches between all images
step 3: select a reference image
step 4: begin adding the other images together with the reference image by
  by first finding a transformation between them
  and then blending the images together

Following is the script I that I currently have. I want you to give me feedback on how I could make it work.

```{text}```
"""
# Send the prompt using get_completion's default arguments and print the
# returned reply text.
response = get_completion(prompt)
print(response)

Your script outlines a basic approach to stitching images together to create a panorama, but there are several key parts missing or requiring clarification for it to work. Here's a step-by-step guide to fill in the gaps and correct some misunderstandings in your approach:

### 1. Preparing the Big Image and Big Mask Arrays

Your description of creating a "big" image and mask with a 4th dimension to store all images for later blending is not practical for image stitching. Instead, you should aim to create a large canvas where all transformed images can be placed according to their relative positions. This canvas should initially be large enough to hold all images once transformed and placed relative to each other. The exact size can be determined after applying transformations and calculating the bounding box that can contain all images.

### 2. Applying Homographies and Blending

You need to warp each image (except the reference image) using the corresponding homography matrix to align