In [None]:
"""
This pipeline needs to detect whether a person is horizontal and, if so, whether the person's head is facing left or right.

Based on that observation, we can decide which value (0, 90 or 270) to use for detecting the face.

1. Input the image and have the prompt report whether the person is horizontal and, if so, whether they are facing left or right.

2. Extract the result from the prompt response.

3. Based on the result, return the value 0, 90 or 270.
""" 

In [None]:
!pip install litellm python-dotenv opencv-python

In [31]:
import os
import cv2
from litellm import completion
import base64
from dotenv import load_dotenv
import json

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Fail early with a readable message when the key is missing. The previous
# `os.environ[...] = os.getenv(...)` round-trip raised an opaque TypeError in
# that case (os.environ rejects None) and changed nothing when the key was set.
if not os.getenv("OPENAI_API_KEY"):
	raise RuntimeError(
		"OPENAI_API_KEY is not set; export it or add it to a .env file before running this notebook."
	)

In [7]:
def get_alignment_of_faces(vision_frame) -> str:
	"""Ask a vision model how the person in an image is aligned.

	Args:
		vision_frame: Either a path to an image file (``str`` / ``os.PathLike``),
			which is loaded with ``cv2.imread``, or an already-decoded image
			that ``cv2.imencode`` can encode.

	Returns:
		The model's short free-text answer describing whether the person is
		horizontally or vertically aligned and whether they face left or right.

	Raises:
		ValueError: If the image cannot be read from the given path, if no
			frame is supplied, or if the frame cannot be encoded as PNG.
	"""
	# Paths are accepted for testing; in prod the vision frame will not be a path,
	# so an already-decoded frame is passed through untouched.
	if isinstance(vision_frame, (str, os.PathLike)):
		image_path = os.fspath(vision_frame)
		vision_frame = cv2.imread(image_path)
		# cv2.imread signals failure by returning None instead of raising.
		if vision_frame is None:
			raise ValueError(f"Could not read an image from path: {image_path!r}")
	elif vision_frame is None:
		raise ValueError("vision_frame must be an image path or a decoded image, not None")

	# Convert frame to PNG in memory and encode to base64
	encoded_ok, buffer = cv2.imencode('.png', vision_frame)
	if not encoded_ok:
		raise ValueError("Could not encode the vision frame as PNG")
	base64_image = base64.b64encode(buffer).decode('utf-8')

	# Make API call to vision model
	response = completion(
		model="gpt-4o-mini",
		messages=[
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "You are getting an image as input. You need to say if the person in the image is horizontally or vertically aligned, and you also need to say if the person in the image is more left or right facing. It's also possible that it may not be possible to detect, or there is no person in the image. Keep your response short."
					},
					{
						"type": "image_url",
						"image_url": {
							# The payload is PNG-encoded above, so the MIME type must say so.
							"url": f"data:image/png;base64,{base64_image}"
						}
					}
				]
			}
		],
	)
	return response.choices[0].message.content

In [8]:
# Run the vision step on the example image; `value` holds the model's free-text answer.
value = get_alignment_of_faces("horizontal-example.png")

In [32]:
#extract the result from the response
def extract_result(response: str) -> int:
	"""Turn the vision model's free-text answer into a value of 0, 90 or 270.

	A second model call is asked to map the description to 90 (horizontal,
	left), 270 (horizontal, right) or 0 (anything else) and to answer as a
	JSON object of the form ``{"value": <number>}``.

	Args:
		response: Free-text description returned by the image detection step.

	Returns:
		90, 270 or 0. Any reply that is not valid JSON, lacks the ``"value"``
		key, or carries a number outside the allowed set is reported as 0.
	"""
	prompt = f"""
	You are getting a response from an image detection as input - your task is to extract the detection result. If the detection result is horizontal left, you need to return 90; if it is right, return 270. In all other cases, when the response is unclear or doesn't make sense, return 0. Here is the response:

	{response}

	Your output needs to be a JSON object with exactly one key named "value" whose value is the integer 90, 270 or 0 - nothing else! Example: {{"value": 90}}
	"""
	llm_response = completion(
		model="gpt-4o",
		response_format={"type": "json_object"},
		messages=[{"content": prompt, "role": "user"}],
		temperature=0.0,
	)
	return _parse_rotation(llm_response.choices[0].message.content)


def _parse_rotation(raw_content) -> int:
	"""Validate the model's JSON reply and return 0, 90 or 270 (0 on any problem)."""
	try:
		payload = json.loads(raw_content)
	except (TypeError, ValueError):
		# Not JSON at all (or no content): treat as "unclear".
		return 0
	if not isinstance(payload, dict):
		return 0
	try:
		# Accept both 90 and "90"; models occasionally quote numbers.
		rotation = int(payload.get("value"))
	except (TypeError, ValueError, OverflowError):
		return 0
	# The model cannot be forced to stay within the allowed set, so enforce it here.
	return rotation if rotation in (0, 90, 270) else 0

In [33]:
# Map the free-text description from the vision step to the final value.
v = extract_result(value)

In [34]:
# Show the extracted value (expected to be 0, 90 or 270).
print(v)

90
