#### podobieństwo cosinusowe
- przyjmuje wartości od -1 do 1
- gdzie -1 oznacza przeciwne znaczenie semantyczne, a 1 – identyczne

![alt](pics/wektor.PNG)

In [4]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Each point is stored as a 1x2 row vector (ndmin=2 adds the leading axis),
# because the pairwise cosine_similarity helper works on 2-D inputs.
p1 = np.array([2, 5], ndmin=2)
p2 = np.array([6, 7], ndmin=2)
p3 = np.array([-4, -3], ndmin=2)

In [7]:
# Only the last expression of a cell is echoed automatically, so the first
# similarity has to be displayed explicitly; otherwise its result is discarded.
display(cosine_similarity(p1, p2))  # p1 vs p2: both vectors have positive coordinates
cosine_similarity(p1, p3)  # p1 vs p3: p3 points the opposite way, so the value is negative

array([[-0.85419856]])

In [9]:
# np.dot(p1, p2.T) / (np.linalg.norm(p1) * np.linalg.norm(p2))

#### embeddings

In [1]:
from openai import OpenAI
from utils import Settings

In [None]:
# Read the OpenAI key from the project's Settings object (utils module) so it
# is not hardcoded in the notebook.
# NOTE(review): presumably Settings loads it from the environment / a .env file — confirm.
api_key = Settings().openai_api_key

In [None]:
# Sentence to embed; it is passed to the API exactly as written.
text = 'dzisiaj jest czwartek i uczymy się pythona'

# Client authenticated with the key read in the previous cell.
client = OpenAI(api_key=api_key)

# Request an embedding for `text`; the response is inspected in the cells below.
response = client.embeddings.create(input=text, model='text-embedding-3-small')

In [None]:
# Embedding of the first (and only) input text: a plain list of floats.
# NOTE(review): this echoes the whole vector; consider slicing (e.g. `[:5]`)
# to keep the saved output short.
response.data[0].embedding

[-0.011639133095741272,
 0.008998321369290352,
 0.0017972191562876105,
 -0.010093769058585167,
 0.02758181095123291,
 0.002247135154902935,
 0.021772025153040886,
 0.03466309979557991,
 -0.00766813475638628,
 -0.007990900427103043,
 0.001048377831466496,
 -0.04769110307097435,
 -0.029029367491602898,
 -0.015150435268878937,
 0.0005498634418472648,
 0.0017238631844520569,
 -0.007394272834062576,
 0.031494125723838806,
 -0.033626336604356766,
 0.047378119081258774,
 -0.03742128238081932,
 -0.03251132741570473,
 -0.017840150743722916,
 -0.026818910613656044,
 0.008387022651731968,
 -0.023552129045128822,
 0.04405265301465988,
 0.04604793339967728,
 -0.03877102956175804,
 0.0008325892849825323,
 0.024451961740851402,
 -0.04107929393649101,
 -0.004697221796959639,
 -0.009296635165810585,
 0.012607431039214134,
 -0.05442028492689133,
 -0.01830962859094143,
 0.06169718876481056,
 -0.014759203419089317,
 0.011287025175988674,
 -0.021752463653683662,
 0.0109544787555933,
 0.01640237495303154,
 

In [20]:
# Echo the whole response object; its repr contains the full embedding again.
# NOTE(review): the name `response` is reused later for the HTTP response, so
# this cell depends on execution order.
response

CreateEmbeddingResponse(data=[Embedding(embedding=[-0.011639133095741272, 0.008998321369290352, 0.0017972191562876105, -0.010093769058585167, 0.02758181095123291, 0.002247135154902935, 0.021772025153040886, 0.03466309979557991, -0.00766813475638628, -0.007990900427103043, 0.001048377831466496, -0.04769110307097435, -0.029029367491602898, -0.015150435268878937, 0.0005498634418472648, 0.0017238631844520569, -0.007394272834062576, 0.031494125723838806, -0.033626336604356766, 0.047378119081258774, -0.03742128238081932, -0.03251132741570473, -0.017840150743722916, -0.026818910613656044, 0.008387022651731968, -0.023552129045128822, 0.04405265301465988, 0.04604793339967728, -0.03877102956175804, 0.0008325892849825323, 0.024451961740851402, -0.04107929393649101, -0.004697221796959639, -0.009296635165810585, 0.012607431039214134, -0.05442028492689133, -0.01830962859094143, 0.06169718876481056, -0.014759203419089317, 0.011287025175988674, -0.021752463653683662, 0.0109544787555933, 0.016402374953

#### pydantic

- walidacja danych
- podnosi błąd w momencie niezgodności typu danych z deklarowanymi
- jeśli możliwa jest automatyczna konwersja, to jest ona wykonywana
- tryb strict nie dokonuje automatycznej konwersji typu danych

In [26]:
from pydantic import BaseModel, field_validator

In [29]:
# A plain dictionary: nothing constrains which keys exist or what types the
# values have.
person = dict(name="Basia", age=35, height=175.5)
person

{'name': 'Basia', 'age': 35, 'height': 175.5}

In [23]:
# Two ways to read a value: indexing raises KeyError when the key is missing,
# while .get() returns None instead.
person['name'], person.get('age')

('Basia', 35)

In [None]:
# Key missing from the dictionary:
# person['is_married']   # indexing would raise KeyError
person.get('is_married')  # .get() returns None instead, so the cell shows no output

In [44]:
# Pydantic model of a person: the annotations below are the validation rules.
class PersonBase(BaseModel):
    # Strict mode: no automatic type conversion, so a value such as age='45'
    # is rejected instead of being turned into an int.
    model_config = dict(strict=True)

    name: str
    age: int
    height: float

class PersonClassic:
    """Plain-Python counterpart of PersonBase with hand-written validation.

    Only ``name`` is type-checked; ``age`` and ``height`` are stored exactly
    as they are passed in.

    Raises:
        ValueError: if ``name`` is not a ``str``.
    """

    def __init__(self, name, age, height):
        if isinstance(name, str):
            # Validation passed: keep all three values on the instance.
            self.name, self.age, self.height = name, age, height
        else:
            raise ValueError('imie musi byc typu string')

In [None]:
# Every argument already has the declared type, so strict validation passes
# and the values are available as attributes.
person2 = PersonBase(name='Danusia', age=45, height=168.5)
person2.name, person2.age, person2.height

('Danusia', 45, 168.5)

In [None]:
# person = PersonClassic(name=13, age='45', height='168.5')
# person

ValueError: imie musi byc typu string

In [None]:
# person2 = PersonBase(name='Danusia', age='45', height=168.5)

ValidationError: 1 validation error for PersonBase
age
  Input should be a valid integer [type=int_type, input_value='45', input_type=str]
    For further information visit https://errors.pydantic.dev/2.12/v/int_type

##### własny walidator

In [54]:
# Person model with a custom validator that rejects negative ages.
class Person(BaseModel):
    name: str
    age: int
    height: float | int
    is_married: bool

    @field_validator('age')
    @classmethod
    def parse_age(cls, value: int) -> int:
        """Validate the ``age`` field.

        Args:
            value: the age being validated.

        Returns:
            The same age, unchanged, when it is not negative.

        Raises:
            ValueError: if the age is negative; pydantic reports it as a
                ValidationError for the ``age`` field.
        """
        if value < 0:
            raise ValueError('Age cannot be negative')
        # The validator's return value becomes the field value, so it must be
        # returned explicitly; without this every valid age would turn into None.
        return value

In [None]:
# Person(name='Adam', age=-30, height=184, is_married=False)

ValidationError: 1 validation error for Person
is_married
  Field required [type=missing, input_value={'name': 'Adam', 'age': 30, 'height': 184}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.12/v/missing

##### kompozycja

In [53]:
from typing import Optional

In [63]:
# User record with a nested address dictionary and no age.
user1 = dict(
    name='Adam',
    address=dict(city='Berlin', country='Germany'),
)
# Nested lookup by chained indexing (not echoed: it is not the cell's last expression).
user1['address']['city']

# Same user with an age supplied as a string rather than a number.
user2 = dict(
    name='Adam',
    age='30',
    address=dict(city='Berlin', country='Germany'),
)

# Nested model: describes the dictionary stored under a user's 'address' key.
class Address(BaseModel):
    city: str
    country: str

# Composite model: `address` is itself a model, so the nested dictionary is
# validated against Address.
class User(BaseModel):
    name: str
    # Optional field: it may be left out (as in user1) and then defaults to None.
    age: int | None = None
    address: Address


In [65]:
# A dict is converted into a data model with ** : unpacking passes every key as
# a keyword argument, so each value is assigned to the model field of the same name.
User(**user1)
pyd_user2 = User(**user2)

In [67]:
pyd_user2.address.city

'Berlin'

##### web api

In [68]:
import requests

# Endpoint queried for a cat fact; the JSON body is parsed in the next cell.
url = "https://catfact.ninja/fact"

# requests applies no timeout by default, so without one a stalled server
# would block the cell indefinitely.
response = requests.get(url, timeout=10)
# Fail here on a 4xx/5xx reply instead of later, while parsing an error body.
response.raise_for_status()

In [74]:
data = response.json() # parses the JSON body into a Python object (unpacked as a dict further below)
raw_json = response.content # the raw, unparsed response body

In [76]:
# Not echoed: only the last expression of a cell is displayed, and this cell
# ends with a class definition.
data

# Model of the API payload: the parsed dictionary is expected to provide both
# fields.
class CatFact(BaseModel):
    fact: str
    length: int

In [77]:
# Validate the parsed payload by unpacking it into the model; a missing field
# or a value of the wrong type raises ValidationError.
CatFact(**data)

CatFact(fact='Owning a cat is actually proven to be beneficial for your health.', length=65)