Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V1.5.0 #324

Open
wants to merge 6 commits into
base: v1.5.0
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions application/.env.template
Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@ RDS_REGION_NAME=us-west-2
AWS_DEFAULT_REGION=us-west-2
DYNAMODB_AWS_REGION=us-west-2

EMBEDDING_DIMENSION=1536
EMBEDDING_DIMENSION=1024
BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v1

# If you need to use an access key/secret key (AK/SK) to access Bedrock, store the Bedrock AK/SK in Secrets Manager. Examples are as follows:
@@ -32,4 +32,4 @@ BEDROCK_SECRETS_AK_SK=
OPENSEARCH_SECRETS_URL_HOST=opensearch-host-url
OPENSEARCH_SECRETS_USERNAME_PASSWORD=opensearch-master-user

# SAGEMAKER_ENDPOINT_EMBEDDING=
SAGEMAKER_ENDPOINT_EMBEDDING=bge-zh-15-2024-08-17-03-56-58-281-endpoint
2 changes: 1 addition & 1 deletion application/config_files/stauth_config.yaml
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@ credentials:
failed_login_attempts: 0 # Will be managed automatically
logged_in: False # Will be managed automatically
name: AWS
password: $2b$12$NDQv5NLaWiVlNuzQYHwAo.tv.f.TuX1nbdoUZi44/Y3xv4I4QAfjy # Set the password following instructions in README
password: $2b$12$pP4Vi1ovItxf/22zYn1UFeYrA2IM/D7glGNoAd3TrY0Gr4QzdZSNC # Set the password following instructions in README
cookie:
expiry_days: 2
key: some_signature_key # Must be string
47 changes: 25 additions & 22 deletions application/nlq/business/vector_store.py
Original file line number Diff line number Diff line change
@@ -76,39 +76,42 @@ def get_all_agent_cot_samples(cls, profile_name):
def add_sample(cls, profile_name, question, answer):
logger.info(f'add sample question: {question} to profile {profile_name}')
if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "":
embedding = cls.create_vector_embedding_with_sagemaker(question)
embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,question,opensearch_info['sql_index'])
else:
embedding = cls.create_vector_embedding_with_bedrock(question)
has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['sql_index'], embedding)
#has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['sql_index'], embedding)
has_same_sample = False
if has_same_sample:
logger.info(f'delete sample sample entity: {question} to profile {profile_name}')
if cls.opensearch_dao.add_sample(opensearch_info['sql_index'], profile_name, question, answer, embedding):
if cls.opensearch_dao.add_sample(opensearch_info['sql_index'], profile_name, question, answer, embedding['vector_field']):
logger.info('Sample added')

@classmethod
def add_entity_sample(cls, profile_name, entity, comment):
logger.info(f'add sample entity: {entity} to profile {profile_name}')
if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "":
embedding = cls.create_vector_embedding_with_sagemaker(entity)
embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,entity,opensearch_info['ner_index'])
else:
embedding = cls.create_vector_embedding_with_bedrock(entity)
has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['ner_index'], embedding)
#has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['ner_index'], embedding)
has_same_sample = False
if has_same_sample:
logger.info(f'delete sample sample entity: {entity} to profile {profile_name}')
if cls.opensearch_dao.add_entity_sample(opensearch_info['ner_index'], profile_name, entity, comment, embedding):
if cls.opensearch_dao.add_entity_sample(opensearch_info['ner_index'], profile_name, entity, comment, embedding['vector_field']):
logger.info('Sample added')

@classmethod
def add_agent_cot_sample(cls, profile_name, entity, comment):
logger.info(f'add agent sample query: {entity} to profile {profile_name}')
if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "":
embedding = cls.create_vector_embedding_with_sagemaker(entity)
embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,entity,opensearch_info['agent_index'])
else:
embedding = cls.create_vector_embedding_with_bedrock(entity)
has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['agent_index'], embedding)
#has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['agent_index'], embedding)
has_same_sample = False
if has_same_sample:
logger.info(f'delete agent sample sample query: {entity} to profile {profile_name}')
if cls.opensearch_dao.add_agent_cot_sample(opensearch_info['agent_index'], profile_name, entity, comment, embedding):
if cls.opensearch_dao.add_agent_cot_sample(opensearch_info['agent_index'], profile_name, entity, comment, embedding['vector_field']):
logger.info('Sample added')

@classmethod
@@ -129,19 +132,19 @@ def create_vector_embedding_with_bedrock(cls, text):
return embedding

@classmethod
def create_vector_embedding_with_sagemaker(cls, text):
try:
body = json.dumps(
{
"inputs": text,
"is_query": True
}
)
response = invoke_model_sagemaker_endpoint(SAGEMAKER_ENDPOINT_EMBEDDING, body, model_type="embedding")
embeddings = response[0]
return embeddings
except Exception as e:
logger.error(f'create_vector_embedding_with_sagemaker is error {e}')
def create_vector_embedding_with_sagemaker(cls,endpoint_name, text, index_name):
body=json.dumps(
{
"inputs": text,
"is_query": True,
"instruction" : "Represent this sentence for searching relevant passages:"
}
)
response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings")
embeddings = response['sentence_embeddings'][0]
logger.info("embeddings to ingestion")
logger.info(embeddings[:10])
return {"_index": index_name, "text": text, "vector_field": embeddings}

@classmethod
def delete_sample(cls, profile_name, doc_id):
15 changes: 11 additions & 4 deletions application/nlq/data_access/opensearch.py
Original file line number Diff line number Diff line change
@@ -3,8 +3,9 @@
from opensearchpy import OpenSearch
from opensearchpy.helpers import bulk

from utils.llm import create_vector_embedding_with_bedrock

from utils.llm import create_vector_embedding_with_bedrock,create_vector_embedding_with_sagemaker
from utils.env_var import BEDROCK_REGION, AOS_HOST, AOS_PORT, AOS_USER, AOS_PASSWORD, opensearch_info, \
SAGEMAKER_ENDPOINT_EMBEDDING
logger = logging.getLogger(__name__)

def put_bulk_in_opensearch(list, client):
@@ -190,8 +191,14 @@ def delete_sample(self, index_name, profile_name, doc_id):
return self.opensearch_client.delete(index=index_name, id=doc_id)

def search_sample(self, profile_name, top_k, index_name, query):
records_with_embedding = create_vector_embedding_with_bedrock(query, index_name=index_name)
return self.search_sample_with_embedding(profile_name, top_k, index_name, records_with_embedding['vector_field'])
if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "":
records_with_embedding = create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING, query, index_name)
return self.search_sample_with_embedding(profile_name, top_k, index_name, records_with_embedding['vector_field'])
else:
records_with_embedding = create_vector_embedding_with_bedrock(query, index_name=index_name)
return self.search_sample_with_embedding(profile_name, top_k, index_name, records_with_embedding['vector_field'])




def search_sample_with_embedding(self, profile_name, top_k, index_name, query_embedding):
13 changes: 8 additions & 5 deletions application/utils/llm.py
Original file line number Diff line number Diff line change
@@ -232,7 +232,7 @@ def invoke_model_sagemaker_endpoint(endpoint_name, body, model_type="LLM", with_
Body=body,
ContentType="application/json",
)
response_body = json.loads(response.get('Body').read())
response_body = json.loads(response.get('Body').read().decode("utf8"))
return response_body


@@ -570,14 +570,17 @@ def create_vector_embedding_with_bedrock(text, index_name):


def create_vector_embedding_with_sagemaker(endpoint_name, text, index_name):
body = json.dumps(
body=json.dumps(
{
"inputs": text,
"is_query": True
"is_query": True,
"instruction" : "Represent this sentence for searching relevant passages:"
}
)
response = invoke_model_sagemaker_endpoint(endpoint_name, body, model_type="embedding")
embeddings = response[0]
response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings")
embeddings = response['sentence_embeddings'][0]
logger.info("embeddings in llm.py")
logger.info(embeddings[:10])
return {"_index": index_name, "text": text, "vector_field": embeddings}


Loading
Oops, something went wrong.