# Amazon Kendra with Boto3 library

###### Importing boto3 library

In [1]:
import time

import boto3

In [2]:
# Low-level Kendra client shared by every cell below; only the service name is
# passed, so everything else comes from the default boto3 session.
client = boto3.client("kendra")

In [3]:
# Show the version of the installed boto3 package.
print(boto3.__version__)

1.13.11


###### Creating an index: `Name` and `RoleArn` are required, while `Edition`, the server-side encryption (KMS key) configuration and `Description` are optional

In [4]:
# Request a new Developer Edition index; the response carries the new index Id.
create_index_kwargs = {
    'Name': 'politicsindex',
    'Edition': 'DEVELOPER_EDITION',
    'RoleArn': 'arn:aws:iam::353664287185:role/kendra_project2',
    # Optional server-side encryption with a KMS key (left disabled here):
    # 'ServerSideEncryptionConfiguration': {'KmsKeyId': 'string'},
    'Description': "politics data index",
}
index_response = client.create_index(**create_index_kwargs)

In [5]:
# Display the raw create_index response.
index_response

{'Id': 'fa180e63-02a9-4c27-9c5f-86236f8ea1ad',
 'ResponseMetadata': {'RequestId': 'a9be9115-7188-40be-b002-93e984e8b898',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'a9be9115-7188-40be-b002-93e984e8b898',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '45',
   'date': 'Mon, 29 Jun 2020 03:53:05 GMT'},
  'RetryAttempts': 0}}

In [7]:
# Keep the index Id; the Kendra calls in the rest of the notebook are scoped by it.
index_id = index_response['Id']
print(index_id)

fa180e63-02a9-4c27-9c5f-86236f8ea1ad


###### Describing the created index and looking at its status

In [8]:
# create_index is asynchronous: a new index starts in the CREATING state and
# is only usable once its Status becomes ACTIVE. Poll describe_index until the
# index leaves CREATING so the following cells also work on a fresh
# "Restart Kernel -> Run All".
INDEX_POLL_SECONDS = 60  # pause between two status checks

while True:
    describe_index_response = client.describe_index(
        Id=index_id
    )
    index_status = describe_index_response['Status']
    if index_status != 'CREATING':
        break
    print('Index status: ' + index_status + ' - checking again in '
          + str(INDEX_POLL_SECONDS) + 's')
    time.sleep(INDEX_POLL_SECONDS)

# A FAILED index can never be used by the remaining cells: stop here and
# surface the reason reported by the service instead of failing later.
if index_status == 'FAILED':
    raise RuntimeError(
        'Creation of index ' + index_id + ' failed: '
        + describe_index_response.get('ErrorMessage', 'no error message returned')
    )

In [9]:
# Display the describe_index response (includes the index 'Status').
describe_index_response

{'Name': 'politicsindex',
 'Id': 'fa180e63-02a9-4c27-9c5f-86236f8ea1ad',
 'Edition': 'DEVELOPER_EDITION',
 'RoleArn': 'arn:aws:iam::353664287185:role/kendra_project2',
 'Status': 'ACTIVE',
 'Description': 'politics data index',
 'CreatedAt': datetime.datetime(2020, 6, 29, 9, 23, 5, 387000, tzinfo=tzlocal()),
 'UpdatedAt': datetime.datetime(2020, 6, 29, 9, 23, 5, 387000, tzinfo=tzlocal()),
 'DocumentMetadataConfigurations': [{'Name': '_authors',
   'Type': 'STRING_LIST_VALUE',
   'Search': {'Facetable': False, 'Searchable': False, 'Displayable': False}},
  {'Name': '_category',
   'Type': 'STRING_VALUE',
   'Relevance': {'Importance': 1, 'ValueImportanceMap': {}},
   'Search': {'Facetable': False, 'Searchable': False, 'Displayable': False}},
  {'Name': '_created_at',
   'Type': 'DATE_VALUE',
   'Relevance': {'Freshness': False,
    'Importance': 1,
    'Duration': '25920000s',
    'RankOrder': 'ASCENDING'},
   'Search': {'Facetable': False, 'Searchable': False, 'Displayable': False}},
 

###### Creating a data source for the index; in our case the data source is an S3 bucket

In [10]:
# Attach the S3 bucket that holds the documents to the index as a data source.
s3_source_config = {'S3Configuration': {'BucketName': 'politics-text-data'}}

data_source_response = client.create_data_source(
    IndexId=index_id,
    Name='politicsdata',
    Type='S3',
    Configuration=s3_source_config,
    RoleArn='arn:aws:iam::353664287185:role/kendra_project2',
    Description="data source for politics index",
)

In [11]:
# Display the raw create_data_source response.
data_source_response

{'Id': 'bbfa173d-95da-4647-b0bd-8824179f26d8',
 'ResponseMetadata': {'RequestId': 'd1852f40-3d34-436d-9344-bd38b2b0d4ff',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd1852f40-3d34-436d-9344-bd38b2b0d4ff',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '45',
   'date': 'Mon, 29 Jun 2020 04:06:57 GMT'},
  'RetryAttempts': 1}}

In [12]:
# Keep the data source Id for the describe / sync / delete calls below.
data_source_id = data_source_response['Id']

###### describing the created data source by passing the index_id and the data source id

In [13]:
# Look up the data source; it is addressed by its own Id plus the owning index Id.
describe_data_source_response = client.describe_data_source(
    IndexId=index_id,
    Id=data_source_id,
)

In [14]:
# Display the describe_data_source response.
describe_data_source_response

{'Id': 'bbfa173d-95da-4647-b0bd-8824179f26d8',
 'IndexId': 'fa180e63-02a9-4c27-9c5f-86236f8ea1ad',
 'Name': 'politicsdata',
 'Type': 'S3',
 'Configuration': {'S3Configuration': {'BucketName': 'politics-text-data'}},
 'CreatedAt': datetime.datetime(2020, 6, 29, 9, 36, 57, 687000, tzinfo=tzlocal()),
 'UpdatedAt': datetime.datetime(2020, 6, 29, 9, 36, 57, 687000, tzinfo=tzlocal()),
 'Description': 'data source for politics index',
 'Status': 'ACTIVE',
 'Schedule': '',
 'RoleArn': 'arn:aws:iam::353664287185:role/kendra_project2',
 'ResponseMetadata': {'RequestId': '174f2dd2-a68d-4ca6-9c72-14d168ce0196',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '174f2dd2-a68d-4ca6-9c72-14d168ce0196',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '396',
   'date': 'Mon, 29 Jun 2020 04:08:20 GMT'},
  'RetryAttempts': 0}}

###### Listing out all the data sources under an index

In [15]:
# Ask for the data sources registered under the index.
list_data_source_response = client.list_data_sources(IndexId=index_id)

In [16]:
# Display the list_data_sources response.
list_data_source_response

{'SummaryItems': [{'Name': 'politicsdata',
   'Id': 'bbfa173d-95da-4647-b0bd-8824179f26d8',
   'Type': 'S3',
   'CreatedAt': datetime.datetime(2020, 6, 29, 9, 36, 57, 687000, tzinfo=tzlocal()),
   'UpdatedAt': datetime.datetime(2020, 6, 29, 9, 36, 57, 687000, tzinfo=tzlocal()),
   'Status': 'ACTIVE'}],
 'ResponseMetadata': {'RequestId': '9e617e72-900d-42ec-b25c-1406f54b0e9c',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '9e617e72-900d-42ec-b25c-1406f54b0e9c',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '174',
   'date': 'Mon, 29 Jun 2020 04:08:29 GMT'},
  'RetryAttempts': 1}}

###### Syncing the documents in the bucket into the index so that Kendra can index them: we start the data source sync job

In [17]:
# Kick off a sync job for the data source; the response holds its ExecutionId.
data_source_sync_job_response = client.start_data_source_sync_job(
    IndexId=index_id,
    Id=data_source_id,
)

In [18]:
# Display the start_data_source_sync_job response.
data_source_sync_job_response

{'ExecutionId': '2f84719d-a6c4-47f0-a89a-b5a38297e0b2',
 'ResponseMetadata': {'RequestId': 'a8ea84a3-8437-447d-aef3-7c0825e0910d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'a8ea84a3-8437-447d-aef3-7c0825e0910d',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '54',
   'date': 'Mon, 29 Jun 2020 04:08:36 GMT'},
  'RetryAttempts': 0}}

###### list of jobs created and their status

In [19]:
# Fetch the sync history of the data source, restricted by StatusFilter to
# jobs whose status is SUCCEEDED.
list_of_sync_jobs_response = client.list_data_source_sync_jobs(
    IndexId=index_id,
    Id=data_source_id,
    StatusFilter='SUCCEEDED',
)

In [20]:
# Display the list_data_source_sync_jobs response.
list_of_sync_jobs_response

{'History': [{'ExecutionId': '2f84719d-a6c4-47f0-a89a-b5a38297e0b2',
   'StartTime': datetime.datetime(2020, 6, 29, 9, 38, 36, 642000, tzinfo=tzlocal()),
   'EndTime': datetime.datetime(2020, 6, 29, 12, 12, 11, 49000, tzinfo=tzlocal()),
   'Status': 'SUCCEEDED',
   'Metrics': {'DocumentsAdded': '417',
    'DocumentsModified': '0',
    'DocumentsDeleted': '0',
    'DocumentsFailed': '0',
    'DocumentsScanned': '417'}}],
 'ResponseMetadata': {'RequestId': '3fc78688-7480-4784-917c-72e60a0fb094',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '3fc78688-7480-4784-917c-72e60a0fb094',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '274',
   'date': 'Mon, 29 Jun 2020 07:08:48 GMT'},
  'RetryAttempts': 1}}

###### to stop a job

In [21]:
# Ask Kendra to stop the sync job of this data source.
# NOTE(review): presumably this cancels a sync that is still in progress -
# skip this cell if the job started above should be allowed to finish.
stop_sync_job_response = client.stop_data_source_sync_job(
    IndexId=index_id,
    Id=data_source_id,
)

In [22]:
# Display the stop_data_source_sync_job response.
stop_sync_job_response

{'ResponseMetadata': {'RequestId': '8d6044ed-e8b2-4b5a-8021-552000e40a8d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '8d6044ed-e8b2-4b5a-8021-552000e40a8d',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Mon, 29 Jun 2020 07:10:45 GMT'},
  'RetryAttempts': 0}}

###### Ask Amazon Kendra a question by passing the index_id and the query text; the response also reports the total number of results available for the query
###### Reference: https://docs.aws.amazon.com/kendra/latest/dg/searching-example.html#searching-index-sdk

# Query1

In [23]:
query = 'what are the political parties present?'

# Send the natural-language question to the index and echo the full response.
response = client.query(QueryText=query, IndexId=index_id)
response

{'QueryId': '9d97a18e-3d92-4c47-8a61-1bf0a202e5eb',
 'ResultItems': [{'Id': '9d97a18e-3d92-4c47-8a61-1bf0a202e5eb-d9f47253-ab15-46f4-b757-d6541725d5c8',
   'Type': 'ANSWER',
   'AdditionalAttributes': [{'Key': 'AnswerText',
     'ValueType': 'TEXT_WITH_HIGHLIGHTS_VALUE',
     'Value': {'TextWithHighlightsValue': {'Text': '"My concern is that, whilst the Labour Party and the Conservative Party will take a constructive approach to the debate, right-wing political parties, picking up on statements like `burden to Britain\' will exploit this and create a lot of fear and uncertainty". It is precisely that concern - and the possible suggestion the issue is playing to the far right\'s racist agenda - that will provoke strong reactions from many concerned with this issue. The challenge for the big parties is to ensure they can engage in the debate during the cut and thrust of a general election while also avoiding that trap.',
       'Highlights': [{'BeginOffset': 132,
         'EndOffset': 14

In [24]:
# Total number of results reported by Kendra for the last query.
response['TotalNumberOfResults']

268

In [25]:
# Print the title (when present) and the excerpt of every ANSWER-type result.
print('\nSearch results for query: ' + query + '\n')

for result in response['ResultItems']:
    if result['Type'] != 'ANSWER':
        continue  # only ANSWER results are shown in this cell
    if 'DocumentTitle' in result:
        print('Title: ' + result['DocumentTitle']['Text'])
    print(result['DocumentExcerpt']['Text'])


Search results for query: what are the political parties present?

Title: 334
"My concern is that, whilst the Labour Party and the Conservative Party will take a constructive approach to the debate, right-wing political parties, picking up on statements like `burden to Britain' will exploit this and create a lot of fear and uncertainty". It is precisely that concern - and the


In [26]:
# Print every DOCUMENT-type result: title (when present), excerpt, then a
# row of asterisks as a separator.
print('\nSearch results for query: ' + query + '\n')

separator = '*'*120
for result in response['ResultItems']:
    if result['Type'] != 'DOCUMENT':
        continue  # only DOCUMENT results are shown in this cell
    if 'DocumentTitle' in result:
        print('Title: ' + result['DocumentTitle']['Text'])
    print(result['DocumentExcerpt']['Text'])
    print(separator)


Search results for query: what are the political parties present?

Title: 417
...Lib Dems unveil election slogan

The Liberal Democrats will present themselves as "the real alternative" in the forthcoming general election campaign, Charles Kennedy has said.

Unveiling the slogan at the party's...
************************************************************************************************************************
Title: 111
...Under the present electoral system, people must work together, and small parties have no hope of representation. Last summer, UKIP achieved a major advance, partly and only partly due to Kilroy...
************************************************************************************************************************
Title: 334
...My concern is that, whilst the Labour Party and the Conservative Party will take a constructive approach to the debate, right-wing political parties, picking up on statements like `burden to Britain' will exploit this and create a lot

In [27]:
# Print every QUESTION_ANSWER-type result: title (when present), excerpt,
# then a row of asterisks as a separator.
print('\nSearch results for query: ' + query + '\n')

separator = '*'*120
for result in response['ResultItems']:
    if result['Type'] != 'QUESTION_ANSWER':
        continue  # only QUESTION_ANSWER results are shown in this cell
    if 'DocumentTitle' in result:
        print('Title: ' + result['DocumentTitle']['Text'])
    print(result['DocumentExcerpt']['Text'])
    print(separator)


Search results for query: what are the political parties present?



# Query2

In [28]:
query = 'what are the labor targets?'  # from doc 30

# Send the natural-language question to the index and echo the full response.
response = client.query(QueryText=query, IndexId=index_id)
response

{'QueryId': 'ce6aa13c-807c-4e3d-b970-194e8a0b5974',
 'ResultItems': [{'Id': 'ce6aa13c-807c-4e3d-b970-194e8a0b5974-ae8ee7b6-6e8e-42f8-af4e-b5724fe13ffb',
   'Type': 'DOCUMENT',
   'AdditionalAttributes': [],
   'DocumentId': 's3://politics-text-data/396.txt',
   'DocumentTitle': {'Text': '396', 'Highlights': []},
   'DocumentExcerpt': {'Text': '...to their last known voting intention, ranging from "Labour (firm)" to "target (Conservative)" - those who supported the party in the past but this time will be voting Tory. The newsletter says: "Using the information we know about people, we can send them direct mailings...',
    'Highlights': []},
   'DocumentURI': 'https://s3.us-west-2.amazonaws.com/politics-text-data/396.txt',
   'DocumentAttributes': [{'Key': '_source_uri',
     'Value': {'StringValue': 'https://s3.us-west-2.amazonaws.com/politics-text-data/396.txt'}}]},
  {'Id': 'ce6aa13c-807c-4e3d-b970-194e8a0b5974-752b988b-cf40-4b63-96b1-b1d015b3a8ed',
   'Type': 'DOCUMENT',
   'Additio

In [29]:
# Print the title (when present) and the excerpt of every ANSWER-type result.
print('\nSearch results for query: ' + query + '\n')

for result in response['ResultItems']:
    if result['Type'] != 'ANSWER':
        continue  # only ANSWER results are shown in this cell
    if 'DocumentTitle' in result:
        print('Title: ' + result['DocumentTitle']['Text'])
    print(result['DocumentExcerpt']['Text'])


Search results for query: what are the labor targets?



In [30]:
# Print every DOCUMENT-type result: title (when present), excerpt, then a
# row of asterisks as a separator.
print('\nSearch results for query: ' + query + '\n')

separator = '*'*120
for result in response['ResultItems']:
    if result['Type'] != 'DOCUMENT':
        continue  # only DOCUMENT results are shown in this cell
    if 'DocumentTitle' in result:
        print('Title: ' + result['DocumentTitle']['Text'])
    print(result['DocumentExcerpt']['Text'])
    print(separator)


Search results for query: what are the labor targets?

Title: 396
...to their last known voting intention, ranging from "Labour (firm)" to "target (Conservative)" - those who supported the party in the past but this time will be voting Tory. The newsletter says: "Using the information we know about people, we can send them direct mailings...
************************************************************************************************************************
Title: 030
...Labour targets 'hardcore truants'

A fresh crackdown on persistent truants in England has been launched by Education Secretary Ruth Kelly.

Serial truants make...
************************************************************************************************************************
Title: 213
...cap on aspiration, closing the door to students with good grades and restricting their life ambitions. "They are committed to abandoning Labour's targets of getting 50% of 18 to 30-year-olds going into higher education and

In [31]:
# Print every QUESTION_ANSWER-type result: title (when present), excerpt,
# then a row of asterisks as a separator.
print('\nSearch results for query: ' + query + '\n')

separator = '*'*120
for result in response['ResultItems']:
    if result['Type'] != 'QUESTION_ANSWER':
        continue  # only QUESTION_ANSWER results are shown in this cell
    if 'DocumentTitle' in result:
        print('Title: ' + result['DocumentTitle']['Text'])
    print(result['DocumentExcerpt']['Text'])
    print(separator)


Search results for query: what are the labor targets?



# Query3

In [33]:
query = 'why the Blair rejects Iraq advice calls?'  # from doc 184

# Send the natural-language question to the index and echo the full response.
response = client.query(QueryText=query, IndexId=index_id)
response

{'QueryId': 'e7feb79d-6b1b-4714-b7c9-e7fb42ac28fa',
 'ResultItems': [{'Id': 'e7feb79d-6b1b-4714-b7c9-e7fb42ac28fa-85905144-4b7b-4a04-940e-e9a99796d369',
   'Type': 'ANSWER',
   'AdditionalAttributes': [{'Key': 'AnswerText',
     'ValueType': 'TEXT_WITH_HIGHLIGHTS_VALUE',
     'Value': {'TextWithHighlightsValue': {'Text': 'Blair rejects Iraq advice calls\n\nTony Blair has rejected calls for the publication of advice on the legality of the Iraq war amid growing calls for an investigation.\n\nThe prime minister told his monthly press conference the matter had been dealt with by the Attorney General. Earlier, Conservative MP Michael Mates joined calls for a probe into claims Lord Goldsmith\'s statement to Parliament was drawn up at Number 10. Mr Blair said the statement was a "fair summary" of Lord Goldsmith\'s opinion.\n\n"That\'s what he (Lord Goldsmith) said and that\'s what I say.',
       'Highlights': [{'BeginOffset': 33,
         'EndOffset': 164,
         'TopAnswer': False},
     

In [34]:
# Print the title (when present) and the excerpt of every ANSWER-type result.
print('\nSearch results for query: ' + query + '\n')

for result in response['ResultItems']:
    if result['Type'] != 'ANSWER':
        continue  # only ANSWER results are shown in this cell
    if 'DocumentTitle' in result:
        print('Title: ' + result['DocumentTitle']['Text'])
    print(result['DocumentExcerpt']['Text'])


Search results for query: why the Blair rejects Iraq advice calls?

Title: 184
Blair rejects Iraq advice calls

Tony Blair has rejected calls for the publication of advice on the legality of the Iraq war amid growing calls for an investigation.

The prime minister told his monthly press conference the matter had been dealt with by the Attorney General. Earlier, Conservative MP


In [35]:
# Print every DOCUMENT-type result: title (when present), excerpt, then a
# row of asterisks as a separator.
print('\nSearch results for query: ' + query + '\n')

separator = '*'*120
for result in response['ResultItems']:
    if result['Type'] != 'DOCUMENT':
        continue  # only DOCUMENT results are shown in this cell
    if 'DocumentTitle' in result:
        print('Title: ' + result['DocumentTitle']['Text'])
    print(result['DocumentExcerpt']['Text'])
    print(separator)


Search results for query: why the Blair rejects Iraq advice calls?

Title: 184
...Blair rejects Iraq advice calls

Tony Blair has rejected calls for the publication of advice on the legality of the Iraq war amid growing calls for an investigation.

The...
************************************************************************************************************************
Title: 324
...Iraq advice claim sparks new row

The Tories say ministers must respond in Parliament to claims that the legal advice used to justify the Iraq war was drawn up at Number 10.

Downing Street has denied the...
************************************************************************************************************************
Title: 327
...the Iraq war was drafted by Downing Street officials.

Lord Goldsmith said Lord Falconer and Baroness Morgan played no part in drafting the answer. He added the answer represented his view that the war was legal, but was not a summary of his advice to the PM. The g

In [36]:
# Print every QUESTION_ANSWER-type result: title (when present), excerpt,
# then a row of asterisks as a separator.
print('\nSearch results for query: ' + query + '\n')

separator = '*'*120
for result in response['ResultItems']:
    if result['Type'] != 'QUESTION_ANSWER':
        continue  # only QUESTION_ANSWER results are shown in this cell
    if 'DocumentTitle' in result:
        print('Title: ' + result['DocumentTitle']['Text'])
    print(result['DocumentExcerpt']['Text'])
    print(separator)


Search results for query: why the Blair rejects Iraq advice calls?



###### Store the frequently asked questions (FAQ) in the index

In [40]:
# Register the FAQ file stored in S3 with the index.
faq_s3_location = {
    'Bucket': "frequent-asked-queries",
    'Key': 'faqs_for_kendra.csv',
}

frequently_asked_ques_response = client.create_faq(
    IndexId=index_id,
    Name='frequnently-asked-questions',
    Description='most typed queries',
    S3Path=faq_s3_location,
    RoleArn='arn:aws:iam::353664287185:role/kendra_project2',
)

In [41]:
# Display the raw create_faq response.
frequently_asked_ques_response

{'Id': 'a2790014-669b-43fd-8f01-4ee8d6f5404f',
 'ResponseMetadata': {'RequestId': '6a210014-ba50-4301-8f63-5480509b1c5d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '6a210014-ba50-4301-8f63-5480509b1c5d',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '45',
   'date': 'Mon, 29 Jun 2020 07:50:48 GMT'},
  'RetryAttempts': 1}}

In [42]:
# Keep the FAQ Id so the FAQ can be deleted during clean-up.
faq_id = frequently_asked_ques_response['Id']

# Deleting all the created resources in order to keep billing under control

###### deleting the faq

In [43]:
# Remove the FAQ from the index.
delete_faq_response = client.delete_faq(
    IndexId=index_id,
    Id=faq_id,
)

In [44]:
# Display the delete_faq response.
delete_faq_response

{'ResponseMetadata': {'RequestId': 'a1efb1e9-1b12-4339-aae5-86f4c778f094',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'a1efb1e9-1b12-4339-aae5-86f4c778f094',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Mon, 29 Jun 2020 07:54:30 GMT'},
  'RetryAttempts': 2}}

##### Deleting the data_sources

In [45]:
# Remove the S3 data source from the index.
delete_data_source_response = client.delete_data_source(
    IndexId=index_id,
    Id=data_source_id,
)

In [46]:
# Display the delete_data_source response.
delete_data_source_response

{'ResponseMetadata': {'RequestId': '8e101c93-d7f3-419d-ad1a-7793f6e55a53',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '8e101c93-d7f3-419d-ad1a-7793f6e55a53',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Mon, 29 Jun 2020 07:54:34 GMT'},
  'RetryAttempts': 0}}

###### Deleting the index

In [47]:
# Finally delete the index itself.
index_delete_response = client.delete_index(Id=index_id)

In [48]:
# Display the delete_index response.
index_delete_response

{'ResponseMetadata': {'RequestId': '467b81e3-35a9-451f-9a2f-63dd693638df',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '467b81e3-35a9-451f-9a2f-63dd693638df',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Mon, 29 Jun 2020 07:54:38 GMT'},
  'RetryAttempts': 0}}