# Create a Search Service
### Create a Search Service to search for all unstructured data

As a reminder, we have already processed the unstructured sentiment data from the analyst reports.

In [None]:
-- Preview every column of the previously processed analyst sentiment table.
SELECT
    *
FROM default_schema.ai_transcripts_analysts_sentiments;

Here, a view is created to combine the sentiment scores with the transcripts, rendered as well-formatted text, for the search service.

In [None]:
-- View that pairs each analyst sentiment row with a readable rendering of the
-- matching transcript. The transcript JSON is flattened into lines of the form
-- "Name (Role): text", joined by blank lines, so the text can be indexed by a
-- search service.
CREATE OR REPLACE VIEW DEFAULT_SCHEMA.SENTIMENT_WITH_TRANSCRIPTS_FOR_SEARCH AS

WITH
-- Parse the transcript column as JSON once so later steps can navigate it.
parsed_transcripts AS (
    SELECT
        primary_ticker,
        event_timestamp,
        event_type,
        created_at,
        PARSE_JSON(transcript) AS transcript_json
    FROM default_schema.unique_transcripts
),
-- One OBJECT per (primary_ticker, event_timestamp), keyed by the speaker id
-- found in speaker_mapping, holding that speaker's name, role and company.
speaker_lookup AS (
    SELECT
        primary_ticker,
        event_timestamp,
        OBJECT_AGG(
            speaker_data.value:speaker::STRING,
            OBJECT_CONSTRUCT(
                'name', speaker_data.value:speaker_data.name::STRING,
                'role', speaker_data.value:speaker_data.role::STRING,
                'company', speaker_data.value:speaker_data.company::STRING
            )
        ) AS speakers
    FROM parsed_transcripts,
    LATERAL FLATTEN(input => transcript_json:speaker_mapping) speaker_data
    GROUP BY primary_ticker, event_timestamp
),
-- Rebuild each transcript as a single text value, one line per entry of
-- parsed_transcript, kept in the original entry order.
formatted_transcripts AS (
    SELECT
        p.primary_ticker,
        p.event_timestamp,
        p.event_type,
        p.created_at,
        LISTAGG(
            -- `||` returns NULL when any operand is NULL and LISTAGG skips NULL
            -- inputs, so without a fallback an utterance whose speaker has no
            -- name in the lookup would silently disappear from the transcript.
            -- Fall back to the raw speaker id, then to a fixed label.
            COALESCE(
                s.speakers[parsed_entry.value:speaker::STRING]:name::STRING,
                parsed_entry.value:speaker::STRING,
                'Unknown speaker'
            ) ||
            CASE
                WHEN s.speakers[parsed_entry.value:speaker::STRING]:role::STRING IS NOT NULL
                THEN ' (' || s.speakers[parsed_entry.value:speaker::STRING]:role::STRING || '): '
                ELSE ': '
            END ||
            parsed_entry.value:text::STRING,
            '\n\n'
        ) WITHIN GROUP (ORDER BY parsed_entry.index) AS full_transcript_text
    FROM parsed_transcripts p
    JOIN speaker_lookup s ON p.primary_ticker = s.primary_ticker AND p.event_timestamp = s.event_timestamp
    CROSS JOIN LATERAL FLATTEN(input => p.transcript_json:parsed_transcript) parsed_entry
    GROUP BY p.primary_ticker, p.event_timestamp, p.event_type, p.created_at
)
-- NOTE(review): NATURAL JOIN matches on every column name the two inputs share.
-- The sentiment table's full column list is not visible here; confirm the shared
-- set is the intended key before replacing this with an explicit ON clause.
SELECT
    primary_ticker,
    b.sentiment_reason,
    b.unique_analyst_count,
    b.sentiment_score,
    b.event_timestamp,
    event_type,
    created_at,
    full_transcript_text,
    LENGTH(full_transcript_text) AS transcript_length
FROM formatted_transcripts

natural join

default_schema.ai_transcripts_analysts_sentiments b
;

-- Quick sanity check of the view output.
select * from DEFAULT_SCHEMA.SENTIMENT_WITH_TRANSCRIPTS_FOR_SEARCH limit 10

In [None]:
-- Cortex Search service over the formatted transcript text, with the ticker and
-- analyst sentiment fields exposed as attributes.
--
-- The source query reads the view directly. Every selected column is produced by
-- SENTIMENT_WITH_TRANSCRIPTS_FOR_SEARCH, which is itself built from
-- unique_transcripts, so joining back to that table adds no columns; it could
-- only drop rows whose join keys are NULL or duplicate rows when keys repeat.
CREATE OR REPLACE CORTEX SEARCH SERVICE default_schema.dow_analysts_sentiment_analysis
ON FULL_TRANSCRIPT_TEXT
ATTRIBUTES primary_ticker, unique_analyst_count, sentiment_score, sentiment_reason
WAREHOUSE = default_wh
TARGET_LAG = '1 hour'
AS (
    SELECT
        sentiment_reason,
        primary_ticker,
        unique_analyst_count,
        sentiment_score,
        event_timestamp,
        FULL_TRANSCRIPT_TEXT
    FROM DEFAULT_SCHEMA.SENTIMENT_WITH_TRANSCRIPTS_FOR_SEARCH
);

We have also transcribed the full earnings calls, just for Snowflake, and then chunked them into larger chunks. We will exclude the EMBED column, as embedding will be handled as part of the search service. This search service will also be supplied with a URL, which can be used in an Agent. The presigned URL has an expiry of 7 days, so below, a task is created to refresh the URLs every 5 days.

In [None]:
-- Build FULL_TRANSCRIPTS from call_embeds: every column except EMBED, plus a
-- presigned URL for each file on the EARNINGS_CALLS stage.
-- 604800 seconds = 7 days of URL validity.
CREATE OR REPLACE TABLE DEFAULT_SCHEMA.FULL_TRANSCRIPTS AS
SELECT
    * EXCLUDE EMBED,
    GET_PRESIGNED_URL(@DOCUMENT_AI.EARNINGS_CALLS, RELATIVE_PATH, 604800) AS URL
FROM DEFAULT_SCHEMA.call_embeds;

-- Scheduled rebuild of the same table so the URLs are regenerated before the
-- 7-day expiry (7200 minutes = 5 days).
-- NOTE(review): each run replaces the table object; confirm the search service
-- built on FULL_TRANSCRIPTS tolerates its base table being recreated.
CREATE OR REPLACE TASK DEFAULT_SCHEMA.refresh_full_transcripts_task
    WAREHOUSE = DEFAULT_WH
    SCHEDULE = '7200 MINUTE'
    COMMENT = 'Refreshes FULL_TRANSCRIPTS table every 5 days to regenerate presigned URLs which expire after 7 days'
AS
    CREATE OR REPLACE TABLE DEFAULT_SCHEMA.FULL_TRANSCRIPTS AS
    SELECT
        * EXCLUDE EMBED,
        GET_PRESIGNED_URL(@DOCUMENT_AI.EARNINGS_CALLS, RELATIVE_PATH, 604800) AS URL
    FROM DEFAULT_SCHEMA.call_embeds;

-- Tasks are created suspended; enable the schedule, then trigger one run now.
ALTER TASK DEFAULT_SCHEMA.refresh_full_transcripts_task RESUME;

EXECUTE TASK DEFAULT_SCHEMA.refresh_full_transcripts_task;

-- Inspect the resulting table.
SELECT
    *
FROM DEFAULT_SCHEMA.FULL_TRANSCRIPTS

In [None]:
-- Cortex Search service over the TEXT column of FULL_TRANSCRIPTS. The URL,
-- sentiment, summary and path columns are exposed as attributes.
CREATE OR REPLACE CORTEX SEARCH SERVICE default_schema.snow_full_earnings_calls
    ON TEXT
    ATTRIBUTES URL, SENTIMENT_SCORE, SENTIMENT, SUMMARY, RELATIVE_PATH
    WAREHOUSE = default_wh
    TARGET_LAG = '1 hour'
AS
    SELECT
        *
    FROM DEFAULT_SCHEMA.FULL_TRANSCRIPTS;

### Create A Search Service for Analyst Reports

I would like the user to be able to download the original report, so as well as creating a table to reveal the content, I would like the presigned URL to be kept fresh. Therefore, as before, a task is created to regenerate the URLs every 5 days.

In [None]:
-- Build ANALYST_REPORTS from ANALYST_REPORTS_ALL_DATA, adding a presigned URL
-- for each file on the ANALYST_REPORTS stage (604800 seconds = 7 days).
-- NOTE(review): this initial build selects every column, while the refresh task
-- below uses `* EXCLUDE EMBED`. Confirm whether the source has an EMBED column
-- and make the two statements agree.
CREATE OR REPLACE TABLE DEFAULT_SCHEMA.ANALYST_REPORTS AS
SELECT
    *,
    GET_PRESIGNED_URL(@DOCUMENT_AI.ANALYST_REPORTS, RELATIVE_PATH, 604800) AS URL
FROM ACCELERATE_AI_IN_FSI.DOCUMENT_AI.ANALYST_REPORTS_ALL_DATA;

-- Scheduled rebuild so the URLs are regenerated before the 7-day expiry
-- (7200 minutes = 5 days).
CREATE OR REPLACE TASK DEFAULT_SCHEMA.refresh_analyst_reports_url_task
    WAREHOUSE = DEFAULT_WH
    SCHEDULE = '7200 MINUTE'
    COMMENT = 'Refreshes url for analyst reports table every 5 days to regenerate presigned URLs which expire after 7 days'
AS
    CREATE OR REPLACE TABLE DEFAULT_SCHEMA.ANALYST_REPORTS AS
    SELECT
        * EXCLUDE EMBED,
        GET_PRESIGNED_URL(@DOCUMENT_AI.ANALYST_REPORTS, RELATIVE_PATH, 604800) AS URL
    FROM ACCELERATE_AI_IN_FSI.DOCUMENT_AI.ANALYST_REPORTS_ALL_DATA;

-- Enable the schedule, then trigger one run immediately.
ALTER TASK DEFAULT_SCHEMA.refresh_analyst_reports_url_task RESUME;

EXECUTE TASK DEFAULT_SCHEMA.refresh_analyst_reports_url_task;

-- Inspect the resulting table.
SELECT
    *
FROM DEFAULT_SCHEMA.ANALYST_REPORTS


Next, we will create a search service for the analyst reports.

In [None]:
-- Cortex Search service over the FULL_TEXT column of ANALYST_REPORTS. The
-- provider, document, summary, rating, date, path and URL columns are exposed
-- as attributes.
CREATE OR REPLACE CORTEX SEARCH SERVICE default_schema.ANALYST_REPORTS_SEARCH
    ON FULL_TEXT
    ATTRIBUTES
        URL,
        NAME_OF_REPORT_PROVIDER,
        DOCUMENT_TYPE,
        DOCUMENT,
        SUMMARY,
        RELATIVE_PATH,
        RATING,
        DATE_REPORT
    WAREHOUSE = default_wh
    TARGET_LAG = '1 hour'
AS
    SELECT
        *
    FROM DEFAULT_SCHEMA.ANALYST_REPORTS;

#### CREATE A SECOND SEARCH SERVICE FOR THE INFOGRAPHICS
While processing the infographics, we used multimodal image processing to generate summaries of the KPIs. Let's add the text to a second search service.

In [None]:
-- Build INFOGRAPHICS_FOR_SEARCH from VW_INFOGRAPHIC_METRICS, adding a presigned
-- URL for each file on the INFOGRAPHICS stage.
-- The expiry is passed explicitly (604800 seconds = 7 days). When it is omitted,
-- GET_PRESIGNED_URL uses its one-hour default, so the URLs would expire long
-- before the 5-day refresh that the task below is scheduled for.
CREATE OR REPLACE TABLE DEFAULT_SCHEMA.INFOGRAPHICS_FOR_SEARCH AS
SELECT
    *,
    GET_PRESIGNED_URL(@ACCELERATE_AI_IN_FSI.DOCUMENT_AI.INFOGRAPHICS, RELATIVE_PATH, 604800) AS URL
FROM ACCELERATE_AI_IN_FSI.DOCUMENT_AI.VW_INFOGRAPHIC_METRICS;

-- Scheduled rebuild so the URLs are regenerated before the 7-day expiry
-- (7200 minutes = 5 days).
CREATE OR REPLACE TASK DEFAULT_SCHEMA.refresh_infographics
    WAREHOUSE = DEFAULT_WH
    SCHEDULE = '7200 MINUTE'
    COMMENT = 'Refreshes url for infogrphics table every 5 days to regenerate presigned URLs which expire after 7 days'
AS
    CREATE OR REPLACE TABLE DEFAULT_SCHEMA.INFOGRAPHICS_FOR_SEARCH AS
    SELECT
        *,
        GET_PRESIGNED_URL(@ACCELERATE_AI_IN_FSI.DOCUMENT_AI.INFOGRAPHICS, RELATIVE_PATH, 604800) AS URL
    FROM ACCELERATE_AI_IN_FSI.DOCUMENT_AI.VW_INFOGRAPHIC_METRICS;

-- Enable the schedule (tasks are created suspended).
ALTER TASK DEFAULT_SCHEMA.refresh_infographics RESUME;

-- Inspect the resulting table.
SELECT * FROM DEFAULT_SCHEMA.INFOGRAPHICS_FOR_SEARCH

In [None]:
-- Cortex Search service over INFOGRAPHICS_FOR_SEARCH.
-- NOTE(review): the service indexes the BRANDING column, while TEXT is exposed
-- only as an attribute; confirm BRANDING is the intended search column.
CREATE OR REPLACE CORTEX SEARCH SERVICE default_schema.INFOGRAPHICS_SEARCH
    ON BRANDING
    ATTRIBUTES
        URL,
        COMPANY_TICKER,
        RELATIVE_PATH,
        TEXT,
        COMPANY_NAME,
        REPORT_PERIOD,
        TICKER
    WAREHOUSE = default_wh
    TARGET_LAG = '1 hour'
AS
    SELECT
        *
    FROM DEFAULT_SCHEMA.INFOGRAPHICS_FOR_SEARCH;

### Create a Search Service for Emails
Finally, let's create a search service for the emails

In [None]:
-- Preview every column of the extracted email table.
SELECT
    *
FROM ACCELERATE_AI_IN_FSI.DEFAULT_SCHEMA.EMAIL_PREVIEWS_EXTRACTED;

In [None]:
-- Cortex Search service over the HTML_CONTENT column of the extracted emails.
-- The ticker, rating, sentiment, subject, creation time and recipient columns
-- are exposed as attributes.
CREATE OR REPLACE CORTEX SEARCH SERVICE default_schema.EMAILS
    ON HTML_CONTENT
    ATTRIBUTES
        TICKER,
        RATING,
        SENTIMENT,
        SUBJECT,
        CREATED_AT,
        RECIPIENT_EMAIL
    WAREHOUSE = default_wh
    TARGET_LAG = '1 hour'
AS
    SELECT
        *
    FROM ACCELERATE_AI_IN_FSI.DEFAULT_SCHEMA.EMAIL_PREVIEWS_EXTRACTED;

The search services will be used in a Cortex Agent at the end of the lab. Now it's time to bring in structured data. Please proceed to Cortex Analyst.