## Setup

In [6]:
# Connect using pyodbc, sqlalchemy, and pandas
import sqlalchemy
import numpy as np
import pandas as pd

server = "sqlsvr-0092-mdp-02.85f8a2f57eaf.database.windows.net"
database = "Staging"
username = "pisrc-inkoo"
# with open("./db-pass.secret") as f:
#     password = f.read()
password = input("Enter database password: ")
driver = "ODBC Driver 17 for SQL Server"

engine = sqlalchemy.create_engine(
    f"mssql+pyodbc://{username}:{password}@{server}/{database}?driver={driver.replace(' ', '+')}"
)


def query_db(query, params=None):
    return pd.read_sql(query, engine, params=params)


df = query_db("SELECT @@version;")
print(df[""][0])


Microsoft SQL Azure (RTM) - 12.0.2000.8 
	Apr 18 2022 13:01:43 
	Copyright (C) 2021 Microsoft Corporation



## Database information

In [7]:
# Search for database column
column_name = "binge"
query_db(
    """
    SELECT
        c.name AS ColumnName,
        t.name AS TableName
    FROM sys.columns c
    JOIN sys.tables t
    ON c.object_id = t.object_id
    WHERE LOWER(c.name) LIKE ?
    ORDER BY
        TableName,
        ColumnName;
    """,
    [f"%{column_name}%"],
)


Unnamed: 0,ColumnName,TableName
0,BingeCampaignId,RawTraffic
1,BingeCriticalScore,RawTraffic
2,BingeId,RawTraffic
3,BingeName,RawTraffic
4,BingeScoredAsset,RawTraffic
5,BingeScoredAssetPath,RawTraffic
6,BingeScoredAssetScore,RawTraffic


In [8]:
# newly modified tables
query_db(
    """
    SELECT
        name,
        create_date,
        modify_date,
        type
    FROM sys.tables
    WHERE modify_date > '2022-04-01'
    ORDER BY modify_date
    """
)


Unnamed: 0,name,create_date,modify_date,type
0,Transactions,2022-03-30 17:55:50.587,2022-04-05 16:44:42.230,U
1,AccountExtensionBase,2021-09-18 23:04:02.477,2022-04-12 20:51:44.090,U
2,Contact,2021-03-18 15:51:04.023,2022-04-12 20:53:49.530,U
3,MSCRMOnlineAllLeads,2022-04-15 15:57:39.070,2022-04-15 15:57:39.430,U
4,stgMSCRMOnlineAllLeads,2022-04-15 15:57:39.447,2022-04-15 15:57:39.553,U
5,stgMSCRMOnlineAllLeadsSystemFields,2022-04-15 15:57:39.560,2022-04-15 15:57:39.560,U
6,Lead,2021-09-18 23:06:06.540,2022-05-03 13:14:58.053,U
7,Oppty,2021-09-18 23:08:34.327,2022-05-03 13:14:58.083,U
8,OpptyLineItem,2021-09-22 13:06:03.837,2022-05-03 13:14:58.097,U
9,stgLead,2022-05-03 13:14:58.143,2022-05-03 13:14:58.670,U


In [9]:
# Table indices
table_name = 'Staging.aem.RawTraffic'
query_db(
    """
    SELECT
        i.name,
        COL_NAME(c.object_id, c.column_id)
    FROM
        sys.indexes AS i,
        sys.index_columns AS c
    WHERE i.object_id = c.object_id
        AND i.index_id = c.index_id
        AND i.object_id = OBJECT_ID(?)
    """,
    [table_name]
)


Unnamed: 0,name,Unnamed: 2
0,pk_adobeTraffic,SessionVisitorId
1,pk_adobeTraffic,VisitStartDateTime
2,pk_adobeTraffic,VisitPageNumber
3,idx_uploadTime,UploadedAt
4,idx_utmMediumSource,UTM_Medium
5,idx_utmMediumSource,UTM_Source
6,idx_eloquacontactid,EloquaContactId
7,idx_eloquacontactid,mcvisid


## Eloqua Data 

In [50]:
query_db(
    """
    SELECT TOP 5
        EloquaContactId,
        DateCreated,
        DateModified,
        EmailAddress,
        FirstName,
        LastName,
        Company,
        ElqAccountLinkage,
        EMEAregion,
        Industry,
        LanguagePreference,
        SalesAPR,
        ConversionSourceOriginal,
        DCRMAccountId,
        MAPCRMAccountId
    FROM elq.Contact
    WHERE DateModified > '2022-02-22';
    """
)


Unnamed: 0,EloquaContactId,DateCreated,DateModified,EmailAddress,FirstName,LastName,Company,ElqAccountLinkage,EMEAregion,Industry,LanguagePreference,SalesAPR,ConversionSourceOriginal,DCRMAccountId,MAPCRMAccountId
0,CRACP000000000012,2012-03-14 11:42:49.803,2022-02-22 21:35:50.133,cearnshaw@ledger-bennett.co.uk,Chris,Earnshaw,Ledger Bennett,not_linked,UK,OEM,English,,,,
1,CRACP000000000026,2012-03-27 22:08:11.763,2022-02-22 16:04:21.910,fabien.roche@thimonnier.fr,Fabien,ROCHE,THIMONNIER,1b368dda-6617-e711-80f5-fc15b428da60,France,Food & Beverage,French,,Webinar | North American Standards 1/4 | EN,1B368DDA-6617-E711-80F5-FC15B428DA60,4A9D2620-764F-E611-9AE3-78E3B5166060
2,CRACP000000000038,2012-03-27 22:08:11.763,2022-02-22 16:04:21.910,a.adamczyk@evb-technik.net,Adam,Adamczyk,EVB Technik GmbH,9bd94061-8663-e711-8170-e0071b715bc1,Germany,Oil & Gas,German,,,9BD94061-8663-E711-8170-E0071B715BC1,1BB0534C-764F-E611-9AE3-78E3B5166060
3,CRACP000000000051,2012-03-27 22:08:11.763,2022-02-22 16:04:21.910,a.baginski@tecma-aries.com,Alexandre,BAGINSKI,ARIES PACKAGING,099392d4-6617-e711-80f5-fc15b428da60,France,Food & Beverage,French,,,099392D4-6617-E711-80F5-FC15B428DA60,F9DF4B52-764F-E611-9AE3-78E3B5166060
4,CRACP000000000063,2012-03-27 22:08:11.763,2022-02-22 16:04:21.910,a.behrendt@hermes-systeme.de,Andreas,Behrendt,HERMES Systeme GmbH,f85fc169-8e63-e711-8108-e0071b71f9b1,Germany,,German,,,F85FC169-8E63-E711-8108-E0071B71F9B1,A6CD1E26-764F-E611-9AE3-78E3B5166060


In [2]:
# elq.Contact table has duplicate entries for some email addresses:
query_db(
    """
    SELECT
        e.EloquaContactId,
        e.EmailAddress,
        e.FirstName,
        e.LastName,
        e.Company,
        e.Industry
    FROM
        elq.Contact AS e,
        (
            SELECT e.EmailAddress
            FROM elq.Contact AS e
            GROUP BY EmailAddress
            HAVING COUNT(EmailAddress) > 1
        ) AS x
    WHERE e.EmailAddress = x.EmailAddress
    ORDER BY e.EmailAddress
    """
)


Unnamed: 0,EloquaContactId,EmailAddress,FirstName,LastName,Company,Industry
0,CRACP000002957265,!BURGERM@SAMCOTECH.COM,MATT,BURGER,SAMCO TECHNOLOGIES INC,
1,CRACP000011807951,!burgerm@samcotech.com,MATT,BURGER,0006929356,
2,CRACP000014781571,$_moraled5@baxter.com,DAVITA,MORALES,BAXTER HEALTH GUAYAMA,
3,CRACP000012790078,$_moraled5@baxter.com,DAVITA,MORALES,CARVAJAL PULPA Y PAPEL SA,
4,CRACP000000023518,***wthissen@beldick.nl,W.,Thissen,Beldick Automation International B.V.,Oil & Gas
...,...,...,...,...,...,...
3001637,CRACP000011699669,zzuno@chemcut.net,ZACHARY,ZUNO,Chemcut Corp.,
3001638,CRACP000014446901,zzurek@flowserve.com,Aric,Zurek,Flowserve,
3001639,CRACP000013610990,zzurek@flowserve.com,Aric,Zurek,Flowserve,
3001640,CRACP000014500381,zzuspan@flowserve.com,Jeff,Zuspan,Flowserve,


## Adobe Analytics Data

In [27]:
# aem.RawTraffic table has >111 million rows, need to be careful with queries
query_db(
    """
    SELECT TOP 5
        SessionVisitorId,
        VisitStartDateTime,
        EventList,
        PageURL,
        PageName,
        External_Id,
        External_Audience,
        External_AudienceSegment,
        EloquaContactId,
        UTM_Source,
        UTM_Medium,
        UTM_Campaign,
        UTM_Content,
        UTM_Term
    FROM aem.RawTraffic
    WHERE VisitStartDateTime > '2022-02-22'
        AND EloquaContactId <> '';
    """
)


Unnamed: 0,SessionVisitorId,VisitStartDateTime,EventList,PageURL,PageName,External_Id,External_Audience,External_AudienceSegment,EloquaContactId,UTM_Source,UTM_Medium,UTM_Campaign,UTM_Content,UTM_Term
0,5867056734229863082_2480410543252301598,2022-02-22 00:01:01,"279,200,20,100,101,102,103,105,106,108,111,112...",https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,59389591,Enterprise Business,,CRACP000000863002,Other,Other,,,
1,5867056734229863082_2480410543252301598,2022-02-22 00:01:01,"201=3864.00,202=103.00,203=64.00,204=4387.00,2...",https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,59389591,Enterprise Business,,CRACP000000863002,Other,Other,,,
2,5867056734229863082_2480410543252301598,2022-02-22 00:01:01,241142500501502503113,https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,59389591,Enterprise Business,,CRACP000000863002,Other,Other,,,
3,5867056734229863082_2480410543252301598,2022-02-22 00:01:01,241142500501502503113,https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,59389591,Enterprise Business,,CRACP000000863002,Other,Other,,,
4,5867056734229863082_2480410543252301598,2022-02-22 00:01:01,241142500501502503113,https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,59389591,Enterprise Business,,CRACP000000863002,Other,Other,,,


In [4]:
# AEM event codes
query_db(
    """
    SELECT *
    FROM aem.InteractionType;"""
)


Unnamed: 0,Code,Desc
0,200,Page Views
1,239,CTA Clicks
2,182,Email Address Identified
3,277,Gate Content Download Events
4,241,Exit Link Clicks
5,260,Video Starts
6,263,Video 50% Complete
7,269,File Downloads


## RA Lead Status

In [13]:
query_db(
    """
    SELECT TOP 5
        emailaddress1,
        createdon,
        ra_leadstage,
        ra_leadstagename,
        statecode,
        statecodename,
        statuscode,
        statuscodename
    FROM crm.Lead;
    """
)

# Leads are entered into crm.Lead table multiple times when status updates
# ra_leadstage does not always seem to agree with ra_...acceptedname and/or ra_...qualifiedname


Unnamed: 0,emailaddress1,createdon,ra_leadstage,ra_leadstagename,statecode,statecodename,statuscode,statuscodename
0,sustarg@milwaukeeforge.com,2022-03-09 20:00:28,4,Awaiting Sales Acceptance,0,Open,1,New
1,teerapanluengnaruemitchai@thaibev.com,2022-03-09 03:49:23,2,Awaiting Tele Acceptance,2,Disqualified,953810009,Does not meet campaign criteria
2,tim.parks@whitepath.com,2022-03-09 19:31:32,5,Awaiting Sales Qualification,0,Open,1,New
3,sherman.wagner@silganpfc.com,2022-03-09 21:04:09,4,Awaiting Sales Acceptance,0,Open,1,New
4,miguel.chavez2@sbdinc.com,2022-03-09 19:35:53,4,Awaiting Sales Acceptance,0,Open,1,New


In [49]:
# Get only the most advanced status
query_db(
    """
    SELECT TOP 5
        emailaddress1,
        MAX(ra_leadstage) as ra_leadstage
    FROM crm.Lead
    WHERE emailaddress1 <> ''
    GROUP BY emailaddress1;
    """
)


Unnamed: 0,emailaddress1,ra_leadstage
0,PNPARIK@GMAIL.COM,5
1,GTHACKER@HOTMAIL.COM,5
2,dragan.mitic@michelin.com,5
3,marco.dutschke@alfatec.de,4
4,apfeifer@c-controls.com,7


In [5]:
# ra_leadstage possible values
query_db(
    """
    SELECT DISTINCT
        ra_leadstage,
        ra_leadstagename
    FROM crm.Lead
    ORDER BY ra_leadstage;
    """
)


Unnamed: 0,ra_leadstage,ra_leadstagename
0,,
1,1.0,Unassigned
2,2.0,Awaiting Tele Acceptance
3,3.0,Awaiting Tele Qualification
4,4.0,Awaiting Sales Acceptance
5,5.0,Awaiting Sales Qualification
6,6.0,Distributor Lead
7,7.0,Qualified
8,8.0,External Lead


In [14]:
query_db(
    """
    SELECT DISTINCT
        statecode,
        statecodename
    FROM crm.Lead
    ORDER BY statecode;
    """
)


Unnamed: 0,statecode,statecodename
0,0,Open
1,1,Qualified
2,2,Disqualified


In [15]:
query_db(
    """
    SELECT DISTINCT
        statuscode,
        statuscodename
    FROM crm.Lead
    ORDER BY statuscode;
    """
)


Unnamed: 0,statuscode,statuscodename
0,1,New
1,2,Contacted
2,3,Qualified
3,4,Not buying or influence location
4,5,Credit hold or watch
5,6,Not Decision Maker
6,7,No Interest
7,953810000,No RA solution
8,953810001,Selling barrier to high
9,953810002,Competitor/Non RA distributor


In [20]:
# crm.AccountExtensionBase has more Rockwell-specific information
query_db(
    """
    SELECT TOP 5
        accountid,
        ra_issalesleadid,
        ra_overallrelationship,
        ra_overallrelationshipname,
        ra_relationshipstatus,
        ra_relationshipstatusname
    FROM crm.AccountExtensionBase
    WHERE ra_overallrelationship <> ''
        AND ra_relationshipstatus <> '';
    """
)


Unnamed: 0,accountid,ra_issalesleadid,ra_overallrelationship,ra_overallrelationshipname,ra_relationshipstatus,ra_relationshipstatusname
0,DDDCF170-12AB-E711-8111-E0071B71F9B1,,953810001,"Moderate to good - some issues exist, but well...",953810000,Buying
1,547B499C-0FAB-E711-8111-E0071B71F9B1,,953810000,Good to excellent - no immediate issues,953810001,Development
2,00F91C47-13AB-E711-8111-E0071B71F9B1,,953810002,Poor - immediate issues exist where business i...,953810000,Buying
3,2BE40CA8-AF3F-EB11-A813-000D3A8DB669,,953810000,Good to excellent - no immediate issues,953810000,Buying
4,7CA53587-AFAE-E911-A962-000D3A3AD193,A6197F25-C975-E611-80E2-C4346BACE0A4,953810000,Good to excellent - no immediate issues,953810000,Buying


## PathFactory data

In [21]:
query_db(
    """
    SELECT TOP 5
        EloquaContactId,
        AssetsViewed,
        ContentJourney,
        EngagementScore,
        EngagementTime,
        ExperienceLookBookId,
        ExperienceName,
        LastViewedContentSourceURL,
        LastViewedContentTitle,
        EventTime,
        SessionStartTimeUTC,
        SessionEndTimeUTC
    FROM elq.PathFactory
    WHERE EloquaContactId <> '';
    """
)


Unnamed: 0,EloquaContactId,AssetsViewed,ContentJourney,EngagementScore,EngagementTime,ExperienceLookBookId,ExperienceName,LastViewedContentSourceURL,LastViewedContentTitle,EventTime,SessionStartTimeUTC,SessionEndTimeUTC
0,CRACP000012402203,1,[manuf-analytics-workbook],0,1,D96FDV,FactoryTalk Analytics: Where Information Tran...,https://app.cdn.lookbookhq.com/lbhq-production...,Get Started With Information Solutions Workbook,2019-04-08 17:32:12,2019-04-08 21:32:11,2019-04-08 21:32:12
1,CRACP000012866788,2,[introducing-ft-innov][ISuite-combined-analyti...,4,79,qmUZE0,InnovationSuite,https://embed.vidyard.com/share/Hd7dh6uxE15Asx...,Explore advanced analytics,2019-04-08 17:49:12,2019-04-08 21:47:53,2019-04-08 21:49:12
2,CRACP000000111436,2,[get_started_mes_metals][journeyvideo],0,46,FJMVa7,MES Metals (T1 clone),http://embed.vidyard.com/share/-mxZ8iWeovNJe5l...,The Journey Toward the Connected Enterprise,2019-04-08 17:53:27,2019-04-08 21:52:41,2019-04-08 21:53:27
3,CRACP000000111436,1,[introducing-ft-innov],0,15,qmUZE0,InnovationSuite,https://embed.vidyard.com/share/iwtGTkozbYVfyH...,Introducing FactoryTalk InnovationSuite powere...,2019-04-08 17:55:51,2019-04-08 21:55:36,2019-04-08 21:55:51
4,CRACP000000109334,7,[analytics-that-matter][factorytalk-analytics-...,9,309,dzU9dW,FactoryTalk Analytics,https://app.cdn.lookbookhq.com/lbhq-production...,Get Started With Information Solutions Workbook,2019-04-08 17:57:05,2019-04-08 21:46:38,2019-04-08 21:57:05


## Joins

In [3]:
query_db(
    """
    SELECT
        e.EmailAddress,
        a.VisitStartDateTime,
        a.EventList,
        a.PageURL,
        a.PageName,
        e.FirstName,
        e.LastName,
        e.Company
    FROM
        aem.RawTraffic as a,
        elq.Contact as e
    WHERE a.VisitStartDateTime > '2022-01-01'
        AND e.EmailAddress = 'inkoo@pisrc.com'
        AND a.EloquaContactId = e.EloquaContactId;
    """
)


Unnamed: 0,EmailAddress,VisitStartDateTime,EventList,PageURL,PageName,FirstName,LastName,Company
0,inkoo@pisrc.com,2022-02-03 23:54:03,"182,183,185,186,187,188,189,190,191,192,193,50...",https://www.rockwellautomation.com/en-us.html,rockwellautomation:homepage,In Koo,Kim,PiSrc LLC
1,inkoo@pisrc.com,2022-02-03 23:54:03,"279,200,20,100,101,105,108,109,110,111,112,114...",https://www.rockwellautomation.com/en-us.html,rockwellautomation:homepage,In Koo,Kim,PiSrc LLC
2,inkoo@pisrc.com,2022-02-03 23:54:03,"201=1752.00,202=9.00,203=1.00,204=1774.00,205=...",https://www.rockwellautomation.com/en-us.html,rockwellautomation:homepage,In Koo,Kim,PiSrc LLC
3,inkoo@pisrc.com,2022-02-04 20:11:04,"279,200,20,100,101,105,108,111,112,114,119,120...",https://www.rockwellautomation.com/en-us.html,rockwellautomation:homepage,In Koo,Kim,PiSrc LLC
4,inkoo@pisrc.com,2022-02-04 20:11:04,"201=2427.00,202=87.00,203=65.00,204=2980.00,20...",https://www.rockwellautomation.com/en-us.html,rockwellautomation:homepage,In Koo,Kim,PiSrc LLC
...,...,...,...,...,...,...,...,...
154,inkoo@pisrc.com,2022-02-10 17:13:05,"201=2541.00,202=132.00,203=131.00,204=2797.00,...",https://www.rockwellautomation.com/en-us.html,rockwellautomation:homepage,In Koo,Kim,PiSrc LLC
155,inkoo@pisrc.com,2022-02-10 17:13:05,"279,200,20,100,101,105,108,109,110,111,112,114...",https://www.rockwellautomation.com/en-us.html,rockwellautomation:homepage,In Koo,Kim,PiSrc LLC
156,inkoo@pisrc.com,2022-02-10 17:13:05,"201=2777.00,202=98.00,203=84.00,204=3144.00,20...",https://www.rockwellautomation.com/en-us.html,rockwellautomation:homepage,In Koo,Kim,PiSrc LLC
157,inkoo@pisrc.com,2022-02-10 17:13:05,"279,200,20,100,101,105,108,111,112,114,119,120...",https://www.rockwellautomation.com/en-us.html,rockwellautomation:homepage,In Koo,Kim,PiSrc LLC


In [None]:
query_db(
    """
    SELECT COUNT(DISTINCT a.EloquaContactId)
    FROM
        aem.RawTraffic as a,
        elq.Contact as e,
        crm.Lead as l
    WHERE a.VisitStartDateTime > '2022-02-01'
        AND a.EloquaContactId = e.EloquaContactId
        AND e.EmailAddress = l.emailaddress1;
    """
)


In [28]:
# WIP join
query_db(
    """
    SELECT TOP 5
        e.EmailAddress,
        a.VisitStartDateTime,
        a.EventList,
        a.PageURL,
        a.PageName,
        e.FirstName,
        e.LastName,
        e.Company,
        l.ra_leadstage,
        l.ra_leadstagename,
        l.ra_teleacceptedname,
        l.ra_telequalifiedname,
        l.ra_salesacceptedname,
        l.ra_salesqualifiedname,
        p.AssetsViewed,
        p.EngagementScore,
        p.EngagementTime,
        p.ExperienceName,
        p.LastViewedContentSourceURL
    FROM
        aem.RawTraffic as a,
        elq.Contact as e,
        crm.Lead as l,
        elq.PathFactory as p
    WHERE a.VisitStartDateTime > '2022-02-01'
        AND a.EloquaContactId = e.EloquaContactId
        AND e.EmailAddress = l.emailaddress1
        AND a.EloquaContactId = p.EloquaContactId;
    """
)


Unnamed: 0,EmailAddress,VisitStartDateTime,EventList,PageURL,PageName,FirstName,LastName,Company,ra_leadstage,ra_leadstagename,ra_teleacceptedname,ra_telequalifiedname,ra_salesacceptedname,ra_salesqualifiedname,AssetsViewed,EngagementScore,EngagementTime,ExperienceName,LastViewedContentSourceURL
0,dmeyer@cimarex.com,2022-02-01 00:13:42,239139500501502503113,https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,4,Awaiting Sales Acceptance,Yes,No,,,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...
1,dmeyer@cimarex.com,2022-02-01 00:13:42,"279,200,20,100,101,102,103,104,105,108,109,110...",https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,4,Awaiting Sales Acceptance,Yes,No,,,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...
2,dmeyer@cimarex.com,2022-02-01 00:13:42,239139500501502503113,https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,5,Awaiting Sales Qualification,,,,Yes,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...
3,dmeyer@cimarex.com,2022-02-01 00:13:42,"201=1605.00,202=65.00,203=60.00,204=1837.00,20...",https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,4,Awaiting Sales Acceptance,Yes,No,,,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...
4,dmeyer@cimarex.com,2022-02-01 00:13:42,"279,200,20,100,101,102,103,104,105,108,109,110...",https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,5,Awaiting Sales Qualification,,,,Yes,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...


In [29]:
query_db(
    """
    SELECT COUNT(DISTINCT e.EmailAddress)
    FROM
        aem.RawTraffic as a,
        elq.Contact as e,
        crm.Lead as l,
        elq.PathFactory as p
    WHERE a.VisitStartDateTime > '2022-02-01'
        AND a.EloquaContactId = e.EloquaContactId
        AND e.EmailAddress = l.emailaddress1
        AND a.EloquaContactId = p.EloquaContactId;
    """
)


Unnamed: 0,Unnamed: 1
0,1021


In [32]:
df = query_db(
    """
    SELECT
        e.EmailAddress,
        a.VisitStartDateTime,
        a.EventList,
        a.PageURL,
        a.PageName,
        e.FirstName,
        e.LastName,
        e.Company,
        l.ra_leadstage,
        l.ra_leadstagename,
        l.ra_teleacceptedname,
        l.ra_telequalifiedname,
        l.ra_salesacceptedname,
        l.ra_salesqualifiedname,
        p.AssetsViewed,
        p.EngagementScore,
        p.EngagementTime,
        p.ExperienceName,
        p.LastViewedContentSourceURL
    FROM
        aem.RawTraffic as a,
        elq.Contact as e,
        crm.Lead as l,
        elq.PathFactory as p
    WHERE a.VisitStartDateTime > '2022-02-10'
        AND a.EloquaContactId = e.EloquaContactId
        AND e.EmailAddress = l.emailaddress1
        AND a.EloquaContactId = p.EloquaContactId
        AND e.EmailAddress = 'dmeyer@cimarex.com';
    """
)
df.to_csv("export.csv")
df


Unnamed: 0,EmailAddress,VisitStartDateTime,EventList,PageURL,PageName,FirstName,LastName,Company,ra_leadstage,ra_leadstagename,ra_teleacceptedname,ra_telequalifiedname,ra_salesacceptedname,ra_salesqualifiedname,AssetsViewed,EngagementScore,EngagementTime,ExperienceName,LastViewedContentSourceURL
0,dmeyer@cimarex.com,2022-02-01 00:13:42,"279,200,20,100,101,102,103,104,105,108,109,110...",https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,4,Awaiting Sales Acceptance,Yes,No,,,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...
1,dmeyer@cimarex.com,2022-02-01 00:13:42,"279,200,20,100,101,102,103,104,105,108,109,110...",https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,5,Awaiting Sales Qualification,,,,Yes,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...
2,dmeyer@cimarex.com,2022-02-01 00:13:42,"201=1605.00,202=65.00,203=60.00,204=1837.00,20...",https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,4,Awaiting Sales Acceptance,Yes,No,,,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...
3,dmeyer@cimarex.com,2022-02-01 00:13:42,"201=1605.00,202=65.00,203=60.00,204=1837.00,20...",https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,5,Awaiting Sales Qualification,,,,Yes,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...
4,dmeyer@cimarex.com,2022-02-01 00:13:42,239139500501502503113,https://www.rockwellautomation.com/en-us/produ...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,4,Awaiting Sales Acceptance,Yes,No,,,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319,dmeyer@cimarex.com,2022-02-22 02:34:11,"279,200,20,100,101,102,103,104,105,108,109,110...",https://www.rockwellautomation.com/en-us/suppo...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,5,Awaiting Sales Qualification,,,,Yes,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...
320,dmeyer@cimarex.com,2022-02-22 02:34:11,500501502503113,https://www.rockwellautomation.com/en-us/suppo...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,4,Awaiting Sales Acceptance,Yes,No,,,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...
321,dmeyer@cimarex.com,2022-02-22 02:34:11,500501502503113,https://www.rockwellautomation.com/en-us/suppo...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,5,Awaiting Sales Qualification,,,,Yes,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...
322,dmeyer@cimarex.com,2022-02-22 02:34:11,269168169170500501502503113,https://www.rockwellautomation.com/en-us/suppo...,rockwellautomation:homepage,David,Meyer,Cimrex Energy,4,Awaiting Sales Acceptance,Yes,No,,,1,0,6,IIoT,https://cdn.pathfactory.com/assets/10724/conte...


In [47]:
df = query_db(
    """
    SELECT
        e.EmailAddress,
        e.FirstName,
        e.LastName,
        e.Company,
        l.ra_leadstage,
        p.AssetsViewed,
        p.EngagementScore,
        p.EngagementTime,
        p.ExperienceName,
        p.LastViewedContentSourceURL,
        p.SessionStartTimeUTC
    FROM
        elq.Contact as e,
        (   
            SELECT emailaddress1, MAX(ra_leadstage) as ra_leadstage
            FROM crm.Lead
            GROUP BY emailaddress1
        ) as l,
        elq.PathFactory as p
    WHERE e.EmailAddress = l.emailaddress1
        AND e.EloquaContactId = p.EloquaContactId;
    """
)
df.to_csv("pathfactory_export.csv")
df


Unnamed: 0,EmailAddress,FirstName,LastName,Company,ra_leadstage,AssetsViewed,EngagementScore,EngagementTime,ExperienceName,LastViewedContentSourceURL,SessionStartTimeUTC
0,gennaro.carbone@incontrol.it,Gennaro,Carbone,in.Control s.r.l.,4,1,0,4,Scalable MES IoTW,https://embed.vidyard.com/share/iwtGTkozbYVfyH...,2019-06-24 16:49:24
1,barnardm@capegate.co.za,Mac,Barnard,Cape Gate Pty Ltd,5,1,0,3,InnovationSuite,https://embed.vidyard.com/share/YeBeHU51o4cE4q...,2019-05-28 15:57:57
2,barnardm@capegate.co.za,Mac,Barnard,Cape Gate Pty Ltd,5,1,1,28,InnovationSuite,https://embed.vidyard.com/share/YeBeHU51o4cE4q...,2019-05-28 14:12:58
3,barnardm@capegate.co.za,Mac,Barnard,Cape Gate Pty Ltd,5,1,1,1591,InnovationSuite,https://embed.vidyard.com/share/YeBeHU51o4cE4q...,2019-05-28 12:04:42
4,dbunting@anglianwater.co.uk,Dan,Bunting,Anglian Water Services Ltd,5,5,10,540,InnovationSuite,https://www.youtube.com/watch?v=wwmmgAw8ZAE,2020-02-26 02:37:01
...,...,...,...,...,...,...,...,...,...,...,...
10822,marosal@Microsoft.com,marcelo,rosalen,Microsoft Corporation,3,1,5,75,Industrial Analytics,https://cdn.pathfactory.com/assets/preprocesse...,2021-12-08 20:02:53
10823,alexandre@gwfsi.com.br,Alexandre,Dos Santos Ferreira,GWF SISTEMAS INDUSTRIAIS,4,2,7,641,IIoT,https://cdn.pathfactory.com/assets/10724/conte...,2021-12-09 03:11:20
10824,nishant.singh@faurecia.com,Nishant,Singh,Faurecia Automotive,4,1,1,37,Digital Transformation,https://www.gartner.com/doc/reprints?id=1-25WL...,2021-12-10 16:14:59
10825,adriana.sukmanaputra@daimler.com,Adriana,Sukmanaputra,PT. Daimler Commercial Vehicles Manufacturing ...,2,1,0,0,Industrial Analytics,https://cdn.pathfactory.com/assets/preprocesse...,2022-02-04 11:44:19


# Dashboard

- Enter user (email)
- How often did they visit
- Most frequently visited pages
- Downloaded files/watched videos
- Glass renderer (data tables)
- Streamlit

In [9]:
df = query_db(
    """
    SELECT
        e.EmailAddress,
        a.VisitStartDateTime,
        a.EventList,
        a.PageURL,
        a.PageName,
        e.FirstName,
        e.LastName,
        e.Company,
        l.ra_leadstage,
        l.ra_leadstagename,
        l.ra_teleacceptedname,
        l.ra_telequalifiedname,
        l.ra_salesacceptedname,
        l.ra_salesqualifiedname
    FROM
        aem.RawTraffic as a,
        elq.Contact as e,
        crm.Lead as l
    WHERE a.VisitStartDateTime > '2022-02-10'
        AND a.EloquaContactId = e.EloquaContactId
        AND e.EmailAddress = l.emailaddress1
        AND a.PageURL = 'https://www.rockwellautomation.com/novo-nordisk.html';
    """
)
df


Unnamed: 0,EmailAddress,VisitStartDateTime,EventList,PageURL,PageName,FirstName,LastName,Company,ra_leadstage,ra_leadstagename,ra_teleacceptedname,ra_telequalifiedname,ra_salesacceptedname,ra_salesqualifiedname
0,skalnikovakatka@gmail.com,2022-02-21 11:45:28,273174175176113,https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Katka,Skalnikova,Skalnikova,1,Unassigned,,,,
1,skalnikovakatka@gmail.com,2022-02-21 11:45:28,"201=14257.00,202=163.00,203=158.00,204=14584.0...",https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Katka,Skalnikova,Skalnikova,1,Unassigned,,,,
2,skalnikovakatka@gmail.com,2022-02-21 11:45:28,"279,200,20,100,105,108,109,110,114,119,120,121...",https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Katka,Skalnikova,Skalnikova,1,Unassigned,,,,
3,skalnikovakatka@gmail.com,2022-02-21 15:57:23,"279,200,20,100,105,106,108,109,110,114,119,120...",https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Katka,Skalnikova,Skalnikova,1,Unassigned,,,,
4,skalnikovakatka@gmail.com,2022-02-21 15:57:23,"201=1687.00,202=84.00,203=81.00,204=1932.00,20...",https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Katka,Skalnikova,Skalnikova,1,Unassigned,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,skalnikovakatka@gmail.com,2022-02-23 16:02:14,259159160161162113,https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Katka,Skalnikova,Skalnikova,2,Awaiting Tele Acceptance,,,,
88,skalnikovakatka@gmail.com,2022-02-23 18:57:53,"279,200,20,100,105,108,109,110,114,119,120,121...",https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Katka,Skalnikova,Skalnikova,2,Awaiting Tele Acceptance,,,,
89,skalnikovakatka@gmail.com,2022-02-23 18:57:53,273174175176500501502503113,https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Katka,Skalnikova,Skalnikova,2,Awaiting Tele Acceptance,,,,
90,skalnikovakatka@gmail.com,2022-02-23 18:57:53,273174175176113,https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Katka,Skalnikova,Skalnikova,2,Awaiting Tele Acceptance,,,,


In [10]:
df.to_csv("novo.csv")


In [11]:
df = query_db(
    """
    SELECT
        e.EmailAddress,
        a.VisitStartDateTime,
        a.EventList,
        a.PageURL,
        a.PageName,
        e.FirstName,
        e.LastName,
        e.Company
    FROM
        aem.RawTraffic as a,
        elq.Contact as e
    WHERE a.VisitStartDateTime > '2022-02-10'
        AND a.EloquaContactId = e.EloquaContactId
        AND a.PageURL = 'https://www.rockwellautomation.com/novo-nordisk.html';
    """
)
df


Unnamed: 0,EmailAddress,VisitStartDateTime,EventList,PageURL,PageName,FirstName,LastName,Company
0,jkallus@ra.rockwell.com,2022-02-11 15:59:05,"279,200,20,100,105,108,109,110,114,119,120,122...",https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Jakub,Kallus,Rockwell Automation
1,jkallus@ra.rockwell.com,2022-02-11 15:59:05,"201=7859.00,202=1147.00,203=1144.00,204=9341.0...",https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Jakub,Kallus,Rockwell Automation
2,jkallus@ra.rockwell.com,2022-02-11 15:59:05,259160161162113,https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Jakub,Kallus,Rockwell Automation
3,jkallus@ra.rockwell.com,2022-02-11 15:59:05,"273,174,175,176,182,183,185,186,187,188,189,19...",https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Jakub,Kallus,Rockwell Automation
4,jkallus@ra.rockwell.com,2022-02-11 15:59:05,239139500501502503113,https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Jakub,Kallus,Rockwell Automation
...,...,...,...,...,...,...,...,...
864,brad.hemze@rockwellautomation.com,2022-02-23 22:40:44,239139500501502503113,https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Brad,Hemze,Rockwell Automation (USA)
865,brad.hemze@rockwellautomation.com,2022-02-23 22:40:44,260160161162113,https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Brad,Hemze,Rockwell Automation (USA)
866,brad.hemze@rockwellautomation.com,2022-02-23 22:40:44,262160161162113,https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Brad,Hemze,Rockwell Automation (USA)
867,brad.hemze@rockwellautomation.com,2022-02-23 22:40:44,263160161162113,https://www.rockwellautomation.com/novo-nordis...,rockwellautomation:homepage,Brad,Hemze,Rockwell Automation (USA)


In [9]:
df = query_db(
    """
    SELECT TOP 20
        COUNT(a.PageURL) as ViewCount,
        a.PageURL
    FROM
        aem.RawTraffic as a,
        elq.Contact as e
    WHERE a.EloquaContactId = e.EloquaContactId
        AND e.EmailAddress = 'bjdipert@ra.rockwell.com'
    GROUP BY PageURL
    ORDER BY ViewCount DESC
    """
)
df


Unnamed: 0,ViewCount,PageURL
0,145,https://www.rockwellautomation.com/en-us.html
1,55,https://www.rockwellautomation.com/en-us/capab...
2,51,https://www.rockwellautomation.com/lang-select...
3,30,https://www.rockwellautomation.com/en-ie/capab...
4,19,https://www.rockwellautomation.com/en-us/suppo...
5,18,https://www.rockwellautomation.com/en-us/indus...
6,14,https://www.rockwellautomation.com/en-us/produ...
7,12,https://www.rockwellautomation.com/de-at/produ...
8,12,https://www.rockwellautomation.com/en-us/capab...
9,12,https://www.rockwellautomation.com/en-us/indus...


# Pathfactory Content Scoring

In [4]:
# ContentJourney is a varchar that lists the content viewed
# Query all pathfactory visitors that are in Eloqua and have an assigned leadstatus
# Aggregate their pathfactory visit information

df = query_db(
    """
    SELECT
        e.EmailAddress,
        MAX(l.ra_leadstage) AS ra_leadstage,
        SUM(p.AssetsViewed) AS AssetsViewed,
        STRING_AGG(CONVERT(NVARCHAR(max), p.ContentJourney), '') AS ContentJourney,
        SUM(p.EngagementScore) as EngagementScore,
        SUM(p.EngagementTime) as EngagementTime,
        STRING_AGG(CONVERT(NVARCHAR(max), p.ExperienceName), '; ') AS ExperienceName
    FROM
        Staging.elq.Contact AS e,
        (   
            SELECT emailaddress1, MAX(ra_leadstage) AS ra_leadstage
            FROM crm.Lead
            GROUP BY emailaddress1
        ) AS l,
        Staging.elq.PathFactory AS p
    WHERE e.EmailAddress = l.emailaddress1
        AND e.EloquaContactId = p.EloquaContactId
    GROUP BY
        e.EmailAddress;
    """
)
df


Unnamed: 0,EmailAddress,ra_leadstage,AssetsViewed,ContentJourney,EngagementScore,EngagementTime,ExperienceName
0,4lifefreedoms@gmail.com,3,1,[iSuite-ebook],0,17,InnovationSuite
1,565542979@qq.com,5,38,[data-ingest-demo][ThingWorx_8_Video][thingwor...,34,4968,InnovationSuite; Getting to know PTC; HMI; Dat...
2,a.baffoejames@yahoo.com,3,4,[LNS-Connected-Worker][HarborResearchFull][Pra...,8,88,Connected Worker; Digital Transformation; IIoT...
3,a.desai@seksaria.com,3,4,[3AgilePrinciplesDX][Caterpillar-Unearths][3Ag...,18,606,Industrial Analytics
4,a.espinoza31@yahoo.com,3,1,[georgia-pacific-connected-workforce],3,102,Connected Worker
...,...,...,...,...,...,...,...
3585,zimmerd557@gmail.com,2,1,[liveworxkeynote],0,24,LiveWorx
3586,zjna@novonordisk.com,5,3,[INFO-BR006A-EN-P][INFO-BR006A-EN-P][INFO-BR00...,2,32,FactoryTalk Quality Application; FactoryTalk Q...
3587,zrolnik@racontrols.pl,2,9,[NaturalSearchDemo][CMMs-Software-platform][CM...,10,1958,LiveWorx ; Fiix; Fiix; Fiix; Fiix; Is FactoryT...
3588,zwang@quantumscape.com,3,2,[PractitionerGuide-DeployingIIoT][Practitioner...,8,91,IIoT; IIoT


In [107]:
# Get all content items
pathfactory_content = set()
for index, row in df.iterrows():
    content_journey = row["ContentJourney"]
    content_list = content_journey[1:-2].split("][")

    for content_item in content_list:
        pathfactory_content.add(content_item)

len(pathfactory_content)


666

In [5]:
is_qualified_list = list()  # is a user a qualified lead or not
content_journey_list = list()  # split content journey items
for index, row in df.iterrows():
    # instead of for loop, use pandas df apply or map or something similar
    is_qualified = 1 if row["ra_leadstage"] >= 5 else 0  # see mapping above
    is_qualified_list.append(is_qualified)

    content_journey = row["ContentJourney"]
    content_list = content_journey[1:-1].split("][")
    content_set = set(content_list)
    content_journey_list.append(content_set)


# one-hot encode the content_journey
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
content_one_hot = pd.DataFrame(
    mlb.fit_transform(content_journey_list), columns=mlb.classes_
)
content_one_hot  # for each row, a 1 indicates that the user viewed that content
# if we have duration viewed for each content, replace one-hot with duration viewed
# for additional features on each content item, using "embedding" (ask Wei)
# consider adding engagement time (possibly normalize)


Unnamed: 0,0119-000163-3-d-vufo,1gs-w8w-h,1rx-ee-fd,24f3f0734389d0785593,3-agile-principles-ES,3-agile-principles-digital-transformation,3-ways-blog,3-ways-to-save-paperless,33seconds,3AgilePrinciplesDX,...,value-workshop,video-marketing-sale,videohillshirebrandquality,vp-operations,webinar-how-to-get-s,what-is-preventative-maintenance-fiix,whoisleadingdigitaltransformation,whoisleadingdigitaltransmation,why-register,window-into-extruder-ops-article
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3585,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3586,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3587,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3588,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# is_qualified_list is the target (Y)
# content_one_host is the input (X)
# currently not using any other features

# split into train and test sets
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    content_one_hot, is_qualified_list, test_size=1 / 3, random_state=None, shuffle=True
)


In [None]:
# check feature correlation for outliers
corr_train = pd.DataFrame(
    np.column_stack((y_train, x_train)),
    columns=(["qualified"] + x_train.columns.to_list()),
)
corr = corr_train.corr()
corr.iloc[0].dropna().sort_values()


In [51]:
from sklearn.naive_bayes import MultinomialNB

mnb_model = MultinomialNB()
mnb_model.fit(x_train, y_train)

mnb_model.score(x_test, y_test)


0.5472013366750209

In [57]:
# conditional probability for each content item
mnb_probs = np.exp(mnb_model.feature_log_prob_[0]) * 100
mbn_scores = pd.DataFrame(np.column_stack((mlb.classes_, mnb_probs)))
mbn_scores.to_csv("mnb_scores.csv")
mbn_scores


Unnamed: 0,0,1
0,0119-000163-3-d-vufo,0.02324
1,1gs-w8w-h,0.02324
2,1rx-ee-fd,0.046479
3,24f3f0734389d0785593,0.092958
4,3-agile-principles-ES,0.046479
...,...,...
395,what-is-preventative-maintenance-fiix,0.046479
396,whoisleadingdigitaltransformation,0.255636
397,whoisleadingdigitaltransmation,0.511271
398,why-register,0.162677


In [53]:
from sklearn.linear_model import LogisticRegression

lg_model = LogisticRegression(solver="newton-cg", penalty="l2")
lg_model.fit(x_train, y_train)

lg_model.score(x_test, y_test)
# gives suspiciously accurate result
# consider cross validation


0.5463659147869674

In [54]:
lg_scores = pd.DataFrame(np.column_stack((mlb.classes_, lg_model.coef_[0])))
lg_scores.to_csv("lg_scores.csv")
lg_scores


Unnamed: 0,0,1
0,0119-000163-3-d-vufo,0.0
1,1gs-w8w-h,0.20337
2,1rx-ee-fd,0.101769
3,24f3f0734389d0785593,0.601723
4,3-agile-principles-ES,0.350292
...,...,...
395,what-is-preventative-maintenance-fiix,-0.361093
396,whoisleadingdigitaltransformation,-0.100448
397,whoisleadingdigitaltransmation,-0.100636
398,why-register,0.344465


- Use different models to generate content scores, then calculate correlation between scores to compare models
- Consider how to limit the amount of data processed when calculating new scores (elbow method)
    - Perhaps limit to newest 10000 entries in the eloqua/adobe analytics
    - Consider how often to calculate and updates scores
- Store engagement scores in DB:
    - Table with columns: Binge Experience ID/name, content item name, engagement score
    - When we run data processing, then update engagement score for existing items, add new rows for new content as needed
    - Consider whether we should calculate score from scratch or adjust previous score
- Write calculated scores to AEM via endpoint


# Lead status funnel

In [5]:
df = query_db(
    """
    SELECT
        emailaddress1,
        ra_leadstage,
        ra_leadstagename,
        createdon
    FROM crm.Lead;
    """
)
df.to_csv("lead_status_raw.csv")
df


Unnamed: 0,emailaddress1,ra_leadstage,ra_leadstagename,createdon
0,sustarg@milwaukeeforge.com,4.0,Awaiting Sales Acceptance,2022-03-09 20:00:28
1,teerapanluengnaruemitchai@thaibev.com,2.0,Awaiting Tele Acceptance,2022-03-09 03:49:23
2,tim.parks@whitepath.com,5.0,Awaiting Sales Qualification,2022-03-09 19:31:32
3,sherman.wagner@silganpfc.com,4.0,Awaiting Sales Acceptance,2022-03-09 21:04:09
4,miguel.chavez2@sbdinc.com,4.0,Awaiting Sales Acceptance,2022-03-09 19:35:53
...,...,...,...,...
237599,sylvain.tremblay@traversiers.gouv.qc.ca,1.0,Unassigned,2022-05-18 07:58:29
237600,peter.hartmann@normagroup.com,5.0,Awaiting Sales Qualification,2022-05-18 07:53:42
237601,carolin.betz@roche.com,1.0,Unassigned,2022-05-18 07:57:53
237602,patrick1.gagnon@cssda.gouv.qc.ca,1.0,Unassigned,2022-05-18 08:03:24


In [6]:
df = query_db(
    """
    SELECT
        emailaddress1,
        MAX(ra_leadstage) AS ra_leadstage
    FROM crm.Lead
    GROUP BY emailaddress1;
    """
)
df.to_csv("lead_status_max.csv")
df


Unnamed: 0,emailaddress1,ra_leadstage
0,sha.liao@hamiltoncompany.com,4.0
1,craig.andresen@paccar.com,5.0
2,Joanne.Goh@omdia.com,4.0
3,andrew_stinson@hotmail.com,5.0
4,roy.canterbury@eadcorporate.com,5.0
...,...,...
122186,jonathan.rarey@gilbarco.com,6.0
122187,a_malik@ppl.com.pk,3.0
122188,trex@capt-celina.com,4.0
122189,cchaif@devilletechnologies.com,5.0


In [9]:
# max lead stage count
df = query_db(
    """
    SELECT
        c.ra_leadstage,
        COUNT(c.ra_leadstage) AS count_ra_leadstage
    FROM 
        (
            SELECT
                emailaddress1,
                MAX(ra_leadstage) AS ra_leadstage
            FROM crm.Lead
            GROUP BY emailaddress1
        ) AS c
    GROUP BY c.ra_leadstage
    ORDER BY c.ra_leadstage;
    """
)
df.to_csv("lead_status_max_count.csv")
df


Unnamed: 0,ra_leadstage,count_ra_leadstage
0,,0
1,1.0,1062
2,2.0,11477
3,3.0,14911
4,4.0,39569
5,5.0,50911
6,6.0,520
7,7.0,3725
8,8.0,3
