## Setup

In [1]:
# Connect using pyodbc, sqlalchemy, and pandas
import getpass
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import sqlalchemy

# Connection settings for the SQL Server database queried by this notebook.
server = "sqlsvr-0092-mdp-02.85f8a2f57eaf.database.windows.net"
database = "Staging"
username = "pisrc-inkoo"
# getpass prompts without echoing, so the password never shows up on screen
# or in the saved cell output the way it can with input().
password = getpass.getpass("Enter database password: ")
driver = "ODBC Driver 17 for SQL Server"

# URL-encode the credentials and the driver name: characters such as "@",
# ":" or "/" in a password would otherwise be parsed as URL delimiters and
# produce a broken (or silently wrong) connection string.
engine = sqlalchemy.create_engine(
    f"mssql+pyodbc://{quote_plus(username)}:{quote_plus(password)}"
    f"@{server}/{database}?driver={quote_plus(driver)}"
)


def query_db(query, params=None):
    """Execute a SQL statement on the notebook-wide ``engine``.

    Args:
        query: SQL to run; passed to ``pd.read_sql`` unchanged.
        params: Optional bind parameters, forwarded to ``pd.read_sql``.

    Returns:
        The ``pandas.DataFrame`` that ``pd.read_sql`` builds from the result.
    """
    result = pd.read_sql(sql=query, con=engine, params=params)
    return result


# Smoke-test the connection by printing the server's version banner.
# The column is aliased so the value is read by an explicit name rather than
# through the empty-string label an unnamed result column ends up with.
df = query_db("SELECT @@version AS server_version;")
print(df.loc[0, "server_version"])


Microsoft SQL Azure (RTM) - 12.0.2000.8 
	Apr 18 2022 13:01:43 
	Copyright (C) 2021 Microsoft Corporation



# Status Codes

In [2]:
# ra_leadstage: every distinct (code, name) pair present in crm.Lead
lead_stage_sql = """
    SELECT DISTINCT
        ra_leadstage,
        ra_leadstagename
    FROM crm.Lead
    ORDER BY ra_leadstage;
    """
query_db(lead_stage_sql)


Unnamed: 0,ra_leadstage,ra_leadstagename
0,,
1,1.0,Unassigned
2,2.0,Awaiting Tele Acceptance
3,3.0,Awaiting Tele Qualification
4,4.0,Awaiting Sales Acceptance
5,5.0,Awaiting Sales Qualification
6,6.0,Distributor Lead
7,7.0,Qualified
8,8.0,External Lead


In [34]:
# statecode: every distinct (code, name) pair present in crm.Lead
state_code_sql = """
    SELECT DISTINCT
        statecode,
        statecodename
    FROM crm.Lead
    ORDER BY statecode;
    """
query_db(state_code_sql)


Unnamed: 0,statecode,statecodename
0,0,Open
1,1,Qualified
2,2,Disqualified


In [35]:
# statuscode: every distinct (code, name) pair present in crm.Lead
status_code_sql = """
    SELECT DISTINCT
        statuscode,
        statuscodename
    FROM crm.Lead
    ORDER BY statuscode;
    """
query_db(status_code_sql)


Unnamed: 0,statuscode,statuscodename
0,1,New
1,2,Contacted
2,3,Qualified
3,4,Not buying or influence location
4,5,Credit hold or watch
5,6,Not Decision Maker
6,7,No Interest
7,953810000,No RA solution
8,953810001,Selling barrier to high
9,953810002,Competitor/Non RA distributor


In [36]:
# Stage/state/status columns for every row of crm.Lead, sorted by e-mail
# address and then by creation time.
lead_history_sql = """
    SELECT
        emailaddress1,
        createdon,
        ra_leadstage,
        ra_leadstagename,
        statecode,
        statecodename,
        statuscode,
        statuscodename
    FROM crm.Lead
    ORDER BY emailaddress1, createdon;
    """
df = query_db(lead_history_sql)
# CSV export is left disabled here: df.to_csv("lead_status_raw.csv")
df


Unnamed: 0,emailaddress1,createdon,ra_leadstage,ra_leadstagename,statecode,statecodename,statuscode,statuscodename
0,,2000-11-13 18:30:00,5.0,Awaiting Sales Qualification,2,Disqualified,7,No Interest
1,,2001-01-11 18:30:00,5.0,Awaiting Sales Qualification,2,Disqualified,953810004,Unable to make contact
2,,2001-11-09 18:30:00,5.0,Awaiting Sales Qualification,2,Disqualified,7,No Interest
3,,2002-01-10 18:30:00,5.0,Awaiting Sales Qualification,2,Disqualified,7,No Interest
4,,2002-03-28 18:30:00,5.0,Awaiting Sales Qualification,2,Disqualified,4,Not buying or influence location
...,...,...,...,...,...,...,...,...
237599,zzq4993@163.com,2022-02-08 03:35:37,4.0,Awaiting Sales Acceptance,0,Open,1,New
237600,zzq516@163.com,2017-09-21 06:00:00,5.0,Awaiting Sales Qualification,1,Qualified,3,Qualified
237601,zzq516@163.com,2018-01-08 04:18:52,5.0,Awaiting Sales Qualification,1,Qualified,3,Qualified
237602,zzq63@126.com,2016-12-24 06:00:00,5.0,Awaiting Sales Qualification,2,Disqualified,4,Not buying or influence location


In [37]:
# Latest status: exactly one row per e-mail address, taken from the lead with
# the most recent createdon.
#
# ROW_NUMBER() is used instead of joining back on MAX(createdon): the join
# returns *every* lead that shares the latest timestamp, so an e-mail with two
# leads created at the same instant would appear twice.
# Rows with a NULL e-mail or NULL createdon are excluded explicitly; the
# previous equality join dropped them implicitly (NULL never equals NULL).
# NOTE(review): the extra ORDER BY columns only make the pick deterministic
# when createdon ties; swap in a real unique key / modified timestamp if the
# table has one -- TODO confirm.
df = query_db(
    """
    WITH ranked AS (
        SELECT
            emailaddress1,
            ra_leadstage,
            ra_leadstagename,
            statecode,
            statecodename,
            statuscode,
            statuscodename,
            ROW_NUMBER() OVER (
                PARTITION BY emailaddress1
                ORDER BY
                    createdon DESC,
                    statecode DESC,
                    statuscode DESC,
                    ra_leadstage DESC
            ) AS rn
        FROM crm.Lead
        WHERE emailaddress1 IS NOT NULL
            AND createdon IS NOT NULL
    )
    SELECT
        emailaddress1,
        ra_leadstage,
        ra_leadstagename,
        statecode,
        statecodename,
        statuscode,
        statuscodename
    FROM ranked
    WHERE rn = 1;
    """
)
df.to_csv("lead_status_latest.csv")
df


Unnamed: 0,emailaddress1,ra_leadstage,ra_leadstagename,statecode,statecodename,statuscode,statuscodename
0,zzq4993@163.com,4.0,Awaiting Sales Acceptance,0,Open,1,New
1,zyye@flotech.com.sg,4.0,Awaiting Sales Acceptance,2,Disqualified,6,Not Decision Maker
2,zygmunt.szamborski@mdlz.com,5.0,Awaiting Sales Qualification,1,Qualified,3,Qualified
3,zyg@safe-run.cn,5.0,Awaiting Sales Qualification,2,Disqualified,953810011,Already Active Opportunity
4,zx0230123@163.com,4.0,Awaiting Sales Acceptance,0,Open,1,New
...,...,...,...,...,...,...,...
122228,1137200593@qq.com,5.0,Awaiting Sales Qualification,1,Qualified,3,Qualified
122229,110489msv@mail.ru,4.0,Awaiting Sales Acceptance,2,Disqualified,953810005,Bad Contact Information
122230,1099302426@qq.com,5.0,Awaiting Sales Qualification,1,Qualified,3,Qualified
122231,1029626081@qq.com,4.0,Awaiting Sales Acceptance,2,Disqualified,953810011,Already Active Opportunity


In [38]:
# Latest status count by leadstage and statecode.
#
# The CTE keeps exactly one lead per e-mail address (the most recent by
# createdon) before counting. ROW_NUMBER() replaces the join on
# MAX(createdon), which kept every lead tied on the latest timestamp and so
# counted some e-mail addresses more than once.
# Rows with a NULL e-mail or NULL createdon are excluded explicitly; the
# previous equality join dropped them implicitly (NULL never equals NULL).
# NOTE(review): the extra ORDER BY columns only make the pick deterministic
# when createdon ties; swap in a real unique key / modified timestamp if the
# table has one -- TODO confirm.
df = query_db(
    """
    WITH ranked AS (
        SELECT
            ra_leadstage,
            ra_leadstagename,
            statecode,
            statecodename,
            ROW_NUMBER() OVER (
                PARTITION BY emailaddress1
                ORDER BY
                    createdon DESC,
                    statecode DESC,
                    statuscode DESC,
                    ra_leadstage DESC
            ) AS rn
        FROM crm.Lead
        WHERE emailaddress1 IS NOT NULL
            AND createdon IS NOT NULL
    )
    SELECT
        ranked.ra_leadstagename,
        ranked.statecodename,
        COUNT(*) AS count
    FROM ranked
    WHERE ranked.rn = 1
    GROUP BY
        ranked.ra_leadstage,
        ranked.ra_leadstagename,
        ranked.statecode,
        ranked.statecodename
    ORDER BY
        ranked.ra_leadstage,
        ranked.statecode;
    """
)
df.to_csv("lead_status_latest_count.csv")
df


Unnamed: 0,ra_leadstagename,statecodename,count
0,,Open,5
1,,Disqualified,9
2,Unassigned,Open,510
3,Unassigned,Qualified,27
4,Unassigned,Disqualified,610
5,Awaiting Tele Acceptance,Open,5127
6,Awaiting Tele Acceptance,Qualified,25
7,Awaiting Tele Acceptance,Disqualified,8755
8,Awaiting Tele Qualification,Open,1328
9,Awaiting Tele Qualification,Qualified,15
