## deltaLens Data Compare Sample

In [1]:
#imports

from delta_lens.deltaLens import DeltaLens, EntityComparer
from delta_lens.config import Config, Entity, Side, Transform, Defaults,ReferenceDataset, load_config
from delta_lens.sqliteExport import *
from delta_lens.csvExport import *
from datetime import date

In [2]:
# Configuration: read the comparison settings from the JSON file that ships
# with the sample data.
config_path = "data/legislators.compare.config.json"
config = load_config(config_path)


In [3]:
# Stamp the run name with today's date (YYYY-MM-DD); later cells reuse
# runName to name the exported files.
today_stamp = date.today().strftime("%Y-%m-%d")
runName = f'legislator_compare_{today_stamp}'

# Build the comparator for this run from the loaded config and execute it.
# continue_on_error=False is passed explicitly -- presumably the run aborts on
# the first failing step; confirm against the DeltaLens documentation.
comparator = DeltaLens(runName, config)
comparator.execute(continue_on_error=False)



#### entity_compare_results
Table containing summary of each entity comparison

In [4]:
# Hand-written, single-field comparison of the legacy and new legislator
# tables, joined on "Id" with a FULL OUTER JOIN so rows that exist on only one
# side are kept.
#
# The match flag uses IS NOT DISTINCT FROM (NULL-safe equality): two NULLs
# count as a match and NULL vs. a value counts as a mismatch. A plain
# `a = b OR (a IS NULL AND b IS NULL)` test evaluates to NULL -- not FALSE --
# when only one side is NULL, which would leave both "Id fec_match" and
# _full_match as NULL for exactly the rows that differ.
query = '''
WITH comparison AS (
    SELECT coalesce(legislator_legacy."Id", legislator_new."Id") as "Id",
        legislator_legacy."Id fec" as "Id fec_left",
        legislator_new."Id fec" as "Id fec_right",
        (
            legislator_legacy."Id fec" IS NOT DISTINCT FROM legislator_new."Id fec"
        ) as "Id fec_match"
    FROM legislator_legacy
        FULL OUTER JOIN legislator_new ON legislator_legacy."Id" = legislator_new."Id"
)
SELECT *,
    (
        "Id fec_match" = 1
    ) as _full_match
FROM comparison
'''

# Run the query on the comparator's connection and show the result as a
# DataFrame (the last expression of the cell is what gets displayed).
comparator.con.execute(query).fetch_df()

Unnamed: 0,Id,Id fec_left,Id fec_right,Id fec_match,_full_match
0,B000226,,,True,True
1,B000546,,,True,True
2,B001086,,,True,True
3,C000187,,,True,True
4,C000538,,,True,True
...,...,...,...,...,...
12593,F000475,"[""H2MN01223""]","[""H2MN01223""]",True,True
12594,P000619,"[""H2AK01158""]","[""H2AK01158""]",True,True
12595,R000579,"[""H8NY19223""]","[""H8NY19223""]",True,True
12596,S001219,"[""H2NY23137""]","[""H2NY23137""]",True,True


#### [entity]_compare_field_summary
Entity specific table containing summary of field comparison

In [5]:
# Per-field match statistics for the legislator entity, shown as a DataFrame.
field_summary_sql = "SELECT * FROM legislator_compare_field_summary"
comparator.con.execute(field_summary_sql).fetch_df()

Unnamed: 0,field,total,matches,match_percentage
0,Bio gender,12598,12598.0,100.0
1,Name,12598,12595.0,99.980003
2,Id wikidata,12598,12595.0,99.980003
3,Name last,12598,12597.0,99.989998
4,Id ballotpedia,12598,12598.0,100.0
5,Id govtrack,12598,12598.0,100.0
6,Id house history,12598,12598.0,100.0
7,Id cspan,12598,12598.0,100.0
8,Other names,12598,12598.0,100.0
9,Name middle,12598,12598.0,100.0


#### [entity]_compare
Entity specific table containing details of comparison

In [6]:
# Show five rows of legislator_compare that are not a full match.
# NOTE: USING SAMPLE is given no seed here, so the rows shown may differ
# between runs.
mismatch_sample_sql = " WITH CTE as (SELECT * FROM  legislator_compare WHERE _full_match = 0) SELECT * FROM CTE USING SAMPLE 5;"
comparator.con.execute(mismatch_sample_sql).fetch_df()

Unnamed: 0,Id,Bio gender_left,Bio gender_right,Bio gender_match,Name_left,Name_right,Name_match,Id wikidata_left,Id wikidata_right,Id wikidata_match,...,Family_match,Name suffix_left,Name suffix_right,Name suffix_match,Name official full_left,Name official full_right,Name official full_match,_exists_left,_exists_right,_full_match
0,B000546,M,M,True,Theodorick Bland,Theodorick Bland,True,Q1749152,Q1749152,True,...,True,,,True,,,True,True,True,False
1,C000187,M,M,True,Daniel Carroll,Daniel Carroll,True,sQ674371,Q674371,False,...,True,,,True,,,True,True,True,False
2,D000013,M,M,True,Tristram Dalton,Tristram Dalton,True,Q1365791s,Q1365791,False,...,True,,,True,,,True,True,True,False
3,G000526,M,M,True,James Gussnn,James Gunn,False,Q956982,Q956982,True,...,True,,,True,,,True,True,True,False
4,H000488,M,M,True,Thomas ssHenderson,Thomas Henderson,False,Q435140,Q435140,True,...,True,,,True,,,True,True,True,False


#### Export to sqlite
Exports the tables above to an on-disk SQLite file. SQLite is a popular single-file database that is well supported in many languages and has many query tools available; see, for example, the [Datasette](https://datasette.io/) package.

In [8]:
# Write the comparison tables to a SQLite file named after this run.
sqlite_path = f'{runName}.sqlite'
export_to_sqlite(comparator.con, sqlite_path)

'legislator_compare_2025-02-15.sqlite'

In [None]:
# Write the comparison tables to a .tar.gz archive named after this run
# (CSV contents, going by the helper's name).
archive_path = f'{runName}.tar.gz'
export_to_csv_archive(comparator.con, archive_path)

'legislator_compare_2025-02-15.tar.gz'