Skip to content

Commit

Permalink
feat: add fetch_size argument to diffreader
Browse files (browse the repository at this point in the history)
  • Loading branch information
pckhoi committed Dec 6, 2021
1 parent b30ff48 commit c3de545
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 31 deletions.
2 changes: 1 addition & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
author = 'Khoi Pham'

# The full version, including alpha/beta/rc tags
release = '0.7.3'
release = '0.7.3.1'


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = wrgl
version = 0.7.3
version = 0.7.3.1
author = Khoi Pham
author_email = pckhoi@gmail.com
description = Data matching utilities
Expand Down
49 changes: 22 additions & 27 deletions wrgl/diffreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,61 +209,56 @@ class DiffReader(object):
:var RowIterator removed_rows: iterator for removed rows
:var ModifiedRowIterator modified_rows: iterator for modified rows
"""
_repo: "repository.Repository"
_com_sum1: str
_com_sum2: str
_dr: DiffResult
_cd: ColDiff

column_changes: ColumnChanges
pk_changes: ColumnChanges
added_rows: RowIterator or None = None
removed_rows: RowIterator or None = None
modified_rows: ModifiedRowIterator or None = None

def __init__(self, repo: "repository.Repository", com_sum1: str, com_sum2: str) -> None:
def __init__(self, repo: "repository.Repository", com_sum1: str, com_sum2: str, fetch_size: int = 100) -> None:
"""
:param Repository repo: the repo handle
:param str com_sum1: checksum of the first (newer) commit
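:param str com_sum2: checksum of the second (older) commit
:param int fetch_size: fetch size passed on to the added, removed and modified row iterators (defaults to 100)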
"""
self._repo = repo
self._com_sum1 = com_sum1
self._com_sum2 = com_sum2
self._dr = self._repo.diff(com_sum1, com_sum2)
old_tbl = Table(columns=self._dr.old_columns, pk=self._dr.old_pk)
new_tbl = Table(columns=self._dr.columns, pk=self._dr.pk)
self._cd = ColDiff(old_tbl, new_tbl)
dr = repo.diff(com_sum1, com_sum2)
old_tbl = Table(columns=dr.old_columns, pk=dr.old_pk)
new_tbl = Table(columns=dr.columns, pk=dr.pk)
cd = ColDiff(old_tbl, new_tbl)
self.column_changes = ColumnChanges.from_new_old_columns(
self._dr.columns, self._dr.old_columns
dr.columns, dr.old_columns
)
self.pk_changes = ColumnChanges.from_new_old_columns(
new_tbl.primary_key, old_tbl.primary_key
)
if old_tbl.primary_key == new_tbl.primary_key:
self.added_rows = RowIterator(
repo=self._repo,
tbl_sum=self._dr.table_sum,
repo=repo,
tbl_sum=dr.table_sum,
columns=new_tbl.columns,
primary_key=new_tbl.primary_key
primary_key=new_tbl.primary_key,
fetch_size=fetch_size
)
self.removed_rows = RowIterator(
repo=self._repo,
tbl_sum=self._dr.old_table_sum,
repo=repo,
tbl_sum=dr.old_table_sum,
columns=old_tbl.columns,
primary_key=old_tbl.primary_key
primary_key=old_tbl.primary_key,
fetch_size=fetch_size
)
self.modified_rows = ModifiedRowIterator(
repo=self._repo,
tbl_sum1=self._dr.table_sum,
tbl_sum2=self._dr.old_table_sum,
cd=self._cd,
repo=repo,
tbl_sum1=dr.table_sum,
tbl_sum2=dr.old_table_sum,
cd=cd,
columns=[
col.name for col in self._cd.columns
col.name for col in cd.columns
],
primary_key=new_tbl.primary_key
primary_key=new_tbl.primary_key,
fetch_size=fetch_size
)
for rd in self._dr.row_diff:
for rd in dr.row_diff:
if rd.off1 is None:
self.removed_rows.add_offset(rd.off2)
elif rd.off2 is None:
Expand Down
4 changes: 2 additions & 2 deletions wrgl/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def diff(self, sum1: str, sum2: str) -> DiffResult:
)
return json_loads(r.content, DiffResult)

def diff_reader(self, sum1: str, sum2: str) -> diffreader.DiffReader:
def diff_reader(self, sum1: str, sum2: str, fetch_size: int = 100) -> diffreader.DiffReader:
"""Compares two commits and interprets their differences.
This method is higher level than :func:`Repository.diff` and should
Expand All @@ -301,4 +301,4 @@ def diff_reader(self, sum1: str, sum2: str) -> diffreader.DiffReader:
:rtype: DiffReader
"""
return diffreader.DiffReader(self, sum1, sum2)
return diffreader.DiffReader(self, sum1, sum2, fetch_size)

0 comments on commit c3de545

Please sign in to comment.