Skip to content

Commit ef16f82

Browse files
authored
Enhance caching to allow things like plus_rowwise- and min_element+ (#9)
* Enhance caching to allow things like `plus_rowwise-` and `min_element+`. * Oops — fix reduce-to-scalar in caching.
1 parent 90bfa89 commit ef16f82

File tree

6 files changed

+529
-309
lines changed

6 files changed

+529
-309
lines changed

graphblas_algorithms/algorithms/cluster.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ def transitivity_directed_core(G):
6666
numerator = plus_pair(A @ A.T).new(mask=A.S).reduce_scalar(allow_empty=False).value
6767
if numerator == 0:
6868
return 0
69-
deg = A.reduce_rowwise("count")
70-
denom = (deg * (deg - 1)).reduce().value
69+
degrees = G.get_property("row_degrees-")
70+
denom = (degrees * (degrees - 1)).reduce().value
7171
return numerator / denom
7272

7373

@@ -76,9 +76,10 @@ def transitivity(G):
7676
if len(G) == 0:
7777
return 0
7878
if G.is_directed():
79-
return transitivity_directed_core(G)
79+
func = transitivity_directed_core
8080
else:
81-
return transitivity_core(G)
81+
func = transitivity_core
82+
return G._cacheit("transitivity", func, G)
8283

8384

8485
def clustering_core(G, mask=None):
@@ -101,10 +102,7 @@ def clustering_directed_core(G, mask=None):
101102
+ plus_pair(AT @ A.T).new(mask=A.S).reduce_rowwise().new(mask=mask)
102103
+ plus_pair(AT @ AT.T).new(mask=A.S).reduce_columnwise().new(mask=mask)
103104
)
104-
recip_degrees = binary.pair(A & AT).reduce_rowwise().new(mask=mask)
105-
total_degrees = A.reduce_rowwise("count").new(mask=mask) + A.reduce_columnwise("count").new(
106-
mask=mask
107-
)
105+
recip_degrees, total_degrees = G.get_properties("recip_degrees- total_degrees-", mask=mask)
108106
return (tri / (total_degrees * (total_degrees - 1) - 2 * recip_degrees)).new(name="clustering")
109107

110108

@@ -200,6 +198,12 @@ def average_clustering(G, nodes=None, weight=None, count_zeros=True):
200198
raise ZeroDivisionError() # Not covered
201199
mask = G.list_to_mask(nodes)
202200
if G.is_directed():
203-
return average_clustering_directed_core(G, mask=mask, count_zeros=count_zeros)
201+
func = average_clustering_directed_core
202+
else:
203+
func = average_clustering_core
204+
if mask is None:
205+
return G._cacheit(
206+
f"average_clustering(count_zeros={count_zeros})", func, G, count_zeros=count_zeros
207+
)
204208
else:
205-
return average_clustering_core(G, mask=mask, count_zeros=count_zeros)
209+
return func(G, mask=mask, count_zeros=count_zeros)

graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,8 @@ def pagerank_core(
4545
# Inverse of row_degrees
4646
# Fold alpha constant into S
4747
if row_degrees is None:
48-
S = A.reduce_rowwise().new(float, name="S") # XXX: What about self-edges
49-
S << alpha / S
50-
else:
51-
S = (alpha / row_degrees).new(name="S")
48+
row_degrees = G.get_property("plus_rowwise+") # XXX: What about self-edges?
49+
S = (alpha / row_degrees).new(name="S")
5250

5351
if A.ss.is_iso:
5452
# Fold iso-value of A into S
@@ -124,8 +122,7 @@ def pagerank(
124122
# We'll normalize initial, personalization, and dangling vectors later
125123
x = G.dict_to_vector(nstart, dtype=float, name="nstart")
126124
p = G.dict_to_vector(personalization, dtype=float, name="personalization")
127-
row_degrees = G._A.reduce_rowwise().new(name="row_degrees") # XXX: What about self-edges?
128-
# row_degrees = G.get_property('plus_rowwise+') # Maybe?
125+
row_degrees = G.get_property("plus_rowwise+") # XXX: What about self-edges?
129126
if dangling is not None and row_degrees.nvals < N:
130127
dangling_weights = G.dict_to_vector(dangling, dtype=float, name="dangling")
131128
else:
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
from graphblas import agg, op, operator
2+
3+
4+
def get_reduce_to_vector(key, opname, methodname):
    """Build a cached-property getter that reduces ``G._A`` to a Vector.

    Parameters
    ----------
    key : str
        Cache key such as ``"plus_rowwise-"``.  The trailing character selects
        the variant: ``"-"`` excludes the diagonal (self-edges), ``"+"``
        includes it.
    opname : str
        Name of the reducer; resolved as an operator first, then as an
        aggregator (e.g. ``"count"``).
    methodname : str
        ``"reduce_rowwise"`` or ``"reduce_columnwise"``.

    Returns
    -------
    callable
        ``get_reduction(G, mask=None)`` that computes, caches, and returns the
        reduction, reusing related cache entries when possible.
    """
    # Prefer a true operator; fall back to an aggregator (e.g. "count").
    try:
        op_ = op.from_string(opname)
    except ValueError:
        op_ = agg.from_string(opname)
    op_, opclass = operator.find_opclass(op_)
    keybase = key[:-1]  # key without the trailing "+"/"-" marker
    if key[-1] == "-":

        def get_reduction(G, mask=None):
            # Diagonal-free ("-") reduction; reuse cached values when valid.
            cache = G._cache
            if mask is not None:
                # Masked results are not cached; derive from cached entries.
                if key in cache:
                    return cache[key].dup(mask=mask)
                elif cache.get("has_self_edges") is False and f"{keybase}+" in cache:
                    # No self-edges, so "+" and "-" results are identical; alias it.
                    cache[key] = cache[f"{keybase}+"]
                    return cache[key].dup(mask=mask)
                elif "offdiag" in cache:
                    return getattr(cache["offdiag"], methodname)(op_).new(mask=mask, name=key)
                elif (
                    "L-" in cache
                    and "U-" in cache
                    and opclass in {"BinaryOp", "Monoid"}
                    and G.get_property("has_self_edges")
                ):
                    # Combine reductions of the strict lower and upper triangles.
                    return op_(
                        getattr(cache["L-"], methodname)(op_).new(mask=mask)
                        | getattr(cache["U-"], methodname)(op_).new(mask=mask)
                    ).new(name=key)
                elif not G.get_property("has_self_edges"):
                    return G.get_property(f"{keybase}+", mask=mask)
                else:
                    return getattr(G.get_property("offdiag"), methodname)(op_).new(
                        mask=mask, name=key
                    )
            if key not in cache:
                if cache.get("has_self_edges") is False and f"{keybase}+" in cache:
                    cache[key] = cache[f"{keybase}+"]
                elif "offdiag" in cache:
                    cache[key] = getattr(cache["offdiag"], methodname)(op_).new(name=key)
                elif (
                    "L-" in cache
                    and "U-" in cache
                    and opclass in {"BinaryOp", "Monoid"}
                    and G.get_property("has_self_edges")
                ):
                    cache[key] = op_(
                        getattr(cache["L-"], methodname)(op_)
                        | getattr(cache["U-"], methodname)(op_)
                    ).new(name=key)
                elif not G.get_property("has_self_edges"):
                    cache[key] = G.get_property(f"{keybase}+")
                else:
                    cache[key] = getattr(G.get_property("offdiag"), methodname)(op_).new(name=key)
            if (
                "has_self_edges" not in cache
                and f"{keybase}+" in cache
                and cache[key].nvals != cache[f"{keybase}+"].nvals
            ):
                # "+" and "-" reductions differ in sparsity, so self-edges exist.
                cache["has_self_edges"] = True
            elif cache.get("has_self_edges") is False:
                # Known diagonal-free: the "+" result is the same object.
                cache[f"{keybase}+"] = cache[key]
            return cache[key]

    else:

        def get_reduction(G, mask=None):
            # Diagonal-inclusive ("+") reduction of the full adjacency matrix.
            A = G._A
            cache = G._cache
            if mask is not None:
                if key in cache:
                    return cache[key].dup(mask=mask)
                elif cache.get("has_self_edges") is False and f"{keybase}-" in cache:
                    cache[key] = cache[f"{keybase}-"]
                    return cache[key].dup(mask=mask)
                elif methodname == "reduce_columnwise" and "AT" in cache:
                    # Row-reduce the cached transpose instead of column-reducing A.
                    return cache["AT"].reduce_rowwise(op_).new(mask=mask, name=key)
                else:
                    return getattr(A, methodname)(op_).new(mask=mask, name=key)
            if key not in cache:
                if cache.get("has_self_edges") is False and f"{keybase}-" in cache:
                    cache[key] = cache[f"{keybase}-"]
                elif methodname == "reduce_columnwise" and "AT" in cache:
                    cache[key] = cache["AT"].reduce_rowwise(op_).new(name=key)
                else:
                    cache[key] = getattr(A, methodname)(op_).new(name=key)
            if (
                "has_self_edges" not in cache
                and f"{keybase}-" in cache
                and cache[key].nvals != cache[f"{keybase}-"].nvals
            ):
                cache["has_self_edges"] = True
            elif cache.get("has_self_edges") is False:
                cache[f"{keybase}-"] = cache[key]
            return cache[key]

    return get_reduction
101+
102+
103+
def get_reduce_to_scalar(key, opname):
    """Build a cached-property getter that reduces ``G._A`` to a Scalar.

    Parameters
    ----------
    key : str
        Cache key such as ``"min_element+"``.  The trailing character selects
        the variant: ``"-"`` excludes the diagonal (self-edges), ``"+"``
        includes it.
    opname : str
        Name of the reducer; resolved as an operator first, then as an
        aggregator.

    Returns
    -------
    callable
        ``get_reduction(G, mask=None)`` that computes, caches, and returns the
        scalar reduction, preferring to reduce already-cached row/column
        reductions instead of the full matrix.
    """
    # Prefer a true operator; fall back to an aggregator.
    try:
        op_ = op.from_string(opname)
    except ValueError:
        op_ = agg.from_string(opname)
    op_, opclass = operator.find_opclass(op_)
    keybase = key[:-1]  # key without the trailing "+"/"-" marker
    if key[-1] == "-":

        def get_reduction(G, mask=None):
            # Diagonal-free ("-") scalar reduction.
            cache = G._cache
            if key not in cache:
                if cache.get("has_self_edges") is False and f"{keybase}+" in cache:
                    # No self-edges, so the "+" scalar is identical; alias it.
                    cache[key] = cache[f"{keybase}+"]
                elif f"{opname}_rowwise-" in cache:
                    # Reduce an existing vector reduction instead of the matrix.
                    cache[key] = cache[f"{opname}_rowwise-"].reduce(op_).new(name=key)
                elif f"{opname}_columnwise-" in cache:
                    cache[key] = cache[f"{opname}_columnwise-"].reduce(op_).new(name=key)
                elif cache.get("has_self_edges") is False and f"{opname}_rowwise+" in cache:
                    cache[key] = cache[f"{opname}_rowwise+"].reduce(op_).new(name=key)
                elif cache.get("has_self_edges") is False and f"{opname}_columnwise+" in cache:
                    cache[key] = cache[f"{opname}_columnwise+"].reduce(op_).new(name=key)
                elif "offdiag" in cache:
                    cache[key] = cache["offdiag"].reduce_scalar(op_).new(name=key)
                elif (
                    "L-" in cache
                    and "U-" in cache
                    and opclass in {"BinaryOp", "Monoid"}
                    and G.get_property("has_self_edges")
                ):
                    # NOTE(review): this branch returns without writing cache[key],
                    # so the result is recomputed on every call and the
                    # has_self_edges bookkeeping below is skipped — confirm this
                    # is intentional (it differs from every sibling branch).
                    return op_(
                        cache["L-"].reduce(op_)._as_vector() | cache["U-"].reduce(op_)._as_vector()
                    )[0].new(name=key)
                elif not G.get_property("has_self_edges"):
                    cache[key] = G.get_property(f"{keybase}+")
                else:
                    cache[key] = G.get_property("offdiag").reduce_scalar(op_).new(name=key)
            if (
                "has_self_edges" not in cache
                and f"{keybase}+" in cache
                and cache[key] != cache[f"{keybase}+"]
            ):
                # "+" and "-" scalars differ, so self-edges must exist.
                cache["has_self_edges"] = True
            elif cache.get("has_self_edges") is False:
                cache[f"{keybase}+"] = cache[key]
            return cache[key]

    else:

        def get_reduction(G, mask=None):
            # Diagonal-inclusive ("+") scalar reduction of the full matrix.
            A = G._A
            cache = G._cache
            if key not in cache:
                if cache.get("has_self_edges") is False and f"{keybase}-" in cache:
                    cache[key] = cache[f"{keybase}-"]
                elif f"{opname}_rowwise+" in cache:
                    cache[key] = cache[f"{opname}_rowwise+"].reduce(op_).new(name=key)
                elif f"{opname}_columnwise+" in cache:
                    cache[key] = cache[f"{opname}_columnwise+"].reduce(op_).new(name=key)
                elif cache.get("has_self_edges") is False and f"{opname}_rowwise-" in cache:
                    cache[key] = cache[f"{opname}_rowwise-"].reduce(op_).new(name=key)
                elif cache.get("has_self_edges") is False and f"{opname}_columnwise-" in cache:
                    cache[key] = cache[f"{opname}_columnwise-"].reduce(op_).new(name=key)
                else:
                    cache[key] = A.reduce_scalar(op_).new(name=key)
            if (
                "has_self_edges" not in cache
                and f"{keybase}-" in cache
                and cache[key] != cache[f"{keybase}-"]
            ):
                cache["has_self_edges"] = True
            elif cache.get("has_self_edges") is False:
                cache[f"{keybase}-"] = cache[key]
            return cache[key]

    return get_reduction

graphblas_algorithms/classes/_utils.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,21 @@ def from_graphblas(cls, A):
2323

2424

2525
def get_property(self, name, *, mask=None):
    """Return the cached graph property ``name``, computing it if needed.

    ``name`` is first resolved through ``_cache_aliases`` so alias spellings
    map to a canonical key; the matching getter from ``_get_property`` is then
    called with this graph and the optional ``mask``.
    """
    return self._get_property[self._cache_aliases.get(name, name)](self, mask)
2727

2828

2929
def get_properties(self, names, *, mask=None):
    """Return a list of cached graph properties, one per requested name.

    ``names`` may be an iterable of property names or a single string of names
    separated by commas and/or spaces.  Each name is resolved through
    ``_cache_aliases``.  Properties are computed in ``_property_priority``
    order, but the returned list matches the order requested.
    """
    if isinstance(names, str):
        # Separated by commas and/or spaces
        names = [
            self._cache_aliases.get(name, name)
            for name in names.replace(" ", ",").split(",")
            if name
        ]
    else:
        names = [self._cache_aliases.get(name, name) for name in names]
    results = {
        name: self._get_property[name](self, mask)
        for name in sorted(names, key=self._property_priority.__getitem__)
    }
    return [results[name] for name in names]
@@ -70,3 +76,9 @@ def vector_to_dict(self, v, *, mask=None, fillvalue=None):
7076
elif fillvalue is not None and v.nvals < v.size:
7177
v(mask=~v.S) << fillvalue
7278
return {self._id_to_key[index]: value for index, value in zip(*v.to_values(sort=False))}
79+
80+
81+
def _cacheit(self, key, func, *args, **kwargs):
82+
if key not in self._cache:
83+
self._cache[key] = func(*args, **kwargs)
84+
return self._cache[key]

0 commit comments

Comments
 (0)