rm CondaPkg environment

2023-04-06 13:53:47 +07:00
parent 0a57ed7884
commit c43d949309
3329 changed files with 5725 additions and 447022 deletions
--- a/.CondaPkg/env/Lib/site-packages/networkx/algorithms/similarity.py
+++ b/.CondaPkg/env/Lib/site-packages/networkx/algorithms/similarity.py
@@ -16,9 +16,8 @@ alternative GED algorithms, in order to improve the choices available.
 import math
 import time
 import warnings
-from functools import reduce
+from dataclasses import dataclass
 from itertools import product
-from operator import mul

 import networkx as nx

@@ -187,7 +186,7 @@ def graph_edit_distance(

    """
    bestcost = None
-    for vertex_path, edge_path, cost in optimize_edit_paths(
+    for _, _, cost in optimize_edit_paths(
        G1,
        G2,
        node_match,
@@ -346,7 +345,7 @@ def optimal_edit_paths(
       https://hal.archives-ouvertes.fr/hal-01168816

    """
-    paths = list()
+    paths = []
    bestcost = None
    for vertex_path, edge_path, cost in optimize_edit_paths(
        G1,
@@ -364,7 +363,7 @@ def optimal_edit_paths(
    ):
        # assert bestcost is None or cost <= bestcost
        if bestcost is not None and cost < bestcost:
-            paths = list()
+            paths = []
        paths.append((vertex_path, edge_path))
        bestcost = cost
    return paths, bestcost
@@ -503,7 +502,7 @@ def optimize_graph_edit_distance(
       <10.5220/0005209202710278>. <hal-01168816>
       https://hal.archives-ouvertes.fr/hal-01168816
    """
-    for vertex_path, edge_path, cost in optimize_edit_paths(
+    for _, _, cost in optimize_edit_paths(
        G1,
        G2,
        node_match,
@@ -672,18 +671,12 @@ def optimize_edit_paths(
    import scipy as sp
    import scipy.optimize  # call as sp.optimize

+    @dataclass
    class CostMatrix:
-        def __init__(self, C, lsa_row_ind, lsa_col_ind, ls):
-            # assert C.shape[0] == len(lsa_row_ind)
-            # assert C.shape[1] == len(lsa_col_ind)
-            # assert len(lsa_row_ind) == len(lsa_col_ind)
-            # assert set(lsa_row_ind) == set(range(len(lsa_row_ind)))
-            # assert set(lsa_col_ind) == set(range(len(lsa_col_ind)))
-            # assert ls == C[lsa_row_ind, lsa_col_ind].sum()
-            self.C = C
-            self.lsa_row_ind = lsa_row_ind
-            self.lsa_col_ind = lsa_col_ind
-            self.ls = ls
+        C: ...
+        lsa_row_ind: ...
+        lsa_col_ind: ...
+        ls: ...

    def make_CostMatrix(C, m, n):
        # assert(C.shape == (m + n, m + n))
@@ -694,9 +687,9 @@ def optimize_edit_paths(
        # NOTE: fast reduce of Cv relies on it
        # assert len(lsa_row_ind) == len(lsa_col_ind)
        indexes = zip(range(len(lsa_row_ind)), lsa_row_ind, lsa_col_ind)
-        subst_ind = list(k for k, i, j in indexes if i < m and j < n)
+        subst_ind = [k for k, i, j in indexes if i < m and j < n]
        indexes = zip(range(len(lsa_row_ind)), lsa_row_ind, lsa_col_ind)
-        dummy_ind = list(k for k, i, j in indexes if i >= m and j >= n)
+        dummy_ind = [k for k, i, j in indexes if i >= m and j >= n]
        # assert len(subst_ind) == len(dummy_ind)
        lsa_row_ind[dummy_ind] = lsa_col_ind[subst_ind] + m
        lsa_col_ind[dummy_ind] = lsa_row_ind[subst_ind] + n
@@ -724,7 +717,7 @@ def optimize_edit_paths(
            rind[rind >= k] -= 1
        return rind

-    def match_edges(u, v, pending_g, pending_h, Ce, matched_uv=[]):
+    def match_edges(u, v, pending_g, pending_h, Ce, matched_uv=None):
        """
        Parameters:
            u, v: matched vertices, u=None or v=None for
@@ -748,7 +741,10 @@ def optimize_edit_paths(
        # only attempt to match edges after one node match has been made
        # this will stop self-edges on the first node being automatically deleted
        # even when a substitution is the better option
-        if matched_uv:
+        if matched_uv is None or len(matched_uv) == 0:
+            g_ind = []
+            h_ind = []
+        else:
            g_ind = [
                i
                for i in range(M)
@@ -765,9 +761,6 @@ def optimize_edit_paths(
                    pending_h[j][:2] in ((q, v), (v, q), (q, q)) for p, q in matched_uv
                )
            ]
-        else:
-            g_ind = []
-            h_ind = []

        m = len(g_ind)
        n = len(h_ind)
@@ -778,9 +771,9 @@ def optimize_edit_paths(

            # Forbid structurally invalid matches
            # NOTE: inf remembered from Ce construction
-            for k, i in zip(range(m), g_ind):
+            for k, i in enumerate(g_ind):
                g = pending_g[i][:2]
-                for l, j in zip(range(n), h_ind):
+                for l, j in enumerate(h_ind):
                    h = pending_h[j][:2]
                    if nx.is_directed(G1) or nx.is_directed(G2):
                        if any(
@@ -801,14 +794,14 @@ def optimize_edit_paths(
                    C[k, l] = inf

            localCe = make_CostMatrix(C, m, n)
-            ij = list(
+            ij = [
                (
                    g_ind[k] if k < m else M + h_ind[l],
                    h_ind[l] if l < n else N + g_ind[k],
                )
                for k, l in zip(localCe.lsa_row_ind, localCe.lsa_col_ind)
                if k < m or l < n
-            )
+            ]

        else:
            ij = []
@@ -822,8 +815,7 @@ def optimize_edit_paths(
            m_i = m - sum(1 for t in i if t < m)
            n_j = n - sum(1 for t in j if t < n)
            return make_CostMatrix(reduce_C(Ce.C, i, j, m, n), m_i, n_j)
-        else:
-            return Ce
+        return Ce

    def get_edit_ops(
        matched_uv, pending_u, pending_v, Cv, pending_g, pending_h, Ce, matched_cost
@@ -881,7 +873,7 @@ def optimize_edit_paths(
            yield (i, j), Cv_ij, xy, Ce_xy, Cv.C[i, j] + localCe.ls

        # 2) other candidates, sorted by lower-bound cost estimate
-        other = list()
+        other = []
        fixed_i, fixed_j = i, j
        if m <= n:
            candidates = (
@@ -982,8 +974,9 @@ def optimize_edit_paths(
            # assert not len(pending_g)
            # assert not len(pending_h)
            # path completed!
-            # assert matched_cost <= maxcost.value
-            maxcost.value = min(maxcost.value, matched_cost)
+            # assert matched_cost <= maxcost_value
+            nonlocal maxcost_value
+            maxcost_value = min(maxcost_value, matched_cost)
            yield matched_uv, matched_gh, matched_cost

        else:
@@ -1016,16 +1009,16 @@ def optimize_edit_paths(
                            pending_h[y] if y < len_h else None,
                        )
                    )
-                sortedx = list(sorted(x for x, y in xy))
-                sortedy = list(sorted(y for x, y in xy))
-                G = list(
+                sortedx = sorted(x for x, y in xy)
+                sortedy = sorted(y for x, y in xy)
+                G = [
                    (pending_g.pop(x) if x < len(pending_g) else None)
                    for x in reversed(sortedx)
-                )
-                H = list(
+                ]
+                H = [
                    (pending_h.pop(y) if y < len(pending_h) else None)
                    for y in reversed(sortedy)
-                )
+                ]

                yield from get_edit_paths(
                    matched_uv,
@@ -1051,7 +1044,7 @@ def optimize_edit_paths(
                for y, h in zip(sortedy, reversed(H)):
                    if h is not None:
                        pending_h.insert(y, h)
-                for t in xy:
+                for _ in xy:
                    matched_gh.pop()

    # Initialization
@@ -1167,13 +1160,7 @@ def optimize_edit_paths(
    # debug_print(Ce.C)
    # debug_print()

-    class MaxCost:
-        def __init__(self):
-            # initial upper-bound estimate
-            # NOTE: should work for empty graph
-            self.value = Cv.C.sum() + Ce.C.sum() + 1
-
-    maxcost = MaxCost()
+    maxcost_value = Cv.C.sum() + Ce.C.sum() + 1

    if timeout is not None:
        if timeout <= 0:
@@ -1187,10 +1174,11 @@ def optimize_edit_paths(
        if upper_bound is not None:
            if cost > upper_bound:
                return True
-        if cost > maxcost.value:
+        if cost > maxcost_value:
            return True
-        elif strictly_decreasing and cost >= maxcost.value:
+        if strictly_decreasing and cost >= maxcost_value:
            return True
+        return False

    # Now go!

@@ -1204,7 +1192,7 @@ def optimize_edit_paths(
        # assert sorted(G1.edges) == sorted(g for g, h in edge_path if g is not None)
        # assert sorted(G2.edges) == sorted(h for g, h in edge_path if h is not None)
        # print(vertex_path, edge_path, cost, file = sys.stderr)
-        # assert cost == maxcost.value
+        # assert cost == maxcost_value
        yield list(vertex_path), list(edge_path), cost


@@ -1324,9 +1312,9 @@ def simrank_similarity(

    if isinstance(x, np.ndarray):
        if x.ndim == 1:
-            return {node: val for node, val in zip(G, x)}
-        else:  # x.ndim == 2:
-            return {u: dict(zip(G, row)) for u, row in zip(G, x)}
+            return dict(zip(G, x))
+        # else x.ndim == 2
+        return {u: dict(zip(G, row)) for u, row in zip(G, x)}
    return x


@@ -1596,7 +1584,7 @@ def panther_similarity(G, source, k=5, path_length=5, c=0.5, delta=0.1, eps=None
    top_k_sorted = top_k_unsorted[np.argsort(S[top_k_unsorted])][::-1]

    # Add back the similarity scores
-    top_k_sorted_names = map(lambda n: node_map[n], top_k_sorted)
+    top_k_sorted_names = (node_map[n] for n in top_k_sorted)
    top_k_with_val = dict(zip(top_k_sorted_names, S[top_k_sorted]))

    # Remove the self-similarity