Remove CondaPkg environment

This commit is contained in:
ton
2023-04-06 13:53:47 +07:00
parent 0a57ed7884
commit c43d949309
3329 changed files with 5725 additions and 447022 deletions

View File

@@ -16,9 +16,8 @@ alternative GED algorithms, in order to improve the choices available.
import math
import time
import warnings
from functools import reduce
from dataclasses import dataclass
from itertools import product
from operator import mul
import networkx as nx
@@ -187,7 +186,7 @@ def graph_edit_distance(
"""
bestcost = None
for vertex_path, edge_path, cost in optimize_edit_paths(
for _, _, cost in optimize_edit_paths(
G1,
G2,
node_match,
@@ -346,7 +345,7 @@ def optimal_edit_paths(
https://hal.archives-ouvertes.fr/hal-01168816
"""
paths = list()
paths = []
bestcost = None
for vertex_path, edge_path, cost in optimize_edit_paths(
G1,
@@ -364,7 +363,7 @@ def optimal_edit_paths(
):
# assert bestcost is None or cost <= bestcost
if bestcost is not None and cost < bestcost:
paths = list()
paths = []
paths.append((vertex_path, edge_path))
bestcost = cost
return paths, bestcost
@@ -503,7 +502,7 @@ def optimize_graph_edit_distance(
<10.5220/0005209202710278>. <hal-01168816>
https://hal.archives-ouvertes.fr/hal-01168816
"""
for vertex_path, edge_path, cost in optimize_edit_paths(
for _, _, cost in optimize_edit_paths(
G1,
G2,
node_match,
@@ -672,18 +671,12 @@ def optimize_edit_paths(
import scipy as sp
import scipy.optimize # call as sp.optimize
@dataclass
class CostMatrix:
def __init__(self, C, lsa_row_ind, lsa_col_ind, ls):
# assert C.shape[0] == len(lsa_row_ind)
# assert C.shape[1] == len(lsa_col_ind)
# assert len(lsa_row_ind) == len(lsa_col_ind)
# assert set(lsa_row_ind) == set(range(len(lsa_row_ind)))
# assert set(lsa_col_ind) == set(range(len(lsa_col_ind)))
# assert ls == C[lsa_row_ind, lsa_col_ind].sum()
self.C = C
self.lsa_row_ind = lsa_row_ind
self.lsa_col_ind = lsa_col_ind
self.ls = ls
C: ...
lsa_row_ind: ...
lsa_col_ind: ...
ls: ...
def make_CostMatrix(C, m, n):
# assert(C.shape == (m + n, m + n))
@@ -694,9 +687,9 @@ def optimize_edit_paths(
# NOTE: fast reduce of Cv relies on it
# assert len(lsa_row_ind) == len(lsa_col_ind)
indexes = zip(range(len(lsa_row_ind)), lsa_row_ind, lsa_col_ind)
subst_ind = list(k for k, i, j in indexes if i < m and j < n)
subst_ind = [k for k, i, j in indexes if i < m and j < n]
indexes = zip(range(len(lsa_row_ind)), lsa_row_ind, lsa_col_ind)
dummy_ind = list(k for k, i, j in indexes if i >= m and j >= n)
dummy_ind = [k for k, i, j in indexes if i >= m and j >= n]
# assert len(subst_ind) == len(dummy_ind)
lsa_row_ind[dummy_ind] = lsa_col_ind[subst_ind] + m
lsa_col_ind[dummy_ind] = lsa_row_ind[subst_ind] + n
@@ -724,7 +717,7 @@ def optimize_edit_paths(
rind[rind >= k] -= 1
return rind
def match_edges(u, v, pending_g, pending_h, Ce, matched_uv=[]):
def match_edges(u, v, pending_g, pending_h, Ce, matched_uv=None):
"""
Parameters:
u, v: matched vertices, u=None or v=None for
@@ -748,7 +741,10 @@ def optimize_edit_paths(
# only attempt to match edges after one node match has been made
# this will stop self-edges on the first node being automatically deleted
# even when a substitution is the better option
if matched_uv:
if matched_uv is None or len(matched_uv) == 0:
g_ind = []
h_ind = []
else:
g_ind = [
i
for i in range(M)
@@ -765,9 +761,6 @@ def optimize_edit_paths(
pending_h[j][:2] in ((q, v), (v, q), (q, q)) for p, q in matched_uv
)
]
else:
g_ind = []
h_ind = []
m = len(g_ind)
n = len(h_ind)
@@ -778,9 +771,9 @@ def optimize_edit_paths(
# Forbid structurally invalid matches
# NOTE: inf remembered from Ce construction
for k, i in zip(range(m), g_ind):
for k, i in enumerate(g_ind):
g = pending_g[i][:2]
for l, j in zip(range(n), h_ind):
for l, j in enumerate(h_ind):
h = pending_h[j][:2]
if nx.is_directed(G1) or nx.is_directed(G2):
if any(
@@ -801,14 +794,14 @@ def optimize_edit_paths(
C[k, l] = inf
localCe = make_CostMatrix(C, m, n)
ij = list(
ij = [
(
g_ind[k] if k < m else M + h_ind[l],
h_ind[l] if l < n else N + g_ind[k],
)
for k, l in zip(localCe.lsa_row_ind, localCe.lsa_col_ind)
if k < m or l < n
)
]
else:
ij = []
@@ -822,8 +815,7 @@ def optimize_edit_paths(
m_i = m - sum(1 for t in i if t < m)
n_j = n - sum(1 for t in j if t < n)
return make_CostMatrix(reduce_C(Ce.C, i, j, m, n), m_i, n_j)
else:
return Ce
return Ce
def get_edit_ops(
matched_uv, pending_u, pending_v, Cv, pending_g, pending_h, Ce, matched_cost
@@ -881,7 +873,7 @@ def optimize_edit_paths(
yield (i, j), Cv_ij, xy, Ce_xy, Cv.C[i, j] + localCe.ls
# 2) other candidates, sorted by lower-bound cost estimate
other = list()
other = []
fixed_i, fixed_j = i, j
if m <= n:
candidates = (
@@ -982,8 +974,9 @@ def optimize_edit_paths(
# assert not len(pending_g)
# assert not len(pending_h)
# path completed!
# assert matched_cost <= maxcost.value
maxcost.value = min(maxcost.value, matched_cost)
# assert matched_cost <= maxcost_value
nonlocal maxcost_value
maxcost_value = min(maxcost_value, matched_cost)
yield matched_uv, matched_gh, matched_cost
else:
@@ -1016,16 +1009,16 @@ def optimize_edit_paths(
pending_h[y] if y < len_h else None,
)
)
sortedx = list(sorted(x for x, y in xy))
sortedy = list(sorted(y for x, y in xy))
G = list(
sortedx = sorted(x for x, y in xy)
sortedy = sorted(y for x, y in xy)
G = [
(pending_g.pop(x) if x < len(pending_g) else None)
for x in reversed(sortedx)
)
H = list(
]
H = [
(pending_h.pop(y) if y < len(pending_h) else None)
for y in reversed(sortedy)
)
]
yield from get_edit_paths(
matched_uv,
@@ -1051,7 +1044,7 @@ def optimize_edit_paths(
for y, h in zip(sortedy, reversed(H)):
if h is not None:
pending_h.insert(y, h)
for t in xy:
for _ in xy:
matched_gh.pop()
# Initialization
@@ -1167,13 +1160,7 @@ def optimize_edit_paths(
# debug_print(Ce.C)
# debug_print()
class MaxCost:
def __init__(self):
# initial upper-bound estimate
# NOTE: should work for empty graph
self.value = Cv.C.sum() + Ce.C.sum() + 1
maxcost = MaxCost()
maxcost_value = Cv.C.sum() + Ce.C.sum() + 1
if timeout is not None:
if timeout <= 0:
@@ -1187,10 +1174,11 @@ def optimize_edit_paths(
if upper_bound is not None:
if cost > upper_bound:
return True
if cost > maxcost.value:
if cost > maxcost_value:
return True
elif strictly_decreasing and cost >= maxcost.value:
if strictly_decreasing and cost >= maxcost_value:
return True
return False
# Now go!
@@ -1204,7 +1192,7 @@ def optimize_edit_paths(
# assert sorted(G1.edges) == sorted(g for g, h in edge_path if g is not None)
# assert sorted(G2.edges) == sorted(h for g, h in edge_path if h is not None)
# print(vertex_path, edge_path, cost, file = sys.stderr)
# assert cost == maxcost.value
# assert cost == maxcost_value
yield list(vertex_path), list(edge_path), cost
@@ -1324,9 +1312,9 @@ def simrank_similarity(
if isinstance(x, np.ndarray):
if x.ndim == 1:
return {node: val for node, val in zip(G, x)}
else: # x.ndim == 2:
return {u: dict(zip(G, row)) for u, row in zip(G, x)}
return dict(zip(G, x))
# else x.ndim == 2
return {u: dict(zip(G, row)) for u, row in zip(G, x)}
return x
@@ -1596,7 +1584,7 @@ def panther_similarity(G, source, k=5, path_length=5, c=0.5, delta=0.1, eps=None
top_k_sorted = top_k_unsorted[np.argsort(S[top_k_unsorted])][::-1]
# Add back the similarity scores
top_k_sorted_names = map(lambda n: node_map[n], top_k_sorted)
top_k_sorted_names = (node_map[n] for n in top_k_sorted)
top_k_with_val = dict(zip(top_k_sorted_names, S[top_k_sorted]))
# Remove the self-similarity