[analyse] speed up Query analysis for solutions (closes #88559) stable
authorPierre-Yves David <pierre-yves.david@logilab.fr>
Mon, 20 Feb 2012 11:06:12 +0100
branchstable
changeset 684 2a175b352c78
parent 683 bbf7913bcca9
child 685 dd39f2652127
[analyse] speed up Query analysis for solutions (closes #88559) Use set instead of list for domains. This remove some N² complexity. Simplify the visite_relation code to: * avoid computation duplication, * compare string instead of Yams object, * take advantage of set.
ChangeLog
analyze.py
--- a/ChangeLog	Fri Feb 03 17:55:35 2012 +0100
+++ b/ChangeLog	Mon Feb 20 11:06:12 2012 +0100
@@ -1,6 +1,10 @@
 ChangeLog for RQL
 =================
 
+--
+* #88559: speed up query solutions analysis
+
+
 2012-02-03  --  0.31.1
     * #87988: fixed bug in simplify with sub-queries
 
--- a/analyze.py	Fri Feb 03 17:55:35 2012 +0100
+++ b/analyze.py	Mon Feb 20 11:06:12 2012 +0100
@@ -316,9 +316,9 @@
     def set_schema(self, schema):
         self.schema = schema
         # default domains for a variable
-        self._base_domain = [str(etype) for etype in schema.entities()]
-        self._nonfinal_domain = [str(etype) for etype in schema.entities()
-                                 if not etype.final]
+        self._base_domain = set(str(etype) for etype in schema.entities())
+        self._nonfinal_domain = set(str(etype) for etype in schema.entities()
+                                    if not etype.final)
 
     def solve(self, node, constraints):
         # debug info
@@ -339,7 +339,11 @@
         node.set_possible_types(sols, self.kwargs, self.var_solkey)
 
     def _visit(self, node, constraints=None):
-        """Recurse down the tree."""
+        """Recurse down the tree.
+
+            * node: rql node to process
+            * constraints: a XxxCSPProblem object.
+        """
         func = getattr(self, 'visit_%s' % node.__class__.__name__.lower())
         if constraints is None:
             func(node)
@@ -505,17 +509,21 @@
             # filter according to domain necessary for column aliases
             rhsdomain = constraints.domains[rhsvar]
             res = []
-            for fromtype, totypes in rschema.associations():
-                if not fromtype in lhsdomain:
-                    continue
-                ptypes = [str(t) for t in totypes if t in rhsdomain]
-                res.append( [ ([lhsvar], [str(fromtype)]),
-                              ([rhsvar], ptypes) ] )
+            var_types = []
+            same_var = (rhsvar == lhsvar)
+
+            for frometype, toetypes in rschema.associations():
+                fromtype = str(frometype)
+                if fromtype in lhsdomain:
+                    totypes = set(str(t) for t in toetypes)
+                    ptypes = totypes & rhsdomain
+                    res.append( [ ([lhsvar], [str(fromtype)]),
+                                  ([rhsvar], list(ptypes)) ] )
+                    if same_var and (fromtype in totypes): #ptypes ?
+                        var_types.append(fromtype)
             constraints.or_and(res)
-            if rhsvar == lhsvar:
-                res = [str(fromtype) for fromtype, totypes in rschema.associations()
-                       if (fromtype in totypes and fromtype in lhsdomain)]
-                constraints.var_has_types( lhsvar, res )
+            if same_var:
+                constraints.var_has_types( lhsvar, var_types)
         else:
             # XXX consider rhs.get_type?
             lhsdomain = constraints.domains[lhs.name]