normalize NOT to NOT EXISTS when that's the actual meaning of the query. Ease later scope handling. stable
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Wed, 26 May 2010 09:51:30 +0200
branch stable
changeset 541 65393ec1836a
parent 540 fcce1537edb3
child 542 28c79b964198
child 549 5d50dfb146a1
normalize NOT to NOT EXISTS when that's the actual meaning of the query. Ease later scope handling.
ChangeLog
base.py
nodes.py
stcheck.py
stmts.py
test/unittest_analyze.py
test/unittest_stcheck.py
--- a/ChangeLog	Tue May 25 14:12:52 2010 +0200
+++ b/ChangeLog	Wed May 26 09:51:30 2010 +0200
@@ -1,6 +1,10 @@
 ChangeLog for RQL
 =================
 
+--
+    * normalize NOT() to NOT EXISTS() when it makes sense
+
+
 2010-04-20  --  0.26.0
     * setuptools support
 
--- a/base.py	Tue May 25 14:12:52 2010 +0200
+++ b/base.py	Wed May 26 09:51:30 2010 +0200
@@ -57,13 +57,6 @@
         """
         return self.parent.scope
 
-    @property
-    def sqlscope(self):
-        """Return the SQL scope node to which this node belong (eg Select,
-        Exists or Not node)
-        """
-        return self.parent.sqlscope
-
     def get_nodes(self, klass):
         """Return the list of nodes of a given class in the subtree.
 
--- a/nodes.py	Tue May 25 14:12:52 2010 +0200
+++ b/nodes.py	Wed May 26 09:51:30 2010 +0200
@@ -268,10 +268,6 @@
     def __repr__(self, encoding=None, kwargs=None):
         return 'NOT (%s)' % repr(self.children[0])
 
-    @property
-    def sqlscope(self):
-        return self
-
     def ored(self, traverse_scope=False, _fromnode=None):
         # XXX consider traverse_scope ?
         return self.parent.ored(traverse_scope, _fromnode or self)
@@ -338,7 +334,6 @@
     @property
     def scope(self):
         return self
-    sqlscope = scope
 
     def ored(self, traverse_scope=False, _fromnode=None):
         if not traverse_scope:
@@ -1000,8 +995,6 @@
     def get_scope(self):
         return self.query
     scope = property(get_scope, set_scope)
-    sqlscope = scope
-    set_sqlscope = set_scope
 
 
 class Variable(Referenceable):
@@ -1029,7 +1022,6 @@
     def prepare_annotation(self):
         super(Variable, self).prepare_annotation()
         self.stinfo['scope'] = None
-        self.stinfo['sqlscope'] = None
 
     def _set_scope(self, key, scopenode):
         if scopenode is self.stmt or self.stinfo[key] is None:
@@ -1043,12 +1035,6 @@
         return self.stinfo['scope']
     scope = property(get_scope, set_scope)
 
-    def set_sqlscope(self, sqlscopenode):
-        self._set_scope('sqlscope', sqlscopenode)
-    def get_sqlscope(self):
-        return self.stinfo['sqlscope']
-    sqlscope = property(get_sqlscope, set_sqlscope)
-
     def valuable_references(self):
         """return the number of "valuable" references :
         references is in selection or in a non type (is) relations
--- a/stcheck.py	Tue May 25 14:12:52 2010 +0200
+++ b/stcheck.py	Wed May 26 09:51:30 2010 +0200
@@ -15,9 +15,8 @@
 #
 # You should have received a copy of the GNU Lesser General Public License along
 # with rql. If not, see <http://www.gnu.org/licenses/>.
-"""RQL Syntax tree annotator.
+"""RQL Syntax tree annotator"""
 
-"""
 __docformat__ = "restructuredtext en"
 
 from itertools import chain
@@ -27,7 +26,7 @@
 
 from rql._exceptions import BadRQLQuery
 from rql.utils import function_description
-from rql.nodes import (VariableRef, Constant, Not, Exists, Function,
+from rql.nodes import (Relation, VariableRef, Constant, Not, Exists, Function,
                        Variable, variable_refs)
 from rql.stmts import Union
 
@@ -310,6 +309,37 @@
         state.under_not.append(True)
     def leave_not(self, not_, state):
         state.under_not.pop()
+        # NOT normalization
+        child = not_.children[0]
+        if self._should_wrap_by_exists(child):
+            not_.remove(child)
+            not_.append(Exists(child))
+
+    def _should_wrap_by_exists(self, child):
+        if isinstance(child, Exists):
+            return False
+        if not isinstance(child, Relation):
+            return True
+        if child.r_type == 'identity':
+            return False
+        rschema = self.schema.rschema(child.r_type)
+        if rschema.final:
+            return False
+        # XXX no exists for `inlined` relation (allow IS NULL optimization)
+        # unless the lhs variable is only referenced from this negated relation,
+        # in which case it's *not* in the statement's scope, hence EXISTS should
+        # be added anyway
+        if rschema.inlined:
+            references = child.children[0].variable.references()
+            valuable = 0
+            for vref in references:
+                rel = vref.relation()
+                if rel is None or not rel.is_types_restriction():
+                    if valuable:
+                        return False
+                    valuable = 1
+            return True
+        return not child.is_types_restriction()
 
     def visit_relation(self, relation, state):
         if relation.optional and state.under_not:
@@ -349,6 +379,7 @@
                                     '(use IN for %s if desired)' % lhsvar.name)
                     else:
                         state.add_var_info(lhsvar, VAR_HAS_UID_REL)
+
             for vref in relation.children[1].get_nodes(VariableRef):
                 state.add_var_info(vref.variable, VAR_HAS_REL)
         try:
@@ -454,9 +485,8 @@
             for vref in term.get_nodes(VariableRef):
                 vref.variable.stinfo['selected'].add(i)
                 vref.variable.set_scope(node)
-                vref.variable.set_sqlscope(node)
         if node.where is not None:
-            node.where.accept(self, node, node)
+            node.where.accept(self, node)
 
     visit_insert = visit_delete = visit_set = _visit_stmt
 
@@ -538,23 +568,23 @@
                 sol[newvar.name] = sol[var.name]
         rel = exists.add_relation(var, 'identity', newvar)
         # we have to force visit of the introduced relation
-        self.visit_relation(rel, exists, exists)
+        self.visit_relation(rel, exists)
         return newvar
 
     # tree nodes ##############################################################
 
-    def visit_exists(self, node, scope, sqlscope):
-        node.children[0].accept(self, node, node)
+    def visit_exists(self, node, scope):
+        node.children[0].accept(self, node)
 
-    def visit_not(self, node, scope, sqlscope):
-        node.children[0].accept(self, scope, node)
+    def visit_not(self, node, scope):
+        node.children[0].accept(self, scope)
 
-    def visit_and(self, node, scope, sqlscope):
-        node.children[0].accept(self, scope, sqlscope)
-        node.children[1].accept(self, scope, sqlscope)
+    def visit_and(self, node, scope):
+        node.children[0].accept(self, scope)
+        node.children[1].accept(self, scope)
     visit_or = visit_and
 
-    def visit_relation(self, relation, scope, sqlscope):
+    def visit_relation(self, relation, scope):
         #assert relation.parent, repr(relation)
         lhs, rhs = relation.get_parts()
         # may be a constant once rqlst has been simplified
@@ -591,7 +621,6 @@
         rschema = self.schema.rschema(rtype)
         if lhsvar is not None:
             lhsvar.set_scope(scope)
-            lhsvar.set_sqlscope(sqlscope)
             lhsvar.stinfo['relations'].add(relation)
             if rtype in self.special_relations:
                 key = '%srels' % self.special_relations[rtype]
@@ -609,7 +638,6 @@
         for vref in rhs.get_nodes(VariableRef):
             var = vref.variable
             var.set_scope(scope)
-            var.set_sqlscope(sqlscope)
             var.stinfo['relations'].add(relation)
             var.stinfo['rhsrelations'].add(relation)
             if vref is rhs.children[0] and rschema.final:
--- a/stmts.py	Tue May 25 14:12:52 2010 +0200
+++ b/stmts.py	Wed May 26 09:51:30 2010 +0200
@@ -168,7 +168,6 @@
     @property
     def scope(self):
         return self
-    sqlscope = scope
 
     def ored(self, traverse_scope=False, _fromnode=None):
         return None
--- a/test/unittest_analyze.py	Tue May 25 14:12:52 2010 +0200
+++ b/test/unittest_analyze.py	Wed May 26 09:51:30 2010 +0200
@@ -301,7 +301,7 @@
                                 {'X': 'Student', 'T': 'Eetype'}])
 
     def test_not(self):
-        node = self.helper.parse('Any X WHERE not X is Person')
+        node = self.helper.parse('Any X WHERE NOT X is Person')
         self.helper.compute_solutions(node, debug=DEBUG)
         sols = sorted(node.children[0].solutions)
         expected = ALL_SOLS[:]
--- a/test/unittest_stcheck.py	Tue May 25 14:12:52 2010 +0200
+++ b/test/unittest_stcheck.py	Wed May 26 09:51:30 2010 +0200
@@ -226,7 +226,7 @@
         newroot.append(copy)
         self.annotate(newroot)
         self.simplify(newroot)
-        self.assertEquals(newroot.as_string(), 'Any 1,U WHERE 2 owned_by U, NOT 1 owned_by U')
+        self.assertEquals(newroot.as_string(), 'Any 1,U WHERE 2 owned_by U, NOT EXISTS(1 owned_by U)')
         self.assertEquals(copy.defined_vars['U'].valuable_references(), 3)
 
 
@@ -241,23 +241,60 @@
 #         self.annotate(rqlst)
 #         self.failUnless(rqlst.defined_vars['L'].stinfo['attrvar'])
 
-    def test_is_rel_no_scope(self):
-        """is relation used as type restriction should not affect variable's scope,
-        and should not be included in stinfo['relations']"""
+    def test_is_rel_no_scope_1(self):
+        """is relation used as type restriction should not affect variable's
+        scope, and should not be included in stinfo['relations']
+        """
         rqlst = self.parse('Any X WHERE C is Company, EXISTS(X work_for C)').children[0]
         C = rqlst.defined_vars['C']
         self.failIf(C.scope is rqlst, C.scope)
         self.assertEquals(len(C.stinfo['relations']), 1)
+
+    def test_is_rel_no_scope_2(self):
         rqlst = self.parse('Any X, ET WHERE C is ET, EXISTS(X work_for C)').children[0]
         C = rqlst.defined_vars['C']
         self.failUnless(C.scope is rqlst, C.scope)
         self.assertEquals(len(C.stinfo['relations']), 2)
 
-    def test_subquery_annotation(self):
+
+    def test_not_rel_normalization_1(self):
+        rqlst = self.parse('Any X WHERE C is Company, NOT X work_for C').children[0]
+        self.assertEquals(rqlst.as_string(), 'Any X WHERE C is Company, NOT EXISTS(X work_for C)')
+        C = rqlst.defined_vars['C']
+        self.failIf(C.scope is rqlst, C.scope)
+
+    def test_not_rel_normalization_2(self):
+        rqlst = self.parse('Any X, ET WHERE C is ET, NOT X work_for C').children[0]
+        self.assertEquals(rqlst.as_string(), 'Any X,ET WHERE C is ET, NOT EXISTS(X work_for C)')
+        C = rqlst.defined_vars['C']
+        self.failUnless(C.scope is rqlst, C.scope)
+
+    def test_not_rel_normalization_3(self):
+        rqlst = self.parse('Any X WHERE C is Company, X work_for C, NOT C name "World Company"').children[0]
+        self.assertEquals(rqlst.as_string(), "Any X WHERE C is Company, X work_for C, NOT C name 'World Company'")
+        C = rqlst.defined_vars['C']
+        self.failUnless(C.scope is rqlst, C.scope)
+
+    def test_not_rel_normalization_4(self):
+        rqlst = self.parse('Any X WHERE C is Company, NOT (X work_for C, C name "World Company")').children[0]
+        self.assertEquals(rqlst.as_string(), "Any X WHERE C is Company, NOT EXISTS(X work_for C, C name 'World Company')")
+        C = rqlst.defined_vars['C']
+        self.failIf(C.scope is rqlst, C.scope)
+
+    def test_not_rel_normalization_5(self):
+        rqlst = self.parse('Any X WHERE X work_for C, EXISTS(C identity D, NOT Y work_for D, D name "World Company")').children[0]
+        self.assertEquals(rqlst.as_string(), "Any X WHERE X work_for C, EXISTS(C identity D, NOT EXISTS(Y work_for D), D name 'World Company')")
+        D = rqlst.defined_vars['D']
+        self.failIf(D.scope is rqlst, D.scope)
+        self.failUnless(D.scope.parent.scope is rqlst, D.scope.parent.scope)
+
+    def test_subquery_annotation_1(self):
         rqlst = self.parse('Any X WITH X BEING (Any X WHERE C is Company, EXISTS(X work_for C))').children[0]
         C = rqlst.with_[0].query.children[0].defined_vars['C']
         self.failIf(C.scope is rqlst, C.scope)
         self.assertEquals(len(C.stinfo['relations']), 1)
+
+    def test_subquery_annotation_2(self):
         rqlst = self.parse('Any X,ET WITH X,ET BEING (Any X, ET WHERE C is ET, EXISTS(X work_for C))').children[0]
         C = rqlst.with_[0].query.children[0].defined_vars['C']
         self.failUnless(C.scope is rqlst.with_[0].query.children[0], C.scope)