faster first time parse generator

* Generating the set of valid Unicode characters is expensive in terms of memory. Instead, we define the grammar by negation: we list only the disallowed characters and accept everything else. This is much faster.
2026-02-27 15:58:45 -03:30 · 2017-04-03 12:43:27 -04:00
parent a8213661fd
commit 6cbdb8d0e6
2 changed files with 14 additions and 10 deletions
--- a/awx/main/fields.py
+++ b/awx/main/fields.py
@@ -4,7 +4,8 @@
 # Python
 import json
 import re
-from pyparsing import infixNotation, opAssoc, Word, Optional, Literal
+import sys
+from pyparsing import infixNotation, opAssoc, Word, Optional, Literal, CharsNotIn

 # Django
 from django.db.models.signals import (
@@ -298,11 +299,8 @@ class ImplicitRoleField(models.ForeignKey):
        Role.rebuild_role_ancestor_list([], child_ids)


-
-unicode_printables = u''.join(unichr(c) for c in xrange(65536) if not unichr(c).isspace())
-unicode_printables += u'.'
-unicode_printables_spaces = u''.join(unichr(c) for c in xrange(65536))
-
+unicode_spaces = [unichr(c) for c in xrange(sys.maxunicode) if unichr(c).isspace()]
+unicode_spaces_other = unicode_spaces + [u'(', u')', u'=', u'"']

 def string_to_type(t):
    if t == 'true':
@@ -461,8 +459,8 @@ class DynamicFilterField(models.TextField):
        * handle optional value quoted: a.b.c=""

        '''
-        atom = Word(unicode_printables, excludeChars=['(', ')', '=', '"'])
-        atom_inside_quotes = Word(unicode_printables_spaces, excludeChars=['"'])
+        atom = CharsNotIn(unicode_spaces_other)
+        atom_inside_quotes = CharsNotIn(u'"')
        atom_quoted = Literal('"') + Optional(atom_inside_quotes) + Literal('"')
        EQUAL = Literal('=')

--- a/awx/main/tests/unit/test_fields.py
+++ b/awx/main/tests/unit/test_fields.py
@@ -32,10 +32,16 @@ class TestDynamicFilterFieldFilterStringToQ():
        'a__b__c__ space  =ggg',
    ])
    def test_invalid_filter_strings(self, filter_string):
-
        with pytest.raises(ParseException):
            DynamicFilterField.filter_string_to_q(filter_string)
-    
+
+    @pytest.mark.parametrize("filter_string,q_expected", [
+        (u'(a=abc\U0001F5E3def)', Q(**{u"a": u"abc\U0001F5E3def"})),  # \U (8 hex digits): \u reads only 4, so \u1F5E3 was U+1F5E + '3'
+    ])
+    def test_unicode(self, filter_string, q_expected):
+        q = DynamicFilterField.filter_string_to_q(filter_string)  # value contains a code point above U+FFFF
+        assert str(q) == str(q_expected)
+
    @pytest.mark.parametrize("filter_string,q_expected", [
        ('(a=b)', Q(**{"a": "b"})),
        ('a=b and c=d', Q(**{"a": "b"}) & Q(**{"c": "d"})),