basic fact search grammar

* Establish a base grammar for handling JSON path specification and value matching, with boolean logic support and parenthesis grouping, e.g. (a.b.c="value") and ((a.b="foo") or (a="bar")) * Generate Q() results for passing to Host.objects.filter() * Hooked up via /api/v1/hosts?host_filter=... * DynamicFilterField added to store the host filter as a string, with the grammar parser attached as static methods for later use by DynamicInventory and continued use by host_filter=...
2026-05-20 07:17:40 -02:30 · 2017-03-30 17:00:19 -04:00
parent e512836580
commit a8213661fd
5 changed files with 286 additions and 3 deletions
--- a/awx/api/filters.py
+++ b/awx/api/filters.py
@@ -75,7 +75,7 @@ class FieldLookupBackend(BaseFilterBackend):
    '''
    RESERVED_NAMES = ('page', 'page_size', 'format', 'order', 'order_by',
-                      'search', 'type')
+                      'search', 'type', 'host_filter')
    SUPPORTED_LOOKUPS = ('exact', 'iexact', 'contains', 'icontains',
                         'startswith', 'istartswith', 'endswith', 'iendswith',
--- a/awx/api/views.py
+++ b/awx/api/views.py
@@ -78,6 +78,7 @@ from awx.api.metadata import RoleMetadata
 from awx.main.consumers import emit_channel_notification
 from awx.main.models.unified_jobs import ACTIVE_STATES
 from awx.main.scheduler.tasks import run_job_complete
 from awx.main.fields import DynamicFilterField
 logger = logging.getLogger('awx.api.views')
@@ -1701,6 +1702,14 @@ class HostList(ListCreateAPIView):
    serializer_class = HostSerializer
    capabilities_prefetch = ['inventory.admin']
    def get_queryset(self):
        '''
        Return the parent view's queryset, narrowed by the optional
        ``host_filter`` query parameter.

        When the parameter is missing or empty the parent queryset is
        returned untouched; otherwise the filter string is converted to a
        Q object by DynamicFilterField.filter_string_to_q and applied.
        '''
        queryset = super(HostList, self).get_queryset()
        host_filter = self.request.query_params.get('host_filter', None)
        if not host_filter:
            return queryset
        return queryset.filter(DynamicFilterField.filter_string_to_q(host_filter))
 class HostDetail(RetrieveUpdateDestroyAPIView):
--- a/awx/main/fields.py
+++ b/awx/main/fields.py
@@ -1,7 +1,10 @@
 # Copyright (c) 2015 Ansible, Inc.
 # All Rights Reserved.
 # Python
 import json
 import re
 from pyparsing import infixNotation, opAssoc, Word, Optional, Literal
 # Django
 from django.db.models.signals import (
@@ -18,6 +21,7 @@ from django.db.models.fields.related import (
    ReverseManyRelatedObjectsDescriptor,
 )
 from django.utils.encoding import smart_text
 from django.db.models import Q
 # Django-JSONField
 from jsonfield import JSONField as upstream_JSONField
@@ -27,7 +31,7 @@ from awx.main.models.rbac import batch_role_ancestor_rebuilding, Role
 from awx.main.utils import get_current_apps
-__all__ = ['AutoOneToOneField', 'ImplicitRoleField', 'JSONField']
+__all__ = ['AutoOneToOneField', 'ImplicitRoleField', 'JSONField', 'DynamicFilterField']
 class JSONField(upstream_JSONField):
@@ -292,3 +296,189 @@ class ImplicitRoleField(models.ForeignKey):
        child_ids = [x for x in Role_.parents.through.objects.filter(to_role_id__in=role_ids).distinct().values_list('from_role_id', flat=True)]
        Role_.objects.filter(id__in=role_ids).delete()
        Role.rebuild_role_ancestor_list([], child_ids)
 # Character tables handed to pyparsing Word() when the filter grammar is built.
 # Every code point in 0-65535 for which isspace() is false.
 unicode_printables = u''.join(unichr(c) for c in xrange(65536) if not unichr(c).isspace())
 # NOTE(review): u'.' is not whitespace, so it is already part of the string
 # built above; this append looks redundant.
 unicode_printables += u'.'
 # Every code point in 0-65535, whitespace included.
 unicode_printables_spaces = u''.join(unichr(c) for c in xrange(65536))
 def string_to_type(t):
    '''
    Convert the text of an unquoted filter value to a Python value.

    * 'true' / 'false' (lowercase only) become True / False
    * an optionally signed run of digits becomes an int
    * an optionally signed ``digits.digits`` becomes a float
    * anything else is returned unchanged
    '''
    if t == 'true':
        return True
    if t == 'false':
        return False
    # Raw strings keep the backslash in '\.' from being read as a (invalid)
    # string escape sequence.
    if re.search(r'^[-+]?[0-9]+$', t):
        return int(t)
    if re.search(r'^[-+]?[0-9]+\.[0-9]+$', t):
        return float(t)
    return t
 class DynamicFilterField(models.TextField):
    '''
    Text field for a host filter expression, with the filter grammar attached.

    The nested Bool* classes are pyparsing parse actions. Each one exposes
    the django Q object it stands for as ``result``; filter_string_to_q()
    wires them into the grammar and returns the combined Q.
    '''

    class BoolOperand(object):
        '''
        Parse action for a single ``key=value`` comparison.

        ``t`` is the flat token list matched by the comparison grammar in
        filter_string_to_q(). A quoted key or value arrives as separate
        tokens for the quotes and the content, e.g. ``"a"=b`` is
        ``['"', 'a', '"', '=', 'b']`` and ``a=""`` is ``['a', '=', '"', '"']``.
        '''
        def __init__(self, t):
            k, v = self._extract_key_value(t)
            k, v = self._json_path_to_contains(k, v)
            self.result = Q(**{k: v})

        # TODO: We should be able to express this in the grammar and let
        #       pyparsing do the heavy lifting.
        # TODO: separate django filter requests from our custom json filter
        #       request so we don't process the key any. This could be
        #       accomplished using a whitelist or introspecting the
        #       relationship referred to to see if it's a jsonb type.
        def _json_path_to_contains(self, k, v):
            '''
            Rewrite a key holding ``[]`` array markers into a __contains lookup.

            The key is split on ``__``. Everything up to and including the
            first piece that ends in ``[]`` becomes the lookup name with
            ``__contains`` appended. Every later piece is folded into a
            nested list/dict value that ends in ``v``, for example

                a__b__c[]__d="foobar" -> ('a__b__c__contains', [{'d': 'foobar'}])

            A key without any ``[]`` piece is returned unchanged with ``v``.

            Returns the tuple (lookup name, lookup value).
            '''
            pieces = k.split('__')
            last_index = len(pieces) - 1
            flag_first_arr_found = False
            assembled_k = ''
            assembled_v = v
            # Innermost single-key dict built so far, and the empty
            # placeholder (list or dict) stored under its key.
            last_kv = None
            last_v = None
            contains_count = 0

            for i, piece in enumerate(pieces):
                if not flag_first_arr_found:
                    # Still assembling the django lookup name.
                    if piece.endswith('[]'):
                        assembled_k += '%s__contains' % (piece[0:-2])
                        contains_count += 1
                        flag_first_arr_found = True
                    elif i == last_index:
                        assembled_k += '%s' % piece
                    else:
                        assembled_k += '%s__' % piece
                    continue

                # Past the first array: grow the nested value by one level.
                new_kv = dict()
                if piece.endswith('[]'):
                    new_v = []
                    new_kv[piece[0:-2]] = new_v
                else:
                    new_v = dict()
                    new_kv[piece] = new_v

                if last_v is None:
                    # First nested level: the value becomes a list wrapper.
                    last_v = []
                    assembled_v = last_v

                if isinstance(last_v, list):
                    last_v.append(new_kv)
                elif isinstance(last_v, dict):
                    # Replace the empty dict placeholder of the previous
                    # level with the new level.
                    last_kv[next(iter(last_kv))] = new_kv

                last_v = new_v
                last_kv = new_kv
                contains_count += 1

            if contains_count > 1:
                # Drop the real value into the innermost placeholder.
                if isinstance(last_v, list):
                    last_v.append(v)
                elif isinstance(last_v, dict):
                    last_kv[next(iter(last_kv))] = v

            return (assembled_k, assembled_v)

        def _extract_key_value(self, t):
            '''
            Pull the key and the typed value out of the comparison tokens.

            A quoted value is kept as a string (``""`` gives the empty
            string); an unquoted value goes through string_to_type().

            Returns the tuple (key, value).
            Raises ParseException when the key is an empty quoted string or
            when nothing follows the ``=``.
            '''
            # Imported here because the module-level pyparsing import only
            # brings in the grammar-building names.
            from pyparsing import ParseException

            tokens = list(t)

            # key
            if tokens[0] == "\"":
                # "something"= : opening quote, content, closing quote
                if len(tokens) < 3 or tokens[2] != "\"":
                    raise ParseException('', 0, 'filter key must not be empty')
                k = tokens[1]
                v_offset = 4
            else:
                # something=
                k = tokens[0]
                v_offset = 2

            # value
            value_tokens = tokens[v_offset:]
            if not value_tokens:
                raise ParseException('', 0, 'filter key %s has no value' % k)
            if value_tokens[0] == "\"":
                # ="something" is three tokens, an empty "" only the quotes
                v = value_tokens[1] if len(value_tokens) > 2 else ""
            else:
                # no ""
                v = string_to_type(value_tokens[0])

            return (k, v)

    class BoolBinOp(object):
        '''
        Base parse action for a binary boolean operator.

        ``t[0]`` is the group ``[operand, op, operand, op, operand, ...]``;
        a chain of the same operator arrives as one group, so every operand
        is folded into the result from left to right.
        '''
        def __init__(self, t):
            # Operands sit at the even positions, operator keywords between.
            operands = [item.result for item in list(t[0])[0::2]]
            self.left = operands[0]
            self.right = operands[1]
            result = operands[0]
            for operand in operands[1:]:
                result = self.execute_logic(result, operand)
            self.result = result

        def execute_logic(self, left, right):
            '''Combine two Q objects; provided by the subclasses.'''
            raise NotImplementedError()

    class BoolAnd(BoolBinOp):
        '''Parse action for ``and``.'''
        def execute_logic(self, left, right):
            return left & right

    class BoolOr(BoolBinOp):
        '''Parse action for ``or``.'''
        def execute_logic(self, left, right):
            return left | right

    class BoolNot(object):
        '''
        Parse action for a unary ``not``; ``t[0]`` is ``['not', operand]``.
        Currently not registered in the grammar.
        '''
        def __init__(self, t):
            # Negate the operand's Q object, not the parse-action instance.
            self.right = t[0][1].result
            self.result = ~self.right

    @classmethod
    def filter_string_to_q(cls, filter_string):
        '''
        Parse a host filter expression and return the equivalent Q object.

        The expression is made of ``key=value`` comparisons joined with
        ``and`` / ``or`` and grouped with parentheses. It is wrapped in one
        extra pair of parentheses before parsing.

        Raises pyparsing's ParseException when the expression does not
        match the grammar, and RuntimeError when parsing yields no result.

        TODO:
        * handle values with " via: a.b.c.d="hello\"world"
        * handle keys with " via: a.\"b.c="yeah"
        * handle key with __ in it
        * add not support
        * transform [] into contains via: a.b.c[].d[].e.f[]="blah"
        * handle optional value quoted: a.b.c=""
        '''
        atom = Word(unicode_printables, excludeChars=['(', ')', '=', '"'])
        atom_inside_quotes = Word(unicode_printables_spaces, excludeChars=['"'])
        atom_quoted = Literal('"') + Optional(atom_inside_quotes) + Literal('"')
        EQUAL = Literal('=')

        grammar = ((atom_quoted | atom) + EQUAL + Optional((atom_quoted | atom)))
        grammar.setParseAction(cls.BoolOperand)

        boolExpr = infixNotation(grammar, [
            #("not", 1, opAssoc.RIGHT, cls.BoolNot),
            ("and", 2, opAssoc.LEFT, cls.BoolAnd),
            ("or",  2, opAssoc.LEFT, cls.BoolOr),
        ])

        res = boolExpr.parseString('(' + filter_string + ')')
        if len(res) > 0:
            return res[0].result

        raise RuntimeError("Parsing the filter_string %s wen't terribly wrong" % filter_string)
--- a/awx/main/tests/unit/api/test_filters.py
+++ b/awx/main/tests/unit/api/test_filters.py
@@ -7,10 +7,19 @@ from awx.api.filters import FieldLookupBackend
 from awx.main.models import (AdHocCommand, AuthToken, CustomInventoryScript,
                             Credential, Job, JobTemplate, SystemJob,
                             UnifiedJob, User, WorkflowJob,
-                             WorkflowJobTemplate, WorkflowJobOptions)
+                             WorkflowJobTemplate, WorkflowJobOptions,
                             InventorySource)
 from awx.main.models.jobs import JobOptions
 def test_related():
    '''
    Run get_field_from_lookup() on a lookup that spans
    inventory -> organization -> pk and print what it returns.
    Nothing is asserted; the test only fails if the call raises.
    '''
    backend = FieldLookupBackend()
    related_path = 'inventory__organization__pk'
    resolved = backend.get_field_from_lookup(InventorySource, related_path)
    field, new_lookup = resolved
    print(field)
    print(new_lookup)
@pytest.mark.parametrize(u"empty_value", [u'', ''])
 def test_empty_in(empty_value):
    field_lookup = FieldLookupBackend()
--- a/awx/main/tests/unit/test_fields.py
+++ b/awx/main/tests/unit/test_fields.py
@@ -0,0 +1,75 @@
 # Python
 import pytest
 from pyparsing import ParseException
 # AWX
 from awx.main.fields import DynamicFilterField
 # Django
 from django.db.models import Q
 class TestDynamicFilterFieldFilterStringToQ():
    '''
    Checks DynamicFilterField.filter_string_to_q() against hand-built Q
    objects. Q objects are compared through their str() form.
    '''

    @pytest.mark.parametrize("filter_string,q_expected", [
        ('facts__facts__blank=""', Q(facts__facts__blank="")),
        ('"facts__facts__ space "="f"', Q(**{ "facts__facts__ space ": "f"})),
        ('"facts__facts__ e "=no_quotes_here', Q(**{ "facts__facts__ e ": "no_quotes_here"})),
        ('a__b__c=3', Q(**{ "a__b__c": 3})),
        ('a__b__c=3.14', Q(**{ "a__b__c": 3.14})),
        ('a__b__c=true', Q(**{ "a__b__c": True})),
        ('a__b__c=false', Q(**{ "a__b__c": False})),
        ('a__b__c="true"', Q(**{ "a__b__c": "true"})),
        #('"a__b\"__c"="true"', Q(**{ "a__b\"__c": "true"})),
        #('a__b\"__c="true"', Q(**{ "a__b\"__c": "true"})),
    ])
    def test_query_generated(self, filter_string, q_expected):
        '''Single comparisons: quoting of key/value and value typing.'''
        q = DynamicFilterField.filter_string_to_q(filter_string)
        assert str(q) == str(q_expected)

    # The comma after the first entry matters: without it the two adjacent
    # string literals are concatenated into a single test case.
    @pytest.mark.parametrize("filter_string", [
        'facts__facts__blank=',
        'a__b__c__ space  =ggg',
    ])
    def test_invalid_filter_strings(self, filter_string):
        '''A missing value and an unquoted key with spaces are rejected.'''
        with pytest.raises(ParseException):
            DynamicFilterField.filter_string_to_q(filter_string)

    @pytest.mark.parametrize("filter_string,q_expected", [
        ('(a=b)', Q(**{"a": "b"})),
        ('a=b and c=d', Q(**{"a": "b"}) & Q(**{"c": "d"})),
        ('(a=b and c=d)', Q(**{"a": "b"}) & Q(**{"c": "d"})),
        ('a=b or c=d', Q(**{"a": "b"}) | Q(**{"c": "d"})),
        ('(a=b and c=d) or (e=f)', (Q(**{"a": "b"}) & Q(**{"c": "d"})) | (Q(**{"e": "f"}))),
        ('(a=b) and (c=d or (e=f and (g=h or i=j))) or (y=z)', Q(**{"a": "b"}) & (Q(**{"c": "d"}) | (Q(**{"e": "f"}) & (Q(**{"g": "h"}) | Q(**{"i": "j"})))) | Q(**{"y": "z"}))
    ])
    def test_boolean_parenthesis(self, filter_string, q_expected):
        '''and / or combinations with and without parenthesis grouping.'''
        q = DynamicFilterField.filter_string_to_q(filter_string)
        assert str(q) == str(q_expected)

    @pytest.mark.parametrize("filter_string,q_expected", [
        ('a__b__c[]=3', Q(**{ "a__b__c__contains": 3})),
        ('a__b__c[]=3.14', Q(**{ "a__b__c__contains": 3.14})),
        ('a__b__c[]=true', Q(**{ "a__b__c__contains": True})),
        ('a__b__c[]=false', Q(**{ "a__b__c__contains": False})),
        ('a__b__c[]="true"', Q(**{ "a__b__c__contains": "true"})),
        ('a__b__c[]__d[]="foobar"', Q(**{ "a__b__c__contains": [{"d": ["foobar"]}]})),
        ('a__b__c[]__d="foobar"', Q(**{ "a__b__c__contains": [{"d": "foobar"}]})),
        ('a__b__c[]__d__e="foobar"', Q(**{ "a__b__c__contains": [{"d": {"e": "foobar"}}]})),
        ('a__b__c[]__d__e[]="foobar"', Q(**{ "a__b__c__contains": [{"d": {"e": ["foobar"]}}]})),
        ('a__b__c[]__d__e__f[]="foobar"', Q(**{ "a__b__c__contains": [{"d": {"e": {"f": ["foobar"]}}}]})),
        ('(a__b__c[]__d__e__f[]="foobar") and (a__b__c[]__d__e[]="foobar")', Q(**{ "a__b__c__contains": [{"d": {"e": {"f": ["foobar"]}}}]}) & Q(**{ "a__b__c__contains": [{"d": {"e": ["foobar"]}}]})),
        #('"a__b\"__c"="true"', Q(**{ "a__b\"__c": "true"})),
        #('a__b\"__c="true"', Q(**{ "a__b\"__c": "true"})),
    ])
    def test_contains_query_generated(self, filter_string, q_expected):
        '''Keys with [] markers become __contains lookups with nested values.'''
        q = DynamicFilterField.filter_string_to_q(filter_string)
        assert str(q) == str(q_expected)
 '''
 #('"facts__quoted_val"="f\"oo"', 1),
 #('facts__facts__arr[]="foo"', 1),
 #('facts__facts__arr_nested[]__a[]="foo"', 1), 
 '''