Merge branch 'devel' of https://github.com/Neo23x0/sigma

2019-12-13 22:01:40 +01:00
parent 5930c1c290 ee4138c48e
commit d2a940a0a6
6 changed files with 157 additions and 21 deletions
@@ -16,7 +16,6 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

 import asyncio
-import functools
 import sys
 import pprint
 import elasticsearch
@@ -41,6 +40,7 @@ except elasticsearch.exceptions.RequestError as e:

 queries = asyncio.Queue()

+
 # sigmac runner coroutinne
 async def run_sigmac():
    sigmac = asyncio.create_subprocess_exec(
@@ -70,6 +70,7 @@ async def run_sigmac():
    print("* sigmac returned with exit code {}".format(exitcode))
    return exitcode

+
 # Generated query checker loop
 async def check_queries():
    failed = list()
@@ -6,3 +6,4 @@ elasticsearch-async
 setuptools
 wheel
 pymisp
+pytest
@@ -173,9 +173,6 @@ class ElasticsearchDSLBackend(RulenameCommentMixin, ElasticsearchWildcardHandlin
        self.queries[-1]['query']['constant_score']['filter'] = self.generateNode(parsed.parsedSearch)
        if parsed.parsedAgg:
            self.generateAggregation(parsed.parsedAgg)
-        # if parsed.parsedAgg:
-        #     fields += self.generateAggregation(parsed.parsedAgg)
-        # self.fields.update(fields)

    def generateANDNode(self, node):
        andNode = {'bool': {'must': []}}
@@ -253,32 +250,81 @@ class ElasticsearchDSLBackend(RulenameCommentMixin, ElasticsearchWildcardHandlin
        return {'exists': {'field': node.item}}

    def generateAggregation(self, agg):
+        """
+        Generates an Elasticsearch nested aggregation given a SigmaAggregationParser object
+
+        Two conditions are handled here:
+        a) "count() by MyGroupedField > X"
+        b) "count(MyDistinctFieldName) by MyGroupedField > X'
+
+        The case (b) is translated to a the following equivalent SQL query
+
+        ```
+        SELECT MyDistinctFieldName, COUNT(DISTINCT MyDistinctFieldName) FROM Table
+        GROUP BY MyGroupedField HAVING COUNT(DISTINCT MyDistinctFieldName) > 1
+        ```
+
+        The resulting aggregation is set on 'self.queries[-1]["aggs"]' as a Python dict
+
+        :param agg: Input SigmaAggregationParser object that defines a condition
+        :return: None
+        """
        if agg:
            if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_COUNT:
                if agg.groupfield is not None:
-                    self.queries[-1]['aggs'] = {
-                        '%s_count'%(agg.groupfield or ""): {
-                            'terms': {
-                                'field': '%s'%(agg.groupfield + ".keyword" or "")
-                            },
-                            'aggs': {
-                                'limit': {
-                                    'bucket_selector': {
-                                        'buckets_path': {
-                                            'count': '%s_count'%(agg.groupfield or "")
+                    # If the aggregation is 'count(MyDistinctFieldName) by MyGroupedField > XYZ'
+                    if agg.aggfield is not None:
+                        count_agg_group_name = "{}_count".format(agg.groupfield)
+                        count_distinct_agg_name = "{}_distinct".format(agg.aggfield)
+                        script_limit = "params.count {} {}".format(agg.cond_op, agg.condition)
+                        self.queries[-1]['aggs'] = {
+                            count_agg_group_name: {
+                                    "terms": {
+                                        "field": "{}.keyword".format(agg.groupfield)
+                                    },
+                                    "aggs": {
+                                        count_distinct_agg_name: {
+                                            "cardinality": {
+                                                "field": "{}.keyword".format(agg.aggfield)
+                                            }
                                        },
-                                        'script': 'params.count %s %s'%(agg.cond_op, agg.condition)
+                                        "limit": {
+                                            "bucket_selector": {
+                                                "buckets_path": {
+                                                    "count": count_distinct_agg_name
+                                                },
+                                                "script": script_limit
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                    else:  # if the condition is count() by MyGroupedField > XYZ
+                        group_aggname = "{}_count".format(agg.groupfield)
+                        self.queries[-1]['aggs'] = {
+                            group_aggname: {
+                                'terms': {
+                                    'field': '%s' % (agg.groupfield + ".keyword")
+                                },
+                                'aggs': {
+                                    'limit': {
+                                        'bucket_selector': {
+                                            'buckets_path': {
+                                                'count': group_aggname
+                                            },
+                                            'script': 'params.count %s %s' % (agg.cond_op, agg.condition)
+                                        }
                                    }
                                }
                            }
                        }
-                    }
            else:
+                funcname = ""
                for name, idx in agg.aggfuncmap.items():
                    if idx == agg.aggfunc:
                        funcname = name
                        break
-                raise NotImplementedError("%s : The '%s' aggregation operator is not yet implemented for this backend"%(self.title, funcname))
+                raise NotImplementedError("%s : The '%s' aggregation operator is not yet implemented for this backend" % (self.title, funcname))

    def generateBefore(self, parsed):
        self.queries.append({'query': {'constant_score': {'filter': {}}}})
@@ -895,14 +941,18 @@ class ElastalertBackend(MultiRuleOutputMixin):

    def generateAggregation(self, agg):
        if agg:
-            if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_COUNT or agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_MIN or agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_MAX or agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_AVG or agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_SUM:
+            if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_COUNT or \
+                    agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_MIN or \
+                    agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_MAX or \
+                    agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_AVG or \
+                    agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_SUM:
                return ""
            else:
                for name, idx in agg.aggfuncmap.items():
                    if idx == agg.aggfunc:
                        funcname = name
                        break
-                raise NotImplementedError("%s : The '%s' aggregation operator is not yet implemented for this backend"%(self.title, funcname))
+                raise NotImplementedError("%s : The '%s' aggregation operator is not yet implemented for this backend" % ( self.title, funcname))

    def convertLevel(self, level):
        return {
@@ -24,6 +24,7 @@ COND_OR   = 2
 COND_NOT  = 3
 COND_NULL = 4

+
 # Debugging code
 def dumpNode(node, indent=''):   # pragma: no cover
    """
@@ -42,6 +43,7 @@ def dumpNode(node, indent=''):   # pragma: no cover
                                   repr(node)))
    return node

+
 # Condition Tokenizer
 class SigmaConditionToken:
    """Token of a Sigma condition expression"""
@@ -100,6 +102,7 @@ class SigmaConditionToken:
    def __str__(self):  # pragma: no cover
        return "[ Token: %s: '%s' ]" % (self.tokenstr[self.type], self.matched)

+
 class SigmaConditionTokenizer:
    """Tokenize condition string into token sequence"""
    tokendefs = [      # list of tokens, preferred recognition in given order, (token identifier, matching regular expression). Ignored if token id == None
@@ -172,6 +175,7 @@ class SigmaConditionTokenizer:
    def index(self, item):
        return self.tokens.index(item)

+
 ### Parse Tree Node Classes ###
 class ParseTreeNode:
    """Parse Tree Node Base Class"""
@@ -181,6 +185,7 @@ class ParseTreeNode:
    def __str__(self):  # pragma: no cover
        return "[ %s: %s ]" % (self.__doc__, str([str(item) for item in self.items]))

+
 class ConditionBase(ParseTreeNode):
    """Base class for conditional operations"""
    op = COND_NONE
@@ -198,6 +203,7 @@ class ConditionBase(ParseTreeNode):
    def __len__(self):
        return len(self.items)

+
 class ConditionAND(ConditionBase):
    """AND Condition"""
    op = COND_AND
@@ -208,10 +214,12 @@ class ConditionAND(ConditionBase):
        else:       # called by parser, use given values
            self.items = args

+
 class ConditionOR(ConditionAND):
    """OR Condition"""
    op = COND_OR

+
 class ConditionNOT(ConditionBase):
    """NOT Condition"""
    op = COND_NOT
@@ -235,19 +243,23 @@ class ConditionNOT(ConditionBase):
        except IndexError:
            return None

+
 class ConditionNULLValue(ConditionNOT):
    """Condition: Field value is empty or doesn't exists"""
    pass

+
 class ConditionNotNULLValue(ConditionNULLValue):
    """Condition: Field value is not empty"""
    pass

+
 class NodeSubexpression(ParseTreeNode):
    """Subexpression"""
    def __init__(self, subexpr):
        self.items = subexpr

+
 # Parse tree generators: generate parse tree nodes from extended conditions
 def generateXOf(sigma, val, condclass):
    """
@@ -274,18 +286,22 @@ def generateXOf(sigma, val, condclass):
    else:                               # OR across all items of definition
        return NodeSubexpression(sigma.parse_definition_byname(val.matched, condclass))

+
 def generateAllOf(sigma, op, val):
    """Convert 'all of x' expressions into ConditionAND"""
    return generateXOf(sigma, val, ConditionAND)

+
 def generateOneOf(sigma, op, val):
    """Convert '1 of x' expressions into ConditionOR"""
    return generateXOf(sigma, val, ConditionOR)

+
 def convertId(sigma, op):
    """Convert search identifiers (lists or maps) into condition nodes according to spec defaults"""
    return NodeSubexpression(sigma.parse_definition_byname(op.matched))

+
 # Optimizer
 class SigmaConditionOptimizer:
    """
@@ -548,7 +564,8 @@ class SigmaConditionParser:

    def __len__(self):  # pragma: no cover
        return len(self.parsedSearch)
- 
+
+
 # Aggregation parser
 class SigmaAggregationParser(SimpleParser):
    """Parse Sigma aggregation expression and provide parsed data"""
@@ -599,7 +616,7 @@ class SigmaAggregationParser(SimpleParser):
                SigmaConditionToken.TOKEN_ID: (None, "store_search_id", 9),
            },
            ]
-    finalstates = { -1, 9 }
+    finalstates = {-1, 9}

    # Aggregation functions
    AGGFUNC_COUNT = 1
@@ -0,0 +1,65 @@
+from sigma.backends.elasticsearch import ElasticsearchDSLBackend
+from sigma.configuration import SigmaConfiguration
+from sigma.parser.condition import SigmaAggregationParser
+
+
+def test_backend_elastic():
+    """
+    Test aggregation of the form
+
+    count(aggfield) by GroupField < 3
+    """
+    sigma_config = SigmaConfiguration()
+    backend = ElasticsearchDSLBackend(sigma_config)
+
+    # setup the aggregator input object without calling __init__()
+    agg = object.__new__(SigmaAggregationParser)
+    agg.condition = "3"
+    agg.cond_op = "<"
+    agg.aggfunc = SigmaAggregationParser.AGGFUNC_COUNT
+    agg.aggfield = "aggfield"
+    agg.groupfield = "GroupField"
+
+    # Make queries non-empty
+    backend.queries = [{}]
+
+    backend.generateAggregation(agg)
+
+    inner_agg = backend.queries[0]["aggs"]["GroupField_count"]["aggs"]
+    bucket_selector = backend.queries[0]["aggs"]["GroupField_count"]["aggs"]["limit"]["bucket_selector"]
+    assert len(backend.queries) == 1, "backend has exactly one query"
+    assert ("GroupField_count" in backend.queries[0]["aggs"]), "GroupField_count is the top aggregation key"
+    assert ("aggfield_distinct" in backend.queries[0]["aggs"]["GroupField_count"]["aggs"]), "aggfield_distinct is the nested aggregation key"
+    assert ("GroupField_count" in backend.queries[0]["aggs"]), "GroupField_count is the top aggregation key"
+    assert "{}.keyword".format(agg.aggfield) == inner_agg["aggfield_distinct"]["cardinality"]["field"], "inner agg field must have suffix .keyword"
+    assert ("params.count < 3" in bucket_selector["script"]), "bucket selector script must be 'params.count < 3'"
+    assert "count" in bucket_selector["buckets_path"], "buckets_path must be 'count'"
+
+
+def test_backend_elastic_count_nofield_agg():
+    """
+    Test aggregation of the form
+
+    count() by GroupedField < 3
+    """
+
+    sigma_config = SigmaConfiguration()
+    backend = ElasticsearchDSLBackend(sigma_config)
+
+    # setup the aggregator input object without calling __init__()
+    agg = object.__new__(SigmaAggregationParser)
+    agg.condition = "3"
+    agg.cond_op = "<"
+    agg.aggfunc = SigmaAggregationParser.AGGFUNC_COUNT
+    agg.aggfield = None
+    agg.groupfield = "GroupedField"
+
+    # Make queries non-empty
+    backend.queries = [{}]
+    backend.generateAggregation(agg)
+    bucket_selector = backend.queries[0]["aggs"]["GroupedField_count"]["aggs"]["limit"]["bucket_selector"]
+
+    assert len(backend.queries) == 1, "backend has exactly one query"
+    assert ("GroupedField_count" in backend.queries[0]["aggs"]), "GroupedField_count is the top aggregation key"
+    assert ("params.count < 3" in bucket_selector["script"]), "bucket selector script must be 'params.count < 3'"
+    assert "count" in bucket_selector["buckets_path"], "buckets_path must be 'count'"
@@ -1 +1,3 @@
+
 def test_collection():
+    pass