This commit is contained in:
Thomas Patzke
2019-12-13 22:01:40 +01:00
6 changed files with 157 additions and 21 deletions
+2 -1
View File
@@ -16,7 +16,6 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import asyncio
import functools
import sys
import pprint
import elasticsearch
@@ -41,6 +40,7 @@ except elasticsearch.exceptions.RequestError as e:
queries = asyncio.Queue()
# sigmac runner coroutinne
async def run_sigmac():
sigmac = asyncio.create_subprocess_exec(
@@ -70,6 +70,7 @@ async def run_sigmac():
print("* sigmac returned with exit code {}".format(exitcode))
return exitcode
# Generated query checker loop
async def check_queries():
failed = list()
+1
View File
@@ -6,3 +6,4 @@ elasticsearch-async
setuptools
wheel
pymisp
pytest
+68 -18
View File
@@ -173,9 +173,6 @@ class ElasticsearchDSLBackend(RulenameCommentMixin, ElasticsearchWildcardHandlin
self.queries[-1]['query']['constant_score']['filter'] = self.generateNode(parsed.parsedSearch)
if parsed.parsedAgg:
self.generateAggregation(parsed.parsedAgg)
# if parsed.parsedAgg:
# fields += self.generateAggregation(parsed.parsedAgg)
# self.fields.update(fields)
def generateANDNode(self, node):
andNode = {'bool': {'must': []}}
@@ -253,32 +250,81 @@ class ElasticsearchDSLBackend(RulenameCommentMixin, ElasticsearchWildcardHandlin
return {'exists': {'field': node.item}}
def generateAggregation(self, agg):
"""
Generates an Elasticsearch nested aggregation given a SigmaAggregationParser object
Two conditions are handled here:
a) "count() by MyGroupedField > X"
b) "count(MyDistinctFieldName) by MyGroupedField > X'
The case (b) is translated to a the following equivalent SQL query
```
SELECT MyDistinctFieldName, COUNT(DISTINCT MyDistinctFieldName) FROM Table
GROUP BY MyGroupedField HAVING COUNT(DISTINCT MyDistinctFieldName) > 1
```
The resulting aggregation is set on 'self.queries[-1]["aggs"]' as a Python dict
:param agg: Input SigmaAggregationParser object that defines a condition
:return: None
"""
if agg:
if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_COUNT:
if agg.groupfield is not None:
self.queries[-1]['aggs'] = {
'%s_count'%(agg.groupfield or ""): {
'terms': {
'field': '%s'%(agg.groupfield + ".keyword" or "")
},
'aggs': {
'limit': {
'bucket_selector': {
'buckets_path': {
'count': '%s_count'%(agg.groupfield or "")
# If the aggregation is 'count(MyDistinctFieldName) by MyGroupedField > XYZ'
if agg.aggfield is not None:
count_agg_group_name = "{}_count".format(agg.groupfield)
count_distinct_agg_name = "{}_distinct".format(agg.aggfield)
script_limit = "params.count {} {}".format(agg.cond_op, agg.condition)
self.queries[-1]['aggs'] = {
count_agg_group_name: {
"terms": {
"field": "{}.keyword".format(agg.groupfield)
},
"aggs": {
count_distinct_agg_name: {
"cardinality": {
"field": "{}.keyword".format(agg.aggfield)
}
},
'script': 'params.count %s %s'%(agg.cond_op, agg.condition)
"limit": {
"bucket_selector": {
"buckets_path": {
"count": count_distinct_agg_name
},
"script": script_limit
}
}
}
}
}
else: # if the condition is count() by MyGroupedField > XYZ
group_aggname = "{}_count".format(agg.groupfield)
self.queries[-1]['aggs'] = {
group_aggname: {
'terms': {
'field': '%s' % (agg.groupfield + ".keyword")
},
'aggs': {
'limit': {
'bucket_selector': {
'buckets_path': {
'count': group_aggname
},
'script': 'params.count %s %s' % (agg.cond_op, agg.condition)
}
}
}
}
}
}
else:
funcname = ""
for name, idx in agg.aggfuncmap.items():
if idx == agg.aggfunc:
funcname = name
break
raise NotImplementedError("%s : The '%s' aggregation operator is not yet implemented for this backend"%(self.title, funcname))
raise NotImplementedError("%s : The '%s' aggregation operator is not yet implemented for this backend" % (self.title, funcname))
def generateBefore(self, parsed):
self.queries.append({'query': {'constant_score': {'filter': {}}}})
@@ -895,14 +941,18 @@ class ElastalertBackend(MultiRuleOutputMixin):
def generateAggregation(self, agg):
if agg:
if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_COUNT or agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_MIN or agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_MAX or agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_AVG or agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_SUM:
if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_COUNT or \
agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_MIN or \
agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_MAX or \
agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_AVG or \
agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_SUM:
return ""
else:
for name, idx in agg.aggfuncmap.items():
if idx == agg.aggfunc:
funcname = name
break
raise NotImplementedError("%s : The '%s' aggregation operator is not yet implemented for this backend"%(self.title, funcname))
raise NotImplementedError("%s : The '%s' aggregation operator is not yet implemented for this backend" % ( self.title, funcname))
def convertLevel(self, level):
return {
+19 -2
View File
@@ -24,6 +24,7 @@ COND_OR = 2
COND_NOT = 3
COND_NULL = 4
# Debugging code
def dumpNode(node, indent=''): # pragma: no cover
"""
@@ -42,6 +43,7 @@ def dumpNode(node, indent=''): # pragma: no cover
repr(node)))
return node
# Condition Tokenizer
class SigmaConditionToken:
"""Token of a Sigma condition expression"""
@@ -100,6 +102,7 @@ class SigmaConditionToken:
def __str__(self): # pragma: no cover
return "[ Token: %s: '%s' ]" % (self.tokenstr[self.type], self.matched)
class SigmaConditionTokenizer:
"""Tokenize condition string into token sequence"""
tokendefs = [ # list of tokens, preferred recognition in given order, (token identifier, matching regular expression). Ignored if token id == None
@@ -172,6 +175,7 @@ class SigmaConditionTokenizer:
def index(self, item):
return self.tokens.index(item)
### Parse Tree Node Classes ###
class ParseTreeNode:
"""Parse Tree Node Base Class"""
@@ -181,6 +185,7 @@ class ParseTreeNode:
def __str__(self): # pragma: no cover
return "[ %s: %s ]" % (self.__doc__, str([str(item) for item in self.items]))
class ConditionBase(ParseTreeNode):
"""Base class for conditional operations"""
op = COND_NONE
@@ -198,6 +203,7 @@ class ConditionBase(ParseTreeNode):
def __len__(self):
return len(self.items)
class ConditionAND(ConditionBase):
"""AND Condition"""
op = COND_AND
@@ -208,10 +214,12 @@ class ConditionAND(ConditionBase):
else: # called by parser, use given values
self.items = args
class ConditionOR(ConditionAND):
"""OR Condition"""
op = COND_OR
class ConditionNOT(ConditionBase):
"""NOT Condition"""
op = COND_NOT
@@ -235,19 +243,23 @@ class ConditionNOT(ConditionBase):
except IndexError:
return None
class ConditionNULLValue(ConditionNOT):
"""Condition: Field value is empty or doesn't exists"""
pass
class ConditionNotNULLValue(ConditionNULLValue):
"""Condition: Field value is not empty"""
pass
class NodeSubexpression(ParseTreeNode):
"""Subexpression"""
def __init__(self, subexpr):
self.items = subexpr
# Parse tree generators: generate parse tree nodes from extended conditions
def generateXOf(sigma, val, condclass):
"""
@@ -274,18 +286,22 @@ def generateXOf(sigma, val, condclass):
else: # OR across all items of definition
return NodeSubexpression(sigma.parse_definition_byname(val.matched, condclass))
def generateAllOf(sigma, op, val):
"""Convert 'all of x' expressions into ConditionAND"""
return generateXOf(sigma, val, ConditionAND)
def generateOneOf(sigma, op, val):
"""Convert '1 of x' expressions into ConditionOR"""
return generateXOf(sigma, val, ConditionOR)
def convertId(sigma, op):
"""Convert search identifiers (lists or maps) into condition nodes according to spec defaults"""
return NodeSubexpression(sigma.parse_definition_byname(op.matched))
# Optimizer
class SigmaConditionOptimizer:
"""
@@ -548,7 +564,8 @@ class SigmaConditionParser:
def __len__(self): # pragma: no cover
return len(self.parsedSearch)
# Aggregation parser
class SigmaAggregationParser(SimpleParser):
"""Parse Sigma aggregation expression and provide parsed data"""
@@ -599,7 +616,7 @@ class SigmaAggregationParser(SimpleParser):
SigmaConditionToken.TOKEN_ID: (None, "store_search_id", 9),
},
]
finalstates = { -1, 9 }
finalstates = {-1, 9}
# Aggregation functions
AGGFUNC_COUNT = 1
+65
View File
@@ -0,0 +1,65 @@
from sigma.backends.elasticsearch import ElasticsearchDSLBackend
from sigma.configuration import SigmaConfiguration
from sigma.parser.condition import SigmaAggregationParser
def test_backend_elastic():
"""
Test aggregation of the form
count(aggfield) by GroupField < 3
"""
sigma_config = SigmaConfiguration()
backend = ElasticsearchDSLBackend(sigma_config)
# setup the aggregator input object without calling __init__()
agg = object.__new__(SigmaAggregationParser)
agg.condition = "3"
agg.cond_op = "<"
agg.aggfunc = SigmaAggregationParser.AGGFUNC_COUNT
agg.aggfield = "aggfield"
agg.groupfield = "GroupField"
# Make queries non-empty
backend.queries = [{}]
backend.generateAggregation(agg)
inner_agg = backend.queries[0]["aggs"]["GroupField_count"]["aggs"]
bucket_selector = backend.queries[0]["aggs"]["GroupField_count"]["aggs"]["limit"]["bucket_selector"]
assert len(backend.queries) == 1, "backend has exactly one query"
assert ("GroupField_count" in backend.queries[0]["aggs"]), "GroupField_count is the top aggregation key"
assert ("aggfield_distinct" in backend.queries[0]["aggs"]["GroupField_count"]["aggs"]), "aggfield_distinct is the nested aggregation key"
assert ("GroupField_count" in backend.queries[0]["aggs"]), "GroupField_count is the top aggregation key"
assert "{}.keyword".format(agg.aggfield) == inner_agg["aggfield_distinct"]["cardinality"]["field"], "inner agg field must have suffix .keyword"
assert ("params.count < 3" in bucket_selector["script"]), "bucket selector script must be 'params.count < 3'"
assert "count" in bucket_selector["buckets_path"], "buckets_path must be 'count'"
def test_backend_elastic_count_nofield_agg():
"""
Test aggregation of the form
count() by GroupedField < 3
"""
sigma_config = SigmaConfiguration()
backend = ElasticsearchDSLBackend(sigma_config)
# setup the aggregator input object without calling __init__()
agg = object.__new__(SigmaAggregationParser)
agg.condition = "3"
agg.cond_op = "<"
agg.aggfunc = SigmaAggregationParser.AGGFUNC_COUNT
agg.aggfield = None
agg.groupfield = "GroupedField"
# Make queries non-empty
backend.queries = [{}]
backend.generateAggregation(agg)
bucket_selector = backend.queries[0]["aggs"]["GroupedField_count"]["aggs"]["limit"]["bucket_selector"]
assert len(backend.queries) == 1, "backend has exactly one query"
assert ("GroupedField_count" in backend.queries[0]["aggs"]), "GroupedField_count is the top aggregation key"
assert ("params.count < 3" in bucket_selector["script"]), "bucket selector script must be 'params.count < 3'"
assert "count" in bucket_selector["buckets_path"], "buckets_path must be 'count'"
+2
View File
@@ -1 +1,3 @@
def test_collection():
pass