Merge branch 'devel' of https://github.com/Neo23x0/sigma
This commit is contained in:
@@ -16,7 +16,6 @@
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import asyncio
|
||||
import functools
|
||||
import sys
|
||||
import pprint
|
||||
import elasticsearch
|
||||
@@ -41,6 +40,7 @@ except elasticsearch.exceptions.RequestError as e:
|
||||
|
||||
queries = asyncio.Queue()
|
||||
|
||||
|
||||
# sigmac runner coroutinne
|
||||
async def run_sigmac():
|
||||
sigmac = asyncio.create_subprocess_exec(
|
||||
@@ -70,6 +70,7 @@ async def run_sigmac():
|
||||
print("* sigmac returned with exit code {}".format(exitcode))
|
||||
return exitcode
|
||||
|
||||
|
||||
# Generated query checker loop
|
||||
async def check_queries():
|
||||
failed = list()
|
||||
|
||||
@@ -6,3 +6,4 @@ elasticsearch-async
|
||||
setuptools
|
||||
wheel
|
||||
pymisp
|
||||
pytest
|
||||
|
||||
@@ -173,9 +173,6 @@ class ElasticsearchDSLBackend(RulenameCommentMixin, ElasticsearchWildcardHandlin
|
||||
self.queries[-1]['query']['constant_score']['filter'] = self.generateNode(parsed.parsedSearch)
|
||||
if parsed.parsedAgg:
|
||||
self.generateAggregation(parsed.parsedAgg)
|
||||
# if parsed.parsedAgg:
|
||||
# fields += self.generateAggregation(parsed.parsedAgg)
|
||||
# self.fields.update(fields)
|
||||
|
||||
def generateANDNode(self, node):
|
||||
andNode = {'bool': {'must': []}}
|
||||
@@ -253,32 +250,81 @@ class ElasticsearchDSLBackend(RulenameCommentMixin, ElasticsearchWildcardHandlin
|
||||
return {'exists': {'field': node.item}}
|
||||
|
||||
def generateAggregation(self, agg):
|
||||
"""
|
||||
Generates an Elasticsearch nested aggregation given a SigmaAggregationParser object
|
||||
|
||||
Two conditions are handled here:
|
||||
a) "count() by MyGroupedField > X"
|
||||
b) "count(MyDistinctFieldName) by MyGroupedField > X'
|
||||
|
||||
The case (b) is translated to a the following equivalent SQL query
|
||||
|
||||
```
|
||||
SELECT MyDistinctFieldName, COUNT(DISTINCT MyDistinctFieldName) FROM Table
|
||||
GROUP BY MyGroupedField HAVING COUNT(DISTINCT MyDistinctFieldName) > 1
|
||||
```
|
||||
|
||||
The resulting aggregation is set on 'self.queries[-1]["aggs"]' as a Python dict
|
||||
|
||||
:param agg: Input SigmaAggregationParser object that defines a condition
|
||||
:return: None
|
||||
"""
|
||||
if agg:
|
||||
if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_COUNT:
|
||||
if agg.groupfield is not None:
|
||||
self.queries[-1]['aggs'] = {
|
||||
'%s_count'%(agg.groupfield or ""): {
|
||||
'terms': {
|
||||
'field': '%s'%(agg.groupfield + ".keyword" or "")
|
||||
},
|
||||
'aggs': {
|
||||
'limit': {
|
||||
'bucket_selector': {
|
||||
'buckets_path': {
|
||||
'count': '%s_count'%(agg.groupfield or "")
|
||||
# If the aggregation is 'count(MyDistinctFieldName) by MyGroupedField > XYZ'
|
||||
if agg.aggfield is not None:
|
||||
count_agg_group_name = "{}_count".format(agg.groupfield)
|
||||
count_distinct_agg_name = "{}_distinct".format(agg.aggfield)
|
||||
script_limit = "params.count {} {}".format(agg.cond_op, agg.condition)
|
||||
self.queries[-1]['aggs'] = {
|
||||
count_agg_group_name: {
|
||||
"terms": {
|
||||
"field": "{}.keyword".format(agg.groupfield)
|
||||
},
|
||||
"aggs": {
|
||||
count_distinct_agg_name: {
|
||||
"cardinality": {
|
||||
"field": "{}.keyword".format(agg.aggfield)
|
||||
}
|
||||
},
|
||||
'script': 'params.count %s %s'%(agg.cond_op, agg.condition)
|
||||
"limit": {
|
||||
"bucket_selector": {
|
||||
"buckets_path": {
|
||||
"count": count_distinct_agg_name
|
||||
},
|
||||
"script": script_limit
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else: # if the condition is count() by MyGroupedField > XYZ
|
||||
group_aggname = "{}_count".format(agg.groupfield)
|
||||
self.queries[-1]['aggs'] = {
|
||||
group_aggname: {
|
||||
'terms': {
|
||||
'field': '%s' % (agg.groupfield + ".keyword")
|
||||
},
|
||||
'aggs': {
|
||||
'limit': {
|
||||
'bucket_selector': {
|
||||
'buckets_path': {
|
||||
'count': group_aggname
|
||||
},
|
||||
'script': 'params.count %s %s' % (agg.cond_op, agg.condition)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else:
|
||||
funcname = ""
|
||||
for name, idx in agg.aggfuncmap.items():
|
||||
if idx == agg.aggfunc:
|
||||
funcname = name
|
||||
break
|
||||
raise NotImplementedError("%s : The '%s' aggregation operator is not yet implemented for this backend"%(self.title, funcname))
|
||||
raise NotImplementedError("%s : The '%s' aggregation operator is not yet implemented for this backend" % (self.title, funcname))
|
||||
|
||||
def generateBefore(self, parsed):
|
||||
self.queries.append({'query': {'constant_score': {'filter': {}}}})
|
||||
@@ -895,14 +941,18 @@ class ElastalertBackend(MultiRuleOutputMixin):
|
||||
|
||||
def generateAggregation(self, agg):
|
||||
if agg:
|
||||
if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_COUNT or agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_MIN or agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_MAX or agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_AVG or agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_SUM:
|
||||
if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_COUNT or \
|
||||
agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_MIN or \
|
||||
agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_MAX or \
|
||||
agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_AVG or \
|
||||
agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_SUM:
|
||||
return ""
|
||||
else:
|
||||
for name, idx in agg.aggfuncmap.items():
|
||||
if idx == agg.aggfunc:
|
||||
funcname = name
|
||||
break
|
||||
raise NotImplementedError("%s : The '%s' aggregation operator is not yet implemented for this backend"%(self.title, funcname))
|
||||
raise NotImplementedError("%s : The '%s' aggregation operator is not yet implemented for this backend" % ( self.title, funcname))
|
||||
|
||||
def convertLevel(self, level):
|
||||
return {
|
||||
|
||||
@@ -24,6 +24,7 @@ COND_OR = 2
|
||||
COND_NOT = 3
|
||||
COND_NULL = 4
|
||||
|
||||
|
||||
# Debugging code
|
||||
def dumpNode(node, indent=''): # pragma: no cover
|
||||
"""
|
||||
@@ -42,6 +43,7 @@ def dumpNode(node, indent=''): # pragma: no cover
|
||||
repr(node)))
|
||||
return node
|
||||
|
||||
|
||||
# Condition Tokenizer
|
||||
class SigmaConditionToken:
|
||||
"""Token of a Sigma condition expression"""
|
||||
@@ -100,6 +102,7 @@ class SigmaConditionToken:
|
||||
def __str__(self): # pragma: no cover
|
||||
return "[ Token: %s: '%s' ]" % (self.tokenstr[self.type], self.matched)
|
||||
|
||||
|
||||
class SigmaConditionTokenizer:
|
||||
"""Tokenize condition string into token sequence"""
|
||||
tokendefs = [ # list of tokens, preferred recognition in given order, (token identifier, matching regular expression). Ignored if token id == None
|
||||
@@ -172,6 +175,7 @@ class SigmaConditionTokenizer:
|
||||
def index(self, item):
|
||||
return self.tokens.index(item)
|
||||
|
||||
|
||||
### Parse Tree Node Classes ###
|
||||
class ParseTreeNode:
|
||||
"""Parse Tree Node Base Class"""
|
||||
@@ -181,6 +185,7 @@ class ParseTreeNode:
|
||||
def __str__(self): # pragma: no cover
|
||||
return "[ %s: %s ]" % (self.__doc__, str([str(item) for item in self.items]))
|
||||
|
||||
|
||||
class ConditionBase(ParseTreeNode):
|
||||
"""Base class for conditional operations"""
|
||||
op = COND_NONE
|
||||
@@ -198,6 +203,7 @@ class ConditionBase(ParseTreeNode):
|
||||
def __len__(self):
|
||||
return len(self.items)
|
||||
|
||||
|
||||
class ConditionAND(ConditionBase):
|
||||
"""AND Condition"""
|
||||
op = COND_AND
|
||||
@@ -208,10 +214,12 @@ class ConditionAND(ConditionBase):
|
||||
else: # called by parser, use given values
|
||||
self.items = args
|
||||
|
||||
|
||||
class ConditionOR(ConditionAND):
|
||||
"""OR Condition"""
|
||||
op = COND_OR
|
||||
|
||||
|
||||
class ConditionNOT(ConditionBase):
|
||||
"""NOT Condition"""
|
||||
op = COND_NOT
|
||||
@@ -235,19 +243,23 @@ class ConditionNOT(ConditionBase):
|
||||
except IndexError:
|
||||
return None
|
||||
|
||||
|
||||
class ConditionNULLValue(ConditionNOT):
|
||||
"""Condition: Field value is empty or doesn't exists"""
|
||||
pass
|
||||
|
||||
|
||||
class ConditionNotNULLValue(ConditionNULLValue):
|
||||
"""Condition: Field value is not empty"""
|
||||
pass
|
||||
|
||||
|
||||
class NodeSubexpression(ParseTreeNode):
|
||||
"""Subexpression"""
|
||||
def __init__(self, subexpr):
|
||||
self.items = subexpr
|
||||
|
||||
|
||||
# Parse tree generators: generate parse tree nodes from extended conditions
|
||||
def generateXOf(sigma, val, condclass):
|
||||
"""
|
||||
@@ -274,18 +286,22 @@ def generateXOf(sigma, val, condclass):
|
||||
else: # OR across all items of definition
|
||||
return NodeSubexpression(sigma.parse_definition_byname(val.matched, condclass))
|
||||
|
||||
|
||||
def generateAllOf(sigma, op, val):
|
||||
"""Convert 'all of x' expressions into ConditionAND"""
|
||||
return generateXOf(sigma, val, ConditionAND)
|
||||
|
||||
|
||||
def generateOneOf(sigma, op, val):
|
||||
"""Convert '1 of x' expressions into ConditionOR"""
|
||||
return generateXOf(sigma, val, ConditionOR)
|
||||
|
||||
|
||||
def convertId(sigma, op):
|
||||
"""Convert search identifiers (lists or maps) into condition nodes according to spec defaults"""
|
||||
return NodeSubexpression(sigma.parse_definition_byname(op.matched))
|
||||
|
||||
|
||||
# Optimizer
|
||||
class SigmaConditionOptimizer:
|
||||
"""
|
||||
@@ -548,7 +564,8 @@ class SigmaConditionParser:
|
||||
|
||||
def __len__(self): # pragma: no cover
|
||||
return len(self.parsedSearch)
|
||||
|
||||
|
||||
|
||||
# Aggregation parser
|
||||
class SigmaAggregationParser(SimpleParser):
|
||||
"""Parse Sigma aggregation expression and provide parsed data"""
|
||||
@@ -599,7 +616,7 @@ class SigmaAggregationParser(SimpleParser):
|
||||
SigmaConditionToken.TOKEN_ID: (None, "store_search_id", 9),
|
||||
},
|
||||
]
|
||||
finalstates = { -1, 9 }
|
||||
finalstates = {-1, 9}
|
||||
|
||||
# Aggregation functions
|
||||
AGGFUNC_COUNT = 1
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
from sigma.backends.elasticsearch import ElasticsearchDSLBackend
|
||||
from sigma.configuration import SigmaConfiguration
|
||||
from sigma.parser.condition import SigmaAggregationParser
|
||||
|
||||
|
||||
def test_backend_elastic():
|
||||
"""
|
||||
Test aggregation of the form
|
||||
|
||||
count(aggfield) by GroupField < 3
|
||||
"""
|
||||
sigma_config = SigmaConfiguration()
|
||||
backend = ElasticsearchDSLBackend(sigma_config)
|
||||
|
||||
# setup the aggregator input object without calling __init__()
|
||||
agg = object.__new__(SigmaAggregationParser)
|
||||
agg.condition = "3"
|
||||
agg.cond_op = "<"
|
||||
agg.aggfunc = SigmaAggregationParser.AGGFUNC_COUNT
|
||||
agg.aggfield = "aggfield"
|
||||
agg.groupfield = "GroupField"
|
||||
|
||||
# Make queries non-empty
|
||||
backend.queries = [{}]
|
||||
|
||||
backend.generateAggregation(agg)
|
||||
|
||||
inner_agg = backend.queries[0]["aggs"]["GroupField_count"]["aggs"]
|
||||
bucket_selector = backend.queries[0]["aggs"]["GroupField_count"]["aggs"]["limit"]["bucket_selector"]
|
||||
assert len(backend.queries) == 1, "backend has exactly one query"
|
||||
assert ("GroupField_count" in backend.queries[0]["aggs"]), "GroupField_count is the top aggregation key"
|
||||
assert ("aggfield_distinct" in backend.queries[0]["aggs"]["GroupField_count"]["aggs"]), "aggfield_distinct is the nested aggregation key"
|
||||
assert ("GroupField_count" in backend.queries[0]["aggs"]), "GroupField_count is the top aggregation key"
|
||||
assert "{}.keyword".format(agg.aggfield) == inner_agg["aggfield_distinct"]["cardinality"]["field"], "inner agg field must have suffix .keyword"
|
||||
assert ("params.count < 3" in bucket_selector["script"]), "bucket selector script must be 'params.count < 3'"
|
||||
assert "count" in bucket_selector["buckets_path"], "buckets_path must be 'count'"
|
||||
|
||||
|
||||
def test_backend_elastic_count_nofield_agg():
|
||||
"""
|
||||
Test aggregation of the form
|
||||
|
||||
count() by GroupedField < 3
|
||||
"""
|
||||
|
||||
sigma_config = SigmaConfiguration()
|
||||
backend = ElasticsearchDSLBackend(sigma_config)
|
||||
|
||||
# setup the aggregator input object without calling __init__()
|
||||
agg = object.__new__(SigmaAggregationParser)
|
||||
agg.condition = "3"
|
||||
agg.cond_op = "<"
|
||||
agg.aggfunc = SigmaAggregationParser.AGGFUNC_COUNT
|
||||
agg.aggfield = None
|
||||
agg.groupfield = "GroupedField"
|
||||
|
||||
# Make queries non-empty
|
||||
backend.queries = [{}]
|
||||
backend.generateAggregation(agg)
|
||||
bucket_selector = backend.queries[0]["aggs"]["GroupedField_count"]["aggs"]["limit"]["bucket_selector"]
|
||||
|
||||
assert len(backend.queries) == 1, "backend has exactly one query"
|
||||
assert ("GroupedField_count" in backend.queries[0]["aggs"]), "GroupedField_count is the top aggregation key"
|
||||
assert ("params.count < 3" in bucket_selector["script"]), "bucket selector script must be 'params.count < 3'"
|
||||
assert "count" in bucket_selector["buckets_path"], "buckets_path must be 'count'"
|
||||
@@ -1 +1,3 @@
|
||||
|
||||
def test_collection():
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user