# Source file: blue-team-tools/tools/sigma/backends/dnif.py
# Repository listing metadata: Python, 325 lines, 14 KiB, 2022-06-30 13:03:54 +05:30

# Output backends for sigmac
# Copyright 2022 Netmonastery, Inc.#
import re
from .base import SingleTextQueryBackend
from sigma.parser.modifiers.type import SigmaRegularExpressionModifier
from sigma.parser.condition import SigmaAggregationParser
class DnifBackend(SingleTextQueryBackend):
    """Sigma backend that renders rules as DNIF query text.

    A generated query has the form
    ``stream=<table> where <conditions>[ | <aggregation pipeline>]``.
    Per-rule state is kept in ``self.table`` and ``self.timeframe``; both are
    recomputed by :meth:`generate` for every rule.
    """
    identifier = "dnif"
    andToken = " and "
    orToken = " or "
    notToken = "not"
    subExpression = "%s"
    listExpression = "%s"
    listSeparator = " "
    valueExpression = "\"%s\""
    nullExpression = "NOT %s=\"*\""
    notNullExpression = "%s=\"*\""
    mapExpression = "%s == \"%s\""
    mapListsSpecialHandling = True
    mapListValueExpression = "%s IN %s"
    active = True
    config_required = True
    ymlFileName = None

    # A generated ``<field> == <literal>`` comparison that is safe to merge
    # into ``<field> IN (...)``: the field contains no whitespace/parentheses
    # and the literal is an integer or one (optionally ``@``-prefixed) quoted
    # string without embedded double quotes.
    _SIMPLE_EQUALITY = re.compile(
        r'(?P<field>[^\s()]+) == (?P<literal>-?\d+|@?"[^"]*")')

    # Aggregation functions that generateAggregation() can render.
    _SUPPORTED_AGGREGATIONS = ("count", "sum")

    def __init__(self, sigmaconfig, options=None):
        """
        Initialize backend. This gets a sigmaconfig object, which is notified
        about the used backend class by passing the object instance to it.
        """
        # NOTE(review): ``options`` is accepted but not forwarded to the base
        # class -- confirm whether backend options should be passed through.
        super().__init__(sigmaconfig)
        self.table = None
        self.timeframe = None

    def generateANDNode(self, node):
        """
        Generates and nodes for query.

        Every child of *node* is rendered with ``generateNode``; children that
        render to ``None`` are skipped and children containing the or-token
        are parenthesised so that the conjunction keeps its meaning.
        Returns the joined expression, or ``None`` when no child produced
        output.
        """
        parts = []
        for child in node:
            generated = self.generateNode(child)
            if generated is None:
                continue
            if self.orToken in generated:
                generated = "(" + generated + ")"
            parts.append(generated)
        return self.andToken.join(parts) if parts else None

    def default_value_mapping(self, val):
        """
        Creates the default ``<operator> <value>`` text for a single value.

        * ``int``                      -> ``== <val>``
        * regular expression modifier  -> `` rlike("%", "<regex>")``
        * string with an inner ``*``   -> `` rlike("%", "<pattern>")`` (or
          ``@rlike`` when the pattern contains a backslash), every ``*``
          replaced by ``.*``
        * string starting/ending in ``*`` -> ``like "<pattern>"`` using ``%``
        * string with a backslash      -> ``== @"<val>"``
        * anything else                -> ``== "<val>"``

        The ``"%"`` argument of ``rlike`` is a placeholder that the caller
        replaces with the field name.
        """
        if isinstance(val, int):
            return f"== {val}"

        if isinstance(val, SigmaRegularExpressionModifier):
            return " " + self.cleanValue(f'rlike("%", "{val}")')

        if isinstance(val, str):
            if "*" in val[1:-1]:
                # Wildcard inside the string: fall back to a regex match.
                pattern = re.sub(r'(\\\\\*|\*)', '.*', val)
                function = '@rlike' if "\\" in pattern else 'rlike'
                return " " + self.cleanValue(f'{function}("%", "{pattern}")')

            leading = val.startswith("*")
            trailing = val.endswith("*")
            if leading or trailing:
                # Handled for strings of any length, so that short values such
                # as "a*" are treated as wildcards as well.
                if val == "*":
                    pattern = "%"
                elif leading and trailing:
                    pattern = f'%{val[1:-1]}%'
                elif leading:
                    pattern = f'%{val[1:]}'
                else:
                    pattern = f'{val[:-1]}%'
                return f'like "{self.cleanValue(pattern)}"'

            if "\\" in val:
                return f'== @"{self.cleanValue(val)}"'

        return f'== "{self.cleanValue(val)}"'

    def _build_comparison(self, field, value):
        """Render one comparison of *field* (already mapped) against *value*."""
        expression = self.default_value_mapping(self.cleanValue(value))
        stripped = expression.lstrip()
        if stripped.startswith(("rlike(", "@rlike(")):
            # The regex functions take the field as their first argument; only
            # the leading placeholder is substituted, never text in the value.
            return stripped.replace('"%"', field, 1)
        return "{} {}".format(field, expression)

    def _join_or(self, expressions):
        """
        Join rendered expressions with the or-token.

        Plain ``<field> == <literal>`` comparisons on the same field are
        merged into one ``<field> IN (<literal>, ...)`` clause (duplicates
        removed); every other expression is kept verbatim. The ``IN`` clauses
        are emitted after the verbatim expressions. Returns ``None`` when
        there is nothing to join.
        """
        verbatim = []
        literals_by_field = {}
        for expression in expressions:
            if expression is None:
                continue
            match = self._SIMPLE_EQUALITY.fullmatch(expression)
            if match is None:
                verbatim.append(expression)
                continue
            literals = literals_by_field.setdefault(match.group("field"), [])
            literal = match.group("literal")
            if literal not in literals:
                literals.append(literal)
        verbatim.extend(
            f'{field} IN ({", ".join(literals)})'
            for field, literals in literals_by_field.items()
        )
        return self.orToken.join(verbatim) if verbatim else None

    def generateORNode(self, node):
        """
        Generates or nodes for query.

        Every child of *node* is rendered with ``generateNode`` and the
        results are combined by ``_join_or``.
        """
        return self._join_or([self.generateNode(child) for child in node])

    def generateAggregation(self, agg):
        """
        Generates the aggregation pipeline appended to the query.

        Returns an empty string when *agg* is ``None``.

        Raises ``NotImplementedError`` for the ``near`` operator and for any
        aggregation function other than ``count`` and ``sum``.

        When ``self.timeframe`` is set, `` | duration <timeframe>`` is
        appended to the pipeline.
        """
        if agg is None:
            return ""
        if agg.aggfunc == SigmaAggregationParser.AGGFUNC_NEAR:
            raise NotImplementedError(
                "The 'near' aggregation operator is not yet implemented "
                "for this backend")

        function = agg.aggfunc_notrans
        if function not in self._SUPPORTED_AGGREGATIONS:
            raise NotImplementedError(
                f"The '{function}' aggregation function is not implemented "
                "for this backend")

        duration = f" | duration {self.timeframe}" if self.timeframe else ""
        grouped = agg.groupfield is not None

        if function == 'count' and not grouped and agg.aggfield is not None:
            # count(<field>) without a group-by: report distinct and total
            # counts per value of the aggregated field.
            # NOTE(review): the rule's threshold is not applied here.
            return (
                f" | groupby {agg.aggfield}"
                f" | select {agg.aggfield}, distinct_count({agg.aggfield}),"
                f" count(*) as total_count"
                f"{duration}"
            )

        if grouped:
            head = f" | groupby {agg.groupfield} | select {agg.groupfield}, "
        else:
            # No group-by field: aggregate over the whole result set instead
            # of emitting a "groupby None" stage.
            head = " | select "

        if agg.aggfield is None:
            return (
                f"{head}{function}(*) as count_col"
                f" | having count_col {agg.cond_op} {agg.condition}"
                f"{duration}"
            )
        # NOTE(review): with an aggregation field the threshold
        # (cond_op/condition) is not applied -- confirm the intended query.
        return f"{head}{function}({agg.aggfield}){duration}"

    def generateMapItemNode(self, node):
        """
        Generates the expression for one ``(field, value)`` map item.

        List values are rendered as an OR over their elements; strings,
        integers and regular expression modifiers become a single comparison;
        every other value type is delegated to the base class.
        """
        key, value = node
        if isinstance(value, list):
            # Each element is rendered as its own map item, which applies the
            # field name mapping exactly once per element.
            return self.generateORNode([(key, item) for item in value])
        # Exact type check on purpose: other types (e.g. bool, None) are left
        # to the base class implementation.
        if type(value) in (str, int) or isinstance(value, SigmaRegularExpressionModifier):
            return self._build_comparison(self.fieldNameMapping(key, value), value)
        return super().generateMapItemNode(node)

    def generateNOTNode(self, node):
        """
        Generates the negation of ``node.item``.

        Compound operands are parenthesised so that the negation applies to
        the whole expression. Returns ``None`` when the operand renders to
        ``None``.
        """
        generated = self.generateNode(node.item)
        if generated is None:
            return None
        if self.andToken in generated or self.orToken in generated:
            generated = "(" + generated + ")"
        return "%s %s" % (self.notToken, generated)

    def generateMapItemListNode(self, key, value):
        """
        Generates the expression for a field compared against a list of values.

        *key* is used as given (presumably already mapped by the caller --
        verify against the base class). Raises ``TypeError`` when the list
        contains anything but strings and integers.
        """
        if isinstance(value, SigmaRegularExpressionModifier):
            return self._build_comparison(self.fieldNameMapping(key, value), value)
        if not {type(item) for item in value}.issubset({str, int}):
            raise TypeError("List values must be strings or numbers")
        return self._join_or([self._build_comparison(key, item) for item in value])

    def generateTypedValueNode(self, node):
        """Typed value nodes are not supported by this backend."""
        raise NotImplementedError("Node type not implemented for this backend")

    def generateNULLValueNode(self, fieldname):
        """
        Generates the "field is null" expression.

        *fieldname* may be a plain field name or a condition node exposing the
        field name as ``item``.
        """
        name = getattr(fieldname, "item", fieldname)
        return self.nullExpression % (name,)

    def generateNotNULLValueNode(self, node):
        """Not-null value nodes are not supported by this backend."""
        raise NotImplementedError("Node type not implemented for this backend")

    def generateBefore(self, parsed):
        """Returns the query prefix selecting the stream stored in ``self.table``."""
        # NOTE(review): when no table could be determined this renders
        # "stream=None" -- confirm how such rules should be handled.
        return "stream=%s where " % self.table

    def getTable(self, parsed_rule_data):
        """
        Sets ``self.table`` from the rule's ``logsource`` section.

        The first non-empty value of ``category``, ``product`` and ``service``
        is used; ``self.table`` becomes ``None`` when none of them is present.
        """
        logsource_data = parsed_rule_data.get('logsource') or {}
        self.table = (
            logsource_data.get('category')
            or logsource_data.get('product')
            or logsource_data.get('service')
            or None
        )

    def generate(self, sigmaparser):
        """Method is called for each sigma rule and receives the parsed rule (SigmaParser)"""
        parsed_yaml = sigmaparser.parsedyaml
        # Reset per-rule state so that nothing leaks in from the previous rule.
        self.timeframe = (parsed_yaml.get('detection') or {}).get('timeframe') or None
        logsource = sigmaparser.get_logsource()
        if logsource and logsource.index:
            self.table = logsource.index[0]
        else:
            self.getTable(parsed_yaml)

        for parsed in sigmaparser.condparsed:
            query = self.generateQuery(parsed)
            before = self.generateBefore(parsed)
            result = ""
            if before is not None:
                result = before
            if query is not None:
                result += query
            if result.endswith(" | "):
                # Drop a dangling pipe separator at the end of the query.
                result = result.rstrip(" |")
            # Only the first parsed condition is rendered.
            return result