604 lines
21 KiB
Python
604 lines
21 KiB
Python
# Output backends for sigmac
|
|
# Copyright 2016-2018 Thomas Patzke, Florian Roth, Devin Ferguson, Julien Bachmann
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Lesser General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Lesser General Public License for more details.
|
|
|
|
# You should have received a copy of the GNU Lesser General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import json
|
|
import re
|
|
import sys
|
|
import os
|
|
from typing import List, Tuple, Union
|
|
from uuid import uuid4
|
|
|
|
from sigma.parser.condition import SigmaAggregationParser
|
|
|
|
from .elasticsearch import ElasticsearchQuerystringBackend
|
|
|
|
class Atom:
    """Leaf of the query AST: one ``field:value`` term."""

    def __init__(self, field: str, prop: str) -> None:
        self.field = field
        self.prop = prop

    def __str__(self) -> str:
        # Doubled backslashes are collapsed to single ones for display only;
        # the stored attributes are left untouched.
        shown_field = self.field.replace("\\\\", "\\")
        shown_prop = self.prop.replace("\\\\", "\\")
        return "Atom( {}, {} )".format(shown_field, shown_prop)
|
|
|
|
# Root of AST is always a Group
|
|
class Group:
    """Parenthesised sub-expression of the query AST; wraps a single ``ary``."""

    def __init__(self) -> None:
        # Placeholder constructor: further down, the module rebinds
        # Group.__init__ to group_init, which stores the wrapped Ary.
        pass

    def __str__(self) -> str:
        # Doubled backslashes are collapsed to single ones for display only.
        rendered = str(self.ary).replace("\\\\", "\\")
        return "Group( {} )".format(rendered)
|
|
|
|
class Boolean:
    """Operand of a boolean expression: wraps either an Atom or a Group."""

    def __init__(self, expression: Union[Atom, Group]) -> None:
        self.expression = expression

    def __str__(self) -> str:
        # Doubled backslashes are collapsed to single ones for display only.
        rendered = str(self.expression).replace("\\\\", "\\")
        return "Boolean( {} )".format(rendered)
|
|
|
|
class Ary:
    """Flat boolean expression: a first operand plus ``(operator, operand)`` pairs.

    Attributes:
        bool1: the leading operand.
        bool2: list of ``(rel, operand)`` tuples that follow it, where ``rel``
            is the operator text (``"AND"`` / ``"OR"``). Empty when the
            expression consists of a single operand.
    """

    def __init__(self, bool1: "Boolean", bool2: Union[List[Tuple[str, "Boolean"]], None] = None) -> None:
        self.bool1 = bool1
        # An omitted tail is stored as an empty list so that __str__ and any
        # consumer can iterate it without first checking for None.
        self.bool2 = [] if bool2 is None else bool2

    def __str__(self) -> str:
        # Doubled backslashes are collapsed to single ones for display only.
        head = str(self.bool1).replace("\\\\", "\\")
        tail = [(rel, str(boolean).replace("\\\\", "\\")) for rel, boolean in self.bool2]
        return "Ary( {}, {} )".format(head, tail)
|
|
|
|
def group_init(self, ary: Ary):
    """Constructor body for Group: remember the wrapped expression as ``self.ary``.

    Defined as a free function and attached to Group afterwards, since the
    ``Ary`` annotation needs the Ary class to exist first.
    """
    self.ary = ary
|
|
|
|
# Replace Group's placeholder constructor with group_init now that Ary is defined.
Group.__init__ = group_init
|
|
|
|
def parse_atom(s: str) -> Atom:
    """Parse a ``field:value`` term into an Atom.

    The term is split at the first colon that is not escaped with a
    backslash; any later colon stays part of the value.

    Raises:
        TypeError: if ``s`` contains no unescaped colon, because Atom is then
            called with a single argument.
    """
    reg = r"(?<!\\):"  # a ':' that is not preceded by a backslash
    # maxsplit=1: a value that itself contains an unescaped colon must not be
    # broken into extra pieces, which would hand Atom more than two arguments.
    return Atom(*re.split(reg, s, maxsplit=1))
|
|
|
|
def parse_group(s: str) -> Group:
    """Parse a parenthesised expression into a Group.

    The root of the AST is always a Group, so parsing of a whole expression
    starts here. The first and last characters of ``s`` are dropped (they are
    taken to be the enclosing parentheses) and the rest is parsed as an Ary.
    """
    inner = s[1:-1]
    return Group(parse_ary(inner))
|
|
|
|
def expand_group(s: str) -> str:
    """Expand the shorthand ``A:(B OR C)`` into ``(A:B OR A:C)``.

    The operand list is split on whitespace; ``AND`` / ``OR`` tokens are kept
    as they are and every other token is prefixed with the field name.

    Raises:
        ValueError: if ``s`` does not split into exactly a field and an
            operand list at an unescaped colon.
    """
    reg = r"(?<!\\):"  # a ':' that is not preceded by a backslash

    # With the outer parentheses trimmed, the text separates into the field
    # name and the operand list (which may still carry its opening paren).
    field, props = re.split(reg, s.strip("()"))
    tokens = props.strip("()").split()

    expanded = [
        token if token in ("AND", "OR") else f'{field}:{token}'
        for token in tokens
    ]
    return "(" + " ".join(expanded) + ")"
|
|
|
|
def parse_boolean(s: str) -> Boolean:
    """Parse one operand into a Boolean wrapping an Atom or a Group.

    Text without any ``(`` is treated as an atom. Otherwise it is a group;
    when it does not start with ``(`` it is first rewritten by expand_group
    from the ``A:(B OR C)`` shorthand into an ordinary group.
    """
    if "(" not in s:
        return Boolean(parse_atom(s))

    grouped = s if s[0] == '(' else expand_group(s)
    return Boolean(parse_group(grouped))
|
|
|
|
def parse_ary(s: str) -> Ary:
    """Parse ``Bool [Rel Bool]*`` text into an Ary.

    The text is cut at every space that sits outside parentheses. ``AND`` and
    ``OR`` pieces are kept as operator strings; every other piece is parsed
    with parse_boolean.

    Raises:
        IndexError: if ``s`` is empty, or if an operator is not followed by
            an operand.
    """
    # NOTE(review): parentheses and spaces are counted even when escaped with
    # a backslash or inside quotes - confirm the input never contains those.
    parts = []
    start = pos = depth = 0
    end = len(s)

    while start < end:
        at_end = pos == end
        char = None if at_end else s[pos]

        if char == '(':
            # Entering a nested group
            depth += 1
        elif char == ')':
            # Leaving a nested group
            depth -= 1
        elif at_end or (char == ' ' and depth == 0):
            # s[start:pos] is a complete top-level piece
            piece = s[start:pos]
            parts.append(piece if piece in ("AND", "OR") else parse_boolean(piece))
            start = pos + 1

        pos += 1

    # [Bool, Rel, Bool, Rel, Bool, ...] => Bool, [(Rel, Bool), (Rel, Bool), ...]
    head = parts[0]
    tail = [(parts[i], parts[i + 1]) for i in range(1, len(parts), 2)]

    return Ary(head, tail)
|
|
|
|
def translate_atom(atom: Atom) -> dict:
    """Translate an Atom into a ``match`` clause keyed by the atom's field."""
    match_body = {atom.field: atom.prop}
    return {"match": match_body}
|
|
|
|
def translate_group(group: Group) -> dict:
    """Translate a Group by translating the Ary it wraps."""
    wrapped = group.ary
    return translate_ary(wrapped)
|
|
|
|
def translate_boolean(boolean: Boolean) -> dict:
    """Translate a Boolean, dispatching on the exact type of its expression.

    An expression whose type is exactly Atom goes to translate_atom; anything
    else is handed to translate_group.
    """
    expression = boolean.expression
    if type(expression) is not Atom:
        return translate_group(expression)
    return translate_atom(expression)
|
|
|
|
def convert_bool_array(bool1: "Boolean", boolArr: List[Tuple[str, "Boolean"]]) -> List[List["Boolean"]]:
    """Combine an Ary's operands into groups joined by AND and split by OR.

    Args:
        bool1: the first operand of the expression.
        boolArr: the following ``(rel, operand)`` pairs; ``rel`` equal to
            ``"AND"`` extends the current group, any other value starts a
            new one.

    Returns:
        A list of groups in input order. Operands within one group were
        joined by AND; neighbouring groups were separated by OR. For example
        ``A OR B AND C`` gives ``[[A], [B, C]]``.
    """
    result = [[bool1]]

    for rel, boolean in boolArr:
        if rel == "AND":
            # AND always binds to the most recent group, including one that
            # was just opened by a preceding OR.
            result[-1].append(boolean)
        else:
            result.append([boolean])

    return result
|
|
|
|
def adjust_matches(matches: List[dict], clause) -> List[dict]:
    """Wrap atomic ``match`` statements in one bool statement; keep group statements.

    Statements that have a ``"match"`` key are collected under a single
    ``{"bool": {clause: [...]}}`` entry, which is placed first in the result.
    All remaining statements (already wrapped in a bool) follow in their
    original order.

    Args:
        matches: translated statements to regroup.
        clause: the parent clause name to use for the combined atomic
            matches. A lone atomic match is always wrapped in ``"must"``,
            whatever the parent clause is.
    """
    atomic = [entry for entry in matches if "match" in entry]
    grouped = [entry for entry in matches if "match" not in entry]

    if not atomic:
        return grouped

    parent = clause if len(atomic) != 1 else "must"
    return [{"bool": {parent: atomic}}] + grouped
|
|
|
|
def contains_group(booleanArr: List[Boolean]) -> bool:
    """Return True when any Boolean in the list wraps an expression of exact type Group."""
    return any(type(item.expression) is Group for item in booleanArr)
|
|
|
|
def translate_ary(ary: Ary) -> dict:
    """Translate an Ary into a (possibly nested) ``bool`` query.

    The operands are first grouped with convert_bool_array. A group with
    several operands becomes a ``must`` clause. A run of two or more
    consecutive single-operand groups is merged into one ``should`` clause; a
    lone single-operand group stays a ``must`` clause. When more than one
    clause results, they are combined under an outer ``should``.
    """
    groups = convert_bool_array(ary.bool1, ary.bool2)
    total = len(groups)
    clauses = []

    pos = 0
    while pos < total:
        members = groups[pos]
        clause = "must"  # switched to "should" for a run of OR-ed operands

        if len(members) == 1:
            # Find the last group of the run of single-operand groups
            last = pos
            while last + 1 < total and len(groups[last + 1]) == 1:
                last += 1

            if last > pos:
                # Merge the whole run into one operand list and skip past it
                clause = "should"
                members = [item for group in groups[pos:last + 1] for item in group]
                pos = last

        matches = [translate_boolean(item) for item in members]

        # Group translations are already bool-wrapped, so plain match
        # statements next to them have to be wrapped in a bool as well.
        if contains_group(members):
            matches = adjust_matches(matches, clause)

        clauses.append({"bool": {clause: matches}})
        pos += 1

    # A single clause is returned as is, without an extra nesting level
    if len(clauses) > 1:
        return {"bool": {"should": clauses}}
    return clauses[0]
|
|
|
|
class OpenSearchBackend(object):
    """OpenSearch detection rule backend.

    Converts the query string returned by the parent class's ``generate`` into
    an OpenSearch monitor serialised as JSON. The class itself derives from
    ``object``; ``generate`` and ``__init__`` delegate to ``super()``, so it is
    meant to be combined with a query-string backend class.
    """
    active = True
    # Rule ids handed out so far. Class-level list, so it is shared by all instances.
    uuid_black_list = []
    options = ElasticsearchQuerystringBackend.options + (
        ("put_filename_in_ref", False, "Want to have yml name in reference ?", None),
        ("convert_to_url", False, "Want to convert to a URL ?", None),
        ("path_to_replace", "../", "The local path to replace with dest_base_url", None),
        ("dest_base_url", "https://github.com/SigmaHQ/sigma/tree/master/", "The URL prefix", None),
        ("custom_tag", None , "Add custom tag. for multi split with a comma tag1,tag2 ", None),
    )
    # Set by the aggregation handler when the current rule carries a threshold.
    isThreshold = False

    # Default values for fields exclusive to OpenSearch monitors
    RULE_TYPE = "monitor"
    IS_ENABLED = True
    INTERVAL = 5
    UNIT = "MINUTES"
    TRIGGER_NAME = "generated-trigger"
    SEVERITIES = {"informational": "5", "low": "4", "medium": "3", "high": "2", "critical": "1"}
    TRIGGER_SCRIPT = "ctx.results[0].hits.total.value > 0"
    TRIGGER_LANGUAGE = "painless"
    MONITOR_INDICES = ["opensearch-security-logs"]
    NUM_RESULTS = 1

    def __init__(self, *args, **kwargs):
        """Initialise the parent backend and load the MITRE tactic/technique tables."""
        super().__init__(*args, **kwargs)
        self.tactics = self._load_mitre_file("tactics")
        self.techniques = self._load_mitre_file("techniques")
        self.rule_threshold = {}

    def _load_mitre_file(self, mitre_type):
        """Load ``config/mitre/<mitre_type>.json`` and return its parsed content.

        The directory is resolved relative to this file (two levels up, then
        ``config/mitre``). If the file cannot be opened or is not valid JSON,
        a message is printed to stderr and an empty list is returned.
        """
        backend_dir = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "config", "mitre"))
        path = os.path.join(backend_dir, "{}.json".format(mitre_type))
        try:
            with open(path, 'r') as config_file:
                return json.load(config_file)
        except (IOError, OSError) as e:
            print("Failed to open {} configuration file '{}': {}".format(mitre_type, path, e), file=sys.stderr)
            return []
        except json.JSONDecodeError as e:
            print("Failed to parse {} configuration file '{}' as valid JSON: {}".format(mitre_type, path, e), file=sys.stderr)
            return []

    def generate(self, sigmaparser):
        """Translate one parsed Sigma rule into an OpenSearch monitor.

        Calls the parent ``generate`` to obtain the rule condition as a query
        string, then passes it with the rule's YAML content to create_rule.

        Returns:
            The monitor as a JSON string, or None when the parent produced no
            translation.
        """
        # Reset per-detection state. isThreshold must be cleared as well,
        # otherwise a rule that follows a threshold rule would get an empty
        # "filter" attached to its query.
        self.rule_threshold = {}
        self.isThreshold = False

        translation = super().generate(sigmaparser)
        if not translation:
            return None

        index = sigmaparser.get_logsource().index
        if not index:
            index = ["apm-*-transaction", "auditbeat-*", "endgame-*", "filebeat-*", "packetbeat-*", "winlogbeat-*"]
        configs = sigmaparser.parsedyaml
        configs.update({"translation": translation})
        return self.create_rule(configs, index)

    def create_threat_description(self, tactics_list, techniques_list):
        """Build the threat section of the monitor from matched tactics and techniques.

        Produces one entry per tactic. Each entry lists the techniques whose
        ``tactic`` field contains that tactic's name.
        """
        threat_list = []
        for tactic in tactics_list:
            tactic_name = tactic.get("tactic", "")
            entry = {
                "tactic": {
                    "id": tactic.get("external_id", ""),
                    "reference": tactic.get("url", ""),
                    "name": tactic_name
                },
                "framework": "MITRE ATT&CK®"
            }
            entry["technique"] = [
                {
                    "id": tech.get("technique_id", ""),
                    "name": tech.get("technique", ""),
                    "reference": tech.get("url", "")
                }
                for tech in techniques_list
                if tactic_name in tech.get("tactic", [])
            ]
            threat_list.append(entry)
        return threat_list

    def find_tactics(self, key_name=None, key_id=None):
        """Return the first loaded tactic matching ``key_name`` or ``key_id``, else None."""
        for tactic in self.tactics:
            if key_name and key_name == tactic.get("tactic", ""):
                return tactic
            if key_id and key_id == tactic.get("external_id", ""):
                return tactic
        return None

    def find_technique(self, key_id=None):
        """Return the first loaded technique whose ``technique_id`` equals ``key_id``, else None."""
        for technique in self.techniques:
            if key_id and key_id == technique.get("technique_id", ""):
                return technique
        return None

    def map_severity(self, severity):
        """Map a Sigma level name to the OpenSearch severity string "1".."5".

        The lookup is case-insensitive; unknown levels map to the value for
        ``medium``.
        """
        return self.SEVERITIES.get(severity.lower(), self.SEVERITIES["medium"])

    def create_trigger(self, severity):
        """Build the monitor's triggers list: one trigger with no actions."""
        return [
            {
                "name": self.TRIGGER_NAME,
                "severity": self.map_severity(severity),
                "condition": {
                    "script": {
                        "source": self.TRIGGER_SCRIPT,
                        "lang": self.TRIGGER_LANGUAGE
                    }
                },
                "actions": []
            }
        ]

    def build_threshold(self, field, inequality, threshold):
        """Build a ``range`` query comparing ``field`` against ``threshold``.

        Raises:
            KeyError: if ``inequality`` is not one of ``<``, ``<=``, ``>``, ``>=``.
        """
        INEQUALITIES = {"<": "lt", "<=": "lte", ">": "gt", ">=": "gte"}

        return {
            "range": {
                field: {
                    INEQUALITIES[inequality]: threshold
                }
            }
        }

    def build_query(self, translation):
        """Build the OpenSearch monitor query from the translated query string.

        Forms an abstract syntax tree (AST) using the following repeated structures:
        - Atom = A:B
        - Rel = AND | OR
        - Ary = Bool [Rel Bool]*
        - Group = (Ary)
        - SGroup = A:(B OR C)
        - Bool = Atom | Group | SGroup

        Then translates the AST into OpenSearch boolean queries. For a
        threshold rule, the stored threshold is attached as the ``filter`` of
        the outermost bool query.
        """
        ast = parse_group(translation)
        translatedQuery = translate_group(ast)

        if self.isThreshold:
            translatedQuery["bool"]["filter"] = self.rule_threshold

        return translatedQuery

    def build_inputs(self, translation):
        """Build the inputs field of the monitor: one search over MONITOR_INDICES."""
        return [
            {
                "search": {
                    "indices": self.MONITOR_INDICES,
                    "query": {
                        "size": self.NUM_RESULTS,
                        "aggregations": {},
                        "query": self.build_query(translation)
                    }
                }
            }
        ]

    def build_ymlfile_ref(self, configs):
        """Return a reference to the Sigma yml file, or None when none applies.

        Nothing is returned unless the ``put_filename_in_ref`` option is
        enabled and ``configs`` carries both ``yml_filename`` and
        ``yml_path``. With ``convert_to_url`` set, the path is turned into a
        URL by replacing ``path_to_replace`` with ``dest_base_url``; otherwise
        the bare file name is used.
        """
        # Compared by equality on purpose: only the value False switches the feature off.
        if self.put_filename_in_ref == False:  # noqa: E712
            return None
        yml_filename = configs.get("yml_filename")
        yml_path = configs.get("yml_path")
        if yml_filename is None or yml_path is None:
            return None

        if not self.convert_to_url:
            return yml_filename

        yml_path = yml_path.replace('\\', '/')  # windows path to url
        self.path_to_replace = self.path_to_replace.replace('\\', '/')  # windows path to url
        if self.path_to_replace not in yml_path:
            # The configured prefix does not occur in the path: no URL can be built
            return None

        return yml_path.replace(self.path_to_replace, self.dest_base_url) + '/' + yml_filename

    def build_tags_list(self, tags):
        """Match Sigma tags against the known MITRE tables and add custom tags.

        Returns:
            A tuple ``(tactics_list, technics_list, new_tags)``: the matched
            tactic entries, the matched technique entries, and the normalised
            tag strings (followed by any tags from the ``custom_tag`` option).
        """
        tactics_list = []
        new_tags = []
        technics_list = []

        for tag in tags:
            tag = tag.replace("attack.", "")

            # Technique id, e.g. t1059
            if re.match(r"t[0-9]{4}", tag, re.IGNORECASE):
                label = tag.title()
                tech = self.find_technique(label)
                if tech:
                    new_tags.append(label)
                    technics_list.append(tech)
                continue

            if "_" in tag:
                # Tactic name written with underscores, e.g. defense_evasion
                label = " ".join(item.title() for item in tag.split("_"))
                tact = self.find_tactics(key_name=label)
            elif re.match(r"ta[0-9]{4}", tag, re.IGNORECASE):
                # Tactic id, e.g. ta0001. The literal prefix "ta" is required
                # here; a character class [ta] would only match one letter.
                label = tag.upper()
                tact = self.find_tactics(key_id=label)
            else:
                # Single-word tactic name, e.g. persistence
                label = tag.title()
                tact = self.find_tactics(key_name=label)

            if tact:
                new_tags.append(label)
                tactics_list.append(tact)

        if self.custom_tag:
            # A value without a comma splits into a single element
            new_tags.extend(self.custom_tag.split(","))

        return tactics_list, technics_list, new_tags

    def get_rule_id(self, rule_uuid):
        """Return the id to use for the rule.

        The Sigma id is lower-cased. If it is blank or was already handed out,
        a random UUID is generated instead. The chosen id is recorded in
        ``uuid_black_list``, and the characters ``( ) * + ! , [ ] . "`` and
        whitespace are replaced by underscores in the returned value.
        """
        rule_uuid = rule_uuid.lower()
        if rule_uuid == "" or rule_uuid in self.uuid_black_list:
            rule_uuid = str(uuid4())
        self.uuid_black_list.append(rule_uuid)
        return re.sub(r'[()*+!,\[\].\s"]', "_", rule_uuid)

    def get_references(self, configs):
        """Return the rule's reference list, extended with the yml file reference if enabled.

        The ``reference`` key takes precedence over ``references``.
        """
        references = configs.get("reference")
        if references is None:
            references = configs.get("references")
        return self.build_ref_yaml(references, configs)

    def build_ref_yaml(self, references, configs):
        """Append the Sigma yml file reference to ``references`` unless already present.

        ``references`` may be None; it is then replaced by a new list when a
        yml reference has to be added, and returned unchanged otherwise.
        """
        add_ref_yml = self.build_ymlfile_ref(configs)
        if add_ref_yml:
            if references is None:  # No ref
                references = []
            # A multi-rule file would otherwise get the same reference twice
            if add_ref_yml not in references:
                references.append(add_ref_yml)

        return references

    def create_rule(self, configs, index):
        """Build the OpenSearch monitor and return it in JSON format.

        Args:
            configs: the rule's parsed YAML plus the ``translation`` entry.
            index: index list passed by ``generate``; not used by this method,
                the monitor searches ``MONITOR_INDICES``.
        """
        tactics_list, technics_list, new_tags = self.build_tags_list(configs.get("tags", []))

        rule = {
            "type": self.RULE_TYPE,
            "name": configs.get("title", ""),
            "description": configs.get("description", ""),
            "enabled": self.IS_ENABLED,
            "schedule": {
                "period": {
                    "interval": self.INTERVAL,
                    "unit": self.UNIT
                }
            },
            "inputs": self.build_inputs(configs.get("translation", "")),
            "tags": new_tags,
            "triggers": self.create_trigger(configs.get("level", "medium")),
            "sigma_meta_data": {
                "rule_id": self.get_rule_id(configs.get("id", "")),
                "threat": self.create_threat_description(tactics_list, technics_list)
            }
        }

        references = self.get_references(configs)
        if references:
            rule.update({"references": references})

        return json.dumps(rule)
|
|
|
|
class OpenSearchQsBackend(OpenSearchBackend, ElasticsearchQuerystringBackend):
    """Backend class containing the identifier for the -t argument.

    Can inherit from ElasticsearchQuerystringBackend since the query string in
    both OpenSearch monitors and ElasticRule is in Elastic Common Schema.
    """
    identifier = "opensearch-monitor"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def generateAggregation(self, agg):
        """Record a COUNT aggregation as the rule's threshold.

        Sets ``isThreshold`` and stores the range query built by
        build_threshold in ``rule_threshold``. Returns an empty string.

        Raises:
            NotImplementedError: for any aggregation other than COUNT, and
                for COUNT with an aggregation field.
        """
        if agg.aggfunc != SigmaAggregationParser.AGGFUNC_COUNT:
            raise NotImplementedError("Aggregation %s is not implemented for this backend" % agg.aggfunc_notrans)

        if agg.aggfield:
            raise NotImplementedError("Threshold rules cannot COUNT(DISTINCT %s)" % agg.aggfield)

        self.isThreshold = True
        self.rule_threshold = self.build_threshold(agg.groupfield, agg.cond_op, agg.condition)
        return ""