# NOTE(review): the lines below are file-listing residue from the hosting UI
# (path, date, size), not Python; commented out so the file stays parseable:
# Files
# blue-team-tools/tools/sigma/backends/opensearch.py
# T
# 2021-09-09 15:02:59 -07:00
#
# 604 lines
# 21 KiB
# Python
# Output backends for sigmac
# Copyright 2016-2018 Thomas Patzke, Florian Roth, Devin Ferguson, Julien Bachmann
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import json
import re
import sys
import os
from typing import List, Tuple, Union
from uuid import uuid4
from sigma.parser.condition import SigmaAggregationParser
from .elasticsearch import ElasticsearchQuerystringBackend
class Atom:
    """Leaf AST node: a single ``field:prop`` term of the query string."""

    def __init__(self, field: str, prop: str) -> None:
        self.field = field
        self.prop = prop

    def __str__(self) -> str:
        # Collapse escaped backslashes for readable debug output.
        field_text = self.field.replace("\\\\", "\\")
        prop_text = self.prop.replace("\\\\", "\\")
        return f"Atom( {field_text}, {prop_text} )"
# Root of AST is always a Group
class Group:
    """AST node for a parenthesized sub-expression; the AST root is always a Group."""

    def __init__(self, ary: "Ary") -> None:
        # Real constructor. The original body was `pass` and relied entirely on
        # the group_init monkey-patch installed after Ary is defined; that patch
        # (if present) rebinds __init__ to an identical implementation, so
        # behavior is the same either way — but Group is now usable on its own.
        self.ary = ary

    def __str__(self) -> str:
        return "Group( {} )".format(str(self.ary).replace("\\\\", "\\"))
class Boolean:
    """AST node wrapping a single Bool: either an Atom or a Group."""

    def __init__(self, expression: Union[Atom, Group]) -> None:
        self.expression = expression

    def __str__(self) -> str:
        inner = str(self.expression).replace("\\\\", "\\")
        return "Boolean( {} )".format(inner)
class Ary:
    """AST node: a Boolean followed by zero or more (Rel, Boolean) pairs,
    where Rel is "AND" or "OR"."""

    def __init__(self, bool1: "Boolean", bool2: "List[Tuple[str, Boolean]]" = None) -> None:
        self.bool1 = bool1
        # Normalize None to an empty list: the original stored None as-is,
        # which made __str__ (and any iteration over bool2) crash for an Ary
        # built from a lone Boolean.
        self.bool2 = bool2 if bool2 is not None else []

    def __str__(self) -> str:
        rest = [(rel, str(boolean).replace("\\\\", "\\")) for rel, boolean in self.bool2]
        return "Ary( {}, {} )".format(str(self.bool1).replace("\\\\", "\\"), rest)
# Group's constructor is attached here, after Ary is defined, because the
# parameter annotation below references Ary — which appears later in the file
# than Group — and would raise a NameError if written inside the class body.
def group_init(self, ary: Ary):
    # The Ary expression the Group's parentheses wrapped.
    self.ary = ary
Group.__init__ = group_init
def parse_atom(s: str) -> Atom:
    """Parse a ``field:prop`` term into an Atom.

    Splits on an unescaped colon (a ':' not preceded by '\\'); escaped
    colons inside the field or property are left intact.
    """
    unescaped_colon = r"(?<!\\):"
    parts = re.split(unescaped_colon, s)
    return Atom(*parts)
def parse_group(s: str) -> Group:
    """Parse a parenthesized expression into a Group.

    Since the root of the AST is always a Group, callers initiate parsing of
    the overall expression here. Strips the outer parentheses and parses the
    interior as an Ary.
    """
    inner = s[1:-1]
    return Group(parse_ary(inner))
def expand_group(s: str) -> str:
    """Expand a special group in the form A:(B OR C) into (A:B OR A:C)."""
    unescaped_colon = r"(?<!\\):"
    # props keeps the shape "(prop1 OR prop2..." after the outer strip.
    field, props = re.split(unescaped_colon, s.strip("()"))
    terms = props.strip("()").split()
    # Prefix every non-operator term with the shared field name.
    rewritten = [term if term in ("AND", "OR") else f"{field}:{term}" for term in terms]
    return "(" + " ".join(rewritten) + ")"
def parse_boolean(s: str) -> Boolean:
    """Parse a Bool: an Atom, a Group, or an SGroup (A:(B OR C)).

    An SGroup is first rewritten by expand_group into an ordinary Group.
    """
    if "(" not in s:
        return Boolean(parse_atom(s))
    if not s.startswith("("):
        # SGroup form: field prefix outside the parentheses.
        s = expand_group(s)
    return Boolean(parse_group(s))
def parse_ary(s: str) -> Ary:
    """Parse ``Bool [Rel Bool]*`` into an Ary.

    Scans s left-to-right and splits it on spaces that sit at parenthesis
    depth 0, so spaces inside nested groups are ignored. Each top-level token
    is either the literal "AND"/"OR" or a parse-able Bool. Assumes the tokens
    strictly alternate Bool, Rel, Bool, Rel, ... (odd total count).
    """
    lst = []
    left = right = level = 0
    while left < len(s):
        # Going down one level
        if right < len(s) and s[right] == '(':
            level += 1
        # Going up one level
        elif right < len(s) and s[right] == ')':
            level -= 1
        # s[left:right] is parse-able: end of string, or a depth-0 space
        elif right == len(s) or (s[right] == ' ' and level == 0):
            section = s[left:right]
            # Handle Boolean case ("AND"/"OR" tokens are kept as raw strings)
            if section not in ["AND", "OR"]:
                section = parse_boolean(section)
            lst.append(section)
            # Skip past the separating space for the next token.
            left = right + 1
        right += 1
    # [Bool, Rel, Bool, Rel, Bool,...] => Bool, [(Rel, Bool), (Rel, Bool),...]
    bool1 = lst[0]
    bool2 = []
    for i in range(1, len(lst), 2):
        tupe = (lst[i], lst[i + 1])
        bool2.append(tupe)
    return Ary(bool1, bool2)
def translate_atom(atom: Atom) -> dict:
    """Translate an Atom into an OpenSearch ``match`` clause."""
    return {"match": {atom.field: atom.prop}}
def translate_group(group: Group) -> dict:
    """Translate a Group by translating the Ary it wraps."""
    inner_ary = group.ary
    return translate_ary(inner_ary)
def translate_boolean(boolean: Boolean) -> dict:
    """Translate a Boolean node: an Atom becomes a match clause, a Group a
    nested bool query."""
    expression = boolean.expression
    if type(expression) is Atom:
        return translate_atom(expression)
    return translate_group(expression)
def convert_bool_array(bool1: "Boolean", boolArr: "List[Tuple[str, Boolean]]") -> "List[List[Boolean]]":
    """Combine ary.bool1 and ary.bool2 into an array of Booleans grouped by
    ANDs and split by ORs, e.g. A AND B OR C -> [[A, B], [C]].

    Bug fix: the original advanced a manual index by 2 on every OR and
    compared it against len(result); after any OR, that index pointed past
    the last group, so a following AND opened a NEW group instead of
    extending the current one ("A OR B AND C" produced [[A], [B], [C]]
    instead of [[A], [B, C]]). Appending to the last group is the correct
    (and simpler) rule.
    """
    result = [[bool1]]
    for rel, boolean in boolArr:
        if rel == "AND":
            result[-1].append(boolean)   # AND extends the current group
        else:
            result.append([boolean])     # OR starts a new group
    return result
def adjust_matches(matches: List[dict], clause) -> List[dict]:
    """Group atomic match statements under the parent clause inside a single
    bool statement; group match statements, which are already wrapped in a
    bool statement, are passed through unchanged (after the combined bool).
    """
    atomic = [m for m in matches if "match" in m]
    grouped = [m for m in matches if "match" not in m]
    if not atomic:
        return grouped
    # A single atomic match is wrapped in bool-must regardless of the parent clause.
    effective_clause = "must" if len(atomic) == 1 else clause
    return [{"bool": {effective_clause: atomic}}] + grouped
def contains_group(booleanArr: List[Boolean]) -> bool:
    """Return True when any Boolean in the list wraps a Group expression."""
    return any(type(boolean.expression) is Group for boolean in booleanArr)
def translate_ary(ary: Ary) -> dict:
    """Translate an Ary into an OpenSearch bool query.

    AND-joined Booleans become a `must` clause; a run of consecutive
    single-element groups (i.e. OR-joined Booleans) is merged into one
    `should` clause. Multiple resulting clauses are OR-ed together under a
    top-level bool/should.
    """
    parsedTranslation = convert_bool_array(ary.bool1, ary.bool2)
    clauses = []
    translateIndex = 0
    while translateIndex < len(parsedTranslation):
        parsedExpression = parsedTranslation[translateIndex]
        currMatches = []
        clause = "must"  # default clause is "must"; clause is "should" if multiple consecutive "or" statements
        # Statement was joined by "or"
        if len(parsedExpression) == 1:
            # Count how many consecutive single-element (OR-joined) groups follow.
            counter = 1
            tempIndex = translateIndex
            while tempIndex+1 < len(parsedTranslation) and len(parsedTranslation[tempIndex+1]) == 1:
                tempIndex += 1
                counter += 1
            # If there's more than one, use "should" clause instead of "must"
            if counter > 1:
                clause = "should"
                parsedExpression = []
                # Rebuild parsed expression to join statements together and fast forward the translate index
                for i in range(counter):
                    parsedExpression += parsedTranslation[translateIndex+i]
                translateIndex = tempIndex
        # Iterate through each statement and join match statements into array
        for boolean in parsedExpression:
            currMatches.append(translate_boolean(boolean))
        # If bool array contains a Group which is wrapped in a bool, match statements must also be wrapped in a bool.
        if contains_group(parsedExpression):
            currMatches = adjust_matches(currMatches, clause)
        currQuery = {
            "bool": {
                clause: currMatches
            }
        }
        clauses.append(currQuery)
        translateIndex += 1
    # If only one type of clause, don't use nested bool object
    if len(clauses) > 1:
        return {
            "bool": {
                "should": clauses
            }
        }
    return clauses[0]
class OpenSearchBackend(object):
    """OpenSearch detection rule backend.

    Converts a parsed Sigma rule (translated by the parent backend into an
    Elastic Common Schema query string) into an OpenSearch monitor
    definition, serialized as JSON by create_rule.
    """
    active = True
    # Class-level on purpose: rule ids already handed out, shared across
    # instances so every generated monitor gets a unique id.
    uuid_black_list = []
    options = ElasticsearchQuerystringBackend.options + (
        ("put_filename_in_ref", False, "Want to have yml name in reference ?", None),
        ("convert_to_url", False, "Want to convert to a URL ?", None),
        ("path_to_replace", "../", "The local path to replace with dest_base_url", None),
        ("dest_base_url", "https://github.com/SigmaHQ/sigma/tree/master/", "The URL prefix", None),
        ("custom_tag", None, "Add custom tag. for multi split with a comma tag1,tag2 ", None),
    )
    # Set by generateAggregation (in the subclass) when the rule carries a
    # COUNT threshold; build_query then attaches self.rule_threshold.
    isThreshold = False

    # Default values for fields exclusive to OpenSearch monitors
    RULE_TYPE = "monitor"
    IS_ENABLED = True
    INTERVAL = 5
    UNIT = "MINUTES"
    TRIGGER_NAME = "generated-trigger"
    SEVERITIES = {"informational": "5", "low": "4", "medium": "3", "high": "2", "critical": "1"}
    TRIGGER_SCRIPT = "ctx.results[0].hits.total.value > 0"
    TRIGGER_LANGUAGE = "painless"
    MONITOR_INDICES = ["opensearch-security-logs"]
    NUM_RESULTS = 1

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.tactics = self._load_mitre_file("tactics")
        self.techniques = self._load_mitre_file("techniques")
        self.rule_threshold = {}

    def _load_mitre_file(self, mitre_type):
        """Load the MITRE mapping file ("tactics" or "techniques") from
        config/mitre and return the parsed JSON; return [] on any failure."""
        # Initialized up front so the except blocks can always reference it
        # (the original could hit an UnboundLocalError if path-building failed).
        path = None
        try:
            backend_dir = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "config", "mitre"))
            path = os.path.join(backend_dir, "{}.json".format(mitre_type))
            with open(path, 'r') as config_file:
                return json.load(config_file)
        except (IOError, OSError) as e:
            # Fixed: the original mixed str.format with literal %s placeholders,
            # so neither the path nor the error was ever printed.
            print("Failed to open {} configuration file '{}': {}".format(mitre_type, path, e), file=sys.stderr)
            return []
        except json.JSONDecodeError as e:
            # Fixed: message claimed YAML and embedded a stray "{}"; the file is JSON.
            print("Failed to parse {} configuration file '{}' as valid JSON: {}".format(mitre_type, path, e), file=sys.stderr)
            return []

    def generate(self, sigmaparser):
        """Call the parent backend to translate the Sigma rule condition into an
        Elastic Common Schema query, then build the final monitor via create_rule.

        Returns None when the parent produces no translation.
        """
        # reset per-detection variables
        self.rule_threshold = {}
        translation = super().generate(sigmaparser)
        if translation:
            index = sigmaparser.get_logsource().index
            if len(index) == 0:
                # No logsource index configured: fall back to the usual beats/agent indices.
                index = ["apm-*-transaction", "auditbeat-*", "endgame-*", "filebeat-*", "packetbeat-*", "winlogbeat-*"]
            configs = sigmaparser.parsedyaml
            configs.update({"translation": translation})
            return self.create_rule(configs, index)

    def create_threat_description(self, tactics_list, techniques_list):
        """Generate the monitor's threat description, pairing each tactic found
        in the Sigma tags with the techniques that reference it."""
        threat_list = []
        for tactic in tactics_list:
            entry = {
                "tactic": {
                    "id": tactic.get("external_id", ""),
                    "reference": tactic.get("url", ""),
                    "name": tactic.get("tactic", "")
                },
                "framework": "MITRE ATT&CK®"
            }
            techniques = [
                {
                    "id": tech.get("technique_id", ""),
                    "name": tech.get("technique", ""),
                    "reference": tech.get("url", "")
                }
                for tech in techniques_list
                if tactic.get("tactic", "") in tech.get("tactic", [])
            ]
            entry.update({"technique": techniques})
            threat_list.append(entry)
        return threat_list

    def find_tactics(self, key_name=None, key_id=None):
        """Find a tactic by display name or by external id (e.g. "TA0001");
        returns None when no entry matches."""
        for tactic in self.tactics:
            if key_name and key_name == tactic.get("tactic", ""):
                return tactic
            if key_id and key_id == tactic.get("external_id", ""):
                return tactic

    def find_technique(self, key_id=None):
        """Find a technique by its id (e.g. "T1059"); returns None when no
        entry matches."""
        for technique in self.techniques:
            if key_id and key_id == technique.get("technique_id", ""):
                return technique

    def map_severity(self, severity):
        """Map a Sigma severity to the OpenSearch numerical severity (1-5);
        unknown severities default to "medium"."""
        return self.SEVERITIES.get(severity.lower(), self.SEVERITIES["medium"])

    def create_trigger(self, severity):
        """Build the monitor trigger list: one trigger that fires whenever the
        query returns at least one hit."""
        return [
            {
                "name": self.TRIGGER_NAME,
                "severity": self.map_severity(severity),
                # The original wrapped this constant in a no-op f-string.
                "condition": {
                    "script": {
                        "source": self.TRIGGER_SCRIPT,
                        "lang": self.TRIGGER_LANGUAGE
                    }
                },
                "actions": []
            }
        ]

    def build_threshold(self, field, inequality, threshold):
        """Build a range filter expressing COUNT <op> threshold on the given field."""
        INEQUALITIES = {"<": "lt", "<=": "lte", ">": "gt", ">=": "gte"}
        return {
            "range": {
                field: {
                    INEQUALITIES[inequality]: threshold
                }
            }
        }

    def build_query(self, translation):
        """Build the OpenSearch monitor query from the translated Elastic rule query.

        Forms an abstract syntax tree (AST) using the following repeated structures:
          - Atom   = A:B
          - Rel    = AND | OR
          - Ary    = Bool [Rel Bool]*
          - Group  = (Ary)
          - SGroup = A:(B OR C)
          - Bool   = Atom | Group | SGroup
        Then translates the AST into OpenSearch boolean queries, attaching the
        threshold filter when the rule carried a COUNT aggregation.
        """
        ast = parse_group(translation)
        translatedQuery = translate_group(ast)
        if self.isThreshold:
            translatedQuery["bool"]["filter"] = self.rule_threshold
        return translatedQuery

    def build_inputs(self, translation):
        """Build the inputs field of the OpenSearch monitor."""
        return [
            {
                "search": {
                    "indices": self.MONITOR_INDICES,
                    "query": {
                        "size": self.NUM_RESULTS,
                        "aggregations": {},
                        "query": self.build_query(translation)
                    }
                }
            }
        ]

    def build_ymlfile_ref(self, configs):
        """Return a reference to the Sigma yml file (optionally rewritten as a
        URL) when the put_filename_in_ref option is enabled; None otherwise."""
        if self.put_filename_in_ref == False:  # option disabled: no filename reference
            return None
        yml_filename = configs.get("yml_filename")
        yml_path = configs.get("yml_path")
        if yml_filename is None or yml_path is None:
            return None
        if not self.convert_to_url:
            return yml_filename
        yml_path = yml_path.replace('\\', '/')  # windows path to url
        self.path_to_replace = self.path_to_replace.replace('\\', '/')  # windows path to url
        if self.path_to_replace not in yml_path:  # prefix not present: cannot build the URL
            return None
        return yml_path.replace(self.path_to_replace, self.dest_base_url) + '/' + yml_filename

    def build_tags_list(self, tags):
        """Build the searchable tag list: match Sigma tags against known MITRE
        tactics and techniques, then append any custom tags."""
        tactics_list = []
        technics_list = []
        new_tags = []
        for tag in tags:
            tag = tag.replace("attack.", "")
            if re.match("[t][0-9]{4}", tag, re.IGNORECASE):
                # Technique id, e.g. t1059
                tech = self.find_technique(tag.title())
                if tech:
                    new_tags.append(tag.title())
                    technics_list.append(tech)
            else:
                if "_" in tag:
                    # Tactic name with underscores, e.g. defense_evasion
                    tag_list = [item.title() for item in tag.split("_")]
                    tact = self.find_tactics(key_name=" ".join(tag_list))
                    if tact:
                        new_tags.append(" ".join(tag_list))
                        tactics_list.append(tact)
                elif re.match("ta[0-9]{4}", tag, re.IGNORECASE):
                    # Tactic id, e.g. ta0005. Fixed: the original pattern
                    # "[ta][0-9]{4}" is a one-character class and can never
                    # match "taNNNN", so tactic-id tags were silently dropped.
                    tact = self.find_tactics(key_id=tag.upper())
                    if tact:
                        new_tags.append(tag.upper())
                        tactics_list.append(tact)
                else:
                    # Single-word tactic name, e.g. discovery
                    tact = self.find_tactics(key_name=tag.title())
                    if tact:
                        new_tags.append(tag.title())
                        tactics_list.append(tact)
        if self.custom_tag:
            if ',' in self.custom_tag:
                new_tags.extend(self.custom_tag.split(","))
            else:
                new_tags.append(self.custom_tag)
        return tactics_list, technics_list, new_tags

    def get_rule_id(self, rule_uuid):
        """Get the rule id of the Sigma rule; generate a random one when it is
        blank or has already been used, and sanitize it for use as an id."""
        rule_uuid = rule_uuid.lower()
        if rule_uuid == "" or rule_uuid in self.uuid_black_list:
            rule_uuid = str(uuid4())
        self.uuid_black_list.append(rule_uuid)
        # Raw string: the original pattern relied on invalid escapes ("\[", "\]").
        return re.sub(r'[()*+!,\[\].\s"]', "_", rule_uuid)

    def get_references(self, configs):
        """Get the list of references, accepting either the 'reference' or the
        'references' key, with the yml file reference appended when configured."""
        references = configs.get("reference") if configs.get("reference") is not None else configs.get("references")
        return self.build_ref_yaml(references, configs)

    def build_ref_yaml(self, references, configs):
        """Add the Sigma yml file to the references, avoiding duplicates for
        multi-rule files."""
        add_ref_yml = self.build_ymlfile_ref(configs)
        if add_ref_yml:
            if references is None:  # no existing references
                references = []
            if add_ref_yml not in references:
                references.append(add_ref_yml)
        return references

    def create_rule(self, configs, index):
        """Build the OpenSearch monitor for one rule and return it as JSON."""
        rule_name = configs.get("title", "")
        rule_description = configs.get("description", "")
        inputs = self.build_inputs(configs.get("translation", ""))
        triggers = self.create_trigger(configs.get("level", "medium"))
        rule_id = self.get_rule_id(configs.get("id", ""))
        tactics_list, technics_list, new_tags = self.build_tags_list(configs.get("tags", []))
        threat = self.create_threat_description(tactics_list, technics_list)
        references = self.get_references(configs)
        rule = {
            "type": self.RULE_TYPE,
            "name": rule_name,
            "description": rule_description,
            "enabled": self.IS_ENABLED,
            "schedule": {
                "period": {
                    "interval": self.INTERVAL,
                    "unit": self.UNIT
                }
            },
            "inputs": inputs,
            "tags": new_tags,
            "triggers": triggers,
            "sigma_meta_data": {
                "rule_id": rule_id,
                "threat": threat
            }
        }
        if references:
            rule.update({"references": references})
        return json.dumps(rule)
class OpenSearchQsBackend(OpenSearchBackend, ElasticsearchQuerystringBackend):
    '''
    Backend class containing the identifier for the -t argument. Can inherit from ElasticsearchQuerystringBackend
    since query string in both OpenSearch monitors and ElasticRule are in Elastic Common Schema.
    '''
    identifier = "opensearch-monitor"

    # The redundant __init__ that only called super().__init__(*args, **kwargs)
    # has been removed: omitting it yields the identical behavior via normal
    # inheritance from OpenSearchBackend.

    def generateAggregation(self, agg):
        """Translate a Sigma COUNT aggregation into a range-filter threshold.

        Only a plain COUNT with a comparison is supported. The threshold is
        stored on the instance and attached to the query later by build_query;
        the empty string return contributes nothing to the query string itself.

        Raises NotImplementedError for COUNT(DISTINCT ...) and for any other
        aggregation function.
        """
        if agg.aggfunc != SigmaAggregationParser.AGGFUNC_COUNT:
            raise NotImplementedError("Aggregation %s is not implemented for this backend" % agg.aggfunc_notrans)
        if agg.aggfield:
            raise NotImplementedError("Threshold rules cannot COUNT(DISTINCT %s)" % agg.aggfield)
        self.isThreshold = True
        self.rule_threshold = self.build_threshold(agg.groupfield, agg.cond_op, agg.condition)
        return ""