# Output backends for sigmac
# Copyright 2016-2018 Thomas Patzke, Florian Roth, Devin Ferguson, Julien Bachmann

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import json
import re
import sys
import os
from typing import List, Optional, Tuple, Union
from uuid import uuid4

from sigma.parser.condition import SigmaAggregationParser
from .elasticsearch import ElasticsearchQuerystringBackend


class Atom:
    """Leaf of the AST: a single ``field:prop`` term."""

    def __init__(self, field: str, prop: str) -> None:
        self.field = field
        self.prop = prop

    def __str__(self) -> str:
        return "Atom( {}, {} )".format(self.field.replace("\\\\", "\\"), self.prop.replace("\\\\", "\\"))


# Root of AST is always a Group
class Group:
    """Parenthesised expression; wraps the Ary found between the parentheses."""

    # "Ary" is a forward reference: the class is defined further down.
    def __init__(self, ary: "Ary") -> None:
        self.ary = ary

    def __str__(self) -> str:
        return "Group( {} )".format(str(self.ary).replace("\\\\", "\\"))


# Kept so that code referring to the former module-level initialiser still resolves.
group_init = Group.__init__


class Boolean:
    """One operand of an Ary: either an Atom or a Group."""

    def __init__(self, expression: Union[Atom, Group]) -> None:
        self.expression = expression

    def __str__(self) -> str:
        return "Boolean( {} )".format(str(self.expression).replace("\\\\", "\\"))


class Ary:
    """First operand plus a list of ``(relation, operand)`` pairs that follow it."""

    def __init__(self, bool1: Boolean, bool2: Optional[List[Tuple[str, Boolean]]] = None) -> None:
        self.bool1 = bool1
        # Normalise the default so that iteration in __str__ and
        # convert_bool_array never hits None.
        self.bool2 = bool2 if bool2 is not None else []

    def __str__(self) -> str:
        return "Ary( {}, {} )".format(
            str(self.bool1).replace("\\\\", "\\"),
            [(rel, str(boolean).replace("\\\\", "\\")) for rel, boolean in self.bool2],
        )


# NOTE(review): in the reviewed copy of this file the separator pattern and the
# bodies of parse_atom() and expand_group() were truncated (only `r"(?` of the
# pattern survived). The pattern below -- a colon that is not preceded by a
# backslash -- and both bodies are reconstructions based on the surviving call
# sites and the expand_group description. Confirm against the upstream source.
_FIELD_SEPARATOR = re.compile(r"(?<!\\):")


def parse_atom(s: str) -> Atom:
    """Parse a ``field:prop`` term into an Atom.

    Raises:
        ValueError: if *s* contains no unescaped ``:`` separator.
    """
    parts = _FIELD_SEPARATOR.split(s, maxsplit=1)
    if len(parts) != 2:
        raise ValueError("Cannot parse '{}' as a field:value term".format(s))
    field, prop = parts
    return Atom(field, prop)


def parse_group(s: str) -> Group:
    """Parse a parenthesised expression; the first and last character are dropped."""
    return Group(parse_ary(s[1:-1]))


def expand_group(s: str) -> str:
    """Expand special group in form of A:(B OR C) to (A:B OR A:C).

    NOTE(review): reconstructed implementation (see the note above
    _FIELD_SEPARATOR). It only handles a flat list of values joined by
    AND/OR inside the parentheses.

    Raises:
        ValueError: if *s* contains no unescaped ``:`` separator.
    """
    parts = _FIELD_SEPARATOR.split(s, maxsplit=1)
    if len(parts) != 2:
        raise ValueError("Cannot expand '{}': no field separator found".format(s))
    field, group = parts
    expanded = [
        token if token in ("AND", "OR") else "{}:{}".format(field, token)
        for token in group[1:-1].split(" ")
    ]
    return "({})".format(" ".join(expanded))


def parse_boolean(s: str) -> Boolean:
    """Parse one operand: an atom, a group, or a special group ``A:(B OR C)``."""
    if "(" not in s:
        expression = parse_atom(s)
    else:
        # A parenthesis that is not the first character means the
        # ``field:(...)`` shorthand, which is rewritten to a plain group first.
        if s[0] != '(':
            s = expand_group(s)
        expression = parse_group(s)
    return Boolean(expression)


def parse_ary(s: str) -> Ary:
    """Split *s* on top-level spaces and build an Ary from the pieces.

    Pieces equal to ``AND`` / ``OR`` are kept as relation strings; every other
    piece is parsed with parse_boolean().

    Raises:
        ValueError: if *s* is empty or ends with a relation that has no
            right-hand operand.
    """
    lst = []
    left = right = level = 0

    while left < len(s):
        # Going down one level
        if right < len(s) and s[right] == '(':
            level += 1
        # Going up one level
        elif right < len(s) and s[right] == ')':
            level -= 1
        # s[left:right] is parse-able
        elif right == len(s) or (s[right] == ' ' and level == 0):
            section = s[left:right]
            # Handle Boolean case
            if section not in ["AND", "OR"]:
                section = parse_boolean(section)
            lst.append(section)
            left = right + 1
        right += 1

    if not lst:
        raise ValueError("Cannot parse an empty expression")
    if len(lst) % 2 == 0:
        raise ValueError("Expression '{}' has a relation without a right-hand operand".format(s))

    # [Bool, Rel, Bool, Rel, Bool,...] => Bool, [(Rel, Bool), (Rel, Bool),...]
    bool1 = lst[0]
    bool2 = list(zip(lst[1::2], lst[2::2]))
    return Ary(bool1, bool2)


def translate_atom(atom: Atom) -> dict:
    """Translate an Atom into a ``match`` clause."""
    return {
        "match": {
            atom.field: atom.prop
        }
    }


def translate_group(group: Group) -> dict:
    """Translate a Group by translating the Ary it wraps."""
    return translate_ary(group.ary)


def translate_boolean(boolean: Boolean) -> dict:
    """Translate a Boolean by dispatching on the type of its expression."""
    if isinstance(boolean.expression, Atom):
        return translate_atom(boolean.expression)
    return translate_group(boolean.expression)


def convert_bool_array(bool1: Boolean, boolArr: List[Tuple[str, Boolean]]) -> List[List[Boolean]]:
    """Combine ary.bool1 and ary.bool2 into an array of Boolean grouped by ANDs and split by ORs.

    Example: ``A AND B OR C AND D`` yields ``[[A, B], [C, D]]``.
    """
    result = [[bool1]]

    for rel, boolean in boolArr:
        if rel == "AND":
            # AND extends the group that is currently being built.
            result[-1].append(boolean)
        else:
            # OR starts a new group.
            result.append([boolean])

    return result


def adjust_matches(matches: List[dict], clause) -> List[dict]:
    """Group atomic match statements together into parent clause and wrap inside bool statement.

    Group match statements, which are already wrapped in a bool statement,
    are passed through unchanged and placed after the combined atomic matches.
    """
    atomicMatches = []
    combinedAtomicMatches = []
    groupMatches = []

    # Determine if current statement is an atomic match or bool group statement
    for match in matches:
        if "match" in match:
            atomicMatches.append(match)
        else:
            groupMatches.append(match)

    # If any atomic matches, combine under parent clause wrapped in a single bool statement
    if atomicMatches:
        # If there's only one atomic match, it should be wrapped in a bool-must regardless of the parent clause
        clause = "must" if len(atomicMatches) == 1 else clause
        combinedAtomicMatches = [{ "bool": { clause: atomicMatches } }]

    return combinedAtomicMatches + groupMatches


def contains_group(booleanArr: List[Boolean]) -> bool:
    """Return True if any Boolean in *booleanArr* wraps a Group."""
    return any(isinstance(boolean.expression, Group) for boolean in booleanArr)


def translate_ary(ary: Ary) -> dict:
    """Translate an Ary into a (possibly nested) ``bool`` query dict."""
    parsedTranslation = convert_bool_array(ary.bool1, ary.bool2)
    clauses = []
    translateIndex = 0

    while translateIndex < len(parsedTranslation):
        parsedExpression = parsedTranslation[translateIndex]
        currMatches = []
        clause = "must"  # default clause is "must"; clause is "should" if multiple consecutive "or" statements

        # Statement was joined by "or"
        if len(parsedExpression) == 1:
            counter = 1
            tempIndex = translateIndex
            while tempIndex + 1 < len(parsedTranslation) and len(parsedTranslation[tempIndex + 1]) == 1:
                tempIndex += 1
                counter += 1

            # If there's more than one, use "should" clause instead of "must"
            if counter > 1:
                clause = "should"
                parsedExpression = []
                # Rebuild parsed expression to join statements together and fast forward the translate index
                for i in range(counter):
                    parsedExpression += parsedTranslation[translateIndex + i]
                translateIndex = tempIndex

        # Iterate through each statement and join match statements into array
        for boolean in parsedExpression:
            currMatches.append(translate_boolean(boolean))

        # If bool array contains a Group which is wrapped in a bool, match statements must also be wrapped in a bool.
        if contains_group(parsedExpression):
            currMatches = adjust_matches(currMatches, clause)

        currQuery = { "bool": { clause: currMatches } }
        clauses.append(currQuery)
        translateIndex += 1

    # If only one type of clause, don't use nested bool object
    if len(clauses) > 1:
        return { "bool": { "should": clauses } }
    return clauses[0]


class OpenSearchBackend(object):
    """OpenSearch detection rule backend.

    Written as a mixin: ``generate`` and ``__init__`` delegate to ``super()``,
    so the class is meant to be combined with a query backend (see
    OpenSearchQsBackend in this module).
    """
    active = True
    # Class-level on purpose: rule ids already handed out are shared by all instances.
    uuid_black_list = []
    options = ElasticsearchQuerystringBackend.options + (
        ("put_filename_in_ref", False, "Want to have yml name in reference ?", None),
        ("convert_to_url", False, "Want to convert to a URL ?", None),
        ("path_to_replace", "../", "The local path to replace with dest_base_url", None),
        ("dest_base_url", "https://github.com/SigmaHQ/sigma/tree/master/", "The URL prefix", None),
        ("custom_tag", None , "Add custom tag. for multi split with a comma tag1,tag2 ", None),
    )
    isThreshold = False

    # Default values for fields exclusive to OpenSearch monitors
    RULE_TYPE = "monitor"
    IS_ENABLED = True
    INTERVAL = 5
    UNIT = "MINUTES"
    TRIGGER_NAME = "generated-trigger"
    SEVERITIES = {"informational": "5", "low": "4", "medium": "3", "high": "2", "critical": "1"}
    TRIGGER_SCRIPT = "ctx.results[0].hits.total.value > 0"
    TRIGGER_LANGUAGE = "painless"
    MONITOR_INDICES = ["opensearch-security-logs"]
    NUM_RESULTS = 1

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.tactics = self._load_mitre_file("tactics")
        self.techniques = self._load_mitre_file("techniques")
        self.rule_threshold = {}

    def _load_mitre_file(self, mitre_type):
        """Load the appropriate mitre file and return its parsed JSON content.

        The file is looked up as ``<this dir>/../../config/mitre/<mitre_type>.json``.
        On a read or parse failure a message is printed to stderr and an
        empty list is returned.
        """
        backend_dir = os.path.normpath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "config", "mitre"))
        path = os.path.join(backend_dir, "{}.json".format(mitre_type))
        try:
            with open(path, 'r', encoding="utf-8") as config_file:
                return json.load(config_file)
        except (IOError, OSError) as e:
            print("Failed to open {} configuration file '{}': {}".format(mitre_type, path, e), file=sys.stderr)
            return []
        except json.JSONDecodeError as e:
            print("Failed to parse {} configuration file '{}' as valid JSON: {}".format(mitre_type, path, e),
                  file=sys.stderr)
            return []

    def generate(self, sigmaparser):
        """Translate one Sigma rule into an OpenSearch monitor.

        Calls the parent generate method to retrieve the Sigma rule condition
        as a query string, then calls create_rule to build the final object.
        Returns the monitor as a JSON string, or None when the parent
        produced no translation.
        """
        # reset per-detection variables; isThreshold must be cleared too,
        # otherwise one threshold rule makes every later rule carry a filter.
        self.rule_threshold = {}
        self.isThreshold = False
        translation = super().generate(sigmaparser)
        if translation:
            index = sigmaparser.get_logsource().index
            if not index:
                index = ["apm-*-transaction", "auditbeat-*", "endgame-*", "filebeat-*", "packetbeat-*", "winlogbeat-*"]
            configs = sigmaparser.parsedyaml
            configs.update({"translation": translation})
            rule = self.create_rule(configs, index)
            return rule
        return None

    def create_threat_description(self, tactics_list, techniques_list):
        """Build the threat section of the monitor from tactics and techniques found in Sigma tags.

        Each tactic becomes one entry; the techniques whose "tactic" list
        contains the tactic's name are attached under its "technique" key.
        """
        threat_list = list()
        for tactic in tactics_list:
            temp_tactics = {
                "tactic": {
                    "id": tactic.get("external_id", ""),
                    "reference": tactic.get("url", ""),
                    "name": tactic.get("tactic", "")
                },
                "framework": "MITRE ATT&CK®"
            }
            temp_techniques = list()
            for tech in techniques_list:
                if tactic.get("tactic", "") in tech.get("tactic", []):
                    temp_techniques.append({
                        "id": tech.get("technique_id", ""),
                        "name": tech.get("technique", ""),
                        "reference": tech.get("url", "")
                    })
            temp_tactics.update({"technique": temp_techniques})
            threat_list.append(temp_tactics)
        return threat_list

    def find_tactics(self, key_name=None, key_id=None):
        """Return the first loaded tactic matching *key_name* or *key_id*, else None."""
        for tactic in self.tactics:
            if key_name and key_name == tactic.get("tactic", ""):
                return tactic
            if key_id and key_id == tactic.get("external_id", ""):
                return tactic
        return None

    def find_technique(self, key_id=None):
        """Return the first loaded technique whose technique_id equals *key_id*, else None."""
        for technique in self.techniques:
            if key_id and key_id == technique.get("technique_id", ""):
                return technique
        return None

    def map_severity(self, severity):
        """Map Sigma severity to OpenSearch numerical severity from 1-5.

        Unknown severities fall back to the value for "medium".
        """
        return self.SEVERITIES.get(severity.lower(), self.SEVERITIES["medium"])

    def create_trigger(self, severity):
        """Build the single-element triggers list of the monitor."""
        return [
            {
                "name": self.TRIGGER_NAME,
                "severity": self.map_severity(severity),
                "condition": {
                    "script": {
                        "source": self.TRIGGER_SCRIPT,
                        "lang": self.TRIGGER_LANGUAGE
                    }
                },
                "actions": []
            }
        ]

    def build_threshold(self, field, inequality, threshold):
        """Build a ``range`` clause for *field*.

        Raises:
            KeyError: if *inequality* is not one of ``<``, ``<=``, ``>``, ``>=``.
        """
        INEQUALITIES = {"<": "lt", "<=": "lte", ">": "gt", ">=": "gte"}
        return {
            "range": {
                field: {
                    INEQUALITIES[inequality]: threshold
                }
            }
        }

    def build_query(self, translation):
        """Build the OpenSearch monitor query from the translated query string.

        Forms an abstract syntax tree (AST) using the following repeated structures:
            - Atom   = A:B
            - Rel    = AND | OR
            - Ary    = Bool [Rel Bool]*
            - Group  = (Ary)
            - SGroup = A:(B OR C)
            - Bool   = Atom | Group | SGroup
        Then translates the AST into OpenSearch boolean queries. When the
        current rule is a threshold rule, the stored range clause is attached
        as the ``filter`` of the top-level bool.
        """
        ast = parse_group(translation)
        translatedQuery = translate_group(ast)
        if self.isThreshold:
            translatedQuery["bool"]["filter"] = self.rule_threshold
        return translatedQuery

    def build_inputs(self, translation):
        """Build the inputs field of the OpenSearch monitor."""
        return [
            {
                "search": {
                    "indices": self.MONITOR_INDICES,
                    "query": {
                        "size": self.NUM_RESULTS,
                        "aggregations": {},
                        "query": self.build_query(translation)
                    }
                }
            }
        ]

    def build_ymlfile_ref(self, configs):
        """Return the Sigma yml file reference if the put_filename_in_ref option is set.

        Returns None when the option is False, when the file name or path is
        missing from *configs*, or when URL conversion is requested but
        path_to_replace does not occur in the path.
        """
        # Deliberately an equality test: only a value equal to False disables the feature.
        if self.put_filename_in_ref == False:  # noqa: E712
            return None
        yml_filename = configs.get("yml_filename")
        yml_path = configs.get("yml_path")
        if yml_filename is None or yml_path is None:
            return None

        if self.convert_to_url:
            yml_path = yml_path.replace('\\', '/')  # windows path to url
            self.path_to_replace = self.path_to_replace.replace('\\', '/')  # windows path to url
            if self.path_to_replace not in yml_path:  # nothing to substitute
                return None
            new_ref = yml_path.replace(self.path_to_replace, self.dest_base_url) + '/' + yml_filename
        else:
            new_ref = yml_filename
        return new_ref

    def build_tags_list(self, tags):
        """Build the list of searchable tags.

        Matches tags against the loaded techniques and tactics and appends
        any custom tags. Returns ``(tactics_list, technics_list, new_tags)``.
        """
        tactics_list = list()
        new_tags = list()
        technics_list = list()

        for tag in tags:
            tag = tag.replace("attack.", "")
            # Technique id, e.g. t1059
            if re.match(r"t[0-9]{4}", tag, re.IGNORECASE):
                tech = self.find_technique(tag.title())
                if tech:
                    new_tags.append(tag.title())
                    technics_list.append(tech)
            else:
                if "_" in tag:
                    # Multi-word tactic name, e.g. defense_evasion
                    tag_list = tag.split("_")
                    tag_list = [item.title() for item in tag_list]
                    tact = self.find_tactics(key_name=" ".join(tag_list))
                    if tact:
                        new_tags.append(" ".join(tag_list))
                        tactics_list.append(tact)
                # Tactic id, e.g. ta0001 (literal "ta", not the character class [ta])
                elif re.match(r"ta[0-9]{4}", tag, re.IGNORECASE):
                    tact = self.find_tactics(key_id=tag.upper())
                    if tact:
                        new_tags.append(tag.upper())
                        tactics_list.append(tact)
                else:
                    tact = self.find_tactics(key_name=tag.title())
                    if tact:
                        new_tags.append(tag.title())
                        tactics_list.append(tact)

        if self.custom_tag:
            if ',' in self.custom_tag:
                tag_split = self.custom_tag.split(",")
                for l_tag in tag_split:
                    new_tags.append(l_tag)
            else:
                new_tags.append(self.custom_tag)

        return tactics_list, technics_list, new_tags

    def get_rule_id(self, rule_uuid):
        """Get the rule id of the Sigma rule.

        If the rule id is blank or was already used, a random one is
        generated. The id is recorded in uuid_black_list and returned with
        special characters replaced by underscores.
        """
        rule_uuid = rule_uuid.lower()
        if rule_uuid == "" or rule_uuid in self.uuid_black_list:
            rule_uuid = str(uuid4())
        self.uuid_black_list.append(rule_uuid)
        rule_id = re.sub(r'[()*+!,\[\].\s"]', "_", rule_uuid)
        return rule_id

    def get_references(self, configs):
        """Get the list of references ("reference" takes precedence over "references")."""
        references = configs.get("reference") if configs.get("reference") is not None else configs.get("references")
        references = self.build_ref_yaml(references, configs)
        return references

    def build_ref_yaml(self, references, configs):
        """Add the Sigma yml file reference to *references* unless already present."""
        add_ref_yml = self.build_ymlfile_ref(configs)
        if add_ref_yml:
            if references is None:  # No ref
                references = []
            # Skip the append when present: it would be a duplicate ref for a multi rule file.
            if add_ref_yml not in references:
                references.append(add_ref_yml)
        return references

    def create_rule(self, configs, index):
        """Main method that builds the OpenSearch monitor and returns it in JSON format.

        *index* is accepted for interface compatibility; the monitor indices
        come from MONITOR_INDICES.
        """
        rule_name = configs.get("title", "")
        rule_description = configs.get("description", "")
        inputs = self.build_inputs(configs.get("translation", ""))
        triggers = self.create_trigger(configs.get("level", "medium"))
        rule_id = self.get_rule_id(configs.get("id", ""))
        tactics_list, technics_list, new_tags = self.build_tags_list(configs.get("tags", []))
        threat = self.create_threat_description(tactics_list, technics_list)
        references = self.get_references(configs)

        rule = {
            "type": self.RULE_TYPE,
            "name": rule_name,
            "description": rule_description,
            "enabled": self.IS_ENABLED,
            "schedule": {
                "period": {
                    "interval": self.INTERVAL,
                    "unit": self.UNIT
                }
            },
            "inputs": inputs,
            "tags": new_tags,
            "triggers": triggers,
            "sigma_meta_data": {
                "rule_id": rule_id,
                "threat": threat
            }
        }

        if references:
            rule.update({"references": references})

        return json.dumps(rule)


class OpenSearchQsBackend(OpenSearchBackend, ElasticsearchQuerystringBackend):
    '''
    Backend class containing the identifier for the -t argument. Can inherit from ElasticsearchQuerystringBackend since query string
    in both OpenSearch monitors and ElasticRule are in Elastic Common Schema.
    '''
    identifier = "opensearch-monitor"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def generateAggregation(self, agg):
        """Record a COUNT aggregation as the threshold of the current rule.

        Returns an empty string so nothing is appended to the query string;
        the range clause is stored in rule_threshold instead.

        Raises:
            NotImplementedError: for COUNT with an aggregation field and for
                every aggregation function other than COUNT.
        """
        if agg.aggfunc == SigmaAggregationParser.AGGFUNC_COUNT:
            if agg.aggfield:
                raise NotImplementedError("Threshold rules cannot COUNT(DISTINCT %s)" % agg.aggfield)

            self.isThreshold = True
            self.rule_threshold = self.build_threshold(agg.groupfield, agg.cond_op, agg.condition)
            return ""

        raise NotImplementedError("Aggregation %s is not implemented for this backend" % agg.aggfunc_notrans)