# Output backends for sigmac
# Copyright 2016-2018 Thomas Patzke, Florian Roth, juju4

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import json
import os
import re
import sys

from sigma.backends.base import SingleTextQueryBackend
from sigma.backends.exceptions import NotSupportedError
from sigma.parser.condition import ConditionOR, SigmaAggregationParser

# Sumo specifics
# https://help.sumologic.com/05Search/Search-Query-Language
# want _index or _sourceCategory for performance
# try to get most string match on first line for performance
# further sorting can be done with extra parsing
# No regex match, must use 'parse regex' https://help.sumologic.com/05Search/Search-Query-Language/01-Parse-Operators/02-Parse-Variable-Patterns-Using-Regex
# For some strings like Windows ProcessCmdline or LogonProcess, it might be good to force case lower and upper as Windows is inconsistent in logs


class SumoLogicBackend(SingleTextQueryBackend):
    """Converts Sigma rule into SumoLogic query. Contributed by SOC Prime. https://socprime.com"""
    identifier = "sumologic"
    active = True
    config_required = False
    default_config = ["sysmon", "sumologic"]

    index_field = "_sourceCategory"
    reClear = None
    andToken = " AND "
    orToken = " OR "
    notToken = "!"
    subExpression = "(%s)"
    listExpression = "(%s)"
    listSeparator = ", "
    valueExpression = "\"%s\""
    nullExpression = "isEmpty(%s)"
    notNullExpression = "!isEmpty(%s)"
    mapExpression = "%s=%s"
    mapListsSpecialHandling = True
    mapListValueExpression = "%s IN %s"
    interval = None
    logname = None

    def __init__(self, *args, **kwargs):
        """Build ``allowedFieldsList`` from built-in, configured and mapped field names.

        ``keypresent`` records whether the backend configuration supplies a
        non-empty ``afl_fields`` entry; ``generateAggregation`` reads it to
        decide whether fields must be parsed out of the message first.
        """
        super().__init__(*args, **kwargs)
        # TODO/FIXME! depending on deployment configuration, existing FER must be populate here (or backend config?)
        aFL = ["_sourceCategory", "_view", "_sourceName"]
        configured_fields = self.sigmaconfig.config.get("afl_fields")
        if configured_fields:
            self.keypresent = True
            aFL.extend(configured_fields)
        else:
            self.keypresent = False
        for item in self.sigmaconfig.fieldmappings.values():
            if item.target_type is list:
                aFL.extend(item.target)
            else:
                aFL.append(item.target)
        self.allowedFieldsList = list(set(aFL))

    def _isPlainMapValue(self, value):
        """Return True when *value* is rendered directly as ``key=value``.

        Lists only count as plain values when ``mapListsSpecialHandling`` is
        disabled; otherwise they are handled by ``generateMapItemListNode``.
        """
        if self.mapListsSpecialHandling:
            return type(value) in (str, int)
        return type(value) in (str, int, list)

    def generateAggregation(self, agg):
        """Render the aggregation part of a rule as piped query operators.

        Note: *agg* is modified in place (``groupfield`` 'host' is renamed to
        'hostname', ``aggfunc_notrans`` is normalised).

        Raises:
            NotImplementedError: for the 'near' aggregation operator.
        """
        # lnx_shell_priv_esc_prep.yml
        if agg.groupfield == 'host':
            agg.groupfield = 'hostname'
        if agg.aggfunc_notrans == 'count() by':
            agg.aggfunc_notrans = 'count by'
        if agg.aggfunc == SigmaAggregationParser.AGGFUNC_NEAR:
            raise NotImplementedError("The 'near' aggregation operator is not yet implemented for this backend")

        if agg.aggfield:
            # Any aggregation over a field is rendered as a distinct count.
            agg.aggfunc_notrans = "count_distinct"

        if self.keypresent:
            # Fields are referenced directly by name.
            if not agg.groupfield:
                if agg.aggfield:
                    return " \n| %s(%s) \n| where _count_distinct %s %s" % (
                        agg.aggfunc_notrans, agg.aggfield, agg.cond_op, agg.condition)
                return " \n| %s | where _count %s %s" % (
                    agg.aggfunc_notrans, agg.cond_op, agg.condition)
            if agg.aggfield:
                return " \n| %s(%s) by %s \n| where _count_distinct %s %s" % (
                    agg.aggfunc_notrans, agg.aggfield, agg.groupfield, agg.cond_op, agg.condition)
            return " \n| %s by %s \n| where _count %s %s" % (
                agg.aggfunc_notrans, agg.groupfield, agg.cond_op, agg.condition)

        # No configured fields: emit 'parse' operators that extract the
        # needed values from "[field=*]" patterns before aggregating.
        if not agg.groupfield:
            if agg.aggfield:
                return " \n| parse \"[%s=*]\" as searched nodrop\n| %s(searched) \n| where _count_distinct %s %s" % (
                    agg.aggfield, agg.aggfunc_notrans, agg.cond_op, agg.condition)
            return " \n| %s | where _count %s %s" % (
                agg.aggfunc_notrans, agg.cond_op, agg.condition)
        if agg.aggfield:
            return " \n| parse \"[%s=*]\" as searched nodrop\n| parse \"[%s=*]\" as grpd nodrop\n| %s(searched) by grpd \n| where _count_distinct %s %s" % (
                agg.aggfield, agg.groupfield, agg.aggfunc_notrans, agg.cond_op, agg.condition)
        return " \n| parse \"[%s=*]\" as grpd nodrop\n| %s by grpd \n| where _count %s %s" % (
            agg.groupfield, agg.aggfunc_notrans, agg.cond_op, agg.condition)

    def generateBefore(self, parsed):
        """Return the query prefix: the captured log name plus a space, or ''."""
        # not required but makes query faster, especially if no FER or _index/_sourceCategory
        if self.logname:
            return "%s " % self.logname
        return ""

    def generate(self, sigmaparser):
        """Translate a parsed Sigma rule into a query string.

        Only the first entry of ``sigmaparser.condparsed`` is translated (the
        method returns from inside the loop); ``None`` is returned when there
        is no parsed condition.
        """
        logsource = sigmaparser.parsedyaml.get('logsource')
        try:
            self.product = sigmaparser.parsedyaml['logsource']['product']   # OS or Software
        except KeyError:
            self.product = None
        try:
            self.service = sigmaparser.parsedyaml['logsource']['service']   # Channel
        except KeyError:
            self.service = None
        try:
            self.category = sigmaparser.parsedyaml['logsource']['category']   # Channel
        except KeyError:
            self.category = None
        del logsource
        # FIXME! don't get backend config mapping
        self.indices = sigmaparser.get_logsource().index
        if len(self.indices) == 0:
            self.indices = None
        try:
            self.interval = sigmaparser.parsedyaml['detection']['timeframe']
        except (KeyError, TypeError):
            # No timeframe in the rule: keep the current interval.
            pass

        for parsed in sigmaparser.condparsed:
            query = self.generateQuery(parsed)
            # FIXME! exclude if expression is regexp but anyway, not directly supported.
            #   Not doing if aggregation ('| count') or key ('=')
            already_quoted = query.startswith('"') and query.endswith('"')
            parenthesised = query.startswith('(') and query.endswith(')')
            if not already_quoted and not parenthesised and '|' not in query and '=' not in query:
                query = '"%s"' % query
            before = self.generateBefore(parsed)
            after = self.generateAfter(parsed)

            result = ""
            if before is not None:
                result = before
            if query is not None:
                result += query
            if after is not None:
                result += after

            # adding parenthesis here in case 2 rules are aggregated together - ex: win_possible_applocker_bypass
            # but does not work if count, where or other piped statements...
            return result

    # Skip logsource value from sigma document for separate path.
    # def generateCleanValueNodeLogsource(self, value):
    #     return self.valueExpression % (self.cleanValue(str(value)))

    # Clearing values from special characters.
    # Sumologic: only removing '*' (in quotes, is literal. without, is wildcard) and '"'
    def cleanNode(self, node, key=None):
        """Split a keyword on '*' wildcards, or return *node* unchanged.

        With a *key*, or without any '*', the value is returned as is. Without
        a key, a value containing '*' is split into its non-empty fragments
        (a list). Non-string values are returned untouched.
        """
        if not isinstance(node, str):
            # e.g. integer event ids inside a list: nothing to split.
            return node
        if "*" in node and key and not re.search(r"[\s]", node):
            return node
        elif "*" in node and not key:
            return [x for x in node.split("*") if x]
        return node

    # Clearing values from special characters.
    def generateMapItemNode(self, node):
        """Render one ``(key, value)`` pair of a Sigma detection.

        Keys in ``allowedFieldsList`` become ``key=value`` expressions; any
        other key is dropped and only its value(s) are searched as keywords.

        Raises:
            TypeError: if the value type is not supported.
        """
        key, value = node
        if key in self.allowedFieldsList:
            if key in ["_sourceCategory", "_sourceName"]:
                value = "*%s*" % value.lower()
                return self.mapExpression % (key, value)
            elif self._isPlainMapValue(value):
                if key in ("LogName", "source"):
                    self.logname = value
                # need cleanValue if sigma entry with single quote
                return self.mapExpression % (key, self.cleanValue(value, key))
            elif type(value) is list:
                return self.generateMapItemListNode(key, value)
            elif value is None:
                return self.nullExpression % (key, )
            else:
                raise TypeError("Backend does not support map values of type " + str(type(value)))

        if self._isPlainMapValue(value):
            if type(value) is not str:
                return self.generateValueNode(value)
            new_value = list()
            value = self.cleanNode(value)
            if type(value) == list:
                new_value.append(self.andToken.join([self.cleanValue(val) for val in value]))
            else:
                new_value.append(value)
            if len(new_value) == 1:
                # if after cleaning node, it is empty but there is AND statement... make it true.
                return self.generateANDNode(new_value) or "true"
            return self.generateORNode(new_value)
        elif type(value) is list:
            new_value = list()
            for item in value:
                item = self.cleanNode(item)
                if type(item) is list and len(item) == 1:
                    new_value.append(item[0])
                elif type(item) is list:
                    new_value.append(self.andToken.join([self.cleanValue(val) for val in item]))
                else:
                    new_value.append(item)
            return self.generateORNode(new_value)
        elif value is None:
            return self.nullExpression % (key, )
        else:
            raise TypeError("Backend does not support map values of type " + str(type(value)))

    # from mixins.py
    # input in simple quotes are not passing through this function. ex: rules/windows/sysmon/sysmon_vul_java_remote_debugging.yml, rules/apt/apt_sofacy_zebrocy.yml
    #   => OK only if field entry with list, not string
    #   => generateNode: call cleanValue
    def cleanValue(self, val, key=''):
        """Escape embedded double quotes and quote values that need it.

        A string containing any non-word or whitespace character is wrapped
        with ``valueExpression``; other values are returned unchanged.
        """
        if isinstance(val, str):
            # Escape a quote that follows a character other than a backslash
            # or another quote. The preceding character is captured and put
            # back so it is not lost in the substitution.
            val = re.sub(r'([^\\"])"', r'\1\\"', val)
            if re.search(r"[\W\s]", val):
                return self.valueExpression % val
        return val

    # for keywords values with space
    def generateValueNode(self, node, key=''):
        """Render a single value, AND-ing the fragments of a wildcard keyword."""
        cV = self.cleanNode(str(node), key)
        if type(node) is int:
            return cV
        if type(cV) is list:
            # Use the backend's AND token (with surrounding spaces) so the
            # fragments stay separate search terms.
            return "(%s)" % self.andToken.join([self.cleanValue(item) for item in cV])
        if isinstance(node, str):
            if 'AND' in node and cV:
                return "(" + cV + ")"
            if node.startswith('"') and node.endswith('"'):
                # Already quoted by the rule author.
                return cV
        return self.cleanValue(cV)

    def generateMapItemListNode(self, key, value):
        """Render a list value as a parenthesised OR of its items."""
        key_allowed = key in self.allowedFieldsList
        itemslist = list()
        for item in value:
            if key_allowed:
                itemslist.append('%s = %s' % (key, self.generateValueNode(item, key)))
            else:
                itemslist.append('%s' % (self.generateValueNode(item)))
        return "(" + " OR ".join(itemslist) + ")"

    # generateORNode algorithm for SumoLogicBackend class.
    def generateORNode(self, node):
        """Render an OR node; pure-string ConditionOR items are wildcard-split first."""
        if type(node) == ConditionOR and all(isinstance(item, str) for item in node):
            new_value = list()
            for value in node:
                value = self.cleanNode(value)
                if type(value) is list:
                    new_value.append(self.andToken.join([self.valueExpression % val for val in value]))
                else:
                    new_value.append(value)
            return "(" + self.orToken.join([self.generateNode(val) for val in new_value]) + ")"
        return "(" + self.orToken.join([self.generateNode(val) for val in node]) + ")"


class SumoLogicCSE(SumoLogicBackend):
    """Converts Sigma rule into SumoLogic CSE query. Contributed by SOC Prime. https://socprime.com"""
    identifier = "sumologic-cse"
    active = True
    config_required = False
    default_config = ["sysmon"]

    # NOTE(review): "metdata_*" looks like a misspelling of "metadata_*"
    # (cleanValue below uses 'metadata_deviceEventId') - confirm before renaming.
    index_field = "metdata_product"
    reClear = None
    #reEscape = re.compile('[\\\\"]')
    andToken = " and "
    orToken = " or "
    notToken = "!"
    subExpression = "(%s)"
    listExpression = "(%s)"
    listSeparator = ", "
    valueExpression = "\"%s\""
    nullExpression = "isEmpty(%s)"
    notNullExpression = "!isEmpty(%s)"
    mapExpression = "%s=%s"
    mapListsSpecialHandling = True
    mapListValueExpression = "%s IN %s"
    interval = None
    logname = None

    def __init__(self, *args, **kwargs):
        """Extend the allowed field list with the CSE product/vendor fields."""
        super().__init__(*args, **kwargs)
        self.allowedFieldsList.extend(["metdata_product", "metdata_vendor"])

    def cleanValue(self, val, key=''):
        """Return numeric values (and any 'metadata_deviceEventId' value) bare, others quoted."""
        if key == 'metadata_deviceEventId' or isinstance(val, int) or val.isdigit():
            return val
        return self.valueExpression % val

    def cleanNode(self, node, key=None):
        """Return *node* unchanged (no wildcard splitting for CSE)."""
        return node

    # Clearing values from special characters.
    def generateMapItemNode(self, node):
        """Render one ``(key, value)`` pair; a key is mandatory for CSE.

        Raises:
            TypeError: if the key is empty or the value type is not supported.
        """
        key, value = node
        if not key:
            raise TypeError("Backend does not support query without key.")
        if self._isPlainMapValue(value):
            if key in ("LogName", "source"):
                self.logname = value
            # need cleanValue if sigma entry with single quote
            return self.mapExpression % (key, self.cleanValue(value, key))
        elif type(value) is list:
            return self.generateMapItemListNode(key, value)
        elif value is None:
            return self.nullExpression % (key,)
        else:
            raise TypeError("Backend does not support map values of type " + str(type(value)))

    def generateMapItemListNode(self, key, value):
        """Render a list value as ``key=value`` (one item) or ``key IN (...)``."""
        if len(value) == 1:
            # Clean the single item like a scalar value so strings get quoted.
            return self.mapExpression % (key, self.cleanValue(value[0], key))
        # cleanValue returns integers unchanged; str.join needs strings.
        return "%s IN (%s)" % (key, ", ".join([str(self.cleanValue(item, key)) for item in value]))


class SumoLogicCSERule(SumoLogicCSE):
    """Converts Sigma rule into SumoLogic CSE query"""
    identifier = "sumologic-cse-rule"
    active = True

    def __init__(self, *args, **kwargs):
        """Initialize field mappings"""
        super().__init__(*args, **kwargs)
        self.techniques = self._load_mitre_file("techniques")
        self.allowedCategories = ["Threat Intelligence", "Initial Access", "Execution", "Persistence",
                                  "Privilege Escalation", "Defense Evasion", "Credential Access", "Discovery",
                                  "Lateral Movement", "Collection", "Command and Control", "Exfiltration",
                                  "Impact"]
        self.defaultCategory = "Unknown/Other"
        self.results = []

    def find_technique(self, key_ids):
        """Yield every loaded technique whose ``technique_id`` is in *key_ids*."""
        for key_id in set(key_ids):
            if not key_id:
                continue
            for technique in self.techniques:
                if key_id == technique.get("technique_id", ""):
                    yield technique

    def _load_mitre_file(self, mitre_type):
        """Load ``config/mitre/<mitre_type>.json`` relative to this module.

        Returns the parsed JSON content, or an empty list (after printing a
        message to stderr) when the file cannot be opened or parsed.
        """
        # Computed outside the try block so the error handlers can always
        # reference the path.
        backend_dir = os.path.normpath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "config", "mitre"))
        path = os.path.join(backend_dir, "{}.json".format(mitre_type))
        try:
            with open(path, encoding="utf-8") as config_file:
                return json.load(config_file)
        except (IOError, OSError) as e:
            print("Failed to open {} configuration file '{}': {}".format(mitre_type, path, e),
                  file=sys.stderr)
            return []
        except json.JSONDecodeError as e:
            print("Failed to parse {} configuration file '{}' as valid JSON: {}".format(mitre_type, path, e),
                  file=sys.stderr)
            return []

    def skip_tactics_or_techniques(self, src_technics, src_tactics):
        """Keep only the tactic/technique pairs that match in the loaded data.

        A technique is kept when one of *src_tactics* appears in its
        ``tactic`` entry; that tactic is kept as well.

        Returns:
            ``(tactics, technics)`` as two sorted lists.
        """
        tactics = set()
        technics = set()

        local_storage_techniques = {item["technique_id"]: item for item in self.find_technique(src_technics)}

        for key_id in src_technics:
            src_tactic = local_storage_techniques.get(key_id, {}).get("tactic")
            if not src_tactic:
                continue
            src_tactic = set(src_tactic)

            for item in src_tactics:
                if item in src_tactic:
                    technics.add(key_id)
                    tactics.add(item)

        return sorted(tactics), sorted(technics)

    def parse_severity(self, old_severity):
        """Map "critical" (case-insensitive) to "high"; return other values unchanged."""
        if old_severity.lower() == "critical":
            return "high"
        return old_severity

    def get_tactics_and_techniques(self, tags):
        """Split rule tags into title-cased tactic names and technique ids.

        The "attack." prefix is removed; tags matching ``t`` + 4 digits are
        techniques, tags matching ``s`` + 4 digits are skipped, anything else
        is treated as a tactic (underscores become spaces).
        """
        tactics = list()
        technics = list()

        for tag in tags:
            tag = tag.replace("attack.", "")
            if re.match("[t][0-9]{4}", tag, re.IGNORECASE):
                technics.append(tag.title())
            elif re.match("[s][0-9]{4}", tag, re.IGNORECASE):
                continue
            else:
                if "_" in tag:
                    tag = tag.replace("_", " ")
                tag = tag.title()
                tactics.append(tag)

        return tactics, technics

    def map_risk_score(self, level):
        """Map a Sigma level to a score: critical=5, high=4, medium=3, low=2, else 1."""
        if level == "critical":
            return 5
        elif level == "high":
            return 4
        elif level == "medium":
            return 3
        elif level == "low":
            return 2
        return 1

    def create_rule(self, config):
        """Build a rule dictionary from *config* and append it to ``self.results``.

        Nothing is returned; the collected rules are serialised by ``finalize``.
        """
        # "tags:" may be present but empty in the rule, which yields None.
        tags = config.get("tags") or []

        tactics, technics = self.get_tactics_and_techniques(tags)
        tactics, technics = self.skip_tactics_or_techniques(technics, tactics)
        tactics = [tactic.replace(" ", "") for tactic in tactics]
        score = self.map_risk_score(config.get("level", "medium"))
        rule = {
            "name": "{} by {}".format(config.get("title"), config.get('author')),
            "description": "{} {}".format(config.get("description"), "Technique: {}.".format(",".join(technics))),
            "enabled": True,
            "expression": """{}""".format(config.get("translation", "")),
            "assetField": "device_hostname",
            "score": score,
            "stream": "record"
        }
        if tactics and tactics[0] in self.allowedCategories:
            rule.update({"category": tactics[0]})
        else:
            rule.update({"category": self.defaultCategory})
        self.results.append(rule)
        #return json.dumps(rule, indent=4, sort_keys=False)

    def generate(self, sigmaparser):
        """Translate the rule and record it; the output is emitted by ``finalize``.

        Raises:
            NotSupportedError: if the parent backend produced no query.
        """
        translation = super().generate(sigmaparser)
        if not translation:
            raise NotSupportedError("No table could be determined from Sigma rule")
        # Work on a shallow copy so the parser's own document is not modified.
        configs = dict(sigmaparser.parsedyaml)
        configs.update({"translation": translation})
        # create_rule has no return value, so this returns None.
        return self.create_rule(configs)

    def finalize(self):
        """Return the collected rules as JSON: an object for one rule, an array for several."""
        if len(self.results) == 1:
            return json.dumps(self.results[0], indent=4, sort_keys=False)
        elif len(self.results) > 1:
            return json.dumps(self.results, indent=4, sort_keys=False)