#!/usr/bin/python
# Recovered from a patch that adds contrib/sigma2sumologic.py as a new file.
# Copyright 2018 juju4

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
Project: sigma2sumologic.py
Date: 11 Jan 2019
Author: juju4
Version: 1.0
Description: This script executes sumologic search queries from Sigma SIEM rules.
Workflow:
    1. Convert rules with sigmac
    2. Enrich: add ignore+local custom rules, priority
    3. Format
    4. Get results and save to txt/xlsx files
Requirements:
    $ pip install sumologic-sdk pyyaml pandas
"""

import argparse
import datetime
import glob
import json
import logging
import os
import re
import stat
import subprocess
import sys
import time
import traceback

import pandas
import yaml
from sumologic import SumoLogic

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
formatter = logging.Formatter('%(asctime)s - %(name)s - p%(process)s {%(pathname)s:%(lineno)d} - %(levelname)s - %(message)s')
handler = logging.FileHandler('sigma2sumo.log')
handler.setFormatter(formatter)
logger.addHandler(handler)


def _str2bool(value):
    """Parse a command-line boolean such as 'true', 'no' or '1'.

    argparse's ``type=bool`` treats every non-empty string (including
    "False") as True, so an explicit parser is needed.
    """
    lowered = str(value).strip().lower()
    if lowered in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if lowered in ('0', 'false', 'f', 'no', 'n', 'off', ''):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


parser = argparse.ArgumentParser(description='Execute sigma rules in sumologic')
parser.add_argument("--conf", help="script yaml config file", type=str, required=True)
parser.add_argument("--accessid", help="Sumologic Access ID", type=str, required=False)
parser.add_argument("--accesskey", help="Sumologic Access Key", type=str, required=False)
parser.add_argument("--endpoint", help="Sumologic url endpoint", type=str, required=False)
parser.add_argument("--ruledir", help="sigma rule directory path to convert", type=str, required=False)
parser.add_argument("--outdir", help="output directory to create rules", type=str, required=False)
parser.add_argument("--sigmac", help="Sigmac location", default="../tools/sigmac", type=str)
parser.add_argument("--realerttime", help="Realert time (optional value, default 5 minutes)", type=str, default=5)
# Accepts both a bare "--debug" and the historical "--debug True" spelling.
parser.add_argument("--debug", help="Show debug output", type=_str2bool, nargs='?', const=True, default=False)
args = parser.parse_args()

LIMIT = 100    # maximum number of records fetched per search job
delay = 5      # seconds between two polls of the search job status
TIME_FORMAT = "%Y-%m-%dT%H:%M:%S"


def rule_element(file_content, elements):
    """
    Function used to get specific element from yaml document and return content
    :type file_content: str
    :type elements: list
    :param file_content: text of the yaml rule
    :param elements: list of elements of the yaml document to get "title", "description"
    :return: the value of the last listed key found in the document, "" if
             none is found or if the value is empty
    :raises Exception: 'Unsupported' when the content cannot be parsed
    """
    try:
        logger.debug("file_content: %s" % file_content)
        # Parsed once; the original re-parsed the document for every element.
        document = yaml.safe_load(file_content.replace("---", ""))
    except Exception as err:
        raise Exception('Unsupported') from err
    element_output = ""
    for e in elements:
        try:
            element_output = document[e]
        except (KeyError, IndexError, TypeError):
            # Missing element (or a document that is not a mapping): keep
            # whatever an earlier element produced.
            pass
    if element_output is None:
        return ""
    return element_output


def get_rule_as_sumologic(file):
    """
    Function used to get sumologic query output from rule file
    :type file: str
    :param file: rule filename
    :return: string query; several queries are joined with " OR "
    :raises FileNotFoundError: when the sigmac converter cannot be found
    :raises Exception: when sigmac writes anything to stderr
    """
    if not os.path.exists(args.sigmac):
        logger.error("Cannot find sigmac rule converter at '%s', please set a correct location via '--sigmac'", args.sigmac)
        # Popen would fail with the same exception type a few lines below;
        # raising here keeps the log and the error next to each other.
        raise FileNotFoundError(args.sigmac)
    cmd = [args.sigmac, file, "--target", "sumologic"]
    logger.info('get_rule_as_sumologic cmd: %s' % cmd)
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, err = process.communicate()

    # output is byte-string...
    output = output.decode("utf-8")
    err = err.decode("utf-8")

    logger.info('get_rule_as_sumologic output: %s' % output)
    logger.info('get_rule_as_sumologic stderr: %s' % err)
    if err:
        logger.error('Unsupported output at this time')
        raise Exception('Unsupported output at this time')
    # Remove empty string from \n
    queries = [line for line in output.split("\n") if line]
    # Handle case of multiple queries returned
    if len(queries) > 1:
        return " OR ".join(queries)
    return "".join(queries)


# --conf is a required option, so the configuration file is always read.
with open(args.conf, 'r') as ymlfile:
    # safe_load: the configuration is plain data and must not be able to
    # instantiate arbitrary Python objects.
    cfg = yaml.safe_load(ymlfile) or {}

# Values from the configuration file take precedence, as before; options the
# file does not mention now fall back to the command line instead of raising
# KeyError.
for option in ('accessid', 'accesskey', 'endpoint', 'ruledir', 'outdir', 'sigmac'):
    if cfg.get(option) is not None:
        setattr(args, option, cfg[option])
args.recursive = cfg.get('recursive', False)

if not args.ruledir or not args.outdir:
    parser.error("'ruledir' and 'outdir' must be set in the config file or on the command line")

if args.recursive:
    globpath = args.ruledir + "/**/*.yml"
else:
    globpath = args.ruledir + "/*.yml"
# The access key is a secret: never write it to the log file.
logger.debug("args: %s" % dict(vars(args), accesskey='***'))
logger.debug("globpath: %s" % globpath)

if not os.path.isdir(args.outdir):
    os.mkdir(args.outdir, stat.S_IRWXU)

sumo = None  # created inside the loop so a connection error is reported per rule

# '**' only descends into sub-directories when recursive=True is passed.
for file in glob.iglob(globpath, recursive=bool(args.recursive)):
    file_basenamepath = os.path.splitext(file)[0]
    file_basename = os.path.basename(file_basenamepath)
    custom_file = file_basenamepath + '.custom'
    out_prefix = os.path.join(args.outdir, "sigma-" + file_basename)

    # Step 1: convert the rule and append the aggregation part.
    try:
        logger.info("Processing %s ..." % file_basename)
        logger.info("Rule file: %s" % file)

        sumo_query = get_rule_as_sumologic(file)

        logger.info(" Checking if custom query file: %s" % custom_file)
        if os.path.isfile(custom_file):
            # FIXME! want to add something in the middle for parsing for example...
            logger.info(" Adding custom part to end query from: %s" % custom_file)
            with open(custom_file, "rb") as f:
                sumo_query += " " + f.read().decode('utf-8')
        elif 'count ' not in sumo_query and ('EventID=' in sumo_query):
            sumo_query += " | count _sourceCategory, hostname, EventID, msg_summary, _raw"
        elif 'count ' not in sumo_query:
            sumo_query += " | count _sourceCategory, hostname, _raw"

        logger.info("Final sumo query: %s" % sumo_query)

    except Exception as e:
        if args.debug:
            traceback.print_exc()
        logger.exception("error generating sumo query " + str(file) + "----" + str(e))
        # Without a query there is nothing to run; the original fell through
        # and reused the previous rule's query (or crashed on the first rule).
        continue

    # Step 2: run the query over the last 24 hours.
    try:
        # https://github.com/SumoLogic/sumologic-python-sdk/blob/master/scripts/search-job.py
        if sumo is None:
            sumo = SumoLogic(args.accessid, args.accesskey, args.endpoint)
        # The job is submitted with timeZone 'UTC', so the bounds must be UTC
        # wall-clock times rather than local ones.
        now = datetime.datetime.now(datetime.timezone.utc)
        toTime = now.strftime(TIME_FORMAT)
        fromTime = (now - datetime.timedelta(hours=24)).strftime(TIME_FORMAT)
        timeZone = 'UTC'
        byReceiptTime = True

        sj = sumo.search_job(sumo_query, fromTime, toTime, timeZone, byReceiptTime)

        status = sumo.search_job_status(sj)
        while status['state'] != 'DONE GATHERING RESULTS':
            if status['state'] == 'CANCELLED':
                break
            time.sleep(delay)
            status = sumo.search_job_status(sj)

    except Exception as e:
        if args.debug:
            traceback.print_exc()
        logger.exception("error seaching sumo " + str(file) + "----" + str(e))
        # No result set exists at this point, so only the error and the query
        # are recorded (the original dumped an undefined/stale variable here).
        with open(out_prefix + '-error.txt', "w") as f:
            f.write("ERROR: %s\n\nQUERY: %s" % (e, sumo_query))
        continue

    logger.info("Sumo search job status: %s" % status['state'])

    # Step 3: fetch the records and save them.
    try:
        if status['state'] == 'DONE GATHERING RESULTS':
            count = status['recordCount']
            limit = count if count < LIMIT and count != 0 else LIMIT  # compensate bad limit check
            r = sumo.search_job_records(sj, limit=limit)
            logger.info("Sumo search results: %s" % r)

            logger.info("Saving final sumo query for %s to %s" % (file, out_prefix + '.sumo'))
            with open(out_prefix + '.sumo', "w") as f:
                f.write(sumo_query)
            if r and r['records'] != []:
                logger.info("Saving results")
                # as json text file
                with open(out_prefix + '.txt', "w") as f:
                    f.write(json.dumps(r, indent=4, sort_keys=True))
                # as excel file; json_normalize moved to the top-level
                # namespace in pandas 1.0, older releases only have the
                # pandas.io.json location.
                normalize = getattr(pandas, 'json_normalize', None) or pandas.io.json.json_normalize
                df = normalize(r['records'])
                with pandas.ExcelWriter(out_prefix + ".xlsx") as writer:
                    df.to_excel(writer, sheet_name='data')
                    pandas.DataFrame({'References': [
                        "timeframe: from %s to %s" % (fromTime, toTime),
                        "Sumo endpoint: %s" % args.endpoint,
                        "Sumo query: %s" % sumo_query
                    ]}).to_excel(writer, sheet_name='comments')

                # and do whatever you want, email alert, report, ticket...

    except Exception as e:
        if args.debug:
            traceback.print_exc()
        logger.exception("error saving results " + str(file) + "----" + str(e))

# ---------------------------------------------------------------------------
# NOTE(review): in this copy the span continued with patch hunks for
# tools/sigma/backends/sumologic.py.  Part of that text is truncated (the
# `reEscape` line and the head of the aggregation method are missing), so it
# is preserved verbatim below instead of being reconstructed.
#
# | diff --git a/tools/sigma/backends/sumologic.py b/tools/sigma/backends/sumologic.py
# | index 0b5ebcec4..e280453bf 100644
# | --- a/tools/sigma/backends/sumologic.py
# | +++ b/tools/sigma/backends/sumologic.py
# | @@ -16,6 +16,7 @@
# |  import re
# |  import sigma
# | +from sigma.parser.condition import ConditionOR
# |  from .base import SingleTextQueryBackend
# |  # Sumo specifics
# | @@ -32,7 +33,7 @@ class SumoLogicBackend(SingleTextQueryBackend):
# |  active = True
# |  index_field = "_index"
# | - reEscape = re.compile('("|(? 5
# | + #return " | timeslice %s | count_distinct(%s) %s | where _count_distinct > 0" % (self.interval, agg.aggfunc_notrans or "", agg.aggfield or "", agg.groupfield or "")
# | + #return " | timeslice %s | count_distinct(%s) %s | where _count_distinct %s %s" % (self.interval, agg.aggfunc_notrans, agg.aggfield or "", agg.groupfield or "", agg.cond_op, agg.condition)
# |  if agg.groupfield == None:
# |  #return " | %s(%s) | when _count %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.cond_op, agg.condition)
# | - return " | %s(%s) as val | when val %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.cond_op, agg.condition)
# | + return " | %s %s | where _count %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.cond_op, agg.condition)
# |  else:
# | - return " | %s(%s) as val by %s | when val %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.groupfield or "", agg.cond_op, agg.condition)
# | + return " | %s %s by %s | where _count %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.groupfield or "", agg.cond_op, agg.condition)
# | -# TimeFrame condition / within timeframe
# | -# condition | timeslice 5m | count_distinct(f1) as val by f2 | where val > 5
# | -# Near condition => how near... like timeframe?
# ---------------------------------------------------------------------------
# NOTE(review): the definitions below are methods of SumoLogicBackend.  The
# class header lies outside this span, so they are written at the left margin
# and have to sit one level inside the class body.

def generateBefore(self, parsed):
    """Return the keyword prefix placed in front of the generated query.

    The log name captured while generating the query wins; otherwise the
    rule's product and service are used; otherwise the prefix is empty.
    """
    # not required but makes query faster, especially if no FER or _index/_sourceCategory
    if self.logname:
        return "%s " % self.logname
    if self.service:
        return "%s %s " % (self.product, self.service)
    return ""


def generate(self, sigmaparser):
    """Convert one parsed Sigma rule into a parenthesised query string.

    Only the first parsed condition is converted, because the method returns
    from inside the loop; nothing is returned when there is no condition.
    """
    # generateMapItemNode stores the log name on the instance; clear it so a
    # value captured for a previous rule cannot leak into this one.
    self.logname = None

    try:
        self.product = sigmaparser.parsedyaml['logsource']['product']   # OS or Software
        self.service = sigmaparser.parsedyaml['logsource']['service']   # Channel
    except KeyError:
        self.product = None
        self.service = None

    try:
        self.interval = sigmaparser.parsedyaml['detection']['timeframe']
    except (KeyError, TypeError):
        # No timeframe in the rule: the current interval is left untouched.
        pass

    for parsed in sigmaparser.condparsed:
        # The query is generated first on purpose: it may set self.logname,
        # which generateBefore reads.
        query = self.generateQuery(parsed)
        before = self.generateBefore(parsed)
        after = self.generateAfter(parsed)

        result = ""
        if before is not None:
            result = before
        if query is not None:
            result += query
        if after is not None:
            result += after

        # adding parenthesis here in case 2 rules are aggregated together - ex: win_possible_applocker_bypass
        return "(" + result + ")"


def __init__(self, *args, **kwargs):
    """Initialise the backend and collect the field names usable as ``key=value``."""
    super().__init__(*args, **kwargs)
    # TODO/FIXME! depending on deployment configuration, existing FER must be populate here (or backend config?)
    #aFL = ["EventID"]
    allowed = ["EventID", "sourcename", "CommandLine", "NewProcessName", "Image", "ParentImage", "ParentCommandLine", "ParentProcessName"]
    for item in self.sigmaconfig.fieldmappings.values():
        if item.target_type is list:
            allowed.extend(item.target)
        else:
            allowed.append(item.target)
    # De-duplicated; kept as a list because that is the type the attribute
    # has always had.
    self.allowedFieldsList = list(set(allowed))

# Skip logsource value from sigma document for separate path.
#def generateCleanValueNodeLogsource(self, value):
#    return self.valueExpression % (self.cleanValue(str(value)))


# Clearing values from special characters.
# Sumologic: only removing '*' (in quotes, is literal. without, is wildcard) and '"'
def CleanNode(self, node):
    """Split *node* on special characters and return the non-empty parts.

    :param node: value to clean
    :return: list of fragments; ``[node]`` unchanged when the value contains
             none of the special characters
    """
    special_chars = re.compile(r"[\/@?#&%*\(\)\"]")
    text = str(node)
    if special_chars.search(text) is None:
        return [node]
    # The substitution works on the same text that was searched, so a
    # non-string value can no longer raise TypeError here.
    return [part for part in special_chars.sub('*', text).split('*') if part]


# Clearing values from special characters.
def generateMapItemNode(self, node):
    """Generate the expression for one ``key: value`` pair of a rule.

    Keys in ``allowedFieldsList`` become ``key=value`` expressions; any other
    key is dropped and only its cleaned value is searched as keywords.

    :raises TypeError: for values that are neither str, int nor list
    """
    key, value = node
    # Same test as before, evaluated once instead of once per branch.
    scalar_like = (self.mapListsSpecialHandling == False and type(value) in (str, int, list)
                   or self.mapListsSpecialHandling == True and type(value) in (str, int))

    if key in self.allowedFieldsList:
        if scalar_like:
            if key in ("LogName", "source"):
                # Remembered for generateBefore.
                self.logname = value
            return self.mapExpression % (key, value)
        if type(value) is list:
            return self.generateMapItemListNode(key, value)
        raise TypeError("Backend does not support map values of type " + str(type(value)))

    if scalar_like:
        if type(value) is not str:
            return self.generateValueNode(value)
        cleaned = self.CleanNode(value)
        if type(cleaned) == list:
            cleaned = self.andToken.join([self.valueExpression % val for val in cleaned])
        # Exactly one operand is ever built here, so the former OR branch
        # could never be taken.
        return "(" + self.generateANDNode([cleaned]) + ")"
    if type(value) is list:
        new_value = list()
        for item in value:
            item = self.CleanNode(item)
            if type(item) is list:
                # Joining a one-element list yields that element, so single
                # and multiple fragments share this path.
                new_value.append(self.andToken.join([self.valueExpression % val for val in item]))
            else:
                new_value.append(item)
        return self.generateORNode(new_value)
    raise TypeError("Backend does not support map values of type " + str(type(value)))


# from mixins.py
#FIXME! input in simple quotes are not passing through this function. ex: rules/windows/sysmon/sysmon_vul_java_remote_debugging.yml, rules/apt/apt_sofacy_zebrocy.yml
# => OK only if field entry with list, not string
def cleanValue(self, val, key=''):
    """Escape *val* and, for field values, move wildcards outside the quotes.

    Nothing is written to stdout: the converter's standard output is the
    generated query, so debug prints would end up inside it.

    :param val: string value to clean
    :param key: field name; wildcards are only rewritten when it is set
    :return: the cleaned string
    """
    if self.reEscape:
        val = self.reEscape.sub(self.escapeSubst, val)
    if self.reClear:
        val = self.reClear.sub("", val)
    # in sumologic, if key, can use wildcard outside of double quotes. if inside, it's literal
    if key:
        # Raw replacement string: '\g' is not a valid escape in a normal literal.
        val = re.sub(r'(.+?)\*(.+?)', r'\g<1>"*"\g<2>', val)
        val = re.sub(r'^\*', '*"', val)
        val = re.sub(r'\*$', '"*', val)
        # if unbalanced wildcard?
        if val.startswith('*"') and not val.endswith(('"*', '"')):
            val = val + '"'
        if val.endswith('"*') and not val.startswith(('*"', '"')):
            val = '"' + val
        # double escape if end quote
        if val.endswith('\\"*') and not val.endswith('\\\\"*'):
            val = re.sub(r'\\"\*$', '\\\\\\"*', val)
    return val


# for keywords values with space
def generateValueNode(self, node, key=''):
    """Return the cleaned value, parenthesised when it contains ``AND``."""
    if type(node) is int:
        return self.cleanValue(str(node), key)
    try:
        has_and = 'AND' in node
    except TypeError:
        # Non-iterable value (float, None, ...): nothing to look into.
        has_and = False
    cleaned = self.cleanValue(str(node), key)
    return "(" + cleaned + ")" if has_and else cleaned


def generateMapItemListNode(self, key, value):
    """Return the OR-combination of every item of a list value."""
    is_field = key in self.allowedFieldsList
    itemslist = [
        '%s = %s' % (key, self.generateValueNode(item, key)) if is_field
        else '%s' % (self.generateValueNode(item))
        for item in value
    ]
    return "(" + " OR ".join(itemslist) + ")"

# generateORNode algorithm for ArcSightBackend & SumoLogicBackend class.
def generateORNode(self, node):
    """Render an OR condition as a parenthesised, ``orToken``-joined string.

    When *node* is exactly a ConditionOR whose items are all strings, each
    item is first passed through ``CleanNode`` and its fragments are quoted
    and joined with ``andToken``; every other node has its items rendered
    as they are.
    """
    only_strings = type(node) == ConditionOR and all(isinstance(entry, str) for entry in node)
    if not only_strings:
        return "(" + self.orToken.join([self.generateNode(val) for val in node]) + ")"

    operands = []
    for entry in node:
        cleaned = self.CleanNode(entry)
        if type(cleaned) is list:
            cleaned = self.andToken.join([self.valueExpression % part for part in cleaned])
        operands.append(cleaned)
    rendered = [self.generateNode(operand) for operand in operands]
    return "(" + self.orToken.join(rendered) + ")"


def fieldNameMapping(self, fieldname, value):
    """
    Alter field names depending on the value(s). Backends may use this method to perform a final transformation of the field name
    in addition to the field mapping defined in the conversion configuration. The field name passed to this method was already
    transformed from the original name given in the Sigma rule.

    This backend currently returns the field name unchanged.
    TODO/FIXME!
    """
    return fieldname