Merge pull request #234 from juju4/devel-sumo

Sumologic support update
2019-02-09 23:54:23 +01:00
parent d9aceeb7eb 4429d7564f
commit 01dfc23a26
2 changed files with 435 additions and 6 deletions
@@ -0,0 +1,247 @@
+#!/usr/bin/python
+# Copyright 2018 juju4
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+Project: sigma2sumologic.py
+Date: 11 Jan 2019
+Author: juju4
+Version: 1.0
+Description: This script executes sumologic search queries from Sigma SIEM rules.
+Workflow:
+    1. Convert rules with sigmac
+    2. Enrich: add ignore+local custom rules, priority
+    3. Format
+    4. Get results and save to txt/xlsx files
+Requirements:
+    $ pip install sumologic-sdk pyyaml pandas
+"""
+
+import re
+import os, sys, stat
+import glob
+import subprocess
+import argparse
+import yaml
+import traceback
+import logging
+from sumologic import SumoLogic
+import time
+import datetime
+import json
+import pandas
+
+# Root logging at DEBUG level, plus a dedicated file handler for this script's
+# own logger that records process id and source location with each message.
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+formatter = logging.Formatter('%(asctime)s - %(name)s - p%(process)s {%(pathname)s:%(lineno)d} - %(levelname)s - %(message)s')
+handler = logging.FileHandler('sigma2sumo.log')
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+
+
+def _str2bool(value):
+    """
+    Parse a command-line boolean.
+    argparse's type=bool turns every non-empty string (including "False")
+    into True, so the text is interpreted explicitly instead.
+    :param value: raw command-line text
+    :return: True for "1", "true", "yes", "y" or "on" (case-insensitive), else False
+    """
+    return str(value).strip().lower() in ("1", "true", "yes", "y", "on")
+
+
+parser = argparse.ArgumentParser(description='Execute sigma rules in sumologic')
+parser.add_argument("--conf", help="script yaml config file", type=str, required=True)
+parser.add_argument("--accessid", help="Sumologic Access ID", type=str, required=False)
+parser.add_argument("--accesskey", help="Sumologic Access Key", type=str, required=False)
+parser.add_argument("--endpoint", help="Sumologic url endpoint", type=str, required=False)
+parser.add_argument("--ruledir", help="sigma rule directory path to convert", type=str, required=False)
+parser.add_argument("--outdir", help="output directory to create rules", type=str, required=False)
+parser.add_argument("--sigmac", help="Sigmac location", default="../tools/sigmac", type=str)
+parser.add_argument("--realerttime", help="Realert time (optional value, default 5 minutes)", type=str, default=5)
+# Accepts a bare "--debug" as well as "--debug true" / "--debug false".
+parser.add_argument("--debug", help="Show debug output", type=_str2bool, nargs='?', const=True, default=False)
+args = parser.parse_args()
+
+# Upper bound on the number of records requested from a finished search job.
+LIMIT = 100
+# Seconds to sleep between two status polls of a running search job.
+delay = 5
+
+def rule_element(file_content, elements):
+    """
+    Function used to get specific element from yaml document and return content.
+    Every "---" sequence is removed from the text before it is parsed. When
+    several of the requested keys exist, the value of the last one wins.
+    :type file_content: str
+    :type elements: list
+    :param file_content: raw text of the yaml document
+    :param elements: list of elements of the yaml document to get "title", "description"
+    :return: the value of the key in the yaml document, or "" when no key
+        matches or the matched value is null
+    :raises Exception: 'Unsupported' when the content cannot be parsed
+    """
+    try:
+        logger.debug("file_content: %s", file_content)
+        # Parse once and reuse the result for every lookup below.
+        document = yaml.safe_load(file_content.replace("---", ""))
+    except Exception as exc:
+        raise Exception('Unsupported') from exc
+    element_output = ""
+    for e in elements:
+        try:
+            element_output = document[e]
+        except (KeyError, IndexError, TypeError):
+            # Key absent, or the document is not a mapping: keep the previous value.
+            pass
+    if element_output is None:
+        return ""
+    return element_output
+
+def get_rule_as_sumologic(file):
+    """
+    Function used to get sumologic query output from rule file.
+    Runs the sigmac converter (path taken from args.sigmac) with the
+    "sumologic" target and collects what it prints on stdout.
+    :type file: str
+    :param file: rule filename
+    :return: string query; when sigmac prints several non-empty lines they
+        are joined with " OR "
+    :raises Exception: when sigmac writes anything to stderr
+    """
+    if not os.path.exists(args.sigmac):
+        # Only reported here; launching the missing converter below is what fails.
+        logger.error("Cannot find sigmac rule converter at '%s', please set a correct location via '--sigmac'", args.sigmac)
+    cmd = [args.sigmac, file, "--target", "sumologic"]
+    logger.info('get_rule_as_sumologic cmd: %s', cmd)
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    output, err = process.communicate()
+
+    # output is byte-string...
+    output = output.decode("utf-8")
+    err = err.decode("utf-8")
+
+    logger.info('get_rule_as_sumologic output: %s', output)
+    logger.info('get_rule_as_sumologic stderr: %s', err)
+    # Any stderr content is treated as a failed conversion.
+    if err:
+        logger.error('Unsupported output at this time')
+        raise Exception('Unsupported output at this time')
+    # Drop the empty strings produced by trailing or blank lines.
+    queries = [line for line in output.split("\n") if line]
+    # Handle case of multiple queries returned (a single query is returned as is).
+    return " OR ".join(queries)
+
+# No explicit help handling: argparse prints the usage text and exits on
+# -h/--help by itself, and the parsed namespace has no 'help' attribute.
+
+if args.conf:
+    with open(args.conf, 'r') as ymlfile:
+        # safe_load refuses to build arbitrary Python objects from the file.
+        cfg = yaml.safe_load(ymlfile) or {}
+    # A value present in the config file wins; otherwise the command-line
+    # value (or its default) is kept.
+    args.accessid = cfg.get('accessid', args.accessid)
+    args.accesskey = cfg.get('accesskey', args.accesskey)
+    args.endpoint = cfg.get('endpoint', args.endpoint)
+    args.ruledir = cfg.get('ruledir', args.ruledir)
+    args.outdir = cfg.get('outdir', args.outdir)
+    args.sigmac = cfg.get('sigmac', args.sigmac)
+    args.recursive = cfg.get('recursive', False)
+    if not args.ruledir:
+        parser.error("no rule directory: set 'ruledir' in the config file or pass --ruledir")
+    if args.recursive:
+        globpath = args.ruledir + "/**/*.yml"
+    else:
+        globpath = args.ruledir + "/*.yml"
+    # Keep the secret access key out of the log output.
+    loggable_args = dict(vars(args))
+    if loggable_args.get('accesskey'):
+        loggable_args['accesskey'] = '********'
+    logger.debug("args: %s", loggable_args)
+    logger.debug("globpath: %s", globpath)
+
+# Output directory is created readable/writable/executable by the owner only.
+if args.outdir and not os.path.isdir(args.outdir):
+    os.makedirs(args.outdir, stat.S_IRWXU)
+
+# Convert every rule file, run the resulting query over the last 24 hours and
+# save query and results in args.outdir.
+# '**' in globpath only descends into sub-directories when recursive=True is passed.
+for file in glob.iglob(globpath, recursive=bool(args.recursive)):
+
+    file_basenamepath, file_ext = os.path.splitext(file)
+    file_basename = os.path.basename(file_basenamepath)
+    if file_ext != '.yml':
+        continue
+
+    # Per-rule state, reset so that a failure never reuses data of the previous rule.
+    sumo_query = None
+    status = None
+    r = None
+
+    try:
+        logger.info("Processing %s ..." % file_basename)
+        logger.info("Rule file: %s" % file)
+
+        sumo_query = get_rule_as_sumologic(file)
+
+        custom_file = file_basenamepath + '.custom'
+        logger.info("  Checking if custom query file: %s" % custom_file)
+        if os.path.isfile(custom_file):
+            # FIXME! want to add something in the middle for parsing for example...
+            logger.info("  Adding custom part to end query from: %s" % custom_file)
+            with open(custom_file, "rb") as f:
+                sumo_query += " " + f.read().decode('utf-8')
+        elif 'count ' not in sumo_query and ('EventID=' in sumo_query):
+            sumo_query += " | count _sourceCategory, hostname, EventID, msg_summary, _raw"
+        elif 'count ' not in sumo_query:
+            sumo_query += " | count _sourceCategory, hostname, _raw"
+
+        logger.info("Final sumo query: %s" % sumo_query)
+
+    except Exception as e:
+        if args.debug:
+            traceback.print_exc()
+        logger.exception("error generating sumo query " + str(file) + "----" + str(e))
+        # Without a query there is nothing to search for.
+        continue
+
+    try:
+        # Run query
+        # https://github.com/SumoLogic/sumologic-python-sdk/blob/master/scripts/search-job.py
+        sumo = SumoLogic(args.accessid, args.accesskey, args.endpoint)
+        # The window is submitted with timeZone 'UTC', so it is computed in UTC as well.
+        now = datetime.datetime.now(datetime.timezone.utc)
+        toTime = now.strftime("%Y-%m-%dT%H:%M:%S")
+        fromTime = (now - datetime.timedelta(hours=24)).strftime("%Y-%m-%dT%H:%M:%S")
+        timeZone = 'UTC'
+        byReceiptTime = True
+
+        sj = sumo.search_job(sumo_query, fromTime, toTime, timeZone, byReceiptTime)
+
+        # Poll until the job has finished or was cancelled.
+        status = sumo.search_job_status(sj)
+        while status['state'] != 'DONE GATHERING RESULTS':
+            if status['state'] == 'CANCELLED':
+                break
+            time.sleep(delay)
+            status = sumo.search_job_status(sj)
+
+    except Exception as e:
+        if args.debug:
+            traceback.print_exc()
+        logger.exception("error seaching sumo  " + str(file) + "----" + str(e))
+        with open(os.path.join(args.outdir, "sigma-" + file_basename + '-error.txt'), "w") as f:
+            f.write("ERROR: %s\n\nQUERY: %s" % (e, sumo_query))
+        # No usable job status: skip the result handling for this rule.
+        continue
+
+    logger.info("Sumo search job status: %s" % status['state'])
+
+    try:
+        if status['state'] == 'DONE GATHERING RESULTS':
+            count = status['recordCount']
+            limit = count if count < LIMIT and count != 0 else LIMIT # compensate bad limit check
+            r = sumo.search_job_records(sj, limit=limit)
+            logger.info("Sumo search results: %s" % r)
+
+        query_file = os.path.join(args.outdir, "sigma-" + file_basename + '.sumo')
+        logger.info("Saving final sumo query for %s to %s" % (file, query_file))
+        with open(query_file, "w") as f:
+            f.write(sumo_query)
+        if r and r['records']:
+            logger.info("Saving results")
+            # as json text file
+            with open(os.path.join(args.outdir, "sigma-" + file_basename + '.txt'), "w") as f:
+                f.write(json.dumps(r, indent=4, sort_keys=True))
+            # as excel file
+            # pandas >= 1.0 exposes json_normalize at top level; older releases only under pandas.io.json
+            json_normalize = getattr(pandas, 'json_normalize', None) or pandas.io.json.json_normalize
+            df = json_normalize(r['records'])
+            with pandas.ExcelWriter(os.path.join(args.outdir, "sigma-" + file_basename + ".xlsx")) as writer:
+                df.to_excel(writer, sheet_name='data')
+                pandas.DataFrame({'References': [
+                    "timeframe: from %s to %s" % (fromTime, toTime),
+                    "Sumo endpoint: %s" % args.endpoint,
+                    "Sumo query: %s" % sumo_query
+                    ]}).to_excel(writer, sheet_name='comments')
+
+        # and do whatever you want, email alert, report, ticket...
+
+    except Exception as e:
+        if args.debug:
+            traceback.print_exc()
+        logger.exception("error saving results " + str(file) + "----" + str(e))
@@ -16,6 +16,7 @@

 import re
 import sigma
+from sigma.parser.condition import ConditionOR
 from .base import SingleTextQueryBackend

 # Sumo specifics
@@ -32,7 +33,7 @@ class SumoLogicBackend(SingleTextQueryBackend):
    active = True

    index_field = "_index"
-    reEscape = re.compile('("|(?<!\\\\)\\\\(?![*?\\\\]))')
+    #reEscape = re.compile('("|\\\\(?![*?]))')
    reClear = None
    andToken = " AND "
    orToken = " OR "
@@ -46,19 +47,200 @@ class SumoLogicBackend(SingleTextQueryBackend):
    mapExpression = "%s=%s"
    mapListsSpecialHandling = True
    mapListValueExpression = "%s IN %s"
+    interval = None
+    logname = None

    def generateAggregation(self, agg):
+        # Returns the " | <aggregation> ... | where ..." clause for a parsed Sigma
+        # aggregation, or "" when there is none. The 'near' operator is rejected
+        # with NotImplementedError.
+        # NOTE(review): the filter always tests `_count` - confirm this is the
+        # result field Sumo Logic produces for every aggregation function.
        if agg == None:
            return ""
        if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_NEAR:
            raise NotImplementedError("The 'near' aggregation operator is not yet implemented for this backend")
+            # WIP
+            # ex:
+            # (QUERY) | timeslice 5m
+            # | count_distinct(process) _timeslice,hostname
+            # | where _count_distinct > 5
+            #return " | timeslice %s | count_distinct(%s) %s | where _count_distinct > 0" % (self.interval, agg.aggfunc_notrans or "", agg.aggfield or "", agg.groupfield or "")
+            #return " | timeslice %s | count_distinct(%s) %s | where _count_distinct %s %s" % (self.interval, agg.aggfunc_notrans, agg.aggfield or "", agg.groupfield or "", agg.cond_op, agg.condition)
        if agg.groupfield == None:
            #return " | %s(%s) | when _count %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.cond_op, agg.condition)
-            return " | %s(%s) as val | when val %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.cond_op, agg.condition)
+            return " | %s %s | where _count %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.cond_op, agg.condition)
        else:
-            return " | %s(%s) as val by %s | when val %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.groupfield or "", agg.cond_op, agg.condition)
+            return " | %s %s by %s | where _count %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.groupfield or "", agg.cond_op, agg.condition)

-# TimeFrame condition / within timeframe
-# condition | timeslice 5m | count_distinct(f1) as val by f2 | where val > 5
-# Near condition => how near... like timeframe?
+    def generateBefore(self, parsed):
+        """
+        Return the keyword prefix put in front of the query.
+        The recorded log name is preferred; otherwise product and service are
+        used when a service is known; otherwise the prefix is empty.
+        """
+        # not required but makes query faster, especially if no FER or _index/_sourceCategory
+        prefix = ""
+        if self.logname:
+            prefix = "%s " % self.logname
+        elif self.service:
+            prefix = "%s %s " % (self.product, self.service)
+        return prefix

+    def generate(self, sigmaparser):
+        """
+        Build the Sumo Logic query for one parsed Sigma rule.
+        Reads product/service from the rule's logsource and the timeframe from
+        its detection section, then assembles prefix + query + suffix.
+        :param sigmaparser: parsed rule exposing `parsedyaml` and `condparsed`
+        :return: the query wrapped in parentheses, built from the first entry
+            of `condparsed`; None when `condparsed` is empty
+        """
+        # Start from a clean state: a log name or timeframe recorded while
+        # converting a previous rule must not leak into this one.
+        self.logname = None
+        self.interval = None
+
+        try:
+            logsource = sigmaparser.parsedyaml['logsource']
+            self.product = logsource['product']   # OS or Software
+            self.service = logsource['service']   # Channel
+        except (KeyError, TypeError):
+            # Product and service are only used together, so drop both.
+            self.product = None
+            self.service = None
+
+        try:
+            self.interval = sigmaparser.parsedyaml['detection']['timeframe']
+        except (KeyError, TypeError):
+            # No timeframe in this rule: interval stays None.
+            pass
+
+        for parsed in sigmaparser.condparsed:
+            query = self.generateQuery(parsed)
+            before = self.generateBefore(parsed)
+            after = self.generateAfter(parsed)
+
+            result = ""
+            if before is not None:
+                result = before
+            if query is not None:
+                result += query
+            if after is not None:
+                result += after
+
+            # adding parenthesis here in case 2 rules are aggregated together - ex: win_possible_applocker_bypass
+            # Returning inside the loop means only the first parsed condition is converted.
+            return "(" + result + ")"
+
+    def __init__(self, *args, **kwargs):
+        """
+        Initialise the backend and collect the field names that are emitted as
+        "field=value" expressions: a built-in list plus every target of the
+        configured field mappings, stored de-duplicated in allowedFieldsList.
+        """
+        super().__init__(*args, **kwargs)
+        # TODO/FIXME! depending on deployment configuration, existing FER must be populate here (or backend config?)
+        #known_fields = {"EventID"}
+        known_fields = {"EventID", "sourcename", "CommandLine", "NewProcessName", "Image", "ParentImage", "ParentCommandLine", "ParentProcessName"}
+        for mapping in self.sigmaconfig.fieldmappings.values():
+            if mapping.target_type is list:
+                known_fields.update(mapping.target)
+            else:
+                known_fields.add(mapping.target)
+        self.allowedFieldsList = list(known_fields)
+
+    # Skip logsource value from sigma document for separate path.
+    #def generateCleanValueNodeLogsource(self, value):
+    #    return self.valueExpression % (self.cleanValue(str(value)))
+
+    # Splitting values on special characters.
+    # Any of / @ ? # & % * ( ) " acts as a separator; the non-empty pieces are kept.
+    def CleanNode(self, node):
+        """
+        Split a value on special characters.
+        :param node: value to clean
+        :return: list of the non-empty pieces, or a one-element list holding
+            the unchanged value when it contains no special character
+        """
+        separators = re.compile(r"[\/@?#&%*\(\)\"]")
+        if separators.search(str(node)) is None:
+            return [node]
+        return [piece for piece in separators.split(node) if piece]
+
+    # Field/value pairs: known fields become "field=value" expressions, every
+    # other field is reduced to a keyword search on its (cleaned) value.
+    def generateMapItemNode(self, node):
+        """
+        Generate the query fragment for one (key, value) pair.
+        :param node: pair of field name and value
+        :return: query fragment as string
+        :raises TypeError: for values that are neither str, int nor list
+        """
+        key, value = node
+        if key in self.allowedFieldsList:
+            # Known field: keep the field name in the expression.
+            if self.mapListsSpecialHandling == False and type(value) in (
+                    str, int, list) or self.mapListsSpecialHandling == True and type(value) in (str, int):
+                if key in ("LogName","source"):
+                    # Remembered on the instance; generateBefore() reads it.
+                    self.logname = value
+                return self.mapExpression % (key, value)
+            elif type(value) is list:
+                return self.generateMapItemListNode(key, value)
+            else:
+                raise TypeError("Backend does not support map values of type " + str(type(value)))
+        else:
+            # Unknown field: the field name is dropped, only the value is searched.
+            if self.mapListsSpecialHandling == False and type(value) in (
+                    str, int, list) or self.mapListsSpecialHandling == True and type(value) in (str, int):
+                if type(value) is str:
+                    new_value = list()
+                    # CleanNode() returns the pieces of the value as a list.
+                    value = self.CleanNode(value)
+                    if type(value) == list:
+                        # All pieces of the value must be present: join them with AND.
+                        new_value.append(self.andToken.join([self.valueExpression % val for val in value]))
+                    else:
+                        new_value.append(value)
+                    # new_value receives exactly one entry above.
+                    if len(new_value)==1:
+                        return "(" + self.generateANDNode(new_value) + ")"
+                    else:
+                        return "(" + self.generateORNode(new_value) + ")"
+                else:
+                    return self.generateValueNode(value)
+            elif type(value) is list:
+                # One alternative per list item; the alternatives are ORed together.
+                new_value = list()
+                for item in value:
+                    item = self.CleanNode(item)
+                    if type(item) is list and len(item) == 1:
+                        new_value.append(self.valueExpression % item[0])
+                    elif type(item) is list:
+                        new_value.append(self.andToken.join([self.valueExpression % val for val in item]))
+                    else:
+                        new_value.append(item)
+                return self.generateORNode(new_value)
+            else:
+                raise TypeError("Backend does not support map values of type " + str(type(value)))
+
+    # from mixins.py
+    #FIXME! input in simple quotes are not passing through this function. ex: rules/windows/sysmon/sysmon_vul_java_remote_debugging.yml, rules/apt/apt_sofacy_zebrocy.yml
+    #   => OK only if field entry with list, not string
+    def cleanValue(self, val, key = ''):
+        """
+        Escape a value and, for field searches, move wildcards outside of the
+        double quotes.
+        Nothing is written to stdout here: the generated query is delivered on
+        stdout, so any extra output would end up inside it.
+        :param val: value to clean (string)
+        :param key: field name; wildcard handling is only applied when non-empty
+        :return: cleaned value
+        """
+        if self.reEscape:
+            val = self.reEscape.sub(self.escapeSubst, val)
+        if self.reClear:
+            val = self.reClear.sub("", val)
+        # in sumologic, if key, can use wildcard outside of double quotes. if inside, it's literal
+        if key:
+            # Wildcard in the middle: close the quote before it and reopen after it.
+            val = re.sub(r'(.+?)\*(.+?)', r'\g<1>"*"\g<2>', val)
+            # Leading / trailing wildcard: keep it outside of the quotes.
+            val = re.sub(r'^\*', '*"', val)
+            val = re.sub(r'\*$', '"*', val)
+            # if unbalanced wildcard?
+            if val.startswith('*"') and not (val.endswith('"*') or val.endswith('"')):
+                val = val + '"'
+            if val.endswith('"*') and not (val.startswith('*"') or val.startswith('"')):
+                val = '"' + val
+            # double escape if end quote
+            if val.endswith('\\"*') and not val.endswith('\\\\"*'):
+                val = re.sub(r'\\"\*$', '\\\\\\"*', val)
+        return val
+
+    # for keywords values with space
+    def generateValueNode(self, node, key = ''):
+        """
+        Render a single value through cleanValue().
+        Non-integer values containing 'AND' are wrapped in parentheses.
+        """
+        if type(node) is not int and 'AND' in node:
+            return "(" + self.cleanValue(str(node), key) + ")"
+        return self.cleanValue(str(node), key)
+
+    def generateMapItemListNode(self, key, value):
+        """
+        Render a list of values as one parenthesised OR expression.
+        For a field in allowedFieldsList each entry reads "field = value";
+        for any other field only the values are listed.
+        """
+        alternatives = [
+            '%s = %s' % (key, self.generateValueNode(entry, key))
+            if key in self.allowedFieldsList
+            else '%s' % (self.generateValueNode(entry))
+            for entry in value
+        ]
+        return "(" + " OR ".join(alternatives) + ")"
+
+    # generateORNode algorithm for ArcSightBackend & SumoLogicBackend class.
+    def generateORNode(self, node):
+        """
+        Render the members of an OR node, joined with orToken, in parentheses.
+        When the node is a ConditionOR holding only strings, every string is
+        first split by CleanNode() and its pieces are joined with andToken.
+        """
+        members = node
+        if type(node) == ConditionOR and all(isinstance(item, str) for item in node):
+            members = list()
+            for raw in node:
+                pieces = self.CleanNode(raw)
+                if type(pieces) is list:
+                    members.append(self.andToken.join([self.valueExpression % val for val in pieces]))
+                else:
+                    members.append(pieces)
+        return "(" + self.orToken.join([self.generateNode(val) for val in members]) + ")"
+
+    def fieldNameMapping(self, fieldname, value):
+        """
+        Hook for a final, value-dependent transformation of a field name.
+        The name passed in has already gone through the field mapping of the
+        conversion configuration. This backend currently returns it unchanged.
+        TODO/FIXME!
+        """
+        return fieldname