Merge pull request #234 from juju4/devel-sumo

Sumologic support update
This commit is contained in:
Thomas Patzke
2019-02-09 23:54:23 +01:00
committed by GitHub
2 changed files with 435 additions and 6 deletions
+247
View File
@@ -0,0 +1,247 @@
#!/usr/bin/python
# Copyright 2018 juju4
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Project: sigma2sumologic.py
Date: 11 Jan 2019
Author: juju4
Version: 1.0
Description: This script executes sumologic search queries from Sigma SIEM rules.
Workflow:
1. Convert rules with sigmac
2. Enrich: add ignore+local custom rules, priority
3. Format
4. Get results and save to txt/xlsx files
Requirements:
$ pip install sumologic-sdk pyyaml pandas
"""
import re
import os, sys, stat
import glob
import subprocess
import argparse
import yaml
import traceback
import logging
from sumologic import SumoLogic
import time
import datetime
import json
import pandas
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
formatter = logging.Formatter('%(asctime)s - %(name)s - p%(process)s {%(pathname)s:%(lineno)d} - %(levelname)s - %(message)s')
handler = logging.FileHandler('sigma2sumo.log')
handler.setFormatter(formatter)
logger.addHandler(handler)
parser = argparse.ArgumentParser(description='Execute sigma rules in sumologic')
parser.add_argument("--conf", help="script yaml config file", type=str, required=True)
parser.add_argument("--accessid", help="Sumologic Access ID", type=str, required=False)
parser.add_argument("--accesskey", help="Sumologic Access Key", type=str, required=False)
parser.add_argument("--endpoint", help="Sumologic url endpoint", type=str, required=False)
parser.add_argument("--ruledir", help="sigma rule directory path to convert", type=str, required=False)
parser.add_argument("--outdir", help="output directory to create rules", type=str, required=False)
parser.add_argument("--sigmac", help="Sigmac location", default="../tools/sigmac", type=str)
parser.add_argument("--realerttime", help="Realert time (optional value, default 5 minutes)", type=str, default=5)
parser.add_argument("--debug", help="Show debug output", type=bool, default=False)
args = parser.parse_args()
LIMIT = 100
delay = 5
def rule_element(file_content, elements):
"""
Function used to get specific element from yaml document and return content
:type file_content: str
:type elements: list
:param file_content:
:param elements: list of elements of the yaml document to get "title", "description"
:return: the value of the key in the yaml document
"""
try:
logger.debug("file_content: %s" % file_content)
yaml.safe_load(file_content.replace("---",""))
except:
raise Exception('Unsupported')
element_output = ""
for e in elements:
try:
element_output = yaml.safe_load(file_content.replace("---",""))[e]
except:
pass
if element_output is None:
return ""
return element_output
def get_rule_as_sumologic(file):
"""
Function used to get sumologic query output from rule file
:type file: str
:param file: rule filename
:return: string query
"""
if not os.path.exists(args.sigmac):
logger.error("Cannot find sigmac rule coverter at '%s', please set a correct location via '--sigmac'")
cmd = [args.sigmac, file, "--target", "sumologic"]
logger.info('get_rule_as_sumologic cmd: %s' % cmd)
process = subprocess.Popen(cmd,stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, err = process.communicate()
# output is byte-string...
output = output.decode("utf-8")
err = err.decode("utf-8")
logger.info('get_rule_as_sumologic output: %s' % output)
logger.info('get_rule_as_sumologic stderr: %s' % err)
if err or "unsupported" in err:
logger.error('Unsupported output at this time')
raise Exception('Unsupported output at this time')
output = output.split("\n")
# Remove empty string from \n
output = [a for a in output if a]
# Handle case of multiple queries returned
if len(output) > 1:
return " OR ".join(output)
return "".join(output)
if args.help:
parser_print_help()
if args.conf:
with open(args.conf, 'r') as ymlfile:
cfg = yaml.load(ymlfile)
args.accessid = cfg['accessid']
args.accesskey = cfg['accesskey']
args.endpoint = cfg['endpoint']
args.ruledir = cfg['ruledir']
args.outdir = cfg['outdir']
args.sigmac = cfg['sigmac']
try:
args.recursive = cfg['recursive']
except:
args.recursive = False
if args.recursive:
globpath = args.ruledir + "/**/*.yml"
else:
globpath = args.ruledir + "/*.yml"
logger.debug("args: %s" % args)
logger.debug("globpath: %s" % globpath)
if args.outdir and not os.path.isdir(args.outdir):
os.mkdir(args.outdir, stat.S_IRWXU)
# recursive
for file in glob.iglob(globpath):
# non-recursive (above, not working...)
#for file in glob.iglob(args.ruledir + "/*.yml"):
file_basename = os.path.basename(os.path.splitext(file)[0])
file_basenamepath = os.path.splitext(file)[0]
file_ext = os.path.splitext(file)[1]
try:
if file_ext != '.yml':
continue
logger.info("Processing %s ..." % file_basename)
with open(file, "rb") as f:
file_content = f.read()
logger.info("Rule file: %s" % file)
sumo_query = get_rule_as_sumologic(file)
logger.info(" Checking if custom query file: %s" % file_basenamepath + '.custom')
if os.path.isfile(file_basenamepath + '.custom'):
# FIXME! want to add something in the middle for parsing for example...
logger.info(" Adding custom part to end query from: %s" % file_basenamepath + '.custom')
with open(file_basenamepath + '.custom', "rb") as f:
sumo_query += " " + f.read().decode('utf-8')
elif 'count ' not in sumo_query and ('EventID=' in sumo_query):
sumo_query += " | count _sourceCategory, hostname, EventID, msg_summary, _raw"
elif 'count ' not in sumo_query:
sumo_query += " | count _sourceCategory, hostname, _raw"
logger.info("Final sumo query: %s" % sumo_query)
except Exception as e:
if args.debug:
traceback.print_exc()
logger.exception("error generating sumo query " + str(file) + "----" + str(e))
pass
try:
# Run query
# https://github.com/SumoLogic/sumologic-python-sdk/blob/master/scripts/search-job.py
sumo = SumoLogic(args.accessid, args.accesskey, args.endpoint)
toTime = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
fromTime = datetime.datetime.strptime(toTime, "%Y-%m-%dT%H:%M:%S") - datetime.timedelta(hours = 24)
fromTime = fromTime.strftime("%Y-%m-%dT%H:%M:%S")
timeZone = 'UTC'
byReceiptTime = True
sj = sumo.search_job(sumo_query, fromTime, toTime, timeZone, byReceiptTime)
status = sumo.search_job_status(sj)
while status['state'] != 'DONE GATHERING RESULTS':
if status['state'] == 'CANCELLED':
break
time.sleep(delay)
status = sumo.search_job_status(sj)
except Exception as e:
if args.debug:
traceback.print_exc()
logger.exception("error seaching sumo " + str(file) + "----" + str(e))
with open(os.path.join(args.outdir, "sigma-" + file_basename + '-error.txt'), "w") as f:
f.write(json.dumps(r, indent=4, sort_keys=True) + " ERROR: %s\n\nQUERY: %s" % (e, sumo_query))
pass
logger.info("Sumo search job status: %s" % status['state'])
try:
if status['state'] == 'DONE GATHERING RESULTS':
count = status['recordCount']
limit = count if count < LIMIT and count != 0 else LIMIT # compensate bad limit check
r = sumo.search_job_records(sj, limit=limit)
logger.info("Sumo search results: %s" % r)
logger.info("Saving final sumo query for %s to %s" % (file, os.path.join(args.outdir, "sigma-" + file_basename + '.sumo')))
with open(os.path.join(args.outdir, "sigma-" + file_basename + '.sumo'), "w") as f:
f.write(sumo_query)
if r and r['records'] != []:
logger.info("Saving results")
# as json text file
with open(os.path.join(args.outdir, "sigma-" + file_basename + '.txt'), "w") as f:
f.write(json.dumps(r, indent=4, sort_keys=True))
# as excel file
df = pandas.io.json.json_normalize(r['records'])
with pandas.ExcelWriter(os.path.join(args.outdir, "sigma-" + file_basename + ".xlsx")) as writer:
df.to_excel(writer, 'data')
pandas.DataFrame({'References': [
"timeframe: from %s to %s" % (fromTime, toTime),
"Sumo endpoint: %s" % args.endpoint,
"Sumo query: %s" % sumo_query
]}).to_excel(writer, 'comments')
# and do whatever you want, email alert, report, ticket...
except Exception as e:
if args.debug:
traceback.print_exc()
logger.exception("error saving results " + str(file) + "----" + str(e))
pass
+188 -6
View File
@@ -16,6 +16,7 @@
import re
import sigma
from sigma.parser.condition import ConditionOR
from .base import SingleTextQueryBackend
# Sumo specifics
@@ -32,7 +33,7 @@ class SumoLogicBackend(SingleTextQueryBackend):
active = True
index_field = "_index"
reEscape = re.compile('("|(?<!\\\\)\\\\(?![*?\\\\]))')
#reEscape = re.compile('("|\\\\(?![*?]))')
reClear = None
andToken = " AND "
orToken = " OR "
@@ -46,19 +47,200 @@ class SumoLogicBackend(SingleTextQueryBackend):
mapExpression = "%s=%s"
mapListsSpecialHandling = True
mapListValueExpression = "%s IN %s"
interval = None
logname = None
def generateAggregation(self, agg):
if agg == None:
return ""
if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_NEAR:
raise NotImplementedError("The 'near' aggregation operator is not yet implemented for this backend")
# WIP
# ex:
# (QUERY) | timeslice 5m
# | count_distinct(process) _timeslice,hostname
# | where _count_distinct > 5
#return " | timeslice %s | count_distinct(%s) %s | where _count_distinct > 0" % (self.interval, agg.aggfunc_notrans or "", agg.aggfield or "", agg.groupfield or "")
#return " | timeslice %s | count_distinct(%s) %s | where _count_distinct %s %s" % (self.interval, agg.aggfunc_notrans, agg.aggfield or "", agg.groupfield or "", agg.cond_op, agg.condition)
if agg.groupfield == None:
#return " | %s(%s) | when _count %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.cond_op, agg.condition)
return " | %s(%s) as val | when val %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.cond_op, agg.condition)
return " | %s %s | where _count %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.cond_op, agg.condition)
else:
return " | %s(%s) as val by %s | when val %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.groupfield or "", agg.cond_op, agg.condition)
return " | %s %s by %s | where _count %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.groupfield or "", agg.cond_op, agg.condition)
# TimeFrame condition / within timeframe
# condition | timeslice 5m | count_distinct(f1) as val by f2 | where val > 5
# Near condition => how near... like timeframe?
def generateBefore(self, parsed):
# not required but makes query faster, especially if no FER or _index/_sourceCategory
if self.logname:
return "%s " % self.logname
if self.service:
return "%s %s " % (self.product, self.service)
return ""
def generate(self, sigmaparser):
try:
self.product = sigmaparser.parsedyaml['logsource']['product'] # OS or Software
self.service = sigmaparser.parsedyaml['logsource']['service'] # Channel
except KeyError:
self.product = None
self.service = None
try:
self.interval = sigmaparser.parsedyaml['detection']['timeframe']
except:
pass
for parsed in sigmaparser.condparsed:
query = self.generateQuery(parsed)
before = self.generateBefore(parsed)
after = self.generateAfter(parsed)
result = ""
if before is not None:
result = before
if query is not None:
result += query
if after is not None:
result += after
# adding parenthesis here in case 2 rules are aggregated together - ex: win_possible_applocker_bypass
return "(" + result + ")"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# TODO/FIXME! depending on deployment configuration, existing FER must be populate here (or backend config?)
#aFL = ["EventID"]
aFL = ["EventID", "sourcename", "CommandLine", "NewProcessName", "Image", "ParentImage", "ParentCommandLine", "ParentProcessName"]
for item in self.sigmaconfig.fieldmappings.values():
if item.target_type is list:
aFL.extend(item.target)
else:
aFL.append(item.target)
self.allowedFieldsList = list(set(aFL))
# Skip logsource value from sigma document for separate path.
#def generateCleanValueNodeLogsource(self, value):
# return self.valueExpression % (self.cleanValue(str(value)))
# Clearing values from special characters.
# Sumologic: only removing '*' (in quotes, is litteral. without, is wildcard) and '"'
def CleanNode(self, node):
search_ptrn = re.compile(r"[\/@?#&%*\(\)\"]")
replace_ptrn = re.compile(r"[\/@?#&%*\(\)\"]")
match = search_ptrn.search(str(node))
new_node = list()
if match:
replaced_str = replace_ptrn.sub('*', node)
node = [x for x in replaced_str.split('*') if x]
new_node.extend(node)
else:
new_node.append(node)
node = new_node
return node
# Clearing values from special characters.
def generateMapItemNode(self, node):
key, value = node
if key in self.allowedFieldsList:
if self.mapListsSpecialHandling == False and type(value) in (
str, int, list) or self.mapListsSpecialHandling == True and type(value) in (str, int):
if key in ("LogName","source"):
self.logname = value
return self.mapExpression % (key, value)
elif type(value) is list:
return self.generateMapItemListNode(key, value)
else:
raise TypeError("Backend does not support map values of type " + str(type(value)))
else:
if self.mapListsSpecialHandling == False and type(value) in (
str, int, list) or self.mapListsSpecialHandling == True and type(value) in (str, int):
if type(value) is str:
new_value = list()
value = self.CleanNode(value)
if type(value) == list:
new_value.append(self.andToken.join([self.valueExpression % val for val in value]))
else:
new_value.append(value)
if len(new_value)==1:
return "(" + self.generateANDNode(new_value) + ")"
else:
return "(" + self.generateORNode(new_value) + ")"
else:
return self.generateValueNode(value)
elif type(value) is list:
new_value = list()
for item in value:
item = self.CleanNode(item)
if type(item) is list and len(item) == 1:
new_value.append(self.valueExpression % item[0])
elif type(item) is list:
new_value.append(self.andToken.join([self.valueExpression % val for val in item]))
else:
new_value.append(item)
return self.generateORNode(new_value)
else:
raise TypeError("Backend does not support map values of type " + str(type(value)))
# from mixins.py
#FIXME! input in simple quotes are not passing through this function. ex: rules/windows/sysmon/sysmon_vul_java_remote_debugging.yml, rules/apt/apt_sofacy_zebrocy.yml
# => OK only if field entry with list, not string
def cleanValue(self, val, key = ''):
print("DEBUG cleanValue0: %s" % val)
if self.reEscape:
val = self.reEscape.sub(self.escapeSubst, val)
if self.reClear:
val = self.reClear.sub("", val)
# in sumologic, if key, can use wildcard outside of double quotes. if inside, it's litteral
if key:
val = re.sub(r'(.+?)\*(.+?)', '\g<1>"*"\g<2>', val, 0)
val = re.sub(r'^\*', '*"', val)
val = re.sub(r'\*$', '"*', val)
# if unbalanced wildcard?
if val.startswith('*"') and not (val.endswith('"*') or val.endswith('"')):
val = val + '"'
if val.endswith('"*') and not (val.startswith('*"') or val.startswith('"')):
val = '"' + val
# double escape if end quote
if val.endswith('\\"*') and not val.endswith('\\\\"*'):
val = re.sub(r'\\"\*$', '\\\\\\"*', val)
print("DEBUG cleanValue1: %s" % val)
return val
# for keywords values with space
def generateValueNode(self, node, key = ''):
if type(node) is int:
return self.cleanValue(str(node), key)
if 'AND' in node:
return "(" + self.cleanValue(str(node), key) + ")"
else:
return self.cleanValue(str(node), key)
def generateMapItemListNode(self, key, value):
itemslist = list()
for item in value:
if key in self.allowedFieldsList:
itemslist.append('%s = %s' % (key, self.generateValueNode(item, key)))
else:
itemslist.append('%s' % (self.generateValueNode(item)))
return "(" + " OR ".join(itemslist) + ")"
# generateORNode algorithm for ArcSightBackend & SumoLogicBackend class.
def generateORNode(self, node):
if type(node) == ConditionOR and all(isinstance(item, str) for item in node):
new_value = list()
for value in node:
value = self.CleanNode(value)
if type(value) is list:
new_value.append(self.andToken.join([self.valueExpression % val for val in value]))
else:
new_value.append(value)
return "(" + self.orToken.join([self.generateNode(val) for val in new_value]) + ")"
return "(" + self.orToken.join([self.generateNode(val) for val in node]) + ")"
def fieldNameMapping(self, fieldname, value):
"""
Alter field names depending on the value(s). Backends may use this method to perform a final transformation of the field name
in addition to the field mapping defined in the conversion configuration. The field name passed to this method was already
transformed from the original name given in the Sigma rule.
TODO/FIXME!
"""
return fieldname