@@ -0,0 +1,247 @@
|
||||
#!/usr/bin/python
|
||||
# Copyright 2018 juju4
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Lesser General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Lesser General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
Project: sigma2sumologic.py
|
||||
Date: 11 Jan 2019
|
||||
Author: juju4
|
||||
Version: 1.0
|
||||
Description: This script executes sumologic search queries from Sigma SIEM rules.
|
||||
Workflow:
|
||||
1. Convert rules with sigmac
|
||||
2. Enrich: add ignore+local custom rules, priority
|
||||
3. Format
|
||||
4. Get results and save to txt/xlsx files
|
||||
Requirements:
|
||||
$ pip install sumologic-sdk pyyaml pandas
|
||||
"""
|
||||
|
||||
import re
|
||||
import os, sys, stat
|
||||
import glob
|
||||
import subprocess
|
||||
import argparse
|
||||
import yaml
|
||||
import traceback
|
||||
import logging
|
||||
from sumologic import SumoLogic
|
||||
import time
|
||||
import datetime
|
||||
import json
|
||||
import pandas
|
||||
|
||||
# Root logger prints everything (DEBUG and above) to the console.
logging.basicConfig(level=logging.DEBUG)

# The module logger additionally appends to a log file in the current
# directory, using a more detailed record layout than the console output.
handler = logging.FileHandler('sigma2sumo.log')
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - p%(process)s '
    '{%(pathname)s:%(lineno)d} - %(levelname)s - %(message)s'
)
handler.setFormatter(formatter)

logger = logging.getLogger(__name__)
logger.addHandler(handler)
|
||||
|
||||
def _str2bool(value):
    """Convert a command-line word into a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because any non-empty string is truthy, so ``--debug False`` used to turn
    debugging ON.

    :param value: raw command-line string (or an already converted bool)
    :return: the parsed boolean
    :raises argparse.ArgumentTypeError: if the word is not a known boolean
    """
    if isinstance(value, bool):
        return value
    word = value.strip().lower()
    if word in ("1", "true", "yes", "y", "on"):
        return True
    if word in ("", "0", "false", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % value)


parser = argparse.ArgumentParser(description='Execute sigma rules in sumologic')
parser.add_argument("--conf", help="script yaml config file", type=str, required=True)
parser.add_argument("--accessid", help="Sumologic Access ID", type=str, required=False)
parser.add_argument("--accesskey", help="Sumologic Access Key", type=str, required=False)
parser.add_argument("--endpoint", help="Sumologic url endpoint", type=str, required=False)
parser.add_argument("--ruledir", help="sigma rule directory path to convert", type=str, required=False)
parser.add_argument("--outdir", help="output directory to create rules", type=str, required=False)
parser.add_argument("--sigmac", help="Sigmac location", default="../tools/sigmac", type=str)
# NOTE(review): declared as str but defaults to the int 5; left unchanged
# because nothing visible in this file reads the value.
parser.add_argument("--realerttime", help="Realert time (optional value, default 5 minutes)", type=str, default=5)
# Accepts a bare "--debug" as well as the historical "--debug True/False".
parser.add_argument("--debug", help="Show debug output", type=_str2bool, nargs="?", const=True, default=False)
args = parser.parse_args()

# Maximum number of records requested from a finished search job.
LIMIT = 100
# Seconds to wait between two polls of the search job status.
delay = 5
|
||||
|
||||
def rule_element(file_content, elements):
    """
    Function used to get specific element from yaml document and return content

    The document is parsed once. Every name in ``elements`` is then looked up
    in order and the value of the LAST one that exists is returned, so later
    entries take precedence over earlier ones.

    :type file_content: str or bytes
    :type elements: list
    :param file_content: raw content of the rule file (bytes are decoded as UTF-8)
    :param elements: list of elements of the yaml document to get "title", "description"
    :return: the value of the key in the yaml document, or "" when none of the
             elements is present or the value is empty
    :raises Exception: with message 'Unsupported' when the content cannot be parsed
    """
    logger.debug("file_content: %s", file_content)
    try:
        if isinstance(file_content, bytes):
            # Rule files are read in binary mode by the main loop.
            file_content = file_content.decode("utf-8")
        # Document separators are stripped so that the content is loaded as a
        # single YAML document (same behavior as before).
        document = yaml.safe_load(file_content.replace("---", ""))
    except Exception as exc:
        raise Exception('Unsupported') from exc

    element_output = ""
    for e in elements:
        try:
            element_output = document[e]
        except (KeyError, IndexError, TypeError):
            # Element missing (or the document is not a mapping): keep the
            # value found so far.
            continue
    if element_output is None:
        return ""
    return element_output
|
||||
|
||||
def get_rule_as_sumologic(file):
    """
    Function used to get sumologic query output from rule file

    Runs ``sigmac <file> --target sumologic`` and turns its standard output
    into one query string. When the converter prints several queries (one per
    line) they are combined with `` OR ``.

    :type file: str
    :param file: rule filename
    :return: string query ("" when the converter printed nothing)
    :raises FileNotFoundError: when the sigmac converter cannot be found
    :raises Exception: when sigmac fails or writes anything to stderr
    """
    if not os.path.exists(args.sigmac):
        # The format string used to be logged without its argument, and the
        # function carried on only to fail later inside Popen.
        logger.error("Cannot find sigmac rule coverter at '%s', please set a correct location via '--sigmac'", args.sigmac)
        raise FileNotFoundError("sigmac not found at '%s'" % args.sigmac)

    cmd = [args.sigmac, file, "--target", "sumologic"]
    logger.info('get_rule_as_sumologic cmd: %s', cmd)
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, err = process.communicate()

    # output is byte-string...
    output = output.decode("utf-8")
    err = err.decode("utf-8")

    logger.info('get_rule_as_sumologic output: %s', output)
    logger.info('get_rule_as_sumologic stderr: %s', err)

    # Anything on stderr (including "unsupported" notices) or a non-zero exit
    # status means the rule could not be converted.
    if err or process.returncode != 0:
        logger.error('Unsupported output at this time')
        raise Exception('Unsupported output at this time')

    # Drop the empty strings produced by blank lines / the trailing newline.
    queries = [line for line in output.split("\n") if line]
    # Several queries are OR-ed together; one query is returned unchanged.
    return " OR ".join(queries)
|
||||
|
||||
# NOTE: argparse registers -h/--help itself: it prints the usage and exits
# inside parse_args(). The parsed namespace has no "help" attribute, so the
# former "if args.help" check raised AttributeError on every run (and the
# function it called, parser_print_help, does not exist).

# Load the YAML configuration. Values present in the file take precedence
# over the command line; options absent from the file keep their CLI value.
cfg = {}
if args.conf:
    with open(args.conf, 'r') as ymlfile:
        # safe_load: a configuration file must never instantiate arbitrary
        # Python objects.
        cfg = yaml.safe_load(ymlfile) or {}

for option in ('accessid', 'accesskey', 'endpoint', 'ruledir', 'outdir', 'sigmac'):
    if cfg.get(option) is not None:
        setattr(args, option, cfg[option])
args.recursive = bool(cfg.get('recursive', False))

missing = [option for option in ('accessid', 'accesskey', 'ruledir', 'outdir')
           if not getattr(args, option)]
if missing:
    parser.error("missing setting(s) in config file or command line: %s" % ", ".join(missing))

# '**' matches nested directories only when glob is called with recursive=True.
if args.recursive:
    globpath = os.path.join(args.ruledir, "**", "*.yml")
else:
    globpath = os.path.join(args.ruledir, "*.yml")
logger.debug("args: %s", args)
logger.debug("globpath: %s", globpath)

# Output directory is private to the current user; parents are created too.
if args.outdir and not os.path.isdir(args.outdir):
    os.makedirs(args.outdir, mode=stat.S_IRWXU)
|
||||
|
||||
# Main loop: for every rule file, build the Sumo Logic query, run it over the
# last 24 hours and save the query and its results in args.outdir.
# A failure in one rule is logged and the loop moves on to the next rule.
SUMO_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
sumo = None  # API client, created on first use and shared by all rules

# recursive=True is required for a '**' pattern to descend into
# sub-directories; it has no effect on a pattern without '**'.
for file in glob.iglob(globpath, recursive=bool(args.recursive)):
    file_basenamepath, file_ext = os.path.splitext(file)
    file_basename = os.path.basename(file_basenamepath)
    if file_ext != '.yml':
        continue
    custom_file = file_basenamepath + '.custom'
    out_prefix = os.path.join(args.outdir, "sigma-" + file_basename)

    # 1. Convert the rule and complete the query.
    try:
        logger.info("Processing %s ...", file_basename)
        logger.info("Rule file: %s", file)

        sumo_query = get_rule_as_sumologic(file)

        logger.info(" Checking if custom query file: %s", custom_file)
        if os.path.isfile(custom_file):
            # FIXME! want to add something in the middle for parsing for example...
            logger.info(" Adding custom part to end query from: %s", custom_file)
            with open(custom_file, "rb") as f:
                sumo_query += " " + f.read().decode('utf-8')
        elif 'count ' not in sumo_query and ('EventID=' in sumo_query):
            sumo_query += " | count _sourceCategory, hostname, EventID, msg_summary, _raw"
        elif 'count ' not in sumo_query:
            sumo_query += " | count _sourceCategory, hostname, _raw"

        logger.info("Final sumo query: %s", sumo_query)

    except Exception as e:
        if args.debug:
            traceback.print_exc()
        logger.exception("error generating sumo query " + str(file) + "----" + str(e))
        # Without a query there is nothing to run; previously the loop went
        # on with an undefined (or the previous rule's) query.
        continue

    # 2. Run the query and wait for the search job to finish.
    try:
        # https://github.com/SumoLogic/sumologic-python-sdk/blob/master/scripts/search-job.py
        if sumo is None:
            sumo = SumoLogic(args.accessid, args.accesskey, args.endpoint)
        # The time range is sent with timeZone='UTC', so it must be computed
        # from the UTC clock rather than from local time.
        now = datetime.datetime.now(datetime.timezone.utc)
        toTime = now.strftime(SUMO_TIME_FORMAT)
        fromTime = (now - datetime.timedelta(hours=24)).strftime(SUMO_TIME_FORMAT)
        timeZone = 'UTC'
        byReceiptTime = True

        sj = sumo.search_job(sumo_query, fromTime, toTime, timeZone, byReceiptTime)

        status = sumo.search_job_status(sj)
        while status['state'] != 'DONE GATHERING RESULTS':
            if status['state'] == 'CANCELLED':
                break
            time.sleep(delay)
            status = sumo.search_job_status(sj)

    except Exception as e:
        if args.debug:
            traceback.print_exc()
        logger.exception("error seaching sumo " + str(file) + "----" + str(e))
        # No result exists at this point, so only the error and the query are
        # recorded (the former code dumped an undefined variable here).
        with open(out_prefix + '-error.txt', "w", encoding="utf-8") as f:
            f.write("ERROR: %s\n\nQUERY: %s" % (e, sumo_query))
        continue

    logger.info("Sumo search job status: %s", status['state'])

    # 3. Fetch the records and save everything.
    try:
        r = None  # stays None when the job was cancelled
        if status['state'] == 'DONE GATHERING RESULTS':
            count = status['recordCount']
            limit = count if count < LIMIT and count != 0 else LIMIT  # compensate bad limit check
            r = sumo.search_job_records(sj, limit=limit)
            logger.info("Sumo search results: %s", r)

        logger.info("Saving final sumo query for %s to %s", file, out_prefix + '.sumo')
        with open(out_prefix + '.sumo', "w", encoding="utf-8") as f:
            f.write(sumo_query)

        if r and r.get('records'):
            logger.info("Saving results")
            # as json text file
            with open(out_prefix + '.txt', "w", encoding="utf-8") as f:
                f.write(json.dumps(r, indent=4, sort_keys=True))
            # as excel file
            # json_normalize moved to the pandas top level in pandas 1.0; fall
            # back to the old location for earlier releases.
            normalize = getattr(pandas, "json_normalize", None) or pandas.io.json.json_normalize
            df = normalize(r['records'])
            with pandas.ExcelWriter(out_prefix + ".xlsx") as writer:
                df.to_excel(writer, sheet_name='data')
                pandas.DataFrame({'References': [
                    "timeframe: from %s to %s" % (fromTime, toTime),
                    "Sumo endpoint: %s" % args.endpoint,
                    "Sumo query: %s" % sumo_query
                ]}).to_excel(writer, sheet_name='comments')

            # and do whatever you want, email alert, report, ticket...

    except Exception as e:
        if args.debug:
            traceback.print_exc()
        logger.exception("error saving results " + str(file) + "----" + str(e))
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
import re
|
||||
import sigma
|
||||
from sigma.parser.condition import ConditionOR
|
||||
from .base import SingleTextQueryBackend
|
||||
|
||||
# Sumo specifics
|
||||
@@ -32,7 +33,7 @@ class SumoLogicBackend(SingleTextQueryBackend):
|
||||
active = True
|
||||
|
||||
index_field = "_index"
|
||||
reEscape = re.compile('("|(?<!\\\\)\\\\(?![*?\\\\]))')
|
||||
#reEscape = re.compile('("|\\\\(?![*?]))')
|
||||
reClear = None
|
||||
andToken = " AND "
|
||||
orToken = " OR "
|
||||
@@ -46,19 +47,200 @@ class SumoLogicBackend(SingleTextQueryBackend):
|
||||
mapExpression = "%s=%s"
|
||||
mapListsSpecialHandling = True
|
||||
mapListValueExpression = "%s IN %s"
|
||||
interval = None
|
||||
logname = None
|
||||
|
||||
def generateAggregation(self, agg):
    """Render the aggregation part of a Sigma condition as a query suffix.

    :param agg: parsed aggregation (reads aggfunc, aggfunc_notrans, aggfield,
                groupfield, cond_op and condition), or None
    :return: "" when there is no aggregation, otherwise a string of the form
             " | func(field) as val [by groupfield] | when val <op> <value>"
    :raises NotImplementedError: for the 'near' aggregation operator
    """
    if agg is None:
        return ""
    if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_NEAR:
        raise NotImplementedError("The 'near' aggregation operator is not yet implemented for this backend")

    # WIP: the rule's timeframe (self.interval) is not used yet. Target shape:
    #   (QUERY) | timeslice 5m
    #           | count_distinct(process) _timeslice,hostname
    #           | where _count_distinct > 5
    # NOTE(review): the generated text filters with "when"; the examples above
    # use "where" - confirm which operator is intended before relying on it.
    if agg.groupfield is None:
        return " | %s(%s) as val | when val %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.cond_op, agg.condition)
    return " | %s(%s) as val by %s | when val %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.groupfield or "", agg.cond_op, agg.condition)
|
||||
|
||||
# TimeFrame condition / within timeframe
|
||||
# condition | timeslice 5m | count_distinct(f1) as val by f2 | where val > 5
|
||||
# Near condition => how near... like timeframe?
|
||||
def generateBefore(self, parsed):
    """Return the keyword prefix placed in front of the generated query.

    The log name recorded on the instance wins; otherwise product and service
    are used when a service is set; otherwise the prefix is empty.

    :param parsed: parsed condition (not used here)
    :return: prefix string ending with a space, or ""
    """
    # not required but makes query faster, especially if no FER or _index/_sourceCategory
    prefix = ""
    if self.logname:
        prefix = "%s " % self.logname
    elif self.service:
        prefix = "%s %s " % (self.product, self.service)
    return prefix
|
||||
|
||||
def generate(self, sigmaparser):
    """Generate the query for one parsed Sigma rule.

    Per-rule state (product, service, interval, logname) is reset first, so
    that values recorded while converting a previous rule with the same
    backend instance cannot leak into this one.

    :param sigmaparser: parsed rule; reads ``parsedyaml`` and ``condparsed``
    :return: "(" + before + query + after + ")" for the first parsed
             condition, or None when the rule has no parsed condition
    """
    try:
        self.product = sigmaparser.parsedyaml['logsource']['product']  # OS or Software
        self.service = sigmaparser.parsedyaml['logsource']['service']  # Channel
    except (KeyError, TypeError):
        # Both are dropped together when either one is missing.
        self.product = None
        self.service = None

    try:
        self.interval = sigmaparser.parsedyaml['detection']['timeframe']
    except (KeyError, TypeError):
        self.interval = None

    # Set again by generateMapItemNode while the query below is generated.
    self.logname = None

    for parsed in sigmaparser.condparsed:
        query = self.generateQuery(parsed)
        before = self.generateBefore(parsed)
        after = self.generateAfter(parsed)

        result = ""
        if before is not None:
            result = before
        if query is not None:
            result += query
        if after is not None:
            result += after

        # adding parenthesis here in case 2 rules are aggregated together - ex: win_possible_applocker_bypass
        # Only the first parsed condition is rendered.
        return "(" + result + ")"
    return None
|
||||
|
||||
def __init__(self, *args, **kwargs):
    """Initialise the backend and build the list of allowed field names.

    The list is a fixed set of field names plus every target found in the
    field mappings of the conversion configuration, without duplicates.
    """
    super().__init__(*args, **kwargs)
    # TODO/FIXME! depending on deployment configuration, existing FER must be populate here (or backend config?)
    allowed = {"EventID", "sourcename", "CommandLine", "NewProcessName", "Image", "ParentImage", "ParentCommandLine", "ParentProcessName"}
    for mapping in self.sigmaconfig.fieldmappings.values():
        if mapping.target_type is list:
            # one source field mapped to several target fields
            allowed.update(mapping.target)
        else:
            allowed.add(mapping.target)
    self.allowedFieldsList = list(allowed)
|
||||
|
||||
# Skip logsource value from sigma document for separate path.
|
||||
#def generateCleanValueNodeLogsource(self, value):
|
||||
# return self.valueExpression % (self.cleanValue(str(value)))
|
||||
|
||||
# Clearing values from special characters.
|
||||
# Sumologic: only removing '*' (inside quotes it is literal; outside quotes it is a wildcard) and '"'
|
||||
def CleanNode(self, node):
    """Split a value on special characters and return the pieces as a list.

    :param node: value to clean
    :return: ``[node]`` unchanged when its string form contains none of the
             characters ``/ @ ? # & % * ( ) "``; otherwise the non-empty
             fragments found between those characters
    """
    special_chars = re.compile(r"[\/@?#&%*\(\)\"]")
    if special_chars.search(str(node)) is None:
        return [node]
    # Every special character acts as a separator; empty fragments (adjacent
    # or leading/trailing separators) are dropped.
    return [fragment for fragment in special_chars.split(node) if fragment]
|
||||
|
||||
# Clearing values from special characters.
|
||||
def generateMapItemNode(self, node):
    """Render one ``key: value`` item of a Sigma detection.

    Keys listed in ``self.allowedFieldsList`` are rendered as field
    expressions. For any other key the field name is dropped and only the
    value is searched, after being split on special characters by CleanNode
    (which always returns a list of fragments).

    :param node: ``(key, value)`` pair
    :return: query fragment as a string
    :raises TypeError: when the value is not a str, int or list
    """
    key, value = node
    value_type = type(value)
    # Exact type checks are kept on purpose (a bool is rejected, as before).
    # Lists count as single values only when special list handling is off.
    as_single_value = (
        self.mapListsSpecialHandling == False and value_type in (str, int, list)
        or self.mapListsSpecialHandling == True and value_type in (str, int)
    )

    if key in self.allowedFieldsList:
        if as_single_value:
            if key in ("LogName", "source"):
                # Remembered so that generateBefore can prefix the query.
                self.logname = value
            # NOTE(review): unlike list values, a single value is inserted
            # as-is and does not go through cleanValue (see the FIXME on
            # cleanValue about single-quoted input).
            return self.mapExpression % (key, value)
        if value_type is list:
            return self.generateMapItemListNode(key, value)
        raise TypeError("Backend does not support map values of type " + str(value_type))

    if as_single_value:
        if value_type is str:
            fragments = self.CleanNode(value)
            joined = self.andToken.join([self.valueExpression % val for val in fragments])
            return "(" + self.generateANDNode([joined]) + ")"
        return self.generateValueNode(value)
    if value_type is list:
        # One AND-group of fragments per list item, OR-ed together.
        alternatives = [
            self.andToken.join([self.valueExpression % val for val in self.CleanNode(item)])
            for item in value
        ]
        return self.generateORNode(alternatives)
    raise TypeError("Backend does not support map values of type " + str(value_type))
|
||||
|
||||
# from mixins.py
|
||||
#FIXME! input in simple quotes are not passing through this function. ex: rules/windows/sysmon/sysmon_vul_java_remote_debugging.yml, rules/apt/apt_sofacy_zebrocy.yml
|
||||
# => OK only if field entry with list, not string
|
||||
def cleanValue(self, val, key = ''):
    """Escape a value and, for field values, move wildcards outside quotes.

    Nothing is printed here: the converter's standard output is the generated
    query, so debug output on stdout would end up inside it.

    :param val: value to clean (string)
    :param key: field name the value belongs to; when empty the wildcard
                handling is skipped
    :return: the cleaned value
    """
    if self.reEscape:
        val = self.reEscape.sub(self.escapeSubst, val)
    if self.reClear:
        val = self.reClear.sub("", val)
    # in sumologic, if key, can use wildcard outside of double quotes. if inside, it's litteral
    if key:
        # wildcard in the middle of the value: close and reopen the quotes around it
        val = re.sub(r'(.+?)\*(.+?)', r'\g<1>"*"\g<2>', val)
        # leading / trailing wildcard: put the quote on the inner side
        val = re.sub(r'^\*', '*"', val)
        val = re.sub(r'\*$', '"*', val)
        # if unbalanced wildcard?
        if val.startswith('*"') and not (val.endswith('"*') or val.endswith('"')):
            val = val + '"'
        if val.endswith('"*') and not (val.startswith('*"') or val.startswith('"')):
            val = '"' + val
        # double escape if end quote
        if val.endswith('\\"*') and not val.endswith('\\\\"*'):
            val = re.sub(r'\\"\*$', '\\\\\\"*', val)
    return val
|
||||
|
||||
# for keywords values with space
|
||||
def generateValueNode(self, node, key = ''):
    """Render a single value, wrapping it in parentheses when it contains 'AND'.

    :param node: value to render; integers are converted to text
    :param key: field name the value belongs to, passed on to cleanValue
    :return: the cleaned value, surrounded by parentheses when the original
             (non-integer) value contains the text 'AND'
    """
    # Integers are never wrapped; they are not searched for 'AND'.
    needs_parens = type(node) is not int and 'AND' in node
    cleaned = self.cleanValue(str(node), key)
    if needs_parens:
        return "(" + cleaned + ")"
    return cleaned
|
||||
|
||||
def generateMapItemListNode(self, key, value):
    """Render a list of values for one key as a parenthesised OR expression.

    :param key: field name
    :param value: list of values
    :return: "(a OR b ...)" where each entry is "key = value" when the key is
             in ``self.allowedFieldsList`` and the bare value otherwise
    """
    def render(item):
        if key in self.allowedFieldsList:
            return '%s = %s' % (key, self.generateValueNode(item, key))
        return '%s' % (self.generateValueNode(item))

    return "(" + " OR ".join([render(item) for item in value]) + ")"
|
||||
|
||||
# generateORNode algorithm for ArcSightBackend & SumoLogicBackend class.
|
||||
def generateORNode(self, node):
    """Render the operands of an OR node, joined by ``self.orToken``, in parentheses.

    When the node is a ConditionOR made only of strings, every operand is
    first split by CleanNode and its fragments are joined with
    ``self.andToken``; any other node has its operands rendered directly.

    :param node: OR condition or iterable of operands
    :return: "(" + operands joined by the OR token + ")"
    """
    operands = node
    if type(node) == ConditionOR and all(isinstance(item, str) for item in node):
        operands = []
        for raw in node:
            cleaned = self.CleanNode(raw)
            if type(cleaned) is list:
                operands.append(self.andToken.join([self.valueExpression % val for val in cleaned]))
            else:
                operands.append(cleaned)
    rendered = [self.generateNode(val) for val in operands]
    return "(" + self.orToken.join(rendered) + ")"
|
||||
|
||||
def fieldNameMapping(self, fieldname, value):
    """
    Hook for a final, value-dependent transformation of a field name.

    The name received here has already been mapped according to the
    conversion configuration. This backend applies no further change and
    returns the name as it is.
    TODO/FIXME!

    :param fieldname: field name after configuration mapping
    :param value: value(s) of the field (currently unused)
    :return: fieldname, unchanged
    """
    return fieldname
|
||||
|
||||
Reference in New Issue
Block a user