From 68d8afe4e65cfa9d6b0e29b1333091eb7566eb8a Mon Sep 17 00:00:00 2001
From: Thomas Patzke <thomas@patzke.org>
Date: Fri, 8 Dec 2017 21:45:05 +0100
Subject: [PATCH] Intermediate refactoring commit: moving code into package

Further splitting sigma.py into smaller parts.
---
 sigma                                         |    1 -
 .../requirements-devel.txt                    |    0
 requirements.txt => tools/requirements.txt    |    0
 setup.cfg => tools/setup.cfg                  |    0
 setup.py => tools/setup.py                    |    0
 tools/{ => sigma}/__init__.py                 |    0
 tools/{ => sigma}/backends.py                 |    0
 tools/{sigma.py => sigma/config.py}           |    2 -
 tools/sigma/parser.py                         | 1097 +++++++++++++++++
 9 files changed, 1097 insertions(+), 3 deletions(-)
 delete mode 120000 sigma
 rename requirements-devel.txt => tools/requirements-devel.txt (100%)
 rename requirements.txt => tools/requirements.txt (100%)
 rename setup.cfg => tools/setup.cfg (100%)
 rename setup.py => tools/setup.py (100%)
 rename tools/{ => sigma}/__init__.py (100%)
 rename tools/{ => sigma}/backends.py (100%)
 rename tools/{sigma.py => sigma/config.py} (99%)
 create mode 100644 tools/sigma/parser.py

diff --git a/sigma b/sigma
deleted file mode 120000
index 557a54764..000000000
--- a/sigma
+++ /dev/null
@@ -1 +0,0 @@
-tools/
\ No newline at end of file
diff --git a/requirements-devel.txt b/tools/requirements-devel.txt
similarity index 100%
rename from requirements-devel.txt
rename to tools/requirements-devel.txt
diff --git a/requirements.txt b/tools/requirements.txt
similarity index 100%
rename from requirements.txt
rename to tools/requirements.txt
diff --git a/setup.cfg b/tools/setup.cfg
similarity index 100%
rename from setup.cfg
rename to tools/setup.cfg
diff --git a/setup.py b/tools/setup.py
similarity index 100%
rename from setup.py
rename to tools/setup.py
diff --git a/tools/__init__.py b/tools/sigma/__init__.py
similarity index 100%
rename from tools/__init__.py
rename to tools/sigma/__init__.py
diff --git a/tools/backends.py b/tools/sigma/backends.py
similarity index 100%
rename from tools/backends.py
rename to tools/sigma/backends.py
diff --git a/tools/sigma.py b/tools/sigma/config.py
similarity index 99%
rename from tools/sigma.py
rename to tools/sigma/config.py
index 1df10c5af..766bf2b2f 100644
--- a/tools/sigma.py
+++ b/tools/sigma/config.py
@@ -1093,7 +1093,5 @@ class SigmaRuleFilter:
         # all tests passed
         return True
 
-
-
 class SigmaRuleFilterParseException(Exception):
     pass
diff --git a/tools/sigma/parser.py b/tools/sigma/parser.py
new file mode 100644
index 000000000..766bf2b2f
--- /dev/null
+++ b/tools/sigma/parser.py
@@ -0,0 +1,1097 @@
+# Sigma parser
+# Copyright 2016-2017 Thomas Patzke, Florian Roth
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import yaml
+import re
+import logging
+
+logger = logging.getLogger(__name__)
+
+COND_NONE = 0
+COND_AND  = 1
+COND_OR   = 2
+COND_NOT  = 3
+COND_NULL = 4
+
+class SigmaCollectionParser:
+    """
+    Parses a Sigma file that may contain multiple Sigma rules as different YAML documents.
+
+    Special processing of YAML document if 'action' attribute is set to:
+
+    * global: merges attributes from document in all following documents. Accumulates attributes from previous set_global documents
+    * reset: resets global attributes from previous set_global statements
+    * repeat: takes attributes from this YAML document, merges into previous rule YAML and regenerates the rule
+    """
+    def __init__(self, content, config=None, rulefilter=None):
+        """
+        Load all YAML documents from content and create a SigmaParser for each selected rule.
+
+        Rules are kept if rulefilter is None or rulefilter.match(rule) is true; config defaults to SigmaConfiguration().
+        """
+        if config is None:
+            config = SigmaConfiguration()
+        self.yamls = yaml.safe_load_all(content)
+        globalyaml = dict()
+        self.parsers = list()
+        prevrule = None
+        for yamldoc in self.yamls:
+            action = yamldoc.pop('action', None)    # directive for the collection parser, removed from the rule itself
+
+            if action == "global":
+                deep_update_dict(globalyaml, yamldoc)
+            elif action == "reset":
+                globalyaml = dict()
+            elif action == "repeat":
+                if prevrule is None:
+                    raise SigmaCollectionParseError("action 'repeat' is only applicable after first valid Sigma rule")
+                newrule = prevrule.copy()       # shallow copy: nested maps are still shared with prevrule
+                deep_update_dict(newrule, yamldoc)
+                if rulefilter is None or rulefilter.match(newrule):     # same selection logic as for regular rules below
+                    self.parsers.append(SigmaParser(newrule, config))
+                    prevrule = newrule
+            else:
+                deep_update_dict(yamldoc, globalyaml)
+                if rulefilter is None or rulefilter.match(yamldoc):
+                    self.parsers.append(SigmaParser(yamldoc, config))
+                    prevrule = yamldoc
+        self.config = config
+
+    def generate(self, backend):
+        """Calls backend for all parsed rules"""
+        for parser in self.parsers:
+            backend.generate(parser)
+
+    def __iter__(self):
+        return iter([parser.parsedyaml for parser in self.parsers])
+
+def deep_update_dict(dest, src):
+    for key, value in src.items():
+        if isinstance(value, dict) and key in dest and isinstance(dest[key], dict):     # source is dict, destination key already exists and is dict: merge
+                deep_update_dict(dest[key], value)
+        else:
+            dest[key] = value
+
+class SigmaCollectionParseError(Exception):
+    pass
+
+class SigmaParser:
+    """Parse a Sigma rule (definitions, conditions and aggregations)"""
+    def __init__(self, sigma, config):
+        self.definitions = dict()
+        self.values = dict()
+        self.config = config
+        self.parsedyaml = sigma
+        self.parse_sigma()
+
+    def parse_sigma(self):
+        try:    # definition uniqueness check
+            for definitionName, definition in self.parsedyaml["detection"].items():
+                self.definitions[definitionName] = definition
+                self.extract_values(definition)     # builds key-values-table in self.values
+        except KeyError:
+            raise SigmaParseError("No detection definitions found")
+
+        try:    # tokenization
+            conditions = self.parsedyaml["detection"]["condition"]
+            self.condtoken = list()     # list of tokenized conditions
+            if type(conditions) == str:
+                self.condtoken.append(SigmaConditionTokenizer(conditions))
+            elif type(conditions) == list:
+                for condition in conditions:
+                    self.condtoken.append(SigmaConditionTokenizer(condition))
+        except KeyError:
+            raise SigmaParseError("No condition found")
+
+        self.condparsed = list()        # list of parsed conditions
+        for tokens in self.condtoken:
+            logger.debug("Condition tokens: %s", str(tokens))
+            condparsed = SigmaConditionParser(self, tokens)
+            logger.debug("Condition parse tree: %s", str(condparsed))
+            self.condparsed.append(condparsed)
+
+    def parse_definition_byname(self, definitionName, condOverride=None):
+        try:
+            definition = self.definitions[definitionName]
+        except KeyError as e:
+            raise SigmaParseError("Unknown definition '%s'" % definitionName) from e
+        return self.parse_definition(definition, condOverride)
+
+    def parse_definition(self, definition, condOverride=None):
+        if type(definition) not in (dict, list):
+            raise SigmaParseError("Expected map or list, got type %s: '%s'" % (type(definition), str(definition)))
+
+        if type(definition) == list:    # list of values or maps
+            if condOverride:    # condition given through rule detection condition, e.g. 1 of x
+                cond = condOverride()
+            else:               # no condition given, use default from spec
+                cond = ConditionOR()
+
+            subcond = None
+            for value in definition:
+                if type(value) in (str, int):
+                    cond.add(value)
+                elif type(value) in (dict, list):
+                    cond.add(self.parse_definition(value))
+                else:
+                    raise SigmaParseError("Definition list may only contain plain values or maps")
+        elif type(definition) == dict:      # map
+            cond = ConditionAND()
+            for key, value in definition.items():
+                mapping = self.config.get_fieldmapping(key)
+                if value == None:
+                    fields = mapping.resolve_fieldname(key)
+                    if type(fields) == str:
+                        fields = [ fields ]
+                    for field in fields:
+                        cond.add(ConditionNULLValue(val=field))
+                elif value == "not null":
+                    fields = mapping.resolve_fieldname(key)
+                    if type(fields) == str:
+                        fields = [ fields ]
+                    for field in fields:
+                        cond.add(ConditionNotNULLValue(val=field))
+                else:
+                    cond.add(mapping.resolve(key, value, self))
+
+        return cond
+
+    def extract_values(self, definition):
+        """Extract all values from map key:value pairs into self.values"""
+        if type(definition) == list:     # iterate through items of list
+            for item in definition:
+                self.extract_values(item)
+        elif type(definition) == dict:  # add dict items to map
+            for key, value in definition.items():
+                self.add_value(key, value)
+
+    def add_value(self, key, value):
+        """Add value to values table, create key if it doesn't exist"""
+        if key in self.values:
+            self.values[key].add(str(value))
+        else:
+            self.values[key] = { str(value) }
+
+    def get_logsource(self):
+        """Returns logsource configuration object for current rule"""
+        try:
+            ls_rule = self.parsedyaml['logsource']
+        except KeyError:
+            return None
+
+        try:
+            category = ls_rule['category']
+        except KeyError:
+            category = None
+        try:
+            product = ls_rule['product']
+        except KeyError:
+            product = None
+        try:
+            service = ls_rule['service']
+        except KeyError:
+            service = None
+
+        return self.config.get_logsource(category, product, service)
+
+class SigmaConditionToken:
+    """Token of a Sigma condition expression"""
+    TOKEN_AND    = 1
+    TOKEN_OR     = 2
+    TOKEN_NOT    = 3
+    TOKEN_ID     = 4
+    TOKEN_LPAR   = 5
+    TOKEN_RPAR   = 6
+    TOKEN_PIPE   = 7
+    TOKEN_ONE    = 8
+    TOKEN_ALL    = 9
+    TOKEN_AGG    = 10
+    TOKEN_EQ     = 11
+    TOKEN_LT     = 12
+    TOKEN_LTE    = 13
+    TOKEN_GT     = 14
+    TOKEN_GTE    = 15
+    TOKEN_BY     = 16
+    TOKEN_NEAR   = 17
+
+    tokenstr = [
+            "INVALID",
+            "AND",
+            "OR",
+            "NOT",
+            "ID",
+            "LPAR",
+            "RPAR",
+            "PIPE",
+            "ONE",
+            "ALL",
+            "AGG",
+            "EQ",
+            "LT",
+            "LTE",
+            "GT",
+            "GTE",
+            "BY",
+            "NEAR",
+            ]
+
+    def __init__(self, tokendef, match, pos):
+        self.type = tokendef[0]
+        self.matched = match.group()
+        self.pos = pos
+
+    def __eq__(self, other):
+        if type(other) == int:      # match against type
+            return self.type == other
+        if type(other) == str:      # match against content
+            return self.matched == other
+        else:
+            raise NotImplementedError("SigmaConditionToken can only be compared against token type constants")
+
+    def __str__(self):
+        return "[ Token: %s: '%s' ]" % (self.tokenstr[self.type], self.matched)
+
+class SigmaConditionTokenizer:
+    """Tokenize condition string into token sequence"""
+    tokendefs = [      # list of tokens, preferred recognition in given order, (token identifier, matching regular expression). Ignored if token id == None
+            (SigmaConditionToken.TOKEN_ONE,    re.compile("1 of", re.IGNORECASE)),
+            (SigmaConditionToken.TOKEN_ALL,    re.compile("all of", re.IGNORECASE)),
+            (None,       re.compile("[\\s\\r\\n]+")),
+            (SigmaConditionToken.TOKEN_AGG,    re.compile("count|min|max|avg|sum", re.IGNORECASE)),
+            (SigmaConditionToken.TOKEN_NEAR,   re.compile("near", re.IGNORECASE)),
+            (SigmaConditionToken.TOKEN_BY,     re.compile("by", re.IGNORECASE)),
+            (SigmaConditionToken.TOKEN_EQ,     re.compile("==")),
+            (SigmaConditionToken.TOKEN_LTE,    re.compile("<=")),      # two-character operators first, otherwise "<" / ">" would match their prefix
+            (SigmaConditionToken.TOKEN_LT,     re.compile("<")),
+            (SigmaConditionToken.TOKEN_GTE,    re.compile(">=")),
+            (SigmaConditionToken.TOKEN_GT,     re.compile(">")),
+            (SigmaConditionToken.TOKEN_PIPE,   re.compile("\\|")),
+            (SigmaConditionToken.TOKEN_AND,    re.compile("and", re.IGNORECASE)),
+            (SigmaConditionToken.TOKEN_OR,     re.compile("or", re.IGNORECASE)),
+            (SigmaConditionToken.TOKEN_NOT,    re.compile("not", re.IGNORECASE)),
+            (SigmaConditionToken.TOKEN_ID,     re.compile("\\w+")),
+            (SigmaConditionToken.TOKEN_LPAR,   re.compile("\\(")),
+            (SigmaConditionToken.TOKEN_RPAR,   re.compile("\\)")),
+            ]
+
+    def __init__(self, condition):      # condition: string to tokenize or list of already created tokens/nodes
+        if type(condition) == str:          # String that is parsed
+            self.tokens = list()
+            pos = 1                         # 1-based position of the not yet consumed rest of the condition
+
+            while len(condition) > 0:
+                for tokendef in self.tokendefs:     # iterate over defined tokens and try to recognize the next one
+                    match = tokendef[1].match(condition)
+                    if match:
+                        if tokendef[0] is not None:     # token id None (whitespace) is consumed without emitting a token
+                            self.tokens.append(SigmaConditionToken(tokendef, match, pos + match.start()))
+                        pos += match.end()      # increase position and cut matched prefix from condition
+                        condition = condition[match.end():]
+                        break
+                else:   # no valid token identified
+                    raise SigmaParseError("Unexpected token in condition at position %s" % pos)
+        elif type(condition) == list:       # List of tokens to be converted into SigmaConditionTokenizer class
+            self.tokens = condition
+        else:
+            raise TypeError("SigmaConditionTokenizer constructor expects string or list, got %s" % (type(condition)))
+
+    def __str__(self):
+        return " ".join([str(token) for token in self.tokens])
+
+    def __iter__(self):
+        return iter(self.tokens)
+
+    def __len__(self):
+        return len(self.tokens)
+
+    def __getitem__(self, i):
+        if type(i) == int:
+            return self.tokens[i]
+        elif type(i) == slice:
+            return SigmaConditionTokenizer(self.tokens[i])
+        else:
+            raise IndexError("Expected index or slice")
+
+    def __add__(self, other):
+        if isinstance(other, SigmaConditionTokenizer):
+            return SigmaConditionTokenizer(self.tokens + other.tokens)
+        elif isinstance(other, (SigmaConditionToken, ParseTreeNode)):
+            return SigmaConditionTokenizer(self.tokens + [ other ])
+        else:
+            raise TypeError("+ operator expects SigmaConditionTokenizer or token type, got %s: %s" % (type(other), str(other)))
+
+    def index(self, item):
+        return self.tokens.index(item)
+
+class SigmaParseError(Exception):
+    pass
+
+### Parse Tree Node Classes ###
+class ParseTreeNode:
+    """Parse Tree Node Base Class"""
+    def __init__(self):     # base class is not instantiable, subclasses define their own constructor
+        raise NotImplementedError("ParseTreeNode is no usable class")
+
+    def __str__(self):      # label is the docstring of the node's class, followed by the string form of its items
+        return "[ %s: %s ]" % (self.__doc__, str([str(item) for item in self.items]))
+
+class ConditionBase(ParseTreeNode):
+    """Base class for conditional operations"""
+    op = COND_NONE
+    items = None
+
+    def __init__(self):
+        raise NotImplementedError("ConditionBase is no usable class")
+
+    def add(self, item):
+        self.items.append(item)
+
+    def __iter__(self):
+        return iter(self.items)
+
+    def __len__(self):
+        return len(self.items)
+
+class ConditionAND(ConditionBase):
+    """AND Condition"""
+    op = COND_AND
+
+    def __init__(self, sigma=None, op=None, val1=None, val2=None):
+        if sigma == None and op == None and val1 == None and val2 == None:    # no parameters given - initialize empty
+            self.items = list()
+        else:       # called by parser, use given values
+            self.items = [ val1, val2 ]
+
+class ConditionOR(ConditionAND):
+    """OR Condition"""
+    op = COND_OR
+
+class ConditionNOT(ConditionBase):
+    """NOT Condition"""
+    op = COND_NOT
+
+    def __init__(self, sigma=None, op=None, val=None):
+        if sigma is None and op is None and val is None:    # no parameters given - initialize empty
+            self.items = list()
+        else:       # called by parser, use given values
+            self.items = [ val ]
+
+    def add(self, item):    # accepts exactly one operand, raises ValueError for any further one
+        if len(self.items) == 0:
+            super().add(item)       # delegate to ConditionBase.add
+        else:
+            raise ValueError("Only one element allowed")
+
+    @property
+    def item(self):         # the single operand or None if not set yet
+        try:
+            return self.items[0]
+        except IndexError:
+            return None
+
+class ConditionNULLValue(ConditionNOT):
+    """Condition: Field value is empty or doesn't exists"""
+    pass
+
+class ConditionNotNULLValue(ConditionNULLValue):
+    """Condition: Field value is not empty"""
+    pass
+
+class NodeSubexpression(ParseTreeNode):
+    """Subexpression"""
+    def __init__(self, subexpr):
+        self.items = subexpr
+
+# Parse tree converters: convert something into one of the parse tree node classes defined above
+def convertAllOf(sigma, op, val):
+    """Convert 'all of x' into ConditionAND"""
+    return NodeSubexpression(sigma.parse_definition_byname(val.matched, ConditionAND))
+
+def convertOneOf(sigma, op, val):
+    """Convert '1 of x' into ConditionOR"""
+    return NodeSubexpression(sigma.parse_definition_byname(val.matched, ConditionOR))
+
+def convertId(sigma, op):
+    """Convert search identifiers (lists or maps) into condition nodes according to spec defaults"""
+    return NodeSubexpression(sigma.parse_definition_byname(op.matched))
+
+# Condition parser class
+class SigmaConditionParser:
+    """Parser for Sigma condition expression"""
+    searchOperators = [     # description of operators: (token id, number of operands, parse tree node class) - order == precedence
+            (SigmaConditionToken.TOKEN_ALL, 1, convertAllOf),
+            (SigmaConditionToken.TOKEN_ONE, 1, convertOneOf),
+            (SigmaConditionToken.TOKEN_ID,  0, convertId),
+            (SigmaConditionToken.TOKEN_NOT, 1, ConditionNOT),
+            (SigmaConditionToken.TOKEN_AND, 2, ConditionAND),
+            (SigmaConditionToken.TOKEN_OR,  2, ConditionOR),
+            ]
+
+    def __init__(self, sigmaParser, tokens):
+        self.sigmaParser = sigmaParser
+        self.config = sigmaParser.config
+
+        if SigmaConditionToken.TOKEN_PIPE in tokens:    # Condition contains at least one aggregation expression
+            pipepos = tokens.index(SigmaConditionToken.TOKEN_PIPE)
+            self.parsedSearch = self.parseSearch(tokens[:pipepos])
+            self.parsedAgg = SigmaAggregationParser(tokens[pipepos + 1:], self.sigmaParser, self.config)
+        else:
+            self.parsedSearch = self.parseSearch(tokens)
+            self.parsedAgg = None
+
+    def parseSearch(self, tokens):
+        """
+        Iterative parsing of search expression. Innermost parenthesized groups are reduced first, so groups may be nested.
+        """
+        # 1. Identify subexpressions with parentheses around them and parse them like a separate search expression
+        while SigmaConditionToken.TOKEN_LPAR in tokens:
+            lPos = tokens.index(SigmaConditionToken.TOKEN_LPAR)
+            try:
+                rPos = tokens.index(SigmaConditionToken.TOKEN_RPAR)
+                rTok = tokens[rPos]
+            except ValueError as e:
+                raise SigmaParseError("Missing matching closing parentheses") from e
+            if lPos > rPos:
+                raise SigmaParseError("Closing parentheses at position " + str(rTok.pos) + " precedes opening at position " + str(tokens[lPos].pos))
+            lPos = max(pos for pos in range(lPos, rPos) if tokens[pos] == SigmaConditionToken.TOKEN_LPAR)   # last opening before the first closing: innermost pair
+            if lPos + 1 == rPos:
+                raise SigmaParseError("Empty subexpression at " + str(tokens[lPos].pos))
+
+            subparsed = self.parseSearch(tokens[lPos + 1:rPos])
+            tokens = tokens[:lPos] + NodeSubexpression(subparsed) + tokens[rPos + 1:]   # replace parentheses + expression with group node that contains parsed subexpression
+
+        # 2. Iterate over all known operators in given precedence
+        for operator in self.searchOperators:
+            # 3. reduce all occurrences into corresponding parse tree nodes
+            while operator[0] in tokens:
+                pos_op = tokens.index(operator[0])
+                tok_op = tokens[pos_op]
+                if operator[1] == 0:    # operator
+                    treenode = operator[2](self.sigmaParser, tok_op)
+                    tokens = tokens[:pos_op] + treenode + tokens[pos_op + 1:]
+                elif operator[1] == 1:    # operator value
+                    pos_val = pos_op + 1
+                    tok_val = tokens[pos_val]
+                    treenode = operator[2](self.sigmaParser, tok_op, tok_val)
+                    tokens = tokens[:pos_op] + treenode + tokens[pos_val + 1:]
+                elif operator[1] == 2:    # value1 operator value2
+                    pos_val1 = pos_op - 1
+                    pos_val2 = pos_op + 1
+                    tok_val1 = tokens[pos_val1]
+                    tok_val2 = tokens[pos_val2]
+                    treenode = operator[2](self.sigmaParser, tok_op, tok_val1, tok_val2)
+                    tokens = tokens[:pos_val1] + treenode + tokens[pos_val2 + 1:]
+
+        if len(tokens) != 1:     # parse tree must begin with exactly one node
+            raise ValueError("Parse tree must have exactly one start node!")
+        querycond = tokens[0]
+
+        logsource = self.sigmaParser.get_logsource()
+        if logsource != None:
+            # 4. Integrate conditions from configuration
+            if logsource.conditions != None:
+                cond = ConditionAND()
+                cond.add(logsource.conditions)
+                cond.add(querycond)
+                querycond = cond
+
+            # 5. Integrate index conditions if applicable for backend
+            indexcond = logsource.get_indexcond()
+            if indexcond != None:
+                cond = ConditionAND()
+                cond.add(indexcond)
+                cond.add(querycond)
+                querycond = cond
+
+        return querycond
+
+    def __str__(self):
+        return str(self.parsedSearch)
+
+    def __len__(self):
+        return len(self.parsedSearch)
+
+class SimpleParser:
+    """
+    Rule-defined parser that converts a token stream into a Python object.
+
+    Rules are defined in the class property parsingrules, a list of dict of tuples with the following format:
+    [ { token_0_0: parsing_rule_0_0, token_0_1: parsing_rule_0_1, ..., token_0_n: parsing_rule_0_n } , ... , { token_m_0: parsing_rule_m_0, ... } ]
+
+    Each list index of parsing rules represents a parser state.
+    Each parser state is defined by a dict which associates a token with a rule definition.
+    The rule definition is a tuple that defines what is done next when the parser encounters a token in the current parser state:
+
+    ( storage attribute, transformation function, next ruleset)
+
+    * storage attribute: the name of the object attribute that is used for storage of the attribute
+    * transformation method: name of an object method that is called before storage. It gets a parameter and returns the value that is stored
+    * next state: next parser state
+
+    A None value means that the action (transformation, storage or state change) is not conducted.
+
+    A negative state has the special meaning that no further token is expected and may be used as return value.
+    The set or list finalstates contains valid final states. The parser verifies after the last token that it
+    has reached one of these states. If not, a parse error is raised.
+    """
+
+    def __init__(self, tokens, init_state=0):
+        self.state = init_state
+
+        for token in tokens:
+            if self.state < 0:
+                raise SigmaParseError("No further token expected, but read %s" % (str(token)))
+            try:
+                rule = self.parsingrules[self.state][token.type]
+            except KeyError as e:
+                raise SigmaParseError("Unexpected token %s at %d in aggregation expression" % (str(token), token.pos)) from e
+
+            value = token.matched
+            trans_value = value
+            if rule[1] != None:
+                trans_value = getattr(self, rule[1])(value)
+            if rule[0] != None:
+                setattr(self, rule[0], trans_value)
+                setattr(self, rule[0] + "_notrans", value)
+            if rule[2] != None:
+                self.state = rule[2]
+        if self.state not in self.finalstates:
+            raise SigmaParseError("Unexpected end of aggregation expression, state=%d" % (self.state))
+
+    def __str__(self):
+        return "[ Parsed: %s ]" % (" ".join(["%s=%s" % (key, val) for key, val in self.__dict__.items() ]))
+ 
+class SigmaAggregationParser(SimpleParser):
+    """Parse Sigma aggregation expression and provide parsed data"""
+    parsingrules = [
+            {   # State 0
+                SigmaConditionToken.TOKEN_AGG:  ("aggfunc", "trans_aggfunc", 1),
+                SigmaConditionToken.TOKEN_NEAR: ("aggfunc", "init_near_parsing", 8),
+            },
+            {   # State 1
+                SigmaConditionToken.TOKEN_LPAR: (None, None, 2)
+            },
+            {   # State 2
+                SigmaConditionToken.TOKEN_RPAR: (None, None, 4),
+                SigmaConditionToken.TOKEN_ID: ("aggfield", "trans_fieldname", 3),
+            },
+            {   # State 3
+                SigmaConditionToken.TOKEN_RPAR: (None, None, 4)
+            },
+            {   # State 4
+                SigmaConditionToken.TOKEN_BY: ("cond_op", None, 5),
+                SigmaConditionToken.TOKEN_EQ: ("cond_op", None, 7),
+                SigmaConditionToken.TOKEN_LT: ("cond_op", None, 7),
+                SigmaConditionToken.TOKEN_LTE: ("cond_op", None, 7),
+                SigmaConditionToken.TOKEN_GT: ("cond_op", None, 7),
+                SigmaConditionToken.TOKEN_GTE: ("cond_op", None, 7),
+            },
+            {   # State 5
+                SigmaConditionToken.TOKEN_ID: ("groupfield", "trans_fieldname", 6)
+            },
+            {   # State 6
+                SigmaConditionToken.TOKEN_EQ: ("cond_op", None, 7),
+                SigmaConditionToken.TOKEN_LT: ("cond_op", None, 7),
+                SigmaConditionToken.TOKEN_LTE: ("cond_op", None, 7),
+                SigmaConditionToken.TOKEN_GT: ("cond_op", None, 7),
+                SigmaConditionToken.TOKEN_GTE: ("cond_op", None, 7),
+            },
+            {   # State 7
+                SigmaConditionToken.TOKEN_ID: ("condition", None, -1)
+            },
+            {   # State 8
+                SigmaConditionToken.TOKEN_ID: (None, "store_search_id", 9)
+            },
+            {   # State 9
+                SigmaConditionToken.TOKEN_AND: (None, "set_include", 10),
+            },
+            {   # State 10
+                SigmaConditionToken.TOKEN_NOT: (None, "set_exclude", 8),
+                SigmaConditionToken.TOKEN_ID: (None, "store_search_id", 9),
+            },
+            ]
+    finalstates = { -1, 9 }
+
+    # Aggregation functions
+    AGGFUNC_COUNT = 1
+    AGGFUNC_MIN   = 2
+    AGGFUNC_MAX   = 3
+    AGGFUNC_AVG   = 4
+    AGGFUNC_SUM   = 5
+    AGGFUNC_NEAR  = 6
+    aggfuncmap = {
+            "count": AGGFUNC_COUNT,
+            "min":   AGGFUNC_MIN,
+            "max":   AGGFUNC_MAX,
+            "avg":   AGGFUNC_AVG,
+            "sum":   AGGFUNC_SUM,
+            "near":  AGGFUNC_NEAR,
+            }
+
+    def __init__(self, tokens, parser, config):
+        self.parser = parser
+        self.config = config
+        self.aggfield = ""
+        self.groupfield = None
+        super().__init__(tokens)
+
+    def trans_aggfunc(self, name):
+        """Translate aggregation function name into constant"""
+        try:
+            return self.aggfuncmap[name]
+        except KeyError:
+            raise SigmaParseError("Unknown aggregation function '%s'" % (name))
+
+    def trans_fieldname(self, fieldname):
+        """Translate field name into configured mapped name"""
+        mapped = self.config.get_fieldmapping(fieldname).resolve_fieldname(fieldname)
+        if type(mapped) == str:
+            return mapped
+        else:
+            raise NotImplementedError("Field mappings in aggregations must be single valued")
+
+    def init_near_parsing(self, name):
+        """Initialize data structures for 'near' aggregation operator parsing"""
+        self.include = list()
+        self.exclude = list()
+        self.current = self.include
+        return self.trans_aggfunc(name)
+
+    def store_search_id(self, name):
+        self.current.append(name)
+        return name
+
+    def set_include(self, name):
+        self.current = self.include
+
+    def set_exclude(self, name):
+        self.current = self.exclude
+
+    def trans_timeframe(self, name):
+        return self.parser.parsedyaml["detection"][name]
+
+# Field Mapping Definitions
+def FieldMapping(source, target=None):
+    """Determines target type and instantiate appropriate mapping type"""
+    if target == None:
+        return SimpleFieldMapping(source, source)
+    elif type(target) == str:
+        return SimpleFieldMapping(source, target)
+    elif type(target) == list:
+        return MultiFieldMapping(source, target)
+    elif type(target) == dict:
+        return ConditionalFieldMapping(source, target)
+
+class SimpleFieldMapping:
+    """Maps one source field name onto exactly one target field name; base class of the other mapping types."""
+    target_type = str       # exact type the target must have; overridden by subclasses
+
+    def __init__(self, source, target):
+        """Store source and target after verifying the target is exactly of type target_type (TypeError otherwise)."""
+        if type(target) is not self.target_type:
+            raise TypeError("Target type mismatch: wrong mapping type for this target")
+        self.source, self.target = source, target
+
+    def resolve(self, key, value, sigmaparser):
+        """Return the (target field, value) pair; key and sigmaparser are not used by this mapping type."""
+        return self.target, value
+
+    def resolve_fieldname(self, fieldname):
+        """Return the target field name regardless of the given fieldname."""
+        return self.target
+
+class MultiFieldMapping(SimpleFieldMapping):
+    """1:n field mapping: one source field is searched in every listed target field, linked with OR."""
+    target_type = list
+
+    def resolve(self, key, value, sigmaparser):
+        """Build a ConditionOR holding one (target field, value) item per target; key and sigmaparser are unused."""
+        alternatives = ConditionOR()
+        for target_field in self.target:
+            alternatives.add((target_field, value))
+        return alternatives
+
+    def resolve_fieldname(self, fieldname):  # yields the complete target list, not a single name
+        return self.target
+
+class ConditionalFieldMapping(SimpleFieldMapping):
+    """
+    Conditional field mapping:
+    * key contains field=value condition, value target mapping
+    * key "default" maps when no condition matches
+    * if no condition matches and there is no default, don't perform mapping
+    * targets are either a single field name or a list of field names
+    """
+    target_type = dict
+
+    def __init__(self, source, target):
+        """Build the condition table from the target dict; raises SigmaConfigParseError for malformed keys or defaults."""
+        super().__init__(source, target)
+        self.conditions = dict()    # condition field -> condition value -> target fields
+        self.default = None         # list of target fields used when no condition matches
+        # each key is either a "field=value" condition or the literal "default"
+        for condition, target in self.target.items():
+            try:                    # key contains condition (field=value)
+                field, value = condition.split("=")
+            except ValueError as e:      # no condition - "default" expected
+                if condition != "default":
+                    raise SigmaConfigParseError("Expected condition or default") from e
+                if self.default is not None:
+                    raise SigmaConfigParseError("Conditional field mapping can have only one default value, use list for multiple target mappings")
+                if type(target) == str:
+                    self.default = [ target ]
+                elif type(target) == list:
+                    self.default = target
+                else:
+                    raise SigmaConfigParseError("Default mapping must be single value or list")
+            else:
+                self.add_condition(field, value, target)
+
+    def add_condition(self, field, value, target):
+        """Register target (a field name or a list of field names) for the condition field=value."""
+        targets = self.conditions.setdefault(field, dict()).setdefault(value, list())
+        if type(target) == str:
+            targets.append(target)
+        elif type(target) == list:
+            targets.extend(target)
+        # NOTE: a target of any other type is silently ignored
+
+    def resolve(self, key, value, sigmaparser):
+        """Return (target, value) for one matching target, a ConditionOR for several, or (key, value) if unmapped."""
+        # build list of matching target mappings
+        targets = set()
+        for condfield, condvalues in self.conditions.items():
+            if condfield in sigmaparser.values:
+                rulefieldvalues = sigmaparser.values[condfield]
+                for condvalue, condtargets in condvalues.items():
+                    if condvalue in rulefieldvalues:
+                        targets.update(condtargets)
+        if len(targets) == 0 and self.default is not None:   # no matching condition, try with default mapping
+            # Work on a copy: the pop() below emptied self.default when it held a single target,
+            # so the default mapping was lost for every later resolve() call.
+            targets = list(self.default)
+
+        if len(targets) == 1:     # result set contains only one target, return mapped item (like SimpleFieldMapping)
+            return (targets.pop(), value)
+        elif len(targets) > 1:        # result set contains multiple targets, return all linked as OR condition (like MultiFieldMapping)
+            cond = ConditionOR()
+            for target in targets:
+                cond.add((target, value))
+            return cond
+        else:                       # no mapping found
+            return (key, value)
+
+    def resolve_fieldname(self, fieldname):
+        """Return the default target list if a default is configured, otherwise fieldname unchanged."""
+        return self.default if self.default is not None else fieldname
+
+# Configuration
+class SigmaConfiguration:
+    """Sigma converter configuration. Contains field mappings and logsource descriptions"""
+    def __init__(self, configyaml=None):
+        """
+        Load the configuration from configyaml (passed to yaml.safe_load) or create an empty one.
+
+        Log source definitions are not parsed here but in set_backend(), which passes the
+        index field of the backend to them.
+        Raises SigmaConfigParseError if 'fieldmappings' is present but not a map.
+        """
+        self.fieldmappings = dict()
+        self.logsources = list()    # always a list; the empty configuration used a dict here before
+        self.backend = None
+        if configyaml is None:
+            self.config = None
+            self.logsourcemerging = SigmaLogsourceConfiguration.MM_AND
+            self.defaultindex = None
+            return
+
+        config = yaml.safe_load(configyaml)
+        self.config = config
+
+        if 'fieldmappings' in config:
+            # Check the configured value itself: the former check of self.fieldmappings could never
+            # fail and a non-map value surfaced as AttributeError from .items() instead.
+            if type(config['fieldmappings']) != dict:
+                raise SigmaConfigParseError("Fieldmappings must be a map")
+            for source, target in config['fieldmappings'].items():
+                self.fieldmappings[source] = FieldMapping(source, target)
+
+        self.logsourcemerging = config.get('logsourcemerging', SigmaLogsourceConfiguration.MM_AND)
+        self.defaultindex = config.get('defaultindex')
+
+    def get_fieldmapping(self, fieldname):
+        """Return the mapping object configured for fieldname, or an identity mapping if none is defined"""
+        try:
+            return self.fieldmappings[fieldname]
+        except KeyError:
+            return FieldMapping(fieldname)
+
+    def get_logsource(self, category, product, service):
+        """Return merged log source definition of all log sources that match criteria"""
+        matching = [logsource for logsource in self.logsources if logsource.matches(category, product, service)]
+        # NOTE(review): self.logsourcemerging is not passed on, so this merge uses the default merge method - confirm intent
+        return SigmaLogsourceConfiguration(matching, self.defaultindex)
+
+    def set_backend(self, backend):
+        """Set backend. This is used by other code to determine target properties for index addressing"""
+        self.backend = backend
+        if self.config is None or 'logsources' not in self.config:
+            return
+        logsources = self.config['logsources']
+        if type(logsources) != dict:
+            raise SigmaConfigParseError("Logsources must be a map")
+        indexfield = self.get_indexfield()
+        for name, logsource in logsources.items():
+            self.logsources.append(SigmaLogsourceConfiguration(logsource, self.defaultindex, name, self.logsourcemerging, indexfield))
+
+    def get_indexfield(self):
+        """Return the index field name of the backend, or None if no backend is set"""
+        if self.backend is None:
+            return None
+        return self.backend.index_field
+
+class SigmaLogsourceConfiguration:
+    """
+    Contains the definition of a log source
+
+    A definition consists of the optional criteria category, product and service, a list of
+    index patterns and optional conditions. Objects are created empty, from a parsed YAML map
+    or by merging several existing definitions.
+    """
+    MM_AND = "and"  # Merge all conditions with AND
+    MM_OR  = "or"   # Merge all conditions with OR
+
+    def __init__(self, logsource=None, defaultindex=None, name=None, mergemethod=MM_AND, indexfield=None):
+        """
+        Create an empty, a merged or a parsed log source definition, depending on the type of logsource.
+
+        logsource    -- None (empty object), a list of SigmaLogsourceConfiguration objects (merge them)
+                        or a dict taken from the parsed YAML configuration
+        defaultindex -- index pattern (str or list of str) used if a merge results in no index pattern
+        name         -- name of the log source definition
+        mergemethod  -- MM_AND or MM_OR, only evaluated when a list is merged
+        indexfield   -- field name used by get_indexcond(); a merge takes it from the merged objects instead
+
+        Raises SigmaConfigParseError for invalid definitions and ValueError or TypeError for invalid merges.
+        """
+        self.name = name
+        self.indexfield = indexfield
+        if logsource is None:               # create empty object
+            self.category = self.product = self.service = None
+            self.index = list()
+            self.conditions = None
+        elif type(logsource) == list and all(isinstance(o, SigmaLogsourceConfiguration) for o in logsource):
+            self._merge(logsource, defaultindex, mergemethod)
+        elif type(logsource) == dict:       # create logsource configuration from parsed yaml
+            self._parse(logsource)
+        else:
+            raise SigmaConfigParseError("Logsource definitions must be maps")
+
+    def _merge(self, logsources, defaultindex, mergemethod):
+        """
+        Initialize this object as merge of a list of SigmaLogsourceConfiguration objects.
+
+        Raises ValueError if the objects disagree in category, product or service or if mergemethod
+        is unknown, and TypeError if a needed defaultindex is neither a string nor a list of strings.
+        """
+        # Merge category, product and service
+        categories = { ls.category for ls in logsources if ls.category is not None }
+        products = { ls.product for ls in logsources if ls.product is not None }
+        services = { ls.service for ls in logsources if ls.service is not None }
+        if len(categories) > 1 or len(products) > 1 or len(services) > 1:
+            raise ValueError("Merged SigmaLogsourceConfigurations must have disjunct categories (%s), products (%s) and services (%s)" % (str(categories), str(products), str(services)))
+        self.category = next(iter(categories), None)    # the single remaining value or None
+        self.product = next(iter(products), None)
+        self.service = next(iter(services), None)
+
+        # Merge all index patterns
+        self.index = list({ index for ls in logsources for index in ls.index })       # unique(flat(logsources.index))
+        if len(self.index) == 0 and defaultindex is not None:   # if no index pattern matched and default index is present: use default index
+            if type(defaultindex) == str:
+                self.index = [defaultindex]
+            elif type(defaultindex) == list and all(type(i) == str for i in defaultindex):
+                self.index = list(defaultindex)     # copy, so the list of the caller is not shared with this object
+            else:
+                raise TypeError("Default index must be string or list of strings")
+
+        # "merge" index field (should never differ between instances because it is provided by backend class)
+        self.indexfield = next((ls.indexfield for ls in logsources if ls.indexfield is not None), None)
+
+        # Merge conditions according to mergemethod
+        if mergemethod == self.MM_AND:
+            cond = ConditionAND()
+        elif mergemethod == self.MM_OR:
+            cond = ConditionOR()
+        else:
+            raise ValueError("Mergemethod must be '%s' or '%s'" % (self.MM_AND, self.MM_OR))
+        for ls in logsources:
+            if ls.conditions is not None:
+                cond.add(ls.conditions)
+        self.conditions = cond if len(cond) > 0 else None
+
+    def _parse(self, logsource):
+        """
+        Initialize this object from one log source definition of the parsed YAML configuration.
+
+        Raises SigmaConfigParseError if a value has the wrong type or no criterion is given at all.
+        """
+        if any(key in logsource and type(logsource[key]) != str for key in ('category', 'product', 'service')):
+            raise SigmaConfigParseError("Logsource category, product or service must be a string")
+        self.category = logsource.get('category')
+        self.product = logsource.get('product')
+        self.service = logsource.get('service')
+        if self.category is None and self.product is None and self.service is None:
+            raise SigmaConfigParseError("Log source definition will not match")
+
+        if 'index' in logsource:
+            index = logsource['index']
+            if type(index) not in (str, list):
+                raise SigmaConfigParseError("Logsource index must be string or list of strings")
+            if type(index) == list and not all(type(pattern) == str for pattern in index):
+                raise SigmaConfigParseError("Logsource index patterns must be strings")
+            self.index = index if type(index) == list else [ index ]
+        else:
+            # no default index handling here - this branch is executed if log source definitions are parsed from
+            # config and these must not necessarily contain an index definition. A valid index may later be result
+            # from a merge, where default index handling applies.
+            self.index = []
+
+        if 'conditions' in logsource:
+            if type(logsource['conditions']) != dict:
+                raise SigmaConfigParseError("Logsource conditions must be a map")
+            cond = ConditionAND()
+            for key, value in logsource['conditions'].items():
+                cond.add((key, value))
+            self.conditions = cond
+        else:
+            self.conditions = None
+
+    def matches(self, category, product, service):
+        """
+        Match log source definition against given criteria
+
+        Criteria that are None in this definition are ignored. Every other criterion must equal
+        the given value. Returns False if this definition sets no criterion at all.
+        """
+        pairs = zip((category, product, service), (self.category, self.product, self.service))
+        defined = [(searchval, selfval) for searchval, selfval in pairs if selfval is not None]
+        # always a bool: the former implementation returned None if nothing was compared
+        return len(defined) > 0 and all(searchval == selfval for searchval, selfval in defined)
+
+    def get_indexcond(self):
+        """Return an OR condition of (indexfield, index) items, or None if no index field name is configured"""
+        if not self.indexfield:
+            return None
+        cond = ConditionOR()
+        for index in self.index:
+            cond.add((self.indexfield, index))
+        return cond
+
+    def __str__(self):
+        return "[ LogSourceConfiguration: %s %s %s indices: %s ]" % (self.category, self.product, self.service, str(self.index))
+
+class SigmaConfigParseError(Exception):
+    """Raised when the Sigma converter configuration is malformed"""
+
+# Rule Filtering
+class SigmaRuleFilter:
+    """Filter for Sigma rules with conditions"""
+    LEVELS = {
+            "low"      : 0,
+            "medium"   : 1,
+            "high"     : 2,
+            "critical" : 3
+            }
+    STATES = ["experimental", "testing", "stable"]
+
+    def __init__(self, expr):
+        """
+        Parse a comma separated list of filter conditions, spaces are ignored.
+
+        Supported conditions are level<=L, level>=L and level=L (L is a key of LEVELS), status=S
+        (S is an element of STATES) and logsource=V, which may be given multiple times.
+        Raises SigmaRuleFilterParseException for unknown conditions, levels or states.
+        """
+        self.minlevel   = None
+        self.maxlevel   = None
+        self.status     = None
+        self.logsources = list()
+
+        for cond in [c.replace(" ", "") for c in expr.split(",")]:
+            value = cond[cond.find("=") + 1:]      # part after the first "=", only used if the condition contains one
+            if cond.startswith("level<="):
+                self.maxlevel = self._parse_level(value, cond)
+            elif cond.startswith("level>="):
+                self.minlevel = self._parse_level(value, cond)
+            elif cond.startswith("level="):
+                self.minlevel = self.maxlevel = self._parse_level(value, cond)
+            elif cond.startswith("status="):
+                self.status = value
+                if self.status not in self.STATES:
+                    raise SigmaRuleFilterParseException("Unknown status '%s' in condition '%s'" % (self.status, cond))
+            elif cond.startswith("logsource="):
+                self.logsources.append(value)
+            else:
+                raise SigmaRuleFilterParseException("Unknown condition '%s'" % cond)
+
+    def _parse_level(self, level, cond):
+        """Translate a level name into its LEVELS value; cond is the whole condition, used in the error message"""
+        try:
+            return self.LEVELS[level]
+        except KeyError as e:
+            raise SigmaRuleFilterParseException("Unknown level '%s' in condition '%s'" % (level, cond)) from e
+
+    def match(self, yamldoc):
+        """
+        Match filter conditions against rule
+
+        yamldoc is the parsed rule (a map). Returns True if the rule fulfils all conditions. A rule
+        lacking a value needed by a condition (level, status or logsource) does not match.
+        """
+        # Levels
+        if self.minlevel is not None or self.maxlevel is not None:
+            try:
+                level = self.LEVELS[yamldoc['level']]
+            except KeyError:    # missing or invalid level
+                return False    # User wants level restriction, but it's not possible here
+            if self.minlevel is not None and level < self.minlevel:     # Minimum level
+                return False
+            if self.maxlevel is not None and level > self.maxlevel:     # Maximum level
+                return False
+
+        # Status
+        if self.status is not None:
+            try:
+                status = yamldoc['status']
+            except KeyError:    # missing status
+                return False    # User wants status restriction, but it's not possible here
+            if status != self.status:
+                return False
+
+        # Log Sources
+        if len(self.logsources) > 0:
+            try:
+                logsources = list(yamldoc['logsource'].values())   # list, not set: unhashable values must not raise
+            except (KeyError, AttributeError):    # no log source set
+                return False    # User wants log source restriction, but it's not possible here
+            if any(logsrc not in logsources for logsrc in self.logsources):
+                return False
+
+        # all tests passed
+        return True
+
+class SigmaRuleFilterParseException(Exception):
+    """Raised when a rule filter expression contains an unknown condition, level or status"""