From 398e4527ea9fae30cf2a1cfb76e00b2652600a4a Mon Sep 17 00:00:00 2001 From: neu5ron <> Date: Wed, 11 Mar 2020 11:29:05 -0400 Subject: [PATCH] keyword, analyzed field, case insensitivity --- tools/sigma/backends/elasticsearch.py | 168 ++++++++++++++++++++++---- 1 file changed, 145 insertions(+), 23 deletions(-) diff --git a/tools/sigma/backends/elasticsearch.py b/tools/sigma/backends/elasticsearch.py index 4450f77ff..d4b8d2f89 100644 --- a/tools/sigma/backends/elasticsearch.py +++ b/tools/sigma/backends/elasticsearch.py @@ -35,18 +35,48 @@ class ElasticsearchWildcardHandlingMixin(object): provide configurability with backend parameters. """ options = SingleTextQueryBackend.options + ( - ("keyword_field", "keyword", "Keyword sub-field name", None), - ("keyword_blacklist", None, "Fields that don't have a keyword subfield (wildcards * and ? allowed)", None) + ("keyword_field", "keyword", "Keyword sub-field name (default is: '.keyword'). Set blank value if all keyword fields are the base(top-level) field. Additionally see 'keyword_base_fields' for more granular control of the base & subfield situation.", None), + ("analyzed_sub_field_name", "", "Analyzed sub-field name. By default analyzed field is the base field. Therefore, use this option to make the analyzed field a subfield. An example value would be '.text' ", None), + ("analyzed_sub_fields", None, "Fields that have an analyzed sub-field.", None), + ("keyword_base_fields", None, "Fields that the keyword is base (top-level) field. By default analyzed field is the base field. So use this option to change that logic. Valid options are: list of fields, single field. Also, wildcards * and ? allowed.", None), + ("keyword_whitelist", None, "Fields to always set as keyword. Bypasses case insensitive options. Valid options are: list of fields, single field. Also, wildcards * and ? allowed.", None), + ("keyword_blacklist", None, "Fields to never set as keyword (ie: always set as analyzed field). Bypasses case insensitive options. Valid options are: list of fields, single field. Also, wildcards * and ? allowed.", None), + ("case_insensitive_whitelist", None, "Fields to make the values case insensitive regex. Automatically sets the field as a keyword. Valid options are: list of fields, single field. Also, wildcards * and ? allowed.", None), + ("case_insensitive_blacklist", None, "Fields to exclude from being made into case insensitive regex. Valid options are: list of fields, single field. Also, wildcards * and ? allowed.", None) ) reContainsWildcard = re.compile("(?:(?.*", value ) + # Escape additional values that are treated as specific "operators" within Elastic. (ie: @, ?, &, <, >, and ~) + # reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html#regexp-optional-operators + value = re.sub( r"(((?])", "\g<1>\\\\\g<4>", value ) + # Validate regex + try: + re.compile(value) + return {'is_regex': True, 'value': value} + # Regex failed + except re.error: + raise TypeError( "Regular expression validation error for: '%s')" %str(value) ) + else: + return { 'is_regex': False, 'value': value } + class ElasticsearchQuerystringBackend(ElasticsearchWildcardHandlingMixin, SingleTextQueryBackend): """Converts Sigma rule into Elasticsearch query string. Only searches, no aggregations.""" @@ -81,7 +182,6 @@ class ElasticsearchQuerystringBackend(ElasticsearchWildcardHandlingMixin, Single active = True reEscape = re.compile("([\s+\\-=!(){}\\[\\]^\"~:/]|(?]") andToken = " AND " orToken = " OR " notToken = "NOT " @@ -103,6 +203,11 @@ class ElasticsearchQuerystringBackend(ElasticsearchWildcardHandlingMixin, Single return '""' else: if self.matchKeyword: # don't quote search value on keyword field + if self.CaseInSensitiveField: + make_ci = self.makeCaseInSensitiveValue(result) + result = make_ci.get('value') + if make_ci.get('is_regex'): # Determine if still should be a regex + result = "/%s/" % result # Regex place holders for regex return result else: return "\"%s\"" % result @@ -129,6 +234,7 @@ class ElasticsearchQuerystringBackend(ElasticsearchWildcardHandlingMixin, Single newitems.append(item) newnode = NodeSubexpression(nodetype(None, None, *newitems)) self.matchKeyword = True + print('FINDME:figure this out') result = "\\*.keyword:" + super().generateSubexpressionNode(newnode) self.matchKeyword = False # one of the reasons why the converter needs some major overhaul return result @@ -145,6 +251,7 @@ class ElasticsearchDSLBackend(RulenameCommentMixin, ElasticsearchWildcardHandlin ) interval = None title = None + reEscape = re.compile( "([\s+\\-=!(){}\\[\\]^\"~:/]|(?", str(v))) + value_cleaned = make_ci.get('value') + if not make_ci.get( 'is_regex' ): # Determine if still should be a regex + queryType = 'wildcard' + value_cleaned = self.escapeSlashes( self.cleanValue( str( v ) ) ) + else: + queryType = 'wildcard' + value_cleaned = self.escapeSlashes(self.cleanValue(str(v))) else: queryType = 'match_phrase' value_cleaned = self.cleanValue(str(v)) - res['bool']['should'].append({queryType: {key_mapped: value_cleaned}}) return res elif value is None: @@ -229,9 +343,17 @@ class ElasticsearchDSLBackend(RulenameCommentMixin, ElasticsearchWildcardHandlin return { "bool": { "must_not": { "exists": { "field": key_mapped } } } } elif type(value) in (str, int): key_mapped = self.fieldNameMapping(key, value) - if self.matchKeyword: # searches against keyowrd fields are wildcard searches, phrases otherwise - queryType = 'wildcard' - value_cleaned = self.escapeSlashes(self.cleanValue(str(value))) + if self.matchKeyword: # searches against keyword fields are wildcard searches, phrases otherwise + if self.CaseInSensitiveField: + queryType = 'regexp' + make_ci = self.makeCaseInSensitiveValue( self.reEscape.sub( "\\\\\g<1>", str( value ) ) ) + value_cleaned = make_ci.get( 'value' ) + if not make_ci.get( 'is_regex' ): # Determine if still should be a regex + queryType = 'wildcard' + value_cleaned = self.escapeSlashes( self.cleanValue( str( value ) ) ) + else: + queryType = 'wildcard' + value_cleaned = self.escapeSlashes(self.cleanValue(str(value))) else: queryType = 'match_phrase' value_cleaned = self.cleanValue(str(value))