[Bug] KQL fails validation on uppercase keywords (#3568)

* add todo

* Add a normalize_kql_keywords function to utils

* update rule loader to normalize and warn

* optimized loading

* fix linting

* Moved conversion to kql module.

* Updated unit test

* Refactor KQL parser to normalize keywords via flag

* Fix logic typo

* Update detection_rules/utils.py

Co-authored-by: Justin Ibarra <16747370+brokensound77@users.noreply.github.com>

* Update lib/kql/kql/__init__.py

Co-authored-by: Justin Ibarra <16747370+brokensound77@users.noreply.github.com>

* Updated to fix unit tests and remove warnings

* linting typo

* Added comments

* remove unused imports

* Update kql.parse default

---------

Co-authored-by: Justin Ibarra <16747370+brokensound77@users.noreply.github.com>
Co-authored-by: Mika Ayenson <Mikaayenson@users.noreply.github.com>

(cherry picked from commit 1566c29bae)
This commit is contained in:
Eric Forte
2024-04-04 18:03:30 -04:00
committed by github-actions[bot]
parent 07204987f2
commit c6df1d085f
7 changed files with 25 additions and 12 deletions
+1 -1
View File
@@ -1405,7 +1405,7 @@ def get_unique_query_fields(rule: TOMLRule) -> List[str]:
cfg = set_eql_config(rule.contents.metadata.get('min_stack_version'))
with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions, eql.parser.skip_optimizations, cfg:
parsed = kql.parse(query) if language == 'kuery' else eql.parse_query(query)
parsed = kql.parse(query, normalize_kql_keywords=True) if language == 'kuery' else eql.parse_query(query)
return sorted(set(str(f) for f in parsed if isinstance(f, (eql.ast.Field, kql.ast.Field))))
+3 -3
View File
@@ -36,7 +36,7 @@ class KQLValidator(QueryValidator):
@cached_property
def ast(self) -> kql.ast.Expression:
return kql.parse(self.query)
return kql.parse(self.query, normalize_kql_keywords=True)
@cached_property
def unique_fields(self) -> List[str]:
@@ -80,7 +80,7 @@ class KQLValidator(QueryValidator):
beats_version, ecs_version)
try:
kql.parse(self.query, schema=schema)
kql.parse(self.query, schema=schema, normalize_kql_keywords=True)
except kql.KqlParseError as exc:
message = exc.error_msg
trailer = err_trailer
@@ -135,7 +135,7 @@ class KQLValidator(QueryValidator):
# Validate the query against the schema
try:
kql.parse(self.query, schema=integration_schema)
kql.parse(self.query, schema=integration_schema, normalize_kql_keywords=True)
except kql.KqlParseError as exc:
if exc.error_msg == "Unknown field":
field = extract_error_field(self.query, exc)
+1 -1
View File
@@ -241,7 +241,7 @@ def convert_time_span(span: str) -> int:
def evaluate(rule, events):
"""Evaluate a query against events."""
evaluator = kql.get_evaluator(kql.parse(rule.query))
evaluator = kql.get_evaluator(kql.parse(rule.query, normalize_kql_keywords=True))
filtered = list(filter(evaluator, events))
return filtered
+2 -2
View File
@@ -45,12 +45,12 @@ def to_eql(text, optimize=True, schema=None):
return converted.optimize(recursive=True) if optimize else converted
def parse(text, optimize=True, schema=None):
def parse(text, optimize: bool = True, schema: dict = None, normalize_kql_keywords: bool = False):
if isinstance(text, bytes):
text = text.decode("utf-8")
lark_parsed = lark_parse(text)
converted = KqlParser(text, schema=schema).visit(lark_parsed)
converted = KqlParser(text, schema=schema, normalize_kql_keywords=normalize_kql_keywords).visit(lark_parsed)
return converted.optimize(recursive=True) if optimize else converted
+11 -4
View File
@@ -104,22 +104,29 @@ class BaseKqlParser(Interpreter):
quoted_escapes = {"\\t": "\t", "\\r": "\r", "\\n": "\n", "\\\\": "\\", "\\\"": "\""}
quoted_regex = re.compile("(" + "|".join(re.escape(e) for e in sorted(quoted_escapes)) + ")")
def __init__(self, text, schema=None):
def __init__(self, text: str, schema: dict = None, normalize_kql_keywords: bool = True) -> None:
"""Initialize the parser. Defaults to normalizing KQL keywords to lowercase."""
self.text = text
self.lines = [t.rstrip("\r\n") for t in self.text.splitlines(True)]
self.scoped_field = None
self.mapping_schema = schema
self.star_fields = []
self.normalize_kql_keywords = normalize_kql_keywords
if schema:
for field, field_type in schema.items():
if "*" in field:
self.star_fields.append(wildcard2regex(field))
def assert_lower_token(self, *tokens):
def assert_lower_token(self, *tokens: Token) -> None:
"""Assert that the token is lowercase and converts token if not."""
for token in tokens:
if str(token) != str(token).lower():
raise self.error(token, "Expected '{lower}' but got '{token}'".format(token=token, lower=str(token).lower()))
lower_token = str(token).lower()
if str(token) != lower_token:
if self.normalize_kql_keywords:
token.value = lower_token
else:
raise self.error(token, f"Expected '{lower_token}' but got '{token}'")
def error(self, node, message, end=False, cls=KqlParseError, width=None, **kwargs):
"""Generate an error exception but dont raise it."""
+6
View File
@@ -34,6 +34,12 @@ class LintTests(unittest.TestCase):
with self.assertRaises(kql.KqlParseError):
kql.parse(q)
for q in queries:
# Test query successfully converts and parses
parsed_query = kql.parse(q, normalize_kql_keywords=True)
# Test that the parsed query is not equal to the original query, that the transformation was applied
self.assertNotEqual(str(parsed_query), q, f"Parsed query {parsed_query} matches the original {q}")
def test_lint_precedence(self):
self.validate("a:b or (c:d and e:f)", "a:b or c:d and e:f")
self.validate("(a:b and (c:d or e:f))", "a:b and (c:d or e:f)")
+1 -1
View File
@@ -67,7 +67,7 @@ class TestValidRules(BaseRuleTest):
)
):
source = rule.contents.data.query
tree = kql.parse(source, optimize=False)
tree = kql.parse(source, optimize=False, normalize_kql_keywords=True)
optimized = tree.optimize(recursive=True)
err_message = f'\n{self.rule_str(rule)} Query not optimized for rule\n' \
f'Expected: {optimized}\nActual: {source}'