fix: Allow different order of the metadata fields in ESQL queries (#4956)

* Initial commit

* Python project version bump
This commit is contained in:
Sergey Polzunov
2025-08-02 02:26:39 +02:00
committed by GitHub
parent 04ca2c8128
commit ff46a7ab4a
3 changed files with 66 additions and 4 deletions
+11 -3
View File
@@ -934,9 +934,15 @@ class ESQLRuleData(QueryRuleData):
# Convert the query string to lowercase to handle case insensitivity
query_lower = data["query"].lower()
# Combine both patterns using an OR operator and compile the regex
# Combine both patterns using an OR operator and compile the regex.
# The first part matches the `from` clause: one or more comma-separated indices followed by
# `metadata` and three fields drawn from `_id`, `_version` and `_index`, in any order.
# The second part matches the `stats` command with a `by` clause.
combined_pattern = re.compile(
r"(from\s+\S+\s+metadata\s+_id,\s*_version,\s*_index)|(\bstats\b.*?\bby\b)", re.DOTALL
r"(from\s+(?:\S+\s*,\s*)*\S+\s+metadata\s+"
r"(?:_id|_version|_index)(?:,\s*(?:_id|_version|_index)){2})"
r"|(\bstats\b.*?\bby\b)",
re.DOTALL,
)
# Ensure that non-aggregate queries have metadata
@@ -948,7 +954,9 @@ class ESQLRuleData(QueryRuleData):
)
# Enforce KEEP command for ESQL rules
if "| keep" not in query_lower:
# Match | followed by optional whitespace/newlines and then 'keep'
keep_pattern = re.compile(r"\|\s*keep\b", re.IGNORECASE | re.DOTALL)
if not keep_pattern.search(query_lower):
raise ValidationError(
f"Rule: {data['name']} does not contain a 'keep' command -> Add a 'keep' command to the query."
)
+1 -1
View File
@@ -1,6 +1,6 @@
[project]
name = "detection_rules"
version = "1.3.18"
version = "1.3.19"
description = "Detection Rules is the home for rules used by Elastic Security. This repository is used for the development, maintenance, testing, validation, and release of rules for Elastic Security's Detection Engine."
readme = "README.md"
requires-python = ">=3.12"
+54
View File
@@ -8,14 +8,18 @@
import copy
import unittest
import uuid
from pathlib import Path
import eql
import pytest
import pytoml
from marshmallow import ValidationError
from semver import Version
from detection_rules import utils
from detection_rules.config import load_current_package_version
from detection_rules.rule import TOMLRuleContents
from detection_rules.rule_loader import RuleCollection
from detection_rules.schemas import RULES_CONFIG, downgrade
from detection_rules.version_lock import VersionLockFile
@@ -302,3 +306,53 @@ class TestVersions(unittest.TestCase):
stack_map = utils.load_etc_dump(["stack-schema-map.yaml"])
err_msg = f"There is no entry defined for the current package ({package_version}) in the stack-schema-map"
self.assertIn(package_version, [Version.parse(v) for v in stack_map], err_msg)
class TestESQLValidation(unittest.TestCase):
    """Test ESQL rule validation."""

    @staticmethod
    def _load_with_query(rule_dict, rule_path, query):
        """Load a copy of ``rule_dict`` whose query is replaced by ``query``.

        A deep copy is used so that every case starts from the same pristine
        rule body, regardless of what an earlier load did to its input.

        Returns whatever ``RuleCollection().load_dict`` returns; any exception
        raised by the loader propagates to the caller.
        """
        candidate = copy.deepcopy(rule_dict)
        candidate["rule"]["query"] = query
        return RuleCollection().load_dict(candidate, path=rule_path)

    def test_esql_data_validation(self) -> None:
        """Test ESQL rule data validation."""
        # A random ESQL rule to deliver a test query
        rule_path = Path("rules/windows/defense_evasion_posh_obfuscation_index_reversal.toml")
        # Read explicitly as UTF-8 so the result does not depend on the platform default encoding
        rule_dict = pytoml.loads(rule_path.read_text(encoding="utf-8"))

        # Most used order of the metadata fields
        query = """
            FROM logs-windows.powershell_operational* METADATA _id, _version, _index
            | WHERE event.code == "4104"
            | KEEP event.count
        """
        _ = self._load_with_query(rule_dict, rule_path, query)

        # The order of the metadata fields from the example in the docs -
        # https://www.elastic.co/guide/en/security/8.17/rules-ui-create.html#esql-non-agg-query
        query = """
            FROM logs-windows.powershell_operational* METADATA _id, _index, _version
            | WHERE event.code == "4104"
            | KEEP event.count
        """
        _ = self._load_with_query(rule_dict, rule_path, query)

        # Different metadata fields
        query = """
            FROM logs-windows.powershell_operational* METADATA _foo, _index
            | WHERE event.code == "4104"
            | KEEP event.count
        """
        # Only the load call sits inside the context manager, so an unrelated
        # error during setup cannot be mistaken for the expected failure.
        with pytest.raises(ValidationError):
            _ = self._load_with_query(rule_dict, rule_path, query)

        # Missing `keep`
        query = """
            FROM logs-windows.powershell_operational* METADATA _id, _index, _version
            | WHERE event.code == "4104"
        """
        with pytest.raises(ValidationError):
            _ = self._load_with_query(rule_dict, rule_path, query)