[FR] [DaC] Add fine-grained bypass env var for ES|QL keep and metadata validation (#5869)

* Add fine grain 'keep' req bypass

* Add metadata bypass
This commit is contained in:
Eric Forte
2026-03-24 14:36:45 -04:00
committed by GitHub
parent b14dec9efa
commit 75ffa5ec4e
9 changed files with 196 additions and 37 deletions
+29 -2
View File
@@ -16,7 +16,13 @@ import yaml
from eql.utils import load_dump # type: ignore[reportMissingTypeStubs]
from .misc import discover_tests
from .utils import cached, get_etc_path, load_etc_dump, set_all_validation_bypass
from .utils import (
OPTIONAL_ELASTIC_VALIDATION_BYPASS_ENV,
cached,
get_etc_path,
load_etc_dump,
set_all_validation_bypass,
)
ROOT_DIR = Path(__file__).parent.parent
CUSTOM_RULES_DIR = os.getenv("CUSTOM_RULES_DIR", None)
@@ -208,6 +214,12 @@ class RulesConfig:
exception_dir: Path | None = None
normalize_kql_keywords: bool = True
bypass_optional_elastic_validation: bool = False
bypass_note_validation_and_parse: bool = False
bypass_bbr_lookback_validation: bool = False
bypass_tags_validation: bool = False
bypass_timeline_template_validation: bool = False
bypass_esql_keep_validation: bool = False
bypass_esql_metadata_validation: bool = False
no_tactic_filename: bool = False
def __post_init__(self) -> None:
@@ -323,7 +335,22 @@ def parse_rules_config(path: Path | None = None) -> RulesConfig: # noqa: PLR091
# bypass_optional_elastic_validation
contents["bypass_optional_elastic_validation"] = loaded.get("bypass_optional_elastic_validation", False)
if contents["bypass_optional_elastic_validation"]:
set_all_validation_bypass(contents["bypass_optional_elastic_validation"])
set_all_validation_bypass(True)
for yaml_key in OPTIONAL_ELASTIC_VALIDATION_BYPASS_ENV:
contents[yaml_key] = True
else:
for yaml_key, env_var in OPTIONAL_ELASTIC_VALIDATION_BYPASS_ENV.items():
if yaml_key in loaded:
val = loaded[yaml_key]
if not isinstance(val, bool):
raise SystemExit(
f"`{yaml_key}` in _config.yaml must be a boolean (true/false), not {type(val).__name__}"
)
else:
val = False
contents[yaml_key] = val
if val:
os.environ[env_var] = str(True)
# no_tactic_filename
contents["no_tactic_filename"] = loaded.get("no_tactic_filename", False)
+14 -2
View File
@@ -61,8 +61,20 @@ normalize_kql_keywords: False
# stack-schema-map.yaml file when using a custom rules directory and config.
# auto_gen_schema_file: "etc/auto-gen-schema.json"
# To bulk-disable Elastic validation for optional fields, use the following line
# bypass_optional_elastic_validation: True
# Optional Elastic validation bypasses (each true value sets the matching DR_BYPASS_* env var at load time).
#
# 1) Enable every bypass at once:
# bypass_optional_elastic_validation: true
#
# 2) Or set only the bypasses you need (ignored if bypass_optional_elastic_validation is true):
# bypass_note_validation_and_parse: true # DR_BYPASS_NOTE_VALIDATION_AND_PARSE
# bypass_bbr_lookback_validation: true # DR_BYPASS_BBR_LOOKBACK_VALIDATION
# bypass_tags_validation: true # DR_BYPASS_TAGS_VALIDATION
# bypass_timeline_template_validation: true # DR_BYPASS_TIMELINE_TEMPLATE_VALIDATION
# bypass_esql_keep_validation: true # DR_BYPASS_ESQL_KEEP_VALIDATION
# bypass_esql_metadata_validation: true # DR_BYPASS_ESQL_METADATA_VALIDATION
#
# Each must be true or false if present; omitted keys default to false.
# This points to the testing config file (see example under detection_rules/etc/example_test_config.yaml)
# This can either be set here or as the environment variable `DETECTION_RULES_TEST_CONFIG`, with precedence
+38 -25
View File
@@ -981,36 +981,49 @@ class ESQLRuleData(QueryRuleData):
)
# Ensure that non-aggregate queries have metadata
if not combined_pattern.search(query_lower):
raise EsqlSemanticError(
f"Rule: {data['name']} contains a non-aggregate query without"
f" metadata fields '_id', '_version', and '_index' ->"
f" Add 'metadata _id, _version, _index' to the from command or add an aggregate function."
if os.environ.get("DR_BYPASS_ESQL_METADATA_VALIDATION") is None:
bypass_metadata_hint = (
" To bypass ES|QL `FROM` metadata validation, set the environment variable "
"`DR_BYPASS_ESQL_METADATA_VALIDATION`."
)
if not combined_pattern.search(query_lower):
raise EsqlSemanticError(
f"Rule: {data['name']} contains a non-aggregate query without"
f" metadata fields '_id', '_version', and '_index' ->"
f" Add 'metadata _id, _version, _index' to the from command or add an aggregate function."
+ bypass_metadata_hint
)
# Enforce KEEP command for ESQL rules and that METADATA fields are present in non-aggregate queries
# Match | followed by optional whitespace/newlines and then 'keep'
keep_pattern = re.compile(r"\|\s*keep\b\s+([^\|]+)", re.IGNORECASE | re.DOTALL)
keep_matches = list(keep_pattern.finditer(query_lower))
if not keep_matches:
raise EsqlSemanticError(
f"Rule: {data['name']} does not contain a 'keep' command -> Add a 'keep' command to the query."
if os.environ.get("DR_BYPASS_ESQL_KEEP_VALIDATION") is None:
bypass_keep_hint = (
" To bypass ES|QL `keep` validation, set the environment variable `DR_BYPASS_ESQL_KEEP_VALIDATION`."
)
# Match | followed by optional whitespace/newlines and then 'keep'
keep_pattern = re.compile(r"\|\s*keep\b\s+([^\|]+)", re.IGNORECASE | re.DOTALL)
keep_matches = list(keep_pattern.finditer(query_lower))
if not keep_matches:
raise EsqlSemanticError(
f"Rule: {data['name']} does not contain a 'keep' command -> Add a 'keep' command to the query."
+ bypass_keep_hint
)
# Ensure that keep clause includes metadata fields on non-aggregate queries
aggregate_pattern = re.compile(r"\|\s*stats\b(?:\s+([^\|]+?))?(?:\s+by\s+([^\|]+))?", re.IGNORECASE | re.DOTALL)
if not aggregate_pattern.search(query_lower):
for keep_match in keep_matches:
raw_keep = re.sub(r"//.*", "", keep_match.group(1))
keep_fields = [field.strip() for field in raw_keep.split(",") if field.strip()]
if "*" not in keep_fields:
required_metadata = {"_id", "_version", "_index"}
if not required_metadata.issubset(set(map(str.strip, keep_fields))):
raise EsqlSemanticError(
f"Rule: {data['name']} contains a keep clause without"
f" metadata fields '_id', '_version', and '_index' ->"
f" Add '_id', '_version', '_index' to the keep command."
)
# Ensure that keep clause includes metadata fields on non-aggregate queries
aggregate_pattern = re.compile(
r"\|\s*stats\b(?:\s+([^\|]+?))?(?:\s+by\s+([^\|]+))?", re.IGNORECASE | re.DOTALL
)
if not aggregate_pattern.search(query_lower):
for keep_match in keep_matches:
raw_keep = re.sub(r"//.*", "", keep_match.group(1))
keep_fields = [field.strip() for field in raw_keep.split(",") if field.strip()]
if "*" not in keep_fields:
required_metadata = {"_id", "_version", "_index"}
if not required_metadata.issubset(set(map(str.strip, keep_fields))):
raise EsqlSemanticError(
f"Rule: {data['name']} contains a keep clause without"
f" metadata fields '_id', '_version', and '_index' ->"
f" Add '_id', '_version', '_index' to the keep command." + bypass_keep_hint
)
@dataclass(frozen=True, kw_only=True)
+13 -4
View File
@@ -136,12 +136,21 @@ def save_etc_dump(contents: dict[str, Any], path: list[str], sort_keys: bool = T
eql.utils.save_dump(contents, path) # type: ignore[reportUnknownVariableType]
# Top-level _config.yaml key -> DR_BYPASS_* env var set when true at load time
OPTIONAL_ELASTIC_VALIDATION_BYPASS_ENV: dict[str, str] = {
    "bypass_note_validation_and_parse": "DR_BYPASS_NOTE_VALIDATION_AND_PARSE",
    "bypass_bbr_lookback_validation": "DR_BYPASS_BBR_LOOKBACK_VALIDATION",
    "bypass_tags_validation": "DR_BYPASS_TAGS_VALIDATION",
    "bypass_timeline_template_validation": "DR_BYPASS_TIMELINE_TEMPLATE_VALIDATION",
    "bypass_esql_keep_validation": "DR_BYPASS_ESQL_KEEP_VALIDATION",
    "bypass_esql_metadata_validation": "DR_BYPASS_ESQL_METADATA_VALIDATION",
}


def set_all_validation_bypass(env_value: bool = False) -> None:
    """Enable or disable every optional validation bypass environment variable.

    The set of variables is taken from ``OPTIONAL_ELASTIC_VALIDATION_BYPASS_ENV`` so
    that newly added bypasses are covered without touching this function.

    Args:
        env_value: When truthy, each ``DR_BYPASS_*`` variable is set to
            ``str(env_value)``. When falsy, each variable is removed from the
            process environment.
    """
    for env_var in OPTIONAL_ELASTIC_VALIDATION_BYPASS_ENV.values():
        if env_value:
            os.environ[env_var] = str(env_value)
        else:
            # The ES|QL keep/metadata checks only test whether the variable is present
            # (`os.environ.get(...) is None`), so writing the string "False" would turn
            # the bypass ON. Removing the variable is the only way to disable it.
            os.environ.pop(env_var, None)
def set_nested_value(obj: dict[str, Any], compound_key: str, value: Any) -> None: