2020-06-29 23:17:38 -06:00
|
|
|
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
2021-03-03 22:12:11 -09:00
|
|
|
# or more contributor license agreements. Licensed under the Elastic License
|
|
|
|
|
# 2.0; you may not use this file except in compliance with the Elastic License
|
|
|
|
|
# 2.0.
|
2020-06-29 23:17:38 -06:00
|
|
|
"""Rule object."""
|
2025-07-01 15:20:55 +02:00
|
|
|
|
2021-09-01 15:29:53 -08:00
|
|
|
import copy
|
2021-05-13 14:27:32 -06:00
|
|
|
import dataclasses
|
2020-06-29 23:17:38 -06:00
|
|
|
import json
|
2022-07-18 15:41:32 -04:00
|
|
|
import os
|
2024-10-09 15:25:36 -04:00
|
|
|
import re
|
2024-04-26 11:12:50 -06:00
|
|
|
import time
|
2021-05-13 14:27:32 -06:00
|
|
|
import typing
|
2021-09-01 15:29:53 -08:00
|
|
|
from abc import ABC, abstractmethod
|
2021-03-24 10:24:32 -06:00
|
|
|
from dataclasses import dataclass, field
|
2021-04-21 14:55:26 -06:00
|
|
|
from functools import cached_property
|
2021-02-10 10:37:26 -09:00
|
|
|
from pathlib import Path
|
2025-07-01 15:20:55 +02:00
|
|
|
from typing import Any, Literal
|
2024-08-06 18:07:12 -04:00
|
|
|
from urllib.parse import urlparse
|
2021-02-08 20:43:16 -09:00
|
|
|
from uuid import uuid4
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
import eql # type: ignore[reportMissingTypeStubs]
|
|
|
|
|
import kql # type: ignore[reportMissingTypeStubs]
|
2024-04-26 11:12:50 -06:00
|
|
|
import marshmallow
|
2022-07-18 15:41:32 -04:00
|
|
|
from marko.block import Document as MarkoDocument
|
|
|
|
|
from marko.ext.gfm import gfm
|
2024-08-06 18:07:12 -04:00
|
|
|
from marshmallow import ValidationError, pre_load, validates_schema
|
2025-07-01 15:20:55 +02:00
|
|
|
from semver import Version
|
2023-02-07 14:26:29 -05:00
|
|
|
|
2022-10-19 09:54:47 -04:00
|
|
|
from . import beats, ecs, endgame, utils
|
2024-08-06 18:07:12 -04:00
|
|
|
from .config import load_current_package_version, parse_rules_config
|
2025-10-15 21:17:07 +02:00
|
|
|
from .esql import get_esql_query_event_dataset_integrations
|
|
|
|
|
from .esql_errors import EsqlSemanticError
|
2025-07-01 15:20:55 +02:00
|
|
|
from .integrations import (
|
|
|
|
|
find_least_compatible_version,
|
|
|
|
|
get_integration_schema_fields,
|
|
|
|
|
load_integrations_manifests,
|
|
|
|
|
load_integrations_schemas,
|
|
|
|
|
)
|
2022-06-27 10:02:15 -05:00
|
|
|
from .mixins import MarshmallowDataclassMixin, StackCompatMixin
|
2022-07-18 15:41:32 -04:00
|
|
|
from .rule_formatter import nested_normalize, toml_write
|
2025-07-01 15:20:55 +02:00
|
|
|
from .schemas import (
|
|
|
|
|
SCHEMA_DIR,
|
|
|
|
|
definitions,
|
|
|
|
|
downgrade,
|
|
|
|
|
get_min_supported_stack_version,
|
|
|
|
|
get_stack_schemas,
|
|
|
|
|
strip_non_public_fields,
|
|
|
|
|
)
|
2022-06-27 10:02:15 -05:00
|
|
|
from .schemas.stack_compat import get_restricted_fields
|
2024-08-06 18:07:12 -04:00
|
|
|
from .utils import PatchedTemplate, cached, convert_time_span, get_nested_value, set_nested_value
|
2025-07-01 15:20:55 +02:00
|
|
|
from .version_lock import VersionLock, loaded_version_lock
|
2024-08-06 18:07:12 -04:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
if typing.TYPE_CHECKING:
|
|
|
|
|
from .remote_validation import RemoteValidator
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
|
|
|
|
|
# Minimum Fleet package version supported when building rule packages.
MIN_FLEET_PACKAGE_VERSION = "7.13.0"
# Date stamp (YYYY/MM/DD) captured once at import time.
TIME_NOW = time.strftime("%Y/%m/%d")

# Rules configuration is loaded once at import; the constants below derive from it.
RULES_CONFIG = parse_rules_config()
DEFAULT_PREBUILT_RULES_DIRS = RULES_CONFIG.rule_dirs
DEFAULT_PREBUILT_BBR_DIRS = RULES_CONFIG.bbr_rules_dirs
BYPASS_VERSION_LOCK = RULES_CONFIG.bypass_version_lock


# Build-time fields mapped to the stack version range (min, max) in which they
# are emitted; None means unbounded on that side. Consumed by
# BaseRuleData.get_build_fields.
BUILD_FIELD_VERSIONS = {
    "related_integrations": (Version.parse("8.3.0"), None),
    "required_fields": (Version.parse("8.3.0"), None),
    "setup": (Version.parse("8.3.0"), None),
}
|
|
|
|
|
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(kw_only=True)
class DictRule:
    """Simple object wrapper for raw rule dicts."""

    contents: dict[str, Any]  # raw rule dictionary (nested or flattened)
    path: Path | None = None  # optional path to the source file

    @property
    def metadata(self) -> dict[str, Any]:
        """Metadata portion of TOML file rule."""
        return self.contents.get("metadata", {})

    @property
    def data(self) -> dict[str, Any]:
        """Rule portion of TOML file rule. Supports nested and flattened rule dictionaries"""
        # NOTE(review): when `contents` is non-empty the middle operand is truthy,
        # so the trailing `.get("rule", {})` can never be reached; the `id`/`name`
        # properties below compensate by digging into a nested "rule" key
        # themselves. Confirm whether the intended order was data -> rule -> contents.
        return self.contents.get("data", {}) or self.contents or self.contents.get("rule", {})

    @property
    def id(self) -> str:
        """Get the rule ID. Supports nested and flattened rule dictionaries."""
        return self.data.get("rule_id") or self.data.get("rule", {}).get("rule_id")

    @property
    def name(self) -> str:
        """Get the rule name. Supports nested and flattened rule dictionaries"""
        return self.data.get("name") or self.data.get("rule", {}).get("name")

    def __hash__(self) -> int:
        """Get the hash of the rule."""
        # hash of the concatenated id and name; assumes both resolve to strings
        return hash(self.id + self.name)

    def __repr__(self) -> str:
        """Get a string representation of the rule."""
        return f"Rule({self.name} {self.id})"
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class RuleMeta(MarshmallowDataclassMixin):
    """Data stored in a rule's [metadata] section of TOML."""

    # Required dates
    creation_date: definitions.Date
    updated_date: definitions.Date
    deprecation_date: definitions.Date | None = None

    # Optional fields
    bypass_bbr_timing: bool | None = None
    comments: str | None = None
    integration: str | list[str] | None = None
    maturity: definitions.Maturity | None = None
    min_stack_version: definitions.SemVer | None = None
    min_stack_comments: str | None = None
    os_type_list: list[definitions.OSType] | None = None
    query_schema_validation: bool | None = None
    related_endpoint_rules: list[str] | None = None
    promotion: bool | None = None

    # Extended information as an arbitrary dictionary
    extended: dict[str, Any] | None = None

    def get_validation_stack_versions(self) -> dict[str, dict[str, Any]]:
        """Get a dict of beats and ecs versions per stack release."""
        return get_stack_schemas(self.min_stack_version)
|
2021-06-30 13:26:27 -08:00
|
|
|
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class RuleTransform(MarshmallowDataclassMixin):
    """Data stored in a rule's [transform] section of TOML."""

    # note (investigation guides) Markdown plugins
    # /elastic/kibana/tree/main/x-pack/plugins/security_solution/public/common/components/markdown_editor/plugins
    ##############################################

    # timelines out of scope at the moment

    @dataclass(frozen=True, kw_only=True)
    class OsQuery:
        # single osquery investigation-guide plugin entry
        label: str
        query: str
        ecs_mapping: dict[str, dict[Literal["field", "value"], str]] | None = None

    @dataclass(frozen=True, kw_only=True)
    class Investigate:
        @dataclass(frozen=True)
        class Provider:
            # one query clause within a provider group
            excluded: bool
            field: str
            queryType: definitions.InvestigateProviderQueryType
            value: str
            valueType: definitions.InvestigateProviderValueType

        label: str
        description: str | None = None
        providers: list[list[Provider]]
        relativeFrom: str | None = None
        relativeTo: str | None = None

    # these must be lists in order to have more than one. Their index in the list is how they will be referenced in the
    # note string templates
    osquery: list[OsQuery] | None = None
    investigate: list[Investigate] | None = None

    def render_investigate_osquery_to_string(self) -> dict[definitions.TransformTypes, list[str]]:
        """Render each osquery/investigate entry into its `!{plugin{...}}` note-plugin string."""
        obj = self.to_dict()

        rendered: dict[definitions.TransformTypes, list[str]] = {"osquery": [], "investigate": []}
        for plugin, entries in obj.items():
            for entry in entries:
                # reject any serialized field that is not a known plugin type
                if plugin not in rendered:
                    raise ValueError(f"Unexpected field value: {plugin}")
                # compact, key-sorted JSON keeps rendered strings deterministic
                rendered[plugin].append(f"!{{{plugin}{json.dumps(entry, sort_keys=True, separators=(',', ':'))}}}")

        return rendered

    ##############################################
|
|
|
|
|
|
|
|
|
|
|
2021-03-24 10:24:32 -06:00
|
|
|
@dataclass(frozen=True)
class BaseThreatEntry:
    """Base threat entry: id, name and reference URL shared by tactic/technique mappings."""

    id: str
    name: str
    reference: str

    @pre_load
    def modify_url(self, data: dict[str, Any], **_: Any) -> dict[str, Any]:
        """Modify the URL to support MITRE ATT&CK URLS with and without trailing forward slash."""
        p = urlparse(data["reference"])  # type: ignore[reportUnknownVariableType]
        # only normalize absolute URLs (ones with a scheme); append the trailing slash
        if p.scheme and not data["reference"].endswith("/"):  # type: ignore[reportUnknownMemberType]
            data["reference"] += "/"
        return data
|
|
|
|
|
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2021-03-24 10:24:32 -06:00
|
|
|
@dataclass(frozen=True)
class SubTechnique(BaseThreatEntry):
    """Mapping to threat subtechnique."""

    # narrows the base `reference` to a subtechnique URL pattern
    reference: definitions.SubTechniqueURL
|
2020-06-29 23:17:38 -06:00
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class Technique(BaseThreatEntry):
    """Mapping to threat technique."""

    # subtechniques are stored at threat[].technique.subtechnique[]
    reference: definitions.TechniqueURL
    subtechnique: list[SubTechnique] | None = None
|
2020-07-15 08:05:55 -06:00
|
|
|
|
|
|
|
|
|
2021-03-24 10:24:32 -06:00
|
|
|
@dataclass(frozen=True)
class Tactic(BaseThreatEntry):
    """Mapping to a threat tactic."""

    # narrows the base `reference` to a tactic URL pattern
    reference: definitions.TacticURL
|
2020-06-29 23:17:38 -06:00
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class ThreatMapping(MarshmallowDataclassMixin):
    """Mapping to a threat framework."""

    framework: Literal["MITRE ATT&CK", "MITRE ATLAS"]
    tactic: Tactic
    technique: list[Technique] | None = None

    @staticmethod
    def flatten(threat_mappings: list["ThreatMapping"] | None) -> "FlatThreatMapping":
        """Get flat lists of tactic and technique info."""
        tactic_names: list[str] = []
        tactic_ids: list[str] = []
        # sets de-duplicate techniques/subtechniques repeated across tactics
        technique_ids: set[str] = set()
        technique_names: set[str] = set()
        sub_technique_ids: set[str] = set()
        sub_technique_names: set[str] = set()

        for entry in threat_mappings or []:
            tactic_names.append(entry.tactic.name)
            tactic_ids.append(entry.tactic.id)

            for technique in entry.technique or []:
                technique_names.add(technique.name)
                technique_ids.add(technique.id)

                for subtechnique in technique.subtechnique or []:
                    sub_technique_ids.add(subtechnique.id)
                    sub_technique_names.add(subtechnique.name)

        # every output list is sorted for stable, deterministic ordering
        return FlatThreatMapping(
            tactic_names=sorted(tactic_names),
            tactic_ids=sorted(tactic_ids),
            technique_names=sorted(technique_names),
            technique_ids=sorted(technique_ids),
            sub_technique_names=sorted(sub_technique_names),
            sub_technique_ids=sorted(sub_technique_ids),
        )
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class RiskScoreMapping(MarshmallowDataclassMixin):
    """Entry of the rule's `risk_score_mapping` list."""

    field: str
    operator: definitions.Operator | None = None
    value: str | None = None
|
2021-03-24 10:24:32 -06:00
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class SeverityMapping(MarshmallowDataclassMixin):
    """Entry of the rule's `severity_mapping` list."""

    field: str
    operator: definitions.Operator | None = None
    value: str | None = None
    severity: str | None = None
|
2021-03-24 10:24:32 -06:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(frozen=True)
class FlatThreatMapping(MarshmallowDataclassMixin):
    """Flattened, sorted tactic/technique/subtechnique names and IDs (see ThreatMapping.flatten)."""

    tactic_names: list[str]
    tactic_ids: list[str]
    technique_names: list[str]
    technique_ids: list[str]
    sub_technique_names: list[str]
    sub_technique_ids: list[str]
|
2021-03-24 10:24:32 -06:00
|
|
|
|
|
|
|
|
|
2024-02-12 09:55:46 -06:00
|
|
|
@dataclass(frozen=True)
class AlertSuppressionDuration:
    """Mapping to alert suppression duration."""

    unit: definitions.TimeUnits
    value: definitions.AlertSuppressionValue
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class AlertSuppressionMapping(MarshmallowDataclassMixin, StackCompatMixin):
    """Mapping to alert suppression."""

    group_by: definitions.AlertSuppressionGroupBy
    duration: AlertSuppressionDuration | None = None
    missing_fields_strategy: definitions.AlertSuppressionMissing
|
2023-03-27 15:37:35 -04:00
|
|
|
|
|
|
|
|
|
2024-02-12 09:55:46 -06:00
|
|
|
@dataclass(frozen=True)
class ThresholdAlertSuppression:
    """Alert suppression for threshold rules (duration only)."""

    duration: AlertSuppressionDuration
|
|
|
|
|
|
|
|
|
|
|
2024-04-01 17:44:50 -03:00
|
|
|
@dataclass(frozen=True)
class FilterStateStore:
    # persistence scope of a saved filter (serialized under the "$state" key of Filter)
    store: definitions.StoreType
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class FilterMeta:
    """Metadata for a Kibana filter (see Filter below)."""

    alias: str | None = None
    disabled: bool | None = None
    negate: bool | None = None
    # NOTE(review): unlike the surrounding fields, the next two carry no `= None`
    # default, which makes them required by the generated schema — confirm intended.
    controlledBy: str | None  # identify who owns the filter
    group: str | None  # allows grouping of filters
    index: str | None = None
    isMultiIndex: bool | None = None
    type: str | None = None
    key: str | None = None
    params: str | None = None  # Expand to FilterMetaParams when needed
    value: str | None = None
|
2024-04-01 17:44:50 -03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(frozen=True)
class WildcardQuery:
    """Wildcard clause used inside a Query's `wildcard` mapping."""

    case_insensitive: bool
    value: str
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class Query:
    """Query portion of a Kibana Filter; only wildcard clauses are modeled."""

    wildcard: dict[str, WildcardQuery] | None = None
|
2024-04-01 17:44:50 -03:00
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class Filter:
    """Kibana Filter for Base Rule Data."""

    # Currently unused in BaseRuleData. Revisit to extend or remove.
    # https://github.com/elastic/detection-rules/issues/3773
    meta: FilterMeta
    # serialized under the "$state" key
    state: FilterStateStore | None = field(metadata={"data_key": "$state"})
    query: Query | dict[str, Any] | None = None
|
2024-04-01 17:44:50 -03:00
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class BaseRuleData(MarshmallowDataclassMixin, StackCompatMixin):
    """Base rule data."""

    @dataclass
    class InvestigationFields:
        # custom highlighted field names for the rule
        field_names: list[definitions.NonEmptyStr]

    @dataclass
    class RequiredFields:
        name: definitions.NonEmptyStr
        type: definitions.NonEmptyStr
        ecs: bool

    @dataclass
    class RelatedIntegrations:
        package: definitions.NonEmptyStr
        version: definitions.NonEmptyStr
        integration: definitions.NonEmptyStr | None = None

    name: definitions.RuleName

    author: list[str]
    description: str
    # `from` is a reserved word in Python, so it is stored as `from_` and
    # (de)serialized through the marshmallow data_key
    from_: str | None = field(metadata={"data_key": "from"})
    # fields below are restricted to minimum stack versions via StackCompatMixin metadata
    investigation_fields: InvestigationFields | None = field(metadata={"metadata": {"min_compat": "8.11"}})
    related_integrations: list[RelatedIntegrations] | None = field(metadata={"metadata": {"min_compat": "8.3"}})
    required_fields: list[RequiredFields] | None = field(metadata={"metadata": {"min_compat": "8.3"}})
    revision: int | None = field(metadata={"metadata": {"min_compat": "8.8"}})
    setup: definitions.Markdown | None = field(metadata={"metadata": {"min_compat": "8.3"}})

    risk_score: definitions.RiskScore
    rule_id: definitions.UUIDString
    severity: definitions.Severity
    type: definitions.RuleType

    # optional fields
    actions: list[dict[str, Any]] | None = None
    building_block_type: definitions.BuildingBlockType | None = None
    enabled: bool | None = None
    exceptions_list: list[dict[str, str]] | None = None
    false_positives: list[str] | None = None
    filters: list[dict[str, Any]] | None = None
    interval: definitions.Interval | None = None
    license: str | None = None
    max_signals: definitions.MaxSignals | None = None
    meta: dict[str, Any] | None = None
    note: definitions.Markdown | None = None
    references: list[str] | None = None
    risk_score_mapping: list[RiskScoreMapping] | None = None
    rule_name_override: str | None = None
    severity_mapping: list[SeverityMapping] | None = None
    tags: list[str] | None = None
    threat: list[ThreatMapping] | None = None
    throttle: str | None = None
    timeline_id: definitions.TimelineTemplateId | None = None
    timeline_title: definitions.TimelineTemplateTitle | None = None
    timestamp_override: str | None = None
    to: str | None = None
    version: definitions.PositiveInteger | None = None

    @classmethod
    def save_schema(cls) -> None:
        """Save the schema as a jsonschema."""
        fields: tuple[dataclasses.Field[Any], ...] = dataclasses.fields(cls)
        type_field = next(f for f in fields if f.name == "type")
        # subclasses pin `type` to a Literal; the base class itself is saved as "base"
        rule_type = typing.get_args(type_field.type)[0] if cls != BaseRuleData else "base"
        schema = cls.jsonschema()
        version_dir = SCHEMA_DIR / "master"
        version_dir.mkdir(exist_ok=True, parents=True)

        # expand out the jsonschema definitions
        with (version_dir / f"master.{rule_type}.json").open("w") as f:
            json.dump(schema, f, indent=2, sort_keys=True)

    def validate_query(self, _: RuleMeta) -> None:
        """Query validation hook; intentionally a no-op on the base class."""
        pass

    @cached_property
    def get_restricted_fields(self) -> dict[str, tuple[Version | None, Version | None]] | None:
        """Get stack version restricted fields."""
        fields: list[dataclasses.Field[Any]] = list(dataclasses.fields(self))
        return get_restricted_fields(fields)

    @cached_property
    def data_validator(self) -> "DataValidator | None":
        # additional (beyond-marshmallow) validation over this rule's serialized data
        return DataValidator(is_elastic_rule=self.is_elastic_rule, **self.to_dict())

    @cached_property
    def notify(self) -> bool:
        # opt-in (via env var) notification about available integration updates
        return os.environ.get("DR_NOTIFY_INTEGRATION_UPDATE_AVAILABLE") is not None

    @cached_property
    def parsed_note(self) -> MarkoDocument | None:
        # markdown AST of the note, provided by the data validator when available
        dv = self.data_validator
        if dv:
            return dv.parsed_note
        return None

    @property
    def is_elastic_rule(self) -> bool:
        # a rule is considered Elastic's when "elastic" appears among its authors
        return "elastic" in [a.lower() for a in self.author]

    def get_build_fields(self) -> dict[str, tuple[Version, None]]:
        """Get a list of build-time fields along with the stack versions which they will build within."""
        rule_fields = {f.name: f for f in dataclasses.fields(self)}
        return {fld: val for fld, val in BUILD_FIELD_VERSIONS.items() if fld in rule_fields}

    @classmethod
    def process_transforms(cls, transform: RuleTransform, obj: dict[str, Any]) -> dict[str, Any]:
        """Process transforms from toml [transform] called in TOMLRuleContents.to_dict."""
        # only create functions that CAREFULLY mutate the obj dict

        # Format the note field with osquery and investigate plugin strings
        note = obj.get("note")
        if not note:
            return obj

        rendered = transform.render_investigate_osquery_to_string()
        # substitution keys look like "osquery_0", "investigate_1", ...
        rendered_patterns: dict[str, Any] = {}
        for plugin, entries in rendered.items():
            rendered_patterns.update(**{f"{plugin}_{i}": e for i, e in enumerate(entries)})  # type: ignore[reportUnknownMemberType]

        # safe_substitute leaves unreferenced placeholders intact
        note_template = PatchedTemplate(note)
        rendered_note = note_template.safe_substitute(**rendered_patterns)
        obj["note"] = rendered_note

        return obj

    @validates_schema
    def validates_data(self, data: dict[str, Any], **_: Any) -> None:
        """Validate fields and data for marshmallow schemas."""

        # Validate version and revision fields not supplied.
        disallowed_fields = [field for field in ["version", "revision"] if data.get(field) is not None]
        if not disallowed_fields:
            return

        # If version and revision fields are supplied, and using locked versions raise an error.
        if BYPASS_VERSION_LOCK is not True:
            error_message = " and ".join(disallowed_fields)
            msg = (
                f"Configuration error: Rule {data['name']} - {data['rule_id']} "
                f"should not contain rules with `{error_message}` set."
            )
            raise ValidationError(msg)
|
|
|
|
|
|
2022-07-18 15:41:32 -04:00
|
|
|
|
|
|
|
|
class DataValidator:
|
|
|
|
|
"""Additional validation beyond base marshmallow schema validation."""
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
    def __init__(  # noqa: PLR0913
        self,
        name: definitions.RuleName,
        is_elastic_rule: bool,
        note: definitions.Markdown | None = None,
        interval: definitions.Interval | None = None,
        building_block_type: definitions.BuildingBlockType | None = None,
        setup: str | None = None,
        **extras: Any,
    ) -> None:
        """Capture only the rule fields needing extra validation; all other fields arrive via **extras."""
        # only define fields needing additional validation
        self.name = name
        self.is_elastic_rule = is_elastic_rule
        self.note = note
        # Need to use extras because from is a reserved word in python
        self.from_ = extras.get("from")
        self.interval = interval
        self.building_block_type = building_block_type
        self.setup = setup
        # set to True by validate_note when a "## Setup" header is found in the note
        self._setup_in_note = False
|
|
|
|
|
|
|
|
|
|
    @cached_property
    def parsed_note(self) -> MarkoDocument | None:
        """Parse the note markdown (GFM) into a marko document; None when there is no note."""
        if self.note:
            return gfm.parse(self.note)
        return None
|
2022-07-18 15:41:32 -04:00
|
|
|
|
|
|
|
|
    @property
    def setup_in_note(self) -> bool:
        # whether a Setup header was detected in the note (populated by validate_note)
        return self._setup_in_note
|
|
|
|
|
|
|
|
|
|
    @setup_in_note.setter
    def setup_in_note(self, value: bool) -> None:
        # simple pass-through setter for the backing attribute
        self._setup_in_note = value
|
|
|
|
|
|
|
|
|
|
    @cached_property
    def skip_validate_note(self) -> bool:
        # env-var escape hatch to bypass note markdown validation and parsing
        return os.environ.get("DR_BYPASS_NOTE_VALIDATION_AND_PARSE") is not None
|
2022-07-18 15:41:32 -04:00
|
|
|
|
2023-06-20 13:00:30 +00:00
|
|
|
    @cached_property
    def skip_validate_bbr(self) -> bool:
        # env-var escape hatch to bypass BBR lookback/interval validation
        return os.environ.get("DR_BYPASS_BBR_LOOKBACK_VALIDATION") is not None
|
2023-06-20 13:00:30 +00:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def validate_bbr(self, bypass: bool = False) -> None:
|
2023-06-20 13:00:30 +00:00
|
|
|
"""Validate building block type and rule type."""
|
|
|
|
|
|
|
|
|
|
if self.skip_validate_bbr or bypass:
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
def validate_lookback(str_time: str) -> bool:
|
|
|
|
|
"""Validate that the time is at least now-119m and at least 60m respectively."""
|
|
|
|
|
try:
|
|
|
|
|
if "now-" in str_time:
|
|
|
|
|
str_time = str_time[4:]
|
|
|
|
|
time = convert_time_span(str_time)
|
|
|
|
|
# if from time is less than 119m as milliseconds
|
|
|
|
|
if time < 119 * 60 * 1000:
|
|
|
|
|
return False
|
|
|
|
|
else:
|
|
|
|
|
return False
|
|
|
|
|
except Exception as e:
|
2025-07-01 15:20:55 +02:00
|
|
|
raise ValidationError(f"Invalid time format: {e}") from e
|
2023-06-20 13:00:30 +00:00
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
def validate_interval(str_time: str) -> bool:
|
|
|
|
|
"""Validate that the time is at least now-119m and at least 60m respectively."""
|
|
|
|
|
try:
|
|
|
|
|
time = convert_time_span(str_time)
|
|
|
|
|
# if interval time is less than 60m as milliseconds
|
|
|
|
|
if time < 60 * 60 * 1000:
|
|
|
|
|
return False
|
|
|
|
|
except Exception as e:
|
2025-07-01 15:20:55 +02:00
|
|
|
raise ValidationError(f"Invalid time format: {e}") from e
|
2023-06-20 13:00:30 +00:00
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
bypass_instructions = "To bypass, use the environment variable `DR_BYPASS_BBR_LOOKBACK_VALIDATION`"
|
|
|
|
|
if self.building_block_type:
|
|
|
|
|
if not self.from_ or not self.interval:
|
|
|
|
|
raise ValidationError(
|
|
|
|
|
f"{self.name} is invalid."
|
|
|
|
|
"BBR require `from` and `interval` to be defined. "
|
|
|
|
|
"Please set or bypass." + bypass_instructions
|
|
|
|
|
)
|
2025-07-01 15:20:55 +02:00
|
|
|
if not validate_lookback(self.from_) or not validate_interval(self.interval):
|
2023-06-20 13:00:30 +00:00
|
|
|
raise ValidationError(
|
|
|
|
|
f"{self.name} is invalid."
|
|
|
|
|
"Default BBR require `from` and `interval` to be at least now-119m and at least 60m respectively "
|
2025-02-24 20:16:43 +05:30
|
|
|
"(using the now-Xm and Xm format where x is in minutes). "
|
2023-06-20 13:00:30 +00:00
|
|
|
"Please update values or bypass. " + bypass_instructions
|
|
|
|
|
)
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
    def validate_note(self) -> None:
        """Parse the markdown `note` field and enforce Setup-section conventions.

        Raises ValidationError when the markdown cannot be processed or the
        Setup/Config heading rules are violated; sets `self.setup_in_note` as a
        side effect when a Setup heading is found.
        """
        # Nothing to do when validation is bypassed or there is no note.
        if self.skip_validate_note or not self.note:
            return

        # `parsed_note` may be empty/None when parsing was skipped upstream.
        if not self.parsed_note:
            return

        try:
            for child in self.parsed_note.children:
                if child.get_type() == "Heading":
                    # Render the heading's children back to text to compare content.
                    header = gfm.renderer.render_children(child)

                    if header.lower() == "setup":
                        # check that the Setup header is correctly formatted at level 2
                        if child.level != 2:  # type: ignore[reportAttributeAccessIssue] # noqa: PLR2004
                            raise ValidationError(f"Setup section with wrong header level: {child.level}")  # type: ignore[reportAttributeAccessIssue] # noqa: TRY301

                        # check that the Setup header is capitalized
                        if child.level == 2 and header != "Setup":  # type: ignore[reportAttributeAccessIssue] # noqa: PLR2004
                            raise ValidationError(f"Setup header has improper casing: {header}")  # noqa: TRY301

                        self.setup_in_note = True

                    # check that the header Config does not exist in the Setup section
                    elif child.level == 2 and "config" in header.lower():  # type: ignore[reportAttributeAccessIssue] # noqa: PLR2004
                        raise ValidationError(f"Setup header contains Config: {header}")  # noqa: TRY301

        except Exception as e:
            raise ValidationError(
                f"Invalid markdown in rule `{self.name}`: {e}. To bypass validation on the `note`"
                f"field, use the environment variable `DR_BYPASS_NOTE_VALIDATION_AND_PARSE`"
            ) from e

        # raise if setup header is in note and in setup
        if self.setup_in_note and (self.setup and self.setup != "None"):
            raise ValidationError("Setup header found in both note and setup fields.")
|
|
|
|
|
|
2021-03-24 10:24:32 -06:00
|
|
|
|
2021-04-21 14:55:26 -06:00
|
|
|
@dataclass
class QueryValidator:
    """Abstract base for language-specific query validators (KQL/EQL/ES|QL subclasses)."""

    # Raw query string under validation.
    query: str

    @property
    def ast(self) -> Any:
        """Parsed AST of the query; provided by subclasses."""
        raise NotImplementedError

    @property
    def unique_fields(self) -> Any:
        """Unique field names referenced by the query; provided by subclasses."""
        raise NotImplementedError

    def validate(self, _: "QueryRuleData", __: RuleMeta) -> None:
        """Validate the query against rule data and metadata; provided by subclasses."""
        raise NotImplementedError

    @cached
    def get_required_fields(self, index: str) -> list[dict[str, Any]]:
        """Retrieves fields needed for the query along with type information from the schema."""
        current_version = Version.parse(load_current_package_version(), optional_minor_and_patch=True)
        # Stack-pinned schema versions for the current package release.
        ecs_version = get_stack_schemas()[str(current_version)]["ecs"]
        beats_version = get_stack_schemas()[str(current_version)]["beats"]
        endgame_version = get_stack_schemas()[str(current_version)]["endgame"]
        ecs_schema = ecs.get_schema(ecs_version)

        # NOTE(review): `index` is annotated as str but used like a list (`index or []`) — confirm callers.
        _, beat_schema, schema = self.get_beats_schema(index or [], beats_version, ecs_version)
        endgame_schema = self.get_endgame_schema(index or [], endgame_version)

        # construct integration schemas
        packages_manifest = load_integrations_manifests()
        integrations_schemas = load_integrations_schemas()
        datasets: set[str] = set()
        if self.ast:
            datasets, _ = beats.get_datasets_and_modules(self.ast)
        package_integrations = parse_datasets(list(datasets), packages_manifest)
        int_schema: dict[str, Any] = {}
        data = {"notify": False}

        # Merge the field schema of every integration referenced by the query's datasets.
        for pk_int in package_integrations:
            package = pk_int["package"]
            integration = pk_int["integration"]
            schema, _ = get_integration_schema_fields(
                integrations_schemas, package, integration, current_version, packages_manifest, {}, data
            )
            int_schema.update(schema)

        required: list[dict[str, Any]] = []
        unique_fields: list[str] = self.unique_fields or []
        if isinstance(self, ESQLValidator):
            # Drop dynamically-generated ES|QL fields that will never exist in any schema.
            unique_fields = [f for f in unique_fields if not f.startswith(definitions.ESQL_DYNAMIC_FIELD_PREFIXES)]

        for fld in unique_fields:
            field_type = ecs_schema.get(fld, {}).get("type")
            is_ecs = field_type is not None

            # Fall back through integration -> beats -> endgame schemas for non-ECS fields.
            if not is_ecs:
                if int_schema:
                    field_type = int_schema.get(fld)
                elif beat_schema:
                    field_type = beat_schema.get(fld, {}).get("type")
                elif endgame_schema:
                    field_type = endgame_schema.endgame_schema.get(fld, None)

            if not field_type and isinstance(self, ESQLValidator):
                field_type = self.get_unique_field_type(fld)

            required.append({"name": fld, "type": field_type or "unknown", "ecs": is_ecs})

        return sorted(required, key=lambda f: f["name"])

    @cached
    def get_beats_schema(
        self, indices: list[str], beats_version: str, ecs_version: str
    ) -> tuple[list[str], dict[str, Any] | None, dict[str, Any]]:
        """Get an assembled beats schema."""
        beat_types = beats.parse_beats_from_index(indices)
        beat_schema = beats.get_schema_from_kql(self.ast, beat_types, version=beats_version) if beat_types else None
        schema = ecs.get_kql_schema(version=ecs_version, indexes=indices, beat_schema=beat_schema)
        return beat_types, beat_schema, schema

    @cached
    def get_endgame_schema(self, indices: list[str], endgame_version: str) -> endgame.EndgameSchema | None:
        """Get an assembled flat endgame schema."""
        # Only include endgame when explicitly requested by TOML via indices
        if not indices or "endgame-*" not in indices:
            return None

        endgame_schema = endgame.read_endgame_schema(endgame_version=endgame_version)
        return endgame.EndgameSchema(endgame_schema)
|
|
|
|
|
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class QueryRuleData(BaseRuleData):
    """Specific fields for query event types."""

    type: Literal["query"]
    query: str
    language: definitions.FilterLanguages
    alert_suppression: AlertSuppressionMapping | None = field(metadata={"metadata": {"min_compat": "8.8"}})

    # At most one of `index` / `data_view_id` may be set (enforced by schema validator below).
    index: list[str] | None = None
    data_view_id: str | None = None

    @cached_property
    def index_or_dataview(self) -> list[str]:
        """Return the index or dataview depending on which is set. If neither returns empty list."""
        if self.index is not None:
            return self.index
        if self.data_view_id is not None:
            return [self.data_view_id]
        return []

    @cached_property
    def validator(self) -> QueryValidator | None:
        """Language-specific query validator, or None for unsupported languages (e.g. lucene)."""
        if self.language == "kuery":
            return KQLValidator(self.query)
        if self.language == "eql":
            return EQLValidator(self.query)
        if self.language == "esql":
            return ESQLValidator(self.query)
        return None

    def validate_query(self, meta: RuleMeta) -> None:  # type: ignore[reportIncompatibleMethodOverride]
        """Validate the query with the language-appropriate validator, if one exists."""
        validator = self.validator
        if validator:
            validator.validate(self, meta)

    @cached_property
    def ast(self) -> Any:
        """Parsed AST of the query, or None when no validator applies."""
        validator = self.validator
        if validator is not None:
            return validator.ast
        return None

    @cached_property
    def unique_fields(self) -> Any:
        """Unique fields referenced by the query, or None when no validator applies."""
        validator = self.validator
        if validator is not None:
            return validator.unique_fields
        return None

    @cached
    def get_required_fields(self, index: str) -> list[dict[str, Any]] | None:
        """Required fields (with type info) for the query, or None when no validator applies."""
        validator = self.validator
        if validator is not None:
            return validator.get_required_fields(index or [])
        return None

    @validates_schema
    def validates_index_and_data_view_id(self, data: dict[str, Any], **_: Any) -> None:
        """Validate that either index or data_view_id is set, but not both."""
        if data.get("index") and data.get("data_view_id"):
            raise ValidationError("Only one of index or data_view_id should be set.")
|
|
|
|
|
|
2020-10-01 00:40:55 -05:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class MachineLearningRuleData(BaseRuleData):
    """Specific fields for machine learning rule types."""

    type: Literal["machine_learning"]

    # Anomaly score threshold above which the rule generates alerts.
    anomaly_threshold: int
    # One or more ML job IDs this rule depends on.
    machine_learning_job_id: str | list[str]
    alert_suppression: AlertSuppressionMapping | None = field(metadata={"metadata": {"min_compat": "8.15"}})
|
2020-09-16 08:36:48 -06:00
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class ThresholdQueryRuleData(QueryRuleData):
    """Specific fields for threshold query rule types."""

    @dataclass(frozen=True, kw_only=True)
    class ThresholdMapping(MarshmallowDataclassMixin):
        """Threshold settings: group-by field(s), threshold value, and optional cardinality."""

        @dataclass(frozen=True)
        class ThresholdCardinality:
            """Cardinality constraint on a single field."""

            field: str
            value: definitions.ThresholdValue

        field: definitions.CardinalityFields
        value: definitions.ThresholdValue
        cardinality: list[ThresholdCardinality] | None = None

    type: Literal["threshold"]  # type: ignore[reportIncompatibleVariableOverride]
    threshold: ThresholdMapping
    alert_suppression: ThresholdAlertSuppression | None = field(metadata={"metadata": {"min_compat": "8.12"}})  # type: ignore[reportIncompatibleVariableOverride]

    def validate(self, meta: RuleMeta) -> None:
        """Validate threshold fields count based on stack version."""
        current_min_stack = load_current_package_version()
        min_stack_raw = meta.min_stack_version or current_min_stack
        min_stack = Version.parse(min_stack_raw, optional_minor_and_patch=True)
        cutoff = Version.parse("9.2.0")

        # Stacks >= 9.2 raised the group-by field cap from 3 to 5.
        default_cap_lt_9_2 = 3
        default_cap_ge_9_2 = 5
        is_ge_9_2 = min_stack >= cutoff
        max_fields_allowed = default_cap_ge_9_2 if is_ge_9_2 else default_cap_lt_9_2

        fields = self.threshold.field or []
        if len(fields) > max_fields_allowed:
            # Tailored hint based on stack cap in effect
            if is_ge_9_2:
                hint = f" Reduce to {max_fields_allowed} or fewer fields."
            else:
                hint = (
                    f" Reduce to {max_fields_allowed} or fewer fields, or set "
                    "metadata.min_stack_version to 9.2.0+ "
                    f"to allow up to {default_cap_ge_9_2}."
                )

            raise ValidationError(
                f"threshold field supports at most {max_fields_allowed} field(s) for min_stack_version "
                f"{min_stack_raw or 'unspecified (<9.2 assumed)'}. "
                f"Received {len(fields)} group_by fields." + hint
            )
|
|
|
|
|
|
2021-03-24 10:24:32 -06:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class NewTermsRuleData(QueryRuleData):
    """Specific fields for new terms field rule."""

    @dataclass(frozen=True)
    class NewTermsMapping(MarshmallowDataclassMixin):
        """TOML-side representation of new-terms settings."""

        @dataclass(frozen=True)
        class HistoryWindowStart:
            """Single history-window-start entry (field name + value)."""

            field: definitions.NonEmptyStr
            value: definitions.NonEmptyStr

        field: definitions.NonEmptyStr
        value: definitions.NewTermsFields
        history_window_start: list[HistoryWindowStart]

    type: Literal["new_terms"]  # type: ignore[reportIncompatibleVariableOverride]
    new_terms: NewTermsMapping
    alert_suppression: AlertSuppressionMapping | None = field(metadata={"metadata": {"min_compat": "8.14"}})

    @pre_load
    def preload_data(self, data: dict[str, Any], **_: Any) -> dict[str, Any]:
        """Preloads and formats the data to match the required schema."""
        # Fold the flat API-style keys into the nested `new_terms` TOML shape.
        if "new_terms_fields" in data and "history_window_start" in data:
            new_terms_mapping = {
                "field": "new_terms_fields",
                "value": data["new_terms_fields"],
                "history_window_start": [{"field": "history_window_start", "value": data["history_window_start"]}],
            }
            data["new_terms"] = new_terms_mapping

            # cleanup original fields after building into our toml format
            data.pop("new_terms_fields")
            data.pop("history_window_start")
        return data

    def transform(self, obj: dict[str, Any]) -> dict[str, Any]:
        """Transforms new terms data to API format for Kibana."""
        # Inverse of preload_data: flatten the nested mapping back to API keys.
        obj[obj["new_terms"].get("field")] = obj["new_terms"].get("value")
        obj["history_window_start"] = obj["new_terms"]["history_window_start"][0].get("value")
        del obj["new_terms"]
        return obj
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class EQLRuleData(QueryRuleData):
    """EQL rules are a special case of query rules."""

    type: Literal["eql"]  # type: ignore[reportIncompatibleVariableOverride]
    language: Literal["eql"]
    timestamp_field: str | None = field(metadata={"metadata": {"min_compat": "8.0"}})
    event_category_override: str | None = field(metadata={"metadata": {"min_compat": "8.0"}})
    tiebreaker_field: str | None = field(metadata={"metadata": {"min_compat": "8.0"}})
    alert_suppression: AlertSuppressionMapping | None = field(metadata={"metadata": {"min_compat": "8.14"}})

    def convert_relative_delta(self, lookback: str) -> int:
        """Convert a date-math lookback string (e.g. `now-5m` or `5m`) to signed milliseconds."""
        now = len("now")
        min_length = now + len("+5m")

        # "now±<span>" form: strip the prefix and apply the sign to the span.
        if lookback.startswith("now") and len(lookback) >= min_length:
            lookback = lookback[len("now") :]
            sign = lookback[0]  # + or -
            span = lookback[1:]
            amount = convert_time_span(span)
            return amount * (-1 if sign == "-" else 1)
        # Bare span form (no "now" prefix): treated as positive.
        return convert_time_span(lookback)

    @cached_property
    def is_sample(self) -> bool:
        """Checks if the current rule is a sample-based rule."""
        return eql.utils.get_query_type(self.ast) == "sample"  # type: ignore[reportUnknownMemberType]

    @cached_property
    def is_sequence(self) -> bool:
        """Checks if the current rule is a sequence-based rule."""
        return eql.utils.get_query_type(self.ast) == "sequence"  # type: ignore[reportUnknownMemberType]

    @cached_property
    def max_span(self) -> int | None:
        """Maxspan value for sequence rules if defined."""
        if not self.ast:
            raise ValueError("No AST found")
        if self.is_sequence and hasattr(self.ast.first, "max_span"):
            return self.ast.first.max_span.as_milliseconds() if self.ast.first.max_span else None
        return None

    @cached_property
    def look_back(self) -> int | Literal["unknown"] | None:
        """Lookback value of a rule."""
        # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math
        to = self.convert_relative_delta(self.to) if self.to else 0
        from_ = self.convert_relative_delta(self.from_ or "now-6m")

        # Both deltas zero -> cannot determine the window.
        if not (to or from_):
            return "unknown"
        return to - from_

    @cached_property
    def interval_ratio(self) -> float | None:
        """Ratio of interval time window / max_span time window."""
        if self.max_span:
            interval = convert_time_span(self.interval or "5m")
            return interval / self.max_span
        return None
|
2021-07-22 13:53:13 -08:00
|
|
|
|
2020-07-15 08:05:55 -06:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class ESQLRuleData(QueryRuleData):
    """ESQL rules are a special case of query rules."""

    type: Literal["esql"]  # type: ignore[reportIncompatibleVariableOverride]
    language: Literal["esql"]
    query: str
    alert_suppression: AlertSuppressionMapping | None = field(metadata={"metadata": {"min_compat": "8.15"}})

    @validates_schema
    def validates_esql_data(self, data: dict[str, Any], **_: Any) -> None:
        """Custom validation for query rule type and subclasses."""
        # ES|QL queries name their source inside the query itself (FROM ...), so `index` is disallowed.
        if data.get("index"):
            raise EsqlSemanticError("Index is not a valid field for ES|QL rule type.")

        # Convert the query string to lowercase to handle case insensitivity
        query_lower = data["query"].lower()

        # Combine both patterns using an OR operator and compile the regex.
        # The first part matches the metadata fields in the from clause by allowing one or
        # multiple indices and any order of the metadata fields
        # The second part matches the stats command with the by clause
        combined_pattern = re.compile(
            r"(from\s+(?:\S+\s*,\s*)*\S+\s+metadata\s+"
            r"(?:_id|_version|_index)(?:,\s*(?:_id|_version|_index)){2})"
            r"|(\bstats\b.*?\bby\b)",
            re.DOTALL,
        )

        # Ensure that non-aggregate queries have metadata
        if os.environ.get("DR_BYPASS_ESQL_METADATA_VALIDATION") is None:
            bypass_metadata_hint = (
                " To bypass ES|QL `FROM` metadata validation, set the environment variable "
                "`DR_BYPASS_ESQL_METADATA_VALIDATION`."
            )
            if not combined_pattern.search(query_lower):
                raise EsqlSemanticError(
                    f"Rule: {data['name']} contains a non-aggregate query without"
                    f" metadata fields '_id', '_version', and '_index' ->"
                    f" Add 'metadata _id, _version, _index' to the from command or add an aggregate function."
                    + bypass_metadata_hint
                )

        # Enforce KEEP command for ESQL rules and that METADATA fields are present in non-aggregate queries
        if os.environ.get("DR_BYPASS_ESQL_KEEP_VALIDATION") is None:
            bypass_keep_hint = (
                " To bypass ES|QL `keep` validation, set the environment variable `DR_BYPASS_ESQL_KEEP_VALIDATION`."
            )
            # Match | followed by optional whitespace/newlines and then 'keep'
            keep_pattern = re.compile(r"\|\s*keep\b\s+([^\|]+)", re.IGNORECASE | re.DOTALL)
            keep_matches = list(keep_pattern.finditer(query_lower))
            if not keep_matches:
                raise EsqlSemanticError(
                    f"Rule: {data['name']} does not contain a 'keep' command -> Add a 'keep' command to the query."
                    + bypass_keep_hint
                )

            # Ensure that keep clause includes metadata fields on non-aggregate queries
            aggregate_pattern = re.compile(
                r"\|\s*stats\b(?:\s+([^\|]+?))?(?:\s+by\s+([^\|]+))?", re.IGNORECASE | re.DOTALL
            )
            if not aggregate_pattern.search(query_lower):
                for keep_match in keep_matches:
                    # Strip line comments before splitting the keep field list.
                    raw_keep = re.sub(r"//.*", "", keep_match.group(1))
                    keep_fields = [field.strip() for field in raw_keep.split(",") if field.strip()]
                    # A wildcard keep retains everything, including metadata columns.
                    if "*" not in keep_fields:
                        required_metadata = {"_id", "_version", "_index"}
                        if not required_metadata.issubset(set(map(str.strip, keep_fields))):
                            raise EsqlSemanticError(
                                f"Rule: {data['name']} contains a keep clause without"
                                f" metadata fields '_id', '_version', and '_index' ->"
                                f" Add '_id', '_version', '_index' to the keep command." + bypass_keep_hint
                            )
|
2026-01-14 16:03:24 -05:00
|
|
|
|
2023-12-08 13:46:28 -06:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
@dataclass(frozen=True, kw_only=True)
class ThreatMatchRuleData(QueryRuleData):
    """Specific fields for indicator (threat) match rule."""

    @dataclass(frozen=True)
    class Entries:
        """A group of threat-mapping entries; entries within a group are ANDed."""

        @dataclass(frozen=True)
        class ThreatMapEntry(StackCompatMixin):
            """Single mapping between a source event field and an indicator field."""

            field: definitions.NonEmptyStr
            type: Literal["mapping"]
            value: definitions.NonEmptyStr
            # Use dataclasses.field to avoid shadowing by attribute name "field"
            negate: bool | None = dataclasses.field(  # type: ignore[reportIncompatibleVariableOverride]
                metadata={"metadata": {"min_compat": "9.2"}}
            )

        entries: list[ThreatMapEntry]

    type: Literal["threat_match"]  # type: ignore[reportIncompatibleVariableOverride]

    concurrent_searches: definitions.PositiveInteger | None = None
    items_per_search: definitions.PositiveInteger | None = None

    threat_mapping: list[Entries]
    threat_filters: list[dict[str, Any]] | None = None
    threat_query: str | None = None
    threat_language: definitions.FilterLanguages | None = None
    threat_index: list[str]
    threat_indicator_path: str | None = None
    alert_suppression: AlertSuppressionMapping | None = field(metadata={"metadata": {"min_compat": "8.13"}})

    def validate_query(self, meta: RuleMeta) -> None:
        """Validate the main query, then the threat query with its own language validator."""
        super().validate_query(meta)

        if self.threat_query:
            if not self.threat_language:
                raise ValidationError("`threat_language` required when a `threat_query` is defined")

            if self.threat_language == "kuery":
                threat_query_validator = KQLValidator(self.threat_query)
            elif self.threat_language == "eql":
                threat_query_validator = EQLValidator(self.threat_query)
            else:
                # Other threat languages have no dedicated validator here.
                return

            threat_query_validator.validate(self, meta)

    def validate(self, meta: RuleMeta) -> None:  # noqa: ARG002
        """Validate negate usage and group semantics for threat mapping."""
        for idx, group in enumerate(self.threat_mapping or []):
            entries = group.entries or []

            # Enforce: DOES NOT MATCH entries are allowed only if there is at least
            # one MATCH (non-negated) entry in the same group
            has_negate = any(bool(getattr(e, "negate", False)) for e in entries)
            has_match = any(not bool(getattr(e, "negate", False)) for e in entries)
            if has_negate and not has_match:
                msg = (
                    f"threat_mapping group {idx}: DOES NOT MATCH entries require at least one MATCH "
                    "(non-negated) entry in the same group."
                )
                raise ValidationError(msg)

            # Track negate presence per (source.field, indicator.field) pair to detect
            # conflicts where both MATCH and DOES NOT MATCH are defined for the same pair
            pair_to_negates: dict[tuple[str, str], set[bool]] = {}
            for e in entries:
                is_neg = bool(getattr(e, "negate", False))
                pair_to_negates.setdefault((e.field, e.value), set()).add(is_neg)

            for (src_field, ind_field), flags in pair_to_negates.items():
                if True in flags and False in flags:
                    msg = (
                        f"threat_mapping group {idx}: cannot define both MATCH and DOES NOT MATCH for the same "
                        f"source and indicator fields: '{src_field}' <-> '{ind_field}'."
                    )
                    raise ValidationError(msg)
|
2021-04-22 12:03:57 -05:00
|
|
|
|
2021-03-24 10:24:32 -06:00
|
|
|
# All of the possible rule types
|
2021-08-31 21:06:14 -08:00
|
|
|
# Sort inverse of any inheritance - see comment in TOMLRuleContents.to_dict
|
2025-10-09 16:21:21 -04:00
|
|
|
# ThresholdQueryRuleData needs to be first in this union to handle cases where there is ambiguity between
|
|
|
|
|
# ThresholdAlertSuppression and AlertSuppressionMapping. Since AlertSuppressionMapping has duration as an
|
|
|
|
|
# optional field, ThresholdAlertSuppression objects can be mistakenly loaded as an AlertSuppressionMapping
|
|
|
|
|
# object with group_by and missing_fields_strategy as missing parameters, resulting in an error.
|
|
|
|
|
# Checking the type against ThresholdQueryRuleData first in the union prevent this from occurring.
|
|
|
|
|
# Please also keep issue 1141 in mind when handling union schemas.
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
# Member order matters: the first schema that matches wins during deserialization
# (see the explanatory comment above about ThresholdQueryRuleData ambiguity).
AnyRuleData = (
    ThresholdQueryRuleData
    | EQLRuleData
    | ESQLRuleData
    | ThreatMatchRuleData
    | MachineLearningRuleData
    | QueryRuleData
    | NewTermsRuleData
)
|
2020-07-15 08:05:55 -06:00
|
|
|
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2021-09-01 15:29:53 -08:00
|
|
|
class BaseRuleContents(ABC):
|
|
|
|
|
"""Base contents object for shared methods between active and deprecated rules."""
|
2021-05-13 14:27:32 -06:00
|
|
|
|
2021-03-24 10:24:32 -06:00
|
|
|
    @property
    @abstractmethod
    def id(self) -> str:
        """Unique rule ID; provided by concrete subclasses."""
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2021-03-24 10:24:32 -06:00
|
|
|
    @property
    @abstractmethod
    def name(self) -> str:
        """Human-readable rule name; provided by concrete subclasses."""
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2021-11-15 08:46:12 -09:00
|
|
|
    @property
    @abstractmethod
    def version_lock(self) -> "VersionLock":
        """Version lock used to resolve locked hashes/versions; provided by subclasses."""
|
|
|
|
|
|
2022-03-24 11:56:27 -08:00
|
|
|
    @property
    @abstractmethod
    def type(self) -> str:
        """Rule type identifier (e.g. query, eql); provided by subclasses."""
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def lock_info(self, bump: bool = True) -> dict[str, Any]:
|
2024-08-06 18:07:12 -04:00
|
|
|
version = self.autobumped_version if bump else (self.saved_version or 1)
|
2025-07-01 15:20:55 +02:00
|
|
|
return {"rule_name": self.name, "sha256": self.get_hash(), "version": version, "type": self.type}
|
2021-03-25 14:48:31 -06:00
|
|
|
|
2021-03-24 10:24:32 -06:00
|
|
|
@property
|
2025-04-28 18:29:55 +02:00
|
|
|
def is_dirty(self) -> bool:
|
2021-03-24 10:24:32 -06:00
|
|
|
"""Determine if the rule has changed since its version was locked."""
|
2023-02-07 15:40:51 -05:00
|
|
|
min_stack = Version.parse(self.get_supported_version(), optional_minor_and_patch=True)
|
2023-02-10 13:18:53 -05:00
|
|
|
existing_sha256 = self.version_lock.get_locked_hash(self.id, f"{min_stack.major}.{min_stack.minor}")
|
2020-09-02 09:19:17 -08:00
|
|
|
|
2025-04-28 18:29:55 +02:00
|
|
|
if not existing_sha256:
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
rule_hash = self.get_hash()
|
|
|
|
|
rule_hash_with_integrations = self.get_hash(include_integrations=True)
|
|
|
|
|
|
|
|
|
|
# Checking against current and previous version of the hash to avoid mass version bump
|
2025-07-01 15:20:55 +02:00
|
|
|
return existing_sha256 not in (rule_hash, rule_hash_with_integrations)
|
2021-01-11 08:58:18 -09:00
|
|
|
|
2022-09-19 09:53:30 -06:00
|
|
|
@property
|
2025-07-01 15:20:55 +02:00
|
|
|
def lock_entry(self) -> dict[str, Any] | None:
|
2022-09-19 09:53:30 -06:00
|
|
|
lock_entry = self.version_lock.version_lock.data.get(self.id)
|
|
|
|
|
if lock_entry:
|
|
|
|
|
return lock_entry.to_dict()
|
2025-07-01 15:20:55 +02:00
|
|
|
return None
|
2022-09-19 09:53:30 -06:00
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
def has_forked(self) -> bool:
|
|
|
|
|
"""Determine if the rule has forked at any point (has a previous entry)."""
|
|
|
|
|
lock_entry = self.lock_entry
|
|
|
|
|
if lock_entry:
|
2025-07-01 15:20:55 +02:00
|
|
|
return "previous" in lock_entry
|
2022-09-19 09:53:30 -06:00
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
def is_in_forked_version(self) -> bool:
|
|
|
|
|
"""Determine if the rule is in a forked version."""
|
|
|
|
|
if not self.has_forked:
|
|
|
|
|
return False
|
2025-07-01 15:20:55 +02:00
|
|
|
if not self.lock_entry:
|
|
|
|
|
raise ValueError("No lock entry found")
|
|
|
|
|
locked_min_stack = Version.parse(self.lock_entry["min_stack_version"], optional_minor_and_patch=True)
|
2023-02-07 14:26:29 -05:00
|
|
|
current_package_ver = Version.parse(load_current_package_version(), optional_minor_and_patch=True)
|
2022-09-19 09:53:30 -06:00
|
|
|
return current_package_ver < locked_min_stack
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
    def get_version_space(self) -> int | None:
        """Retrieve the number of version spaces available (None for unbound)."""
        if self.is_in_forked_version:
            if not self.lock_entry:
                raise ValueError("No lock entry found")

            # Entry for the forked branch keyed by the rule's min_stack_version.
            current_entry = self.lock_entry["previous"][self.metadata.min_stack_version]  # type: ignore[reportAttributeAccessIssue]
            current_version = current_entry["version"]
            max_allowable_version = current_entry["max_allowable_version"]

            # Remaining bumps before hitting the cap (one slot reserved).
            return max_allowable_version - current_version - 1
        return None
|
2022-09-19 09:53:30 -06:00
|
|
|
|
2021-03-24 10:24:32 -06:00
|
|
|
    @property
    def saved_version(self) -> int | None:
        """Retrieve the version from the version.lock or from the file if version locking is bypassed."""
        toml_version = self.data.get("version")  # type: ignore[reportAttributeAccessIssue]

        # When bypassing the lock, the TOML value is authoritative (may be None).
        if BYPASS_VERSION_LOCK:
            return toml_version  # type: ignore[reportUnknownVariableType]

        # With locking active a TOML version is ignored; warn so authors notice.
        if toml_version:
            print(
                f"WARNING: Rule {self.name} - {self.id} has a version set in the rule TOML."
                " This `version` will be ignored and defaulted to the version.lock.json file."
                " Set `bypass_version_lock` to `True` in the rules config to use the TOML version."
            )

        return self.version_lock.get_locked_version(self.id, self.get_supported_version())
|
2021-03-24 10:24:32 -06:00
|
|
|
|
|
|
|
|
@property
|
2025-07-01 15:20:55 +02:00
|
|
|
def autobumped_version(self) -> int | None:
|
2021-03-24 10:24:32 -06:00
|
|
|
"""Retrieve the current version of the rule, accounting for automatic increments."""
|
2024-08-06 18:07:12 -04:00
|
|
|
version = self.saved_version
|
|
|
|
|
|
|
|
|
|
if BYPASS_VERSION_LOCK:
|
|
|
|
|
raise NotImplementedError("This method is not implemented when version locking is not in use.")
|
|
|
|
|
|
|
|
|
|
# Default to version 1 if no version is set yet
|
2021-03-24 10:24:32 -06:00
|
|
|
if version is None:
|
2021-01-11 08:58:18 -09:00
|
|
|
return 1
|
|
|
|
|
|
2024-08-06 18:07:12 -04:00
|
|
|
# Auto-increment version if the rule is 'dirty' and not bypassing version lock
|
2021-03-24 10:24:32 -06:00
|
|
|
return version + 1 if self.is_dirty else version
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def get_synthetic_version(self, use_default: bool) -> int | None:
|
2024-08-06 18:07:12 -04:00
|
|
|
"""
|
|
|
|
|
Get the latest actual representation of a rule's version, where changes are accounted for automatically when
|
|
|
|
|
version locking is used, otherwise, return the version defined in the rule toml if present else optionally
|
|
|
|
|
default to 1.
|
|
|
|
|
"""
|
|
|
|
|
return self.autobumped_version or self.saved_version or (1 if use_default else None)
|
|
|
|
|
|
2022-09-19 09:53:30 -06:00
|
|
|
@classmethod
|
2025-07-01 15:20:55 +02:00
|
|
|
def convert_supported_version(cls, stack_version: str | None) -> Version:
|
2022-09-19 09:53:30 -06:00
|
|
|
"""Convert an optional stack version to the minimum for the lock in the form major.minor."""
|
2023-02-07 14:26:29 -05:00
|
|
|
min_version = get_min_supported_stack_version()
|
2022-09-19 09:53:30 -06:00
|
|
|
if stack_version is None:
|
|
|
|
|
return min_version
|
2023-02-07 14:26:29 -05:00
|
|
|
return max(Version.parse(stack_version, optional_minor_and_patch=True), min_version)
|
2022-09-19 09:53:30 -06:00
|
|
|
|
|
|
|
|
def get_supported_version(self) -> str:
|
|
|
|
|
"""Get the lowest stack version for the rule that is currently supported in the form major.minor."""
|
2025-07-01 15:20:55 +02:00
|
|
|
rule_min_stack = self.metadata.get("min_stack_version") # type: ignore[reportAttributeAccessIssue]
|
|
|
|
|
min_stack = self.convert_supported_version(rule_min_stack) # type: ignore[reportUnknownArgumentType]
|
2023-02-07 15:40:51 -05:00
|
|
|
return f"{min_stack.major}.{min_stack.minor}"
|
2022-09-19 09:53:30 -06:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def _post_dict_conversion(self, obj: dict[str, Any]) -> dict[str, Any]:
|
2021-09-01 15:29:53 -08:00
|
|
|
"""Transform the converted API in place before sending to Kibana."""
|
|
|
|
|
|
|
|
|
|
# cleanup the whitespace in the rule
|
|
|
|
|
obj = nested_normalize(obj)
|
|
|
|
|
|
|
|
|
|
# fill in threat.technique so it's never missing
|
|
|
|
|
for threat_entry in obj.get("threat", []):
|
|
|
|
|
threat_entry.setdefault("technique", [])
|
|
|
|
|
|
|
|
|
|
return obj
|
|
|
|
|
|
2026-01-27 23:01:27 -05:00
|
|
|
def _uses_keep_star(self, hashable_dict: dict[str, Any]) -> bool:
|
2026-03-09 19:33:27 -04:00
|
|
|
"""Check if this is an ES|QL rule that uses `| keep *` or fields ending with '*'."""
|
2026-01-27 23:01:27 -05:00
|
|
|
if hashable_dict.get("language") != "esql":
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
query: str | None = hashable_dict.get("query")
|
|
|
|
|
if not isinstance(query, str) or not query:
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
keep_pattern = re.compile(r"\|\s*keep\b\s+([^\|]+)", re.IGNORECASE | re.DOTALL)
|
|
|
|
|
keep_match: re.Match[str] | None = keep_pattern.search(query)
|
|
|
|
|
if keep_match:
|
|
|
|
|
keep_fields: list[str] = [field.strip() for field in keep_match.group(1).split(",")]
|
2026-03-09 19:33:27 -04:00
|
|
|
return any(field == "*" or field.endswith("*") for field in keep_fields)
|
2026-01-27 23:01:27 -05:00
|
|
|
return False
|
|
|
|
|
|
2021-09-01 15:29:53 -08:00
|
|
|
    # Concrete subclasses produce the Kibana API representation of the rule.
    @abstractmethod
    def to_api_format(self, include_version: bool = True) -> dict[str, Any]:
        """Convert the rule to the API format."""
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def get_hashable_content(self, include_version: bool = False, include_integrations: bool = False) -> dict[str, Any]:
|
2025-04-28 18:29:55 +02:00
|
|
|
"""Returns the rule content to be used for calculating the hash value for the rule"""
|
|
|
|
|
|
|
|
|
|
# get the API dict without the version by default, otherwise it'll always be dirty.
|
|
|
|
|
hashable_dict = self.to_api_format(include_version=include_version)
|
|
|
|
|
|
|
|
|
|
# drop related integrations if present
|
|
|
|
|
if not include_integrations:
|
|
|
|
|
hashable_dict.pop("related_integrations", None)
|
|
|
|
|
|
2026-01-27 23:01:27 -05:00
|
|
|
# For ES|QL rules with `| keep *`, exclude required_fields since they're
|
|
|
|
|
# non-deterministic (depend on integration schemas which vary by stack version)
|
|
|
|
|
if self._uses_keep_star(hashable_dict):
|
|
|
|
|
hashable_dict.pop("required_fields", None)
|
|
|
|
|
|
2025-04-28 18:29:55 +02:00
|
|
|
return hashable_dict
|
|
|
|
|
|
2021-09-01 15:29:53 -08:00
|
|
|
@cached
|
2025-04-28 18:29:55 +02:00
|
|
|
def get_hash(self, include_version: bool = False, include_integrations: bool = False) -> str:
|
|
|
|
|
"""Returns a sha256 hash of the rule contents"""
|
|
|
|
|
hashable_contents = self.get_hashable_content(
|
|
|
|
|
include_version=include_version,
|
|
|
|
|
include_integrations=include_integrations,
|
|
|
|
|
)
|
2021-09-01 15:29:53 -08:00
|
|
|
return utils.dict_hash(hashable_contents)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(frozen=True)
class TOMLRuleContents(BaseRuleContents, MarshmallowDataclassMixin):
    """Rule object which maps directly to the TOML layout."""

    # Parsed [metadata] section of the rule TOML.
    metadata: RuleMeta
    # Parsed [rule] section; data_key maps it from the "rule" table.
    data: AnyRuleData = field(metadata={"data_key": "rule"})
    # Optional [transform] section; None when the rule defines no transforms.
    transform: RuleTransform | None = None
|
2021-11-15 08:46:12 -09:00
|
|
|
|
|
|
|
|
    @cached_property
    def version_lock(self) -> VersionLock:  # type: ignore[reportIncompatibleMethodOverride]
        # The lock is meaningless (and likely stale) when bypassing is configured.
        if RULES_CONFIG.bypass_version_lock is True:
            err_msg = (
                "Cannot access the version lock when the versioning strategy is configured to bypass the"
                " version lock. Set `bypass_version_lock` to `false` in the rules config to use the version lock."
            )
            raise ValueError(err_msg)

        # Prefer an explicitly injected lock (see set_version_lock), else the global one.
        return getattr(self, "_version_lock", None) or loaded_version_lock
|
2024-08-06 18:07:12 -04:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def set_version_lock(self, value: VersionLock) -> None:
|
|
|
|
|
if RULES_CONFIG.bypass_version_lock:
|
|
|
|
|
raise ValueError(
|
|
|
|
|
"Cannot set the version lock when the versioning strategy is configured to bypass the version lock."
|
|
|
|
|
" Set `bypass_version_lock` to `false` in the rules config to use the version lock."
|
|
|
|
|
)
|
2021-11-15 08:46:12 -09:00
|
|
|
|
|
|
|
|
# circumvent frozen class
|
2026-01-12 21:07:07 +05:30
|
|
|
self.__dict__["_version_lock"] = value # type: ignore[reportIndexIssue]
|
2021-09-01 15:29:53 -08:00
|
|
|
|
|
|
|
|
@classmethod
|
2025-07-01 15:20:55 +02:00
|
|
|
def all_rule_types(cls) -> set[str]:
|
|
|
|
|
types: set[str] = set()
|
2021-09-01 15:29:53 -08:00
|
|
|
for subclass in typing.get_args(AnyRuleData):
|
|
|
|
|
field = next(field for field in dataclasses.fields(subclass) if field.name == "type")
|
|
|
|
|
types.update(typing.get_args(field.type))
|
|
|
|
|
|
|
|
|
|
return types
|
|
|
|
|
|
|
|
|
|
@classmethod
|
2025-07-01 15:20:55 +02:00
|
|
|
def get_data_subclass(cls, rule_type: str) -> type[BaseRuleData]:
|
2021-09-01 15:29:53 -08:00
|
|
|
"""Get the proper subclass depending on the rule type"""
|
|
|
|
|
for subclass in typing.get_args(AnyRuleData):
|
|
|
|
|
field = next(field for field in dataclasses.fields(subclass) if field.name == "type")
|
2025-07-01 15:20:55 +02:00
|
|
|
if (rule_type,) == typing.get_args(field.type):
|
2021-09-01 15:29:53 -08:00
|
|
|
return subclass
|
|
|
|
|
|
|
|
|
|
raise ValueError(f"Unknown rule type {rule_type}")
|
|
|
|
|
|
|
|
|
|
    @property
    def id(self) -> definitions.UUIDString:
        # Rule UUID, sourced from the [rule] section.
        return self.data.rule_id

    @property
    def name(self) -> str:
        # Human-readable rule name, sourced from the [rule] section.
        return self.data.name

    @property
    def type(self) -> str:
        # Rule type (query, eql, esql, ...), sourced from the [rule] section.
        return self.data.type
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
    def _add_known_nulls(self, rule_dict: dict[str, Any]) -> dict[str, Any]:
        """Add known nulls to the rule."""
        # Note this is primarily as a stopgap until add support for Rule Actions
        for pair in definitions.KNOWN_NULL_ENTRIES:
            # compound_key points at a list in the rule dict; sub_key is the
            # nested key inside each list item that should be an explicit null.
            for compound_key, sub_key in pair.items():
                value = get_nested_value(rule_dict, compound_key)
                if isinstance(value, list):
                    # Only touch dict items whose sub_key is missing/None and
                    # which are not system actions (those manage their own shape).
                    items_to_update: list[dict[str, Any]] = [
                        item
                        for item in value  # type: ignore[reportUnknownVariableType]
                        if isinstance(item, dict)
                        and get_nested_value(item, sub_key) is None
                        and get_nested_value(item, "action_type_id") not in definitions.SYSTEM_ACTION_TYPE_IDS
                    ]
                    for item in items_to_update:
                        set_nested_value(item, sub_key, None)
        return rule_dict
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def _post_dict_conversion(self, obj: dict[str, Any]) -> dict[str, Any]:
|
2022-07-06 11:49:44 -04:00
|
|
|
"""Transform the converted API in place before sending to Kibana."""
|
2025-07-01 15:20:55 +02:00
|
|
|
_ = super()._post_dict_conversion(obj)
|
2022-07-06 11:49:44 -04:00
|
|
|
|
2022-12-05 14:07:33 -05:00
|
|
|
# build time fields
|
2023-03-28 07:17:50 -06:00
|
|
|
self._convert_add_related_integrations(obj)
|
|
|
|
|
self._convert_add_required_fields(obj)
|
|
|
|
|
self._convert_add_setup(obj)
|
2022-07-06 11:49:44 -04:00
|
|
|
|
|
|
|
|
# validate new fields against the schema
|
2025-07-01 15:20:55 +02:00
|
|
|
rule_type = obj["type"]
|
2022-07-06 11:49:44 -04:00
|
|
|
subclass = self.get_data_subclass(rule_type)
|
|
|
|
|
subclass.from_dict(obj)
|
2022-12-05 14:07:33 -05:00
|
|
|
|
|
|
|
|
# rule type transforms
|
2025-07-01 15:20:55 +02:00
|
|
|
self.data.transform(obj) if hasattr(self.data, "transform") else False # type: ignore[reportAttributeAccessIssue]
|
2022-12-05 14:07:33 -05:00
|
|
|
|
2022-07-06 11:49:44 -04:00
|
|
|
return obj
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
    def _convert_add_related_integrations(self, obj: dict[str, Any]) -> None:
        """Add restricted field related_integrations to the obj."""
        field_name = "related_integrations"
        package_integrations = obj.get(field_name, [])

        # Only derive integrations when none are set and metadata declares some.
        if not package_integrations and self.metadata.integration:
            packages_manifest = load_integrations_manifests()
            current_stack_version = load_current_package_version()

            if self.check_restricted_field_version(field_name) and isinstance(
                self.data, QueryRuleData | MachineLearningRuleData
            ):  # type: ignore[reportUnnecessaryIsInstance]
                # Lucene rules are skipped; ML rules are always included.
                if (self.data.get("language") is not None and self.data.get("language") != "lucene") or self.data.get(
                    "type"
                ) == "machine_learning":
                    package_integrations = self.get_packaged_integrations(
                        self.data,  # type: ignore[reportArgumentType]
                        self.metadata,
                        packages_manifest,
                    )

                    if not package_integrations:
                        return

                    for package in package_integrations:
                        # Pin each package to the oldest version compatible
                        # with the current stack.
                        package["version"] = find_least_compatible_version(
                            package=package["package"],
                            integration=package["integration"],
                            current_stack_version=current_stack_version,
                            packages_manifest=packages_manifest,
                        )

                        # if integration is not a policy template remove
                        if package["version"]:
                            version_data = packages_manifest.get(package["package"], {}).get(
                                package["version"].strip("^"), {}
                            )
                            policy_templates = version_data.get("policy_templates", [])

                            if package["integration"] not in policy_templates:
                                del package["integration"]

                    # remove duplicate entries (keyed by canonical JSON form)
                    package_integrations = list({json.dumps(d, sort_keys=True): d for d in package_integrations}.values())
                    obj.setdefault("related_integrations", package_integrations)
|
|
|
|
|
|
|
|
|
|
def _convert_add_required_fields(self, obj: dict[str, Any]) -> None:
|
2022-07-06 11:49:44 -04:00
|
|
|
"""Add restricted field required_fields to the obj, derived from the query AST."""
|
2025-07-01 15:20:55 +02:00
|
|
|
if isinstance(self.data, QueryRuleData) and self.data.language != "lucene":
|
|
|
|
|
index: list[str] = obj.get("index") or []
|
2022-07-06 11:49:44 -04:00
|
|
|
required_fields = self.data.get_required_fields(index)
|
|
|
|
|
else:
|
|
|
|
|
required_fields = []
|
|
|
|
|
|
|
|
|
|
field_name = "required_fields"
|
2022-08-08 13:44:36 -04:00
|
|
|
if required_fields and self.check_restricted_field_version(field_name=field_name):
|
2022-07-06 11:49:44 -04:00
|
|
|
obj.setdefault(field_name, required_fields)
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
    def _convert_add_setup(self, obj: dict[str, Any]) -> None:
        """Add restricted field setup to the obj."""
        rule_note = obj.get("note", "")
        field_name = "setup"
        field_value = obj.get(field_name)

        # Skip entirely when the current stack does not support the setup field.
        if not self.check_explicit_restricted_field_version(field_name):
            return

        data_validator = self.data.data_validator

        if not data_validator:
            raise ValueError("No data validator found")

        # Only extract when the note contains a Setup section and no explicit
        # setup value is already present.
        if not data_validator.skip_validate_note and data_validator.setup_in_note and not field_value:
            parsed_note = self.data.parsed_note

            if not parsed_note:
                raise ValueError("No parsed note found")

            # parse note tree
            for i, child in enumerate(parsed_note.children):
                if child.get_type() == "Heading" and "Setup" in gfm.render(child):  # type: ignore[reportArgumentType]
                    # Everything after the Setup heading becomes the setup value.
                    field_value = self._convert_get_setup_content(parsed_note.children[i + 1 :])

                    # clean up old note field: drop the heading and the extracted body
                    investigation_guide = rule_note.replace("## Setup\n\n", "")
                    investigation_guide = investigation_guide.replace(field_value, "").strip()
                    obj["note"] = investigation_guide
                    obj[field_name] = field_value
                    break
|
|
|
|
|
|
|
|
|
|
    @cached
    def _convert_get_setup_content(self, note_tree: list[Any]) -> str:
        """Get note paragraph starting from the setup header."""
        # Walk the marko markdown tree, re-rendering each node type by hand.
        setup: list[str] = []
        for child in note_tree:
            if child.get_type() == "BlankLine" or child.get_type() == "LineBreak":
                setup.append("\n")
            elif child.get_type() == "CodeSpan":
                setup.append(f"`{gfm.renderer.render_raw_text(child)}`")  # type: ignore[reportUnknownMemberType]
            elif child.get_type() == "Paragraph":
                setup.append(self._convert_get_setup_content(child.children))
                setup.append("\n")
            elif child.get_type() == "FencedCode":
                setup.append(f"```\n{self._convert_get_setup_content(child.children)}\n```")
                setup.append("\n")
            elif child.get_type() == "RawText":
                setup.append(child.children)
            elif child.get_type() == "Heading" and child.level >= 2:  # noqa: PLR2004
                # Any subsequent section heading ends the setup content.
                break
            else:
                # Fallback: recurse into any other container node.
                setup.append(self._convert_get_setup_content(child.children))

        return "".join(setup).strip()
|
2022-07-06 11:49:44 -04:00
|
|
|
|
|
|
|
|
def check_explicit_restricted_field_version(self, field_name: str) -> bool:
|
|
|
|
|
"""Explicitly check restricted fields against global min and max versions."""
|
|
|
|
|
min_stack, max_stack = BUILD_FIELD_VERSIONS[field_name]
|
2025-07-01 15:20:55 +02:00
|
|
|
if not min_stack or not max_stack:
|
|
|
|
|
return True
|
2022-07-06 11:49:44 -04:00
|
|
|
return self.compare_field_versions(min_stack, max_stack)
|
|
|
|
|
|
|
|
|
|
def check_restricted_field_version(self, field_name: str) -> bool:
|
|
|
|
|
"""Check restricted fields against schema min and max versions."""
|
2025-07-01 15:20:55 +02:00
|
|
|
if not self.data.get_restricted_fields:
|
|
|
|
|
raise ValueError("No restricted fields found")
|
|
|
|
|
min_stack, max_stack = self.data.get_restricted_fields[field_name]
|
|
|
|
|
if not min_stack or not max_stack:
|
|
|
|
|
return True
|
2022-07-06 11:49:44 -04:00
|
|
|
return self.compare_field_versions(min_stack, max_stack)
|
|
|
|
|
|
2022-07-18 15:41:32 -04:00
|
|
|
@staticmethod
|
|
|
|
|
def compare_field_versions(min_stack: Version, max_stack: Version) -> bool:
|
|
|
|
|
"""Check current rule version is within min and max stack versions."""
|
2023-02-07 14:26:29 -05:00
|
|
|
current_version = Version.parse(load_current_package_version(), optional_minor_and_patch=True)
|
2022-07-06 11:49:44 -04:00
|
|
|
max_stack = max_stack or current_version
|
2023-02-07 14:26:29 -05:00
|
|
|
return min_stack <= current_version >= max_stack
|
2022-07-06 11:49:44 -04:00
|
|
|
|
2023-02-02 16:22:44 -05:00
|
|
|
@classmethod
|
2025-07-01 15:20:55 +02:00
|
|
|
def get_packaged_integrations(
|
|
|
|
|
cls,
|
|
|
|
|
data: QueryRuleData,
|
|
|
|
|
meta: RuleMeta,
|
|
|
|
|
package_manifest: dict[str, Any],
|
|
|
|
|
) -> list[dict[str, Any]] | None:
|
|
|
|
|
packaged_integrations: list[dict[str, Any]] = []
|
|
|
|
|
datasets, _ = beats.get_datasets_and_modules(data.get("ast") or []) # type: ignore[reportArgumentType]
|
2025-10-15 21:17:07 +02:00
|
|
|
if isinstance(data, ESQLRuleData):
|
|
|
|
|
dataset_objs = get_esql_query_event_dataset_integrations(data.query)
|
|
|
|
|
datasets.update(str(obj) for obj in dataset_objs)
|
2023-10-16 12:48:54 -07:00
|
|
|
# integration is None to remove duplicate references upstream in Kibana
|
2025-07-31 11:00:50 -05:00
|
|
|
# chronologically, event.dataset, data_stream.dataset is checked for package:integration, then rule tags
|
2023-10-16 12:48:54 -07:00
|
|
|
# if both exist, rule tags are only used if defined in definitions for non-dataset packages
|
|
|
|
|
# of machine learning analytic packages
|
|
|
|
|
|
2025-10-15 21:17:07 +02:00
|
|
|
rule_integrations: str | list[str] = meta.get("integration") or []
|
|
|
|
|
if isinstance(rule_integrations, str):
|
|
|
|
|
rule_integrations = [rule_integrations]
|
|
|
|
|
for integration in rule_integrations:
|
|
|
|
|
ineligible_integrations = [
|
|
|
|
|
*definitions.NON_DATASET_PACKAGES,
|
|
|
|
|
*map(str.lower, definitions.MACHINE_LEARNING_PACKAGES),
|
|
|
|
|
]
|
2025-10-27 11:03:48 -04:00
|
|
|
if (
|
|
|
|
|
integration in ineligible_integrations
|
|
|
|
|
or isinstance(data, MachineLearningRuleData)
|
|
|
|
|
or (isinstance(data, ESQLRuleData) and integration not in datasets)
|
|
|
|
|
):
|
2025-10-15 21:17:07 +02:00
|
|
|
packaged_integrations.append({"package": integration, "integration": None})
|
2022-08-08 13:44:36 -04:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
packaged_integrations.extend(parse_datasets(list(datasets), package_manifest))
|
2022-08-08 13:44:36 -04:00
|
|
|
|
|
|
|
|
return packaged_integrations
|
|
|
|
|
|
2021-03-24 10:24:32 -06:00
|
|
|
@validates_schema
|
2025-07-01 15:20:55 +02:00
|
|
|
def post_conversion_validation(self, value: dict[str, Any], **_: Any) -> None:
|
2022-07-18 15:41:32 -04:00
|
|
|
"""Additional validations beyond base marshmallow schemas."""
|
2021-03-24 10:24:32 -06:00
|
|
|
data: AnyRuleData = value["data"]
|
|
|
|
|
metadata: RuleMeta = value["metadata"]
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
if not data.data_validator:
|
|
|
|
|
raise ValueError("No data validator found")
|
|
|
|
|
|
2024-08-06 18:07:12 -04:00
|
|
|
test_config = RULES_CONFIG.test_config
|
2025-07-01 15:20:55 +02:00
|
|
|
if not test_config.check_skip_by_rule_id(value["data"].rule_id):
|
|
|
|
|
bypass = metadata.get("bypass_bbr_timing") or False
|
2024-08-06 18:07:12 -04:00
|
|
|
data.validate_query(metadata)
|
|
|
|
|
data.data_validator.validate_note()
|
2025-07-01 15:20:55 +02:00
|
|
|
data.data_validator.validate_bbr(bypass)
|
|
|
|
|
data.validate(metadata) if hasattr(data, "validate") else False # type: ignore[reportUnknownMemberType]
|
2021-03-24 10:24:32 -06:00
|
|
|
|
2023-12-08 13:46:28 -06:00
|
|
|
    @staticmethod
    def validate_remote(remote_validator: "RemoteValidator", contents: "TOMLRuleContents") -> None:
        """Run remote (validator-backed) validation for the given rule contents."""
        _ = remote_validator.validate_rule(contents)
|
2023-12-08 13:46:28 -06:00
|
|
|
|
2024-04-26 11:12:50 -06:00
|
|
|
    @classmethod
    def from_rule_resource(
        cls,
        rule: dict[str, Any],
        creation_date: str = TIME_NOW,
        updated_date: str = TIME_NOW,
        maturity: str = "development",
    ) -> "TOMLRuleContents":
        """Create a TOMLRuleContents from a kibana rule resource."""
        # Metadata is synthesized from the resource's related integrations.
        integrations = [r["package"] for r in rule["related_integrations"]]
        meta = {
            "creation_date": creation_date,
            "updated_date": updated_date,
            "maturity": maturity,
            "integration": integrations,
        }
        # NOTE(review): the key "transforms" does not match the dataclass field
        # "transform"; with unknown=EXCLUDE it is silently dropped — confirm intended.
        return cls.from_dict({"metadata": meta, "rule": rule, "transforms": None}, unknown=marshmallow.EXCLUDE)
|
2024-04-26 11:12:50 -06:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def to_dict(self, strip_none_values: bool = True) -> dict[str, Any]:
|
2021-08-31 21:06:14 -08:00
|
|
|
# Load schemas directly from the data and metadata classes to avoid schema ambiguity which can
|
|
|
|
|
# result from union fields which contain classes and related subclasses (AnyRuleData). See issue #1141
|
|
|
|
|
metadata = self.metadata.to_dict(strip_none_values=strip_none_values)
|
|
|
|
|
data = self.data.to_dict(strip_none_values=strip_none_values)
|
2025-07-01 15:20:55 +02:00
|
|
|
if self.transform:
|
|
|
|
|
data = self.data.process_transforms(self.transform, data)
|
|
|
|
|
dict_obj = {"metadata": metadata, "rule": data}
|
2021-03-24 10:24:32 -06:00
|
|
|
return nested_normalize(dict_obj)
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def flattened_dict(self) -> dict[str, Any]:
|
|
|
|
|
flattened: dict[str, Any] = {}
|
2021-03-24 10:24:32 -06:00
|
|
|
flattened.update(self.data.to_dict())
|
|
|
|
|
flattened.update(self.metadata.to_dict())
|
|
|
|
|
return flattened
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
    def to_api_format(
        self,
        # NOTE: this default is evaluated once at import time from the global flag.
        include_version: bool = not BYPASS_VERSION_LOCK,
        include_metadata: bool = False,
    ) -> dict[str, Any]:
        """Convert the TOML rule to the API format."""
        rule_dict = self.to_dict()
        rule_dict = self._add_known_nulls(rule_dict)
        converted_data = rule_dict["rule"]
        # Adds build-time fields and runs post-conversion schema validation.
        converted = self._post_dict_conversion(converted_data)

        if include_metadata:
            converted["meta"] = rule_dict["metadata"]

        if include_version:
            converted["version"] = self.autobumped_version

        return converted
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def check_restricted_fields_compatibility(self) -> dict[str, dict[str, Any]]:
|
2022-06-27 10:02:15 -05:00
|
|
|
"""Check for compatibility between restricted fields and the min_stack_version of the rule."""
|
2023-02-07 14:26:29 -05:00
|
|
|
default_min_stack = get_min_supported_stack_version()
|
2022-06-27 10:02:15 -05:00
|
|
|
if self.metadata.min_stack_version is not None:
|
2023-02-07 16:09:17 -05:00
|
|
|
min_stack = Version.parse(self.metadata.min_stack_version, optional_minor_and_patch=True)
|
2022-06-27 10:02:15 -05:00
|
|
|
else:
|
|
|
|
|
min_stack = default_min_stack
|
|
|
|
|
restricted = self.data.get_restricted_fields
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
if not restricted:
|
|
|
|
|
raise ValueError("No restricted fields found")
|
|
|
|
|
|
|
|
|
|
invalid: dict[str, dict[str, Any]] = {}
|
2022-06-27 10:02:15 -05:00
|
|
|
for _field, values in restricted.items():
|
|
|
|
|
if self.data.get(_field) is not None:
|
|
|
|
|
min_allowed, _ = values
|
2025-07-01 15:20:55 +02:00
|
|
|
|
|
|
|
|
if not min_allowed:
|
|
|
|
|
raise ValueError("Min allowed versino is None")
|
|
|
|
|
|
2022-06-27 10:02:15 -05:00
|
|
|
if min_stack < min_allowed:
|
2025-07-01 15:20:55 +02:00
|
|
|
invalid[_field] = {"min_stack_version": min_stack, "min_allowed_version": min_allowed}
|
2022-06-27 10:02:15 -05:00
|
|
|
|
|
|
|
|
return invalid
|
|
|
|
|
|
2021-01-11 08:58:18 -09:00
|
|
|
|
2021-03-24 10:24:32 -06:00
|
|
|
@dataclass
class TOMLRule:
    # Parsed rule contents; included in hashing.
    contents: TOMLRuleContents = field(hash=True)
    # On-disk location of the rule TOML; None for rules not yet saved.
    path: Path | None = None
    # Associated GitHub PR object, if any; excluded from hashing/comparison/repr.
    gh_pr: Any = field(hash=False, compare=False, default=None, repr=False)
|
2021-03-24 10:24:32 -06:00
|
|
|
|
|
|
|
|
    @property
    def id(self) -> definitions.UUIDString:
        # Delegate to the contents' rule id.
        return self.contents.id

    @property
    def name(self) -> str:
        # Delegate to the contents' rule name.
        return self.contents.data.name

    def get_asset(self) -> dict[str, Any]:
        """Generate the relevant fleet compatible asset."""
        return {"id": self.id, "attributes": self.contents.to_api_format(), "type": definitions.SAVED_OBJECT_TYPE}
|
2021-03-30 13:31:02 -06:00
|
|
|
|
2024-08-06 18:07:12 -04:00
|
|
|
def get_base_rule_dir(self) -> Path | None:
|
|
|
|
|
"""Get the base rule directory for the rule."""
|
2025-07-01 15:20:55 +02:00
|
|
|
if not self.path:
|
|
|
|
|
raise ValueError("No path found")
|
2024-08-06 18:07:12 -04:00
|
|
|
rule_path = self.path.resolve()
|
|
|
|
|
for rules_dir in DEFAULT_PREBUILT_RULES_DIRS + DEFAULT_PREBUILT_BBR_DIRS:
|
|
|
|
|
if rule_path.is_relative_to(rules_dir):
|
|
|
|
|
return rule_path.relative_to(rules_dir)
|
|
|
|
|
return None
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def save_toml(self, strip_none_values: bool = True) -> None:
|
|
|
|
|
if self.path is None:
|
|
|
|
|
raise ValueError(f"Can't save rule {self.name} (self.id) without a path")
|
|
|
|
|
|
|
|
|
|
converted = {
|
|
|
|
|
"metadata": self.contents.metadata.to_dict(),
|
|
|
|
|
"rule": self.contents.data.to_dict(strip_none_values=strip_none_values),
|
|
|
|
|
}
|
2023-03-28 07:17:50 -06:00
|
|
|
if self.contents.transform:
|
2024-08-06 18:07:12 -04:00
|
|
|
converted["transform"] = self.contents.transform.to_dict()
|
2021-01-11 08:58:18 -09:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
if not self.path:
|
|
|
|
|
raise ValueError("No path found")
|
|
|
|
|
|
|
|
|
|
toml_write(converted, self.path.absolute())
|
|
|
|
|
|
|
|
|
|
def save_json(self, path: Path, include_version: bool = True) -> None:
|
|
|
|
|
path = path.with_suffix(".json")
|
|
|
|
|
with path.absolute().open("w", newline="\n") as f:
|
2021-03-24 10:24:32 -06:00
|
|
|
json.dump(self.contents.to_api_format(include_version=include_version), f, sort_keys=True, indent=2)
|
2025-07-01 15:20:55 +02:00
|
|
|
_ = f.write("\n")
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2026-04-10 11:29:15 -04:00
|
|
|
def save_yaml(self, path: Path, contents_override: dict[str, Any] | None = None) -> None:
|
|
|
|
|
"""Save the rule in YAML format."""
|
|
|
|
|
data = contents_override if contents_override is not None else self.contents.to_api_format()
|
|
|
|
|
utils.save_yaml(path.with_suffix(".yaml"), data, use_absolute_path=True)
|
|
|
|
|
|
2021-02-08 20:43:16 -09:00
|
|
|
|
2021-11-15 08:46:12 -09:00
|
|
|
@dataclass(frozen=True)
class DeprecatedRuleContents(BaseRuleContents):
    """Contents of a deprecated rule, kept as raw dicts rather than parsed schema objects."""

    # Raw TOML `metadata` section.
    metadata: dict[str, Any]
    # Raw TOML `rule` section.
    data: dict[str, Any]
    # Optional raw TOML `transform` section.
    transform: dict[str, Any] | None = None

    @cached_property
    def version_lock(self) -> VersionLock:  # type: ignore[reportIncompatibleMethodOverride]
        """Return the version lock for this rule: an explicitly set one, else the global loaded lock."""
        # VersionLock
        # NOTE: cached_property caches the first result, so a later set_version_lock()
        # only takes effect if called before this property is first accessed.
        return getattr(self, "_version_lock", None) or loaded_version_lock

    def set_version_lock(self, value: VersionLock | None) -> None:
        """Attach a version lock to this rule.

        Raises:
            ValueError: if the rules config is set to bypass the version lock.
        """
        if RULES_CONFIG.bypass_version_lock:
            raise ValueError(
                "Cannot set the version lock when the versioning strategy is configured to bypass the version lock."
                " Set `bypass_version_lock` to `false` in the rules config to use the version lock."
            )

        # circumvent frozen class
        self.__dict__["_version_lock"] = value  # type: ignore[reportIndexIssue]

    @property
    def id(self) -> str | None:  # type: ignore[reportIncompatibleMethodOverride]
        """Rule ID from the raw rule data, if present."""
        return self.data.get("rule_id")

    @property
    def name(self) -> str | None:  # type: ignore[reportIncompatibleMethodOverride]
        """Rule name from the raw rule data, if present."""
        return self.data.get("name")

    @property
    def type(self) -> str | None:  # type: ignore[reportIncompatibleMethodOverride]
        """Rule type from the raw rule data, if present."""
        return self.data.get("type")

    @classmethod
    def from_dict(cls, obj: dict[str, Any]) -> "DeprecatedRuleContents":
        """Build contents from a parsed TOML dict with `metadata`, `rule`, and optional `transform` keys."""
        kwargs = {"metadata": obj["metadata"], "data": obj["rule"]}
        kwargs["transform"] = obj.get("transform")
        return cls(**kwargs)

    def to_api_format(self, include_version: bool = not BYPASS_VERSION_LOCK) -> dict[str, Any]:
        """Convert the TOML rule to the API format."""
        # Deep copy so transform processing / version injection never mutates self.data.
        data = copy.deepcopy(self.data)
        if self.transform:
            transform = RuleTransform.from_dict(self.transform)
            _ = BaseRuleData.process_transforms(transform, data)

        converted = data
        if include_version:
            converted["version"] = self.autobumped_version

        return self._post_dict_conversion(converted)
|
2021-11-15 08:46:12 -09:00
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
class DeprecatedRule(dict[str, Any]):
|
2021-09-01 15:29:53 -08:00
|
|
|
"""Minimal dict object for deprecated rule."""
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def __init__(self, path: Path, contents: DeprecatedRuleContents, *args: Any, **kwargs: Any) -> None:
|
|
|
|
|
super().__init__(*args, **kwargs)
|
2021-09-01 15:29:53 -08:00
|
|
|
self.path = path
|
2021-11-15 08:46:12 -09:00
|
|
|
self.contents = contents
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def __repr__(self) -> str:
|
|
|
|
|
return f"{type(self).__name__}(contents={self.contents}, path={self.path})"
|
2021-09-01 15:29:53 -08:00
|
|
|
|
|
|
|
|
@property
|
2025-07-01 15:20:55 +02:00
|
|
|
def id(self) -> str | None:
|
2021-09-01 15:29:53 -08:00
|
|
|
return self.contents.id
|
|
|
|
|
|
|
|
|
|
@property
|
2025-07-01 15:20:55 +02:00
|
|
|
def name(self) -> str | None:
|
2021-09-01 15:29:53 -08:00
|
|
|
return self.contents.name
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def downgrade_contents_from_rule(
    rule: TOMLRule,
    target_version: str,
    replace_id: bool = True,
    include_metadata: bool = False,
) -> dict[str, Any]:
    """Generate the downgraded contents from a rule."""
    contents = rule.contents
    data_dict = contents.to_dict()["rule"]

    # Fall back to the rule's own min stack, then a hard floor of 8.3.0.
    version_source = target_version or contents.metadata.min_stack_version or "8.3.0"
    stack_version = Version.parse(version_source, optional_minor_and_patch=True)

    data_dict.setdefault("meta", {}).update(contents.metadata.to_dict())
    if replace_id:
        data_dict["rule_id"] = str(uuid4())

    data_dict = downgrade(data_dict, target_version=str(stack_version))
    metadata = data_dict.pop("meta")

    combined: dict[str, Any] = {"rule": data_dict, "metadata": metadata}
    if contents.transform:
        combined["transform"] = contents.transform.to_dict()

    payload = TOMLRuleContents.from_dict(combined).to_api_format(include_metadata=include_metadata)
    return strip_non_public_fields(stack_version, payload)
|
2021-04-21 14:55:26 -06:00
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def set_eql_config(min_stack_version_val: str) -> eql.parser.ParserConfig:
    """Based on the rule version set the eql functions allowed."""
    # Empty/None falls back to the current package version.
    version_source = min_stack_version_val or load_current_package_version()
    stack_version = Version.parse(version_source, optional_minor_and_patch=True)

    parser_config = eql.parser.ParserConfig()
    for feature_name, feature_range in definitions.ELASTICSEARCH_EQL_FEATURES.items():
        lower, upper = feature_range[0], feature_range[1]
        # An open-ended range (upper is None) extends through the current version.
        if lower <= stack_version <= (upper or stack_version):
            parser_config.context[feature_name] = True  # type: ignore[reportUnknownMemberType]

    return parser_config
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def get_unique_query_fields(rule: TOMLRule) -> list[str] | None:
    """Get a list of unique fields used in a rule query from rule contents."""
    api_contents = rule.contents.to_api_format()
    language = api_contents.get("language")
    query = api_contents.get("query")
    if language != "kuery" and language != "eql":
        return None

    # remove once py-eql supports ipv6 for cidrmatch
    min_stack_version = rule.contents.metadata.get("min_stack_version")
    if not min_stack_version:
        raise ValueError("Min stack version not found")

    cfg = set_eql_config(min_stack_version)
    with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions, eql.parser.skip_optimizations, cfg:
        if language == "kuery":
            parsed = kql.parse(query, normalize_kql_keywords=RULES_CONFIG.normalize_kql_keywords)  # type: ignore[reportUnknownMemberType]
        else:
            parsed = eql.parse_query(query)  # type: ignore[reportUnknownMemberType]

    field_names = {str(node) for node in parsed if isinstance(node, eql.ast.Field | kql.ast.Field)}  # type: ignore[reportUnknownVariableType]
    return sorted(field_names)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_datasets(datasets: list[str], package_manifest: dict[str, Any]) -> list[dict[str, Any]]:
    """Parses datasets into packaged integrations from rule data.

    Args:
        datasets: Dataset values pulled from a rule query (possibly quoted, e.g. '"aws.cloudtrail"').
        package_manifest: Mapping of known package names; only datasets whose package appears here are kept.

    Returns:
        A list of {"package": ..., "integration": ...} dicts, ordered by sorted dataset value.
    """
    packaged_integrations: list[dict[str, Any]] = []
    for _value in sorted(datasets):
        # cleanup extra quotes pulled from ast field
        value = _value.strip('"')

        integration = "Unknown"
        if "." in value:
            package, integration = value.split(".", 1)
            # Handle cases where endpoint event datasource needs to be parsed uniquely (e.g endpoint.events.network)
            # as endpoint.network
            if package == "endpoint" and "events" in integration:
                event_parts = integration.split(".")
                # Fix: a bare "endpoint.events" dataset previously raised IndexError here;
                # only take the sub-event when one exists.
                if len(event_parts) > 1:
                    integration = event_parts[1]
        else:
            package = value

        if package in package_manifest:
            packaged_integrations.append({"package": package, "integration": integration})

    return packaged_integrations
|
2021-09-10 10:06:04 -08:00
|
|
|
|
|
|
|
|
|
2021-04-21 14:55:26 -06:00
|
|
|
# avoid a circular import
|
2023-11-28 13:03:09 -06:00
|
|
|
from .rule_validators import EQLValidator, ESQLValidator, KQLValidator # noqa: E402
|