Files
sigma-rules/detection_rules/rule.py
T

453 lines
15 KiB
Python
Raw Normal View History

2020-06-29 23:17:38 -06:00
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2021-03-03 22:12:11 -09:00
# or more contributor license agreements. Licensed under the Elastic License
# 2.0; you may not use this file except in compliance with the Elastic License
# 2.0.
2020-06-29 23:17:38 -06:00
"""Rule object."""
import json
2021-03-24 10:24:32 -06:00
from dataclasses import dataclass, field
from pathlib import Path
2021-03-24 10:24:32 -06:00
from typing import Literal, Union, Optional, List, Any
2021-02-08 20:43:16 -09:00
from uuid import uuid4
2020-06-29 23:17:38 -06:00
2020-09-16 08:36:48 -06:00
import eql
2021-03-24 10:24:32 -06:00
from marshmallow import validates_schema
2020-06-29 23:17:38 -06:00
import kql
2021-03-24 10:24:32 -06:00
from . import ecs, beats, utils
from .mixins import MarshmallowDataclassMixin
from .rule_formatter import toml_write, nested_normalize
from .schemas import downgrade
from .schemas import definitions
from .utils import get_path, cached
2020-06-29 23:17:38 -06:00
# Location resolved by utils.get_path for "rules".
# NOTE(review): presumably the repository's rules directory - confirm in utils.get_path.
RULES_DIR = get_path("rules")
# Starts out empty; nothing in the visible part of this module reads or fills it.
# NOTE(review): confirm whether it is still used elsewhere.
_META_SCHEMA_REQ_DEFAULTS = {}
2021-03-24 10:24:32 -06:00
@dataclass(frozen=True)
class RuleMeta(MarshmallowDataclassMixin):
    """Data stored in a rule's [metadata] section of TOML."""
    # Required dates, plus the optional date on which the rule was deprecated.
    creation_date: definitions.Date
    updated_date: definitions.Date
    deprecation_date: Optional[definitions.Date]

    # Optional fields
    # beats_version / ecs_versions: when unset, TOMLRuleContents.validate_query
    # falls back to beats.get_max_version() and [ecs.get_max_version()].
    beats_version: Optional[definitions.SemVer]
    ecs_versions: Optional[List[definitions.SemVer]]
    comments: Optional[str]
    # A maturity of "deprecated" makes TOMLRuleContents.validate_query check query syntax only.
    maturity: Optional[definitions.Maturity]
    os_type_list: Optional[List[definitions.OSType]]
    # An explicit False makes TOMLRuleContents.validate_query check query syntax only.
    query_schema_validation: Optional[bool]
    related_endpoint_rules: Optional[List[str]]
2020-06-29 23:17:38 -06:00
2021-03-24 10:24:32 -06:00
@dataclass(frozen=True)
class BaseThreatEntry:
    """Fields shared by the tactic, technique and subtechnique entries of a threat mapping."""
    id: str
    name: str
    # Subclasses in this module narrow this to a URL type from `definitions`.
    reference: str
2020-06-29 23:17:38 -06:00
2021-03-24 10:24:32 -06:00
@dataclass(frozen=True)
class SubTechnique(BaseThreatEntry):
    """Mapping to threat subtechnique."""
    # Narrows BaseThreatEntry.reference to the subtechnique URL type.
    reference: definitions.SubTechniqueURL
2020-06-29 23:17:38 -06:00
2021-03-24 10:24:32 -06:00
@dataclass(frozen=True)
class Technique(BaseThreatEntry):
    """Mapping to threat technique, optionally carrying its subtechniques."""
    # subtechniques are stored at threat[].technique.subtechnique[]
    # Narrows BaseThreatEntry.reference to the technique URL type.
    reference: definitions.TechniqueURL
    subtechnique: Optional[List[SubTechnique]]
2021-03-24 10:24:32 -06:00
@dataclass(frozen=True)
class Tactic(BaseThreatEntry):
    """Mapping to a threat tactic."""
    # Narrows BaseThreatEntry.reference to the tactic URL type.
    reference: definitions.TacticURL
2020-06-29 23:17:38 -06:00
2021-03-24 10:24:32 -06:00
@dataclass(frozen=True)
class ThreatMapping(MarshmallowDataclassMixin):
    """Mapping to a threat framework."""
    framework: Literal["MITRE ATT&CK"]
    tactic: Tactic
    technique: Optional[List[Technique]]

    @staticmethod
    def flatten(threat_mappings: Optional[List]) -> 'FlatThreatMapping':
        """Get flat lists of tactic and technique info.

        :param threat_mappings: ThreatMapping entries to flatten; ``None`` is treated as empty.
        :return: a FlatThreatMapping whose lists are all sorted. Tactic names/ids keep
            duplicates (one per mapping entry); technique and subtechnique names/ids
            are de-duplicated.
        """
        tactic_names = []
        tactic_ids = []
        technique_ids = set()
        technique_names = set()
        sub_technique_ids = set()
        sub_technique_names = set()

        for entry in (threat_mappings or []):
            tactic_names.append(entry.tactic.name)
            tactic_ids.append(entry.tactic.id)

            for technique in (entry.technique or []):
                technique_names.add(technique.name)
                technique_ids.add(technique.id)

                for subtechnique in (technique.subtechnique or []):
                    # add(), not update(): update() iterates its argument, which would
                    # insert every single character of the id/name string.
                    sub_technique_ids.add(subtechnique.id)
                    sub_technique_names.add(subtechnique.name)

        return FlatThreatMapping(
            tactic_names=sorted(tactic_names),
            tactic_ids=sorted(tactic_ids),
            technique_names=sorted(technique_names),
            technique_ids=sorted(technique_ids),
            sub_technique_names=sorted(sub_technique_names),
            sub_technique_ids=sorted(sub_technique_ids)
        )
@dataclass(frozen=True)
class RiskScoreMapping(MarshmallowDataclassMixin):
    """Entry type of BaseRuleData.risk_score_mapping."""
    field: str
    operator: Optional[definitions.Operator]
    value: Optional[str]
@dataclass(frozen=True)
class SeverityMapping(MarshmallowDataclassMixin):
    """Entry type of BaseRuleData.severity_mapping."""
    field: str
    operator: Optional[definitions.Operator]
    value: Optional[str]
    severity: Optional[str]
@dataclass(frozen=True)
class FlatThreatMapping(MarshmallowDataclassMixin):
    """Flattened view of a list of threat mappings, as built by ThreatMapping.flatten.

    ThreatMapping.flatten passes every list through ``sorted()`` before storing it here.
    """
    tactic_names: List[str]
    tactic_ids: List[str]
    technique_names: List[str]
    technique_ids: List[str]
    sub_technique_names: List[str]
    sub_technique_ids: List[str]
@dataclass(frozen=True)
class BaseRuleData(MarshmallowDataclassMixin):
    """Fields common to every rule type; base class of all members of AnyRuleData.

    Fields annotated ``Optional[...]`` may be absent; ``author``, ``name``,
    ``risk_score``, ``rule_id``, ``severity`` and ``type`` are not optional.
    """
    actions: Optional[list]
    author: List[str]
    building_block_type: Optional[str]
    description: Optional[str]
    enabled: Optional[bool]
    exceptions_list: Optional[list]
    license: Optional[str]
    false_positives: Optional[List[str]]
    filters: Optional[List[dict]]
    # trailing `_` required since `from` is a reserved word in python
    # (data_key="from" keeps the serialized key named "from")
    from_: Optional[str] = field(metadata=dict(data_key="from"))
    interval: Optional[definitions.Interval]
    max_signals: Optional[definitions.MaxSignals]
    # downgrade_contents_from_rule stores the original rule metadata under meta["original"]
    meta: Optional[dict]
    name: str
    note: Optional[definitions.Markdown]
    # can we remove this comment?
    # explicitly NOT allowed!
    # output_index: Optional[str]
    references: Optional[List[str]]
    risk_score: definitions.RiskScore
    risk_score_mapping: Optional[List[RiskScoreMapping]]
    rule_id: definitions.UUIDString
    rule_name_override: Optional[str]
    severity_mapping: Optional[List[SeverityMapping]]
    severity: definitions.Severity
    tags: Optional[List[str]]
    throttle: Optional[str]
    timeline_id: Optional[str]
    timeline_title: Optional[str]
    timestamp_override: Optional[str]
    to: Optional[str]
    # Subclasses narrow this to a single literal value ("query", "eql", ...)
    type: Literal[definitions.RuleType]
    threat: Optional[List[ThreatMapping]]
@dataclass(frozen=True)
class BaseQueryRuleData(BaseRuleData):
    """Specific fields for query event types."""
    type: Literal["query"]
    index: Optional[List[str]]
    query: str
    # Subclasses narrow this to the literal(s) of the language they handle.
    language: str

    @property
    def parsed_query(self) -> Optional[object]:
        """Return the parsed query; the base class has no parser and returns None.

        KQLRuleData and EQLRuleData override this with a real parse.
        """
        return None
2020-06-29 23:17:38 -06:00
2021-03-24 10:24:32 -06:00
@dataclass(frozen=True)
class KQLRuleData(BaseQueryRuleData):
    """Query rule whose ``language`` is ``kuery`` (KQL)."""
    language: Literal["kuery"]

    @property
    def parsed_query(self) -> kql.ast.Expression:
        """Parse ``query`` with ``kql.parse`` (no schema) and return the result."""
        return kql.parse(self.query)

    @property
    def unique_fields(self):
        """List the distinct ``kql.ast.Field`` nodes of the parsed query, as strings."""
        field_names = set(str(node) for node in self.parsed_query if isinstance(node, kql.ast.Field))
        return list(field_names)

    def to_eql(self) -> eql.ast.Expression:
        """Convert the query text with ``kql.to_eql``."""
        return kql.to_eql(self.query)

    def validate_query(self, beats_version: str, ecs_versions: List[str]):
        """Validate the query against the schemas; called by the parent, which holds the [metadata] information.

        :param beats_version: beats version used to build the beat schema.
        :param ecs_versions: ECS versions to validate against; when empty, a single
            parse is done against ``ecs.get_kql_schema`` without a ``version`` argument.
        :raises kql.KqlParseError: when the query does not parse against a schema.
        """
        index_patterns = self.index or []
        ast = self.parsed_query

        # "<name>beat-*" index patterns name the beats whose schemas are needed
        beat_types = [pattern.split("-")[0] for pattern in index_patterns if "beat-*" in pattern]
        if beat_types:
            beat_schema = beats.get_schema_from_kql(ast, beat_types, version=beats_version)
        else:
            beat_schema = None

        if not ecs_versions:
            kql.parse(self.query, schema=ecs.get_kql_schema(indexes=index_patterns, beat_schema=beat_schema))
            return

        for ecs_version in ecs_versions:
            schema = ecs.get_kql_schema(version=ecs_version, indexes=index_patterns, beat_schema=beat_schema)

            try:
                kql.parse(self.query, schema=schema)
            except kql.KqlParseError as exc:
                hint = None
                if "Unknown field" in exc.error_msg and beat_types:
                    hint = "\nTry adding event.module or event.dataset to specify beats module"

                # re-raise with the hint attached and without the original traceback chain
                raise kql.KqlParseError(exc.error_msg, exc.line, exc.column, exc.source,
                                        len(exc.caret.lstrip()), trailer=hint) from None
2020-06-29 23:17:38 -06:00
2021-03-24 10:24:32 -06:00
@dataclass(frozen=True)
class LuceneRuleData(BaseQueryRuleData):
    """Specific fields for query event types."""
    # No parsed_query override here, so the inherited property returns None.
    language: Literal["lucene"]
2020-06-29 23:17:38 -06:00
2021-03-24 10:24:32 -06:00
@dataclass(frozen=True)
class MachineLearningRuleData(BaseRuleData):
    """Rule data for rules of type "machine_learning"; derives from BaseRuleData, so it has no query fields."""
    type: Literal["machine_learning"]

    anomaly_threshold: int
    machine_learning_job_id: str
2020-09-16 08:36:48 -06:00
2021-03-24 10:24:32 -06:00
@dataclass(frozen=True)
class ThresholdQueryRuleData(BaseQueryRuleData):
    """Specific fields for query event types."""

    @dataclass(frozen=True)
    class ThresholdMapping(MarshmallowDataclassMixin):
        """Type of the ``threshold`` field."""

        @dataclass(frozen=True)
        class ThresholdCardinality:
            """Type of the optional ``cardinality`` entry of a threshold."""
            field: str
            value: definitions.ThresholdValue

        # NOTE(review): presumably the field(s) the threshold is grouped by - confirm against the rule schema
        field: List[str]
        value: definitions.ThresholdValue
        cardinality: Optional[ThresholdCardinality]

    type: Literal["threshold"]
    # threshold rules accept either query language
    language: Literal["kuery", "lucene"]
    threshold: ThresholdMapping
@dataclass(frozen=True)
class EQLRuleData(BaseQueryRuleData):
    """EQL rules are a special case of query rules."""
    type: Literal["eql"]

    @property
    def parsed_query(self) -> eql.ast.Expression:
        """Parse ``query`` with ``eql.parse_query`` and return the result.

        Parsing runs with Elasticsearch syntax enabled and missing functions ignored.
        """
        with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
            return eql.parse_query(self.query)

    @property
    def unique_fields(self):
        """List the distinct ``eql.ast.Field`` nodes of the parsed query, as strings."""
        return list(set(str(f) for f in self.parsed_query if isinstance(f, eql.ast.Field)))

    def validate_query(self, beats_version: str, ecs_versions: List[str]):
        """Validate an EQL query while checking TOMLRule.

        :param beats_version: beats version used to build the beat schema.
        :param ecs_versions: ECS versions to validate against, one parse per version.
        :raises eql.EqlTypeMismatchError: re-raised unchanged.
        :raises eql.EqlParseError: re-created from the original error, with a hint
            appended when an unknown field is reported for a beats index.
        """
        # TODO: remove once py-eql supports ipv6 for cidrmatch
        # Or, unregister the cidrMatch function and replace it with one that doesn't validate against strict IPv4
        # parsed_query applies the same elasticsearch_syntax / ignore_missing_functions contexts
        parsed = self.parsed_query

        # "<name>beat-*" index patterns name the beats whose schemas are needed
        beat_types = [index.split("-")[0] for index in self.index or [] if "beat-*" in index]
        beat_schema = beats.get_schema_from_eql(parsed, beat_types, version=beats_version) if beat_types else None

        for version in ecs_versions:
            schema = ecs.get_kql_schema(indexes=self.index or [], beat_schema=beat_schema, version=version)

            try:
                # TODO: switch to custom cidrmatch that allows ipv6
                with ecs.KqlSchema2Eql(schema), eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
                    eql.parse_query(self.query)

            except eql.EqlTypeMismatchError:
                # must be caught before EqlParseError so it propagates untouched
                raise

            except eql.EqlParseError as exc:
                trailer = None
                if "Unknown field" in exc.error_msg and beat_types:
                    trailer = "\nTry adding event.module or event.dataset to specify beats module"

                # rebuild the same exception type with the hint, dropping the original traceback chain
                raise exc.__class__(exc.error_msg, exc.line, exc.column, exc.source,
                                    len(exc.caret.lstrip()), trailer=trailer) from None
2021-03-24 10:24:32 -06:00
# All of the possible rule types
# Used as the type of TOMLRuleContents.data.
AnyRuleData = Union[KQLRuleData, LuceneRuleData, MachineLearningRuleData, ThresholdQueryRuleData, EQLRuleData]
2020-06-29 23:17:38 -06:00
2021-03-24 10:24:32 -06:00
@dataclass(frozen=True)
class TOMLRuleContents(MarshmallowDataclassMixin):
    """Rule object which maps directly to the TOML layout.

    ``metadata`` holds the metadata section; ``data`` holds the rule body and is
    stored under the ``rule`` key.
    """
    metadata: RuleMeta
    data: AnyRuleData = field(metadata=dict(data_key="rule"))

    @property
    def id(self) -> definitions.UUIDString:
        """The ``rule_id`` of the rule data."""
        return self.data.rule_id

    @property
    def name(self) -> str:
        """The ``name`` of the rule data."""
        return self.data.name

    def lock_info(self) -> dict:
        """Build the rule_name / sha256 / version entry describing this rule."""
        return dict(rule_name=self.name, sha256=self.sha256(), version=self.autobumped_version)

    @property
    def is_dirty(self) -> Optional[bool]:
        """Determine if the rule has changed since its version was locked.

        Returns None when the rule id is not present in the loaded versions.
        """
        from .packaging import load_versions

        locked = load_versions()
        if self.id not in locked:
            return None

        locked_entry = locked[self.id]
        locked_sha256: str = locked_entry['sha256']
        return locked_sha256 != self.sha256()

    @property
    def latest_version(self) -> Optional[int]:
        """Retrieve the latest known version of the rule, or None if it has none."""
        from .packaging import load_versions

        locked = load_versions()
        if self.id not in locked:
            return None

        locked_entry = locked[self.id]
        return locked_entry['version']

    @property
    def autobumped_version(self) -> Optional[int]:
        """Retrieve the current version of the rule, accounting for automatic increments."""
        latest = self.latest_version
        if latest is None:
            # no known version yet: start at 1
            return 1

        if self.is_dirty:
            return latest + 1
        return latest

    @validates_schema
    def validate_query(self, value: dict, **kwargs):
        """Validate queries by calling into the validator for the relevant method."""
        data: AnyRuleData = value["data"]
        metadata: RuleMeta = value["metadata"]

        # fall back to the maximum known versions when the metadata names none
        beats_version = metadata.beats_version or beats.get_max_version()
        ecs_versions = metadata.ecs_versions or [ecs.get_max_version()]

        # only EQL and KQL rule data are validated here
        if not isinstance(data, (EQLRuleData, KQLRuleData)):
            return

        if metadata.query_schema_validation is False or metadata.maturity == "deprecated":
            # Check the syntax only
            _ = data.parsed_query
        else:
            # otherwise, do a full schema validation
            data.validate_query(beats_version=beats_version, ecs_versions=ecs_versions)

    def to_dict(self, strip_none_values=True) -> dict:
        """Return the mixin's dict form passed through ``nested_normalize``."""
        raw = super(TOMLRuleContents, self).to_dict(strip_none_values=strip_none_values)
        return nested_normalize(raw)

    def flattened_dict(self) -> dict:
        """Merge the rule data and metadata dicts into one; metadata keys win on collision."""
        merged = dict(self.data.to_dict())
        merged.update(self.metadata.to_dict())
        return merged

    @staticmethod
    def _post_dict_transform(obj: dict) -> dict:
        """Normalize the converted API dict and return the result."""
        # cleanup the whitespace in the rule
        is_eql = obj.get("language") == "eql"
        normalized = nested_normalize(obj, eql_rule=is_eql)

        # fill in threat.technique so it's never missing
        for threat_entry in normalized.get("threat", []):
            threat_entry.setdefault("technique", [])

        return normalized

    def to_api_format(self, include_version=True) -> dict:
        """Convert the TOML rule to the API format."""
        api_dict = self.data.to_dict()
        if include_version:
            api_dict["version"] = self.autobumped_version

        return self._post_dict_transform(api_dict)

    @cached
    def sha256(self) -> str:
        """Hash the API-format dict built without the version."""
        # get the hash of the API dict with the version not included, otherwise it'll always be dirty.
        return utils.dict_hash(self.to_api_format(include_version=False))
2021-01-11 08:58:18 -09:00
2021-03-24 10:24:32 -06:00
@dataclass
class TOMLRule:
    """A rule's contents paired with the file path it is saved to.

    NOTE(review): a plain ``@dataclass`` (eq=True, not frozen) sets ``__hash__`` to
    None, so ``hash=True`` on ``contents`` does not make instances hashable - confirm intent.
    """
    contents: TOMLRuleContents = field(hash=True)
    path: Path
    # excluded from hashing, comparison and repr; repr takes a bool, so use False rather than None
    gh_pr: Any = field(hash=False, compare=False, default=None, repr=False)

    @property
    def id(self):
        """The id of the rule contents."""
        return self.contents.id

    @property
    def name(self):
        """The name of the rule data."""
        return self.contents.data.name

    def save_toml(self):
        """Write ``contents.to_dict()`` to the absolute form of ``self.path`` via ``toml_write``."""
        converted = self.contents.to_dict()
        toml_write(converted, str(self.path.absolute()))

    def save_json(self, path: Path, include_version: bool = True):
        """Write the API-format rule as JSON to ``path``.

        :param path: destination file; opened for writing with ``\\n`` line endings.
        :param include_version: forwarded to ``contents.to_api_format``.
        """
        with open(str(path.absolute()), 'w', newline='\n') as f:
            json.dump(self.contents.to_api_format(include_version=include_version), f, sort_keys=True, indent=2)
            # finish the file with a trailing newline
            f.write('\n')
2020-06-29 23:17:38 -06:00
2021-02-08 20:43:16 -09:00
2021-03-24 10:24:32 -06:00
def downgrade_contents_from_rule(rule: TOMLRule, target_version: str) -> dict:
    """Generate the downgraded contents from a rule.

    The API-format payload records the original rule id and metadata under
    ``meta["original"]``, receives a freshly generated ``rule_id``, and is then
    passed to ``downgrade`` for ``target_version``.
    """
    api_payload = rule.contents.to_api_format()

    # keep a record of where the downgraded rule came from
    original_info = dict(id=rule.id, **rule.contents.metadata.to_dict())
    api_payload.setdefault("meta", {})["original"] = original_info

    # the downgraded copy gets its own random id
    api_payload["rule_id"] = str(uuid4())
    return downgrade(api_payload, target_version)