# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0; you may not use this file except in compliance with the Elastic License
# 2.0.

"""Rule object."""
import base64
import copy
import hashlib
import json
import os
from pathlib import Path
from uuid import uuid4

import click
import kql
import eql

from . import ecs, beats
from .attack import tactics, build_threat_map_entry, matrix
from .rule_formatter import nested_normalize, toml_write
from .schemas import CurrentSchema, TomlMetadata, downgrade
from .utils import get_path, clear_caches, cached

# Root directory of the rule files; rule paths are reported relative to it and
# new rules are suggested underneath it.
RULES_DIR = get_path("rules")
# NOTE(review): not referenced anywhere else in this module as shown --
# presumably kept for external users; confirm before removing.
_META_SCHEMA_REQ_DEFAULTS = {}


class Rule(object):
    """Rule class containing all the information about a rule.

    Attributes:
        path: Absolute path of the file backing the rule.
        contents: The rule body (the ``rule`` table when one is supplied,
            otherwise the mapping passed in).
        metadata: The ``metadata`` table, or required defaults merged with any
            metadata keys found in the flat contents.
        formatted_rule: Copy of the query as it was supplied at construction.
        unoptimized_query: The query as stored in ``contents`` after validation.
    """

    def __init__(self, path, contents):
        """Create a Rule from a toml management format.

        Args:
            path: Location of the rule file; stored as an absolute path.
            contents: Either ``{'rule': {...}, 'metadata': {...}}`` or a flat
                mapping of rule fields.

        Raises:
            Whatever :meth:`validate` raises for invalid contents or queries.
        """
        self.path = os.path.abspath(path)
        self.contents = contents.get('rule', contents)

        # Only derive metadata from the flat contents when no table was given.
        if 'metadata' in contents:
            self.metadata = contents['metadata']
        else:
            self.metadata = self.set_metadata(contents)

        self.formatted_rule = copy.deepcopy(self.contents.get('query'))

        self.validate()
        self.unoptimized_query = self.contents.get('query')
        self._original_hash = self.get_hash()

    def __str__(self):
        return 'name={}, path={}, query={}'.format(self.name, self.path, self.query)

    def __repr__(self):
        return f'{type(self).__name__}(path={self.path!r}, contents={self.contents!r})'

    def __eq__(self, other):
        """Rules are equal when they are of the same exact type and hash identically."""
        if type(self) is type(other):
            return self.get_hash() == other.get_hash()
        return False

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash(self.get_hash())

    def copy(self):
        """Return an independent copy of this rule.

        Contents and metadata are deep-copied so that editing nested values on
        the copy cannot leak back into this rule. The copy is built with
        ``type(self)`` so that it still compares equal to the original, since
        equality requires the same exact type.
        """
        duplicated = {'rule': copy.deepcopy(self.contents), 'metadata': copy.deepcopy(self.metadata)}
        return type(self)(path=self.path, contents=duplicated)

    @property
    def id(self):
        """The ``rule_id`` field, or None when absent."""
        return self.contents.get("rule_id")

    @property
    def name(self):
        """The ``name`` field, or None when absent."""
        return self.contents.get("name")

    @property
    def query(self):
        """The ``query`` field, or None when absent."""
        return self.contents.get('query')

    @property
    def parsed_query(self):
        """The parsed KQL or EQL query, or None when there is no query or the language is neither."""
        if not self.query:
            return None

        language = self.contents.get('language')

        if language == 'kuery':
            return kql.parse(self.query)

        if language == 'eql':
            # TODO: remove once py-eql supports ipv6 for cidrmatch
            with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
                return eql.parse_query(self.query)

        return None

    @property
    def filters(self):
        """The ``filters`` field, or None when absent."""
        return self.contents.get('filters')

    @property
    def ecs_version(self):
        """Sorted list of the ``ecs_version`` metadata entries (empty when unset)."""
        return sorted(self.metadata.get('ecs_version', []))

    @property
    def flattened_contents(self):
        """Contents and metadata merged into one dict; metadata wins on key collisions."""
        return dict(self.contents, **self.metadata)

    @property
    def type(self):
        """The ``type`` field, or None when absent."""
        return self.contents.get('type')

    @property
    def unique_fields(self):
        """Sorted list of the unique field names used by the query, or None without a parsed query."""
        parsed = self.parsed_query
        if parsed is None:
            return None

        # sorted for a deterministic result, matching get_unique_query_fields
        return sorted(set(str(f) for f in parsed if isinstance(f, (eql.ast.Field, kql.ast.Field))))

    def to_eql(self):
        """Convert a KQL query to EQL; returns None when there is no KQL query."""
        if self.query and self.contents.get('language') == 'kuery':
            return kql.to_eql(self.query)
        return None

    def get_flat_mitre(self):
        """Get flat lists of tactic and technique info.

        Returns:
            dict with sorted ``tactic_names``, ``tactic_ids``, ``technique_names``,
            ``technique_ids``, ``sub_technique_names`` and ``sub_technique_ids``.
            Tactic lists keep duplicates; technique and sub-technique lists are
            de-duplicated.
        """
        tactic_names = []
        tactic_ids = []
        technique_ids = set()
        technique_names = set()
        sub_technique_ids = set()
        sub_technique_names = set()

        for entry in self.contents.get('threat', []):
            tactic_names.append(entry['tactic']['name'])
            tactic_ids.append(entry['tactic']['id'])

            for technique in entry.get('technique', []):
                technique_names.add(technique['name'])
                technique_ids.add(technique['id'])

                sub_techniques = technique.get('subtechnique', [])
                sub_technique_ids.update(st['id'] for st in sub_techniques)
                sub_technique_names.update(st['name'] for st in sub_techniques)

        return {
            'tactic_names': sorted(tactic_names),
            'tactic_ids': sorted(tactic_ids),
            'technique_names': sorted(technique_names),
            'technique_ids': sorted(technique_ids),
            'sub_technique_names': sorted(sub_technique_names),
            'sub_technique_ids': sorted(sub_technique_ids),
        }

    @classmethod
    def get_unique_query_fields(cls, rule_contents):
        """Get a list of unique fields used in a rule query from rule contents.

        Returns:
            Sorted list of field names for ``kuery``/``eql`` rules that have a
            query, otherwise None.
        """
        query = rule_contents.get('query')
        language = rule_contents.get('language')

        if query and language in ('kuery', 'eql'):
            # TODO: remove once py-eql supports ipv6 for cidrmatch
            with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
                parsed = kql.parse(query) if language == 'kuery' else eql.parse_query(query)

            return sorted(set(str(f) for f in parsed if isinstance(f, (eql.ast.Field, kql.ast.Field))))

        return None

    @staticmethod
    @cached
    def get_meta_schema_required_defaults():
        """Get the default values for required properties in the metadata schema."""
        schema = TomlMetadata.get_schema()
        required = set(schema['required'])
        properties = {k: v for k, v in schema['properties'].items() if k in required}

        # NOTE(review): a falsy ``default`` falls through to the ``items`` default and
        # raises KeyError if the property has no ``items`` -- confirm this is intended.
        return {k: v.get('default') or [v['items']['default']] for k, v in properties.items()}

    def set_metadata(self, contents):
        """Parse metadata fields and set missing required fields to the default values."""
        known_properties = TomlMetadata.get_schema()['properties']
        metadata = {k: v for k, v in contents.items() if k in known_properties}

        # copy so the cached defaults are never modified
        defaults = self.get_meta_schema_required_defaults().copy()
        defaults.update(metadata)
        return defaults

    @staticmethod
    def _add_empty_attack_technique(contents: dict = None):
        """Add empty array to ATT&CK technique threat mapping.

        ``contents['threat']`` is replaced with a new list in which every entry
        lacking a ``technique`` key is swapped for a copy carrying an empty one.
        The original entries are not modified. ``contents`` is returned as-is
        when it is None or empty.
        """
        if not contents:
            return contents

        threat = contents.get('threat', [])

        if threat:
            contents['threat'] = [
                entry if 'technique' in entry else {**entry, 'technique': []}
                for entry in threat
            ]

        return contents

    def _run_build_time_transforms(self, contents):
        """Apply changes to rules at build time for rule payload."""
        self._add_empty_attack_technique(contents)
        return contents

    def rule_format(self, formatted_query=True):
        """Get the contents and metadata in rule format.

        Args:
            formatted_query: When true and a query was captured at construction,
                that original query text replaces the stored one.

        Returns:
            ``{'metadata': ..., 'rule': ...}`` where ``rule`` is a shallow copy of
            the contents and ``metadata`` is this rule's metadata object.
        """
        contents = self.contents.copy()

        if formatted_query and self.formatted_rule:
            contents['query'] = self.formatted_rule

        return {'metadata': self.metadata, 'rule': contents}

    def detailed_format(self, add_missing_defaults=True, **additional_details):
        """Get the rule with expanded details.

        Args:
            add_missing_defaults: Fill in non-required defaults for the rule type
                for any field the rule does not set.
            **additional_details: Extra entries merged into the ``details`` table.
        """
        from .rule_loader import get_non_required_defaults_by_type

        rule = self.rule_format()

        if add_missing_defaults:
            non_required_defaults = get_non_required_defaults_by_type(self.type)
            rule['rule'].update({k: v for k, v in non_required_defaults.items() if k not in rule['rule']})

        rule['details'] = {
            'flat_mitre': self.get_flat_mitre(),
            'relative_path': str(Path(self.path).resolve().relative_to(RULES_DIR)),
            'unique_fields': self.unique_fields,
        }
        rule['details'].update(**additional_details)
        return rule

    def normalize(self, indent=2):
        """Normalize the (api only) contents and return a serialized dump of it."""
        normalized = nested_normalize(self.contents, eql_rule=self.type == 'eql')
        return json.dumps(normalized, sort_keys=True, indent=indent)

    def get_path(self):
        """Wrapper around getting path.

        Raises:
            ValueError: If no path is set.
        """
        if not self.path:
            raise ValueError('path not set for rule: \n\t{}'.format(self))

        return self.path

    def needs_save(self):
        """Determines if the rule was changed from original or was never saved."""
        return self._original_hash != self.get_hash()

    def bump_version(self):
        """Bump the version of the rule.

        Raises:
            KeyError: If the contents carry no ``version`` field.
        """
        self.contents['version'] += 1

    def validate(self, as_rule=False, versioned=False, query=True):
        """Validate against a rule schema, query schema, and linting.

        Args:
            as_rule: Validate the full toml (metadata + rule) format.
            versioned: Validate the contents against the versioned schema.
            query: Also validate the query against the ECS/beats schemas.

        Raises:
            KeyError: If the metadata has no ``maturity`` entry.
            Schema and query parse errors raised by the underlying validators.
        """
        # normalizing also surfaces contents that cannot be serialized
        self.normalize()

        if as_rule:
            schema_cls = CurrentSchema.toml_schema()
            contents = self.rule_format()
        elif versioned:
            schema_cls = CurrentSchema.versioned()
            contents = self.contents
        else:
            schema_cls = CurrentSchema
            contents = self.contents

        schema_cls.validate(contents, role=self.type)

        # query validation may only be opted out of for non-production rules
        skip_query_validation = self.metadata['maturity'] in ('experimental', 'development') and \
            self.metadata.get('query_schema_validation') is False

        if query and self.query is not None and not skip_query_validation:
            ecs_versions = self.metadata.get('ecs_version', [ecs.get_max_version()])
            beats_version = self.metadata.get('beats_version', beats.get_max_version())
            indexes = self.contents.get("index", [])
            language = self.contents['language']

            if language == 'kuery':
                self._validate_kql(ecs_versions, beats_version, indexes, self.query, self.name)
            elif language == 'eql':
                self._validate_eql(ecs_versions, beats_version, indexes, self.query, self.name)

    @staticmethod
    @cached
    def _validate_eql(ecs_versions, beats_version, indexes, query, name):
        """Validate an EQL query against all specified schemas, or the default schema if none are specified.

        Raises:
            KeyError: If a requested ECS schema version is unknown.
            eql.EqlParseError: If the query does not validate against a schema.
        """
        # TODO: remove once py-eql supports ipv6 for cidrmatch
        with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
            parsed = eql.parse_query(query)

        beat_types = [index.split("-")[0] for index in indexes if "beat-*" in index]
        beat_schema = beats.get_schema_from_eql(parsed, beat_types, version=beats_version) if beat_types else None

        schemas = []

        if not ecs_versions:
            # no explicit versions: use the default schema, as _validate_kql does
            schemas.append(ecs.get_kql_schema(indexes=indexes, beat_schema=beat_schema))
        else:
            for version in ecs_versions:
                try:
                    schemas.append(ecs.get_kql_schema(indexes=indexes, beat_schema=beat_schema, version=version))
                except KeyError:
                    raise KeyError('Unknown ecs schema version: {} in rule {}.\n'
                                   'Do you need to update schemas?'.format(version, name)) from None

        for schema in schemas:
            try:
                # TODO: remove once py-eql supports ipv6 for cidrmatch
                with ecs.KqlSchema2Eql(schema), eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
                    eql.parse_query(query)

            except eql.EqlTypeMismatchError:
                raise

            except eql.EqlParseError as exc:
                message = exc.error_msg
                trailer = None
                if "Unknown field" in message and beat_types:
                    trailer = "\nTry adding event.module or event.dataset to specify beats module"

                raise type(exc)(message, exc.line, exc.column, exc.source,
                                len(exc.caret.lstrip()), trailer=trailer) from None

    @staticmethod
    @cached
    def _validate_kql(ecs_versions, beats_version, indexes, query, name):
        """Validate a KQL query against all specified schemas, or the default schema if none are specified.

        Raises:
            KeyError: If a requested ECS schema version is unknown.
            kql.KqlParseError: If the query does not validate against a schema.
        """
        parsed = kql.parse(query)
        beat_types = [index.split("-")[0] for index in indexes if "beat-*" in index]
        beat_schema = beats.get_schema_from_kql(parsed, beat_types, version=beats_version) if beat_types else None

        if not ecs_versions:
            kql.parse(query, schema=ecs.get_kql_schema(indexes=indexes, beat_schema=beat_schema))
            return

        for version in ecs_versions:
            try:
                schema = ecs.get_kql_schema(version=version, indexes=indexes, beat_schema=beat_schema)
            except KeyError:
                raise KeyError(
                    'Unknown ecs schema version: {} in rule {}.\n'
                    'Do you need to update schemas?'.format(version, name)) from None

            try:
                kql.parse(query, schema=schema)
            except kql.KqlParseError as exc:
                message = exc.error_msg
                trailer = None
                if "Unknown field" in message and beat_types:
                    trailer = "\nTry adding event.module or event.dataset to specify beats module"

                raise kql.KqlParseError(message, exc.line, exc.column, exc.source,
                                        len(exc.caret.lstrip()), trailer=trailer) from None

    def save(self, new_path=None, as_rule=False, verbose=False):
        """Save the rule as a pretty toml rule file or as a json API payload.

        Args:
            new_path: Destination; defaults to the rule's own path. Its extension
                is replaced with ``.toml`` or ``.json``.
            as_rule: Write the toml rule format instead of the json payload.
            verbose: Print the destination once written.
        """
        path, _ = os.path.splitext(new_path or self.get_path())
        path += '.toml' if as_rule else '.json'

        if as_rule:
            toml_write(self.rule_format(), path)
        else:
            with open(path, 'w', newline='\n') as f:
                json.dump(self.get_payload(), f, sort_keys=True, indent=2)
                f.write('\n')

        if verbose:
            print('Rule {} saved to {}'.format(self.name, path))

    @classmethod
    def dict_hash(cls, contents, versioned=True):
        """Get hash from rule contents.

        Args:
            contents: JSON-serializable rule contents; never modified.
            versioned: When false, the ``version`` key is left out of the hash.

        Returns:
            Hex sha256 digest of the base64-encoded, key-sorted JSON dump.
        """
        if not versioned:
            # filter into a new dict instead of popping from the caller's
            contents = {k: v for k, v in contents.items() if k != 'version'}

        encoded = base64.b64encode(json.dumps(contents, sort_keys=True).encode('utf-8'))
        return hashlib.sha256(encoded).hexdigest()

    def get_hash(self):
        """Get a standardized hash of a rule to consistently check for changes."""
        return self.dict_hash(self.get_payload())

    def get_version(self):
        """Get the version of the rule.

        Returns:
            1 for a rule missing from the stored versions; otherwise the stored
            version, plus one when the rule's hash differs from the stored one.
        """
        from .packaging import load_versions

        rules_versions = load_versions()

        if self.id not in rules_versions:
            return 1

        version_info = rules_versions[self.id]
        version = version_info['version']
        return version + 1 if self.get_hash() != version_info['sha256'] else version

    def get_payload(self, include_version=False, replace_id=False, embed_metadata=False, target_version=None):
        """Get rule as uploadable/API-compatible payload.

        Args:
            include_version: Add the computed ``version``.
            replace_id: Replace ``rule_id`` with a freshly generated uuid4.
            embed_metadata: Store the rule id and metadata under ``meta.original``.
            target_version: When set, downgrade the payload to this version.
        """
        payload = self._run_build_time_transforms(self.contents.copy())

        if include_version:
            payload['version'] = self.get_version()

        if embed_metadata:
            # payload is a shallow copy: build a new ``meta`` dict so the one
            # held by self.contents is left untouched
            meta = dict(payload.get("meta") or {})
            meta["original"] = dict(id=self.id, **self.metadata)
            payload["meta"] = meta

        if replace_id:
            payload["rule_id"] = str(uuid4())

        if target_version:
            payload = downgrade(payload, target_version)

        return payload

    @classmethod
    def build(cls, path=None, rule_type=None, required_only=True, save=True, verbose=False, **kwargs):
        """Build a rule from data and prompts.

        Args:
            path: Destination of the rule file; prompted for when omitted.
            rule_type: Rule type; taken from ``kwargs['type']`` or prompted for when omitted.
            required_only: Only prompt for fields the schema marks as required.
            save: Save the rule as toml once built.
            verbose: Echo the path being built.
            **kwargs: Pre-supplied field values; a ``rule``/``metadata`` pair is flattened.

        Returns:
            The constructed rule.
        """
        from .misc import schema_prompt

        if verbose and path:
            click.echo(f'[+] Building rule for {path}')

        kwargs = copy.deepcopy(kwargs)

        if 'rule' in kwargs and 'metadata' in kwargs:
            kwargs.update(kwargs.pop('metadata'))
            kwargs.update(kwargs.pop('rule'))

        rule_type = rule_type or kwargs.get('type') or \
            click.prompt('Rule type ({})'.format(', '.join(CurrentSchema.RULE_TYPES)),
                         type=click.Choice(CurrentSchema.RULE_TYPES))

        schema = CurrentSchema.get_schema(role=rule_type)
        props = schema['properties']
        opt_reqs = schema.get('required', [])
        contents = {}
        skipped = []

        for name, options in props.items():

            if name == 'type':
                contents[name] = rule_type
                continue

            # these are set at package release time
            if name == 'version':
                continue

            if required_only and name not in opt_reqs:
                continue

            # build this from technique ID
            if name == 'threat':
                threat_map = []

                while click.confirm('add mitre tactic?'):
                    tactic = schema_prompt('mitre tactic name', type='string', enum=tactics, required=True)
                    technique_ids = schema_prompt(f'technique or sub-technique IDs for {tactic}', type='array',
                                                  required=False, enum=list(matrix[tactic])) or []

                    try:
                        threat_map.append(build_threat_map_entry(tactic, *technique_ids))
                    except KeyError as e:
                        click.secho(f'Unknown ID: {e.args[0]} - entry not saved for: {tactic}', fg='red', err=True)
                        continue
                    except ValueError as e:
                        click.secho(f'{e} - entry not saved for: {tactic}', fg='red', err=True)
                        continue

                if len(threat_map) > 0:
                    contents[name] = threat_map
                continue

            if name == 'threshold':
                contents[name] = {n: schema_prompt(f'threshold {n}', required=n in options['required'], **opts.copy())
                                  for n, opts in options['properties'].items()}
                continue

            if kwargs.get(name):
                contents[name] = schema_prompt(name, value=kwargs.pop(name))
                continue

            result = schema_prompt(name, required=name in opt_reqs, **options.copy())

            if result:
                if name not in opt_reqs and result == options.get('default', ''):
                    skipped.append(name)
                    continue

                contents[name] = result

        suggested_path = os.path.join(RULES_DIR, contents['name'])  # TODO: UPDATE BASED ON RULE STRUCTURE
        path = os.path.realpath(path or input('File path for rule [{}]: '.format(suggested_path)) or suggested_path)

        rule = None

        try:
            rule = cls(path, {'rule': contents})
        except kql.KqlParseError as e:
            # prefix match, consistent with the retry loop below
            if e.error_msg.startswith('Unknown field'):
                warning = ('If using a non-ECS field, you must update "ecs{}.non-ecs-schema.json" under `beats` or '
                           '`legacy-endgame` (Non-ECS fields should be used minimally).'.format(os.path.sep))
                click.secho(e.args[0], fg='red', err=True)
                click.secho(warning, fg='yellow', err=True)
                click.pause()

            # if failing due to a query, loop until resolved or terminated
            while True:
                try:
                    edited = click.edit(contents['query'], extension='.eql')
                    # click.edit returns None when the editor is closed without
                    # saving; keep the previous query instead of storing None
                    if edited is not None:
                        contents['query'] = edited
                    rule = cls(path, {'rule': contents})
                except kql.KqlParseError as retry_error:
                    click.secho(retry_error.args[0], fg='red', err=True)
                    click.pause()

                    if retry_error.error_msg.startswith("Unknown field"):
                        # get the latest schema for schema errors
                        clear_caches()
                        ecs.get_kql_schema(indexes=contents.get("index", []))
                    continue

                break

        if save:
            rule.save(verbose=True, as_rule=True)

        if skipped:
            print('Did not set the following values because they are un-required when set to the default value')
            print(' - {}'.format('\n - '.join(skipped)))

        # rta_mappings.add_rule_to_mapping_file(rule)
        # click.echo('Placeholder added to rule-mapping.yml')

        click.echo('Rule will validate against the latest ECS schema available (and beats if necessary)')
        click.echo('    - to have a rule validate against specific ECS schemas, add them to metadata->ecs_version')
        click.echo('    - to have a rule validate against a specific beats schema, add it to metadata->beats_version')

        return rule


def downgrade_contents_from_rule(rule: Rule, target_version: str) -> dict:
    """Generate the downgraded contents from a rule.

    The rule's id and metadata are embedded under ``meta.original``, the
    ``rule_id`` is replaced with a freshly generated uuid4, and the result is
    passed through ``downgrade`` for ``target_version``.

    Args:
        rule: Rule to take the contents from; it is not modified.
        target_version: Version handed to ``downgrade``.

    Returns:
        The downgraded payload.
    """
    payload = rule.contents.copy()

    # payload is only a shallow copy: build a new ``meta`` dict so the one held
    # by rule.contents is left untouched
    meta = dict(payload.get("meta") or {})
    meta["original"] = dict(id=rule.id, **rule.metadata)
    payload["meta"] = meta

    payload["rule_id"] = str(uuid4())
    return downgrade(payload, target_version)