Add rule loader and dependencies
Co-Authored-By: Justin Ibarra <brokensound77@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,357 @@
|
||||
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
# or more contributor license agreements. Licensed under the Elastic License;
|
||||
# you may not use this file except in compliance with the Elastic License.
|
||||
"""Rule object."""
|
||||
import base64
|
||||
import copy
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
|
||||
import click
|
||||
import kql
|
||||
|
||||
from . import ecs, beats
|
||||
from .attack import TACTICS, build_threat_map_entry, technique_lookup
|
||||
from .rule_formatter import nested_normalize, toml_write
|
||||
from .schema import metadata_schema, schema_validate, get_schema
|
||||
from .utils import get_path, clear_caches, cached
|
||||
|
||||
|
||||
RULES_DIR = get_path("rules")
|
||||
RULE_TYPE_OPTIONS = ['machine_learning', 'query', 'saved_id']
|
||||
_META_SCHEMA_REQ_DEFAULTS = {}
|
||||
|
||||
|
||||
class Rule(object):
|
||||
"""Rule class containing all the information about a rule."""
|
||||
|
||||
def __init__(self, path, contents, tune=False):
|
||||
"""Create a Rule from a toml management format."""
|
||||
self.path = os.path.realpath(path)
|
||||
self.contents = contents.get('rule', contents)
|
||||
self.metadata = self.set_metadata(contents.get('metadata', contents))
|
||||
|
||||
self.formatted_rule = copy.deepcopy(self.contents).get('query', None)
|
||||
|
||||
self.validate()
|
||||
self.unoptimized_query = self.contents.get('query')
|
||||
|
||||
if tune:
|
||||
self.tune_rule = True
|
||||
self.tune()
|
||||
|
||||
self._original_hash = self.get_hash()
|
||||
|
||||
def __str__(self):
|
||||
return 'name={}, path={}, query={}'.format(self.name, self.path, self.query)
|
||||
|
||||
def __repr__(self):
|
||||
return '{}(path={}, contents={}, tune={})'.format(type(self).__name__, repr(self.path), repr(self.contents),
|
||||
repr(self.tune_rule))
|
||||
|
||||
def __eq__(self, other):
|
||||
if type(self) == type(other):
|
||||
return self.get_hash() == other.get_hash()
|
||||
return False
|
||||
|
||||
def copy(self):
|
||||
return Rule(path=self.path, contents={'rule': self.contents.copy(), 'metadata': self.metadata.copy()})
|
||||
|
||||
@property
|
||||
def id(self):
|
||||
return self.contents.get("rule_id")
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self.contents.get("name")
|
||||
|
||||
@property
|
||||
def query(self):
|
||||
return self.contents.get('query')
|
||||
|
||||
@property
|
||||
def parsed_kql(self):
|
||||
if self.query and self.contents['language'] == 'kuery':
|
||||
return kql.parse(self.query)
|
||||
|
||||
@property
|
||||
def filters(self):
|
||||
return self.contents.get('filters')
|
||||
|
||||
@property
|
||||
def ecs_version(self):
|
||||
return sorted(self.metadata.get('ecs_version', []))
|
||||
|
||||
@property
|
||||
def flattened_contents(self):
|
||||
return dict(self.contents, **self.metadata)
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
return self.contents.get('type')
|
||||
|
||||
def to_eql(self):
|
||||
if self.query and self.contents['language'] == 'kuery':
|
||||
return kql.to_eql(self.query)
|
||||
|
||||
@staticmethod
|
||||
@cached
|
||||
def get_meta_schema_required_defaults():
|
||||
"""Get the default values for required properties in the metadata schema."""
|
||||
required = [v for v in metadata_schema['required']]
|
||||
properties = {k: v for k, v in metadata_schema['properties'].items() if k in required}
|
||||
return {k: v.get('default') or [v['items']['default']] for k, v in properties.items()}
|
||||
|
||||
def set_metadata(self, contents):
|
||||
"""Parse metadata fields and set missing required fields to the default values."""
|
||||
metadata = {k: v for k, v in contents.items() if k in metadata_schema['properties']}
|
||||
defaults = self.get_meta_schema_required_defaults().copy()
|
||||
defaults.update(metadata)
|
||||
return defaults
|
||||
|
||||
def rule_format(self, formatted_query=True):
|
||||
"""Get the contents in rule format."""
|
||||
contents = self.contents.copy()
|
||||
if formatted_query:
|
||||
if self.formatted_rule:
|
||||
contents['query'] = self.formatted_rule
|
||||
return {'metadata': self.metadata, 'rule': contents}
|
||||
|
||||
def normalize(self, indent=2):
|
||||
"""Normalize the (api only) contents and return a serialized dump of it."""
|
||||
return json.dumps(nested_normalize(self.contents), sort_keys=True, indent=indent)
|
||||
|
||||
def tune(self):
|
||||
"""Tune query by including applicable fields derived from metadata."""
|
||||
# if not self.query:
|
||||
# return
|
||||
#
|
||||
# self.unoptimized_query = self.contents.get('query')
|
||||
#
|
||||
# if not hasattr(self.parsed_query, 'terms'):
|
||||
# # can prepend here if we want
|
||||
# return
|
||||
#
|
||||
# # TODO: This is error prone and absolutely can/should be better done with a custom walker to:
|
||||
# # - find these fields
|
||||
# # - move them to the front/highest precedence
|
||||
# # - dedup+update them with these values from metadata
|
||||
# # I am going to leave it for now as a good mechanism for testing the theory and since it only impacts at
|
||||
# # "package" time and will open an issue in the meantime
|
||||
#
|
||||
# # add os version
|
||||
# # many os ecs fields - will optimize later
|
||||
# # if not any(str(term.left) == '' for term in parsed_query.terms) and self.metadata.get('os_type_list'):
|
||||
# # self.contents['query'] = ':({}) and '.format(' or '.join(self.metadata['_os_type_list'])) + self.query
|
||||
#
|
||||
# # add ecs version
|
||||
# # handle these better with eql2kql
|
||||
# compares = [str(term.left) == 'ecs.version' for term in self.parsed_query.terms
|
||||
# if isinstance(term, Comparison)]
|
||||
# in_sets = [str(term.expression) == 'ecs.version' for term in self.parsed_query.terms
|
||||
# if isinstance(term, InSet)]
|
||||
#
|
||||
# if any(in_sets):
|
||||
# pass
|
||||
# elif any(compares):
|
||||
# pass
|
||||
# elif not (any(compares) or any(in_sets)):
|
||||
# ecs_query = ' or '.join(self.metadata['ecs_version'])
|
||||
# self.contents['query'] = 'ecs.version:({}) and '.format(ecs_query) + self.query
|
||||
|
||||
def untune(self):
|
||||
"""Restore query to pre-tuned state."""
|
||||
# self.contents['query'] = self.unoptimized_query
|
||||
|
||||
def get_path(self):
|
||||
"""Wrapper around getting path."""
|
||||
if not self.path:
|
||||
raise ValueError('path not set for rule: \n\t{}'.format(self))
|
||||
|
||||
return self.path
|
||||
|
||||
def needs_save(self):
|
||||
"""Determines if the rule was changed from original or was never saved."""
|
||||
return self._original_hash != self.get_hash()
|
||||
|
||||
@classmethod # TODO
|
||||
def from_eql_rule(cls, path, contents, validate=False):
|
||||
"""Create a rule from loaded rule (toml) contents."""
|
||||
# if validate:
|
||||
# jsonschema.validate(contents, rule_schema)
|
||||
|
||||
return cls(path, contents)
|
||||
|
||||
def bump_version(self):
|
||||
"""Bump the version of the rule."""
|
||||
self.contents['version'] += 1
|
||||
|
||||
def validate(self, as_rule=False, versioned=False):
|
||||
"""Validate against a rule schema, query schema, and linting."""
|
||||
self.normalize()
|
||||
|
||||
if as_rule:
|
||||
schema_validate(self.rule_format(), as_rule=True)
|
||||
else:
|
||||
schema_validate(self.contents, versioned=versioned)
|
||||
|
||||
if self.query and self.contents['language'] == 'kuery':
|
||||
# validate against all specified schemas or the latest if none specified
|
||||
ecs_versions = self.metadata.get('ecs_version')
|
||||
|
||||
indexes = self.contents.get("index", [])
|
||||
beat_types = [index.split("-")[0] for index in indexes if "beat-*" in index]
|
||||
beat_schema = beats.get_schema_for_query(self.parsed_kql, beat_types) if beat_types else None
|
||||
|
||||
if not ecs_versions:
|
||||
kql.parse(self.query, schema=ecs.get_kql_schema(indexes=indexes, beat_schema=beat_schema))
|
||||
else:
|
||||
for version in ecs_versions:
|
||||
try:
|
||||
schema = ecs.get_kql_schema(version=version, indexes=indexes, beat_schema=beat_schema)
|
||||
except KeyError:
|
||||
raise KeyError(
|
||||
'Unknown ecs schema version: {} in rule {}.\n'
|
||||
'Do you need to update schemas?'.format(version, self.name))
|
||||
|
||||
try:
|
||||
kql.parse(self.query, schema=schema)
|
||||
except kql.KqlParseError as exc:
|
||||
message = exc.error_msg
|
||||
trailer = None
|
||||
if "Unknown field" in message and beat_types:
|
||||
trailer = "\nTry adding event.module and event.dataset to specify beats module"
|
||||
|
||||
raise kql.KqlParseError(exc.error_msg, exc.line, exc.column, exc.source,
|
||||
len(exc.caret.lstrip()), trailer=trailer)
|
||||
|
||||
def save(self, new_path=None, as_rule=False, verbose=False):
|
||||
"""Save as pretty toml rule file as toml."""
|
||||
path, _ = os.path.splitext(new_path or self.get_path())
|
||||
path += '.toml' if as_rule else '.json'
|
||||
|
||||
if as_rule:
|
||||
toml_write(self.rule_format(), path)
|
||||
else:
|
||||
with open(path, 'w', newline='\n') as f:
|
||||
json.dump(self.contents, f, sort_keys=True, indent=2)
|
||||
f.write('\n')
|
||||
|
||||
if verbose:
|
||||
print('Rule {} saved to {}'.format(self.name, path))
|
||||
|
||||
def get_hash(self):
|
||||
"""Get a standardized hash of a rule to consistently check for changes."""
|
||||
contents = base64.b64encode(json.dumps(self.contents, sort_keys=True).encode('utf-8'))
|
||||
return hashlib.sha256(contents).hexdigest()
|
||||
|
||||
@classmethod
|
||||
def build(cls, path=None, rule_type=None, required_only=True, save=True, **kwargs):
|
||||
"""Build a rule from data and prompts."""
|
||||
from .misc import schema_prompt
|
||||
# from .rule_loader import rta_mappings
|
||||
|
||||
kwargs = copy.deepcopy(kwargs)
|
||||
|
||||
while rule_type not in RULE_TYPE_OPTIONS:
|
||||
rule_type = click.prompt('Rule type ({})'.format(', '.join(RULE_TYPE_OPTIONS)))
|
||||
|
||||
schema = get_schema(rule_type)
|
||||
props = schema['properties']
|
||||
opt_reqs = schema.get('required', [])
|
||||
contents = {}
|
||||
skipped = []
|
||||
|
||||
for name, options in props.items():
|
||||
|
||||
if name == 'type':
|
||||
contents[name] = rule_type
|
||||
continue
|
||||
|
||||
# these are set at package release time
|
||||
if name == 'version':
|
||||
continue
|
||||
|
||||
if required_only and name not in opt_reqs:
|
||||
continue
|
||||
|
||||
# build this from technique ID
|
||||
if name == 'threat':
|
||||
threat_map = []
|
||||
|
||||
while click.confirm('add mitre tactic?'):
|
||||
tactic = schema_prompt('mitre tactic name', type='string', enum=TACTICS, required=True)
|
||||
technique_ids = schema_prompt(f'technique IDs for {tactic}', type='array', required=True,
|
||||
enum=list(technique_lookup))
|
||||
threat_map.append(build_threat_map_entry(tactic, *technique_ids))
|
||||
|
||||
if len(threat_map) > 0:
|
||||
contents[name] = threat_map
|
||||
continue
|
||||
|
||||
if kwargs.get(name):
|
||||
contents[name] = schema_prompt(kwargs.pop(name))
|
||||
continue
|
||||
|
||||
result = schema_prompt(name, required=name in opt_reqs, **options)
|
||||
|
||||
if result:
|
||||
if name not in opt_reqs and result == options.get('default', ''):
|
||||
skipped.append(name)
|
||||
continue
|
||||
|
||||
contents[name] = result
|
||||
|
||||
metadata = {}
|
||||
ecs_version = schema_prompt('ecs_version', required=False, value=None,
|
||||
**metadata_schema['properties']['ecs_version'])
|
||||
if ecs_version:
|
||||
metadata['ecs_version'] = ecs_version
|
||||
|
||||
# validate before creating
|
||||
schema_validate(contents)
|
||||
|
||||
suggested_path = os.path.join(RULES_DIR, contents['name']) # TODO: UPDATE BASED ON RULE STRUCTURE
|
||||
path = os.path.realpath(path or input('File path for rule [{}]: '.format(suggested_path)) or suggested_path)
|
||||
|
||||
rule = None
|
||||
|
||||
try:
|
||||
rule = cls(path, {'rule': contents, 'metadata': metadata})
|
||||
except kql.KqlParseError as e:
|
||||
if e.error_msg == 'Unknown field':
|
||||
warning = ('If using a non-ECS field, you must update "ecs{}.non-ecs-schema.json" under `beats` or '
|
||||
'`legacy-endgame` (Non-ECS fields should be used minimally).'.format(os.path.sep))
|
||||
click.secho(e.args[0], fg='red', err=True)
|
||||
click.secho(warning, fg='yellow', err=True)
|
||||
click.pause()
|
||||
|
||||
# if failing due to a query, loop until resolved or terminated
|
||||
while True:
|
||||
try:
|
||||
contents['query'] = click.edit(contents['query'], extension='.eql')
|
||||
rule = cls(path, {'rule': contents, 'metadata': metadata})
|
||||
except kql.KqlParseError as e:
|
||||
click.secho(e.args[0], fg='red', err=True)
|
||||
click.pause()
|
||||
|
||||
if e.error_msg.startswith("Unknown field"):
|
||||
# get the latest schema for schema errors
|
||||
clear_caches()
|
||||
ecs.get_kql_schema(indexes=contents.get("index", []))
|
||||
continue
|
||||
|
||||
break
|
||||
|
||||
if save:
|
||||
rule.save(verbose=True, as_rule=True)
|
||||
|
||||
if skipped:
|
||||
print('Did not set the following values because they are un-required when set to the default value')
|
||||
print(' - {}'.format('\n - '.join(skipped)))
|
||||
|
||||
# rta_mappings.add_rule_to_mapping_file(rule)
|
||||
click.echo('Placeholder added to rule-mapping.yml')
|
||||
|
||||
return rule
|
||||
Reference in New Issue
Block a user