diff --git a/detection_rules/devtools.py b/detection_rules/devtools.py index 22281acbb..70971955e 100644 --- a/detection_rules/devtools.py +++ b/detection_rules/devtools.py @@ -28,7 +28,8 @@ from .eswrap import CollectEvents, add_range_to_dsl from .ghwrap import GithubClient from .main import root from .misc import PYTHON_LICENSE, add_client, client_error -from .packaging import PACKAGE_FILE, Package, RELEASE_DIR, current_stack_version, manage_versions +from .packaging import PACKAGE_FILE, Package, RELEASE_DIR, current_stack_version +from .version_lock import manage_versions, load_versions from .rule import AnyRuleData, BaseRuleData, QueryRuleData, TOMLRule from .rule_loader import RuleCollection, production_filter from .semver import Version @@ -66,11 +67,15 @@ def build_release(config_file, update_version_lock, release=None, verbose=True): if verbose: click.echo('[+] Building package {}'.format(config.get('name'))) - package = Package.from_config(config, update_version_lock=update_version_lock, verbose=verbose) + package = Package.from_config(config, verbose=verbose) + + if update_version_lock: + manage_versions(package.rules, save_changes=True, verbose=verbose) + package.save(verbose=verbose) if verbose: - package.get_package_hash(verbose=True) + package.get_package_hash(verbose=verbose) click.echo(f'- {len(package.rules)} rules included') return package @@ -565,9 +570,9 @@ def package_stats(ctx, token, threads): new, modified, errors = rule_loader.load_github_pr_rules(labels=[release], token=token, threads=threads) click.echo(f'Total rules as of {release} package: {len(current_package.rules)}') - click.echo(f'New rules: {len(current_package.new_rules_ids)}') - click.echo(f'Modified rules: {len(current_package.changed_rule_ids)}') - click.echo(f'Deprecated rules: {len(current_package.removed_rule_ids)}') + click.echo(f'New rules: {len(current_package.new_ids)}') + click.echo(f'Modified rules: {len(current_package.changed_ids)}') + click.echo(f'Deprecated rules: {len(current_package.removed_ids)}') click.echo('\n-----\n') click.echo('Rules in active PRs for current package: ') @@ -637,7 +642,6 @@ def search_rule_prs(ctx, no_loop, query, columns, language, token, threads): def deprecate_rule(ctx: click.Context, rule_file: str): """Deprecate a rule.""" import pytoml - from .packaging import load_versions version_info = load_versions() rule_file = Path(rule_file) diff --git a/detection_rules/packaging.py b/detection_rules/packaging.py index d0e1b7482..81eddc0b8 100644 --- a/detection_rules/packaging.py +++ b/detection_rules/packaging.py @@ -23,7 +23,8 @@ from .rule import TOMLRule, QueryRuleData, ThreatMapping from .rule import downgrade_contents_from_rule from .rule_loader import RuleCollection, DEFAULT_RULES_DIR from .schemas import definitions -from .utils import Ndjson, dict_hash, get_path, get_etc_path, load_etc_dump, save_etc_dump +from .utils import Ndjson, get_path, get_etc_path, load_etc_dump +from .version_lock import manage_versions RELEASE_DIR = get_path("releases") PACKAGE_FILE = get_etc_path('packages.yml') @@ -68,77 +69,13 @@ def load_current_package_version() -> str: return load_etc_dump('packages.yml')['package']['name'] -@cached -def load_versions(current_versions: dict = None): - """Load the versions file.""" - return current_versions or load_etc_dump('version.lock.json') - - -def manage_versions(rules: List[TOMLRule], current_versions: dict = None, - exclude_version_update=False, add_new=True, save_changes=False, - verbose=True) -> (List[str], List[str], List[str]): - """Update the contents of the version.lock file and optionally save changes.""" - current_versions = load_versions(current_versions) - versions_hash = dict_hash(current_versions) - rule_deprecations = load_etc_dump('deprecated_rules.json') - - echo = click.echo if verbose else (lambda x: None) - - already_deprecated = set(rule_deprecations) - deprecated_rules = set(rule.id for rule in rules if rule.contents.metadata.maturity == "deprecated") - new_rules = set(rule.id for rule in rules if rule.contents.latest_version is None) - deprecated_rules - changed_rules = set(rule.id for rule in rules if rule.contents.is_dirty) - deprecated_rules - - # manage deprecated rules - newly_deprecated = deprecated_rules - already_deprecated - - if not (new_rules or changed_rules or newly_deprecated): - return list(changed_rules), list(new_rules), list(newly_deprecated) - - echo('Rule changes detected!') - - if not save_changes: - echo('run `build-release --update-version-lock` to update version.lock.json and deprecated_rules.json') - return list(changed_rules), list(new_rules), list(newly_deprecated) - - for rule in rules: - contents = rule.contents.lock_info(bump=not exclude_version_update) - - if rule.contents.metadata.maturity == "production": - current_versions[rule.id] = contents - - elif rule.id in newly_deprecated: - current_versions[rule.id] = contents - rule_deprecations[rule.id] = { - "rule_name": rule.name, - "stack_version": current_stack_version, - "deprecation_date": rule.contents.metadata.deprecation_date - } - - new_hash = dict_hash(current_versions) - - if versions_hash != new_hash: - save_etc_dump(current_versions, 'version.lock.json') - echo('Updated version.lock.json file') - - if newly_deprecated: - save_etc_dump(rule_deprecations, 'deprecated_rules.json') - echo('Updated deprecated_rules.json file') - - echo(f' - {len(changed_rules)} changed rules') - echo(f' - {len(new_rules)} new rules') - echo(f' - {len(newly_deprecated)} newly deprecated rules') - - return changed_rules, list(new_rules), newly_deprecated - - class Package(object): """Packaging object for siem rules and releases.""" def __init__(self, rules: List[TOMLRule], name: str, deprecated_rules: Optional[List[TOMLRule]] = None, - release: Optional[bool] = False, current_versions: Optional[dict] = None, + release: Optional[bool] = False, min_version: Optional[int] = None, max_version: Optional[int] = None, - update_version_lock: Optional[bool] = False, registry_data: Optional[dict] = None, + registry_data: Optional[dict] = None, verbose: Optional[bool] = True): """Initialize a package.""" self.name = name @@ -147,18 +84,14 @@ class Package(object): self.release = release self.registry_data = registry_data or {} - self.changed_rule_ids, self.new_rules_ids, self.removed_rule_ids = self._add_versions(current_versions, - update_version_lock, - verbose=verbose) + if min_version is not None: + self.rules = [r for r in self.rules if min_version <= r.contents.latest_version] - if min_version or max_version: - self.rules = [r for r in self.rules - if (min_version or 0) <= r.contents['version'] <= (max_version or r.contents['version'])] + if max_version is not None: + self.rules = [r for r in self.rules if max_version >= r.contents.latest_version] - def _add_versions(self, current_versions, update_versions_lock=False, verbose=True): - """Add versions to rules at load time.""" - return manage_versions(self.rules, current_versions=current_versions, - save_changes=update_versions_lock, verbose=verbose) + self.changed_ids, self.new_ids, self.removed_ids = \ + manage_versions(self.rules, verbose=False, save_changes=False) @classmethod def load_configs(cls): @@ -252,7 +185,7 @@ class Package(object): if self.release: self._generate_registry_package(save_dir) - self.save_release_files(extras_dir, self.changed_rule_ids, self.new_rules_ids, self.removed_rule_ids) + self.save_release_files(extras_dir, self.changed_ids, self.new_ids, self.removed_ids) # zip all rules only and place in extras shutil.make_archive(os.path.join(extras_dir, self.name), 'zip', root_dir=os.path.dirname(rules_dir), @@ -308,7 +241,7 @@ class Package(object): return sha256 @classmethod - def from_config(cls, config: dict = None, update_version_lock: bool = False, verbose: bool = False) -> 'Package': + def from_config(cls, config: dict = None, verbose: bool = False) -> 'Package': """Load a rules package given a config.""" all_rules = RuleCollection.default() config = config or {} @@ -325,9 +258,7 @@ class Package(object): if verbose: click.echo(f' - {len(all_rules) - len(rules)} rules excluded from package') - package = cls(rules, deprecated_rules=deprecated_rules, update_version_lock=update_version_lock, - verbose=verbose, **config) - + package = cls(rules, deprecated_rules=deprecated_rules, verbose=verbose, **config) return package def generate_summary_and_changelog(self, changed_rule_ids, new_rule_ids, removed_rules): @@ -505,10 +436,6 @@ class Package(object): readme_file.write_text(readme_text) notice_file.write_text(notice_contents) - def bump_versions(self, save_changes=False, current_versions=None): - """Bump the versions of all production rules included in a release and optionally save changes.""" - return manage_versions(self.rules, current_versions=current_versions, save_changes=save_changes) - def create_bulk_index_body(self) -> Tuple[Ndjson, Ndjson]: """Create a body to bulk index into a stack.""" package_hash = self.get_package_hash(verbose=False) @@ -534,9 +461,9 @@ class Package(object): summary_doc['rule_names'].append(rule.name) summary_doc['rule_hashes'].append(rule.contents.sha256()) - if rule.id in self.new_rules_ids: + if rule.id in self.new_ids: status = 'new' - elif rule.id in self.changed_rule_ids: + elif rule.id in self.changed_ids: status = 'modified' else: status = 'unmodified' diff --git a/detection_rules/rule.py b/detection_rules/rule.py index 37439489f..654d08d3a 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -399,31 +399,26 @@ class TOMLRuleContents(MarshmallowDataclassMixin): def lock_info(self, bump=True) -> dict: version = self.autobumped_version if bump else (self.latest_version or 1) - return {"rule_name": self.name, "sha256": self.sha256(), "version": version} + contents = {"rule_name": self.name, "sha256": self.sha256(), "version": version} + + return contents @property def is_dirty(self) -> Optional[bool]: """Determine if the rule has changed since its version was locked.""" - from .packaging import load_versions + from .version_lock import get_locked_hash - rules_versions = load_versions() + existing_sha256 = get_locked_hash(self.id, self.metadata.min_stack_version) - if self.id in rules_versions: - version_info = rules_versions[self.id] - existing_sha256: str = version_info['sha256'] + if existing_sha256 is not None: return existing_sha256 != self.sha256() @property def latest_version(self) -> Optional[int]: """Retrieve the latest known version of the rule.""" - from .packaging import load_versions + from .version_lock import get_locked_version - rules_versions = load_versions() - - if self.id in rules_versions: - version_info = rules_versions[self.id] - version = version_info['version'] - return version + return get_locked_version(self.id, self.metadata.min_stack_version) @property def autobumped_version(self) -> Optional[int]: diff --git a/detection_rules/version_lock.py b/detection_rules/version_lock.py new file mode 100644 index 000000000..a93b1d614 --- /dev/null +++ b/detection_rules/version_lock.py @@ -0,0 +1,165 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0; you may not use this file except in compliance with the Elastic License +# 2.0. +"""Helper utilities to manage the version lock.""" +from copy import deepcopy +from typing import List, Optional + +import click + +from .rule import TOMLRule +from .semver import Version +from .utils import dict_hash, load_etc_dump, save_etc_dump, cached + +ETC_VERSION_LOCK_FILE = "version.lock.json" +ETC_DEPRECATED_RULES_FILE = "deprecated_rules.json" +MIN_LOCK_VERSION_DEFAULT = Version("7.13.0") + + +def _convert_lock_version(stack_version: Optional[str]) -> Version: + """Convert an optional stack version to the minimum for the lock.""" + if stack_version is None: + return MIN_LOCK_VERSION_DEFAULT + return max(Version(stack_version), MIN_LOCK_VERSION_DEFAULT) + + +@cached +def get_locked_version(rule_id: str, min_stack_version: Optional[str] = None) -> Optional[int]: + rules_versions = load_versions() + + if rule_id in rules_versions: + latest_version_info = rules_versions[rule_id] + stack_version_info = latest_version_info.get("previous", {}).get(min_stack_version, latest_version_info) + return stack_version_info['version'] + + +@cached +def get_locked_hash(rule_id: str, min_stack_version: Optional[str] = None) -> Optional[str]: + rules_versions = load_versions() + + # Get the version info matching the min_stack_version if present + if rule_id in rules_versions: + latest_version_info = rules_versions[rule_id] + stack_version_info = latest_version_info.get("previous", {}).get(min_stack_version, latest_version_info) + existing_sha256: str = stack_version_info['sha256'] + return existing_sha256 + + +def manage_versions(rules: List[TOMLRule], + exclude_version_update=False, save_changes=False, + verbose=True) -> (List[str], List[str], List[str]): + """Update the contents of the version.lock file and optionally save changes.""" + from .packaging import current_stack_version + + current_versions = deepcopy(load_versions()) + versions_hash = dict_hash(current_versions) + rule_deprecations = load_etc_dump(ETC_DEPRECATED_RULES_FILE) + + verbose_echo = click.echo if verbose else (lambda x: None) + + already_deprecated = set(rule_deprecations) + deprecated_rules = set(rule.id for rule in rules if rule.contents.metadata.maturity == "deprecated") + new_rules = set(rule.id for rule in rules if rule.contents.latest_version is None) - deprecated_rules + changed_rules = set(rule.id for rule in rules if rule.contents.is_dirty) - deprecated_rules + + # manage deprecated rules + newly_deprecated = deprecated_rules - already_deprecated + + if not (new_rules or changed_rules or newly_deprecated): + return list(changed_rules), list(new_rules), list(newly_deprecated) + + verbose_echo('Rule changes detected!') + + for rule in rules: + if rule.contents.metadata.maturity == "production" or rule.id in newly_deprecated: + # assume that older stacks are always locked first + min_stack = _convert_lock_version(rule.contents.metadata.min_stack_version) + + lock_info = rule.contents.lock_info(bump=not exclude_version_update) + current_rule_lock: dict = current_versions.setdefault(rule.id, {}) + + # scenarios to handle, assuming older stacks are always locked first: + # 1) no breaking changes ever made or the first time a rule is created + # 2) on the latest, after a breaking change has been locked + # 3) on the latest stack, locking in a breaking change + # 4) on an old stack, after a breaking change has been made + latest_locked_stack_version = _convert_lock_version(current_rule_lock.get("min_stack_version")) + + if not current_rule_lock or min_stack == latest_locked_stack_version: + # 1) no breaking changes ever made or the first time a rule is created + # 2) on the latest, after a breaking change has been locked + current_rule_lock.update(lock_info) + + # add the min_stack_version to the lock if it's explicitly set + if rule.contents.metadata.min_stack_version is not None: + current_rule_lock["min_stack_version"] = str(min_stack) + + elif min_stack > latest_locked_stack_version: + # 3) on the latest stack, locking in a breaking change + previous_lock_info = { + "rule_name": current_rule_lock["rule_name"], + "sha256": current_rule_lock["sha256"], + "version": current_rule_lock["version"], + } + current_rule_lock.setdefault("previous", {}) + + # move the current locked info into the previous section + current_rule_lock["previous"][str(latest_locked_stack_version)] = previous_lock_info + + # overwrite the "latest" part of the lock at the top level + current_rule_lock.update(lock_info, min_stack_version=str(min_stack)) + + elif min_stack < latest_locked_stack_version: + # 4) on an old stack, after a breaking change has been made + assert str(min_stack) in current_rule_lock.get("previous", {}), \ + f"Expected {rule.id} @ v{min_stack} in the rule lock" + + # TODO: Figure out whether we support locking old versions and if we want to + # "leave room" by skipping versions when breaking changes are made. + # We can still inspect the version lock manually after locks are made, + # since it's a good summary of everything that happens + current_rule_lock["previous"][str(min_stack)] = lock_info + continue + else: + raise RuntimeError("Unreachable code") + + for rule in rules: + if rule.id in newly_deprecated: + rule_deprecations[rule.id] = { + "rule_name": rule.name, + "stack_version": current_stack_version, + "deprecation_date": rule.contents.metadata.deprecation_date + } + + if save_changes or verbose: + click.echo(f' - {len(changed_rules)} changed rules') + click.echo(f' - {len(new_rules)} new rules') + click.echo(f' - {len(newly_deprecated)} newly deprecated rules') + + if not save_changes: + verbose_echo('run `build-release --update-version-lock` to update version.lock.json and deprecated_rules.json') + return list(changed_rules), list(new_rules), list(newly_deprecated) + + new_hash = dict_hash(current_versions) + + if versions_hash != new_hash: + save_etc_dump(current_versions, ETC_VERSION_LOCK_FILE) + click.echo('Updated version.lock.json file') + + if newly_deprecated: + save_etc_dump(rule_deprecations, ETC_DEPRECATED_RULES_FILE) + click.echo('Updated deprecated_rules.json file') + + return changed_rules, list(new_rules), newly_deprecated + + +@cached +def load_versions(): + """Load the versions file.""" + return load_etc_dump(ETC_VERSION_LOCK_FILE) + + +def save_versions(current_versions: dict): + save_etc_dump(current_versions, ETC_VERSION_LOCK_FILE) + print('Updated version.lock.json file') diff --git a/tests/test_all_rules.py b/tests/test_all_rules.py index 75888e7a3..78047821d 100644 --- a/tests/test_all_rules.py +++ b/tests/test_all_rules.py @@ -14,7 +14,7 @@ import eql import kql from detection_rules import attack -from detection_rules.packaging import load_versions +from detection_rules.version_lock import load_versions from detection_rules.rule import QueryRuleData from detection_rules.rule_loader import FILE_PATTERN from detection_rules.schemas import definitions diff --git a/tests/test_packages.py b/tests/test_packages.py index 6abca7dd1..05bba2c56 100644 --- a/tests/test_packages.py +++ b/tests/test_packages.py @@ -57,8 +57,7 @@ class TestPackages(BaseRuleTest): """Test the generation of the package summary.""" rules = self.production_rules package = Package(rules, 'test-package') - changed_rule_ids, new_rule_ids, deprecated_rule_ids = package.bump_versions(save_changes=False) - package.generate_summary_and_changelog(changed_rule_ids, new_rule_ids, deprecated_rule_ids) + package.generate_summary_and_changelog(package.changed_ids, package.new_ids, package.removed_ids) def test_rule_versioning(self): """Test that all rules are properly versioned and tracked"""