From 7d973a3b074fbea57f77d0e5f96fa66933d1e5e9 Mon Sep 17 00:00:00 2001 From: Mika Ayenson Date: Mon, 8 Aug 2022 13:44:36 -0400 Subject: [PATCH] add new field `related_integrations` to the post build (#2060) * add new field `related_integrations` to the post build * add exception for endpoint `integration` * Skip rules without related integrations * lint * refactor related_integrations to TOMLRuleContents class * update to reflect required_fields updates * add todo * add new line for linting * related_integrations updates, get_packaged_integrations returns list of dictionaries, started work on integrations py * build_integrations_manifest command completed * initial test completed for post-building related_integrations * removed get_integration_manifest method from rule, removed global integrations path * moved integration related methods to integrations.py and fixed flake issues * adjustments for PipedQuery from eql sequence rules and packages with no integration * adjusted github client import for integrations.py * Update detection_rules/devtools.py Co-authored-by: Justin Ibarra * Update detection_rules/devtools.py Co-authored-by: Justin Ibarra * added integration manifest schema, made adjustments * Update detection_rules/integrations.py * Update detection_rules/rule.py Co-authored-by: Justin Ibarra * Update detection_rules/rule.py Co-authored-by: Justin Ibarra * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * Update detection_rules/rule.py Co-authored-by: Justin Ibarra * removed get_integrations_package to consolidate code * removed type list return * adjusted import flake errors * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * adjusted indentation error * adjusted rule.get_packaged_integrations to account for kql.ast.OrExpr if event.dataset is not set * Update detection_rules/devtools.py Co-authored-by: Justin Ibarra * Update detection_rules/devtools.py Co-authored-by: Justin Ibarra * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * adjusted find_least_compatible_version in integrations.py * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * fixed flake issues * adjusted get_packaged_integrations * iterate the ast for literal event.dataset values * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * made small adjustments to address errors during build manifests command * addressing integrations.find_least_compatible method to return None instead of raise error only * Update detection_rules/integrations.py Co-authored-by: Mika Ayenson Co-authored-by: Justin Ibarra Co-authored-by: Terrance DeJesus Co-authored-by: Terrance DeJesus <99630311+terrancedejesus@users.noreply.github.com> --- detection_rules/devtools.py | 15 +++ .../etc/integration-manifests.json.gz | Bin 0 -> 2070 bytes detection_rules/integrations.py | 122 ++++++++++++++++++ detection_rules/rule.py | 73 ++++++++++- detection_rules/utils.py | 1 + 5 files changed, 205 insertions(+), 6 deletions(-) create mode 100644 detection_rules/etc/integration-manifests.json.gz create mode 100644 detection_rules/integrations.py diff --git a/detection_rules/devtools.py b/detection_rules/devtools.py index a780631e2..9073bb257 100644 --- a/detection_rules/devtools.py +++ b/detection_rules/devtools.py @@ -40,6 +40,7 @@ from .rule_loader import RuleCollection, production_filter from .schemas import definitions, get_stack_versions from .semver import Version from .utils import dict_hash, get_path, get_etc_path, load_dump +from .integrations import build_integrations_manifest RULES_DIR = get_path('rules') GH_CONFIG = Path.home() / ".config" / "gh" / "hosts.yml" @@ -1088,3 +1089,17 @@ def get_branches(outfile: Path): branch_list = get_stack_versions(drop_patch=True) target_branches = json.dumps(branch_list[:-1]) + "\n" outfile.write_text(target_branches) + + +@dev_group.group('integrations') +def integrations_group(): + """Commands for dev integrations methods.""" + + +@integrations_group.command('build-manifests') +@click.option('--overwrite', '-o', is_flag=True, help="Overwrite the existing integrations-manifest.json.gz file") +@click.option("--token", required=True, prompt=get_github_token() is None, default=get_github_token(), + help="GitHub token to use for the PR", hide_input=True) +def build_integration_manifests(overwrite: bool, token: str): + """Builds consolidated integrations manifests file.""" + build_integrations_manifest(token, overwrite) diff --git a/detection_rules/etc/integration-manifests.json.gz b/detection_rules/etc/integration-manifests.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..79d501d56e596e3a8384c5468c0d128ba3215e27 GIT binary patch literal 2070 zcmV+x2bZKvHEp1_LX=Y_}baO6hb8l_{?VW3n;x-h9 z|4PKCU8zuV<5H=W(5PCis%l4?s`H`hj7%_0L`bM@hGlo>zt>Iz1V|vsKyE%-X@`(F z2|vQCxRV}DCVDAW6V(NC#B`0g})pv#J^s8 zj%{t88D1v3{RAiWnoH{2W+i~ z%{Izsm*E*fei6P0Z|!2@AKJ`jFRQWG4R#P^w)Ha&%e3)VhKY-uw!JS*TW`F$<+&3( zSQPu1+hgRS&MPJ~Jfx?7N7cFbH?g(aQDx>}>^W#1y#Nk>z1URwJy4-Uj#;rh$H5k( zisQ|wG9}(Z*)l5}hQ=#}b_`>7^(ql4juCvWlB~J>9l+5HyR1_bpI(%YQ5YYz;)ixj zi5FA;Mk|LY6ccfr6#=82_9k+JQGWbTo(BHeW`{@vMCTFdP>U`jf?9L|kpZ>nG9su& z7Z90Hi_Rbt1<|(^!G0P3-D!n7zKRYuPa+kpjbStrE3%9catR!6jqk=?1J_rI*&HeT z#z;vq^ggczzw)*(j!Lw`iRd@{P5A~>k1V#!$YOjr#Wvezx+Jzcvm9>~6nW<`b+kRi ztFzhpsqYSmo!L5Xf>@a_(|fvz9$F@evS2w(Tol^paRNBxIjrUJ8! z%=kY^Dw;Q@ZT}bB3(vKgCywCV@HAkTn_UmstaxR^1PXYl{<0p%6*7d`S9uP{$m;UUA6sHYfQvQ3f(c|+2-mt z&gL5&b{09*W@+fJh}hnWD(47^K&x$GS`;2L!}U)NJg0ZsD zx7@BtDDh_8o2ut8YD+1_p>ZqiE@?@3*s?>?6fGTxfDxgXm z-33&8RB+x)iOPV!DlIA#P#vQhbc`%H!*cWKLH$YMI1grTiI1j=m2nvz z{QGjUkMiUQ)>Wa=c!g%e73vLahfh&~X8KTLtpx6(scCZJ>Le`7L-M(VWy^;968=Qt zNnjF7*KZ&_#qCv5;YatMYm@!fXbgFB-y{QKKn=+_e3uW13P28lKp&DZy-PA6eE^d2 zd2rc)3<1dDix1XxoA1{>jR>90y}V9-Kg8H}95I)mI2!TLGb6}$c1vr-tQWB~+3=lt z6Mv>eJoa~oEzSSq_+L6`F`k6K%4U4`YqSwdWD?AosQdrB!NPz3AO2G=wWAxaD4O~4 z+1I5tb-Z=((M=pyCFjuyOBa|C+uF{uxIo|dR*!hBJcM(sDIZ3U46oNU^+a2KSUfS6`#T^Z>r&N2o7vH;3qCZoc@Q;pj>XrUXnMb%G z&khcK%V{6~vqn?@7rYh2rhYLyK9zkrhOAed4pgmlvjbc6+NxcAEsu1zZjYZ^=CVeZ ztV7$arpfPn4M>vN(4wU=+kloWli4PiEsxoKFk2q82Vk}=X7}KnEtA;>m@SXlJuq7q zv-{AEpEp~qz!UmnPn%{u1XyG=-l($gH}<-AQSj8yGk|GTzI9ZJ+QCN_6^|@ zw^{f6MZx)MjERc_n}R8ZQ_s*4DVt<_WRn`PfMc(QUL>$xgML=z(I1=lkRRs|2y=X(JE zp#4Q$u$&cToYsvmI=ivSQb4{S@+y!og}erJzBKYs=gT6m0r^tM>(JYjMBb=B`9L;9 zHs{n9!?v1wp`E6UG;e2z>E{^D|EdI?G1I+$}!b9M>YE;r~E=Ss_t zw`$Owb9qCo8g#S+e^l_N!u|}Q{Z!l^Xg?M9N9#iSskA>T_)|H5^aFpOf2*)RMsw_` z(ZDp@OPRoe0%HR`aHB$Q^x^DYS#KaRQ)O@9s;jErKt!gJ-n_m24_@t&u>aWr05u&6 A&Hw-a literal 0 HcmV?d00001 diff --git a/detection_rules/integrations.py b/detection_rules/integrations.py new file mode 100644 index 000000000..e40156b58 --- /dev/null +++ b/detection_rules/integrations.py @@ -0,0 +1,122 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0; you may not use this file except in compliance with the Elastic License +# 2.0. + +"""Functions to support and interact with Kibana integrations.""" +import gzip +import json +import os +import re +from pathlib import Path +from typing import Union + +import yaml +from marshmallow import EXCLUDE, Schema, fields, post_load + +from .ghwrap import GithubClient +from .semver import Version +from .utils import INTEGRATION_RULE_DIR, cached, get_etc_path, read_gzip + +MANIFEST_FILE_PATH = Path(get_etc_path('integration-manifests.json.gz')) + + +@cached +def load_integrations_manifests() -> dict: + """Load the consolidated integrations manifest.""" + return json.loads(read_gzip(get_etc_path('integration-manifests.json.gz'))) + + +class IntegrationManifestSchema(Schema): + name = fields.Str(required=True) + version = fields.Str(required=True) + release = fields.Str(required=True) + description = fields.Str(required=True) + conditions = fields.Dict(required=True) + policy_templates = fields.List(fields.Dict, required=True) + owner = fields.Dict(required=True) + + @post_load + def transform_policy_template(self, data, **kwargs): + data["policy_templates"] = [policy["name"] for policy in data["policy_templates"]] + return data + + +def build_integrations_manifest(token: str, overwrite: bool) -> None: + """Builds a new local copy of manifest.yaml from integrations Github.""" + if overwrite: + if os.path.exists(MANIFEST_FILE_PATH): + os.remove(MANIFEST_FILE_PATH) + rule_integrations = [d.name for d in Path(INTEGRATION_RULE_DIR).glob('*') if d.is_dir()] + if "endpoint" in rule_integrations: + rule_integrations.remove("endpoint") + + final_integration_manifests = {integration: {} for integration in rule_integrations} + + # initialize github client and point to package-storage prod + github = GithubClient(token) + client = github.authenticated_client + organization = client.get_organization("elastic") + repository = organization.get_repo("package-storage") + pkg_storage_prod_branch = repository.get_branch("production") + pkg_storage_branch_sha = pkg_storage_prod_branch.commit.sha + + for integration in rule_integrations: + integration_manifests = get_integration_manifests(repository, pkg_storage_branch_sha, + pkg_path=f"packages/{integration}") + for manifest in integration_manifests: + validated_manifest = IntegrationManifestSchema(unknown=EXCLUDE).load(manifest) + package_version = validated_manifest.pop("version") + final_integration_manifests[integration][package_version] = validated_manifest + + manifest_file = gzip.open(MANIFEST_FILE_PATH, "w+") + manifest_file_bytes = json.dumps(final_integration_manifests).encode("utf-8") + manifest_file.write(manifest_file_bytes) + + +def find_least_compatible_version(package: str, integration: str, + current_stack_version: str, packages_manifest: dict) -> Union[str, None]: + """Finds least compatible version for specified integration based on stack version supplied.""" + integration_manifests = {k: v for k, v in sorted(packages_manifest[package].items(), key=Version)} + + def compare_versions(int_ver: str, pkg_ver: str) -> bool: + """Compares integration and package version""" + pkg_major, pkg_minor = Version(pkg_ver) + integration_major, integration_minor = Version(int_ver)[:2] + + if int(integration_major) < int(pkg_major) or int(pkg_major) > int(integration_major): + return False + + compatible = Version(int_ver) <= Version(pkg_ver) + return compatible + + for version, manifest in integration_manifests.items(): + for kibana_compat_vers in re.sub(r"\>|\<|\=|\^", "", manifest["conditions"]["kibana.version"]).split(" || "): + if compare_versions(kibana_compat_vers, current_stack_version): + return version + print(f"no compatible version for integration {package}:{integration}") + return None + + +def get_integration_manifests(repository, sha: str, pkg_path: str) -> list: + """Iterates over specified integrations from package-storage and combines manifests per version.""" + integration = pkg_path.split("/")[-1] + versioned_packages = repository.get_dir_contents(pkg_path, ref=sha) + versions = [p.path.split("/")[-1] for p in versioned_packages] + + manifests = [] + for version in versions: + contents = repository.get_dir_contents(f"{pkg_path}/{version}", ref=sha) + print(f"Processing {integration} - Version: {version}") + + processing_version = contents[0].path.split("/")[2] + manifest_content = [c for c in contents if "manifest" in c.path] + + if len(manifest_content) < 1: + raise Exception(f"manifest file does not exist for {integration}:{processing_version}") + + path = manifest_content[0].path + manifest_content = yaml.safe_load(repository.get_contents(path, ref=sha).decoded_content.decode()) + manifests.append(manifest_content) + + return manifests diff --git a/detection_rules/rule.py b/detection_rules/rule.py index d899c4153..4c3b01606 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -17,11 +17,14 @@ from uuid import uuid4 import eql import kql +from kql.ast import FieldComparison from marko.block import Document as MarkoDocument from marko.ext.gfm import gfm from marshmallow import ValidationError, validates_schema from . import beats, ecs, utils +from .integrations import (find_least_compatible_version, + load_integrations_manifests) from .misc import load_current_package_version from .mixins import MarshmallowDataclassMixin, StackCompatMixin from .rule_formatter import nested_normalize, toml_write @@ -165,6 +168,12 @@ class BaseRuleData(MarshmallowDataclassMixin, StackCompatMixin): type: definitions.NonEmptyStr ecs: bool + @dataclass + class RelatedIntegrations: + package: definitions.NonEmptyStr + version: definitions.NonEmptyStr + integration: Optional[definitions.NonEmptyStr] + actions: Optional[list] author: List[str] building_block_type: Optional[str] @@ -186,7 +195,7 @@ class BaseRuleData(MarshmallowDataclassMixin, StackCompatMixin): # explicitly NOT allowed! # output_index: Optional[str] references: Optional[List[str]] - related_integrations: Optional[List[str]] = field(metadata=dict(metadata=dict(min_compat="8.3"))) + related_integrations: Optional[List[RelatedIntegrations]] = field(metadata=dict(metadata=dict(min_compat="8.3"))) required_fields: Optional[List[RequiredFields]] = field(metadata=dict(metadata=dict(min_compat="8.3"))) risk_score: definitions.RiskScore risk_score_mapping: Optional[List[RiskScoreMapping]] @@ -665,7 +674,7 @@ class TOMLRuleContents(BaseRuleContents, MarshmallowDataclassMixin): """Transform the converted API in place before sending to Kibana.""" super()._post_dict_transform(obj) - self.add_related_integrations(obj) + self._add_related_integrations(obj) self._add_required_fields(obj) self._add_setup(obj) @@ -675,10 +684,37 @@ class TOMLRuleContents(BaseRuleContents, MarshmallowDataclassMixin): subclass.from_dict(obj) return obj - def add_related_integrations(self, obj: dict) -> None: + def _add_related_integrations(self, obj: dict) -> None: """Add restricted field related_integrations to the obj.""" - # field_name = "related_integrations" - ... + field_name = "related_integrations" + package_integrations = obj.get(field_name, []) + + if not package_integrations and self.metadata.integration: + packages_manifest = load_integrations_manifests() + current_stack_version = load_current_package_version() + + if self.check_restricted_field_version(field_name): + if isinstance(self.data, QueryRuleData) and self.data.language != 'lucene': + package_integrations = self._get_packaged_integrations(packages_manifest) + + if not package_integrations: + return + + for package in package_integrations: + package["version"] = find_least_compatible_version( + package=package["package"], + integration=package["integration"], + current_stack_version=current_stack_version, + packages_manifest=packages_manifest) + + # if integration is not a policy template remove + if package["version"]: + policy_templates = packages_manifest[ + package["package"]][package["version"]]["policy_templates"] + if package["integration"] not in policy_templates: + del package["integration"] + + obj.setdefault("related_integrations", package_integrations) def _add_required_fields(self, obj: dict) -> None: """Add restricted field required_fields to the obj, derived from the query AST.""" @@ -689,7 +725,7 @@ class TOMLRuleContents(BaseRuleContents, MarshmallowDataclassMixin): required_fields = [] field_name = "required_fields" - if self.check_restricted_field_version(field_name=field_name): + if required_fields and self.check_restricted_field_version(field_name=field_name): obj.setdefault(field_name, required_fields) def _add_setup(self, obj: dict) -> None: @@ -759,6 +795,31 @@ class TOMLRuleContents(BaseRuleContents, MarshmallowDataclassMixin): max_stack = max_stack or current_version return Version(min_stack) <= current_version >= Version(max_stack) + def _get_packaged_integrations(self, package_manifest: dict) -> Optional[List[dict]]: + packaged_integrations = [] + datasets = set() + + for node in self.data.get('ast', []): + if isinstance(node, eql.ast.Comparison) and str(node.left) == 'event.dataset': + datasets.update(set(n.value for n in node if isinstance(n, eql.ast.Literal))) + elif isinstance(node, FieldComparison) and str(node.field) == 'event.dataset': + datasets.update(set(str(n) for n in node if isinstance(n, kql.ast.Value))) + + if not datasets: + return + + for value in sorted(datasets): + integration = 'Unknown' + if '.' in value: + package, integration = value.split('.', 1) + else: + package = value + + if package in list(package_manifest): + packaged_integrations.append({"package": package, "integration": integration}) + + return packaged_integrations + @validates_schema def post_validation(self, value: dict, **kwargs): """Additional validations beyond base marshmallow schemas.""" diff --git a/detection_rules/utils.py b/detection_rules/utils.py index 50eefe170..5eca054f9 100644 --- a/detection_rules/utils.py +++ b/detection_rules/utils.py @@ -33,6 +33,7 @@ import kql CURR_DIR = os.path.dirname(os.path.abspath(__file__)) ROOT_DIR = os.path.dirname(CURR_DIR) ETC_DIR = os.path.join(ROOT_DIR, "detection_rules", "etc") +INTEGRATION_RULE_DIR = os.path.join(ROOT_DIR, "rules", "integrations") class NonelessDict(dict):