[FR] Re-factor Build Integrations Manifest (#2274)

* adjusted how integrations list is created

* removed unused import and addressed linting errors

* adjusted integration_manifest dictionary to only load latest major

* adjusted manifests sourcing from GH to EPR CDN

* addressed flake errors

* added some additional comments and formatting

* updating integration-manifests file

* adjusted test_integration testing

* addressed flake errors

* Update detection_rules/integrations.py

Co-authored-by: Justin Ibarra <brokensound77@users.noreply.github.com>

* Update detection_rules/integrations.py

Co-authored-by: Justin Ibarra <brokensound77@users.noreply.github.com>

* added folder unit tests

* updated unit test to remove network calls

* Update tests/test_all_rules.py

Co-authored-by: Mika Ayenson <Mikaayenson@users.noreply.github.com>

Co-authored-by: Justin Ibarra <brokensound77@users.noreply.github.com>
Co-authored-by: Mika Ayenson <Mikaayenson@users.noreply.github.com>
This commit is contained in:
Terrance DeJesus
2022-09-28 09:33:49 -04:00
committed by GitHub
parent 1b6355eee9
commit b31a1b761c
5 changed files with 63 additions and 55 deletions
+6 -4
View File
@@ -1131,8 +1131,10 @@ def integrations_group():
@integrations_group.command('build-manifests')
@click.option('--overwrite', '-o', is_flag=True, help="Overwrite the existing integrations-manifest.json.gz file")
@click.option("--token", required=True, prompt=get_github_token() is None, default=get_github_token(),
help="GitHub token to use for the PR", hide_input=True)
def build_integration_manifests(overwrite: bool, token: str):
def build_integration_manifests(overwrite: bool):
"""Builds consolidated integrations manifests file."""
build_integrations_manifest(token, overwrite)
click.echo("loading rules to determine all integration tags")
rules = RuleCollection.default()
integration_tags = list(set([r.contents.metadata.integration for r in rules if r.contents.metadata.integration]))
click.echo(f"integration tags identified: {integration_tags}")
build_integrations_manifest(overwrite, integration_tags)
Binary file not shown.
+26 -38
View File
@@ -11,12 +11,11 @@ import re
from pathlib import Path
from typing import Union
import yaml
import requests
from marshmallow import EXCLUDE, Schema, fields, post_load
from .ghwrap import GithubClient
from .semver import Version
from .utils import INTEGRATION_RULE_DIR, cached, get_etc_path, read_gzip
from .utils import cached, get_etc_path, read_gzip
MANIFEST_FILE_PATH = Path(get_etc_path('integration-manifests.json.gz'))
@@ -34,7 +33,7 @@ class IntegrationManifestSchema(Schema):
description = fields.Str(required=True)
conditions = fields.Dict(required=True)
policy_templates = fields.List(fields.Dict, required=True)
owner = fields.Dict(required=True)
owner = fields.Dict(required=False)
@post_load
def transform_policy_template(self, data, **kwargs):
@@ -42,28 +41,16 @@ class IntegrationManifestSchema(Schema):
return data
def build_integrations_manifest(token: str, overwrite: bool) -> None:
def build_integrations_manifest(overwrite: bool, rule_integrations: list) -> None:
"""Builds a new local copy of manifest.yaml from integrations Github."""
if overwrite:
if os.path.exists(MANIFEST_FILE_PATH):
os.remove(MANIFEST_FILE_PATH)
rule_integrations = [d.name for d in Path(INTEGRATION_RULE_DIR).glob('*') if d.is_dir()]
if "endpoint" in rule_integrations:
rule_integrations.remove("endpoint")
final_integration_manifests = {integration: {} for integration in rule_integrations}
# initialize github client and point to package-storage prod
github = GithubClient(token)
client = github.authenticated_client
organization = client.get_organization("elastic")
repository = organization.get_repo("package-storage")
pkg_storage_prod_branch = repository.get_branch("production")
pkg_storage_branch_sha = pkg_storage_prod_branch.commit.sha
for integration in rule_integrations:
integration_manifests = get_integration_manifests(repository, pkg_storage_branch_sha,
pkg_path=f"packages/{integration}")
integration_manifests = get_integration_manifests(integration)
for manifest in integration_manifests:
validated_manifest = IntegrationManifestSchema(unknown=EXCLUDE).load(manifest)
package_version = validated_manifest.pop("version")
@@ -72,6 +59,7 @@ def build_integrations_manifest(token: str, overwrite: bool) -> None:
manifest_file = gzip.open(MANIFEST_FILE_PATH, "w+")
manifest_file_bytes = json.dumps(final_integration_manifests).encode("utf-8")
manifest_file.write(manifest_file_bytes)
print(f"final integrations manifests dumped: {MANIFEST_FILE_PATH}")
def find_least_compatible_version(package: str, integration: str,
@@ -79,6 +67,11 @@ def find_least_compatible_version(package: str, integration: str,
"""Finds least compatible version for specified integration based on stack version supplied."""
integration_manifests = {k: v for k, v in sorted(packages_manifest[package].items(), key=Version)}
# trim integration_manifests to only the latest major entries
max_major, *_ = max([Version(manifest_version) for manifest_version in integration_manifests])
latest_major_integration_manifests = \
{k: v for k, v in integration_manifests.items() if Version(k)[0] == max_major}
def compare_versions(int_ver: str, pkg_ver: str) -> bool:
"""Compares integration and package version"""
pkg_major, pkg_minor = Version(pkg_ver)
@@ -90,33 +83,28 @@ def find_least_compatible_version(package: str, integration: str,
compatible = Version(int_ver) <= Version(pkg_ver)
return compatible
for version, manifest in integration_manifests.items():
for kibana_compat_vers in re.sub(r"\>|\<|\=|\^", "", manifest["conditions"]["kibana.version"]).split(" || "):
for version, manifest in latest_major_integration_manifests.items():
for kibana_compat_vers in re.sub(r"\>|\<|\=|\^", "", manifest["conditions"]["kibana"]["version"]).split(" || "):
if compare_versions(kibana_compat_vers, current_stack_version):
return version
return f"^{version}"
print(f"no compatible version for integration {package}:{integration}")
return None
def get_integration_manifests(repository, sha: str, pkg_path: str) -> list:
def get_integration_manifests(integration: str) -> list:
"""Iterates over specified integrations from package-storage and combines manifests per version."""
integration = pkg_path.split("/")[-1]
versioned_packages = repository.get_dir_contents(pkg_path, ref=sha)
versions = [p.path.split("/")[-1] for p in versioned_packages]
epr_search_url = "https://epr.elastic.co/search"
manifests = []
for version in versions:
contents = repository.get_dir_contents(f"{pkg_path}/{version}", ref=sha)
print(f"Processing {integration} - Version: {version}")
# link for search parameters - https://github.com/elastic/package-registry
epr_search_parameters = {"package": f"{integration}", "prerelease": "true",
"all": "true", "include_policy_templates": "true"}
epr_search_response = requests.get(epr_search_url, params=epr_search_parameters)
epr_search_response.raise_for_status()
manifests = epr_search_response.json()
processing_version = contents[0].path.split("/")[2]
manifest_content = [c for c in contents if "manifest" in c.path]
if len(manifest_content) < 1:
raise Exception(f"manifest file does not exist for {integration}:{processing_version}")
path = manifest_content[0].path
manifest_content = yaml.safe_load(repository.get_contents(path, ref=sha).decoded_content.decode())
manifests.append(manifest_content)
if not manifests:
raise ValueError(f"EPR search for {integration} integration package returned empty list")
print(f"loaded {integration} manifests from the following package versions: "
f"{[manifest['version'] for manifest in manifests]}")
return manifests
+1 -1
View File
@@ -769,7 +769,7 @@ class TOMLRuleContents(BaseRuleContents, MarshmallowDataclassMixin):
# if integration is not a policy template remove
if package["version"]:
policy_templates = packages_manifest[
package["package"]][package["version"]]["policy_templates"]
package["package"]][package["version"].strip("^")]["policy_templates"]
if package["integration"] not in policy_templates:
del package["integration"]
+30 -12
View File
@@ -11,17 +11,18 @@ from collections import defaultdict
from pathlib import Path
import kql
from detection_rules import attack
from detection_rules.beats import parse_beats_from_index
from detection_rules.packaging import current_stack_version
from detection_rules.rule import QueryRuleData
from detection_rules.rule import (QueryRuleData, TOMLRuleContents,
load_integrations_manifests)
from detection_rules.rule_loader import FILE_PATTERN
from detection_rules.schemas import definitions
from detection_rules.semver import Version
from detection_rules.utils import get_path, load_etc_dump
from detection_rules.utils import INTEGRATION_RULE_DIR, get_path, load_etc_dump
from detection_rules.version_lock import default_version_lock
from rta import get_available_tests
from .base import BaseRuleTest
@@ -440,19 +441,36 @@ class TestRuleMetadata(BaseRuleTest):
"""Test that rules in integrations folders have matching integration defined."""
failures = []
for rule in self.production_rules:
rules_path = get_path('rules')
*_, grandparent, parent, _ = rule.path.parts
in_integrations = grandparent == 'integrations'
integration = rule.contents.metadata.get('integration')
has_integration = integration is not None
packages_manifest = load_integrations_manifests()
if (in_integrations or has_integration) and (parent != integration):
err_msg = f'{self.rule_str(rule)}\nintegration: {integration}\npath: {rule.path.relative_to(rules_path)}' # noqa: E501
for rule in self.production_rules:
rule_integration = rule.contents.metadata.get('integration')
# checks if metadata tag matches from a list of integrations in EPR
if rule_integration and rule_integration not in packages_manifest.keys():
err_msg = f"{self.rule_str(rule)} integration '{rule_integration}' unknown"
failures.append(err_msg)
# checks if the rule path matches the intended integration
valid_integration_folders = [p.name for p in list(Path(INTEGRATION_RULE_DIR).glob("*"))]
if rule_integration and rule_integration in valid_integration_folders:
if rule_integration != rule.path.parent.name:
err_msg = f'{self.rule_str(rule)} {rule_integration} tag, but path is {rule.path.parent.name}'
failures.append(err_msg)
# checks if event.dataset exists in query object and a tag exists in metadata
if isinstance(rule.contents.data, QueryRuleData) and rule.contents.data.language != 'lucene':
trc = TOMLRuleContents(rule.contents.metadata, rule.contents.data)
package_integrations = trc._get_packaged_integrations(packages_manifest)
if package_integrations and not rule_integration:
err_msg = f'{self.rule_str(rule)} integration tag should exist: '
if failures:
err_msg = 'The following rules have missing/incorrect integrations or are not in an integrations folder:\n'
err_msg = """
The following rules have missing or invalid integrations tags.
Try updating the integrations manifest file:
- `python -m detection_rules dev integrations build-manifests`\n
"""
self.fail(err_msg + '\n'.join(failures))
def test_rule_demotions(self):