[FR] Add ML Jobs to Schemas and Unit Test for Validation (#3161)
* adding machine learning job id validation * Update rules/ml/credential_access_ml_auth_spike_in_logon_events_from_a_source_ip.toml * Update tests/test_all_rules.py * adding integration manifests and schemas from main * rebuilt manifests and schemas with lmd * fixed unit test linting * adding manifests and schemas for other analytic packages * updated manifests and schemas; adjusted unit test for verbosity * sorted imports
This commit is contained in:
Binary file not shown.
Binary file not shown.
@@ -23,6 +23,7 @@ from . import ecs
|
||||
from .beats import flatten_ecs_schema
|
||||
from .misc import load_current_package_version
|
||||
from .utils import cached, get_etc_path, read_gzip, unzip
|
||||
from .schemas import definitions
|
||||
|
||||
MANIFEST_FILE_PATH = Path(get_etc_path('integration-manifests.json.gz'))
|
||||
SCHEMA_FILE_PATH = Path(get_etc_path('integration-schemas.json.gz'))
|
||||
@@ -137,12 +138,12 @@ def build_integrations_schemas(overwrite: bool, integration: str = None) -> None
|
||||
# Open the zip file
|
||||
with unzip(response.content) as zip_ref:
|
||||
for file in zip_ref.namelist():
|
||||
file_data_bytes = zip_ref.read(file)
|
||||
# Check if the file is a match
|
||||
if glob.fnmatch.fnmatch(file, '*/fields/*.yml'):
|
||||
integration_name = Path(file).parent.parent.name
|
||||
final_integration_schemas[package][version].setdefault(integration_name, {})
|
||||
file_data = zip_ref.read(file)
|
||||
schema_fields = yaml.safe_load(file_data)
|
||||
schema_fields = yaml.safe_load(file_data_bytes)
|
||||
|
||||
# Parse the schema and add to the integration_manifests
|
||||
data = flatten_ecs_schema(schema_fields)
|
||||
@@ -150,7 +151,14 @@ def build_integrations_schemas(overwrite: bool, integration: str = None) -> None
|
||||
|
||||
final_integration_schemas[package][version][integration_name].update(flat_data)
|
||||
|
||||
del file_data
|
||||
# add machine learning jobs to the schema
|
||||
if integration in list(map(str.lower, definitions.MACHINE_LEARNING_PACKAGES)):
|
||||
if glob.fnmatch.fnmatch(file, '*/ml_module/*ml.json'):
|
||||
ml_module = json.loads(file_data_bytes)
|
||||
job_ids = [job['id'] for job in ml_module['attributes']['jobs']]
|
||||
final_integration_schemas[package][version]['jobs'] = job_ids
|
||||
|
||||
del file_data_bytes
|
||||
|
||||
# Write the final integration schemas to disk
|
||||
with gzip.open(SCHEMA_FILE_PATH, "w") as schema_file:
|
||||
|
||||
+50
-5
@@ -10,24 +10,27 @@ import unittest
|
||||
import uuid
|
||||
import warnings
|
||||
from collections import defaultdict
|
||||
from marshmallow import ValidationError
|
||||
from pathlib import Path
|
||||
|
||||
import eql.ast
|
||||
from marshmallow import ValidationError
|
||||
from semver import Version
|
||||
|
||||
import kql
|
||||
from detection_rules import attack
|
||||
from detection_rules.beats import parse_beats_from_index
|
||||
from detection_rules.integrations import load_integrations_schemas
|
||||
from detection_rules.integrations import (find_latest_compatible_version,
|
||||
load_integrations_manifests,
|
||||
load_integrations_schemas)
|
||||
from detection_rules.misc import load_current_package_version
|
||||
from detection_rules.packaging import current_stack_version
|
||||
from detection_rules.rule import (QueryRuleData, TOMLRuleContents,
|
||||
load_integrations_manifests, QueryValidator)
|
||||
from detection_rules.rule import (QueryRuleData, QueryValidator,
|
||||
TOMLRuleContents)
|
||||
from detection_rules.rule_loader import FILE_PATTERN
|
||||
from detection_rules.rule_validators import EQLValidator, KQLValidator
|
||||
from detection_rules.schemas import definitions, get_stack_schemas
|
||||
from detection_rules.utils import INTEGRATION_RULE_DIR, get_path, load_etc_dump, PatchedTemplate
|
||||
from detection_rules.utils import (INTEGRATION_RULE_DIR, PatchedTemplate,
|
||||
get_path, load_etc_dump)
|
||||
from detection_rules.version_lock import default_version_lock
|
||||
from rta import get_available_tests
|
||||
|
||||
@@ -894,6 +897,48 @@ class TestIntegrationRules(BaseRuleTest):
|
||||
self.fail(f'The following ({len(failures)}) rules have a `min_stack_version` defined but missing comments:'
|
||||
f'\n{err_msg}')
|
||||
|
||||
def test_ml_integration_jobs_exist(self):
|
||||
"""Test that machine learning jobs exist in the integration."""
|
||||
failures = []
|
||||
|
||||
ml_integration_names = list(map(str.lower, definitions.MACHINE_LEARNING_PACKAGES))
|
||||
integration_schemas = load_integrations_schemas()
|
||||
integration_manifests = load_integrations_manifests()
|
||||
|
||||
for rule in self.all_rules:
|
||||
if rule.contents.data.type == "machine_learning":
|
||||
ml_integration_name = next((i for i in rule.contents.metadata.integration
|
||||
if i in ml_integration_names), None)
|
||||
if ml_integration_name:
|
||||
if "machine_learning_job_id" not in dir(rule.contents.data):
|
||||
failures.append(f'{self.rule_str(rule)} missing `machine_learning_job_id`')
|
||||
else:
|
||||
rule_job_id = rule.contents.data.machine_learning_job_id
|
||||
ml_schema = integration_schemas.get(ml_integration_name)
|
||||
min_version = Version.parse(
|
||||
rule.contents.metadata.min_stack_version or load_current_package_version(),
|
||||
optional_minor_and_patch=True
|
||||
)
|
||||
latest_compat_ver = find_latest_compatible_version(
|
||||
package=ml_integration_name,
|
||||
integration="",
|
||||
rule_stack_version=min_version,
|
||||
packages_manifest=integration_manifests
|
||||
)
|
||||
compat_integration_schema = ml_schema[latest_compat_ver[0]]
|
||||
if rule_job_id not in compat_integration_schema['jobs']:
|
||||
failures.append(
|
||||
f'{self.rule_str(rule)} machine_learning_job_id `{rule_job_id}` not found '
|
||||
f'in version `{latest_compat_ver[0]}` of `{ml_integration_name}` integration. '
|
||||
f'existing jobs: {compat_integration_schema["jobs"]}'
|
||||
)
|
||||
|
||||
if failures:
|
||||
err_msg = '\n'.join(failures)
|
||||
self.fail(
|
||||
f'The following ({len(failures)}) rules are missing a valid `machine_learning_job_id`:\n{err_msg}'
|
||||
)
|
||||
|
||||
|
||||
class TestRuleTiming(BaseRuleTest):
|
||||
"""Test rule timing and timestamps."""
|
||||
|
||||
Reference in New Issue
Block a user