[FR] Add ML Jobs to Schemas and Unit Test for Validation (#3161)

* adding machine learning job id validation

* Update rules/ml/credential_access_ml_auth_spike_in_logon_events_from_a_source_ip.toml

* Update tests/test_all_rules.py

* adding integration manifests and schemas from main

* rebuilt manifests and schemas with lmd

* fixed unit test linting

* adding manifests and schemas for other analytic packages

* updated manifests and schemas; adjusted unit test for verbosity

* sorted imports
This commit is contained in:
Terrance DeJesus
2023-10-12 10:51:12 -04:00
committed by GitHub
parent 3f2a709370
commit 3e212e2b74
4 changed files with 61 additions and 8 deletions
Binary file not shown.
Binary file not shown.
+11 -3
View File
@@ -23,6 +23,7 @@ from . import ecs
from .beats import flatten_ecs_schema
from .misc import load_current_package_version
from .utils import cached, get_etc_path, read_gzip, unzip
from .schemas import definitions
MANIFEST_FILE_PATH = Path(get_etc_path('integration-manifests.json.gz'))
SCHEMA_FILE_PATH = Path(get_etc_path('integration-schemas.json.gz'))
@@ -137,12 +138,12 @@ def build_integrations_schemas(overwrite: bool, integration: str = None) -> None
# Open the zip file
with unzip(response.content) as zip_ref:
for file in zip_ref.namelist():
file_data_bytes = zip_ref.read(file)
# Check if the file is a match
if glob.fnmatch.fnmatch(file, '*/fields/*.yml'):
integration_name = Path(file).parent.parent.name
final_integration_schemas[package][version].setdefault(integration_name, {})
file_data = zip_ref.read(file)
schema_fields = yaml.safe_load(file_data)
schema_fields = yaml.safe_load(file_data_bytes)
# Parse the schema and add to the integration_manifests
data = flatten_ecs_schema(schema_fields)
@@ -150,7 +151,14 @@ def build_integrations_schemas(overwrite: bool, integration: str = None) -> None
final_integration_schemas[package][version][integration_name].update(flat_data)
del file_data
# add machine learning jobs to the schema
if integration in list(map(str.lower, definitions.MACHINE_LEARNING_PACKAGES)):
if glob.fnmatch.fnmatch(file, '*/ml_module/*ml.json'):
ml_module = json.loads(file_data_bytes)
job_ids = [job['id'] for job in ml_module['attributes']['jobs']]
final_integration_schemas[package][version]['jobs'] = job_ids
del file_data_bytes
# Write the final integration schemas to disk
with gzip.open(SCHEMA_FILE_PATH, "w") as schema_file:
+50 -5
View File
@@ -10,24 +10,27 @@ import unittest
import uuid
import warnings
from collections import defaultdict
from marshmallow import ValidationError
from pathlib import Path
import eql.ast
from marshmallow import ValidationError
from semver import Version
import kql
from detection_rules import attack
from detection_rules.beats import parse_beats_from_index
from detection_rules.integrations import load_integrations_schemas
from detection_rules.integrations import (find_latest_compatible_version,
load_integrations_manifests,
load_integrations_schemas)
from detection_rules.misc import load_current_package_version
from detection_rules.packaging import current_stack_version
from detection_rules.rule import (QueryRuleData, TOMLRuleContents,
load_integrations_manifests, QueryValidator)
from detection_rules.rule import (QueryRuleData, QueryValidator,
TOMLRuleContents)
from detection_rules.rule_loader import FILE_PATTERN
from detection_rules.rule_validators import EQLValidator, KQLValidator
from detection_rules.schemas import definitions, get_stack_schemas
from detection_rules.utils import INTEGRATION_RULE_DIR, get_path, load_etc_dump, PatchedTemplate
from detection_rules.utils import (INTEGRATION_RULE_DIR, PatchedTemplate,
get_path, load_etc_dump)
from detection_rules.version_lock import default_version_lock
from rta import get_available_tests
@@ -894,6 +897,48 @@ class TestIntegrationRules(BaseRuleTest):
self.fail(f'The following ({len(failures)}) rules have a `min_stack_version` defined but missing comments:'
f'\n{err_msg}')
def test_ml_integration_jobs_exist(self):
    """Test that machine learning jobs exist in the integration.

    For every rule of type ``machine_learning`` that is tagged with one of the
    known ML integration packages, verify that the rule declares a
    ``machine_learning_job_id`` and that the declared job id actually exists in
    the job list published by the latest compatible version of that integration
    package.
    """
    failures = []
    # Package names in rule metadata are lowercase; normalize the known ML
    # package list the same way before matching.
    ml_integration_names = list(map(str.lower, definitions.MACHINE_LEARNING_PACKAGES))
    integration_schemas = load_integrations_schemas()
    integration_manifests = load_integrations_manifests()
    for rule in self.all_rules:
        if rule.contents.data.type == "machine_learning":
            # First ML package listed in the rule's `integration` metadata, if any;
            # rules tied to non-ML integrations are skipped.
            ml_integration_name = next((i for i in rule.contents.metadata.integration
                                        if i in ml_integration_names), None)
            if ml_integration_name:
                if "machine_learning_job_id" not in dir(rule.contents.data):
                    failures.append(f'{self.rule_str(rule)} missing `machine_learning_job_id`')
                else:
                    rule_job_id = rule.contents.data.machine_learning_job_id
                    ml_schema = integration_schemas.get(ml_integration_name)
                    # Fall back to the current package version when the rule does
                    # not pin a minimum stack version.
                    min_version = Version.parse(
                        rule.contents.metadata.min_stack_version or load_current_package_version(),
                        optional_minor_and_patch=True
                    )
                    # Resolve the newest integration package version compatible
                    # with the rule's minimum stack version.
                    latest_compat_ver = find_latest_compatible_version(
                        package=ml_integration_name,
                        integration="",
                        rule_stack_version=min_version,
                        packages_manifest=integration_manifests
                    )
                    # latest_compat_ver[0] is the version string keying the schema;
                    # that schema carries a 'jobs' list of valid ML job ids.
                    compat_integration_schema = ml_schema[latest_compat_ver[0]]
                    if rule_job_id not in compat_integration_schema['jobs']:
                        failures.append(
                            f'{self.rule_str(rule)} machine_learning_job_id `{rule_job_id}` not found '
                            f'in version `{latest_compat_ver[0]}` of `{ml_integration_name}` integration. '
                            f'existing jobs: {compat_integration_schema["jobs"]}'
                        )
    if failures:
        err_msg = '\n'.join(failures)
        self.fail(
            f'The following ({len(failures)}) rules are missing a valid `machine_learning_job_id`:\n{err_msg}'
        )
class TestRuleTiming(BaseRuleTest):
"""Test rule timing and timestamps."""