[FR] Add ML Jobs to Schemas and Unit Test for Validation (#3161)

* adding machine learning job id validation

* Update rules/ml/credential_access_ml_auth_spike_in_logon_events_from_a_source_ip.toml

* Update tests/test_all_rules.py

* adding integration manifests and schemas from main

* rebuilt manifests and schemas with lmd

* fixed unit test linting

* adding manifests and schemas for other analytic packages

* updated manifests and schemas; adjusted unit test for verbosity

* sorted imports

(cherry picked from commit 3e212e2b74)
This commit is contained in:
Terrance DeJesus
2023-10-12 10:51:12 -04:00
committed by github-actions[bot]
parent 788f2ce884
commit 0308e32ea0
4 changed files with 61 additions and 8 deletions
Binary file not shown.
Binary file not shown.
+11 -3
View File
@@ -23,6 +23,7 @@ from . import ecs
from .beats import flatten_ecs_schema
from .misc import load_current_package_version
from .utils import cached, get_etc_path, read_gzip, unzip
from .schemas import definitions
MANIFEST_FILE_PATH = Path(get_etc_path('integration-manifests.json.gz'))
SCHEMA_FILE_PATH = Path(get_etc_path('integration-schemas.json.gz'))
@@ -137,12 +138,12 @@ def build_integrations_schemas(overwrite: bool, integration: str = None) -> None
# Open the zip file
with unzip(response.content) as zip_ref:
for file in zip_ref.namelist():
file_data_bytes = zip_ref.read(file)
# Check if the file is a match
if glob.fnmatch.fnmatch(file, '*/fields/*.yml'):
integration_name = Path(file).parent.parent.name
final_integration_schemas[package][version].setdefault(integration_name, {})
file_data = zip_ref.read(file)
schema_fields = yaml.safe_load(file_data)
schema_fields = yaml.safe_load(file_data_bytes)
# Parse the schema and add its flattened fields to the integration schemas
data = flatten_ecs_schema(schema_fields)
@@ -150,7 +151,14 @@ def build_integrations_schemas(overwrite: bool, integration: str = None) -> None
final_integration_schemas[package][version][integration_name].update(flat_data)
del file_data
# add machine learning jobs to the schema
if integration in list(map(str.lower, definitions.MACHINE_LEARNING_PACKAGES)):
if glob.fnmatch.fnmatch(file, '*/ml_module/*ml.json'):
ml_module = json.loads(file_data_bytes)
job_ids = [job['id'] for job in ml_module['attributes']['jobs']]
final_integration_schemas[package][version]['jobs'] = job_ids
del file_data_bytes
# Write the final integration schemas to disk
with gzip.open(SCHEMA_FILE_PATH, "w") as schema_file: