[FR] Add Support for Multi-Fields and Validation in Rules (#2882)
This commit is contained in:
@@ -23,16 +23,19 @@ from typing import Dict, List, Optional, Tuple
|
||||
import click
|
||||
import pytoml
|
||||
import requests.exceptions
|
||||
from semver import Version
|
||||
import yaml
|
||||
from elasticsearch import Elasticsearch
|
||||
from eql.table import Table
|
||||
from semver import Version
|
||||
|
||||
from kibana.connector import Kibana
|
||||
|
||||
from . import attack, rule_loader, utils
|
||||
from .beats import (download_beats_schema, download_latest_beats_schema,
|
||||
refresh_main_schema)
|
||||
from .cli_utils import single_collection
|
||||
from .docs import IntegrationSecurityDocs, IntegrationSecurityDocsMDX
|
||||
from .ecs import download_endpoint_schemas, download_schemas
|
||||
from .endgame import EndgameSchemaManager
|
||||
from .eswrap import CollectEvents, add_range_to_dsl
|
||||
from .ghwrap import GithubClient, update_gist
|
||||
@@ -47,7 +50,7 @@ from .misc import PYTHON_LICENSE, add_client, client_error
|
||||
from .packaging import (CURRENT_RELEASE_PATH, PACKAGE_FILE, RELEASE_DIR,
|
||||
Package, current_stack_version)
|
||||
from .rule import (AnyRuleData, BaseRuleData, DeprecatedRule, QueryRuleData,
|
||||
ThreatMapping, TOMLRule, TOMLRuleContents, RuleTransform)
|
||||
RuleTransform, ThreatMapping, TOMLRule, TOMLRuleContents)
|
||||
from .rule_loader import RuleCollection, production_filter
|
||||
from .schemas import definitions, get_stack_versions
|
||||
from .utils import (dict_hash, get_etc_path, get_path, load_dump,
|
||||
@@ -1279,18 +1282,58 @@ def update_rule_data_schemas():
|
||||
cls.save_schema()
|
||||
|
||||
|
||||
@schemas_group.command("generate-endgame")
|
||||
@schemas_group.command("generate")
|
||||
@click.option("--token", required=True, prompt=get_github_token() is None, default=get_github_token(),
|
||||
help="GitHub token to use for the PR", hide_input=True)
|
||||
@click.option("--endgame-version", "-e", required=True, help="Tagged version from TBD. e.g., 1.9.0")
|
||||
@click.option("--schema", "-s", required=True, type=click.Choice(["endgame", "ecs", "beats", "endpoint"]),
|
||||
help="Schema to generate")
|
||||
@click.option("--schema-version", "-sv", help="Tagged version from TBD. e.g., 1.9.0")
|
||||
@click.option("--endpoint-target", "-t", type=str, default="endpoint", help="Target endpoint schema")
|
||||
@click.option("--overwrite", is_flag=True, help="Overwrite if versions exist")
|
||||
def generate_endgame_schema(token: str, endgame_version: str, overwrite: bool):
|
||||
"""Download Endgame-ECS mapping.json and generate flattened schema."""
|
||||
def generate_schema(token: str, schema: str, schema_version: str, endpoint_target: str, overwrite: bool):
|
||||
"""Download schemas and generate flattened schema."""
|
||||
github = GithubClient(token)
|
||||
client = github.authenticated_client
|
||||
schema_manager = EndgameSchemaManager(client, endgame_version)
|
||||
|
||||
if schema_version and not Version.parse(schema_version):
|
||||
raise click.BadParameter(f"Invalid schema version: {schema_version}")
|
||||
|
||||
click.echo(f"Generating {schema} schema")
|
||||
if schema == "endgame":
|
||||
if not schema_version:
|
||||
raise click.BadParameter("Schema version required")
|
||||
schema_manager = EndgameSchemaManager(client, schema_version)
|
||||
schema_manager.save_schemas(overwrite=overwrite)
|
||||
|
||||
# ecs, beats and endpoint schemas are refreshed during release
|
||||
# these schemas do not require a schema version
|
||||
if schema == "ecs":
|
||||
download_schemas(refresh_all=True)
|
||||
if schema == "beats":
|
||||
if not schema_version:
|
||||
download_latest_beats_schema()
|
||||
refresh_main_schema()
|
||||
else:
|
||||
download_beats_schema(schema_version)
|
||||
|
||||
# endpoint package custom schemas can be downloaded
|
||||
# this download requires a specific schema target
|
||||
if schema == "endpoint":
|
||||
repo = client.get_repo("elastic/endpoint-package")
|
||||
contents = repo.get_contents("custom_schemas")
|
||||
optional_endpoint_targets = [
|
||||
Path(f.path).name.replace("custom_", "").replace(".yml", "")
|
||||
for f in contents if f.name.endswith(".yml") or Path(f.path).name == endpoint_target
|
||||
]
|
||||
|
||||
if not endpoint_target:
|
||||
raise click.BadParameter("Endpoint target required")
|
||||
if endpoint_target not in optional_endpoint_targets:
|
||||
raise click.BadParameter(f"""Invalid endpoint schema target: {endpoint_target}
|
||||
\n Schema Options: {optional_endpoint_targets}""")
|
||||
download_endpoint_schemas(endpoint_target)
|
||||
click.echo(f"Done generating {schema} schema")
|
||||
|
||||
|
||||
@dev_group.group('attack')
|
||||
def attack_group():
|
||||
|
||||
+59
-6
@@ -20,8 +20,10 @@ import yaml
|
||||
from .utils import (DateTimeEncoder, cached, get_etc_path, gzip_compress,
|
||||
load_etc_dump, read_gzip, unzip)
|
||||
|
||||
ETC_NAME = "ecs_schemas"
|
||||
ECS_SCHEMAS_DIR = get_etc_path(ETC_NAME)
|
||||
ECS_NAME = "ecs_schemas"
|
||||
ECS_SCHEMAS_DIR = get_etc_path(ECS_NAME)
|
||||
ENDPOINT_NAME = "endpoint_schemas"
|
||||
ENDPOINT_SCHEMAS_DIR = get_etc_path(ENDPOINT_NAME)
|
||||
|
||||
|
||||
def add_field(schema, name, info):
|
||||
@@ -117,11 +119,16 @@ def get_eql_schema(version=None, index_patterns=None):
|
||||
field_type = schema_info.get('type', '')
|
||||
add_field(converted, field, convert_type(field_type))
|
||||
|
||||
# add non-ecs schema
|
||||
if index_patterns:
|
||||
for index_name in index_patterns:
|
||||
for k, v in flatten(get_index_schema(index_name)).items():
|
||||
add_field(converted, k, convert_type(v))
|
||||
|
||||
# add endpoint custom schema
|
||||
for k, v in flatten(get_endpoint_schemas()).items():
|
||||
add_field(converted, k, convert_type(v))
|
||||
|
||||
return converted
|
||||
|
||||
|
||||
@@ -198,6 +205,9 @@ def get_kql_schema(version=None, indexes=None, beat_schema=None) -> dict:
|
||||
for index_name in indexes:
|
||||
converted.update(**flatten(get_index_schema(index_name)))
|
||||
|
||||
# add endpoint custom schema
|
||||
converted.update(**flatten(get_endpoint_schemas()))
|
||||
|
||||
if isinstance(beat_schema, dict):
|
||||
converted = dict(flatten_multi_fields(beat_schema), **converted)
|
||||
|
||||
@@ -214,7 +224,7 @@ def download_schemas(refresh_master=True, refresh_all=False, verbose=True):
|
||||
version = Version.parse(release.get('tag_name', '').lstrip('v'))
|
||||
|
||||
# we don't ever want beta
|
||||
if not version or version < (1, 0, 1) or version in existing:
|
||||
if not version or version < Version.parse("1.0.1") or version in existing:
|
||||
continue
|
||||
|
||||
schema_dir = os.path.join(ECS_SCHEMAS_DIR, str(version))
|
||||
@@ -236,7 +246,7 @@ def download_schemas(refresh_master=True, refresh_all=False, verbose=True):
|
||||
out_file = file_name.replace(".yml", ".json.gz")
|
||||
|
||||
compressed = gzip_compress(json.dumps(contents, sort_keys=True, cls=DateTimeEncoder))
|
||||
new_path = get_etc_path(ETC_NAME, str(version), out_file)
|
||||
new_path = get_etc_path(ECS_NAME, str(version), out_file)
|
||||
with open(new_path, 'wb') as f:
|
||||
f.write(compressed)
|
||||
|
||||
@@ -259,12 +269,55 @@ def download_schemas(refresh_master=True, refresh_all=False, verbose=True):
|
||||
shutil.rmtree(m, ignore_errors=True)
|
||||
|
||||
master_dir = "master_{}".format(master_ver)
|
||||
os.makedirs(get_etc_path(ETC_NAME, master_dir), exist_ok=True)
|
||||
os.makedirs(get_etc_path(ECS_NAME, master_dir), exist_ok=True)
|
||||
|
||||
compressed = gzip_compress(json.dumps(master_schema, sort_keys=True, cls=DateTimeEncoder))
|
||||
new_path = get_etc_path(ETC_NAME, master_dir, "ecs_flat.json.gz")
|
||||
new_path = get_etc_path(ECS_NAME, master_dir, "ecs_flat.json.gz")
|
||||
with open(new_path, 'wb') as f:
|
||||
f.write(compressed)
|
||||
|
||||
if verbose:
|
||||
print('Saved files to {}: \n\t- {}'.format(master_dir, 'ecs_flat.json.gz'))
|
||||
|
||||
|
||||
def download_endpoint_schemas(target: str, overwrite: bool = True) -> None:
|
||||
"""Download endpoint custom schemas."""
|
||||
|
||||
# location of custom schema YAML files
|
||||
url = "https://raw.githubusercontent.com/elastic/endpoint-package/main/custom_schemas"
|
||||
r = requests.get(f"{url}/custom_{target}.yml")
|
||||
if r.status_code == 404:
|
||||
r = requests.get(f"{url}/{target}/custom_{target}.yaml")
|
||||
r.raise_for_status()
|
||||
schema = yaml.safe_load(r.text)[0]
|
||||
root_name = schema["name"]
|
||||
fields = schema["fields"]
|
||||
flattened = {}
|
||||
|
||||
# iterate over nested fields and flatten them
|
||||
for f in fields:
|
||||
if 'multi_fields' in f:
|
||||
for mf in f['multi_fields']:
|
||||
flattened[f"{root_name}.{f['name']}.{mf['name']}"] = mf['type']
|
||||
else:
|
||||
flattened[f"{root_name}.{f['name']}"] = f['type']
|
||||
|
||||
# save schema to disk
|
||||
Path(ENDPOINT_SCHEMAS_DIR).mkdir(parents=True, exist_ok=True)
|
||||
compressed = gzip_compress(json.dumps(flattened, sort_keys=True, cls=DateTimeEncoder))
|
||||
new_path = Path(ENDPOINT_SCHEMAS_DIR) / f"endpoint_{target}.json.gz"
|
||||
if overwrite:
|
||||
shutil.rmtree(new_path, ignore_errors=True)
|
||||
with open(new_path, 'wb') as f:
|
||||
f.write(compressed)
|
||||
print(f"Saved endpoint schema to {new_path}")
|
||||
|
||||
|
||||
@cached
|
||||
def get_endpoint_schemas() -> dict:
|
||||
"""Load endpoint schemas."""
|
||||
schema = {}
|
||||
existing = glob.glob(os.path.join(ENDPOINT_SCHEMAS_DIR, '*.json.gz'))
|
||||
for f in existing:
|
||||
schema.update(json.loads(read_gzip(f)))
|
||||
return schema
|
||||
|
||||
Binary file not shown.
@@ -76,8 +76,7 @@
|
||||
"process.Ext.relative_file_name_modify_time": "double",
|
||||
"process.Ext.relative_file_creation_time": "double",
|
||||
"Target.process.name": "keyword",
|
||||
"process.Ext.api.name": "keyword",
|
||||
"process.name.caseless": "keyword"
|
||||
"process.Ext.api.name": "keyword"
|
||||
},
|
||||
"logs-windows.*": {
|
||||
"powershell.file.script_block_text": "text"
|
||||
|
||||
@@ -113,6 +113,9 @@ class KQLValidator(QueryValidator):
|
||||
# add non-ecs-schema fields for edge cases not added to the integration
|
||||
for index_name in data.index:
|
||||
integration_schema.update(**ecs.flatten(ecs.get_index_schema(index_name)))
|
||||
|
||||
# add endpoint schema fields for multi-fields
|
||||
integration_schema.update(**ecs.flatten(ecs.get_endpoint_schemas()))
|
||||
combined_schema.update(**integration_schema)
|
||||
|
||||
try:
|
||||
@@ -243,6 +246,9 @@ class EQLValidator(QueryValidator):
|
||||
# add non-ecs-schema fields for edge cases not added to the integration
|
||||
for index_name in data.index:
|
||||
integration_schema.update(**ecs.flatten(ecs.get_index_schema(index_name)))
|
||||
|
||||
# add endpoint schema fields for multi-fields
|
||||
integration_schema.update(**ecs.flatten(ecs.get_endpoint_schemas()))
|
||||
combined_schema.update(**integration_schema)
|
||||
|
||||
eql_schema = ecs.KqlSchema2Eql(integration_schema)
|
||||
|
||||
Reference in New Issue
Block a user