[FR] Add Support for Multi-Fields and Validation in Rules (#2882)

This commit is contained in:
Terrance DeJesus
2023-06-28 20:35:33 -04:00
committed by GitHub
parent a7e605a0e5
commit 73970eb2f2
5 changed files with 117 additions and 16 deletions
+51 -8
View File
@@ -23,16 +23,19 @@ from typing import Dict, List, Optional, Tuple
import click
import pytoml
import requests.exceptions
from semver import Version
import yaml
from elasticsearch import Elasticsearch
from eql.table import Table
from semver import Version
from kibana.connector import Kibana
from . import attack, rule_loader, utils
from .beats import (download_beats_schema, download_latest_beats_schema,
refresh_main_schema)
from .cli_utils import single_collection
from .docs import IntegrationSecurityDocs, IntegrationSecurityDocsMDX
from .ecs import download_endpoint_schemas, download_schemas
from .endgame import EndgameSchemaManager
from .eswrap import CollectEvents, add_range_to_dsl
from .ghwrap import GithubClient, update_gist
@@ -47,7 +50,7 @@ from .misc import PYTHON_LICENSE, add_client, client_error
from .packaging import (CURRENT_RELEASE_PATH, PACKAGE_FILE, RELEASE_DIR,
Package, current_stack_version)
from .rule import (AnyRuleData, BaseRuleData, DeprecatedRule, QueryRuleData,
ThreatMapping, TOMLRule, TOMLRuleContents, RuleTransform)
RuleTransform, ThreatMapping, TOMLRule, TOMLRuleContents)
from .rule_loader import RuleCollection, production_filter
from .schemas import definitions, get_stack_versions
from .utils import (dict_hash, get_etc_path, get_path, load_dump,
@@ -1279,17 +1282,57 @@ def update_rule_data_schemas():
cls.save_schema()
@schemas_group.command("generate")
@click.option("--token", required=True, prompt=get_github_token() is None, default=get_github_token(),
              help="GitHub token to use for the PR", hide_input=True)
@click.option("--schema", "-s", required=True, type=click.Choice(["endgame", "ecs", "beats", "endpoint"]),
              help="Schema to generate")
@click.option("--schema-version", "-sv", help="Tagged version from TBD. e.g., 1.9.0")
@click.option("--endpoint-target", "-t", type=str, default="endpoint", help="Target endpoint schema")
@click.option("--overwrite", is_flag=True, help="Overwrite if versions exist")
def generate_schema(token: str, schema: str, schema_version: str, endpoint_target: str, overwrite: bool):
    """Download schemas and generate flattened schema."""
    github = GithubClient(token)
    client = github.authenticated_client

    # semver's Version.parse() raises ValueError for an invalid version string
    # rather than returning a falsy value, so "not Version.parse(...)" can never
    # trigger; translate the failure into a proper CLI error instead of a traceback.
    if schema_version:
        try:
            Version.parse(schema_version)
        except ValueError:
            raise click.BadParameter(f"Invalid schema version: {schema_version}")

    click.echo(f"Generating {schema} schema")
    if schema == "endgame":
        # endgame schemas are pulled from a tagged release, so a version is mandatory
        if not schema_version:
            raise click.BadParameter("Schema version required")
        schema_manager = EndgameSchemaManager(client, schema_version)
        schema_manager.save_schemas(overwrite=overwrite)

    # ecs, beats and endpoint schemas are refreshed during release
    # these schemas do not require a schema version
    if schema == "ecs":
        download_schemas(refresh_all=True)
    if schema == "beats":
        if not schema_version:
            download_latest_beats_schema()
            refresh_main_schema()
        else:
            download_beats_schema(schema_version)

    # endpoint package custom schemas can be downloaded
    # this download requires a specific schema target
    if schema == "endpoint":
        # validate the target before spending a GitHub API call on repo contents
        if not endpoint_target:
            raise click.BadParameter("Endpoint target required")
        repo = client.get_repo("elastic/endpoint-package")
        contents = repo.get_contents("custom_schemas")
        optional_endpoint_targets = [
            Path(f.path).name.replace("custom_", "").replace(".yml", "")
            for f in contents if f.name.endswith(".yml") or Path(f.path).name == endpoint_target
        ]
        if endpoint_target not in optional_endpoint_targets:
            raise click.BadParameter(f"""Invalid endpoint schema target: {endpoint_target}
            \n Schema Options: {optional_endpoint_targets}""")
        download_endpoint_schemas(endpoint_target)

    click.echo(f"Done generating {schema} schema")
@dev_group.group('attack')
+59 -6
View File
@@ -20,8 +20,10 @@ import yaml
from .utils import (DateTimeEncoder, cached, get_etc_path, gzip_compress,
load_etc_dump, read_gzip, unzip)
ETC_NAME = "ecs_schemas"
ECS_SCHEMAS_DIR = get_etc_path(ETC_NAME)
ECS_NAME = "ecs_schemas"
ECS_SCHEMAS_DIR = get_etc_path(ECS_NAME)
ENDPOINT_NAME = "endpoint_schemas"
ENDPOINT_SCHEMAS_DIR = get_etc_path(ENDPOINT_NAME)
def add_field(schema, name, info):
@@ -117,11 +119,16 @@ def get_eql_schema(version=None, index_patterns=None):
field_type = schema_info.get('type', '')
add_field(converted, field, convert_type(field_type))
# add non-ecs schema
if index_patterns:
for index_name in index_patterns:
for k, v in flatten(get_index_schema(index_name)).items():
add_field(converted, k, convert_type(v))
# add endpoint custom schema
for k, v in flatten(get_endpoint_schemas()).items():
add_field(converted, k, convert_type(v))
return converted
@@ -198,6 +205,9 @@ def get_kql_schema(version=None, indexes=None, beat_schema=None) -> dict:
for index_name in indexes:
converted.update(**flatten(get_index_schema(index_name)))
# add endpoint custom schema
converted.update(**flatten(get_endpoint_schemas()))
if isinstance(beat_schema, dict):
converted = dict(flatten_multi_fields(beat_schema), **converted)
@@ -214,7 +224,7 @@ def download_schemas(refresh_master=True, refresh_all=False, verbose=True):
version = Version.parse(release.get('tag_name', '').lstrip('v'))
# we don't ever want beta
if not version or version < (1, 0, 1) or version in existing:
if not version or version < Version.parse("1.0.1") or version in existing:
continue
schema_dir = os.path.join(ECS_SCHEMAS_DIR, str(version))
@@ -236,7 +246,7 @@ def download_schemas(refresh_master=True, refresh_all=False, verbose=True):
out_file = file_name.replace(".yml", ".json.gz")
compressed = gzip_compress(json.dumps(contents, sort_keys=True, cls=DateTimeEncoder))
new_path = get_etc_path(ETC_NAME, str(version), out_file)
new_path = get_etc_path(ECS_NAME, str(version), out_file)
with open(new_path, 'wb') as f:
f.write(compressed)
@@ -259,12 +269,55 @@ def download_schemas(refresh_master=True, refresh_all=False, verbose=True):
shutil.rmtree(m, ignore_errors=True)
master_dir = "master_{}".format(master_ver)
os.makedirs(get_etc_path(ETC_NAME, master_dir), exist_ok=True)
os.makedirs(get_etc_path(ECS_NAME, master_dir), exist_ok=True)
compressed = gzip_compress(json.dumps(master_schema, sort_keys=True, cls=DateTimeEncoder))
new_path = get_etc_path(ETC_NAME, master_dir, "ecs_flat.json.gz")
new_path = get_etc_path(ECS_NAME, master_dir, "ecs_flat.json.gz")
with open(new_path, 'wb') as f:
f.write(compressed)
if verbose:
print('Saved files to {}: \n\t- {}'.format(master_dir, 'ecs_flat.json.gz'))
def download_endpoint_schemas(target: str, overwrite: bool = True) -> None:
    """Download an endpoint custom schema, flatten it, and cache it on disk.

    Args:
        target: name of the custom schema (e.g. ``behavior``) under
            ``custom_schemas`` in the elastic/endpoint-package repo.
        overwrite: when False, leave an already-downloaded schema untouched.
    """
    # location of custom schema YAML files
    url = "https://raw.githubusercontent.com/elastic/endpoint-package/main/custom_schemas"
    r = requests.get(f"{url}/custom_{target}.yml")
    if r.status_code == 404:
        # fall back to the per-target directory layout with a .yaml suffix
        r = requests.get(f"{url}/{target}/custom_{target}.yaml")
    r.raise_for_status()

    schema = yaml.safe_load(r.text)[0]
    root_name = schema["name"]
    fields = schema["fields"]

    # iterate over nested fields and flatten them; each multi_fields entry
    # expands into its own dotted field name (e.g. name.caseless)
    flattened = {}
    for field in fields:
        if 'multi_fields' in field:
            for mf in field['multi_fields']:
                flattened[f"{root_name}.{field['name']}.{mf['name']}"] = mf['type']
        else:
            flattened[f"{root_name}.{field['name']}"] = field['type']

    # save schema to disk
    Path(ENDPOINT_SCHEMAS_DIR).mkdir(parents=True, exist_ok=True)
    new_path = Path(ENDPOINT_SCHEMAS_DIR) / f"endpoint_{target}.json.gz"
    # the original called shutil.rmtree() on a *file* path (a silent no-op with
    # ignore_errors=True) and then clobbered the file regardless of the flag;
    # honor overwrite=False by leaving an existing schema in place
    if new_path.exists() and not overwrite:
        print(f"Endpoint schema already exists at {new_path}; skipping")
        return
    compressed = gzip_compress(json.dumps(flattened, sort_keys=True, cls=DateTimeEncoder))
    with open(new_path, 'wb') as f:
        f.write(compressed)
    print(f"Saved endpoint schema to {new_path}")
@cached
def get_endpoint_schemas() -> dict:
    """Load endpoint schemas."""
    merged: dict = {}
    # every cached endpoint schema lives as a gzipped JSON file in one directory
    pattern = os.path.join(ENDPOINT_SCHEMAS_DIR, '*.json.gz')
    for schema_file in glob.glob(pattern):
        merged.update(json.loads(read_gzip(schema_file)))
    return merged
+1 -2
View File
@@ -76,8 +76,7 @@
"process.Ext.relative_file_name_modify_time": "double",
"process.Ext.relative_file_creation_time": "double",
"Target.process.name": "keyword",
"process.Ext.api.name": "keyword",
"process.name.caseless": "keyword"
"process.Ext.api.name": "keyword"
},
"logs-windows.*": {
"powershell.file.script_block_text": "text"
+6
View File
@@ -113,6 +113,9 @@ class KQLValidator(QueryValidator):
# add non-ecs-schema fields for edge cases not added to the integration
for index_name in data.index:
integration_schema.update(**ecs.flatten(ecs.get_index_schema(index_name)))
# add endpoint schema fields for multi-line fields
integration_schema.update(**ecs.flatten(ecs.get_endpoint_schemas()))
combined_schema.update(**integration_schema)
try:
@@ -243,6 +246,9 @@ class EQLValidator(QueryValidator):
# add non-ecs-schema fields for edge cases not added to the integration
for index_name in data.index:
integration_schema.update(**ecs.flatten(ecs.get_index_schema(index_name)))
# add endpoint schema fields for multi-line fields
integration_schema.update(**ecs.flatten(ecs.get_endpoint_schemas()))
combined_schema.update(**integration_schema)
eql_schema = ecs.KqlSchema2Eql(integration_schema)