diff --git a/detection_rules/devtools.py b/detection_rules/devtools.py
index 25df0ac75..5dfbf37af 100644
--- a/detection_rules/devtools.py
+++ b/detection_rules/devtools.py
@@ -23,16 +23,19 @@ from typing import Dict, List, Optional, Tuple
 import click
 import pytoml
 import requests.exceptions
-from semver import Version
 import yaml
 from elasticsearch import Elasticsearch
 from eql.table import Table
+from semver import Version
 
 from kibana.connector import Kibana
 
 from . import attack, rule_loader, utils
+from .beats import (download_beats_schema, download_latest_beats_schema,
+                    refresh_main_schema)
 from .cli_utils import single_collection
 from .docs import IntegrationSecurityDocs, IntegrationSecurityDocsMDX
+from .ecs import download_endpoint_schemas, download_schemas
 from .endgame import EndgameSchemaManager
 from .eswrap import CollectEvents, add_range_to_dsl
 from .ghwrap import GithubClient, update_gist
@@ -47,7 +50,7 @@ from .misc import PYTHON_LICENSE, add_client, client_error
 from .packaging import (CURRENT_RELEASE_PATH, PACKAGE_FILE, RELEASE_DIR,
                         Package, current_stack_version)
 from .rule import (AnyRuleData, BaseRuleData, DeprecatedRule, QueryRuleData,
-                   ThreatMapping, TOMLRule, TOMLRuleContents, RuleTransform)
+                   RuleTransform, ThreatMapping, TOMLRule, TOMLRuleContents)
 from .rule_loader import RuleCollection, production_filter
 from .schemas import definitions, get_stack_versions
 from .utils import (dict_hash, get_etc_path, get_path, load_dump,
@@ -1279,17 +1282,57 @@ def update_rule_data_schemas():
     cls.save_schema()
 
 
-@schemas_group.command("generate-endgame")
+@schemas_group.command("generate")
 @click.option("--token", required=True, prompt=get_github_token() is None,
               default=get_github_token(), help="GitHub token to use for the PR", hide_input=True)
-@click.option("--endgame-version", "-e", required=True, help="Tagged version from TBD. e.g., 1.9.0")
+@click.option("--schema", "-s", required=True, type=click.Choice(["endgame", "ecs", "beats", "endpoint"]),
+              help="Schema to generate")
+@click.option("--schema-version", "-sv", help="Tagged version from TBD. e.g., 1.9.0")
+@click.option("--endpoint-target", "-t", type=str, default="endpoint", help="Target endpoint schema")
 @click.option("--overwrite", is_flag=True, help="Overwrite if versions exist")
-def generate_endgame_schema(token: str, endgame_version: str, overwrite: bool):
-    """Download Endgame-ECS mapping.json and generate flattend schema."""
+def generate_schema(token: str, schema: str, schema_version: str, endpoint_target: str, overwrite: bool):
+    """Download schemas and generate flattened schema."""
     github = GithubClient(token)
     client = github.authenticated_client
-    schema_manager = EndgameSchemaManager(client, endgame_version)
-    schema_manager.save_schemas(overwrite=overwrite)
+
+    if schema_version and not Version.parse(schema_version):
+        raise click.BadParameter(f"Invalid schema version: {schema_version}")
+
+    click.echo(f"Generating {schema} schema")
+    if schema == "endgame":
+        if not schema_version:
+            raise click.BadParameter("Schema version required")
+        schema_manager = EndgameSchemaManager(client, schema_version)
+        schema_manager.save_schemas(overwrite=overwrite)
+
+    # ecs, beats and endpoint schemas are refreshed during release
+    # these schemas do not require a schema version
+    if schema == "ecs":
+        download_schemas(refresh_all=True)
+    if schema == "beats":
+        if not schema_version:
+            download_latest_beats_schema()
+            refresh_main_schema()
+        else:
+            download_beats_schema(schema_version)
+
+    # endpoint package custom schemas can be downloaded
+    # this download requires a specific schema target
+    if schema == "endpoint":
+        repo = client.get_repo("elastic/endpoint-package")
+        contents = repo.get_contents("custom_schemas")
+        optional_endpoint_targets = [
+            Path(f.path).name.replace("custom_", "").replace(".yml", "")
+            for f in contents if f.name.endswith(".yml") or Path(f.path).name == endpoint_target
+        ]
+
+        if not endpoint_target:
+            raise click.BadParameter("Endpoint target required")
+        if endpoint_target not in optional_endpoint_targets:
+            raise click.BadParameter(f"""Invalid endpoint schema target: {endpoint_target}
+            \n Schema Options: {optional_endpoint_targets}""")
+        download_endpoint_schemas(endpoint_target)
+    click.echo(f"Done generating {schema} schema")
 
 
 @dev_group.group('attack')
diff --git a/detection_rules/ecs.py b/detection_rules/ecs.py
index faa8bf25e..1418c9647 100644
--- a/detection_rules/ecs.py
+++ b/detection_rules/ecs.py
@@ -20,8 +20,10 @@ import yaml
 from .utils import (DateTimeEncoder, cached, get_etc_path, gzip_compress,
                     load_etc_dump, read_gzip, unzip)
 
-ETC_NAME = "ecs_schemas"
-ECS_SCHEMAS_DIR = get_etc_path(ETC_NAME)
+ECS_NAME = "ecs_schemas"
+ECS_SCHEMAS_DIR = get_etc_path(ECS_NAME)
+ENDPOINT_NAME = "endpoint_schemas"
+ENDPOINT_SCHEMAS_DIR = get_etc_path(ENDPOINT_NAME)
 
 
 def add_field(schema, name, info):
@@ -117,11 +119,16 @@ def get_eql_schema(version=None, index_patterns=None):
         field_type = schema_info.get('type', '')
         add_field(converted, field, convert_type(field_type))
 
+    # add non-ecs schema
     if index_patterns:
         for index_name in index_patterns:
             for k, v in flatten(get_index_schema(index_name)).items():
                 add_field(converted, k, convert_type(v))
 
+    # add endpoint custom schema
+    for k, v in flatten(get_endpoint_schemas()).items():
+        add_field(converted, k, convert_type(v))
+
     return converted
 
 
@@ -198,6 +205,9 @@ def get_kql_schema(version=None, indexes=None, beat_schema=None) -> dict:
     for index_name in indexes:
         converted.update(**flatten(get_index_schema(index_name)))
 
+    # add endpoint custom schema
+    converted.update(**flatten(get_endpoint_schemas()))
+
     if isinstance(beat_schema, dict):
         converted = dict(flatten_multi_fields(beat_schema), **converted)
 
@@ -214,7 +224,7 @@ def download_schemas(refresh_master=True, refresh_all=False, verbose=True):
        version = Version.parse(release.get('tag_name', '').lstrip('v'))

        # we don't ever want beta
-        if not version or version < (1, 0, 1) or version in existing:
+        if not version or version < Version.parse("1.0.1") or version in existing:
            continue

        schema_dir = os.path.join(ECS_SCHEMAS_DIR, str(version))
@@ -236,7 +246,7 @@ def download_schemas(refresh_master=True, refresh_all=False, verbose=True):
 
                 out_file = file_name.replace(".yml", ".json.gz")
                 compressed = gzip_compress(json.dumps(contents, sort_keys=True, cls=DateTimeEncoder))
-                new_path = get_etc_path(ETC_NAME, str(version), out_file)
+                new_path = get_etc_path(ECS_NAME, str(version), out_file)
                 with open(new_path, 'wb') as f:
                     f.write(compressed)
 
@@ -259,12 +269,55 @@ def download_schemas(refresh_master=True, refresh_all=False, verbose=True):
             shutil.rmtree(m, ignore_errors=True)
 
         master_dir = "master_{}".format(master_ver)
-        os.makedirs(get_etc_path(ETC_NAME, master_dir), exist_ok=True)
+        os.makedirs(get_etc_path(ECS_NAME, master_dir), exist_ok=True)
 
         compressed = gzip_compress(json.dumps(master_schema, sort_keys=True, cls=DateTimeEncoder))
-        new_path = get_etc_path(ETC_NAME, master_dir, "ecs_flat.json.gz")
+        new_path = get_etc_path(ECS_NAME, master_dir, "ecs_flat.json.gz")
         with open(new_path, 'wb') as f:
             f.write(compressed)
 
         if verbose:
             print('Saved files to {}: \n\t- {}'.format(master_dir, 'ecs_flat.json.gz'))
+
+
+def download_endpoint_schemas(target: str, overwrite: bool = True) -> None:
+    """Download endpoint custom schemas."""
+
+    # location of custom schema YAML files
+    url = "https://raw.githubusercontent.com/elastic/endpoint-package/main/custom_schemas"
+    r = requests.get(f"{url}/custom_{target}.yml")
+    if r.status_code == 404:
+        r = requests.get(f"{url}/{target}/custom_{target}.yaml")
+    r.raise_for_status()
+    schema = yaml.safe_load(r.text)[0]
+    root_name = schema["name"]
+    fields = schema["fields"]
+    flattened = {}
+
+    # iterate over nested fields and flatten them
+    for f in fields:
+        if 'multi_fields' in f:
+            for mf in f['multi_fields']:
+                flattened[f"{root_name}.{f['name']}.{mf['name']}"] = mf['type']
+        else:
+            flattened[f"{root_name}.{f['name']}"] = f['type']
+
+    # save schema to disk
+    Path(ENDPOINT_SCHEMAS_DIR).mkdir(parents=True, exist_ok=True)
+    compressed = gzip_compress(json.dumps(flattened, sort_keys=True, cls=DateTimeEncoder))
+    new_path = Path(ENDPOINT_SCHEMAS_DIR) / f"endpoint_{target}.json.gz"
+    if overwrite:
+        shutil.rmtree(new_path, ignore_errors=True)
+    with open(new_path, 'wb') as f:
+        f.write(compressed)
+    print(f"Saved endpoint schema to {new_path}")
+
+
+@cached
+def get_endpoint_schemas() -> dict:
+    """Load endpoint schemas."""
+    schema = {}
+    existing = glob.glob(os.path.join(ENDPOINT_SCHEMAS_DIR, '*.json.gz'))
+    for f in existing:
+        schema.update(json.loads(read_gzip(f)))
+    return schema
diff --git a/detection_rules/etc/endpoint_schemas/endpoint_process.json.gz b/detection_rules/etc/endpoint_schemas/endpoint_process.json.gz
new file mode 100644
index 000000000..536277550
Binary files /dev/null and b/detection_rules/etc/endpoint_schemas/endpoint_process.json.gz differ
diff --git a/detection_rules/etc/non-ecs-schema.json b/detection_rules/etc/non-ecs-schema.json
index d08784e5a..86df14d28 100644
--- a/detection_rules/etc/non-ecs-schema.json
+++ b/detection_rules/etc/non-ecs-schema.json
@@ -76,8 +76,7 @@
         "process.Ext.relative_file_name_modify_time": "double",
         "process.Ext.relative_file_creation_time": "double",
         "Target.process.name": "keyword",
-        "process.Ext.api.name": "keyword",
-        "process.name.caseless": "keyword"
+        "process.Ext.api.name": "keyword"
     },
     "logs-windows.*": {
         "powershell.file.script_block_text": "text"
diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py
index e6818fe13..079a6edee 100644
--- a/detection_rules/rule_validators.py
+++ b/detection_rules/rule_validators.py
@@ -113,6 +113,9 @@ class KQLValidator(QueryValidator):
                 # add non-ecs-schema fields for edge cases not added to the integration
                 for index_name in data.index:
                     integration_schema.update(**ecs.flatten(ecs.get_index_schema(index_name)))
+
+                # add endpoint schema fields for multi-line fields
+                integration_schema.update(**ecs.flatten(ecs.get_endpoint_schemas()))
                 combined_schema.update(**integration_schema)
 
             try:
@@ -243,6 +246,9 @@ class EQLValidator(QueryValidator):
                 # add non-ecs-schema fields for edge cases not added to the integration
                 for index_name in data.index:
                     integration_schema.update(**ecs.flatten(ecs.get_index_schema(index_name)))
+
+                # add endpoint schema fields for multi-line fields
+                integration_schema.update(**ecs.flatten(ecs.get_endpoint_schemas()))
                 combined_schema.update(**integration_schema)
 
             eql_schema = ecs.KqlSchema2Eql(integration_schema)