2020-06-29 23:17:38 -06:00
|
|
|
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
2021-03-03 22:12:11 -09:00
|
|
|
# or more contributor license agreements. Licensed under the Elastic License
|
|
|
|
|
# 2.0; you may not use this file except in compliance with the Elastic License
|
|
|
|
|
# 2.0.
|
2020-06-29 23:17:38 -06:00
|
|
|
|
|
|
|
|
"""ECS Schemas management."""
|
2025-07-01 15:20:55 +02:00
|
|
|
|
2020-06-29 23:17:38 -06:00
|
|
|
import copy
|
2023-02-07 14:26:29 -05:00
|
|
|
import json
|
2020-06-29 23:17:38 -06:00
|
|
|
import os
|
|
|
|
|
import shutil
|
2025-07-01 15:20:55 +02:00
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import Any
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
import eql # type: ignore[reportMissingTypeStubs]
|
|
|
|
|
import eql.types # type: ignore[reportMissingTypeStubs]
|
2023-02-07 14:26:29 -05:00
|
|
|
import requests
|
2020-06-29 23:17:38 -06:00
|
|
|
import yaml
|
2025-07-01 15:20:55 +02:00
|
|
|
from semver import Version
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2024-08-06 18:07:12 -04:00
|
|
|
from .config import CUSTOM_RULES_DIR, parse_rules_config
|
|
|
|
|
from .custom_schemas import get_custom_schemas
|
2024-08-28 10:48:00 -04:00
|
|
|
from .integrations import load_integrations_schemas
|
2025-07-01 15:20:55 +02:00
|
|
|
from .utils import DateTimeEncoder, cached, get_etc_path, gzip_compress, load_etc_dump, read_gzip, unzip
|
2020-06-29 23:17:38 -06:00
|
|
|
|
2023-06-28 20:35:33 -04:00
|
|
|
ECS_NAME = "ecs_schemas"
|
2025-07-01 15:20:55 +02:00
|
|
|
ECS_SCHEMAS_DIR = get_etc_path([ECS_NAME])
|
2023-06-28 20:35:33 -04:00
|
|
|
ENDPOINT_NAME = "endpoint_schemas"
|
2025-07-01 15:20:55 +02:00
|
|
|
ENDPOINT_SCHEMAS_DIR = get_etc_path([ENDPOINT_NAME])
|
2024-08-06 18:07:12 -04:00
|
|
|
RULES_CONFIG = parse_rules_config()
|
2020-06-29 23:17:38 -06:00
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def add_field(schema: dict[str, Any], name: str, info: Any) -> None:
    """Nest a dotted field within a dictionary.

    Splits ``name`` on dots and creates intermediate dicts as needed, so
    ``add_field(s, "a.b.c", x)`` produces ``s == {"a": {"b": {"c": x}}}``.
    Mutates ``schema`` in place and returns ``None``.
    """
    if "." not in name:
        schema[name] = info
        return

    top, remaining = name.split(".", 1)
    # Overwrite any non-dict intermediate value so nesting can proceed.
    if not isinstance(schema.get(top), dict):
        schema[top] = {}
    # Bug fix: recurse into the nested dict (schema[top]), not the same
    # level, so the remainder of the dotted path lands under `top`.
    add_field(schema[top], remaining, info)
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def _recursive_merge(existing: dict[str, Any], new: dict[str, Any], depth: int = 0) -> dict[str, Any]:
|
2020-06-29 23:17:38 -06:00
|
|
|
"""Return an existing dict merged into a new one."""
|
|
|
|
|
for key, value in existing.items():
|
|
|
|
|
if isinstance(value, dict):
|
|
|
|
|
if depth == 0:
|
|
|
|
|
new = copy.deepcopy(new)
|
|
|
|
|
|
|
|
|
|
node = new.setdefault(key, {})
|
2025-07-01 15:20:55 +02:00
|
|
|
_ = _recursive_merge(value, node, depth + 1) # type: ignore[reportUnknownArgumentType]
|
2020-06-29 23:17:38 -06:00
|
|
|
else:
|
|
|
|
|
new[key] = value
|
|
|
|
|
|
|
|
|
|
return new
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def get_schema_files() -> list[Path]:
    """Get schema files from ecs directory."""
    # All compressed schema dumps, at any depth under the ECS schemas dir.
    matches = ECS_SCHEMAS_DIR.glob("**/*.json.gz")
    return list(matches)
|
2020-06-29 23:17:38 -06:00
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def get_schema_map() -> dict[str, Any]:
    """Get local schema files by version."""
    schema_map: dict[str, Any] = {}

    for file_path in get_schema_files():
        directory, base = os.path.split(file_path)
        # File stem before the first dot, e.g. "ecs_flat" from "ecs_flat.json.gz".
        schema_name = base.split(".")[0]
        # Parent directory name is the schema version.
        schema_version = Path(directory).name
        version_entry = schema_map.setdefault(schema_version, {})
        version_entry[schema_name] = file_path

    return schema_map
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cached
def get_schemas() -> dict[str, Any]:
    """Get local schemas."""
    schema_map = get_schema_map()

    # Replace each stored file path with its parsed (decompressed) JSON contents.
    for values in schema_map.values():
        for name in values:
            values[name] = json.loads(read_gzip(values[name]))

    return schema_map
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def get_max_version(include_master: bool = False) -> str:
    """Get maximum available schema version."""
    versions = get_schema_map().keys()

    # When requested, prefer a downloaded master snapshot if one exists;
    # master dirs are named like "master_<version>".
    if include_master and any(v.startswith("master") for v in versions):
        master_dirs = list(ECS_SCHEMAS_DIR.glob("master*"))
        return master_dirs[0].name

    released = (Version.parse(v) for v in versions if not v.startswith("master"))
    return str(max(released))
|
2020-06-29 23:17:38 -06:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@cached
def get_schema(version: str | None = None, name: str = "ecs_flat") -> dict[str, Any]:
    """Get schema by version."""
    # "master" is an alias for the latest downloaded master snapshot.
    if version == "master":
        version = get_max_version(include_master=True)

    # Default to the highest released version when none was given.
    resolved = version or str(get_max_version())
    return get_schemas()[resolved][name]
|
2020-06-29 23:17:38 -06:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@cached
def get_eql_schema(version: str | None = None, index_patterns: list[str] | None = None) -> dict[str, Any]:
    """Return schema in expected format for eql."""
    schema = get_schema(version, name="ecs_flat").copy()
    str_types = ("text", "ip", "keyword", "date", "object", "geo_point")
    num_types = ("float", "integer", "long")

    def convert_type(t: str) -> str:
        # Map an Elasticsearch type to the coarse EQL type name;
        # anything not string-like or numeric is treated as boolean.
        if t in str_types:
            return "string"
        if t in num_types:
            return "number"
        return "boolean"

    converted: dict[str, Any] = {}

    for field, schema_info in schema.items():
        add_field(converted, field, convert_type(schema_info.get("type", "")))

    # add non-ecs schema
    if index_patterns:
        for index_name in index_patterns:
            for k, v in flatten(get_index_schema(index_name)).items():
                add_field(converted, k, convert_type(v))

        # add custom schema
        if CUSTOM_RULES_DIR:
            for index_name in index_patterns:
                for k, v in flatten(get_custom_index_schema(index_name)).items():
                    add_field(converted, k, convert_type(v))

    # add endpoint custom schema
    for k, v in flatten(get_endpoint_schemas()).items():
        add_field(converted, k, convert_type(v))

    return converted
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def flatten(schema: dict[str, Any]) -> dict[str, Any]:
    """Flatten a nested schema into a single dict of dotted-key -> value."""
    flattened: dict[str, Any] = {}
    for key, value in schema.items():
        if isinstance(value, dict):
            # Recurse and prefix every nested key with this level's key.
            for sub_key, sub_value in flatten(value).items():  # type: ignore[reportUnknownArgumentType]
                flattened[f"{key}.{sub_key}"] = sub_value
        else:
            flattened[key] = value
    return flattened
|
|
|
|
|
|
|
|
|
|
|
2024-08-28 10:48:00 -04:00
|
|
|
@cached
def get_all_flattened_schema() -> dict[str, Any]:
    """Load all schemas into a flattened dictionary."""
    all_flattened_schema: dict[str, Any] = {}

    # Non-ecs schemas first.
    for schema in get_non_ecs_schema().values():
        all_flattened_schema.update(flatten(schema))

    # Every ECS version's flat mapping of field name -> type.
    for version_schemas in get_schemas().values():
        for field, info in version_schemas["ecs_flat"].items():
            all_flattened_schema[field] = info["type"]

    # Integration package schemas.
    for integration_schema in load_integrations_schemas().values():
        for index_schema in integration_schema.values():
            # Detect if ML integration
            if "jobs" in index_schema:
                # ML packages carry a "jobs" entry alongside the real schemas;
                # flatten everything except it.
                for key, ml_schema in index_schema.items():
                    if key != "jobs":
                        all_flattened_schema.update(flatten(ml_schema))
            else:
                all_flattened_schema.update(flatten(index_schema))

    return all_flattened_schema
|
|
|
|
|
|
|
|
|
|
|
2020-06-29 23:17:38 -06:00
|
|
|
@cached
def get_non_ecs_schema() -> Any:
    """Load non-ecs schema."""
    # Bundled JSON dump of fields that are not part of ECS.
    schema_file = ["non-ecs-schema.json"]
    return load_etc_dump(schema_file)
|
2020-06-29 23:17:38 -06:00
|
|
|
|
|
|
|
|
|
2024-08-06 18:07:12 -04:00
|
|
|
@cached
def get_custom_index_schema(index_name: str, stack_version: str | None = None) -> Any:
    """Load custom schema."""
    custom_schemas = get_custom_schemas(stack_version)
    index_schema = custom_schemas.get(index_name, {})

    # Also merge the schema keyed by the index name with any cross-cluster
    # prefix (e.g. "cluster:index" or "cluster::index") stripped off.
    stripped_name = index_name.replace("::", ":").split(":", 1)[-1]
    index_schema.update(custom_schemas.get(stripped_name, {}))

    return index_schema
|
|
|
|
|
|
|
|
|
|
|
2020-06-29 23:17:38 -06:00
|
|
|
@cached
def get_index_schema(index_name: str) -> Any:
    """Load non-ecs schema."""
    non_ecs_schema = get_non_ecs_schema()
    index_schema = non_ecs_schema.get(index_name, {})

    # Also merge the schema keyed by the index name with any cross-cluster
    # prefix (e.g. "cluster:index" or "cluster::index") stripped off.
    stripped_name = index_name.replace("::", ":").split(":", 1)[-1]
    index_schema.update(non_ecs_schema.get(stripped_name, {}))

    return index_schema
|
2020-06-29 23:17:38 -06:00
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def flatten_multi_fields(schema: dict[str, Any]) -> dict[str, Any]:
    """Flatten an ecs_flat-style schema to field-name -> type, including multi-fields."""
    converted: dict[str, Any] = {}
    for field, info in schema.items():
        converted[field] = info["type"]
        # Multi-fields are sub-fields like "host.name.text" with their own type.
        for sub in info.get("multi_fields", []):
            converted[f"{field}.{sub['name']}"] = sub["type"]
    return converted
|
|
|
|
|
|
|
|
|
|
|
2020-09-16 08:36:48 -06:00
|
|
|
class KqlSchema2Eql(eql.Schema):
    """Adapter exposing a flat KQL schema through the eql.Schema interface."""

    # Elasticsearch type family -> EQL type hint.
    type_mapping = {  # noqa: RUF012
        "keyword": eql.types.TypeHint.String,
        "ip": eql.types.TypeHint.String,
        "float": eql.types.TypeHint.Numeric,
        "integer": eql.types.TypeHint.Numeric,
        "boolean": eql.types.TypeHint.Boolean,
    }

    def __init__(self, kql_schema: dict[str, Any]) -> None:
        self.kql_schema = kql_schema
        eql.Schema.__init__(self, {}, allow_any=True, allow_generic=False, allow_missing=False)  # type: ignore[reportUnknownMemberType]

    def validate_event_type(self, _: Any) -> bool:  # type: ignore[reportIncompatibleMethodOverride]
        # allow all event types to fill in X:
        # `X` where ....
        return True

    def get_event_type_hint(self, _: Any, path: list[str]) -> tuple[Any, None]:  # type: ignore[reportIncompatibleMethodOverride]
        from kql.parser import elasticsearch_type_family  # type: ignore[reportMissingTypeStubs]

        elasticsearch_type = self.kql_schema.get(".".join(path))
        if not elasticsearch_type:
            return None, None

        family = elasticsearch_type_family(elasticsearch_type)
        # An unmapped family yields (None, None), matching the unknown case.
        return self.type_mapping.get(family), None
|
|
|
|
|
|
2020-09-16 08:36:48 -06:00
|
|
|
|
2020-06-29 23:17:38 -06:00
|
|
|
@cached
def get_kql_schema(
    version: str | None = None,
    indexes: list[str] | None = None,
    beat_schema: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Get schema for KQL."""
    index_names = indexes or []
    converted = flatten_multi_fields(get_schema(version, name="ecs_flat"))

    # non-ecs schema
    for index_name in index_names:
        converted.update(**flatten(get_index_schema(index_name)))

    # custom schema
    if CUSTOM_RULES_DIR:
        for index_name in index_names:
            converted.update(**flatten(get_custom_index_schema(index_name)))

    # add endpoint custom schema
    converted.update(**flatten(get_endpoint_schemas()))

    # Beat fields fill gaps only; ECS/custom entries take precedence.
    if isinstance(beat_schema, dict):
        converted = dict(flatten_multi_fields(beat_schema), **converted)

    return converted
|
|
|
|
|
|
|
|
|
|
|
2025-07-01 15:20:55 +02:00
|
|
|
def download_schemas(refresh_master: bool = True, refresh_all: bool = False, verbose: bool = True) -> None:
    """Download additional schemas from ecs releases.

    Fetches released ECS schema archives from GitHub, saves the flat and nested
    YAML schemas as gzipped JSON under a per-version directory, and optionally
    refreshes a snapshot of the working master branch.
    """
    # Skip versions already on disk unless a full refresh was requested.
    existing = [Version.parse(v) for v in get_schema_map()] if not refresh_all else []
    url = "https://api.github.com/repos/elastic/ecs/releases"
    releases = requests.get(url, timeout=30)

    for release in releases.json():
        version = Version.parse(release.get("tag_name", "").lstrip("v"))

        # we don't ever want beta
        if not version or version < Version.parse("1.0.1") or version in existing:
            continue

        # NOTE(review): mkdir has no parents=True — assumes ECS_SCHEMAS_DIR
        # already exists; confirm before relying on a clean checkout.
        schema_dir = ECS_SCHEMAS_DIR / str(version)
        schema_dir.mkdir(exist_ok=True)

        resp = requests.get(release["zipball_url"], timeout=30)
        with unzip(resp.content) as archive:
            name_list = archive.namelist()
            # The first archive entry is the top-level directory prefix.
            base = name_list[0]

            members = [f"{base}generated/ecs/ecs_flat.yml", f"{base}generated/ecs/ecs_nested.yml"]
            saved: list[str] = []

            for member in members:
                file_name = Path(member).name

                # load as yaml, save as json
                contents = yaml.safe_load(archive.read(member))
                out_file = file_name.replace(".yml", ".json.gz")

                compressed = gzip_compress(json.dumps(contents, sort_keys=True, cls=DateTimeEncoder))
                new_path = get_etc_path([ECS_NAME, str(version), out_file])
                with new_path.open("wb") as f:
                    _ = f.write(compressed)

                saved.append(out_file)

            if verbose:
                print("Saved files to {}: \n\t- {}".format(schema_dir, "\n\t- ".join(saved)))

    # handle working master separately
    if refresh_master:
        master_ver = requests.get(
            "https://raw.githubusercontent.com/elastic/ecs/master/version",
            timeout=30,
        )
        master_ver = Version.parse(master_ver.text.strip())
        master_schema = requests.get(
            "https://raw.githubusercontent.com/elastic/ecs/master/generated/ecs/ecs_flat.yml",
            timeout=30,
        )
        master_schema = yaml.safe_load(master_schema.text)

        # prepend with underscore so that we can differentiate the fact that this is a working master version
        # but first clear out any existing masters, since we only ever want 1 at a time
        existing_master = ECS_SCHEMAS_DIR.glob("master_*")
        for m in existing_master:
            shutil.rmtree(m, ignore_errors=True)

        master_dir = f"master_{master_ver}"
        master_dir_path = get_etc_path([ECS_NAME, master_dir])
        master_dir_path.mkdir(exist_ok=True)

        compressed = gzip_compress(json.dumps(master_schema, sort_keys=True, cls=DateTimeEncoder))
        new_path = get_etc_path([ECS_NAME, master_dir, "ecs_flat.json.gz"])
        with new_path.open("wb") as f:
            _ = f.write(compressed)

        if verbose:
            print("Saved files to {}: \n\t- {}".format(master_dir, "ecs_flat.json.gz"))
|
2023-06-28 20:35:33 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def download_endpoint_schemas(target: str, overwrite: bool = True) -> None:
    """Download endpoint custom schemas.

    Fetches the custom schema YAML for ``target`` from the endpoint-package
    repository, flattens its fields (including multi-fields) to a
    field-name -> type mapping, and writes it as gzipped JSON under the
    endpoint schemas directory.
    """

    # location of custom schema YAML files
    url = "https://raw.githubusercontent.com/elastic/endpoint-package/main/custom_schemas"
    r = requests.get(f"{url}/custom_{target}.yml", timeout=30)
    # Fall back to the newer per-target directory layout on 404.
    if r.status_code == 404:  # noqa: PLR2004
        r = requests.get(f"{url}/{target}/custom_{target}.yaml", timeout=30)
    r.raise_for_status()
    # Only the first YAML document is used — presumably the file holds a
    # single root entry; verify against the endpoint-package repo layout.
    schema = yaml.safe_load(r.text)[0]
    root_name = schema["name"]
    fields = schema["fields"]
    flattened = {}

    # iterate over nested fields and flatten them
    for f in fields:
        if "multi_fields" in f:
            for mf in f["multi_fields"]:
                flattened[f"{root_name}.{f['name']}.{mf['name']}"] = mf["type"]
        else:
            flattened[f"{root_name}.{f['name']}"] = f["type"]

    # save schema to disk
    ENDPOINT_SCHEMAS_DIR.mkdir(parents=True, exist_ok=True)
    compressed = gzip_compress(json.dumps(flattened, sort_keys=True, cls=DateTimeEncoder))
    new_path = ENDPOINT_SCHEMAS_DIR / f"endpoint_{target}.json.gz"
    # NOTE(review): rmtree targets a file path here; with ignore_errors=True
    # the call is a no-op for files, and open("wb") truncates anyway — confirm
    # whether this was meant to remove a directory.
    if overwrite:
        shutil.rmtree(new_path, ignore_errors=True)
    with new_path.open("wb") as f:
        _ = f.write(compressed)
    print(f"Saved endpoint schema to {new_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cached
def get_endpoint_schemas() -> dict[str, Any]:
    """Load endpoint schemas."""
    merged: dict[str, Any] = {}
    # Merge every downloaded endpoint schema dump into one flat mapping.
    for schema_file in ENDPOINT_SCHEMAS_DIR.glob("*.json.gz"):
        merged.update(json.loads(read_gzip(schema_file)))
    return merged
|