Files
sigma-rules/detection_rules/custom_schemas.py
T
Sergey Polzunov 1fb60d6475 fix: type hinting fixes and additional code checks (#4790)
* first pass

* Adding a dedicated code checking workflow

* Type fixes

* linting config and python version bump

* Type hints

* Drop incorrect config option

* More fixes

* Style fixes

* CI adjustments

* Pyproject fixes

* CI & pyproject fixes

* Proper version bump

* Tests formatting

* Resolve circular dependency

* Test fixes

* Make sure the tests are formatted correctly

* Check tweaks

* Bumping python version in CI images

* Pin marshmallow to 3.x because 4.x is not supported

* License fix

* Convert path to str

* Making myself a codeowner

* Missing kwargs param

* Adding a missing kwargs to `set_score`

* Update .github/CODEOWNERS

Co-authored-by: Mika Ayenson, PhD <Mikaayenson@users.noreply.github.com>

* Dropping unnecessary raise

* Dropping skipped test

* Drop unnecessary var

* Drop unused commented-out func

* Disable typehinting for the whole func

* Update linting command

* Invalid type hint on the input param

* Incorrect field type

* Incorrect value used fix

* Stricter values check

* Simpler function call

* Type condition fix

* TOML formatter fix

* Simplify output conditions

* Formatting

* Use proper types instead of aliases

* MITRE attack fixes

* Using pathlib.Path for an argument

* Use proper method to update a set from a dict

* First round of `ruff` fixes

* More fixes

* More fixes

* Hack against cyclic dependency

* Ignore `PLC0415`

* Remove unused markers

* Cleanup

* Fixing the incorrect condition

* Update .github/CODEOWNERS

Co-authored-by: Mika Ayenson, PhD <Mikaayenson@users.noreply.github.com>

* Set explicit default values for optional fields

* Update the guidelines

* Adding None Defaults

---------

Co-authored-by: Mika Ayenson, PhD <Mikaayenson@users.noreply.github.com>
Co-authored-by: eric-forte-elastic <eric.forte@elastic.co>
2025-07-01 08:20:55 -05:00

116 lines
4.6 KiB
Python

# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0; you may not use this file except in compliance with the Elastic License
# 2.0.
"""Custom Schemas management."""
import uuid
from pathlib import Path
from typing import Any
import eql # type: ignore[reportMissingTypeStubs]
from eql import load_dump, save_dump # type: ignore[reportMissingTypeStubs]
from .config import parse_rules_config
from .utils import cached, clear_caches
# Rules configuration, parsed once when this module is imported.
RULES_CONFIG = parse_rules_config()
# Schema names that the functions in this module never treat as custom schemas.
RESERVED_SCHEMA_NAMES = ["beats", "ecs", "endgame"]
@cached
def get_custom_schemas(stack_version: str | None = None) -> dict[str, Any]:
    """Collect the contents of every custom (non-reserved) schema file.

    When ``stack_version`` is given only that version's schema map is inspected;
    otherwise every version in ``RULES_CONFIG.stack_schema_map`` is. Relative
    locations are taken relative to the directory of the stack schema map file.
    Files are merged with ``dict.update``, so later files override earlier keys.

    Raises:
        ValueError: If a custom schema location is not an existing file.
    """
    merged: dict[str, Any] = {}

    if stack_version:
        versions_to_load = [stack_version]
    else:
        versions_to_load = RULES_CONFIG.stack_schema_map.keys()

    for version in versions_to_load:
        for name, location in RULES_CONFIG.stack_schema_map[version].items():
            # Reserved names are not custom schemas.
            if name in RESERVED_SCHEMA_NAMES:
                continue

            schema_path = Path(location)
            if not schema_path.is_absolute():
                schema_path = RULES_CONFIG.stack_schema_map_file.parent / location

            if not schema_path.is_file():
                raise ValueError(f"Custom schema must be a file: {schema_path}")

            merged.update(eql.utils.load_dump(str(schema_path)))  # type: ignore[reportUnknownMemberType]

    return merged
def resolve_schema_path(path: str) -> Path:
    """Return ``path`` as a ``Path``, anchoring relative paths at the stack schema map's directory."""
    candidate = Path(path)
    if candidate.is_absolute():
        return candidate
    # Relative locations are taken relative to the directory holding the stack schema map file.
    return RULES_CONFIG.stack_schema_map_file.parent.joinpath(path)
def update_data(index: str, field: str, data: dict[str, Any], field_type: str | None = None) -> dict[str, Any]:
"""Update the schema entry with the appropriate index and field."""
data.setdefault(index, {})[field] = field_type if field_type else "keyword"
return data
def update_stack_schema_map(
stack_schema_map: dict[str, Any],
auto_gen_schema_file: str,
) -> tuple[dict[str, Any], str | None, str]:
"""Update the stack-schema-map.yaml file with the appropriate auto_gen_schema_file location."""
random_uuid = str(uuid.uuid4())
auto_generated_id = None
for val in stack_schema_map.values():
key_found = False
for key, value in val.items():
value_path = resolve_schema_path(value)
if value_path == Path(auto_gen_schema_file).resolve() and key not in RESERVED_SCHEMA_NAMES:
auto_generated_id = key
key_found = True
break
if key_found is False:
if auto_generated_id is None:
auto_generated_id = random_uuid
val[auto_generated_id] = str(auto_gen_schema_file)
return stack_schema_map, auto_generated_id, random_uuid
def clean_stack_schema_map(
    stack_schema_map: dict[str, Any], auto_generated_id: str, random_uuid: str
) -> dict[str, Any]:
    """Re-key every ``random_uuid`` entry of the stack schema map as ``auto_generated_id``.

    The per-version mappings are modified in place and the same top-level mapping
    is returned. Versions without a ``random_uuid`` entry are left untouched.
    """
    for schemas in stack_schema_map.values():
        if random_uuid not in schemas:
            continue
        # Remove the placeholder first, then store its value under the final key.
        location = schemas.pop(random_uuid)
        schemas[auto_generated_id] = location
    return stack_schema_map
def update_auto_generated_schema(index: str, field: str, field_type: str | None = None) -> None:
    """Add a field to the auto-generated schema file and register that file in the stack schema map.

    The field is written to the auto-generated schema file, the stack schema map
    file is updated so every version references that file, and the in-memory
    configuration and caches are refreshed.

    Raises:
        ValueError: If no identifier for the auto-generated schema could be determined.
    """
    schema_file = str(RULES_CONFIG.auto_gen_schema_file)
    map_file = str(RULES_CONFIG.stack_schema_map_file)

    # Record the field in the auto-generated schema file.
    schema_data = update_data(index, field, load_dump(schema_file), field_type)
    save_dump(schema_data, schema_file)

    # Make sure the stack schema map points at the auto-generated schema file.
    schema_map, known_id, placeholder_id = update_stack_schema_map(load_dump(map_file), schema_file)
    if not known_id:
        raise ValueError("Autogenerated ID not found")
    save_dump(schema_map, map_file)

    # Reload the saved map and swap the placeholder UUID for the known identifier.
    schema_map = clean_stack_schema_map(load_dump(map_file), known_id, placeholder_id)
    save_dump(schema_map, map_file)

    RULES_CONFIG.stack_schema_map = schema_map
    # IMPORTANT: caches must be cleared so the schema is reloaded.
    clear_caches()