Files
sigma-rules/detection_rules/beats.py
T
Andrew Pease e0f2e8b4a9 Add dataset and index to network rules (#15)
* Add dataset and index to network rules
* Restore iptables changes
* Fix beats parsing logic
* Updated date and ECS version
* Only update modules if empty

Co-authored-by: Justin Ibarra <brokensound77@users.noreply.github.com>
Co-authored-by: Ross Wolf <31489089+rw-access@users.noreply.github.com>
2020-07-08 13:19:35 -06:00

176 lines
6.1 KiB
Python

# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License;
# you may not use this file except in compliance with the Elastic License.
"""Beats schemas management."""
import os
import kql
import requests
import yaml
from .semver import Version
from .utils import unzip, load_etc_dump, save_etc_dump, get_etc_path
def download_latest_beats_schema():
    """Download the field definitions of the latest beats release and save them as an etc dump.

    The newest release is chosen by comparing the ``tag_name`` (leading "v" stripped) of every
    entry returned by the GitHub releases API for elastic/beats. Each ``fields.yml``,
    ``fields.common.yml`` and ``config.yml`` found in the release zipball is parsed and stored in a
    nested ``{"folders": {...}, "files": {...}}`` structure that mirrors the archive layout. Only
    top-level directories whose name ends with "beat" are kept in the saved dump.

    Files that fail to parse as YAML are reported and skipped.

    Raises:
        requests.HTTPError: if either GitHub request returns an error status.
    """
    url = 'https://api.github.com/repos/elastic/beats/releases'
    releases = requests.get(url)
    # surface HTTP failures (e.g. rate limiting) here instead of failing later on an unexpected payload
    releases.raise_for_status()

    latest_release = max(releases.json(), key=lambda release: Version(release["tag_name"].lstrip("v")))

    print(f"Downloading beats {latest_release['tag_name']}")

    response = requests.get(latest_release['zipball_url'])
    response.raise_for_status()
    print(f"Downloaded {len(response.content) / 1024.0 / 1024.0:.2f} MB release.")

    fs = {}

    with unzip(response.content) as archive:
        # the first archive entry is used as the base directory prefix shared by every member name
        base_directory = archive.namelist()[0]

        for name in archive.namelist():
            if os.path.basename(name) not in ("fields.yml", "fields.common.yml", "config.yml"):
                continue

            contents = archive.read(name)

            # chop off the base directory name
            key = name[len(base_directory):]

            # drop a leading "x-pack/" so those files land under the same top-level directory names
            if key.startswith("x-pack"):
                key = key[len("x-pack") + 1:]

            try:
                decoded = yaml.safe_load(contents)
            except yaml.YAMLError:
                print(f"Error loading {name}")
                # skip the file: there is nothing valid to store for it
                continue

            # create a hierarchical structure
            branch = fs
            directory, base_name = os.path.split(key)
            # zip member names always use "/" regardless of the host platform
            for limb in directory.split("/"):
                branch = branch.setdefault("folders", {}).setdefault(limb, {})

            branch.setdefault("files", {})[base_name] = decoded

    # remove all non-beat directories
    fs = {k: v for k, v in fs.get("folders", {}).items() if k.endswith("beat")}
    print(f"Saving etc/beats_schemas/{latest_release['tag_name']}.yml")
    save_etc_dump(fs, "beats_schemas", latest_release["tag_name"] + ".yml")
def _flatten_schema(schema: list, prefix="") -> list:
if schema is None:
# sometimes we see `fields: null` in the yaml
return []
flattened = []
for s in schema:
if s.get("type") == "group":
flattened.extend(_flatten_schema(s["fields"], prefix=prefix + s["name"] + "."))
elif "fields" in s:
flattened.extend(_flatten_schema(s["fields"], prefix=prefix))
elif "name" in s and "description" in s:
s = s.copy()
# type is implicitly keyword if not defined
# example: https://github.com/elastic/beats/blob/master/packetbeat/_meta/fields.common.yml#L7-L12
s.setdefault("type", "keyword")
s["name"] = prefix + s["name"]
flattened.append(s)
return flattened
def get_field_schema(base_directory, prefix="", include_common=False):
    """Return the flattened fields declared in the ``_meta`` folder of a directory node.

    Args:
        base_directory: a node of the nested ``folders``/``files`` structure.
        prefix: passed through to ``_flatten_schema`` as the field name prefix.
        include_common: when true, ``fields.common.yml`` is read after ``fields.yml``.

    Returns:
        A list of flattened field dicts; empty when none of the files are present.
    """
    meta_files = base_directory.get("folders", {}).get("_meta", {}).get("files", {})

    wanted = ["fields.yml"]
    if include_common:
        wanted.append("fields.common.yml")

    fields = []
    for file_name in wanted:
        if file_name not in meta_files:
            continue
        fields += _flatten_schema(meta_files[file_name], prefix=prefix)

    return fields
def get_beat_root_schema(schema: dict, beat: str):
    """Return the root-level fields of a beat, keyed by field name.

    Args:
        schema: mapping of beat name to its directory node.
        beat: name of the beat to look up in ``schema``.

    Returns:
        A dict of ``field name -> field definition`` built in name order, taken from the beat's
        ``fields.yml`` and ``fields.common.yml``.

    Raises:
        KeyError: if ``beat`` is not a key of ``schema``.
    """
    if beat not in schema:
        raise KeyError(f"Unknown beats module {beat}")

    fields = get_field_schema(schema[beat], include_common=True)

    by_name = {}
    for entry in sorted(fields, key=lambda entry: entry["name"]):
        # a later entry with the same name replaces the earlier one
        by_name[entry["name"]] = entry
    return by_name
def get_beats_sub_schema(schema: dict, beat: str, module: str, *datasets: str):
    """Return the fields of a beat module's datasets, keyed by field name.

    Args:
        schema: mapping of beat name to its directory node.
        beat: name of the beat to look up in ``schema``.
        module: name of the module folder under the beat's ``module`` directory.
        *datasets: dataset names, given bare (``s3``) or module-qualified (``aws.s3``). When none
            are given, every dataset folder of the module whose name does not start with "_" is used.

    Returns:
        A dict of ``field name -> field definition`` built in name order. Field names are prefixed
        with ``<module>.``.

    Raises:
        KeyError: if ``beat`` is not a key of ``schema``.
    """
    if beat not in schema:
        raise KeyError(f"Unknown beats module {beat}")

    module_dir = schema[beat].get("folders", {}).get("module", {}).get("folders", {}).get(module, {})
    dataset_folders = module_dir.get("folders", {})

    # with only a module to go on, fall back to all of its non-underscore dataset folders
    names = datasets or [folder for folder in dataset_folders if not folder.startswith("_")]

    module_prefix = module + "."
    collected = []
    for name in names:
        # aws.s3 -> s3
        short_name = name[len(module_prefix):] if name.startswith(module_prefix) else name
        dataset_dir = dataset_folders.get(short_name, {})
        collected += get_field_schema(dataset_dir, prefix=module_prefix, include_common=True)

    by_name = {}
    for entry in sorted(collected, key=lambda entry: entry["name"]):
        by_name[entry["name"]] = entry
    return by_name
# cache for the loaded schema dump; populated on the first call to read_beats_schema()
SCHEMA = None


def read_beats_schema():
    """Return the newest beats schema dump, loading it on first use.

    The file names in the ``beats_schemas`` etc directory are compared as ``Version`` values
    (leading "v" characters stripped) and the greatest one is loaded. The result is stored in
    the module-level ``SCHEMA`` and returned as-is on later calls.
    """
    global SCHEMA

    if SCHEMA is not None:
        return SCHEMA

    schema_dir = get_etc_path("beats_schemas")
    available = os.listdir(schema_dir)
    newest = max(available, key=lambda file_name: Version(file_name.lstrip("v")))
    SCHEMA = load_etc_dump("beats_schemas", newest)
    return SCHEMA
def get_schema_for_query(tree: kql.ast, beats: list) -> dict:
    """Build the combined field schema that applies to a KQL query for the given beats.

    Args:
        tree: parsed KQL query; iterating it yields its AST nodes.
        beats: names of the beats whose schemas should be included.

    Returns:
        A dict of ``field name -> field definition`` holding each beat's root fields plus the
        fields of every module (and dataset) referenced by the query.
    """
    module_field = kql.ast.Field("event.module")
    dataset_field = kql.ast.Field("event.dataset")

    modules = set()
    datasets = set()

    # pull the string values compared against event.module and event.dataset out of the query's AST
    for node in tree:
        if not isinstance(node, kql.ast.FieldComparison):
            continue
        for wanted, bucket in ((module_field, modules), (dataset_field, datasets)):
            if node.field == wanted:
                bucket.update(child.value for child in node.value if isinstance(child, kql.ast.String))

    beats_schema = read_beats_schema()

    # infer the modules from the datasets when only datasets are defined (aws.s3 -> aws)
    if not modules:
        for dataset in datasets:
            if "." in dataset:
                modules.add(dataset.split(".")[0])

    combined = {}
    for beat in beats:
        # NOTE: when no module is known, only the beat's root schema is added;
        # the query is not expanded to every module of the beat
        combined.update(get_beat_root_schema(beats_schema, beat))

        for module in modules:
            combined.update(get_beats_sub_schema(beats_schema, beat, module, *datasets))

    return combined