# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0; you may not use this file except in compliance with the Elastic License
# 2.0.

"""Elasticsearch cli commands."""

import json
import sys
import time
from collections import defaultdict
from pathlib import Path
from typing import IO, Any

import click
import elasticsearch
import kql  # type: ignore[reportMissingTypeStubs]
from elasticsearch import Elasticsearch
from elasticsearch.client import AsyncSearchClient

from .config import parse_rules_config
from .main import root
from .misc import add_params, elasticsearch_options, get_elasticsearch_client, nested_get, raise_client_error
from .rule import TOMLRule
from .rule_loader import RuleCollection
from .utils import event_sort, format_command_options, get_path, normalize_timing_and_sort, unix_time_to_formatted

COLLECTION_DIR = get_path(["collections"])
MATCH_ALL: dict[str, dict[str, Any]] = {"bool": {"filter": [{"match_all": {}}]}}
RULES_CONFIG = parse_rules_config()


def add_range_to_dsl(dsl_filter: list[dict[str, Any]], start_time: str, end_time: str = "now") -> None:
    dsl_filter.append(
        {
            "range": {
                "@timestamp": {
                    "gt": start_time,
                    "lte": end_time,
                    "format": "strict_date_optional_time",
                },
            },
        }
    )
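

# A minimal sketch of add_range_to_dsl in use (the `filters` list here is
# hypothetical, not part of this module):
#
#     filters: list[dict[str, Any]] = []
#     add_range_to_dsl(filters, start_time="now-6h")
#     # filters[0] == {"range": {"@timestamp": {"gt": "now-6h", "lte": "now",
#     #                                         "format": "strict_date_optional_time"}}}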


def parse_unique_field_results(
    rule_type: str,
    unique_fields: list[str],
    search_results: dict[str, Any],
) -> dict[str, Any]:
    """Count unique values for the given fields across search results."""
    parsed_results: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
    hits = search_results["hits"]
    # for eql rules the hits structure is different: results live under "events" or "sequences"
    hits = hits["hits"] if rule_type != "eql" else hits.get("events") or hits.get("sequences", [])
    for hit in hits:
        for field in unique_fields:
            if "events" in hit:
                match: list[Any] = []
                for event in hit["events"]:
                    matched = nested_get(event["_source"], field)
                    match.extend([matched] if not isinstance(matched, list) else matched)  # type: ignore[reportUnknownArgumentType]
                if not match:
                    continue
            else:
                match = nested_get(hit["_source"], field)
                if not match:
                    continue

            match = ",".join(sorted(match)) if isinstance(match, list) else match  # type: ignore[reportUnknownArgumentType]
            parsed_results[field][match] += 1  # type: ignore[reportUnknownArgumentType]

    return {"results": parsed_results} if parsed_results else {}
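

# A hedged example of the shape parse_unique_field_results returns for a
# non-EQL rule (documents are made up; the nested dicts are defaultdicts):
#
#     parse_unique_field_results(
#         "query",
#         ["process.name"],
#         {"hits": {"hits": [{"_source": {"process": {"name": "cmd.exe"}}}]}},
#     )
#     # -> {"results": {"process.name": {"cmd.exe": 1}}}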


class Events:
    """Events collected from Elasticsearch."""

    def __init__(self, events: dict[str, Any]) -> None:
        self.events = self._normalize_event_timing(events)

    @staticmethod
    def _normalize_event_timing(events: dict[str, Any]) -> dict[str, Any]:
        """Normalize event timestamps and sort."""
        for agent_type, _events in events.items():
            events[agent_type] = normalize_timing_and_sort(_events)

        return events

    @staticmethod
    def _get_dump_dir(
        rta_name: str | None = None,
        host_id: str | None = None,
        host_os_family: str | None = None,
    ) -> Path:
        """Prepare and get the dump path."""
        if rta_name and host_os_family:
            dump_dir = get_path(["unit_tests", "data", "true_positives", rta_name, host_os_family])
            dump_dir.mkdir(parents=True, exist_ok=True)
            return dump_dir

        time_str = time.strftime("%Y%m%dT%H%M%SL")
        dump_dir = COLLECTION_DIR / (host_id or "unknown_host") / time_str
        dump_dir.mkdir(parents=True, exist_ok=True)
        return dump_dir

    def evaluate_against_rule(self, rule_id: str, verbose: bool = True) -> list[Any]:
        """Evaluate a rule against collected events and update mapping."""
        rule = RuleCollection.default().id_map.get(rule_id)
        if not rule:
            raise ValueError(f"Unable to find rule with ID {rule_id}")
        merged_events = combine_sources(*self.events.values())
        filtered = evaluate(rule, merged_events, normalize_kql_keywords=RULES_CONFIG.normalize_kql_keywords)

        if verbose:
            click.echo("Matching results found")

        return filtered

    def echo_events(self, pager: bool = False, pretty: bool = True) -> None:
        """Print events to stdout."""
        echo_fn = click.echo_via_pager if pager else click.echo
        echo_fn(json.dumps(self.events, indent=2 if pretty else None, sort_keys=True))

    def save(self, rta_name: str | None = None, dump_dir: Path | None = None, host_id: str | None = None) -> None:
        """Save collected events."""
        if not self.events:
            raise ValueError("Nothing to save. Run Collector.run() method first or verify logging")

        host_os_family = None
        for key in self.events:
            # guard the "os" lookup so a host document without it doesn't raise
            first_event = self.events[key][0]
            if first_event.get("host", {}).get("id") == host_id:
                host_os_family = first_event.get("host", {}).get("os", {}).get("family")
                break

        if not host_os_family:
            click.echo(f"Unable to determine host.os.family for host_id: {host_id}")
            host_os_family = click.prompt(
                "Please enter the host.os.family for this host_id",
                type=click.Choice(["windows", "macos", "linux"]),
                default="windows",
            )

        dump_dir = dump_dir or self._get_dump_dir(rta_name=rta_name, host_id=host_id, host_os_family=host_os_family)

        for source, events in self.events.items():
            path = dump_dir / (source + ".ndjson")
            with path.open("w") as f:
                f.writelines([json.dumps(e, sort_keys=True) + "\n" for e in events])
            click.echo(f"{len(events)} events saved to: {path}")


class CollectEvents:
    """Event collector for elastic stack."""

    def __init__(self, client: Elasticsearch, max_events: int = 3000) -> None:
        self.client = client
        self.max_events = max_events

    def _build_timestamp_map(self, index: str) -> dict[str, Any]:
        """Build a mapping of indexes to timestamp data formats."""
        mappings = self.client.indices.get_mapping(index=index)
        return {n: m["mappings"].get("properties", {}).get("@timestamp", {}) for n, m in mappings.items()}

    def _get_last_event_time(self, index: str, dsl: dict[str, Any] | None = None) -> None | str:
        """Get timestamp of most recent event."""
        last_event = self.client.search(query=dsl, index=index, size=1, sort="@timestamp:desc")["hits"]["hits"]
        if not last_event:
            return None

        last_event = last_event[0]
        index = last_event["_index"]
        timestamp = last_event["_source"]["@timestamp"]

        timestamp_map = self._build_timestamp_map(index)
        event_date_format = timestamp_map[index].get("format", "").split("||")

        # there are many natively supported date formats and even custom date formats, but most, including beats,
        # use the default `strict_date_optional_time`. It would be difficult to account for all possible formats,
        # so this handles the default plus unix time.
        if set(event_date_format) & {"epoch_millis", "epoch_second"}:
            timestamp = unix_time_to_formatted(timestamp)

        return timestamp

    @staticmethod
    def _prep_query(
        query: str | dict[str, Any],
        language: str,
        index: str | list[str] | tuple[str],
        start_time: str | None = None,
        end_time: str | None = None,
    ) -> tuple[str, dict[str, Any], str | None]:
        """Prep a query for search."""
        index_str = ",".join(index if isinstance(index, list | tuple) else index.split(","))
        lucene_query = str(query) if language == "lucene" else None

        if language in ("kql", "kuery"):
            formatted_dsl = {"query": kql.to_dsl(query)}  # type: ignore[reportUnknownMemberType]
        elif language == "eql":
            formatted_dsl = {"query": query, "filter": MATCH_ALL}
        elif language == "lucene":
            formatted_dsl: dict[str, Any] = {"query": {"bool": {"filter": []}}}
        elif language == "dsl":
            formatted_dsl = {"query": query}
        else:
            raise ValueError(f"Unknown search language: {language}")

        if start_time or end_time:
            end_time = end_time or "now"
            dsl = (
                formatted_dsl["filter"]["bool"]["filter"]
                if language == "eql"
                else formatted_dsl["query"]["bool"].setdefault("filter", [])
            )
            if not start_time:
                raise ValueError("No start time provided")

            add_range_to_dsl(dsl, start_time, end_time)

        return index_str, formatted_dsl, lucene_query
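
    # A hedged example of prepping a KQL search (query and index values are
    # illustrative):
    #
    #     index_str, dsl, lucene = CollectEvents._prep_query(
    #         query='process.name:"powershell.exe"',
    #         language="kuery",
    #         index="logs-*",
    #         start_time="now-24h",
    #     )
    #     # index_str == "logs-*"; lucene is None; dsl["query"]["bool"]["filter"]
    #     # gains the @timestamp range clause via add_range_to_dsl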

    def search(  # noqa: PLR0913
        self,
        query: str | dict[str, Any],
        language: str,
        index: str | list[str] = "*",
        start_time: str | None = None,
        end_time: str | None = None,
        size: int | None = None,
        **kwargs: Any,
    ) -> list[Any]:
        """Search an elasticsearch instance."""
        index_str, formatted_dsl, lucene_query = self._prep_query(
            query=query, language=language, index=index, start_time=start_time, end_time=end_time
        )
        formatted_dsl.update(size=size or self.max_events)

        if language == "eql":
            results = self.client.eql.search(body=formatted_dsl, index=index_str, **kwargs)["hits"]
            results = results.get("events") or results.get("sequences", [])
        else:
            results = self.client.search(
                body=formatted_dsl,
                q=lucene_query,
                index=index_str,
                allow_no_indices=True,
                ignore_unavailable=True,
                **kwargs,
            )["hits"]["hits"]

        return results
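
    # Hedged usage sketch (client construction elided; values illustrative):
    #
    #     collector = CollectEvents(es_client, max_events=500)
    #     hits = collector.search(
    #         query='event.category:"process"',
    #         language="kuery",
    #         index="logs-endpoint.events.*",
    #         start_time="now-1h",
    #     )
    #     # each item in hits is a raw Elasticsearch hit with a "_source" dict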

    def search_from_rule(
        self,
        rules: RuleCollection,
        start_time: str | None = None,
        end_time: str = "now",
        size: int | None = None,
    ) -> dict[str, Any]:
        """Search an elasticsearch instance using a rule."""
        async_client = AsyncSearchClient(self.client)
        survey_results: dict[str, Any] = {}
        multi_search: list[dict[str, Any]] = []
        multi_search_rules: list[TOMLRule] = []
        async_searches: list[tuple[TOMLRule, Any]] = []
        eql_searches: list[tuple[TOMLRule, dict[str, Any]]] = []

        for rule in rules:
            if not rule.contents.data.get("query"):
                continue

            language = rule.contents.data.get("language")
            query = rule.contents.data.query  # type: ignore[reportAttributeAccessIssue]
            rule_type = rule.contents.data.type
            index_str, formatted_dsl, _ = self._prep_query(
                query=query,  # type: ignore[reportUnknownArgumentType]
                language=language,  # type: ignore[reportUnknownArgumentType]
                index=rule.contents.data.get("index", "*"),  # type: ignore[reportUnknownArgumentType]
                start_time=start_time,
                end_time=end_time,
            )
            formatted_dsl.update(size=size or self.max_events)

            # prep for searches: msearch for kql | async search for lucene | eql client search for eql
            if language == "kuery":
                multi_search_rules.append(rule)
                multi_search.append({"index": index_str, "allow_no_indices": "true", "ignore_unavailable": "true"})
                multi_search.append(formatted_dsl)
            elif language == "lucene":
                # wait for 0 to try and force async with no immediate results (not guaranteed)
                result = async_client.submit(
                    body=formatted_dsl,
                    q=query,  # type: ignore[reportUnknownArgumentType]
                    index=index_str,
                    allow_no_indices=True,
                    ignore_unavailable=True,
                    wait_for_completion_timeout=0,
                )
                if result["is_running"] is True:
                    async_searches.append((rule, result["id"]))
                else:
                    survey_results[rule.id] = parse_unique_field_results(
                        rule_type, ["process.name"], result["response"]
                    )
            elif language == "eql":
                eql_body: dict[str, Any] = {
                    "index": index_str,
                    "params": {"ignore_unavailable": "true", "allow_no_indices": "true"},
                    "body": {"query": query, "filter": formatted_dsl["filter"]},
                }
                eql_searches.append((rule, eql_body))

        # assemble search results
        multi_search_results = self.client.msearch(searches=multi_search)
        for index, result in enumerate(multi_search_results["responses"]):
            try:
                rule = multi_search_rules[index]
                survey_results[rule.id] = parse_unique_field_results(
                    rule.contents.data.type,
                    rule.contents.data.unique_fields,  # type: ignore[reportAttributeAccessIssue]
                    result,
                )
            except KeyError:
                survey_results[multi_search_rules[index].id] = {"error_retrieving_results": True}

        for entry in eql_searches:
            rule, search_args = entry
            try:
                result = self.client.eql.search(**search_args)
                survey_results[rule.id] = parse_unique_field_results(
                    rule.contents.data.type,
                    rule.contents.data.unique_fields,  # type: ignore[reportAttributeAccessIssue]
                    result,  # type: ignore[reportAttributeAccessIssue]
                )
            except (elasticsearch.NotFoundError, elasticsearch.RequestError) as e:
                survey_results[rule.id] = {"error_retrieving_results": True, "error": e.info["error"]["reason"]}

        for entry in async_searches:
            rule: TOMLRule
            rule, async_id = entry
            result = async_client.get(id=async_id)["response"]
            survey_results[rule.id] = parse_unique_field_results(rule.contents.data.type, ["process.name"], result)

        return survey_results

    def count(
        self,
        query: str,
        language: str,
        index: str | list[str],
        start_time: str | None = None,
        end_time: str | None = "now",
    ) -> Any:
        """Get a count of documents from elasticsearch."""
        index_str, formatted_dsl, lucene_query = self._prep_query(
            query=query,
            language=language,
            index=index,
            start_time=start_time,
            end_time=end_time,
        )

        # EQL API has no count endpoint
        if language == "eql":
            results = self.search(
                query=query,
                language=language,
                index=index,
                start_time=start_time,
                end_time=end_time,
                size=1000,
            )
            return len(results)

        resp = self.client.count(
            body=formatted_dsl,
            index=index_str,
            q=lucene_query,
            allow_no_indices=True,
            ignore_unavailable=True,
        )

        return resp["count"]
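
    # Hedged usage sketch for count, reusing the collector from the sketch
    # above (values illustrative). Note that EQL counts fall back to search()
    # with size=1000, so they are effectively capped at 1000:
    #
    #     total = collector.count(
    #         query="destination.port:4444",
    #         language="kuery",
    #         index="logs-*",
    #         start_time="now-7d",
    #     )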

    def count_from_rule(
        self,
        rules: RuleCollection,
        start_time: str | None = None,
        end_time: str | None = "now",
    ) -> dict[str, Any]:
        """Get a count of documents from elasticsearch using a rule."""
        survey_results: dict[str, Any] = {}

        for rule in rules.rules:
            rule_results: dict[str, Any] = {"rule_id": rule.id, "name": rule.name}

            if not rule.contents.data.get("query"):
                continue

            try:
                rule_results["search_count"] = self.count(
                    query=rule.contents.data.query,  # type: ignore[reportAttributeAccessIssue]
                    language=rule.contents.data.language,  # type: ignore[reportAttributeAccessIssue]
                    index=rule.contents.data.get("index", "*"),  # type: ignore[reportAttributeAccessIssue]
                    start_time=start_time,
                    end_time=end_time,
                )
            except (elasticsearch.NotFoundError, elasticsearch.RequestError):
                rule_results["search_count"] = -1

            survey_results[rule.id] = rule_results

        return survey_results


def evaluate(rule: TOMLRule, events: list[Any], normalize_kql_keywords: bool = False) -> list[Any]:
    """Evaluate a query against events."""
    evaluator = kql.get_evaluator(kql.parse(rule.query), normalize_kql_keywords=normalize_kql_keywords)  # type: ignore[reportUnknownMemberType]
    return list(filter(evaluator, events))  # type: ignore[reportUnknownMemberType]


def combine_sources(*sources: list[Any]) -> list[Any]:
    """Combine lists of events from multiple sources."""
    # callers unpack each source's event list as a separate positional argument
    # (e.g. combine_sources(*self.events.values())), hence *sources
    combined: list[Any] = []
    for source in sources:
        combined.extend(source.copy())

    return event_sort(combined)
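

# A hedged sketch tying the two helpers together the way
# Events.evaluate_against_rule does (rule and event lists are hypothetical):
#
#     merged = combine_sources(winlogbeat_events, endpoint_events)
#     matches = evaluate(rule, merged)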


class CollectEventsWithDSL(CollectEvents):
    """Collect events from elasticsearch."""

    @staticmethod
    def _group_events_by_type(events: list[Any]) -> dict[str, list[Any]]:
        """Group events by agent.type."""
        event_by_type: dict[str, list[Any]] = {}

        for event in events:
            event_by_type.setdefault(event["_source"]["agent"]["type"], []).append(event["_source"])

        return event_by_type

    def run(self, dsl: dict[str, Any], indexes: str | list[str], start_time: str) -> Events:
        """Collect the events."""
        results = self.search(
            dsl,
            language="dsl",
            index=indexes,
            start_time=start_time,
            end_time="now",
            size=5000,
            sort=[{"@timestamp": {"order": "asc"}}],
        )
        events = self._group_events_by_type(results)
        return Events(events)
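

# A hedged example of the grouping performed by run(): hits from two different
# agents come back keyed by each hit's _source.agent.type, e.g.
# {"filebeat": [...], "winlogbeat": [...]}, which Events then normalizes.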


@root.command("normalize-data")
@click.argument("events-file", type=Path)
def normalize_data(events_file: Path) -> None:
    """Normalize Elasticsearch data timestamps and sort."""
    file_name = events_file.name
    content = events_file.read_text()
    lines = content.splitlines()

    events = Events({file_name: [json.loads(line) for line in lines]})
    events.save(dump_dir=events_file.parent)
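

# Hedged CLI sketch (invocation style assumed from the repo's conventions):
#
#     python -m detection_rules normalize-data path/to/events.ndjson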


@root.group("es")
@add_params(*elasticsearch_options)
@click.pass_context
def es_group(ctx: click.Context, **kwargs: Any) -> None:
    """Commands for integrating with Elasticsearch."""
    _ = ctx.ensure_object(dict)  # type: ignore[reportUnknownVariableType]

    # only initialize an es client if the subcommand is invoked without help (hacky)
    if sys.argv[-1] in ctx.help_option_names:
        click.echo("Elasticsearch client:")
        click.echo(format_command_options(ctx))
    else:
        ctx.obj["es"] = get_elasticsearch_client(ctx=ctx, **kwargs)


@es_group.command("collect-events")
@click.argument("host-id")
@click.option("--query", "-q", help="KQL query to scope search")
@click.option("--index", "-i", multiple=True, help="Index(es) to search against (default: all indexes)")
@click.option("--rta-name", "-r", help="Name of RTA in order to save events directly to unit tests data directory")
@click.option("--rule-id", help="Updates rule mapping in rule-mapping.yaml file (requires --rta-name)")
@click.option("--view-events", is_flag=True, help="Print events after saving")
@click.pass_context
def collect_events(  # noqa: PLR0913
    ctx: click.Context,
    host_id: str,
    query: str,
    index: list[str],
    rta_name: str,
    rule_id: str,
    view_events: bool,
) -> Events:
    """Collect events from Elasticsearch."""
    client: Elasticsearch = ctx.obj["es"]
    dsl = kql.to_dsl(query) if query else MATCH_ALL  # type: ignore[reportUnknownMemberType]
    dsl["bool"].setdefault("filter", []).append(  # type: ignore[reportUnknownMemberType]
        {
            "bool": {
                "should": [{"match_phrase": {"host.id": host_id}}],
            },
        }
    )

    try:
        collector = CollectEventsWithDSL(client)
        start = time.time()
        click.pause("Press any key once detonation is complete ...")
        start_time = f"now-{round(time.time() - start) + 5}s"
        events = collector.run(dsl, index or "*", start_time)  # type: ignore[reportUnknownArgumentType]
        events.save(rta_name=rta_name, host_id=host_id)

        if rta_name and rule_id:
            _ = events.evaluate_against_rule(rule_id)

        if view_events and events.events:
            events.echo_events(pager=True)

    except AssertionError as e:
        error_msg = "No events collected! Verify events are streaming and that the agent-hostname is correct"
        raise_client_error(error_msg, e, ctx=ctx)

    return events
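

# Hedged CLI sketch (host id and query are illustrative):
#
#     python -m detection_rules es collect-events <host-id> \
#         -q 'agent.type:"winlogbeat"' --view-events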


@es_group.command("index-rules")
@click.option("--query", "-q", help="Optional KQL query to limit to specific rules")
@click.option("--from-file", "-f", type=click.File("r"), help="Load a previously saved uploadable bulk file")
@click.option("--save_files", "-s", is_flag=True, help="Optionally save the bulk request to a file")
@click.pass_context
def index_repo(ctx: click.Context, query: str, from_file: IO[Any] | None, save_files: bool) -> None:
    """Index rules based on KQL search results to an elasticsearch instance."""
    from .main import generate_rules_index

    es_client: Elasticsearch = ctx.obj["es"]

    if from_file:
        bulk_upload_docs = from_file.read()

        # light validation only
        try:
            index_body = [json.loads(line) for line in bulk_upload_docs.splitlines()]
            click.echo(f"{len([r for r in index_body if 'rule' in r])} rules included")
        except json.JSONDecodeError:
            raise_client_error(f"Improperly formatted bulk request file: {from_file.name}")
    else:
        bulk_upload_docs, _ = ctx.invoke(generate_rules_index, query=query, save_files=save_files)

    _ = es_client.bulk(operations=bulk_upload_docs)
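

# A hedged reminder of the newline-delimited bulk shape this command uploads
# (action line, then document line; the index name is illustrative):
#
#     {"index": {"_index": "rules-index"}}
#     {"rule": {...}}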
|