[Rule Tuning] Revert Event Dataset for Security Alert Index (#5994)

* [Rule Tuning] Revert Event Dataset for Security Alert Index; Add Unit Test

---------

Co-authored-by: Isai <59296946+imays11@users.noreply.github.com>
This commit is contained in:
Terrance DeJesus
2026-04-28 13:17:03 -04:00
committed by GitHub
parent 0f521a0848
commit 53f26965e3
7 changed files with 60 additions and 61 deletions
@@ -1,7 +1,7 @@
[metadata]
creation_date = "2022/11/16"
maturity = "production"
updated_date = "2026/04/10"
updated_date = "2026/04/27"
[rule]
author = ["Elastic"]
@@ -28,26 +28,26 @@ from .alerts-security.*
kibana.alert.risk_score > 21 and
not KQL("""kibana.alert.rule.tags : "Rule Type: Higher-Order Rule" """)
| stats
Esql.rule_name_distinct_count = COUNT_DISTINCT(kibana.alert.rule.name),
Esql.rule_id_distinct_count = COUNT_DISTINCT(kibana.alert.rule.rule_id),
Esql.kibana_alert_rule_name_distinct_count = COUNT_DISTINCT(kibana.alert.rule.name),
Esql.kibana_alert_rule_rule_id_distinct_count = COUNT_DISTINCT(kibana.alert.rule.rule_id),
Esql.host_id_distinct_count = COUNT_DISTINCT(host.id),
Esql.risk_score_distinct_count = COUNT_DISTINCT(kibana.alert.risk_score),
Esql.data_stream_dataset_distinct_count = COUNT_DISTINCT(data_stream.dataset),
Esql.rule_name_values = VALUES(kibana.alert.rule.name),
Esql.risk_score_values = VALUES(kibana.alert.risk_score),
Esql.data_stream_dataset_values = VALUES(data_stream.dataset),
Esql.kibana_alert_risk_score_distinct_count = COUNT_DISTINCT(kibana.alert.risk_score),
Esql.event_dataset_distinct_count = COUNT_DISTINCT(event.dataset),
Esql.kibana_alert_rule_name_values = VALUES(kibana.alert.rule.name),
Esql.kibana_alert_risk_score_values = VALUES(kibana.alert.risk_score),
Esql.event_dataset_values = VALUES(event.dataset),
Esql.event_module_values = VALUES(event.module),
Esql.process_command_line = VALUES(process.command_line),
Esql.process_command_line_values = VALUES(process.command_line),
Esql.host_id_values = VALUES(host.id),
Esql.source_ip_values = VALUES(source.ip),
Esql.destination_ip_values = VALUES(destination.ip) by user.id
| where Esql.rule_name_distinct_count >= 4 AND Esql.rule_id_distinct_count >= 2 and
// Exclude known system accounts with matches in more than one host
not (
(length(TO_STRING(user.id)) <= 4 or user.id IN ("S-1-5-18", "S-1-5-19", "S-1-5-20", "0")) and
(Esql.host_id_distinct_count >= 2 or Esql.host_id_distinct_count == 0)
)
| where Esql.kibana_alert_rule_name_distinct_count >= 4 AND Esql.kibana_alert_rule_rule_id_distinct_count >= 2 and
// Exclude known system accounts with matches in more than one host
not (
(length(TO_STRING(user.id)) <= 4 or user.id IN ("S-1-5-18", "S-1-5-19", "S-1-5-20", "0")) and
(Esql.host_id_distinct_count >= 2 or Esql.host_id_distinct_count == 0)
)
| keep user.id, Esql.*
'''
@@ -3,7 +3,7 @@ creation_date = "2026/02/03"
maturity = "production"
min_stack_comments = "ES|QL COMPLETION command requires Elastic Managed LLM (gp-llm-v2) available in 9.3.0+"
min_stack_version = "9.3.0"
updated_date = "2026/04/10"
updated_date = "2026/04/27"
[rule]
author = ["Elastic"]
@@ -110,7 +110,7 @@ from .alerts-security.* METADATA _id, _version, _index
Esql.host_name_values = VALUES(host.name),
Esql.source_ip_values = VALUES(source.ip),
Esql.destination_ip_values = VALUES(destination.ip),
Esql.data_stream_dataset_values = VALUES(data_stream.dataset),
Esql.event_dataset_values = VALUES(event.dataset),
Esql.process_executable_values = VALUES(process.executable),
Esql.user_email_values = VALUES(user.email),
Esql.timestamp_min = MIN(@timestamp),
@@ -131,7 +131,7 @@ from .alerts-security.* METADATA _id, _version, _index
| eval Esql.hosts_str = COALESCE(MV_CONCAT(Esql.host_name_values, ", "), "unknown")
| eval Esql.source_ips_str = COALESCE(MV_CONCAT(TO_STRING(Esql.source_ip_values), ", "), "unknown")
| eval Esql.destination_ips_str = COALESCE(MV_CONCAT(TO_STRING(Esql.destination_ip_values), ", "), "unknown")
| eval Esql.datasets_str = COALESCE(MV_CONCAT(Esql.data_stream_dataset_values, ", "), "unknown")
| eval Esql.datasets_str = COALESCE(MV_CONCAT(Esql.event_dataset_values, ", "), "unknown")
| eval Esql.processes_str = COALESCE(MV_CONCAT(Esql.process_executable_values, ", "), "unknown")
| eval Esql.users_email_str = COALESCE(MV_CONCAT(Esql.user_email_values, "; "), "n/a")
| eval alert_summary = CONCAT("User: ", user.name, " | Email: ", Esql.users_email_str, " | Alerts: ", TO_STRING(Esql.alerts_count), " | Distinct rules: ", TO_STRING(Esql.kibana_alert_rule_name_count_distinct), " | Hosts affected: ", TO_STRING(Esql.host_name_count_distinct), " | Time window: ", Esql.time_window_minutes, " min | Max risk: ", TO_STRING(Esql.kibana_alert_risk_score_max), " | Rules: ", Esql.rules_str, " | Tactics: ", Esql.tactics_str, " | Techniques: ", Esql.techniques_str, " | Hosts: ", Esql.hosts_str, " | Source IPs: ", Esql.source_ips_str, " | Destination IPs: ", Esql.destination_ips_str, " | Data sources: ", Esql.datasets_str, " | Processes: ", Esql.processes_str)
@@ -1,7 +1,7 @@
[metadata]
creation_date = "2025/11/19"
maturity = "production"
updated_date = "2026/04/10"
updated_date = "2026/04/27"
[rule]
author = ["Elastic"]
@@ -28,7 +28,7 @@ from .alerts-security.* metadata _id
// filter for alerts with populated risk score, excluding threat_match rules, deprecated and some other noisy ones.
| where kibana.alert.risk_score > 0 and
kibana.alert.rule.name IS NOT NULL and
host.id is not null and data_stream.dataset is not null and
host.id is not null and event.dataset is not null and
kibana.alert.rule.type != "threat_match" and
// Top noisy influencing rules
not kibana.alert.rule.name in ("Agent Spoofing - Mismatched Agent ID", "Compression DLL Loaded by Unusual Process", "Process Termination followed by Deletion", "Suspicious PrintSpooler Service Executable File Creation", "Potential PrintNightmare File Modification") and
@@ -1,7 +1,7 @@
[metadata]
creation_date = "2026/01/12"
maturity = "production"
updated_date = "2026/04/10"
updated_date = "2026/04/27"
[rule]
author = ["Elastic"]
@@ -28,7 +28,7 @@ from .alerts-security.* metadata _id
| where kibana.alert.risk_score > 21 and
kibana.alert.rule.name IS NOT NULL and
host.id is not null and data_stream.dataset is not null and
host.id is not null and event.dataset is not null and
// excluding ML and Threat Match rules as they tend to be noisy
not kibana.alert.rule.type in ("threat_match", "machine_learning") and
@@ -1,7 +1,7 @@
[metadata]
creation_date = "2026/01/09"
maturity = "production"
updated_date = "2026/04/10"
updated_date = "2026/04/27"
[rule]
author = ["Elastic"]
@@ -23,39 +23,39 @@ type = "esql"
query = '''
from .alerts-security.*
| WHERE data_stream.dataset in ("crowdstrike.alert", "crowdstrike.falcon", "sentinel_one.alert", "sentinel_one.threat", "m365_defender.alert") and
| WHERE event.dataset in ("crowdstrike.alert", "crowdstrike.falcon", "sentinel_one.alert", "sentinel_one.threat", "m365_defender.alert") and
host.id is not null and kibana.alert.risk_score > 21 and
not (event.module == "crowdstrike" and (kibana.alert.rule.name like "* at *" or kibana.alert.rule.name like "* on *" or kibana.alert.rule.name == "EICARTestFileWrittenWin")) and
not KQL("""kibana.alert.rule.tags : "Rule Type: Higher-Order Rule" """)
| stats Esql.alerts_count = COUNT(*),
Esql.rule_risk_score_distinct_count = COUNT_DISTINCT(kibana.alert.risk_score),
Esql.unique_rules_count = COUNT_DISTINCT(kibana.alert.rule.name),
Esql.processes_count = COUNT_DISTINCT(process.executable),
Esql.files_count = COUNT_DISTINCT(file.path),
Esql.process_cmdline_count = COUNT_DISTINCT(process.command_line),
Esql.rule_risk_score_values = VALUES(kibana.alert.risk_score),
Esql.process_path_values = VALUES(process.executable),
Esql.file_path_values = VALUES(file.path),
Esql.user_name_values = VALUES(user.name),
Esql.process_command_line_values = VALUES(process.command_line),
Esql.process_parent_command_line_values = VALUES(process.parent.command_line),
Esql.rule_name_values = VALUES(kibana.alert.rule.name) by host.id, host.name, event.module
Esql.kibana_alert_risk_score_distinct_count = COUNT_DISTINCT(kibana.alert.risk_score),
Esql.kibana_alert_rule_name_distinct_count = COUNT_DISTINCT(kibana.alert.rule.name),
Esql.process_executable_distinct_count = COUNT_DISTINCT(process.executable),
Esql.file_path_distinct_count = COUNT_DISTINCT(file.path),
Esql.process_command_line_distinct_count = COUNT_DISTINCT(process.command_line),
Esql.kibana_alert_risk_score_values = VALUES(kibana.alert.risk_score),
Esql.process_executable_values = VALUES(process.executable),
Esql.file_path_values = VALUES(file.path),
Esql.user_name_values = VALUES(user.name),
Esql.process_command_line_values = VALUES(process.command_line),
Esql.process_parent_command_line_values = VALUES(process.parent.command_line),
Esql.kibana_alert_rule_name_values = VALUES(kibana.alert.rule.name) by host.id, host.name, event.module
| where (
// 3+ unique rules or processes
(
Esql.unique_rules_count >= 3 or
(Esql.processes_count >= 3 and Esql.rule_name_values == "External Alerts")
) and
// and 2+ rules of different severity, or 1 high/critical severity rule
(
Esql.rule_risk_score_distinct_count >= 2 or
Esql.rule_risk_score_values == 73 or
Esql.rule_risk_score_values == 99
)
(
Esql.kibana_alert_rule_name_distinct_count >= 3 or
(Esql.process_executable_distinct_count >= 3 and Esql.kibana_alert_rule_name_values == "External Alerts")
) and
// and 2+ rules of different severity, or 1 high/critical severity rule
(
Esql.kibana_alert_risk_score_distinct_count >= 2 or
Esql.kibana_alert_risk_score_values == 73 or
Esql.kibana_alert_risk_score_values == 99
)
) or
// or 5+ unique rules from the same host for 1+ path/command_line/process
(Esql.unique_rules_count >= 5 and Esql.alerts_count <= 50 and
(Esql.files_count >= 1 or Esql.process_cmdline_count >= 1 or Esql.processes_count >= 1)
// or 5+ unique rules from the same host for 1+ path/command_line/process
(Esql.kibana_alert_rule_name_distinct_count >= 5 and Esql.alerts_count <= 50 and
(Esql.file_path_distinct_count >= 1 or Esql.process_command_line_distinct_count >= 1 or Esql.process_executable_distinct_count >= 1)
)
| KEEP event.module, host.id, host.name, Esql.*
'''
@@ -1,7 +1,7 @@
[metadata]
creation_date = "2026/01/07"
maturity = "production"
updated_date = "2026/04/10"
updated_date = "2026/04/27"
[rule]
author = ["Elastic"]
@@ -28,16 +28,15 @@ FROM .alerts-security.*
not kibana.alert.rule.type in ("threat_match", "machine_learning", "new_terms") and
not kibana.alert.rule.name like "Deprecated - *" and kibana.alert.rule.name != "My First Rule" and
// covered by 7306ce7d-5c90-4f42-aa6c-12b0dc2fe3b8
data_stream.dataset != "endpoint.alerts" and
event.dataset != "endpoint.alerts" and
not KQL("""kibana.alert.rule.tags : "Rule Type: Higher-Order Rule" """)
| STATS Esql.alerts_count = count(*),
Esql.first_time_seen = MIN(@timestamp),
Esql.last_time_seen = MAX(@timestamp),
Esql.process_executable = VALUES(process.executable),
Esql.cmd_line = VALUES(process.command_line),
Esql.parent_executable = VALUES(process.parent.executable),
Esql.file_path_values = VALUES(file.path),
Esql.file_path_values = VALUES(file.path),
Esql.process_command_line_values = VALUES(process.command_line),
Esql.process_parent_executable_values = VALUES(process.parent.executable),
Esql.file_path_values = VALUES(file.path),
Esql.dll_path_values = VALUES(dll.path),
Esql.user_id_values = VALUES(user.id),
Esql.user_name_values = VALUES(user.name),
@@ -45,12 +44,12 @@ FROM .alerts-security.*
Esql.host_id_values = VALUES(host.id),
Esql.event_module_values = VALUES(event.module),
Esql.source_ip_values = VALUES(source.ip),
Esql.rule_name_values = VALUES(kibana.alert.rule.name),
Esql.agents_distinct_count = COUNT_DISTINCT(agent.id) by kibana.alert.rule.name
Esql.kibana_alert_rule_name_values = VALUES(kibana.alert.rule.name),
Esql.agent_id_distinct_count = COUNT_DISTINCT(agent.id) by kibana.alert.rule.name
// first time seen in the last 5 days - defined in the rule schedule Additional look-back time
| eval Esql.recent = DATE_DIFF("minute", Esql.first_time_seen, now())
// first time seen is within 10m of the rule execution time
| where Esql.recent <= 10 and Esql.agents_distinct_count == 1 and Esql.alerts_count <= 10 and (Esql.last_time_seen == Esql.first_time_seen)
| where Esql.recent <= 10 and Esql.agent_id_distinct_count == 1 and Esql.alerts_count <= 10 and (Esql.last_time_seen == Esql.first_time_seen)
// Move single values to their corresponding ECS fields for alerts exclusion
| eval host.id = mv_min(Esql.host_id_values)
@@ -2,7 +2,7 @@
creation_date = "2026/02/20"
integration = ["okta"]
maturity = "production"
updated_date = "2026/04/10"
updated_date = "2026/04/27"
[rule]
author = ["Elastic"]
@@ -78,9 +78,9 @@ type = "eql"
query = '''
sequence by user.name with maxspan=30m
[any where data_stream.dataset == "okta.system" and
[any where event.dataset == "okta.system" and
kibana.alert.rule.rule_id == "6f1bb4b2-7dc8-11ee-92b2-f661ea17fbcd"]
[any where data_stream.dataset == "okta.system" and
[any where event.dataset == "okta.system" and
kibana.alert.rule.rule_id != null and
kibana.alert.severity != "low" and
kibana.alert.rule.rule_id not in (