From 044629ebf467ff930fdc85ee3df3cce1f073a890 Mon Sep 17 00:00:00 2001
From: Apoorva Joshi <30438249+ajosh0504@users.noreply.github.com>
Date: Mon, 16 Oct 2023 12:48:54 -0700
Subject: [PATCH] [New Rule] Adding DGA Rules from Advanced Analytic DGA
 Package (#3102)

* Adding DGA rules

* Adding references

* updated rule tags and queries

* Updating min stack version

* added logic to handle ml jobs

* added code comments for clarity

* removing subbed security docs folder

* added event dataset to queries for endpoint; updated note

* removed event dataset

---------

Co-authored-by: Terrance DeJesus <99630311+terrancedejesus@users.noreply.github.com>
Co-authored-by: terrancedejesus <terrance.dejesus@elastic.co>

(cherry picked from commit a5a606e80486aac9397730e857ca6dd29d5609e6)
---
 detection_rules/integrations.py               |  4 +-
 detection_rules/rule.py                       | 22 ++++---
 ...ml_dga_activity_using_sunburst_domain.toml | 65 +++++++++++++++++++
 ...d_control_ml_dga_high_sum_probability.toml | 52 +++++++++++++++
 ...l_ml_dns_request_high_dga_probability.toml | 65 +++++++++++++++++++
 ..._request_predicted_to_be_a_dga_domain.toml | 65 +++++++++++++++++++
 6 files changed, 262 insertions(+), 11 deletions(-)
 create mode 100644 rules/integrations/dga/command_and_control_ml_dga_activity_using_sunburst_domain.toml
 create mode 100644 rules/integrations/dga/command_and_control_ml_dga_high_sum_probability.toml
 create mode 100644 rules/integrations/dga/command_and_control_ml_dns_request_high_dga_probability.toml
 create mode 100644 rules/integrations/dga/command_and_control_ml_dns_request_predicted_to_be_a_dga_domain.toml

diff --git a/detection_rules/integrations.py b/detection_rules/integrations.py
index 2c264d051..3c5ae4859 100644
--- a/detection_rules/integrations.py
+++ b/detection_rules/integrations.py
@@ -335,7 +335,9 @@ def get_integration_schema_data(data, meta, package_integrations: dict) -> Gener
                 if integration is None:
                     # Use all fields from each dataset
                     for dataset in integrations_schemas[package][package_version]:
-                        schema.update(integrations_schemas[package][package_version][dataset])
+                        # ignore jobs from machine learning packages
+                        if dataset != "jobs":
+                            schema.update(integrations_schemas[package][package_version][dataset])
                 else:
                     if integration not in integrations_schemas[package][package_version]:
                         raise ValueError(f"Integration {integration} not found in package {package} "
diff --git a/detection_rules/rule.py b/detection_rules/rule.py
index b7679b9ad..e27b004c8 100644
--- a/detection_rules/rule.py
+++ b/detection_rules/rule.py
@@ -1127,16 +1127,18 @@ class TOMLRuleContents(BaseRuleContents, MarshmallowDataclassMixin):
             elif isinstance(node, FieldComparison) and str(node.field) == 'event.dataset':
                 datasets.update(set(str(n) for n in node if isinstance(n, kql.ast.Value)))
 
-        if not datasets:
-            # windows and endpoint integration do not have event.dataset fields in queries
-            # integration is None to remove duplicate references upstream in Kibana
-            rule_integrations = meta.get("integration", [])
-            if rule_integrations:
-                for integration in rule_integrations:
-                    ineligible_integrations = definitions.NON_DATASET_PACKAGES + \
-                        [*map(str.lower, definitions.MACHINE_LEARNING_PACKAGES)]
-                    if integration in ineligible_integrations or isinstance(data, MachineLearningRuleData):
-                        packaged_integrations.append({"package": integration, "integration": None})
+        # integration is None to remove duplicate references upstream in Kibana
+        # event.dataset values from the query are collected first, then the metadata integration list is checked
+        # metadata integrations are only added when they are defined in definitions as non-dataset packages
+        # or machine learning analytic packages, or when the rule is a machine learning rule
+
+        rule_integrations = meta.get("integration", [])
+        if rule_integrations:
+            for integration in rule_integrations:
+                ineligible_integrations = definitions.NON_DATASET_PACKAGES + \
+                    [*map(str.lower, definitions.MACHINE_LEARNING_PACKAGES)]
+                if integration in ineligible_integrations or isinstance(data, MachineLearningRuleData):
+                    packaged_integrations.append({"package": integration, "integration": None})
 
         for value in sorted(datasets):
             integration = 'Unknown'
diff --git a/rules/integrations/dga/command_and_control_ml_dga_activity_using_sunburst_domain.toml b/rules/integrations/dga/command_and_control_ml_dga_activity_using_sunburst_domain.toml
new file mode 100644
index 000000000..beb987af0
--- /dev/null
+++ b/rules/integrations/dga/command_and_control_ml_dga_activity_using_sunburst_domain.toml
@@ -0,0 +1,65 @@
+[metadata]
+creation_date = "2023/09/14"
+integration = ["dga","endpoint","network_traffic"]
+maturity = "production"
+min_stack_comments = "DGA package job ID and rule removal updates"
+min_stack_version = "8.9.0"
+updated_date = "2023/10/16"
+
+[rule]
+author = ["Elastic"]
+description = """
+A supervised machine learning model has identified a DNS question name that is used by the SUNBURST malware and is
+predicted to be the result of a Domain Generation Algorithm.
+"""
+from = "now-10m"
+index = ["logs-endpoint.events.*", "logs-network_traffic.*"]
+language = "kuery"
+license = "Elastic License v2"
+name = "Machine Learning Detected DGA activity using a known SUNBURST DNS domain"
+note = """## Setup
+
+The Domain Generation Algorithm (DGA) integration must be enabled and related ML jobs configured for this rule to be effective. Please refer to this rule's references for more information.
+"""
+references = [
+    "https://www.elastic.co/guide/en/security/current/prebuilt-ml-jobs.html",
+    "https://docs.elastic.co/en/integrations/dga"
+]
+risk_score = 99
+rule_id = "bcaa15ce-2d41-44d7-a322-918f9db77766"
+severity = "critical"
+tags = [
+    "Domain: Network",
+    "Domain: Endpoint",
+    "Data Source: Elastic Defend",
+    "Use Case: Domain Generation Algorithm Detection",
+    "Rule Type: ML",
+    "Rule Type: Machine Learning",
+    "Tactic: Command and Control",
+]
+timestamp_override = "event.ingested"
+type = "query"
+
+query = '''
+ml_is_dga.malicious_prediction:1 and dns.question.registered_domain:avsvmcloud.com
+'''
+
+
+[[rule.threat]]
+framework = "MITRE ATT&CK"
+[[rule.threat.technique]]
+id = "T1568"
+name = "Dynamic Resolution"
+reference = "https://attack.mitre.org/techniques/T1568/"
+[[rule.threat.technique.subtechnique]]
+id = "T1568.002"
+name = "Domain Generation Algorithms"
+reference = "https://attack.mitre.org/techniques/T1568/002/"
+
+
+
+[rule.threat.tactic]
+id = "TA0011"
+name = "Command and Control"
+reference = "https://attack.mitre.org/tactics/TA0011/"
+
diff --git a/rules/integrations/dga/command_and_control_ml_dga_high_sum_probability.toml b/rules/integrations/dga/command_and_control_ml_dga_high_sum_probability.toml
new file mode 100644
index 000000000..7da14e887
--- /dev/null
+++ b/rules/integrations/dga/command_and_control_ml_dga_high_sum_probability.toml
@@ -0,0 +1,52 @@
+[metadata]
+creation_date = "2023/09/14"
+integration = ["dga","endpoint","network_traffic"]
+maturity = "production"
+min_stack_comments = "DGA package job ID and rule removal updates"
+min_stack_version = "8.9.0"
+updated_date = "2023/10/16"
+
+[rule]
+anomaly_threshold = 70
+author = ["Elastic"]
+description = """
+A population analysis machine learning job detected potential DGA (domain generation algorithm) activity. Such activity
+is often used by malware command and control (C2) channels. This machine learning job looks for a source IP address
+making DNS requests that have an aggregate high probability of being DGA activity.
+"""
+from = "now-45m"
+interval = "15m"
+license = "Elastic License v2"
+machine_learning_job_id = "dga_high_sum_probability"
+name = "Potential DGA Activity"
+note = """## Setup
+
+The Domain Generation Algorithm (DGA) integration must be enabled and related ML jobs configured for this rule to be effective. Please refer to this rule's references for more information.
+"""
+references = [
+    "https://www.elastic.co/guide/en/security/current/prebuilt-ml-jobs.html",
+    "https://docs.elastic.co/en/integrations/dga"
+]
+risk_score = 21
+rule_id = "ff0d807d-869b-4a0d-a493-52bc46d2f1b1"
+severity = "low"
+tags = [
+    "Use Case: Domain Generation Algorithm Detection",
+    "Rule Type: ML",
+    "Rule Type: Machine Learning",
+    "Tactic: Command and Control",
+]
+type = "machine_learning"
+[[rule.threat]]
+framework = "MITRE ATT&CK"
+[[rule.threat.technique]]
+id = "T1568"
+name = "Dynamic Resolution"
+reference = "https://attack.mitre.org/techniques/T1568/"
+
+
+[rule.threat.tactic]
+id = "TA0011"
+name = "Command and Control"
+reference = "https://attack.mitre.org/tactics/TA0011/"
+
diff --git a/rules/integrations/dga/command_and_control_ml_dns_request_high_dga_probability.toml b/rules/integrations/dga/command_and_control_ml_dns_request_high_dga_probability.toml
new file mode 100644
index 000000000..cc4594266
--- /dev/null
+++ b/rules/integrations/dga/command_and_control_ml_dns_request_high_dga_probability.toml
@@ -0,0 +1,65 @@
+[metadata]
+creation_date = "2023/09/14"
+integration = ["dga","endpoint","network_traffic"]
+maturity = "production"
+min_stack_comments = "DGA package job ID and rule removal updates"
+min_stack_version = "8.9.0"
+updated_date = "2023/10/16"
+
+[rule]
+author = ["Elastic"]
+description = """
+A supervised machine learning model has identified a DNS question name with a high probability of sourcing from a Domain
+Generation Algorithm (DGA), which could indicate command and control network activity.
+"""
+from = "now-10m"
+index = ["logs-endpoint.events.*", "logs-network_traffic.*"]
+language = "kuery"
+license = "Elastic License v2"
+name = "Machine Learning Detected a DNS Request With a High DGA Probability Score"
+note = """## Setup
+
+The Domain Generation Algorithm (DGA) integration must be enabled and related ML jobs configured for this rule to be effective. Please refer to this rule's references for more information.
+"""
+references = [
+    "https://www.elastic.co/guide/en/security/current/prebuilt-ml-jobs.html",
+    "https://docs.elastic.co/en/integrations/dga"
+]
+risk_score = 21
+rule_id = "da7f5803-1cd4-42fd-a890-0173ae80ac69"
+severity = "low"
+tags = [
+    "Domain: Network",
+    "Domain: Endpoint",
+    "Data Source: Elastic Defend",
+    "Use Case: Domain Generation Algorithm Detection",
+    "Rule Type: ML",
+    "Rule Type: Machine Learning",
+    "Tactic: Command and Control",
+]
+timestamp_override = "event.ingested"
+type = "query"
+
+query = '''
+ml_is_dga.malicious_probability > 0.98
+'''
+
+
+[[rule.threat]]
+framework = "MITRE ATT&CK"
+[[rule.threat.technique]]
+id = "T1568"
+name = "Dynamic Resolution"
+reference = "https://attack.mitre.org/techniques/T1568/"
+[[rule.threat.technique.subtechnique]]
+id = "T1568.002"
+name = "Domain Generation Algorithms"
+reference = "https://attack.mitre.org/techniques/T1568/002/"
+
+
+
+[rule.threat.tactic]
+id = "TA0011"
+name = "Command and Control"
+reference = "https://attack.mitre.org/tactics/TA0011/"
+
diff --git a/rules/integrations/dga/command_and_control_ml_dns_request_predicted_to_be_a_dga_domain.toml b/rules/integrations/dga/command_and_control_ml_dns_request_predicted_to_be_a_dga_domain.toml
new file mode 100644
index 000000000..680850d32
--- /dev/null
+++ b/rules/integrations/dga/command_and_control_ml_dns_request_predicted_to_be_a_dga_domain.toml
@@ -0,0 +1,65 @@
+[metadata]
+creation_date = "2023/09/14"
+integration = ["dga","endpoint","network_traffic"]
+maturity = "production"
+min_stack_comments = "DGA package job ID and rule removal updates"
+min_stack_version = "8.9.0"
+updated_date = "2023/10/16"
+
+[rule]
+author = ["Elastic"]
+description = """
+A supervised machine learning model has identified a DNS question name that is predicted to be the result of a Domain
+Generation Algorithm (DGA), which could indicate command and control network activity.
+"""
+from = "now-10m"
+index = ["logs-endpoint.events.*", "logs-network_traffic.*"]
+language = "kuery"
+license = "Elastic License v2"
+name = "Machine Learning Detected a DNS Request Predicted to be a DGA Domain"
+note = """## Setup
+
+The Domain Generation Algorithm (DGA) integration must be enabled and related ML jobs configured for this rule to be effective. Please refer to this rule's references for more information.
+"""
+references = [
+    "https://www.elastic.co/guide/en/security/current/prebuilt-ml-jobs.html",
+    "https://docs.elastic.co/en/integrations/dga"
+]
+risk_score = 21
+rule_id = "f3403393-1fd9-4686-8f6e-596c58bc00b4"
+severity = "low"
+tags = [
+    "Domain: Network",
+    "Domain: Endpoint",
+    "Data Source: Elastic Defend",
+    "Use Case: Domain Generation Algorithm Detection",
+    "Rule Type: ML",
+    "Rule Type: Machine Learning",
+    "Tactic: Command and Control",
+]
+timestamp_override = "event.ingested"
+type = "query"
+
+query = '''
+ml_is_dga.malicious_prediction:1 and not dns.question.registered_domain:avsvmcloud.com
+'''
+
+
+[[rule.threat]]
+framework = "MITRE ATT&CK"
+[[rule.threat.technique]]
+id = "T1568"
+name = "Dynamic Resolution"
+reference = "https://attack.mitre.org/techniques/T1568/"
+[[rule.threat.technique.subtechnique]]
+id = "T1568.002"
+name = "Domain Generation Algorithms"
+reference = "https://attack.mitre.org/techniques/T1568/002/"
+
+
+
+[rule.threat.tactic]
+id = "TA0011"
+name = "Command and Control"
+reference = "https://attack.mitre.org/tactics/TA0011/"
+