Merge PR #5803 from @swachchhanda000 - chore: ci: regression test id consistency check

chore: ci: regression test id consistency check
---------

Co-authored-by: phantinuss <79651203+phantinuss@users.noreply.github.com>
This commit is contained in:
Swachchhanda Shrawan Poudel
2025-12-10 14:42:22 +05:45
committed by GitHub
parent 13aae8c1ea
commit 6af6ad8ef7
+138 -2
View File
@@ -55,6 +55,7 @@ def load_info_yaml(
# Extract test data from regression_tests_info
test_data = []
regression_tests = info_data.get("regression_tests_info", [])
rule_metadata = info_data.get("rule_metadata", [])
for test in regression_tests:
if not isinstance(test, dict):
@@ -85,6 +86,11 @@ def load_info_yaml(
"provider": test.get("provider", ""),
}
)
info_metadata_rule_id = None
for metadata_entry in rule_metadata:
if not isinstance(metadata_entry, dict):
continue
info_metadata_rule_id = metadata_entry.get("id", "")
if test_data:
results.append(
@@ -92,6 +98,7 @@ def load_info_yaml(
"path": file_path,
"id": rule_id,
"tests": test_data,
"info_metadata_rule_id": info_metadata_rule_id,
}
)
@@ -531,6 +538,128 @@ def print_summary(total_tests: int, passed_tests: int, failures: List[Dict]) ->
print("=" * 60)
def check_rule_id_consistency(rules_with_tests: List[Dict]) -> List[Dict]:
"""Check if rule IDs are consistent between rule files and their info.yml files.
Also checks if rule IDs match the test file names.
Returns:
List of dicts containing information about inconsistent rule IDs
"""
inconsistent_rules = []
for rule_info in rules_with_tests:
rule_id = rule_info["id"]
info_metadata_rule_id = rule_info.get("info_metadata_rule_id", "")
rule_path = rule_info["path"]
tests = rule_info.get("tests", [])
# Check rule ID vs info.yml rule_metadata[0].id consistency
if not info_metadata_rule_id:
inconsistent_rules.append(
{
"rule_id": rule_id,
"info_metadata_rule_id": info_metadata_rule_id,
"rule_path": rule_path,
"issue": "missing_info_metadata_rule_id",
"expected": rule_id,
"actual": info_metadata_rule_id,
"message": "info.yml is missing rule_metadata or rule_metadata[0].id",
}
)
elif rule_id != info_metadata_rule_id:
inconsistent_rules.append(
{
"rule_id": rule_id,
"info_metadata_rule_id": info_metadata_rule_id,
"rule_path": rule_path,
"issue": "rule_vs_info_metadata_mismatch",
"expected": rule_id,
"actual": info_metadata_rule_id,
"message": f"Rule ID '{rule_id}' in rule file does not match "
f"info.yml rule_metadata[0].id '{info_metadata_rule_id}'",
}
)
# Check rule ID vs test file name consistency
for test in tests:
test_path = test.get("path", "")
if test_path:
# Extract filename without extension
filename = os.path.basename(test_path)
name_without_ext = os.path.splitext(filename)[0]
file_ext = os.path.splitext(filename)[1].lower()
# Check if the filename (without extension) matches the rule ID
# Only check for .evtx and .json files (.json is optional conversion of .evtx)
if file_ext in [".evtx", ".json"] and name_without_ext != rule_id:
expected_filename = f"{rule_id}{file_ext}"
inconsistent_rules.append(
{
"rule_id": rule_id,
"test_filename": filename,
"rule_path": rule_path,
"test_path": test_path,
"issue": "rule_vs_testfile_mismatch",
"expected": expected_filename,
"actual": filename,
"message": f"Rule ID '{rule_id}' does not match test file"
f"name '{name_without_ext}' (expected: {rule_id}{file_ext})",
}
)
if inconsistent_rules:
print("\nERROR: Found rule ID inconsistencies:")
print("=" * 60)
print()
# Group by issue type for better readability
rule_vs_info_issues = [
r
for r in inconsistent_rules
if r.get("issue")
in ["rule_vs_info_metadata_mismatch", "missing_info_metadata_rule_id"]
]
rule_vs_testfile_issues = [
r
for r in inconsistent_rules
if r.get("issue") == "rule_vs_testfile_mismatch"
]
if rule_vs_info_issues:
print("RULE ID vs INFO.YML RULE_METADATA[0].ID MISMATCHES:")
print("-" * 50)
for inconsistent in rule_vs_info_issues:
print(f"Rule file ID: {inconsistent['rule_id']}")
print(
f"Info.yml rule_metadata[0].id: {inconsistent['info_metadata_rule_id']}"
)
print(f"Expected: {inconsistent['expected']}")
print(f"Actual: {inconsistent['actual']}")
print(f"Rule file: {inconsistent['rule_path']}")
print(f"Message: {inconsistent['message']}")
print("-" * 50)
print()
if rule_vs_testfile_issues:
print("RULE ID vs TEST FILE NAME MISMATCHES:")
print("-" * 40)
for inconsistent in rule_vs_testfile_issues:
print(f"Rule ID: {inconsistent['rule_id']}")
print(f"Expected filename: {inconsistent['expected']}")
print(f"Actual filename: {inconsistent['actual']}")
print(f"Rule file: {inconsistent['rule_path']}")
print(f"Test file: {inconsistent['test_path']}")
print(f"{inconsistent['message']}")
print()
print("<=>" * 20)
print("Rule IDs must match between:")
print("1. Rule files ID and their info.yml rule_metadata[0].id")
print("2. Rule files ID and their test file names (EVTX/JSON files)")
print(" Note: JSON files are optional conversions of EVTX files")
return inconsistent_rules
def main():
"""Main function to run regression tests for Sigma rules."""
args = parse_arguments()
@@ -541,12 +670,19 @@ def main():
rules_with_tests, missing_files, missing_regression_tests_path = (
find_rules_with_tests(args.rules_paths)
)
print(f"Found {len(rules_with_tests)} rules with test data")
print(f"Found {len(rules_with_tests)} rule(s) with regression tests configured.\n")
print("Checking for consistent rule <--> test mapping...")
inconsistent_rules = check_rule_id_consistency(rules_with_tests)
if inconsistent_rules:
sys.exit(1)
else:
print("All rules are mapped correctly.")
validate_missing_tests(args, rules_with_tests, missing_regression_tests_path)
check_missing_test_files(missing_files)
print()
if not rules_with_tests:
print("No rules with test data found")
sys.exit(1)