From 6af6ad8ef70e1da44f547fd59d9bb1e1c5424ca5 Mon Sep 17 00:00:00 2001
From: Swachchhanda Shrawan Poudel <87493836+swachchhanda000@users.noreply.github.com>
Date: Wed, 10 Dec 2025 14:42:22 +0545
Subject: [PATCH] Merge PR #5803 from @swachchhanda000 - chore: ci: regression test id consistency check

chore: ci: regression test id consistency check

---------

Co-authored-by: phantinuss <79651203+phantinuss@users.noreply.github.com>
---
 tests/regression_tests_runner.py | 151 ++++++++++++++++++++++++++++++-
 1 file changed, 149 insertions(+), 2 deletions(-)

diff --git a/tests/regression_tests_runner.py b/tests/regression_tests_runner.py
index 04b22daa2..4f7637967 100644
--- a/tests/regression_tests_runner.py
+++ b/tests/regression_tests_runner.py
@@ -55,6 +55,8 @@ def load_info_yaml(
     # Extract test data from regression_tests_info
     test_data = []
     regression_tests = info_data.get("regression_tests_info", [])
+    # "or []" also covers a rule_metadata key that is present but null
+    rule_metadata = info_data.get("rule_metadata") or []
 
     for test in regression_tests:
         if not isinstance(test, dict):
@@ -85,6 +87,14 @@ def load_info_yaml(
                 "provider": test.get("provider", ""),
             }
         )
+    # Only the first mapping entry counts: check_rule_id_consistency reports
+    # this value as rule_metadata[0].id
+    info_metadata_rule_id = None
+    for metadata_entry in rule_metadata:
+        if not isinstance(metadata_entry, dict):
+            continue
+        info_metadata_rule_id = metadata_entry.get("id", "")
+        break
 
     if test_data:
         results.append(
@@ -92,6 +102,7 @@ def load_info_yaml(
                 "path": file_path,
                 "id": rule_id,
                 "tests": test_data,
+                "info_metadata_rule_id": info_metadata_rule_id,
             }
         )
 
@@ -531,6 +542,135 @@ def print_summary(total_tests: int, passed_tests: int, failures: List[Dict]) ->
     print("=" * 60)
 
 
+def check_rule_id_consistency(rules_with_tests: List[Dict]) -> List[Dict]:
+    """Check that each rule ID agrees with its info.yml and test file names.
+
+    Two checks are made per rule: the rule ID must equal the ID recorded in
+    info.yml (rule_metadata[0].id), and every .evtx/.json test file must be
+    named after the rule ID. All findings are printed as a grouped report.
+
+    Args:
+        rules_with_tests: Rule dicts with "id" and "path" keys and optional
+            "info_metadata_rule_id" and "tests" keys.
+
+    Returns:
+        List of dicts containing information about inconsistent rule IDs
+    """
+    inconsistent_rules = []
+
+    for rule_info in rules_with_tests:
+        rule_id = rule_info["id"]
+        info_metadata_rule_id = rule_info.get("info_metadata_rule_id", "")
+        rule_path = rule_info["path"]
+        tests = rule_info.get("tests", [])
+
+        # Check rule ID vs info.yml rule_metadata[0].id consistency
+        if not info_metadata_rule_id:
+            inconsistent_rules.append(
+                {
+                    "rule_id": rule_id,
+                    "info_metadata_rule_id": info_metadata_rule_id,
+                    "rule_path": rule_path,
+                    "issue": "missing_info_metadata_rule_id",
+                    "expected": rule_id,
+                    "actual": info_metadata_rule_id,
+                    "message": "info.yml is missing rule_metadata or rule_metadata[0].id",
+                }
+            )
+        elif rule_id != info_metadata_rule_id:
+            inconsistent_rules.append(
+                {
+                    "rule_id": rule_id,
+                    "info_metadata_rule_id": info_metadata_rule_id,
+                    "rule_path": rule_path,
+                    "issue": "rule_vs_info_metadata_mismatch",
+                    "expected": rule_id,
+                    "actual": info_metadata_rule_id,
+                    "message": f"Rule ID '{rule_id}' in rule file does not match "
+                    f"info.yml rule_metadata[0].id '{info_metadata_rule_id}'",
+                }
+            )
+
+        # Check rule ID vs test file name consistency
+        for test in tests:
+            test_path = test.get("path", "")
+            if test_path:
+                # Split the file name into its stem and lower-cased extension
+                filename = os.path.basename(test_path)
+                name_without_ext, file_ext = os.path.splitext(filename)
+                file_ext = file_ext.lower()
+
+                # Check if the filename (without extension) matches the rule ID
+                # Only check for .evtx and .json files (.json is optional conversion of .evtx)
+                if file_ext in [".evtx", ".json"] and name_without_ext != rule_id:
+                    expected_filename = f"{rule_id}{file_ext}"
+                    inconsistent_rules.append(
+                        {
+                            "rule_id": rule_id,
+                            "test_filename": filename,
+                            "rule_path": rule_path,
+                            "test_path": test_path,
+                            "issue": "rule_vs_testfile_mismatch",
+                            "expected": expected_filename,
+                            "actual": filename,
+                            "message": f"Rule ID '{rule_id}' does not match test file "
+                            f"name '{name_without_ext}' (expected: {expected_filename})",
+                        }
+                    )
+
+    if inconsistent_rules:
+        print("\nERROR: Found rule ID inconsistencies:")
+        print("=" * 60)
+        print()
+
+        # Group by issue type for better readability
+        rule_vs_info_issues = [
+            r
+            for r in inconsistent_rules
+            if r.get("issue")
+            in ["rule_vs_info_metadata_mismatch", "missing_info_metadata_rule_id"]
+        ]
+        rule_vs_testfile_issues = [
+            r
+            for r in inconsistent_rules
+            if r.get("issue") == "rule_vs_testfile_mismatch"
+        ]
+
+        if rule_vs_info_issues:
+            print("RULE ID vs INFO.YML RULE_METADATA[0].ID MISMATCHES:")
+            print("-" * 50)
+            for inconsistent in rule_vs_info_issues:
+                print(f"Rule file ID: {inconsistent['rule_id']}")
+                print(
+                    f"Info.yml rule_metadata[0].id: {inconsistent['info_metadata_rule_id']}"
+                )
+                print(f"Expected: {inconsistent['expected']}")
+                print(f"Actual: {inconsistent['actual']}")
+                print(f"Rule file: {inconsistent['rule_path']}")
+                print(f"Message: {inconsistent['message']}")
+                print("-" * 50)
+            print()
+
+        if rule_vs_testfile_issues:
+            print("RULE ID vs TEST FILE NAME MISMATCHES:")
+            print("-" * 40)
+            for inconsistent in rule_vs_testfile_issues:
+                print(f"Rule ID: {inconsistent['rule_id']}")
+                print(f"Expected filename: {inconsistent['expected']}")
+                print(f"Actual filename: {inconsistent['actual']}")
+                print(f"Rule file: {inconsistent['rule_path']}")
+                print(f"Test file: {inconsistent['test_path']}")
+                print(f"{inconsistent['message']}")
+                print()
+
+        print("<=>" * 20)
+        print("Rule IDs must match between:")
+        print("1. Rule files ID and their info.yml rule_metadata[0].id")
+        print("2. Rule files ID and their test file names (EVTX/JSON files)")
+        print(" Note: JSON files are optional conversions of EVTX files")
+    return inconsistent_rules
+
+
 def main():
     """Main function to run regression tests for Sigma rules."""
     args = parse_arguments()
@@ -541,12 +681,19 @@ def main():
     rules_with_tests, missing_files, missing_regression_tests_path = (
         find_rules_with_tests(args.rules_paths)
     )
-    print(f"Found {len(rules_with_tests)} rules with test data")
+
+    print(f"Found {len(rules_with_tests)} rule(s) with regression tests configured.\n")
+
+    print("Checking for consistent rule <--> test mapping...")
+    inconsistent_rules = check_rule_id_consistency(rules_with_tests)
+    if inconsistent_rules:
+        sys.exit(1)
+    else:
+        print("All rules are mapped correctly.")
 
     validate_missing_tests(args, rules_with_tests, missing_regression_tests_path)
     check_missing_test_files(missing_files)
     print()
-
     if not rules_with_tests:
         print("No rules with test data found")
         sys.exit(1)