From b31a1b761c86ec3f4a98564b87a10743b43cd33f Mon Sep 17 00:00:00 2001 From: Terrance DeJesus <99630311+terrancedejesus@users.noreply.github.com> Date: Wed, 28 Sep 2022 09:33:49 -0400 Subject: [PATCH] [FR] Re-factor Build Integrations Manifest (#2274) * adjusted how integrations list is created * removed unused import and addressed linting errors * adjusted integration_manifest dictionary to only load latest major * adjusted manifests sourcing from GH to EPR CDN * addressed flake errors * added some additional comments and formatting * updating integration-manifests file * adjusted test_integration testing * addressed flake errors * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * Update detection_rules/integrations.py Co-authored-by: Justin Ibarra * added folder unit tests * updated unit test to remove network calls * Update tests/test_all_rules.py Co-authored-by: Mika Ayenson Co-authored-by: Justin Ibarra Co-authored-by: Mika Ayenson --- detection_rules/devtools.py | 10 +-- .../etc/integration-manifests.json.gz | Bin 2162 -> 2206 bytes detection_rules/integrations.py | 64 +++++++----------- detection_rules/rule.py | 2 +- tests/test_all_rules.py | 42 ++++++++---- 5 files changed, 63 insertions(+), 55 deletions(-) diff --git a/detection_rules/devtools.py b/detection_rules/devtools.py index 570d331b4..e10ce9a9c 100644 --- a/detection_rules/devtools.py +++ b/detection_rules/devtools.py @@ -1131,8 +1131,10 @@ def integrations_group(): @integrations_group.command('build-manifests') @click.option('--overwrite', '-o', is_flag=True, help="Overwrite the existing integrations-manifest.json.gz file") -@click.option("--token", required=True, prompt=get_github_token() is None, default=get_github_token(), - help="GitHub token to use for the PR", hide_input=True) -def build_integration_manifests(overwrite: bool, token: str): +def build_integration_manifests(overwrite: bool): """Builds consolidated integrations manifests file.""" - 
build_integrations_manifest(token, overwrite) + click.echo("loading rules to determine all integration tags") + rules = RuleCollection.default() + integration_tags = list(set([r.contents.metadata.integration for r in rules if r.contents.metadata.integration])) + click.echo(f"integration tags identified: {integration_tags}") + build_integrations_manifest(overwrite, integration_tags) diff --git a/detection_rules/etc/integration-manifests.json.gz b/detection_rules/etc/integration-manifests.json.gz index 63f727a79742968a64a0c54d1038fda00473acba..28039ef74991a07865a2cd80c25ffb3f1c3ba64a 100644 GIT binary patch delta 2201 zcmV;K2xj;45S|eTABzYG!a5pbkq94u?{C^T7{~vWsIP9);K&aGy?0f0nx@lfm3F$l zINd6f7-}`dIorUxt^fPkCQw=+B;Y`PJesryW0E|d?__q}x$&^RQ5(3tDok7R^mqz~tqahWcUuf7g{y4T&_ z)$_CbL5O+4NFvg&Zq%8qkFmfLhIFFL6QilRjr1}yfi7aq^jRlFJj_PwOB^9CNboCp z`7OUvS7128i0c!Kus}Mk*R>mGPrqUI3>syRCvU)@J+^mof$S`c@Ephb#4;}mBRb*g zNM#sPHeG#2fgp1to>rfE!bm)S2_iaM{-h8Qe-Dwr9tgL7Um9m^7-t1Eif|yfrLJX- z8J$`;D?p_XbE-$o`uWrPI+V40#n}xrtLyXN@~RK150J7U+1k~i5avfWc>!kwxMRS{ zf8cU&$f~yy3;{*1H-)26>;&*>JSG9ww(jY+DbYr|>z%{K!zEwWU|SCG z!sbjooUVHg9@|d8;RTm}^LXbx-mtPB#i5lT#_>}9s?l(bm;anqVdc3vz-0#)5By~h z7aO?j%AYxVM1lbG_b|a?pRr6HW6%a$EZ+z zp6HBO50uOI9Zf+zFpPwiFpW!Qes6_X<R`FQn>!_1;UQX5#7)`U{6nx_j(sW)9@G8J-V7@%s&E{nT zuL8V==KafQGOug%{^d59m(w#}-+MLSwQg?HctjBkNj&MuD|I3=#=%n%;m&j-jvGw@ zX#0j|j6>_U0GbVd(27EH%zK*O0%$fsD-O*8Xhor!@1)XiVa?_&KZlq7{R-N#MJz95+%t?eNIJ9np0CR4AUP0P_6PdbsFGdGzHzw@arNHp& z2_rK;)i{hI0sAX(=OyA~&kZ}8h@U%fH;=;{h;=9p(_{rHGj!+_NQHy@Yk|uzrOIh6 zuFOdUr4>OmljnJhN?yqN8_gmipYKea@bYw=3p-PNI61&62PYT&DJLiJr#zfI@TZ)d zz@PH9HNl^Ma)cq%M}hLZ?(2MJCYs7K?VVz(u2R+%)3aDtY>Mfp)ZTO0Z(iQhRR7N_NIkr{N?TCcdyoD@&%%z^1P)4>^-$`5KX*9b)@4$_S38#HMf z?*pMz#<~HE%<`EdM(B+10rks=+q-uafFw z{Him>f{}o)Q&B>eQG)A~gwbzLFTV;Na5#&9Holh{2&QPU&Q#K$EWcVt7A-wJ7KljO zJH_`^IbOEZuBCE+tU*~pG~*KSD}gpRn%VQrX_g9pJymc)h0Cijs6k;++e4`bc&Chw zpI$v^)d<`nH4Z&;%`4=BLKjkKy=PY}Rhr{mo5>Ap@Q^=NujSe(jS$HOk*Y4zHI$@( 
zI*0@%sp=v*Jt#@l7s+n4NN!U^vO%Qki{yYvRTs%=bam%7LL{#SwVjD|IIG?X)Nk6r zLH+yXe2}DSk_;OBL}`RTLlCIy0@)x?l?A$n5>#1%R1#Y7)!wbWtd%D>BaKv(TrfnU z^``|A!Gn%VdXl$E!;$7a(szghNB@a`bYg_2qcB|-Moi@C7f36~LYD zk(Y`&*~~oSDuC&AMASTq4K8xCKXPYF!ka1jE^~dsBkOY-tYiIh^X2oqP0`QuDy; zgfVR|S^z=YQnk&TOw~+nSNAX3AF;u0ZIyQ0SBe{_^m^PhS3H~xWxHjHdr-z(r+6O} zZ<*o)P`qV|4?*!(Del8_ymg9yd!TsB6!$^#Rw+Jcd#$d)g#3YwTdBSc>bF>Z57ck1 z`onTZN0%X$-~^R+0>=d>*m43D*Gpo|$?{U^+GYxK`O^@q;S()j>g?0jk`peu#p=CBm bYX!*$NGC1z2aukhpZ^C4KW3FEkl_FTNCH6s delta 2156 zcmV-y2$T1o5%LfSABzYG-^1}@kq94uYmeeK6o&swG@o{*Ldg}V)JkYotyWdFBW>0F zP<2Ko7$%yKq_!D$_VV9rJHTXSNC+VzCO%qehlG&ieO}wdCx?@7SN`J^6GZM4M6Z4^ zzFnF9L4P1$%`o*zkgy<*`0~{|vEhW=8`mUw2tvHTGh^cWm{Q{t_E#iekH&|8h~Wi6 z@*_VPSAHDL07z5|vDMe`U#K$S(gH~MGOG^BNjK9$;WCq1V$m}8*D`zx0 zD;VSZcjIOxM|dR>S%9b*kqxcrFd}G02M{^XiVh=!R&)T73$5rdB2$@vt3_o=iB;(9 zieR}%et!GA-b8FtWo9)lQk)(*t2JG7KxL`f!XR(9aG6rv?noJSMoLbQ;+V<#WP3iR zPp|L;AC-g>(WPhN5aN~CU>=j@b{YAM4|42ryG#=jL<>KRSLsI@gJrb$jCALsk(%aW+}GV!77%8~&1iU=l33Pbl0>0dm4` zo!6Zc9H?8jqn7U6G?!yWj|W78ctwpORwCXX_?VUk<+cjy(8c8I5dMDgrv&X)AuuSu z(bFV?VYzRb^TDv%W8tdD!U7h>*I=5mFoDH}h0`7juWzfy!U7f>7H)gvg@P=gk8D|Z zz+%T@1lOHA7Gq$4kzrv1iyaFKSnTr0TWlBJhDG7enPg66HlitBKP3Jt zJZHuemditt^Bw#8`Yhx9DHml52`~OpY&0_xL?})@?=Xl%KVLiph%!@_^CGbT$;&t~ zw36d)Bj||)I?gL3HXu3b-hz%uJQ!YIA{hdb*GNhRQ=jH0-J&3~L1m!$b{&&@njnAs z3uUt~Rq*qFcI(=IOZw_`p2dS-zS(ZVq&^fZppZAit%HIoZbDoL2Cum;aUgM6DGbBY zE5^M-Ho5*lyT|JI{Ng7fjTK325l|*zK$Ssd1FCv%69B5R+2+ln8ucB2_4$~<=Y)_k@G;LliBp@3%p<*L z=s+GR-=bn>T0wYN9QIjwSgdF(f3!;qoz5$CJFd{Ku^-;3C@|+jr?SAp*mD~a^YbOE zN-RzeYhL04omz+@n<@EmbAFd4-O^cl)tnX4;3wA~8qAD5BZlgkku_l`b?5!8Kr8@K zPgZ1qM}fEiq^?}I&H{lkq^8urjsh71kfUlY=m%sBK#r=upcN4L=$bX%riBg)e(RT? 
z?<9yVF(ZM$EylV;U-?#_@znUVshViKnSB2Aww`HCw#a~}fWQa}!rjsuG5=01%^o?s zcqXg2;;GEN!oDT{gmPtwmeZMp8X>meyI-S!N3ld^X&qNv{vV|a|H(i6P!SEX8?Pu@ z;1^X89+K#Jk=He?>j@x(f)dT(kOB_ngabI+`k&0Rrl*MLZ3`Kn5InzhS$5yFx~MB> zYFSZMajoMMbrtj_cYjrgsw(VuROH-Sy)YG{dRr8HeH{OM9{7H5%b-%Xc)*_a?HQDR z_%T8$H_lMQw`rHMCEA`lX5WfxY}}UH5O3V8Xd?f!Mss>twbIX6s}25X{!c>=Br)i`gE0 zXX|9P17_=Ewg+bGV)hV*aiz?bKF#QVT`+IlcnGj)YCL?GYiayQt6Q2r1^E*)Z*=5@Y7g-lAn_34|OMyJ=&lxs;xT1_pujgu2APBL%eitu$l<~KJdYfbK z)}kb!d>G+)wrdg+e|dcRlz)e3t1$CEKQl&z6w+Jrb6TWtrR7a(@SqHN6Uetl-URZk zkhh@Cw?-b?d|Tu#Am0jk8%CR!$U8v(75Vhc845V&o|qP&#|vwJzE~$`Ggg1Bjn`v+O^7hxu&XWO|8^g^|xbxb;a5`iFGepZ1ut&-I8c_Ut$wOkgbwLuaSo8=sCR^ z0nGYfjvImL>I%~YnDxLs;Q;{<#Hg!$Pur1GPj>@sFrZZfhE>vu_W(b~lLr;CL)6F> z4~RR5IX0McesgvS*%3GB0hdh6AGB=MsBeKk1^$@ekHY?pq5mlE5A+{fh5fMx(0`Qn z$5g-{8~iEo#{_?r_QwK$6!ynbV0?4HpIU!5T(dm`y+dto>pkS+f;kPk;ei_pxiJJc il#N1 None: +def build_integrations_manifest(overwrite: bool, rule_integrations: list) -> None: """Builds a new local copy of manifest.yaml from integrations Github.""" if overwrite: if os.path.exists(MANIFEST_FILE_PATH): os.remove(MANIFEST_FILE_PATH) - rule_integrations = [d.name for d in Path(INTEGRATION_RULE_DIR).glob('*') if d.is_dir()] - if "endpoint" in rule_integrations: - rule_integrations.remove("endpoint") final_integration_manifests = {integration: {} for integration in rule_integrations} - # initialize github client and point to package-storage prod - github = GithubClient(token) - client = github.authenticated_client - organization = client.get_organization("elastic") - repository = organization.get_repo("package-storage") - pkg_storage_prod_branch = repository.get_branch("production") - pkg_storage_branch_sha = pkg_storage_prod_branch.commit.sha - for integration in rule_integrations: - integration_manifests = get_integration_manifests(repository, pkg_storage_branch_sha, - pkg_path=f"packages/{integration}") + integration_manifests = get_integration_manifests(integration) for manifest in integration_manifests: 
validated_manifest = IntegrationManifestSchema(unknown=EXCLUDE).load(manifest) package_version = validated_manifest.pop("version") @@ -72,6 +59,7 @@ def build_integrations_manifest(token: str, overwrite: bool) -> None: manifest_file = gzip.open(MANIFEST_FILE_PATH, "w+") manifest_file_bytes = json.dumps(final_integration_manifests).encode("utf-8") manifest_file.write(manifest_file_bytes) + print(f"final integrations manifests dumped: {MANIFEST_FILE_PATH}") def find_least_compatible_version(package: str, integration: str, @@ -79,6 +67,11 @@ def find_least_compatible_version(package: str, integration: str, """Finds least compatible version for specified integration based on stack version supplied.""" integration_manifests = {k: v for k, v in sorted(packages_manifest[package].items(), key=Version)} + # trim integration_manifests to only the latest major entries + max_major, *_ = max([Version(manifest_version) for manifest_version in integration_manifests]) + latest_major_integration_manifests = \ + {k: v for k, v in integration_manifests.items() if Version(k)[0] == max_major} + def compare_versions(int_ver: str, pkg_ver: str) -> bool: """Compares integration and package version""" pkg_major, pkg_minor = Version(pkg_ver) @@ -90,33 +83,28 @@ def find_least_compatible_version(package: str, integration: str, compatible = Version(int_ver) <= Version(pkg_ver) return compatible - for version, manifest in integration_manifests.items(): - for kibana_compat_vers in re.sub(r"\>|\<|\=|\^", "", manifest["conditions"]["kibana.version"]).split(" || "): + for version, manifest in latest_major_integration_manifests.items(): + for kibana_compat_vers in re.sub(r"\>|\<|\=|\^", "", manifest["conditions"]["kibana"]["version"]).split(" || "): if compare_versions(kibana_compat_vers, current_stack_version): - return version + return f"^{version}" print(f"no compatible version for integration {package}:{integration}") return None -def get_integration_manifests(repository, sha: str, pkg_path: 
str) -> list: +def get_integration_manifests(integration: str) -> list: """Iterates over specified integrations from package-storage and combines manifests per version.""" - integration = pkg_path.split("/")[-1] - versioned_packages = repository.get_dir_contents(pkg_path, ref=sha) - versions = [p.path.split("/")[-1] for p in versioned_packages] + epr_search_url = "https://epr.elastic.co/search" - manifests = [] - for version in versions: - contents = repository.get_dir_contents(f"{pkg_path}/{version}", ref=sha) - print(f"Processing {integration} - Version: {version}") + # link for search parameters - https://github.com/elastic/package-registry + epr_search_parameters = {"package": f"{integration}", "prerelease": "true", + "all": "true", "include_policy_templates": "true"} + epr_search_response = requests.get(epr_search_url, params=epr_search_parameters) + epr_search_response.raise_for_status() + manifests = epr_search_response.json() - processing_version = contents[0].path.split("/")[2] - manifest_content = [c for c in contents if "manifest" in c.path] - - if len(manifest_content) < 1: - raise Exception(f"manifest file does not exist for {integration}:{processing_version}") - - path = manifest_content[0].path - manifest_content = yaml.safe_load(repository.get_contents(path, ref=sha).decoded_content.decode()) - manifests.append(manifest_content) + if not manifests: + raise ValueError(f"EPR search for {integration} integration package returned empty list") + print(f"loaded {integration} manifests from the following package versions: " + f"{[manifest['version'] for manifest in manifests]}") return manifests diff --git a/detection_rules/rule.py b/detection_rules/rule.py index fb33b9231..5bf20653e 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -769,7 +769,7 @@ class TOMLRuleContents(BaseRuleContents, MarshmallowDataclassMixin): # if integration is not a policy template remove if package["version"]: policy_templates = packages_manifest[ - 
package["package"]][package["version"]]["policy_templates"] + package["package"]][package["version"].strip("^")]["policy_templates"] if package["integration"] not in policy_templates: del package["integration"] diff --git a/tests/test_all_rules.py b/tests/test_all_rules.py index 0cf4cbc26..0211be14e 100644 --- a/tests/test_all_rules.py +++ b/tests/test_all_rules.py @@ -11,17 +11,18 @@ from collections import defaultdict from pathlib import Path import kql - from detection_rules import attack from detection_rules.beats import parse_beats_from_index from detection_rules.packaging import current_stack_version -from detection_rules.rule import QueryRuleData +from detection_rules.rule import (QueryRuleData, TOMLRuleContents, + load_integrations_manifests) from detection_rules.rule_loader import FILE_PATTERN from detection_rules.schemas import definitions from detection_rules.semver import Version -from detection_rules.utils import get_path, load_etc_dump +from detection_rules.utils import INTEGRATION_RULE_DIR, get_path, load_etc_dump from detection_rules.version_lock import default_version_lock from rta import get_available_tests + from .base import BaseRuleTest @@ -440,19 +441,36 @@ class TestRuleMetadata(BaseRuleTest): """Test that rules in integrations folders have matching integration defined.""" failures = [] - for rule in self.production_rules: - rules_path = get_path('rules') - *_, grandparent, parent, _ = rule.path.parts - in_integrations = grandparent == 'integrations' - integration = rule.contents.metadata.get('integration') - has_integration = integration is not None + packages_manifest = load_integrations_manifests() - if (in_integrations or has_integration) and (parent != integration): - err_msg = f'{self.rule_str(rule)}\nintegration: {integration}\npath: {rule.path.relative_to(rules_path)}' # noqa: E501 + for rule in self.production_rules: + rule_integration = rule.contents.metadata.get('integration') + + # checks if metadata tag matches from a list of 
integrations in EPR + if rule_integration and rule_integration not in packages_manifest.keys(): + err_msg = f"{self.rule_str(rule)} integration '{rule_integration}' unknown" failures.append(err_msg) + # checks if the rule path matches the intended integration + valid_integration_folders = [p.name for p in list(Path(INTEGRATION_RULE_DIR).glob("*"))] + if rule_integration and rule_integration in valid_integration_folders: + if rule_integration != rule.path.parent.name: + err_msg = f'{self.rule_str(rule)} {rule_integration} tag, but path is {rule.path.parent.name}' + failures.append(err_msg) + + # checks if event.dataset exists in query object and a tag exists in metadata + if isinstance(rule.contents.data, QueryRuleData) and rule.contents.data.language != 'lucene': + trc = TOMLRuleContents(rule.contents.metadata, rule.contents.data) + package_integrations = trc._get_packaged_integrations(packages_manifest) + if package_integrations and not rule_integration: + err_msg = f'{self.rule_str(rule)} integration tag should exist: ' + if failures: - err_msg = 'The following rules have missing/incorrect integrations or are not in an integrations folder:\n' + err_msg = """ + The following rules have missing or invalid integrations tags. + Try updating the integrations manifest file: + - `python -m detection_rules dev integrations build-manifests`\n + """ self.fail(err_msg + '\n'.join(failures)) def test_rule_demotions(self):