elixir-europe · bedroesb · May 27, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
diff --git a/.github/workflows/multi-omics-submission-test.yaml b/.github/workflows/multi-omics-submission-test.yaml
@@ -17,7 +17,7 @@ jobs:
 
       # Paths for MARS repo and ISA template
       REPOSITORY_SERVICES_PATH: ${{ github.workspace }}/MARS/repository-services
-      ISA_TEMPLATE_PATH: ${{ github.workspace }}/MARS/test-data/biosamples-input-isa.json
+      ISA_TEMPLATE_PATH: ${{ github.workspace }}/MARS/test-data/biosamples-input-isa-multi.json
 
       # Credentials from GitHub secrets
       WEBIN_USERNAME: ${{ secrets.WEBIN_USERNAME }}

diff --git a/MARS b/MARS
diff --git a/README.md b/README.md
@@ -404,5 +404,3 @@ python mars_cli.py --credential-service-name metabolights  --username-credential
 
 [To set up and run the MARS tool locally using Docker, follow these steps](../repository-services/README.md)
 
-
-
diff --git a/mars_lib/isa_json.py b/mars_lib/isa_json.py
@@ -484,29 +484,26 @@ def update_isa_json(isa_json: IsaJson, repo_response: RepositoryResponse) -> Isa
 
             add_accession_to_data_file_node(updated_node, accession.value)
         else:
-            # Add study accession to study comments
             updated_study = apply_filter(study_filter, investigation.studies)
-
-            study_accession_comment: Comment = Comment(
+            accession_comment = Comment(
                 name=f"{target_repository}_{target_level}_accession",
                 value=accession.value,
             )
-            updated_study.comments.append(study_accession_comment)
-
-            # Add study accession to assay comments
-            updated_assay = next(
-                filter(
-                    lambda assay: is_assay_for_target_repo(assay, target_repository),
-                    updated_study.assays,
-                ),
-                None,
-            )
-            if updated_assay:
-                assay_accession_comment: Comment = Comment(
-                    name=f"{target_repository}_{target_level}_accession",
-                    value=accession.value,
+
+            if target_level == "study":
+                updated_study.comments.append(accession_comment)
+            else:
+                updated_assay = next(
+                    filter(
+                        lambda assay: is_assay_for_target_repo(
+                            assay, target_repository
+                        ),
+                        updated_study.assays,
+                    ),
+                    None,
                 )
-                updated_assay.comments.append(assay_accession_comment)
+                if updated_assay:
+                    updated_assay.comments.append(accession_comment)
     isa_json.investigation = investigation
     return isa_json
 
@@ -525,10 +522,13 @@ def map_data_files_to_repositories(
     for assay in assays:
         target_repo_comment: Comment = detect_target_repo_comment(assay.comments)
         # This is an effect of everything being optional in the Comment model.
-        # Should we decide to make the value mandatory, this guard clause would not be necessary anymore.
+        # Should we decide to make the value mandatory, this guard clause
+        # would not be necessary anymore.
         if target_repo_comment.value is None:
             raise ValueError(
-                f"At least one assay in the ISA-JSON has no '{TARGET_REPO_KEY}' comment. Mapping not possible. Make sure all assays in the ISA-JSON have this comment!"
+                f"At least one assay in the ISA-JSON has no "
+                f"'{TARGET_REPO_KEY}' comment. Mapping not possible. "
+                f"Make sure all assays in the ISA-JSON have this comment!"
             )
         assay_data_files = [df.name for df in assay.dataFiles]
 
@@ -555,7 +555,11 @@ def map_data_files_to_repositories(
 
     [
         print_and_log(
-            msg=f"File '{rf['short_name']}' could not be mapped to any data file in the ISA-JSON. For this reason, it will be skipped during submission!",
+            msg=(
+                f"File '{rf['short_name']}' could not be mapped to any data "
+                f"file in the ISA-JSON. For this reason, it will be skipped "
+                f"during submission!"
+            ),
             level="warning",
         )
         for rf in remaining_files

diff --git a/scripts/isa_generator.py b/scripts/isa_generator.py
@@ -40,34 +40,33 @@ def _md5_of_file(path: Path) -> str:
     return h.hexdigest()
 
 
-def _get_first_assay(isa_obj: dict[str, Any]) -> dict[str, Any] | None:
+def _get_all_assays(isa_obj: dict[str, Any]) -> List[dict[str, Any]]:
     """
-    Navigate to investigation.studies[0].assays[0] (if present).
+    Return all assay dicts under studies[*].assays[*] of the investigation (or root).
     """
     inv = isa_obj.get("investigation")
     if inv is None:
         inv = isa_obj
 
     if not isinstance(inv, dict):
-        return None
+        return []
 
     studies = inv.get("studies") or []
-    if not isinstance(studies, list) or not studies:
-        return None
+    if not isinstance(studies, list):
+        return []
 
-    first_study = studies[0]
-    if not isinstance(first_study, dict):
-        return None
+    assays: List[dict[str, Any]] = []
+    for study in studies:
+        if not isinstance(study, dict):
+            continue
 
-    assays = first_study.get("assays") or []
-    if not isinstance(assays, list) or not assays:
-        return None
+        study_assays = study.get("assays") or []
+        if not isinstance(study_assays, list):
+            continue
 
-    first_assay = assays[0]
-    if not isinstance(first_assay, dict):
-        return None
+        assays.extend(assay for assay in study_assays if isinstance(assay, dict))
 
-    return first_assay
+    return assays
 
 
 def _ensure_comment(comments: List[dict[str, Any]], name: str, value: str) -> None:
@@ -83,22 +82,22 @@ def _ensure_comment(comments: List[dict[str, Any]], name: str, value: str) -> No
 
 
 def _update_datafiles_with_generated_files(
-    assay: dict[str, Any],
+    assays: List[dict[str, Any]],
     data_dir: Path,
-    n_files: int,
+    n_files: int | None,
 ) -> List[Path]:
     """
-    For the first assay, update its dataFiles entries with newly generated .fastq.gz files.
+    Update dataFiles of all assays (at most n_files if set) with new .fastq.gz files.
 
-    Behaviour per dataFiles[i] (for i < n_files):
+    Behaviour per touched data file:
 
       - Generate a unique .fastq.gz file based on the existing 'name':
           e.g. ENA_TEST2.R2.fastq.gz -> ENA_TEST2.R2_<suffix>.fastq.gz
         (if name doesn't end with .fastq.gz, just append _<suffix>.fastq.gz)
 
       - Write a dummy FASTQ into that file and compute its MD5.
 
-      - Update the dataFiles[i] object:
+      - Update the dataFiles entry:
           * "name" = new file name
           * in "comments":
               - "file name"       -> new file name
@@ -107,66 +106,65 @@ def _update_datafiles_with_generated_files(
               - "checksum_method" -> "MD5"
             (existing "accession", "submission date", etc. are kept as-is)
     """
-    data_files_json = assay.get("dataFiles") or []
-    if not isinstance(data_files_json, list):
-        return []
-
     generated_paths: List[Path] = []
     suffix = _timestamp_suffix()
+    updated_count = 0
 
-    # We only touch up to n_files entries, and only those that look like objects
-    for i, df_json in enumerate(data_files_json):
-        if i >= n_files:
-            break
-        if not isinstance(df_json, dict):
+    for assay in assays:
+        data_files_json = assay.get("dataFiles") or []
+        if not isinstance(data_files_json, list):
             continue
 
-        original_name = df_json.get("name")
-        if not isinstance(original_name, str) or not original_name:
-            continue
+        for df_json in data_files_json:
+            if n_files is not None and updated_count >= n_files:
+                return generated_paths
+            if not isinstance(df_json, dict):
+                continue
+
+            original_name = df_json.get("name")
+            if not isinstance(original_name, str) or not original_name:
+                continue
 
-        # Build unique .fastq.gz name
-        if original_name.endswith(".fastq.gz"):
-            base = original_name[:-len(".fastq.gz")]
-            new_name = f"{base}_{suffix}.fastq.gz"
-        else:
-            new_name = f"{original_name}_{suffix}.fastq.gz"
+            if original_name.endswith(".fastq.gz"):
+                base = original_name[:-len(".fastq.gz")]
+                new_name = f"{base}_{suffix}.fastq.gz"
+            else:
+                new_name = f"{original_name}_{suffix}.fastq.gz"
 
-        file_path = data_dir / new_name
-        _write_dummy_fastq_gz(file_path)
-        md5 = _md5_of_file(file_path)
+            file_path = data_dir / new_name
+            _write_dummy_fastq_gz(file_path)
+            md5 = _md5_of_file(file_path)
 
-        # Update the JSON entry
-        df_json["name"] = new_name
+            df_json["name"] = new_name
 
-        comments = df_json.get("comments")
-        if not isinstance(comments, list):
-            comments = []
-            df_json["comments"] = comments
+            comments = df_json.get("comments")
+            if not isinstance(comments, list):
+                comments = []
+                df_json["comments"] = comments
 
-        _ensure_comment(comments, "file name", new_name)
-        _ensure_comment(comments, "file type", "fastq")
-        _ensure_comment(comments, "file checksum", md5)
-        _ensure_comment(comments, "checksum_method", "MD5")
-        # DO NOT touch 'accession' or 'submission date' if present
+            _ensure_comment(comments, "file name", new_name)
+            _ensure_comment(comments, "file type", "fastq")
+            _ensure_comment(comments, "file checksum", md5)
+            _ensure_comment(comments, "checksum_method", "MD5")
 
-        generated_paths.append(file_path)
+            generated_paths.append(file_path)
+            updated_count += 1
 
     return generated_paths
 
 
 def generate_isa_json_with_data(
     work_dir: Path,
     template_path: Path,
-    n_files: int = 2,
+    n_files: int | None = None,
 ) -> Tuple[Path, List[Path]]:
     """
     PoC behaviour:
 
       1. Load ISA-JSON template from template_path.
-      2. Find investigation.studies[0].assays[0].dataFiles.
-      3. For up to n_files entries in dataFiles, generate UNIQUE .fastq.gz files
-         and update:
+      2. Find all investigation.studies[*].assays[*].dataFiles.
+      3. For each data file (or the first n_files when limited), generate UNIQUE
+         .fastq.gz files and update:
            - dataFiles[i]["name"]
            - dataFiles[i]["comments"] entries for file name, type, checksum, method.
       4. Write the resulting ISA-JSON to work_dir / 'isa.json'.
@@ -177,12 +175,12 @@ def generate_isa_json_with_data(
 
     isa_obj = json.loads(template_path.read_text())
 
-    assay = _get_first_assay(isa_obj)
+    assays = _get_all_assays(isa_obj)
     generated_paths: List[Path] = []
-    if assay is not None:
+    if assays:
         data_dir = work_dir / "data"
         generated_paths = _update_datafiles_with_generated_files(
-            assay=assay,
+            assays=assays,
             data_dir=data_dir,
             n_files=n_files,
         )

diff --git a/scripts/prepare_poc_submission.py b/scripts/prepare_poc_submission.py
@@ -125,7 +125,6 @@ def main() -> None:
     isa_path, data_files = generate_isa_json_with_data(
         work_dir=work_dir,
         template_path=isa_template,
-        n_files=2,
     )
 
     cred_path = write_credentials_json(work_dir)

diff --git a/tests/fixtures/mars_receipts/biosamples_success_response.json b/tests/fixtures/mars_receipts/biosamples_success_response.json
@@ -13,7 +13,7 @@
                     "key": "studies",
                     "where": {
                         "key": "title",
-                        "value": "Arabidopsis thaliana"
+                        "value": "Integrated multi-omics profiling of Arabidopsis thaliana under controlled experimental conditions"
                     }
                 },
                 {
@@ -38,7 +38,7 @@
                     "key": "studies",
                     "where": {
                         "key": "title",
-                        "value": "Arabidopsis thaliana"
+                        "value": "Integrated multi-omics profiling of Arabidopsis thaliana under controlled experimental conditions"
                     }
                 },
                 {

diff --git a/tests/test_isa_json.py b/tests/test_isa_json.py
@@ -35,7 +35,10 @@ def test_load_isa_json():
     # Should test the validation process of the ISA JSON file where root has 'investigation' as key.
     valid_isa_json02 = load_isa_json("MARS/test-data/biosamples-input-isa.json", False)
     assert len(valid_isa_json02.investigation.studies) == 1
-    assert valid_isa_json02.investigation.studies[0].title == "Arabidopsis thaliana"
+    assert (
+        valid_isa_json02.investigation.studies[0].title
+        == "Integrated multi-omics profiling of Arabidopsis thaliana under controlled experimental conditions"
+    )
 
     with pytest.raises(ValidationError):
         load_isa_json("./tests/fixtures/invalid_investigation.json", True)
@@ -245,7 +248,7 @@ def test_update_study_materials_with_accession_categories():
     )
 
 
-def test_update_study_and_assay_with_ena_study_accession_comment():
+def test_update_study_only_with_ena_study_accession_comment():
     json_path = "tests/fixtures/isa_jsons/1_after_biosamples.json"
     isa_json = load_isa_json(json_path, False)
     response_file_path = "tests/fixtures/mars_receipts/ena_success_response.json"
@@ -267,10 +270,10 @@ def test_update_study_and_assay_with_ena_study_accession_comment():
         None,
     )
     assay_comments = ena_assay.comments
-    accession_comment = filter(
-        lambda x: x.name == "ena_study_accession", assay_comments
-    )
-    assert next(accession_comment).value == ena_study_accession_number
+    accession_comments = [
+        comment for comment in assay_comments if comment.name == "ena_study_accession"
+    ]
+    assert accession_comments == []
 
 
 def test_update_datafile_comment_with_accession_comment_present():
+7 −31		...isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/controller/WebinIsaToXmlSubmissionController.java
+149 −4		...tory-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/sra/service/MarsReceiptService.java
+414 −162		...rvices/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/sra/service/WebinExperimentXmlCreator.java
+0 −50		...-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/sra/service/WebinProjectXmlCreator.java
+77 −30		...tory-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/sra/service/WebinRunXmlCreator.java
+169 −55		...ry-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/sra/service/WebinStudyXmlCreator.java
+28 −0		...itory-services/isajson-ena/src/test/java/com/elixir/biohackaton/ISAToSRA/WebinExperimentXmlCreatorTest.java
+40 −12		repository-services/isajson-ena/src/test/java/com/elixir/biohackaton/ISAToSRA/WebinRunXmlCreatorTest.java
+60 −4		repository-services/receipt/src/main/java/com/elixir/biohackaton/ISAToSRA/receipt/MarsReceiptProvider.java
+1,617 −0		test-data/biosamples-input-isa-multi.json
+126 −78		test-data/biosamples-input-isa.json
+81 −17		test-data/biosamples-modified-isa.json
+2 −2		test-data/biosamples-original-isa-no-accesion-char.json
+87 −23		test-data/biosamples-original-isa.json
+3 −3		test-data/ena-receipt-invalid.json
+2 −2		test-data/ena-receipt.json
+2 −2		test-data/mars-ena-receipt-invalid.json
+8 −1		test-data/mars-ena-receipt.json
Original file line number	Diff line number	Diff line change
Expand Up		@@ -404,5 +404,3 @@ python mars_cli.py --credential-service-name metabolights --username-credential

		[To set up and run the MARS tool locally using Docker, follow these steps](../repository-services/README.md)