feat: Introduce nnUNet inference pipeline with DICOM-to-NIfTI conversion, pretrained model installation, and configuration; update .gitignore to exclude model weights (*.pth) and archives (*.zip).

Furen Xiao 2026-01-27 10:45:12 +08:00
parent 2bfa789006
commit 78e3c012ef
9 changed files with 165 additions and 0 deletions

2
.gitignore vendored

@@ -162,3 +162,5 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
*.pth
*.zip

5
src/2508.yaml Normal file

@@ -0,0 +1,5 @@
dataset_name_or_id: 2508 # models trained on NTUH database
# dataset_name_or_id: 3828 # models trained on BraTS + NTUH
nnunet_preprocessed: "./nnUNet/preprocessed" # directory for storing pre-processed data (optional)
nnunet_raw: "./nnUNet/raw/" # directory for storing formatted raw data (optional)
nnunet_results: "./nnUNet/results" # directory for storing trained model checkpoints (optional)

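These YAML configs are consumed by MONAI's nnUNetV2Runner, which reads dataset_name_or_id and the nnunet_* paths and sets the matching nnU-Net environment variables before inference. A minimal usage sketch, assuming MONAI with nnU-Net v2 installed (the config path here is illustrative):

from monai.apps.nnunet import nnUNetV2Runner

# The runner maps nnunet_raw / nnunet_preprocessed / nnunet_results from the
# YAML onto the nnUNet_raw / nnUNet_preprocessed / nnUNet_results environment
# variables and resolves dataset_name_or_id to the dataset folder.
runner = nnUNetV2Runner(input_config="src/2508.yaml")
runner.predict_ensemble_postprocessing()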
4
src/2602.yaml Normal file

@@ -0,0 +1,4 @@
dataset_name_or_id: 2602 # models trained on BraTS + NTUH
nnunet_preprocessed: "./nnUNet/preprocessed" # directory for storing pre-processed data (optional)
nnunet_raw: "./nnUNet/raw/" # directory for storing formatted raw data (optional)
nnunet_results: "./nnUNet/results" # directory for storing trained model checkpoints (optional)

5
src/3828.yaml Normal file

@@ -0,0 +1,5 @@
# dataset_name_or_id: 2508 # models trained on NTUH database
dataset_name_or_id: 3828 # models trained on BraTS + NTUH
nnunet_preprocessed: "./nnUNet/preprocessed" # directory for storing pre-processed data (optional)
nnunet_raw: "./nnUNet/raw/" # directory for storing formatted raw data (optional)
nnunet_results: "./nnUNet/results" # directory for storing trained model checkpoints (optional)

26
src/check_dicom.py Normal file

@@ -0,0 +1,26 @@
import pydicom

filepath = "/mnt/nextcloud/2026/計畫/TSHA_腦腫瘤驗證標記資料/Brain Tumor Label example/1.2.826.0.1.3680043.8.498.16118352232694493510220170548312112804.dcm"

try:
    ds = pydicom.dcmread(filepath)
    print(f"Transfer Syntax: {ds.file_meta.TransferSyntaxUID}")
    try:
        arr = ds.pixel_array
        print("Successfully accessed pixel_array")
        # Try converting to uncompressed
        ds.decompress()
        print("Successfully decompressed")
    except Exception as e:
        print(f"Error accessing pixel_array or decompressing: {e}")
        # Check which pixel-data handlers are available
        print(f"Available handlers for this syntax: {pydicom.config.pixel_data_handlers}")
except Exception as e:
    print(f"Error reading file: {e}")

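If pixel_array fails here, the transfer syntax is usually compressed and pydicom is missing a decoder plugin. A quick probe for the common decoder packages, as a sketch (these package names are the usual optional dependencies of pydicom, not requirements of this repo):

import importlib.util

# pydicom delegates decompression to optional plugins; probe the common ones.
for pkg in ("gdcm", "pylibjpeg", "PIL"):
    found = importlib.util.find_spec(pkg) is not None
    print(f"{pkg}: {'installed' if found else 'missing'}")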
110
src/inference.py Normal file

@@ -0,0 +1,110 @@
import os
import shutil
import subprocess

from monai.apps.nnunet import nnUNetV2Runner

# Select the model/config pair to run.
# DATASET = "Dataset2508_CKTarget"
# YAML = "2508.yaml"
DATASET = "Dataset2602_BraTS-CK"
YAML = "2602.yaml"

DATA_DIR = '/mnt/nextcloud/2026/計畫/TSHA_腦腫瘤驗證標記資料/Brain Tumor Label example'
imagesTs = f'./nnUNet/raw/{DATASET}/imagesTs/'
postprocessed = f'./nnUNet/results/{DATASET}/ensemble_predictions_postprocessed'
# OUTPUT_DIR = '/mnt/b4/Public/0'
OUTPUT_DIR = '/mnt/nextcloud/2026/計畫/TSHA_腦腫瘤驗證標記資料/標記資料'


def main():
    if not os.path.exists(DATA_DIR):
        print(f"Error: DATA_DIR {DATA_DIR} does not exist.")
        return
    if os.path.exists(imagesTs):
        shutil.rmtree(imagesTs)
    os.makedirs(imagesTs)
    print(f"Converting DICOM from {DATA_DIR} to NIfTI in {imagesTs}...")
    try:
        # dicom2nifti.convert_directory(DATA_DIR, imagesTs, compression=True, reorient=True)
        cmd = ["dcm2niix", "-o", imagesTs, "-f", "%n", "-z", "y", DATA_DIR]
        print(' '.join(cmd))
        subprocess.run(cmd, check=True)
        print("Conversion completed successfully.")
    except subprocess.CalledProcessError:
        print("dcm2niix failed, likely due to compression. Attempting decompression with gdcmconv...")
        # Create a temporary directory for decompressed files
        temp_dir = os.path.join(imagesTs, "temp_decompressed")
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
        os.makedirs(temp_dir)
        try:
            # Decompress DICOM files; check for gdcmconv first
            if shutil.which("gdcmconv") is None:
                raise FileNotFoundError("gdcmconv not found. Please install gdcm-tools (e.g. conda install -c conda-forge gdcm).")
            files_processed = 0
            for fname in os.listdir(DATA_DIR):
                fpath = os.path.join(DATA_DIR, fname)
                if os.path.isfile(fpath):
                    # -w rewrites the file as uncompressed Explicit VR Little Endian
                    subprocess.run(["gdcmconv", "-w", fpath, os.path.join(temp_dir, fname)], check=False)
                    files_processed += 1
            if files_processed == 0:
                raise Exception("No files found to decompress.")
            print(f"Decompressed {files_processed} files to {temp_dir}")
            # Retry conversion on the decompressed copies
            cmd = ["dcm2niix", "-o", imagesTs, "-f", "%n", "-z", "y", temp_dir]
            print(' '.join(cmd))
            subprocess.run(cmd, check=True)
            print("Conversion completed successfully (after decompression).")
        except Exception as e:
            print(f"Fallback conversion failed: {e}")
        finally:
            # Cleanup
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
    except Exception as e:
        print(f"Conversion failed: {e}")

    # nnU-Net expects channel-suffixed filenames (<case>_0000.nii.gz); rename as needed.
    INPUT_LIST = []
    for f in sorted(os.scandir(imagesTs), key=lambda x: x.name):
        if f.name.endswith("_0000.nii.gz"):
            INPUT_LIST.append(f.path)
        elif f.name.endswith(".nii.gz"):
            new_path = f.path.replace(".nii.gz", "_0000.nii.gz")
            INPUT_LIST.append(new_path)
            os.rename(f.path, new_path)
            print(f.path, new_path)

    runner = nnUNetV2Runner(YAML)
    runner.predict_ensemble_postprocessing()

    # Move the post-processed ensemble predictions to OUTPUT_DIR.
    for p in INPUT_LIST:
        basename = os.path.basename(p).replace("_0000.nii.gz", ".nii.gz")
        pred_file = os.path.join(postprocessed, basename)
        output_file = os.path.join(OUTPUT_DIR, basename)
        shutil.move(pred_file, output_file)
        print(pred_file, output_file)

    # Clean up intermediate prediction folders
    shutil.rmtree(f'./nnUNet/results/{DATASET}/pred_2d', ignore_errors=True)
    shutil.rmtree(f'./nnUNet/results/{DATASET}/pred_3d_fullres', ignore_errors=True)
    shutil.rmtree(f'./nnUNet/results/{DATASET}/pred_3d_lowres', ignore_errors=True)
    shutil.rmtree(f'./nnUNet/results/{DATASET}/ensemble_predictions', ignore_errors=True)
    shutil.rmtree(postprocessed, ignore_errors=True)


if __name__ == "__main__":
    main()

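The gdcmconv fallback above shells out once per file. A pure-pydicom alternative, sketched here under the assumption that a decoder plugin (python-gdcm or pylibjpeg) is installed; the helper name is illustrative and not part of this commit:

import os
import pydicom

def decompress_dicom_dir(src_dir, dst_dir):
    # Rewrite every DICOM file in src_dir as uncompressed into dst_dir.
    os.makedirs(dst_dir, exist_ok=True)
    count = 0
    for fname in os.listdir(src_dir):
        fpath = os.path.join(src_dir, fname)
        if not os.path.isfile(fpath):
            continue
        ds = pydicom.dcmread(fpath)
        if ds.file_meta.TransferSyntaxUID.is_compressed:
            ds.decompress()  # re-encodes PixelData as Explicit VR Little Endian
        ds.save_as(os.path.join(dst_dir, fname))
        count += 1
    return count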
5
src/input.yaml Normal file

@@ -0,0 +1,5 @@
# dataset_name_or_id: 2508 # models trained on NTUH database
dataset_name_or_id: 3828 # models trained on BraTS + NTUH
nnunet_preprocessed: "./nnUNet/preprocessed" # directory for storing pre-processed data (optional)
nnunet_raw: "./nnUNet/raw" # directory for storing formatted raw data (optional)
nnunet_results: "./nnUNet/results" # directory for storing trained model checkpoints (optional)

7
src/install_model.sh Normal file

@@ -0,0 +1,7 @@
#!/bin/bash
SCRIPT_PATH="$(dirname "$(readlink -f "$0")")"
export nnUNet_preprocessed=./nnUNet/preprocessed
export nnUNet_results=./nnUNet/results
export nnUNet_raw=./nnUNet/raw
#nnUNetv2_install_pretrained_model_from_zip "$SCRIPT_PATH/3828.zip"
nnUNetv2_install_pretrained_model_from_zip "$SCRIPT_PATH/2602.zip"

1
src/nnUNet Symbolic link

@@ -0,0 +1 @@
/mnt/b4/Public/nnUNet/