Readers

The readers below read your raw spatial data into a SpatialData object. Choose the function matching your technology of interest.

Warning

Due to frequent updates in the data formats provided by the different companies, you might have issues loading your data. In that case, consider opening an issue detailing the machine and software version you used, the error log, and an example of the file names you are trying to read.

sopa.io.xenium(path, image_models_kwargs=None, imread_kwargs=None, cells_boundaries=False, cells_table=False, **kwargs)

Read Xenium data as a SpatialData object. For more information, refer to spatialdata-io.

This function reads the following files:
  • transcripts.parquet: transcripts locations and names
  • experiment.xenium: metadata file
  • morphology_focus.ome.tif: morphology image (or a directory, for recent Xenium versions)

Parameters:

  • path (str | Path, required): Path to the Xenium directory containing all the experiment files
  • image_models_kwargs (dict | None, default None): Keyword arguments passed to spatialdata.models.Image2DModel.
  • imread_kwargs (dict | None, default None): Keyword arguments passed to dask_image.imread.imread.
  • cells_boundaries (bool, default False): Whether to also read the cell boundaries (polygons).
  • cells_table (bool, default False): Whether to also read the cell-by-gene table.

Returns:

  • SpatialData: A SpatialData object representing the Xenium experiment

Source code in sopa/io/reader/xenium.py
def xenium(
    path: str | Path,
    image_models_kwargs: dict | None = None,
    imread_kwargs: dict | None = None,
    cells_boundaries: bool = False,
    cells_table: bool = False,
    **kwargs,
) -> SpatialData:
    """Read Xenium data as a `SpatialData` object. For more information, refer to [spatialdata-io](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.xenium.html).

    This function reads the following files:
        - `transcripts.parquet`: transcripts locations and names
        - `experiment.xenium`: metadata file
        - `morphology_focus.ome.tif`: morphology image (or a directory, for recent Xenium versions)


    Args:
        path: Path to the Xenium directory containing all the experiment files
        image_models_kwargs: Keyword arguments passed to `spatialdata.models.Image2DModel`.
        imread_kwargs: Keyword arguments passed to `dask_image.imread.imread`.
        cells_boundaries: Whether to also read the cell boundaries (polygons).
        cells_table: Whether to also read the cell-by-gene table.

    Returns:
        A `SpatialData` object representing the Xenium experiment
    """
    from spatialdata_io.readers.xenium import xenium as xenium_spatialdata_io

    image_models_kwargs, imread_kwargs = _default_image_kwargs(image_models_kwargs, imread_kwargs)

    sdata: SpatialData = xenium_spatialdata_io(
        path,
        cells_table=cells_table,
        nucleus_labels=False,
        cells_labels=False,
        cells_as_circles=False,
        nucleus_boundaries=False,
        cells_boundaries=cells_boundaries,
        image_models_kwargs=image_models_kwargs,
        imread_kwargs=imread_kwargs,
        **kwargs,
    )

    if "table" in sdata.tables:
        sdata["table"].uns[ATTRS_KEY]["region"] = "cell_boundaries"
        sdata["table"].obs["region"] = "cell_boundaries"
        sdata["table"].obs["region"] = sdata["table"].obs["region"].astype("category")

    ensure_string_channel_names(sdata)

    ### Add Sopa attributes to detect the spatial elements
    if "morphology_focus" in sdata.images:
        sdata.attrs[SopaAttrs.CELL_SEGMENTATION] = "morphology_focus"

    if "he_image" in sdata.images:
        sdata.attrs[SopaAttrs.TISSUE_SEGMENTATION] = "he_image"

    if "transcripts" in sdata.points:
        sdata.attrs[SopaAttrs.TRANSCRIPTS] = "transcripts"

    return sdata
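
A minimal usage sketch (the path below is a placeholder for your own Xenium output directory):

```python
import sopa

# Placeholder path: point this to the directory containing transcripts.parquet, etc.
sdata = sopa.io.xenium("/path/to/xenium_directory")
print(sdata)  # inspect the loaded images, points, and attrs
```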

sopa.io.merscope(path, backend=None, z_layers=3, region_name=None, slide_name=None, image_models_kwargs=None, imread_kwargs=None, **kwargs)

Read MERSCOPE data as a SpatialData object. For more information, refer to spatialdata-io.

This function reads the following files:
  • detected_transcripts.csv: transcripts locations and names
  • all the images under the images directory
  • images/micron_to_mosaic_pixel_transform.csv: affine transformation

Parameters:

  • path (str | Path, required): Path to the MERSCOPE directory containing all the experiment files
  • backend (Literal['dask_image', 'rioxarray'] | None, default None): Either "dask_image" or "rioxarray" (the latter uses less RAM, but requires rioxarray to be installed). By default, uses "rioxarray" if and only if the rioxarray library is installed.
  • z_layers (int | list[int] | None, default 3): Indices of the z-layers to consider. Either one int index, or a list of int indices. If None, then no image is loaded. By default, only the middle layer is considered (that is, layer 3).
  • region_name (str | None, default None): Name of the region of interest, e.g., 'region_0'. If None then the name of the path directory is used.
  • slide_name (str | None, default None): Name of the slide/run. If None then the name of the parent directory of path is used (whose name starts with a date).
  • image_models_kwargs (dict | None, default None): Keyword arguments passed to spatialdata.models.Image2DModel.
  • imread_kwargs (dict | None, default None): Keyword arguments passed to dask_image.imread.imread.

Returns:

  • SpatialData: A SpatialData object representing the MERSCOPE experiment

Source code in sopa/io/reader/merscope.py
def merscope(
    path: str | Path,
    backend: Literal["dask_image", "rioxarray"] | None = None,
    z_layers: int | list[int] | None = 3,
    region_name: str | None = None,
    slide_name: str | None = None,
    image_models_kwargs: dict | None = None,
    imread_kwargs: dict | None = None,
    **kwargs,
) -> SpatialData:
    """Read MERSCOPE data as a `SpatialData` object. For more information, refer to [spatialdata-io](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.merscope.html).

    This function reads the following files:
        - `detected_transcripts.csv`: transcripts locations and names
        - all the images under the `images` directory
        - `images/micron_to_mosaic_pixel_transform.csv`: affine transformation

    Args:
        path: Path to the MERSCOPE directory containing all the experiment files
        backend: Either `"dask_image"` or `"rioxarray"` (the latter uses less RAM, but requires `rioxarray` to be installed). By default, uses `"rioxarray"` if and only if the `rioxarray` library is installed.
        z_layers: Indices of the z-layers to consider. Either one `int` index, or a list of `int` indices. If `None`, then no image is loaded. By default, only the middle layer is considered (that is, layer 3).
        region_name: Name of the region of interest, e.g., `'region_0'`. If `None` then the name of the `path` directory is used.
        slide_name: Name of the slide/run. If `None` then the name of the parent directory of `path` is used (whose name starts with a date).
        image_models_kwargs: Keyword arguments passed to `spatialdata.models.Image2DModel`.
        imread_kwargs: Keyword arguments passed to `dask_image.imread.imread`.

    Returns:
        A `SpatialData` object representing the MERSCOPE experiment
    """
    from spatialdata_io.readers.merscope import merscope as merscope_spatialdata_io

    image_models_kwargs, imread_kwargs = _default_image_kwargs(image_models_kwargs, imread_kwargs)

    sdata = merscope_spatialdata_io(
        path,
        backend=backend,
        z_layers=z_layers,
        region_name=region_name,
        slide_name=slide_name,
        image_models_kwargs=image_models_kwargs,
        imread_kwargs=imread_kwargs,
        cells_boundaries=False,
        cells_table=False,
        **kwargs,
    )

    ### Add Sopa attributes to detect the spatial elements
    if z_layers is not None:
        if not isinstance(z_layers, int) and len(z_layers) == 1:
            z_layers = z_layers[0]
        if isinstance(z_layers, int):
            for key in sdata.images.keys():
                if key.endswith(f"_z{z_layers}"):
                    sdata.attrs[SopaAttrs.CELL_SEGMENTATION] = key
        else:
            log.warning(
                f"Multiple z-layers provided: {z_layers}. Not deciding which image should be used for cell segmentation."
            )

    for key in sdata.points.keys():
        if key.endswith("_transcripts"):
            sdata.attrs[SopaAttrs.TRANSCRIPTS] = key

    sdata.attrs[SopaAttrs.GENE_EXCLUDE_PATTERN] = "blank"

    return sdata
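
A minimal usage sketch (the path is a placeholder; z_layers=3 is the default middle layer):

```python
import sopa

# Placeholder path: a MERSCOPE region directory, e.g. .../20240101_run/region_0
sdata = sopa.io.merscope("/path/to/region_0", z_layers=3)
```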

sopa.io.cosmx(path, dataset_id=None, fov=None, read_proteins=False, image_models_kwargs=None, imread_kwargs=None)

Read CosMx NanoString data. The fields of view are stitched together, unless fov is provided.

This function reads the following files:
  • *_fov_positions_file.csv or *_fov_positions_file.csv.gz: FOV locations
  • Morphology2D directory: all the FOVs morphology images
  • Morphology_ChannelID_Dictionary.txt: Morphology channels names
  • *_tx_file.csv.gz or *_tx_file.csv: Transcripts location and names
  • If read_proteins is True, all the images under the nested ProteinImages directories will be read

These files must be exported as flat files from AtoMx. That is: within a study, click on "Export" and then select files from the "Flat CSV Files" section (transcripts flat and FOV position flat).

Parameters:

  • path (str | Path, required): Path to the root directory containing NanoString files.
  • dataset_id (Optional[str], default None): Optional name of the dataset (must be provided if it cannot be inferred).
  • fov (int | str | None, default None): Name or number of one single field of view to be read. If a string is provided, an example of correct syntax is "F008". By default, reads all FOVs.
  • read_proteins (bool, default False): Whether to read the proteins or the transcripts.
  • image_models_kwargs (dict | None, default None): Keyword arguments passed to spatialdata.models.Image2DModel.
  • imread_kwargs (dict | None, default None): Keyword arguments passed to dask_image.imread.imread.

Returns:

  • SpatialData: A SpatialData object representing the CosMx experiment

Source code in sopa/io/reader/cosmx.py
def cosmx(
    path: str | Path,
    dataset_id: Optional[str] = None,
    fov: int | str | None = None,
    read_proteins: bool = False,
    image_models_kwargs: dict | None = None,
    imread_kwargs: dict | None = None,
) -> SpatialData:
    """
    Read *CosMx NanoString* data. The fields of view are stitched together, unless `fov` is provided.

    This function reads the following files:
        - `*_fov_positions_file.csv` or `*_fov_positions_file.csv.gz`: FOV locations
        - `Morphology2D` directory: all the FOVs morphology images
        - `Morphology_ChannelID_Dictionary.txt`: Morphology channels names
        - `*_tx_file.csv.gz` or `*_tx_file.csv`: Transcripts location and names
        - If `read_proteins` is `True`, all the images under the nested `ProteinImages` directories will be read

        These files must be exported as flat files from AtoMx. That is: within a study, click on "Export" and then select files from the "Flat CSV Files" section (transcripts flat and FOV position flat).

    Args:
        path: Path to the root directory containing *Nanostring* files.
        dataset_id: Optional name of the dataset (must be provided if it cannot be inferred).
        fov: Name or number of one single field of view to be read. If a string is provided, an example of correct syntax is "F008". By default, reads all FOVs.
        read_proteins: Whether to read the proteins or the transcripts.
        image_models_kwargs: Keyword arguments passed to `spatialdata.models.Image2DModel`.
        imread_kwargs: Keyword arguments passed to `dask_image.imread.imread`.

    Returns:
        A `SpatialData` object representing the CosMx experiment
    """
    path = Path(path)
    image_models_kwargs, imread_kwargs = _default_image_kwargs(image_models_kwargs, imread_kwargs)

    dataset_id = _infer_dataset_id(path, dataset_id)
    fov_locs = _read_fov_locs(path, dataset_id)
    fov_id, fov = _check_fov_id(fov)

    protein_dir_dict = {}
    if read_proteins:
        protein_dir_dict = {
            int(protein_dir.parent.name[3:]): protein_dir for protein_dir in list(path.rglob("**/FOV*/ProteinImages"))
        }
        assert len(protein_dir_dict), f"No directory called 'ProteinImages' was found under {path}"

    ### Read image(s)
    images_dir = _find_dir(path, "Morphology2D")
    morphology_coords = _cosmx_morphology_coords(path)

    if fov is None:
        image, c_coords = _read_stitched_image(
            images_dir,
            fov_locs,
            protein_dir_dict,
            morphology_coords,
            **imread_kwargs,
        )
        image_name = "stitched_image"
    else:
        pattern = f"*{fov_id}.TIF"
        fov_files = list(images_dir.rglob(pattern))

        assert len(fov_files), f"No file matches the pattern {pattern} inside {images_dir}"
        assert len(fov_files) == 1, f"Multiple files match the pattern {pattern}: {', '.join(fov_files)}"

        image, c_coords = _read_fov_image(fov_files[0], protein_dir_dict.get(fov), morphology_coords, **imread_kwargs)
        image_name = f"{fov}_image"

    parsed_image = Image2DModel.parse(image, dims=("c", "y", "x"), c_coords=c_coords, **image_models_kwargs)

    if read_proteins:
        return SpatialData(images={image_name: parsed_image}, attrs={SopaAttrs.CELL_SEGMENTATION: image_name})

    ### Read transcripts
    transcripts_data = _read_transcripts_csv(path, dataset_id)

    if fov is None:
        transcripts_data["x"] = transcripts_data["x_global_px"] - fov_locs["xmin"].min()
        transcripts_data["y"] = transcripts_data["y_global_px"] - fov_locs["ymin"].min()
        coordinates = None
        points_name = "points"
    else:
        transcripts_data = transcripts_data[transcripts_data["fov"] == fov]
        coordinates = {"x": "x_local_px", "y": "y_local_px"}
        points_name = f"{fov}_points"

    from spatialdata_io._constants._constants import CosmxKeys

    transcripts = PointsModel.parse(
        transcripts_data,
        coordinates=coordinates,
        feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT,
    )

    return SpatialData(
        images={image_name: parsed_image},
        points={points_name: transcripts},
        attrs={SopaAttrs.CELL_SEGMENTATION: image_name, SopaAttrs.TRANSCRIPTS: points_name},
    )
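
A minimal usage sketch (placeholder path; leave fov=None, the default, to stitch all fields of view):

```python
import sopa

# Placeholder path: the AtoMx flat-file export directory
sdata = sopa.io.cosmx("/path/to/cosmx_export", fov="F008")  # read a single FOV
```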

sopa.io.macsima(path, **kwargs)

Read MACSIMA data as a SpatialData object

Notes

For duplicated channel names, an index is appended in parentheses; for instance, you may find DAPI (1).

Parameters:

  • path (Path, required): Path to the directory containing the MACSIMA .tif images
  • kwargs (default {}): Keyword arguments for the _general_tif_directory_reader

Returns:

  • SpatialData: A SpatialData object with a 2D-image of shape (C, Y, X)

Source code in sopa/io/reader/macsima.py
def macsima(path: Path, **kwargs) -> SpatialData:
    """Read MACSIMA data as a `SpatialData` object

    Notes:
        For duplicated channel names, an index is appended in parentheses; for instance, you may find `DAPI (1)`.

    Args:
        path: Path to the directory containing the MACSIMA `.tif` images
        kwargs: Kwargs for the `_general_tif_directory_reader`

    Returns:
        A `SpatialData` object with a 2D-image of shape `(C, Y, X)`
    """
    files = list(Path(path).glob("*.tif"))

    if any("A-" in file.name for file in files):  # non-ome.tif format
        return _general_tif_directory_reader(path, files_to_channels=_get_channel_names_macsima, **kwargs)

    return _general_tif_directory_reader(path, **kwargs)
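
A minimal usage sketch (placeholder path):

```python
import sopa

# Placeholder path: a directory of MACSIMA .tif images
sdata = sopa.io.macsima("/path/to/macsima_directory")
```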

sopa.io.phenocycler(path, channels_renaming=None, image_models_kwargs=None)

Read Phenocycler data as a SpatialData object

Parameters:

  • path (str | Path, required): Path to a .qptiff file, or a .tif file (if exported from QuPath)
  • channels_renaming (dict | None, default None): A dictionary mapping channel names to their new names. Not all channels need to be renamed.
  • image_models_kwargs (dict | None, default None): Keyword arguments passed to spatialdata.models.Image2DModel.

Returns:

  • SpatialData: A SpatialData object with a 2D-image of shape (C, Y, X)

Source code in sopa/io/reader/phenocycler.py
def phenocycler(
    path: str | Path, channels_renaming: dict | None = None, image_models_kwargs: dict | None = None
) -> SpatialData:
    """Read Phenocycler data as a `SpatialData` object

    Args:
        path: Path to a `.qptiff` file, or a `.tif` file (if exported from QuPath)
        channels_renaming: A dictionary mapping channel names to their new names. Not all channels need to be renamed.
        image_models_kwargs: Keyword arguments passed to `spatialdata.models.Image2DModel`.

    Returns:
        A `SpatialData` object with a 2D-image of shape `(C, Y, X)`
    """
    image_models_kwargs, _ = _default_image_kwargs(image_models_kwargs)

    path = Path(path)
    image_name = path.absolute().stem

    if path.suffix == ".qptiff":
        with tf.TiffFile(path) as tif:
            series = tif.series[0]
            names = _deduplicate_names([_get_channel_name_qptiff(page.description) for page in series])

            delayed_image = delayed(lambda series: series.asarray())(tif)
            image = da.from_delayed(delayed_image, dtype=series.dtype, shape=series.shape)
    elif path.suffix == ".tif":
        image = imread(path)
        names = _get_IJ_channel_names(path)
    else:
        raise ValueError(f"Unsupported file extension {path.suffix}. Must be '.qptiff' or '.tif'.")

    names = _rename_channels(names, channels_renaming)
    image = image.rechunk(chunks=image_models_kwargs["chunks"])

    image = Image2DModel.parse(
        image,
        dims=("c", "y", "x"),
        transformations={"pixels": Identity()},
        c_coords=names,
        **image_models_kwargs,
    )

    return SpatialData(images={image_name: image}, attrs={SopaAttrs.CELL_SEGMENTATION: image_name})
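
A minimal usage sketch (placeholder path; the renaming dictionary below is only an example):

```python
import sopa

sdata = sopa.io.phenocycler(
    "/path/to/image.qptiff",
    channels_renaming={"DAPI-02": "DAPI"},  # example renaming, adjust to your channels
)
```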

sopa.io.hyperion(path, image_models_kwargs=None, imread_kwargs=None)

Read Hyperion data as a SpatialData object

Parameters:

  • path (Path, required): Path to the directory containing the Hyperion .tiff images
  • image_models_kwargs (dict | None, default None): Keyword arguments passed to spatialdata.models.Image2DModel.
  • imread_kwargs (dict | None, default None): Keyword arguments passed to dask_image.imread.imread.

Returns:

  • SpatialData: A SpatialData object with a 2D-image of shape (C, Y, X)

Source code in sopa/io/reader/hyperion.py
def hyperion(path: Path, image_models_kwargs: dict | None = None, imread_kwargs: dict | None = None) -> SpatialData:
    """Read Hyperion data as a `SpatialData` object

    Args:
        path: Path to the directory containing the Hyperion `.tiff` images
        image_models_kwargs: Keyword arguments passed to `spatialdata.models.Image2DModel`.
        imread_kwargs: Keyword arguments passed to `dask_image.imread.imread`.

    Returns:
        A `SpatialData` object with a 2D-image of shape `(C, Y, X)`
    """
    image_models_kwargs, imread_kwargs = _default_image_kwargs(image_models_kwargs, imread_kwargs)

    files = [file for file in Path(path).iterdir() if file.suffix == ".tiff"]

    names = _get_channel_names_hyperion(files)
    image = da.concatenate(
        [imread(file, **imread_kwargs) for file in files],
        axis=0,
    )

    image = image.rechunk(chunks=image_models_kwargs["chunks"])

    log.info(f"Found channel names {names}")

    image_name = Path(path).absolute().stem

    image = DataArray(image, dims=["c", "y", "x"], name=image_name, coords={"c": names})
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        image = _clip_intensity_values(image)

    image = Image2DModel.parse(
        image,
        transformations={"pixels": Identity()},
        c_coords=image.coords["c"].values,
        **image_models_kwargs,
    )

    return SpatialData(images={image_name: image}, attrs={SopaAttrs.CELL_SEGMENTATION: image_name})
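
A minimal usage sketch (placeholder path):

```python
import sopa

# Placeholder path: a directory of Hyperion .tiff images (one per channel)
sdata = sopa.io.hyperion("/path/to/hyperion_directory")
```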

sopa.io.aicsimageio(path, z_stack=0, image_models_kwargs=None, aics_kwargs=None)

Read an image using AICSImageIO. It supports special formats such as ND2, CZI, LIF, or DV.

Extra dependencies

To use this reader, you'll need the aicsimageio dependency (pip install aicsimageio). To read .czi images, you'll also need to install aicspylibczi (for instance pip install aicspylibczi).

Parameters:

  • path (Path, required): Path to the image file
  • z_stack (int, default 0): (Only for 3D images) Index of the stack in the z-axis to use.
  • image_models_kwargs (dict | None, default None): Keyword arguments passed to spatialdata.models.Image2DModel.
  • aics_kwargs (dict | None, default None): Keyword arguments passed to aicsimageio.AICSImage.

Returns:

  • SpatialData: A SpatialData object with a 2D-image of shape (C, Y, X)

Source code in sopa/io/reader/aics.py
def aicsimageio(
    path: Path,
    z_stack: int = 0,
    image_models_kwargs: dict | None = None,
    aics_kwargs: dict | None = None,
) -> SpatialData:
    """Read an image using [AICSImageIO](https://github.com/AllenCellModeling/aicsimageio). It supports special formats such as `ND2`, `CZI`, `LIF`, or `DV`.

    !!! note "Extra dependencies"
        To use this reader, you'll need the `aicsimageio` dependency (`pip install aicsimageio`). To read `.czi` images, you'll also need to install `aicspylibczi` (for instance `pip install aicspylibczi`).

    Args:
        path: Path to the image file
        z_stack: (Only for 3D images) Index of the stack in the z-axis to use.
        image_models_kwargs: Keyword arguments passed to `spatialdata.models.Image2DModel`.
        aics_kwargs: Keyword arguments passed to `aicsimageio.AICSImage`.

    Returns:
        A `SpatialData` object with a 2D-image of shape `(C, Y, X)`
    """
    image_models_kwargs, _ = _default_image_kwargs(image_models_kwargs, None)
    aics_kwargs = {} if aics_kwargs is None else aics_kwargs

    try:
        from aicsimageio import AICSImage
    except ImportError:
        raise ImportError("You need to install aicsimageio, e.g. by running `pip install aicsimageio`")

    xarr: xr.DataArray = AICSImage(path, **aics_kwargs).xarray_dask_data

    assert len(xarr.coords["T"]) == 1, f"Only one time dimension is supported, found {len(xarr.coords['T'])}."

    if len(xarr.coords["Z"]) > 1:
        log.info(f"3D image found, only reading {z_stack:=}")

    xarr = xarr.isel(T=0, Z=z_stack).rename({"C": "c", "Y": "y", "X": "x"})
    xarr = _image_int_dtype(xarr)

    image = Image2DModel.parse(xarr, c_coords=xarr.coords["c"].values, **image_models_kwargs)

    return SpatialData(images={"image": image}, attrs={SopaAttrs.CELL_SEGMENTATION: "image"})
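
A minimal usage sketch (placeholder path; requires the optional aicsimageio dependency):

```python
import sopa

# Placeholder path: e.g. an ND2 or CZI file; z_stack selects the z-slice for 3D images
sdata = sopa.io.aicsimageio("/path/to/image.czi", z_stack=0)
```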

sopa.io.ome_tif(path, as_image=False)

Read an .ome.tif image. This image should be a 2D image (with possibly multiple channels). Typically, this function can be used to open Xenium IF images.

Parameters:

  • path (Path, required): Path to the .ome.tif image
  • as_image (bool, default False): If True, will return a DataArray object

Returns:

  • DataArray | SpatialData: A DataArray or a SpatialData object

Source code in sopa/io/reader/utils.py
def ome_tif(path: Path, as_image: bool = False) -> DataArray | SpatialData:
    """Read an `.ome.tif` image. This image should be a 2D image (with possibly multiple channels).
    Typically, this function can be used to open Xenium IF images.

    Args:
        path: Path to the `.ome.tif` image
        as_image: If `True`, will return a `DataArray` object

    Returns:
        A `DataArray` or a `SpatialData` object
    """
    image_models_kwargs, _ = _default_image_kwargs()
    image_name = Path(path).absolute().name.split(".")[0]
    image: da.Array = imread(path)

    if image.ndim == 4:
        assert image.shape[0] == 1, "4D images not supported"
        image = da.moveaxis(image[0], 2, 0)
        log.info(f"Transformed 4D image into a 3D image of shape (c, y, x) = {image.shape}")
    elif image.ndim != 3:
        raise ValueError(f"Number of dimensions not supported: {image.ndim}")

    image = image.rechunk(chunks=image_models_kwargs["chunks"])

    try:
        channel_names = _ome_channels_names(path)
    except Exception:
        channel_names = []
    if len(channel_names) != len(image):
        channel_names = [str(i) for i in range(len(image))]
        log.warning(f"Channel names couldn't be read. Using {channel_names} instead.")

    image = DataArray(image, dims=["c", "y", "x"], name=image_name, coords={"c": channel_names})
    image = _image_int_dtype(image)

    if as_image:
        return image

    image = Image2DModel.parse(
        image,
        c_coords=channel_names,
        transformations={"pixels": Identity()},
        **image_models_kwargs,
    )

    return SpatialData(images={image_name: image}, attrs={SopaAttrs.CELL_SEGMENTATION: image_name})
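
A minimal usage sketch (placeholder path):

```python
import sopa

sdata = sopa.io.ome_tif("/path/to/image.ome.tif")                 # SpatialData object
image = sopa.io.ome_tif("/path/to/image.ome.tif", as_image=True)  # DataArray only
```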

sopa.io.wsi(path, chunks=(3, 256, 256), as_image=False, backend='tiffslide')

Read a WSI into a SpatialData object

Parameters:

  • path (str | Path, required): Path to the WSI
  • chunks (tuple[int, int, int], default (3, 256, 256)): Tuple representing the chunksize for the dimensions (C, Y, X).
  • as_image (bool, default False): If True, returns an image instead of a SpatialData object
  • backend (str, default 'tiffslide'): The library to use as a backend in order to load the WSI. One of: "openslide", "tiffslide".

Returns:

  • SpatialData | DataTree: A SpatialData object with a multiscale 2D-image of shape (C, Y, X), or just the DataTree if as_image=True

Source code in sopa/io/reader/wsi.py
def wsi(
    path: str | Path,
    chunks: tuple[int, int, int] = (3, 256, 256),
    as_image: bool = False,
    backend: str = "tiffslide",
) -> SpatialData | DataTree:
    """Read a WSI into a `SpatialData` object

    Args:
        path: Path to the WSI
        chunks: Tuple representing the chunksize for the dimensions `(C, Y, X)`.
        as_image: If `True`, returns an image instead of a `SpatialData` object
        backend: The library to use as a backend in order to load the WSI. One of: `"openslide"`, `"tiffslide"`.

    Returns:
        A `SpatialData` object with a multiscale 2D-image of shape `(C, Y, X)`, or just the DataTree if `as_image=True`
    """
    image_name, img, slide, slide_metadata = _open_wsi(path, backend=backend)

    images = {}
    for level, key in enumerate(list(img.keys())):
        suffix = key if key != "0" else ""

        scale_image = DataArray(
            img[key].transpose("S", f"Y{suffix}", f"X{suffix}"),
            dims=("c", "y", "x"),
        ).chunk(chunks)

        scale_factor = slide.level_downsamples[level]

        scale_image = Image2DModel.parse(
            scale_image[:3, :, :],
            transformations={"pixels": _get_scale_transformation(scale_factor)},
            c_coords=("r", "g", "b"),
        )
        scale_image.coords["y"] = scale_factor * scale_image.coords["y"]
        scale_image.coords["x"] = scale_factor * scale_image.coords["x"]

        images[f"scale{key}"] = scale_image

    multiscale_image = DataTree.from_dict(images)
    sdata = SpatialData(images={image_name: multiscale_image}, attrs={SopaAttrs.TISSUE_SEGMENTATION: image_name})
    sdata[image_name].attrs["metadata"] = slide_metadata
    sdata[image_name].attrs["backend"] = backend
    sdata[image_name].name = image_name

    if as_image:
        return multiscale_image

    return sdata
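
A minimal usage sketch (placeholder path; tiffslide is the default backend):

```python
import sopa

sdata = sopa.io.wsi("/path/to/slide.svs", backend="tiffslide")
```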

sopa.io.toy_dataset(*_, length=2048, cell_density=0.0001, n_points_per_cell=100, c_coords=['DAPI', 'CK', 'CD3', 'CD20'], genes=['EPCAM', 'CD3E', 'CD20', 'CXCL4', 'CXCL10'], sigma_factor=0.05, pixel_size=0.1, seed=0, include_vertices=False, include_image=True, include_he_image=True, apply_blur=True, as_output=False, transcript_cell_id_as_merscope=False, add_nan_gene_name=False)

Generate a dummy dataset composed of cells placed uniformly in a square, together with transcripts.

Parameters:

  • length (int, default 2048): Size of the square, in pixels
  • cell_density (float, default 0.0001): Density of cells per pixel^2
  • n_points_per_cell (int, default 100): Mean number of transcripts per cell
  • c_coords (list[str], default ['DAPI', 'CK', 'CD3', 'CD20']): Channel names
  • genes (int | list[str], default ['EPCAM', 'CD3E', 'CD20', 'CXCL4', 'CXCL10']): Number of different genes, or list of gene names
  • sigma_factor (float, default 0.05): Factor used to determine sigma for the gaussian blur.
  • pixel_size (float, default 0.1): Number of microns in one pixel.
  • seed (int, default 0): Numpy random seed
  • include_vertices (bool, default False): Whether to include the vertices of the cells (as points) in the spatialdata object
  • include_image (bool, default True): Whether to include the image in the spatialdata object
  • include_he_image (bool, default True): Whether to include a toy H&E image in the spatialdata object
  • apply_blur (bool, default True): Whether to apply gaussian blur on the image (without blur, cells are just one pixel)
  • as_output (bool, default False): If True, the data will have the same format as an output of Sopa
  • transcript_cell_id_as_merscope (bool, default False): If True, shift transcript cell IDs by one so that unassigned transcripts get -1 (as in MERSCOPE outputs)
  • add_nan_gene_name (bool, default False): If True, set one gene name to NaN (used for tests)

Returns:

  • SpatialData: A SpatialData object with a 2D image (sdata["image"]), the cells polygon boundaries (sdata["cells"]), the transcripts (sdata["transcripts"]), and optional cell vertices (sdata["vertices"]) if include_vertices is True.

Source code in sopa/utils/data.py
def toy_dataset(
    *_,
    length: int = 2_048,
    cell_density: float = 1e-4,
    n_points_per_cell: int = 100,
    c_coords: list[str] = ["DAPI", "CK", "CD3", "CD20"],
    genes: int | list[str] = ["EPCAM", "CD3E", "CD20", "CXCL4", "CXCL10"],
    sigma_factor: float = 0.05,
    pixel_size: float = 0.1,
    seed: int = 0,
    include_vertices: bool = False,
    include_image: bool = True,
    include_he_image: bool = True,
    apply_blur: bool = True,
    as_output: bool = False,
    transcript_cell_id_as_merscope: bool = False,
    add_nan_gene_name: bool = False,
) -> SpatialData:
    """Generate a dummy dataset composed of cells generated uniformly in a square. It also has transcripts.

    Args:
        length: Size of the square, in pixels
        cell_density: Density of cells per pixel^2
        n_points_per_cell: Mean number of transcripts per cell
        c_coords: Channel names
        genes: Number of different genes, or list of gene names
        sigma_factor: Factor used to determine `sigma` for the gaussian blur.
        pixel_size: Number of microns in one pixel.
        seed: Numpy random seed
        include_vertices: Whether to include the vertices of the cells (as points) in the spatialdata object
        include_image: Whether to include the image in the spatialdata object
        include_he_image: Whether to include a toy H&E image in the spatialdata object
        apply_blur: Whether to apply gaussian blur on the image (without blur, cells are just one pixel)
        as_output: If `True`, the data will have the same format as an output of Sopa
        transcript_cell_id_as_merscope: If `True`, shift transcript cell IDs by one so that unassigned transcripts get `-1` (as in MERSCOPE outputs)
        add_nan_gene_name: If `True`, set one gene name to NaN (used for tests)

    Returns:
        A SpatialData object with a 2D image (`sdata["image"]`), the cells polygon boundaries (`sdata["cells"]`), the transcripts (`sdata["transcripts"]`), and optional cell vertices (`sdata["vertices"]`) if `include_vertices` is `True`.
    """
    np.random.seed(seed)

    grid_width = max(1, int(length * np.sqrt(cell_density)))
    dx = length / grid_width
    sigma = dx * sigma_factor
    n_cells = grid_width**2
    radius = int(dx) // 4
    cell_types_index = np.random.randint(0, max(1, len(c_coords) - 1), n_cells)

    log.info(
        f"Image of size ({len(c_coords), length, length}) with {n_cells} cells and {n_points_per_cell} transcripts per cell"
    )

    ### Compute cell vertices (xy array)
    vertices_x = dx / 2 + np.arange(grid_width) * dx
    x, y = np.meshgrid(vertices_x, vertices_x)
    xy = np.stack([x.ravel(), y.ravel()], axis=1)
    xy += np.random.uniform(-dx / 2, dx / 2, size=xy.shape)
    xy = xy.clip(0, length - 1).astype(int)

    vertices = pd.DataFrame(xy, columns=["x", "y"])

    ### Create images
    images = {}

    if include_image:
        x_circle, y_circle = _circle_coords(radius)

        image = np.zeros((len(c_coords), length, length))
        for i, (x, y) in enumerate(xy):
            y_coords = (y + y_circle).clip(0, image.shape[1] - 1)
            x_coords = (x + x_circle).clip(0, image.shape[2] - 1)
            image[0, y_coords, x_coords] = 1
            if len(c_coords) > 1:
                image[cell_types_index[i] + 1, y_coords, x_coords] = 1
        if apply_blur:
            image = gaussian_filter(image, sigma=sigma, axes=(1, 2))
        image = (image / image.max() * 255).astype(np.uint8)
        image = da.from_array(image, chunks=(1, 1024, 1024))
        images["image"] = Image2DModel.parse(image, c_coords=c_coords, dims=["c", "y", "x"])

    if include_he_image:
        he_image = _he_image(length // 2)
        scale = length / (length // 2)
        images["he_image"] = Image2DModel.parse(
            he_image,
            dims=["c", "y", "x"],
            transformations={"global": Scale([scale, scale], axes=["x", "y"])},
            scale_factors=[2, 2],
        )

    ### Create cell boundaries
    cells = [Point(vertex).buffer(radius).simplify(tolerance=1) for vertex in xy]
    bbox = box(0, 0, length - 1, length - 1)
    cells = [cell.intersection(bbox) for cell in cells]
    gdf = gpd.GeoDataFrame(geometry=cells)
    shapes_key = "cellpose_boundaries" if as_output else "cells"
    shapes = {shapes_key: ShapesModel.parse(gdf)}

    ### Create transcripts
    n_genes = n_cells * n_points_per_cell
    point_cell_index = np.arange(n_cells).repeat(n_points_per_cell)
    points_coords = radius / 2 * np.random.randn(n_genes, 2) + xy[point_cell_index]
    points_coords = points_coords.clip(0, length - 1)

    if isinstance(genes, int):
        gene_names = np.random.choice([chr(97 + i) for i in range(genes)], size=n_genes)
    elif len(genes) and len(genes) >= len(c_coords) - 1:
        gene_names = np.full(n_genes, "", dtype="<U5")
        for i in range(len(genes)):
            where_cell_type = np.where(cell_types_index[point_cell_index] == i)[0]
            probabilities = np.full(len(genes), 0.2 / (len(genes) - 1))
            probabilities[i] = 0.8
            gene_names[where_cell_type] = np.random.choice(genes, len(where_cell_type), p=probabilities)
    else:
        gene_names = np.random.choice(genes, size=n_genes)

    gene_names = gene_names.astype(object)
    if add_nan_gene_name:
        gene_names[3] = np.nan  # Add a nan value for tests

    df = pd.DataFrame(
        {
            "x": points_coords[:, 0],
            "y": points_coords[:, 1],
            "z_": 1,  # TODO: add back as 'z'?
            "genes": gene_names,
        }
    )

    # apply an arbitrary transformation for a more complete test case
    affine = np.array([[pixel_size, 0, 100], [0, pixel_size, 600], [0, 0, 1]])
    df[["x", "y", "z_"]] = df[["x", "y", "z_"]] @ affine.T
    affine = Affine(affine, input_axes=["x", "y"], output_axes=["x", "y"]).inverse()

    df = dd.from_pandas(df, chunksize=2_000_000)
    misc_df = pd.DataFrame({"x": [0, 1], "y": [0, 1]})  # dummy dataframe for testing purposes

    points = {
        "transcripts": PointsModel.parse(
            df, transformations={"global": affine, "microns": Identity()}, feature_key="genes"
        ),
        "misc": PointsModel.parse(misc_df, transformations={"global": Identity()}),
    }
    if include_vertices:
        points["vertices"] = PointsModel.parse(vertices)

    sdata = SpatialData(
        images=images,
        points=points,
        shapes=shapes,
        attrs={SopaAttrs.TRANSCRIPTS: "transcripts"},
    )

    if include_image:
        sdata.attrs[SopaAttrs.CELL_SEGMENTATION] = "image"

    if include_he_image:
        sdata.attrs[SopaAttrs.TISSUE_SEGMENTATION] = "he_image"

    from ..spatial import assign_transcript_to_cell

    assign_transcript_to_cell(sdata, "transcripts", shapes_key, "cell_id", unassigned_value=0)

    sdata["transcripts"]["cell_id"] = sdata["transcripts"]["cell_id"].astype(int) - int(transcript_cell_id_as_merscope)

    if as_output:
        _add_table(sdata)

    return sdata
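
A minimal usage sketch; toy_dataset needs no input files, so it is handy for testing a pipeline end-to-end:

```python
import sopa

sdata = sopa.io.toy_dataset(length=1000)  # small synthetic dataset
print(sdata)
```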