Utils

Cache operations

`sopa.utils.get_cache_dir(sdata)`

Get the cache directory for a SpatialData object.

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object.	required

Returns:

Type	Description
`Path`	A `Path` to the cache directory.

Source code in sopa/utils/utils.py

def get_cache_dir(sdata: SpatialData) -> Path:
    """Return the cache directory of a SpatialData object, creating it if needed.

    The location is chosen as follows:

    - if `sdata` is backed, the cache lives inside its (resolved) zarr directory;
    - else, if `sdata.attrs` has no Sopa UID yet, a new UUID4 is generated, stored
      in `sdata.attrs`, and used as the name of a sub-directory of the home cache;
    - else, the sub-directory of the home cache named after the stored UID is used.

    Args:
        sdata: A `SpatialData` object.

    Returns:
        A `Path` to the cache directory (it exists when the function returns).
    """
    if sdata.is_backed():
        # Backed object: keep the cache inside the zarr directory itself
        cache_dir = sdata.path.resolve() / SopaFiles.SOPA_CACHE_DIR
    elif SopaAttrs.UID not in sdata.attrs:
        # First call on an in-memory object: register a new UID for later calls
        import uuid

        new_uid = str(uuid.uuid4())
        sdata.attrs[SopaAttrs.UID] = new_uid
        cache_dir = HOME_CACHE_DIR / new_uid
    else:
        # A UID was registered by a previous call: reuse the same directory
        cache_dir = HOME_CACHE_DIR / sdata.attrs[SopaAttrs.UID]

    cache_dir.mkdir(parents=True, exist_ok=True)
    return cache_dir

`sopa.utils.delete_cache(sdata=None)`

Delete the cache directory (the entire cache, or the cache of one specific SpatialData object).

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData \| None`	The SpatialData object whose cache is to be deleted. If None, the entire cache is deleted.	`None`

Source code in sopa/utils/utils.py

def delete_cache(sdata: SpatialData | None = None) -> None:
    """Delete the cache directory (the entire cache, or the cache of one specific SpatialData object).

    When `sdata` is provided, only its own cache directory (as returned by
    `get_cache_dir`) is removed. Otherwise, every sub-directory of the home cache
    directory is removed; plain files located directly in the home cache directory
    are left untouched. If the home cache directory does not exist, nothing is done.

    Args:
        sdata: The SpatialData object whose cache is to be deleted. If None, the entire cache is deleted.
    """
    import shutil

    if sdata is not None:
        shutil.rmtree(get_cache_dir(sdata))
        return

    # `iterdir` raises if the home cache was never created: nothing to delete then
    if not HOME_CACHE_DIR.is_dir():
        return

    # Snapshot the listing first, since entries are removed while looping
    for sub_dir in list(HOME_CACHE_DIR.iterdir()):
        if sub_dir.is_dir():
            shutil.rmtree(sub_dir)

`sopa.utils.delete_transcripts_patches_dirs(sdata)`

Delete the cache directories containing the transcript patches (for instance, for Baysor or ComSeg)

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object.	required

Source code in sopa/utils/utils.py

def delete_transcripts_patches_dirs(sdata: SpatialData):
    """Remove every transcript-patches cache directory of a SpatialData object.

    These directories are the ones listed by `get_transcripts_patches_dirs`
    (used, for instance, by Baysor or ComSeg).

    Args:
        sdata: A `SpatialData` object.
    """
    from shutil import rmtree

    patch_dirs = get_transcripts_patches_dirs(sdata)
    for directory in patch_dirs:
        rmtree(directory)

Accessing the elements

`sopa.utils.get_spatial_element(element_dict, key=None, return_key=False, as_spatial_image=False)`

Gets an element from a SpatialData object.

Parameters:

Name	Type	Description	Default
`element_dict`	`dict[str, SpatialElement]`	Dictionary whose values are spatial elements (e.g., `sdata.images`).	required
`key`	`str \| None`	Optional element key. If `None`, returns the only element (if only one).	`None`
`return_key`	`bool`	Whether to also return the key of the element.	`False`
`as_spatial_image`	`bool`	Whether to return the element as a `SpatialImage` (if it is a `DataTree`)	`False`

Returns:

Type	Description
`SpatialElement \| tuple[str, SpatialElement]`	If `return_key` is False, only the element is returned, else a tuple `(element_key, element)`

Source code in sopa/utils/utils.py

def get_spatial_element(
    element_dict: dict[str, SpatialElement],
    key: str | None = None,
    return_key: bool = False,
    as_spatial_image: bool = False,
) -> SpatialElement | tuple[str, SpatialElement]:
    """Gets an element from a SpatialData object.

    Args:
        element_dict: Dictionary whose values are spatial elements (e.g., `sdata.images`).
        key: Optional element key. If `None`, returns the only element (if only one).
        return_key: Whether to also return the key of the element.
        as_spatial_image: Whether to return the element as a `SpatialImage` (if it is a `DataTree`)

    Returns:
        If `return_key` is False, only the element is returned, else a tuple `(element_key, element)`

    Raises:
        AssertionError: If `element_dict` is empty, if `key` is provided but not found in
            `element_dict`, or if `key` is `None` and `element_dict` has more than one element.
    """
    assert len(element_dict), "No spatial element was found in the dict."

    if key is None:
        # No key provided: the choice is only unambiguous when there is exactly one element
        assert len(element_dict) == 1, (
            f"Multiple valid elements found: {', '.join(element_dict.keys())}. Provide an element key to denote which element you want to use."
        )
        key = next(iter(element_dict))
    else:
        assert key in element_dict, f"Spatial element '{key}' not found."

    return _return_element(element_dict, key, return_key, as_spatial_image)

`sopa.utils.get_spatial_image(sdata, key=None, return_key=False, valid_attr=SopaAttrs.CELL_SEGMENTATION)`

Gets a DataArray from a SpatialData object (if the image has multiple scales, `scale0` is returned)

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	SpatialData object.	required
`key`	`str \| None`	Optional image key. If `None`, returns the only image (if only one), or tries to find an image with `valid_attr`.	`None`
`return_key`	`bool`	Whether to also return the key of the image.	`False`
`valid_attr`	`str`	Attribute that the image must have to be considered valid.	`CELL_SEGMENTATION`

Returns:

Type	Description
`DataArray \| tuple[str, DataArray]`	If `return_key` is False, only the image is returned, else a tuple `(image_key, image)`

Source code in sopa/utils/utils.py

def get_spatial_image(
    sdata: SpatialData,
    key: str | None = None,
    return_key: bool = False,
    valid_attr: str = SopaAttrs.CELL_SEGMENTATION,
) -> DataArray | tuple[str, DataArray]:
    """Fetch an image of a SpatialData object through `get_spatial_element`.

    The lookup is delegated to `get_spatial_element` on `sdata.images`, with
    `as_spatial_image=True`.

    Args:
        sdata: SpatialData object.
        key: Optional image key. When falsy, the key stored in `sdata.attrs[valid_attr]` is used if present; otherwise the only image is returned (if there is exactly one).
        return_key: Whether to also return the key of the image.
        valid_attr: Name of the `sdata.attrs` entry that holds the default image key.

    Returns:
        If `return_key` is False, only the image is returned, else a tuple `(image_key, image)`
    """
    # An explicit key takes precedence over the key stored in the attrs
    image_key = key or sdata.attrs.get(valid_attr)

    return get_spatial_element(sdata.images, key=image_key, return_key=return_key, as_spatial_image=True)

`sopa.utils.get_boundaries(sdata, return_key=False, warn=False, key=None, table_key=None)`

Gets cell segmentation boundaries of a SpatialData object after running Sopa.

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A SpatialData object	required
`return_key`	`bool`	Whether to return the key of the shapes or not.	`False`
`warn`	`bool`	If `True`, prints a warning if no boundary is found. Else, raises an error.	`False`
`key`	`str \| None`	A valid `shapes_key` or None.	`None`
`table_key`	`str \| None`	Name of the table used to find the corresponding boundaries.	`None`

Returns:

Type	Description
`GeoDataFrame \| tuple[str, GeoDataFrame] \| None`	A `GeoDataFrame` containing the boundaries, or a tuple `(shapes_key, geo_df)`

Source code in sopa/utils/utils.py

def get_boundaries(
    sdata: SpatialData,
    return_key: bool = False,
    warn: bool = False,
    key: str | None = None,
    table_key: str | None = None,
) -> gpd.GeoDataFrame | tuple[str, gpd.GeoDataFrame] | None:
    """Gets cell segmentation boundaries of a SpatialData object after running Sopa.

    The shapes key is resolved in this order: the region annotated by the table
    `table_key` (if provided), then `key`, then the boundaries key stored in
    `sdata.attrs`, and finally the first of the known Sopa boundaries names for
    which `_try_get_boundaries` returns a result.

    Args:
        sdata: A SpatialData object
        return_key: Whether to return the key of the shapes or not.
        warn: If `True`, prints a warning if no boundary is found. Else, raises an error.
        key: A valid `shapes_key` or None.
        table_key: Name of the table used to find the corresponding boundaries.

    Returns:
        A `GeoDataFrame` containing the boundaries, or a tuple `(shapes_key, geo_df)`.
        If no boundary is found and `warn` is `True`, returns `None` (or `(None, None)`
        if `return_key` is `True`).

    Raises:
        ValueError: If no boundary is found and `warn` is `False`.
    """
    assert key is None or table_key is None, "Provide only one of `key` or `table_key`"

    if table_key is not None:
        # The table stores the name of the shapes it annotates under its "region" entry
        region = sdata.tables[table_key].uns[ATTRS_KEY]["region"]
        assert isinstance(region, str)
        return get_spatial_element(sdata.shapes, key=region, return_key=return_key)

    shapes_key = key or sdata.attrs.get(SopaAttrs.BOUNDARIES)
    if shapes_key is not None:
        return get_spatial_element(sdata.shapes, key=shapes_key, return_key=return_key)

    # Fall back on the known boundaries names, tried in this order
    candidates = (
        SopaKeys.PROSEG_BOUNDARIES,
        SopaKeys.BAYSOR_BOUNDARIES,
        SopaKeys.STARDIST_BOUNDARIES,
        SopaKeys.COMSEG_BOUNDARIES,
        SopaKeys.CELLPOSE_BOUNDARIES,
    )
    for candidate in candidates:
        found = _try_get_boundaries(sdata, candidate, return_key)
        if found is not None:
            return found

    error_message = "sdata object has no valid segmentation boundary. Consider running Sopa segmentation first."

    if warn:
        log.warning(error_message)
        return (None, None) if return_key else None

    raise ValueError(error_message)

`sopa.utils.get_intensities(sdata, table_key=SopaKeys.TABLE)`

Gets the intensity dataframe of shape n_obs x n_channels

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object.	required
`table_key`	`str`	Key of `sdata` containing the table from which intensities will be extracted.	`TABLE`

Returns:

Type	Description
`DataFrame \| None`	A pandas DataFrame containing the intensities, or `None` if no intensities are found.

Source code in sopa/utils/utils.py

def get_intensities(sdata: SpatialData, table_key: str = SopaKeys.TABLE) -> pd.DataFrame | None:
    """Gets the intensity dataframe of shape `n_obs x n_channels`

    Args:
        sdata: A `SpatialData` object.
        table_key: Key of `sdata` containing the table from which intensities will be extracted.

    Returns:
        A pandas DataFrame containing the intensities, or `None` if no intensities are found.
    """
    assert table_key in sdata.tables, f"No '{table_key}' found in sdata.tables"

    table = sdata.tables[table_key]
    sopa_uns = table.uns[SopaKeys.UNS_KEY]

    if not sopa_uns[SopaKeys.UNS_HAS_INTENSITIES]:
        return None

    # When the table also holds transcripts, the intensities are read from `obsm`;
    # otherwise the table itself is converted to a DataFrame
    has_transcripts = sopa_uns[SopaKeys.UNS_HAS_TRANSCRIPTS]
    return table.obsm[SopaKeys.INTENSITIES_OBSM] if has_transcripts else table.to_df()

`sopa.utils.get_channel_names(image, image_key=None)`

Get the channel names of an image or a SpatialData object.

Parameters:

Name	Type	Description	Default
`image`	`DataArray \| DataTree \| SpatialData`	Either a `DataArray`, a `DataTree`, or a `SpatialData` object. If a `SpatialData` object, the `image_key` argument can be used.	required
`image_key`	`str \| None`	If `image` is a SpatialData object, the key of the image to get the channel names from. If `None`, tries to get it automatically.	`None`

Returns:

Type	Description
`ndarray`	An array of channel names.

Source code in sopa/utils/image.py

def get_channel_names(image: DataArray | DataTree | SpatialData, image_key: str | None = None) -> np.ndarray:
    """Return the values of the `"c"` coordinate of an image.

    Args:
        image: Either a `DataArray`, a `DataTree`, or a `SpatialData` object. If a `SpatialData` object, the image is first retrieved with `get_spatial_image` and `image_key`.
        image_key: If `image` is a SpatialData object, the key of the image to get the channel names from. If `None`, tries to get it automatically.

    Returns:
        An array of channel names.

    Raises:
        ValueError: If the image is neither a `DataArray` nor a `DataTree`.
    """
    if isinstance(image, SpatialData):
        image = get_spatial_image(image, key=image_key)

    if isinstance(image, DataTree):
        # For a multi-scale image, the channels are read from the `scale0` node
        return image["scale0"].coords["c"].values

    if not isinstance(image, DataArray):
        raise ValueError(f"Image must be a DataTree or a DataArray. Found: {type(image)}")

    return image.coords["c"].values

`sopa.utils.set_sopa_attrs(sdata, cell_segmentation_key=None, tissue_segmentation_key=None, transcripts_key=None, boundaries_key=None, bins_table_key=None)`

Stores in the SpatialData object the keys of the main elements used in Sopa. This allows Sopa to retrieve which elements should be used for each operation.

Info

The attrs are already stored in sdata.attrs when reading data with sopa.io. Use this function only if you already stored on disk a SpatialData object without the attrs (with sopa<2.0.0).

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object.	required
`cell_segmentation_key`	`str \| None`	Name of the image to be used for cell segmentation (highest resolution image).	`None`
`tissue_segmentation_key`	`str \| None`	Name of the image to be used for tissue segmentation (medium/low resolution image).	`None`
`transcripts_key`	`str \| None`	Name of the points containing the transcripts.	`None`
`boundaries_key`	`str \| None`	Name of the shapes containing the cell boundaries.	`None`
`bins_table_key`	`str \| None`	Name of the table containing the bins (e.g., for Visium HD data).	`None`

Source code in sopa/utils/utils.py

def set_sopa_attrs(
    sdata: SpatialData,
    cell_segmentation_key: str | None = None,
    tissue_segmentation_key: str | None = None,
    transcripts_key: str | None = None,
    boundaries_key: str | None = None,
    bins_table_key: str | None = None,
):
    """Stores in the `SpatialData` object the keys of the main elements used in Sopa.
    This allows Sopa to retrieve which elements should be used for each operation.

    !!! info
        The attrs are already stored in `sdata.attrs` when reading data with `sopa.io`.
        Use this function only if you already stored on disk a SpatialData object without the attrs (with `sopa<2.0.0`).

    Each provided key must exist in the corresponding container of `sdata`
    (images, points, shapes or tables). Keys left to `None` are ignored.

    Args:
        sdata: A `SpatialData` object.
        cell_segmentation_key: Name of the image to be used for cell segmentation (highest resolution image).
        tissue_segmentation_key: Name of the image to be used for tissue segmentation (medium/low resolution image).
        transcripts_key: Name of the points containing the transcripts.
        boundaries_key: Name of the shapes containing the cell boundaries.
        bins_table_key: Name of the table containing the bins (e.g., for Visium HD data).
    """
    # (provided key, name of the `sdata` container that must hold it, attr to set)
    requested = (
        (cell_segmentation_key, "images", SopaAttrs.CELL_SEGMENTATION),
        (tissue_segmentation_key, "images", SopaAttrs.TISSUE_SEGMENTATION),
        (transcripts_key, "points", SopaAttrs.TRANSCRIPTS),
        (boundaries_key, "shapes", SopaAttrs.BOUNDARIES),
        (bins_table_key, "tables", SopaAttrs.BINS_TABLE),
    )

    for element_key, container_name, attr_name in requested:
        if element_key is None:
            continue

        assert element_key in getattr(sdata, container_name)
        sdata.attrs[attr_name] = element_key

Transformations and scaling

`sopa.utils.to_intrinsic(sdata, element, target_element)`

Transforms a SpatialElement into the intrinsic coordinate system of another SpatialElement

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A SpatialData object	required
`element`	`SpatialElement \| str`	`SpatialElement` to transform, or its key. We recommend choosing a vector element (for instance, points or shapes).	required
`target_element`	`SpatialElement \| str`	`SpatialElement` of the target coordinate system, or its key.	required

Returns:

Type	Description
`SpatialElement`	The `element` with coordinates transformed to the intrinsic coordinate system of `target_element`.

Source code in sopa/utils/utils.py

def to_intrinsic(
    sdata: SpatialData, element: SpatialElement | str, target_element: SpatialElement | str
) -> SpatialElement:
    """Transforms a `SpatialElement` into the intrinsic coordinate system of another `SpatialElement`

    Args:
        sdata: A SpatialData object
        element: `SpatialElement` to transform, or its key. We recommend choosing a vector element (for instance, points or shapes).
        target_element: `SpatialElement` of the target coordinate system, or its key.

    Returns:
        The `element` with coordinates transformed to the intrinsic coordinate system of `target_element`.

    Raises:
        ValueError: If the transformation between the two elements cannot be retrieved
            and the two elements share no coordinate system.
    """
    element = sdata[element] if isinstance(element, str) else element
    target_element = sdata[target_element] if isinstance(target_element, str) else target_element

    # Shortcut: only the first coordinate system on which `element` has an Identity
    # transformation is inspected. If the target is also an Identity on it, both
    # elements already share the same intrinsic coordinates.
    for cs, transformation in get_transformation(element, get_all=True).items():
        if isinstance(transformation, Identity):
            target_transformations = get_transformation(target_element, get_all=True)
            if isinstance(target_transformations.get(cs), Identity):
                return element  # no transformation needed
            break

    try:
        transformation = get_transformation_between_coordinate_systems(sdata, element, target_element)
    except Exception:  # not a bare `except`, so KeyboardInterrupt / SystemExit still propagate
        # Fallback: go through a coordinate system shared by both elements
        transformations1 = get_transformation(element, get_all=True)
        transformations2 = get_transformation(target_element, get_all=True)

        common_keys = list(set(transformations1.keys()) & set(transformations2.keys()))

        if not common_keys:
            raise ValueError("No common coordinate system found between the two elements")

        # Prefer "global" when available, else use any shared coordinate system
        cs = "global" if "global" in common_keys else common_keys.pop()

        # element -> shared coordinate system -> intrinsic coordinates of the target
        transformation = Sequence([transformations1[cs], transformations2[cs].inverse()])

    return spatialdata.transform(element, transformation=transformation, maintain_positioning=True)

`sopa.utils.scale_dtype(arr, dtype)`

Change the dtype of an array but keep the scale compared to the type maximum value.

Example

For an array of dtype uint8 being transformed to np.uint16, the value 255 will become 65535

Parameters:

Name	Type	Description	Default
`arr`	`ndarray`	A `numpy` array	required
`dtype`	`dtype`	Target `numpy` data type	required

Returns:

Type	Description
`ndarray`	A scaled `numpy` array with the dtype provided.

Source code in sopa/utils/image.py

def scale_dtype(arr: np.ndarray, dtype: np.dtype) -> np.ndarray:
    """Convert an integer array to another integer dtype, rescaling its values.

    Values are multiplied by the ratio between the maximum value of the target
    dtype and the maximum value of the source dtype, then cast to the target dtype.

    !!! note "Example"
        For an array of dtype `uint8` being transformed to `np.uint16`, the value `255` will become `65535`

    Args:
        arr: A `numpy` array
        dtype: Target `numpy` data type

    Returns:
        A scaled `numpy` array with the dtype provided. The input array itself is
        returned when it already has the target dtype.
    """
    source_dtype = arr.dtype

    assert_is_integer_dtype(source_dtype)
    assert_is_integer_dtype(dtype)

    if source_dtype == dtype:
        return arr

    ratio = np.iinfo(dtype).max / np.iinfo(source_dtype).max
    rescaled = arr * ratio

    return rescaled.astype(dtype)

`sopa.utils.resize_numpy(arr, scale_factor, dims, output_shape)`

Resize a numpy image

Parameters:

Name	Type	Description	Default
`arr`	`ndarray`	a `numpy` array	required
`scale_factor`	`float`	Scale factor of resizing, e.g. `2` will decrease the width by a factor of 2	required
`dims`	`list[str]`	List of dimension names. Only `"x"` and `"y"` are resized.	required
`output_shape`	`list[int]`	Size of the output array	required

Returns:

Type	Description
`ndarray`	Resized array

Source code in sopa/utils/image.py

def resize_numpy(arr: np.ndarray, scale_factor: float, dims: list[str], output_shape: list[int]) -> np.ndarray:
    """Resize a numpy image with a diagonal affine transform.

    Args:
        arr: a `numpy` array
        scale_factor: Scale factor of resizing, e.g. `2` will decrease the width by a factor of 2
        dims: List of dimension names. Only `"x"` and `"y"` are resized.
        output_shape: Size of the output array

    Returns:
        Resized array
    """
    # Diagonal of the affine matrix: `scale_factor` on the spatial axes, 1 elsewhere
    diagonal = [scale_factor if dim in ("x", "y") else 1 for dim in dims]
    matrix = np.diag(diagonal)

    resized = dask_image.ndinterp.affine_transform(arr, matrix=matrix, output_shape=output_shape)
    return resized.compute()

Cell-type annotation

`sopa.utils.tangram_annotate(sdata, adata_sc, cell_type_key, reference_preprocessing=None, bag_size=10000, max_obs_reference=10000, density_prior='uniform', **kwargs)`

Tangram multi-level annotation. Tangram is run on multiple bags of cells to decrease the RAM usage.

Info

You need to install tangram-sc to use this function. You can install it via pip install tangram-sc.

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object	required
`adata_sc`	`AnnData`	A scRNAseq annotated reference	required
`cell_type_key`	`str`	Key of `adata_sc.obs` containing the cell types. For multi-level annotation, provide other levels like such: if `cell_type_key = "ct"`, then `"ct_level1"` and `"ct_level2"` are the two next levels	required
`reference_preprocessing`	`str \| None`	Preprocessing method used on the reference. Can be `"log1p"` (normalize_total + log1p) or `"normalized"` (just normalize_total). By default, consider that no processing was applied (raw counts)	`None`
`bag_size`	`int`	Size of each bag on which tangram will be run. Use smaller bags to lower the RAM usage	`10000`
`max_obs_reference`	`int`	Maximum number of cells used in `adata_sc` at each level. Decrease it to lower the RAM usage.	`10000`
`density_prior`	`str`	Density prior used in Tangram. Can be `"uniform"` or `"rna_count_based"`.	`'uniform'`

Source code in sopa/utils/annotation.py

def tangram_annotate(
    sdata: SpatialData,
    adata_sc: AnnData,
    cell_type_key: str,
    reference_preprocessing: str | None = None,
    bag_size: int = 10_000,
    max_obs_reference: int = 10_000,
    density_prior: str = "uniform",
    **kwargs,
):
    """Tangram multi-level annotation. Tangram is run on multiple bags of cells to decrease the RAM usage.

    !!! info
        You need to install `tangram-sc` to use this function. You can install it via `pip install tangram-sc`.

    Args:
        sdata: A `SpatialData` object
        adata_sc: A scRNAseq annotated reference
        cell_type_key: Key of `adata_sc.obs` containing the cell types. For multi-level annotation, provide other levels like such: if `cell_type_key = "ct"`, then `"ct_level1"` and `"ct_level2"` are the two next levels
        reference_preprocessing: Preprocessing method used on the reference. Can be `"log1p"` (normalize_total + log1p) or `"normalized"` (just normalize_total). By default, consider that no processing was applied (raw counts)
        bag_size: Size of each bag on which tangram will be run. Use smaller bags to lower the RAM usage
        max_obs_reference: Maximum number of cells used in `adata_sc` at each level. Decrease it to lower the RAM usage.
        density_prior: Density prior used in Tangram. Can be `"uniform"` or `"rna_count_based"`.
        **kwargs: Extra keyword arguments forwarded as-is to `MultiLevelAnnotation`.
    """
    assert SopaKeys.TABLE in sdata.tables, f"No '{SopaKeys.TABLE}' found in sdata.tables"

    # The annotation is run on the main Sopa table of the SpatialData object
    spatial_table = sdata.tables[SopaKeys.TABLE]

    annotator = MultiLevelAnnotation(
        spatial_table,
        adata_sc,
        cell_type_key,
        reference_preprocessing,
        bag_size,
        max_obs_reference,
        density_prior,
        **kwargs,
    )
    annotator.run()

`sopa.utils.higher_z_score(adata, marker_cell_dict, cell_type_key='cell_type')`

Simple channel-based cell-type annotation using a marker-to-population dictionary

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object	required
`marker_cell_dict`	`dict`	Dictionary whose keys are channels, and values are the corresponding populations.	required
`cell_type_key`	`str`	Key of `adata.obs` where annotations will be stored	`'cell_type'`

Source code in sopa/utils/annotation.py

def higher_z_score(adata: AnnData, marker_cell_dict: dict, cell_type_key: str = "cell_type"):
    """Simple channel-based annotation using a marker-to-population dictionary

    The preprocessed intensities (see `preprocess_fluo`) are stored in `adata.obsm`.
    Each cell is then assigned the population whose marker has the highest
    preprocessed value among the markers of `marker_cell_dict`.

    Args:
        adata: An `AnnData` object
        marker_cell_dict: Dictionary whose keys are channels, and values are the corresponding populations.
        cell_type_key: Key of `adata.obs` where annotations will be stored
    """
    adata.obsm[SopaKeys.Z_SCORES] = preprocess_fluo(adata)

    channels = list(marker_cell_dict)
    populations = np.array(list(marker_cell_dict.values()))

    # For each cell, position (among `channels`) of the marker with the highest score
    marker_scores = adata.obsm[SopaKeys.Z_SCORES][channels].values
    best_marker_indices = marker_scores.argmax(axis=1)

    adata.obs[cell_type_key] = populations[best_marker_indices]
    adata.uns[SopaKeys.UNS_KEY][SopaKeys.UNS_CELL_TYPES] = [cell_type_key]

    log.info(f"Annotation counts: {adata.obs[cell_type_key].value_counts()}")

`sopa.utils.preprocess_fluo(adata)`

Preprocess fluorescence data. For each column \(X\), we compute \(asinh(\frac{X}{5Q(0.2, X)})\) and apply standardization

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object	required

Returns:

Type	Description
`DataFrame`	A dataframe of preprocessed channels intensities

Source code in sopa/utils/annotation.py

def preprocess_fluo(adata: AnnData) -> pd.DataFrame:
    """Preprocess fluorescence data. For each column $X$, we compute $asinh(\\frac{X}{5Q(0.2, X)})$ and apply standardization

    The intensities are read from `adata.obsm` when available there, else from
    `adata.to_df()`. For a column whose divider is zero, the column maximum is
    used as divider instead.

    Args:
        adata: An `AnnData` object

    Returns:
        A dataframe of preprocessed channels intensities
    """
    if SopaKeys.INTENSITIES_OBSM in adata.obsm:
        intensities = adata.obsm[SopaKeys.INTENSITIES_OBSM]
    else:
        intensities = adata.to_df()

    # Per-column divider: five times the 0.2-quantile of the column
    dividers = 5 * np.quantile(intensities, 0.2, axis=0)

    # A zero divider is replaced by the maximum of the column
    is_zero = dividers == 0
    dividers[is_zero] = intensities.max(axis=0)[is_zero]

    transformed = np.arcsinh(intensities / dividers)

    centered = transformed - transformed.mean(0)
    return centered / transformed.std(0)