Spatial operations

`sopa.spatial.sjoin(sdata, left_element, right_element, how='left', target_coordinate_system=None, **kwargs)`

Spatial join of two shapes GeoDataFrames, as in geopandas.sjoin.

Shapes are automatically aligned on the same coordinate system (which can be chosen using the target_coordinate_system argument).

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object	required
`left_element`	`str \| GeoDataFrame`	The name of a GeoDataFrame in `sdata`, or the GeoDataFrame itself	required
`right_element`	`str \| GeoDataFrame`	The name of a GeoDataFrame in `sdata`, or the GeoDataFrame itself	required
`how`	`str`	The GeoPandas type of join. By default, left geometries are retained.	`'left'`
`target_coordinate_system`	`str \| None`	The name of the coordinate system on which the shapes will be transformed. By default, uses the intrinsic coordinate system of the `left_element`.	`None`
`**kwargs`	`int`	Kwargs provided to the geopandas.sjoin function	`{}`

Returns:

Type	Description
`GeoDataFrame`	The joined `GeoDataFrame`

Source code in sopa/spatial/join.py

def sjoin(
    sdata: SpatialData,
    left_element: str | gpd.GeoDataFrame,
    right_element: str | gpd.GeoDataFrame,
    how: str = "left",
    target_coordinate_system: str | None = None,
    **kwargs: int,
) -> gpd.GeoDataFrame:
    """Spatial join of two `shapes` GeoDataFrames, as in [geopandas.sjoin](https://geopandas.org/en/stable/docs/reference/api/geopandas.sjoin.html).

    Shapes are automatically aligned on the same coordinate system (which can be chosen using the `target_coordinate_system` argument).

    Args:
        sdata: A `SpatialData` object
        left_element: The name of a GeoDataFrame in `sdata`, or the GeoDataFrame itself
        right_element: The name of a GeoDataFrame in `sdata`, or the GeoDataFrame itself
        how: The GeoPandas type of join. By default, left geometries are retained.
        target_coordinate_system: The name of the coordinate system on which the shapes will be transformed. By default, uses the intrinsic coordinate system of the `left_element`.
        **kwargs: Kwargs provided to the [geopandas.sjoin](https://geopandas.org/en/stable/docs/reference/api/geopandas.sjoin.html) function

    Returns:
        The joined `GeoDataFrame`
    """
    if isinstance(left_element, str):
        left_element = sdata[left_element]
    if isinstance(right_element, str):
        right_element = sdata[right_element]

    if target_coordinate_system is None:
        right_element = to_intrinsic(sdata, right_element, left_element)
    else:
        left_element = sdata.transform_element_to_coordinate_system(left_element, target_coordinate_system)
        right_element = sdata.transform_element_to_coordinate_system(right_element, target_coordinate_system)

    return gpd.sjoin(left_element, right_element, how=how, **kwargs)

`sopa.spatial.assign_transcript_to_cell(sdata, points_key=None, shapes_key=None, key_added='cell_index', unassigned_value=None)`

Assign each transcript to a cell based on the cell boundaries. It updates the transcript dataframe by adding a new column.

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object	required
`points_key`	`str \| None`	Key of the spatialdata object containing the transcript dataframe.	`None`
`shapes_key`	`str \| None`	Key of the spatialdata object containing the cell boundaries.	`None`
`key_added`	`str`	Key that will be added to the transcript dataframe containing the cell ID	`'cell_index'`
`unassigned_value`	`int \| None`	If `None`, transcripts that are not inside any cell will be assigned to NaN. If an integer, this value will be used as the unassigned value.	`None`

Source code in sopa/spatial/join.py

def assign_transcript_to_cell(
    sdata: SpatialData,
    points_key: str | None = None,
    shapes_key: str | None = None,
    key_added: str = "cell_index",
    unassigned_value: int | None = None,
):
    """Assign each transcript to a cell based on the cell boundaries. It updates the transcript dataframe by adding a new column.

    Args:
        sdata: A `SpatialData` object
        points_key: Key of the spatialdata object containing the transcript dataframe.
        shapes_key: Key of the spatialdata object containing the cell boundaries.
        key_added: Key that will be added to the transcript dataframe containing the cell ID
        unassigned_value: If `None`, transcripts that are not inside any cell will be assigned to NaN. If an integer, this value will be used as the unassigned value.
    """
    df = get_spatial_element(sdata.points, points_key or sdata.attrs.get(SopaAttrs.TRANSCRIPTS))
    geo_df = get_boundaries(sdata, key=shapes_key)

    geo_df = to_intrinsic(sdata, geo_df, df)
    geo_df = geo_df.reset_index()

    get_cell_id = partial(_get_cell_id, geo_df, unassigned_value=unassigned_value)

    if isinstance(df, dd.DataFrame):
        df[key_added] = df.map_partitions(get_cell_id)
    else:
        raise TypeError(f"Invalid dataframe type: {type(df)}")

`sopa.spatial.mean_distance(adata, group_key, target_group_key=None, ignore_zeros=False, correction=False, relative_change=False)`

Mean distance between two groups (typically, between cell-types, or between cell-types and domains).

Warning

When computing distances for multiple slides, differences in cell-type proportions can introduce bias. For example, if group G is more abundant in one slide, the average distance from other groups to G will naturally appear smaller in that slide, simply due to higher local density. If you want to perform fair comparisons across samples, consider using the correction argument to adjust for this bias.

Note

The distance is a number of hops, i.e. a distance of 10 between a pDC and a T cell means that there are 10 cells on the closest path from one to the other cell.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object, or a `SpatialData object`	required
`group_key`	`str`	Key of `adata.obs` containing the groups	required
`target_group_key`	`str \| None`	Key of `adata.obs` containing the target groups (by default, uses `group_key`)	`None`
`ignore_zeros`	`bool`	If `True`, a cell distance to its own group is not 0, but the distance to the closest other cell of the same group.	`False`
`correction`	`bool`	If `True`, the computed distance is corrected by subtracting the expected distance under a null model where target labels are randomly distributed on a grid. This correction accounts for uneven label representation, which could otherwise deflate distances to more abundant target cell types.	`False`
`relative_change`	`bool`	Only used if `correction` is `True`. If `True`, the correction is applied as a relative change, i.e. the distance difference is also divided by the expected distance under the null model defined previously.	`False`

Returns:

Type	Description
`DataFrame`	`DataFrame` of shape `n_groups * n_groups_target` of mean hop-distances

Source code in sopa/spatial/distance.py

def mean_distance(
    adata: AnnData,
    group_key: str,
    target_group_key: str | None = None,
    ignore_zeros: bool = False,
    correction: bool = False,
    relative_change: bool = False,
) -> pd.DataFrame:
    """Mean distance between two groups (typically, between cell-types, or between cell-types and domains).

    !!! warning
        When computing distances for multiple slides, differences in cell-type proportions can introduce bias. For example, if group G is more abundant in one slide, the average distance from other groups to G will naturally appear smaller in that slide, simply due to higher local density. If you want to perform fair comparisons across samples, consider using the `correction` argument to adjust for this bias.

    Note:
        The distance is a number of hops, i.e. a distance of 10 between a pDC and a T cell means that there are 10 cells on the closest path from one to the other cell.

    Args:
        adata: An `AnnData` object, or a `SpatialData object`
        group_key: Key of `adata.obs` containing the groups
        target_group_key: Key of `adata.obs` containing the target groups (by default, uses `group_key`)
        ignore_zeros: If `True`, a cell distance to its own group is not 0, but the distance to the closest other cell of the same group.
        correction: If `True`, the computed distance is corrected by subtracting the expected distance under a null model where target labels are randomly distributed on a grid. This correction accounts for uneven label representation, which could otherwise deflate distances to more abundant target cell types.
        relative_change: Only used if `correction` is `True`. If `True`, the correction is applied as a relative change, i.e. the distance difference is also divided by the expected distance under the null model defined previously.

    Returns:
        `DataFrame` of shape `n_groups * n_groups_target` of mean hop-distances
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    target_group_key = group_key if target_group_key is None else target_group_key

    df_distances = cells_to_groups(adata, target_group_key, None, ignore_zeros=ignore_zeros)

    if ignore_zeros:
        df_distances.replace(0, np.nan, inplace=True)

    df_distances[group_key] = adata.obs[group_key]
    df_distances = df_distances.groupby(group_key, observed=False).mean()
    df_distances.columns.name = target_group_key

    if correction:
        log.info("Correcting the distances is still experimental, don't hesitate to report issues or feedback")
        proportions = adata.obs[target_group_key].value_counts(normalize=True)
        likelihood = proportions.map(lambda p: _random_distance_likelihood(p, ignore_zeros))

        df_distances -= likelihood
        if relative_change:
            df_distances /= likelihood

        if (not ignore_zeros) and (group_key == target_group_key):
            # distance to cell to same group is 0, we don't correct it
            df_distances.values[np.diag_indices_from(df_distances)] = 0

    return df_distances

`sopa.spatial.vectorize_niches(adata, niche_key, buffer='auto', perc_area_th=0.05)`

Converts the niches to shapely polygons, and put into a GeoDataFrame. Note that each niche can appear multiple times, as they can be separated by other niches ; in this case, we call them different "components" of the same niche ID.

Plot components

You can show niches components with GeoPandas

gdf = vectorize_niches(adata, niche_key)
gdf.plot(column=niche_key)

Parameters:

Name	Type	Description	Default
`adata`	`AnnData \| SpatialData`	An `AnnData` object, or a `SpatialData object`	required
`niche_key`	`str`	Key of `adata.obs` containing the niches	required
`buffer`	`int \| str`	Expansion radius applied on components. By default, `3 * mean_distance_neighbors`	`'auto'`
`perc_area_th`	`float`	For each niche, components whose area is less than `perc_area_th * max_component_area` will be removed	`0.05`

Returns:

Type	Description
`GeoDataFrame`	A `GeoDataFrame` with geometries for each niche component. We also compute the area/perimeter/roundness of each component.

Source code in sopa/spatial/morpho.py

def vectorize_niches(
    adata: AnnData | SpatialData,
    niche_key: str,
    buffer: int | str = "auto",
    perc_area_th: float = 0.05,
) -> gpd.GeoDataFrame:
    """Converts the niches to shapely polygons, and put into a `GeoDataFrame`. Note that each niche can appear multiple times, as they can be separated by other niches ; in this case, we call them different "components" of the same niche ID.

    Plot components:
        You can show niches components with GeoPandas
        ```py
        gdf = vectorize_niches(adata, niche_key)
        gdf.plot(column=niche_key)
        ```

    Args:
        adata: An `AnnData` object, or a `SpatialData object`
        niche_key: Key of `adata.obs` containing the niches
        buffer: Expansion radius applied on components. By default, `3 * mean_distance_neighbors`
        perc_area_th: For each niche, components whose area is less than `perc_area_th * max_component_area` will be removed

    Returns:
        A `GeoDataFrame` with geometries for each niche component. We also compute the area/perimeter/roundness of each component.
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    _check_has_delaunay(adata)
    data = {"geometry": [], niche_key: []}

    delaunay = Delaunay(adata.obsm["spatial"])
    connectivities = adata.obsp["spatial_connectivities"]
    values = adata.obs[niche_key].values

    keep = (
        (connectivities[delaunay.simplices[:, 0], delaunay.simplices[:, 1]].A1 == 1)
        & (connectivities[delaunay.simplices[:, 0], delaunay.simplices[:, 2]].A1 == 1)
        & (connectivities[delaunay.simplices[:, 1], delaunay.simplices[:, 2]].A1 == 1)
        & (values[delaunay.simplices[:, 0]] == values[delaunay.simplices[:, 1]])
        & (values[delaunay.simplices[:, 0]] == values[delaunay.simplices[:, 2]])
        & (values[delaunay.simplices[:, 0]] == values[delaunay.simplices[:, 2]])
    )  # Keep simplices that are in the original Delaunay graph, and which are not in between different value categories

    neighbors = np.where(np.isin(delaunay.neighbors, np.where(~keep)[0]), -1, delaunay.neighbors)

    simplices_to_visit = set(np.where(keep)[0])

    while simplices_to_visit:
        component = Component(adata, delaunay, neighbors)
        component.visit(simplices_to_visit)

        data["geometry"].append(component.polygon)
        data[niche_key].append(values[component.first_vertex_index()])

    gdf = gpd.GeoDataFrame(data)

    if buffer is not None and buffer != 0:
        gdf = _clean_components(adata, gdf, niche_key, buffer)

    gdf[SopaKeys.GEOMETRY_LENGTH] = gdf.length
    gdf[SopaKeys.GEOMETRY_AREA] = gdf.area
    gdf[SopaKeys.GEOMETRY_ROUNDNESS] = 4 * np.pi * gdf[SopaKeys.GEOMETRY_AREA] / gdf[SopaKeys.GEOMETRY_LENGTH] ** 2

    # Remove minor components (compared to the largest component of its corresponding niche)
    gdf = gdf[gdf.area >= gdf[niche_key].map(gdf.groupby(niche_key).area.max() * perc_area_th)]

    return gdf

`sopa.spatial.niches_geometry_stats(adata, niche_key, aggregation='min', key_added_suffix='_distance_to_niche_', **vectorize_niches_kwargs)`

Computes statistics over niches geometries

Details

n_components: Number of connected component of a niche (a component is a group of neighbor cells with the same niche attribute)
length: Mean distance of the exterior/boundary of the components of a niche
area: Mean area of the components of a niche
roundness: Float value between 0 and 1. The higher the value, the closer to a circle. Computed via 4 * pi * area / length**2
mean_distance_to_niche_X: mean distance to the niche (between the two closest points of the niches)

Parameters:

Name	Type	Description	Default
`adata`	`AnnData \| SpatialData`	An `AnnData` object, or a `SpatialData object`	required
`niche_key`	`str`	Key of `adata.obs` containing the niches	required
`aggregation`	`str \| list[str]`	Aggregation mode. Either one string such as `"min"`, or a list such as `["mean", "min"]`.	`'min'`
`key_added_suffix`	`str`	Suffix added in the DataFrame columns. Defaults to "distance_to_niche".	`'_distance_to_niche_'`
`vectorize_niches_kwargs`	`str`	Kwargs to the `sopa.spatial.vectorize_niches` function	`{}`

Returns:

Type	Description
`GeoDataFrame`	A `DataFrame` of shape `n_niches * n_statistics`

Source code in sopa/spatial/morpho.py

def niches_geometry_stats(
    adata: AnnData | SpatialData,
    niche_key: str,
    aggregation: str | list[str] = "min",
    key_added_suffix: str = "_distance_to_niche_",
    **vectorize_niches_kwargs: str,
) -> gpd.GeoDataFrame:
    """Computes statistics over niches geometries

    Details:
        - `n_components`: Number of connected component of a niche (a component is a group of neighbor cells with the same niche attribute)
        - `length`: Mean distance of the exterior/boundary of the components of a niche
        - `area`: Mean area of the components of a niche
        - `roundness`: Float value between 0 and 1. The higher the value, the closer to a circle. Computed via `4 * pi * area / length**2`
        - `mean_distance_to_niche_X`: mean distance to the niche (between the two closest points of the niches)

    Args:
        adata: An `AnnData` object, or a `SpatialData object`
        niche_key: Key of `adata.obs` containing the niches
        aggregation: Aggregation mode. Either one string such as `"min"`, or a list such as `["mean", "min"]`.
        key_added_suffix: Suffix added in the DataFrame columns. Defaults to "_distance_to_niche_".
        vectorize_niches_kwargs: Kwargs to the `sopa.spatial.vectorize_niches` function

    Returns:
        A `DataFrame` of shape `n_niches * n_statistics`
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    gdf = vectorize_niches(adata, niche_key, **vectorize_niches_kwargs)
    value_counts = gdf[niche_key].value_counts()

    assert len(gdf), "No niche geometry found, stats can't be computed"

    log.info(f"Computing pairwise distances between {len(gdf)} components")
    pairwise_distances: pd.DataFrame = gdf.geometry.apply(lambda g: gdf.distance(g))
    pairwise_distances[niche_key] = gdf[niche_key]

    if isinstance(aggregation, str):
        aggregation = [aggregation]

    for aggr in aggregation:
        df = pairwise_distances.groupby(niche_key).aggregate(aggr).T
        df.columns = [f"{aggr}{key_added_suffix}{c}" for c in df.columns]
        gdf[df.columns] = df

    df_stats: pd.DataFrame = gdf.groupby(niche_key)[gdf.columns[2:]].mean()
    df_stats.insert(0, SopaKeys.GEOMETRY_COUNT, value_counts)

    return df_stats

`sopa.spatial.cells_to_groups(adata, group_key, key_added_prefix=None, ignore_zeros=False)`

Compute the hop-distance between each cell and a cell category/group.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object, or a `SpatialData object`	required
`group_key`	`str`	Key of `adata.obs` containing the groups	required
`key_added_prefix`	`str \| None`	Prefix to the key added in `adata.obsm`. If `None`, will return the `DataFrame` instead of saving it.	`None`
`ignore_zeros`	`bool`	If `True`, a cell distance to its own group is not 0, but the distance to the closest other cell of the same group.	`False`

Returns:

Type	Description
`DataFrame \| None`	A `Dataframe` of shape `n_obs * n_groups`, or `None` if `key_added_prefix` was provided (in this case, the dataframe is saved in `"{key_added_prefix}{group_key}"`)

Source code in sopa/spatial/distance.py

def cells_to_groups(
    adata: AnnData,
    group_key: str,
    key_added_prefix: str | None = None,
    ignore_zeros: bool = False,
) -> pd.DataFrame | None:
    """Compute the hop-distance between each cell and a cell category/group.

    Args:
        adata: An `AnnData` object, or a `SpatialData object`
        group_key: Key of `adata.obs` containing the groups
        key_added_prefix: Prefix to the key added in `adata.obsm`. If `None`, will return the `DataFrame` instead of saving it.
        ignore_zeros: If `True`, a cell distance to its own group is not 0, but the distance to the closest other cell of the same group.

    Returns:
        A `Dataframe` of shape `n_obs * n_groups`, or `None` if `key_added_prefix` was provided (in this case, the dataframe is saved in `"{key_added_prefix}{group_key}"`)
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    _check_has_delaunay(adata)

    distances_to_groups = {}

    if adata.obs[group_key].dtype.name != "category":
        log.info(f"Converting adata.obs['{group_key}'] to category")
        adata.obs[group_key] = adata.obs[group_key].astype("category")

    for group_id in tqdm(adata.obs[group_key].cat.categories):
        group_nodes = np.where(adata.obs[group_key] == group_id)[0]

        distances = np.full(adata.n_obs, np.nan)

        if not ignore_zeros:
            distances[group_nodes] = 0
            visited = set(group_nodes)
        else:
            visited = set()

        queue = group_nodes
        current_distance = 0

        while len(queue):
            distances[queue] = current_distance

            neighbors = set(adata.obsp["spatial_connectivities"][queue].indices)
            queue = np.array(list(neighbors - visited))
            visited |= neighbors

            current_distance += 1

        distances_to_groups[group_id] = distances

    df_distances = pd.DataFrame(distances_to_groups, index=adata.obs_names)

    if key_added_prefix is None:
        return df_distances
    adata.obsm[f"{key_added_prefix}{group_key}"] = df_distances

`sopa.spatial.spatial_neighbors(adata, radius, library_key=None, percentile=None, set_diag=False)`

Create a Delaunay graph from spatial coordinates. This function comes from squidpy.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData \| SpatialData`	AnnData object	required
`radius`	`tuple[float, float] \| None`	tuple that prunes the final graph to only contain edges in interval `[min(radius), max(radius)]`. If `None`, all edges are kept.	required
`library_key`	`str \| None`	Optional batch key in adata.obs	`None`
`percentile`	`float \| None`	Percentile of the distances to use as threshold.	`None`
`set_diag`	`bool`	Whether to set the diagonal of the spatial connectivities to `1.0`.	`False`

Source code in sopa/spatial/_build.py

def spatial_neighbors(
    adata: AnnData | SpatialData,
    radius: tuple[float, float] | None,
    library_key: str | None = None,
    percentile: float | None = None,
    set_diag: bool = False,
):
    """Create a Delaunay graph from spatial coordinates. This function comes from [squidpy](https://squidpy.readthedocs.io/en/latest/api/squidpy.gr.spatial_neighbors.html#squidpy.gr.spatial_neighbors).

    Args:
        adata: AnnData object
        radius: tuple that prunes the final graph to only contain edges in interval `[min(radius), max(radius)]`. If `None`, all edges are kept.
        library_key: Optional batch key in adata.obs
        percentile: Percentile of the distances to use as threshold.
        set_diag: Whether to set the diagonal of the spatial connectivities to `1.0`.
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    assert radius is None or len(radius) == 2, "Radius is expected to be a tuple (min_radius, max_radius)"

    log.info("Computing delaunay graph")

    if library_key is not None:
        assert adata.obs[library_key].dtype == "category"
        libs = adata.obs[library_key].cat.categories
        make_index_unique(adata.obs_names)
    else:
        libs = [None]

    _build_fun = partial(
        _spatial_neighbor,
        set_diag=set_diag,
        radius=radius,
        percentile=percentile,
    )

    if library_key is not None:
        mats: list[tuple[spmatrix, spmatrix]] = []
        ixs = []  # type: ignore[var-annotated]
        for lib in libs:
            ixs.extend(np.where(adata.obs[library_key] == lib)[0])
            mats.append(_build_fun(adata[adata.obs[library_key] == lib]))
        ixs = np.argsort(ixs)  # type: ignore[assignment] # invert
        Adj = block_diag([m[0] for m in mats], format="csr")[ixs, :][:, ixs]
        Dst = block_diag([m[1] for m in mats], format="csr")[ixs, :][:, ixs]
    else:
        Adj, Dst = _build_fun(adata)

    adata.obsp["spatial_connectivities"] = Adj
    adata.obsp["spatial_distances"] = Dst

`sopa.spatial.prepare_network(adata, cell_type_key, niche_key, clip_weight=3, node_colors=('#5c7dc4', '#f05541'), node_sizes=(1.3, 5))`

Create a dataframe representing weights between cell-types and/or niches. This can be later use to plot a cell-type/niche represention of a whole slide using the netgraph library.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object	required
`cell_type_key`	`str`	Key of `adata.obs` containing the cell types	required
`niche_key`	`str`	Key of `adata.obs` containing the niches	required
`clip_weight`	`float`	Maximum weight	`3`
`node_colors`	`tuple[str]`	Tuple of (cell-type color, niche color)	`('#5c7dc4', '#f05541')`
`node_sizes`	`tuple[float \| int]`	Tuple of (cell-type size, niche size)	`(1.3, 5)`

Returns:

Type	Description
`tuple[DataFrame, dict, dict, dict]`	A DataFrame of weights between cell-types and/or niches, and three dict for netgraph display

Source code in sopa/spatial/distance.py

def prepare_network(
    adata: AnnData,
    cell_type_key: str,
    niche_key: str,
    clip_weight: float = 3,
    node_colors: tuple[str] = ("#5c7dc4", "#f05541"),
    node_sizes: tuple[float | int] = (1.3, 5),
) -> tuple[pd.DataFrame, dict, dict, dict]:
    """Create a dataframe representing weights between cell-types and/or niches.
    This can be later use to plot a cell-type/niche represention of a whole slide
    using the netgraph library.

    Args:
        adata: An `AnnData` object
        cell_type_key: Key of `adata.obs` containing the cell types
        niche_key: Key of `adata.obs` containing the niches
        clip_weight: Maximum weight
        node_colors: Tuple of (cell-type color, niche color)
        node_sizes: Tuple of (cell-type size, niche size)

    Returns:
        A DataFrame of weights between cell-types and/or niches, and three dict for netgraph display
    """
    node_color, node_size, node_shape = {}, {}, {}

    log.info("Computing all distances for the 4 pairs of categories")
    weights = mean_distance(adata, cell_type_key)
    top_right = mean_distance(adata, cell_type_key, niche_key)
    bottom_left = mean_distance(adata, niche_key, cell_type_key)
    bottom_right = mean_distance(adata, niche_key, niche_key)

    for pop in weights.index:
        node_color[pop] = node_colors[0]
        node_size[pop] = node_sizes[0]
        node_shape[pop] = "o"

    for niche in bottom_right.index:
        node_color[niche] = node_colors[1]
        node_size[niche] = node_sizes[1]
        node_shape[niche] = "h"

    # assemble dataframe per-block
    bottom_left[bottom_right.columns] = bottom_right
    weights[top_right.columns] = top_right
    weights = pd.concat([weights, bottom_left], axis=0).copy()

    # convert distances to symmetric weights
    weights = 1 / weights
    np.fill_diagonal(weights.values, 0)
    weights = weights.clip(0, clip_weight)
    weights = (weights.T + weights) / 2

    return weights, node_color, node_size, node_shape