New Upstream Release - python-anndata

Ready changes

Summary

Merged new upstream version: 0.9.1 (was: 0.8.0).
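
Highlights of the user-visible changes in this diff: AnnData 0.9 no longer coerces `X` to float32, the `dtype` constructor argument now raises a PendingDeprecationWarning (slated for removal in 0.10), and `AnnData.concatenate` emits a FutureWarning pointing to `anndata.concat`. A minimal sketch of the dtype behaviour change, assuming anndata 0.9.1 is installed:

    import numpy as np
    import anndata as ad

    X = np.ones((3, 2), dtype=np.float64)

    # 0.8.0 silently converted X to float32 unless dtype= was given;
    # 0.9.1 keeps the input dtype and warns if dtype= is passed explicitly.
    adata = ad.AnnData(X)
    assert adata.X.dtype == np.float64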

Diff

diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml
index be36b9a..fc277d2 100644
--- a/.azure-pipelines.yml
+++ b/.azure-pipelines.yml
@@ -4,18 +4,22 @@ trigger:
 variables:
   PIP_CACHE_DIR: $(Pipeline.Workspace)/.pip
   RUN_COVERAGE: no
+  PRERELEASE_DEPENDENCIES: no
 
 jobs:
 - job: PyTest
   pool:
-    vmImage: 'ubuntu-18.04'
+    vmImage: 'ubuntu-22.04'
   strategy:
     matrix:
-      Python39:
-        python.version: '3.9'
+      Python310:
+        python.version: '3.10'
         RUN_COVERAGE: yes
-      Python37:
-        python.version: '3.7'
+      Python38:
+        python.version: '3.8'
+      PreRelease:
+        python.version: '3.10'
+        PRERELEASE_DEPENDENCIES: yes
   steps:
   - task: UsePythonVersion@0
     inputs:
@@ -36,6 +40,14 @@ jobs:
       pip install pytest-cov wheel
       pip install .[dev,test]
     displayName: 'Install dependencies'
+    condition: eq(variables['PRERELEASE_DEPENDENCIES'], 'no')
+
+  - script: |
+      python -m pip install --pre --upgrade pip
+      pip install --pre pytest-cov wheel
+      pip install --pre .[dev,test]
+    displayName: 'Install dependencies release candidates'
+    condition: eq(variables['PRERELEASE_DEPENDENCIES'], 'yes')
 
   - script: |
       pip list
@@ -70,13 +82,13 @@ jobs:
 
 - job: CheckBuild
   pool:
-    vmImage: 'ubuntu-18.04'
+    vmImage: 'ubuntu-22.04'
   steps:
 
   - task: UsePythonVersion@0
     inputs:
-      versionSpec: '3.9'
-    displayName: 'Use Python 3.9'
+      versionSpec: '3.10'
+    displayName: 'Use Python 3.10'
 
   - script: |
       python -m pip install --upgrade pip
diff --git a/.flake8 b/.flake8
deleted file mode 100644
index 56c251e..0000000
--- a/.flake8
+++ /dev/null
@@ -1,41 +0,0 @@
-# Can't yet be moved to the pyproject.toml due to https://github.com/PyCQA/flake8/issues/234
-[flake8]
-max-line-length = 88
-ignore = # module imported but unused -> required for Scanpys API
-         F401,
-         # line break before a binary operator -> black does not adhere to PEP8
-         W503,
-         # line break occured after a binary operator -> black does not adhere to PEP8
-         W504,
-         # line too long -> we accept long comment lines; black gets rid of long code lines
-         E501,
-         # whitespace before : -> black does not adhere to PEP8
-         E203,
-         # missing whitespace after ,', ';', or ':' -> black does not adhere to PEP8
-         E231,
-         # module level import not at top of file -> required to circumvent circular imports for Scanpys API
-         E402,
-         # continuation line over-indented for hanging indent -> black does not adhere to PEP8
-         E126,
-         # E266 too many leading '#' for block comment -> Scanpy allows them for comments into sections
-         E262,
-         # inline comment should start with '# ' -> Scanpy allows them for specific explanations
-         E266,
-         # Do not assign a lambda expression, use a def -> Scanpy allows lambda expression assignments,
-         E731,
-         # allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation
-         E741
- per-file-ignores =
-    # F811 Redefinition of unused name from line, does not play nice with pytest fixtures
-    tests/test*.py: F811
-    # F821 Undefined name, can't import AnnData or it'd be a circular import
-    anndata/compat/_overloaded_dict.py: F821
-    # E721 comparing types, but we specifically are checking that we aren't getting subtypes (views)
-    anndata/tests/test_readwrite.py: E721
-exclude =
-    .git,
-    __pycache__,
-    build,
-    docs/_build,
-    dist,
-
diff --git a/.github/actions/close-stale.yml b/.github/actions/close-stale.yml
new file mode 100644
index 0000000..ead958d
--- /dev/null
+++ b/.github/actions/close-stale.yml
@@ -0,0 +1,18 @@
+name: "Close stale issues"
+on:
+  schedule:
+    - cron: "0 2 * * *"
+  workflow_dispatch:
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@v5
+        with:
+          days-before-issue-stale: -1 # We don't want to mark issues as stale in this action
+          days-before-issue-close: 14
+          days-before-pr-close: -1 # don't close PRs
+          stale-issue-label: stale
+          any-of-labels: "needs info"
+          debug-only: true # enable dry-run, remove when we know from the logs it's working.
diff --git a/.github/actions/label-stale.yml b/.github/actions/label-stale.yml
new file mode 100644
index 0000000..0b2151d
--- /dev/null
+++ b/.github/actions/label-stale.yml
@@ -0,0 +1,21 @@
+name: "Label stale issues"
+on:
+  schedule:
+    - cron: "30 1 * * *"
+  workflow_dispatch:
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@v5
+        with:
+          days-before-issue-stale: 60
+          days-before-pr-stale: -1 # We don't want to mark PRs as stale
+          days-before-close: -1 # We don't want to close issues in this action
+          stale-issue-label: stale
+          exempt-issue-labels: pinned,enhancement
+          stale-issue-message: |
+            This issue has been automatically marked as stale because it has not had recent activity.
+            Please add a comment if you want to keep the issue open. Thank you for your contributions!
+          debug-only: true # enable dry-run, remove when we know from the logs it's working.
diff --git a/.github/workflows/close-stale.yml b/.github/workflows/close-stale.yml
new file mode 100644
index 0000000..ead958d
--- /dev/null
+++ b/.github/workflows/close-stale.yml
@@ -0,0 +1,18 @@
+name: "Close stale issues"
+on:
+  schedule:
+    - cron: "0 2 * * *"
+  workflow_dispatch:
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@v5
+        with:
+          days-before-issue-stale: -1 # We don't want to mark issues as stale in this action
+          days-before-issue-close: 14
+          days-before-pr-close: -1 # don't close PRs
+          stale-issue-label: stale
+          any-of-labels: "needs info"
+          debug-only: true # enable dry-run, remove when we know from the logs it's working.
diff --git a/.github/workflows/label-stale.yml b/.github/workflows/label-stale.yml
new file mode 100644
index 0000000..0b2151d
--- /dev/null
+++ b/.github/workflows/label-stale.yml
@@ -0,0 +1,21 @@
+name: "Label stale issues"
+on:
+  schedule:
+    - cron: "30 1 * * *"
+  workflow_dispatch:
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@v5
+        with:
+          days-before-issue-stale: 60
+          days-before-pr-stale: -1 # We don't want to mark PRs as stale
+          days-before-close: -1 # We don't want to close issues in this action
+          stale-issue-label: stale
+          exempt-issue-labels: pinned,enhancement
+          stale-issue-message: |
+            This issue has been automatically marked as stale because it has not had recent activity.
+            Please add a comment if you want to keep the issue open. Thank you for your contributions!
+          debug-only: true # enable dry-run, remove when we know from the logs it's working.
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
deleted file mode 100644
index 7233479..0000000
--- a/.github/workflows/pre-commit.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-name: pre-commit
-
-on:
-  pull_request:
-  push:
-    branches: [master]
-
-jobs:
-  pre-commit:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v2
-    - uses: actions/setup-python@v2
-    - uses: pre-commit/action@v2.0.0
diff --git a/.gitignore b/.gitignore
index a383679..4053f74 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,10 @@ __pycache__/
 /data/
 /tmp.zarr/
 test.h5ad
+.coverage
+
+# jupyter
+.ipynb_checkpoints
 
 # docs
 /docs/generated/
@@ -23,4 +27,5 @@ test.h5ad
 
 # IDEs
 /.idea/
+/.vscode/
 
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..78ec4b3
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "docs/tutorials/notebooks"]
+	path = docs/tutorials/notebooks
+	url = https://github.com/scverse/anndata-tutorials
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e59e241..730383f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,14 +1,11 @@
 repos:
 -   repo: https://github.com/psf/black
-    rev: 21.8b0
+    rev: 23.1.0
     hooks:
     -   id: black
--   repo: https://github.com/PyCQA/flake8
-    rev: 3.9.2
-    hooks:
-    -   id: flake8
--   repo: https://github.com/pre-commit/mirrors-autopep8
-    rev: v1.5.7
-    hooks:
-    -   id: autopep8
-        args: ["-i"]
+- repo: https://github.com/charliermarsh/ruff-pre-commit
+  # Ruff version.
+  rev: 'v0.0.248'
+  hooks:
+    - id: ruff
+      args: ["--fix"]
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 5d5cad2..f3231a5 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -1,12 +1,17 @@
 version: 2
 build:
-  image: latest
+  os: ubuntu-20.04
+  tools:
+    python: '3.10'
 sphinx:
   configuration: docs/conf.py
 python:
-  version: 3.7
   install:
   - method: pip
     path: .
     extra_requirements:
     - doc
+submodules:
+    include:
+        - "docs/tutorials/notebooks"
+    recursive: true
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..028bbe8
--- /dev/null
+++ b/README.md
@@ -0,0 +1,25 @@
+[![Build Status](https://dev.azure.com/scverse/anndata/_apis/build/status/scverse.anndata?branchName=main)](https://dev.azure.com/scverse/anndata/_build)
+[![Conda](https://img.shields.io/conda/vn/conda-forge/anndata.svg)](https://anaconda.org/conda-forge/anndata)
+[![Coverage](https://codecov.io/gh/scverse/anndata/branch/main/graph/badge.svg?token=IN1mJN1Wi8)](https://codecov.io/gh/scverse/anndata)
+[![Docs](https://readthedocs.com/projects/icb-anndata/badge/?version=latest)](https://anndata.readthedocs.io)
+[![PyPI](https://img.shields.io/pypi/v/anndata.svg)](https://pypi.org/project/anndata)
+[![PyPIDownloadsMonth](https://img.shields.io/pypi/dm/scanpy?logo=PyPI&color=blue)](https://pypi.org/project/anndata)
+[![PyPIDownloadsTotal](https://pepy.tech/badge/anndata)](https://pepy.tech/project/anndata)
+[![Stars](https://img.shields.io/github/stars/scverse/anndata?logo=GitHub&color=yellow)](https://github.com/scverse/anndata/stargazers)
+
+```{image} https://raw.githubusercontent.com/scverse/anndata/main/docs/_static/img/anndata_schema.svg
+:align: right
+:width: 350px
+```
+
+# anndata - Annotated data
+
+anndata is a Python package for handling annotated data matrices in memory and on disk, positioned between pandas and xarray. anndata offers a broad range of computationally efficient features including, among others, sparse data support, lazy operations, and a PyTorch interface.
+
+- Discuss development on [GitHub](https://github.com/scverse/anndata).
+- Read the [documentation](https://anndata.readthedocs.io).
+- Ask questions on the [scverse Discourse](https://discourse.scverse.org).
+- Install via `pip install anndata` or `conda install anndata -c conda-forge`.
+- Consider citing the [anndata paper](https://doi.org/10.1101/2021.12.16.473007).
+- See [Scanpy's documentation](https://scanpy.readthedocs.io/) for usage
+  related to single cell data. anndata was initially built for Scanpy.
diff --git a/README.rst b/README.rst
deleted file mode 100644
index e2812ee..0000000
--- a/README.rst
+++ /dev/null
@@ -1,31 +0,0 @@
-|Stars| |PyPI| |PyPIDownloadsTotal| |PyPIDownloadsMonth| |Conda| |Docs| |Build Status| |Coverage|
-
-.. |Stars| image:: https://img.shields.io/github/stars/theislab/anndata?logo=GitHub&color=yellow
-   :target: https://github.com/theislab/anndata/stargazers
-.. |PyPI| image:: https://img.shields.io/pypi/v/anndata.svg
-   :target: https://pypi.org/project/anndata
-.. |PyPIDownloadsTotal| image:: https://pepy.tech/badge/anndata
-   :target: https://pepy.tech/project/anndata
-.. |PyPIDownloadsMonth| image:: https://img.shields.io/pypi/dm/scanpy?logo=PyPI&color=blue
-   :target: https://pypi.org/project/anndata
-.. |Conda| image:: https://img.shields.io/conda/vn/conda-forge/anndata.svg
-   :target: https://anaconda.org/conda-forge/anndata
-.. |Docs| image:: https://readthedocs.com/projects/icb-anndata/badge/?version=latest
-   :target: https://anndata.readthedocs.io
-.. |Build Status| image:: https://dev.azure.com/theislab/anndata/_apis/build/status/theislab.anndata?branchName=master
-   :target: https://dev.azure.com/theislab/anndata/_build
-.. |Coverage| image:: https://codecov.io/gh/theislab/anndata/branch/master/graph/badge.svg?token=IN1mJN1Wi8
-   :target: https://codecov.io/gh/theislab/anndata
-
-
-anndata - Annotated data
-========================
-
-anndata is a Python package for handling annotated data matrices in memory and on disk, positioned between pandas and xarray. anndata offers a broad range of computationally efficient features including, among others, sparse data support, lazy operations, and a PyTorch interface.
-
-* Read the `documentation <https://anndata.readthedocs.io>`_.
-* Ask questions on the `scverse Discourse <https://discourse.scverse.org>`_.
-* Install via ``pip install anndata`` or ``conda install anndata -c conda-forge``.
-
-.. would be nice to have the schema also on GitHub, but it’s much too wide there, hence need to duplicate description
-.. GitHub doesn’t plan to resolve scaling images: https://github.com/github/markup/issues/295
diff --git a/anndata/__init__.py b/anndata/__init__.py
index ad7e4c6..ba7861d 100644
--- a/anndata/__init__.py
+++ b/anndata/__init__.py
@@ -18,7 +18,12 @@ if not within_flit():
         read_mtx,
         read_zarr,
     )
-    from ._warnings import OldFormatWarning, WriteWarning, ImplicitModificationWarning
+    from ._warnings import (
+        OldFormatWarning,
+        WriteWarning,
+        ImplicitModificationWarning,
+        ExperimentalFeatureWarning,
+    )
 
     # backwards compat / shortcut for default format
     from ._io import read_h5ad as read
diff --git a/anndata/_core/aligned_mapping.py b/anndata/_core/aligned_mapping.py
index 2c84307..b993257 100644
--- a/anndata/_core/aligned_mapping.py
+++ b/anndata/_core/aligned_mapping.py
@@ -1,18 +1,22 @@
 from abc import ABC, abstractmethod
 from collections import abc as cabc
+from copy import copy
 from typing import Union, Optional, Type, ClassVar, TypeVar  # Special types
 from typing import Iterator, Mapping, Sequence  # ABCs
 from typing import Tuple, List, Dict  # Generic base types
+import warnings
 
 import numpy as np
 import pandas as pd
 from scipy.sparse import spmatrix
 
-from ..utils import deprecated, ensure_df_homogeneous
+from ..utils import deprecated, ensure_df_homogeneous, dim_len
 from . import raw, anndata
 from .views import as_view
 from .access import ElementRef
 from .index import _subset
+from anndata.compat import AwkArray
+from anndata._warnings import ExperimentalFeatureWarning
 
 
 OneDIdx = Union[Sequence[int], Sequence[bool], slice]
@@ -46,15 +50,37 @@ class AlignedMapping(cabc.MutableMapping, ABC):
 
     def _validate_value(self, val: V, key: str) -> V:
         """Raises an error if value is invalid"""
+        if isinstance(val, AwkArray):
+            warnings.warn(
+                "Support for Awkward Arrays is currently experimental. "
+                "Behavior may change in the future. Please report any issues you may encounter!",
+                ExperimentalFeatureWarning,
+                # stacklevel=3,
+            )
+            # Prevent from showing up every time an awkward array is used
+            # You'd think `once` works, but it doesn't at the repl and in notebooks
+            warnings.filterwarnings(
+                "ignore",
+                category=ExperimentalFeatureWarning,
+                message="Support for Awkward Arrays is currently experimental.*",
+            )
         for i, axis in enumerate(self.axes):
-            if self.parent.shape[axis] != val.shape[i]:
+            if self.parent.shape[axis] != dim_len(val, i):
                 right_shape = tuple(self.parent.shape[a] for a in self.axes)
-                raise ValueError(
-                    f"Value passed for key {key!r} is of incorrect shape. "
-                    f"Values of {self.attrname} must match dimensions "
-                    f"{self.axes} of parent. Value had shape {val.shape} while "
-                    f"it should have had {right_shape}."
-                )
+                actual_shape = tuple(dim_len(val, a) for a, _ in enumerate(self.axes))
+                if actual_shape[i] is None and isinstance(val, AwkArray):
+                    raise ValueError(
+                        f"The AwkwardArray is of variable length in dimension {i}.",
+                        f"Try ak.to_regular(array, {i}) before including the array in AnnData",
+                    )
+                else:
+                    raise ValueError(
+                        f"Value passed for key {key!r} is of incorrect shape. "
+                        f"Values of {self.attrname} must match dimensions "
+                        f"{self.axes} of parent. Value had shape {actual_shape} while "
+                        f"it should have had {right_shape}."
+                    )
+
         if not self._allow_df and isinstance(val, pd.DataFrame):
             name = self.attrname.title().rstrip("s")
             val = ensure_df_homogeneous(val, f"{name} {key!r}")
@@ -84,7 +110,11 @@ class AlignedMapping(cabc.MutableMapping, ABC):
     def copy(self):
         d = self._actual_class(self.parent, self._axis)
         for k, v in self.items():
-            d[k] = v.copy()
+            if isinstance(v, AwkArray):
+                # Shallow copy since awkward array buffers are immutable
+                d[k] = copy(v)
+            else:
+                d[k] = v.copy()
         return d
 
     def _view(self, parent: "anndata.AnnData", subset_idx: I):
diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py
index e48873b..38680b9 100644
--- a/anndata/_core/anndata.py
+++ b/anndata/_core/anndata.py
@@ -10,7 +10,7 @@ from functools import partial, singledispatch
 from pathlib import Path
 from os import PathLike
 from textwrap import dedent
-from typing import Any, Union, Optional  # Meta
+from typing import Any, Union, Optional, Literal  # Meta
 from typing import Iterable, Sequence, Mapping, MutableMapping  # Generic ABCs
 from typing import Tuple, List  # Generic
 
@@ -45,17 +45,13 @@ from .views import (
 )
 from .sparse_dataset import SparseDataset
 from .. import utils
-from ..utils import convert_to_dict, ensure_df_homogeneous
+from ..utils import convert_to_dict, ensure_df_homogeneous, dim_len
 from ..logging import anndata_logger as logger
 from ..compat import (
     ZarrArray,
     ZappyArray,
     DaskArray,
-    Literal,
-    _slice_uns_sparse_matrices,
     _move_adj_mtx,
-    _overloaded_uns,
-    OverloadedDict,
 )
 
 
@@ -103,7 +99,7 @@ def _check_2d_shape(X):
 @singledispatch
 def _gen_dataframe(anno, length, index_names):
     if anno is None or len(anno) == 0:
-        return pd.DataFrame(index=pd.RangeIndex(0, length, name=None).astype(str))
+        return pd.DataFrame({}, index=pd.RangeIndex(0, length, name=None).astype(str))
     for index_name in index_names:
         if index_name in anno:
             return pd.DataFrame(
@@ -167,8 +163,6 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         If passing a :class:`~numpy.ndarray`, it needs to have a structured datatype.
     layers
         Key-indexed multi-dimensional arrays aligned to dimensions of `X`.
-    dtype
-        Data type used for storage.
     shape
         Shape tuple (#observations, #variables). Can only be provided if `X` is `None`.
     filename
@@ -272,7 +266,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         varm: Optional[Union[np.ndarray, Mapping[str, Sequence[Any]]]] = None,
         layers: Optional[Mapping[str, Union[np.ndarray, sparse.spmatrix]]] = None,
         raw: Optional[Mapping[str, Any]] = None,
-        dtype: Optional[Union[np.dtype, str]] = None,
+        dtype: Optional[Union[np.dtype, type, str]] = None,
         shape: Optional[Tuple[int, int]] = None,
         filename: Optional[PathLike] = None,
         filemode: Optional[Literal["r", "r+"]] = None,
@@ -340,17 +334,14 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         self._layers = adata_ref.layers._view(self, (oidx, vidx))
         self._obsp = adata_ref.obsp._view(self, oidx)
         self._varp = adata_ref.varp._view(self, vidx)
-        # Speical case for old neighbors, backwards compat. Remove in anndata 0.8.
-        uns_new = _slice_uns_sparse_matrices(
-            copy(adata_ref._uns), self._oidx, adata_ref.n_obs
-        )
         # fix categories
-        self._remove_unused_categories(adata_ref.obs, obs_sub, uns_new)
-        self._remove_unused_categories(adata_ref.var, var_sub, uns_new)
+        uns = copy(adata_ref._uns)
+        self._remove_unused_categories(adata_ref.obs, obs_sub, uns)
+        self._remove_unused_categories(adata_ref.var, var_sub, uns)
         # set attributes
         self._obs = DataFrameView(obs_sub, view_args=(self, "obs"))
         self._var = DataFrameView(var_sub, view_args=(self, "var"))
-        self._uns = uns_new
+        self._uns = uns
         self._n_obs = len(self.obs)
         self._n_vars = len(self.var)
 
@@ -452,27 +443,20 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
                 raise ValueError("`shape` needs to be `None` if `X` is not `None`.")
             _check_2d_shape(X)
             # if type doesn’t match, a copy is made, otherwise, use a view
-            if dtype is None and X.dtype != np.float32:
+            if dtype is not None:
                 warnings.warn(
-                    f"X.dtype being converted to np.float32 from {X.dtype}. In the next "
-                    "version of anndata (0.9) conversion will not be automatic. Pass "
-                    "dtype explicitly to avoid this warning. Pass "
-                    "`AnnData(X, dtype=X.dtype, ...)` to get the future behavour.",
-                    FutureWarning,
-                    stacklevel=3,
+                    "The dtype argument will be deprecated in anndata 0.10.0",
+                    PendingDeprecationWarning,
                 )
-                dtype = np.float32
-            elif dtype is None:
-                dtype = np.float32
-            if issparse(X) or isinstance(X, ma.MaskedArray):
-                # TODO: maybe use view on data attribute of sparse matrix
-                #       as in readwrite.read_10x_h5
-                if X.dtype != np.dtype(dtype):
+                if issparse(X) or isinstance(X, ma.MaskedArray):
+                    # TODO: maybe use view on data attribute of sparse matrix
+                    #       as in readwrite.read_10x_h5
+                    if X.dtype != np.dtype(dtype):
+                        X = X.astype(dtype)
+                elif isinstance(X, (ZarrArray, DaskArray)):
                     X = X.astype(dtype)
-            elif isinstance(X, ZarrArray):
-                X = X.astype(dtype)
-            else:  # is np.ndarray or a subclass, convert to true np.ndarray
-                X = np.array(X, dtype, copy=False)
+                else:  # is np.ndarray or a subclass, convert to true np.ndarray
+                    X = np.array(X, dtype, copy=False)
             # data matrix and shape
             self._X = X
             self._n_obs, self._n_vars = self._X.shape
@@ -774,7 +758,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         else:
             if self.is_view:
                 self._init_as_actual(self.copy())
-            self._raw = Raw(value)
+            self._raw = Raw(self, X=value.X, var=value.var, varm=value.varm)
 
     @raw.deleter
     def raw(self):
@@ -824,7 +808,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         # fmt: off
         if (
             not isinstance(value, pd.RangeIndex)
-            and not infer_dtype(value) in ("string", "bytes")
+            and infer_dtype(value) not in ("string", "bytes")
         ):
             sample = list(value[: min(len(value), 5)])
             warnings.warn(dedent(
@@ -860,7 +844,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
 
     @obs.deleter
     def obs(self):
-        self.obs = pd.DataFrame(index=self.obs_names)
+        self.obs = pd.DataFrame({}, index=self.obs_names)
 
     @property
     def obs_names(self) -> pd.Index:
@@ -883,7 +867,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
 
     @var.deleter
     def var(self):
-        self.var = pd.DataFrame(index=self.var_names)
+        self.var = pd.DataFrame({}, index=self.var_names)
 
     @property
     def var_names(self) -> pd.Index:
@@ -901,7 +885,6 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         uns = self._uns
         if self.is_view:
             uns = DictView(uns, view_args=(self, "_uns"))
-        uns = _overloaded_uns(self, uns)
         return uns
 
     @uns.setter
@@ -910,7 +893,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
             raise ValueError(
                 "Only mutable mapping types (e.g. dict) are allowed for `.uns`."
             )
-        if isinstance(value, (OverloadedDict, DictView)):
+        if isinstance(value, DictView):
             value = value.copy()
         if self.is_view:
             self._init_as_actual(self.copy())
@@ -992,7 +975,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
     @property
     def varp(self) -> Union[PairwiseArrays, PairwiseArraysView]:
         """\
-        Pairwise annotation of observations,
+        Pairwise annotation of variables/features,
         a mutable mapping with array-like values.
 
         Stores for each key a two or higher-dimensional :class:`~numpy.ndarray`
@@ -1079,7 +1062,11 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
             else:
                 # change from memory to backing-mode
                 # write the content of self to disk
-                self.write(filename, force_dense=True)
+                if self.raw is not None:
+                    as_dense = ("X", "raw/X")
+                else:
+                    as_dense = ("X",)
+                self.write(filename, as_dense=as_dense)
             # open new file for accessing
             self.file.open(filename, "r+")
             # as the data is stored on disk, we can safely set self._X to None
@@ -1157,10 +1144,10 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
             raise ValueError("Only list-like `categories` is supported.")
         if key in self.obs:
             old_categories = self.obs[key].cat.categories.tolist()
-            self.obs[key].cat.rename_categories(categories, inplace=True)
+            self.obs[key] = self.obs[key].cat.rename_categories(categories)
         elif key in self.var:
             old_categories = self.var[key].cat.categories.tolist()
-            self.var[key].cat.rename_categories(categories, inplace=True)
+            self.var[key] = self.var[key].cat.rename_categories(categories)
         else:
             raise ValueError(f"{key} is neither in `.obs` nor in `.var`.")
         # this is not a good solution
@@ -1219,7 +1206,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
                 # TODO: We should only check if non-null values are unique, but
                 # this would break cases where string columns with nulls could
                 # be written as categorical, but not as string.
-                # Possible solution: https://github.com/theislab/anndata/issues/504
+                # Possible solution: https://github.com/scverse/anndata/issues/504
                 if len(c.categories) >= len(c):
                     continue
                 # Ideally this could be done inplace
@@ -1244,11 +1231,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         Same as `adata = adata[:, index]`, but inplace.
         """
         adata_subset = self[:, index].copy()
-        if adata_subset._has_X():
-            dtype = adata_subset.X.dtype
-        else:
-            dtype = None
-        self._init_as_actual(adata_subset, dtype=dtype)
+        self._init_as_actual(adata_subset)
 
     def _inplace_subset_obs(self, index: Index1D):
         """\
@@ -1257,11 +1240,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         Same as `adata = adata[index, :]`, but inplace.
         """
         adata_subset = self[index].copy()
-        if adata_subset._has_X():
-            dtype = adata_subset.X.dtype
-        else:
-            dtype = None
-        self._init_as_actual(adata_subset, dtype=dtype)
+        self._init_as_actual(adata_subset)
 
     # TODO: Update, possibly remove
     def __setitem__(
@@ -1313,7 +1292,6 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
             varp=self.obsp.copy(),
             filename=self.filename,
             layers={k: t_csr(v) for k, v in self.layers.items()},
-            dtype=self.X.dtype.name if X is not None else "float32",
         )
 
     T = property(transpose)
@@ -1467,10 +1445,8 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
                 new[key] = getattr(self, key).copy()
         if "X" in kwargs:
             new["X"] = kwargs["X"]
-            new["dtype"] = new["X"].dtype
         elif self._has_X():
             new["X"] = self.X.copy()
-            new["dtype"] = new["X"].dtype
         if "uns" in kwargs:
             new["uns"] = kwargs["uns"]
         else:
@@ -1481,8 +1457,13 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
             new["raw"] = self.raw.copy()
         return AnnData(**new)
 
-    def to_memory(self) -> "AnnData":
-        """Load backed AnnData object into memory.
+    def to_memory(self, copy=False) -> "AnnData":
+        """Return a new AnnData object with all backed arrays loaded into memory.
+
+        Params
+        ------
+            copy:
+                Whether the arrays that are already in-memory should be copied.
 
         Example
         -------
@@ -1493,19 +1474,33 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
             backed = anndata.read_h5ad("file.h5ad", backed="r")
             mem = backed[backed.obs["cluster"] == "a", :].to_memory()
         """
-        if not self.isbacked:
-            raise ValueError("Object is already in memory.")
-        else:
-            elems = {"X": to_memory(self.X)}
-            if self.raw is not None:
-                elems["raw"] = {
-                    "X": to_memory(self.raw.X),
-                    "var": self.raw.var,
-                    "varm": self.raw.varm,
-                }
-            adata = self._mutated_copy(**elems)
+        new = {}
+        for attr_name in [
+            "X",
+            "obs",
+            "var",
+            "obsm",
+            "varm",
+            "obsp",
+            "varp",
+            "layers",
+            "uns",
+        ]:
+            attr = getattr(self, attr_name, None)
+            if attr is not None:
+                new[attr_name] = to_memory(attr, copy)
+
+        if self.raw is not None:
+            new["raw"] = {
+                "X": to_memory(self.raw.X, copy),
+                "var": to_memory(self.raw.var, copy),
+                "varm": to_memory(self.raw.varm, copy),
+            }
+
+        if self.isbacked:
             self.file.close()
-        return adata
+
+        return AnnData(**new)
 
     def copy(self, filename: Optional[PathLike] = None) -> "AnnData":
         """Full copy, optionally on disk."""
@@ -1628,12 +1623,12 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
             obs: 'anno1', 'anno2', 'batch'
             var: 'annoA-0', 'annoA-1', 'annoA-2', 'annoB-2'
         >>> adata.X
-        array([[2., 3.],
-               [5., 6.],
-               [3., 2.],
-               [6., 5.],
-               [3., 2.],
-               [6., 5.]], dtype=float32)
+        array([[2, 3],
+               [5, 6],
+               [3, 2],
+               [6, 5],
+               [3, 2],
+               [6, 5]])
         >>> adata.obs
              anno1 anno2 batch
         s1-0    c1   NaN     0
@@ -1670,9 +1665,9 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
                [nan,  3.,  2.,  1.],
                [nan,  6.,  5.,  4.],
                [nan,  3.,  2.,  1.],
-               [nan,  6.,  5.,  4.]], dtype=float32)
+               [nan,  6.,  5.,  4.]])
         >>> outer.X.sum(axis=0)
-        array([nan, 25., 23., nan], dtype=float32)
+        array([nan, 25., 23., nan])
         >>> import pandas as pd
         >>> Xdf = pd.DataFrame(outer.X, columns=outer.var_names)
         >>> Xdf
@@ -1688,7 +1683,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         b    25.0
         c    23.0
         d    10.0
-        dtype: float32
+        dtype: float64
 
         One way to deal with missing values is to use masked arrays:
 
@@ -1708,10 +1703,9 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
                 [ True, False, False, False],
                 [ True, False, False, False],
                 [ True, False, False, False]],
-          fill_value=1e+20,
-          dtype=float32)
+          fill_value=1e+20)
         >>> outer.X.sum(axis=0).data
-        array([ 5., 25., 23., 10.], dtype=float32)
+        array([ 5., 25., 23., 10.])
 
         The masked array is not saved but has to be reinstantiated after saving.
 
@@ -1724,7 +1718,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
                [nan,  3.,  2.,  1.],
                [nan,  6.,  5.,  4.],
                [nan,  3.,  2.,  1.],
-               [nan,  6.,  5.,  4.]], dtype=float32)
+               [nan,  6.,  5.,  4.]])
 
         For sparse data, everything behaves similarly,
         except that for `join='outer'`, zeros are added.
@@ -1758,8 +1752,16 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         """
         from .merge import concat, merge_outer, merge_dataframes, merge_same
 
+        warnings.warn(
+            "The AnnData.concatenate method is deprecated in favour of the "
+            "anndata.concat function. Please use anndata.concat instead.\n\n"
+            "See the tutorial for concat at: "
+            "https://anndata.readthedocs.io/en/latest/concatenation.html",
+            FutureWarning,
+        )
+
         if self.isbacked:
-            raise ValueError("Currently, concatenate does only work in memory mode.")
+            raise ValueError("Currently, concatenate only works in memory mode.")
 
         if len(adatas) == 0:
             return self.copy()
@@ -1852,7 +1854,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         if "obsm" in key:
             obsm = self._obsm
             if (
-                not all([o.shape[0] == self._n_obs for o in obsm.values()])
+                not all([dim_len(o, 0) == self._n_obs for o in obsm.values()])
                 and len(obsm.dim_names) != self._n_obs
             ):
                 raise ValueError(
@@ -1862,7 +1864,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         if "varm" in key:
             varm = self._varm
             if (
-                not all([v.shape[0] == self._n_vars for v in varm.values()])
+                not all([dim_len(v, 0) == self._n_vars for v in varm.values()])
                 and len(varm.dim_names) != self._n_vars
             ):
                 raise ValueError(
@@ -1875,7 +1877,6 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         filename: Optional[PathLike] = None,
         compression: Optional[Literal["gzip", "lzf"]] = None,
         compression_opts: Union[int, Any] = None,
-        force_dense: Optional[bool] = None,
         as_dense: Sequence[str] = (),
     ):
         """\
@@ -1898,15 +1899,47 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
         filename
             Filename of data file. Defaults to backing file.
         compression
-            See the h5py :ref:`dataset_compression`.
+            For [`lzf`, `gzip`], see the h5py :ref:`dataset_compression`.
+
+            Alternative compression filters such as `zstd` can be passed
+            from the :doc:`hdf5plugin <hdf5plugin:usage>` library.
+            Experimental.
+
+            Usage example::
+
+                import hdf5plugin
+                adata.write_h5ad(
+                    filename,
+                    compression=hdf5plugin.FILTERS["zstd"]
+                )
+
+            .. note::
+                Datasets written with hdf5plugin-provided compressors
+                cannot be opened without first loading the hdf5plugin
+                library using `import hdf5plugin`. When using alternative
+                compression filters such as `zstd`, consider writing to
+                `zarr` format instead of `h5ad`, as the `zarr` library
+                provides a more transparent compression pipeline.
+
         compression_opts
-            See the h5py :ref:`dataset_compression`.
+            For [`lzf`, `gzip`], see the h5py :ref:`dataset_compression`.
+
+            Alternative compression filters such as `zstd` can be configured
+            using helpers from the :doc:`hdf5plugin <hdf5plugin:usage>`
+            library. Experimental.
+
+            Usage example (setting `zstd` compression level to 5)::
+
+                import hdf5plugin
+                adata.write_h5ad(
+                    filename,
+                    compression=hdf5plugin.FILTERS["zstd"],
+                    compression_opts=hdf5plugin.Zstd(clevel=5).filter_options
+                )
+
         as_dense
             Sparse arrays in AnnData object to write as dense. Currently only
             supports `X` and `raw/X`.
-        force_dense
-            Write sparse data as a dense matrix.
-            Defaults to `True` if object is backed, otherwise to `False`.
         """
         from .._io.write import _write_h5ad
 
@@ -1920,7 +1953,6 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
             self,
             compression=compression,
             compression_opts=compression_opts,
-            force_dense=force_dense,
             as_dense=as_dense,
         )
 
diff --git a/anndata/_core/file_backing.py b/anndata/_core/file_backing.py
index 3e48bec..0240187 100644
--- a/anndata/_core/file_backing.py
+++ b/anndata/_core/file_backing.py
@@ -1,13 +1,14 @@
 from functools import singledispatch
 from os import PathLike
 from pathlib import Path
-from typing import Optional, Union, Iterator
+from typing import Optional, Union, Iterator, Literal
+from collections.abc import Mapping
 
 import h5py
 
 from . import anndata
 from .sparse_dataset import SparseDataset
-from ..compat import Literal, ZarrArray
+from ..compat import ZarrArray, DaskArray, AwkArray
 
 
 class AnnDataFileManager:
@@ -92,20 +93,43 @@ class AnnDataFileManager:
 
 
 @singledispatch
-def to_memory(x):
+def to_memory(x, copy=False):
     """Permissivley convert objects to in-memory representation.
 
     If they already are in-memory, (or are just unrecognized) pass a copy through.
     """
-    return x.copy()
+    if copy and hasattr(x, "copy"):
+        return x.copy()
+    else:
+        return x
 
 
 @to_memory.register(ZarrArray)
 @to_memory.register(h5py.Dataset)
-def _(x):
+def _(x, copy=False):
     return x[...]
 
 
 @to_memory.register(SparseDataset)
-def _(x: SparseDataset):
+def _(x: SparseDataset, copy=False):
     return x.to_memory()
+
+
+@to_memory.register(DaskArray)
+def _(x, copy=False):
+    return x.compute()
+
+
+@to_memory.register(Mapping)
+def _(x: Mapping, copy=False):
+    return {k: to_memory(v, copy=copy) for k, v in x.items()}
+
+
+@to_memory.register(AwkArray)
+def _(x, copy=False):
+    from copy import copy as _copy
+
+    if copy:
+        return _copy(x)
+    else:
+        return x
diff --git a/anndata/_core/index.py b/anndata/_core/index.py
index 8082d48..859c1bc 100644
--- a/anndata/_core/index.py
+++ b/anndata/_core/index.py
@@ -7,10 +7,7 @@ import h5py
 import numpy as np
 import pandas as pd
 from scipy.sparse import spmatrix, issparse
-
-
-Index1D = Union[slice, int, str, np.int64, np.ndarray]
-Index = Union[Index1D, Tuple[Index1D, Index1D], spmatrix]
+from ..compat import AwkArray, DaskArray, Index, Index1D
 
 
 def _normalize_indices(
@@ -127,6 +124,14 @@ def _subset(a: Union[np.ndarray, pd.DataFrame], subset_idx: Index):
     return a[subset_idx]
 
 
+@_subset.register(DaskArray)
+def _subset_dask(a: DaskArray, subset_idx: Index):
+    if all(isinstance(x, cabc.Iterable) for x in subset_idx):
+        subset_idx = np.ix_(*subset_idx)
+        return a.vindex[subset_idx]
+    return a[subset_idx]
+
+
 @_subset.register(spmatrix)
 def _subset_spmatrix(a: spmatrix, subset_idx: Index):
     # Correcting for indexing behaviour of sparse.spmatrix
@@ -140,6 +145,13 @@ def _subset_df(df: pd.DataFrame, subset_idx: Index):
     return df.iloc[subset_idx]
 
 
+@_subset.register(AwkArray)
+def _subset_awkarray(a: AwkArray, subset_idx: Index):
+    if all(isinstance(x, cabc.Iterable) for x in subset_idx):
+        subset_idx = np.ix_(*subset_idx)
+    return a[subset_idx]
+
+
 # Registration for SparseDataset occurs in sparse_dataset.py
 @_subset.register(h5py.Dataset)
 def _subset_dataset(d, subset_idx):
diff --git a/anndata/_core/merge.py b/anndata/_core/merge.py
index 9a03922..25a0628 100644
--- a/anndata/_core/merge.py
+++ b/anndata/_core/merge.py
@@ -6,18 +6,31 @@ from collections.abc import Mapping, MutableSet
 from functools import reduce, singledispatch
 from itertools import repeat
 from operator import and_, or_, sub
-from typing import Any, Callable, Collection, Iterable, Optional, Tuple, TypeVar, Union
+from typing import (
+    Any,
+    Callable,
+    Collection,
+    Iterable,
+    Optional,
+    Tuple,
+    TypeVar,
+    Union,
+    Literal,
+)
 import typing
-from warnings import warn
+from warnings import warn, filterwarnings
 
+from natsort import natsorted
 import numpy as np
 import pandas as pd
 from scipy import sparse
 from scipy.sparse import spmatrix
 
 from .anndata import AnnData
-from ..compat import Literal
-from ..utils import asarray
+from ..compat import AwkArray, DaskArray
+from ..utils import asarray, dim_len
+from .index import _subset, make_slice
+from anndata._warnings import ExperimentalFeatureWarning
 
 T = TypeVar("T")
 
@@ -97,6 +110,21 @@ def equal_dataframe(a, b) -> bool:
     return a.equals(b)
 
 
+@equal.register(DaskArray)
+def equal_dask_array(a, b) -> bool:
+    import dask.array as da
+    from dask.base import tokenize
+
+    if a is b:
+        return True
+    if a.shape != b.shape:
+        return False
+    if isinstance(b, DaskArray):
+        if tokenize(a) == tokenize(b):
+            return True
+    return da.equal(a, b, where=~(da.isnan(a) == da.isnan(b))).all()
+
+
 @equal.register(np.ndarray)
 def equal_array(a, b) -> bool:
     return equal(pd.DataFrame(a), pd.DataFrame(asarray(b)))
@@ -127,6 +155,13 @@ def equal_sparse(a, b) -> bool:
         return False
 
 
+@equal.register(AwkArray)
+def equal_awkward(a, b) -> bool:
+    from ..compat import awkward as ak
+
+    return ak.almost_equal(a, b)
+
+
 def as_sparse(x):
     if not isinstance(x, sparse.spmatrix):
         return sparse.csr_matrix(x)
@@ -134,6 +169,59 @@ def as_sparse(x):
         return x
 
 
+def unify_categorical_dtypes(dfs):
+    """
+    Attempts to unify categorical datatypes from multiple dataframes
+    """
+    # Get shared categorical columns
+    df_dtypes = [dict(df.dtypes) for df in dfs]
+    columns = reduce(lambda x, y: x.union(y), [df.columns for df in dfs])
+
+    dtypes = {col: list() for col in columns}
+    for col in columns:
+        for df in df_dtypes:
+            dtypes[col].append(df.get(col, None))
+
+    dtypes = {k: v for k, v in dtypes.items() if unifiable_dtype(v)}
+
+    if len(dtypes) == 0:
+        return dfs
+    else:
+        dfs = [df.copy(deep=False) for df in dfs]
+
+    new_dtypes = {}
+    for col in dtypes.keys():
+        categories = reduce(
+            lambda x, y: x.union(y),
+            [x.categories for x in dtypes[col] if not pd.isnull(x)],
+        )
+        new_dtypes[col] = pd.CategoricalDtype(natsorted(categories), ordered=False)
+
+    for df in dfs:
+        for col, dtype in new_dtypes.items():
+            if col in df:
+                df[col] = df[col].astype(dtype)
+
+    return dfs
+
+
+def unifiable_dtype(col: pd.Series) -> bool:
+    """
+    Check if dtypes are mergable categoricals.
+
+    Currently, this means they must be unordered categoricals.
+    """
+    dtypes = set()
+    ordered = False
+    for dtype in col:
+        if pd.api.types.is_categorical_dtype(dtype):
+            dtypes.add(dtype.categories.dtype)
+            ordered = ordered | dtype.ordered
+        elif not pd.isnull(dtype):
+            return False
+    return len(dtypes) == 1 and not ordered
+
+
 ###################
 # Per element logic
 ###################
@@ -247,7 +335,7 @@ def resolve_merge_strategy(
 #####################
 
 
-class Reindexer(object):
+class Reindexer:
     """
     Indexing to be applied to axis of 2d array orthogonal to the axis being concatenated.
 
@@ -267,7 +355,6 @@ class Reindexer(object):
     def __init__(self, old_idx, new_idx):
         self.old_idx = old_idx
         self.new_idx = new_idx
-
         self.no_change = new_idx.equals(old_idx)
 
         new_pos = new_idx.get_indexer(old_idx)
@@ -287,12 +374,16 @@ class Reindexer(object):
 
         Missing values are to be replaced with `fill_value`.
         """
-        if self.no_change and (el.shape[axis] == len(self.old_idx)):
+        if self.no_change and (dim_len(el, axis) == len(self.old_idx)):
             return el
         if isinstance(el, pd.DataFrame):
             return self._apply_to_df(el, axis=axis, fill_value=fill_value)
         elif isinstance(el, sparse.spmatrix):
             return self._apply_to_sparse(el, axis=axis, fill_value=fill_value)
+        elif isinstance(el, AwkArray):
+            return self._apply_to_awkward(el, axis=axis, fill_value=fill_value)
+        elif isinstance(el, DaskArray):
+            return self._apply_to_dask_array(el, axis=axis, fill_value=fill_value)
         else:
             return self._apply_to_array(el, axis=axis, fill_value=fill_value)
 
@@ -301,6 +392,23 @@ class Reindexer(object):
             fill_value = np.NaN
         return el.reindex(self.new_idx, axis=axis, fill_value=fill_value)
 
+    def _apply_to_dask_array(self, el: DaskArray, *, axis, fill_value=None):
+        import dask.array as da
+
+        if fill_value is None:
+            fill_value = default_fill_value([el])
+        shape = list(el.shape)
+        if el.shape[axis] == 0:
+            # Presumably faster since it won't allocate the full array
+            shape[axis] = len(self.new_idx)
+            return da.broadcast_to(fill_value, tuple(shape))
+
+        indexer = self.old_idx.get_indexer(self.new_idx)
+
+        sub_el = _subset(el, make_slice(indexer, axis, len(shape)))
+        sub_el[make_slice(indexer == -1, axis, len(shape))] = fill_value
+        return sub_el
+
     def _apply_to_array(self, el, *, axis, fill_value=None):
         if fill_value is None:
             fill_value = default_fill_value([el])
@@ -370,6 +478,22 @@ class Reindexer(object):
 
         return out
 
+    def _apply_to_awkward(self, el: AwkArray, *, axis, fill_value=None):
+        import awkward as ak
+
+        if self.no_change:
+            return el
+        elif axis == 1:  # Indexing by field
+            if self.new_idx.isin(self.old_idx).all():  # inner join
+                return el[self.new_idx]
+            else:  # outer join
+                # TODO: this code isn't actually hit, we should refactor
+                raise Exception("This should be unreachable, please open an issue.")
+        else:
+            if len(self.new_idx) > len(self.old_idx):
+                el = ak.pad_none(el, 1, axis=axis)  # axis == 0
+            return el[self.old_idx.get_indexer(self.new_idx)]
+
 
 def merge_indices(
     inds: Iterable[pd.Index], join: Literal["inner", "outer"]
@@ -409,7 +533,7 @@ def gen_reindexer(new_var: pd.Index, cur_var: pd.Index):
            [0., 1., 0.],
            [0., 0., 1.],
            [0., 1., 0.],
-           [1., 0., 0.]], dtype=float32)
+           [1., 0., 0.]])
     """
     return Reindexer(cur_var, new_var)
 
@@ -430,10 +554,23 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None):
             )
         # TODO: behaviour here should be chosen through a merge strategy
         df = pd.concat(
-            [f(x) for f, x in zip(reindexers, arrays)], ignore_index=True, axis=axis
+            unify_categorical_dtypes([f(x) for f, x in zip(reindexers, arrays)]),
+            ignore_index=True,
+            axis=axis,
         )
         df.index = index
         return df
+    elif any(isinstance(a, AwkArray) for a in arrays):
+        from ..compat import awkward as ak
+
+        if not all(
+            isinstance(a, AwkArray) or a is MissingVal or 0 in a.shape for a in arrays
+        ):
+            raise NotImplementedError(
+                "Cannot concatenate an AwkwardArray with other array types."
+            )
+
+        return ak.concatenate([f(a) for f, a in zip(reindexers, arrays)], axis=axis)
     elif any(isinstance(a, sparse.spmatrix) for a in arrays):
         sparse_stack = (sparse.vstack, sparse.hstack)[axis]
         return sparse_stack(
@@ -479,6 +616,15 @@ def gen_inner_reindexers(els, new_index, axis: Literal[0, 1] = 0):
             lambda x, y: x.intersection(y), (df_indices(el) for el in els)
         )
         reindexers = [Reindexer(df_indices(el), common_ind) for el in els]
+    elif any(isinstance(el, AwkArray) for el in els if not_missing(el)):
+        if not all(isinstance(el, AwkArray) for el in els if not_missing(el)):
+            raise NotImplementedError(
+                "Cannot concatenate an AwkwardArray with other array types."
+            )
+        common_keys = intersect_keys(el.fields for el in els)
+        reindexers = [
+            Reindexer(pd.Index(el.fields), pd.Index(list(common_keys))) for el in els
+        ]
     else:
         min_ind = min(el.shape[alt_axis] for el in els)
         reindexers = [
@@ -493,13 +639,41 @@ def gen_outer_reindexers(els, shapes, new_index: pd.Index, *, axis=0):
         reindexers = [
             (lambda x: x)
             if not_missing(el)
-            else (lambda x: pd.DataFrame(index=range(shape)))
+            else (lambda _, shape=shape: pd.DataFrame(index=range(shape)))
             for el, shape in zip(els, shapes)
         ]
-    else:
-        # if fill_value is None:
-        # fill_value = default_fill_value(els)
+    elif any(isinstance(el, AwkArray) for el in els if not_missing(el)):
+        import awkward as ak
 
+        if not all(isinstance(el, AwkArray) for el in els if not_missing(el)):
+            raise NotImplementedError(
+                "Cannot concatenate an AwkwardArray with other array types."
+            )
+        warn(
+            "Outer joins on awkward.Arrays will have different return values in the future."
+            "For details, and to offer input, please see:\n\n\t"
+            "https://github.com/scverse/anndata/issues/898",
+            ExperimentalFeatureWarning,
+        )
+        filterwarnings(
+            "ignore",
+            category=ExperimentalFeatureWarning,
+            message=r"Outer joins on awkward.Arrays will have different return values.*",
+        )
+        # all_keys = union_keys(el.fields for el in els if not_missing(el))
+        reindexers = []
+        for el in els:
+            if not_missing(el):
+                reindexers.append(lambda x: x)
+            else:
+                reindexers.append(
+                    lambda x: ak.pad_none(
+                        ak.Array([]),
+                        len(x),
+                        0,
+                    )
+                )
+    else:
         max_col = max(el.shape[1] for el in els if not_missing(el))
         orig_cols = [el.shape[1] if not_missing(el) else 0 for el in els]
         reindexers = [
@@ -602,7 +776,7 @@ def dim_size(adata, *, axis=None, dim=None) -> int:
     return adata.shape[ax]
 
 
-# TODO: Resolve https://github.com/theislab/anndata/issues/678 and remove this function
+# TODO: Resolve https://github.com/scverse/anndata/issues/678 and remove this function
 def concat_Xs(adatas, reindexers, axis, fill_value):
     """
     Shimy until support for some missing X's is implemented.
@@ -618,7 +792,7 @@ def concat_Xs(adatas, reindexers, axis, fill_value):
     elif any(X is None for X in Xs):
         raise NotImplementedError(
             "Some (but not all) of the AnnData's to be concatenated had no .X value. "
-            "Concatenation is currently only implmented for cases where all or none of"
+            "Concatenation is currently only implemented for cases where all or none of"
             " the AnnData's have .X assigned."
         )
     else:
@@ -642,12 +816,6 @@ def concat(
 
     See the :doc:`concatenation <../concatenation>` section in the docs for a more in-depth description.
 
-    .. warning::
-
-        This function is marked as experimental for the `0.7` release series, and will
-        supercede the :meth:`AnnData.concatenate() <anndata.AnnData.concatenate>` method
-        in future releases.
-
     Params
     ------
     adatas
@@ -731,14 +899,14 @@ def concat(
 
     >>> ad.concat([a, b]).to_df()
         var1  var2
-    s1   0.0   1.0
-    s2   2.0   3.0
-    s3   4.0   5.0
-    s4   7.0   8.0
+    s1     0     1
+    s2     2     3
+    s3     4     5
+    s4     7     8
     >>> ad.concat([a, c], axis=1).to_df()
         var1  var2  var3  var4
-    s1   0.0   1.0  10.0  11.0
-    s2   2.0   3.0  12.0  13.0
+    s1     0     1    10    11
+    s2     2     3    12    13
 
     Inner and outer joins
 
@@ -757,10 +925,10 @@ def concat(
     Index(['var1', 'var2', 'var3'], dtype='object')
     >>> outer.to_df()  # Sparse arrays are padded with zeroes by default
         var1  var2  var3
-    s1   0.0   1.0   0.0
-    s2   2.0   3.0   0.0
-    s3   4.0   5.0   6.0
-    s4   7.0   8.0   9.0
+    s1     0     1     0
+    s2     2     3     0
+    s3     4     5     6
+    s4     7     8     9
 
     Keeping track of source objects
 
@@ -856,7 +1024,9 @@ def concat(
 
     # Annotation for concatenation axis
     concat_annot = pd.concat(
-        [getattr(a, dim) for a in adatas], join=join, ignore_index=True
+        unify_categorical_dtypes([getattr(a, dim) for a in adatas]),
+        join=join,
+        ignore_index=True,
     )
     concat_annot.index = concat_indices
     if label is not None:
@@ -924,7 +1094,6 @@ def concat(
             [
                 AnnData(
                     X=a.raw.X,
-                    dtype=a.raw.X.dtype,
                     obs=pd.DataFrame(index=a.obs_names),
                     var=a.raw.var,
                     varm=a.raw.varm,
@@ -947,7 +1116,6 @@ def concat(
     return AnnData(
         **{
             "X": X,
-            "dtype": None if X is None else X.dtype,
             "layers": layers,
             dim: concat_annot,
             alt_dim: alt_annot,
diff --git a/anndata/_core/views.py b/anndata/_core/views.py
index 2774712..4604a6a 100644
--- a/anndata/_core/views.py
+++ b/anndata/_core/views.py
@@ -1,5 +1,6 @@
 from contextlib import contextmanager
 from copy import deepcopy
+from enum import Enum
 from functools import reduce, singledispatch, wraps
 from typing import Any, KeysView, Optional, Sequence, Tuple
 import warnings
@@ -9,9 +10,10 @@ import pandas as pd
 from pandas.api.types import is_bool_dtype
 from scipy import sparse
 
+import anndata
 from anndata._warnings import ImplicitModificationWarning
 from .access import ElementRef
-from ..compat import ZappyArray
+from ..compat import ZappyArray, AwkArray, DaskArray
 
 
 class _SetItemMixin:
@@ -91,16 +93,51 @@ class ArrayView(_SetItemMixin, np.ndarray):
         return self.copy()
 
 
+# Extends DaskArray
+# Calls parent __new__ constructor since
+# even calling astype on a dask array
+# needs a .compute() call to actually happen.
+# So no construction by view casting like ArrayView
+class DaskArrayView(_SetItemMixin, DaskArray):
+    def __new__(
+        cls,
+        input_array: DaskArray,
+        view_args: Tuple["anndata.AnnData", str, Tuple[str, ...]] = None,
+    ):
+        arr = super().__new__(
+            cls,
+            dask=input_array.dask,
+            name=input_array.name,
+            chunks=input_array.chunks,
+            dtype=input_array.dtype,
+            meta=input_array._meta,
+            shape=input_array.shape,
+        )
+        if view_args is not None:
+            view_args = ElementRef(*view_args)
+        arr._view_args = view_args
+
+        return arr
+
+    def __array_finalize__(self, obj: Optional[DaskArray]):
+        if obj is not None:
+            self._view_args = getattr(obj, "_view_args", None)
+
+    def keys(self) -> KeysView[str]:
+        # it’s a structured array
+        return self.dtype.names
+
+
 # Unlike array views, SparseCSRView and SparseCSCView
 # do not propagate through subsetting
 class SparseCSRView(_ViewMixin, sparse.csr_matrix):
-    # https://github.com/theislab/anndata/issues/656
+    # https://github.com/scverse/anndata/issues/656
     def copy(self) -> sparse.csr_matrix:
         return sparse.csr_matrix(self).copy()
 
 
 class SparseCSCView(_ViewMixin, sparse.csc_matrix):
-    # https://github.com/theislab/anndata/issues/656
+    # https://github.com/scverse/anndata/issues/656
     def copy(self) -> sparse.csc_matrix:
         return sparse.csc_matrix(self).copy()
 
@@ -130,6 +167,11 @@ def as_view_array(array, view_args):
     return ArrayView(array, view_args=view_args)
 
 
+@as_view.register(DaskArray)
+def as_view_dask_array(array, view_args):
+    return DaskArrayView(array, view_args=view_args)
+
+
 @as_view.register(pd.DataFrame)
 def as_view_df(df, view_args):
     return DataFrameView(df, view_args=view_args)
@@ -157,6 +199,69 @@ def as_view_zappy(z, view_args):
     return z
 
 
+try:
+    from ..compat import awkward as ak
+    import weakref
+
+    # Registry to store weak references from AwkwardArrayViews to their parent AnnData container
+    _registry = weakref.WeakValueDictionary()
+    _PARAM_NAME = "_view_args"
+
+    class AwkwardArrayView(_ViewMixin, AwkArray):
+        @property
+        def _view_args(self):
+            """Override _view_args to retrieve the values from awkward arrays parameters.
+
+            Awkward arrays cannot be subclassed like other python objects. Instead subclasses need
+            to be attached as "behavior". These "behaviors" cannot take any additional parameters (as we do
+            for other data types to store `_view_args`). Therefore, we need to store `_view_args` using awkward's
+            parameter mechanism. These parameters need to be json-serializable, which is why we can't store
+            ElementRef directly, but need to replace the reference to the parent AnnDataView container with a weak
+            reference.
+            """
+            parent_key, attrname, keys = self.layout.parameter(_PARAM_NAME)
+            parent = _registry[parent_key]
+            return ElementRef(parent, attrname, keys)
+
+        def __copy__(self) -> AwkArray:
+            """
+            Turn the AwkwardArrayView into an actual AwkwardArray with no special behavior.
+
+            Need to override __copy__ instead of `.copy()` as awkward arrays don't implement `.copy()`
+            and are copied using python's standard copy mechanism in `aligned_mapping.py`.
+            """
+            array = self
+            # makes a shallow copy and removes the reference to the original AnnData object
+            array = ak.with_parameter(self, _PARAM_NAME, None)
+            array = ak.with_parameter(array, "__array__", None)
+            return array
+
+    @as_view.register(AwkArray)
+    def as_view_awkarray(array, view_args):
+        parent, attrname, keys = view_args
+        parent_key = f"target-{id(parent)}"
+        _registry[parent_key] = parent
+        # TODO: See https://github.com/scverse/anndata/pull/647#discussion_r963494798_ for more details and
+        # possible strategies to stack behaviors.
+        # A better solution might be based on xarray-style "attrs", once this is implemented
+        # https://github.com/scikit-hep/awkward/issues/1391#issuecomment-1412297114
+        if type(array).__name__ != "Array":
+            raise NotImplementedError(
+                "Cannot create a view of an awkward array with __array__ parameter. "
+                "Please open an issue in the AnnData repo and describe your use-case."
+            )
+        array = ak.with_parameter(array, _PARAM_NAME, (parent_key, attrname, keys))
+        array = ak.with_parameter(array, "__array__", "AwkwardArrayView")
+        return array
+
+    ak.behavior["AwkwardArrayView"] = AwkwardArrayView
+
+except ImportError:
+
+    class AwkwardArrayView:
+        pass
+
+
 def _resolve_idxs(old, new, adata):
     t = tuple(_resolve_idx(old[i], new[i], adata.shape[i]) for i in (0, 1))
     return t
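
Note: with as_view now registered for DaskArray, subsetting an AnnData whose X
is a dask array should yield a lazy DaskArrayView rather than materialising the
data. A hedged sketch, assuming the optional dask dependency is installed:

    import anndata as ad
    import dask.array as da
    import numpy as np

    adata = ad.AnnData(da.from_array(np.ones((100, 50)), chunks=(10, 50)))
    view = adata[:10]
    # The view's X stays dask-backed; nothing is computed until .compute()
    print(type(view.X).__name__)   # DaskArrayView (name taken from the class above)
    print(view.X.compute().shape)  # (10, 50)
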
diff --git a/anndata/_io/h5ad.py b/anndata/_io/h5ad.py
index d3a9c4b..9c9133d 100644
--- a/anndata/_io/h5ad.py
+++ b/anndata/_io/h5ad.py
@@ -3,7 +3,7 @@ from functools import partial
 from warnings import warn
 from pathlib import Path
 from types import MappingProxyType
-from typing import Callable, Type, TypeVar, Union
+from typing import Callable, Type, TypeVar, Union, Literal
 from typing import Collection, Sequence, Mapping
 
 import h5py
@@ -18,8 +18,8 @@ from ..compat import (
     _from_fixed_length_strings,
     _decode_structured_array,
     _clean_uns,
-    Literal,
 )
+from ..experimental import read_dispatched
 from .utils import (
     H5PY_V3,
     report_read_key_on_error,
@@ -37,21 +37,12 @@ def write_h5ad(
     filepath: Union[Path, str],
     adata: AnnData,
     *,
-    force_dense: bool = None,
     as_dense: Sequence[str] = (),
     dataset_kwargs: Mapping = MappingProxyType({}),
     **kwargs,
 ) -> None:
-    if force_dense is not None:
-        warn(
-            "The `force_dense` argument is deprecated. Use `as_dense` instead.",
-            FutureWarning,
-        )
-    if force_dense is True:
-        if adata.raw is not None:
-            as_dense = ("X", "raw/X")
-        else:
-            as_dense = ("X",)
+    from anndata.experimental import write_dispatched
+
     if isinstance(as_dense, str):
         as_dense = [as_dense]
     if "raw.X" in as_dense:
@@ -72,8 +63,11 @@ def write_h5ad(
     mode = "a" if adata.isbacked else "w"
     if adata.isbacked:  # close so that we can reopen below
         adata.file.close()
+
     with h5py.File(filepath, mode) as f:
         # TODO: Use spec writing system for this
+        # Currently can't use write_dispatched here because this function is also called to do an
+        # inplace update of a backed object, which would delete "/"
         f = f["/"]
         f.attrs.setdefault("encoding-type", "anndata")
         f.attrs.setdefault("encoding-version", "0.1.0")
@@ -147,19 +141,13 @@ def read_h5ad_backed(filename: Union[str, Path], mode: Literal["r", "r+"]) -> An
 
     d["raw"] = _read_raw(f, attrs={"var", "varm"})
 
-    X_dset = f.get("X", None)
-    if X_dset is None:
-        pass
-    elif isinstance(X_dset, h5py.Group):
-        d["dtype"] = X_dset["data"].dtype
-    elif hasattr(X_dset, "dtype"):
-        d["dtype"] = f["X"].dtype
-    else:
-        raise ValueError()
+    adata = AnnData(**d)
 
-    _clean_uns(d)
+    # Backwards compat to <0.7
+    if isinstance(f["obs"], h5py.Dataset):
+        _clean_uns(adata)
 
-    return AnnData(**d)
+    return adata
 
 
 def read_h5ad(
@@ -182,6 +170,13 @@ def read_h5ad(
         instead of fully loading it into memory (`memory` mode).
         If you want to modify backed attributes of the AnnData object,
         you need to choose `'r+'`.
+
+        Currently, `backed` only supports updates to `X`. That means any
+        changes to other slots like `obs` will not be written to disk in
+        `backed` mode. If you would like to save changes made to these slots
+        of a `backed` :class:`~anndata.AnnData`, write them to a new file
+        (see :meth:`~anndata.AnnData.write`). For an example, see
+        [here](https://anndata-tutorials.readthedocs.io/en/latest/getting-started.html#Partial-reading-of-large-data).
     as_sparse
         If an array was saved as dense, passing its name here will read it as
         a sparse_matrix, by chunk of size `chunk_size`.
@@ -222,36 +217,42 @@ def read_h5ad(
     )
 
     with h5py.File(filename, "r") as f:
-        d = {}
-        for k in f.keys():
-            # Backwards compat for old raw
-            if k == "raw" or k.startswith("raw."):
-                continue
-            if k == "X" and "X" in as_sparse:
-                d[k] = rdasp(f[k])
-            elif k == "raw":
-                assert False, "unexpected raw format"
-            elif k in {"obs", "var"}:
+
+        def callback(func, elem_name: str, elem, iospec):
+            if iospec.encoding_type == "anndata" or elem_name.endswith("/"):
+                return AnnData(
+                    **{
+                        # This is covering up backwards compat in the anndata initializer
+                        # In most cases we should be able to call `func(elem[k])` instead
+                        k: read_dispatched(elem[k], callback)
+                        for k in elem.keys()
+                        if not k.startswith("raw.")
+                    }
+                )
+            elif elem_name.startswith("/raw."):
+                return None
+            elif elem_name == "/X" and "X" in as_sparse:
+                return rdasp(elem)
+            elif elem_name == "/raw":
+                return _read_raw(f, as_sparse, rdasp)
+            elif elem_name in {"/obs", "/var"}:
                 # Backwards compat
-                d[k] = read_dataframe(f[k])
-            else:  # Base case
-                d[k] = read_elem(f[k])
-
-        d["raw"] = _read_raw(f, as_sparse, rdasp)
-
-        X_dset = f.get("X", None)
-        if X_dset is None:
-            pass
-        elif isinstance(X_dset, h5py.Group):
-            d["dtype"] = X_dset["data"].dtype
-        elif hasattr(X_dset, "dtype"):
-            d["dtype"] = f["X"].dtype
-        else:
-            raise ValueError()
+                return read_dataframe(elem)
+            return func(elem)
 
-    _clean_uns(d)  # backwards compat
+        adata = read_dispatched(f, callback=callback)
 
-    return AnnData(**d)
+        # Backwards compat (should figure out which version)
+        if "raw.X" in f:
+            raw = AnnData(**_read_raw(f, as_sparse, rdasp))
+            raw.obs_names = adata.obs_names
+            adata.raw = raw
+
+        # Backwards compat to <0.7
+        if isinstance(f["obs"], h5py.Dataset):
+            _clean_uns(adata)
+
+    return adata
 
 
 def _read_raw(
@@ -260,7 +261,7 @@ def _read_raw(
     rdasp: Callable[[h5py.Dataset], sparse.spmatrix] = None,
     *,
     attrs: Collection[str] = ("X", "var", "varm"),
-):
+) -> dict:
     if as_sparse:
         assert rdasp is not None, "must supply rdasp if as_sparse is supplied"
     raw = {}
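
Note: read_h5ad is now built on the experimental read_dispatched API, whose
callback(func, elem_name, elem, iospec) can intercept how each stored element
is read. A hedged sketch of the same mechanism from user code, assuming an
existing file "data.h5ad":

    import h5py
    from anndata.experimental import read_dispatched

    def callback(func, elem_name, elem, iospec):
        # Skip layers entirely; fall back to the default reader otherwise
        if elem_name.startswith("/layers"):
            return None
        return func(elem)

    with h5py.File("data.h5ad", "r") as f:
        adata = read_dispatched(f, callback=callback)
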
diff --git a/anndata/_io/read.py b/anndata/_io/read.py
index c20b54e..43383ab 100644
--- a/anndata/_io/read.py
+++ b/anndata/_io/read.py
@@ -20,7 +20,8 @@ from .h5ad import read_h5ad
 
 try:
     from .zarr import read_zarr
-except ImportError as e:  # noqa: F841
+except ImportError as _e:
+    e = _e
 
     def read_zarr(*_, **__):
         raise e
@@ -76,7 +77,7 @@ def read_excel(
     X = df.values[:, 1:]
     row = dict(row_names=df.iloc[:, 0].values.astype(str))
     col = dict(col_names=np.array(df.columns[1:], dtype=str))
-    return AnnData(X, row, col, dtype=dtype)
+    return AnnData(X, row, col)
 
 
 def read_umi_tools(filename: PathLike, dtype=None) -> AnnData:
@@ -93,15 +94,13 @@ def read_umi_tools(filename: PathLike, dtype=None) -> AnnData:
     table = pd.read_table(filename, dtype={"gene": "category", "cell": "category"})
 
     X = sparse.csr_matrix(
-        (table["count"], (table["cell"].cat.codes, table["gene"].cat.codes))
+        (table["count"], (table["cell"].cat.codes, table["gene"].cat.codes)),
+        dtype=dtype,
     )
     obs = pd.DataFrame(index=pd.Index(table["cell"].cat.categories, name="cell"))
     var = pd.DataFrame(index=pd.Index(table["gene"].cat.categories, name="gene"))
 
-    if dtype is None:
-        dtype = X.dtype
-
-    return AnnData(X=X, obs=obs, var=var, dtype=dtype)
+    return AnnData(X=X, obs=obs, var=var)
 
 
 def read_hdf(filename: PathLike, key: str) -> AnnData:
@@ -133,7 +132,7 @@ def read_hdf(filename: PathLike, key: str) -> AnnData:
         for iname, name in enumerate(["row_names", "col_names"]):
             if name in keys:
                 rows_cols[iname][name] = f[name][()]
-    adata = AnnData(X, rows_cols[0], rows_cols[1], dtype=X.dtype.name)
+    adata = AnnData(X, rows_cols[0], rows_cols[1])
     return adata
 
 
@@ -228,7 +227,7 @@ def read_loom(
         )
         if obsm_mapping != {}:
             raise ValueError(
-                "Recieved values for both `obsm_names` and `obsm_mapping`. This is "
+                "Received values for both `obsm_names` and `obsm_mapping`. This is "
                 "ambiguous, only pass `obsm_mapping`."
             )
         obsm_mapping = obsm_names
@@ -240,7 +239,7 @@ def read_loom(
         )
         if varm_mapping != {}:
             raise ValueError(
-                "Recieved values for both `varm_names` and `varm_mapping`. This is "
+                "Received values for both `varm_names` and `varm_mapping`. This is "
                 "ambiguous, only pass `varm_mapping`."
             )
         varm_mapping = varm_names
@@ -252,6 +251,7 @@ def read_loom(
         if X_name not in lc.layers.keys():
             X_name = ""
         X = lc.layers[X_name].sparse().T.tocsr() if sparse else lc.layers[X_name][()].T
+        X = X.astype(dtype, copy=False)
 
         layers = OrderedDict()
         if X_name != "":
@@ -295,7 +295,6 @@ def read_loom(
             obsm=obsm if obsm else None,
             varm=varm if varm else None,
             uns=uns,
-            dtype=dtype,
         )
     return adata
 
@@ -318,7 +317,7 @@ def read_mtx(filename: PathLike, dtype: str = "float32") -> AnnData:
     from scipy.sparse import csr_matrix
 
     X = csr_matrix(X)
-    return AnnData(X, dtype=dtype)
+    return AnnData(X)
 
 
 def read_text(
@@ -472,7 +471,6 @@ def _read_text(
         data,
         obs=dict(obs_names=row_names),
         var=dict(var_names=col_names),
-        dtype=dtype,
     )
 
 
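
Note: since the readers above no longer forward a dtype argument to the AnnData
constructor, the requested dtype is applied to X itself (e.g.
X.astype(dtype, copy=False) in read_loom). User code can make the same explicit
cast; a minimal sketch:

    import numpy as np
    import anndata as ad

    X = np.random.poisson(1.0, size=(5, 3))    # integer counts
    adata = ad.AnnData(X.astype(np.float32))   # cast up front, no dtype kwarg
    print(adata.X.dtype)                       # float32
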
diff --git a/anndata/_io/specs/__init__.py b/anndata/_io/specs/__init__.py
index 162ecd7..c25f798 100644
--- a/anndata/_io/specs/__init__.py
+++ b/anndata/_io/specs/__init__.py
@@ -1,2 +1,2 @@
 from . import methods
-from .registry import write_elem, read_elem, get_spec, _REGISTRY
+from .registry import write_elem, get_spec, read_elem, _REGISTRY, Reader, Writer, IOSpec
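
Note: the newly exported Reader/Writer machinery backs the write_dispatched
helper used by write_zarr further below; its callback receives
(func, store, key, elem, dataset_kwargs, iospec) and can adjust dataset_kwargs
per element. A hedged sketch, assuming zarr is installed:

    import zarr
    import numpy as np
    import anndata as ad
    from anndata.experimental import write_dispatched

    adata = ad.AnnData(np.random.rand(100, 20))
    f = zarr.open("dispatched_demo.zarr", mode="w")

    def callback(func, store, key, elem, dataset_kwargs, iospec):
        # Chunk only X; write everything else with the defaults
        if key == "/X":
            dataset_kwargs = dict(chunks=(25, 20), **dataset_kwargs)
        func(store, key, elem, dataset_kwargs=dataset_kwargs)

    write_dispatched(f, "/", adata, callback=callback)
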
diff --git a/anndata/_io/specs/methods.py b/anndata/_io/specs/methods.py
index 71c216a..9584f82 100644
--- a/anndata/_io/specs/methods.py
+++ b/anndata/_io/specs/methods.py
@@ -3,7 +3,7 @@ from __future__ import annotations
 from os import PathLike
 from collections.abc import Mapping
 from functools import partial
-from typing import Union
+from typing import Union, Literal
 from types import MappingProxyType
 from warnings import warn
 
@@ -19,16 +19,16 @@ from anndata._core.merge import intersect_keys
 from anndata._core.sparse_dataset import SparseDataset
 from anndata._core import views
 from anndata.compat import (
-    Literal,
-    OverloadedDict,
     ZarrArray,
     ZarrGroup,
+    DaskArray,
     _read_attr,
     _from_fixed_length_strings,
     _decode_structured_array,
 )
 from anndata._io.utils import report_write_key_on_error, check_key, H5PY_V3
 from anndata._warnings import OldFormatWarning
+from anndata.compat import AwkArray
 
 from .registry import (
     _REGISTRY,
@@ -41,6 +41,7 @@ from .registry import (
 
 H5Array = h5py.Dataset
 H5Group = h5py.Group
+H5File = h5py.File
 
 
 ####################
@@ -74,9 +75,10 @@ H5Group = h5py.Group
 # Note: there is no need for writing in a backwards compatible format, maybe
 
 
+@_REGISTRY.register_read(H5File, IOSpec("", ""))
 @_REGISTRY.register_read(H5Group, IOSpec("", ""))
 @_REGISTRY.register_read(H5Array, IOSpec("", ""))
-def read_basic(elem):
+def read_basic(elem, _reader):
     from anndata._io import h5ad
 
     warn(
@@ -89,14 +91,14 @@ def read_basic(elem):
         # Backwards compat sparse arrays
         if "h5sparse_format" in elem.attrs:
             return SparseDataset(elem).to_memory()
-        return {k: read_elem(v) for k, v in elem.items()}
+        return {k: _reader.read_elem(v) for k, v in elem.items()}
     elif isinstance(elem, h5py.Dataset):
         return h5ad.read_dataset(elem)  # TODO: Handle legacy
 
 
 @_REGISTRY.register_read(ZarrGroup, IOSpec("", ""))
 @_REGISTRY.register_read(ZarrArray, IOSpec("", ""))
-def read_basic_zarr(elem):
+def read_basic_zarr(elem, _reader):
     from anndata._io import zarr
 
     warn(
@@ -109,7 +111,7 @@ def read_basic_zarr(elem):
         # Backwards compat sparse arrays
         if "h5sparse_format" in elem.attrs:
             return SparseDataset(elem).to_memory()
-        return {k: read_elem(v) for k, v in elem.items()}
+        return {k: _reader.read_elem(v) for k, v in elem.items()}
     elif isinstance(elem, ZarrArray):
         return zarr.read_dataset(elem)  # TODO: Handle legacy
 
@@ -212,25 +214,27 @@ def _read_partial(group, *, items=None, indices=(slice(None), slice(None))):
 
 @_REGISTRY.register_write(ZarrGroup, AnnData, IOSpec("anndata", "0.1.0"))
 @_REGISTRY.register_write(H5Group, AnnData, IOSpec("anndata", "0.1.0"))
-def write_anndata(f, k, adata, dataset_kwargs=MappingProxyType({})):
+def write_anndata(f, k, adata, _writer, dataset_kwargs=MappingProxyType({})):
     g = f.require_group(k)
-    write_elem(g, "X", adata.X, dataset_kwargs=dataset_kwargs)
-    write_elem(g, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
-    write_elem(g, "var", adata.var, dataset_kwargs=dataset_kwargs)
-    write_elem(g, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs)
-    write_elem(g, "varm", dict(adata.varm), dataset_kwargs=dataset_kwargs)
-    write_elem(g, "obsp", dict(adata.obsp), dataset_kwargs=dataset_kwargs)
-    write_elem(g, "varp", dict(adata.varp), dataset_kwargs=dataset_kwargs)
-    write_elem(g, "layers", dict(adata.layers), dataset_kwargs=dataset_kwargs)
-    write_elem(g, "uns", dict(adata.uns), dataset_kwargs=dataset_kwargs)
-    write_elem(g, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "X", adata.X, dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "var", adata.var, dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "varm", dict(adata.varm), dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "obsp", dict(adata.obsp), dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "varp", dict(adata.varp), dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "layers", dict(adata.layers), dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "uns", dict(adata.uns), dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
 
 
 @_REGISTRY.register_read(H5Group, IOSpec("anndata", "0.1.0"))
 @_REGISTRY.register_read(H5Group, IOSpec("raw", "0.1.0"))
+@_REGISTRY.register_read(H5File, IOSpec("anndata", "0.1.0"))
+@_REGISTRY.register_read(H5File, IOSpec("raw", "0.1.0"))
 @_REGISTRY.register_read(ZarrGroup, IOSpec("anndata", "0.1.0"))
 @_REGISTRY.register_read(ZarrGroup, IOSpec("raw", "0.1.0"))
-def read_anndata(elem):
+def read_anndata(elem, _reader):
     d = {}
     for k in [
         "X",
@@ -245,19 +249,17 @@ def read_anndata(elem):
         "raw",
     ]:
         if k in elem:
-            d[k] = read_elem(elem[k])
-        if "X" in d:
-            d["dtype"] = d["X"].dtype
+            d[k] = _reader.read_elem(elem[k])
     return AnnData(**d)
 
 
 @_REGISTRY.register_write(H5Group, Raw, IOSpec("raw", "0.1.0"))
 @_REGISTRY.register_write(ZarrGroup, Raw, IOSpec("raw", "0.1.0"))
-def write_raw(f, k, raw, dataset_kwargs=MappingProxyType({})):
+def write_raw(f, k, raw, _writer, dataset_kwargs=MappingProxyType({})):
     g = f.create_group(k)
-    write_elem(g, "X", raw.X, dataset_kwargs=dataset_kwargs)
-    write_elem(g, "var", raw.var, dataset_kwargs=dataset_kwargs)
-    write_elem(g, "varm", dict(raw.varm), dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "X", raw.X, dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "var", raw.var, dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "varm", dict(raw.varm), dataset_kwargs=dataset_kwargs)
 
 
 ############
@@ -267,18 +269,16 @@ def write_raw(f, k, raw, dataset_kwargs=MappingProxyType({})):
 
 @_REGISTRY.register_read(H5Group, IOSpec("dict", "0.1.0"))
 @_REGISTRY.register_read(ZarrGroup, IOSpec("dict", "0.1.0"))
-def read_mapping(elem):
-    return {k: read_elem(v) for k, v in elem.items()}
+def read_mapping(elem, _reader):
+    return {k: _reader.read_elem(v) for k, v in elem.items()}
 
 
-@_REGISTRY.register_write(H5Group, OverloadedDict, IOSpec("dict", "0.1.0"))
 @_REGISTRY.register_write(H5Group, dict, IOSpec("dict", "0.1.0"))
-@_REGISTRY.register_write(ZarrGroup, OverloadedDict, IOSpec("dict", "0.1.0"))
 @_REGISTRY.register_write(ZarrGroup, dict, IOSpec("dict", "0.1.0"))
-def write_mapping(f, k, v, dataset_kwargs=MappingProxyType({})):
+def write_mapping(f, k, v, _writer, dataset_kwargs=MappingProxyType({})):
     g = f.create_group(k)
     for sub_k, sub_v in v.items():
-        write_elem(g, sub_k, sub_v, dataset_kwargs=dataset_kwargs)
+        _writer.write_elem(g, sub_k, sub_v, dataset_kwargs=dataset_kwargs)
 
 
 ##############
@@ -288,11 +288,11 @@ def write_mapping(f, k, v, dataset_kwargs=MappingProxyType({})):
 
 @_REGISTRY.register_write(H5Group, list, IOSpec("array", "0.2.0"))
 @_REGISTRY.register_write(ZarrGroup, list, IOSpec("array", "0.2.0"))
-def write_list(f, k, elem, dataset_kwargs=MappingProxyType({})):
-    write_elem(f, k, np.array(elem), dataset_kwargs=dataset_kwargs)
+def write_list(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):
+    _writer.write_elem(f, k, np.array(elem), dataset_kwargs=dataset_kwargs)
 
 
-# TODO: Is this the right behaviour for MaskedArrays?
+# TODO: Is this the right behavior for MaskedArrays?
 # It's in the `AnnData.concatenate` docstring, but should we keep it?
 @_REGISTRY.register_write(H5Group, views.ArrayView, IOSpec("array", "0.2.0"))
 @_REGISTRY.register_write(H5Group, np.ndarray, IOSpec("array", "0.2.0"))
@@ -302,33 +302,46 @@ def write_list(f, k, elem, dataset_kwargs=MappingProxyType({})):
 @_REGISTRY.register_write(ZarrGroup, np.ndarray, IOSpec("array", "0.2.0"))
 @_REGISTRY.register_write(ZarrGroup, h5py.Dataset, IOSpec("array", "0.2.0"))
 @_REGISTRY.register_write(ZarrGroup, np.ma.MaskedArray, IOSpec("array", "0.2.0"))
-def write_basic(f, k, elem, dataset_kwargs=MappingProxyType({})):
-    """Write methods which underlying library handles nativley."""
+def write_basic(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):
+    """Write methods which underlying library handles natively."""
     f.create_dataset(k, data=elem, **dataset_kwargs)
 
 
+@_REGISTRY.register_write(ZarrGroup, DaskArray, IOSpec("array", "0.2.0"))
+@_REGISTRY.register_write(H5Group, DaskArray, IOSpec("array", "0.2.0"))
+def write_basic_dask(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):
+    import dask.array as da
+
+    g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
+    da.store(elem, g)
+
+
 @_REGISTRY.register_read(H5Array, IOSpec("array", "0.2.0"))
 @_REGISTRY.register_read(ZarrArray, IOSpec("array", "0.2.0"))
 @_REGISTRY.register_read(ZarrArray, IOSpec("string-array", "0.2.0"))
-def read_array(elem):
+def read_array(elem, _reader):
     return elem[()]
 
 
 @_REGISTRY.register_read_partial(H5Array, IOSpec("array", "0.2.0"))
-@_REGISTRY.register_read_partial(ZarrArray, IOSpec("array", "0.2.0"))
 @_REGISTRY.register_read_partial(ZarrArray, IOSpec("string-array", "0.2.0"))
 def read_array_partial(elem, *, items=None, indices=(slice(None, None))):
     return elem[indices]
 
 
+@_REGISTRY.register_read_partial(ZarrArray, IOSpec("array", "0.2.0"))
+def read_zarr_array_partial(elem, *, items=None, indices=(slice(None, None))):
+    return elem.oindex[indices]
+
+
 # arrays of strings
 @_REGISTRY.register_read(H5Array, IOSpec("string-array", "0.2.0"))
-def read_string_array(d):
-    return read_array(d.asstr())
+def read_string_array(d, _reader):
+    return read_array(d.asstr(), _reader=_reader)
 
 
 @_REGISTRY.register_read_partial(H5Array, IOSpec("string-array", "0.2.0"))
-def read_array_partial(d, items=None, indices=slice(None)):
+def read_string_array_partial(d, items=None, indices=slice(None)):
     return read_array_partial(d.asstr(), items=items, indices=indices)
 
 
@@ -340,7 +353,7 @@ def read_array_partial(d, items=None, indices=slice(None)):
 )
 @_REGISTRY.register_write(H5Group, (np.ndarray, "U"), IOSpec("string-array", "0.2.0"))
 @_REGISTRY.register_write(H5Group, (np.ndarray, "O"), IOSpec("string-array", "0.2.0"))
-def write_vlen_string_array(f, k, elem, dataset_kwargs=MappingProxyType({})):
+def write_vlen_string_array(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):
     """Write methods which underlying library handles nativley."""
     str_dtype = h5py.special_dtype(vlen=str)
     f.create_dataset(k, data=elem.astype(str_dtype), dtype=str_dtype, **dataset_kwargs)
@@ -354,7 +367,9 @@ def write_vlen_string_array(f, k, elem, dataset_kwargs=MappingProxyType({})):
 )
 @_REGISTRY.register_write(ZarrGroup, (np.ndarray, "U"), IOSpec("string-array", "0.2.0"))
 @_REGISTRY.register_write(ZarrGroup, (np.ndarray, "O"), IOSpec("string-array", "0.2.0"))
-def write_vlen_string_array_zarr(f, k, elem, dataset_kwargs=MappingProxyType({})):
+def write_vlen_string_array_zarr(
+    f, k, elem, _writer, dataset_kwargs=MappingProxyType({})
+):
     import numcodecs
 
     f.create_dataset(
@@ -385,7 +400,7 @@ def _to_hdf5_vlen_strings(value: np.ndarray) -> np.ndarray:
 
 @_REGISTRY.register_read(H5Array, IOSpec("rec-array", "0.2.0"))
 @_REGISTRY.register_read(ZarrArray, IOSpec("rec-array", "0.2.0"))
-def read_recarray(d):
+def read_recarray(d, _reader):
     value = d[()]
     dtype = value.dtype
     value = _from_fixed_length_strings(value)
@@ -396,13 +411,13 @@ def read_recarray(d):
 
 @_REGISTRY.register_write(H5Group, (np.ndarray, "V"), IOSpec("rec-array", "0.2.0"))
 @_REGISTRY.register_write(H5Group, np.recarray, IOSpec("rec-array", "0.2.0"))
-def write_recarray(f, k, elem, dataset_kwargs=MappingProxyType({})):
+def write_recarray(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):
     f.create_dataset(k, data=_to_hdf5_vlen_strings(elem), **dataset_kwargs)
 
 
 @_REGISTRY.register_write(ZarrGroup, (np.ndarray, "V"), IOSpec("rec-array", "0.2.0"))
 @_REGISTRY.register_write(ZarrGroup, np.recarray, IOSpec("rec-array", "0.2.0"))
-def write_recarray_zarr(f, k, elem, dataset_kwargs=MappingProxyType({})):
+def write_recarray_zarr(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):
     from anndata.compat import _to_fixed_length_strings
 
     f.create_dataset(k, data=_to_fixed_length_strings(elem), **dataset_kwargs)
@@ -414,13 +429,18 @@ def write_recarray_zarr(f, k, elem, dataset_kwargs=MappingProxyType({})):
 
 
 def write_sparse_compressed(
-    f, key, value, fmt: Literal["csr", "csc"], dataset_kwargs=MappingProxyType({})
+    f,
+    key,
+    value,
+    _writer,
+    fmt: Literal["csr", "csc"],
+    dataset_kwargs=MappingProxyType({}),
 ):
     g = f.create_group(key)
     g.attrs["shape"] = value.shape
 
-    # Allow resizing
-    if "maxshape" not in dataset_kwargs:
+    # Allow resizing for hdf5
+    if isinstance(f, H5Group) and "maxshape" not in dataset_kwargs:
         dataset_kwargs = dict(maxshape=(None,), **dataset_kwargs)
 
     g.create_dataset("data", data=value.data, **dataset_kwargs)
@@ -458,9 +478,14 @@ _REGISTRY.register_write(ZarrGroup, views.SparseCSCView, IOSpec("csc_matrix", "0
 
 @_REGISTRY.register_write(H5Group, SparseDataset, IOSpec("", "0.1.0"))
 @_REGISTRY.register_write(ZarrGroup, SparseDataset, IOSpec("", "0.1.0"))
-def write_sparse_dataset(f, k, elem, dataset_kwargs=MappingProxyType({})):
+def write_sparse_dataset(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):
     write_sparse_compressed(
-        f, k, elem.to_backed(), fmt=elem.format_str, dataset_kwargs=dataset_kwargs
+        f,
+        k,
+        elem.to_backed(),
+        _writer,
+        fmt=elem.format_str,
+        dataset_kwargs=dataset_kwargs,
     )
     # TODO: Cleaner way to do this
     f[k].attrs["encoding-type"] = f"{elem.format_str}_matrix"
@@ -471,16 +496,54 @@ def write_sparse_dataset(f, k, elem, dataset_kwargs=MappingProxyType({})):
 @_REGISTRY.register_read(H5Group, IOSpec("csr_matrix", "0.1.0"))
 @_REGISTRY.register_read(ZarrGroup, IOSpec("csc_matrix", "0.1.0"))
 @_REGISTRY.register_read(ZarrGroup, IOSpec("csr_matrix", "0.1.0"))
-def read_sparse(elem):
+def read_sparse(elem, _reader):
     return SparseDataset(elem).to_memory()
 
 
 @_REGISTRY.register_read_partial(H5Group, IOSpec("csc_matrix", "0.1.0"))
 @_REGISTRY.register_read_partial(H5Group, IOSpec("csr_matrix", "0.1.0"))
+@_REGISTRY.register_read_partial(ZarrGroup, IOSpec("csc_matrix", "0.1.0"))
+@_REGISTRY.register_read_partial(ZarrGroup, IOSpec("csr_matrix", "0.1.0"))
 def read_sparse_partial(elem, *, items=None, indices=(slice(None), slice(None))):
     return SparseDataset(elem)[indices]
 
 
+#################
+# Awkward array #
+#################
+
+
+@_REGISTRY.register_write(H5Group, AwkArray, IOSpec("awkward-array", "0.1.0"))
+@_REGISTRY.register_write(ZarrGroup, AwkArray, IOSpec("awkward-array", "0.1.0"))
+@_REGISTRY.register_write(
+    H5Group, views.AwkwardArrayView, IOSpec("awkward-array", "0.1.0")
+)
+@_REGISTRY.register_write(
+    ZarrGroup, views.AwkwardArrayView, IOSpec("awkward-array", "0.1.0")
+)
+def write_awkward(f, k, v, _writer, dataset_kwargs=MappingProxyType({})):
+    from anndata.compat import awkward as ak
+
+    group = f.create_group(k)
+    form, length, container = ak.to_buffers(ak.to_packed(v))
+    group.attrs["length"] = length
+    group.attrs["form"] = form.to_json()
+    for k, v in container.items():
+        _writer.write_elem(group, k, v, dataset_kwargs=dataset_kwargs)
+
+
+@_REGISTRY.register_read(H5Group, IOSpec("awkward-array", "0.1.0"))
+@_REGISTRY.register_read(ZarrGroup, IOSpec("awkward-array", "0.1.0"))
+def read_awkward(elem, _reader):
+    from anndata.compat import awkward as ak
+
+    form = _read_attr(elem.attrs, "form")
+    length = _read_attr(elem.attrs, "length")
+    container = {k: _reader.read_elem(elem[k]) for k in elem.keys()}
+
+    return ak.from_buffers(form, length, container)
+
+
 ##############
 # DataFrames #
 ##############
@@ -490,7 +553,7 @@ def read_sparse_partial(elem, *, items=None, indices=(slice(None), slice(None)))
 @_REGISTRY.register_write(H5Group, pd.DataFrame, IOSpec("dataframe", "0.2.0"))
 @_REGISTRY.register_write(ZarrGroup, views.DataFrameView, IOSpec("dataframe", "0.2.0"))
 @_REGISTRY.register_write(ZarrGroup, pd.DataFrame, IOSpec("dataframe", "0.2.0"))
-def write_dataframe(f, key, df, dataset_kwargs=MappingProxyType({})):
+def write_dataframe(f, key, df, _writer, dataset_kwargs=MappingProxyType({})):
     # Check arguments
     for reserved in ("_index",):
         if reserved in df.columns:
@@ -508,20 +571,24 @@ def write_dataframe(f, key, df, dataset_kwargs=MappingProxyType({})):
     # ._values is "the best" array representation. It's the true array backing the
     # object, where `.values` is always a np.ndarray and .array is always a pandas
     # array.
-    write_elem(group, index_name, df.index._values, dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(
+        group, index_name, df.index._values, dataset_kwargs=dataset_kwargs
+    )
     for colname, series in df.items():
         # TODO: this should write the "true" representation of the series (i.e. the underlying array or ndarray depending)
-        write_elem(group, colname, series._values, dataset_kwargs=dataset_kwargs)
+        _writer.write_elem(
+            group, colname, series._values, dataset_kwargs=dataset_kwargs
+        )
 
 
 @_REGISTRY.register_read(H5Group, IOSpec("dataframe", "0.2.0"))
 @_REGISTRY.register_read(ZarrGroup, IOSpec("dataframe", "0.2.0"))
-def read_dataframe(elem):
+def read_dataframe(elem, _reader):
     columns = list(_read_attr(elem.attrs, "column-order"))
     idx_key = _read_attr(elem.attrs, "_index")
     df = pd.DataFrame(
-        {k: read_elem(elem[k]) for k in columns},
-        index=read_elem(elem[idx_key]),
+        {k: _reader.read_elem(elem[k]) for k in columns},
+        index=_reader.read_elem(elem[idx_key]),
         columns=list(columns),
     )
     if idx_key != "_index":
@@ -557,7 +624,7 @@ def read_dataframe_partial(
 
 @_REGISTRY.register_read(H5Group, IOSpec("dataframe", "0.1.0"))
 @_REGISTRY.register_read(ZarrGroup, IOSpec("dataframe", "0.1.0"))
-def read_dataframe_0_1_0(elem):
+def read_dataframe_0_1_0(elem, _reader):
     columns = _read_attr(elem.attrs, "column-order")
     idx_key = _read_attr(elem.attrs, "_index")
     df = pd.DataFrame(
@@ -609,31 +676,33 @@ def read_partial_dataframe_0_1_0(
 
 @_REGISTRY.register_write(H5Group, pd.Categorical, IOSpec("categorical", "0.2.0"))
 @_REGISTRY.register_write(ZarrGroup, pd.Categorical, IOSpec("categorical", "0.2.0"))
-def write_categorical(f, k, v, dataset_kwargs=MappingProxyType({})):
+def write_categorical(f, k, v, _writer, dataset_kwargs=MappingProxyType({})):
     g = f.create_group(k)
     g.attrs["ordered"] = bool(v.ordered)
 
-    write_elem(g, "codes", v.codes, dataset_kwargs=dataset_kwargs)
-    write_elem(g, "categories", v.categories._values, dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "codes", v.codes, dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(
+        g, "categories", v.categories._values, dataset_kwargs=dataset_kwargs
+    )
 
 
 @_REGISTRY.register_read(H5Group, IOSpec("categorical", "0.2.0"))
 @_REGISTRY.register_read(ZarrGroup, IOSpec("categorical", "0.2.0"))
-def read_categorical(elem):
+def read_categorical(elem, _reader):
     return pd.Categorical.from_codes(
-        codes=read_elem(elem["codes"]),
-        categories=read_elem(elem["categories"]),
-        ordered=_read_attr(elem.attrs, "ordered"),
+        codes=_reader.read_elem(elem["codes"]),
+        categories=_reader.read_elem(elem["categories"]),
+        ordered=bool(_read_attr(elem.attrs, "ordered")),
     )
 
 
 @_REGISTRY.register_read_partial(H5Group, IOSpec("categorical", "0.2.0"))
 @_REGISTRY.register_read_partial(ZarrGroup, IOSpec("categorical", "0.2.0"))
-def read_categorical(elem, *, items=None, indices=(slice(None),)):
+def read_partial_categorical(elem, *, items=None, indices=(slice(None),)):
     return pd.Categorical.from_codes(
         codes=read_elem_partial(elem["codes"], indices=indices),
         categories=read_elem(elem["categories"]),
-        ordered=_read_attr(elem.attrs, "ordered"),
+        ordered=bool(_read_attr(elem.attrs, "ordered")),
     )
 
 
@@ -654,33 +723,33 @@ def read_categorical(elem, *, items=None, indices=(slice(None),)):
 @_REGISTRY.register_write(
     ZarrGroup, pd.arrays.BooleanArray, IOSpec("nullable-boolean", "0.1.0")
 )
-def write_nullable_integer(f, k, v, dataset_kwargs=MappingProxyType({})):
+def write_nullable_integer(f, k, v, _writer, dataset_kwargs=MappingProxyType({})):
     g = f.create_group(k)
     if v._mask is not None:
-        write_elem(g, "mask", v._mask, dataset_kwargs=dataset_kwargs)
-    write_elem(g, "values", v._data, dataset_kwargs=dataset_kwargs)
+        _writer.write_elem(g, "mask", v._mask, dataset_kwargs=dataset_kwargs)
+    _writer.write_elem(g, "values", v._data, dataset_kwargs=dataset_kwargs)
 
 
 @_REGISTRY.register_read(H5Group, IOSpec("nullable-integer", "0.1.0"))
 @_REGISTRY.register_read(ZarrGroup, IOSpec("nullable-integer", "0.1.0"))
-def read_nullable_integer(elem):
+def read_nullable_integer(elem, _reader):
     if "mask" in elem:
         return pd.arrays.IntegerArray(
-            read_elem(elem["values"]), mask=read_elem(elem["mask"])
+            _reader.read_elem(elem["values"]), mask=_reader.read_elem(elem["mask"])
         )
     else:
-        return pd.array(read_elem(elem["values"]))
+        return pd.array(_reader.read_elem(elem["values"]))
 
 
 @_REGISTRY.register_read(H5Group, IOSpec("nullable-boolean", "0.1.0"))
 @_REGISTRY.register_read(ZarrGroup, IOSpec("nullable-boolean", "0.1.0"))
-def read_nullable_boolean(elem):
+def read_nullable_boolean(elem, _reader):
     if "mask" in elem:
         return pd.arrays.BooleanArray(
-            read_elem(elem["values"]), mask=read_elem(elem["mask"])
+            _reader.read_elem(elem["values"]), mask=_reader.read_elem(elem["mask"])
         )
     else:
-        return pd.array(read_elem(elem["values"]))
+        return pd.array(_reader.read_elem(elem["values"]))
 
 
 ###########
@@ -690,15 +759,15 @@ def read_nullable_boolean(elem):
 
 @_REGISTRY.register_read(H5Array, IOSpec("numeric-scalar", "0.2.0"))
 @_REGISTRY.register_read(ZarrArray, IOSpec("numeric-scalar", "0.2.0"))
-def read_scalar(elem):
+def read_scalar(elem, _reader):
     return elem[()]
 
 
-def write_scalar(f, key, value, dataset_kwargs=MappingProxyType({})):
+def write_scalar(f, key, value, _writer, dataset_kwargs=MappingProxyType({})):
     return f.create_dataset(key, data=np.array(value), **dataset_kwargs)
 
 
-def write_hdf5_scalar(f, key, value, dataset_kwargs=MappingProxyType({})):
+def write_hdf5_scalar(f, key, value, _writer, dataset_kwargs=MappingProxyType({})):
     # Can’t compress scalars, error is thrown
     dataset_kwargs = dataset_kwargs.copy()
     dataset_kwargs.pop("compression", None)
@@ -723,12 +792,12 @@ _REGISTRY.register_write(ZarrGroup, np.str_, IOSpec("string", "0.2.0"))(write_sc
 
 
 @_REGISTRY.register_read(H5Array, IOSpec("string", "0.2.0"))
-def read_hdf5_string(elem):
+def read_hdf5_string(elem, _reader):
     return elem.asstr()[()]
 
 
 @_REGISTRY.register_read(ZarrArray, IOSpec("string", "0.2.0"))
-def read_zarr_string(elem):
+def read_zarr_string(elem, _reader):
     return str(elem[()])
 
 
@@ -738,7 +807,7 @@ _REGISTRY.register_read(ZarrArray, IOSpec("bytes", "0.2.0"))(read_scalar)
 
 @_REGISTRY.register_write(H5Group, np.str_, IOSpec("string", "0.2.0"))
 @_REGISTRY.register_write(H5Group, str, IOSpec("string", "0.2.0"))
-def write_string(f, k, v, dataset_kwargs):
+def write_string(f, k, v, _writer, dataset_kwargs):
     dataset_kwargs = dataset_kwargs.copy()
     dataset_kwargs.pop("compression", None)
     dataset_kwargs.pop("compression_opts", None)
diff --git a/anndata/_io/specs/registry.py b/anndata/_io/specs/registry.py
index b24697b..45f3fa5 100644
--- a/anndata/_io/specs/registry.py
+++ b/anndata/_io/specs/registry.py
@@ -1,22 +1,50 @@
 from __future__ import annotations
 
-from collections.abc import Mapping
+from collections.abc import Mapping, Callable, Iterable
+from dataclasses import dataclass
 from functools import singledispatch, wraps
-from typing import Any, NamedTuple, Tuple, Type, Callable, Union
+from types import MappingProxyType
+from typing import Any, Union
 
-
-from anndata.compat import _read_attr, ZarrArray, ZarrGroup, H5Group, H5Array
+from anndata.compat import _read_attr
+from anndata._types import StorageType, GroupStorageType
 from anndata._io.utils import report_write_key_on_error, report_read_key_on_error
 
 # TODO: This probably should be replaced by a hashable Mapping due to conversion b/w "_" and "-"
 # TODO: Should filetype be included in the IOSpec if it changes the encoding? Or does the intent that these things be "the same" overrule that?
 
 
-class IOSpec(NamedTuple):
+@dataclass(frozen=True)
+class IOSpec:
     encoding_type: str
     encoding_version: str
 
 
+# TODO: Should this subclass from LookupError?
+class IORegistryError(Exception):
+    @classmethod
+    def _from_write_parts(cls, dest_type, typ, modifiers) -> IORegistryError:
+        msg = f"No method registered for writing {typ} into {dest_type}"
+        if modifiers:
+            msg += f" with {modifiers}"
+        return cls(msg)
+
+    @classmethod
+    def _from_read_parts(
+        cls,
+        method: str,
+        registry: Mapping,
+        src_typ: StorageType,
+        spec: IOSpec,
+    ) -> IORegistryError:
+        # TODO: Improve error message if type exists, but version does not
+        msg = (
+            f"No {method} method registered for {spec} from {src_typ}. "
+            "You may need to update your installation of anndata."
+        )
+        return cls(msg)
+
+
 def write_spec(spec: IOSpec):
     def decorator(func: Callable):
         @wraps(func)
@@ -31,47 +59,72 @@ def write_spec(spec: IOSpec):
     return decorator
 
 
-class IORegistry(object):
+class IORegistry:
     def __init__(self):
-        self.read: Mapping[tuple[str, IOSpec], Callable] = {}
-        self.read_partial: Mapping[Tuple[str, IOSpec], Callable] = {}
-        self.write: Mapping[Union[Type, Tuple[Type, str]], Callable] = {}
+        self.read: dict[tuple[type, IOSpec, frozenset[str]], Callable] = {}
+        self.read_partial: dict[tuple[type, IOSpec, frozenset[str]], Callable] = {}
+        self.write: dict[
+            tuple[type, type | tuple[type, str], frozenset[str]], Callable
+        ] = {}
+        self.write_specs: dict[Union[type, tuple[type, str]], IOSpec] = {}
 
     def register_write(
         self,
-        dest_type,
-        typ: Union[type, tuple[type, str]],
-        spec,
-        modifiers: frozenset(str) = frozenset(),
+        dest_type: type,
+        src_type: type | tuple[type, str],
+        spec: IOSpec | Mapping[str, str],
+        modifiers: Iterable[str] = frozenset(),
     ):
         spec = proc_spec(spec)
         modifiers = frozenset(modifiers)
 
+        # Record specification for src_type
+        if src_type in self.write_specs and (spec != self.write_specs[src_type]):
+            # First check for consistency
+            current_spec = self.write_specs[src_type]
+            raise TypeError(
+                "Cannot overwrite IO specifications. Attempted to overwrite encoding "
+                f"for {src_type} from {current_spec} to {spec}"
+            )
+        else:
+            self.write_specs[src_type] = spec
+
         def _register(func):
-            self.write[(dest_type, typ, modifiers)] = write_spec(spec)(func)
+            self.write[(dest_type, src_type, modifiers)] = write_spec(spec)(func)
             return func
 
         return _register
 
-    def get_writer(self, dest_type, typ, modifiers=frozenset()):
+    def get_writer(
+        self,
+        dest_type: type,
+        src_type: type | tuple[type, str],
+        modifiers: frozenset[str] = frozenset(),
+    ):
         import h5py
 
         if dest_type is h5py.File:
             dest_type = h5py.Group
-        modifiers = frozenset(modifiers)
-
-        if (dest_type, typ, modifiers) not in self.write:
-            raise TypeError(
-                f"No method has been defined for writing {typ} elements to {dest_type}"
-            )
 
-        return self.write[(dest_type, typ, modifiers)]
+        if (dest_type, src_type, modifiers) in self.write:
+            return self.write[(dest_type, src_type, modifiers)]
+        else:
+            raise IORegistryError._from_write_parts(dest_type, src_type, modifiers)
 
-    def has_writer(self, dest_type, typ, modifiers):
-        modifiers = frozenset(modifiers)
-        return (dest_type, typ, modifiers) in self.write
+    def has_writer(
+        self,
+        dest_type: type,
+        src_type: type | tuple[type, str],
+        modifiers: frozenset[str],
+    ):
+        return (dest_type, src_type, modifiers) in self.write
 
-    def register_read(self, src_type, spec, modifiers: frozenset[str] = frozenset()):
+    def register_read(
+        self,
+        src_type: type,
+        spec: IOSpec | Mapping[str, str],
+        modifiers: Iterable[str] = frozenset(),
+    ):
         spec = proc_spec(spec)
         modifiers = frozenset(modifiers)
 
@@ -81,16 +134,26 @@ class IORegistry(object):
 
         return _register
 
-    def get_reader(self, src_type, spec, modifiers=frozenset()):
-        modifiers = frozenset(modifiers)
-        return self.read[(src_type, spec, modifiers)]
+    def get_reader(
+        self, src_type: type, spec: IOSpec, modifiers: frozenset[str] = frozenset()
+    ):
+        if (src_type, spec, modifiers) in self.read:
+            return self.read[(src_type, spec, modifiers)]
+        else:
+            raise IORegistryError._from_read_parts(
+                "read", _REGISTRY.read, src_type, spec
+            )
 
-    def has_reader(self, src_type, spec, modifiers=frozenset()):
-        modifiers = frozenset(modifiers)
+    def has_reader(
+        self, src_type: type, spec: IOSpec, modifiers: frozenset[str] = frozenset()
+    ):
         return (src_type, spec, modifiers) in self.read
 
     def register_read_partial(
-        self, src_type, spec, modifiers: frozenset[str] = frozenset()
+        self,
+        src_type: type,
+        spec: IOSpec | Mapping[str, str],
+        modifiers: Iterable[str] = frozenset(),
     ):
         spec = proc_spec(spec)
         modifiers = frozenset(modifiers)
@@ -101,9 +164,22 @@ class IORegistry(object):
 
         return _register
 
-    def get_partial_reader(self, src_type, spec, modifiers=frozenset()):
-        modifiers = frozenset(modifiers)
-        return self.read_partial[(src_type, spec, modifiers)]
+    def get_partial_reader(
+        self, src_type: type, spec: IOSpec, modifiers: frozenset[str] = frozenset()
+    ):
+        if (src_type, spec, modifiers) in self.read_partial:
+            return self.read_partial[(src_type, spec, modifiers)]
+        else:
+            raise IORegistryError._from_read_parts(
+                "read_partial", _REGISTRY.read_partial, src_type, spec
+            )
+
+    def get_spec(self, elem: Any) -> IOSpec:
+        if hasattr(elem, "dtype"):
+            typ = (type(elem), elem.dtype.kind)
+            if typ in self.write_specs:
+                return self.write_specs[typ]
+        return self.write_specs[type(elem)]
 
 
 _REGISTRY = IORegistry()
@@ -125,7 +201,7 @@ def proc_spec_mapping(spec) -> IOSpec:
 
 
 def get_spec(
-    elem: "Union[h5py.Dataset, h5py.Group, zarr.Group, zarr.Dataset]",
+    elem: StorageType,
 ) -> IOSpec:
     return proc_spec(
         {
@@ -135,52 +211,146 @@ def get_spec(
     )
 
 
-@report_write_key_on_error
+class Reader:
+    def __init__(
+        self, registry: IORegistry, callback: Union[Callable, None] = None
+    ) -> None:
+        self.registry = registry
+        self.callback = callback
+
+    @report_read_key_on_error
+    def read_elem(
+        self,
+        elem: StorageType,
+        modifiers: frozenset(str) = frozenset(),
+    ) -> Any:
+        """Read an element from a store. See exported function for more details."""
+        from functools import partial
+
+        read_func = self.registry.get_reader(
+            type(elem), get_spec(elem), frozenset(modifiers)
+        )
+        read_func = partial(read_func, _reader=self)
+        if self.callback is not None:
+            return self.callback(read_func, elem.name, elem, iospec=get_spec(elem))
+        else:
+            return read_func(elem)
+
+
+class Writer:
+    def __init__(
+        self,
+        registry: IORegistry,
+        callback: Union[
+            Callable[
+                [
+                    GroupStorageType,
+                    str,
+                    StorageType,
+                    dict,
+                ],
+                None,
+            ],
+            None,
+        ] = None,
+    ):
+        self.registry = registry
+        self.callback = callback
+
+    @report_write_key_on_error
+    def write_elem(
+        self,
+        store: GroupStorageType,
+        k: str,
+        elem,
+        *,
+        dataset_kwargs=MappingProxyType({}),
+        modifiers=frozenset(),
+    ):
+        from functools import partial
+        from pathlib import PurePosixPath
+
+        dest_type = type(store)
+        t = type(elem)
+
+        if elem is None:
+            return lambda *_, **__: None
+
+        # Normalize k to absolute path
+        if not PurePosixPath(k).is_absolute():
+            k = str(PurePosixPath(store.name) / k)
+
+        if k == "/":
+            store.clear()
+        elif k in store:
+            del store[k]
+        if (
+            hasattr(elem, "dtype")
+            and (dest_type, (t, elem.dtype.kind), modifiers) in self.registry.write
+        ):
+            write_func = partial(
+                self.registry.get_writer(dest_type, (t, elem.dtype.kind), modifiers),
+                _writer=self,
+            )
+        else:
+            write_func = partial(
+                self.registry.get_writer(dest_type, t, modifiers),
+                _writer=self,
+            )
+
+        if self.callback is not None:
+            return self.callback(
+                write_func,
+                store,
+                k,
+                elem,
+                dataset_kwargs=dataset_kwargs,
+                iospec=self.registry.get_spec(elem),
+            )
+        else:
+            return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
+
+
+def read_elem(elem: StorageType) -> Any:
+    """
+    Read an element from a store.
+
+    Assumes that the element is encoded using the anndata encoding. This function will
+    determine the encoded type using the encoding metadata stored in elem's attributes.
+
+    Params
+    ------
+    elem
+        The stored element.
+    """
+    return Reader(_REGISTRY).read_elem(elem)
+
+
 def write_elem(
-    f: "Union[H5Group, ZarrGroup]",
+    store: GroupStorageType,
     k: str,
     elem: Any,
-    *args,
-    modifiers=frozenset(),
-    **kwargs,
-):
+    *,
+    dataset_kwargs: Mapping = MappingProxyType({}),
+) -> None:
     """
-    Write an element to a disk store using it's anndata encoding.
+    Write an element to a storage group using anndata encoding.
 
     Params
     ------
-    f
-        The store to write to.
+    store
+        The group to write to.
     k
-        The key to write for this value.
+        The key to write to in the group. Note that absolute paths will be written
+        from the root.
     elem
-        The element to write as k to f.
+        The element to write. Typically an in-memory object, e.g. an AnnData, pandas
+        dataframe, scipy sparse matrix, etc.
+    dataset_kwargs
+        Keyword arguments to pass to the store's dataset creation function.
+        E.g. for zarr this would be `chunks`, `compressor`.
     """
-    dest_type = type(f)
-    if elem is None:
-        return
-    t = type(elem)
-    if k == "/":
-        f.clear()
-    elif k in f:
-        del f[k]
-    if (
-        hasattr(elem, "dtype")
-        and (dest_type, (t, elem.dtype.kind), modifiers) in _REGISTRY.write
-    ):
-        _REGISTRY.get_writer(dest_type, (t, elem.dtype.kind), modifiers)(
-            f, k, elem, *args, **kwargs
-        )
-    else:
-        _REGISTRY.get_writer(dest_type, t, modifiers)(f, k, elem, *args, **kwargs)
-
-
-def read_elem(
-    elem: Union[H5Array, H5Group, ZarrGroup, ZarrArray],
-    modifiers: frozenset(str) = frozenset(),
-) -> Any:
-    """Read an element from an on disk store."""
-    return _REGISTRY.get_reader(type(elem), get_spec(elem), frozenset(modifiers))(elem)
+    Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs)
 
 
 # TODO: If all items would be read, just call normal read method
@@ -195,3 +365,21 @@ def read_elem_partial(
     return _REGISTRY.get_partial_reader(
         type(elem), get_spec(elem), frozenset(modifiers)
     )(elem, items=items, indices=indices)
+
+
+@singledispatch
+def elem_key(elem) -> str:
+    return elem.name
+
+
+#     raise NotImplementedError()
+
+# @elem_key.register(ZarrGroup)
+# @elem_key.register(ZarrArray)
+# def _(elem):
+#     return elem.name
+
+# @elem_key.register(H5Array)
+# @elem_key.register(H5Group)
+# def _(elem):
+#     re
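
Note: read_elem and write_elem keep their simple signatures while delegating to
the new Reader/Writer classes. A hedged round-trip sketch, assuming zarr is
installed and importing from the module shown here:

    import zarr
    import pandas as pd
    from anndata._io.specs import read_elem, write_elem

    store = zarr.open("element_demo.zarr", mode="w")
    df = pd.DataFrame({"total_counts": [10, 20]}, index=["cell_0", "cell_1"])
    write_elem(store, "obs", df)    # stored with the "dataframe" encoding
    print(read_elem(store["obs"]))  # decoded back into a pandas DataFrame
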
diff --git a/anndata/_io/utils.py b/anndata/_io/utils.py
index 76ee084..388f9f0 100644
--- a/anndata/_io/utils.py
+++ b/anndata/_io/utils.py
@@ -1,14 +1,18 @@
+from __future__ import annotations
+
 from enum import Enum
 from functools import wraps, singledispatch
+from typing import Callable
 from warnings import warn
 
 from packaging import version
 import h5py
 
 from .._core.sparse_dataset import SparseDataset
+from anndata.compat import H5Group, ZarrGroup
 
 # For allowing h5py v3
-# https://github.com/theislab/anndata/issues/442
+# https://github.com/scverse/anndata/issues/442
 H5PY_V3 = version.parse(h5py.__version__).major >= 3
 
 # -------------------------------------------------------------------------------
@@ -176,19 +180,28 @@ def report_read_key_on_error(func):
     >>> read_arr(z["X"])  # doctest: +SKIP
     """
 
+    def re_raise_error(e, elem):
+        if isinstance(e, AnnDataReadError):
+            raise e
+        else:
+            parent = _get_parent(elem)
+            raise AnnDataReadError(
+                f"Above error raised while reading key {elem.name!r} of "
+                f"type {type(elem)} from {parent}."
+            ) from e
+
     @wraps(func)
-    def func_wrapper(elem, *args, **kwargs):
+    def func_wrapper(*args, **kwargs):
+        from anndata._io.specs import Reader
+
+        # Figure out signature (method vs function) by going through args
+        for elem in args:
+            if not isinstance(elem, Reader):
+                break
         try:
-            return func(elem, *args, **kwargs)
+            return func(*args, **kwargs)
         except Exception as e:
-            if isinstance(e, AnnDataReadError):
-                raise e
-            else:
-                parent = _get_parent(elem)
-                raise AnnDataReadError(
-                    f"Above error raised while reading key {elem.name!r} of "
-                    f"type {type(elem)} from {parent}."
-                )
+            re_raise_error(e, elem)
 
     return func_wrapper
 
@@ -208,20 +221,31 @@ def report_write_key_on_error(func):
     >>> write_arr(z, "X", X)  # doctest: +SKIP
     """
 
+    def re_raise_error(e, elem, key):
+        if "Above error raised while writing key" in format(e):
+            raise
+        else:
+            parent = _get_parent(elem)
+            raise type(e)(
+                f"{e}\n\n"
+                f"Above error raised while writing key {key!r} of {type(elem)} "
+                f"to {parent}"
+            ) from e
+
     @wraps(func)
-    def func_wrapper(elem, key, val, *args, **kwargs):
+    def func_wrapper(*args, **kwargs):
+        from anndata._io.specs import Writer
+
+        # Figure out signature (method vs function) by going through args
+        for i in range(len(args)):
+            elem = args[i]
+            key = args[i + 1]
+            if not isinstance(elem, Writer):
+                break
         try:
-            return func(elem, key, val, *args, **kwargs)
+            return func(*args, **kwargs)
         except Exception as e:
-            if "Above error raised while writing key" in format(e):
-                raise
-            else:
-                parent = _get_parent(elem)
-                raise type(e)(
-                    f"{e}\n\n"
-                    f"Above error raised while writing key {key!r} of {type(elem)} "
-                    f"to {parent}"
-                ) from e
+            re_raise_error(e, elem, key)
 
     return func_wrapper
 
@@ -231,7 +255,14 @@ def report_write_key_on_error(func):
 # -------------------------------------------------------------------------------
 
 
-def _read_legacy_raw(f, modern_raw, read_df, read_attr, *, attrs=("X", "var", "varm")):
+def _read_legacy_raw(
+    f: ZarrGroup | H5Group,
+    modern_raw,  # TODO: type
+    read_df: Callable,
+    read_attr: Callable,
+    *,
+    attrs=("X", "var", "varm"),
+) -> dict:
     """\
     Backwards compat for reading legacy raw.
     Makes sure that no modern raw group coexists with legacy raw.* groups.
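
Note: the reworked decorators accept both the bound Reader/Writer methods and
the older free functions, so failures are still annotated with the key being
handled. A hedged sketch of the effect, assuming zarr is installed:

    import zarr
    from anndata._io.specs import write_elem

    store = zarr.open("error_demo.zarr", mode="w")
    try:
        write_elem(store, "bad_value", {1, 2, 3})  # sets have no registered writer
    except Exception as exc:
        # The re-raised error mentions the key "bad_value" alongside the cause
        print(exc)
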
diff --git a/anndata/_io/zarr.py b/anndata/_io/zarr.py
index 552bb52..d65379e 100644
--- a/anndata/_io/zarr.py
+++ b/anndata/_io/zarr.py
@@ -13,6 +13,7 @@ from ..compat import (
     _from_fixed_length_strings,
     _clean_uns,
 )
+from ..experimental import read_dispatched, write_dispatched
 from .utils import (
     report_read_key_on_error,
     _read_legacy_raw,
@@ -28,7 +29,7 @@ def write_zarr(
     store: Union[MutableMapping, str, Path],
     adata: AnnData,
     chunks=None,
-    **dataset_kwargs,
+    **ds_kwargs,
 ) -> None:
     if isinstance(store, Path):
         store = str(store)
@@ -39,21 +40,14 @@ def write_zarr(
     f = zarr.open(store, mode="w")
     f.attrs.setdefault("encoding-type", "anndata")
     f.attrs.setdefault("encoding-version", "0.1.0")
-    if chunks is not None and not isinstance(adata.X, sparse.spmatrix):
-        write_elem(
-            f, "X", adata.X, dataset_kwargs=dict(chunks=chunks, **dataset_kwargs)
-        )
-    else:
-        write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
-    write_elem(f, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
-    write_elem(f, "var", adata.var, dataset_kwargs=dataset_kwargs)
-    write_elem(f, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs)
-    write_elem(f, "varm", dict(adata.varm), dataset_kwargs=dataset_kwargs)
-    write_elem(f, "obsp", dict(adata.obsp), dataset_kwargs=dataset_kwargs)
-    write_elem(f, "varp", dict(adata.varp), dataset_kwargs=dataset_kwargs)
-    write_elem(f, "layers", dict(adata.layers), dataset_kwargs=dataset_kwargs)
-    write_elem(f, "uns", dict(adata.uns), dataset_kwargs=dataset_kwargs)
-    write_elem(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
+
+    def callback(func, s, k, elem, dataset_kwargs, iospec):
+        if chunks is not None and not isinstance(elem, sparse.spmatrix) and k == "/X":
+            func(s, k, elem, dataset_kwargs=dict(chunks=chunks, **dataset_kwargs))
+        else:
+            func(s, k, elem, dataset_kwargs=dataset_kwargs)
+
+    write_dispatched(f, "/", adata, callback=callback, dataset_kwargs=ds_kwargs)
 
 
 def read_zarr(store: Union[str, Path, MutableMapping, zarr.Group]) -> AnnData:
@@ -70,27 +64,38 @@ def read_zarr(store: Union[str, Path, MutableMapping, zarr.Group]) -> AnnData:
 
     f = zarr.open(store, mode="r")
 
-    if "encoding-type" in f.attrs:
-        return read_elem(f[""])
-
-    # Backwards compat
-    d = {}
-    for k in f.keys():
-        if k.startswith("raw."):
-            continue
-        if k in {"obs", "var"}:
-            d[k] = read_dataframe(f[k])
-        else:  # Base case
-            d[k] = read_elem(f[k])
-
-    d["raw"] = _read_legacy_raw(f, d.get("raw"), read_dataframe, read_elem)
-
-    if "X" in d:
-        d["dtype"] = d["X"].dtype
-
-    _clean_uns(d)
-
-    return AnnData(**d)
+    # Read with handling for backwards compat
+    def callback(func, elem_name: str, elem, iospec):
+        if iospec.encoding_type == "anndata" or elem_name.endswith("/"):
+            return AnnData(
+                **{
+                    k: read_dispatched(v, callback)
+                    for k, v in elem.items()
+                    if not k.startswith("raw.")
+                }
+            )
+        elif elem_name.startswith("/raw."):
+            return None
+        elif elem_name in {"/obs", "/var"}:
+            return read_dataframe(elem)
+        elif elem_name == "/raw":
+            # Backwards compat
+            return _read_legacy_raw(f, func(elem), read_dataframe, func)
+        return func(elem)
+
+    adata = read_dispatched(f, callback=callback)
+
+    # Backwards compat (should figure out which version)
+    if "raw.X" in f:
+        raw = AnnData(**_read_legacy_raw(f, adata.raw, read_dataframe, read_elem))
+        raw.obs_names = adata.obs_names
+        adata.raw = raw
+
+    # Backwards compat for <0.7
+    if isinstance(f["obs"], zarr.Array):
+        _clean_uns(adata)
+
+    return adata
 
 
 @report_read_key_on_error
diff --git a/anndata/_metadata.py b/anndata/_metadata.py
index 2084ef9..8770e50 100644
--- a/anndata/_metadata.py
+++ b/anndata/_metadata.py
@@ -27,10 +27,7 @@ try:
     refresh_entry_points()
     __version__ = get_version(root="..", relative_to=__file__)
 except (ImportError, LookupError, FileNotFoundError):
-    try:
-        from importlib.metadata import metadata
-    except ImportError:
-        from importlib_metadata import metadata
+    from importlib.metadata import metadata
 
     meta = metadata(here.name)
     __version__ = meta["Version"]
diff --git a/anndata/_types.py b/anndata/_types.py
new file mode 100644
index 0000000..d5b6b1c
--- /dev/null
+++ b/anndata/_types.py
@@ -0,0 +1,16 @@
+"""
+Defines some useful types for this library. Should probably be cleaned up before thinking about exporting.
+"""
+from typing import Union
+
+from anndata.compat import H5Array, H5Group, ZarrArray, ZarrGroup
+
+__all__ = [
+    "ArrayStorageType",
+    "GroupStorageType",
+    "StorageType",
+]
+
+ArrayStorageType = Union[ZarrArray, H5Array]
+GroupStorageType = Union[ZarrGroup, H5Group]
+StorageType = Union[ArrayStorageType, GroupStorageType]
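The new aliases are plain typing unions, so they can annotate helpers that accept either storage backend. A hypothetical use (the function name is illustrative only):

    from anndata._types import GroupStorageType

    def stamp_encoding(group: GroupStorageType, version: str) -> None:
        # Works for both h5py.Group and zarr.Group
        group.attrs["encoding-version"] = version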
diff --git a/anndata/_warnings.py b/anndata/_warnings.py
index 9409f6a..5bc0c46 100644
--- a/anndata/_warnings.py
+++ b/anndata/_warnings.py
@@ -21,3 +21,9 @@ class ImplicitModificationWarning(UserWarning):
     """
 
     pass
+
+
+class ExperimentalFeatureWarning(Warning):
+    """Raised when an unstable experimental feature is used."""
+
+    pass
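Downstream code that deliberately opts into the experimental API can silence this new warning explicitly, for example:

    import warnings
    from anndata._warnings import ExperimentalFeatureWarning

    warnings.filterwarnings("ignore", category=ExperimentalFeatureWarning)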
diff --git a/anndata/compat/__init__.py b/anndata/compat/__init__.py
index 5ccd357..14d86a5 100644
--- a/anndata/compat/__init__.py
+++ b/anndata/compat/__init__.py
@@ -1,8 +1,10 @@
+from __future__ import annotations
+
 from copy import deepcopy
 from functools import reduce, singledispatch, wraps
 from codecs import decode
 from inspect import signature, Parameter
-from typing import Any, Collection, Union, Mapping, MutableMapping, Optional
+from typing import Any, Tuple, Union, Mapping, MutableMapping, Optional
 from warnings import warn
 
 import h5py
@@ -10,20 +12,19 @@ from scipy.sparse import spmatrix
 import numpy as np
 import pandas as pd
 
-from ._overloaded_dict import _overloaded_uns, OverloadedDict
-from .._core.index import _subset
-
 
 class Empty:
     pass
 
 
-H5Group = Union[h5py.Group, h5py.File]
+Index1D = Union[slice, int, str, np.int64, np.ndarray]
+Index = Union[Index1D, Tuple[Index1D, Index1D], spmatrix]
+H5Group = h5py.Group
 H5Array = h5py.Dataset
 
 
 # try importing zarr, dask, and zappy
-from packaging import version
+from packaging import version as _v
 
 try:
     from zarr.core import Array as ZarrArray
@@ -41,6 +42,19 @@ except ImportError:
             return "mock zarr.core.Group"
 
 
+try:
+    import awkward
+
+    AwkArray = awkward.Array
+
+except ImportError:
+
+    class AwkArray:
+        @staticmethod
+        def __repr__():
+            return "mock awkward.highlevel.Array"
+
+
 try:
     from zappy.base import ZappyArray
 except ImportError:
@@ -61,23 +75,6 @@ except ImportError:
             return "mock dask.array.core.Array"
 
 
-try:
-    from typing import Literal
-except ImportError:
-    try:
-        from typing_extensions import Literal
-    except ImportError:
-
-        class LiteralMeta(type):
-            def __getitem__(cls, values):
-                if not isinstance(values, tuple):
-                    values = (values,)
-                return type("Literal_", (Literal,), dict(__args__=values))
-
-        class Literal(metaclass=LiteralMeta):
-            pass
-
-
 @singledispatch
 def _read_attr(attrs: Mapping, name: str, default: Optional[Any] = Empty):
     if default is Empty:
@@ -189,31 +186,31 @@ def _to_fixed_length_strings(value: np.ndarray) -> np.ndarray:
 #############################
 
 
-def _clean_uns(d: Mapping[str, MutableMapping[str, Union[pd.Series, str, int]]]):
+def _clean_uns(adata: "AnnData"):  # noqa: F821
     """
     Compat function for when categorical keys were stored in uns.
     This used to be buggy because when storing categorical columns in obs and var with
     the same column name, only one `<colname>_categories` is retained.
     """
     k_to_delete = set()
-    for cats_name, cats in d.get("uns", {}).items():
+    for cats_name, cats in adata.uns.items():
         if not cats_name.endswith("_categories"):
             continue
         name = cats_name.replace("_categories", "")
         # fix categories with a single category
         if isinstance(cats, (str, int)):
             cats = [cats]
-        for ann in ["obs", "var"]:
-            if name not in d[ann]:
+        for ann in [adata.obs, adata.var]:
+            if name not in ann:
                 continue
-            codes: np.ndarray = d[ann][name].values
+            codes: np.ndarray = ann[name].values
             # hack to maybe find the axis the categories were for
             if not np.all(codes < len(cats)):
                 continue
-            d[ann][name] = pd.Categorical.from_codes(codes, cats)
+            ann[name] = pd.Categorical.from_codes(codes, cats)
             k_to_delete.add(cats_name)
     for cats_name in k_to_delete:
-        del d["uns"][cats_name]
+        del adata.uns[cats_name]
 
 
 def _move_adj_mtx(d):
@@ -247,31 +244,6 @@ def _find_sparse_matrices(d: Mapping, n: int, keys: tuple, paths: list):
     return paths
 
 
-def _slice_uns_sparse_matrices(uns: MutableMapping, oidx: "Index1d", orig_n_obs: int):
-    """slice sparse spatrices of n_obs × n_obs in self.uns"""
-    if isinstance(oidx, slice) and len(range(*oidx.indices(orig_n_obs))) == orig_n_obs:
-        return uns  # slice of entire dimension is a no-op
-
-    paths = _find_sparse_matrices(uns, orig_n_obs, (), [])
-
-    if not paths:
-        return uns
-
-    uns = deepcopy(uns)
-    for path in paths:
-        str_path = "".join(f"['{key}']" for key in path)
-        warn(
-            f"During AnnData slicing, found matrix at .uns{str_path} that happens"
-            f" to be dimensioned at n_obs×n_obs ({orig_n_obs}×{orig_n_obs}).\n\n"
-            "These matrices should now be stored in the .obsp attribute.\n"
-            "This slicing behavior will be removed in anndata 0.8.",
-            FutureWarning,
-        )
-        d = reduce(lambda d, k: d[k], path[:-1], uns)
-        d[path[-1]] = _subset(d[path[-1]], (oidx, oidx))
-    return uns
-
-
 # This function was adapted from scikit-learn
 # github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/validation.py
 def _deprecate_positional_args(func=None, *, version: str = "1.0 (renaming of 0.25)"):
diff --git a/anndata/compat/_overloaded_dict.py b/anndata/compat/_overloaded_dict.py
deleted file mode 100644
index 3c24c21..0000000
--- a/anndata/compat/_overloaded_dict.py
+++ /dev/null
@@ -1,211 +0,0 @@
-from collections.abc import MutableMapping
-from functools import partial
-from typing import Any, Callable, List, Mapping, Optional, Union
-from warnings import warn
-from weakref import proxy
-
-
-class KeyOverload:
-    """
-    This class contains the information neccesary to overload a key of a dict.
-
-    It's like a descriptor, but for a key of a dict instead of an attribute.
-
-    Register getter, setter, and deleter methods by passing them at instantiation,
-    or assigning them to the `._get`, `._set`, and `._delete` attributes respectivley.
-    These functions will be passed the parent `OverloadedDict` and the key as their first
-    two arguments. The get and delete methods will be called by the parent with no
-    additional arguments, while the setter will be passed the value to set.
-
-    Note that the parent is not set on instantiation. It's currently assumed that's added
-    when the parent is constructed.
-
-    Attrs
-    -----
-    key
-        Key in parent dict to overload
-    parent
-        The parent OverloadedDict this key is attached to.
-    """
-
-    def __init__(
-        self,
-        key,
-        get: Optional[Callable] = None,
-        set: Optional[Callable] = None,
-        delete: Optional[Callable] = None,
-    ):
-        self.key = key
-        if get is not None:
-            self._get = get
-        if set is not None:
-            self._set = set
-        if delete is not None:
-            self._delete = delete
-
-    @staticmethod
-    def _get(parent, key):
-        """Default key getter."""
-        return parent.data[key]
-
-    @staticmethod
-    def _set(parent, key, value):
-        parent.data[key] = value
-
-    @staticmethod
-    def _delete(parent, key):
-        del parent.data[key]
-
-    @property
-    def get(self):
-        return partial(self._get, self.parent, self.key)
-
-    @property
-    def set(self):
-        return partial(self._set, self.parent, self.key)
-
-    @property
-    def delete(self):
-        return partial(self._delete, self.parent, self.key)
-
-
-class OverloadedDict(MutableMapping):
-    """A mapping where some of the keys have been overloaded.
-
-    Each overloaded key should be defined as an KeyOverload instance, and can have
-    specific getter, settter, and deleter methods. Additionally, overloaded keys don't
-    show up in iteration or from `__contains__` calls unless they exist in `.data`.
-
-    Attrs
-    -----
-    data
-        Wrapped mapping.
-    overloaded
-        Maps from keys to overloaded behaviours.
-    """
-
-    data: Mapping
-    overloaded: Mapping[Any, KeyOverload]
-
-    def __init__(self, data: Mapping, *, overloaded: Mapping[Any, KeyOverload]):
-        self.data = data
-        self.overloaded = overloaded
-        for v in overloaded.values():
-            v.parent = proxy(self)
-
-    def __getitem__(self, key):
-        if key in self.overloaded:
-            return self.overloaded[key].get()
-        else:
-            return self.data[key]
-
-    def __setitem__(self, key, value):
-        if key in self.overloaded:
-            self.overloaded[key].set(value)
-        else:
-            self.data[key] = value
-
-    def __delitem__(self, key):
-        if key in self.overloaded:
-            self.overloaded[key].delete()
-        else:
-            del self.data[key]
-
-    def __contains__(self, key):
-        return key in self.data
-
-    def __iter__(self):
-        return iter(self.data)
-
-    def __len__(self):
-        return len(self.data)
-
-    def __repr__(self):
-        return (
-            f"OverloadedDict, wrapping:\n\t{self.data!r}\nWith overloaded keys:"
-            f"\n\t{list(self.overloaded.keys())}."
-        )
-
-    def copy(self) -> dict:
-        return self.data.copy()
-
-    def keys(self):
-        return self.data.keys()
-
-    def _ipython_key_completions_(self) -> List[str]:
-        return list(self.keys())
-
-
-#######################################
-# Handling .uns["neighbors"]
-#######################################
-
-
-def _access_warn(key, cur_loc):
-    warn(
-        f"This location for '{key}' is deprecated. It has been moved to {cur_loc}, "
-        "and will not be accesible here in a future version of anndata.",
-        FutureWarning,
-        stacklevel=4,
-    )
-
-
-def _adjacency_getter(ovld: OverloadedDict, key, adata: "AnnData"):
-    """For overloading:
-
-    >>> mtx = adata.uns["neighbors"]["connectivities"]  # doctest: +SKIP
-    >>> mtx = adata.uns["neighbors"]["distances"]  # doctest: +SKIP
-    """
-    _access_warn(key, f".obsp[{key}]")
-    return adata.obsp[key]
-
-
-def _adjacency_setter(ovld: OverloadedDict, key, value, adata: "AnnData"):
-    """For overloading:
-
-    >>> adata.uns["neighbors"]["connectivities"] = mtx  # doctest: +SKIP
-    >>> adata.uns["neighbors"]["distances"] = mtx  # doctest: +SKIP
-    """
-    _access_warn(key, f".obsp[{key}]")
-    adata.obsp[key] = value
-
-
-def _neighbors_setter(ovld: OverloadedDict, key, neighbors: Mapping, adata: "AnnData"):
-    """For overloading: `adata.uns["neighbors"] = d`."""
-    for k in ("distances", "connectivities"):
-        if k in neighbors:
-            _access_warn(k, f".obsp[{k}]")
-            adata.obsp[k] = neighbors.pop(k)
-    ovld.data[key] = neighbors
-
-
-def _neighbors_getter(ovld: OverloadedDict, key, adata: "AnnData"):
-    """For overloading: `adata.uns["neighbors"]`"""
-    return OverloadedDict(
-        ovld.data[key],
-        overloaded={
-            "connectivities": KeyOverload(
-                "connectivities",
-                get=partial(_adjacency_getter, adata=adata),
-                set=partial(_adjacency_setter, adata=adata),
-            ),
-            "distances": KeyOverload(
-                "distances",
-                get=partial(_adjacency_getter, adata=adata),
-                set=partial(_adjacency_setter, adata=adata),
-            ),
-        },
-    )
-
-
-def _overloaded_uns(adata: "AnnData", uns: Union[dict, "DictView"]) -> OverloadedDict:
-    return OverloadedDict(
-        uns,
-        overloaded={
-            "neighbors": KeyOverload(
-                "neighbors",
-                get=partial(_neighbors_getter, adata=adata),
-                set=partial(_neighbors_setter, adata=adata),
-            ),
-        },
-    )
diff --git a/anndata/experimental/__init__.py b/anndata/experimental/__init__.py
index 5c2c6a1..073910d 100644
--- a/anndata/experimental/__init__.py
+++ b/anndata/experimental/__init__.py
@@ -1,4 +1,109 @@
+from __future__ import annotations
+
+from types import MappingProxyType
+from typing import Callable, Any
+
 from .multi_files import AnnCollection
 from .pytorch import AnnLoader
 
-from anndata._io.specs import read_elem, write_elem
+from anndata._io.specs import read_elem, write_elem, IOSpec
+from anndata._types import StorageType, GroupStorageType
+
+__all__ = [
+    "AnnCollection",
+    "AnnLoader",
+    "read_elem",
+    "write_elem",
+    "read_dispatched",
+    "write_dispatched",
+    "IOSpec",
+]
+
+
+def read_dispatched(
+    elem: StorageType,
+    callback: Callable[[Callable[[StorageType], Any], str, StorageType, IOSpec], Any],
+) -> Any:
+    """
+    Read elem, calling the callback at each sub-element.
+
+    Params
+    ------
+    elem
+        Storage container (e.g. `h5py.Group`, `zarr.Group`). This must have anndata
+        element specifications.
+    callback
+        Function to call at each anndata encoded element. See details below for
+        signature.
+
+
+    The callback has the following signature:
+
+    * `read_func` (`Callable`): A callable which takes the encoded element and returns its decoded value.
+      This is the default decoding function, and what to call if you don't want to modify the decoding.
+      It will call this callback again at the next encoded element it sees.
+    * `key` (`str`): The absolute key of the element in the store.
+    * `elem` (`StorageType`): The encoded element.
+    * `iospec` (`IOSpec`): The specification of the element. This is passed as a keyword argument.
+
+    See Also
+    --------
+
+    :doc:`/tutorials/notebooks/{read,write}_dispatched`
+    """
+    from anndata._io.specs import Reader, _REGISTRY
+
+    reader = Reader(_REGISTRY, callback=callback)
+
+    return reader.read_elem(elem)
+
+
+def write_dispatched(
+    store: GroupStorageType,
+    key: str,
+    elem: Any,
+    callback: Callable[
+        [Callable[[StorageType, str, Any], None], GroupStorageType, str, Any, dict],
+        None,
+    ],
+    *,
+    dataset_kwargs=MappingProxyType({}),
+) -> None:
+    """
+    Write elem to store, recursively calling callback at each sub-element.
+
+    Params
+    ------
+    store
+        Storage container to be written to.
+    key
+        Key to write element to. To write to the root group, use "/".
+    elem
+        The element to write. Probably an AnnData.
+    callback
+        Function called when writing each element. See below for signature.
+    dataset_kwargs
+        Keyword arguments to pass to the dataset creation function.
+
+
+    The callback has the following signature:
+
+    * `write_func` (`Callable`): A callable which takes the in memory element and writes it to the store.
+      This is the default encoding function, and what to call if you don't want to change behaviour at this level.
+    * `store` (`GroupStorageType`): The store to write to.
+    * `key` (`str`): The key to write elem into store at. This will be an absolute key.
+    * `elem` (`Any`): The element to write.
+    * `dataset_kwargs` (`dict`): Keyword arguments to pass to the dataset creation function. This is passed as a keyword argument.
+    * `iospec` (`IOSpec`): The specification of the element. This is passed as a keyword argument.
+
+
+    See Also
+    --------
+
+    :doc:`/tutorials/notebooks/{read,write}_dispatched`
+    """
+    from anndata._io.specs import Writer, _REGISTRY
+
+    writer = Writer(_REGISTRY, callback=callback)
+
+    writer.write_elem(store, key, elem, dataset_kwargs=dataset_kwargs)
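A usage sketch of the two new entry points, following the callback signatures documented above; the store path, chunk size, and print statement are illustrative only:

    import numpy as np
    import zarr
    import anndata as ad
    from anndata.experimental import read_dispatched, write_dispatched

    adata = ad.AnnData(np.ones((10, 5)))

    # Write: add chunking only for the main matrix, defer everything else
    # to the default writer.
    def write_callback(func, store, key, elem, dataset_kwargs, iospec):
        if key == "/X":
            dataset_kwargs = dict(chunks=(5, 5), **dataset_kwargs)
        func(store, key, elem, dataset_kwargs=dataset_kwargs)

    f = zarr.open("example.zarr", mode="w")
    write_dispatched(f, "/", adata, callback=write_callback)

    # Read: log each element as it is decoded, otherwise fall back to the
    # default reader.
    def read_callback(func, elem_name, elem, iospec):
        print(f"reading {elem_name} ({iospec.encoding_type})")
        return func(elem)

    adata2 = read_dispatched(zarr.open("example.zarr", mode="r"), callback=read_callback)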
diff --git a/anndata/experimental/multi_files/_anncollection.py b/anndata/experimental/multi_files/_anncollection.py
index 3ab7201..e8e574b 100644
--- a/anndata/experimental/multi_files/_anncollection.py
+++ b/anndata/experimental/multi_files/_anncollection.py
@@ -5,9 +5,8 @@ import numpy as np
 import pandas as pd
 import warnings
 
-from typing import Dict, Union, Optional, Sequence, Callable
+from typing import Dict, Union, Optional, Sequence, Callable, Literal
 
-from ...compat import Literal
 from ..._core.anndata import AnnData
 from ..._core.index import _normalize_indices, _normalize_index, Index
 from ..._core.views import _resolve_idx
@@ -151,7 +150,7 @@ class _IterateViewMixin:
                 batch = self[:, idx]
             else:
                 batch = self[idx]
-            # only happens if the last batch is smaller then batch_size
+            # only happens if the last batch is smaller than batch_size
             if len(batch) < batch_size and drop_last:
                 continue
 
@@ -483,7 +482,7 @@ class AnnCollectionView(_ConcatViewMixin, _IterateViewMixin):
         A function or a Mapping of functions which will be applied
         to the values of attributes (`.X`) or to specific keys of these attributes
         (`.obs`, `.obsm`, `.layers`).
-        The keys of the the Mapping should correspond to the attributes or keys of the
+        The keys of the Mapping should correspond to the attributes or keys of the
         attributes (hierarchically) and the values should be functions used for conversion.
 
         Examples
@@ -807,7 +806,7 @@ class AnnCollection(_ConcatViewMixin, _IterateViewMixin):
         (`.obs`, `.obsm`, `.layers`) of subset objects. The converters are not
         applied to `.obs` and `.obsm` (if present) of this object, only to the attributes
         of subset objects.
-        The keys of the the Mapping should correspond to the attributes or keys of the
+        The keys of the Mapping should correspond to the attributes or keys of the
         attributes (hierarchically) and the values should be functions used for conversion.
 
         Examples
@@ -980,7 +979,7 @@ class LazyAttrData(_IterateViewMixin):
             return _dtypes[self.attr][self.key]
 
         attr = self[:1]
-        if hasattr(attr, dtype):
+        if hasattr(attr, "dtype"):
             return attr.dtype
         else:
             return None
diff --git a/anndata/experimental/pytorch/_annloader.py b/anndata/experimental/pytorch/_annloader.py
index 409d3f3..67651f4 100644
--- a/anndata/experimental/pytorch/_annloader.py
+++ b/anndata/experimental/pytorch/_annloader.py
@@ -15,7 +15,6 @@ try:
     import torch
     from torch.utils.data import Sampler, BatchSampler, Dataset, DataLoader
 except ImportError:
-    warnings.warn("Сould not load pytorch.")
     Sampler, BatchSampler, Dataset, DataLoader = object, object, object, object
 
 
@@ -37,7 +36,7 @@ class BatchIndexSampler(Sampler):
         for i in range(0, self.n_obs, self.batch_size):
             batch = indices[i : min(i + self.batch_size, self.n_obs)]
 
-            # only happens if the last batch is smaller then batch_size
+            # only happens if the last batch is smaller than batch_size
             if len(batch) < self.batch_size and self.drop_last:
                 continue
 
@@ -132,7 +131,6 @@ class AnnLoader(DataLoader):
         use_cuda: bool = False,
         **kwargs,
     ):
-
         if isinstance(adatas, AnnData):
             adatas = [adatas]
 
diff --git a/anndata/logging.py b/anndata/logging.py
index c9d1d97..f5feac0 100644
--- a/anndata/logging.py
+++ b/anndata/logging.py
@@ -27,7 +27,7 @@ def get_memory_usage():
         meminfo = process.memory_info()
     except AttributeError:
         meminfo = process.get_memory_info()
-    mem = meminfo[0] / 2 ** 30  # output in GB
+    mem = meminfo[0] / 2**30  # output in GB
     mem_diff = mem
     global _previous_memory_usage
     if _previous_memory_usage is not None:
diff --git a/anndata/tests/conftest.py b/anndata/tests/conftest.py
index 3225b9f..6256b98 100644
--- a/anndata/tests/conftest.py
+++ b/anndata/tests/conftest.py
@@ -3,6 +3,7 @@ import warnings
 import pytest
 
 import anndata
+from anndata.tests.helpers import subset_func
 
 # TODO: Should be done in pyproject.toml, see anndata/conftest.py
 warnings.filterwarnings("ignore", category=anndata.OldFormatWarning)
diff --git a/anndata/tests/helpers.py b/anndata/tests/helpers.py
index e3ded19..8af3b2e 100644
--- a/anndata/tests/helpers.py
+++ b/anndata/tests/helpers.py
@@ -1,7 +1,7 @@
 from functools import singledispatch, wraps
 from string import ascii_letters
-from typing import Tuple, Optional
-from collections.abc import Mapping
+from typing import Tuple, Optional, Type
+from collections.abc import Mapping, Collection
 import warnings
 
 import h5py
@@ -10,12 +10,26 @@ import pandas as pd
 from pandas.api.types import is_numeric_dtype
 import pytest
 from scipy import sparse
+import random
 
 from anndata import AnnData, Raw
 from anndata._core.views import ArrayView
 from anndata._core.sparse_dataset import SparseDataset
 from anndata._core.aligned_mapping import AlignedMapping
 from anndata.utils import asarray
+from anndata.compat import AwkArray, DaskArray
+
+# Give this to gen_adata when dask array support is expected.
+GEN_ADATA_DASK_ARGS = dict(
+    obsm_types=(
+        sparse.csr_matrix,
+        np.ndarray,
+        pd.DataFrame,
+        DaskArray,
+    ),
+    varm_types=(sparse.csr_matrix, np.ndarray, pd.DataFrame, DaskArray),
+    layers_types=(sparse.csr_matrix, np.ndarray, pd.DataFrame, DaskArray),
+)
 
 
 def gen_vstr_recarray(m, n, dtype=None):
@@ -55,6 +69,65 @@ def gen_typed_df(n, index=None):
     )
 
 
+def _gen_awkward_inner(shape, rng, dtype):
+    # the maximum length a ragged dimension can take
+    MAX_RAGGED_DIM_LEN = 20
+    if not len(shape):
+        # abort condition -> no dimension left, return an actual value instead
+        return dtype(rng.randrange(1000))
+    else:
+        curr_dim_len = shape[0]
+        lil = []
+        if curr_dim_len is None:
+            # ragged dimension, set random length
+            curr_dim_len = rng.randrange(MAX_RAGGED_DIM_LEN)
+
+        for _ in range(curr_dim_len):
+            lil.append(_gen_awkward_inner(shape[1:], rng, dtype))
+
+        return lil
+
+
+def gen_awkward(shape, dtype=np.int32):
+    """Function to generate an awkward array with random values.
+
+    Awkward array dimensions can either be fixed-length ("regular") or variable length ("ragged")
+    (the first dimension is always fixed-length).
+
+
+    Parameters
+    ----------
+    shape
+        shape of the array to be generated. Any dimension specified as `None` will be simulated as ragged.
+    """
+    import awkward as ak
+
+    if shape[0] is None:
+        raise ValueError("The first dimension must be fixed-length.")
+
+    rng = random.Random(123)
+    shape = np.array(shape)
+
+    if np.any(shape == 0):
+        # use empty numpy array for fixed dimensions, then add empty singletons for ragged dimensions
+        var_dims = [i for i, s in enumerate(shape) if s is None]
+        shape = [s for s in shape if s is not None]
+        arr = ak.Array(np.empty(shape, dtype=dtype))
+        for d in var_dims:
+            arr = ak.singletons(arr, axis=d - 1)
+        return arr
+    else:
+        lil = _gen_awkward_inner(shape, rng, dtype)
+        arr = ak.values_astype(AwkArray(lil), dtype)
+
+    # make fixed-length dimensions regular
+    for i, d in enumerate(shape):
+        if d is not None:
+            arr = ak.to_regular(arr, i)
+
+    return arr
+
+
 def gen_typed_df_t2_size(m, n, index=None, columns=None) -> pd.DataFrame:
     s = 0
     df = pd.DataFrame()
@@ -77,8 +150,18 @@ def gen_adata(
     X_dtype=np.float32,
     # obs_dtypes,
     # var_dtypes,
-    obsm_types: "Collection[Type]" = (sparse.csr_matrix, np.ndarray, pd.DataFrame),
-    varm_types: "Collection[Type]" = (sparse.csr_matrix, np.ndarray, pd.DataFrame),
+    obsm_types: "Collection[Type]" = (
+        sparse.csr_matrix,
+        np.ndarray,
+        pd.DataFrame,
+        AwkArray,
+    ),
+    varm_types: "Collection[Type]" = (
+        sparse.csr_matrix,
+        np.ndarray,
+        pd.DataFrame,
+        AwkArray,
+    ),
     layers_types: "Collection[Type]" = (sparse.csr_matrix, np.ndarray, pd.DataFrame),
 ) -> AnnData:
     """\
@@ -104,6 +187,8 @@ def gen_adata(
     layers_types
         What kinds of containers should be in `.layers`?
     """
+    import dask.array as da
+
     M, N = shape
     obs_names = pd.Index(f"cell{i}" for i in range(shape[0]))
     var_names = pd.Index(f"gene{i}" for i in range(shape[1]))
@@ -121,16 +206,22 @@ def gen_adata(
         array=np.random.random((M, 50)),
         sparse=sparse.random(M, 100, format="csr"),
         df=gen_typed_df(M, obs_names),
+        awk_2d_ragged=gen_awkward((M, None)),
+        da=da.random.random((M, 50)),
     )
     obsm = {k: v for k, v in obsm.items() if type(v) in obsm_types}
     varm = dict(
         array=np.random.random((N, 50)),
         sparse=sparse.random(N, 100, format="csr"),
         df=gen_typed_df(N, var_names),
+        awk_2d_ragged=gen_awkward((N, None)),
+        da=da.random.random((N, 50)),
     )
     varm = {k: v for k, v in varm.items() if type(v) in varm_types}
     layers = dict(
-        array=np.random.random((M, N)), sparse=sparse.random(M, N, format="csr")
+        array=np.random.random((M, N)),
+        sparse=sparse.random(M, N, format="csr"),
+        da=da.random.random((M, N)),
     )
     layers = {k: v for k, v in layers.items() if type(v) in layers_types}
     obsp = dict(
@@ -147,6 +238,8 @@ def gen_adata(
             scalar_float=3.0,
             nested_further=dict(array=np.arange(5)),
         ),
+        awkward_regular=gen_awkward((10, 5)),
+        awkward_ragged=gen_awkward((12, None, None)),
         # U_recarray=gen_vstr_recarray(N, 5, "U4")
     )
     adata = AnnData(
@@ -158,7 +251,6 @@ def gen_adata(
         layers=layers,
         obsp=obsp,
         varp=varp,
-        dtype=X_dtype,
         uns=uns,
     )
     return adata
@@ -322,6 +414,17 @@ def assert_equal_h5py_dataset(a, b, exact=False, elem_name=None):
     assert_equal(b, a, exact, elem_name=elem_name)
 
 
+@assert_equal.register(DaskArray)
+def assert_equal_dask_array(a, b, exact=False, elem_name=None):
+    from dask.array.utils import assert_eq
+
+    if exact:
+        assert_eq(a, b, check_dtype=True, check_type=True, check_graph=False)
+    else:
+        # TODO: Why does it fail when check_graph=True
+        assert_eq(a, b, check_dtype=False, check_type=False, check_graph=False)
+
+
 @assert_equal.register(pd.DataFrame)
 def are_equal_dataframe(a, b, exact=False, elem_name=None):
     if not isinstance(b, pd.DataFrame):
@@ -337,6 +440,15 @@ def are_equal_dataframe(a, b, exact=False, elem_name=None):
     )
 
 
+@assert_equal.register(AwkArray)
+def assert_equal_awkarray(a, b, exact=False, elem_name=None):
+    import awkward as ak
+
+    if exact:
+        assert a.type == b.type, f"{a.type} != {b.type}, {format_msg(elem_name)}"
+    assert ak.to_list(a) == ak.to_list(b), format_msg(elem_name)
+
+
 @assert_equal.register(Mapping)
 def assert_equal_mapping(a, b, exact=False, elem_name=None):
     assert set(a.keys()) == set(b.keys()), format_msg(elem_name)
@@ -452,3 +564,15 @@ def assert_adata_equal(
             exact,
             elem_name=fmt_name(attr),
         )
+
+
+@singledispatch
+def as_dense_dask_array(a):
+    import dask.array as da
+
+    return da.asarray(a)
+
+
+@as_dense_dask_array.register(sparse.spmatrix)
+def _(a):
+    return as_dense_dask_array(a.toarray())
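The new helpers can be combined when a test needs dask- or awkward-backed containers; a short sketch (assuming dask and awkward are installed):

    from scipy import sparse
    from anndata.tests.helpers import (
        GEN_ADATA_DASK_ARGS,
        as_dense_dask_array,
        gen_adata,
        gen_awkward,
    )

    # AnnData whose obsm/varm/layers include dask arrays alongside the usual types
    adata = gen_adata((20, 10), **GEN_ADATA_DASK_ARGS)

    # Densify a sparse matrix into a dask array, e.g. as an X_type in parametrized tests
    X = as_dense_dask_array(sparse.random(20, 10, format="csr"))

    # 20 rows with a ragged (variable-length) second dimension
    awk = gen_awkward((20, None))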
diff --git a/anndata/tests/test_anncollection.py b/anndata/tests/test_anncollection.py
index abb7eb7..b8def95 100644
--- a/anndata/tests/test_anncollection.py
+++ b/anndata/tests/test_anncollection.py
@@ -12,13 +12,11 @@ _dense = lambda a: a.toarray() if issparse(a) else a
 
 @pytest.fixture
 def adatas(request):
-    adata1 = ad.AnnData(
-        X=request.param([[1, 2, 0], [4, 5, 0], [7, 8, 0]]), dtype="float32"
-    )
+    adata1 = ad.AnnData(X=request.param([[1, 2, 0], [4, 5, 0], [7, 8, 0]]))
     adata1.obs["a_test"] = ["a", "a", "b"]
     adata1.obsm["o_test"] = np.ones((adata1.n_obs, 2))
 
-    adata2 = ad.AnnData(X=request.param([[1, 3, 0], [9, 8, 0]]), dtype="float32")
+    adata2 = ad.AnnData(X=request.param([[1, 3, 0], [9, 8, 0]]))
     adata2.obs["a_test"] = ["c", "c"]
     adata2.obsm["o_test"] = np.zeros((adata2.n_obs, 2))
 
diff --git a/anndata/tests/test_annot.py b/anndata/tests/test_annot.py
index 2913b7c..025c7d5 100644
--- a/anndata/tests/test_annot.py
+++ b/anndata/tests/test_annot.py
@@ -39,7 +39,7 @@ def test_to_categorical_ordering(dtype):
 
 
 def test_non_str_to_not_categorical():
-    # Test case based on https://github.com/theislab/anndata/issues/141#issuecomment-802105259
+    # Test case based on https://github.com/scverse/anndata/issues/141#issuecomment-802105259
     obs = pd.DataFrame(index=[f"cell-{i}" for i in range(5)]).assign(
         str_with_nan=["foo", "bar", None, np.nan, "foo"],
         boolean_with_nan_and_none=[True, False, np.nan, None, True],
diff --git a/anndata/tests/test_awkward.py b/anndata/tests/test_awkward.py
new file mode 100644
index 0000000..97c7c42
--- /dev/null
+++ b/anndata/tests/test_awkward.py
@@ -0,0 +1,371 @@
+"""Tests related to awkward arrays"""
+import pytest
+import numpy as np
+import numpy.testing as npt
+
+from anndata.tests.helpers import assert_equal, gen_adata, gen_awkward
+from anndata.compat import awkward as ak
+from anndata import ImplicitModificationWarning
+from anndata.utils import dim_len
+from anndata import AnnData, read_h5ad
+import anndata
+import pandas as pd
+
+
+@pytest.mark.parametrize(
+    "array,shape",
+    [
+        # numpy array
+        [ak.Array(np.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))), (2, 3, 4, 5)],
+        # record
+        [ak.Array([{"a": 1, "b": 2}, {"a": 1, "b": 3}]), (2, 2)],
+        # ListType, variable length
+        [ak.Array([[1], [2, 3], [4, 5, 6]]), (3, None)],
+        # ListType, happens to have the same length, but is not regular
+        [ak.Array([[2], [3], [4]]), (3, None)],
+        # RegularType + nested ListType
+        [ak.to_regular(ak.Array([[[1, 2], [3]], [[2], [3, 4, 5]]]), 1), (2, 2, None)],
+        # nested record
+        [
+            ak.to_regular(ak.Array([[{"a": 0}, {"b": 1}], [{"c": 2}, {"d": 3}]]), 1),
+            (2, 2, 4),
+        ],
+        # mixed types (variable length)
+        [ak.Array([[1, 2], ["a"]]), (2, None)],
+        # mixed types (but regular)
+        [ak.to_regular(ak.Array([[1, 2], ["a", "b"]]), 1), (2, 2)],
+        # zero-size edge cases
+        [ak.Array(np.ones((0, 7))), (0, 7)],
+        [ak.Array(np.ones((7, 0))), (7, 0)],
+        # UnionType of two regular types with different dimensions
+        [
+            ak.concatenate([ak.Array(np.ones((2, 2))), ak.Array(np.ones((2, 3)))]),
+            (4, None),
+        ],
+        # UnionType of two regular types with same dimension
+        [
+            ak.concatenate(
+                [
+                    ak.Array(np.ones((2, 2))),
+                    ak.Array(np.array([["a", "a"], ["a", "a"]])),
+                ]
+            ),
+            (4, 2),
+        ],
+        # Array of string types
+        [ak.Array(["a", "b", "c"]), (3,)],
+        [ak.Array([["a", "b"], ["c", "d"], ["e", "f"]]), (3, None)],
+        [ak.to_regular(ak.Array([["a", "b"], ["c", "d"], ["e", "f"]]), 1), (3, 2)],
+    ],
+)
+def test_dim_len(array, shape):
+    """Test that dim_len returns the right value for awkward arrays."""
+    for axis, size in enumerate(shape):
+        assert size == dim_len(array, axis)
+
+    # Requesting the size for an axis higher than the array has dimensions should raise a TypeError
+    with pytest.raises(TypeError):
+        dim_len(array, len(shape))
+
+
+@pytest.mark.parametrize(
+    "field,value,valid",
+    [
+        ["obsm", gen_awkward((10, 5)), True],
+        ["obsm", gen_awkward((10, None)), True],
+        ["obsm", gen_awkward((10, None, None)), True],
+        ["obsm", gen_awkward((10, 5, None)), True],
+        ["obsm", gen_awkward((8, 10)), False],
+        ["obsm", gen_awkward((8, None)), False],
+        ["varm", gen_awkward((20, 5)), True],
+        ["varm", gen_awkward((20, None)), True],
+        ["varm", gen_awkward((20, None, None)), True],
+        ["varm", gen_awkward((20, 5, None)), True],
+        ["varm", gen_awkward((8, 20)), False],
+        ["varm", gen_awkward((8, None)), False],
+        ["uns", gen_awkward((7,)), True],
+        ["uns", gen_awkward((7, None)), True],
+        ["uns", gen_awkward((7, None, None)), True],
+    ],
+)
+def test_set_awkward(field, value, valid):
+    """Check if we can set obsm, .varm and .uns with different types
+    of awkward arrays and if error messages are properly raised when the dimensions do not align.
+    """
+    adata = gen_adata((10, 20), varm_types=(), obsm_types=(), layers_types=())
+
+    def _assign():
+        getattr(adata, field)["test"] = value
+
+    if not valid:
+        with pytest.raises(ValueError):
+            _assign()
+    else:
+        _assign()
+
+
+@pytest.mark.parametrize("key", ["obsm", "varm", "uns"])
+def test_copy(key):
+    """Check that modifying a copy does not modify the original"""
+    adata = gen_adata((3, 3), varm_types=(), obsm_types=(), layers_types=())
+    getattr(adata, key)["awk"] = ak.Array([{"a": [1], "b": [2], "c": [3]}] * 3)
+    adata_copy = adata.copy()
+    getattr(adata_copy, key)["awk"]["c"] = np.full((3, 1), 4)
+    getattr(adata_copy, key)["awk"]["d"] = np.full((3, 1), 5)
+
+    # values in copy were correctly set
+    npt.assert_equal(getattr(adata_copy, key)["awk"]["c"], np.full((3, 1), 4))
+    npt.assert_equal(getattr(adata_copy, key)["awk"]["d"], np.full((3, 1), 5))
+
+    # values in original were not updated
+    npt.assert_equal(getattr(adata, key)["awk"]["c"], np.full((3, 1), 3))
+    with pytest.raises(IndexError):
+        getattr(adata, key)["awk"]["d"]
+
+
+@pytest.mark.parametrize("key", ["obsm", "varm"])
+def test_view(key):
+    """Check that modifying a view does not modify the original"""
+    adata = gen_adata((3, 3), varm_types=(), obsm_types=(), layers_types=())
+    getattr(adata, key)["awk"] = ak.Array([{"a": [1], "b": [2], "c": [3]}] * 3)
+    adata_view = adata[:2, :2]
+
+    with pytest.warns(ImplicitModificationWarning, match="initializing view as actual"):
+        getattr(adata_view, key)["awk"]["c"] = np.full((2, 1), 4)
+        getattr(adata_view, key)["awk"]["d"] = np.full((2, 1), 5)
+
+    # values in view were correctly set
+    npt.assert_equal(getattr(adata_view, key)["awk"]["c"], np.full((2, 1), 4))
+    npt.assert_equal(getattr(adata_view, key)["awk"]["d"], np.full((2, 1), 5))
+
+    # values in original were not updated
+    npt.assert_equal(getattr(adata, key)["awk"]["c"], np.full((3, 1), 3))
+    with pytest.raises(IndexError):
+        getattr(adata, key)["awk"]["d"]
+
+
+def test_view_of_awkward_array_with_custom_behavior():
+    """Currently can't create view of arrays with custom __name__ (in this case "string")
+    See https://github.com/scverse/anndata/pull/647#discussion_r963494798_"""
+    adata = gen_adata((3, 3), varm_types=(), obsm_types=(), layers_types=())
+    adata.obsm["awk_string"] = ak.Array(["AAA", "BBB", "CCC"])
+    adata_view = adata[:2]
+
+    with pytest.raises(NotImplementedError):
+        adata_view.obsm["awk_string"]
+
+
+@pytest.mark.parametrize(
+    "array",
+    [
+        # numpy array
+        ak.Array(np.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))),
+        # record
+        ak.Array([{"a": 1, "b": 2}, {"a": 1, "b": 3}]),
+        # ListType, variable length
+        ak.Array([[1], [2, 3], [4, 5, 6]]),
+        # RegularType + nested ListType
+        ak.to_regular(ak.Array([[[1, 2], [3]], [[2], [3, 4, 5]]]), 1),
+        # nested record
+        ak.to_regular(ak.Array([[{"a": 0}, {"b": 1}], [{"c": 2}, {"d": 3}]]), 1),
+        # mixed types (variable length)
+        ak.Array([[1, 2], ["a"]]),
+        # zero-size edge cases
+        ak.Array(np.ones((0, 7))),
+        ak.Array(np.ones((7, 0))),
+        # UnionType of two regular types with different dimensions
+        ak.concatenate([ak.Array(np.ones((2, 2))), ak.Array(np.ones((2, 3)))]),
+        # UnionType of two regular types with same dimension
+        ak.concatenate(
+            [
+                ak.Array(np.ones((2, 2))),
+                ak.Array(np.array([["a", "a"], ["a", "a"]])),
+            ]
+        ),
+        # categorical array
+        ak.to_categorical(ak.Array([["a", "b", "c"], ["a", "b"]])),
+        ak.to_categorical(ak.Array([[1, 1, 2], [3, 3]])),
+        # typical record type with AIRR data consisting of different dtypes
+        ak.Array(
+            [
+                [
+                    {
+                        "v_call": "TRV1",
+                        "junction_aa": "ADDEEKK",
+                        "productive": True,
+                        "locus": None,
+                        "consensus_count": 3,
+                    },
+                    {
+                        "v_call": "TRV2",
+                        "productive": False,
+                        "locus": "TRA",
+                        "consensus_count": 4,
+                    },
+                ],
+                [
+                    {
+                        "v_call": None,
+                        "junction_aa": "ADDEKK",
+                        "productive": None,
+                        "locus": "IGK",
+                        "consensus_count": 3,
+                    }
+                ],
+            ]
+        ),
+    ],
+)
+def test_awkward_io(tmp_path, array):
+    adata = AnnData()
+    adata.uns["awk"] = array
+    adata_path = tmp_path / "adata.h5ad"
+    adata.write_h5ad(adata_path)
+
+    adata2 = read_h5ad(adata_path)
+
+    assert_equal(adata.uns["awk"], adata2.uns["awk"], exact=True)
+
+
+# @pytest.mark.parametrize("join", ["outer", "inner"])
+@pytest.mark.parametrize(
+    "arrays,join,expected",
+    [
+        pytest.param(
+            [ak.Array([{"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}]), None],
+            "inner",
+            None,
+            id="awk:recordoflists_null-inner",
+        ),
+        pytest.param(
+            [ak.Array([{"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}]), None],
+            "outer",
+            ak.Array(
+                [{"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}, None, None, None]
+            ),
+            # maybe should return: ak.Array([{"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}, {}, {}, {}]),
+            id="awk:recordoflists_null-outer",
+        ),
+        pytest.param(
+            [ak.Array([[{"a": 1}, {"a": 2}], []]), None],
+            "outer",
+            ak.Array([[{"a": 1}, {"a": 2}], [], None, None, None]),
+            # maybe should return: ak.Array([[{"a": 1}, {"a": 2}], [], [], []]),
+            id="awk:listofrecords_null-outer",
+        ),
+        pytest.param(
+            [None, ak.Array([{"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}])],
+            "inner",
+            None,
+            id="null_awk-inner",
+        ),
+        pytest.param(
+            [None, ak.Array([{"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}])],
+            "outer",
+            ak.Array(
+                [None, None, None, {"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}]
+            ),
+            # maybe should return: ak.Array([{}, {}, {}, {"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}]),
+            id="null_awk:recordoflists-outer",
+        ),
+        pytest.param(
+            [ak.Array([{"a": 1}, {"a": 2}]), ak.Array([{"a": 3}, {"a": 4}])],
+            "inner",
+            ak.Array([{"a": i} for i in range(1, 5)]),
+            id="awk-simple-record",
+        ),
+        pytest.param(
+            [
+                ak.Array([{"a": 1, "b": 1}, {"a": 2, "b": 2}]),
+                ak.Array([{"a": 3}, {"a": 4}]),
+            ],
+            "inner",
+            ak.Array([{"a": i} for i in range(1, 5)]),
+            id="awk-simple-record-inner",
+        ),
+        # TODO:
+        # pytest.param(
+        #     [
+        #         ak.Array([{"a": 1, "b": 1}, {"a": 2, "b": 2}]),
+        #         ak.Array([{"a": 3}, {"a": 4}]),
+        #     ],
+        #     "outer",
+        #     ak.Array([{"a": 1, "b": 1}, {"a": 2, "b": 2}, {"a": 3}, {"a": 4},]),
+        #     id="awk-simple-record-outer",
+        # ),
+        pytest.param(
+            [
+                None,
+                ak.Array([{"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}]),
+                pd.DataFrame(),
+            ],
+            "outer",
+            NotImplementedError,  # TODO: ak.Array([{}, {}, {}, {"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}]),
+            id="null_awk_empty-pd",
+        ),
+        pytest.param(
+            [
+                ak.Array([{"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}]),
+                pd.DataFrame(),
+            ],
+            "outer",
+            NotImplementedError,  # TODO: ak.Array([{"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}]),
+            id="awk_empty-pd",
+        ),
+        pytest.param(
+            [
+                ak.Array([{"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}]),
+                pd.DataFrame().assign(a=[3, 4], b=[5, 6]),
+            ],
+            "outer",  # TODO: Should try inner too if implemented
+            NotImplementedError,
+        ),
+        pytest.param(
+            [
+                ak.Array([{"a": [1, 2], "b": [1, 2]}, {"a": [3], "b": [4]}]),
+                np.ones((3, 2)),
+            ],
+            "outer",
+            NotImplementedError,
+        ),
+    ],
+)
+@pytest.mark.parametrize("key", ["obsm", "varm"])
+def test_concat_mixed_types(key, arrays, expected, join):
+    """Test that concatenation of AwkwardArrays with arbitrary types, but zero length dimension
+    or missing values works."""
+    axis = 0 if key == "obsm" else 1
+
+    to_concat = []
+    cell_id, gene_id = 0, 0
+    for a in arrays:
+        shape = np.array([3, 3])  # default shape (in case of missing array)
+        if a is not None:
+            length = dim_len(a, 0)
+            shape[axis] = length
+
+        tmp_adata = gen_adata(
+            tuple(shape), varm_types=(), obsm_types=(), layers_types=()
+        )
+        prev_cell_id, prev_gene_id = cell_id, gene_id
+        cell_id, gene_id = cell_id + shape[0], gene_id + shape[1]
+        tmp_adata.obs_names = pd.RangeIndex(prev_cell_id, cell_id).astype(str)
+        tmp_adata.var_names = pd.RangeIndex(prev_gene_id, gene_id).astype(str)
+        if a is not None:
+            if isinstance(a, pd.DataFrame):
+                a.set_index(
+                    tmp_adata.obs_names if key == "obsm" else tmp_adata.var_names,
+                    inplace=True,
+                )
+            getattr(tmp_adata, key)["test"] = a
+
+        to_concat.append(tmp_adata)
+
+    if isinstance(expected, type) and issubclass(expected, Exception):
+        with pytest.raises(expected):
+            anndata.concat(to_concat, axis=axis, join=join)
+    else:
+        print(to_concat)
+        result_adata = anndata.concat(to_concat, axis=axis, join=join)
+        result = getattr(result_adata, key).get("test", None)
+        assert_equal(expected, result, exact=True)
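For reference, a minimal sketch of the behaviour these tests exercise — storing an awkward array in `.obsm` and querying its (possibly ragged) dimensions with `dim_len` (assuming awkward is installed):

    import awkward as ak
    import numpy as np
    from anndata import AnnData
    from anndata.utils import dim_len

    adata = AnnData(np.zeros((3, 2)))
    # One variable-length list of records per observation
    adata.obsm["airr"] = ak.Array([[{"a": 1}], [{"a": 2}, {"a": 3}], []])

    assert dim_len(adata.obsm["airr"], 0) == 3     # fixed first dimension
    assert dim_len(adata.obsm["airr"], 1) is None  # ragged second dimension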
diff --git a/anndata/tests/test_base.py b/anndata/tests/test_base.py
index 220d549..2bce50e 100644
--- a/anndata/tests/test_base.py
+++ b/anndata/tests/test_base.py
@@ -1,4 +1,5 @@
 from itertools import product
+import warnings
 
 import numpy as np
 from numpy import ma
@@ -164,7 +165,7 @@ def test_setting_index_names_error(attr):
     orig = adata_sparse[:2, :2]
     adata = adata_sparse[:2, :2]
     assert getattr(adata, attr).name is None
-    with pytest.raises(ValueError, match=fr"AnnData expects \.{attr[:3]}\.index\.name"):
+    with pytest.raises(ValueError, match=rf"AnnData expects \.{attr[:3]}\.index\.name"):
         setattr(adata, attr, pd.Index(["x", "y"], name=0))
     assert adata.is_view
     assert getattr(adata, attr).tolist() != ["x", "y"]
@@ -322,21 +323,6 @@ def test_slicing_strings():
         adata[["A", "B", "not_in_obs"], :]
 
 
-def test_slicing_graphs():
-    # Testing for deprecated behaviour of connectivity matrices in .uns["neighbors"]
-    with pytest.warns(FutureWarning, match=r".obsp\['connectivities'\]"):
-        adata = AnnData(
-            np.array([[1, 2], [3, 4], [5, 6]]),
-            uns=dict(neighbors=dict(connectivities=sp.csr_matrix(np.ones((3, 3))))),
-        )
-
-    adata_sub = adata[[0, 1], :]
-    with pytest.warns(FutureWarning):
-        assert adata_sub.uns["neighbors"]["connectivities"].shape[0] == 2
-        assert adata.uns["neighbors"]["connectivities"].shape[0] == 3
-        assert adata_sub.copy().uns["neighbors"]["connectivities"].shape[0] == 2
-
-
 def test_slicing_series():
     adata = AnnData(
         np.array([[1, 2], [3, 4], [5, 6]]),
@@ -452,7 +438,9 @@ def test_rename_categories():
     adata.uns["tool"]["params"] = dict(groupby="cat_anno")
 
     new_categories = ["c", "d"]
-    adata.rename_categories("cat_anno", new_categories)
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        adata.rename_categories("cat_anno", new_categories)
 
     assert list(adata.obs["cat_anno"].cat.categories) == new_categories
     assert list(adata.uns["tool"]["cat_array"].dtype.names) == new_categories
@@ -604,3 +592,21 @@ def test_copy():
         assert_eq_not_id(map_sprs.keys(), map_copy.keys())
         for key in map_sprs.keys():
             assert_eq_not_id(map_sprs[key], map_copy[key])
+
+
+def test_to_memory_no_copy():
+    adata = gen_adata((3, 5))
+    mem = adata.to_memory()
+
+    assert mem.X is adata.X
+    # Currently does not hold for `obs`, `var`, but should in future
+    for key in adata.layers:
+        assert mem.layers[key] is adata.layers[key]
+    for key in adata.obsm:
+        assert mem.obsm[key] is adata.obsm[key]
+    for key in adata.varm:
+        assert mem.varm[key] is adata.varm[key]
+    for key in adata.obsp:
+        assert mem.obsp[key] is adata.obsp[key]
+    for key in adata.varp:
+        assert mem.varp[key] is adata.varp[key]
diff --git a/anndata/tests/test_concatenate.py b/anndata/tests/test_concatenate.py
index f6e9634..e9f0041 100644
--- a/anndata/tests/test_concatenate.py
+++ b/anndata/tests/test_concatenate.py
@@ -2,6 +2,7 @@ from collections.abc import Hashable
 from copy import deepcopy
 from itertools import chain, product
 from functools import partial, singledispatch
+from typing import Any, List, Callable
 import warnings
 
 import numpy as np
@@ -17,8 +18,14 @@ from anndata import AnnData, Raw, concat
 from anndata._core.index import _subset
 from anndata._core import merge
 from anndata.tests import helpers
-from anndata.tests.helpers import assert_equal, gen_adata
+from anndata.tests.helpers import (
+    assert_equal,
+    as_dense_dask_array,
+    gen_adata,
+    GEN_ADATA_DASK_ARGS,
+)
 from anndata.utils import asarray
+from anndata.compat import DaskArray, AwkArray
 
 
 @singledispatch
@@ -27,12 +34,17 @@ def filled_like(a, fill_value=None):
 
 
 @filled_like.register(np.ndarray)
-def _filled_array(a, fill_value=None):
+def _filled_array_np(a, fill_value=None):
     if fill_value is None:
         fill_value = np.nan
     return np.broadcast_to(fill_value, a.shape)
 
 
+@filled_like.register(DaskArray)
+def _filled_array(a, fill_value=None):
+    return as_dense_dask_array(_filled_array_np(a, fill_value))
+
+
 @filled_like.register(sparse.spmatrix)
 def _filled_sparse(a, fill_value=None):
     if fill_value is None:
@@ -60,9 +72,11 @@ def make_idx_tuple(idx, axis):
     return tuple(tup)
 
 
+# Will call func(sparse_matrix) so these types should be sparse compatible
+# See array_type if only dense arrays are expected as input.
 @pytest.fixture(
-    params=[asarray, sparse.csr_matrix, sparse.csc_matrix],
-    ids=["np_array", "scipy_csr", "scipy_csc"],
+    params=[asarray, sparse.csr_matrix, sparse.csc_matrix, as_dense_dask_array],
+    ids=["np_array", "scipy_csr", "scipy_csc", "dask_array"],
 )
 def array_type(request):
     return request.param
@@ -139,7 +153,7 @@ def test_concat_interface_errors():
     ],
 )
 def test_concatenate_roundtrip(join_type, array_type, concat_func, backwards_compat):
-    adata = gen_adata((100, 10), X_type=array_type)
+    adata = gen_adata((100, 10), X_type=array_type, **GEN_ADATA_DASK_ARGS)
 
     remaining = adata.obs_names
     subsets = []
@@ -430,20 +444,27 @@ def test_concatenate_fill_value(fill_val):
 
     adata1 = gen_adata((10, 10))
     adata1.obsm = {
-        k: v for k, v in adata1.obsm.items() if not isinstance(v, pd.DataFrame)
+        k: v
+        for k, v in adata1.obsm.items()
+        if not isinstance(v, (pd.DataFrame, AwkArray))
     }
     adata2 = gen_adata((10, 5))
     adata2.obsm = {
         k: v[:, : v.shape[1] // 2]
         for k, v in adata2.obsm.items()
-        if not isinstance(v, pd.DataFrame)
+        if not isinstance(v, (pd.DataFrame, AwkArray))
     }
     adata3 = gen_adata((7, 3))
     adata3.obsm = {
         k: v[:, : v.shape[1] // 3]
         for k, v in adata3.obsm.items()
-        if not isinstance(v, pd.DataFrame)
+        if not isinstance(v, (pd.DataFrame, AwkArray))
     }
+    # remove AwkArrays from adata.var, as outer joins are not yet implemented for them
+    for tmp_ad in [adata1, adata2, adata3]:
+        for k in [k for k, v in tmp_ad.varm.items() if isinstance(v, AwkArray)]:
+            del tmp_ad.varm[k]
+
     joined = adata1.concatenate([adata2, adata3], join="outer", fill_value=fill_val)
 
     ptr = 0
@@ -666,6 +687,100 @@ def test_concatenate_with_raw():
     assert adata_all.raw is None
 
 
+def test_concatenate_awkward(join_type):
+    import awkward as ak
+
+    a = ak.Array([[{"a": 1, "b": "foo"}], [{"a": 2, "b": "bar"}, {"a": 3, "b": "baz"}]])
+    b = ak.Array(
+        [
+            [{"a": 4}, {"a": 5}],
+            [{"a": 6}],
+            [{"a": 7}],
+        ]
+    )
+
+    adata_a = AnnData(np.zeros((2, 0), dtype=float), obsm={"awk": a})
+    adata_b = AnnData(np.zeros((3, 0), dtype=float), obsm={"awk": b})
+
+    if join_type == "inner":
+        expected = ak.Array(
+            [
+                [{"a": 1}],
+                [{"a": 2}, {"a": 3}],
+                [{"a": 4}, {"a": 5}],
+                [{"a": 6}],
+                [{"a": 7}],
+            ]
+        )
+    elif join_type == "outer":
+        # TODO: This is what we would like to return, but waiting on:
+        # * https://github.com/scikit-hep/awkward/issues/2182 and awkward 2.1.0
+        # * https://github.com/scikit-hep/awkward/issues/2173
+        # expected = ak.Array(
+        #     [
+        #         [{"a": 1, "b": "foo"}],
+        #         [{"a": 2, "b": "bar"}, {"a": 3, "b": "baz"}],
+        #         [{"a": 4, "b": None}, {"a": 5, "b": None}],
+        #         [{"a": 6, "b": None}],
+        #         [{"a": 7, "b": None}],
+        #     ]
+        # )
+        expected = ak.concatenate(
+            [  # I don't think I can construct a UnionArray directly
+                ak.Array(
+                    [
+                        [{"a": 1, "b": "foo"}],
+                        [{"a": 2, "b": "bar"}, {"a": 3, "b": "baz"}],
+                    ]
+                ),
+                ak.Array(
+                    [
+                        [{"a": 4}, {"a": 5}],
+                        [{"a": 6}],
+                        [{"a": 7}],
+                    ]
+                ),
+            ]
+        )
+
+    result = concat([adata_a, adata_b], join=join_type).obsm["awk"]
+
+    assert_equal(expected, result)
+
+
+@pytest.mark.parametrize(
+    "other",
+    [
+        pd.DataFrame({"a": [4, 5, 6], "b": ["foo", "bar", "baz"]}, index=list("cde")),
+        np.ones((3, 2)),
+        sparse.random(3, 100, format="csr"),
+    ],
+)
+def test_awkward_does_not_mix(join_type, other):
+    import awkward as ak
+
+    awk = ak.Array(
+        [[{"a": 1, "b": "foo"}], [{"a": 2, "b": "bar"}, {"a": 3, "b": "baz"}]]
+    )
+
+    adata_a = AnnData(
+        np.zeros((2, 3), dtype=float),
+        obs=pd.DataFrame(index=list("ab")),
+        obsm={"val": awk},
+    )
+    adata_b = AnnData(
+        np.zeros((3, 3), dtype=float),
+        obs=pd.DataFrame(index=list("cde")),
+        obsm={"val": other},
+    )
+
+    with pytest.raises(
+        NotImplementedError,
+        match="Cannot concatenate an AwkwardArray with other array types",
+    ):
+        concat([adata_a, adata_b], join=join_type)
+
+
 def test_pairwise_concat(axis, array_type):
     dim_sizes = [[100, 200, 50], [50, 50, 50]]
     if axis:
@@ -979,8 +1094,8 @@ def test_concatenate_uns(unss, merge_strategy, result, value_gen):
 
 
 def test_transposed_concat(array_type, axis, join_type, merge_strategy, fill_val):
-    lhs = gen_adata((10, 10), X_type=array_type)
-    rhs = gen_adata((10, 12), X_type=array_type)
+    lhs = gen_adata((10, 10), X_type=array_type, **GEN_ADATA_DASK_ARGS)
+    rhs = gen_adata((10, 12), X_type=array_type, **GEN_ADATA_DASK_ARGS)
 
     a = concat([lhs, rhs], axis=axis, join=join_type, merge=merge_strategy)
     b = concat(
@@ -990,14 +1105,14 @@ def test_transposed_concat(array_type, axis, join_type, merge_strategy, fill_val
     assert_equal(a, b)
 
 
-def test_batch_key(axis):
+def test_batch_key(axis, array_type):
     """Test that concat only adds a label if the key is provided"""
 
     def get_annot(adata):
         return getattr(adata, ("obs", "var")[axis])
 
-    lhs = gen_adata((10, 10))
-    rhs = gen_adata((10, 12))
+    lhs = gen_adata((10, 10), **GEN_ADATA_DASK_ARGS)
+    rhs = gen_adata((10, 12), **GEN_ADATA_DASK_ARGS)
 
     # There is probably a prettier way to do this
     annot = get_annot(concat([lhs, rhs], axis=axis))
@@ -1038,6 +1153,71 @@ def test_concat_categories_from_mapping():
     )
 
 
+def test_concat_categories_maintain_dtype():
+    a = AnnData(
+        X=np.ones((5, 1)),
+        obs=pd.DataFrame(
+            {
+                "cat": pd.Categorical(list("aabcc")),
+                "cat_ordered": pd.Categorical(list("aabcc"), ordered=True),
+            },
+            index=[f"cell{i:02}" for i in range(5)],
+        ),
+    )
+    b = AnnData(
+        X=np.ones((5, 1)),
+        obs=pd.DataFrame(
+            {
+                "cat": pd.Categorical(list("bccdd")),
+                "cat_ordered": pd.Categorical(list("bccdd"), ordered=True),
+            },
+            index=[f"cell{i:02}" for i in range(5, 10)],
+        ),
+    )
+    c = AnnData(
+        X=np.ones((5, 1)),
+        obs=pd.DataFrame(
+            {
+                "cat_ordered": pd.Categorical(list("bccdd"), ordered=True),
+            },
+            index=[f"cell{i:02}" for i in range(5, 10)],
+        ),
+    )
+
+    result = concat({"a": a, "b": b, "c": c}, join="outer")
+
+    assert pd.api.types.is_categorical_dtype(
+        result.obs["cat"]
+    ), f"Was {result.obs['cat'].dtype}"
+    assert pd.api.types.is_string_dtype(result.obs["cat_ordered"])
+
+
+def test_concat_ordered_categoricals_retained():
+    a = AnnData(
+        X=np.ones((5, 1)),
+        obs=pd.DataFrame(
+            {
+                "cat_ordered": pd.Categorical(list("aabcd"), ordered=True),
+            },
+            index=[f"cell{i:02}" for i in range(5)],
+        ),
+    )
+    b = AnnData(
+        X=np.ones((5, 1)),
+        obs=pd.DataFrame(
+            {
+                "cat_ordered": pd.Categorical(list("abcdd"), ordered=True),
+            },
+            index=[f"cell{i:02}" for i in range(5, 10)],
+        ),
+    )
+
+    c = concat([a, b])
+
+    assert pd.api.types.is_categorical_dtype(c.obs["cat_ordered"])
+    assert c.obs["cat_ordered"].cat.ordered
+
+
 def test_concat_names(axis):
     def get_annot(adata):
         return getattr(adata, ("obs", "var")[axis])
@@ -1072,12 +1252,29 @@ def expected_shape(a, b, axis, join):
     "shape", [pytest.param((8, 0), id="no_var"), pytest.param((0, 10), id="no_obs")]
 )
 def test_concat_size_0_dim(axis, join_type, merge_strategy, shape):
-    # https://github.com/theislab/anndata/issues/526
+    # https://github.com/scverse/anndata/issues/526
     a = gen_adata((5, 7))
     b = gen_adata(shape)
     alt_axis = 1 - axis
     dim = ("obs", "var")[axis]
 
+    # TODO: Remove, see: https://github.com/scverse/anndata/issues/905
+    import awkward as ak
+
+    if (
+        (join_type == "inner")
+        and (merge_strategy in ("same", "unique"))
+        and ((axis, shape.index(0)) in [(0, 1), (1, 0)])
+        and ak.__version__ == "2.0.7"  # indicates if a release has happened
+    ):
+        aligned_mapping = (b.obsm, b.varm)[1 - axis]
+        to_remove = []
+        for k, v in aligned_mapping.items():
+            if isinstance(v, ak.Array):
+                to_remove.append(k)
+        for k in to_remove:
+            aligned_mapping.pop(k)
+
     expected_size = expected_shape(a, b, axis=axis, join=join_type)
     result = concat(
         {"a": a, "b": b},
@@ -1123,10 +1320,10 @@ def test_concat_size_0_dim(axis, join_type, merge_strategy, shape):
                     )
 
 
-@pytest.mark.parametrize("elem", ["sparse", "array", "df"])
+@pytest.mark.parametrize("elem", ["sparse", "array", "df", "da"])
 def test_concat_outer_aligned_mapping(elem):
-    a = gen_adata((5, 5))
-    b = gen_adata((3, 5))
+    a = gen_adata((5, 5), **GEN_ADATA_DASK_ARGS)
+    b = gen_adata((3, 5), **GEN_ADATA_DASK_ARGS)
     del b.obsm[elem]
 
     concated = concat({"a": a, "b": b}, join="outer", label="group")
@@ -1136,7 +1333,7 @@ def test_concat_outer_aligned_mapping(elem):
 
 
 def test_concatenate_size_0_dim():
-    # https://github.com/theislab/anndata/issues/526
+    # https://github.com/scverse/anndata/issues/526
 
     a = gen_adata((5, 10))
     b = gen_adata((5, 0))
@@ -1161,13 +1358,11 @@ def test_concat_null_X():
     assert_equal(no_X, orig)
 
 
-# https://github.com/theislab/ehrapy/issues/151#issuecomment-1016753744
+# https://github.com/scverse/ehrapy/issues/151#issuecomment-1016753744
 def test_concat_X_dtype():
-    adatas_orig = {
-        k: AnnData(np.ones((20, 10), dtype=np.int8), dtype=np.int8) for k in list("abc")
-    }
+    adatas_orig = {k: AnnData(np.ones((20, 10), dtype=np.int8)) for k in list("abc")}
     for adata in adatas_orig.values():
-        adata.raw = AnnData(np.ones((20, 30), dtype=np.float64), dtype=np.float64)
+        adata.raw = AnnData(np.ones((20, 30), dtype=np.float64))
 
     result = concat(adatas_orig, index_unique="-")
 
@@ -1179,3 +1374,43 @@ def test_concat_X_dtype():
 # def test_concatenate_uns_types():
 #     from anndata._core.merge import UNS_STRATEGIES, UNS_STRATEGIES_TYPE
 #     assert set(UNS_STRATEGIES.keys()) == set(UNS_STRATEGIES_TYPE.__args__)
+
+
+# Tests how dask plays with other types on concatenation.
+def test_concat_different_types_dask(merge_strategy, array_type):
+    from scipy import sparse
+    import anndata as ad
+    import dask.array as da
+
+    varm_array = sparse.random(5, 20, density=0.5, format="csr")
+
+    ad1 = ad.AnnData(X=np.ones((5, 5)), varm={"a": varm_array})
+    ad1_other = ad.AnnData(X=np.ones((5, 5)), varm={"a": array_type(varm_array)})
+    ad2 = ad.AnnData(X=np.zeros((5, 5)), varm={"a": da.ones(5, 20)})
+
+    result1 = ad.concat([ad1, ad2], merge=merge_strategy)
+    target1 = ad.concat([ad1_other, ad2], merge=merge_strategy)
+    result2 = ad.concat([ad2, ad1], merge=merge_strategy)
+    target2 = ad.concat([ad2, ad1_other], merge=merge_strategy)
+
+    assert_equal(result1, target1)
+    assert_equal(result2, target2)
+
+
+def test_outer_concat_with_missing_value_for_df():
+    # https://github.com/scverse/anndata/issues/901
+    # TODO: Extend this test to cover all cases of missing values
+    # TODO: Check values
+    a_idx = ["a", "b", "c", "d", "e"]
+    b_idx = ["f", "g", "h", "i", "j", "k", "l", "m"]
+    a = AnnData(
+        np.ones((5, 5)),
+        obs=pd.DataFrame(index=a_idx),
+    )
+    b = AnnData(
+        np.zeros((8, 9)),
+        obs=pd.DataFrame(index=b_idx),
+        obsm={"df": pd.DataFrame({"col": np.arange(8)}, index=b_idx)},
+    )
+
+    concat([a, b], join="outer")
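
A minimal sketch of the inner-join behaviour exercised by the new awkward-array
concatenation tests above (not part of the upstream diff; assumes anndata >= 0.9
with the optional awkward package installed — only record fields present in
every input are kept):

    import awkward as ak
    import numpy as np
    import anndata as ad

    a = ad.AnnData(
        np.zeros((2, 0)),
        obsm={"awk": ak.Array([[{"a": 1, "b": "x"}], [{"a": 2, "b": "y"}]])},
    )
    b = ad.AnnData(np.zeros((1, 0)), obsm={"awk": ak.Array([[{"a": 3}]])})

    inner = ad.concat([a, b], join="inner")
    # the records keep only the shared field "a"
    print(inner.obsm["awk"].tolist())
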
diff --git a/anndata/tests/test_dask.py b/anndata/tests/test_dask.py
index a179bd0..e2d820e 100644
--- a/anndata/tests/test_dask.py
+++ b/anndata/tests/test_dask.py
@@ -3,12 +3,53 @@ For tests using dask
 """
 import anndata as ad
 import pandas as pd
-
+from anndata._core.anndata import AnnData
 import pytest
+from anndata.tests.helpers import (
+    as_dense_dask_array,
+    GEN_ADATA_DASK_ARGS,
+    gen_adata,
+    assert_equal,
+)
+from anndata.compat import DaskArray
 
 pytest.importorskip("dask.array")
 
 
+@pytest.fixture(
+    params=[
+        [(2000, 1000), (100, 100)],
+        [(200, 100), (100, 100)],
+        [(200, 100), (100, 100)],
+        [(20, 10), (1, 1)],
+        [(20, 10), (1, 1)],
+    ]
+)
+def sizes(request):
+    return request.param
+
+
+@pytest.fixture(params=["h5ad", "zarr"])
+def diskfmt(request):
+    return request.param
+
+
+@pytest.fixture
+def adata(sizes):
+    import dask.array as da
+    import numpy as np
+
+    (M, N), chunks = sizes
+    X = da.random.random((M, N), chunks=chunks)
+    obs = pd.DataFrame(
+        {"batch": np.random.choice(["a", "b"], M)},
+        index=[f"cell{i:03d}" for i in range(M)],
+    )
+    var = pd.DataFrame(index=[f"gene{i:03d}" for i in range(N)])
+
+    return AnnData(X, obs=obs, var=var)
+
+
 def test_dask_X_view():
     import dask.array as da
 
@@ -20,3 +61,247 @@ def test_dask_X_view():
     adata.X = da.ones((M, N))
     view = adata[:30]
     view.copy()
+
+
+def test_dask_write(adata, tmp_path, diskfmt):
+    import dask.array as da
+    import numpy as np
+
+    pth = tmp_path / f"test_write.{diskfmt}"
+    write = lambda x, y: getattr(x, f"write_{diskfmt}")(y)
+    read = lambda x: getattr(ad, f"read_{diskfmt}")(x)
+
+    M, N = adata.X.shape
+    adata.obsm["a"] = da.random.random((M, 10))
+    adata.obsm["b"] = da.random.random((M, 10))
+    adata.varm["a"] = da.random.random((N, 10))
+
+    orig = adata
+    write(orig, pth)
+    curr = read(pth)
+
+    with pytest.raises(Exception):
+        assert_equal(curr.obsm["a"], curr.obsm["b"])
+
+    assert_equal(curr.varm["a"], orig.varm["a"])
+    assert_equal(curr.obsm["a"], orig.obsm["a"])
+
+    assert isinstance(curr.X, np.ndarray)
+    assert isinstance(curr.obsm["a"], np.ndarray)
+    assert isinstance(curr.varm["a"], np.ndarray)
+    assert isinstance(orig.X, DaskArray)
+    assert isinstance(orig.obsm["a"], DaskArray)
+    assert isinstance(orig.varm["a"], DaskArray)
+
+
+def test_dask_to_memory_check_array_types(adata, tmp_path, diskfmt):
+    import dask.array as da
+    import numpy as np
+
+    pth = tmp_path / f"test_write.{diskfmt}"
+    write = lambda x, y: getattr(x, f"write_{diskfmt}")(y)
+    read = lambda x: getattr(ad, f"read_{diskfmt}")(x)
+
+    M, N = adata.X.shape
+    adata.obsm["a"] = da.random.random((M, 10))
+    adata.obsm["b"] = da.random.random((M, 10))
+    adata.varm["a"] = da.random.random((N, 10))
+
+    orig = adata
+    write(orig, pth)
+    curr = read(pth)
+
+    assert isinstance(orig.X, DaskArray)
+    assert isinstance(orig.obsm["a"], DaskArray)
+    assert isinstance(orig.varm["a"], DaskArray)
+
+    mem = orig.to_memory()
+
+    with pytest.raises(Exception):
+        assert_equal(curr.obsm["a"], curr.obsm["b"])
+
+    assert_equal(curr.varm["a"], orig.varm["a"])
+    assert_equal(curr.obsm["a"], orig.obsm["a"])
+    assert_equal(mem.obsm["a"], orig.obsm["a"])
+    assert_equal(mem.varm["a"], orig.varm["a"])
+
+    assert isinstance(curr.X, np.ndarray)
+    assert isinstance(curr.obsm["a"], np.ndarray)
+    assert isinstance(curr.varm["a"], np.ndarray)
+    assert isinstance(mem.X, np.ndarray)
+    assert isinstance(mem.obsm["a"], np.ndarray)
+    assert isinstance(mem.varm["a"], np.ndarray)
+    assert isinstance(orig.X, DaskArray)
+    assert isinstance(orig.obsm["a"], DaskArray)
+    assert isinstance(orig.varm["a"], DaskArray)
+
+
+def test_dask_to_memory_copy_check_array_types(adata, tmp_path, diskfmt):
+    import dask.array as da
+    import numpy as np
+
+    pth = tmp_path / f"test_write.{diskfmt}"
+    write = lambda x, y: getattr(x, f"write_{diskfmt}")(y)
+    read = lambda x: getattr(ad, f"read_{diskfmt}")(x)
+
+    M, N = adata.X.shape
+    adata.obsm["a"] = da.random.random((M, 10))
+    adata.obsm["b"] = da.random.random((M, 10))
+    adata.varm["a"] = da.random.random((N, 10))
+
+    orig = adata
+    write(orig, pth)
+    curr = read(pth)
+
+    mem = orig.to_memory(copy=True)
+
+    with pytest.raises(Exception):
+        assert_equal(curr.obsm["a"], curr.obsm["b"])
+
+    assert_equal(curr.varm["a"], orig.varm["a"])
+    assert_equal(curr.obsm["a"], orig.obsm["a"])
+    assert_equal(mem.obsm["a"], orig.obsm["a"])
+    assert_equal(mem.varm["a"], orig.varm["a"])
+
+    assert isinstance(curr.X, np.ndarray)
+    assert isinstance(curr.obsm["a"], np.ndarray)
+    assert isinstance(curr.varm["a"], np.ndarray)
+    assert isinstance(mem.X, np.ndarray)
+    assert isinstance(mem.obsm["a"], np.ndarray)
+    assert isinstance(mem.varm["a"], np.ndarray)
+    assert isinstance(orig.X, DaskArray)
+    assert isinstance(orig.obsm["a"], DaskArray)
+    assert isinstance(orig.varm["a"], DaskArray)
+
+
+def test_dask_copy_check_array_types(adata):
+    import dask.array as da
+
+    M, N = adata.X.shape
+    adata.obsm["a"] = da.random.random((M, 10))
+    adata.obsm["b"] = da.random.random((M, 10))
+    adata.varm["a"] = da.random.random((N, 10))
+
+    orig = adata
+    curr = adata.copy()
+
+    with pytest.raises(Exception):
+        assert_equal(curr.obsm["a"], curr.obsm["b"])
+
+    assert_equal(curr.varm["a"], orig.varm["a"])
+    assert_equal(curr.obsm["a"], orig.obsm["a"])
+
+    assert isinstance(curr.X, DaskArray)
+    assert isinstance(curr.obsm["a"], DaskArray)
+    assert isinstance(curr.varm["a"], DaskArray)
+    assert isinstance(orig.X, DaskArray)
+    assert isinstance(orig.obsm["a"], DaskArray)
+    assert isinstance(orig.varm["a"], DaskArray)
+
+
+def test_assign_X(adata):
+    """Check if assignment works"""
+    import dask.array as da
+    import numpy as np
+    from anndata.compat import DaskArray
+
+    adata.X = da.ones(adata.X.shape)
+    prev_type = type(adata.X)
+    adata_copy = adata.copy()
+
+    adata.X = -1 * da.ones(adata.X.shape)
+    assert prev_type is DaskArray and type(adata_copy.X) is DaskArray
+    assert_equal(adata.X, -1 * np.ones(adata.X.shape))
+    assert_equal(adata_copy.X, np.ones(adata.X.shape))
+
+
+# Test if dask arrays turn into numpy arrays after to_memory is called
+def test_dask_to_memory_unbacked():
+    import numpy as np
+
+    orig = gen_adata((15, 10), X_type=as_dense_dask_array, **GEN_ADATA_DASK_ARGS)
+    orig.uns = {"da": {"da": as_dense_dask_array(np.ones(12))}}
+
+    assert isinstance(orig.X, DaskArray)
+    assert isinstance(orig.obsm["da"], DaskArray)
+    assert isinstance(orig.layers["da"], DaskArray)
+    assert isinstance(orig.varm["da"], DaskArray)
+    assert isinstance(orig.uns["da"]["da"], DaskArray)
+
+    curr = orig.to_memory()
+
+    assert_equal(orig, curr)
+    assert isinstance(curr.X, np.ndarray)
+    assert isinstance(curr.obsm["da"], np.ndarray)
+    assert isinstance(curr.varm["da"], np.ndarray)
+    assert isinstance(curr.layers["da"], np.ndarray)
+    assert isinstance(curr.uns["da"]["da"], np.ndarray)
+    assert isinstance(orig.X, DaskArray)
+    assert isinstance(orig.obsm["da"], DaskArray)
+    assert isinstance(orig.layers["da"], DaskArray)
+    assert isinstance(orig.varm["da"], DaskArray)
+    assert isinstance(orig.uns["da"]["da"], DaskArray)
+
+
+# Test if dask arrays turn into numpy arrays after to_memory is called
+def test_dask_to_memory_copy_unbacked():
+    import numpy as np
+
+    orig = gen_adata((15, 10), X_type=as_dense_dask_array, **GEN_ADATA_DASK_ARGS)
+    orig.uns = {"da": {"da": as_dense_dask_array(np.ones(12))}}
+
+    curr = orig.to_memory(copy=True)
+
+    assert_equal(orig, curr)
+    assert isinstance(curr.X, np.ndarray)
+    assert isinstance(curr.obsm["da"], np.ndarray)
+    assert isinstance(curr.varm["da"], np.ndarray)
+    assert isinstance(curr.layers["da"], np.ndarray)
+    assert isinstance(curr.uns["da"]["da"], np.ndarray)
+    assert isinstance(orig.X, DaskArray)
+    assert isinstance(orig.obsm["da"], DaskArray)
+    assert isinstance(orig.layers["da"], DaskArray)
+    assert isinstance(orig.varm["da"], DaskArray)
+    assert isinstance(orig.uns["da"]["da"], DaskArray)
+
+
+def test_to_memory_raw():
+    import numpy as np
+    import dask.array as da
+
+    orig = gen_adata((20, 10), **GEN_ADATA_DASK_ARGS)
+    orig.X = da.ones((20, 10))
+
+    with_raw = orig[:, ::2].copy()
+    with_raw.raw = orig.copy()
+
+    assert isinstance(with_raw.raw.X, DaskArray)
+    assert isinstance(with_raw.raw.varm["da"], DaskArray)
+
+    curr = with_raw.to_memory()
+
+    assert isinstance(with_raw.raw.X, DaskArray)
+    assert isinstance(with_raw.raw.varm["da"], DaskArray)
+    assert isinstance(curr.raw.X, np.ndarray)
+    assert isinstance(curr.raw.varm["da"], np.ndarray)
+
+
+def test_to_memory_copy_raw():
+    import numpy as np
+    import dask.array as da
+
+    orig = gen_adata((20, 10), **GEN_ADATA_DASK_ARGS)
+    orig.X = da.ones((20, 10))
+
+    with_raw = orig[:, ::2].copy()
+    with_raw.raw = orig.copy()
+
+    assert isinstance(with_raw.raw.X, DaskArray)
+    assert isinstance(with_raw.raw.varm["da"], DaskArray)
+
+    curr = with_raw.to_memory(copy=True)
+
+    assert isinstance(with_raw.raw.X, DaskArray)
+    assert isinstance(with_raw.raw.varm["da"], DaskArray)
+    assert isinstance(curr.raw.X, np.ndarray)
+    assert isinstance(curr.raw.varm["da"], np.ndarray)
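
A minimal sketch of the dask round trip checked by the tests above (not part of
the upstream diff; assumes dask is installed, and the file name is illustrative):

    import dask.array as da
    import numpy as np
    import anndata as ad

    adata = ad.AnnData(da.random.random((100, 50), chunks=(10, 10)))
    adata.write_h5ad("dask_example.h5ad")
    back = ad.read_h5ad("dask_example.h5ad")

    assert isinstance(adata.X, da.Array)   # stays lazy in memory
    assert isinstance(back.X, np.ndarray)  # materialised as numpy on read
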
diff --git a/anndata/tests/test_dask_view_mem.py b/anndata/tests/test_dask_view_mem.py
new file mode 100644
index 0000000..6d75847
--- /dev/null
+++ b/anndata/tests/test_dask_view_mem.py
@@ -0,0 +1,157 @@
+import pytest
+
+import anndata as ad
+
+pytest.importorskip("pytest_memray")
+
+# ------------------------------------------------------------------------------
+# Some test data
+# ------------------------------------------------------------------------------
+
+
+@pytest.fixture(params=["layers", "obsm", "varm"])
+def mapping_name(request):
+    return request.param
+
+
+@pytest.fixture(params=["obs", "var"])
+def attr_name(request):
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def give_chunks(request):
+    return request.param
+
+
+# ------------------------------------------------------------------------------
+# The test functions
+# ------------------------------------------------------------------------------
+
+
+# Performs some work up front so that dask caches its
+# subclass checks before the measured tests run.
+@pytest.fixture
+def alloc_cache():
+    import dask.array as da
+
+    N = 2**6
+    size = ((N, N), (N, N))
+
+    adata = ad.AnnData(
+        da.random.random(*size),
+        **{
+            "layers": dict(m=da.random.random(*size)),
+            "obsm": dict(m=da.random.random(*size)),
+            "obs": dict(m=da.random.random((N))),
+            "var": dict(m=da.random.random((N))),
+            "varm": dict(m=da.random.random(*size)),
+        },
+    )
+    subset = adata[:10, :][:, :10]
+    for mn in ["varm", "obsm", "layers"]:
+        m = getattr(subset, mn)["m"]
+        m[0, 0] = 100
+    _ = adata.to_memory(copy=False)
+
+
+# Rough expected allocation for test_size_of_view (N = 2**8, float64):
+# each N*N matrix takes N*N*8 = 2**19 bytes and we materialize two of them,
+# while the single index of length N takes N*4 = 2**10 bytes,
+# i.e. roughly 2 * 2**19 + 2**10 bytes in total.
+# The factor of two on 2**19 agrees with the experimental results:
+# dask.random allocates about 1 MB here, hence the 1.5 MB limit below.
+@pytest.mark.usefixtures("alloc_cache")
+@pytest.mark.limit_memory("1.5 MB")
+def test_size_of_view(mapping_name, give_chunks):
+    import dask.array as da
+
+    N = 2**8
+    size = ((N, N), (N, N)) if give_chunks else ((N, N), "auto")
+
+    adata = ad.AnnData(
+        da.random.random(*size),
+        **{mapping_name: dict(m=da.random.random(*size))},
+    )
+    _ = adata.to_memory(copy=False)
+
+
+# Modifying a view should normally allocate only around 90 KB
+# (pandas indexing occasionally requires somewhat more),
+# which the budget below comfortably covers, index overhead included.
+# If the data were copied instead, the two M*M arrays would take about 4 MB
+# plus roughly 2 MB for indexing, i.e. at least 6 MB in theory and closer
+# to 10 MB experimentally, so the memory limit below would be exceeded.
+@pytest.mark.usefixtures("alloc_cache")
+@pytest.mark.limit_memory("1.5 MB")
+def test_modify_view_mapping_component_memory(mapping_name, give_chunks):
+    import dask.array as da
+
+    N = 2**8
+    M = 2**9
+
+    size = ((M, M), (M, M)) if give_chunks else ((M, M), "auto")
+
+    adata = ad.AnnData(
+        da.random.random(*size),
+        **{mapping_name: dict(m=da.random.random(*size))},
+    )
+    subset = adata[:N, :N]
+    assert subset.is_view
+    m = getattr(subset, mapping_name)["m"]
+    m[0, 0] = 100
+
+
+# Modifying a view should normally allocate only around 90 KB
+# (pandas indexing occasionally requires somewhat more),
+# which the budget below comfortably covers, index overhead included.
+# If the data were copied instead, the two M*M arrays would take about 4 MB
+# plus roughly 2 MB for indexing, i.e. at least 6 MB in theory and closer
+# to 10 MB experimentally, so the memory limit below would be exceeded.
+@pytest.mark.usefixtures("alloc_cache")
+@pytest.mark.limit_memory("1.5 MB")
+def test_modify_view_X_memory(mapping_name, give_chunks):
+    import dask.array as da
+
+    N = 2**8
+    M = 2**9
+
+    size = ((M, M), (M, M)) if give_chunks else ((M, M), "auto")
+
+    adata = ad.AnnData(
+        da.random.random(*size),
+        **{mapping_name: dict(m=da.random.random(*size))},
+    )
+    subset = adata[:N, :N]
+    assert subset.is_view
+    m = subset.X
+    m[0, 0] = 100
+
+
+# Modifying a view should normally allocate only around 90 KB
+# (pandas indexing occasionally requires somewhat more),
+# which the budget below comfortably covers, index overhead included.
+# If the data were copied instead, the two M*M arrays would take about 4 MB
+# plus roughly 2 MB for indexing, i.e. at least 6 MB in theory and closer
+# to 10 MB experimentally, so the memory limit below would be exceeded.
+@pytest.mark.usefixtures("alloc_cache")
+@pytest.mark.limit_memory("1.5 MB")
+def test_modify_view_mapping_obs_var_memory(attr_name, give_chunks):
+    import dask.array as da
+
+    N = 2**8
+    M = 2**9
+
+    size = ((M, M), (M, M)) if give_chunks else ((M, M), "auto")
+
+    adata = ad.AnnData(
+        da.random.random(*size),
+        **{attr_name: dict(m=da.random.random(M))},
+    )
+    subset = adata[:N, :N]
+    assert subset.is_view
+    m = getattr(subset, attr_name)["m"]
+    m[0] = 100
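
A minimal sketch of the pytest-memray pattern the new test_dask_view_mem.py file
relies on (not part of the upstream diff; assumes the pytest-memray plugin is
installed and its memray mode is enabled for the test run):

    import numpy as np
    import pytest

    pytest.importorskip("pytest_memray")

    @pytest.mark.limit_memory("1.5 MB")
    def test_stays_under_budget():
        # ~0.5 MB allocation, comfortably below the 1.5 MB limit
        arr = np.zeros(2**16, dtype=np.float64)  # 65536 * 8 bytes = 512 KiB
        assert arr.sum() == 0
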
diff --git a/anndata/tests/test_deprecations.py b/anndata/tests/test_deprecations.py
index 24e79d6..aad0068 100644
--- a/anndata/tests/test_deprecations.py
+++ b/anndata/tests/test_deprecations.py
@@ -69,162 +69,31 @@ def test_obsvar_vector_Xlayer(adata):
     adata = adata.copy()
     adata.layers["X"] = adata.X * 3
 
-    with pytest.warns(None) as records:
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
         adata.var_vector("s1", layer="X")
         adata.obs_vector("a", layer="X")
 
-    for r in records:
-        # This time it shouldn’t throw a warning
-        if "anndata" in r.filename:
-            assert r.category is not FutureWarning
-
-
-def test_force_dense_deprecated(tmp_path):
-    dense_pth = tmp_path / "dense.h5ad"
-    adata = AnnData(X=sparse.random(10, 10, format="csr"))
-    adata.raw = adata
-
-    with pytest.warns(FutureWarning):
-        adata.write_h5ad(dense_pth, force_dense=True)
-    with h5py.File(dense_pth, "r") as f:
-        assert isinstance(f["X"], h5py.Dataset)
-        assert isinstance(f["raw/X"], h5py.Dataset)
-
-    dense = ad.read_h5ad(dense_pth)
-
-    assert isinstance(dense.X, np.ndarray)
-    assert isinstance(dense.raw.X, np.ndarray)
-    assert_equal(adata, dense)
-
-
-#######################################
-# Dealing with uns adj matrices
-#######################################
-
-
-def test_get_uns_neighbors_deprecated(adata):
-    n = adata.shape[0]
-    mtx = sparse.random(n, n, density=0.3, format="csr")
-    adata.obsp["connectivities"] = mtx
-    adata.uns["neighbors"] = {}
-
-    with pytest.warns(FutureWarning):
-        from_uns = adata.uns["neighbors"]["connectivities"]
-
-    assert_equal(from_uns, mtx)
-
-    with pytest.warns(None) as rec:
-        v = adata[: n // 2]
-        assert not rec
-
-    with pytest.warns(FutureWarning):
-        from_uns_v = v.uns["neighbors"]["connectivities"]
-
-    assert_equal(from_uns_v, v.obsp["connectivities"])
-
-
-def test_set_uns_neighbors_deprecated(adata):
-    n = adata.shape[0]
-    mtx = sparse.random(n, n, format="csr")
-    adata.uns["neighbors"] = {}
-
-    with pytest.warns(FutureWarning):
-        adata.uns["neighbors"]["connectivities"] = sparse.random(n, n, format="csr")
-
-    assert_equal(adata.obsp["connectivities"], mtx)
-    with pytest.warns(FutureWarning):
-        assert_equal(adata.uns["neighbors"]["connectivities"], mtx)
-
-    # Make sure that we can write to uns normally:
-    adata.uns["new_key"] = 100
-    assert adata.uns["new_key"] == 100
-
-
-def test_slice_uns_sparse_deprecated():
-    adata = AnnData(sparse.csr_matrix((500, 10)))
-    n = adata.shape[0]
-    mtx = sparse.random(n, n, density=0.2, format="csr")
-    adata.uns["sparse_mtx"] = mtx
-
-    with pytest.warns(FutureWarning):
-        v = adata[: n // 2]
-
-    assert_equal(adata.uns["sparse_mtx"], mtx)
-    assert_equal(v.uns["sparse_mtx"], mtx[: n // 2, : n // 2])
-
-
-@pytest.fixture
-def adata_neighbors():
-    return ad.AnnData(
-        X=sparse.random(100, 200, format="csr"),
-        obsp=dict(
-            distances=sparse.random(100, 100, format="csr"),
-            connectivities=sparse.random(100, 100, format="csr"),
-        ),
-        uns={"neighbors": {"params": {"method": "umap", "n_neighbors": 10}}},
-    )
-
-
-def test_deprecated_neighbors_get_mtx(adata_neighbors):
-    """Test getting neighbor matrices from adata.uns"""
-    adata = adata_neighbors
-
-    with pytest.warns(FutureWarning):
-        assert_equal(adata.obsp["distances"], adata.uns["neighbors"]["distances"])
-    with pytest.warns(FutureWarning):
-        assert_equal(
-            adata.obsp["connectivities"], adata.uns["neighbors"]["connectivities"]
-        )
-
-
-def test_deprecated_neighbors_get_other(adata_neighbors):
-    """Test getting other fields from adata.uns"""
-    adata = adata_neighbors
-
-    # This shouldn't throw a warning
-    with pytest.warns(None) as rec:
-        assert adata.uns["neighbors"]["params"] == {"method": "umap", "n_neighbors": 10}
-        assert not rec
-
-
-def test_deprecated_neighbors_set_other(adata_neighbors):
-    adata = adata_neighbors
-
-    # This shouldn't throw a warning
-    with pytest.warns(None) as rec:
-        adata.uns["neighbors"]["new_key"] = 10
-        assert adata.uns["neighbors"]["new_key"] == 10
-        # Test nested
-        adata.uns["neighbors"]["params"]["new_param"] = 100
-        assert adata.uns["neighbors"]["params"]["new_param"] == 100
-        assert adata.uns["neighbors"]["params"] == {
-            "method": "umap",
-            "n_neighbors": 10,
-            "new_param": 100,
-        }
-
-        assert not rec
-
 
 # This should break in 0.9
 def test_dtype_warning():
     # Tests a warning is thrown
-    with pytest.warns(FutureWarning):
-        a = AnnData(np.ones((3, 3), dtype=np.float64))
+    with pytest.warns(PendingDeprecationWarning):
+        a = AnnData(np.ones((3, 3)), dtype=np.float32)
     assert a.X.dtype == np.float32
 
     # This shouldn't warn, shouldn't copy
     with warnings.catch_warnings(record=True) as record:
         b_X = np.ones((3, 3), dtype=np.float64)
-        b = AnnData(b_X, dtype=np.float64)
+        b = AnnData(b_X)
         assert not record
     assert b_X is b.X
     assert b.X.dtype == np.float64
 
-    # Shouldn't warn, should copy
-    with warnings.catch_warnings(record=True) as record:
+    # Should warn, should copy
+    with pytest.warns(PendingDeprecationWarning):
         c_X = np.ones((3, 3), dtype=np.float32)
-        c = AnnData(np.ones((3, 3), dtype=np.float32), dtype=np.float64)
+        c = AnnData(c_X, dtype=np.float64)
         assert not record
     assert c_X is not c.X
     assert c.X.dtype == np.float64
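
A minimal sketch of the constructor behaviour the updated test_dtype_warning
checks (not part of the upstream diff): passing dtype= to AnnData now emits a
PendingDeprecationWarning and converts X, while omitting it leaves the input
array untouched.

    import warnings
    import numpy as np
    from anndata import AnnData

    x = np.ones((3, 3), dtype=np.float64)

    with warnings.catch_warnings(record=True) as rec:
        warnings.simplefilter("always")
        a = AnnData(x)                    # no dtype: no warning, no copy
    assert not rec and a.X is x

    with warnings.catch_warnings(record=True) as rec:
        warnings.simplefilter("always")
        b = AnnData(x, dtype=np.float32)  # dtype given: warns and converts a copy
    assert any(issubclass(w.category, PendingDeprecationWarning) for w in rec)
    assert b.X.dtype == np.float32
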
diff --git a/anndata/tests/test_hdf5_backing.py b/anndata/tests/test_hdf5_backing.py
index f6d0276..987d013 100644
--- a/anndata/tests/test_hdf5_backing.py
+++ b/anndata/tests/test_hdf5_backing.py
@@ -7,7 +7,13 @@ import numpy as np
 from scipy import sparse
 
 import anndata as ad
-from anndata.tests.helpers import gen_adata, assert_equal, subset_func
+from anndata.tests.helpers import (
+    as_dense_dask_array,
+    GEN_ADATA_DASK_ARGS,
+    gen_adata,
+    assert_equal,
+    subset_func,
+)
 from anndata.utils import asarray
 
 subset_func2 = subset_func
@@ -42,13 +48,12 @@ def adata():
         obsm=dict(o1=np.zeros((X.shape[0], 10))),
         varm=dict(v1=np.ones((X.shape[1], 20))),
         layers=dict(float=X.astype(float), sparse=sparse.csr_matrix(X)),
-        dtype="int32",
     )
 
 
 @pytest.fixture(
-    params=[sparse.csr_matrix, sparse.csc_matrix, np.array],
-    ids=["scipy-csr", "scipy-csc", "np-array"],
+    params=[sparse.csr_matrix, sparse.csc_matrix, np.array, as_dense_dask_array],
+    ids=["scipy-csr", "scipy-csc", "np-array", "dask_array"],
 )
 def mtx_format(request):
     return request.param
@@ -64,8 +69,8 @@ def backed_mode(request):
     return request.param
 
 
-@pytest.fixture(params=[True, False])
-def force_dense(request):
+@pytest.fixture(params=(("X",), ()))
+def as_dense(request):
     return request.param
 
 
@@ -73,8 +78,9 @@ def force_dense(request):
 # The test functions
 # -------------------------------------------------------------------------------
 
+
 # TODO: Check to make sure obs, obsm, layers, ... are written and read correctly as well
-def test_read_write_X(tmp_path, mtx_format, backed_mode, force_dense):
+def test_read_write_X(tmp_path, mtx_format, backed_mode, as_dense):
     base_pth = Path(tmp_path)
     orig_pth = base_pth / "orig.h5ad"
     backed_pth = base_pth / "backed.h5ad"
@@ -83,7 +89,7 @@ def test_read_write_X(tmp_path, mtx_format, backed_mode, force_dense):
     orig.write(orig_pth)
 
     backed = ad.read(orig_pth, backed=backed_mode)
-    backed.write(backed_pth, as_dense=["X"])
+    backed.write(backed_pth, as_dense=as_dense)
     backed.file.close()
 
     from_backed = ad.read(backed_pth)
@@ -157,7 +163,7 @@ def test_backing_copy(adata, tmp_path, backing_h5ad):
 def test_backed_raw(tmp_path):
     backed_pth = tmp_path / "backed.h5ad"
     final_pth = tmp_path / "final.h5ad"
-    mem_adata = gen_adata((10, 10))
+    mem_adata = gen_adata((10, 10), **GEN_ADATA_DASK_ARGS)
     mem_adata.raw = mem_adata
     mem_adata.write(backed_pth)
 
@@ -202,7 +208,7 @@ def test_backed_raw_subset(tmp_path, array_type, subset_func, subset_func2):
     # Value equivalent
     assert_equal(mem_v, backed_v)
     # Type and value equivalent
-    assert_equal(mem_v.copy(), backed_v.to_memory(), exact=True)
+    assert_equal(mem_v.copy(), backed_v.to_memory(copy=True), exact=True)
     assert backed_v.is_view
     assert backed_v.isbacked
 
@@ -219,12 +225,13 @@ def test_backed_raw_subset(tmp_path, array_type, subset_func, subset_func2):
     [
         pytest.param(asarray, id="dense_array"),
         pytest.param(sparse.csr_matrix, id="csr_matrix"),
+        pytest.param(as_dense_dask_array, id="dask_array"),
     ],
 )
 def test_to_memory_full(tmp_path, array_type):
     backed_pth = tmp_path / "backed.h5ad"
-    mem_adata = gen_adata((15, 10), X_type=array_type)
-    mem_adata.raw = gen_adata((15, 12), X_type=array_type)
+    mem_adata = gen_adata((15, 10), X_type=array_type, **GEN_ADATA_DASK_ARGS)
+    mem_adata.raw = gen_adata((15, 12), X_type=array_type, **GEN_ADATA_DASK_ARGS)
     mem_adata.write_h5ad(backed_pth, compression="lzf")
 
     backed_adata = ad.read_h5ad(backed_pth, backed="r")
@@ -236,12 +243,6 @@ def test_to_memory_full(tmp_path, array_type):
     assert_equal(mem_adata, backed_adata.to_memory())
 
 
-def test_to_memory_error():
-    adata = gen_adata((5, 3))
-    with pytest.raises(ValueError):
-        adata.to_memory()
-
-
 def test_double_index(adata, backing_h5ad):
     adata.filename = backing_h5ad
     with pytest.raises(ValueError):
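
A minimal sketch of the backed-mode round trip these tests exercise (not part of
the upstream diff; the file name is illustrative and must already exist):

    import anndata as ad

    backed = ad.read_h5ad("data.h5ad", backed="r")  # X stays on disk
    assert backed.isbacked
    mem = backed.to_memory()                        # load everything into RAM
    assert not mem.isbacked
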
diff --git a/anndata/tests/test_helpers.py b/anndata/tests/test_helpers.py
index 321a621..eb074c5 100644
--- a/anndata/tests/test_helpers.py
+++ b/anndata/tests/test_helpers.py
@@ -6,7 +6,14 @@ import numpy as np
 from scipy import sparse
 
 import anndata as ad
-from anndata.tests.helpers import assert_equal, report_name, gen_adata
+from anndata.tests.helpers import (
+    assert_equal,
+    gen_awkward,
+    report_name,
+    gen_adata,
+    asarray,
+)
+from anndata.utils import dim_len
 
 # Testing to see if all error types can have the key name appended.
 # Currently fails for 22/118 since they have required arguments. Not sure what to do about that.
@@ -40,6 +47,33 @@ def reusable_adata():
     return gen_adata((10, 10))
 
 
+@pytest.mark.parametrize(
+    "shape, datashape",
+    [
+        [(4, 2), "4 * 2 * int32"],
+        [(100, 200, None), "100 * 200 * var * int32"],
+        [(4, None), "4 * var * int32"],
+        [(0, 4), "0 * 4 * int32"],
+        [(4, 0), "4 * 0 * int32"],
+        [(8, None, None), "8 * var * var * int32"],
+        [(8, None, None, None), "8 * var * var * var * int32"],
+        [(4, None, 8), "4 * var * 8 * int32"],
+        [(100, 200, 4), "100 * 200 * 4 * int32"],
+        [(4, 0, 0), "4 * 0 * 0 * int32"],
+        [(0, 0, 0), "0 * 0 * 0 * int32"],
+        [(0, None), "0 * var * int32"],
+    ],
+)
+def test_gen_awkward(shape, datashape):
+    import awkward as ak
+
+    arr = gen_awkward(shape)
+    for i, s in enumerate(shape):
+        assert dim_len(arr, i) == s
+    arr_type = ak.types.from_datashape(datashape)
+    assert arr.type == arr_type
+
+
 # Does this work for every warning?
 def test_report_name():
     def raise_error():
@@ -133,7 +167,7 @@ def test_assert_equal_raw():
 
 def test_assert_equal_raw_presence():
     # This was causing some testing issues during
-    # https://github.com/theislab/anndata/pull/542
+    # https://github.com/scverse/anndata/pull/542
     a = gen_adata((10, 20))
     b = a.copy()
     a.raw = a.copy()
@@ -188,3 +222,27 @@ def test_assert_equal_aligned_mapping_empty():
         with pytest.raises(AssertionError):
             assert_equal(getattr(adata, attr), getattr(diff_idx, attr))
         assert_equal(getattr(adata, attr), getattr(same_idx, attr))
+
+
+def test_assert_equal_dask_arrays():
+    import dask.array as da
+
+    a = da.from_array([[1, 2, 3], [4, 5, 6]])
+    b = da.from_array([[1, 2, 3], [4, 5, 6]])
+
+    assert_equal(a, b)
+
+    c = da.ones(10, dtype="int32")
+    d = da.ones(10, dtype="int64")
+    assert_equal(c, d)
+
+
+def test_assert_equal_dask_sparse_arrays():
+    import dask.array as da
+    from scipy import sparse
+
+    x = sparse.random(10, 10, format="csr", density=0.1)
+    y = da.from_array(asarray(x))
+
+    assert_equal(x, y)
+    assert_equal(y, x)
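
A minimal sketch of the awkward "datashape" strings used in the test_gen_awkward
parametrization above (not part of the upstream diff; "var" marks a ragged,
variable-length dimension):

    import awkward as ak

    ragged = ak.Array([[1, 2, 3], [4], [5, 6]])
    print(ragged.type)  # typically prints: 3 * var * int64
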
diff --git a/anndata/tests/test_inplace_subset.py b/anndata/tests/test_inplace_subset.py
index 89d0b53..e305be4 100644
--- a/anndata/tests/test_inplace_subset.py
+++ b/anndata/tests/test_inplace_subset.py
@@ -2,13 +2,18 @@ import numpy as np
 import pytest
 from scipy import sparse
 
-from anndata.tests.helpers import assert_equal, gen_adata, subset_func
+from anndata.tests.helpers import (
+    assert_equal,
+    gen_adata,
+    as_dense_dask_array,
+)
 from anndata.utils import asarray
+from anndata.compat import DaskArray
 
 
 @pytest.fixture(
-    params=[np.array, sparse.csr_matrix, sparse.csc_matrix],
-    ids=["np_array", "scipy_csr", "scipy_csc"],
+    params=[np.array, sparse.csr_matrix, sparse.csc_matrix, as_dense_dask_array],
+    ids=["np_array", "scipy_csr", "scipy_csc", "dask_array"],
 )
 def matrix_type(request):
     return request.param
diff --git a/anndata/tests/test_io_backwards_compat.py b/anndata/tests/test_io_backwards_compat.py
index 83de718..a060d17 100644
--- a/anndata/tests/test_io_backwards_compat.py
+++ b/anndata/tests/test_io_backwards_compat.py
@@ -3,6 +3,8 @@ from pathlib import Path
 import pytest
 
 import anndata as ad
+import pandas as pd
+from scipy import sparse
 from anndata.tests.helpers import assert_equal
 
 ARCHIVE_PTH = Path(__file__).parent / "data/archives"
@@ -13,6 +15,11 @@ def archive_dir(request):
     return request.param
 
 
+@pytest.fixture(params=["h5ad", "zarr"])
+def diskfmt(request):
+    return request.param
+
+
 def test_backwards_compat_files(archive_dir):
     with pytest.warns(ad.OldFormatWarning):
         from_h5ad = ad.read_h5ad(archive_dir / "adata.h5ad")
@@ -20,3 +27,26 @@ def test_backwards_compat_files(archive_dir):
         from_zarr = ad.read_zarr(archive_dir / "adata.zarr.zip")
 
     assert_equal(from_h5ad, from_zarr, exact=True)
+
+
+def test_clean_uns_backwards_compat(tmp_path, diskfmt):
+    pth = tmp_path / f"test_write.{diskfmt}"
+    write = lambda x, y: getattr(x, f"write_{diskfmt}")(y)
+    read = lambda x: getattr(ad, f"read_{diskfmt}")(x)
+
+    orig = ad.AnnData(
+        sparse.csr_matrix((3, 5), dtype="float32"),
+        obs=pd.DataFrame(
+            {"a": pd.Categorical(list("aab")), "b": [1, 2, 3]},
+            index=[f"cell_{i}" for i in range(3)],
+        ),
+        uns={
+            "a_categories": "some string",
+            "b_categories": "another string",
+        },
+    )
+
+    write(orig, pth)
+    from_disk = read(pth)
+
+    assert_equal(orig, from_disk)
diff --git a/anndata/tests/test_io_dispatched.py b/anndata/tests/test_io_dispatched.py
new file mode 100644
index 0000000..b43eb67
--- /dev/null
+++ b/anndata/tests/test_io_dispatched.py
@@ -0,0 +1,183 @@
+import re
+
+from scipy import sparse
+import h5py
+import zarr
+
+import anndata as ad
+from anndata.experimental import (
+    read_dispatched,
+    write_dispatched,
+    read_elem,
+    write_elem,
+)
+from anndata.tests.helpers import gen_adata, assert_equal
+
+
+def test_read_dispatched_w_regex():
+    def read_only_axis_dfs(func, elem_name: str, elem, iospec):
+        if iospec.encoding_type == "anndata":
+            return func(elem)
+        elif re.match(r"^/((obs)|(var))?(/.*)?$", elem_name):
+            return func(elem)
+        else:
+            return None
+
+    adata = gen_adata((1000, 100))
+    z = zarr.group()
+
+    write_elem(z, "/", adata)
+
+    expected = ad.AnnData(obs=adata.obs, var=adata.var)
+    actual = read_dispatched(z, read_only_axis_dfs)
+
+    assert_equal(expected, actual)
+
+
+def test_read_dispatched_dask():
+    import dask.array as da
+
+    def read_as_dask_array(func, elem_name: str, elem, iospec):
+        if iospec.encoding_type in (
+            "dataframe",
+            "csr_matrix",
+            "csc_matrix",
+            "awkward-array",
+        ):
+            # Preventing recursing inside of these types
+            return read_elem(elem)
+        elif iospec.encoding_type == "array":
+            return da.from_zarr(elem)
+        else:
+            return func(elem)
+
+    adata = gen_adata((1000, 100))
+    z = zarr.group()
+    write_elem(z, "/", adata)
+
+    dask_adata = read_dispatched(z, read_as_dask_array)
+
+    assert isinstance(dask_adata.layers["array"], da.Array)
+    assert isinstance(dask_adata.obsm["array"], da.Array)
+    assert isinstance(dask_adata.uns["nested"]["nested_further"]["array"], da.Array)
+
+    expected = read_elem(z)
+    actual = dask_adata.to_memory(copy=False)
+
+    assert_equal(expected, actual)
+
+
+def test_read_dispatched_null_case():
+    adata = gen_adata((100, 100))
+    z = zarr.group()
+    write_elem(z, "/", adata)
+
+    expected = read_elem(z)
+    actual = read_dispatched(z, lambda _, __, x, **___: read_elem(x))
+
+    assert_equal(expected, actual)
+
+
+def test_write_dispatched_chunks():
+    from itertools import repeat, chain
+
+    def determine_chunks(elem_shape, specified_chunks):
+        chunk_iterator = chain(specified_chunks, repeat(None))
+        return tuple(e if c is None else c for e, c in zip(elem_shape, chunk_iterator))
+
+    adata = gen_adata((1000, 100))
+
+    def write_chunked(func, store, k, elem, dataset_kwargs, iospec):
+        M, N = 13, 42
+
+        def set_copy(d, **kwargs):
+            d = dict(d)
+            d.update(kwargs)
+            return d
+
+        # TODO: Should the passed path be absolute?
+        path = "/" + store.path + "/" + k
+        if hasattr(elem, "shape") and not isinstance(
+            elem, (sparse.spmatrix, ad.AnnData)
+        ):
+            if re.match(r"^/((X)|(layers)).*", path):
+                chunks = (M, N)
+            elif path.startswith("/obsp"):
+                chunks = (M, M)
+            elif path.startswith("/obs"):
+                chunks = (M,)
+            elif path.startswith("/varp"):
+                chunks = (N, N)
+            elif path.startswith("/var"):
+                chunks = (N,)
+            else:
+                chunks = dataset_kwargs.get("chunks", ())
+            func(
+                store,
+                k,
+                elem,
+                dataset_kwargs=set_copy(
+                    dataset_kwargs, chunks=determine_chunks(elem.shape, chunks)
+                ),
+            )
+        else:
+            func(store, k, elem, dataset_kwargs=dataset_kwargs)
+
+    z = zarr.group()
+
+    write_dispatched(z, "/", adata, callback=write_chunked)
+
+    def check_chunking(k, v):
+        if (
+            not isinstance(v, zarr.Array)
+            or v.shape == ()
+            or any(k.endswith(x) for x in ("data", "indices", "indptr"))
+        ):
+            return
+        if re.match(r"obs[mp]?/\w+", k):
+            assert v.chunks[0] == 13
+        elif re.match(r"var[mp]?/\w+", k):
+            assert v.chunks[0] == 42
+
+    z.visititems(check_chunking)
+
+
+def test_io_dispatched_keys(tmp_path):
+    h5ad_write_keys = []
+    zarr_write_keys = []
+    h5ad_read_keys = []
+    zarr_read_keys = []
+
+    h5ad_path = tmp_path / "test.h5ad"
+    zarr_path = tmp_path / "test.zarr"
+
+    def h5ad_writer(func, store, k, elem, dataset_kwargs, iospec):
+        h5ad_write_keys.append(k)
+        func(store, k, elem, dataset_kwargs=dataset_kwargs)
+
+    def zarr_writer(func, store, k, elem, dataset_kwargs, iospec):
+        zarr_write_keys.append(k)
+        func(store, k, elem, dataset_kwargs=dataset_kwargs)
+
+    def h5ad_reader(func, elem_name: str, elem, iospec):
+        h5ad_read_keys.append(elem_name)
+        return func(elem)
+
+    def zarr_reader(func, elem_name: str, elem, iospec):
+        zarr_read_keys.append(elem_name)
+        return func(elem)
+
+    adata = gen_adata((50, 100))
+
+    with h5py.File(h5ad_path, "w") as f:
+        write_dispatched(f, "/", adata, callback=h5ad_writer)
+        _ = read_dispatched(f, h5ad_reader)
+
+    with zarr.open_group(zarr_path, "w") as f:
+        write_dispatched(f, "/", adata, callback=zarr_writer)
+        _ = read_dispatched(f, zarr_reader)
+
+    assert h5ad_write_keys == zarr_write_keys
+    assert h5ad_read_keys == zarr_read_keys
+
+    assert sorted(h5ad_write_keys) == sorted(h5ad_read_keys)
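
A minimal sketch of the callback signature used by the new dispatched-IO tests
above (not part of the upstream diff; assumes the experimental API of anndata 0.9):

    import numpy as np
    import pandas as pd
    import zarr
    import anndata as ad
    from anndata.experimental import read_dispatched, write_elem

    z = zarr.group()
    write_elem(z, "/", ad.AnnData(np.ones((2, 2)), obs=pd.DataFrame(index=["c1", "c2"])))

    def passthrough(func, elem_name, elem, iospec):
        print(f"reading {elem_name} ({iospec.encoding_type})")
        return func(elem)  # fall back to the default reader

    adata = read_dispatched(z, passthrough)
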
diff --git a/anndata/tests/test_io_elementwise.py b/anndata/tests/test_io_elementwise.py
index 3a32242..7fa8052 100644
--- a/anndata/tests/test_io_elementwise.py
+++ b/anndata/tests/test_io_elementwise.py
@@ -1,8 +1,9 @@
 """
 Tests that each element in an anndata is written correctly
 """
-from tempfile import TemporaryDirectory
-from pathlib import Path
+from __future__ import annotations
+
+import re
 
 import h5py
 import numpy as np
@@ -11,9 +12,10 @@ import pytest
 from scipy import sparse
 import zarr
 
-
 import anndata as ad
-from anndata.compat import _read_attr
+from anndata._io.specs.registry import IORegistryError, _REGISTRY, get_spec, IOSpec
+from anndata._io.utils import AnnDataReadError
+from anndata.compat import _read_attr, H5Group, ZarrGroup
 from anndata._io.specs import write_elem, read_elem
 from anndata.tests.helpers import assert_equal, gen_adata
 
@@ -24,17 +26,18 @@ def diskfmt(request):
 
 
 @pytest.fixture(scope="function", params=["h5", "zarr"])
-def store(request):
-    with TemporaryDirectory() as tmpdir:
-
-        if request.param == "h5":
-            file = h5py.File(Path(tmpdir) / "test.h5", "w")
-            store = file["/"]
-        elif request.param == "zarr":
-            store = zarr.open(Path(tmpdir) / "test.zarr")
-
+def store(request, tmp_path) -> H5Group | ZarrGroup:
+    if request.param == "h5":
+        file = h5py.File(tmp_path / "test.h5", "w")
+        store = file["/"]
+    elif request.param == "zarr":
+        store = zarr.open(tmp_path / "test.zarr", "w")
+    else:
+        assert False
+
+    try:
         yield store
-
+    finally:
         if request.param == "h5":
             file.close()
 
@@ -88,6 +91,7 @@ def test_io_spec(store, value, encoding_type):
 
     from_disk = read_elem(store[key])
     assert_equal(value, from_disk)
+    assert get_spec(store[key]) == _REGISTRY.get_spec(value)
 
 
 def test_io_spec_raw(store):
@@ -110,3 +114,69 @@ def test_write_to_root(store):
 
     assert "anndata" == _read_attr(store.attrs, "encoding-type")
     assert_equal(from_disk, adata)
+
+
+@pytest.mark.parametrize(
+    ["attribute", "value"],
+    [
+        ("encoding-type", "floob"),
+        ("encoding-version", "10000.0"),
+    ],
+)
+def test_read_iospec_not_found(store, attribute, value):
+    adata = gen_adata((3, 2))
+
+    write_elem(store, "/", adata)
+    store["obs"].attrs.update({attribute: value})
+
+    with pytest.raises(
+        AnnDataReadError, match=r"while reading key '/(obs)?'"
+    ) as exc_info:
+        read_elem(store)
+    msg = str(exc_info.value.__cause__)
+
+    assert "No read method registered for IOSpec" in msg
+    assert f"{attribute.replace('-', '_')}='{value}'" in msg
+
+
+@pytest.mark.parametrize(
+    ["obj"],
+    [(b"x",)],
+)
+def test_write_io_error(store, obj):
+    full_pattern = re.compile(
+        rf"No method registered for writing {type(obj)} into .*Group"
+    )
+    with pytest.raises(IORegistryError, match=r"while writing key '/el'") as exc_info:
+        write_elem(store, "/el", obj)
+    msg = str(exc_info.value.__cause__)
+    assert re.search(full_pattern, msg)
+
+
+def test_categorical_order_type(store):
+    # https://github.com/scverse/anndata/issues/853
+    cat = pd.Categorical([0, 1], ordered=True)
+    write_elem(store, "ordered", cat)
+    write_elem(store, "unordered", cat.set_ordered(False))
+
+    assert read_elem(store["ordered"]).ordered is True
+    assert type(read_elem(store["ordered"]).ordered) == bool
+    assert read_elem(store["unordered"]).ordered is False
+    assert type(read_elem(store["unordered"]).ordered) == bool
+
+
+def test_override_specification():
+    """
+    Test that trying to overwrite an existing encoding raises an error.
+    """
+    from copy import deepcopy
+
+    registry = deepcopy(_REGISTRY)
+
+    with pytest.raises(TypeError):
+
+        @registry.register_write(
+            ZarrGroup, ad.AnnData, IOSpec("some new type", "0.1.0")
+        )
+        def _(store, key, adata):
+            pass
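
A minimal sketch of the element-wise round trip these tests check (not part of
the upstream diff):

    import numpy as np
    import zarr
    from anndata._io.specs import write_elem, read_elem

    z = zarr.group()
    value = np.arange(6).reshape(2, 3)
    write_elem(z, "array", value)
    assert (read_elem(z["array"]) == value).all()
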
diff --git a/anndata/tests/test_io_partial.py b/anndata/tests/test_io_partial.py
new file mode 100644
index 0000000..1c92669
--- /dev/null
+++ b/anndata/tests/test_io_partial.py
@@ -0,0 +1,86 @@
+from importlib.util import find_spec
+from anndata import AnnData
+from anndata._io.specs.registry import read_elem_partial, read_elem
+from anndata._io import write_zarr, _write_h5ad
+from scipy.sparse import csr_matrix, issparse
+from pathlib import Path
+import numpy as np
+import pytest
+import zarr
+import h5py
+
+X = np.array([[1.0, 0.0, 3.0], [4.0, 0.0, 6.0], [0.0, 8.0, 0.0]], dtype="float32")
+X_check = np.array([[4.0, 0.0], [0.0, 8.0]], dtype="float32")
+
+WRITER = dict(h5ad=_write_h5ad, zarr=write_zarr)
+READER = dict(h5ad=h5py.File, zarr=zarr.open)
+
+
+@pytest.mark.parametrize("typ", [np.asarray, csr_matrix])
+@pytest.mark.parametrize("accessor", ["h5ad", "zarr"])
+def test_read_partial_X(tmp_path, typ, accessor):
+    adata = AnnData(X=typ(X))
+
+    path = Path(tmp_path) / ("test_tp_X." + accessor)
+
+    WRITER[accessor](path, adata)
+
+    with READER[accessor](path, mode="r") as store:
+        if accessor == "zarr":
+            X_part = read_elem_partial(store["X"], indices=([1, 2], [0, 1]))
+        else:
+            # h5py doesn't allow fancy indexing across multiple dimensions
+            X_part = read_elem_partial(store["X"], indices=([1, 2],))
+            X_part = X_part[:, [0, 1]]
+
+    assert np.all(X_check == X_part)
+
+
+@pytest.mark.skipif(not find_spec("scanpy"), reason="Scanpy is not installed")
+@pytest.mark.parametrize("accessor", ["h5ad", "zarr"])
+def test_read_partial_adata(tmp_path, accessor):
+    import scanpy as sc
+
+    adata = sc.datasets.pbmc68k_reduced()
+
+    path = Path(tmp_path) / ("test_rp." + accessor)
+
+    WRITER[accessor](path, adata)
+
+    storage = READER[accessor](path, mode="r")
+
+    obs_idx = [1, 2]
+    var_idx = [0, 3]
+    adata_sbs = adata[obs_idx, var_idx]
+
+    if accessor == "zarr":
+        part = read_elem_partial(storage["X"], indices=(obs_idx, var_idx))
+    else:
+        # h5py doesn't allow fancy indexing across multiple dimensions
+        part = read_elem_partial(storage["X"], indices=(obs_idx,))
+        part = part[:, var_idx]
+    assert np.all(part == adata_sbs.X)
+
+    part = read_elem_partial(storage["obs"], indices=(obs_idx,))
+    assert np.all(part.keys() == adata_sbs.obs.keys())
+    assert np.all(part.index == adata_sbs.obs.index)
+
+    part = read_elem_partial(storage["var"], indices=(var_idx,))
+    assert np.all(part.keys() == adata_sbs.var.keys())
+    assert np.all(part.index == adata_sbs.var.index)
+
+    for key in storage["obsm"].keys():
+        part = read_elem_partial(storage["obsm"][key], indices=(obs_idx,))
+        assert np.all(part == adata_sbs.obsm[key])
+
+    for key in storage["varm"].keys():
+        part = read_elem_partial(storage["varm"][key], indices=(var_idx,))
+        np.testing.assert_equal(part, adata_sbs.varm[key])
+
+    for key in storage["obsp"].keys():
+        part = read_elem_partial(storage["obsp"][key], indices=(obs_idx, obs_idx))
+        part = part.toarray()
+        assert np.all(part == adata_sbs.obsp[key])
+
+    # check uns just in case
+    np.testing.assert_equal(read_elem(storage["uns"]).keys(), adata.uns.keys())
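
A minimal sketch of the partial-read helper exercised above (not part of the
upstream diff; read_elem_partial lives in a private module and its interface may
change):

    import numpy as np
    import zarr
    from anndata._io.specs import write_elem
    from anndata._io.specs.registry import read_elem_partial

    z = zarr.group()
    write_elem(z, "X", np.arange(12, dtype="float32").reshape(3, 4))
    part = read_elem_partial(z["X"], indices=([1, 2], [0, 1]))  # rows, then columns
    print(part)  # the selected 2 x 2 block
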
diff --git a/anndata/tests/test_io_utils.py b/anndata/tests/test_io_utils.py
index e2d23e3..09c1f6c 100644
--- a/anndata/tests/test_io_utils.py
+++ b/anndata/tests/test_io_utils.py
@@ -6,8 +6,12 @@ import h5py
 import pandas as pd
 
 import anndata as ad
+from anndata._io.specs.registry import IORegistryError
 from anndata.compat import _clean_uns
-from anndata._io.utils import report_read_key_on_error, AnnDataReadError
+from anndata._io.utils import (
+    report_read_key_on_error,
+    AnnDataReadError,
+)
 
 
 @pytest.fixture(params=["h5ad", "zarr"])
@@ -46,20 +50,22 @@ def test_write_error_info(diskfmt, tmp_path):
     # Assuming we don't define a writer for tuples
     a = ad.AnnData(uns={"a": {"b": {"c": (1, 2, 3)}}})
 
-    with pytest.raises(Exception, match=r"Above error raised while writing key 'c'"):
+    with pytest.raises(
+        IORegistryError, match=r"Above error raised while writing key 'c'"
+    ):
         write(a)
 
 
 def test_clean_uns():
-    d = dict(
+    adata = ad.AnnData(
         uns=dict(species_categories=["a", "b"]),
-        obs=dict(species=pd.Series([0, 1, 0])),
-        var=dict(species=pd.Series([0, 1, 0, 2])),
+        obs=pd.DataFrame({"species": [0, 1, 0]}, index=["a", "b", "c"]),
+        var=pd.DataFrame({"species": [0, 1, 0, 2]}, index=["a", "b", "c", "d"]),
     )
-    _clean_uns(d)
-    assert "species_categories" not in d["uns"]
-    assert isinstance(d["obs"]["species"], pd.Categorical)
-    assert d["obs"]["species"].tolist() == ["a", "b", "a"]
+    _clean_uns(adata)
+    assert "species_categories" not in adata.uns
+    assert pd.api.types.is_categorical_dtype(adata.obs["species"])
+    assert adata.obs["species"].tolist() == ["a", "b", "a"]
     # var’s categories were overwritten by obs’s,
     # which we can detect here because var has too high codes
-    assert isinstance(d["var"]["species"], pd.Series)
+    assert pd.api.types.is_integer_dtype(adata.var["species"])
diff --git a/anndata/tests/test_layers.py b/anndata/tests/test_layers.py
index 55c19bf..4f9f009 100644
--- a/anndata/tests/test_layers.py
+++ b/anndata/tests/test_layers.py
@@ -1,4 +1,5 @@
 from importlib.util import find_spec
+import warnings
 
 import pytest
 import numpy as np
@@ -46,7 +47,7 @@ def test_views():
     "df,homogenous,dtype",
     [
         (lambda: gen_typed_df_t2_size(*X.shape), True, np.object_),
-        (lambda: pd.DataFrame(X ** 2), False, np.int_),
+        (lambda: pd.DataFrame(X**2), False, np.int_),
     ],
 )
 def test_set_dataframe(homogenous, df, dtype):
@@ -55,9 +56,9 @@ def test_set_dataframe(homogenous, df, dtype):
         with pytest.warns(UserWarning, match=r"Layer 'df'.*dtype object"):
             adata.layers["df"] = df()
     else:
-        with pytest.warns(None) as warnings:
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
             adata.layers["df"] = df()
-            assert not len(warnings)
     assert isinstance(adata.layers["df"], np.ndarray)
     assert np.issubdtype(adata.layers["df"].dtype, dtype)
 
diff --git a/anndata/tests/test_obsmvarm.py b/anndata/tests/test_obsmvarm.py
index 5570226..1c08e75 100644
--- a/anndata/tests/test_obsmvarm.py
+++ b/anndata/tests/test_obsmvarm.py
@@ -20,7 +20,7 @@ def adata():
     return anndata.AnnData(X, obs=obs, var=var)
 
 
-def test_assigmnent_dict(adata):
+def test_assignment_dict(adata):
     d_obsm = dict(
         a=pd.DataFrame(
             dict(a1=np.ones(M), a2=[f"a{i}" for i in range(M)]),
@@ -101,3 +101,25 @@ def test_setting_sparse(adata):
         adata.varm["b"] = bad_varm_sparse
 
     assert h == joblib.hash(adata)
+
+
+def test_setting_daskarray(adata):
+    import dask.array as da
+
+    adata.obsm["a"] = da.ones((M, 10))
+    adata.varm["a"] = da.ones((N, 10))
+    assert da.all(adata.obsm["a"] == da.ones((M, 10)))
+    assert da.all(adata.varm["a"] == da.ones((N, 10)))
+    assert type(adata.obsm["a"]) == da.Array
+    assert type(adata.varm["a"]) == da.Array
+
+    h = joblib.hash(adata)
+    with pytest.raises(ValueError):
+        adata.obsm["b"] = da.ones((int(M / 2), 10))
+    with pytest.raises(ValueError):
+        adata.obsm["b"] = da.ones((int(M * 2), 10))
+    with pytest.raises(ValueError):
+        adata.varm["b"] = da.ones((int(N / 2), 10))
+    with pytest.raises(ValueError):
+        adata.varm["b"] = da.ones((int(N * 2), 10))
+    assert h == joblib.hash(adata)
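
A minimal sketch of the shape validation the new dask tests above rely on (not
part of the upstream diff): aligned mappings such as .obsm reject arrays whose
leading dimension does not match the AnnData object.

    import dask.array as da
    import numpy as np
    import anndata as ad

    adata = ad.AnnData(np.zeros((10, 5)))
    adata.obsm["ok"] = da.ones((10, 3))      # leading dim matches n_obs
    try:
        adata.obsm["bad"] = da.ones((4, 3))  # wrong leading dimension
    except ValueError as err:
        print("rejected:", err)
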
diff --git a/anndata/tests/test_obspvarp.py b/anndata/tests/test_obspvarp.py
index 39f4ac3..8ff025d 100644
--- a/anndata/tests/test_obspvarp.py
+++ b/anndata/tests/test_obspvarp.py
@@ -1,4 +1,6 @@
 # TODO: These tests should share code with test_layers, and test_obsmvarm
+import warnings
+
 import joblib
 import numpy as np
 import pandas as pd
@@ -96,8 +98,30 @@ def test_setting_dataframe(adata, field, dim, homogenous, df, dtype):
         with pytest.warns(UserWarning, match=rf"{field.title()} 'df'.*dtype object"):
             getattr(adata, field)["df"] = df(dim)
     else:
-        with pytest.warns(None) as warnings:
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
             getattr(adata, field)["df"] = df(dim)
-            assert not len(warnings)
     assert isinstance(getattr(adata, field)["df"], np.ndarray)
     assert np.issubdtype(getattr(adata, field)["df"].dtype, dtype)
+
+
+def test_setting_daskarray(adata):
+    import dask.array as da
+
+    adata.obsp["a"] = da.ones((M, M))
+    adata.varp["a"] = da.ones((N, N))
+    assert da.all(adata.obsp["a"] == da.ones((M, M)))
+    assert da.all(adata.varp["a"] == da.ones((N, N)))
+    assert type(adata.obsp["a"]) == da.Array
+    assert type(adata.varp["a"]) == da.Array
+
+    h = joblib.hash(adata)
+    with pytest.raises(ValueError):
+        adata.obsp["b"] = da.ones((int(M / 2), M))
+    with pytest.raises(ValueError):
+        adata.obsp["b"] = da.ones((M, int(M * 2)))
+    with pytest.raises(ValueError):
+        adata.varp["b"] = da.ones((int(N / 2), 10))
+    with pytest.raises(ValueError):
+        adata.varp["b"] = da.ones((N, int(N * 2)))
+    assert h == joblib.hash(adata)
diff --git a/anndata/tests/test_raw.py b/anndata/tests/test_raw.py
index a4b5b72..c376a54 100644
--- a/anndata/tests/test_raw.py
+++ b/anndata/tests/test_raw.py
@@ -3,7 +3,7 @@ import pytest
 
 import anndata as ad
 from anndata._core.anndata import ImplicitModificationWarning
-from anndata.tests.helpers import assert_equal, gen_adata
+from anndata.tests.helpers import assert_equal, gen_adata, GEN_ADATA_DASK_ARGS
 
 
 # -------------------------------------------------------------------------------
@@ -35,7 +35,7 @@ uns_dict = dict(  # unstructured annotation
 @pytest.fixture
 def adata_raw():
     adata = ad.AnnData(
-        np.array(data), obs=obs_dict, var=var_dict, uns=uns_dict, dtype="int32"
+        np.array(data, dtype="int32"), obs=obs_dict, var=var_dict, uns=uns_dict
     )
     adata.raw = adata
     # Make them different shapes
@@ -60,7 +60,7 @@ def test_raw_del(adata_raw):
 
 
 def test_raw_set_as_none(adata_raw):
-    # Test for theislab/anndata#445
+    # Test for scverse/anndata#445
     a = adata_raw
     b = adata_raw.copy()
 
@@ -125,7 +125,7 @@ def test_raw_view_backed(adata_raw, backing_h5ad):
 
 
 def test_raw_as_parent_view():
-    # https://github.com/theislab/anndata/issues/288
+    # https://github.com/scverse/anndata/issues/288
     a = ad.AnnData(np.ones((4, 3)))
     a.varm["PCs"] = np.ones((3, 3))
     a.raw = a
@@ -136,8 +136,8 @@ def test_raw_as_parent_view():
 
 
 def test_to_adata():
-    # https://github.com/theislab/anndata/pull/404
-    adata = gen_adata((20, 10))
+    # https://github.com/scverse/anndata/pull/404
+    adata = gen_adata((20, 10), **GEN_ADATA_DASK_ARGS)
 
     with_raw = adata[:, ::2].copy()
     with_raw.raw = adata.copy()
@@ -147,3 +147,18 @@ def test_to_adata():
     del adata.layers, adata.varp
 
     assert_equal(adata, with_raw.raw.to_adata())
+
+
+def test_to_adata_populates_obs():
+    adata = gen_adata((20, 10), **GEN_ADATA_DASK_ARGS)
+
+    del adata.layers, adata.uns, adata.varp
+    adata_w_raw = adata.copy()
+
+    raw = adata.copy()
+    del raw.obs, raw.obsm, raw.obsp, raw.uns
+
+    adata_w_raw.raw = raw
+    from_raw = adata_w_raw.raw.to_adata()
+
+    assert_equal(adata, from_raw)
diff --git a/anndata/tests/test_readwrite.py b/anndata/tests/test_readwrite.py
index 2c45816..a862adc 100644
--- a/anndata/tests/test_readwrite.py
+++ b/anndata/tests/test_readwrite.py
@@ -1,8 +1,9 @@
+import re
+from contextlib import contextmanager
 from importlib.util import find_spec
 from os import PathLike
 from pathlib import Path
 from string import ascii_letters
-import tempfile
 import warnings
 
 import h5py
@@ -14,10 +15,12 @@ from scipy.sparse import csr_matrix, csc_matrix
 import zarr
 
 import anndata as ad
+from anndata._io.utils import AnnDataReadError
+from anndata._io.specs.registry import IORegistryError
 from anndata.utils import asarray
-from anndata.compat import _read_attr
+from anndata.compat import _read_attr, DaskArray
 
-from anndata.tests.helpers import gen_adata, assert_equal
+from anndata.tests.helpers import gen_adata, assert_equal, as_dense_dask_array
 
 HERE = Path(__file__).parent
 
@@ -96,7 +99,7 @@ diskfmt2 = diskfmt
 # ------------------------------------------------------------------------------
 
 
-@pytest.mark.parametrize("typ", [np.array, csr_matrix])
+@pytest.mark.parametrize("typ", [np.array, csr_matrix, as_dense_dask_array])
 def test_readwrite_roundtrip(typ, tmp_path, diskfmt, diskfmt2):
     tmpdir = Path(tmp_path)
     pth1 = tmpdir / f"first.{diskfmt}"
@@ -117,11 +120,9 @@ def test_readwrite_roundtrip(typ, tmp_path, diskfmt, diskfmt2):
     assert_equal(adata2, adata1)
 
 
-@pytest.mark.parametrize("typ", [np.array, csr_matrix])
-def test_readwrite_h5ad(typ, dataset_kwargs, backing_h5ad):
-    tmpdir = tempfile.TemporaryDirectory()
-    tmpdirpth = Path(tmpdir.name)
-    mid_pth = tmpdirpth / "mid.h5ad"
+@pytest.mark.parametrize("typ", [np.array, csr_matrix, as_dense_dask_array])
+def test_readwrite_h5ad(tmp_path, typ, dataset_kwargs, backing_h5ad):
+    mid_pth = tmp_path / "mid.h5ad"
 
     X = typ(X_list)
     adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
@@ -141,20 +142,24 @@ def test_readwrite_h5ad(typ, dataset_kwargs, backing_h5ad):
     assert is_categorical_dtype(adata.raw.var["vanno2"])
     pd.testing.assert_frame_equal(adata.obs, adata_src.obs)
     pd.testing.assert_frame_equal(adata.var, adata_src.var)
-    assert np.all(adata.var.index == adata_src.var.index)
+    assert_equal(adata.var.index, adata_src.var.index)
     assert adata.var.index.dtype == adata_src.var.index.dtype
-    assert type(adata.raw.X) is type(adata_src.raw.X)
-    assert type(adata.raw.varm) is type(adata_src.raw.varm)
-    assert np.allclose(asarray(adata.raw.X), asarray(adata_src.raw.X))
+
+    assert isinstance(adata_src.raw.X, (type(adata.raw.X), DaskArray))
+    assert isinstance(
+        adata_src.uns["uns4"]["c"], (type(adata.uns["uns4"]["c"]), DaskArray)
+    )
+    assert isinstance(adata_src.varm, (type(adata.varm), DaskArray))
+
+    assert_equal(adata.raw.X, adata_src.raw.X)
     pd.testing.assert_frame_equal(adata.raw.var, adata_src.raw.var)
     assert isinstance(adata.uns["uns4"]["a"], (int, np.integer))
     assert isinstance(adata_src.uns["uns4"]["a"], (int, np.integer))
-    assert type(adata.uns["uns4"]["c"]) is type(adata_src.uns["uns4"]["c"])
     assert_equal(adata, adata_src)
 
 
 @pytest.mark.skipif(not find_spec("zarr"), reason="Zarr is not installed")
-@pytest.mark.parametrize("typ", [np.array, csr_matrix])
+@pytest.mark.parametrize("typ", [np.array, csr_matrix, as_dense_dask_array])
 def test_readwrite_zarr(typ, tmp_path):
     X = typ(X_list)
     adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
@@ -171,21 +176,30 @@ def test_readwrite_zarr(typ, tmp_path):
     assert is_categorical_dtype(adata.raw.var["vanno2"])
     pd.testing.assert_frame_equal(adata.obs, adata_src.obs)
     pd.testing.assert_frame_equal(adata.var, adata_src.var)
-    assert np.all(adata.var.index == adata_src.var.index)
+    assert_equal(adata.var.index, adata_src.var.index)
     assert adata.var.index.dtype == adata_src.var.index.dtype
-    assert type(adata.raw.X) is type(adata_src.raw.X)
-    assert np.allclose(asarray(adata.raw.X), asarray(adata_src.raw.X))
-    assert np.all(adata.raw.var == adata_src.raw.var)
+
+    # Dev. Note:
+    # The source element is either the same type as the loaded one, or a
+    # DaskArray that gets loaded back as a plain array. DaskArray round-trips
+    # are covered by their own tests, so accepting either type here is enough.
+    assert isinstance(adata_src.raw.X, (type(adata.raw.X), DaskArray))
+    assert isinstance(
+        adata_src.uns["uns4"]["c"], (type(adata.uns["uns4"]["c"]), DaskArray)
+    )
+    assert isinstance(adata_src.varm, (type(adata.varm), DaskArray))
+
+    assert_equal(adata.raw.X, adata_src.raw.X)
+    assert_equal(adata.raw.var, adata_src.raw.var)
     assert isinstance(adata.uns["uns4"]["a"], (int, np.integer))
     assert isinstance(adata_src.uns["uns4"]["a"], (int, np.integer))
-    assert type(adata.uns["uns4"]["c"]) is type(adata_src.uns["uns4"]["c"])
     assert_equal(adata, adata_src)
 
 
-@pytest.mark.parametrize("typ", [np.array, csr_matrix])
+@pytest.mark.parametrize("typ", [np.array, csr_matrix, as_dense_dask_array])
 def test_readwrite_maintain_X_dtype(typ, backing_h5ad):
-    X = typ(X_list)
-    adata_src = ad.AnnData(X, dtype="int8")
+    X = typ(X_list).astype("int8")
+    adata_src = ad.AnnData(X)
     adata_src.write(backing_h5ad)
 
     adata = ad.read(backing_h5ad)
@@ -215,7 +229,7 @@ def test_maintain_layers(rw):
     assert not np.any((orig.layers["sparse"] != curr.layers["sparse"]).toarray())
 
 
-@pytest.mark.parametrize("typ", [np.array, csr_matrix])
+@pytest.mark.parametrize("typ", [np.array, csr_matrix, as_dense_dask_array])
 def test_readwrite_h5ad_one_dimension(typ, backing_h5ad):
     X = typ(X_list)
     adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
@@ -226,7 +240,7 @@ def test_readwrite_h5ad_one_dimension(typ, backing_h5ad):
     assert_equal(adata, adata_one)
 
 
-@pytest.mark.parametrize("typ", [np.array, csr_matrix])
+@pytest.mark.parametrize("typ", [np.array, csr_matrix, as_dense_dask_array])
 def test_readwrite_backed(typ, backing_h5ad):
     X = typ(X_list)
     adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
@@ -242,11 +256,9 @@ def test_readwrite_backed(typ, backing_h5ad):
 
 
 @pytest.mark.parametrize("typ", [np.array, csr_matrix, csc_matrix])
-def test_readwrite_equivalent_h5ad_zarr(typ):
-    tmpdir = tempfile.TemporaryDirectory()
-    tmpdirpth = Path(tmpdir.name)
-    h5ad_pth = tmpdirpth / "adata.h5ad"
-    zarr_pth = tmpdirpth / "adata.zarr"
+def test_readwrite_equivalent_h5ad_zarr(tmp_path, typ):
+    h5ad_pth = tmp_path / "adata.h5ad"
+    zarr_pth = tmp_path / "adata.zarr"
 
     M, N = 100, 101
     adata = gen_adata((M, N), X_type=typ)
@@ -260,6 +272,41 @@ def test_readwrite_equivalent_h5ad_zarr(typ):
     assert_equal(from_h5ad, from_zarr, exact=True)
 
 
+@contextmanager
+def store_context(path: Path):
+    if path.suffix == ".zarr":
+        store = zarr.open(path, "r+")
+    else:
+        file = h5py.File(path, "r+")
+        store = file["/"]
+    yield store
+    if "file" in locals():
+        file.close()
+
+
+@pytest.mark.parametrize(
+    ["name", "read", "write"],
+    [
+        ("adata.h5ad", ad.read_h5ad, ad.AnnData.write_h5ad),
+        ("adata.zarr", ad.read_zarr, ad.AnnData.write_zarr),
+    ],
+)
+def test_read_full_io_error(tmp_path, name, read, write):
+    adata = gen_adata((4, 3))
+    path = tmp_path / name
+    write(adata, path)
+    with store_context(path) as store:
+        store["obs"].attrs["encoding-type"] = "invalid"
+    with pytest.raises(
+        AnnDataReadError, match=r"raised while reading key '/obs'"
+    ) as exc_info:
+        read(path)
+    assert re.search(
+        r"No read method registered for IOSpec\(encoding_type='invalid', encoding_version='0.2.0'\)",
+        str(exc_info.value.__cause__),
+    )
+
+
 @pytest.mark.parametrize(
     "compression,compression_opts",
     [
@@ -270,7 +317,7 @@ def test_readwrite_equivalent_h5ad_zarr(typ):
     ],
 )
 def test_hdf5_compression_opts(tmp_path, compression, compression_opts):
-    # https://github.com/theislab/anndata/issues/497
+    # https://github.com/scverse/anndata/issues/497
     pth = Path(tmp_path) / "adata.h5ad"
     adata = gen_adata((10, 8))
     kwargs = {}
@@ -455,7 +502,7 @@ def test_write_csv(typ, tmp_path):
 
 @pytest.mark.parametrize("typ", [np.array, csr_matrix])
 def test_write_csv_view(typ, tmp_path):
-    # https://github.com/theislab/anndata/issues/401
+    # https://github.com/scverse/anndata/issues/401
     import hashlib
 
     def md5_path(pth: PathLike) -> bytes:
@@ -505,8 +552,6 @@ def test_write_csv_view(typ, tmp_path):
     ],
 )
 def test_readwrite_hdf5_empty(read, write, name, tmp_path):
-    if read is ad.read_zarr:
-        pytest.importorskip("zarr")
     adata = ad.AnnData(uns=dict(empty=np.array([], dtype=float)))
     write(tmp_path / name, adata)
     ad_read = read(tmp_path / name)
@@ -546,7 +591,7 @@ def test_write_categorical(tmp_path, diskfmt):
 def test_write_categorical_index(tmp_path, diskfmt):
     adata_pth = tmp_path / f"adata.{diskfmt}"
     orig = ad.AnnData(
-        uns={"df": pd.DataFrame(index=pd.Categorical(list("aabcd")))},
+        uns={"df": pd.DataFrame({}, index=pd.Categorical(list("aabcd")))},
     )
     getattr(orig, f"write_{diskfmt}")(adata_pth)
     curr = getattr(ad, f"read_{diskfmt}")(adata_pth)
@@ -566,17 +611,17 @@ def test_dataframe_reserved_columns(tmp_path, diskfmt):
     for colname in reserved:
         to_write = orig.copy()
         to_write.obs[colname] = np.ones(5)
-        with pytest.raises(ValueError) as e:
+        with pytest.raises(ValueError) as exc_info:
             getattr(to_write, f"write_{diskfmt}")(adata_pth)
-        assert colname in str(e.value)
+        assert colname in str(exc_info.value.__cause__)
     for colname in reserved:
         to_write = orig.copy()
         to_write.varm["df"] = pd.DataFrame(
             {colname: list("aabcd")}, index=to_write.var_names
         )
-        with pytest.raises(ValueError) as e:
+        with pytest.raises(ValueError) as exc_info:
             getattr(to_write, f"write_{diskfmt}")(adata_pth)
-        assert colname in str(e.value)
+        assert colname in str(exc_info.value.__cause__)
 
 
 def test_write_large_categorical(tmp_path, diskfmt):
@@ -609,7 +654,7 @@ def test_write_large_categorical(tmp_path, diskfmt):
 
 
 def test_write_string_types(tmp_path, diskfmt):
-    # https://github.com/theislab/anndata/issues/456
+    # https://github.com/scverse/anndata/issues/456
     adata_pth = tmp_path / f"adata.{diskfmt}"
 
     adata = ad.AnnData(
@@ -630,8 +675,9 @@ def test_write_string_types(tmp_path, diskfmt):
 
     adata.obs[b"c"] = np.zeros(3)
     # This should error, and tell you which key is at fault
-    with pytest.raises(TypeError, match=str(b"c")):
+    with pytest.raises(TypeError, match=r"writing key 'obs'") as exc_info:
         write(adata_pth)
+    assert str(b"c") in str(exc_info.value.__cause__)
 
 
 @pytest.mark.parametrize(
@@ -765,7 +811,7 @@ def test_io_dtype(tmp_path, diskfmt, dtype):
     read = lambda pth: getattr(ad, f"read_{diskfmt}")(pth)
     write = lambda adata, pth: getattr(adata, f"write_{diskfmt}")(pth)
 
-    orig = ad.AnnData(np.ones((5, 8), dtype=dtype), dtype=dtype)
+    orig = ad.AnnData(np.ones((5, 8), dtype=dtype))
     write(orig, pth)
     curr = read(pth)
 
diff --git a/anndata/tests/test_uns.py b/anndata/tests/test_uns.py
index 21d98fc..7bccc78 100644
--- a/anndata/tests/test_uns.py
+++ b/anndata/tests/test_uns.py
@@ -8,7 +8,7 @@ from anndata.tests.helpers import assert_equal
 
 
 def test_uns_color_subset():
-    # Tests for https://github.com/theislab/anndata/issues/257
+    # Tests for https://github.com/scverse/anndata/issues/257
     obs = pd.DataFrame(
         {
             "cat1": pd.Categorical(list("aabcd")),
diff --git a/anndata/tests/test_views.py b/anndata/tests/test_views.py
index e176112..0afac60 100644
--- a/anndata/tests/test_views.py
+++ b/anndata/tests/test_views.py
@@ -10,14 +10,16 @@ import pytest
 import anndata as ad
 from anndata._core.index import _normalize_index
 from anndata._core.views import ArrayView, SparseCSRView, SparseCSCView
+from anndata.compat import DaskArray
 from anndata.utils import asarray
-
 from anndata.tests.helpers import (
     gen_adata,
     subset_func,
     slice_subset,
     single_subset,
     assert_equal,
+    as_dense_dask_array,
+    GEN_ADATA_DASK_ARGS,
 )
 
 # ------------------------------------------------------------------------------
@@ -38,7 +40,6 @@ var_dict = dict(vanno1=[3.1, 3.2, 3.3])
 # unstructured annotation
 uns_dict = dict(oanno1_colors=["#000000", "#FFFFFF"], uns2=["some annotation"])
 
-
 subset_func2 = subset_func
 
 
@@ -61,8 +62,8 @@ def adata_parameterized(request):
 
 
 @pytest.fixture(
-    params=[np.array, sparse.csr_matrix, sparse.csc_matrix],
-    ids=["np_array", "scipy_csr", "scipy_csc"],
+    params=[np.array, sparse.csr_matrix, sparse.csc_matrix, as_dense_dask_array],
+    ids=["np_array", "scipy_csr", "scipy_csc", "dask_array"],
 )
 def matrix_type(request):
     return request.param
@@ -79,8 +80,8 @@ def mapping_name(request):
 
 
 def test_views():
-    X = np.array(X_list)
-    adata = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict, dtype="int32")
+    X = np.array(X_list, dtype="int32")
+    adata = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
 
     assert adata[:, 0].is_view
     assert adata[:, 0].X.tolist() == np.reshape([1, 4, 7], (3, 1)).tolist()
@@ -181,7 +182,7 @@ def test_set_var(adata, subset_func):
 
 
 def test_drop_obs_column():
-    adata = ad.AnnData(np.array(X_list), obs=obs_dict, dtype="int32")
+    adata = ad.AnnData(np.array(X_list, dtype="int32"), obs=obs_dict)
 
     subset = adata[:2]
     assert subset.is_view
@@ -336,7 +337,7 @@ def test_set_subset_varm(adata, subset_func):
 
 @pytest.mark.parametrize("attr", ["obsm", "varm", "obsp", "varp", "layers"])
 def test_view_failed_delitem(attr):
-    adata = gen_adata((10, 10))
+    adata = gen_adata((10, 10), **GEN_ADATA_DASK_ARGS)
     view = adata[5:7, :][:, :5]
     adata_hash = joblib.hash(adata)
     view_hash = joblib.hash(view)
@@ -351,7 +352,7 @@ def test_view_failed_delitem(attr):
 
 @pytest.mark.parametrize("attr", ["obsm", "varm", "obsp", "varp", "layers"])
 def test_view_delitem(attr):
-    adata = gen_adata((10, 10))
+    adata = gen_adata((10, 10), **GEN_ADATA_DASK_ARGS)
     getattr(adata, attr)["to_delete"] = np.ones((10, 10))
     # Shouldn’t be a subclass, should be an ndarray
     assert type(getattr(adata, attr)["to_delete"]) is np.ndarray
@@ -372,7 +373,7 @@ def test_view_delitem(attr):
     "attr", ["X", "obs", "var", "obsm", "varm", "obsp", "varp", "layers", "uns"]
 )
 def test_view_delattr(attr, subset_func):
-    base = gen_adata((10, 10))
+    base = gen_adata((10, 10), **GEN_ADATA_DASK_ARGS)
     orig_hash = joblib.hash(base)
     subset = base[subset_func(base.obs_names), subset_func(base.var_names)]
     empty = ad.AnnData(obs=subset.obs[[]], var=subset.var[[]])
@@ -390,7 +391,7 @@ def test_view_delattr(attr, subset_func):
 )
 def test_view_setattr_machinery(attr, subset_func, subset_func2):
     # Tests that setting attributes on a view doesn't mess anything up too bad
-    adata = gen_adata((10, 10))
+    adata = gen_adata((10, 10), **GEN_ADATA_DASK_ARGS)
     view = adata[subset_func(adata.obs_names), subset_func2(adata.var_names)]
 
     actual = view.copy()
@@ -461,7 +462,7 @@ def test_view_of_view_modification():
 
 
 def test_double_index(subset_func, subset_func2):
-    adata = gen_adata((10, 10))
+    adata = gen_adata((10, 10), **GEN_ADATA_DASK_ARGS)
     obs_subset = subset_func(adata.obs_names)
     var_subset = subset_func2(adata.var_names)
     v1 = adata[obs_subset, var_subset]
@@ -483,7 +484,7 @@ def test_view_retains_ndarray_subclass():
 
 
 def test_modify_uns_in_copy():
-    # https://github.com/theislab/anndata/issues/571
+    # https://github.com/scverse/anndata/issues/571
     adata = ad.AnnData(np.ones((5, 5)), uns={"parent": {"key": "value"}})
     adata_copy = adata[:3].copy()
     adata_copy.uns["parent"]["key"] = "new_value"
@@ -492,7 +493,7 @@ def test_modify_uns_in_copy():
 
 @pytest.mark.parametrize("index", [-101, 100, (slice(None), -101), (slice(None), 100)])
 def test_invalid_scalar_index(adata, index):
-    # https://github.com/theislab/anndata/issues/619
+    # https://github.com/scverse/anndata/issues/619
     with pytest.raises(IndexError, match=r".*index.* out of range\."):
         _ = adata[index]
 
@@ -536,7 +537,7 @@ def test_deepcopy_subset(adata, spmat: type):
     np.testing.assert_array_equal(adata.obsp["spmat"].shape, (10, 10))
 
 
-# https://github.com/theislab/anndata/issues/680
+# https://github.com/scverse/anndata/issues/680
 @pytest.mark.parametrize("array_type", [asarray, sparse.csr_matrix, sparse.csc_matrix])
 @pytest.mark.parametrize("attr", ["X", "layers", "obsm", "varm", "obsp", "varp"])
 def test_view_mixin_copies_data(adata, array_type: type, attr):
diff --git a/anndata/tests/test_x.py b/anndata/tests/test_x.py
index b1c1669..fb33350 100644
--- a/anndata/tests/test_x.py
+++ b/anndata/tests/test_x.py
@@ -30,7 +30,7 @@ def diskfmt(request):
 @pytest.mark.parametrize("orig_array_type", UNLABELLED_ARRAY_TYPES)
 @pytest.mark.parametrize("new_array_type", UNLABELLED_ARRAY_TYPES)
 def test_setter_singular_dim(shape, orig_array_type, new_array_type):
-    # https://github.com/theislab/anndata/issues/500
+    # https://github.com/scverse/anndata/issues/500
     adata = gen_adata(shape, X_type=orig_array_type)
     adata.X = new_array_type(np.ones(shape))
     np.testing.assert_equal(asarray(adata.X), 1)
diff --git a/anndata/utils.py b/anndata/utils.py
index bc00b02..2ab0a6b 100644
--- a/anndata/utils.py
+++ b/anndata/utils.py
@@ -59,6 +59,115 @@ def convert_to_dict_nonetype(obj: None):
     return dict()
 
 
+@singledispatch
+def dim_len(x, axis):
+    """\
+    Return the size of an array in dimension `axis`.
+
+    Returns None if `x` is an awkward array with variable length in the requested dimension.
+    """
+    return x.shape[axis]
+
+
+try:
+    from .compat import awkward as ak
+
+    def _size_at_depth(layout, depth, lateral_context, **kwargs):
+        """Callback function for dim_len_awkward, resolving the dim_len for a given level"""
+        if layout.is_numpy:
+            # if it's an embedded rectilinear array, we have to deal with its shape
+            # which might not be 1-dimensional
+            if layout.is_unknown:
+                shape = (0,)
+            else:
+                shape = layout.shape
+            numpy_axis = lateral_context["axis"] - depth + 1
+            if not (1 <= numpy_axis < len(shape)):
+                raise TypeError(f"axis={lateral_context['axis']} is too deep")
+            lateral_context["out"] = shape[numpy_axis]
+            return ak.contents.EmptyArray()
+
+        elif layout.is_list and depth == lateral_context["axis"]:
+            if layout.parameter("__array__") in ("string", "bytestring"):
+                # Strings are implemented like an array of lists of uint8 (ListType(NumpyType(...)))
+                # which results in an extra hierarchy-level that shouldn't show up in dim_len
+                # See https://github.com/scikit-hep/awkward/discussions/1654#discussioncomment-3736747
+                raise TypeError(f"axis={lateral_context['axis']} is too deep")
+
+            if layout.is_regular:
+                # if it's a regular list, you want the size
+                lateral_context["out"] = layout.size
+            else:
+                # if it's an irregular list, you want a null token
+                lateral_context["out"] = -1
+            return ak.contents.EmptyArray()
+
+        elif layout.is_record and depth == lateral_context["axis"]:
+            lateral_context["out"] = len(layout.fields)
+            return ak.contents.EmptyArray()
+
+        elif layout.is_record:
+            # currently, we don't recurse into records
+            # in theory we could, just not sure how to do it at the moment
+            # Would need to consider cases like: scalars, unevenly sized values
+            raise TypeError(
+                f"Cannot recurse into record type found at axis={lateral_context['axis']}"
+            )
+
+        elif layout.is_union:
+            # if it's a union, you could get the result of each union branch
+            # separately and see if they're all the same; if not, it's an error
+            result = None
+            for content in layout.contents:
+                context = {"axis": lateral_context["axis"]}
+                ak.transform(
+                    _size_at_depth,
+                    content,
+                    lateral_context=context,
+                )
+                if result is None:
+                    result = context["out"]
+                elif result != context["out"]:
+                    # Union branches have different lengths -> return null token
+                    lateral_context["out"] = -1
+                    return ak.contents.EmptyArray()
+            lateral_context["out"] = result
+            return ak.contents.EmptyArray()
+
+    @dim_len.register(ak.Array)
+    def dim_len_awkward(array, axis):
+        """Get the length of an awkward array in a given dimension
+
+        Returns None if the dimension is of variable length.
+
+        Code adapted from @jpivarski's solution in https://github.com/scikit-hep/awkward/discussions/1654#discussioncomment-3521574
+        """
+        if axis < 0:  # negative axis is another can of worms... maybe later
+            raise NotImplementedError("Does not support negative axis")
+        elif axis == 0:
+            return len(array)
+        else:
+            # communicate with the recursive function using a context (lateral)
+            context = {"axis": axis}
+
+            # "transform" but we don't care what kind of array it returns
+            ak.transform(
+                _size_at_depth,
+                array,
+                lateral_context=context,
+            )
+
+            # Use `None` as null token.
+            return None if context["out"] == -1 else context["out"]
+
+    @asarray.register(ak.Array)
+    def asarray_awkward(x):
+        return x
+
+except ImportError:
+    pass
+
+
 def make_index_unique(index: pd.Index, join: str = "-"):
     """
     Makes the index unique by appending a number string to each duplicate index element:
diff --git a/debian/changelog b/debian/changelog
index d782769..7db1b89 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+python-anndata (0.9.1-1) UNRELEASED; urgency=low
+
+  * New upstream release.
+
+ -- Debian Janitor <janitor@jelmer.uk>  Sat, 17 Jun 2023 04:48:56 -0000
+
 python-anndata (0.8.0-4) unstable; urgency=medium
 
   * Team upload.
diff --git a/debian/patches/32bit_test_fail_skip b/debian/patches/32bit_test_fail_skip
index 0177324..a30daa6 100644
--- a/debian/patches/32bit_test_fail_skip
+++ b/debian/patches/32bit_test_fail_skip
@@ -1,13 +1,15 @@
 Author: Michael R. Crusoe <crusoe@debian.org>
 Description: remove assert failing the builds for 32bit archs
 Forwarded: https://github.com/theislab/anndata/issues/443
---- python-anndata.orig/anndata/tests/test_layers.py
-+++ python-anndata/anndata/tests/test_layers.py
-@@ -59,7 +59,6 @@
+Index: python-anndata.git/anndata/tests/test_layers.py
+===================================================================
+--- python-anndata.git.orig/anndata/tests/test_layers.py
++++ python-anndata.git/anndata/tests/test_layers.py
+@@ -60,7 +60,6 @@ def test_set_dataframe(homogenous, df, d
+             warnings.simplefilter("error")
              adata.layers["df"] = df()
-             assert not len(warnings)
      assert isinstance(adata.layers["df"], np.ndarray)
 -    assert np.issubdtype(adata.layers["df"].dtype, dtype)
-
-
+ 
+ 
  def test_readwrite(backing_h5ad):
diff --git a/debian/patches/ignore_numba_failures.patch b/debian/patches/ignore_numba_failures.patch
index db0eb00..d5e2d32 100644
--- a/debian/patches/ignore_numba_failures.patch
+++ b/debian/patches/ignore_numba_failures.patch
@@ -7,9 +7,11 @@ Description: python3-numba is not yet ported to Python3.10 (see #1000336
  .
  FIXME: This patch should be deactivated if python3-numba is available for Python3.10
 
---- a/anndata/tests/test_readwrite.py
-+++ b/anndata/tests/test_readwrite.py
-@@ -342,6 +342,7 @@ def test_changed_obs_var_names(tmp_path,
+Index: python-anndata.git/anndata/tests/test_readwrite.py
+===================================================================
+--- python-anndata.git.orig/anndata/tests/test_readwrite.py
++++ python-anndata.git/anndata/tests/test_readwrite.py
+@@ -396,6 +396,7 @@ def test_changed_obs_var_names(tmp_path,
          assert_equal(read, modified, exact=True)
  
  
@@ -17,7 +19,7 @@ Description: python3-numba is not yet ported to Python3.10 (see #1000336
  @pytest.mark.skipif(not find_spec("loompy"), reason="Loompy is not installed")
  @pytest.mark.parametrize("typ", [np.array, csr_matrix])
  @pytest.mark.parametrize("obsm_mapping", [{}, dict(X_composed=["oanno3", "oanno4"])])
-@@ -386,6 +387,7 @@ def test_readwrite_loom(typ, obsm_mappin
+@@ -440,6 +441,7 @@ def test_readwrite_loom(typ, obsm_mappin
      assert adata.var_names.name == var_dim
  
  
@@ -25,9 +27,11 @@ Description: python3-numba is not yet ported to Python3.10 (see #1000336
  @pytest.mark.skipif(not find_spec("loompy"), reason="Loompy is not installed")
  def test_readloom_deprecations(tmp_path):
      loom_pth = tmp_path / "test.loom"
---- a/anndata/tests/test_layers.py
-+++ b/anndata/tests/test_layers.py
-@@ -70,6 +70,7 @@ def test_readwrite(backing_h5ad):
+Index: python-anndata.git/anndata/tests/test_layers.py
+===================================================================
+--- python-anndata.git.orig/anndata/tests/test_layers.py
++++ python-anndata.git/anndata/tests/test_layers.py
+@@ -71,6 +71,7 @@ def test_readwrite(backing_h5ad):
      assert (adata.layers["L"] == adata_read.layers["L"]).all()
  
  
diff --git a/debian/patches/ignore_test_importing_scanpy.patch b/debian/patches/ignore_test_importing_scanpy.patch
index 2cf6c83..daa7d79 100644
--- a/debian/patches/ignore_test_importing_scanpy.patch
+++ b/debian/patches/ignore_test_importing_scanpy.patch
@@ -3,9 +3,11 @@ Last-Update: Thu, 09 Dec 2021 12:42:27 +0100
 Description: The example doc tries to import some data from scanpy which is not yet packaged
  Thus the test would fail and the test is removed here
 
---- a/anndata/experimental/multi_files/_anncollection.py
-+++ b/anndata/experimental/multi_files/_anncollection.py
-@@ -629,29 +629,6 @@ class AnnCollection(_ConcatViewMixin, _I
+Index: python-anndata.git/anndata/experimental/multi_files/_anncollection.py
+===================================================================
+--- python-anndata.git.orig/anndata/experimental/multi_files/_anncollection.py
++++ python-anndata.git/anndata/experimental/multi_files/_anncollection.py
+@@ -638,29 +638,6 @@ class AnnCollection(_ConcatViewMixin, _I
          is not important, for example, when using them for stochastic gradient descent.
          In this case the performance of subsetting can be a bit better.
  
diff --git a/docs/_key_contributors.rst b/docs/_key_contributors.rst
index 02fe3b0..f2af27f 100644
--- a/docs/_key_contributors.rst
+++ b/docs/_key_contributors.rst
@@ -5,6 +5,6 @@
    * `Alex Wolf`_: initial conception/development
    * Philipp Angerer: initial conception/development, software quality
 
-.. _contributions graph: https://github.com/theislab/anndata/graphs/contributors
+.. _contributions graph: https://github.com/scverse/anndata/graphs/contributors
 .. _Isaac Virshup: https://twitter.com/ivirshup
 .. _Alex Wolf: https://twitter.com/falexwolf
diff --git a/docs/api.rst b/docs/api.md
similarity index 63%
rename from docs/api.rst
rename to docs/api.md
index fa455f7..59c3c9a 100644
--- a/docs/api.rst
+++ b/docs/api.md
@@ -1,37 +1,43 @@
-API
-===
+# API
 
+```{eval-rst}
 .. module:: anndata
+```
 
 The central class:
 
+```{eval-rst}
 .. autosummary::
    :toctree: generated/
 
    AnnData
+```
 
-Combining
----------
+## Combining
 
 Combining AnnData objects. See also the section on concatenation.
 
+```{eval-rst}
 .. autosummary::
    :toctree: generated/
 
    concat
+```
 
-Reading
--------
+## Reading
 
 Reading anndata’s native file format `.h5ad`.
 
+```{eval-rst}
 .. autosummary::
    :toctree: generated/
 
    read_h5ad
+```
 
 Reading other file formats.
 
+```{eval-rst}
 .. autosummary::
    :toctree: generated/
 
@@ -44,57 +50,75 @@ Reading other file formats.
    read_umi_tools
    read_zarr
 
+```
 
-Writing
--------
+## Writing
 
 Writing to anndata’s native file format `.h5ad`.
 
+```{eval-rst}
 .. autosummary::
    :toctree: generated/
 
    AnnData.write
+```
 
 Writing to other formats.
 
+```{eval-rst}
 .. autosummary::
    :toctree: generated/
 
    AnnData.write_csvs
    AnnData.write_loom
    AnnData.write_zarr
+```
 
-.. _experimental_api:
+(experimental-api)=
 
-Experimental API
-----------------
+## Experimental API
 
-.. warning::
-
-   API's in the experimenal module are currently in development and subject to change at any time.
+```{warning}
+APIs in the experimental module are currently in development and subject to change at any time.
+```
 
 Two classes for working with batched access to collections of many `AnnData` objects or `h5ad` files. In particular, for pytorch-based models.
 
+```{eval-rst}
 .. autosummary::
    :toctree: generated/
 
    experimental.AnnCollection
    experimental.AnnLoader
+```
 
-Low level methods for reading and writing elements of an `AnnData`` object to a store:
-
+Low level methods for reading and writing elements of an `AnnData` object to a store:
 
+```{eval-rst}
 .. autosummary::
    :toctree: generated/
 
    experimental.read_elem
    experimental.write_elem
+```
+
+Utilities for customizing the IO process:
+
+```{eval-rst}
+.. autosummary::
+   :toctree: generated/
+
+   experimental.read_dispatched
+   experimental.write_dispatched
+   experimental.IOSpec
 
+```
 
-Errors and warnings
--------------------
+## Errors and warnings
 
+```{eval-rst}
 .. autosummary::
    :toctree: generated/
 
    ImplicitModificationWarning
+```
diff --git a/docs/benchmarks.md b/docs/benchmarks.md
new file mode 100644
index 0000000..f6e459c
--- /dev/null
+++ b/docs/benchmarks.md
@@ -0,0 +1,11 @@
+# Benchmarks
+
+Computational operations in anndata are consistently benchmarked [here](https://github.com/ivirshup/anndata-benchmarks).
+
+Below follows a simple benchmark showing read-write efficiency.
+
+```{toctree}
+:maxdepth: 1
+
+benchmark-read-write
+```
diff --git a/docs/benchmarks.rst b/docs/benchmarks.rst
deleted file mode 100644
index e13877d..0000000
--- a/docs/benchmarks.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-Benchmarks
-==========
-
-Computational operations in anndata are consistently benchmarked `here <https://github.com/ivirshup/anndata-benchmarks>`__.
-
-Below follows a simple benchmark showing read-write efficiency.
-
-.. toctree::
-   :maxdepth: 1
-
-   benchmark-read-write
diff --git a/docs/concatenation.rst b/docs/concatenation.rst
index c8147ac..0b48280 100644
--- a/docs/concatenation.rst
+++ b/docs/concatenation.rst
@@ -1,10 +1,6 @@
 Concatenation
 =============
 
-.. warning::
-
-    The :func:`~anndata.concat` function is marked as experimental for the `0.7` release series, and will supercede the :meth:`AnnData.concatenate() <anndata.AnnData.concatenate>` method in future releases. While the current API is not likely to change much, this gives us a bit of freedom to make sure we've got the arguments and feature set right.
-
 With :func:`~anndata.concat`, :class:`~anndata.AnnData` objects can be combined via a composition of two operations: concatenation and merging.
 
 * Concatenation is when we keep all sub elements of each object, and stack these elements in an ordered way.
@@ -65,13 +61,13 @@ For example, given two anndata objects with differing variables:
            [0., 1.],
            [0., 0.],
            [0., 1.],
-           [1., 0.]], dtype=float32)
+           [1., 0.]])
     >>> ad.concat([a, b], join="outer").X.toarray()
     array([[1., 0., 0.],
            [0., 1., 0.],
            [0., 0., 1.],
            [0., 1., 0.],
-           [1., 0., 0.]], dtype=float32)
+           [1., 0., 0.]])
 
 The join argument is used for any element which has both (1) an axis being concatenated and (2) an axis not being concatenated.
 When concatenating along the `obs` dimension, this means elements of `.X`, `obs`, `.layers`, and `.obsm` will be affected by the choice of `join`.
diff --git a/docs/conf.py b/docs/conf.py
index 6dafd2c..6c60881 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -31,13 +31,20 @@ release = version
 # default settings
 templates_path = ["_templates"]
 html_static_path = ["_static"]
-source_suffix = ".rst"
+source_suffix = [".rst", ".md"]
 master_doc = "index"
 default_role = "literal"
-exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+exclude_patterns = [
+    "_build",
+    "Thumbs.db",
+    ".DS_Store",
+    "**.ipynb_checkpoints",
+    "tutorials/notebooks/*.rst",
+]
 pygments_style = "sphinx"
 
 extensions = [
+    "myst_parser",
     "sphinx.ext.autodoc",
     "sphinx.ext.intersphinx",
     "sphinx.ext.doctest",
@@ -46,14 +53,17 @@ extensions = [
     "sphinx.ext.napoleon",
     "sphinx.ext.autosummary",
     "sphinx_autodoc_typehints",  # needs to be after napoleon
+    "sphinx_issues",
+    "sphinxext.opengraph",
     "scanpydoc",
     "nbsphinx",
-    *[p.stem for p in (HERE / "extensions").glob("*.py")],
+    "IPython.sphinxext.ipython_console_highlighting",
 ]
 
 # Generate the API documentation when building
 autosummary_generate = True
 autodoc_member_order = "bysource"
+issues_github_path = "scverse/anndata"
 # autodoc_default_flags = ['members']
 napoleon_google_docstring = False
 napoleon_numpy_docstring = True
@@ -61,13 +71,25 @@ napoleon_include_init_with_doc = False
 napoleon_use_rtype = True  # having a separate entry generally helps readability
 napoleon_use_param = True
 napoleon_custom_sections = [("Params", "Parameters")]
+typehints_defaults = "braces"
 todo_include_todos = False
 nitpicky = True  # Report broken links
 nitpick_ignore = [
+    ("py:class", "scipy.sparse.base.spmatrix"),
     ("py:meth", "pandas.DataFrame.iloc"),
     ("py:meth", "pandas.DataFrame.loc"),
+    ("py:class", "anndata._core.views.ArrayView"),
+    ("py:class", "anndata._core.raw.Raw"),
+    *[
+        ("py:class", f"anndata._core.aligned_mapping.{cls}{kind}")
+        for cls in "Layers AxisArrays PairwiseArrays".split()
+        for kind in ["", "View"]
+    ],
+]
+suppress_warnings = [
+    "ref.citation",
+    "myst.header",  # https://github.com/executablebooks/MyST-Parser/issues/262
 ]
-suppress_warnings = ["ref.citation"]
 
 
 def setup(app: Sphinx):
@@ -77,6 +99,7 @@ def setup(app: Sphinx):
 
 intersphinx_mapping = dict(
     h5py=("https://docs.h5py.org/en/latest/", None),
+    hdf5plugin=("https://hdf5plugin.readthedocs.io/en/latest/", None),
     loompy=("https://linnarssonlab.org/loompy/", None),
     numpy=("https://numpy.org/doc/stable/", None),
     pandas=("https://pandas.pydata.org/pandas-docs/stable/", None),
@@ -84,22 +107,20 @@ intersphinx_mapping = dict(
     scipy=("https://docs.scipy.org/doc/scipy/", None),
     sklearn=("https://scikit-learn.org/stable/", None),
     zarr=("https://zarr.readthedocs.io/en/stable/", None),
-    xarray=("http://xarray.pydata.org/en/stable/", None),
+    xarray=("https://xarray.pydata.org/en/stable/", None),
 )
 qualname_overrides = {
     "h5py._hl.group.Group": "h5py.Group",
     "h5py._hl.files.File": "h5py.File",
+    "h5py._hl.dataset.Dataset": "h5py.Dataset",
     "anndata._core.anndata.AnnData": "anndata.AnnData",
-    # Temporarily
-    "anndata._core.raw.Raw": "anndata.AnnData",
-    "anndata._core.views.ArrayView": "numpy.ndarray",
-    **{
-        f"anndata._core.aligned_mapping.{cls}{kind}": "typing.Mapping"
-        for cls in "Layers AxisArrays PairwiseArrays".split()
-        for kind in ["", "View"]
-    },
 }
 
+# -- Social cards ---------------------------------------------------------
+
+ogp_site_url = "https://anndata.readthedocs.io/"
+ogp_image = "https://anndata.readthedocs.io/en/latest/_static/img/anndata_schema.svg"
+
 # -- Options for HTML output ----------------------------------------------
 
 
@@ -107,7 +128,7 @@ html_theme = "scanpydoc"
 html_theme_options = dict(navigation_depth=4)
 html_context = dict(
     display_github=True,  # Integrate GitHub
-    github_user="theislab",  # Username
+    github_user="scverse",  # Username
     github_repo="anndata",  # Repo name
     github_version="master",  # Version
     conf_py_path="/docs/",  # Path in the checkout to the docs root
diff --git a/docs/contributing.md b/docs/contributing.md
new file mode 100644
index 0000000..e9b57e2
--- /dev/null
+++ b/docs/contributing.md
@@ -0,0 +1,7 @@
+# Contributing
+
+AnnData follows the development practices outlined in the [Scanpy contribution guide](https://scanpy.readthedocs.io/en/latest/dev/index.html).
+
+```{eval-rst}
+.. include:: _key_contributors.rst
+```
diff --git a/docs/contributing.rst b/docs/contributing.rst
deleted file mode 100644
index a0ace43..0000000
--- a/docs/contributing.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Contributing
-============
-
-To be expanded. For now, see the `Scanpy contribution guide <https://scanpy.readthedocs.io/en/latest/dev/index.html>`_.
-
-.. include:: _key_contributors.rst
\ No newline at end of file
diff --git a/docs/extensions/github_links.py b/docs/extensions/github_links.py
deleted file mode 100644
index 224edf1..0000000
--- a/docs/extensions/github_links.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from types import MappingProxyType
-from typing import Any, Mapping, Sequence, NamedTuple
-
-from docutils import nodes
-from docutils.parsers.rst.directives import class_option
-from docutils.parsers.rst.states import Inliner
-from sphinx.application import Sphinx
-from sphinx.config import Config
-
-
-class AutoLink(NamedTuple):
-    class_name: str
-    url_template: str
-    title_template: str = "{}"
-    options: Mapping[str, Any] = MappingProxyType({"class": class_option})
-
-    def __call__(
-        self,
-        name: str,
-        rawtext: str,
-        text: str,
-        lineno: int,
-        inliner: Inliner,
-        options: Mapping[str, Any] = MappingProxyType({}),
-        content: Sequence[str] = (),
-    ):
-        url = self.url_template.format(text)
-        title = self.title_template.format(text)
-        options = {**dict(classes=[self.class_name]), **options}
-        node = nodes.reference(rawtext, title, refuri=url, **options)
-        return [node], []
-
-
-def register_links(app: Sphinx, config: Config):
-    gh_url = "https://github.com/{github_user}/{github_repo}".format_map(
-        config.html_context
-    )
-    app.add_role("pr", AutoLink("pr", f"{gh_url}/pull/{{}}", "PR {}"))
-    app.add_role("issue", AutoLink("issue", f"{gh_url}/issues/{{}}", "issue {}"))
-    app.add_role("noteversion", AutoLink("noteversion", f"{gh_url}/releases/tag/{{}}"))
-    # tutorial links
-    tutorials_url = "https://anndata-tutorials.readthedocs.io/en/latest/"
-    app.add_role(
-        "tutorial",
-        AutoLink("tutorial", f"{tutorials_url}{{}}.html", "→ tutorial: {}"),
-    )
-
-
-def setup(app: Sphinx):
-    app.connect("config-inited", register_links)
diff --git a/docs/fileformat-prose.md b/docs/fileformat-prose.md
new file mode 100644
index 0000000..c459a43
--- /dev/null
+++ b/docs/fileformat-prose.md
@@ -0,0 +1,403 @@
+# On-disk format
+
+```{note}
+These docs are written for anndata 0.8.
+Files written before this version may differ in some conventions,
+but will still be read by newer versions of the library.
+```
+
+AnnData objects are saved on disk to hierarchical array stores like [HDF5]
+(via {doc}`H5py <h5py:index>`) and {doc}`zarr:index`.
+This allows us to have very similar structures on disk and in memory.
+
+As an example we’ll look into a typical `.h5ad` object that’s been through an analysis.
+This structure should be largely equivalent to Zarr structure, though there are a few minor differences.
+
+## Elements
+
+
+ <!-- I’ve started using h5py since I couldn’t figure out a nice way to print attributes from bash. -->
+
+```python
+>>> import h5py
+>>> f = h5py.File("02_processed.h5ad", "r")
+>>> list(f.keys())
+['X', 'layers', 'obs', 'obsm', 'uns', 'var', 'varm']
+```
+
+<!-- ```bash
+$ h5ls 02_processed.h5ad
+X                        Group
+layers                   Group
+obs                      Group
+obsm                     Group
+uns                      Group
+var                      Group
+varm                     Group
+``` -->
+
+In general, `AnnData` objects are composed of various types of elements.
+Each element is encoded as either an Array (or Dataset in hdf5 terminology) or a collection of elements (e.g. Group) in the store.
+We record the type of an element using the `encoding-type` and `encoding-version` keys in its attributes.
+For example, we can see that this file represents an `AnnData` object from its metadata:
+
+```python
+>>> dict(f.attrs)
+{'encoding-type': 'anndata', 'encoding-version': '0.1.0'}
+```
+
+Using this information, we're able to dispatch onto readers for the different element types that you'd find in an anndata.
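+
+As an illustration only (the real readers live in `anndata._io.specs.registry`), dispatching on this metadata might look roughly like the following sketch; the helper names here are hypothetical:
+
+```python
+def read_dense(elem):
+    # Dense arrays are plain HDF5/Zarr datasets; slicing loads them into memory
+    return elem[...]
+
+
+# A real registry would hold one reader per (encoding-type, encoding-version) pair
+READERS = {"array": read_dense}
+
+
+def read_element(elem):
+    """Route an HDF5/Zarr element to a reader based on its encoding metadata."""
+    encoding = elem.attrs["encoding-type"]
+    try:
+        reader = READERS[encoding]
+    except KeyError:
+        raise NotImplementedError(f"no reader registered for {encoding!r}")
+    return reader(elem)
+```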
+
+### Element Specification
+
+* An element can be any object within the storage hierarchy (typically an array or group) with associated metadata
+* An element MUST have a string-valued field `"encoding-type"` in its metadata
+* An element MUST have a string-valued field `"encoding-version"` in its metadata that can be evaluated to a version
+
+### AnnData specification (v0.1.0)
+
+* An `AnnData` object MUST be a group. 
+* The group's metadata MUST include entries: `"encoding-type": "anndata"`, `"encoding-version": "0.1.0"`.
+* An `AnnData` group MUST contain entries `"obs"` and `"var"`, which MUST be dataframes (though these may have only an index with no columns).
+* The group MAY contain an entry `X`, which MUST be either a dense or sparse array and whose shape MUST be (`n_obs`, `n_var`)
+* The group MAY contain a mapping `layers`. Entries in `layers` MUST be dense or sparse arrays which have shapes (`n_obs`, `n_var`)
+* The group MAY contain a mapping `obsm`. Entries in `obsm` MUST be sparse arrays, dense arrays, or dataframes. These entries MUST have a first dimension of size `n_obs`
+* The group MAY contain a mapping `varm`. Entries in `varm` MUST be sparse arrays, dense arrays, or dataframes. These entries MUST have a first dimension of size `n_var`
+* The group MAY contain a mapping `obsp`. Entries in `obsp` MUST be sparse or dense arrays. The entries' first two dimensions MUST be of size `n_obs`
+* The group MAY contain a mapping `varp`. Entries in `varp` MUST be sparse or dense arrays. The entries' first two dimensions MUST be of size `n_var`
+* The group MAY contain a mapping `uns`. Entries in `uns` MUST be anndata encoded types.
+
+## Dense arrays
+
+Dense numeric arrays have the simplest representation on disk,
+as they have native equivalents in H5py {doc}`h5py:high/dataset` and Zarr {ref}`Arrays <zarr:tutorial_create>`.
+We can see an example of this with dimensionality reductions stored in the `obsm` group:
+
+```python
+>>> f["obsm"].visititems(print)
+X_pca <HDF5 dataset "X_pca": shape (38410, 50), type "<f4">
+X_umap <HDF5 dataset "X_umap": shape (38410, 2), type "<f4">
+
+>>> dict(f["obsm"]["X_pca"].attrs)
+{'encoding-type': 'array', 'encoding-version': '0.2.0'}
+```
+
+<!-- ```bash
+$ h5ls 02_processed.h5ad/obsm
+X_pca                    Dataset {38410, 50}
+X_umap                   Dataset {38410, 2}
+``` -->
+
+### Dense arrays specification (v0.2.0)
+
+* Dense arrays MUST be stored in an Array object
+* Dense arrays MUST have the entries `'encoding-type': 'array'` and `'encoding-version': '0.2.0'` in their metadata
+
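+The experimental element IO helpers can round-trip such an array directly; a small usage sketch (the file name below is made up):
+
+```python
+import h5py
+import numpy as np
+from anndata.experimental import read_elem, write_elem
+
+with h5py.File("scratch.h5ad", "a") as f:
+    # write_elem stores the dataset and sets the encoding metadata described above
+    write_elem(f, "my_dense_array", np.zeros((10, 2), dtype="float32"))
+    print(dict(f["my_dense_array"].attrs))
+    # {'encoding-type': 'array', 'encoding-version': '0.2.0'}
+    arr = read_elem(f["my_dense_array"])  # back to a numpy array
+```
+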
+## Sparse arrays
+
+Sparse arrays don’t have a native representation in HDF5 or Zarr,
+so we've defined our own based on their in-memory structure.
+Currently two sparse data formats are supported by `AnnData` objects, CSC and CSR
+(corresponding to {class}`scipy.sparse.csc_matrix` and {class}`scipy.sparse.csr_matrix` respectively).
+These formats represent a two-dimensional sparse array with
+three one-dimensional arrays, `indptr`, `indices`, and `data`.
+
+```{note}
+A full description of these formats is out of scope for this document,
+but they are [easy to find].
+```
+
+We represent a sparse array as a `Group` on disk,
+where the kind and shape of the sparse array are defined in the `Group`'s attributes:
+
+```python
+>>> dict(f["X"].attrs)
+{'encoding-type': 'csr_matrix',
+ 'encoding-version': '0.1.0',
+ 'shape': array([38410, 27899])}
+```
+
+The group contains three arrays:
+
+```python
+>>> f["X"].visititems(print)
+data <HDF5 dataset "data": shape (41459314,), type "<f4">
+indices <HDF5 dataset "indices": shape (41459314,), type "<i4">
+indptr <HDF5 dataset "indptr": shape (38411,), type "<i4">
+```
+
+<!-- ```bash
+$ h5ls 02_processed.h5ad/X
+data                     Dataset {41459314/Inf}
+indices                  Dataset {41459314/Inf}
+indptr                   Dataset {38411/Inf}
+``` -->
+
+### Sparse array specification (v0.1.0)
+
+* Each sparse array MUST be its own group
+* The group MUST contain arrays `indices`, `indptr`, and `data`
+* The group's metadata MUST contain:
+    * `"encoding-type"`, which is set to `"csr_matrix"` or `"csc_matrix"` for compressed sparse row and compressed sparse column, respectively.
+    * `"encoding-version"`, which is set to `"0.1.0"`
+    * `"shape"` which is an integer array of length 2 whose values are the sizes of the array's dimensions
+
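+Given those three arrays, the in-memory matrix can be rebuilt directly with `scipy.sparse`; a sketch using the `X` group from the example above:
+
+```python
+from scipy import sparse
+
+g = f["X"]  # the sparse group shown above
+n_obs, n_var = g.attrs["shape"]
+X = sparse.csr_matrix(
+    (g["data"][...], g["indices"][...], g["indptr"][...]),
+    shape=(n_obs, n_var),
+)
+```
+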
+## DataFrames
+
+DataFrames are saved in a columnar layout within a group, so each column of a DataFrame is saved as a separate array.
+We save a little more information in the attributes here.
+
+```python
+>>> dict(f["obs"].attrs)
+{'_index': 'Cell',
+ 'column-order': array(['sample', 'cell_type', 'n_genes_by_counts',
+        'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
+        'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes',
+        'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes',
+        'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito',
+        'label_by_score'], dtype=object),
+ 'encoding-type': 'dataframe',
+ 'encoding-version': '0.2.0'}
+```
+
+These attributes identify the index of the dataframe, as well as the original order of the columns.
+Each column in this dataframe is encoded as its own array.
+
+```python
+>>> dict(f["obs"]["total_counts"].attrs)
+{'encoding-type': 'array', 'encoding-version': '0.2.0'}
+
+>>> dict(f["obs"]["cell_type"].attrs)
+{'encoding-type': 'categorical', 'encoding-version': '0.2.0', 'ordered': False}
+```
+
+### Dataframe Specification (v0.2.0)
+
+* A dataframe MUST be stored as a group
+* The group's metadata:
+    * MUST contain the field `"_index"`, whose value is the key of the array to be used as an index
+    * MUST contain encoding metadata `"encoding-type": "dataframe"`, `"encoding-version": "0.2.0"`
+    * MUST contain `"column-order"` an array of strings denoting the order of column entries
+* The group MUST contain an array for the index
+* Each entry in the group MUST correspond to an array with equivalent first dimensions
+* Each entry SHOULD share chunk sizes (in the HDF5 or zarr container)
+
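+A sketch of how this layout maps back onto a pandas object, using the `obs` group from the example above (`read_elem(f["obs"])` does the equivalent in a single call):
+
+```python
+import pandas as pd
+from anndata.experimental import read_elem
+
+g = f["obs"]
+index_key = g.attrs["_index"]
+df = pd.DataFrame(
+    # read_elem decodes each column (plain, categorical, ...) individually
+    {col: read_elem(g[col]) for col in g.attrs["column-order"]},
+    index=pd.Index(read_elem(g[index_key]), name=index_key),
+)
+```
+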
+## Mappings
+
+Mappings are simply stored as `Group`s on disk.
+These are distinct from DataFrames and sparse arrays since they don’t have any special attributes.
+A `Group` is created for any `Mapping` in the AnnData object,
+including the standard `obsm`, `varm`, `layers`, and `uns`.
+Notably, this definition is used recursively within `uns`:
+
+```python
+>>> f["uns"].visititems(print)
+[...]
+pca <HDF5 group "/uns/pca" (2 members)>
+pca/variance <HDF5 dataset "variance": shape (50,), type "<f4">
+pca/variance_ratio <HDF5 dataset "variance_ratio": shape (50,), type "<f4">
+[...]
+```
+
+### Mapping specifications (v0.1.0)
+
+* Each mapping MUST be its own group
+* The group's metadata MUST contain the encoding metadata `"encoding-type": "dict"`, `"encoding-version": "0.1.0"`
+
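+Because a mapping is just a group whose members are themselves encoded elements, reading one is a matter of recursing into the group; with the experimental helpers, for example:
+
+```python
+from anndata.experimental import read_elem
+
+# Using the "pca" group shown above; returns a plain dict of decoded members
+pca_stats = read_elem(f["uns"]["pca"])
+# {'variance': array([...], dtype=float32), 'variance_ratio': array([...], dtype=float32)}
+```
+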
+## Scalars
+
+Zero dimensional arrays are used for scalar values (i.e. single values like strings, numbers or booleans).
+These should only occur inside of `uns`, and are commonly saved parameters:
+
+```python
+>>> f["uns/neighbors/params"].visititems(print)
+method <HDF5 dataset "method": shape (), type "|O">
+metric <HDF5 dataset "metric": shape (), type "|O">
+n_neighbors <HDF5 dataset "n_neighbors": shape (), type "<i8">
+>>> f["uns/neighbors/params/metric"][()]
+'euclidean'
+>>> dict(f["uns/neighbors/params/metric"].attrs)
+{'encoding-type': 'string', 'encoding-version': '0.2.0'}
+```
+
+### Scalar specification (v0.2.0)
+
+* Scalars MUST be written as a 0 dimensional array
+* Numeric scalars
+    * MUST have `"encoding-type": "numeric-scalar"`, `"encoding-version": "0.2.0"` in their metadata
+    * MUST be a single numeric value, including boolean, unsigned integer, signed integer,  floating point, or complex floating point
+* String scalars
+    * MUST have `"encoding-type": "string"`, `"encoding-version": "0.2.0"` in their metadata
+    * In zarr, scalar strings MUST be stored as a fixed length unicode dtype
+    * In HDF5, scalar strings MUST be stored as a variable length utf-8 encoded string dtype
+
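+For illustration, a numeric scalar laid out by hand looks like this (this mimics the layout only, it is not anndata's writer; the file name is made up):
+
+```python
+import h5py
+import numpy as np
+
+with h5py.File("scratch.h5", "w") as g:
+    ds = g.create_dataset("my_scalar", data=np.int64(50))  # 0-dimensional dataset
+    ds.attrs["encoding-type"] = "numeric-scalar"
+    ds.attrs["encoding-version"] = "0.2.0"
+    assert ds.shape == ()
+    assert ds[()] == 50
+```
+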
+## Categorical arrays
+
+```python
+>>> categorical = f["obs"]["cell_type"]
+>>> dict(categorical.attrs)
+{'encoding-type': 'categorical', 'encoding-version': '0.2.0', 'ordered': False}
+```
+
+Discrete values can be efficiently represented with categorical arrays (similar to `factors` in `R`).
+These arrays encode the values as small width integers (`codes`), which map to the original label set (`categories`).
+Each entry in the `codes` array is the zero-based index of the encoded value in the `categories` array. 
+To represent a missing value, a code of `-1` is used.
+We store these two arrays separately.
+
+```python
+>>> categorical.visititems(print)
+categories <HDF5 dataset "categories": shape (22,), type "|O">
+codes <HDF5 dataset "codes": shape (38410,), type "|i1">
+```
+
+### Categorical array specification (v0.2.0)
+
+* Categorical arrays MUST be stored as a group
+* The group's metadata MUST contain the encoding metadata `"encoding-type": "categorical"`, `"encoding-version": "0.2.0"`
+* The group's metadata MUST contain the boolean valued field `"ordered"`, which indicates whether the categories are ordered
+* The group MUST contain an integer valued array named `"codes"` whose maximum value is the number of categories - 1
+    * The `"codes"` array MAY contain signed integer values. If so, the code `-1` denotes a missing value
+* The group MUST contain an array called `"categories"`
+
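+Rebuilding the pandas categorical from these two arrays is straightforward; a sketch using the `cell_type` column shown above (`.asstr()` requires h5py ≥ 3):
+
+```python
+import pandas as pd
+
+g = f["obs"]["cell_type"]
+cell_type = pd.Categorical.from_codes(
+    codes=g["codes"][...],
+    categories=g["categories"].asstr()[...],
+    ordered=bool(g.attrs["ordered"]),
+)
+```
+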
+## String arrays
+
+Arrays of strings are handled differently than numeric arrays since numpy doesn't really have a good way of representing arrays of unicode strings.
+`anndata` assumes strings are text-like data, so it uses a variable length encoding.
+
+```python
+>>> dict(categorical["categories"].attrs)
+{'encoding-type': 'string-array', 'encoding-version': '0.2.0'}
+```
+
+### String array specifications (v0.2.0)
+
+* String arrays MUST be stored in arrays
+* The array's metadata MUST contain the encoding metadata `"encoding-type": "string-array"`, `"encoding-version": "0.2.0"`
+* In `zarr`, string arrays MUST be stored using `numcodecs`' `VLenUTF8` codec
+* In `HDF5`, string arrays MUST be stored using the variable length string data type, with a utf-8 encoding
+
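+For illustration, the two container-specific dtypes mentioned above can be created by hand like this (not anndata's writer; file names are made up):
+
+```python
+import h5py
+import numpy as np
+import zarr
+from numcodecs import VLenUTF8
+
+strings = np.array(["α", "bcd"], dtype=object)
+
+# HDF5: variable length UTF-8 string dtype
+with h5py.File("scratch.h5", "w") as f_h5:
+    f_h5.create_dataset(
+        "strings", data=strings, dtype=h5py.string_dtype(encoding="utf-8")
+    )
+
+# zarr: object dtype with numcodecs' VLenUTF8 codec
+z = zarr.open_group("scratch.zarr", mode="w")
+z.create_dataset("strings", data=strings, dtype=object, object_codec=VLenUTF8())
+```
+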
+## Nullable integers and booleans
+
+We support IO with Pandas nullable integer and boolean arrays.
+We represent these on disk similar to `numpy` masked arrays, `julia` nullable arrays, or `arrow` validity bitmaps (see {issue}`504` for more discussion).
+That is, we store an indicator array (or mask) of null values alongside the array of all values.
+
+```python
+>>> h5_file = h5py.File("anndata_format.h5", "a")
+>>> int_array = pd.array([1, None, 3, 4])
+>>> int_array
+<IntegerArray>
+[1, <NA>, 3, 4]
+Length: 4, dtype: Int64
+>>> write_elem(h5_file, "nullable_integer", int_array)
+
+>>> h5_file["nullable_integer"].visititems(print)
+mask <HDF5 dataset "mask": shape (4,), type "|b1">
+values <HDF5 dataset "values": shape (4,), type "<i8">
+
+>>> dict(h5_file["nullable_integer"].attrs)
+{'encoding-type': 'nullable-integer', 'encoding-version': '0.1.0'}
+```
+
+### Nullable integer specifications (v0.1.0)
+
+* Nullable integers MUST be stored as a group
+* The group's attributes MUST contain the encoding metadata `"encoding-type": "nullable-integer"`, `"encoding-version": "0.1.0"`
+* The group MUST contain an integer valued array under the key `"values"`
+* The group MUST contain a boolean valued array under the key `"mask"`
+
+### Nullable boolean specifications (v0.1.0)
+
+* Nullable booleans MUST be stored as a group
+* The group's attributes MUST contain the encoding metadata `"encoding-type": "nullable-boolean"`, `"encoding-version": "0.1.0"`
+* The group MUST contain a boolean valued array under the key `"values"`
+* The group MUST contain a boolean valued array under the key `"mask"`
+* The `"values"` and `"mask"` arrays MUST be the same shape
+
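+Reading the pair back is symmetric; a sketch, assuming (as in pandas' masked arrays) that `True` in the mask marks a missing value:
+
+```python
+import pandas as pd
+
+g = h5_file["nullable_integer"]  # the group written in the example above
+int_array = pd.arrays.IntegerArray(
+    values=g["values"][...],
+    mask=g["mask"][...],
+)
+# <IntegerArray> [1, <NA>, 3, 4]
+```
+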
+## AwkwardArrays
+
+```{warning}
+**Experimental**
+
+Support for ragged arrays via awkward array is considered experimental under the 0.9.0 release series.
+Please direct feedback on its implementation to [https://github.com/scverse/anndata](https://github.com/scverse/anndata).
+```
+
+Ragged arrays are supported in `anndata` through the [Awkward
+Array](https://awkward-array.org/) library. For storage on disk, we
+break down the awkward array into its constituent arrays using
+[`ak.to_buffers`](https://awkward-array.readthedocs.io/en/latest/_auto/ak.to_buffers.html)
+and then write these arrays using `anndata`’s methods.
+
+The container of arrays is stored in a group called `"container"`.
+
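+As a sketch of that write path (`awkward_array` and `group` are placeholder names here, and this is not the exact code `anndata` runs):
+
+```python
+import awkward as ak
+
+# Decompose the ragged array into a form (structure), a length, and flat buffers
+form, length, container = ak.to_buffers(awkward_array)
+
+group.attrs["form"] = form.to_json()
+group.attrs["length"] = length
+
+# Store each flat buffer under the "container" subgroup
+container_group = group.require_group("container")
+for key, buf in container.items():
+    container_group.create_dataset(key, data=buf)
+```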
+
+```python
+>>> import zarr
+>>> z = zarr.open("airr.zarr", "r")
+>>> awkward_group = z["obsm/airr"]
+>>> awkward_group.tree()
+```
+
+```
+airr
+    └── container
+        ├── node0-offsets (17,) int64
+        ├── node2-offsets (40,) int64
+        ├── node3-data (117,) uint8
+        ├── node4-offsets (40,) int64
+        └── node5-data (117,) uint8
+```
+
+The length of the array is saved to its own `"length"` attribute,
+while metadata for the array structure is serialized and saved to the
+`"form"` attribute.
+
+```python
+>>> dict(awkward_group.attrs)
+```
+
+
+```python
+{
+    'encoding-type': 'awkward-array',
+    'encoding-version': '0.1.0',
+    'form': '{"class": "ListOffsetArray", "offsets": "i64", "content": {"class": '
+            '"RecordArray", "contents": {"locus": {"class": "ListOffsetArray", '
+            '"offsets": "i64", "content": {"class": "NumpyArray", "primitive": '
+            '"uint8", "inner_shape": [], "has_identifier": false, "parameters": '
+            '{"__array__": "char"}, "form_key": "node3"}, "has_identifier": '
+            'false, "parameters": {"__array__": "string"}, "form_key": "node2"}, '
+            '"junction_aa": {"class": "ListOffsetArray", "offsets": "i64", '
+            '"content": {"class": "NumpyArray", "primitive": "uint8", '
+            '"inner_shape": [], "has_identifier": false, "parameters": '
+            '{"__array__": "char"}, "form_key": "node5"}, "has_identifier": '
+            'false, "parameters": {"__array__": "string"}, "form_key": "node4"}}, '
+            '"has_identifier": false, "parameters": {}, "form_key": "node1"}, '
+            '"has_identifier": false, "parameters": {}, "form_key": "node0"}'
+    'length': 16
+}
+```
+
+These can be read back as awkward arrays using the
+[`ak.from_buffers`](https://awkward-array.readthedocs.io/en/latest/_auto/ak.from_buffers.html)
+function:
+
+```python
+>>> import awkward as ak
+>>> from anndata.experimental import read_elem
+>>> ak.from_buffers(
+...     awkward_group.attrs["form"],
+...     awkward_group.attrs["length"],
+...     {k: read_elem(v) for k, v in awkward_group.items()}
+... )
+```
+
+```
+<Array [[], [...], ..., [{locus: 'TRD', ...}]] type='16 * var * {locus: str...'>
+```
+
+
+[easy to find]: https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)
+[hdf5]: https://en.wikipedia.org/wiki/Hierarchical_Data_Format
\ No newline at end of file
diff --git a/docs/fileformat-prose.rst b/docs/fileformat-prose.rst
deleted file mode 100644
index da495d9..0000000
--- a/docs/fileformat-prose.rst
+++ /dev/null
@@ -1,204 +0,0 @@
-On-disk format
---------------
-
-.. note::
-   These docs are written for anndata 0.8.
-   Files written before this version may differ in some conventions,
-   but will still be read by newer versions of the library.
-
-AnnData objects are saved on disk to hierarchichal array stores like HDF5_
-(via :doc:`H5py <h5py:index>`) and :doc:`zarr:index`.
-This allows us to have very similar structures in disk and on memory.
-
-As an example we’ll look into a typical `.h5ad` object that’s been through an analysis.
-This structure should be largely equivalent to Zarr structure, though there are a few minor differences.
-
-.. _HDF5: https://en.wikipedia.org/wiki/Hierarchical_Data_Format
-.. I’ve started using h5py since I couldn’t figure out a nice way to print attributes from bash.
-
->>> import h5py
->>> f = h5py.File("02_processed.h5ad", "r")
->>> list(f.keys())
-['X', 'layers', 'obs', 'obsm', 'uns', 'var', 'varm']
-
-.. .. code:: bash
-
-..    $ h5ls 02_processed.h5ad
-..    X                        Group
-..    layers                   Group
-..    obs                      Group
-..    obsm                     Group
-..    uns                      Group
-..    var                      Group
-..    varm                     Group
-
-In general, `AnnData` objects are comprised of a various types of elements.
-Each element is encoded as either an Array (or Dataset in hdf5 terminology) or a collection of elements (e.g. Group) in the store.
-We record the type of an element using the `encoding-type` and `encoding-version` keys in it's attributes.
-For example, we can this file represents an `AnnData` object from this metadata:
-
->>> dict(f.attrs)
-{'encoding-type': 'anndata', 'encoding-version': '0.1.0'}
-
-Using this information, we're able to dispatch onto readers for the different element types that you'd find in an anndata.
-
-Dense arrays
-~~~~~~~~~~~~
-
-Dense numeric arrays have the most simple representation on disk,
-as they have native equivalents in H5py :doc:`h5py:high/dataset` and Zarr :ref:`Arrays <zarr:tutorial_create>`.
-We can see an example of this with dimensionality reductions stored in the `obsm` group:
-
->>> f["obsm"].visititems(print)
-X_pca <HDF5 dataset "X_pca": shape (38410, 50), type "<f4">
-X_umap <HDF5 dataset "X_umap": shape (38410, 2), type "<f4">
-
->>> dict(f["obsm"]["X_pca"].attrs)
-{'encoding-type': 'array', 'encoding-version': '0.2.0'}
-
-.. .. code:: bash
-
-..    $ h5ls 02_processed.h5ad/obsm
-..    X_pca                    Dataset {38410, 50}
-..    X_umap                   Dataset {38410, 2}
-
-Sparse arrays
-~~~~~~~~~~~~~
-
-Sparse arrays don’t have a native representations in HDF5 or Zarr,
-so we've defined our own based on their in-memory structure.
-Currently two sparse data formats are supported by `AnnData` objects, CSC and CSR
-(corresponding to :class:`scipy.sparse.csc_matrix` and :class:`scipy.sparse.csr_matrix` respectivley).
-These formats represent a two-dimensional sparse array with
-three one-dimensional arrays, `indptr`, `indices`, and `data`.
-
-.. note::
-   A full description of these formats is out of scope for this document,
-   but are `easy to find`_.
-
-.. _easy to find: https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)
-
-We represent a sparse array as a `Group` on-disk,
-where the kind and shape of the sparse array is defined in the `Group`'s attributes:
-
->>> dict(f["X"].attrs)
-{'encoding-type': 'csr_matrix',
- 'encoding-version': '0.1.0',
- 'shape': array([38410, 27899])}
-
-Inside the group are the three constituent arrays:
-
->>> f["X"].visititems(print)
-data <HDF5 dataset "data": shape (41459314,), type "<f4">
-indices <HDF5 dataset "indices": shape (41459314,), type "<i4">
-indptr <HDF5 dataset "indptr": shape (38411,), type "<i4">
-
-.. .. code:: bash
-
-..    $ h5ls 02_processed.h5ad/X
-..    data                     Dataset {41459314/Inf}
-..    indices                  Dataset {41459314/Inf}
-..    indptr                   Dataset {38411/Inf}
-
-DataFrames
-~~~~~~~~~~
-
-DataFrames are saved as a columnar format in a group, so each column of a DataFrame is saved as a seperate array.
-We save a little more information in the attributes here.
-
->>> dict(f["obs"].attrs)
-{'_index': 'Cell',
- 'column-order': array(['sample', 'cell_type', 'n_genes_by_counts',
-        'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
-        'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes',
-        'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes',
-        'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito',
-        'label_by_score'], dtype=object),
- 'encoding-type': 'dataframe',
- 'encoding-version': '0.2.0'}
-
-These attributes identify the index of the dataframe, as well as the original order of the columns.
-Each column in this dataframe is encoded as it's own array.
-
->>> dict(f["obs"]["total_counts"].attrs)
-{'encoding-type': 'array', 'encoding-version': '0.2.0'}
-
->>> dict(f["obs"]["cell_type"].attrs)
-{'encoding-type': 'categorical', 'encoding-version': '0.2.0', 'ordered': False}
-
-Mappings
-~~~~~~~~
-
-Mappings are simply stored as `Group` s on disk.
-These are distinct from DataFrames and sparse arrays since they don’t have any special attributes.
-A `Group` is created for any `Mapping` in the AnnData object,
-including the standard `obsm`, `varm`, `layers`, and `uns`.
-Notably, this definition is used recursively within `uns`:
-
->>> f["uns"].visititems(print)
-[...]
-pca <HDF5 group "/uns/pca" (2 members)>
-pca/variance <HDF5 dataset "variance": shape (50,), type "<f4">
-pca/variance_ratio <HDF5 dataset "variance_ratio": shape (50,), type "<f4">
-[...]
-
-Scalars
-~~~~~~~
-
-Zero dimensional arrays are used for scalar values (i.e. single values like strings, numbers or booleans).
-These should only occur inside of `uns`, and are common inside of saved parameters:
-
->>> f["uns/neighbors/params"].visititems(print)
-method <HDF5 dataset "method": shape (), type "|O">
-metric <HDF5 dataset "metric": shape (), type "|O">
-n_neighbors <HDF5 dataset "n_neighbors": shape (), type "<i8">
->>> f["uns/neighbors/params/metric"][()]
-'euclidean'
->>> dict(f["uns/neighbors/params/metric"].attrs)
-{'encoding-type': 'string', 'encoding-version': '0.2.0'}
-
-Categorical arrays
-~~~~~~~~~~~~~~~~~~
-
->>> categorical = f["obs"]["cell_type"]
->>> dict(categorical.attrs)
-{'encoding-type': 'categorical', 'encoding-version': '0.2.0', 'ordered': False}
-
-Discrete labels can be efficiently represented with categorical arrays (similar to `factors` in `R`).
-These arrays encode the labels as small width integers (`codes`), which map to the original label set (`categories`).
-We store these two arrays seperatley
-
->>> categorical.visititems(print)
-categories <HDF5 dataset "categories": shape (22,), type "|O">
-codes <HDF5 dataset "codes": shape (38410,), type "|i1">
-
-String arrays
-~~~~~~~~~~~~~
-
-Arrays of strings are handled differently than numeric arrays since numpy doesn't really have a good way of representing arrays of unicode strings.
-`anndata` assumes strings are text like data, so are variable length.
-
->>> dict(categorical["categories"].attrs)
-{'encoding-type': 'string-array', 'encoding-version': '0.2.0'}
-
-Nullable integers and booleans
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-We support IO with Pandas nullable integer and boolean arrays.
-We represent these on disk similar to `numpy` masked arrays, `julia` nullable arrays, or `arrow` validity bitmaps (see :issue:`504` for more discussion).
-That is, we store a indicator array (or mask) of null values alongside the array of all values.
-
->>> h5_file = h5py.File("anndata_format.h5", "a")
->>> int_array = pd.array([1, None, 3, 4])
->>> int_array
-<IntegerArray>
-[1, <NA>, 3, 4]
-Length: 4, dtype: Int64
->>> write_elem(h5_file, "nullable_integer", int_array)
-
->>> h5_file["nullable_integer"].visititems(print)
-mask <HDF5 dataset "mask": shape (4,), type "|b1">
-values <HDF5 dataset "values": shape (4,), type "<i8">
-
->>> dict(h5_file["nullable_integer"].attrs)
-{'encoding-type': 'nullable-integer', 'encoding-version': '0.1.0'}
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..f9fc8b2
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,35 @@
+```{eval-rst}
+.. role:: small
+```
+
+```{eval-rst}
+.. role:: smaller
+```
+
+```{include} ../README.md
+```
+
+# News
+
+```{include} news.md
+```
+
+# Latest additions
+
+```{include} release-notes/release-latest.md
+```
+
+```{toctree}
+:hidden: true
+:maxdepth: 1
+
+tutorials/index
+api
+concatenation
+fileformat-prose
+interoperability
+benchmarks
+contributing
+release-notes/index
+references
+```
diff --git a/docs/index.rst b/docs/index.rst
deleted file mode 100644
index 46ead61..0000000
--- a/docs/index.rst
+++ /dev/null
@@ -1,41 +0,0 @@
-.. include:: ../README.rst
-   :end-line: 22
-
-.. role:: small
-.. role:: smaller
-
-.. image:: _static/img/anndata_schema.svg
-   :align: right
-   :width: 40%
-
-anndata is a Python package for handling annotated data matrices in memory and on disk, positioned between pandas and xarray. anndata offers a broad range of computationally efficient features including, among others, sparse data support, lazy operations, and a PyTorch interface.
-
-* Discuss development on `GitHub <https://github.com/theislab/anndata>`_.
-* Ask questions on the `scverse Discourse <https://discourse.scverse.org>`_.
-* Install via `pip install anndata` or `conda install anndata -c conda-forge`.
-* Consider citing the `anndata paper <https://doi.org/10.1101/2021.12.16.473007>`__.
-* See `Scanpy's documentation <https://scanpy.readthedocs.io/>`__ for usage
-  related to single cell data. anndata was initially built for Scanpy.
-
-News
-----
-
-.. include:: news.rst
-
-Latest additions
-----------------
-
-.. include:: release-notes/release-latest.rst
-
-.. toctree::
-   :maxdepth: 1
-   :hidden:
-
-   tutorials
-   api
-   concatenation
-   fileformat-prose
-   benchmarks
-   contributing
-   release-notes/index
-   references
diff --git a/docs/interoperability.md b/docs/interoperability.md
new file mode 100644
index 0000000..bfa315c
--- /dev/null
+++ b/docs/interoperability.md
@@ -0,0 +1,25 @@
+# Interoperability
+
+The on-disk representation of anndata files can be read from other
+languages. Here we list interfaces for working with AnnData from your
+language of choice:
+
+## R
+
+- [zellkonverter](https://bioconductor.org/packages/release/bioc/html/zellkonverter.html) provides basilisk-based tooling for loading `h5ad` files into `SingleCellExperiment` objects.
+- [anndata](https://anndata.dynverse.org) provides an R implementation of `AnnData` as well as IO for the HDF5 format.
+- [MuData](https://bioconductor.org/packages/release/bioc/html/MuData.html) provides IO for `AnnData` and `MuData` stored in HDF5 to Bioconductor's `SingleCellExperiment` and `MultiAssayExperiment` objects.
+- [MuDataSeurat](https://pmbio.github.io/MuDataSeurat/) provides IO from `AnnData` and `MuData` stored in HDF5 to `Seurat` objects.
+
+## Julia
+
+- [Muon.jl](https://docs.juliahub.com/Muon/QfqCh/0.1.1/objects/) provides Julia implementations of `AnnData` and `MuData` objects, as well as IO for the HDF5 format
+- [scVI.jl](https://maren-ha.github.io/scVI.jl/index.html) provides a Julia implementation of `AnnData` as well as IO for the HDF5 format.
+
+## Javascript
+
+- [Vitessce](https://github.com/vitessce/vitessce) contains loaders for `AnnData` objects stored as Zarr, and uses them to provide interactive visualization
+
+## Rust
+
+- [anndata-rs](https://github.com/kaizhang/anndata-rs) provides a Rust implementation of `AnnData` as well as advanced IO support for the HDF5 storage format.
diff --git a/docs/news.md b/docs/news.md
new file mode 100644
index 0000000..fae034f
--- /dev/null
+++ b/docs/news.md
@@ -0,0 +1,14 @@
+```{eval-rst}
+.. role:: small
+```
+
+# Muon paper published {small}`2022-02-02`
+
+Muon has been published in Genome Biology [^cite_bredikhin22].
+Muon is a framework for multimodal data built on top of `AnnData`.
+
+Check out [Muon](https://muon.readthedocs.io/en/latest/) and its datastructure [MuData](https://mudata.readthedocs.io/en/latest/).
+
+# COVID-19 datasets distributed as `h5ad` {small}`2020-04-01`
+
+In a joint initiative, the Wellcome Sanger Institute, the Human Cell Atlas, and the CZI distribute datasets related to COVID-19 via anndata's `h5ad` files: [covid19cellatlas.org](https://www.covid19cellatlas.org/).
diff --git a/docs/news.rst b/docs/news.rst
deleted file mode 100644
index 8e2ef53..0000000
--- a/docs/news.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-.. role:: small
-
-Muon paper published :small:`2022-02-02`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Muon has been published in Genome Biology [Bredikhin22]_.
-Muon is a framework for multimodal data built on top of `AnnData`.
-
-Check out `Muon <https://muon.readthedocs.io/en/latest/>`_ and its datastructure `MuData <https://mudata.readthedocs.io/en/latest/>`_.
-
-COVID-19 datasets distributed as `h5ad` :small:`2020-04-01`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In a joint initiative, the Wellcome Sanger Institute, the Human Cell Atlas, and the CZI distribute datasets related to COVID-19 via anndata's `h5ad` files: `covid19cellatlas.org <https://www.covid19cellatlas.org/>`__.
diff --git a/docs/references.rst b/docs/references.rst
index 82d28b5..8642780 100644
--- a/docs/references.rst
+++ b/docs/references.rst
@@ -18,8 +18,8 @@ References
 
 .. [Murphy12]
    Murphy (2012,
-   *Machine Learning: A Probabilisitc Perspective*,
-   MIT Press https://mitpress.mit.edu/books/machine-learning-0.
+   *Machine Learning: A Probabilistic Perspective*,
+   MIT Press https://mitpress.mit.edu/9780262018029/machine-learning/.
 
 .. [Wolf18] Wolf *et al.* (2018),
    *Scanpy: large-scale single-cell gene expression data analysis*,
diff --git a/docs/release-notes/0.4.0.md b/docs/release-notes/0.4.0.md
new file mode 100644
index 0000000..7aefa55
--- /dev/null
+++ b/docs/release-notes/0.4.0.md
@@ -0,0 +1,8 @@
+### 0.4.0 {small}`23 December, 2017`
+
+- read/write [.loom](https://loompy.org) files
+- scalability beyond dataset sizes that fit into memory: see this [blog post]
+- {class}`~anndata.AnnData` has a {class}`~anndata.AnnData.raw` attribute, which simplifies storing the data matrix when you consider it *raw*: see the [clustering tutorial]
+
+[blog post]: http://falexwolf.de/blog/171223_AnnData_indexing_views_HDF5-backing/
+[clustering tutorial]: https://github.com/scverse/scanpy_usage/tree/master/170505_seurat
diff --git a/docs/release-notes/0.4.0.rst b/docs/release-notes/0.4.0.rst
deleted file mode 100644
index 6bd2667..0000000
--- a/docs/release-notes/0.4.0.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-0.4.0 :small:`23 December, 2017`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-- read/write `.loom <https://loompy.org>`_ files
-- scalability beyond dataset sizes that fit into memory: see this `blog post`_
-- :class:`~anndata.AnnData` has a :class:`~anndata.AnnData.raw` attribute, which simplifies storing the data matrix when you consider it *raw*: see the `clustering tutorial`_
-
-.. _blog post: http://falexwolf.de/blog/171223_AnnData_indexing_views_HDF5-backing/
-.. _clustering tutorial: https://github.com/theislab/scanpy_usage/tree/master/170505_seurat
diff --git a/docs/release-notes/0.5.0.md b/docs/release-notes/0.5.0.md
new file mode 100644
index 0000000..476f757
--- /dev/null
+++ b/docs/release-notes/0.5.0.md
@@ -0,0 +1,9 @@
+### 0.5.0 {small}`9 February, 2018`
+
+- inform about duplicates in {class}`~anndata.AnnData.var_names` and resolve them using {func}`~anndata.AnnData.var_names_make_unique`
+- automatically remove unused categories after slicing
+- read/write [.loom](https://loompy.org) files using loompy 2
+- fixed read/write for a few text file formats
+- read [UMI tools] files: {func}`~anndata.read_umi_tools`
+
+[umi tools]: https://github.com/CGATOxford/UMI-tools
diff --git a/docs/release-notes/0.5.0.rst b/docs/release-notes/0.5.0.rst
deleted file mode 100644
index c2675e7..0000000
--- a/docs/release-notes/0.5.0.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-0.5.0 :small:`9 February, 2018`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-- inform about duplicates in :class:`~anndata.AnnData.var_names` and resolve them using :func:`~anndata.AnnData.var_names_make_unique`
-- automatically remove unused categories after slicing
-- read/write `.loom <https://loompy.org>`_ files using loompy 2
-- fixed read/write for a few text file formats
-- read `UMI tools`_ files: :func:`~anndata.read_umi_tools`
-
-.. _UMI tools: https://github.com/CGATOxford/UMI-tools
diff --git a/docs/release-notes/0.6.0.md b/docs/release-notes/0.6.0.md
new file mode 100644
index 0000000..853a969
--- /dev/null
+++ b/docs/release-notes/0.6.0.md
@@ -0,0 +1,33 @@
+### 0.6.\* {small}`2019-*-*`
+
+- better support for aligned mappings (obsm, varm, layers)
+  `0.6.22` {pr}`155` {smaller}`I Virshup`
+- convenience accessors {func}`~anndata.AnnData.obs_vector`, {func}`~anndata.AnnData.var_vector` for 1d arrays.
+  `0.6.21` {pr}`144` {smaller}`I Virshup`
+- compatibility with Scipy >=1.3 by removing `IndexMixin` dependency.
+  `0.6.20` {pr}`151` {smaller}`P Angerer`
+- bug fix for second-indexing into views.
+  `0.6.19` {smaller}`P Angerer`
+- bug fix for reading excel files.
+  `0.6.19` {smaller}`A Wolf`
+- changed default compression to `None` in {func}`~anndata.AnnData.write_h5ad` to speed up read and write, disk space use is usually less critical.
+  `0.6.16` {smaller}`A Wolf`
+- maintain dtype upon copy.
+  `0.6.13` {smaller}`A Wolf`
+- {attr}`~anndata.AnnData.layers`, inspired by [.loom](https://loompy.org) files, allows reading their information losslessly via {func}`~anndata.read_loom`.
+  `0.6.7`–`0.6.9` {pr}`46` & {pr}`48` {smaller}`S Rybakov`
+- support for reading zarr files: {func}`~anndata.read_zarr`
+  `0.6.7` {pr}`38` {smaller}`T White`
+- initialization from pandas DataFrames
+  `0.6.` {smaller}`A Wolf`
+- iteration over chunks {func}`~anndata.AnnData.chunked_X` and {func}`~anndata.AnnData.chunk_X`
+  `0.6.1` {pr}`20` {smaller}`S Rybakov`
+
+### 0.6.0 {small}`1 May, 2018`
+
+- compatibility with Seurat converter
+- tremendous speedup for {func}`~anndata.AnnData.concatenate`
+- bug fix for deep copy of unstructured annotation after slicing
+- bug fix for reading HDF5 stored single-category annotations
+- `'outer join'` concatenation: adds zeros for concatenation of sparse data and nans for dense data
+- better memory efficiency in loom exports
diff --git a/docs/release-notes/0.6.0.rst b/docs/release-notes/0.6.0.rst
deleted file mode 100644
index 7c3a228..0000000
--- a/docs/release-notes/0.6.0.rst
+++ /dev/null
@@ -1,35 +0,0 @@
-0.6.* :small:`2019-*-*`
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-- better support for aligned mappings (obsm, varm, layers)
-  :noteversion:`0.6.22` :pr:`155` :smaller:`I Virshup`
-- convenience accesors :func:`~anndata.AnnData.obs_vector`, :func:`~anndata.AnnData.var_vector` for 1d arrays.
-  :noteversion:`0.6.21` :pr:`144` :smaller:`I Virshup`
-- compatibility with Scipy >=1.3 by removing `IndexMixin` dependency.
-  :noteversion:`0.6.20` :pr:`151` :smaller:`P Angerer`
-- bug fix for second-indexing into views.
-  :noteversion:`0.6.19` :smaller:`P Angerer`
-- bug fix for reading excel files.
-  :noteversion:`0.6.19` :smaller:`A Wolf`
-- changed default compression to `None` in :func:`~anndata.AnnData.write_h5ad` to speed up read and write, disk space use is usually less critical.
-  :noteversion:`0.6.16` :smaller:`A Wolf`
-- maintain dtype upon copy.
-  :noteversion:`0.6.13` :smaller:`A Wolf`
-- :attr:`~anndata.AnnData.layers` inspired by `.loom <https://loompy.org>`_ files allows their information lossless reading via :func:`~anndata.read_loom`.
-  :noteversion:`0.6.7`–:noteversion:`0.6.9` :pr:`46` & :pr:`48` :smaller:`S Rybakov`
-- support for reading zarr files: :func:`~anndata.read_zarr`
-  :noteversion:`0.6.7` :pr:`38` :smaller:`T White`
-- initialization from pandas DataFrames
-  :noteversion:`0.6.` :smaller:`A Wolf`
-- iteration over chunks :func:`~anndata.AnnData.chunked_X` and :func:`~anndata.AnnData.chunk_X`
-  :noteversion:`0.6.1` :pr:`20` :smaller:`S Rybakov`
-
-0.6.0 :small:`1 May, 2018`
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-- compatibility with Seurat converter
-- tremendous speedup for :func:`~anndata.AnnData.concatenate`
-- bug fix for deep copy of unstructured annotation after slicing
-- bug fix for reading HDF5 stored single-category annotations
-- `'outer join'` concatenation: adds zeros for concatenation of sparse data and nans for dense data
-- better memory efficiency in loom exports
diff --git a/docs/release-notes/0.7.0.md b/docs/release-notes/0.7.0.md
new file mode 100644
index 0000000..c81970d
--- /dev/null
+++ b/docs/release-notes/0.7.0.md
@@ -0,0 +1,44 @@
+### 0.7.0 {small}`22 January, 2020`
+
+```{warning}
+Breaking changes introduced between `0.6.22.post1` and `0.7`:
+
+- Elements of {class}`~anndata.AnnData`s don’t have their dimensionality reduced when the main object is subset.
+  This is to maintain consistency when subsetting. See discussion in {issue}`145`.
+- Internal modules like `anndata.core` are private and their contents are not stable: See {issue}`174`.
+- The old deprecated attributes `.smp*`, `.add` and `.data` have been removed.
+```
+
+~~~{rubric} View overhaul {pr}`164`
+~~~
+
+- Indexing into a view no longer keeps a reference to intermediate view, see {issue}`62`.
+- Views are now lazy. Elements of view of AnnData are not indexed until they’re accessed.
+- Indexing with scalars no longer reduces dimensionality of contained arrays, see {issue}`145`.
+- All elements of AnnData should now follow the same rules about how they’re subset, see {issue}`145`.
+- Can now index by observations and variables at the same time.
+
+~~~{rubric} IO overhaul {pr}`167`
+~~~
+
+- Reading and writing has been overhauled for simplification and speed.
+- Time and memory usage can be half of previous in typical use cases
+- Zarr backend now supports sparse arrays, and generally is closer to having the same features as HDF5.
+- Backed mode should see significant speed and memory improvements for access along compressed dimensions and IO. PR {pr}`241`.
+- {class}`~pandas.Categorical`s can now be ordered (PR {pr}`230`) and written to disk with a large number of categories (PR {pr}`217`).
+
+~~~{rubric} Mapping attributes overhaul {smaller}`(obsm, varm, layers, ...)`
+~~~
+
+- New attributes {attr}`~anndata.AnnData.obsp` and {attr}`~anndata.AnnData.varp` have been added for two dimensional arrays where each axis corresponds to a single axis of the AnnData object. PR {pr}`207`.
+- These are intended to store values like cell-by-cell graphs, which are currently stored in {attr}`~anndata.AnnData.uns`.
+- Sparse arrays are now allowed as values in all mapping attributes.
+- DataFrames are now allowed as values in {attr}`~anndata.AnnData.obsm` and {attr}`~anndata.AnnData.varm`.
+- All mapping attributes now share an implementation and will have the same behaviour. PR {pr}`164`.
+
+```{rubric} Miscellaneous improvements
+```
+
+- Mapping attributes now have ipython tab completion (e.g. `adata.obsm["\\t` can provide suggestions) PR {pr}`183`.
+- {class}`~anndata.AnnData` attributes are now delete-able (e.g. `del adata.raw`) PR {pr}`242`.
+- Many many bug fixes
diff --git a/docs/release-notes/0.7.0.rst b/docs/release-notes/0.7.0.rst
deleted file mode 100644
index 64be748..0000000
--- a/docs/release-notes/0.7.0.rst
+++ /dev/null
@@ -1,40 +0,0 @@
-0.7.0 :small:`22 January, 2020`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. warning::
-   Breaking changes introduced between `0.6.22.post1` and `0.7`:
-
-   - Elements of :class:`~anndata.AnnData`\ s don’t have their dimensionality reduced when the main object is subset.
-     This is to maintain consistency when subsetting. See discussion in :issue:`145`.
-   - Internal modules like `anndata.core` are private and their contents are not stable: See :issue:`174`.
-   - The old deprecated attributes `.smp*`. `.add` and `.data` have been removed.
-
-.. rubric:: View overhaul :pr:`164`
-
-- Indexing into a view no longer keeps a reference to intermediate view, see :issue:`62`.
-- Views are now lazy. Elements of view of AnnData are not indexed until they’re accessed.
-- Indexing with scalars no longer reduces dimensionality of contained arrays, see :issue:`145`.
-- All elements of AnnData should now follow the same rules about how they’re subset, see :issue:`145`.
-- Can now index by observations and variables at the same time.
-
-.. rubric:: IO overhaul :pr:`167`
-
-- Reading and writing has been overhauled for simplification and speed.
-- Time and memory usage can be half of previous in typical use cases
-- Zarr backend now supports sparse arrays, and generally is closer to having the same features as HDF5.
-- Backed mode should see significant speed and memory improvements for access along compressed dimensions and IO. PR :pr:`241`.
-- :class:`~pandas.Categorical`\ s can now be ordered (PR :pr:`230`) and written to disk with a large number of categories (PR :pr:`217`).
-
-.. rubric:: Mapping attributes overhaul :smaller:`(obsm, varm, layers, ...)`
-
-- New attributes :attr:`~anndata.AnnData.obsp` and :attr:`~anndata.AnnData.varp` have been added for two dimensional arrays where each axis corresponds to a single axis of the AnnData object. PR :pr:`207`.
-- These are intended to store values like cell-by-cell graphs, which are currently stored in :attr:`~anndata.AnnData.uns`.
-- Sparse arrays are now allowed as values in all mapping attributes.
-- DataFrames are now allowed as values in :attr:`~anndata.AnnData.obsm` and :attr:`~anndata.AnnData.varm`.
-- All mapping attributes now share an implementation and will have the same behaviour. PR :pr:`164`.
-
-.. rubric:: Miscellaneous improvements
-
-- Mapping attributes now have ipython tab completion (e.g. `adata.obsm["\\t` can provide suggestions) PR :pr:`183`.
-- :class:`~anndata.AnnData` attributes are now delete-able (e.g. `del adata.raw`) PR :pr:`242`.
-- Many many bug fixes
\ No newline at end of file
diff --git a/docs/release-notes/0.7.2.md b/docs/release-notes/0.7.2.md
new file mode 100644
index 0000000..44665b3
--- /dev/null
+++ b/docs/release-notes/0.7.2.md
@@ -0,0 +1,28 @@
+### 0.7.2 {small}`15 May, 2020`
+
+~~~{rubric} Concatenation overhaul {smaller}`I Virshup`
+~~~
+
+- Elements of `uns` can now be merged, see {pr}`350`
+- Outer joins now work for `layers` and `obsm`, see {pr}`352`
+- Fill value for outer joins can now be specified
+- Expect improvements in performance, see {issue}`303`
+
+```{rubric} Functionality
+```
+
+- {attr}`~anndata.AnnData.obsp` and {attr}`~anndata.AnnData.varp` can now be transposed {pr}`370` {smaller}`A Wolf`
+- {meth}`~anndata.AnnData.obs_names_make_unique` is now better at making values unique, and will warn if ambiguities arise {pr}`345` {smaller}`M Weiden`
+- {attr}`~anndata.AnnData.obsp` is now preferred for storing pairwise relationships between observations. In practice, this means there will be deprecation warnings and reformatting applied to objects which stored connectivities under `uns["neighbors"]`. Square matrices in {attr}`~anndata.AnnData.uns` will no longer be sliced (use `.{obs,var}p` instead). {pr}`337` {smaller}`I Virshup`
+- {class}`~anndata.ImplicitModificationWarning` is now exported {pr}`315` {smaller}`P Angerer`
+- Better support for {class}`~numpy.ndarray` subclasses stored in `AnnData` objects {pr}`335` {smaller}`michalk8`
+
+```{rubric} Bug fixes
+```
+
+- Fixed inplace modification of {class}`~pandas.Index` objects by the make unique function {pr}`348` {smaller}`I Virshup`
+- Passing ambiguous keys to {meth}`~anndata.AnnData.obs_vector` and {meth}`~anndata.AnnData.var_vector` now throws errors {pr}`340` {smaller}`I Virshup`
+- Fix instantiating {class}`~anndata.AnnData` objects from {class}`~pandas.DataFrame` {pr}`316` {smaller}`P Angerer`
+- Fixed indexing into `AnnData` objects with arrays like `adata[adata[:, gene].X > 0]` {pr}`332` {smaller}`I Virshup`
+- Fixed type of version {pr}`315` {smaller}`P Angerer`
+- Fixed deprecated import from {mod}`pandas` {pr}`319` {smaller}`P Angerer`
diff --git a/docs/release-notes/0.7.2.rst b/docs/release-notes/0.7.2.rst
deleted file mode 100644
index ef48ad2..0000000
--- a/docs/release-notes/0.7.2.rst
+++ /dev/null
@@ -1,26 +0,0 @@
-0.7.2 :small:`15 May, 2020`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. rubric:: Concatenation overhaul :smaller:`I Virshup`
-
-- Elements of `uns` can now be merged, see :pr:`350`
-- Outer joins now work for `layers` and `obsm`, see :pr:`352`
-- Fill value for outer joins can now be specified
-- Expect improvments in performance, see :issue:`303`
-
-.. rubric:: Functionality
-
-- :attr:`~anndata.AnnData.obsp` and :attr:`~anndata.AnnData.varp` can now be transposed :pr:`370` :smaller:`A Wolf`
-- :meth:`~anndata.AnnData.obs_names_make_unique` is now better at making values unique, and will warn if ambiguities arise :pr:`345` :smaller:`M Weiden`
-- :attr:`~anndata.AnnData.obsp` is now preferred for storing pairwise relationships between observations. In practice, this means there will be deprecation warnings and reformatting applied to objects which stored connectivities under `uns["neighbors"]`. Square matrices in :attr:`~anndata.AnnData.uns` will no longer be sliced (use `.{obs,var}p` instead). :pr:`337` :smaller:`I Virshup`
-- :class:`~anndata.ImplicitModificationWarning` is now exported :pr:`315` :smaller:`P Angerer`
-- Better support for :class:`~numpy.ndarray` subclasses stored in `AnnData` objects :pr:`335` :smaller:`michalk8`
-
-.. rubric:: Bug fixes
-
-- Fixed inplace modification of :class:`~pandas.Index` objects by the make unique function :pr:`348` :smaller:`I Virshup`
-- Passing ambiguous keys to :meth:`~anndata.AnnData.obs_vector` and :meth:`~anndata.AnnData.var_vector` now throws errors :pr:`340` :smaller:`I Virshup`
-- Fix instantiating :class:`~anndata.AnnData` objects from :class:`~pandas.DataFrame` :pr:`316` :smaller:`P Angerer`
-- Fixed indexing into `AnnData` objects with arrays like `adata[adata[:, gene].X > 0]` :pr:`332` :smaller:`I Virshup`
-- Fixed type of version :pr:`315` :smaller:`P Angerer`
-- Fixed deprecated import from :mod:`pandas` :pr:`319` :smaller:`P Angerer`
diff --git a/docs/release-notes/0.7.3.md b/docs/release-notes/0.7.3.md
new file mode 100644
index 0000000..f03be2a
--- /dev/null
+++ b/docs/release-notes/0.7.3.md
@@ -0,0 +1,6 @@
+### 0.7.3 {small}`20 May, 2020`
+
+```{rubric} Bug fixes
+```
+
+- Fixed bug where graphs used too much memory when copying {pr}`381` {smaller}`I Virshup`
diff --git a/docs/release-notes/0.7.3.rst b/docs/release-notes/0.7.3.rst
deleted file mode 100644
index a3b163c..0000000
--- a/docs/release-notes/0.7.3.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-0.7.3 :small:`20 May, 2020`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. rubric:: Bug fixes
-
-- Fixed bug where graphs used too much memory when copying :pr:`381` :smaller:`I Virshup`
diff --git a/docs/release-notes/0.7.4.md b/docs/release-notes/0.7.4.md
new file mode 100644
index 0000000..16efc35
--- /dev/null
+++ b/docs/release-notes/0.7.4.md
@@ -0,0 +1,18 @@
+### 0.7.4 {small}`10 July, 2020`
+
+~~~{rubric} Concatenation overhaul {pr}`378` {smaller}`I Virshup`
+~~~
+
+- New function {func}`anndata.concat` for concatenating `AnnData` objects along either observations or variables
+- New documentation section: {doc}`/concatenation`
+
+```{rubric} Functionality
+```
+
+- AnnData objects created from dataframes with sparse values will have a sparse `.X` {pr}`395` {smaller}`I Virshup`
+
+```{rubric} Bug fixes
+```
+
+- Fixed error from `AnnData.concatenate` by bumping minimum versions of numpy and pandas {issue}`385`
+- Fixed colors being incorrectly changed when `AnnData` object was subset {pr}`388`
diff --git a/docs/release-notes/0.7.4.rst b/docs/release-notes/0.7.4.rst
deleted file mode 100644
index d20f756..0000000
--- a/docs/release-notes/0.7.4.rst
+++ /dev/null
@@ -1,16 +0,0 @@
-0.7.4 :small:`10 July, 2020`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. rubric:: Concatenation overhaul :pr:`378` :smaller:`I Virshup`
-
-- New function :func:`anndata.concat` for concatenating `AnnData` objects along either observations or variables
-- New documentation section: :doc:`/concatenation`
-
-.. rubric:: Functionality
-
-- AnnData object created from dataframes with sparse values will have sparse `.X` :pr:`395` :smaller:`I Virshup`
-
-.. rubric:: Bug fixes
-
-- Fixed error from `AnnData.concatenate` by bumping minimum versions of numpy and pandas :issue:`385`
-- Fixed colors being incorrectly changed when `AnnData` object was subset :pr:`388`
diff --git a/docs/release-notes/0.7.5.md b/docs/release-notes/0.7.5.md
new file mode 100644
index 0000000..9000c78
--- /dev/null
+++ b/docs/release-notes/0.7.5.md
@@ -0,0 +1,13 @@
+### 0.7.5 {small}`12 November, 2020`
+
+```{rubric} Functionality
+```
+
+- Added ipython tab completion and a useful return from `.keys` to `adata.uns` {pr}`415` {smaller}`I Virshup`
+
+```{rubric} Bug fixes
+```
+
+- Compatibility with `h5py>=3` strings {pr}`444` {smaller}`I Virshup`
+- Allow `adata.raw = None`, as is documented {pr}`447` {smaller}`I Virshup`
+- Fix warnings from pandas 1.1 {pr}`425` {smaller}`I Virshup`
diff --git a/docs/release-notes/0.7.5.rst b/docs/release-notes/0.7.5.rst
deleted file mode 100644
index 051baca..0000000
--- a/docs/release-notes/0.7.5.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-0.7.5 :small:`12 November, 2020`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. rubric:: Functionality
-
-- Added ipython tab completion and a useful return from `.keys` to `adata.uns` :pr:`415` :smaller:`I Virshup`
-
-.. rubric:: Bug fixes
-
-- Compatibility with `h5py>=3` strings :pr:`444` :smaller:`I Virshup`
-- Allow `adata.raw = None`, as is documented :pr:`447` :smaller:`I Virshup`
-- Fix warnings from pandas 1.1 :pr:`425` :smaller:`I Virshup`
diff --git a/docs/release-notes/0.7.6.md b/docs/release-notes/0.7.6.md
new file mode 100644
index 0000000..8fc8725
--- /dev/null
+++ b/docs/release-notes/0.7.6.md
@@ -0,0 +1,24 @@
+### 0.7.6 {small}`11 April, 2021`
+
+```{rubric} New features
+```
+
+- Added {meth}`anndata.AnnData.to_memory` for returning an in-memory object from a backed one {pr}`470` {pr}`542` {smaller}`V Bergen` {smaller}`I Virshup`
+- {meth}`anndata.AnnData.write_loom` now writes `obs_names` and `var_names` using the `Index`'s `.name` attribute, if set {pr}`538` {smaller}`I Virshup`
+
+```{rubric} Bug fixes
+```
+
+- Fixed bug where `np.str_` column names errored at write time {pr}`457` {smaller}`I Virshup`
+- Fixed "value.index does not match parent’s axis 0/1 names" error triggered when a data frame is stored in obsm/varm after obs_names/var_names is updated {pr}`461` {smaller}`G Eraslan`
+- Fixed `adata.write_csvs` when `adata` is a view {pr}`462` {smaller}`I Virshup`
+- Fixed null values being converted to strings when strings are converted to categorical {pr}`529` {smaller}`I Virshup`
+- Fixed handling of compression key word arguments {pr}`536` {smaller}`I Virshup`
+- Fixed copying a backed `AnnData` from changing which file the original object points at {pr}`533` {smaller}`ilia-kats`
+- Fixed a bug where calling `AnnData.concatenate` on an `AnnData` with no variables would error {pr}`537` {smaller}`I Virshup`
+
+```{rubric} Deprecations
+```
+
+- Passing positional arguments to {func}`anndata.read_loom` besides the path is now deprecated {pr}`538` {smaller}`I Virshup`
+- {func}`anndata.read_loom` arguments `obsm_names` and `varm_names` are now deprecated in favour of `obsm_mapping` and `varm_mapping` {pr}`538` {smaller}`I Virshup`
diff --git a/docs/release-notes/0.7.6.rst b/docs/release-notes/0.7.6.rst
deleted file mode 100644
index e84ba13..0000000
--- a/docs/release-notes/0.7.6.rst
+++ /dev/null
@@ -1,22 +0,0 @@
-0.7.6 :small:`11 April, 2021`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. rubric:: New features
-
-- Added :meth:`anndata.AnnData.to_memory` for returning an in memory object from a backed one :pr:`470` :pr:`542` :smaller:`V Bergen` :smaller:`I Virshup`
-- :meth:`anndata.AnnData.write_loom` now writes `obs_names` and `var_names` using the `Index`'s `.name` attribute, if set :pr:`538` :smaller:`I Virshup`
-
-.. rubric:: Bug fixes
-
-- Fixed bug where `np.str_` column names errored at write time :pr:`457` :smaller:`I Virshup`
-- Fixed "value.index does not match parent’s axis 0/1 names" error triggered when a data frame is stored in obsm/varm after obs_names/var_names is updated :pr:`461` :smaller:`G Eraslan`
-- Fixed `adata.write_csvs` when `adata` is a view :pr:`462` :smaller:`I Virshup`
-- Fixed null values being converted to strings when strings are converted to categorical :pr:`529` :smaller:`I Virshup`
-- Fixed handling of compression key word arguments :pr:`536` :smaller:`I Virshup`
-- Fixed copying a backed `AnnData` from changing which file the original object points at :pr:`533` :smaller:`ilia-kats`
-- Fixed a bug where calling `AnnData.concatenate` an `AnnData` with no variables would error :pr:`537` :smaller:`I Virshup`
-
-.. rubric:: Deprecations
-
-- Passing positional arguments to :func:`anndata.read_loom` besides the path is now deprecated :pr:`538` :smaller:`I Virshup`
-- :func:`anndata.read_loom` arguments `obsm_names` and `varm_names` are now deprecated in favour of `obsm_mapping` and `varm_mapping` :pr:`538` :smaller:`I Virshup`
diff --git a/docs/release-notes/0.7.7.md b/docs/release-notes/0.7.7.md
new file mode 100644
index 0000000..411f7f7
--- /dev/null
+++ b/docs/release-notes/0.7.7.md
@@ -0,0 +1,14 @@
+### 0.7.7 {small}`9 November, 2021`
+
+```{rubric} Bug fixes
+```
+
+- Fixed propagation of import error when importing `write_zarr` but not all dependencies are installed {pr}`579` {smaller}`R Hillje`
+- Fixed issue with `.uns` sub-dictionaries being referenced by copies {pr}`576` {smaller}`I Virshup`
+- Fixed out-of-bounds integer indices not raising {class}`IndexError` {pr}`630` {smaller}`M Klein`
+- Fixed backed `SparseDataset` indexing with scipy 1.7.2 {pr}`638` {smaller}`I Virshup`
+
+```{rubric} Development processes
+```
+
+- Use PEPs 621 (standardized project metadata), 631 (standardized dependencies), and 660 (standardized editable installs) {pr}`639` {smaller}`I Virshup`
diff --git a/docs/release-notes/0.7.7.rst b/docs/release-notes/0.7.7.rst
deleted file mode 100644
index 59af11a..0000000
--- a/docs/release-notes/0.7.7.rst
+++ /dev/null
@@ -1,13 +0,0 @@
-0.7.7 :small:`9 November, 2021`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. rubric:: Bug fixes
-
-- Fixed propagation of import error when importing `write_zarr` but not all dependencies are installed :pr:`579` :smaller:`R Hillje`
-- Fixed issue with `.uns` sub-dictionaries being referenced by copies :pr:`576` :smaller:`I Virshup`
-- Fixed out-of-bounds integer indices not raising :class:`IndexError` :pr:`630` :smaller:`M Klein`
-- Fixed backed `SparseDataset` indexing with scipy 1.7.2 :pr:`638` :smaller:`I Virshup`
-
-.. rubric:: Development processes
-
-- Use PEPs 621 (standardized project metadata), 631 (standardized dependencies), and 660 (standardized editable installs) :pr:`639` :smaller:`I Virshup`
diff --git a/docs/release-notes/0.7.8.md b/docs/release-notes/0.7.8.md
new file mode 100644
index 0000000..0ca52a0
--- /dev/null
+++ b/docs/release-notes/0.7.8.md
@@ -0,0 +1,6 @@
+### 0.7.8 {small}`9 November, 2021`
+
+```{rubric} Bug fixes
+```
+
+- Re-include test helpers {pr}`641` {smaller}`I Virshup`
diff --git a/docs/release-notes/0.7.8.rst b/docs/release-notes/0.7.8.rst
deleted file mode 100644
index c669f0c..0000000
--- a/docs/release-notes/0.7.8.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-0.7.8 :small:`9 November, 2021`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. rubric:: Bug fixes
-
-- Re-include test helpers :pr:`641` :smaller:`I Virshup`
diff --git a/docs/release-notes/0.8.0.md b/docs/release-notes/0.8.0.md
new file mode 100644
index 0000000..6b9bec3
--- /dev/null
+++ b/docs/release-notes/0.8.0.md
@@ -0,0 +1,45 @@
+### 0.8.0 {small}`14th March, 2022`
+
+```{rubric} IO Specification
+```
+
+```{warning}
+The on disk format of AnnData objects has been updated with this release.
+Previous releases of `anndata` will not be able to read all files written by this version.
+
+For discussion of possible future solutions to this issue, see {issue}`698`
+```
+
+Internal handling of IO has been overhauled.
+This should make it much easier to support new datatypes, use partial access, and use `AnnData` internally in other formats.
+
+- Each element should be tagged with an `encoding_type` and `encoding_version`. See updated docs on the {doc}`file format </fileformat-prose>`
+- Support for nullable integer and boolean data arrays. More data types to come!
+- Experimental support for low level access to the IO API via {func}`~anndata.experimental.read_elem` and {func}`~anndata.experimental.write_elem`
+
+```{rubric} Features
+```
+
+- Added PyTorch dataloader {class}`~anndata.experimental.AnnLoader` and lazy concatenation object {class}`~anndata.experimental.AnnCollection`. See the [tutorials] {pr}`416` {smaller}`S Rybakov`
+- Compatibility with `h5ad` files written from Julia {pr}`569` {smaller}`I Kats`
+- Many logging messages that should have been warnings are now warnings {pr}`650` {smaller}`I Virshup`
+- Significantly more efficient {func}`anndata.read_umi_tools` {pr}`661` {smaller}`I Virshup`
+- Fixed deepcopy of a copy of a view retaining sparse matrix view mixin type {pr}`670` {smaller}`M Klein`
+- In many cases {attr}`~anndata.AnnData.X` can now be `None` {pr}`463` {smaller}`R Cannoodt` {pr}`677` {smaller}`I Virshup`. Remaining work is documented in {issue}`467`.
+- Removed hard `xlrd` dependency {smaller}`I Virshup`
+- `obs` and `var` dataframes are no longer copied by default on `AnnData` instantiation {issue}`371` {smaller}`I Virshup`
+
+```{rubric} Bug fixes
+```
+
+- Fixed issue where `.copy` was creating sparse matrices views when copying {pr}`670` {smaller}`michalk8`
+- Fixed issue where `.X` matrix read in from `zarr` would always have `float32` values {pr}`701` {smaller}`I Virshup`
+- `Raw.to_adata` now includes `obsp` in the output {pr}`404` {smaller}`G Eraslan`
+
+```{rubric} Dependencies
+```
+
+- `xlrd` dropped as a hard dependency
+- Now requires `h5py` `v3.0.0` or newer
+
+[tutorials]: https://anndata-tutorials.readthedocs.io/en/latest/index.html
diff --git a/docs/release-notes/0.8.0.rst b/docs/release-notes/0.8.0.rst
deleted file mode 100644
index 9e32833..0000000
--- a/docs/release-notes/0.8.0.rst
+++ /dev/null
@@ -1,40 +0,0 @@
-0.8.0 :small:`14th March, 2022`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. rubric:: IO Specification
-
-.. warning:: The on disk format of AnnData objects has been updated with this release.
-    Previous releases of `anndata` will not be able to read all files written by this version.
-
-    For discussion of possible future solutions to this issue, see :issue:`698`
-
-Internal handling of IO has been overhauled.
-This should make it much easier to support new datatypes, use partial access, and use `AnnData` internally in other formats.
-
-- Each element should be tagged with an `encoding_type` and `encoding_version`. See updated docs on the :doc:`file format </fileformat-prose>`
-- Support for nullable integer and boolean data arrays. More data types to come!
-- Experimental support for low level access to the IO API via :func:`~anndata.experimental.read_elem` and :func:`~anndata.experimental.write_elem`
-
-.. rubric:: Features
-
-- Added PyTorch dataloader :class:`~anndata.experimental.AnnLoader` and lazy concatenation object :class:`~anndata.experimental.AnnCollection`. See the `tutorials`_ :pr:`416` :smaller:`S Rybakov`
-- Compatibility with `h5ad` files written from Julia :pr:`569` :smaller:`I Kats`
-- Many logging messages that should have been warnings are now warnings :pr:`650` :smaller:`I Virshup`
-- Significantly more efficient :func:`anndata.read_umi_tools` :pr:`661` :smaller:`I Virshup`
-- Fixed deepcopy of a copy of a view retaining sparse matrix view mixin type :pr:`670` :smaller:`M Klein`
-- In many cases :attr:`~anndata.AnnData.X` can now be `None` :pr:`463` :smaller:`R Cannoodt` :pr:`677` :smaller:`I Virshup`. Remaining work is documented in :issue:`467`.
-- Removed hard `xlrd` dependency :smaller:`I Virshup`
-- `obs` and `var` dataframes are no longer copied by default on `AnnData` instantiation :issue:`371` :smaller:`I Virshup`
-
-.. _tutorials: https://anndata-tutorials.readthedocs.io/en/latest/index.html
-
-.. rubric:: Bug fixes
-
-- Fixed issue where `.copy` was creating sparse matrices views when copying :pr:`670` :smaller:`michalk8`
-- Fixed issue where `.X` matrix read in from `zarr` would always have `float32` values :pr:`701` :smaller:`I Virshup`
-- `Raw.to_adata`` now includes `obsp` in the output :pr:`404` :smaller:`G Eraslan`
-
-.. rubric:: Dependencies
-
-* `xlrd` dropped as a hard dependency
-* Now requires `h5py` `v3.0.0` or newer
\ No newline at end of file
diff --git a/docs/release-notes/0.8.1.md b/docs/release-notes/0.8.1.md
new file mode 100644
index 0000000..8de88d1
--- /dev/null
+++ b/docs/release-notes/0.8.1.md
@@ -0,0 +1,11 @@
+### 0.8.1 {small}`the future`
+
+```{rubric} Bug fixes
+```
+
+- Fix warning from `rename_categories` {pr}`790` {smaller}`I Virshup`
+- Remove backwards compat checks for categories in `uns` when we can tell the file is new enough {pr}`790` {smaller}`I Virshup`
+- Categorical arrays are now created with a python `bool` instead of a `numpy.bool_` {pr}`856`
+
+```{rubric} Documentation
+```
diff --git a/docs/release-notes/0.9.0.md b/docs/release-notes/0.9.0.md
new file mode 100644
index 0000000..0a7d2ff
--- /dev/null
+++ b/docs/release-notes/0.9.0.md
@@ -0,0 +1,54 @@
+### 0.9.0 {small}`2023-04-11`
+
+```{rubric} Features
+```
+
+- Added experimental support for dask arrays {pr}`813` {user}`syelman` {user}`rahulbshrestha`
+- `obsm`, `varm` and `uns` can now hold [AwkwardArrays](https://awkward-array.org/quickstart.html) {pr}`647` {user}`giovp`, {user}`grst`, {user}`ivirshup`
+- Added experimental functions {func}`anndata.experimental.read_dispatched` and {func}`anndata.experimental.write_dispatched` which allow customizing IO with a callback {pr}`873` {user}`ilan-gold` {user}`ivirshup`
+- Better error messages during IO {pr}`734` {user}`flying-sheep`, {user}`ivirshup`
+- Unordered categorical columns are no longer cast to object during {func}`anndata.concat` {pr}`763` {user}`ivirshup`
+
+```{rubric} Documentation
+```
+
+- New tutorials for experimental features
+
+  > - {doc}`/tutorials/notebooks/anndata_dask_array` – {pr}`886` {user}`syelman`
+  > - {doc}`/tutorials/notebooks/{read,write}_dispatched` – {pr}`scverse/anndata-tutorials#17` {user}`ilan-gold`
+  > - {doc}`/tutorials/notebooks/awkward-arrays` – {pr}`scverse/anndata-tutorials#15` {user}`grst`
+
+- {doc}`File format description </fileformat-prose>` now includes a more formal specification {pr}`882` {user}`ivirshup`
+
+- {doc}`/interoperability`: new page on interoperability with other packages {pr}`831` {user}`ivirshup`
+
+- Expanded documentation for the `backed` argument of {func}`anndata.read_h5ad` {pr}`812` {user}`jeskowagner`
+
+- Documented how to use alternative compression methods for the `h5ad` file format, see {meth}`AnnData.write_h5ad() <anndata.AnnData.write_h5ad>` {pr}`857` {user}`nigeil`
+
+- General typo corrections 😅 {pr}`870` {user}`folded`
+
+```{rubric} Breaking changes
+```
+
+- The `AnnData` `dtype` argument no longer defaults to `float32` {pr}`854` {user}`ivirshup`
+- The previously deprecated `force_dense` argument of {meth}`AnnData.write_h5ad() <anndata.AnnData.write_h5ad>` has been removed. {pr}`855` {user}`ivirshup`
+- Previously deprecated behaviour around storing adjacency matrices in `uns` has been removed {pr}`866` {user}`ivirshup`
+
+```{rubric} Other updates
+```
+
+- Bump minimum python version to 3.8 {pr}`820` {user}`ivirshup`
+
+```{rubric} Deprecations
+```
+
+- {meth}`AnnData.concatenate() <anndata.AnnData.concatenate>` is now deprecated in favour of {func}`anndata.concat` {pr}`845` {user}`ivirshup`
+
+```{rubric} Bug fixes
+```
+
+- Fixed order dependent outer concatenation bug {pr}`904` {user}`ivirshup`, reported by {user}`szalata`
+- Fixed bug in renaming categories {pr}`790` {user}`ivirshup`, reported by {user}`perrin-isir`
+- Fixed IO bug when keys in `uns` ended in `_categories` {pr}`806` {user}`ivirshup`, reported by {user}`Hrovatin`
+- Fixed `raw.to_adata` not populating `obs` aligned values when `raw` was assigned through the setter {pr}`939` {user}`ivirshup`
diff --git a/docs/release-notes/0.9.1.md b/docs/release-notes/0.9.1.md
new file mode 100644
index 0000000..30c7ac7
--- /dev/null
+++ b/docs/release-notes/0.9.1.md
@@ -0,0 +1,6 @@
+### 0.9.1 {small}`2023-04-11`
+
+```{rubric} Bugfix
+```
+
+* Fixing Windows support {pr}`958` {user}`Koncopd`
diff --git a/docs/release-notes/index.md b/docs/release-notes/index.md
new file mode 100644
index 0000000..7a1832b
--- /dev/null
+++ b/docs/release-notes/index.md
@@ -0,0 +1,45 @@
+# Release notes
+
+```{include} /release-notes/release-latest.md
+```
+
+## Version 0.7
+
+```{include} /release-notes/0.7.8.md
+```
+
+```{include} /release-notes/0.7.7.md
+```
+
+```{include} /release-notes/0.7.6.md
+```
+
+```{include} /release-notes/0.7.5.md
+```
+
+```{include} /release-notes/0.7.4.md
+```
+
+```{include} /release-notes/0.7.3.md
+```
+
+```{include} /release-notes/0.7.2.md
+```
+
+```{include} /release-notes/0.7.0.md
+```
+
+## Version 0.6
+
+```{include} /release-notes/0.6.0.md
+```
+
+## Version 0.5
+
+```{include} /release-notes/0.5.0.md
+```
+
+## Version 0.4
+
+```{include} /release-notes/0.4.0.md
+```
diff --git a/docs/release-notes/index.rst b/docs/release-notes/index.rst
deleted file mode 100644
index 6982322..0000000
--- a/docs/release-notes/index.rst
+++ /dev/null
@@ -1,31 +0,0 @@
-Release notes
-=============
-
-.. include:: release-latest.rst
-
-Version 0.7
------------
-
-.. include:: 0.7.8.rst
-.. include:: 0.7.7.rst
-.. include:: 0.7.6.rst
-.. include:: 0.7.5.rst
-.. include:: 0.7.4.rst
-.. include:: 0.7.3.rst
-.. include:: 0.7.2.rst
-.. include:: 0.7.0.rst
-
-Version 0.6
------------
-
-.. include:: 0.6.0.rst
-
-Version 0.5
------------
-
-.. include:: 0.5.0.rst
-
-Version 0.4
------------
-
-.. include:: 0.4.0.rst
diff --git a/docs/release-notes/release-latest.md b/docs/release-notes/release-latest.md
new file mode 100644
index 0000000..d60c3ca
--- /dev/null
+++ b/docs/release-notes/release-latest.md
@@ -0,0 +1,15 @@
+## Version 0.9
+
+```{include} /release-notes/0.9.1.md
+```
+
+```{include} /release-notes/0.9.0.md
+```
+
+## Version 0.8
+
+```{include} /release-notes/0.8.1.md
+```
+
+```{include} /release-notes/0.8.0.md
+```
diff --git a/docs/release-notes/release-latest.rst b/docs/release-notes/release-latest.rst
deleted file mode 100644
index 9ad916c..0000000
--- a/docs/release-notes/release-latest.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-Version 0.8
------------
-
-.. include:: /release-notes/0.8.0.rst
\ No newline at end of file
diff --git a/docs/tutorials.rst b/docs/tutorials.rst
deleted file mode 100644
index a1b32e5..0000000
--- a/docs/tutorials.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-Tutorials
-=========
-
-For a quick introduction to `AnnData`, check out :tutorial:`getting-started`.
-
-For working with the experimental data loaders (see :ref:`experimental_api`):
-
-* For an example of working with pytorch :tutorial:`annloader`
-* For lazily concatenating multiple files :tutorial:`anncollection`
-* For interfacing pytorch models with lazy collections :tutorial:`anncollection-annloader`
diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md
new file mode 100644
index 0000000..f62e796
--- /dev/null
+++ b/docs/tutorials/index.md
@@ -0,0 +1,17 @@
+# Tutorials
+
+For a quick introduction to `AnnData`, check out {doc}`Getting Started with AnnData <notebooks/getting-started>`.
+
+For working with the experimental data loaders also see {ref}`experimental-api`.
+
+```{toctree}
+:maxdepth: 1
+
+notebooks/getting-started
+notebooks/annloader
+notebooks/anncollection
+notebooks/anncollection-annloader
+notebooks/anndata_dask_array
+notebooks/awkward-arrays
+notebooks/{read,write}_dispatched
+```
diff --git a/pyproject.toml b/pyproject.toml
index 1bf196c..8446526 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,13 +3,12 @@ build-backend = "flit_core.buildapi"
 requires = [
     "flit_core >=3.4,<4",
     "setuptools_scm",
-    "importlib_metadata>=0.7; python_version < '3.8'",
 ]
 
 [project]
 name = "anndata"
 description = "Annotated data."
-requires-python = ">=3.7"
+requires-python = ">=3.8"
 license = {file = "LICENSE"}
 authors = [
     {name = "Philipp Angerer"},
@@ -22,7 +21,7 @@ maintainers = [
     {name = "Philipp Angerer", email = "philipp.angerer@helmholtz-muenchen.de"},
     {name = "Alex Wolf", email = "f.alex.wolf@gmx.de"},
 ]
-readme = {file = "README.rst", content-type="text/x-rst"}
+readme = {file = "README.md", content-type="text/markdown"}
 classifiers = [
     "License :: OSI Approved :: BSD License",
     "Environment :: Console",
@@ -34,9 +33,9 @@ classifiers = [
     "Operating System :: Microsoft :: Windows",
     "Operating System :: POSIX :: Linux",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.7",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
     "Topic :: Scientific/Engineering :: Bio-Informatics",
     "Topic :: Scientific/Engineering :: Visualization",
 ]
@@ -47,16 +46,13 @@ dependencies = [
     "h5py>=3",
     "natsort",
     "packaging>=20",
-    # for getting the stable version
-    "importlib_metadata>=0.7; python_version < '3.8'",
-    "typing_extensions; python_version < '3.8'",  # Remove once we depend on python > 3.7
 ]
 dynamic = ["version"]
 
 [project.urls]
 Documentation = "https://anndata.readthedocs.io/"
-Source = "https://github.com/theislab/anndata"
-Home-page = "https://github.com/theislab/anndata"
+Source = "https://github.com/scverse/anndata"
+Home-page = "https://github.com/scverse/anndata"
 
 
 [project.optional-dependencies]
@@ -68,14 +64,17 @@ dev = [
     "docutils",
 ]
 doc = [
-    "sphinx>=4.1,<4.2",
-    "sphinx-rtd-theme",
+    "sphinx>=4.4",
+    "sphinx-rtd-theme>=1.1.1",
     "sphinx-autodoc-typehints>=1.11.0",
     "sphinx_issues",
+    "sphinxext.opengraph",
     "nbsphinx",
-    "scanpydoc>=0.7.3",
-    "typing_extensions; python_version < '3.8'",
+    "scanpydoc>=0.7.7",
     "zarr",
+    "awkward>=2.0.7",
+    "IPython", # For syntax highlighting in notebooks
+    "myst_parser",
 ]
 test = [
     "loompy>=3.0.5",
@@ -83,12 +82,14 @@ test = [
     "pytest-cov>=2.10",
     "zarr",
     "matplotlib",
-    "sklearn",
+    "scikit-learn",
     "openpyxl",
     "joblib",
     "boltons",
     "scanpy",
     "dask[array]",
+    "awkward>=2.0.6",
+    "pytest_memray",
 ]
 
 [tool.flit.sdist]
@@ -118,5 +119,54 @@ xfail_strict = true
 
 [tool.black]
 line-length = 88
-target-version = ["py37"]
+target-version = ["py38"]
 exclude = "^/build/.*$"
+
+[tool.ruff]
+exclude = [
+    ".git",
+    "__pycache__",
+    "build",
+    "docs/_build",
+    "dist",
+]
+ignore = [
+    # module imported but unused -> required for Scanpys API
+    "F401",
+    # line too long -> we accept long comment lines; black gets rid of long code lines
+    "E501",
+    # module level import not at top of file -> required to circumvent circular imports for Scanpys API
+    "E402",
+    # Do not assign a lambda expression, use a def -> Scanpy allows lambda expression assignments,
+    "E731",
+    # allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation
+    "E741",
+    ## Flake8 rules not supported by ruff:
+    # line break before a binary operator -> black does not adhere to PEP8
+    # "W503",
+    # line break occured after a binary operator -> black does not adhere to PEP8
+    # "W504",
+    # whitespace before : -> black does not adhere to PEP8
+    # "E203",
+    # missing whitespace after ,', ';', or ':' -> black does not adhere to PEP8
+    # "E231",
+    # continuation line over-indented for hanging indent -> black does not adhere to PEP8
+    # "E126",
+    # too many leading '# ' for block comment -> Scanpy allows them for comments into sections
+    # "E262",
+    # inline comment should start with '#' -> Scanpy allows them for specific explanations
+    # "E266",
+]
+line-length = 88
+select = [
+    "E",
+    "F",
+    "W",
+]
+[tool.ruff.per-file-ignores]
+# E721 comparing types, but we specifically are checking that we aren't getting subtypes (views)
+"anndata/tests/test_readwrite.py" = ["E721"]
+# F811 Redefinition of unused name from line, does not play nice with pytest fixtures
+"tests/test*.py" = ["F811"]
+# F821 Undefined name, can't import AnnData or it'd be a circular import
+"anndata/compat/_overloaded_dict.py" = ["F821"]
