diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 02f32036dd..b0cab4826c 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -136,6 +136,52 @@ jobs:
           name: codecov-umbrella
           verbose: true
 
+  test-numpy1:
+    name: Numpy 1.x
+    runs-on: ubuntu-24.04
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - name: Cancel Previous Runs
+        uses: styfle/cancel-workflow-action@0.12.1
+        with:
+          access_token: ${{ github.token }}
+
+      - name: Checkout
+        uses: actions/checkout@v4.2.2
+
+      - name: Setup Python
+        uses: actions/setup-python@v5.4.0
+        with:
+          python-version: '3.12'
+
+      - name: Install dependencies
+        working-directory: python
+        run: |
+          pip install -r requirements/CI-complete/requirements.txt
+          pip install "numpy<2"
+
+      - name: Build module
+        working-directory: python
+        run: |
+          python setup.py build_ext --inplace
+
+      - name: Run tests with numpy 1.x
+        working-directory: python
+        run: |
+          python -m pytest -x --cov=tskit --cov-report=xml --cov-branch -n2 tests/test_lowlevel.py tests/test_highlevel.py
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v5.4.0
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          working-directory: python
+          fail_ci_if_error: false
+          flags: python-tests-numpy1
+          name: codecov-numpy1
+          verbose: true
+
   msys2:
     runs-on: windows-latest
     strategy:
diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst
index 623469fc40..2e708228c6 100644
--- a/python/CHANGELOG.rst
+++ b/python/CHANGELOG.rst
@@ -19,6 +19,7 @@
 - Add ``TreeSequence.mutations_edge`` which returns the edge ID for each mutation's
   edge. (:user:`benjeffery`, :pr:`3226`, :issue:`3189`)
 
+
 **Bugfixes**
 
 - Fix bug in ``TreeSequence.pair_coalescence_counts`` when ``span_normalise=True``
@@ -30,6 +31,17 @@
 - ``ltrim``, ``rtrim``, ``trim`` and ``shift`` raise an error if used on a tree sequence
   containing a reference sequence (:user:`hyanwong`, :pr:`3210`, :issue:`2091`)
 
+- Add ``TreeSequence.sites_ancestral_state`` and ``TreeSequence.mutations_derived_state`` properties
+  to return the ancestral state of sites and derived state of mutations as NumPy arrays of
+  the new numpy 2.0 StringDType.
+  This requires numpy version 2 or greater, as such this is now the minimum version stated in tskit's
+  dependencies. If you try to use another python module that was compiled against numpy 1.X you may see
+  the error "A module that was compiled using NumPy 1.x cannot be run in NumPy 2.0.0 as it may crash.".
+  If no newer version of the module is avaliable you can still use it with tskit and numpy 1.X by
+  building tskit from source with numpy 1.X using ``pip install tskit --no-binary tskit``. However
+  any use of the new properties will result in a ``RuntimeError``.
+  (:user:`benjeffery`, :pr:`3228`, :issue:`2632`)
+
 --------------------
 [0.6.4] - 2025-05-21
 --------------------
diff --git a/python/_tskitmodule.c b/python/_tskitmodule.c
index b358ae91ab..cab445e5d0 100644
--- a/python/_tskitmodule.c
+++ b/python/_tskitmodule.c
@@ -23,16 +23,27 @@
  * SOFTWARE.
  */
 
-#define PY_SSIZE_T_CLEAN
-#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 #define TSK_BUG_ASSERT_MESSAGE                                                          \
     "Please open an issue on"                                                           \
     " GitHub, ideally with a reproducible example."                                     \
     " (https://github.com/tskit-dev/tskit/issues)"
 
+#define PY_SSIZE_T_CLEAN
 #include <Python.h>
-#include <structmember.h>
+#include <numpy/numpyconfig.h>
+
+#if defined(NPY_2_0_API_VERSION) && NPY_API_VERSION >= NPY_2_0_API_VERSION
+#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION
+#undef NPY_FEATURE_VERSION
+#define NPY_FEATURE_VERSION NPY_2_0_API_VERSION
+#define HAVE_NUMPY_2 1
+#else
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#define HAVE_NUMPY_2 0
+#endif
 #include <numpy/arrayobject.h>
+
+#include <structmember.h>
 #include <float.h>
 
 #include "kastore.h"
@@ -10802,6 +10813,60 @@ TreeSequence_make_array(TreeSequence *self, tsk_size_t size, int dtype, void *da
     return make_owned_array((PyObject *) self, size, dtype, data);
 }
 
+#if HAVE_NUMPY_2
+PyObject *
+TreeSequence_decode_ragged_string_column(
+    TreeSequence *self, tsk_size_t num_rows, const char *data, const tsk_size_t *offset)
+{
+    PyObject *ret = NULL;
+    PyObject *array = NULL;
+    char *array_data = NULL;
+    npy_intp dims[1];
+    tsk_size_t i;
+    int pack_result;
+    npy_string_allocator *allocator = NULL;
+    PyArray_StringDTypeObject *string_dtype
+        = (PyArray_StringDTypeObject *) PyArray_DescrFromType(NPY_VSTRING);
+    /* This can only fail if an invalid dtype is passed */
+    assert(string_dtype != NULL);
+
+    dims[0] = (npy_intp) num_rows;
+    array = PyArray_Zeros(1, dims, (PyArray_Descr *) string_dtype, 0);
+    if (array == NULL) {
+        goto out;
+    }
+    array_data = (char *) PyArray_DATA((PyArrayObject *) array);
+    allocator = NpyString_acquire_allocator(string_dtype);
+    for (i = 0; i < num_rows; i++) {
+        pack_result = NpyString_pack(allocator,
+            (npy_packed_static_string
+                    *) (array_data + (i * ((PyArray_Descr *) string_dtype)->elsize)),
+            data + offset[i], offset[i + 1] - offset[i]);
+        if (pack_result < 0) {
+            PyErr_SetString(PyExc_MemoryError, "could not pack string.");
+            goto out;
+        }
+    }
+    /* Release the allocator before we call any other Python C API functions
+     * which may require the GIL.
+     */
+    NpyString_release_allocator(allocator);
+    allocator = NULL;
+
+    /* Clear the writeable flag to match other arrays semantics */
+    PyArray_CLEARFLAGS((PyArrayObject *) array, NPY_ARRAY_WRITEABLE);
+
+    ret = array;
+    array = NULL;
+out:
+    if (allocator != NULL) {
+        NpyString_release_allocator(allocator);
+    }
+    Py_XDECREF(array);
+    return ret;
+}
+#endif
+
 static PyObject *
 TreeSequence_get_individuals_flags(TreeSequence *self, void *closure)
 {
@@ -11049,6 +11114,24 @@ TreeSequence_get_sites_position(TreeSequence *self, void *closure)
     return ret;
 }
 
+#if HAVE_NUMPY_2
+static PyObject *
+TreeSequence_get_sites_ancestral_state(TreeSequence *self, void *closure)
+{
+    PyObject *ret = NULL;
+    tsk_site_table_t sites;
+
+    if (TreeSequence_check_state(self) != 0) {
+        goto out;
+    }
+    sites = self->tree_sequence->tables->sites;
+    ret = TreeSequence_decode_ragged_string_column(
+        self, sites.num_rows, sites.ancestral_state, sites.ancestral_state_offset);
+out:
+    return ret;
+}
+#endif
+
 static PyObject *
 TreeSequence_get_sites_metadata(TreeSequence *self, void *closure)
 {
@@ -11141,6 +11224,24 @@ TreeSequence_get_mutations_time(TreeSequence *self, void *closure)
     return ret;
 }
 
+#if HAVE_NUMPY_2
+static PyObject *
+TreeSequence_get_mutations_derived_state(TreeSequence *self, void *closure)
+{
+    PyObject *ret = NULL;
+    tsk_mutation_table_t mutations;
+
+    if (TreeSequence_check_state(self) != 0) {
+        goto out;
+    }
+    mutations = self->tree_sequence->tables->mutations;
+    ret = TreeSequence_decode_ragged_string_column(self, mutations.num_rows,
+        mutations.derived_state, mutations.derived_state_offset);
+out:
+    return ret;
+}
+#endif
+
 static PyObject *
 TreeSequence_get_mutations_metadata(TreeSequence *self, void *closure)
 {
@@ -11719,6 +11820,11 @@ static PyGetSetDef TreeSequence_getsetters[] = {
     { .name = "sites_position",
         .get = (getter) TreeSequence_get_sites_position,
         .doc = "The site position array" },
+#if HAVE_NUMPY_2
+    { .name = "sites_ancestral_state",
+        .get = (getter) TreeSequence_get_sites_ancestral_state,
+        .doc = "The site ancestral state array" },
+#endif
     { .name = "sites_metadata",
         .get = (getter) TreeSequence_get_sites_metadata,
         .doc = "The site metadata array" },
@@ -11737,6 +11843,11 @@ static PyGetSetDef TreeSequence_getsetters[] = {
     { .name = "mutations_time",
         .get = (getter) TreeSequence_get_mutations_time,
         .doc = "The mutation time array" },
+#if HAVE_NUMPY_2
+    { .name = "mutations_derived_state",
+        .get = (getter) TreeSequence_get_mutations_derived_state,
+        .doc = "The mutation derived state array" },
+#endif
     { .name = "mutations_metadata",
         .get = (getter) TreeSequence_get_mutations_metadata,
         .doc = "The mutation metadata array" },
@@ -14606,11 +14717,24 @@ static struct PyModuleDef tskitmodule = {
 PyObject *
 PyInit__tskit(void)
 {
-    PyObject *module = PyModule_Create(&tskitmodule);
-    if (module == NULL) {
+    PyObject *module;
+
+#if HAVE_NUMPY_2
+    if (PyArray_ImportNumPyAPI() < 0) {
         return NULL;
     }
+#else
     import_array();
+#endif
+
+    module = PyModule_Create(&tskitmodule);
+    if (module == NULL) {
+        return NULL;
+    }
+
+    if (PyModule_AddIntConstant(module, "HAS_NUMPY_2", HAVE_NUMPY_2)) {
+        return NULL;
+    }
 
     if (register_lwt_class(module) != 0) {
         return NULL;
diff --git a/python/pyproject.toml b/python/pyproject.toml
index a24a4fb868..80b9cc1f78 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -43,7 +43,7 @@ keywords = [
 requires-python = ">=3.9"
 dependencies = [
     "jsonschema>=3.0.0",
-    "numpy>=1.23.5",
+    "numpy>=2",
 ]
 
 [project.urls]
diff --git a/python/requirements/development.txt b/python/requirements/development.txt
index 39653c87e7..2a6d27ec62 100644
--- a/python/requirements/development.txt
+++ b/python/requirements/development.txt
@@ -17,7 +17,6 @@ msprime>=1.0.0
 networkx
 newick
 ninja
-numpy
 packaging
 portion
 pre-commit
diff --git a/python/requirements/development.yml b/python/requirements/development.yml
index 6aefcbd22a..abe9e91cf2 100644
--- a/python/requirements/development.yml
+++ b/python/requirements/development.yml
@@ -22,7 +22,7 @@ dependencies:
   - msprime>=1.0.0
   - networkx
   - ninja
-  - numpy<2
+  - numpy
   - packaging
   - portion
   - pre-commit
diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py
index d0e690dec0..1f476be574 100644
--- a/python/tests/test_highlevel.py
+++ b/python/tests/test_highlevel.py
@@ -5321,6 +5321,83 @@ def test_mixed_sample_status(self):
         assert_array_equal(result, expected)
 
 
+class TestRaggedArrays:
+    @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason="Requires NumPy 2.0 or higher")
+    @pytest.mark.parametrize("num_rows", [0, 1, 100])
+    @pytest.mark.parametrize("column", ["ancestral_state", "derived_state"])
+    def test_site_ancestral_state(self, num_rows, column):
+        tables = tskit.TableCollection(sequence_length=100)
+        rng = random.Random(42)
+        for i in range(num_rows):
+            state_length = rng.randint(0, 10)
+            state = "".join(
+                chr(rng.randint(0x1F300, 0x1F6FF)) for _ in range(state_length)
+            )
+            if column == "ancestral_state":
+                tables.sites.add_row(position=i, ancestral_state=state)
+            elif column == "derived_state":
+                tables.nodes.add_row()
+                tables.sites.add_row(position=i, ancestral_state="A")
+                tables.mutations.add_row(site=i, node=0, derived_state=state)
+        ts = tables.tree_sequence()
+        a = getattr(
+            ts,
+            (
+                "sites_ancestral_state"
+                if column == "ancestral_state"
+                else "mutations_derived_state"
+            ),
+        )
+        assert isinstance(a, np.ndarray)
+        assert a.shape == (num_rows,)
+        assert a.dtype == np.dtype("T")
+        assert a.size == num_rows
+
+        # Check that the value is cached
+        assert a is getattr(
+            ts,
+            (
+                "sites_ancestral_state"
+                if column == "ancestral_state"
+                else "mutations_derived_state"
+            ),
+        )
+
+        for state, row in itertools.zip_longest(
+            a, ts.sites() if column == "ancestral_state" else ts.mutations()
+        ):
+            assert state == getattr(row, column)
+
+    @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason="Requires NumPy 2.0 or higher")
+    @pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences())
+    def test_equality_sites_ancestral_state(self, ts):
+        assert_array_equal(
+            ts.sites_ancestral_state, [site.ancestral_state for site in ts.sites()]
+        )
+
+    @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason="Requires NumPy 2.0 or higher")
+    @pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences())
+    def test_equality_mutations_derived_state(self, ts):
+        assert_array_equal(
+            ts.mutations_derived_state,
+            [mutation.derived_state for mutation in ts.mutations()],
+        )
+
+    @pytest.mark.skipif(_tskit.HAS_NUMPY_2, reason="Test only on Numpy 1.X")
+    @pytest.mark.parametrize(
+        "column", ["sites_ancestral_state", "mutations_derived_state"]
+    )
+    def test_ragged_array_not_supported(self, column):
+        tables = tskit.TableCollection(sequence_length=100)
+        ts = tables.tree_sequence()
+
+        with pytest.raises(
+            RuntimeError,
+            match="requires numpy 2.0",
+        ):
+            getattr(ts, column)
+
+
 class TestSampleNodesByPloidy:
     @pytest.mark.parametrize(
         "n_samples,ploidy,expected",
diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py
index ffdff495a2..9cc8206cb1 100644
--- a/python/tests/test_lowlevel.py
+++ b/python/tests/test_lowlevel.py
@@ -1922,11 +1922,11 @@ def test_array_read_only(self, name, ts_fixture):
     def test_array_properties(self, name, ts_fixture):
         ts_fixture = ts_fixture.ll_tree_sequence
         a = getattr(ts_fixture, name)
-        assert a.base == ts_fixture
         assert not a.flags.writeable
         assert a.flags.aligned
         assert a.flags.c_contiguous
         assert not a.flags.owndata
+        assert a.base == ts_fixture
         b = getattr(ts_fixture, name)
         assert a is not b
         assert np.all(a == b)
@@ -1976,6 +1976,104 @@ def test_generated_columns(self, ts_fixture, name):
         a2[:] = 0
         assert a3 is not a2
 
+    @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason="Requires NumPy 2.0+")
+    @pytest.mark.parametrize(
+        "string_array", ["sites_ancestral_state", "mutations_derived_state"]
+    )
+    @pytest.mark.parametrize(
+        "str_lengths",
+        ["none", "all-0", "all-1", "all-2", "mixed", "very_long", "unicode"],
+    )
+    def test_string_arrays(self, ts_fixture, str_lengths, string_array):
+        if str_lengths == "none":
+            ts = tskit.TableCollection(1.0).tree_sequence()
+        else:
+            if str_lengths == "all-1":
+                ts = ts_fixture
+                if string_array == "sites_ancestral_state":
+                    assert ts.num_sites > 0
+                    assert {len(site.ancestral_state) for site in ts.sites()} == {1}
+                elif string_array == "mutations_derived_state":
+                    assert ts.num_mutations > 0
+                    assert {len(mut.derived_state) for mut in ts.mutations()} == {1}
+            else:
+                tables = ts_fixture.dump_tables()
+
+                str_map = {
+                    "all-0": lambda i, item: "",
+                    "all-2": lambda i, item: chr(ord("A") + (i % 26)) * 2,
+                    "mixed": lambda i, item: chr(ord("A") + (i % 26)) * (i % 20),
+                    "very_long": lambda i, item: "A" * 100_000_000 if i == 1 else "T",
+                    "unicode": lambda i, item: "🧬" * (i + 1),
+                }
+
+                if string_array == "sites_ancestral_state":
+                    sites = tables.sites.copy()
+                    tables.sites.clear()
+                    get_ancestral_state = str_map[str_lengths]
+                    for i, site in enumerate(sites):
+                        tables.sites.append(
+                            site.replace(ancestral_state=get_ancestral_state(i, site))
+                        )
+                elif string_array == "mutations_derived_state":
+                    mutations = tables.mutations.copy()
+                    tables.mutations.clear()
+                    get_derived_state = str_map[str_lengths]
+                    for i, mutation in enumerate(mutations):
+                        tables.mutations.append(
+                            mutation.replace(
+                                derived_state=get_derived_state(i, mutation)
+                            )
+                        )
+
+                ts = tables.tree_sequence()
+        ll_ts = ts.ll_tree_sequence
+
+        a = getattr(ll_ts, string_array)
+
+        # Contents
+        if str_lengths == "none":
+            assert a.size == 0
+        else:
+            if string_array == "sites_ancestral_state":
+                for site in ts.sites():
+                    assert a[site.id] == site.ancestral_state
+            elif string_array == "mutations_derived_state":
+                for mutation in ts.mutations():
+                    assert a[mutation.id] == mutation.derived_state
+
+        # Read only
+        with pytest.raises(AttributeError, match="not writable"):
+            setattr(ll_ts, string_array, None)
+        with pytest.raises(AttributeError, match="not writable"):
+            delattr(ll_ts, string_array)
+
+        with pytest.raises(ValueError, match="assignment destination"):
+            a[:] = 0
+        with pytest.raises(ValueError, match="assignment destination"):
+            a[0] = 0
+
+        # Properties
+        assert a.dtype == np.dtypes.StringDType()
+        assert a.flags.aligned
+        assert a.flags.c_contiguous
+        b = getattr(ll_ts, string_array)
+        assert a is not b
+        assert np.all(a == b)
+
+        # Lifetime
+        a1 = getattr(ll_ts, string_array)
+        a2 = a1.copy()
+        assert a1 is not a2
+        del ll_ts
+        # Do some memory operations
+        a3 = np.ones(10**6)
+        assert np.all(a1 == a2)
+        del a1
+        # Just do something to touch memory
+        a2[:] = 0
+        assert a3 is not a2
+
 
 class StatsInterfaceMixin:
     """
diff --git a/python/tskit/trees.py b/python/tskit/trees.py
index 59cf5a26ae..e740181f81 100644
--- a/python/tskit/trees.py
+++ b/python/tskit/trees.py
@@ -4138,6 +4138,8 @@ def __init__(self, ll_tree_sequence):
         self._individuals_location = None
         self._individuals_nodes = None
         self._mutations_edge = None
+        self._sites_ancestral_state = None
+        self._mutations_derived_state = None
         # NOTE: when we've implemented read-only access via the underlying
         # tables we can replace these arrays with reference to the read-only
         # tables here (and remove the low-level boilerplate).
@@ -5952,6 +5954,20 @@ def sites_position(self):
         """
         return self._sites_position
 
+    @property
+    def sites_ancestral_state(self):
+        """
+        The ``ancestral_state`` column in the
+        :ref:`sec_site_table_definition` as a numpy array (dtype=StringDtype).
+        """
+        if not _tskit.HAS_NUMPY_2:
+            raise RuntimeError(
+                "The sites_ancestral_state property requires numpy 2.0 or later."
+            )
+        if self._sites_ancestral_state is None:
+            self._sites_ancestral_state = self._ll_tree_sequence.sites_ancestral_state
+        return self._sites_ancestral_state
+
     @property
     def sites_metadata(self):
         """
@@ -6009,6 +6025,22 @@ def mutations_time(self):
         """
         return self._mutations_time
 
+    @property
+    def mutations_derived_state(self):
+        """
+        Access to the ``derived_state`` column in the
+        :ref:`sec_mutation_table_definition` as a numpy array (dtype=StringDtype).
+        """
+        if not _tskit.HAS_NUMPY_2:
+            raise RuntimeError(
+                "The mutations_derived_state property requires numpy 2.0 or later."
+            )
+        if self._mutations_derived_state is None:
+            self._mutations_derived_state = (
+                self._ll_tree_sequence.mutations_derived_state
+            )
+        return self._mutations_derived_state
+
     @property
     def mutations_metadata(self):
         """