Просмотр исходного кода

Closes #21363: Implement cursor-based pagination for the REST API (#21594)

Jeremy Stretch 10 часов назад
Родитель
Сommit
6659bb3abe

+ 44 - 1
docs/integrations/rest-api.md

@@ -341,7 +341,7 @@ When retrieving devices and virtual machines via the REST API, each will include
 
 ## Pagination
 
-API responses which contain a list of many objects will be paginated for efficiency. The root JSON object returned by a list endpoint contains the following attributes:
+API responses which contain a list of many objects will be paginated for efficiency. NetBox employs offset-based pagination by default, which forms a page by skipping the number of objects indicated by the `offset` URL parameter. The root JSON object returned by a list endpoint contains the following attributes:
 
 * `count`: The total number of all objects matching the query
 * `next`: A hyperlink to the next page of results (if applicable)
@@ -398,6 +398,49 @@ The maximum number of objects that can be returned is limited by the [`MAX_PAGE_
 !!! warning
     Disabling the page size limit introduces a potential for very resource-intensive requests, since one API request can effectively retrieve an entire table from the database.
 
+### Cursor-Based Pagination
+
+For large datasets, offset-based pagination can become inefficient because the database must scan all rows up to the offset. As an alternative, cursor-based pagination uses the `start` query parameter to filter results by primary key (PK), enabling efficient keyset pagination.
+
+To use cursor-based pagination, pass `start` (the minimum PK value) and `limit` (the page size):
+
+```
+http://netbox/api/dcim/devices/?start=0&limit=100
+```
+
+This returns objects with an `id` greater than or equal to zero, ordered by PK, limited to 100 results. Below is an example showing an arbitrary `start` value.
+
+```json
+{
+    "count": null,
+    "next": "http://netbox/api/dcim/devices/?start=357&limit=100",
+    "previous": null,
+    "results": [
+        {
+            "id": 109,
+            "name": "dist-router07",
+            ...
+        },
+        ...
+        {
+            "id": 356,
+            "name": "acc-switch492",
+            ...
+        }
+    ]
+}
+```
+
+To iterate through all results, use the `id` of the last object in each response plus one as the `start` value for the next request. Continue until `next` is null.
+
+!!! info
+    Some important differences from offset-based pagination:
+
+    * `start` and `offset` are **mutually exclusive**; specifying both will result in a 400 error.
+    * Results are always ordered by primary key when using `start`. This is required to ensure deterministic behavior.
+    * `count` is always `null` in cursor mode, as counting all matching rows would partially negate the performance benefit of cursor-based pagination.
+    * `previous` is always `null`: cursor-based pagination supports only forward navigation.
+
 ## Interacting with Objects
 
 ### Retrieving Multiple Objects

+ 106 - 8
netbox/netbox/api/pagination.py

@@ -1,18 +1,40 @@
 from django.db.models import QuerySet
+from django.utils.translation import gettext_lazy as _
+from rest_framework.exceptions import ValidationError
 from rest_framework.pagination import LimitOffsetPagination
+from rest_framework.utils.urls import remove_query_param, replace_query_param
 
 from netbox.api.exceptions import QuerySetNotOrdered
 from netbox.config import get_config
 
 
-class OptionalLimitOffsetPagination(LimitOffsetPagination):
+class NetBoxPagination(LimitOffsetPagination):
     """
-    Override the stock paginator to allow setting limit=0 to disable pagination for a request. This returns all objects
-    matching a query, but retains the same format as a paginated request. The limit can only be disabled if
-    MAX_PAGE_SIZE has been set to 0 or None.
+    Provides two mutually exclusive pagination mechanisms: offset-based and cursor-based.
+
+    Offset-based pagination employs `offset` and (optionally) `limit` parameters to page through results following the
+    model's natural order. `offset` indicates the number of results to skip. This provides very human-friendly behavior,
+    but performance can suffer when querying very large data sets due to the overhead required to determine the starting
+    point in the database.
+
+    Cursor-based pagination employs `start` and (optionally) `limit` parameters to page through results as ordered by
+    the model's primary key (i.e. `id`). `start` indicates the minimum numeric ID of the objects to return; `limit`
+    indicates the maximum number of objects to return beginning at or after that ID. Objects *must* be ordered by ID
+    to ensure pagination is consistent. This approach is less human-friendly but offers superior performance to
+    offset-based pagination. In cursor mode, `count` is omitted (null) for performance.
+
+    Offset- and cursor-based pagination are mutually exclusive: Only `offset` _or_ `start` is permitted for a request.
+
+    `limit` may be set to zero (`?limit=0`). This returns all objects matching a query, but retains the same format as
+    a paginated request. The limit can only be disabled if `MAX_PAGE_SIZE` has been set to 0 or None.
     """
+    start_query_param = 'start'
+
     def __init__(self):
         self.default_limit = get_config().PAGINATE_COUNT
+        self.start = None
+        self._page_length = 0
+        self._last_pk = None
 
     def paginate_queryset(self, queryset, request, view=None):
 
@@ -22,15 +44,42 @@ class OptionalLimitOffsetPagination(LimitOffsetPagination):
                 "ordering has been applied to the queryset for this API endpoint."
             )
 
+        self.start = self.get_start(request)
+        self.limit = self.get_limit(request)
+        self.request = request
+
+        # Cursor-based pagination
+        if self.start is not None:
+            if self.offset_query_param in request.query_params:
+                raise ValidationError(
+                    _("'{start_param}' and '{offset_param}' are mutually exclusive.").format(
+                        start_param=self.start_query_param,
+                        offset_param=self.offset_query_param,
+                    )
+                )
+            if 'ordering' in request.query_params:
+                raise ValidationError(_("Ordering cannot be specified in conjunction with cursor-based pagination."))
+
+            self.count = None
+            self.offset = 0
+
+            queryset = queryset.filter(pk__gte=self.start).order_by('pk')
+            results = list(queryset[:self.limit]) if self.limit else list(queryset)
+
+            self._page_length = len(results)
+            if results:
+                self._last_pk = results[-1].pk if hasattr(results[-1], 'pk') else results[-1]['pk']
+
+            return results
+
+        # Offset-based pagination
         if isinstance(queryset, QuerySet):
             self.count = self.get_queryset_count(queryset)
         else:
             # We're dealing with an iterable, not a QuerySet
             self.count = len(queryset)
 
-        self.limit = self.get_limit(request)
         self.offset = self.get_offset(request)
-        self.request = request
 
         if self.limit and self.count > self.limit and self.template is not None:
             self.display_page_controls = True
@@ -42,6 +91,25 @@ class OptionalLimitOffsetPagination(LimitOffsetPagination):
             return list(queryset[self.offset:self.offset + self.limit])
         return list(queryset[self.offset:])
 
+    def get_start(self, request):
+        try:
+            value = int(request.query_params[self.start_query_param])
+            if value < 0:
+                raise ValidationError(
+                    _("Invalid '{param}' parameter: must be a non-negative integer.").format(
+                        param=self.start_query_param,
+                    )
+                )
+            return value
+        except KeyError:
+            return None
+        except (ValueError, TypeError):
+            raise ValidationError(
+                _("Invalid '{param}' parameter: must be a non-negative integer.").format(
+                    param=self.start_query_param,
+                )
+            )
+
     def get_limit(self, request):
         max_limit = self.default_limit
         MAX_PAGE_SIZE = get_config().MAX_PAGE_SIZE
@@ -75,6 +143,16 @@ class OptionalLimitOffsetPagination(LimitOffsetPagination):
         if not self.limit:
             return None
 
+        # Cursor mode
+        if self.start is not None:
+            if self._page_length < self.limit:
+                return None
+            url = self.request.build_absolute_uri()
+            url = replace_query_param(url, self.start_query_param, self._last_pk + 1)
+            url = replace_query_param(url, self.limit_query_param, self.limit)
+            url = remove_query_param(url, self.offset_query_param)
+            return url
+
         return super().get_next_link()
 
     def get_previous_link(self):
@@ -83,10 +161,30 @@ class OptionalLimitOffsetPagination(LimitOffsetPagination):
         if not self.limit:
             return None
 
-        return super().get_previous_link()
+        # Cursor mode: forward-only
+        if self.start is not None:
+            return None
 
+        return super().get_previous_link()
 
-class StripCountAnnotationsPaginator(OptionalLimitOffsetPagination):
+    def get_schema_operation_parameters(self, view):
+        parameters = super().get_schema_operation_parameters(view)
+        parameters.append({
+            'name': self.start_query_param,
+            'required': False,
+            'in': 'query',
+            'description': (
+                'Cursor-based pagination: return results with pk >= start, ordered by pk. '
+                'Mutually exclusive with offset.'
+            ),
+            'schema': {
+                'type': 'integer',
+            },
+        })
+        return parameters
+
+
+class StripCountAnnotationsPaginator(NetBoxPagination):
     """
     Strips the annotations on the queryset before getting the count
     to optimize pagination of complex queries.

+ 1 - 1
netbox/netbox/settings.py

@@ -724,7 +724,7 @@ REST_FRAMEWORK = {
         'rest_framework.filters.OrderingFilter',
     ),
     'DEFAULT_METADATA_CLASS': 'netbox.api.metadata.BulkOperationMetadata',
-    'DEFAULT_PAGINATION_CLASS': 'netbox.api.pagination.OptionalLimitOffsetPagination',
+    'DEFAULT_PAGINATION_CLASS': 'netbox.api.pagination.NetBoxPagination',
     'DEFAULT_PARSER_CLASSES': (
         'rest_framework.parsers.JSONParser',
         'rest_framework.parsers.MultiPartParser',

+ 33 - 2
netbox/netbox/tests/test_api.py

@@ -2,10 +2,11 @@ import uuid
 
 from django.test import RequestFactory, TestCase
 from django.urls import reverse
+from rest_framework.exceptions import ValidationError
 from rest_framework.request import Request
 
 from netbox.api.exceptions import QuerySetNotOrdered
-from netbox.api.pagination import OptionalLimitOffsetPagination
+from netbox.api.pagination import NetBoxPagination
 from users.models import Token
 from utilities.testing import APITestCase
 
@@ -48,7 +49,7 @@ class AppTest(APITestCase):
 class OptionalLimitOffsetPaginationTest(TestCase):
 
     def setUp(self):
-        self.paginator = OptionalLimitOffsetPagination()
+        self.paginator = NetBoxPagination()
         self.factory = RequestFactory()
 
     def _make_drf_request(self, path='/', query_params=None):
@@ -80,3 +81,33 @@ class OptionalLimitOffsetPaginationTest(TestCase):
         request = self._make_drf_request()
 
         self.paginator.paginate_queryset(iterable, request)  # Should not raise exception
+
+    def test_get_start_returns_none_when_absent(self):
+        """get_start() returns None when start param is not in the request"""
+        request = self._make_drf_request()
+        self.assertIsNone(self.paginator.get_start(request))
+
+    def test_get_start_returns_integer(self):
+        """get_start() returns an integer when start param is present"""
+        request = self._make_drf_request(query_params={'start': '42'})
+        self.assertEqual(self.paginator.get_start(request), 42)
+
+    def test_get_start_raises_for_negative(self):
+        """get_start() raises ValidationError for negative values"""
+        request = self._make_drf_request(query_params={'start': '-1'})
+        with self.assertRaises(ValidationError):
+            self.paginator.get_start(request)
+
+    def test_cursor_and_offset_conflict_raises_validation_error(self):
+        """paginate_queryset() raises ValidationError when both start and offset are specified"""
+        queryset = Token.objects.all().order_by('created')
+        request = self._make_drf_request(query_params={'start': '1', 'offset': '10'})
+        with self.assertRaises(ValidationError):
+            self.paginator.paginate_queryset(queryset, request)
+
+    def test_cursor_and_ordering_conflict_raises_validation_error(self):
+        """paginate_queryset() raises ValidationError when both start and ordering are specified"""
+        queryset = Token.objects.all().order_by('created')
+        request = self._make_drf_request(query_params={'start': '1', 'ordering': 'created'})
+        with self.assertRaises(ValidationError):
+            self.paginator.paginate_queryset(queryset, request)

+ 110 - 0
netbox/utilities/tests/test_api.py

@@ -187,6 +187,116 @@ class APIPaginationTestCase(APITestCase):
         self.assertIsNone(response.data['previous'])
         self.assertEqual(len(response.data['results']), 100)
 
+    def test_cursor_pagination(self):
+        """Basic cursor pagination returns results ordered by PK with correct next link."""
+        first_pk = Site.objects.order_by('pk').values_list('pk', flat=True).first()
+        response = self.client.get(f'{self.url}?start={first_pk}&limit=10', format='json', **self.header)
+
+        self.assertHttpStatus(response, status.HTTP_200_OK)
+        self.assertIsNone(response.data['count'])
+        self.assertIsNone(response.data['previous'])
+        self.assertEqual(len(response.data['results']), 10)
+
+        # Results should be ordered by PK
+        pks = [r['id'] for r in response.data['results']]
+        self.assertEqual(pks, sorted(pks))
+
+        # Next link should use start parameter
+        last_pk = pks[-1]
+        self.assertIn(f'start={last_pk + 1}', response.data['next'])
+        self.assertIn('limit=10', response.data['next'])
+
+    def test_cursor_pagination_last_page(self):
+        """Cursor pagination returns null next link when fewer results than limit."""
+        last_pk = Site.objects.order_by('pk').values_list('pk', flat=True).last()
+        response = self.client.get(f'{self.url}?start={last_pk}&limit=10', format='json', **self.header)
+
+        self.assertHttpStatus(response, status.HTTP_200_OK)
+        self.assertEqual(len(response.data['results']), 1)
+        self.assertIsNone(response.data['next'])
+        self.assertIsNone(response.data['previous'])
+
+    def test_cursor_pagination_no_results(self):
+        """Cursor pagination beyond all PKs returns empty results."""
+        max_pk = Site.objects.order_by('pk').values_list('pk', flat=True).last()
+        response = self.client.get(f'{self.url}?start={max_pk + 1000}&limit=10', format='json', **self.header)
+
+        self.assertHttpStatus(response, status.HTTP_200_OK)
+        self.assertEqual(len(response.data['results']), 0)
+        self.assertIsNone(response.data['next'])
+
+    def test_cursor_and_offset_conflict(self):
+        """Specifying both start and offset returns a 400 error."""
+        with disable_warnings('django.request'):
+            response = self.client.get(f'{self.url}?start=1&offset=10', format='json', **self.header)
+        self.assertHttpStatus(response, status.HTTP_400_BAD_REQUEST)
+
+    def test_cursor_and_ordering_conflict(self):
+        """Specifying both start and ordering returns a 400 error."""
+        with disable_warnings('django.request'):
+            response = self.client.get(f'{self.url}?start=1&ordering=name', format='json', **self.header)
+        self.assertHttpStatus(response, status.HTTP_400_BAD_REQUEST)
+
+    def test_cursor_negative_start(self):
+        """Negative start value returns a 400 error."""
+        with disable_warnings('django.request'):
+            response = self.client.get(f'{self.url}?start=-1', format='json', **self.header)
+        self.assertHttpStatus(response, status.HTTP_400_BAD_REQUEST)
+
+    def test_cursor_with_filters(self):
+        """Cursor pagination works alongside other query filters."""
+        response = self.client.get(f'{self.url}?start=0&limit=10&name=Site 1', format='json', **self.header)
+
+        self.assertHttpStatus(response, status.HTTP_200_OK)
+        self.assertIsNone(response.data['count'])
+        results = response.data['results']
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['name'], 'Site 1')
+
+    def test_offset_multi_page_traversal(self):
+        """Traverse all 100 objects using offset pagination and verify complete, non-overlapping coverage."""
+        collected_pks = []
+        url = f'{self.url}?limit=10'
+
+        while url:
+            response = self.client.get(url, format='json', **self.header)
+            self.assertHttpStatus(response, status.HTTP_200_OK)
+            self.assertEqual(response.data['count'], 100)
+            collected_pks.extend(r['id'] for r in response.data['results'])
+            url = response.data['next']
+
+        # Should have collected exactly 100 unique objects
+        self.assertEqual(len(set(collected_pks)), 100)
+
+    def test_cursor_multi_page_traversal(self):
+        """Traverse all 100 objects using cursor pagination and verify complete, non-overlapping coverage."""
+        collected_pks = []
+        first_pk = Site.objects.order_by('pk').values_list('pk', flat=True).first()
+        url = f'{self.url}?start={first_pk}&limit=10'
+
+        while url:
+            response = self.client.get(url, format='json', **self.header)
+            self.assertHttpStatus(response, status.HTTP_200_OK)
+            self.assertIsNone(response.data['count'])
+            self.assertIsNone(response.data['previous'])
+
+            page_pks = [r['id'] for r in response.data['results']]
+
+            # Each page should be ordered by PK
+            self.assertEqual(page_pks, sorted(page_pks))
+
+            # No overlap with previously collected PKs
+            self.assertFalse(set(page_pks) & set(collected_pks))
+
+            collected_pks.extend(page_pks)
+            url = response.data['next']
+
+        # Should have collected exactly 100 unique objects
+        self.assertEqual(len(set(collected_pks)), 100)
+
+        # Full result set should be in PK order
+        self.assertEqual(collected_pks, sorted(collected_pks))
+
 
 class APIOrderingTestCase(APITestCase):
     user_permissions = ('dcim.view_site',)