Просмотр исходного кода

Closes #21363: Implement cursor-based pagination for the REST API (#21594)

Jeremy Stretch 13 часов назад
Родитель
Commit
6659bb3abe

+ 44 - 1
docs/integrations/rest-api.md

@@ -341,7 +341,7 @@ When retrieving devices and virtual machines via the REST API, each will include
 
 
 ## Pagination
 ## Pagination
 
 
-API responses which contain a list of many objects will be paginated for efficiency. The root JSON object returned by a list endpoint contains the following attributes:
+API responses which contain a list of many objects will be paginated for efficiency. NetBox employs offset-based pagination by default, which forms a page by skipping the number of objects indicated by the `offset` URL parameter. The root JSON object returned by a list endpoint contains the following attributes:
 
 
 * `count`: The total number of all objects matching the query
 * `count`: The total number of all objects matching the query
 * `next`: A hyperlink to the next page of results (if applicable)
 * `next`: A hyperlink to the next page of results (if applicable)
@@ -398,6 +398,49 @@ The maximum number of objects that can be returned is limited by the [`MAX_PAGE_
 !!! warning
 !!! warning
     Disabling the page size limit introduces a potential for very resource-intensive requests, since one API request can effectively retrieve an entire table from the database.
     Disabling the page size limit introduces a potential for very resource-intensive requests, since one API request can effectively retrieve an entire table from the database.
 
 
+### Cursor-Based Pagination
+
+For large datasets, offset-based pagination can become inefficient because the database must scan all rows up to the offset. As an alternative, cursor-based pagination uses the `start` query parameter to filter results by primary key (PK), enabling efficient keyset pagination.
+
+To use cursor-based pagination, pass `start` (the minimum PK value) and `limit` (the page size):
+
+```
+http://netbox/api/dcim/devices/?start=0&limit=100
+```
+
+This returns objects with an `id` greater than or equal to zero, ordered by PK, limited to 100 results. Below is an example response to a similar request made with an arbitrary `start` value.
+
+```json
+{
+    "count": null,
+    "next": "http://netbox/api/dcim/devices/?start=357&limit=100",
+    "previous": null,
+    "results": [
+        {
+            "id": 109,
+            "name": "dist-router07",
+            ...
+        },
+        ...
+        {
+            "id": 356,
+            "name": "acc-switch492",
+            ...
+        }
+    ]
+}
+```
+
+To iterate through all results, use the `id` of the last object in each response plus one as the `start` value for the next request. Continue until `next` is null.
+
+!!! info
+    Some important differences from offset-based pagination:
+
+    * `start` and `offset` are **mutually exclusive**; specifying both will result in a 400 error.
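+    * Results are always ordered by primary key when using `start`. This is required to ensure deterministic behavior. Consequently, `start` cannot be combined with the `ordering` parameter; specifying both will result in a 400 error.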
+    * `count` is always `null` in cursor mode, as counting all matching rows would partially negate the performance benefit of cursor-based pagination.
+    * `previous` is always `null`: cursor-based pagination supports only forward navigation.
+
 ## Interacting with Objects
 ## Interacting with Objects
 
 
 ### Retrieving Multiple Objects
 ### Retrieving Multiple Objects

+ 106 - 8
netbox/netbox/api/pagination.py

@@ -1,18 +1,40 @@
 from django.db.models import QuerySet
 from django.db.models import QuerySet
+from django.utils.translation import gettext_lazy as _
+from rest_framework.exceptions import ValidationError
 from rest_framework.pagination import LimitOffsetPagination
 from rest_framework.pagination import LimitOffsetPagination
+from rest_framework.utils.urls import remove_query_param, replace_query_param
 
 
 from netbox.api.exceptions import QuerySetNotOrdered
 from netbox.api.exceptions import QuerySetNotOrdered
 from netbox.config import get_config
 from netbox.config import get_config
 
 
 
 
-class OptionalLimitOffsetPagination(LimitOffsetPagination):
+class NetBoxPagination(LimitOffsetPagination):
     """
     """
-    Override the stock paginator to allow setting limit=0 to disable pagination for a request. This returns all objects
-    matching a query, but retains the same format as a paginated request. The limit can only be disabled if
-    MAX_PAGE_SIZE has been set to 0 or None.
+    Provides two mutually exclusive pagination mechanisms: offset-based and cursor-based.
+
+    Offset-based pagination employs `offset` and (optionally) `limit` parameters to page through results following the
+    model's natural order. `offset` indicates the number of results to skip. This provides very human-friendly behavior,
+    but performance can suffer when querying very large data sets due to the overhead required to determine the
+    starting point in the database.
+
+    Cursor-based pagination employs `start` and (optionally) `limit` parameters to page through results as ordered by
+    the model's primary key (i.e. `id`). `start` indicates the numeric ID of the first object to return; `limit`
+    indicates the maximum number of objects to return beginning with the specified ID. Objects *must* be ordered by ID
+    to ensure pagination is consistent. This approach is less human-friendly but offers superior performance to
+    offset-based pagination. In cursor mode, `count` is omitted (null) for performance.
+
+    Offset- and cursor-based pagination are mutually exclusive: Only `offset` _or_ `start` is permitted for a request.
+
+    `limit` may be set to zero (`?limit=0`). This returns all objects matching a query, but retains the same format as
+    a paginated request. The limit can only be disabled if `MAX_PAGE_SIZE` has been set to 0 or None.
     """
     """
+    start_query_param = 'start'
+
     def __init__(self):
     def __init__(self):
         self.default_limit = get_config().PAGINATE_COUNT
         self.default_limit = get_config().PAGINATE_COUNT
+        self.start = None
+        self._page_length = 0
+        self._last_pk = None
 
 
     def paginate_queryset(self, queryset, request, view=None):
     def paginate_queryset(self, queryset, request, view=None):
 
 
@@ -22,15 +44,42 @@ class OptionalLimitOffsetPagination(LimitOffsetPagination):
                 "ordering has been applied to the queryset for this API endpoint."
                 "ordering has been applied to the queryset for this API endpoint."
             )
             )
 
 
+        self.start = self.get_start(request)
+        self.limit = self.get_limit(request)
+        self.request = request
+
+        # Cursor-based pagination
+        if self.start is not None:
+            if self.offset_query_param in request.query_params:
+                raise ValidationError(
+                    _("'{start_param}' and '{offset_param}' are mutually exclusive.").format(
+                        start_param=self.start_query_param,
+                        offset_param=self.offset_query_param,
+                    )
+                )
+            if 'ordering' in request.query_params:
+                raise ValidationError(_("Ordering cannot be specified in conjunction with cursor-based pagination."))
+
+            self.count = None
+            self.offset = 0
+
+            queryset = queryset.filter(pk__gte=self.start).order_by('pk')
+            results = list(queryset[:self.limit]) if self.limit else list(queryset)
+
+            self._page_length = len(results)
+            if results:
+                self._last_pk = results[-1].pk if hasattr(results[-1], 'pk') else results[-1]['pk']
+
+            return results
+
+        # Offset-based pagination
         if isinstance(queryset, QuerySet):
         if isinstance(queryset, QuerySet):
             self.count = self.get_queryset_count(queryset)
             self.count = self.get_queryset_count(queryset)
         else:
         else:
             # We're dealing with an iterable, not a QuerySet
             # We're dealing with an iterable, not a QuerySet
             self.count = len(queryset)
             self.count = len(queryset)
 
 
-        self.limit = self.get_limit(request)
         self.offset = self.get_offset(request)
         self.offset = self.get_offset(request)
-        self.request = request
 
 
         if self.limit and self.count > self.limit and self.template is not None:
         if self.limit and self.count > self.limit and self.template is not None:
             self.display_page_controls = True
             self.display_page_controls = True
@@ -42,6 +91,25 @@ class OptionalLimitOffsetPagination(LimitOffsetPagination):
             return list(queryset[self.offset:self.offset + self.limit])
             return list(queryset[self.offset:self.offset + self.limit])
         return list(queryset[self.offset:])
         return list(queryset[self.offset:])
 
 
+    def get_start(self, request):
+        try:
+            value = int(request.query_params[self.start_query_param])
+            if value < 0:
+                raise ValidationError(
+                    _("Invalid '{param}' parameter: must be a non-negative integer.").format(
+                        param=self.start_query_param,
+                    )
+                )
+            return value
+        except KeyError:
+            return None
+        except (ValueError, TypeError):
+            raise ValidationError(
+                _("Invalid '{param}' parameter: must be a non-negative integer.").format(
+                    param=self.start_query_param,
+                )
+            )
+
     def get_limit(self, request):
     def get_limit(self, request):
         max_limit = self.default_limit
         max_limit = self.default_limit
         MAX_PAGE_SIZE = get_config().MAX_PAGE_SIZE
         MAX_PAGE_SIZE = get_config().MAX_PAGE_SIZE
@@ -75,6 +143,16 @@ class OptionalLimitOffsetPagination(LimitOffsetPagination):
         if not self.limit:
         if not self.limit:
             return None
             return None
 
 
+        # Cursor mode
+        if self.start is not None:
+            if self._page_length < self.limit:
+                return None
+            url = self.request.build_absolute_uri()
+            url = replace_query_param(url, self.start_query_param, self._last_pk + 1)
+            url = replace_query_param(url, self.limit_query_param, self.limit)
+            url = remove_query_param(url, self.offset_query_param)
+            return url
+
         return super().get_next_link()
         return super().get_next_link()
 
 
     def get_previous_link(self):
     def get_previous_link(self):
@@ -83,10 +161,30 @@ class OptionalLimitOffsetPagination(LimitOffsetPagination):
         if not self.limit:
         if not self.limit:
             return None
             return None
 
 
-        return super().get_previous_link()
+        # Cursor mode: forward-only
+        if self.start is not None:
+            return None
 
 
+        return super().get_previous_link()
 
 
-class StripCountAnnotationsPaginator(OptionalLimitOffsetPagination):
+    def get_schema_operation_parameters(self, view):
+        parameters = super().get_schema_operation_parameters(view)
+        parameters.append({
+            'name': self.start_query_param,
+            'required': False,
+            'in': 'query',
+            'description': (
+                'Cursor-based pagination: return results with pk >= start, ordered by pk. '
+                'Mutually exclusive with offset.'
+            ),
+            'schema': {
+                'type': 'integer',
+            },
+        })
+        return parameters
+
+
+class StripCountAnnotationsPaginator(NetBoxPagination):
     """
     """
     Strips the annotations on the queryset before getting the count
     Strips the annotations on the queryset before getting the count
     to optimize pagination of complex queries.
     to optimize pagination of complex queries.

+ 1 - 1
netbox/netbox/settings.py

@@ -724,7 +724,7 @@ REST_FRAMEWORK = {
         'rest_framework.filters.OrderingFilter',
         'rest_framework.filters.OrderingFilter',
     ),
     ),
     'DEFAULT_METADATA_CLASS': 'netbox.api.metadata.BulkOperationMetadata',
     'DEFAULT_METADATA_CLASS': 'netbox.api.metadata.BulkOperationMetadata',
-    'DEFAULT_PAGINATION_CLASS': 'netbox.api.pagination.OptionalLimitOffsetPagination',
+    'DEFAULT_PAGINATION_CLASS': 'netbox.api.pagination.NetBoxPagination',
     'DEFAULT_PARSER_CLASSES': (
     'DEFAULT_PARSER_CLASSES': (
         'rest_framework.parsers.JSONParser',
         'rest_framework.parsers.JSONParser',
         'rest_framework.parsers.MultiPartParser',
         'rest_framework.parsers.MultiPartParser',

+ 33 - 2
netbox/netbox/tests/test_api.py

@@ -2,10 +2,11 @@ import uuid
 
 
 from django.test import RequestFactory, TestCase
 from django.test import RequestFactory, TestCase
 from django.urls import reverse
 from django.urls import reverse
+from rest_framework.exceptions import ValidationError
 from rest_framework.request import Request
 from rest_framework.request import Request
 
 
 from netbox.api.exceptions import QuerySetNotOrdered
 from netbox.api.exceptions import QuerySetNotOrdered
-from netbox.api.pagination import OptionalLimitOffsetPagination
+from netbox.api.pagination import NetBoxPagination
 from users.models import Token
 from users.models import Token
 from utilities.testing import APITestCase
 from utilities.testing import APITestCase
 
 
@@ -48,7 +49,7 @@ class AppTest(APITestCase):
 class OptionalLimitOffsetPaginationTest(TestCase):
 class OptionalLimitOffsetPaginationTest(TestCase):
 
 
     def setUp(self):
     def setUp(self):
-        self.paginator = OptionalLimitOffsetPagination()
+        self.paginator = NetBoxPagination()
         self.factory = RequestFactory()
         self.factory = RequestFactory()
 
 
     def _make_drf_request(self, path='/', query_params=None):
     def _make_drf_request(self, path='/', query_params=None):
@@ -80,3 +81,33 @@ class OptionalLimitOffsetPaginationTest(TestCase):
         request = self._make_drf_request()
         request = self._make_drf_request()
 
 
         self.paginator.paginate_queryset(iterable, request)  # Should not raise exception
         self.paginator.paginate_queryset(iterable, request)  # Should not raise exception
+
+    def test_get_start_returns_none_when_absent(self):
+        """get_start() returns None when start param is not in the request"""
+        request = self._make_drf_request()
+        self.assertIsNone(self.paginator.get_start(request))
+
+    def test_get_start_returns_integer(self):
+        """get_start() returns an integer when start param is present"""
+        request = self._make_drf_request(query_params={'start': '42'})
+        self.assertEqual(self.paginator.get_start(request), 42)
+
+    def test_get_start_raises_for_negative(self):
+        """get_start() raises ValidationError for negative values"""
+        request = self._make_drf_request(query_params={'start': '-1'})
+        with self.assertRaises(ValidationError):
+            self.paginator.get_start(request)
+
+    def test_cursor_and_offset_conflict_raises_validation_error(self):
+        """paginate_queryset() raises ValidationError when both start and offset are specified"""
+        queryset = Token.objects.all().order_by('created')
+        request = self._make_drf_request(query_params={'start': '1', 'offset': '10'})
+        with self.assertRaises(ValidationError):
+            self.paginator.paginate_queryset(queryset, request)
+
+    def test_cursor_and_ordering_conflict_raises_validation_error(self):
+        """paginate_queryset() raises ValidationError when both start and ordering are specified"""
+        queryset = Token.objects.all().order_by('created')
+        request = self._make_drf_request(query_params={'start': '1', 'ordering': 'created'})
+        with self.assertRaises(ValidationError):
+            self.paginator.paginate_queryset(queryset, request)

+ 110 - 0
netbox/utilities/tests/test_api.py

@@ -187,6 +187,116 @@ class APIPaginationTestCase(APITestCase):
         self.assertIsNone(response.data['previous'])
         self.assertIsNone(response.data['previous'])
         self.assertEqual(len(response.data['results']), 100)
         self.assertEqual(len(response.data['results']), 100)
 
 
+    def test_cursor_pagination(self):
+        """Basic cursor pagination returns results ordered by PK with correct next link."""
+        first_pk = Site.objects.order_by('pk').values_list('pk', flat=True).first()
+        response = self.client.get(f'{self.url}?start={first_pk}&limit=10', format='json', **self.header)
+
+        self.assertHttpStatus(response, status.HTTP_200_OK)
+        self.assertIsNone(response.data['count'])
+        self.assertIsNone(response.data['previous'])
+        self.assertEqual(len(response.data['results']), 10)
+
+        # Results should be ordered by PK
+        pks = [r['id'] for r in response.data['results']]
+        self.assertEqual(pks, sorted(pks))
+
+        # Next link should use start parameter
+        last_pk = pks[-1]
+        self.assertIn(f'start={last_pk + 1}', response.data['next'])
+        self.assertIn('limit=10', response.data['next'])
+
+    def test_cursor_pagination_last_page(self):
+        """Cursor pagination returns null next link when fewer results than limit."""
+        last_pk = Site.objects.order_by('pk').values_list('pk', flat=True).last()
+        response = self.client.get(f'{self.url}?start={last_pk}&limit=10', format='json', **self.header)
+
+        self.assertHttpStatus(response, status.HTTP_200_OK)
+        self.assertEqual(len(response.data['results']), 1)
+        self.assertIsNone(response.data['next'])
+        self.assertIsNone(response.data['previous'])
+
+    def test_cursor_pagination_no_results(self):
+        """Cursor pagination beyond all PKs returns empty results."""
+        max_pk = Site.objects.order_by('pk').values_list('pk', flat=True).last()
+        response = self.client.get(f'{self.url}?start={max_pk + 1000}&limit=10', format='json', **self.header)
+
+        self.assertHttpStatus(response, status.HTTP_200_OK)
+        self.assertEqual(len(response.data['results']), 0)
+        self.assertIsNone(response.data['next'])
+
+    def test_cursor_and_offset_conflict(self):
+        """Specifying both start and offset returns a 400 error."""
+        with disable_warnings('django.request'):
+            response = self.client.get(f'{self.url}?start=1&offset=10', format='json', **self.header)
+        self.assertHttpStatus(response, status.HTTP_400_BAD_REQUEST)
+
+    def test_cursor_and_ordering_conflict(self):
+        """Specifying both start and ordering returns a 400 error."""
+        with disable_warnings('django.request'):
+            response = self.client.get(f'{self.url}?start=1&ordering=name', format='json', **self.header)
+        self.assertHttpStatus(response, status.HTTP_400_BAD_REQUEST)
+
+    def test_cursor_negative_start(self):
+        """Negative start value returns a 400 error."""
+        with disable_warnings('django.request'):
+            response = self.client.get(f'{self.url}?start=-1', format='json', **self.header)
+        self.assertHttpStatus(response, status.HTTP_400_BAD_REQUEST)
+
+    def test_cursor_with_filters(self):
+        """Cursor pagination works alongside other query filters."""
+        response = self.client.get(f'{self.url}?start=0&limit=10&name=Site 1', format='json', **self.header)
+
+        self.assertHttpStatus(response, status.HTTP_200_OK)
+        self.assertIsNone(response.data['count'])
+        results = response.data['results']
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['name'], 'Site 1')
+
+    def test_offset_multi_page_traversal(self):
+        """Traverse all 100 objects using offset pagination and verify complete, non-overlapping coverage."""
+        collected_pks = []
+        url = f'{self.url}?limit=10'
+
+        while url:
+            response = self.client.get(url, format='json', **self.header)
+            self.assertHttpStatus(response, status.HTTP_200_OK)
+            self.assertEqual(response.data['count'], 100)
+            collected_pks.extend(r['id'] for r in response.data['results'])
+            url = response.data['next']
+
+        # Should have collected exactly 100 unique objects
+        self.assertEqual(len(set(collected_pks)), 100)
+
+    def test_cursor_multi_page_traversal(self):
+        """Traverse all 100 objects using cursor pagination and verify complete, non-overlapping coverage."""
+        collected_pks = []
+        first_pk = Site.objects.order_by('pk').values_list('pk', flat=True).first()
+        url = f'{self.url}?start={first_pk}&limit=10'
+
+        while url:
+            response = self.client.get(url, format='json', **self.header)
+            self.assertHttpStatus(response, status.HTTP_200_OK)
+            self.assertIsNone(response.data['count'])
+            self.assertIsNone(response.data['previous'])
+
+            page_pks = [r['id'] for r in response.data['results']]
+
+            # Each page should be ordered by PK
+            self.assertEqual(page_pks, sorted(page_pks))
+
+            # No overlap with previously collected PKs
+            self.assertFalse(set(page_pks) & set(collected_pks))
+
+            collected_pks.extend(page_pks)
+            url = response.data['next']
+
+        # Should have collected exactly 100 unique objects
+        self.assertEqual(len(set(collected_pks)), 100)
+
+        # Full result set should be in PK order
+        self.assertEqual(collected_pks, sorted(collected_pks))
+
 
 
 class APIOrderingTestCase(APITestCase):
 class APIOrderingTestCase(APITestCase):
     user_permissions = ('dcim.view_site',)
     user_permissions = ('dcim.view_site',)