Просмотр исходного кода

Closes #21788: Return CSV export as a streaming response (#21974)

Jeremy Stretch 1 месяц назад
Родитель
Коммит
0563cc4585

+ 12 - 0
docs/configuration/miscellaneous.md

@@ -123,6 +123,18 @@ The maximum size (in bytes) of an incoming HTTP request (i.e. `GET` or `POST` da
 
 ---
 
+## STREAMING_EXPORTS
+
+!!! note "This parameter was introduced in NetBox v4.6."
+
+Default: `False`
+
+When set to `True`, CSV bulk exports are returned as a streaming HTTP response, emitting rows to the client as they are rendered rather than buffering the entire dataset in memory first. This can significantly reduce memory usage and time-to-first-byte for very large exports.
+
+Because streaming responses do not include a `Content-Length` header, and any error raised while rendering rows surfaces only after the response has already begun, this behavior is opt-in.
+
+---
+
 ## ENFORCE_GLOBAL_UNIQUE
 
 !!! tip "Dynamic Configuration Parameter"

+ 17 - 2
netbox/dcim/tests/test_views.py

@@ -1,9 +1,12 @@
+import csv
 import json
 from decimal import Decimal
+from io import StringIO
 from zoneinfo import ZoneInfo
 
 import yaml
 from django.contrib.contenttypes.models import ContentType
+from django.http import StreamingHttpResponse
 from django.test import override_settings, tag
 from django.urls import reverse
 from netaddr import EUI
@@ -1192,6 +1195,7 @@ console-ports:
                 self.assertHttpStatus(response, 200)
                 self.assertContains(response, "Record 1 console-ports[1]: Must be a dictionary.")
 
+    @override_settings(STREAMING_EXPORTS=True)
     def test_export_objects(self):
         url = reverse('dcim:devicetype_list')
         self.add_permissions('dcim.view_devicetype')
@@ -1204,10 +1208,15 @@ console-ports:
         self.assertEqual(data[0]['manufacturer'], 'Manufacturer 1')
         self.assertEqual(data[0]['model'], 'Device Type 1')
 
-        # Test table-based export
+        # Test table-based export (streams row-by-row)
         response = self.client.get(f'{url}?export=table')
         self.assertHttpStatus(response, 200)
         self.assertEqual(response.get('Content-Type'), 'text/csv; charset=utf-8')
+        self.assertIsInstance(response, StreamingHttpResponse)
+        content = b''.join(response.streaming_content).decode('utf-8')
+        rows = list(csv.reader(StringIO(content)))
+        self.assertGreater(len(rows), 1)
+        self.assertEqual(len(rows) - 1, DeviceType.objects.count())
 
 
 class ModuleTypeTestCase(ViewTestCases.PrimaryObjectViewTestCase):
@@ -1573,6 +1582,7 @@ module-bays:
         self.assertEqual(mb1.name, 'Module Bay 1')
         self.assertEqual(mb1.position, '1')
 
+    @override_settings(STREAMING_EXPORTS=True)
     def test_export_objects(self):
         url = reverse('dcim:moduletype_list')
         self.add_permissions('dcim.view_moduletype')
@@ -1585,10 +1595,15 @@ module-bays:
         self.assertEqual(data[0]['manufacturer'], 'Manufacturer 1')
         self.assertEqual(data[0]['model'], 'Module Type 1')
 
-        # Test table-based export
+        # Test table-based export (streams row-by-row)
         response = self.client.get(f'{url}?export=table')
         self.assertHttpStatus(response, 200)
         self.assertEqual(response.get('Content-Type'), 'text/csv; charset=utf-8')
+        self.assertIsInstance(response, StreamingHttpResponse)
+        content = b''.join(response.streaming_content).decode('utf-8')
+        rows = list(csv.reader(StringIO(content)))
+        self.assertGreater(len(rows), 1)
+        self.assertEqual(len(rows) - 1, ModuleType.objects.count())
 
 
 class ModuleTypeProfileTestCase(ViewTestCases.OrganizationalObjectViewTestCase):

+ 5 - 0
netbox/netbox/configuration_example.py

@@ -140,6 +140,11 @@ EMAIL = {
     'FROM_EMAIL': '',
 }
 
+# Return CSV bulk exports as a streaming HTTP response, which avoids buffering the entire dataset in memory before
+# sending it to the client. This is recommended for very large exports, but it alters the response behavior so it is
+# disabled by default.
+STREAMING_EXPORTS = False
+
 # Exempt certain models from the enforcement of view permissions. Models listed here will be viewable by all users and
 # by anonymous users. List models in the form `<app>.<model>`. Add '*' to this list to exempt all models.
 EXEMPT_VIEW_PERMISSIONS = [

+ 1 - 0
netbox/netbox/settings.py

@@ -121,6 +121,7 @@ DEFAULT_PERMISSIONS = getattr(configuration, 'DEFAULT_PERMISSIONS', {
 DEVELOPER = getattr(configuration, 'DEVELOPER', False)
 DOCS_ROOT = getattr(configuration, 'DOCS_ROOT', os.path.join(os.path.dirname(BASE_DIR), 'docs'))
 EMAIL = getattr(configuration, 'EMAIL', {})
+STREAMING_EXPORTS = getattr(configuration, 'STREAMING_EXPORTS', False)
 EVENTS_PIPELINE = getattr(configuration, 'EVENTS_PIPELINE', [
     'extras.events.process_event_queue',
 ])

+ 13 - 4
netbox/netbox/views/generic/bulk_views.py

@@ -3,6 +3,7 @@ import re
 from collections import Counter
 from copy import deepcopy
 
+from django.conf import settings
 from django.contrib import messages
 from django.contrib.contenttypes.fields import GenericForeignKey, GenericRel
 from django.core.exceptions import FieldDoesNotExist, ObjectDoesNotExist, ValidationError
@@ -25,7 +26,7 @@ from netbox.models.features import ChangeLoggingMixin
 from netbox.object_actions import AddObject, BulkDelete, BulkEdit, BulkExport, BulkImport, BulkRename
 from utilities.error_handlers import handle_protectederror
 from utilities.exceptions import AbortRequest, PermissionsViolation
-from utilities.export import TableExport
+from utilities.export import TableExport, stream_table_csv_response
 from utilities.forms import BulkDeleteForm, BulkRenameForm, restrict_form_fields
 from utilities.forms.bulk_import import BulkImportForm
 from utilities.htmx import htmx_partial
@@ -103,15 +104,23 @@ class ObjectListView(BaseMultiObjectView, ActionsMixin, TableMixin):
         # those currently visible in the configured table view.
         table._apply_prefetching(columns=[c for c in all_columns if c not in exclude_columns])
 
+        filename = filename or f'netbox_{self.queryset.model._meta.verbose_name_plural}.csv'
+
+        if settings.STREAMING_EXPORTS:
+            return stream_table_csv_response(
+                table=table,
+                exclude_columns=exclude_columns,
+                filename=filename,
+                delimiter=delimiter,
+            )
+
         exporter = TableExport(
             export_format=TableExport.CSV,
             table=table,
             exclude_columns=exclude_columns,
             delimiter=delimiter,
         )
-        return exporter.response(
-            filename=filename or f'netbox_{self.queryset.model._meta.verbose_name_plural}.csv'
-        )
+        return exporter.response(filename=filename)
 
     def export_template(self, template, request):
         """

+ 66 - 0
netbox/utilities/export.py

@@ -1,12 +1,21 @@
+import csv
+
+from django.http import StreamingHttpResponse
+from django.utils.encoding import force_str
 from django.utils.translation import gettext_lazy as _
+from django_tables2.data import TableQuerysetData
 from django_tables2.export import TableExport as TableExport_
+from django_tables2.rows import BoundRow
 
 from utilities.constants import CSV_DELIMITERS
 
 __all__ = (
     'TableExport',
+    'stream_table_csv_response',
 )
 
+EXPORT_CHUNK_SIZE = 1000
+
 
 class TableExport(TableExport_):
     """
@@ -24,3 +33,60 @@ class TableExport(TableExport_):
             delimiter = CSV_DELIMITERS[self.delimiter]
             return self.dataset.export(self.format, delimiter=delimiter)
         return super().export()
+
+
+class _EchoBuffer:
+    """
+    Minimal file-like object whose write() returns the value it was given instead of storing it.
+
+    csv.writer.writerow() returns whatever the underlying file object's write() returns, so pairing
+    a csv.writer with this buffer lets each formatted CSV row be captured row-by-row and fed to a
+    StreamingHttpResponse.
+    """
+    def write(self, value):
+        # Nothing is retained here; the formatted text is handed straight back to the caller.
+        return value
+
+
+def stream_table_csv_response(table, exclude_columns=None, filename=None, delimiter=None):
+    """
+    Return a StreamingHttpResponse that emits the given table's rows as CSV without first buffering
+    the entire dataset in memory. Queryset-backed tables are iterated in chunks using
+    QuerySet.iterator() to cap peak memory; prefetched relations are preserved under Django 4.1+.
+
+    The delimiter is validated and the exported columns are resolved immediately; the header and
+    data rows themselves are only rendered as the response body is consumed.
+
+    Args:
+        table: The django-tables2 Table instance to export
+        exclude_columns: Iterable of column names to omit from the export
+        filename: If set, a Content-Disposition header is included in the HTTP response, indicating its treatment
+            as a file attachment with the specified name.
+        delimiter: Name of a delimiter in utilities.constants.CSV_DELIMITERS (defaults to 'comma')
+
+    Returns:
+        A StreamingHttpResponse with content type 'text/csv; charset=utf-8' whose body is the header
+        row followed by one CSV row per record.
+
+    Raises:
+        ValueError: If a delimiter name is given that is not a key of CSV_DELIMITERS.
+    """
+    # Validate up front so an unknown delimiter fails before any response object is created.
+    if delimiter and delimiter not in CSV_DELIMITERS:
+        raise ValueError(_("Invalid delimiter name: {name}").format(name=delimiter))
+    csv_delimiter = CSV_DELIMITERS[delimiter or 'comma']
+    # A missing exclusion list means "exclude nothing".
+    exclude_columns = exclude_columns or set()
+
+    # Resolve the exported columns once; the same list drives the header row and every data row.
+    # Columns flagged exclude_from_export are always dropped, in addition to those named by the caller.
+    columns = [
+        column for column in table.columns.iterall()
+        if not (column.column.exclude_from_export or column.name in exclude_columns)
+    ]
+
+    # writerow() passes each formatted row to _EchoBuffer.write(), which returns it unchanged, so
+    # the return value of writerow() is the CSV text for that row.
+    writer = csv.writer(_EchoBuffer(), delimiter=csv_delimiter)
+
+    def iter_records():
+        # Queryset-backed data is read through iterator() in chunks of EXPORT_CHUNK_SIZE; any other
+        # data source is iterated directly.
+        # NOTE(review): table.data.data is presumably the underlying QuerySet — confirm.
+        if isinstance(table.data, TableQuerysetData):
+            yield from table.data.data.iterator(chunk_size=EXPORT_CHUNK_SIZE)
+        else:
+            yield from table.data
+
+    def row_generator():
+        # The header row is yielded first, followed by one CSV row per record.
+        yield writer.writerow([
+            force_str(column.header, strings_only=True) for column in columns
+        ])
+        for record in iter_records():
+            # Bind the record to the table so each cell value can be looked up by column name.
+            row = BoundRow(record, table=table)
+            yield writer.writerow([
+                force_str(row.get_cell_value(column.name), strings_only=True)
+                for column in columns
+            ])
+
+    response = StreamingHttpResponse(row_generator(), content_type='text/csv; charset=utf-8')
+    # Only an explicit None suppresses the attachment header.
+    if filename is not None:
+        response['Content-Disposition'] = f'attachment; filename="{filename}"'
+    return response