Browse Source

Closes #21788: Return CSV export as a streaming response (#21974)

Jeremy Stretch 1 month ago
parent
commit
0563cc4585

+ 12 - 0
docs/configuration/miscellaneous.md

@@ -123,6 +123,18 @@ The maximum size (in bytes) of an incoming HTTP request (i.e. `GET` or `POST` da
 
 
 ---
 ---
 
 
+## STREAMING_EXPORTS
+
+!!! note "This parameter was introduced in NetBox v4.6."
+
+Default: `False`
+
+When set to `True`, CSV bulk exports are returned as a streaming HTTP response, emitting rows to the client as they are rendered rather than buffering the entire dataset in memory first. This can significantly reduce memory usage and time-to-first-byte for very large exports.
+
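+Because a streaming response carries no `Content-Length` header, and any error raised while rendering rows occurs only after the response has already begun (which can leave the client with a truncated file rather than an error page), this behavior is opt-in.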
+
+---
+
 ## ENFORCE_GLOBAL_UNIQUE
 ## ENFORCE_GLOBAL_UNIQUE
 
 
 !!! tip "Dynamic Configuration Parameter"
 !!! tip "Dynamic Configuration Parameter"

+ 17 - 2
netbox/dcim/tests/test_views.py

@@ -1,9 +1,12 @@
+import csv
 import json
 import json
 from decimal import Decimal
 from decimal import Decimal
+from io import StringIO
 from zoneinfo import ZoneInfo
 from zoneinfo import ZoneInfo
 
 
 import yaml
 import yaml
 from django.contrib.contenttypes.models import ContentType
 from django.contrib.contenttypes.models import ContentType
+from django.http import StreamingHttpResponse
 from django.test import override_settings, tag
 from django.test import override_settings, tag
 from django.urls import reverse
 from django.urls import reverse
 from netaddr import EUI
 from netaddr import EUI
@@ -1192,6 +1195,7 @@ console-ports:
                 self.assertHttpStatus(response, 200)
                 self.assertHttpStatus(response, 200)
                 self.assertContains(response, "Record 1 console-ports[1]: Must be a dictionary.")
                 self.assertContains(response, "Record 1 console-ports[1]: Must be a dictionary.")
 
 
+    @override_settings(STREAMING_EXPORTS=True)
     def test_export_objects(self):
     def test_export_objects(self):
         url = reverse('dcim:devicetype_list')
         url = reverse('dcim:devicetype_list')
         self.add_permissions('dcim.view_devicetype')
         self.add_permissions('dcim.view_devicetype')
@@ -1204,10 +1208,15 @@ console-ports:
         self.assertEqual(data[0]['manufacturer'], 'Manufacturer 1')
         self.assertEqual(data[0]['manufacturer'], 'Manufacturer 1')
         self.assertEqual(data[0]['model'], 'Device Type 1')
         self.assertEqual(data[0]['model'], 'Device Type 1')
 
 
-        # Test table-based export
+        # Test table-based export (streams row-by-row)
         response = self.client.get(f'{url}?export=table')
         response = self.client.get(f'{url}?export=table')
         self.assertHttpStatus(response, 200)
         self.assertHttpStatus(response, 200)
         self.assertEqual(response.get('Content-Type'), 'text/csv; charset=utf-8')
         self.assertEqual(response.get('Content-Type'), 'text/csv; charset=utf-8')
+        self.assertIsInstance(response, StreamingHttpResponse)
+        content = b''.join(response.streaming_content).decode('utf-8')
+        rows = list(csv.reader(StringIO(content)))
+        self.assertGreater(len(rows), 1)
+        self.assertEqual(len(rows) - 1, DeviceType.objects.count())
 
 
 
 
 class ModuleTypeTestCase(ViewTestCases.PrimaryObjectViewTestCase):
 class ModuleTypeTestCase(ViewTestCases.PrimaryObjectViewTestCase):
@@ -1573,6 +1582,7 @@ module-bays:
         self.assertEqual(mb1.name, 'Module Bay 1')
         self.assertEqual(mb1.name, 'Module Bay 1')
         self.assertEqual(mb1.position, '1')
         self.assertEqual(mb1.position, '1')
 
 
+    @override_settings(STREAMING_EXPORTS=True)
     def test_export_objects(self):
     def test_export_objects(self):
         url = reverse('dcim:moduletype_list')
         url = reverse('dcim:moduletype_list')
         self.add_permissions('dcim.view_moduletype')
         self.add_permissions('dcim.view_moduletype')
@@ -1585,10 +1595,15 @@ module-bays:
         self.assertEqual(data[0]['manufacturer'], 'Manufacturer 1')
         self.assertEqual(data[0]['manufacturer'], 'Manufacturer 1')
         self.assertEqual(data[0]['model'], 'Module Type 1')
         self.assertEqual(data[0]['model'], 'Module Type 1')
 
 
-        # Test table-based export
+        # Test table-based export (streams row-by-row)
         response = self.client.get(f'{url}?export=table')
         response = self.client.get(f'{url}?export=table')
         self.assertHttpStatus(response, 200)
         self.assertHttpStatus(response, 200)
         self.assertEqual(response.get('Content-Type'), 'text/csv; charset=utf-8')
         self.assertEqual(response.get('Content-Type'), 'text/csv; charset=utf-8')
+        self.assertIsInstance(response, StreamingHttpResponse)
+        content = b''.join(response.streaming_content).decode('utf-8')
+        rows = list(csv.reader(StringIO(content)))
+        self.assertGreater(len(rows), 1)
+        self.assertEqual(len(rows) - 1, ModuleType.objects.count())
 
 
 
 
 class ModuleTypeProfileTestCase(ViewTestCases.OrganizationalObjectViewTestCase):
 class ModuleTypeProfileTestCase(ViewTestCases.OrganizationalObjectViewTestCase):

+ 5 - 0
netbox/netbox/configuration_example.py

@@ -140,6 +140,11 @@ EMAIL = {
     'FROM_EMAIL': '',
     'FROM_EMAIL': '',
 }
 }
 
 
+# Return CSV bulk exports as a streaming HTTP response, which avoids buffering the entire dataset in memory before
+# sending it to the client. This is recommended for very large exports, but streamed responses carry no Content-Length
+# header and can fail partway through a download, so it is disabled by default.
+STREAMING_EXPORTS = False
+
 # Exempt certain models from the enforcement of view permissions. Models listed here will be viewable by all users and
 # Exempt certain models from the enforcement of view permissions. Models listed here will be viewable by all users and
 # by anonymous users. List models in the form `<app>.<model>`. Add '*' to this list to exempt all models.
 # by anonymous users. List models in the form `<app>.<model>`. Add '*' to this list to exempt all models.
 EXEMPT_VIEW_PERMISSIONS = [
 EXEMPT_VIEW_PERMISSIONS = [

+ 1 - 0
netbox/netbox/settings.py

@@ -121,6 +121,7 @@ DEFAULT_PERMISSIONS = getattr(configuration, 'DEFAULT_PERMISSIONS', {
 DEVELOPER = getattr(configuration, 'DEVELOPER', False)
 DEVELOPER = getattr(configuration, 'DEVELOPER', False)
 DOCS_ROOT = getattr(configuration, 'DOCS_ROOT', os.path.join(os.path.dirname(BASE_DIR), 'docs'))
 DOCS_ROOT = getattr(configuration, 'DOCS_ROOT', os.path.join(os.path.dirname(BASE_DIR), 'docs'))
 EMAIL = getattr(configuration, 'EMAIL', {})
 EMAIL = getattr(configuration, 'EMAIL', {})
+STREAMING_EXPORTS = getattr(configuration, 'STREAMING_EXPORTS', False)
 EVENTS_PIPELINE = getattr(configuration, 'EVENTS_PIPELINE', [
 EVENTS_PIPELINE = getattr(configuration, 'EVENTS_PIPELINE', [
     'extras.events.process_event_queue',
     'extras.events.process_event_queue',
 ])
 ])

+ 13 - 4
netbox/netbox/views/generic/bulk_views.py

@@ -3,6 +3,7 @@ import re
 from collections import Counter
 from collections import Counter
 from copy import deepcopy
 from copy import deepcopy
 
 
+from django.conf import settings
 from django.contrib import messages
 from django.contrib import messages
 from django.contrib.contenttypes.fields import GenericForeignKey, GenericRel
 from django.contrib.contenttypes.fields import GenericForeignKey, GenericRel
 from django.core.exceptions import FieldDoesNotExist, ObjectDoesNotExist, ValidationError
 from django.core.exceptions import FieldDoesNotExist, ObjectDoesNotExist, ValidationError
@@ -25,7 +26,7 @@ from netbox.models.features import ChangeLoggingMixin
 from netbox.object_actions import AddObject, BulkDelete, BulkEdit, BulkExport, BulkImport, BulkRename
 from netbox.object_actions import AddObject, BulkDelete, BulkEdit, BulkExport, BulkImport, BulkRename
 from utilities.error_handlers import handle_protectederror
 from utilities.error_handlers import handle_protectederror
 from utilities.exceptions import AbortRequest, PermissionsViolation
 from utilities.exceptions import AbortRequest, PermissionsViolation
-from utilities.export import TableExport
+from utilities.export import TableExport, stream_table_csv_response
 from utilities.forms import BulkDeleteForm, BulkRenameForm, restrict_form_fields
 from utilities.forms import BulkDeleteForm, BulkRenameForm, restrict_form_fields
 from utilities.forms.bulk_import import BulkImportForm
 from utilities.forms.bulk_import import BulkImportForm
 from utilities.htmx import htmx_partial
 from utilities.htmx import htmx_partial
@@ -103,15 +104,23 @@ class ObjectListView(BaseMultiObjectView, ActionsMixin, TableMixin):
         # those currently visible in the configured table view.
         # those currently visible in the configured table view.
         table._apply_prefetching(columns=[c for c in all_columns if c not in exclude_columns])
         table._apply_prefetching(columns=[c for c in all_columns if c not in exclude_columns])
 
 
+        filename = filename or f'netbox_{self.queryset.model._meta.verbose_name_plural}.csv'
+
+        if settings.STREAMING_EXPORTS:
+            return stream_table_csv_response(
+                table=table,
+                exclude_columns=exclude_columns,
+                filename=filename,
+                delimiter=delimiter,
+            )
+
         exporter = TableExport(
         exporter = TableExport(
             export_format=TableExport.CSV,
             export_format=TableExport.CSV,
             table=table,
             table=table,
             exclude_columns=exclude_columns,
             exclude_columns=exclude_columns,
             delimiter=delimiter,
             delimiter=delimiter,
         )
         )
-        return exporter.response(
-            filename=filename or f'netbox_{self.queryset.model._meta.verbose_name_plural}.csv'
-        )
+        return exporter.response(filename=filename)
 
 
     def export_template(self, template, request):
     def export_template(self, template, request):
         """
         """

+ 66 - 0
netbox/utilities/export.py

@@ -1,12 +1,21 @@
+import csv
+
+from django.http import StreamingHttpResponse
+from django.utils.encoding import force_str
 from django.utils.translation import gettext_lazy as _
 from django.utils.translation import gettext_lazy as _
+from django_tables2.data import TableQuerysetData
 from django_tables2.export import TableExport as TableExport_
 from django_tables2.export import TableExport as TableExport_
+from django_tables2.rows import BoundRow
 
 
 from utilities.constants import CSV_DELIMITERS
 from utilities.constants import CSV_DELIMITERS
 
 
 __all__ = (
 __all__ = (
     'TableExport',
     'TableExport',
+    'stream_table_csv_response',
 )
 )
 
 
+EXPORT_CHUNK_SIZE = 1000
+
 
 
 class TableExport(TableExport_):
 class TableExport(TableExport_):
     """
     """
@@ -24,3 +33,60 @@ class TableExport(TableExport_):
             delimiter = CSV_DELIMITERS[self.delimiter]
             delimiter = CSV_DELIMITERS[self.delimiter]
             return self.dataset.export(self.format, delimiter=delimiter)
             return self.dataset.export(self.format, delimiter=delimiter)
         return super().export()
         return super().export()
+
+
+class _EchoBuffer:
+    """
+    Minimal write-only, file-like object intended as the target of csv.writer.
+
+    write() stores nothing and simply returns the value it was given. Because csv.writer.writerow()
+    returns the result of the underlying write() call, each formatted CSV line can be captured
+    row-by-row and fed to a StreamingHttpResponse.
+    """
+    def write(self, value):
+        # Hand the formatted text straight back to the caller instead of buffering it.
+        return value
+
+
+def stream_table_csv_response(table, exclude_columns=None, filename=None, delimiter=None):
+    """
+    Return a StreamingHttpResponse that emits the given table's rows as CSV without first buffering
+    the entire dataset in memory. Queryset-backed tables are iterated in chunks using
+    QuerySet.iterator() to cap peak memory; prefetched relations are preserved under Django 4.1+.
+
+    The delimiter is validated and the column list is built when this function is called; the
+    header and data rows are produced lazily by a generator as the response is iterated.
+
+    Args:
+        table: The django-tables2 Table instance to export
+        exclude_columns: Iterable of column names to omit from the export. Columns flagged
+            exclude_from_export are always omitted.
+        filename: If not None, a Content-Disposition header is included in the HTTP response, indicating its
+            treatment as a file attachment with the specified name.
+        delimiter: Name of a delimiter in utilities.constants.CSV_DELIMITERS (defaults to 'comma')
+
+    Returns:
+        A StreamingHttpResponse with content type 'text/csv; charset=utf-8' whose body is the header row
+        followed by one CSV line per record.
+
+    Raises:
+        ValueError: If a delimiter name is given that is not a key of CSV_DELIMITERS.
+    """
+    if delimiter and delimiter not in CSV_DELIMITERS:
+        raise ValueError(_("Invalid delimiter name: {name}").format(name=delimiter))
+    # A missing or empty delimiter name falls back to 'comma'.
+    csv_delimiter = CSV_DELIMITERS[delimiter or 'comma']
+    exclude_columns = exclude_columns or set()
+
+    # Resolve the exported columns once, up front, so the header and every data row agree.
+    columns = [
+        column for column in table.columns.iterall()
+        if not (column.column.exclude_from_export or column.name in exclude_columns)
+    ]
+
+    # writer.writerow() returns whatever _EchoBuffer.write() returns, i.e. the formatted CSV line.
+    writer = csv.writer(_EchoBuffer(), delimiter=csv_delimiter)
+
+    def iter_records():
+        # Queryset-backed data: iterate the wrapped queryset via iterator() so records are fetched
+        # EXPORT_CHUNK_SIZE at a time. Any other data source is iterated as-is.
+        if isinstance(table.data, TableQuerysetData):
+            yield from table.data.data.iterator(chunk_size=EXPORT_CHUNK_SIZE)
+        else:
+            yield from table.data
+
+    def row_generator():
+        # Header row first...
+        yield writer.writerow([
+            force_str(column.header, strings_only=True) for column in columns
+        ])
+        # ...then one CSV line per record, with each cell value resolved through a BoundRow.
+        for record in iter_records():
+            row = BoundRow(record, table=table)
+            yield writer.writerow([
+                force_str(row.get_cell_value(column.name), strings_only=True)
+                for column in columns
+            ])
+
+    response = StreamingHttpResponse(row_generator(), content_type='text/csv; charset=utf-8')
+    if filename is not None:
+        response['Content-Disposition'] = f'attachment; filename="{filename}"'
+    return response