فهرست منبع

Closes #22526: Avoid query timeouts when updating custom fields on a large number of objects (#22556)

Jeremy Stretch 1 هفته پیش
والد
کامیت
68c1153325
3فایلهای تغییر یافته به همراه102 افزوده شده و 20 حذف شده
  1. 8 0
      netbox/extras/constants.py
  2. 52 20
      netbox/extras/models/customfields.py
  3. 42 0
      netbox/extras/tests/test_customfields.py

+ 8 - 0
netbox/extras/constants.py

@@ -6,6 +6,14 @@ from extras.choices import LogLevelChoices
 # Custom fields
 CUSTOMFIELD_EMPTY_VALUES = (None, '', [])
 
+# Maximum number of objects to update per query when provisioning, removing, or renaming custom
+# field data. Bounding the number of rows touched by each statement prevents very large tables from
+# exceeding the database statement timeout (JSONB updates rewrite each affected row). This value
+# sits at the throughput "knee": benchmarking jsonb_set() across a 1M-row table showed throughput
+# plateaus by ~5K rows/statement (raising it further yields no meaningful speedup), while keeping
+# each statement orders of magnitude below a typical statement timeout.
+CUSTOMFIELD_DATA_BATCH_SIZE = 5000
+
 # ImageAttachment
 IMAGE_ATTACHMENT_IMAGE_FORMATS = {
     'avif': 'image/avif',

+ 52 - 20
netbox/extras/models/customfields.py

@@ -8,7 +8,7 @@ import jsonschema
 from django import forms
 from django.conf import settings
 from django.core.validators import RegexValidator, ValidationError
-from django.db import models
+from django.db import models, transaction
 from django.db.models import F, Func, Value
 from django.db.models.expressions import RawSQL
 from django.urls import reverse
@@ -19,6 +19,7 @@ from jsonschema.exceptions import ValidationError as JSONValidationError
 
 from core.models import ObjectType
 from extras.choices import *
+from extras.constants import CUSTOMFIELD_DATA_BATCH_SIZE
 from extras.data import CHOICE_SETS
 from extras.fields import ChoiceSetField
 from netbox.context import query_cache
@@ -322,6 +323,32 @@ class CustomField(CloningMixin, ExportTemplatesMixin, OwnerMixin, ChangeLoggedMo
             return self.choice_set.get_choice_color(value)
         return None
 
+    @staticmethod
+    def _update_object_data(model, **update_kwargs):
+        """
+        Apply an UPDATE to the custom_field_data of every instance of the given model in batches,
+        bounding the number of rows touched by each statement. A single unbounded UPDATE across
+        millions of rows can exceed the database statement timeout, because JSONB updates rewrite
+        each affected row in full. Batches are selected via keyset pagination on the primary key.
+
+        The batched updates are wrapped in a transaction so that the operation remains atomic, as
+        it was when performed by a single UPDATE. This guards against partially-applied data (e.g.
+        a renamed field landing on only some objects) should the loop be interrupted when not
+        already running inside a request's transaction. Batching avoids the statement timeout
+        regardless, as that limit applies per statement rather than per transaction.
+        """
+        with transaction.atomic():
+            last_pk = 0
+            while True:
+                pks = list(
+                    model.objects.filter(pk__gt=last_pk).order_by('pk')
+                    .values_list('pk', flat=True)[:CUSTOMFIELD_DATA_BATCH_SIZE]
+                )
+                if not pks:
+                    break
+                model.objects.filter(pk__in=pks).update(**update_kwargs)
+                last_pk = pks[-1]
+
     def populate_initial_data(self, content_types):
         """
         Populate initial custom field data upon either a) the creation of a new CustomField, or
@@ -333,14 +360,16 @@ class CustomField(CloningMixin, ExportTemplatesMixin, OwnerMixin, ChangeLoggedMo
         else:
             value = Value(self.default, models.JSONField())
         for ct in content_types:
-            ct.model_class().objects.update(
-                custom_field_data=Func(
-                    F('custom_field_data'),
-                    Value([self.name]),
-                    value,
-                    function='jsonb_set'
+            if model := ct.model_class():
+                self._update_object_data(
+                    model,
+                    custom_field_data=Func(
+                        F('custom_field_data'),
+                        Value([self.name]),
+                        value,
+                        function='jsonb_set'
+                    )
                 )
-            )
 
     def remove_stale_data(self, content_types):
         """
@@ -349,7 +378,8 @@ class CustomField(CloningMixin, ExportTemplatesMixin, OwnerMixin, ChangeLoggedMo
         """
         for ct in content_types:
             if model := ct.model_class():
-                model.objects.update(
+                self._update_object_data(
+                    model,
                     custom_field_data=F('custom_field_data') - self.name
                 )
 
@@ -359,17 +389,19 @@ class CustomField(CloningMixin, ExportTemplatesMixin, OwnerMixin, ChangeLoggedMo
         one, copying the value of the old key.
         """
         for ct in self.object_types.all():
-            ct.model_class().objects.update(
-                custom_field_data=Func(
-                    F('custom_field_data') - old_name,
-                    Value([new_name]),
-                    Func(
-                        F('custom_field_data'),
-                        function='jsonb_extract_path_text',
-                        template=f"to_jsonb(%(expressions)s -> '{old_name}')"
-                    ),
-                    function='jsonb_set')
-            )
+            if model := ct.model_class():
+                self._update_object_data(
+                    model,
+                    custom_field_data=Func(
+                        F('custom_field_data') - old_name,
+                        Value([new_name]),
+                        Func(
+                            F('custom_field_data'),
+                            function='jsonb_extract_path_text',
+                            template=f"to_jsonb(%(expressions)s -> '{old_name}')"
+                        ),
+                        function='jsonb_set')
+                )
 
     def clean(self):
         super().clean()

+ 42 - 0
netbox/extras/tests/test_customfields.py

@@ -1,6 +1,7 @@
 import datetime
 import json
 from decimal import Decimal
+from unittest.mock import patch
 
 from django.core.exceptions import ValidationError
 from django.test import tag
@@ -628,6 +629,47 @@ class CustomFieldTestCase(TestCase):
         self.assertNotIn('field1', site.custom_field_data)
         self.assertEqual(site.custom_field_data['field2'], FIELD_DATA)
 
+    @patch('extras.models.customfields.CUSTOMFIELD_DATA_BATCH_SIZE', 2)
+    def test_batched_object_data_updates(self):
+        """
+        Provisioning, renaming, and removing custom field data is applied in batches. Use a small
+        batch size to ensure the data on every object is updated across multiple batches.
+        """
+        # The existing sites (created in setUpTestData) span multiple batches of size 2
+        site_count = Site.objects.count()
+        self.assertGreater(site_count, 2)
+
+        # Provisioning: a default value is populated onto every existing object
+        cf = CustomField.objects.create(
+            name='batched_field',
+            type=CustomFieldTypeChoices.TYPE_TEXT,
+            default='foo'
+        )
+        cf.object_types.set([self.object_type])
+        self.assertEqual(
+            Site.objects.filter(custom_field_data__batched_field='foo').count(),
+            site_count
+        )
+
+        # Renaming: the key is renamed on every existing object, preserving its value
+        cf.name = 'renamed_field'
+        cf.save()
+        self.assertEqual(
+            Site.objects.filter(custom_field_data__renamed_field='foo').count(),
+            site_count
+        )
+        self.assertEqual(
+            Site.objects.filter(custom_field_data__has_key='batched_field').count(),
+            0
+        )
+
+        # Removal: the key is stripped from every existing object when the field is deleted
+        cf.delete()
+        self.assertEqual(
+            Site.objects.filter(custom_field_data__has_key='renamed_field').count(),
+            0
+        )
+
     def test_default_value_validation(self):
         choiceset = CustomFieldChoiceSet.objects.create(
             name="Test Choice Set",