Ver Fonte

Closes #22526: Avoid query timeouts when updating custom fields on a large number of objects (#22556)

Jeremy Stretch há 1 semana atrás
pai
commit
68c1153325

+ 8 - 0
netbox/extras/constants.py

@@ -6,6 +6,14 @@ from extras.choices import LogLevelChoices
 # Custom fields
 # Custom fields
 CUSTOMFIELD_EMPTY_VALUES = (None, '', [])
 CUSTOMFIELD_EMPTY_VALUES = (None, '', [])
 
 
+# Maximum number of objects to update per query when provisioning, removing, or renaming custom
+# field data. Bounding the number of rows touched by each statement prevents very large tables from
+# exceeding the database statement timeout (JSONB updates rewrite each affected row). This value
+# sits at the throughput "knee": benchmarking jsonb_set() across a 1M-row table showed throughput
+# plateaus by ~5K rows/statement (raising it further yields no meaningful speedup), while keeping
+# each statement orders of magnitude below a typical statement timeout.
+CUSTOMFIELD_DATA_BATCH_SIZE = 5000
+
 # ImageAttachment
 # ImageAttachment
 IMAGE_ATTACHMENT_IMAGE_FORMATS = {
 IMAGE_ATTACHMENT_IMAGE_FORMATS = {
     'avif': 'image/avif',
     'avif': 'image/avif',

+ 52 - 20
netbox/extras/models/customfields.py

@@ -8,7 +8,7 @@ import jsonschema
 from django import forms
 from django import forms
 from django.conf import settings
 from django.conf import settings
 from django.core.validators import RegexValidator, ValidationError
 from django.core.validators import RegexValidator, ValidationError
-from django.db import models
+from django.db import models, transaction
 from django.db.models import F, Func, Value
 from django.db.models import F, Func, Value
 from django.db.models.expressions import RawSQL
 from django.db.models.expressions import RawSQL
 from django.urls import reverse
 from django.urls import reverse
@@ -19,6 +19,7 @@ from jsonschema.exceptions import ValidationError as JSONValidationError
 
 
 from core.models import ObjectType
 from core.models import ObjectType
 from extras.choices import *
 from extras.choices import *
+from extras.constants import CUSTOMFIELD_DATA_BATCH_SIZE
 from extras.data import CHOICE_SETS
 from extras.data import CHOICE_SETS
 from extras.fields import ChoiceSetField
 from extras.fields import ChoiceSetField
 from netbox.context import query_cache
 from netbox.context import query_cache
@@ -322,6 +323,32 @@ class CustomField(CloningMixin, ExportTemplatesMixin, OwnerMixin, ChangeLoggedMo
             return self.choice_set.get_choice_color(value)
             return self.choice_set.get_choice_color(value)
         return None
         return None
 
 
+    @staticmethod
+    def _update_object_data(model, **update_kwargs):
+        """
+        Apply an UPDATE to the custom_field_data of every instance of the given model in batches,
+        bounding the number of rows touched by each statement. A single unbounded UPDATE across
+        millions of rows can exceed the database statement timeout, because JSONB updates rewrite
+        each affected row in full. Batches are selected via keyset pagination on the primary key.
+
+        The batched updates are wrapped in a transaction so that the operation remains atomic, as
+        it was when performed by a single UPDATE. This guards against partially-applied data (e.g.
+        a renamed field landing on only some objects) should the loop be interrupted when not
+        already running inside a request's transaction. Batching avoids the statement timeout
+        regardless, as that limit applies per statement rather than per transaction.
+        """
+        with transaction.atomic():
+            last_pk = 0
+            while True:
+                pks = list(
+                    model.objects.filter(pk__gt=last_pk).order_by('pk')
+                    .values_list('pk', flat=True)[:CUSTOMFIELD_DATA_BATCH_SIZE]
+                )
+                if not pks:
+                    break
+                model.objects.filter(pk__in=pks).update(**update_kwargs)
+                last_pk = pks[-1]
+
     def populate_initial_data(self, content_types):
     def populate_initial_data(self, content_types):
         """
         """
         Populate initial custom field data upon either a) the creation of a new CustomField, or
         Populate initial custom field data upon either a) the creation of a new CustomField, or
@@ -333,14 +360,16 @@ class CustomField(CloningMixin, ExportTemplatesMixin, OwnerMixin, ChangeLoggedMo
         else:
         else:
             value = Value(self.default, models.JSONField())
             value = Value(self.default, models.JSONField())
         for ct in content_types:
         for ct in content_types:
-            ct.model_class().objects.update(
-                custom_field_data=Func(
-                    F('custom_field_data'),
-                    Value([self.name]),
-                    value,
-                    function='jsonb_set'
+            if model := ct.model_class():
+                self._update_object_data(
+                    model,
+                    custom_field_data=Func(
+                        F('custom_field_data'),
+                        Value([self.name]),
+                        value,
+                        function='jsonb_set'
+                    )
                 )
                 )
-            )
 
 
     def remove_stale_data(self, content_types):
     def remove_stale_data(self, content_types):
         """
         """
@@ -349,7 +378,8 @@ class CustomField(CloningMixin, ExportTemplatesMixin, OwnerMixin, ChangeLoggedMo
         """
         """
         for ct in content_types:
         for ct in content_types:
             if model := ct.model_class():
             if model := ct.model_class():
-                model.objects.update(
+                self._update_object_data(
+                    model,
                     custom_field_data=F('custom_field_data') - self.name
                     custom_field_data=F('custom_field_data') - self.name
                 )
                 )
 
 
@@ -359,17 +389,19 @@ class CustomField(CloningMixin, ExportTemplatesMixin, OwnerMixin, ChangeLoggedMo
         one, copying the value of the old key.
         one, copying the value of the old key.
         """
         """
         for ct in self.object_types.all():
         for ct in self.object_types.all():
-            ct.model_class().objects.update(
-                custom_field_data=Func(
-                    F('custom_field_data') - old_name,
-                    Value([new_name]),
-                    Func(
-                        F('custom_field_data'),
-                        function='jsonb_extract_path_text',
-                        template=f"to_jsonb(%(expressions)s -> '{old_name}')"
-                    ),
-                    function='jsonb_set')
-            )
+            if model := ct.model_class():
+                self._update_object_data(
+                    model,
+                    custom_field_data=Func(
+                        F('custom_field_data') - old_name,
+                        Value([new_name]),
+                        Func(
+                            F('custom_field_data'),
+                            function='jsonb_extract_path_text',
+                            template=f"to_jsonb(%(expressions)s -> '{old_name}')"
+                        ),
+                        function='jsonb_set')
+                )
 
 
     def clean(self):
     def clean(self):
         super().clean()
         super().clean()

+ 42 - 0
netbox/extras/tests/test_customfields.py

@@ -1,6 +1,7 @@
 import datetime
 import datetime
 import json
 import json
 from decimal import Decimal
 from decimal import Decimal
+from unittest.mock import patch
 
 
 from django.core.exceptions import ValidationError
 from django.core.exceptions import ValidationError
 from django.test import tag
 from django.test import tag
@@ -628,6 +629,47 @@ class CustomFieldTestCase(TestCase):
         self.assertNotIn('field1', site.custom_field_data)
         self.assertNotIn('field1', site.custom_field_data)
         self.assertEqual(site.custom_field_data['field2'], FIELD_DATA)
         self.assertEqual(site.custom_field_data['field2'], FIELD_DATA)
 
 
+    @patch('extras.models.customfields.CUSTOMFIELD_DATA_BATCH_SIZE', 2)
+    def test_batched_object_data_updates(self):
+        """
+        Provisioning, renaming, and removing custom field data is applied in batches. Use a small
+        batch size to ensure the data on every object is updated across multiple batches.
+        """
+        # The existing sites (created in setUpTestData) span multiple batches of size 2
+        site_count = Site.objects.count()
+        self.assertGreater(site_count, 2)
+
+        # Provisioning: a default value is populated onto every existing object
+        cf = CustomField.objects.create(
+            name='batched_field',
+            type=CustomFieldTypeChoices.TYPE_TEXT,
+            default='foo'
+        )
+        cf.object_types.set([self.object_type])
+        self.assertEqual(
+            Site.objects.filter(custom_field_data__batched_field='foo').count(),
+            site_count
+        )
+
+        # Renaming: the key is renamed on every existing object, preserving its value
+        cf.name = 'renamed_field'
+        cf.save()
+        self.assertEqual(
+            Site.objects.filter(custom_field_data__renamed_field='foo').count(),
+            site_count
+        )
+        self.assertEqual(
+            Site.objects.filter(custom_field_data__has_key='batched_field').count(),
+            0
+        )
+
+        # Removal: the key is stripped from every existing object when the field is deleted
+        cf.delete()
+        self.assertEqual(
+            Site.objects.filter(custom_field_data__has_key='renamed_field').count(),
+            0
+        )
+
     def test_default_value_validation(self):
     def test_default_value_validation(self):
         choiceset = CustomFieldChoiceSet.objects.create(
         choiceset = CustomFieldChoiceSet.objects.create(
             name="Test Choice Set",
             name="Test Choice Set",