Просмотр исходного кода

Closes #11163: Auto-detect data format during bulk import

jeremystretch 3 года назад
Родитель
Коммит
80ced6b782

+ 2 - 2
docs/release-notes/version-3.4.md

@@ -39,9 +39,9 @@ can be saved and applied to future queries as
 
 Saved filters can be kept private, or shared among NetBox users. They can be applied to both UI and REST API searches.
 
-### JSON/YAML Bulk Imports ([#4347](https://github.com/netbox-community/netbox/issues/4347))
+#### JSON/YAML Bulk Imports ([#4347](https://github.com/netbox-community/netbox/issues/4347))
 
-NetBox's bulk import feature, which was previously limited to CSV-formatted data for most types of objects, has been extended to accept data formatted in JSON or YAML as well. This enables users to directly import objects from a variety of sources without needing to first convert data to CSV.
+NetBox's bulk import feature, which was previously limited to CSV-formatted data for most types of objects, has been extended to accept data formatted in JSON or YAML as well. This enables users to directly import objects from a variety of sources without needing to first convert data to CSV. NetBox will attempt to automatically determine the format of import data if not specified by the user.
 
 #### Update Existing Objects via Bulk Import ([#7961](https://github.com/netbox-community/netbox/issues/7961))
 

+ 0 - 1
netbox/netbox/forms/base.py

@@ -1,6 +1,5 @@
 from django import forms
 from django.contrib.contenttypes.models import ContentType
-from django.core.exceptions import ValidationError
 from django.db.models import Q
 from django.utils.translation import gettext as _
 

+ 2 - 0
netbox/utilities/choices.py

@@ -204,11 +204,13 @@ class ButtonColorChoices(ChoiceSet):
 #
 
 class ImportFormatChoices(ChoiceSet):
+    AUTO = 'auto'
     CSV = 'csv'
     JSON = 'json'
     YAML = 'yaml'
 
     CHOICES = [
+        (AUTO, 'Auto-detect'),
         (CSV, 'CSV'),
         (JSON, 'JSON'),
         (YAML, 'YAML'),

+ 40 - 18
netbox/utilities/forms/forms.py

@@ -163,10 +163,9 @@ class ImportForm(BootstrapMixin, forms.Form):
         label="Data file",
         required=False
     )
-    # TODO: Enable auto-detection of format
     format = forms.ChoiceField(
         choices=ImportFormatChoices,
-        initial=ImportFormatChoices.CSV,
+        initial=ImportFormatChoices.AUTO,
         widget=StaticSelect()
     )
 
@@ -174,7 +173,6 @@ class ImportForm(BootstrapMixin, forms.Form):
 
     def clean(self):
         super().clean()
-        format = self.cleaned_data['format']
 
         # Determine whether we're reading from form data or an uploaded file
         if self.cleaned_data['data'] and self.cleaned_data['data_file']:
@@ -186,6 +184,12 @@ class ImportForm(BootstrapMixin, forms.Form):
         else:
             data = self.cleaned_data['data']
 
+        # Determine the data format
+        if self.cleaned_data['format'] == ImportFormatChoices.AUTO:
+            format = self._detect_format(data)
+        else:
+            format = self.cleaned_data['format']
+
         # Process data according to the selected format
         if format == ImportFormatChoices.CSV:
             self.cleaned_data['data'] = self._clean_csv(data)
@@ -194,7 +198,28 @@ class ImportForm(BootstrapMixin, forms.Form):
         elif format == ImportFormatChoices.YAML:
             self.cleaned_data['data'] = self._clean_yaml(data)
 
+    def _detect_format(self, data):
+        """
+        Attempt to automatically detect the format (CSV, JSON, or YAML) of the given data, or raise
+        a ValidationError.
+        """
+        try:
+            if data[0] in ('{', '['):
+                return ImportFormatChoices.JSON
+            if data.startswith('---') or data.startswith('- '):
+                return ImportFormatChoices.YAML
+            if ',' in data.split('\n', 1)[0]:
+                return ImportFormatChoices.CSV
+        except IndexError:
+            pass
+        raise forms.ValidationError({
+            'format': _('Unable to detect data format. Please specify.')
+        })
+
     def _clean_csv(self, data):
+        """
+        Clean CSV-formatted data. The first row will be treated as column headers.
+        """
         stream = StringIO(data.strip())
         reader = csv.reader(stream)
         headers, records = parse_csv(reader)
@@ -205,6 +230,9 @@ class ImportForm(BootstrapMixin, forms.Form):
         return records
 
     def _clean_json(self, data):
+        """
+        Clean JSON-formatted data. If only a single object is defined, it will be encapsulated as a list.
+        """
         try:
             data = json.loads(data)
             # Accommodate for users entering single objects
@@ -217,30 +245,24 @@ class ImportForm(BootstrapMixin, forms.Form):
             })
 
     def _clean_yaml(self, data):
+        """
+        Clean YAML-formatted data. Data must be either
+          a) A single document comprising a list of dictionaries (each representing an object), or
+          b) Multiple documents, separated with the '---' token
+        """
         records = []
         try:
             for data in yaml.load_all(data, Loader=yaml.SafeLoader):
-                # checks here are to support both arrays and multiple documents in
-                # yaml data and return as a consistent list for processing (array):
-                #     - address: 10.0.1.0/24
-                #       status: active
-                #     - address: 10.0.1.1/24
-                #       status: active
-                # vs (multi-document):
-                #     - address: 10.0.1.0/24
-                #       status: active
-                #     ---
-                #     - address: 10.0.1.1/24
-                #       status: active
-                # device_type output uses multi-document format, but array format
-                # is more common output from other tools.
                 if type(data) == list:
                     records.extend(data)
                 elif type(data) == dict:
                     records.append(data)
                 else:
                     raise forms.ValidationError({
-                        self.data_field: "Invalid YAML data: data must be dictionaries or lists of dictionaries"
+                        self.data_field: _(
+                            "Invalid YAML data. Data must be in the form of multiple documents, or a single document "
+                            "comprising a list of dictionaries."
+                        )
                     })
         except yaml.error.YAMLError as err:
             raise forms.ValidationError({

+ 49 - 0
netbox/utilities/tests/test_forms.py

@@ -2,6 +2,8 @@ from django import forms
 from django.test import TestCase
 
 from ipam.forms import IPAddressImportForm
+from utilities.choices import ImportFormatChoices
+from utilities.forms import ImportForm
 from utilities.forms.fields import CSVDataField
 from utilities.forms.utils import expand_alphanumeric_pattern, expand_ipaddress_pattern
 
@@ -365,3 +367,50 @@ class CSVDataFieldTest(TestCase):
         """
         with self.assertRaises(forms.ValidationError):
             self.field.clean(input)
+
+
+class ImportFormTest(TestCase):
+
+    def test_format_detection(self):
+        form = ImportForm()
+
+        data = (
+            "a,b,c\n"
+            "1,2,3\n"
+            "4,5,6\n"
+        )
+        self.assertEqual(form._detect_format(data), ImportFormatChoices.CSV)
+
+        data = '{"a": 1, "b": 2, "c": 3"}'
+        self.assertEqual(form._detect_format(data), ImportFormatChoices.JSON)
+
+        data = '[{"a": 1, "b": 2, "c": 3"}, {"a": 4, "b": 5, "c": 6"}]'
+        self.assertEqual(form._detect_format(data), ImportFormatChoices.JSON)
+
+        data = (
+            "- a: 1\n"
+            "  b: 2\n"
+            "  c: 3\n"
+            "- a: 4\n"
+            "  b: 5\n"
+            "  c: 6\n"
+        )
+        self.assertEqual(form._detect_format(data), ImportFormatChoices.YAML)
+
+        data = (
+            "---\n"
+            "a: 1\n"
+            "b: 2\n"
+            "c: 3\n"
+            "---\n"
+            "a: 4\n"
+            "b: 5\n"
+            "c: 6\n"
+        )
+        self.assertEqual(form._detect_format(data), ImportFormatChoices.YAML)
+
+        # Invalid data
+        with self.assertRaises(forms.ValidationError):
+            form._detect_format('')
+        with self.assertRaises(forms.ValidationError):
+            form._detect_format('?')