Explorar el Código

Adds csv dialect detection to bulk import view (#13563)

* adds csv dialect detection to bulk import view #13239

* adds sane delimiters for dialect detection #13239

* adds csv delimiter tests #13239

* adds csv delimiter on the form

* pass delimiter to clean_csv method #13239

* fix tests for csv import #13239

* fix tests for csv import #13239

* fix tests for csv import #13239

* fix tests for csv import #13239

* Improve auto-detection of import data format

* Misc cleanup

* Include tab as a supported delimiting character for auto-detection

* Move delimiting chars to a separate constant for easy reference

---------

Co-authored-by: Jeremy Stretch <jstretch@netboxlabs.com>
Abhimanyu Saharan hace 2 años
padre
commit
b7cfb2f7d9

+ 4 - 1
netbox/dcim/tests/test_views.py

@@ -17,7 +17,7 @@ from dcim.constants import *
 from dcim.models import *
 from ipam.models import ASN, RIR, VLAN, VRF
 from tenancy.models import Tenant
-from utilities.choices import ImportFormatChoices
+from utilities.choices import CSVDelimiterChoices, ImportFormatChoices
 from utilities.testing import ViewTestCases, create_tags, create_test_device, post_data
 from wireless.models import WirelessLAN
 
@@ -2014,6 +2014,7 @@ class ModuleTestCase(
             'data': {
                 'data': '\n'.join(csv_data),
                 'format': ImportFormatChoices.CSV,
+                'csv_delimiter': CSVDelimiterChoices.AUTO,
             }
         }
 
@@ -2030,6 +2031,7 @@ class ModuleTestCase(
             'data': {
                 'data': '\n'.join(csv_data),
                 'format': ImportFormatChoices.CSV,
+                'csv_delimiter': CSVDelimiterChoices.AUTO,
             }
         }
 
@@ -2106,6 +2108,7 @@ class ModuleTestCase(
             'data': {
                 'data': '\n'.join(csv_data),
                 'format': ImportFormatChoices.CSV,
+                'csv_delimiter': CSVDelimiterChoices.AUTO,
             }
         }
 

+ 6 - 1
netbox/extras/tests/test_customfields.py

@@ -12,6 +12,7 @@ from dcim.models import Manufacturer, Rack, Site
 from extras.choices import *
 from extras.models import CustomField, CustomFieldChoiceSet
 from ipam.models import VLAN
+from utilities.choices import CSVDelimiterChoices, ImportFormatChoices
 from utilities.testing import APITestCase, TestCase
 from virtualization.models import VirtualMachine
 
@@ -1176,7 +1177,11 @@ class CustomFieldImportTest(TestCase):
         )
         csv_data = '\n'.join(','.join(row) for row in data)
 
-        response = self.client.post(reverse('dcim:site_import'), {'data': csv_data, 'format': 'csv'})
+        response = self.client.post(reverse('dcim:site_import'), {
+            'data': csv_data,
+            'format': ImportFormatChoices.CSV,
+            'csv_delimiter': CSVDelimiterChoices.AUTO,
+        })
         self.assertEqual(response.status_code, 302)
         self.assertEqual(Site.objects.count(), 3)
 

+ 2 - 1
netbox/netbox/tests/test_import.py

@@ -3,7 +3,7 @@ from django.test import override_settings
 
 from dcim.models import *
 from users.models import ObjectPermission
-from utilities.choices import ImportFormatChoices
+from utilities.choices import CSVDelimiterChoices, ImportFormatChoices
 from utilities.testing import ModelViewTestCase, create_tags
 
 
@@ -30,6 +30,7 @@ class CSVImportTestCase(ModelViewTestCase):
         data = {
             'format': ImportFormatChoices.CSV,
             'data': self._get_csv_data(csv_data),
+            'csv_delimiter': CSVDelimiterChoices.AUTO,
         }
 
         # Assign model-level permission

+ 1 - 0
netbox/templates/generic/bulk_import.html

@@ -45,6 +45,7 @@ Context:
             <input type="hidden" name="import_method" value="direct" />
             {% render_field form.data %}
             {% render_field form.format %}
+            {% render_field form.csv_delimiter %}
             <div class="form-group">
               <div class="col col-md-12 text-end">
                 <button type="submit" name="data_submit" class="btn btn-primary">{% trans "Submit" %}</button>

+ 16 - 0
netbox/utilities/choices.py

@@ -1,6 +1,8 @@
 from django.conf import settings
 from django.utils.translation import gettext_lazy as _
 
+from .constants import CSV_DELIMITERS
+
 
 class ChoiceSetMeta(type):
     """
@@ -230,3 +232,17 @@ class ImportFormatChoices(ChoiceSet):
         (JSON, 'JSON'),
         (YAML, 'YAML'),
     ]
+
+
+class CSVDelimiterChoices(ChoiceSet):
+    AUTO = 'auto'
+    COMMA = CSV_DELIMITERS['comma']
+    SEMICOLON = CSV_DELIMITERS['semicolon']
+    TAB = CSV_DELIMITERS['tab']
+
+    CHOICES = [
+        (AUTO, _('Auto-detect')),
+        (COMMA, _('Comma')),
+        (SEMICOLON, _('Semicolon')),
+        (TAB, _('Tab')),
+    ]

+ 11 - 0
netbox/utilities/constants.py

@@ -58,3 +58,14 @@ HTTP_REQUEST_META_SAFE_COPY = [
     'SERVER_NAME',
     'SERVER_PORT',
 ]
+
+
+#
+# CSV-style format delimiters
+#
+
+CSV_DELIMITERS = {
+    'comma': ',',
+    'semicolon': ';',
+    'tab': '\t',
+}

+ 42 - 8
netbox/utilities/forms/bulk_import.py

@@ -7,10 +7,10 @@ from django import forms
 from django.utils.translation import gettext as _
 
 from core.forms.mixins import SyncedDataMixin
-from utilities.choices import ImportFormatChoices
+from utilities.choices import CSVDelimiterChoices, ImportFormatChoices, ImportMethodChoices
+from utilities.constants import CSV_DELIMITERS
 from utilities.forms.utils import parse_csv
 from .mixins import BootstrapMixin
-from ..choices import ImportMethodChoices
 
 
 class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
@@ -24,13 +24,20 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
         help_text=_("Enter object data in CSV, JSON or YAML format.")
     )
     upload_file = forms.FileField(
-        label="Data file",
+        label=_("Data file"),
         required=False
     )
     format = forms.ChoiceField(
         choices=ImportFormatChoices,
         initial=ImportFormatChoices.AUTO
     )
+    csv_delimiter = forms.ChoiceField(
+        choices=CSVDelimiterChoices,
+        initial=CSVDelimiterChoices.AUTO,
+        label=_("CSV delimiter"),
+        help_text=_("The character which delimits CSV fields. Applies only to CSV format."),
+        required=False
+    )
 
     data_field = 'data'
 
@@ -54,13 +61,18 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
 
         # Determine the data format
         if self.cleaned_data['format'] == ImportFormatChoices.AUTO:
-            format = self._detect_format(data)
+            if self.cleaned_data['csv_delimiter'] != CSVDelimiterChoices.AUTO:
+                # Specifying the CSV delimiter implies CSV format
+                format = ImportFormatChoices.CSV
+            else:
+                format = self._detect_format(data)
         else:
             format = self.cleaned_data['format']
 
         # Process data according to the selected format
         if format == ImportFormatChoices.CSV:
-            self.cleaned_data['data'] = self._clean_csv(data)
+            delimiter = self.cleaned_data.get('csv_delimiter', CSVDelimiterChoices.AUTO)
+            self.cleaned_data['data'] = self._clean_csv(data, delimiter=delimiter)
         elif format == ImportFormatChoices.JSON:
             self.cleaned_data['data'] = self._clean_json(data)
         elif format == ImportFormatChoices.YAML:
@@ -78,7 +90,10 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
                 return ImportFormatChoices.JSON
             if data.startswith('---') or data.startswith('- '):
                 return ImportFormatChoices.YAML
-            if ',' in data.split('\n', 1)[0]:
+            # Look for any of the CSV delimiters in the first line (ignoring the default 'auto' choice)
+            first_line = data.split('\n', 1)[0]
+            csv_delimiters = CSV_DELIMITERS.values()
+            if any(x in first_line for x in csv_delimiters):
                 return ImportFormatChoices.CSV
         except IndexError:
             pass
@@ -86,12 +101,31 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
             'format': _('Unable to detect data format. Please specify.')
         })
 
-    def _clean_csv(self, data):
+    def _clean_csv(self, data, delimiter=CSVDelimiterChoices.AUTO):
         """
         Clean CSV-formatted data. The first row will be treated as column headers.
         """
+        # Determine the CSV dialect
+        if delimiter == CSVDelimiterChoices.AUTO:
+            # This uses a rough heuristic to detect the CSV dialect based on the presence of supported delimiting
+            # characters. If the data is malformed, we'll fall back to the default Excel dialect.
+            delimiters = ''.join(CSV_DELIMITERS.values())
+            try:
+                dialect = csv.Sniffer().sniff(data.strip(), delimiters=delimiters)
+            except csv.Error:
+                dialect = csv.excel
+        elif delimiter in (CSVDelimiterChoices.COMMA, CSVDelimiterChoices.SEMICOLON):
+            dialect = csv.excel
+            dialect.delimiter = delimiter
+        elif delimiter == CSVDelimiterChoices.TAB:
+            dialect = csv.excel_tab
+        else:
+            raise forms.ValidationError({
+                'csv_delimiter': _('Invalid CSV delimiter'),
+            })
+
         stream = StringIO(data.strip())
-        reader = csv.reader(stream)
+        reader = csv.reader(stream, dialect=dialect)
         headers, records = parse_csv(reader)
 
         # Set CSV headers for reference by the model form

+ 8 - 4
netbox/utilities/testing/views.py

@@ -11,7 +11,7 @@ from extras.choices import ObjectChangeActionChoices
 from extras.models import ObjectChange
 from netbox.models.features import ChangeLoggingMixin
 from users.models import ObjectPermission
-from utilities.choices import ImportFormatChoices
+from utilities.choices import CSVDelimiterChoices, ImportFormatChoices
 from .base import ModelTestCase
 from .utils import disable_warnings, post_data
 
@@ -580,7 +580,8 @@ class ViewTestCases:
         def test_bulk_import_objects_without_permission(self):
             data = {
                 'data': self._get_csv_data(),
-                'format': 'csv',
+                'format': ImportFormatChoices.CSV,
+                'csv_delimiter': CSVDelimiterChoices.AUTO,
             }
 
             # Test GET without permission
@@ -597,7 +598,8 @@ class ViewTestCases:
             initial_count = self._get_queryset().count()
             data = {
                 'data': self._get_csv_data(),
-                'format': 'csv',
+                'format': ImportFormatChoices.CSV,
+                'csv_delimiter': CSVDelimiterChoices.AUTO,
             }
 
             # Assign model-level permission
@@ -626,6 +628,7 @@ class ViewTestCases:
             data = {
                 'format': ImportFormatChoices.CSV,
                 'data': csv_data,
+                'csv_delimiter': CSVDelimiterChoices.AUTO,
             }
 
             # Assign model-level permission
@@ -658,7 +661,8 @@ class ViewTestCases:
             initial_count = self._get_queryset().count()
             data = {
                 'data': self._get_csv_data(),
-                'format': 'csv',
+                'format': ImportFormatChoices.CSV,
+                'csv_delimiter': CSVDelimiterChoices.AUTO,
             }
 
             # Assign constrained permission

+ 33 - 0
netbox/utilities/tests/test_forms.py

@@ -331,3 +331,36 @@ class ImportFormTest(TestCase):
             form._detect_format('')
         with self.assertRaises(forms.ValidationError):
             form._detect_format('?')
+
+    def test_csv_delimiters(self):
+        form = BulkImportForm()
+
+        data = (
+            "a,b,c\n"
+            "1,2,3\n"
+            "4,5,6\n"
+        )
+        self.assertEqual(form._clean_csv(data, delimiter=','), [
+            {'a': '1', 'b': '2', 'c': '3'},
+            {'a': '4', 'b': '5', 'c': '6'},
+        ])
+
+        data = (
+            "a;b;c\n"
+            "1;2;3\n"
+            "4;5;6\n"
+        )
+        self.assertEqual(form._clean_csv(data, delimiter=';'), [
+            {'a': '1', 'b': '2', 'c': '3'},
+            {'a': '4', 'b': '5', 'c': '6'},
+        ])
+
+        data = (
+            "a\tb\tc\n"
+            "1\t2\t3\n"
+            "4\t5\t6\n"
+        )
+        self.assertEqual(form._clean_csv(data, delimiter='\t'), [
+            {'a': '1', 'b': '2', 'c': '3'},
+            {'a': '4', 'b': '5', 'c': '6'},
+        ])