|
@@ -7,10 +7,10 @@ from django import forms
|
|
|
from django.utils.translation import gettext as _
|
|
from django.utils.translation import gettext as _
|
|
|
|
|
|
|
|
from core.forms.mixins import SyncedDataMixin
|
|
from core.forms.mixins import SyncedDataMixin
|
|
|
-from utilities.choices import ImportFormatChoices
|
|
|
|
|
|
|
+from utilities.choices import CSVDelimiterChoices, ImportFormatChoices, ImportMethodChoices
|
|
|
|
|
+from utilities.constants import CSV_DELIMITERS
|
|
|
from utilities.forms.utils import parse_csv
|
|
from utilities.forms.utils import parse_csv
|
|
|
from .mixins import BootstrapMixin
|
|
from .mixins import BootstrapMixin
|
|
|
-from ..choices import ImportMethodChoices
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
|
|
class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
|
|
@@ -24,13 +24,20 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
|
|
|
help_text=_("Enter object data in CSV, JSON or YAML format.")
|
|
help_text=_("Enter object data in CSV, JSON or YAML format.")
|
|
|
)
|
|
)
|
|
|
upload_file = forms.FileField(
|
|
upload_file = forms.FileField(
|
|
|
- label="Data file",
|
|
|
|
|
|
|
+ label=_("Data file"),
|
|
|
required=False
|
|
required=False
|
|
|
)
|
|
)
|
|
|
format = forms.ChoiceField(
|
|
format = forms.ChoiceField(
|
|
|
choices=ImportFormatChoices,
|
|
choices=ImportFormatChoices,
|
|
|
initial=ImportFormatChoices.AUTO
|
|
initial=ImportFormatChoices.AUTO
|
|
|
)
|
|
)
|
|
|
|
|
+ csv_delimiter = forms.ChoiceField(
|
|
|
|
|
+ choices=CSVDelimiterChoices,
|
|
|
|
|
+ initial=CSVDelimiterChoices.AUTO,
|
|
|
|
|
+ label=_("CSV delimiter"),
|
|
|
|
|
+ help_text=_("The character which delimits CSV fields. Applies only to CSV format."),
|
|
|
|
|
+ required=False
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
data_field = 'data'
|
|
data_field = 'data'
|
|
|
|
|
|
|
@@ -54,13 +61,18 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
|
|
|
|
|
|
|
|
# Determine the data format
|
|
# Determine the data format
|
|
|
if self.cleaned_data['format'] == ImportFormatChoices.AUTO:
|
|
if self.cleaned_data['format'] == ImportFormatChoices.AUTO:
|
|
|
- format = self._detect_format(data)
|
|
|
|
|
|
|
+ if self.cleaned_data['csv_delimiter'] != CSVDelimiterChoices.AUTO:
|
|
|
|
|
+ # Specifying the CSV delimiter implies CSV format
|
|
|
|
|
+ format = ImportFormatChoices.CSV
|
|
|
|
|
+ else:
|
|
|
|
|
+ format = self._detect_format(data)
|
|
|
else:
|
|
else:
|
|
|
format = self.cleaned_data['format']
|
|
format = self.cleaned_data['format']
|
|
|
|
|
|
|
|
# Process data according to the selected format
|
|
# Process data according to the selected format
|
|
|
if format == ImportFormatChoices.CSV:
|
|
if format == ImportFormatChoices.CSV:
|
|
|
- self.cleaned_data['data'] = self._clean_csv(data)
|
|
|
|
|
|
|
+ delimiter = self.cleaned_data.get('csv_delimiter', CSVDelimiterChoices.AUTO)
|
|
|
|
|
+ self.cleaned_data['data'] = self._clean_csv(data, delimiter=delimiter)
|
|
|
elif format == ImportFormatChoices.JSON:
|
|
elif format == ImportFormatChoices.JSON:
|
|
|
self.cleaned_data['data'] = self._clean_json(data)
|
|
self.cleaned_data['data'] = self._clean_json(data)
|
|
|
elif format == ImportFormatChoices.YAML:
|
|
elif format == ImportFormatChoices.YAML:
|
|
@@ -78,7 +90,10 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
|
|
|
return ImportFormatChoices.JSON
|
|
return ImportFormatChoices.JSON
|
|
|
if data.startswith('---') or data.startswith('- '):
|
|
if data.startswith('---') or data.startswith('- '):
|
|
|
return ImportFormatChoices.YAML
|
|
return ImportFormatChoices.YAML
|
|
|
- if ',' in data.split('\n', 1)[0]:
|
|
|
|
|
|
|
+ # Look for any of the CSV delimiters in the first line (ignoring the default 'auto' choice)
|
|
|
|
|
+ first_line = data.split('\n', 1)[0]
|
|
|
|
|
+ csv_delimiters = CSV_DELIMITERS.values()
|
|
|
|
|
+ if any(x in first_line for x in csv_delimiters):
|
|
|
return ImportFormatChoices.CSV
|
|
return ImportFormatChoices.CSV
|
|
|
except IndexError:
|
|
except IndexError:
|
|
|
pass
|
|
pass
|
|
@@ -86,12 +101,31 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
|
|
|
'format': _('Unable to detect data format. Please specify.')
|
|
'format': _('Unable to detect data format. Please specify.')
|
|
|
})
|
|
})
|
|
|
|
|
|
|
|
- def _clean_csv(self, data):
|
|
|
|
|
|
|
+ def _clean_csv(self, data, delimiter=CSVDelimiterChoices.AUTO):
|
|
|
"""
|
|
"""
|
|
|
Clean CSV-formatted data. The first row will be treated as column headers.
|
|
Clean CSV-formatted data. The first row will be treated as column headers.
|
|
|
"""
|
|
"""
|
|
|
|
|
+ # Determine the CSV dialect
|
|
|
|
|
+ if delimiter == CSVDelimiterChoices.AUTO:
|
|
|
|
|
+ # This uses a rough heuristic to detect the CSV dialect based on the presence of supported delimiting
|
|
|
|
|
+ # characters. If the data is malformed, we'll fall back to the default Excel dialect.
|
|
|
|
|
+ delimiters = ''.join(CSV_DELIMITERS.values())
|
|
|
|
|
+ try:
|
|
|
|
|
+ dialect = csv.Sniffer().sniff(data.strip(), delimiters=delimiters)
|
|
|
|
|
+ except csv.Error:
|
|
|
|
|
+ dialect = csv.excel
|
|
|
|
|
+ elif delimiter in (CSVDelimiterChoices.COMMA, CSVDelimiterChoices.SEMICOLON):
|
|
|
|
|
+ dialect = csv.excel
|
|
|
|
|
+ dialect.delimiter = delimiter
|
|
|
|
|
+ elif delimiter == CSVDelimiterChoices.TAB:
|
|
|
|
|
+ dialect = csv.excel_tab
|
|
|
|
|
+ else:
|
|
|
|
|
+ raise forms.ValidationError({
|
|
|
|
|
+ 'csv_delimiter': _('Invalid CSV delimiter'),
|
|
|
|
|
+ })
|
|
|
|
|
+
|
|
|
stream = StringIO(data.strip())
|
|
stream = StringIO(data.strip())
|
|
|
- reader = csv.reader(stream)
|
|
|
|
|
|
|
+ reader = csv.reader(stream, dialect=dialect)
|
|
|
headers, records = parse_csv(reader)
|
|
headers, records = parse_csv(reader)
|
|
|
|
|
|
|
|
# Set CSV headers for reference by the model form
|
|
# Set CSV headers for reference by the model form
|