# data_backends.py
  1. import logging
  2. import os
  3. import re
  4. import tempfile
  5. from contextlib import contextmanager
  6. from pathlib import Path
  7. from urllib.parse import urlparse
  8. from django import forms
  9. from django.core.exceptions import ImproperlyConfigured
  10. from django.utils.translation import gettext as _
  11. from netbox.data_backends import DataBackend
  12. from netbox.utils import register_data_backend
  13. from utilities.constants import HTTP_PROXY_SUPPORTED_SCHEMAS, HTTP_PROXY_SUPPORTED_SOCK_SCHEMAS
  14. from utilities.proxy import resolve_proxies
  15. from utilities.socks import ProxyPoolManager
  16. from .exceptions import SyncError
  17. __all__ = (
  18. 'GitBackend',
  19. 'LocalBackend',
  20. 'S3Backend',
  21. 'url_has_embedded_credentials',
  22. )
  23. logger = logging.getLogger('netbox.data_backends')
  24. def url_has_embedded_credentials(url):
  25. """
  26. Check if a URL contains embedded credentials (username in the URL).
  27. URLs like 'https://user@bitbucket.org/...' have embedded credentials.
  28. This is used to avoid passing explicit credentials to dulwich when the
  29. URL already contains them, which would cause authentication conflicts.
  30. """
  31. parsed = urlparse(url)
  32. return bool(parsed.username)
  33. @register_data_backend()
  34. class LocalBackend(DataBackend):
  35. name = 'local'
  36. label = _('Local')
  37. is_local = True
  38. @contextmanager
  39. def fetch(self):
  40. logger.debug("Data source type is local; skipping fetch")
  41. local_path = urlparse(self.url).path # Strip file:// scheme
  42. yield local_path
  43. @register_data_backend()
  44. class GitBackend(DataBackend):
  45. name = 'git'
  46. label = 'Git'
  47. parameters = {
  48. 'username': forms.CharField(
  49. required=False,
  50. label=_('Username'),
  51. widget=forms.TextInput(attrs={'class': 'form-control'}),
  52. help_text=_("Only used for cloning with HTTP(S)"),
  53. ),
  54. 'password': forms.CharField(
  55. required=False,
  56. label=_('Password'),
  57. widget=forms.TextInput(attrs={'class': 'form-control'}),
  58. help_text=_("Only used for cloning with HTTP(S)"),
  59. ),
  60. 'branch': forms.CharField(
  61. required=False,
  62. label=_('Branch'),
  63. widget=forms.TextInput(attrs={'class': 'form-control'})
  64. )
  65. }
  66. sensitive_parameters = ['password']
  67. def init_config(self):
  68. from dulwich.config import ConfigDict
  69. # Initialize backend config
  70. config = ConfigDict()
  71. self.socks_proxy = None
  72. # Apply HTTP proxy (if configured)
  73. proxies = resolve_proxies(url=self.url, context={'client': self}) or {}
  74. if proxy := proxies.get(self.url_scheme):
  75. if urlparse(proxy).scheme not in HTTP_PROXY_SUPPORTED_SCHEMAS:
  76. raise ImproperlyConfigured(f"Unsupported Git DataSource proxy scheme: {urlparse(proxy).scheme}")
  77. if self.url_scheme in ('http', 'https'):
  78. config.set("http", "proxy", proxy)
  79. if urlparse(proxy).scheme in HTTP_PROXY_SUPPORTED_SOCK_SCHEMAS:
  80. self.socks_proxy = proxy
  81. return config
  82. @contextmanager
  83. def fetch(self):
  84. from dulwich import porcelain
  85. local_path = tempfile.TemporaryDirectory()
  86. clone_args = {
  87. "branch": self.params.get('branch'),
  88. "config": self.config,
  89. "errstream": porcelain.NoneStream(),
  90. }
  91. # check if using socks for proxy - if so need to use custom pool_manager
  92. if self.socks_proxy:
  93. clone_args['pool_manager'] = ProxyPoolManager(self.socks_proxy)
  94. if self.url_scheme in ('http', 'https'):
  95. # Only pass explicit credentials if URL doesn't already contain embedded username
  96. # to avoid credential conflicts (see #20902)
  97. if not url_has_embedded_credentials(self.url) and self.params.get('username'):
  98. clone_args.update(
  99. {
  100. "username": self.params.get('username'),
  101. "password": self.params.get('password'),
  102. }
  103. )
  104. if self.url_scheme:
  105. clone_args["quiet"] = True
  106. clone_args["depth"] = 1
  107. logger.debug(f"Cloning git repo: {self.url}")
  108. try:
  109. porcelain.clone(self.url, local_path.name, **clone_args)
  110. except BaseException as e:
  111. raise SyncError(_("Fetching remote data failed ({name}): {error}").format(name=type(e).__name__, error=e))
  112. yield local_path.name
  113. local_path.cleanup()
  114. @register_data_backend()
  115. class S3Backend(DataBackend):
  116. name = 'amazon-s3'
  117. label = 'Amazon S3'
  118. parameters = {
  119. 'aws_access_key_id': forms.CharField(
  120. label=_('AWS access key ID'),
  121. widget=forms.TextInput(attrs={'class': 'form-control'})
  122. ),
  123. 'aws_secret_access_key': forms.CharField(
  124. label=_('AWS secret access key'),
  125. widget=forms.TextInput(attrs={'class': 'form-control'})
  126. ),
  127. }
  128. sensitive_parameters = ['aws_secret_access_key']
  129. REGION_REGEX = r's3\.([a-z0-9-]+)\.amazonaws\.com'
  130. def init_config(self):
  131. from botocore.config import Config as Boto3Config
  132. # Initialize backend config
  133. return Boto3Config(
  134. proxies=resolve_proxies(url=self.url, context={'client': self}),
  135. )
  136. @contextmanager
  137. def fetch(self):
  138. import boto3
  139. local_path = tempfile.TemporaryDirectory()
  140. # Initialize the S3 resource and bucket
  141. aws_access_key_id = self.params.get('aws_access_key_id')
  142. aws_secret_access_key = self.params.get('aws_secret_access_key')
  143. s3 = boto3.resource(
  144. 's3',
  145. region_name=self._region_name,
  146. aws_access_key_id=aws_access_key_id,
  147. aws_secret_access_key=aws_secret_access_key,
  148. config=self.config,
  149. endpoint_url=self._endpoint_url
  150. )
  151. bucket = s3.Bucket(self._bucket_name)
  152. # Download all files within the specified path
  153. for obj in bucket.objects.filter(Prefix=self._remote_path):
  154. local_filename = os.path.join(local_path.name, obj.key)
  155. # Build local path
  156. Path(os.path.dirname(local_filename)).mkdir(parents=True, exist_ok=True)
  157. bucket.download_file(obj.key, local_filename)
  158. yield local_path.name
  159. local_path.cleanup()
  160. @property
  161. def _region_name(self):
  162. domain = urlparse(self.url).netloc
  163. if m := re.match(self.REGION_REGEX, domain):
  164. return m.group(1)
  165. return None
  166. @property
  167. def _bucket_name(self):
  168. url_path = urlparse(self.url).path.lstrip('/')
  169. return url_path.split('/')[0]
  170. @property
  171. def _endpoint_url(self):
  172. url_path = urlparse(self.url)
  173. return url_path._replace(params="", fragment="", query="", path="").geturl()
  174. @property
  175. def _remote_path(self):
  176. url_path = urlparse(self.url).path.lstrip('/')
  177. if '/' in url_path:
  178. return url_path.split('/', 1)[1]
  179. return ''