2
0
Эх сурвалжийг харах

closes #3104 - add support for exposing prometheus metrics

John Anderson 6 жил өмнө
parent
commit
f057a2c016

+ 23 - 0
CHANGELOG.md

@@ -58,6 +58,29 @@ https://nms.local/nodes/?name={{ obj.name }}
 Custom links appear as buttons at the top of the object view. Grouped links will render as a dropdown menu beneath a
 single button.
 
+### Prometheus Metrics ([#3104](https://github.com/digitalocean/netbox/issues/3104))
+
+NetBox now supports optionally exposing native Prometheus metrics from the application. [Prometheus](https://prometheus.io/)
+is a popular time series metric platform used for monitoring. NetBox exposes metrics at the `/metrics` HTTP endpoint, e.g.
+`https://netbox.local/metrics`. Metric exposition can be toggled with the `METRICS_ENABLED` configuration setting. Metrics
+are exposed by default.
+
+NetBox makes use of the [django-prometheus](https://github.com/korfuri/django-prometheus) library to export a number of
+different types of metrics, including:
+
+- Per model insert, update, and delete counters
+- Per view request counters
+- Per view request latency histograms
+- Request body size histograms
+- Response body size histograms
+- Response code counters
+- Database connection, execution, and error counters
+- Cache hit, miss, and invalidation counters
+- Django middleware latency histograms
+- Other Django related metadata metrics
+
+For the exhaustive list of exposed metrics, visit the `/metrics` endpoint on your NetBox instance.
+
 ## Changes
 
 ### New Dependency: Redis

+ 4 - 0
base_requirements.txt

@@ -22,6 +22,10 @@ django-filter
 # https://github.com/django-mptt/django-mptt
 django-mptt
 
+# Prometheus metrics library for Django
+# https://github.com/korfuri/django-prometheus
+django-prometheus
+
 # Abstraction models for rendering and paginating HTML tables
 # https://github.com/jieter/django-tables2
 django-tables2

+ 22 - 0
docs/additional-features/prometheus-metrics.md

@@ -0,0 +1,22 @@
+# Prometheus Metrics
+
+NetBox supports optionally exposing native Prometheus metrics from the application. [Prometheus](https://prometheus.io/) is a popular time series metric platform used for monitoring.
+
+NetBox exposes metrics at the `/metrics` HTTP endpoint, e.g. `https://netbox.local/metrics`. Metric exposition can be toggled with the `METRICS_ENABLED` configuration setting. Metrics are exposed by default.
+
+## Metric Types
+
+NetBox makes use of the [django-prometheus](https://github.com/korfuri/django-prometheus) library to export a number of different types of metrics, including:
+
+- Per model insert, update, and delete counters
+- Per view request counters
+- Per view request latency histograms
+- Request body size histograms
+- Response body size histograms
+- Response code counters
+- Database connection, execution, and error counters
+- Cache hit, miss, and invalidation counters
+- Django middleware latency histograms
+- Other Django related metadata metrics
+
+For the exhaustive list of exposed metrics, visit the `/metrics` endpoint on your NetBox instance.

+ 8 - 0
docs/configuration/optional-settings.md

@@ -197,6 +197,14 @@ The file path to the location where media files (such as image attachments) are
 
 ---
 
+## METRICS_ENABLED
+
+Default: True
+
+Toggle exposing Prometheus metrics at `/metrics`. See the [Prometheus Metrics](../additional-features/prometheus-metrics/) documentation for more details.
+
+---
+
 ## NAPALM_USERNAME
 
 ## NAPALM_PASSWORD

+ 1 - 0
mkdocs.yml

@@ -37,6 +37,7 @@ pages:
         - Webhooks: 'additional-features/webhooks.md'
         - Change Logging: 'additional-features/change-logging.md'
         - Caching: 'additional-features/caching.md'
+        - Prometheus Metrics: 'additional-features/prometheus-metrics.md'
     - Administration:
         - Replicating NetBox: 'administration/replicating-netbox.md'
         - NetBox Shell: 'administration/netbox-shell.md'

+ 3 - 0
netbox/extras/apps.py

@@ -7,6 +7,9 @@ class ExtrasConfig(AppConfig):
     name = "extras"
 
     def ready(self):
+
+        import extras.signals
+
         # Check that we can connect to the configured Redis database if webhooks are enabled.
         if settings.WEBHOOKS_ENABLED:
             try:

+ 13 - 1
netbox/extras/middleware.py

@@ -7,6 +7,7 @@ from django.conf import settings
 from django.db.models.signals import post_delete, post_save
 from django.utils import timezone
 from django.utils.functional import curry
+from django_prometheus.models import model_deletes, model_inserts, model_updates
 
 from extras.webhooks import enqueue_webhooks
 from .constants import (
@@ -37,15 +38,20 @@ def _record_object_deleted(request, instance, **kwargs):
     if hasattr(instance, 'log_change'):
         instance.log_change(request.user, request.id, OBJECTCHANGE_ACTION_DELETE)
 
+    # Enqueue webhooks
     enqueue_webhooks(instance, request.user, request.id, OBJECTCHANGE_ACTION_DELETE)
 
+    # Increment metric counters
+    model_deletes.labels(instance._meta.model_name).inc()
+
 
 class ObjectChangeMiddleware(object):
     """
-    This middleware performs two functions in response to an object being created, updated, or deleted:
+    This middleware performs three functions in response to an object being created, updated, or deleted:
 
         1. Create an ObjectChange to reflect the modification to the object in the changelog.
         2. Enqueue any relevant webhooks.
+        3. Increment the metric counter for the event type.
 
     The post_save and pre_delete signals are employed to catch object modifications, however changes are recorded a bit
     differently for each. Objects being saved are cached into thread-local storage for action *after* the response has
@@ -85,6 +91,12 @@ class ObjectChangeMiddleware(object):
             # Enqueue webhooks
             enqueue_webhooks(obj, request.user, request.id, action)
 
+            # Increment metric counters
+            if action == OBJECTCHANGE_ACTION_CREATE:
+                model_inserts.labels(obj._meta.model_name).inc()
+            elif action == OBJECTCHANGE_ACTION_UPDATE:
+                model_updates.labels(obj._meta.model_name).inc()
+
         # Housekeeping: 1% chance of clearing out expired ObjectChanges
         if _thread_locals.changed_objects and settings.CHANGELOG_RETENTION and random.randint(1, 100) == 1:
             cutoff = timezone.now() - timedelta(days=settings.CHANGELOG_RETENTION)

+ 22 - 0
netbox/extras/signals.py

@@ -0,0 +1,22 @@
+from cacheops.signals import cache_invalidated, cache_read
+from prometheus_client import Counter
+
+
+cacheops_cache_hit = Counter('cacheops_cache_hit', 'Number of cache hits')
+cacheops_cache_miss = Counter('cacheops_cache_miss', 'Number of cache misses')
+cacheops_cache_invalidated = Counter('cacheops_cache_invalidated', 'Number of cache invalidations')
+
+
+def cache_read_collector(sender, func, hit, **kwargs):  # receiver for cacheops' cache_read signal
+    if hit:
+        cacheops_cache_hit.inc()  # truthy `hit`: count as a cache hit
+    else:
+        cacheops_cache_miss.inc()  # falsy `hit`: count as a cache miss
+
+
+def cache_invalidated_collector(sender, obj_dict, **kwargs):  # receiver for cacheops' cache_invalidated signal
+    cacheops_cache_invalidated.inc()  # one increment per signal delivery; obj_dict is not inspected
+
+
+cache_read.connect(cache_read_collector)
+cache_invalidated.connect(cache_invalidated_collector)

+ 3 - 0
netbox/netbox/configuration.example.py

@@ -129,6 +129,9 @@ MAX_PAGE_SIZE = 1000
 # the default value of this setting is derived from the installed location.
 # MEDIA_ROOT = '/opt/netbox/netbox/media'
 
+# Expose Prometheus monitoring metrics at the HTTP endpoint '/metrics'
+METRICS_ENABLED = True
+
 # Credentials that NetBox will uses to authenticate to devices when connecting via NAPALM.
 NAPALM_USERNAME = ''
 NAPALM_PASSWORD = ''

+ 19 - 4
netbox/netbox/settings.py

@@ -77,6 +77,7 @@ LOGIN_TIMEOUT = getattr(configuration, 'LOGIN_TIMEOUT', None)
 MAINTENANCE_MODE = getattr(configuration, 'MAINTENANCE_MODE', False)
 MAX_PAGE_SIZE = getattr(configuration, 'MAX_PAGE_SIZE', 1000)
 MEDIA_ROOT = getattr(configuration, 'MEDIA_ROOT', os.path.join(BASE_DIR, 'media')).rstrip('/')
+METRICS_ENABLED = getattr(configuration, 'METRICS_ENABLED', True)
 NAPALM_ARGS = getattr(configuration, 'NAPALM_ARGS', {})
 NAPALM_PASSWORD = getattr(configuration, 'NAPALM_PASSWORD', '')
 NAPALM_TIMEOUT = getattr(configuration, 'NAPALM_TIMEOUT', 30)
@@ -98,9 +99,14 @@ WEBHOOKS_ENABLED = getattr(configuration, 'WEBHOOKS_ENABLED', False)
 #
 
 # Only PostgreSQL is supported
-DATABASE.update({
-    'ENGINE': 'django.db.backends.postgresql'
-})
+if METRICS_ENABLED:
+    DATABASE.update({
+        'ENGINE': 'django_prometheus.db.backends.postgresql'
+    })
+else:
+    DATABASE.update({
+        'ENGINE': 'django.db.backends.postgresql'
+    })
 
 DATABASES = {
     'default': DATABASE,
@@ -161,6 +167,7 @@ INSTALLED_APPS = [
     'debug_toolbar',
     'django_filters',
     'django_tables2',
+    'django_prometheus',
     'mptt',
     'rest_framework',
     'taggit',
@@ -185,6 +192,7 @@ if WEBHOOKS_ENABLED:
 # Middleware
 MIDDLEWARE = (
     'debug_toolbar.middleware.DebugToolbarMiddleware',
+    'django_prometheus.middleware.PrometheusBeforeMiddleware',
     'corsheaders.middleware.CorsMiddleware',
     'django.contrib.sessions.middleware.SessionMiddleware',
     'django.middleware.common.CommonMiddleware',
@@ -197,6 +205,7 @@ MIDDLEWARE = (
     'utilities.middleware.LoginRequiredMiddleware',
     'utilities.middleware.APIVersionMiddleware',
     'extras.middleware.ObjectChangeMiddleware',
+    'django_prometheus.middleware.PrometheusAfterMiddleware',
 )
 
 ROOT_URLCONF = 'netbox.urls'
@@ -337,7 +346,7 @@ else:
     REDIS_CACHE_CON_STRING = 'redis://'
 
 if REDIS_PASSWORD:
-    REDIS_CACHE_CON_STRING = '{}@{}'.format(REDIS_PASSWORD, REDIS_CACHE_CON_STRING)
+    REDIS_CACHE_CON_STRING = '{}:{}@'.format(REDIS_CACHE_CON_STRING, REDIS_PASSWORD)  # redis://:password@host
 
 REDIS_CACHE_CON_STRING = '{}{}:{}/{}'.format(REDIS_CACHE_CON_STRING, REDIS_HOST, REDIS_PORT, REDIS_CACHE_DATABASE)
 
@@ -365,6 +374,12 @@ CACHEOPS = {
 CACHEOPS_DEGRADE_ON_FAILURE = True
 
 
+#
+# Django Prometheus
+#
+PROMETHEUS_EXPORT_MIGRATIONS = False
+
+
 #
 # Django filters
 #

+ 5 - 0
netbox/netbox/urls.py

@@ -73,6 +73,11 @@ if settings.DEBUG:
         url(r'^__debug__/', include(debug_toolbar.urls)),
     ]
 
+if settings.METRICS_ENABLED:
+    _patterns += [
+        url('', include('django_prometheus.urls')),
+    ]
+
 # Prepend BASE_PATH
 urlpatterns = [
     url(r'^{}'.format(settings.BASE_PATH), include(_patterns))

+ 1 - 0
requirements.txt

@@ -4,6 +4,7 @@ django-cors-headers==2.5.2
 django-debug-toolbar==1.11
 django-filter==2.1.0
 django-mptt==0.9.1
+django-prometheus==1.0.15
 django-tables2==2.0.6
 django-taggit==1.1.0
 django-taggit-serializer==0.1.7