# docker-compose.yml
#
# Defines a single service, `nvidia_exporter`, that runs NVIDIA's dcgm-exporter
# image and publishes container port 9400 on host port 9400.
---
version: '3'
services:
  # To export data from the DCGM host, DCGM must be installed on the host
  # system at a version equal to or newer than the one in the container.
  # https://github.com/NVIDIA/DCGM
  nvidia_exporter:
    image: nvcr.io/nvidia/k8s/dcgm-exporter:2.3.2-2.6.2-ubuntu20.04
    container_name: nvidia_exporter
    # NOTE(review): `runtime` was historically not part of the 3.x Compose file
    # schema; older docker-compose releases may reject it with version '3'.
    # Verify against the Compose version used for deployment.
    runtime: nvidia
    cap_add:
      # NOTE(review): presumably needed by DCGM for privileged GPU metrics;
      # confirm against the dcgm-exporter documentation.
      - SYS_ADMIN
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=all
    ports:
      # Quoted so the mapping is always read as a string, never as a number.
      - "9400:9400"
    restart: unless-stopped

# NVIDIA Data Center GPU Manager (DCGM): to export data from the DCGM host to
# Prometheus, you need DCGM installed on the host as well as the NVIDIA
# Container Toolkit.
# https://github.com/NVIDIA/DCGM
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#install-guide
# https://github.com/NVIDIA/dcgm-exporter and https://docs.nvidia.com/datacenter/cloud-native/gpu-telemetry/dcgm-exporter.html