feat: add GPU monitoring for NVIDIA GPUs

- add GPU monitoring for NVIDIA GPUs
2024-08-08 12:51:41 -07:00 · 2024-08-08 12:51:41 -07:00 · b2d4fd06a4
parent f5b3b43d01
commit b2d4fd06a4
4 changed files with 242 additions and 6 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -1,7 +1,8 @@
-PyQt6
-psutil
-requests
+PyQt6~=6.7.1
+psutil~=5.9.8
+requests~=2.32.3
 numpy<2.0.0
-torch
-sentencepiece
-PyYAML
+torch~=1.13.1
+sentencepiece~=0.2.0
+PyYAML~=6.0.2
+pynvml~=11.5.3
--- a/src/AutoGGUF.py
+++ b/src/AutoGGUF.py
@ -10,12 +10,14 @@
 from PyQt6.QtGui import *
 from PyQt6.QtWidgets import *

+
 from DownloadThread import DownloadThread
 from KVOverrideEntry import KVOverrideEntry
 from Logger import Logger
 from ModelInfoDialog import ModelInfoDialog
 from QuantizationThread import QuantizationThread
 from TaskListItem import TaskListItem
+from GPUMonitor import GPUMonitor
 from error_handling import show_error, handle_error
 from imports_and_globals import ensure_directory, open_file_safe, resource_path
 from localizations import *
@ -62,9 +64,12 @@ def __init__(self):
        # System info
        self.ram_bar = QProgressBar()
        self.cpu_label = QLabel(CPU_USAGE)
+        self.gpu_monitor = GPUMonitor()
        left_layout.addWidget(QLabel(RAM_USAGE))
        left_layout.addWidget(self.ram_bar)
        left_layout.addWidget(self.cpu_label)
+        left_layout.addWidget(QLabel(GPU_USAGE))
+        left_layout.addWidget(self.gpu_monitor)

        # Modify the backend selection
        backend_layout = QHBoxLayout()
--- a/src/GPUMonitor.py
+++ b/src/GPUMonitor.py
@ -0,0 +1,206 @@
+import pynvml
+from PyQt6.QtCore import QTimer
+from PyQt6.QtGui import QPainter, QPen, QColor
+from PyQt6.QtWidgets import (
+    QWidget,
+    QHBoxLayout,
+    QVBoxLayout,
+    QProgressBar,
+    QLabel,
+    QDialog,
+    QTabWidget,
+    QGraphicsView,
+    QGraphicsScene,
+    QGraphicsLineItem,
+    QComboBox,
+)
+
+from localizations import (
+    GPU_USAGE_FORMAT,
+    GPU_DETAILS,
+    GPU_USAGE_OVER_TIME,
+    VRAM_USAGE_OVER_TIME,
+    NO_GPU_DETECTED,
+    AMD_GPU_NOT_SUPPORTED,
+)
+
+
+class SimpleGraph(QGraphicsView):
+    def __init__(self, title, parent=None):
+        super().__init__(parent)
+        self.setScene(QGraphicsScene(self))
+        self.setRenderHint(QPainter.RenderHint.Antialiasing)
+
+        self.setMinimumHeight(200)
+        self.title = title
+        self.data = []
+
+    def update_data(self, data):
+        self.data = data
+        self.scene().clear()
+        if not self.data:
+            return
+
+        width = self.width() - 40
+        height = self.height() - 40
+        max_value = 100  # Fixed to 100% for GPU usage
+
+        # Draw axes
+        self.scene().addLine(20, height + 20, width + 20, height + 20)
+        self.scene().addLine(20, 20, 20, height + 20)
+
+        # Draw title
+        self.scene().addText(self.title).setPos(width // 2, 0)
+
+        # Draw graph
+        path = QPen(QColor(0, 120, 212), 2)  # Blue color, 2px width
+        for i in range(1, len(self.data)):
+            x1 = 20 + (i - 1) * width / (len(self.data) - 1)
+            y1 = 20 + height - (self.data[i - 1] * height / max_value)
+            x2 = 20 + i * width / (len(self.data) - 1)
+            y2 = 20 + height - (self.data[i] * height / max_value)
+            line = QGraphicsLineItem(x1, y1, x2, y2)
+            line.setPen(path)
+            self.scene().addItem(line)
+
+    def resizeEvent(self, event):
+        super().resizeEvent(event)
+        self.update_data(self.data)
+
+
+class GPUMonitor(QWidget):
+    def __init__(self, parent=None):
+        super().__init__(parent)
+        self.setMinimumHeight(30)
+        self.setMaximumHeight(30)
+
+        layout = QHBoxLayout(self)
+        layout.setContentsMargins(0, 0, 0, 0)
+
+        self.gpu_selector = QComboBox()
+        self.gpu_selector.setVisible(False)
+        self.gpu_selector.currentIndexChanged.connect(self.change_gpu)
+        layout.addWidget(self.gpu_selector)
+
+        self.gpu_bar = QProgressBar()
+        self.gpu_bar.setTextVisible(False)
+        layout.addWidget(self.gpu_bar)
+
+        self.gpu_label = QLabel()
+        layout.addWidget(self.gpu_label)
+
+        self.timer = QTimer(self)
+        self.timer.timeout.connect(self.update_gpu_info)
+        self.timer.start(200)  # Update every 0.2 seconds
+
+        self.gpu_data = []
+        self.vram_data = []
+
+        self.handles = []
+        self.current_gpu = 0
+
+        try:
+            pynvml.nvmlInit()
+            device_count = pynvml.nvmlDeviceGetCount()
+            for i in range(device_count):
+                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+                name = pynvml.nvmlDeviceGetName(handle)
+                # Handle both string and bytes cases
+                if isinstance(name, bytes):
+                    name = name.decode("utf-8")
+                self.handles.append(handle)
+                self.gpu_selector.addItem(f"NVIDIA GPU {i}: {name}")
+
+            if device_count > 1:
+                self.gpu_selector.setVisible(True)
+
+            if device_count == 0:
+                self.check_for_amd_gpu()
+
+        except pynvml.NVMLError:
+            self.check_for_amd_gpu()
+
+        if not self.handles:
+            self.gpu_label.setText(NO_GPU_DETECTED)
+
+    def check_for_amd_gpu(self):
+        # This is a placeholder. Implementing AMD GPU detection would require
+        # platform-specific methods or additional libraries.
+        self.gpu_label.setText(AMD_GPU_NOT_SUPPORTED)
+
+    def change_gpu(self, index):
+        self.current_gpu = index
+        self.gpu_data.clear()
+        self.vram_data.clear()
+
+    def update_gpu_info(self):
+        if self.handles:
+            try:
+                handle = self.handles[self.current_gpu]
+                utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
+                memory = pynvml.nvmlDeviceGetMemoryInfo(handle)
+
+                gpu_usage = utilization.gpu
+                vram_usage = (memory.used / memory.total) * 100
+
+                self.gpu_bar.setValue(int(vram_usage))
+                self.gpu_label.setText(
+                    GPU_USAGE_FORMAT.format(
+                        gpu_usage,
+                        vram_usage,
+                        memory.used // 1024 // 1024,
+                        memory.total // 1024 // 1024,
+                    )
+                )
+
+                self.gpu_data.append(gpu_usage)
+                self.vram_data.append(vram_usage)
+
+                if len(self.gpu_data) > 60:
+                    self.gpu_data.pop(0)
+                    self.vram_data.pop(0)
+            except pynvml.NVMLError:
+                self.gpu_bar.setValue(0)
+                self.gpu_label.setText(GPU_USAGE_FORMAT.format(0, 0, 0, 0))
+
+    def mouseDoubleClickEvent(self, event):
+        if self.handles:
+            self.show_detailed_stats()
+
+    def show_detailed_stats(self):
+        dialog = QDialog(self)
+        dialog.setWindowTitle(GPU_DETAILS)
+        dialog.setMinimumSize(800, 600)
+
+        layout = QVBoxLayout(dialog)
+
+        if len(self.handles) > 1:
+            gpu_selector = QComboBox()
+            gpu_selector.addItems(
+                [
+                    self.gpu_selector.itemText(i)
+                    for i in range(self.gpu_selector.count())
+                ]
+            )
+            gpu_selector.setCurrentIndex(self.current_gpu)
+            gpu_selector.currentIndexChanged.connect(self.change_gpu)
+            layout.addWidget(gpu_selector)
+
+        tab_widget = QTabWidget()
+        layout.addWidget(tab_widget)
+
+        gpu_graph = SimpleGraph(GPU_USAGE_OVER_TIME)
+        vram_graph = SimpleGraph(VRAM_USAGE_OVER_TIME)
+
+        gpu_graph.update_data(self.gpu_data)
+        vram_graph.update_data(self.vram_data)
+
+        tab_widget.addTab(gpu_graph, GPU_USAGE_OVER_TIME)
+        tab_widget.addTab(vram_graph, VRAM_USAGE_OVER_TIME)
+
+        dialog.exec()
+
+    def closeEvent(self, event):
+        if self.handles:
+            pynvml.nvmlShutdown()
+        super().closeEvent(event)
--- a/src/localizations.py
+++ b/src/localizations.py
@ -23,6 +23,18 @@ def __init__(self):
        self.AVAILABLE_MODELS = "Available Models:"
        self.REFRESH_MODELS = "Refresh Models"

+        # GPU Monitoring
+        self.GPU_USAGE = "GPU Usage:"
+        self.GPU_USAGE_FORMAT = "GPU: {:.1f}% | VRAM: {:.1f}% ({} MB / {} MB)"
+        self.GPU_DETAILS = "GPU Details"
+        self.GPU_USAGE_OVER_TIME = "GPU Usage Over Time"
+        self.VRAM_USAGE_OVER_TIME = "VRAM Usage Over Time"
+        self.PERCENTAGE = "Percentage"
+        self.TIME = "Time (s)"
+        self.NO_GPU_DETECTED = "No GPU detected"
+        self.SELECT_GPU = "Select GPU"
+        self.AMD_GPU_NOT_SUPPORTED = "AMD GPU detected, but not supported"
+
        # Quantization
        self.QUANTIZATION_TYPE = "Quantization Type:"
        self.ALLOW_REQUANTIZE = "Allow Requantize"
@ -345,6 +357,18 @@ def __init__(self):
        self.AVAILABLE_MODELS = "Modèles disponibles :"
        self.REFRESH_MODELS = "Rafraîchir les modèles"

+        # Surveillance GPU
+        self.GPU_USAGE = "Utilisation GPU :"
+        self.GPU_USAGE_FORMAT = "GPU : {:.1f}% | VRAM : {:.1f}% ({} Mo / {} Mo)"
+        self.GPU_DETAILS = "Détails GPU"
+        self.GPU_USAGE_OVER_TIME = "Utilisation GPU dans le temps"
+        self.VRAM_USAGE_OVER_TIME = "Utilisation VRAM dans le temps"
+        self.PERCENTAGE = "Pourcentage"
+        self.TIME = "Temps (s)"
+        self.NO_GPU_DETECTED = "Aucun GPU détecté"
+        self.SELECT_GPU = "Sélectionner GPU"
+        self.AMD_GPU_NOT_SUPPORTED = "GPU AMD détecté, mais non pris en charge"
+
        # Quantification
        self.QUANTIZATION_TYPE = "Type de quantification :"
        self.ALLOW_REQUANTIZE = "Autoriser la requantification"