feat: add GPU monitoring for NVIDIA GPUs

- add GPU monitoring for NVIDIA GPUs
2024-08-08 12:51:41 -07:00 · 2024-08-08 12:51:41 -07:00 · b2d4fd06a4
parent f5b3b43d01
commit b2d4fd06a4
4 changed files with 242 additions and 6 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -1,7 +1,8 @@
-PyQt6
+PyQt6~=6.7.1
-psutil
+psutil~=5.9.8
-requests
+requests~=2.32.3
 numpy<2.0.0
-torch
+torch~=1.13.1
-sentencepiece
+sentencepiece~=0.2.0
-PyYAML
+PyYAML~=6.0.2
 pynvml~=11.5.3
--- a/src/AutoGGUF.py
+++ b/src/AutoGGUF.py
@ -10,12 +10,14 @@
 from PyQt6.QtGui import *
 from PyQt6.QtWidgets import *
 from DownloadThread import DownloadThread
 from KVOverrideEntry import KVOverrideEntry
 from Logger import Logger
 from ModelInfoDialog import ModelInfoDialog
 from QuantizationThread import QuantizationThread
 from TaskListItem import TaskListItem
 from GPUMonitor import GPUMonitor
 from error_handling import show_error, handle_error
 from imports_and_globals import ensure_directory, open_file_safe, resource_path
 from localizations import *
@ -62,9 +64,12 @@ def __init__(self):
        # System info
        self.ram_bar = QProgressBar()
        self.cpu_label = QLabel(CPU_USAGE)
        self.gpu_monitor = GPUMonitor()
        left_layout.addWidget(QLabel(RAM_USAGE))
        left_layout.addWidget(self.ram_bar)
        left_layout.addWidget(self.cpu_label)
        left_layout.addWidget(QLabel(GPU_USAGE))
        left_layout.addWidget(self.gpu_monitor)
        # Modify the backend selection
        backend_layout = QHBoxLayout()
--- a/src/GPUMonitor.py
+++ b/src/GPUMonitor.py
@ -0,0 +1,206 @@
 import pynvml
 from PyQt6.QtCore import QTimer
 from PyQt6.QtGui import QPainter, QPen, QColor
 from PyQt6.QtWidgets import (
    QWidget,
    QHBoxLayout,
    QVBoxLayout,
    QProgressBar,
    QLabel,
    QDialog,
    QTabWidget,
    QGraphicsView,
    QGraphicsScene,
    QGraphicsLineItem,
    QComboBox,
 )
 from localizations import (
    GPU_USAGE_FORMAT,
    GPU_DETAILS,
    GPU_USAGE_OVER_TIME,
    VRAM_USAGE_OVER_TIME,
    NO_GPU_DETECTED,
    AMD_GPU_NOT_SUPPORTED,
 )
 class SimpleGraph(QGraphicsView):
    """Minimal line chart that plots a series of values against a fixed maximum.

    The chart is rebuilt from scratch on every ``update_data`` call and on
    every resize, so it always fills the current viewport.
    """

    # Gap, in pixels, kept between the plot area and each viewport edge.
    MARGIN = 20

    def __init__(self, title, parent=None, max_value=100):
        """Create an empty chart.

        Args:
            title: Caption drawn centred above the plot.
            parent: Optional parent widget.
            max_value: Value mapped to the top of the plot. Defaults to 100
                because the series plotted by this file are percentages.
        """
        super().__init__(parent)
        self.setScene(QGraphicsScene(self))
        self.setRenderHint(QPainter.RenderHint.Antialiasing)
        self.setMinimumHeight(200)
        self.title = title
        self.max_value = max_value
        self.data = []

    def update_data(self, data):
        """Replace the plotted series and redraw the chart.

        Args:
            data: Sequence of numbers, oldest first. It is stored by
                reference (not copied), so a later resize redraws whatever
                the sequence contains at that moment.
        """
        self.data = data
        scene = self.scene()
        scene.clear()
        if not self.data:
            return

        viewport = self.viewport()
        total_width = viewport.width()
        total_height = viewport.height()
        margin = self.MARGIN
        width = total_width - 2 * margin
        height = total_height - 2 * margin
        # Nothing sensible can be drawn into a viewport smaller than the
        # margins (e.g. before the first layout pass) or with a bad scale.
        if width <= 0 or height <= 0 or self.max_value <= 0:
            return

        # An unset scene rect only ever grows with the items added to it and
        # never shrinks, which leaves scrollbars behind after the view gets
        # smaller. Pin it to the viewport on every redraw instead.
        scene.setSceneRect(0, 0, total_width, total_height)

        left = margin
        top = margin
        right = left + width
        bottom = top + height

        # Draw axes
        scene.addLine(left, bottom, right, bottom)
        scene.addLine(left, top, left, bottom)

        # Draw title, centred horizontally using its rendered width
        title_item = scene.addText(self.title)
        title_width = title_item.boundingRect().width()
        title_item.setPos((total_width - title_width) / 2, 0)

        # A single sample has no segment to draw.
        count = len(self.data)
        if count < 2:
            return

        # Draw graph: samples are spread evenly across the plot width.
        pen = QPen(QColor(0, 120, 212), 2)  # Blue color, 2px width
        step = width / (count - 1)
        scale = height / self.max_value
        points = [
            (left + i * step, bottom - value * scale)
            for i, value in enumerate(self.data)
        ]
        for (x1, y1), (x2, y2) in zip(points, points[1:]):
            segment = QGraphicsLineItem(x1, y1, x2, y2)
            segment.setPen(pen)
            scene.addItem(segment)

    def resizeEvent(self, event):
        """Redraw the current series so the chart tracks the new size."""
        super().resizeEvent(event)
        self.update_data(self.data)
 class GPUMonitor(QWidget):
    """Compact widget showing live NVIDIA GPU statistics read through NVML.

    A progress bar and a text label are refreshed on a timer. When more than
    one device is present a combo box selects which one is monitored, and a
    double-click opens a dialog with usage history graphs.

    Attributes are plain lists so the history can be shared by reference with
    the graphs: ``gpu_data`` and ``vram_data`` hold the most recent
    ``HISTORY_LENGTH`` samples, ``handles`` the NVML device handles and
    ``current_gpu`` the index of the monitored device.
    """

    # Interval between two NVML polls, in milliseconds.
    UPDATE_INTERVAL_MS = 200
    # Number of samples kept in each history series.
    HISTORY_LENGTH = 60

    def __init__(self, parent=None):
        """Build the widget, enumerate NVIDIA devices and start polling.

        Args:
            parent: Optional parent widget.
        """
        super().__init__(parent)
        self.setMinimumHeight(30)
        self.setMaximumHeight(30)

        # State is created before any signal is connected so that every slot
        # finds it in place, whatever triggers it first.
        self.gpu_data = []
        self.vram_data = []
        self.handles = []
        self.current_gpu = 0
        self._nvml_initialized = False

        layout = QHBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)

        self.gpu_selector = QComboBox()
        self.gpu_selector.setVisible(False)
        self.gpu_selector.currentIndexChanged.connect(self.change_gpu)
        layout.addWidget(self.gpu_selector)

        self.gpu_bar = QProgressBar()
        self.gpu_bar.setTextVisible(False)
        layout.addWidget(self.gpu_bar)

        self.gpu_label = QLabel()
        layout.addWidget(self.gpu_label)

        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update_gpu_info)

        try:
            pynvml.nvmlInit()
            # Remember the successful init separately from the handle list:
            # NVML must be shut down even when it reports zero devices.
            self._nvml_initialized = True
            device_count = pynvml.nvmlDeviceGetCount()
            for i in range(device_count):
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
                name = pynvml.nvmlDeviceGetName(handle)
                # Handle both string and bytes cases
                if isinstance(name, bytes):
                    name = name.decode("utf-8")
                # Append the handle before the item: adding the first item
                # emits currentIndexChanged(0), which validates the index
                # against the handle list.
                self.handles.append(handle)
                self.gpu_selector.addItem(f"NVIDIA GPU {i}: {name}")
            if device_count > 1:
                self.gpu_selector.setVisible(True)
            if device_count == 0:
                self.check_for_amd_gpu()
        except pynvml.NVMLError:
            self.check_for_amd_gpu()

        if self.handles:
            # Poll only when there is something to poll.
            self.timer.start(self.UPDATE_INTERVAL_MS)
        else:
            # Replaces the placeholder text set by check_for_amd_gpu, which
            # cannot actually detect an AMD device yet.
            self.gpu_label.setText(NO_GPU_DETECTED)

    def check_for_amd_gpu(self):
        """Placeholder for AMD detection; only sets the label text for now."""
        # This is a placeholder. Implementing AMD GPU detection would require
        # platform-specific methods or additional libraries.
        self.gpu_label.setText(AMD_GPU_NOT_SUPPORTED)

    def change_gpu(self, index):
        """Switch monitoring to the device at ``index`` and reset the history.

        Args:
            index: Position of the device in ``handles``. Out-of-range values
                (such as the -1 a combo box emits when it has no selection)
                are ignored.
        """
        if not 0 <= index < len(self.handles):
            return
        self.current_gpu = index
        # Cleared in place because the graphs share these list objects.
        self.gpu_data.clear()
        self.vram_data.clear()

    def update_gpu_info(self):
        """Poll NVML for the current device and refresh bar, label and history.

        On an NVML error the bar and label are reset to zero and no sample is
        recorded.
        """
        if not self.handles:
            return

        try:
            handle = self.handles[self.current_gpu]
            utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
            memory = pynvml.nvmlDeviceGetMemoryInfo(handle)
        except pynvml.NVMLError:
            self.gpu_bar.setValue(0)
            self.gpu_label.setText(GPU_USAGE_FORMAT.format(0, 0, 0, 0))
            return

        gpu_usage = utilization.gpu
        # Guard the division: an exception escaping a Qt slot is fatal.
        vram_usage = (memory.used / memory.total) * 100 if memory.total else 0.0

        # The bar tracks VRAM occupancy; core utilisation is in the label.
        self.gpu_bar.setValue(int(vram_usage))
        self.gpu_label.setText(
            GPU_USAGE_FORMAT.format(
                gpu_usage,
                vram_usage,
                memory.used // 1024 // 1024,
                memory.total // 1024 // 1024,
            )
        )

        self.gpu_data.append(gpu_usage)
        self.vram_data.append(vram_usage)
        # Trim in place (the graphs alias these lists) to the newest samples.
        del self.gpu_data[: -self.HISTORY_LENGTH]
        del self.vram_data[: -self.HISTORY_LENGTH]

    def mouseDoubleClickEvent(self, event):
        """Open the detailed statistics dialog when a GPU is available."""
        if self.handles:
            self.show_detailed_stats()

    def show_detailed_stats(self):
        """Show a modal dialog with live GPU and VRAM usage history graphs."""
        dialog = QDialog(self)
        dialog.setWindowTitle(GPU_DETAILS)
        dialog.setMinimumSize(800, 600)

        layout = QVBoxLayout(dialog)

        if len(self.handles) > 1:
            gpu_selector = QComboBox()
            gpu_selector.addItems(
                [
                    self.gpu_selector.itemText(i)
                    for i in range(self.gpu_selector.count())
                ]
            )
            gpu_selector.setCurrentIndex(self.current_gpu)
            # Drive the main selector rather than change_gpu directly: the
            # main selector then calls change_gpu itself and both combo
            # boxes keep showing the same device.
            gpu_selector.currentIndexChanged.connect(
                self.gpu_selector.setCurrentIndex
            )
            layout.addWidget(gpu_selector)

        tab_widget = QTabWidget()
        layout.addWidget(tab_widget)

        gpu_graph = SimpleGraph(GPU_USAGE_OVER_TIME)
        vram_graph = SimpleGraph(VRAM_USAGE_OVER_TIME)

        def refresh_graphs():
            # Redraw both charts from the shared history lists.
            gpu_graph.update_data(self.gpu_data)
            vram_graph.update_data(self.vram_data)

        refresh_graphs()

        tab_widget.addTab(gpu_graph, GPU_USAGE_OVER_TIME)
        tab_widget.addTab(vram_graph, VRAM_USAGE_OVER_TIME)

        # The polling timer keeps firing inside the dialog's event loop, so
        # hooking it keeps the graphs live. Connected after update_gpu_info,
        # the redraw always sees the sample that was just recorded.
        self.timer.timeout.connect(refresh_graphs)
        try:
            dialog.exec()
        finally:
            self.timer.timeout.disconnect(refresh_graphs)
            # The dialog is parented to this widget and would otherwise pile
            # up, one instance per double-click, until the widget is destroyed.
            dialog.deleteLater()

    def closeEvent(self, event):
        """Stop polling and release NVML when the widget is closed.

        NOTE: Qt delivers close events to top-level windows only; when this
        widget is embedded in a layout, the owning window has to call
        ``close()`` on it for this cleanup to run.
        """
        self.timer.stop()
        if self._nvml_initialized:
            pynvml.nvmlShutdown()
            # Prevents a second shutdown if the widget is closed again.
            self._nvml_initialized = False
        super().closeEvent(event)
--- a/src/localizations.py
+++ b/src/localizations.py
@ -23,6 +23,18 @@ def __init__(self):
        self.AVAILABLE_MODELS = "Available Models:"
        self.REFRESH_MODELS = "Refresh Models"
        # GPU Monitoring
        self.GPU_USAGE = "GPU Usage:"
        self.GPU_USAGE_FORMAT = "GPU: {:.1f}% | VRAM: {:.1f}% ({} MB / {} MB)"
        self.GPU_DETAILS = "GPU Details"
        self.GPU_USAGE_OVER_TIME = "GPU Usage Over Time"
        self.VRAM_USAGE_OVER_TIME = "VRAM Usage Over Time"
        self.PERCENTAGE = "Percentage"
        self.TIME = "Time (s)"
        self.NO_GPU_DETECTED = "No GPU detected"
        self.SELECT_GPU = "Select GPU"
        self.AMD_GPU_NOT_SUPPORTED = "AMD GPU detected, but not supported"
        # Quantization
        self.QUANTIZATION_TYPE = "Quantization Type:"
        self.ALLOW_REQUANTIZE = "Allow Requantize"
@ -345,6 +357,18 @@ def __init__(self):
        self.AVAILABLE_MODELS = "Modèles disponibles :"
        self.REFRESH_MODELS = "Rafraîchir les modèles"
        # Surveillance GPU
        self.GPU_USAGE = "Utilisation GPU :"
        self.GPU_USAGE_FORMAT = "GPU : {:.1f}% | VRAM : {:.1f}% ({} Mo / {} Mo)"
        self.GPU_DETAILS = "Détails GPU"
        self.GPU_USAGE_OVER_TIME = "Utilisation GPU dans le temps"
        self.VRAM_USAGE_OVER_TIME = "Utilisation VRAM dans le temps"
        self.PERCENTAGE = "Pourcentage"
        self.TIME = "Temps (s)"
        self.NO_GPU_DETECTED = "Aucun GPU détecté"
        self.SELECT_GPU = "Sélectionner GPU"
        self.AMD_GPU_NOT_SUPPORTED = "GPU AMD détecté, mais non pris en charge"
        # Quantification
        self.QUANTIZATION_TYPE = "Type de quantification :"
        self.ALLOW_REQUANTIZE = "Autoriser la requantification"