feat: add GPU monitoring for NVIDIA GPUs

- add GPU monitoring for NVIDIA GPUs
This commit is contained in:
BuildTools 2024-08-08 12:51:41 -07:00
parent f5b3b43d01
commit b2d4fd06a4
No known key found for this signature in database
GPG Key ID: 3270C066C15D530B
4 changed files with 242 additions and 6 deletions

View File

@ -1,7 +1,8 @@
PyQt6 PyQt6~=6.7.1
psutil psutil~=5.9.8
requests requests~=2.32.3
numpy<2.0.0 numpy<2.0.0
torch torch~=1.13.1
sentencepiece sentencepiece~=0.2.0
PyYAML PyYAML~=6.0.2
pynvml~=11.5.3

View File

@ -10,12 +10,14 @@
from PyQt6.QtGui import * from PyQt6.QtGui import *
from PyQt6.QtWidgets import * from PyQt6.QtWidgets import *
from DownloadThread import DownloadThread from DownloadThread import DownloadThread
from KVOverrideEntry import KVOverrideEntry from KVOverrideEntry import KVOverrideEntry
from Logger import Logger from Logger import Logger
from ModelInfoDialog import ModelInfoDialog from ModelInfoDialog import ModelInfoDialog
from QuantizationThread import QuantizationThread from QuantizationThread import QuantizationThread
from TaskListItem import TaskListItem from TaskListItem import TaskListItem
from GPUMonitor import GPUMonitor
from error_handling import show_error, handle_error from error_handling import show_error, handle_error
from imports_and_globals import ensure_directory, open_file_safe, resource_path from imports_and_globals import ensure_directory, open_file_safe, resource_path
from localizations import * from localizations import *
@ -62,9 +64,12 @@ def __init__(self):
# System info # System info
self.ram_bar = QProgressBar() self.ram_bar = QProgressBar()
self.cpu_label = QLabel(CPU_USAGE) self.cpu_label = QLabel(CPU_USAGE)
self.gpu_monitor = GPUMonitor()
left_layout.addWidget(QLabel(RAM_USAGE)) left_layout.addWidget(QLabel(RAM_USAGE))
left_layout.addWidget(self.ram_bar) left_layout.addWidget(self.ram_bar)
left_layout.addWidget(self.cpu_label) left_layout.addWidget(self.cpu_label)
left_layout.addWidget(QLabel(GPU_USAGE))
left_layout.addWidget(self.gpu_monitor)
# Modify the backend selection # Modify the backend selection
backend_layout = QHBoxLayout() backend_layout = QHBoxLayout()

206
src/GPUMonitor.py Normal file
View File

@ -0,0 +1,206 @@
import pynvml
from PyQt6.QtCore import QTimer
from PyQt6.QtGui import QPainter, QPen, QColor
from PyQt6.QtWidgets import (
QWidget,
QHBoxLayout,
QVBoxLayout,
QProgressBar,
QLabel,
QDialog,
QTabWidget,
QGraphicsView,
QGraphicsScene,
QGraphicsLineItem,
QComboBox,
)
from localizations import (
GPU_USAGE_FORMAT,
GPU_DETAILS,
GPU_USAGE_OVER_TIME,
VRAM_USAGE_OVER_TIME,
NO_GPU_DETECTED,
AMD_GPU_NOT_SUPPORTED,
)
class SimpleGraph(QGraphicsView):
    """Minimal line chart drawn on a QGraphicsScene.

    The chart plots a sequence of numeric samples, left to right, against a
    fixed vertical scale of ``0 .. max_value``. It is redrawn from scratch on
    every ``update_data`` call and on every resize.

    Args:
        title: Text drawn centred along the top of the chart.
        parent: Optional parent widget.
        max_value: Value that maps to the top of the chart. Defaults to 100,
            i.e. samples are treated as percentages.

    Raises:
        ValueError: If ``max_value`` is not positive.
    """

    # Blank border, in pixels, kept between the viewport edge and the axes.
    MARGIN = 20

    def __init__(self, title, parent=None, max_value=100):
        super().__init__(parent)
        if max_value <= 0:
            raise ValueError("max_value must be positive")
        self.setScene(QGraphicsScene(self))
        self.setRenderHint(QPainter.RenderHint.Antialiasing)
        self.setMinimumHeight(200)
        self.title = title
        self.max_value = max_value
        self.data = []

    def update_data(self, data):
        """Replace the plotted series with ``data`` and redraw the chart.

        ``data`` is stored by reference (not copied), so a caller that keeps
        mutating the same sequence can call this method again, or let a
        resize happen, to see the latest samples.

        Args:
            data: Indexable sequence of numbers. An empty sequence clears the
                chart and draws nothing.
        """
        self.data = data
        scene = self.scene()
        scene.clear()
        if not self.data:
            return

        margin = self.MARGIN
        viewport = self.viewport()
        view_width = viewport.width()
        view_height = viewport.height()
        width = view_width - 2 * margin
        height = view_height - 2 * margin
        if width <= 0 or height <= 0:
            # Nothing sensible can be drawn in a degenerate viewport.
            return

        # An unset scene rect only ever grows, which leaves stale extents
        # (and scroll bars) behind once the view is made smaller. Pin it to
        # the current viewport instead.
        scene.setSceneRect(0, 0, view_width, view_height)

        left = margin
        top = margin
        right = left + width
        bottom = top + height

        # Axes: horizontal along the bottom, vertical along the left.
        scene.addLine(left, bottom, right, bottom)
        scene.addLine(left, top, left, bottom)

        # Centre the title using its rendered width.
        title_item = scene.addText(self.title)
        title_width = title_item.boundingRect().width()
        title_item.setPos((view_width - title_width) / 2, 0)

        sample_count = len(self.data)
        if sample_count < 2:
            # A single sample has no segment to draw.
            return

        pen = QPen(QColor(0, 120, 212), 2)  # Blue, 2px wide
        step = width / (sample_count - 1)
        max_value = self.max_value

        def to_y(value):
            # Clamp so out-of-range samples stay inside the axes.
            clamped = min(max(value, 0), max_value)
            return bottom - clamped * height / max_value

        prev_x = left
        prev_y = to_y(self.data[0])
        for i in range(1, sample_count):
            x = left + i * step
            y = to_y(self.data[i])
            scene.addLine(prev_x, prev_y, x, y, pen)
            prev_x, prev_y = x, y

    def resizeEvent(self, event):
        """Redraw the current series so the chart fills the new size."""
        super().resizeEvent(event)
        self.update_data(self.data)
class GPUMonitor(QWidget):
    """Compact one-line widget showing live NVIDIA GPU statistics via NVML.

    The widget holds an optional GPU selector (shown only when more than one
    device is found), a progress bar and a text label. A timer polls NVML and
    keeps a short rolling history of GPU and VRAM usage; double-clicking the
    widget opens a dialog that graphs that history.

    When NVML cannot be initialised or reports no devices, the label shows
    ``NO_GPU_DETECTED`` and no polling takes place.
    """

    # Polling period for NVML queries and for the details dialog redraw.
    UPDATE_INTERVAL_MS = 200
    # Number of samples kept per series for the history graphs.
    HISTORY_LENGTH = 60

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setMinimumHeight(30)
        self.setMaximumHeight(30)

        layout = QHBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)

        self.gpu_selector = QComboBox()
        self.gpu_selector.setVisible(False)
        self.gpu_selector.currentIndexChanged.connect(self.change_gpu)
        layout.addWidget(self.gpu_selector)

        self.gpu_bar = QProgressBar()
        self.gpu_bar.setTextVisible(False)
        layout.addWidget(self.gpu_bar)

        self.gpu_label = QLabel()
        layout.addWidget(self.gpu_label)

        # State must exist before devices are enumerated: adding the first
        # item to the selector emits currentIndexChanged -> change_gpu().
        self.gpu_data = []
        self.vram_data = []
        self.handles = []
        self.current_gpu = 0
        self._nvml_initialized = False

        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update_gpu_info)

        self._init_nvml()

        if self.handles:
            self.timer.start(self.UPDATE_INTERVAL_MS)
        else:
            # NOTE(review): this overwrites whatever check_for_amd_gpu() put
            # in the label, so the AMD message is never visible. That matches
            # the previous behaviour and is reasonable while AMD detection is
            # only a placeholder.
            self.gpu_label.setText(NO_GPU_DETECTED)

        if self._nvml_initialized:
            # closeEvent() is not delivered to a child widget when its parent
            # window closes, so also release NVML when the application quits.
            from PyQt6.QtCore import QCoreApplication

            app = QCoreApplication.instance()
            if app is not None:
                app.aboutToQuit.connect(self._shutdown_nvml)

    def _init_nvml(self):
        """Initialise NVML and fill ``handles`` and the GPU selector."""
        try:
            pynvml.nvmlInit()
            self._nvml_initialized = True
            device_count = pynvml.nvmlDeviceGetCount()
            for i in range(device_count):
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
                name = pynvml.nvmlDeviceGetName(handle)
                # Handle both string and bytes cases
                if isinstance(name, bytes):
                    name = name.decode("utf-8", errors="replace")
                self.handles.append(handle)
                self.gpu_selector.addItem(f"NVIDIA GPU {i}: {name}")
            if device_count > 1:
                self.gpu_selector.setVisible(True)
            if device_count == 0:
                self.check_for_amd_gpu()
        except pynvml.NVMLError:
            self.check_for_amd_gpu()

    def _shutdown_nvml(self):
        """Stop polling and release NVML; safe to call more than once."""
        self.timer.stop()
        self.handles.clear()
        if self._nvml_initialized:
            self._nvml_initialized = False
            try:
                pynvml.nvmlShutdown()
            except pynvml.NVMLError:
                # Best effort: nothing useful can be done if shutdown fails.
                pass

    def check_for_amd_gpu(self):
        """Set the label to the AMD-not-supported message.

        This is a placeholder. Implementing AMD GPU detection would require
        platform-specific methods or additional libraries.
        """
        self.gpu_label.setText(AMD_GPU_NOT_SUPPORTED)

    def change_gpu(self, index):
        """Switch monitoring to the GPU at ``index`` and reset the history.

        Indices that do not refer to a known device (for example -1, which a
        combo box emits when it becomes empty) are ignored.
        """
        if not 0 <= index < len(self.handles):
            return
        self.current_gpu = index
        self.gpu_data.clear()
        self.vram_data.clear()

    def update_gpu_info(self):
        """Poll NVML for the selected GPU and refresh bar, label and history.

        If NVML raises, the bar and label are reset to zero values and the
        history is left untouched.
        """
        if not self.handles:
            return
        try:
            handle = self.handles[self.current_gpu]
            utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
            memory = pynvml.nvmlDeviceGetMemoryInfo(handle)
        except pynvml.NVMLError:
            self.gpu_bar.setValue(0)
            self.gpu_label.setText(GPU_USAGE_FORMAT.format(0, 0, 0, 0))
            return

        gpu_usage = utilization.gpu
        # Guard against a zero total so a bad reading cannot raise.
        vram_usage = (memory.used / memory.total) * 100 if memory.total else 0.0

        # NOTE(review): the bar tracks VRAM usage, not GPU utilisation, even
        # though it is named gpu_bar - confirm this is intended.
        self.gpu_bar.setValue(int(vram_usage))
        self.gpu_label.setText(
            GPU_USAGE_FORMAT.format(
                gpu_usage,
                vram_usage,
                memory.used // 1024 // 1024,
                memory.total // 1024 // 1024,
            )
        )

        self.gpu_data.append(gpu_usage)
        self.vram_data.append(vram_usage)
        # Trim in place: open graphs hold references to these lists.
        if len(self.gpu_data) > self.HISTORY_LENGTH:
            self.gpu_data.pop(0)
            self.vram_data.pop(0)

    def mouseDoubleClickEvent(self, event):
        """Open the detailed statistics dialog when a GPU is available."""
        if self.handles:
            self.show_detailed_stats()

    def show_detailed_stats(self):
        """Show a modal dialog graphing GPU and VRAM usage history.

        The graphs are redrawn periodically while the dialog is open. When
        several GPUs are present the dialog offers its own selector, which is
        routed through the main selector so both stay in sync.
        """
        dialog = QDialog(self)
        dialog.setWindowTitle(GPU_DETAILS)
        dialog.setMinimumSize(800, 600)
        layout = QVBoxLayout(dialog)

        if len(self.handles) > 1:
            gpu_selector = QComboBox()
            gpu_selector.addItems(
                [
                    self.gpu_selector.itemText(i)
                    for i in range(self.gpu_selector.count())
                ]
            )
            gpu_selector.setCurrentIndex(self.current_gpu)
            # Changing the main selector triggers change_gpu() through its
            # own signal, keeping both combo boxes consistent.
            gpu_selector.currentIndexChanged.connect(
                self.gpu_selector.setCurrentIndex
            )
            layout.addWidget(gpu_selector)

        tab_widget = QTabWidget()
        layout.addWidget(tab_widget)

        gpu_graph = SimpleGraph(GPU_USAGE_OVER_TIME)
        vram_graph = SimpleGraph(VRAM_USAGE_OVER_TIME)
        tab_widget.addTab(gpu_graph, GPU_USAGE_OVER_TIME)
        tab_widget.addTab(vram_graph, VRAM_USAGE_OVER_TIME)

        def refresh_graphs():
            gpu_graph.update_data(self.gpu_data)
            vram_graph.update_data(self.vram_data)

        refresh_graphs()

        # dialog.exec() runs a nested event loop, so the polling timer keeps
        # collecting samples; redraw the graphs at the same rate.
        refresh_timer = QTimer(dialog)
        refresh_timer.timeout.connect(refresh_graphs)
        refresh_timer.start(self.UPDATE_INTERVAL_MS)

        dialog.exec()

        refresh_timer.stop()
        # The dialog is parented to this widget and would otherwise stay
        # alive after closing; schedule it for deletion.
        dialog.deleteLater()

    def closeEvent(self, event):
        """Release NVML when this widget itself is closed."""
        self._shutdown_nvml()
        super().closeEvent(event)

View File

@ -23,6 +23,18 @@ def __init__(self):
self.AVAILABLE_MODELS = "Available Models:" self.AVAILABLE_MODELS = "Available Models:"
self.REFRESH_MODELS = "Refresh Models" self.REFRESH_MODELS = "Refresh Models"
# GPU Monitoring
self.GPU_USAGE = "GPU Usage:"
self.GPU_USAGE_FORMAT = "GPU: {:.1f}% | VRAM: {:.1f}% ({} MB / {} MB)"
self.GPU_DETAILS = "GPU Details"
self.GPU_USAGE_OVER_TIME = "GPU Usage Over Time"
self.VRAM_USAGE_OVER_TIME = "VRAM Usage Over Time"
self.PERCENTAGE = "Percentage"
self.TIME = "Time (s)"
self.NO_GPU_DETECTED = "No GPU detected"
self.SELECT_GPU = "Select GPU"
self.AMD_GPU_NOT_SUPPORTED = "AMD GPU detected, but not supported"
# Quantization # Quantization
self.QUANTIZATION_TYPE = "Quantization Type:" self.QUANTIZATION_TYPE = "Quantization Type:"
self.ALLOW_REQUANTIZE = "Allow Requantize" self.ALLOW_REQUANTIZE = "Allow Requantize"
@ -345,6 +357,18 @@ def __init__(self):
self.AVAILABLE_MODELS = "Modèles disponibles :" self.AVAILABLE_MODELS = "Modèles disponibles :"
self.REFRESH_MODELS = "Rafraîchir les modèles" self.REFRESH_MODELS = "Rafraîchir les modèles"
# Surveillance GPU
self.GPU_USAGE = "Utilisation GPU :"
self.GPU_USAGE_FORMAT = "GPU : {:.1f}% | VRAM : {:.1f}% ({} Mo / {} Mo)"
self.GPU_DETAILS = "Détails GPU"
self.GPU_USAGE_OVER_TIME = "Utilisation GPU dans le temps"
self.VRAM_USAGE_OVER_TIME = "Utilisation VRAM dans le temps"
self.PERCENTAGE = "Pourcentage"
self.TIME = "Temps (s)"
self.NO_GPU_DETECTED = "Aucun GPU détecté"
self.SELECT_GPU = "Sélectionner GPU"
self.AMD_GPU_NOT_SUPPORTED = "GPU AMD détecté, mais non pris en charge"
# Quantification # Quantification
self.QUANTIZATION_TYPE = "Type de quantification :" self.QUANTIZATION_TYPE = "Type de quantification :"
self.ALLOW_REQUANTIZE = "Autoriser la requantification" self.ALLOW_REQUANTIZE = "Autoriser la requantification"