feat: add GPU monitoring for NVIDIA GPUs

- add GPU monitoring for NVIDIA GPUs
This commit is contained in:
BuildTools 2024-08-08 12:51:41 -07:00
parent f5b3b43d01
commit b2d4fd06a4
No known key found for this signature in database
GPG Key ID: 3270C066C15D530B
4 changed files with 242 additions and 6 deletions

View File

@ -1,7 +1,8 @@
PyQt6
psutil
requests
PyQt6~=6.7.1
psutil~=5.9.8
requests~=2.32.3
numpy<2.0.0
torch
sentencepiece
PyYAML
torch~=1.13.1
sentencepiece~=0.2.0
PyYAML~=6.0.2
pynvml~=11.5.3

View File

@ -10,12 +10,14 @@
from PyQt6.QtGui import *
from PyQt6.QtWidgets import *
from DownloadThread import DownloadThread
from KVOverrideEntry import KVOverrideEntry
from Logger import Logger
from ModelInfoDialog import ModelInfoDialog
from QuantizationThread import QuantizationThread
from TaskListItem import TaskListItem
from GPUMonitor import GPUMonitor
from error_handling import show_error, handle_error
from imports_and_globals import ensure_directory, open_file_safe, resource_path
from localizations import *
@ -62,9 +64,12 @@ def __init__(self):
# System info
self.ram_bar = QProgressBar()
self.cpu_label = QLabel(CPU_USAGE)
self.gpu_monitor = GPUMonitor()
left_layout.addWidget(QLabel(RAM_USAGE))
left_layout.addWidget(self.ram_bar)
left_layout.addWidget(self.cpu_label)
left_layout.addWidget(QLabel(GPU_USAGE))
left_layout.addWidget(self.gpu_monitor)
# Modify the backend selection
backend_layout = QHBoxLayout()

206
src/GPUMonitor.py Normal file
View File

@ -0,0 +1,206 @@
import pynvml
from PyQt6.QtCore import QTimer
from PyQt6.QtGui import QPainter, QPen, QColor
from PyQt6.QtWidgets import (
QWidget,
QHBoxLayout,
QVBoxLayout,
QProgressBar,
QLabel,
QDialog,
QTabWidget,
QGraphicsView,
QGraphicsScene,
QGraphicsLineItem,
QComboBox,
)
from localizations import (
GPU_USAGE_FORMAT,
GPU_DETAILS,
GPU_USAGE_OVER_TIME,
VRAM_USAGE_OVER_TIME,
NO_GPU_DETECTED,
AMD_GPU_NOT_SUPPORTED,
)
class SimpleGraph(QGraphicsView):
    """Minimal line chart drawn with QGraphicsScene primitives.

    The chart consists of two axes, a title and one polyline connecting the
    samples passed to :meth:`update_data`. Samples are spread evenly along
    the x axis and scaled on the y axis against ``max_value``.
    """

    # Gap, in pixels, kept between the widget edge and the plot area.
    MARGIN = 20

    def __init__(self, title, parent=None, max_value=100):
        """Create an empty graph.

        Args:
            title: Text drawn above the plot area.
            parent: Optional parent widget.
            max_value: Sample value that maps to the top of the y axis.
                Defaults to 100 (percentages). Must be positive.

        Raises:
            ValueError: If ``max_value`` is not positive.
        """
        super().__init__(parent)
        if max_value <= 0:
            raise ValueError("max_value must be positive")
        self.setScene(QGraphicsScene(self))
        self.setRenderHint(QPainter.RenderHint.Antialiasing)
        self.setMinimumHeight(200)
        self.title = title
        self.max_value = max_value
        self.data = []

    def update_data(self, data):
        """Replace the plotted samples and redraw the whole scene.

        Args:
            data: Sequence of numeric samples. The sequence is stored by
                reference (not copied). An empty sequence leaves the scene
                blank.
        """
        self.data = data
        scene = self.scene()
        scene.clear()
        if not self.data:
            return

        margin = self.MARGIN
        width = self.width() - 2 * margin
        height = self.height() - 2 * margin
        left = margin
        bottom = height + margin

        # Axes: x along the bottom, y along the left edge.
        scene.addLine(left, bottom, width + margin, bottom)
        scene.addLine(left, margin, left, bottom)

        # Title
        scene.addText(self.title).setPos(width // 2, 0)

        # Polyline; a single sample has no segment to draw.
        pen = QPen(QColor(0, 120, 212), 2)  # Blue color, 2px width
        segments = len(self.data) - 1
        if segments > 0:
            x_step = width / segments
            y_scale = height / self.max_value
            points = [
                (left + index * x_step, bottom - value * y_scale)
                for index, value in enumerate(self.data)
            ]
            for (x1, y1), (x2, y2) in zip(points, points[1:]):
                line = QGraphicsLineItem(x1, y1, x2, y2)
                line.setPen(pen)
                scene.addItem(line)

        # An unset scene rect only ever grows, and clear() does not reset
        # it. Pin it to what was just drawn so that shrinking the view does
        # not leave the chart scrollable and off-centre.
        scene.setSceneRect(scene.itemsBoundingRect())

    def resizeEvent(self, event):
        """Redraw the current samples so the chart follows the widget size."""
        super().resizeEvent(event)
        self.update_data(self.data)
class GPUMonitor(QWidget):
    """Compact status widget that polls NVML for NVIDIA GPU statistics.

    The widget shows an optional GPU selector (only when more than one
    device is found), a progress bar and a text label. The progress bar
    displays the VRAM usage percentage; the label shows both GPU
    utilization and VRAM usage. A rolling history of samples is kept for
    the detail dialog opened by double-clicking the widget.
    """

    # Polling period for NVML queries and for the detail-dialog refresh.
    UPDATE_INTERVAL_MS = 200
    # Number of samples kept per history list.
    MAX_SAMPLES = 60

    def __init__(self, parent=None):
        """Build the widget and enumerate NVIDIA devices through NVML.

        Args:
            parent: Optional parent widget.
        """
        super().__init__(parent)
        self.setMinimumHeight(30)
        self.setMaximumHeight(30)

        layout = QHBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)

        self.gpu_selector = QComboBox()
        self.gpu_selector.setVisible(False)
        self.gpu_selector.currentIndexChanged.connect(self.change_gpu)
        layout.addWidget(self.gpu_selector)

        self.gpu_bar = QProgressBar()
        self.gpu_bar.setTextVisible(False)
        layout.addWidget(self.gpu_bar)

        self.gpu_label = QLabel()
        layout.addWidget(self.gpu_label)

        # State must exist before the selector is populated: adding the
        # first item emits currentIndexChanged, which calls change_gpu().
        self.gpu_data = []
        self.vram_data = []
        self.handles = []
        self.current_gpu = 0
        self._nvml_ready = False

        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update_gpu_info)

        try:
            pynvml.nvmlInit()
            self._nvml_ready = True
            device_count = pynvml.nvmlDeviceGetCount()
            for i in range(device_count):
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
                name = pynvml.nvmlDeviceGetName(handle)
                # Handle both string and bytes cases
                if isinstance(name, bytes):
                    name = name.decode("utf-8")
                self.handles.append(handle)
                self.gpu_selector.addItem(f"NVIDIA GPU {i}: {name}")
            if device_count > 1:
                self.gpu_selector.setVisible(True)
            if device_count == 0:
                self.check_for_amd_gpu()
        except pynvml.NVMLError:
            self.check_for_amd_gpu()

        if self.handles:
            # Only poll when there is something to poll.
            self.timer.start(self.UPDATE_INTERVAL_MS)
        else:
            # NOTE: this replaces the text set by check_for_amd_gpu(). That
            # method is a placeholder which cannot actually detect AMD
            # hardware, so the neutral message is the one shown.
            self.gpu_label.setText(NO_GPU_DETECTED)

    def check_for_amd_gpu(self):
        """Set the label to the "AMD GPU not supported" message.

        This is a placeholder. Implementing AMD GPU detection would require
        platform-specific methods or additional libraries.
        """
        self.gpu_label.setText(AMD_GPU_NOT_SUPPORTED)

    def change_gpu(self, index):
        """Switch monitoring to another device and reset the history.

        Args:
            index: Position of the device in ``self.handles``. Out-of-range
                values are ignored.
        """
        if not 0 <= index < len(self.handles):
            return
        self.current_gpu = index
        self.gpu_data.clear()
        self.vram_data.clear()
        # Keep the inline selector in step when the change came from the
        # selector inside the detail dialog.
        if self.gpu_selector.currentIndex() != index:
            self.gpu_selector.setCurrentIndex(index)

    def update_gpu_info(self):
        """Query NVML for the current device and refresh bar, label, history.

        On an NVML error the bar and label are reset to zero and no sample
        is recorded.
        """
        if not self.handles:
            return

        try:
            handle = self.handles[self.current_gpu]
            utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
            memory = pynvml.nvmlDeviceGetMemoryInfo(handle)
        except pynvml.NVMLError:
            self.gpu_bar.setValue(0)
            self.gpu_label.setText(GPU_USAGE_FORMAT.format(0, 0, 0, 0))
            return

        gpu_usage = utilization.gpu
        # Guard against a device reporting no memory at all.
        vram_usage = (memory.used / memory.total) * 100 if memory.total else 0.0

        self.gpu_bar.setValue(int(vram_usage))
        self.gpu_label.setText(
            GPU_USAGE_FORMAT.format(
                gpu_usage,
                vram_usage,
                memory.used // 1024 // 1024,
                memory.total // 1024 // 1024,
            )
        )

        self.gpu_data.append(gpu_usage)
        self.vram_data.append(vram_usage)
        # Trim in place so objects holding a reference to these lists
        # (the graphs in the detail dialog) keep seeing the live history.
        del self.gpu_data[: -self.MAX_SAMPLES]
        del self.vram_data[: -self.MAX_SAMPLES]

    def mouseDoubleClickEvent(self, event):
        """Open the detail dialog when at least one GPU is being monitored."""
        if self.handles:
            self.show_detailed_stats()

    def show_detailed_stats(self):
        """Show a modal dialog with live GPU and VRAM usage graphs."""
        dialog = QDialog(self)
        dialog.setWindowTitle(GPU_DETAILS)
        dialog.setMinimumSize(800, 600)
        layout = QVBoxLayout(dialog)

        if len(self.handles) > 1:
            gpu_selector = QComboBox()
            gpu_selector.addItems(
                [
                    self.gpu_selector.itemText(i)
                    for i in range(self.gpu_selector.count())
                ]
            )
            gpu_selector.setCurrentIndex(self.current_gpu)
            gpu_selector.currentIndexChanged.connect(self.change_gpu)
            layout.addWidget(gpu_selector)

        tab_widget = QTabWidget()
        layout.addWidget(tab_widget)

        gpu_graph = SimpleGraph(GPU_USAGE_OVER_TIME)
        vram_graph = SimpleGraph(VRAM_USAGE_OVER_TIME)

        def refresh_graphs():
            gpu_graph.update_data(self.gpu_data)
            vram_graph.update_data(self.vram_data)

        refresh_graphs()

        tab_widget.addTab(gpu_graph, GPU_USAGE_OVER_TIME)
        tab_widget.addTab(vram_graph, VRAM_USAGE_OVER_TIME)

        # Without this timer the plots would stay frozen at the samples
        # taken when the dialog opened, and would not reflect a GPU switch
        # made inside the dialog.
        refresh_timer = QTimer(dialog)
        refresh_timer.timeout.connect(refresh_graphs)
        refresh_timer.start(self.UPDATE_INTERVAL_MS)

        dialog.exec()

        refresh_timer.stop()
        # The dialog is parented to this widget; release it explicitly so
        # repeated double-clicks do not accumulate hidden dialogs.
        dialog.deleteLater()

    def closeEvent(self, event):
        """Stop polling and release NVML when the widget is closed.

        NOTE(review): Qt sends close events to widgets being closed, which
        is normally a top-level window. When this widget is embedded in
        another window the owner may need to call ``close()`` on it
        explicitly; confirm against the caller.
        """
        self.timer.stop()
        if self._nvml_ready:
            try:
                pynvml.nvmlShutdown()
            except pynvml.NVMLError:
                # Best-effort cleanup: nothing useful can be done if the
                # driver refuses to shut down while the widget is closing.
                pass
            self._nvml_ready = False
        # Handles are invalid after shutdown; drop them so nothing polls.
        self.handles.clear()
        super().closeEvent(event)

View File

@ -23,6 +23,18 @@ def __init__(self):
self.AVAILABLE_MODELS = "Available Models:"
self.REFRESH_MODELS = "Refresh Models"
# GPU Monitoring
self.GPU_USAGE = "GPU Usage:"
self.GPU_USAGE_FORMAT = "GPU: {:.1f}% | VRAM: {:.1f}% ({} MB / {} MB)"
self.GPU_DETAILS = "GPU Details"
self.GPU_USAGE_OVER_TIME = "GPU Usage Over Time"
self.VRAM_USAGE_OVER_TIME = "VRAM Usage Over Time"
self.PERCENTAGE = "Percentage"
self.TIME = "Time (s)"
self.NO_GPU_DETECTED = "No GPU detected"
self.SELECT_GPU = "Select GPU"
self.AMD_GPU_NOT_SUPPORTED = "AMD GPU detected, but not supported"
# Quantization
self.QUANTIZATION_TYPE = "Quantization Type:"
self.ALLOW_REQUANTIZE = "Allow Requantize"
@ -345,6 +357,18 @@ def __init__(self):
self.AVAILABLE_MODELS = "Modèles disponibles :"
self.REFRESH_MODELS = "Rafraîchir les modèles"
# Surveillance GPU
self.GPU_USAGE = "Utilisation GPU :"
self.GPU_USAGE_FORMAT = "GPU : {:.1f}% | VRAM : {:.1f}% ({} Mo / {} Mo)"
self.GPU_DETAILS = "Détails GPU"
self.GPU_USAGE_OVER_TIME = "Utilisation GPU dans le temps"
self.VRAM_USAGE_OVER_TIME = "Utilisation VRAM dans le temps"
self.PERCENTAGE = "Pourcentage"
self.TIME = "Temps (s)"
self.NO_GPU_DETECTED = "Aucun GPU détecté"
self.SELECT_GPU = "Sélectionner GPU"
self.AMD_GPU_NOT_SUPPORTED = "GPU AMD détecté, mais non pris en charge"
# Quantification
self.QUANTIZATION_TYPE = "Type de quantification :"
self.ALLOW_REQUANTIZE = "Autoriser la requantification"