AutoGGUF/src/AutoGGUF.py

2037 lines
80 KiB
Python

import json
import os
import shutil
import urllib.error
import urllib.request
from datetime import datetime
from functools import partial, wraps
from typing import List
from PySide6.QtCore import *
from PySide6.QtGui import *
from PySide6.QtWidgets import *
import lora_conversion
import presets
import ui_update
import utils
from CustomTitleBar import CustomTitleBar
from GPUMonitor import GPUMonitor
from Localizations import *
from Logger import Logger
from Plugins import Plugins
from QuantizationThread import QuantizationThread
from TaskListItem import TaskListItem
from error_handling import handle_error, show_error
from globals import (
ensure_directory,
load_dotenv,
open_file_safe,
process_args,
resource_path,
show_about,
verify_gguf,
)
class AutoGGUF(QMainWindow):
def validate_input(*fields):
    """Build a decorator that validates text fields before running a method.

    Args:
        *fields: Names of attributes on ``self``. Each attribute must expose a
            ``text()`` method returning a string (the value is stripped of
            surrounding whitespace before it is checked).

    Returns:
        A decorator. The decorated method runs only when every named field
        passes all checks. On the first failing check the problem is reported
        through ``show_error(self.logger, message)`` and the wrapper returns
        ``None`` without calling the decorated method.

    Checks applied to each field, in order:
        1. The value is at most ``1024`` characters long.
        2. The value contains no control characters (``\\x00-\\x1f``, ``\\x7f``).
        3. The normalized path has no ``..`` path component.
    """
    # Imported locally so the decorator does not depend on ``re`` leaking into
    # module scope through one of the star imports.
    import re

    max_length = 1024
    control_chars = re.compile(r"[\x00-\x1f\x7f]")
    path_separators = re.compile(r"[\\/]")

    def find_problem(field, value):
        """Return an error message for ``value``, or ``None`` when it is valid."""
        if len(value) > max_length:
            return f"{field} exceeds maximum length"
        # Checked before normalization so NUL bytes never reach os.path.
        if control_chars.search(value):
            return f"Invalid characters in {field}"
        # Compare whole path components: a plain substring test would also
        # reject harmless names such as "model..v2.gguf".
        components = path_separators.split(os.path.normpath(value))
        if ".." in components:
            return f"Invalid path in {field}"
        return None

    def decorator(func):
        @wraps(func)
        def wrapper(self, *args, **kwargs):
            for field in fields:
                value = getattr(self, field).text().strip()
                problem = find_problem(field, value)
                if problem is not None:
                    show_error(self.logger, problem)
                    # Abort: the decorated method must not run on bad input.
                    return None
            return func(self, *args, **kwargs)

        return wrapper

    return decorator
def __init__(self, args: List[str]) -> None:
super().__init__()
init_timer = QElapsedTimer()
init_timer.start()
self.parse_resolution = ui_update.parse_resolution.__get__(self)
self.log_dir_name = os.environ.get("AUTOGGUF_LOG_DIR_NAME", "logs")
width, height = self.parse_resolution()
self.logger = Logger("AutoGGUF", self.log_dir_name)
self.logger.info(INITIALIZING_AUTOGGUF)
self.setWindowTitle(WINDOW_TITLE)
self.setWindowIcon(QIcon(resource_path("assets/favicon.ico")))
self.setGeometry(100, 100, width, height)
self.setWindowFlag(Qt.FramelessWindowHint)
load_dotenv(self) # Loads the .env file
process_args(args) # Load any command line parameters
# Configuration
self.model_dir_name = os.environ.get("AUTOGGUF_MODEL_DIR_NAME", "models")
self.output_dir_name = os.environ.get(
"AUTOGGUF_OUTPUT_DIR_NAME", "quantized_models"
)
self.resize_factor = float(
os.environ.get("AUTOGGUF_RESIZE_FACTOR", 1.1)
) # 10% increase/decrease
self.default_width, self.default_height = width, height
self.resize(self.default_width, self.default_height)
ensure_directory(os.path.abspath(self.output_dir_name))
ensure_directory(os.path.abspath(self.model_dir_name))
# References
self.update_base_model_visibility = partial(
ui_update.update_base_model_visibility, self
)
self.update_assets = ui_update.update_assets.__get__(self)
self.update_cuda_option = ui_update.update_cuda_option.__get__(self)
self.update_cuda_backends = ui_update.update_cuda_backends.__get__(self)
self.download_llama_cpp = utils.download_llama_cpp.__get__(self)
self.refresh_releases = utils.refresh_releases.__get__(self)
self.browse_lora_input = utils.browse_lora_input.__get__(self)
self.browse_lora_output = utils.browse_lora_output.__get__(self)
self.convert_lora = lora_conversion.convert_lora.__get__(self)
self.show_about = show_about.__get__(self)
self.save_preset = presets.save_preset.__get__(self)
self.load_preset = presets.load_preset.__get__(self)
self.browse_export_lora_model = (
lora_conversion.browse_export_lora_model.__get__(self)
)
self.browse_export_lora_output = (
lora_conversion.browse_export_lora_output.__get__(self)
)
self.add_lora_adapter = lora_conversion.add_lora_adapter.__get__(self)
self.export_lora = lora_conversion.export_lora.__get__(self)
self.browse_models = utils.browse_models.__get__(self)
self.browse_output = utils.browse_output.__get__(self)
self.browse_logs = utils.browse_logs.__get__(self)
self.browse_imatrix = utils.browse_imatrix.__get__(self)
self.get_models_data = utils.get_models_data.__get__(self)
self.get_tasks_data = utils.get_tasks_data.__get__(self)
self.add_kv_override = partial(utils.add_kv_override, self)
self.remove_kv_override = partial(utils.remove_kv_override, self)
self.cancel_task = partial(TaskListItem.cancel_task, self)
self.delete_task = partial(TaskListItem.delete_task, self)
self.show_task_context_menu = partial(TaskListItem.show_task_context_menu, self)
self.show_task_properties = partial(TaskListItem.show_task_properties, self)
self.toggle_gpu_offload_auto = partial(ui_update.toggle_gpu_offload_auto, self)
self.update_threads_spinbox = partial(ui_update.update_threads_spinbox, self)
self.update_threads_slider = partial(ui_update.update_threads_slider, self)
self.update_gpu_offload_spinbox = partial(
ui_update.update_gpu_offload_spinbox, self
)
self.update_gpu_offload_slider = partial(
ui_update.update_gpu_offload_slider, self
)
self.update_model_info = partial(ui_update.update_model_info, self.logger)
self.update_system_info = partial(ui_update.update_system_info, self)
self.update_download_progress = partial(
ui_update.update_download_progress, self
)
self.delete_lora_adapter_item = partial(
lora_conversion.delete_lora_adapter_item, self
)
self.lora_conversion_finished = partial(
lora_conversion.lora_conversion_finished, self
)
self.parse_progress = partial(QuantizationThread.parse_progress, self)
self.create_label = partial(ui_update.create_label, self)
self.browse_imatrix_datafile = ui_update.browse_imatrix_datafile.__get__(self)
self.browse_imatrix_model = ui_update.browse_imatrix_model.__get__(self)
self.browse_imatrix_output = ui_update.browse_imatrix_output.__get__(self)
self.restart_task = partial(TaskListItem.restart_task, self)
self.browse_hf_outfile = ui_update.browse_hf_outfile.__get__(self)
self.browse_hf_model_input = ui_update.browse_hf_model_input.__get__(self)
self.browse_base_model = ui_update.browse_base_model.__get__(self)
self.reset_size = ui_update.reset_size.__get__(self)
self.resize_window = partial(ui_update.resize_window, self)
self.show_detailed_stats_std = partial(GPUMonitor.show_detailed_stats_std, self)
self.show_cpu_graph = partial(GPUMonitor.show_cpu_graph, self)
self.show_ram_graph = partial(GPUMonitor.show_ram_graph, self)
self.rename_model = partial(utils.rename_model, self)
self.show_model_context_menu = partial(utils.show_model_context_menu, self)
# Set up main widget and layout
main_widget = QWidget()
main_layout = QVBoxLayout(main_widget)
main_layout.setContentsMargins(0, 0, 0, 0)
main_layout.setSpacing(0)
# Custom title bar
self.title_bar = CustomTitleBar(self)
main_layout.addWidget(self.title_bar)
# Menu bar
self.menubar = QMenuBar()
self.title_bar.layout().insertWidget(1, self.menubar)
# File menu
file_menu = self.menubar.addMenu(f"&{FILE}")
close_action = QAction(f"&{CLOSE}", self)
close_action.setShortcut(QKeySequence("Alt+F4"))
close_action.triggered.connect(self.close)
save_preset_action = QAction(f"&{SAVE_PRESET}", self)
save_preset_action.setShortcut(QKeySequence("Ctrl+S"))
save_preset_action.triggered.connect(self.save_preset)
load_preset_action = QAction(f"&{SAVE_PRESET}", self)
load_preset_action.setShortcut(QKeySequence("Ctrl+S"))
load_preset_action.triggered.connect(self.load_preset)
file_menu.addAction(close_action)
file_menu.addAction(save_preset_action)
file_menu.addAction(load_preset_action)
# AutoFP8 Window
self.fp8_dialog = QDialog(self)
self.fp8_dialog.setWindowTitle(QUANTIZE_TO_FP8_DYNAMIC)
self.fp8_dialog.setFixedWidth(500)
self.fp8_layout = QVBoxLayout()
# Input path
input_layout = QHBoxLayout()
self.fp8_input = QLineEdit()
input_button = QPushButton(BROWSE)
input_button.clicked.connect(
lambda: self.fp8_input.setText(
QFileDialog.getExistingDirectory(self, OPEN_MODEL_FOLDER)
)
)
input_layout.addWidget(QLabel(INPUT_MODEL))
input_layout.addWidget(self.fp8_input)
input_layout.addWidget(input_button)
self.fp8_layout.addLayout(input_layout)
# Output path
output_layout = QHBoxLayout()
self.fp8_output = QLineEdit()
output_button = QPushButton(BROWSE)
output_button.clicked.connect(
lambda: self.fp8_output.setText(
QFileDialog.getExistingDirectory(self, OPEN_MODEL_FOLDER)
)
)
output_layout.addWidget(QLabel(OUTPUT))
output_layout.addWidget(self.fp8_output)
output_layout.addWidget(output_button)
self.fp8_layout.addLayout(output_layout)
# Quantize button
quantize_button = QPushButton(QUANTIZE)
quantize_button.clicked.connect(
lambda: self.quantize_to_fp8_dynamic(
self.fp8_input.text(), self.fp8_output.text()
)
)
self.fp8_layout.addWidget(quantize_button)
self.fp8_dialog.setLayout(self.fp8_layout)
# Split GGUF Window
self.split_gguf_dialog = QDialog(self)
self.split_gguf_dialog.setWindowTitle(SPLIT_GGUF)
self.split_gguf_dialog.setFixedWidth(500)
self.split_gguf_layout = QVBoxLayout()
# Input path
input_layout = QHBoxLayout()
self.split_gguf_input = QLineEdit()
input_button = QPushButton(BROWSE)
input_button.clicked.connect(
lambda: self.split_gguf_input.setText(
QFileDialog.getOpenFileName(self, SELECT_FILE, filter=GGUF_FILES)[0]
)
)
input_layout.addWidget(QLabel(INPUT_MODEL))
input_layout.addWidget(self.split_gguf_input)
input_layout.addWidget(input_button)
self.split_gguf_layout.addLayout(input_layout)
# Output path
output_layout = QHBoxLayout()
self.split_gguf_output = QLineEdit()
output_button = QPushButton(BROWSE)
output_button.clicked.connect(
lambda: self.split_gguf_output.setText(
QFileDialog.getOpenFileName(self, SELECT_FILE, filter=GGUF_FILES)[0]
)
)
output_layout.addWidget(QLabel(OUTPUT))
output_layout.addWidget(self.split_gguf_output)
output_layout.addWidget(output_button)
self.split_gguf_layout.addLayout(output_layout)
# Split options
split_options_layout = QHBoxLayout()
self.split_max_size = QLineEdit()
self.split_max_size.setPlaceholderText(SIZE_IN_UNITS)
self.split_max_tensors = QLineEdit()
self.split_max_tensors.setPlaceholderText(NUMBER_OF_TENSORS)
split_options_layout.addWidget(QLabel(SPLIT_MAX_SIZE))
split_options_layout.addWidget(self.split_max_size)
split_options_layout.addWidget(QLabel(SPLIT_MAX_TENSORS))
split_options_layout.addWidget(self.split_max_tensors)
self.split_gguf_layout.addLayout(split_options_layout)
# Split button
split_button = QPushButton(SPLIT_GGUF)
split_button.clicked.connect(
lambda: self.split_gguf(
self.split_gguf_input.text(),
self.split_gguf_output.text(),
self.split_max_size.text(),
self.split_max_tensors.text(),
)
)
self.split_gguf_layout.addWidget(split_button)
self.split_gguf_dialog.setLayout(self.split_gguf_layout)
# Merge GGUF Window
self.merge_gguf_dialog = QDialog(self)
self.merge_gguf_dialog.setWindowTitle(MERGE_GGUF)
self.merge_gguf_dialog.setFixedWidth(500)
self.merge_gguf_layout = QVBoxLayout()
# Input path
input_layout = QHBoxLayout()
self.merge_gguf_input = QLineEdit()
input_button = QPushButton(BROWSE)
input_button.clicked.connect(
lambda: self.merge_gguf_input.setText(
QFileDialog.getOpenFileName(self, SELECT_FILE, filter=GGUF_FILES)[0]
)
)
input_layout.addWidget(QLabel(INPUT_MODEL))
input_layout.addWidget(self.merge_gguf_input)
input_layout.addWidget(input_button)
self.merge_gguf_layout.addLayout(input_layout)
# Output path
output_layout = QHBoxLayout()
self.merge_gguf_output = QLineEdit()
output_button = QPushButton(BROWSE)
output_button.clicked.connect(
lambda: self.merge_gguf_output.setText(
QFileDialog.getOpenFileName(self, SELECT_FILE, filter=GGUF_FILES)[0]
)
)
output_layout.addWidget(QLabel(OUTPUT))
output_layout.addWidget(self.merge_gguf_output)
output_layout.addWidget(output_button)
self.merge_gguf_layout.addLayout(output_layout)
# Merge button
split_button = QPushButton(MERGE_GGUF)
split_button.clicked.connect(
lambda: self.merge_gguf(
self.merge_gguf_input.text(),
self.merge_gguf_output.text(),
)
)
self.merge_gguf_layout.addWidget(split_button)
self.merge_gguf_dialog.setLayout(self.merge_gguf_layout)
# HF Upload Window
self.hf_upload_dialog = QDialog(self)
self.hf_upload_dialog.setWindowTitle(HF_UPLOAD)
self.hf_upload_dialog.setFixedWidth(500)
self.hf_upload_layout = QVBoxLayout()
# Form layout for inputs
form_layout = QFormLayout()
# Repo input
self.hf_repo_input = QLineEdit()
form_layout.addRow(HF_REPOSITORY, self.hf_repo_input)
# Remote path input
self.hf_remote_path_input = QLineEdit()
form_layout.addRow(HF_REMOTE_PATH, self.hf_remote_path_input)
# Local file/folder input
local_path_layout = QHBoxLayout()
self.hf_local_path_input = QLineEdit()
local_path_button = QPushButton(BROWSE)
local_path_button.clicked.connect(self.browse_local_path)
local_path_layout.addWidget(self.hf_local_path_input)
local_path_layout.addWidget(local_path_button)
form_layout.addRow(HF_LOCAL_PATH, local_path_layout)
self.hf_upload_layout.addLayout(form_layout)
# Upload type (file or folder)
upload_type_group = QGroupBox(UPLOAD_TYPE)
upload_type_layout = QHBoxLayout()
self.upload_type_group = QButtonGroup()
self.upload_type_file = QRadioButton(FILE)
self.upload_type_folder = QRadioButton(FOLDER)
self.upload_type_group.addButton(self.upload_type_file)
self.upload_type_group.addButton(self.upload_type_folder)
upload_type_layout.addWidget(self.upload_type_file)
upload_type_layout.addWidget(self.upload_type_folder)
upload_type_group.setLayout(upload_type_layout)
self.hf_upload_layout.addWidget(upload_type_group)
# Repo type (dataset/space/model)
repo_type_group = QGroupBox(HF_REPOSITORY_TYPE)
repo_type_layout = QHBoxLayout()
self.repo_type_group = QButtonGroup()
self.repo_type_model = QRadioButton(MODEL)
self.repo_type_dataset = QRadioButton(DATASET)
self.repo_type_space = QRadioButton(SPACE)
self.repo_type_group.addButton(self.repo_type_model)
self.repo_type_group.addButton(self.repo_type_dataset)
self.repo_type_group.addButton(self.repo_type_space)
repo_type_layout.addWidget(self.repo_type_model)
repo_type_layout.addWidget(self.repo_type_dataset)
repo_type_layout.addWidget(self.repo_type_space)
repo_type_group.setLayout(repo_type_layout)
self.hf_upload_layout.addWidget(repo_type_group)
# Upload button
upload_button = QPushButton(UPLOAD)
upload_button.clicked.connect(self.transfer_to_hf)
self.hf_upload_layout.addWidget(upload_button)
self.hf_upload_dialog.setLayout(self.hf_upload_layout)
# Tools menu
tools_menu = self.menubar.addMenu(f"&{TOOLS}")
autofp8_action = QAction(f"&{AUTOFP8}", self)
autofp8_action.setShortcut(QKeySequence("Shift+Q"))
autofp8_action.triggered.connect(self.fp8_dialog.exec)
split_gguf_action = QAction(f"&{SPLIT_GGUF}", self)
split_gguf_action.setShortcut(QKeySequence("Shift+G"))
split_gguf_action.triggered.connect(self.split_gguf_dialog.exec)
merge_gguf_action = QAction(f"&{MERGE_GGUF}", self)
merge_gguf_action.setShortcut(QKeySequence("Shift+M"))
merge_gguf_action.triggered.connect(self.merge_gguf_dialog.exec)
hf_transfer_action = QAction(f"&{HF_TRANSFER}", self)
hf_transfer_action.setShortcut(QKeySequence("Shift+H"))
hf_transfer_action.triggered.connect(self.hf_upload_dialog.exec)
tools_menu.addAction(hf_transfer_action)
tools_menu.addAction(autofp8_action)
tools_menu.addAction(split_gguf_action)
tools_menu.addAction(merge_gguf_action)
# Help menu
help_menu = self.menubar.addMenu(f"&{HELP}")
about_action = QAction(f"&{ABOUT}", self)
about_action.setShortcut(QKeySequence("Ctrl+Q"))
about_action.triggered.connect(self.show_about)
help_menu.addAction(about_action)
# Content widget
content_widget = QWidget()
content_layout = QHBoxLayout(content_widget)
# Wrap content in a scroll area
scroll_area = QScrollArea()
scroll_area.setWidgetResizable(True) # Allow content to resize
scroll_area.setWidget(content_widget)
# Add scroll area to main layout
main_layout.addWidget(scroll_area)
self.setCentralWidget(main_widget)
# Styling
self.setStyleSheet(
"""
AutoGGUF {
background-color: #2b2b2b;
border-radius: 10px;
}
"""
)
# Initialize threads
self.quant_threads = []
# Add all widgets to content_layout
left_widget = QWidget()
right_widget = QWidget()
left_widget.setMinimumWidth(1100)
right_widget.setMinimumWidth(400)
left_layout = QVBoxLayout(left_widget)
right_layout = QVBoxLayout(right_widget)
content_layout.addWidget(left_widget)
content_layout.addWidget(right_widget)
# System info
self.ram_bar = QProgressBar()
self.cpu_bar = QProgressBar()
self.cpu_label = QLabel()
self.gpu_monitor = GPUMonitor()
left_layout.addWidget(QLabel(RAM_USAGE))
left_layout.addWidget(self.ram_bar)
left_layout.addWidget(QLabel(CPU_USAGE))
left_layout.addWidget(self.cpu_bar)
left_layout.addWidget(QLabel(GPU_USAGE))
left_layout.addWidget(self.gpu_monitor)
# Add mouse click event handlers for RAM and CPU bars
self.ram_bar.mouseDoubleClickEvent = self.show_ram_graph
self.cpu_bar.mouseDoubleClickEvent = self.show_cpu_graph
# Initialize data lists for CPU and RAM usage
self.cpu_data = []
self.ram_data = []
# Timer for updating system info
self.timer = QTimer()
self.timer.timeout.connect(self.update_system_info)
self.timer.start(200)
# Backend selection
backend_layout = QHBoxLayout()
self.backend_combo = QComboBox()
self.refresh_backends_button = QPushButton(REFRESH_BACKENDS)
self.refresh_backends_button.clicked.connect(self.refresh_backends)
backend_layout.addWidget(QLabel(BACKEND))
backend_layout.addWidget(self.backend_combo)
backend_layout.addWidget(self.refresh_backends_button)
left_layout.addLayout(backend_layout)
# Download llama.cpp section
download_group = QGroupBox(DOWNLOAD_LLAMACPP)
download_layout = QFormLayout()
self.release_combo = QComboBox()
self.refresh_releases_button = QPushButton(REFRESH_RELEASES)
self.refresh_releases_button.clicked.connect(self.refresh_releases)
release_layout = QHBoxLayout()
release_layout.addWidget(self.release_combo)
release_layout.addWidget(self.refresh_releases_button)
download_layout.addRow(SELECT_RELEASE, release_layout)
self.asset_combo = QComboBox()
self.asset_combo.currentIndexChanged.connect(self.update_cuda_option)
download_layout.addRow(SELECT_ASSET, self.asset_combo)
self.cuda_extract_checkbox = QCheckBox(EXTRACT_CUDA_FILES)
self.cuda_extract_checkbox.setVisible(False)
download_layout.addRow(self.cuda_extract_checkbox)
self.cuda_backend_label = QLabel(SELECT_CUDA_BACKEND)
self.cuda_backend_label.setVisible(False)
self.backend_combo_cuda = QComboBox()
self.backend_combo_cuda.setVisible(False)
download_layout.addRow(self.cuda_backend_label, self.backend_combo_cuda)
self.download_progress = QProgressBar()
self.download_button = QPushButton(DOWNLOAD)
self.download_button.clicked.connect(self.download_llama_cpp)
download_layout.addRow(self.download_progress)
download_layout.addRow(self.download_button)
download_group.setLayout(download_layout)
right_layout.addWidget(download_group)
# Models path
models_layout = QHBoxLayout()
self.models_input = QLineEdit(os.path.abspath(self.model_dir_name))
models_button = QPushButton(BROWSE)
models_button.clicked.connect(self.browse_models)
models_layout.addWidget(QLabel(MODELS_PATH))
models_layout.addWidget(self.models_input)
models_layout.addWidget(models_button)
left_layout.addLayout(models_layout)
# Output path
output_layout = QHBoxLayout()
self.output_input = QLineEdit(os.path.abspath(self.output_dir_name))
output_button = QPushButton(BROWSE)
output_button.clicked.connect(self.browse_output)
output_layout.addWidget(QLabel(OUTPUT_PATH))
output_layout.addWidget(self.output_input)
output_layout.addWidget(output_button)
left_layout.addLayout(output_layout)
# Logs path
logs_layout = QHBoxLayout()
self.logs_input = QLineEdit(os.path.abspath(self.log_dir_name))
logs_button = QPushButton(BROWSE)
logs_button.clicked.connect(self.browse_logs)
logs_layout.addWidget(QLabel(LOGS_PATH))
logs_layout.addWidget(self.logs_input)
logs_layout.addWidget(logs_button)
left_layout.addLayout(logs_layout)
# Model list
self.model_tree = QTreeWidget()
self.model_tree.setHeaderHidden(True)
left_layout.addWidget(QLabel(AVAILABLE_MODELS))
left_layout.addWidget(self.model_tree)
# Support right-click menu
self.model_tree.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu)
self.model_tree.customContextMenuRequested.connect(self.show_model_context_menu)
# Refresh models button
refresh_models_button = QPushButton(REFRESH_MODELS)
refresh_models_button.clicked.connect(self.load_models)
left_layout.addWidget(refresh_models_button)
# Import Model button
import_model_button = QPushButton(IMPORT_MODEL)
import_model_button.clicked.connect(self.import_model)
left_layout.addWidget(import_model_button)
# Quantization options
quant_options_scroll = QScrollArea()
quant_options_widget = QWidget()
quant_options_layout = QFormLayout()
self.quant_type = QListWidget()
self.quant_type.setMinimumHeight(100)
self.quant_type.setMinimumWidth(150)
self.quant_type.setSelectionMode(QListWidget.SelectionMode.MultiSelection)
quant_types = [
"IQ2_XXS",
"IQ2_XS",
"IQ2_S",
"IQ2_M",
"IQ1_S",
"IQ1_M",
"Q2_K",
"Q2_K_S",
"IQ3_XXS",
"IQ3_S",
"IQ3_M",
"IQ3_XS",
"Q3_K_S",
"Q3_K_M",
"Q3_K_L",
"IQ4_NL",
"IQ4_XS",
"Q4_K_S",
"Q4_K_M",
"Q5_K_S",
"Q5_K_M",
"Q6_K",
"Q8_0",
"Q4_0",
"Q4_1",
"Q5_0",
"Q5_1",
"Q4_0_4_4",
"Q4_0_4_8",
"Q4_0_8_8",
"BF16",
"F16",
"F32",
"COPY",
]
self.quant_type.addItems(quant_types)
quant_options_layout.addRow(
self.create_label(QUANTIZATION_TYPE, SELECT_QUANTIZATION_TYPE),
self.quant_type,
)
self.allow_requantize = QCheckBox(ALLOW_REQUANTIZE)
self.leave_output_tensor = QCheckBox(LEAVE_OUTPUT_TENSOR)
self.pure = QCheckBox(PURE)
quant_options_layout.addRow(
self.create_label("", ALLOWS_REQUANTIZING), self.allow_requantize
)
quant_options_layout.addRow(
self.create_label("", LEAVE_OUTPUT_WEIGHT), self.leave_output_tensor
)
quant_options_layout.addRow(
self.create_label("", DISABLE_K_QUANT_MIXTURES), self.pure
)
self.imatrix = QLineEdit()
self.imatrix_button = QPushButton(BROWSE)
self.imatrix_button.clicked.connect(self.browse_imatrix)
imatrix_layout = QHBoxLayout()
imatrix_layout.addWidget(self.imatrix)
imatrix_layout.addWidget(self.imatrix_button)
quant_options_layout.addRow(
self.create_label(IMATRIX, USE_DATA_AS_IMPORTANCE_MATRIX), imatrix_layout
)
self.include_weights = QLineEdit()
self.exclude_weights = QLineEdit()
quant_options_layout.addRow(
self.create_label(INCLUDE_WEIGHTS, USE_IMPORTANCE_MATRIX_FOR_TENSORS),
self.include_weights,
)
quant_options_layout.addRow(
self.create_label(EXCLUDE_WEIGHTS, DONT_USE_IMPORTANCE_MATRIX_FOR_TENSORS),
self.exclude_weights,
)
tensor_types = [
"Q2_K",
"Q2_K_S",
"Q3_K_S",
"Q3_K_M",
"Q3_K_L",
"Q4_K_S",
"Q4_K_M",
"Q5_K_S",
"Q5_K_M",
"Q6_K",
"Q8_0",
"Q4_0",
"Q4_1",
"Q5_0",
"Q5_1",
"BF16",
"F16",
"F32",
]
self.use_output_tensor_type = QCheckBox(USE_OUTPUT_TENSOR_TYPE)
self.output_tensor_type = QComboBox()
self.output_tensor_type.addItems(tensor_types)
self.output_tensor_type.setEnabled(False)
self.use_output_tensor_type.toggled.connect(
lambda checked: self.output_tensor_type.setEnabled(checked)
)
output_tensor_layout = QHBoxLayout()
output_tensor_layout.addWidget(self.use_output_tensor_type)
output_tensor_layout.addWidget(self.output_tensor_type)
quant_options_layout.addRow(
self.create_label(OUTPUT_TENSOR_TYPE, USE_THIS_TYPE_FOR_OUTPUT_WEIGHT),
output_tensor_layout,
)
self.use_token_embedding_type = QCheckBox(USE_TOKEN_EMBEDDING_TYPE)
self.token_embedding_type = QComboBox()
self.token_embedding_type.addItems(tensor_types)
self.token_embedding_type.setEnabled(False)
self.use_token_embedding_type.toggled.connect(
lambda checked: self.token_embedding_type.setEnabled(checked)
)
token_embedding_layout = QHBoxLayout()
token_embedding_layout.addWidget(self.use_token_embedding_type)
token_embedding_layout.addWidget(self.token_embedding_type)
quant_options_layout.addRow(
self.create_label(TOKEN_EMBEDDING_TYPE, USE_THIS_TYPE_FOR_TOKEN_EMBEDDINGS),
token_embedding_layout,
)
self.keep_split = QCheckBox(KEEP_SPLIT)
self.override_kv = QLineEdit()
quant_options_layout.addRow(
self.create_label("", WILL_GENERATE_QUANTIZED_MODEL_IN_SAME_SHARDS),
self.keep_split,
)
# KV Override section
self.kv_override_widget = QWidget()
self.kv_override_layout = QVBoxLayout(self.kv_override_widget)
self.kv_override_entries = []
add_override_button = QPushButton(ADD_NEW_OVERRIDE)
add_override_button.clicked.connect(self.add_kv_override)
kv_override_scroll = QScrollArea()
kv_override_scroll.setWidgetResizable(True)
kv_override_scroll.setWidget(self.kv_override_widget)
kv_override_scroll.setMinimumHeight(200)
kv_override_main_layout = QVBoxLayout()
kv_override_main_layout.addWidget(kv_override_scroll)
kv_override_main_layout.addWidget(add_override_button)
quant_options_layout.addRow(
self.create_label(KV_OVERRIDES, OVERRIDE_MODEL_METADATA),
kv_override_main_layout,
)
self.extra_arguments = QLineEdit()
quant_options_layout.addRow(
self.create_label(EXTRA_ARGUMENTS, EXTRA_COMMAND_ARGUMENTS),
self.extra_arguments,
)
quant_options_widget.setLayout(quant_options_layout)
quant_options_scroll.setWidget(quant_options_widget)
quant_options_scroll.setWidgetResizable(True)
left_layout.addWidget(quant_options_scroll)
# Quantize button layout
quantize_layout = QHBoxLayout()
quantize_button = QPushButton(QUANTIZE_MODEL)
quantize_button.clicked.connect(self.quantize_model)
save_preset_button = QPushButton(SAVE_PRESET)
save_preset_button.clicked.connect(self.save_preset)
load_preset_button = QPushButton(LOAD_PRESET)
load_preset_button.clicked.connect(self.load_preset)
quantize_layout.addWidget(quantize_button)
quantize_layout.addWidget(save_preset_button)
quantize_layout.addWidget(load_preset_button)
left_layout.addLayout(quantize_layout)
# Task list
self.task_list = QListWidget()
self.task_list.setSelectionMode(QListWidget.SelectionMode.NoSelection)
self.task_list.itemDoubleClicked.connect(self.show_task_details)
left_layout.addWidget(QLabel(TASKS))
left_layout.addWidget(self.task_list)
# IMatrix section
imatrix_group = QGroupBox(IMATRIX_GENERATION)
imatrix_layout = QFormLayout()
self.imatrix_datafile = QLineEdit()
self.imatrix_datafile_button = QPushButton(BROWSE)
self.imatrix_datafile_button.clicked.connect(self.browse_imatrix_datafile)
imatrix_datafile_layout = QHBoxLayout()
imatrix_datafile_layout.addWidget(self.imatrix_datafile)
imatrix_datafile_layout.addWidget(self.imatrix_datafile_button)
imatrix_layout.addRow(
self.create_label(DATA_FILE, INPUT_DATA_FILE_FOR_IMATRIX),
imatrix_datafile_layout,
)
self.imatrix_model = QLineEdit()
self.imatrix_model_button = QPushButton(BROWSE)
self.imatrix_model_button.clicked.connect(self.browse_imatrix_model)
imatrix_model_layout = QHBoxLayout()
imatrix_model_layout.addWidget(self.imatrix_model)
imatrix_model_layout.addWidget(self.imatrix_model_button)
imatrix_layout.addRow(
self.create_label(MODEL, MODEL_TO_BE_QUANTIZED), imatrix_model_layout
)
self.imatrix_output = QLineEdit()
self.imatrix_output_button = QPushButton(BROWSE)
self.imatrix_output_button.clicked.connect(self.browse_imatrix_output)
imatrix_output_layout = QHBoxLayout()
imatrix_output_layout.addWidget(self.imatrix_output)
imatrix_output_layout.addWidget(self.imatrix_output_button)
imatrix_layout.addRow(
self.create_label(OUTPUT, OUTPUT_PATH_FOR_GENERATED_IMATRIX),
imatrix_output_layout,
)
self.imatrix_frequency = QSpinBox()
self.imatrix_frequency.setRange(1, 100)
self.imatrix_frequency.setValue(1)
imatrix_layout.addRow(
self.create_label(OUTPUT_FREQUENCY, HOW_OFTEN_TO_SAVE_IMATRIX),
self.imatrix_frequency,
)
self.imatrix_ctx_size = QSpinBox()
self.imatrix_ctx_size.setRange(1, 1048576)
self.imatrix_ctx_size.setValue(512)
imatrix_layout.addRow(
self.create_label(CONTEXT_SIZE, CONTEXT_SIZE_FOR_IMATRIX),
self.imatrix_ctx_size,
)
threads_layout = QHBoxLayout()
self.threads_slider = QSlider(Qt.Orientation.Horizontal)
self.threads_slider.setRange(1, 64)
self.threads_slider.valueChanged.connect(self.update_threads_spinbox)
self.threads_spinbox = QSpinBox()
self.threads_spinbox.setRange(1, 128)
self.threads_spinbox.valueChanged.connect(self.update_threads_slider)
self.threads_spinbox.setMinimumWidth(75)
threads_layout.addWidget(self.threads_slider)
threads_layout.addWidget(self.threads_spinbox)
imatrix_layout.addRow(
self.create_label(THREADS, NUMBER_OF_THREADS_FOR_IMATRIX), threads_layout
)
gpu_offload_layout = QHBoxLayout()
self.gpu_offload_slider = QSlider(Qt.Orientation.Horizontal)
self.gpu_offload_slider.setRange(0, 200)
self.gpu_offload_slider.valueChanged.connect(self.update_gpu_offload_spinbox)
self.gpu_offload_spinbox = QSpinBox()
self.gpu_offload_spinbox.setRange(0, 1000)
self.gpu_offload_spinbox.valueChanged.connect(self.update_gpu_offload_slider)
self.gpu_offload_spinbox.setMinimumWidth(75)
self.gpu_offload_auto = QCheckBox(AUTO)
self.gpu_offload_auto.stateChanged.connect(self.toggle_gpu_offload_auto)
gpu_offload_layout.addWidget(self.gpu_offload_slider)
gpu_offload_layout.addWidget(self.gpu_offload_spinbox)
gpu_offload_layout.addWidget(self.gpu_offload_auto)
imatrix_layout.addRow(
self.create_label(GPU_OFFLOAD, SET_GPU_OFFLOAD_VALUE), gpu_offload_layout
)
imatrix_generate_button = QPushButton(GENERATE_IMATRIX)
imatrix_generate_button.clicked.connect(self.generate_imatrix)
imatrix_layout.addRow(imatrix_generate_button)
imatrix_group.setLayout(imatrix_layout)
right_layout.addWidget(imatrix_group)
# LoRA Conversion Section
lora_group = QGroupBox(LORA_CONVERSION)
lora_layout = QFormLayout()
self.lora_input = QLineEdit()
lora_input_button = QPushButton(BROWSE)
lora_input_button.clicked.connect(self.browse_lora_input)
lora_input_layout = QHBoxLayout()
lora_input_layout.addWidget(self.lora_input)
lora_input_layout.addWidget(lora_input_button)
lora_layout.addRow(
self.create_label(LORA_INPUT_PATH, SELECT_LORA_INPUT_DIRECTORY),
lora_input_layout,
)
self.lora_output = QLineEdit()
lora_output_button = QPushButton(BROWSE)
lora_output_button.clicked.connect(self.browse_lora_output)
lora_output_layout = QHBoxLayout()
lora_output_layout.addWidget(self.lora_output)
lora_output_layout.addWidget(lora_output_button)
lora_layout.addRow(
self.create_label(LORA_OUTPUT_PATH, SELECT_LORA_OUTPUT_FILE),
lora_output_layout,
)
self.lora_output_type_combo = QComboBox()
self.lora_output_type_combo.addItems(["GGML", "GGUF"])
self.lora_output_type_combo.currentIndexChanged.connect(
self.update_base_model_visibility
)
lora_layout.addRow(
self.create_label(OUTPUT_TYPE, SELECT_OUTPUT_TYPE),
self.lora_output_type_combo,
)
self.base_model_label = self.create_label(BASE_MODEL, SELECT_BASE_MODEL_FILE)
self.base_model_path = QLineEdit()
base_model_button = QPushButton(BROWSE)
base_model_button.clicked.connect(self.browse_base_model)
base_model_layout = QHBoxLayout()
base_model_layout.addWidget(self.base_model_path, 1)
base_model_layout.addWidget(base_model_button)
self.base_model_widget = QWidget()
self.base_model_widget.setLayout(base_model_layout)
self.base_model_wrapper = QWidget()
wrapper_layout = QHBoxLayout(self.base_model_wrapper)
wrapper_layout.addWidget(self.base_model_label)
wrapper_layout.addWidget(self.base_model_widget, 1)
wrapper_layout.setContentsMargins(0, 0, 0, 0)
lora_layout.addRow(self.base_model_wrapper)
self.update_base_model_visibility(self.lora_output_type_combo.currentIndex())
lora_convert_button = QPushButton(CONVERT_LORA)
lora_convert_button.clicked.connect(self.convert_lora)
lora_layout.addRow(lora_convert_button)
lora_group.setLayout(lora_layout)
right_layout.addWidget(lora_group)
# Export LoRA
export_lora_group = QGroupBox(EXPORT_LORA)
export_lora_layout = QFormLayout()
self.export_lora_model = QLineEdit()
export_lora_model_button = QPushButton(BROWSE)
export_lora_model_button.clicked.connect(self.browse_export_lora_model)
export_lora_model_layout = QHBoxLayout()
export_lora_model_layout.addWidget(self.export_lora_model)
export_lora_model_layout.addWidget(export_lora_model_button)
export_lora_layout.addRow(
self.create_label(MODEL, SELECT_MODEL_FILE), export_lora_model_layout
)
self.export_lora_output = QLineEdit()
export_lora_output_button = QPushButton(BROWSE)
export_lora_output_button.clicked.connect(self.browse_export_lora_output)
export_lora_output_layout = QHBoxLayout()
export_lora_output_layout.addWidget(self.export_lora_output)
export_lora_output_layout.addWidget(export_lora_output_button)
export_lora_layout.addRow(
self.create_label(OUTPUT, SELECT_OUTPUT_FILE), export_lora_output_layout
)
self.export_lora_adapters = QListWidget()
add_adapter_button = QPushButton(ADD_ADAPTER)
add_adapter_button.clicked.connect(self.add_lora_adapter)
adapters_layout = QVBoxLayout()
adapters_layout.addWidget(self.export_lora_adapters)
buttons_layout = QHBoxLayout()
buttons_layout.addWidget(add_adapter_button)
adapters_layout.addLayout(buttons_layout)
export_lora_layout.addRow(
self.create_label(GGML_LORA_ADAPTERS, SELECT_LORA_ADAPTER_FILES),
adapters_layout,
)
self.export_lora_threads = QSpinBox()
self.export_lora_threads.setRange(1, 64)
self.export_lora_threads.setValue(8)
export_lora_layout.addRow(
self.create_label(THREADS, NUMBER_OF_THREADS_FOR_LORA_EXPORT),
self.export_lora_threads,
)
export_lora_button = QPushButton(EXPORT_LORA)
export_lora_button.clicked.connect(self.export_lora)
export_lora_layout.addRow(export_lora_button)
export_lora_group.setLayout(export_lora_layout)
right_layout.addWidget(export_lora_group)
# HuggingFace to GGUF Conversion
hf_to_gguf_group = QGroupBox(HF_TO_GGUF_CONVERSION)
hf_to_gguf_layout = QFormLayout()
self.hf_model_input = QLineEdit()
hf_model_input_button = QPushButton(BROWSE)
hf_model_input_button.clicked.connect(self.browse_hf_model_input)
hf_model_input_layout = QHBoxLayout()
hf_model_input_layout.addWidget(self.hf_model_input)
hf_model_input_layout.addWidget(hf_model_input_button)
hf_to_gguf_layout.addRow(MODEL_DIRECTORY, hf_model_input_layout)
self.hf_outfile = QLineEdit()
hf_outfile_button = QPushButton(BROWSE)
hf_outfile_button.clicked.connect(self.browse_hf_outfile)
hf_outfile_layout = QHBoxLayout()
hf_outfile_layout.addWidget(self.hf_outfile)
hf_outfile_layout.addWidget(hf_outfile_button)
hf_to_gguf_layout.addRow(OUTPUT_FILE, hf_outfile_layout)
self.hf_outtype = QComboBox()
self.hf_outtype.addItems(["f32", "f16", "bf16", "q8_0", "auto"])
hf_to_gguf_layout.addRow(OUTPUT_TYPE, self.hf_outtype)
self.hf_vocab_only = QCheckBox(VOCAB_ONLY)
hf_to_gguf_layout.addRow(self.hf_vocab_only)
self.hf_use_temp_file = QCheckBox(USE_TEMP_FILE)
hf_to_gguf_layout.addRow(self.hf_use_temp_file)
self.hf_no_lazy = QCheckBox(NO_LAZY_EVALUATION)
hf_to_gguf_layout.addRow(self.hf_no_lazy)
self.hf_verbose = QCheckBox(VERBOSE)
hf_to_gguf_layout.addRow(self.hf_verbose)
self.hf_dry_run = QCheckBox(DRY_RUN)
hf_to_gguf_layout.addRow(self.hf_dry_run)
self.hf_model_name = QLineEdit()
hf_to_gguf_layout.addRow(MODEL_NAME, self.hf_model_name)
self.hf_split_max_size = QLineEdit()
hf_to_gguf_layout.addRow(SPLIT_MAX_SIZE, self.hf_split_max_size)
hf_to_gguf_convert_button = QPushButton(CONVERT_HF_TO_GGUF)
hf_to_gguf_convert_button.clicked.connect(self.convert_hf_to_gguf)
hf_to_gguf_layout.addRow(hf_to_gguf_convert_button)
hf_to_gguf_group.setLayout(hf_to_gguf_layout)
right_layout.addWidget(hf_to_gguf_group)
# Modify the task list to support right-click menu
self.task_list.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu)
self.task_list.customContextMenuRequested.connect(self.show_task_context_menu)
# Set initial state
self.update_base_model_visibility(self.lora_output_type_combo.currentIndex())
# Initialize releases and backends
if os.environ.get("AUTOGGUF_CHECK_BACKEND", "").lower() == "enabled":
self.refresh_releases()
self.refresh_backends()
if os.environ.get("AUTOGGUF_CHECK_UPDATE", "").lower() == "enabled":
self.logger.info(CHECKING_FOR_UPDATES)
self.check_for_updates()
# Load theme based on environment variable
theme_path = os.environ.get("AUTOGGUF_THEME")
if theme_path:
try:
with open(theme_path, "r") as f:
theme = f.read()
self.setStyleSheet(theme)
except (FileNotFoundError, OSError):
# If the specified theme file is not found or inaccessible,
# fall back to the default theme
with open(resource_path("assets/default.css"), "r") as f:
default_theme = f.read()
self.setStyleSheet(default_theme)
else:
# If the environment variable is not set, use the default theme
with open(resource_path("assets/default.css"), "r") as f:
default_theme = f.read()
self.setStyleSheet(default_theme)
# Imported models from external paths
self.imported_models = []
# Load models
self.load_models()
# Load plugins
self.plugins = Plugins.load_plugins(self)
Plugins.apply_plugins(self)
# Finish initialization
self.logger.info(AUTOGGUF_INITIALIZATION_COMPLETE)
self.logger.info(STARTUP_ELASPED_TIME.format(init_timer.elapsed()))
def resizeEvent(self, event) -> None:
    """Re-apply the rounded-corner window mask after the window is resized.

    The base-class handler runs first; the mask is then rebuilt from the
    window's current rectangle using a corner radius of 10 on both axes.
    """
    super().resizeEvent(event)
    corner_radius = 10
    outline = QPainterPath()
    outline.addRoundedRect(self.rect(), corner_radius, corner_radius)
    self.setMask(QRegion(outline.toFillPolygon().toPolygon()))
def delete_model(self, item):
    """Ask for confirmation, then delete the model file behind a tree item.

    The file name is the item's first-column text, resolved against the
    directory entered in ``models_input``. After a successful delete the item
    is taken out of the top level of ``model_tree``. Any exception raised
    while deleting is reported through ``show_error``.
    """
    model_name = item.text(0)
    buttons = QMessageBox.StandardButton
    answer = QMessageBox.question(
        self,
        CONFIRM_DELETE,
        DELETE_WARNING.format(model_name),
        buttons.Yes | buttons.No,
        buttons.No,
    )
    if answer != buttons.Yes:
        return

    target = os.path.join(self.models_input.text(), model_name)
    try:
        os.remove(target)
        row = self.model_tree.indexOfTopLevelItem(item)
        self.model_tree.takeTopLevelItem(row)
        self.logger.info(MODEL_DELETED_SUCCESSFULLY.format(model_name))
    except Exception as exc:
        show_error(self.logger, f"Error deleting model: {exc}")
def check_for_updates(self) -> None:
    """Query GitHub for the latest release and prompt if it is newer.

    The release tag and ``AUTOGGUF_VERSION`` are compared numerically,
    component by component, so that e.g. ``v1.10.0`` is recognised as newer
    than ``v1.9.0`` (a plain string comparison gets this wrong).

    Network failures, non-200 responses and malformed payloads are logged as
    a warning; they never propagate to the caller.
    """

    def version_key(tag) -> tuple:
        """Turn a tag such as ``"v1.10.0"`` into a comparable tuple ``(1, 10)``."""
        numbers = []
        for piece in str(tag).strip().lstrip("vV").split("."):
            # Keep only the leading ASCII digits so suffixes like "0-beta"
            # do not break the integer conversion.
            digits = ""
            for ch in piece:
                if not "0" <= ch <= "9":
                    break
                digits += ch
            numbers.append(int(digits) if digits else 0)
        # Drop trailing zeros so "1.9" and "1.9.0" compare equal.
        while numbers and numbers[-1] == 0:
            numbers.pop()
        return tuple(numbers)

    url = "https://api.github.com/repos/leafspark/AutoGGUF/releases/latest"
    try:
        req = urllib.request.Request(url)
        # A timeout keeps an unreachable host from blocking the caller forever.
        with urllib.request.urlopen(req, timeout=10) as response:
            if response.status != 200:
                raise urllib.error.HTTPError(
                    url, response.status, "HTTP Error", response.headers, None
                )
            latest_release = json.loads(response.read().decode("utf-8"))
        if version_key(latest_release["tag_name"]) > version_key(AUTOGGUF_VERSION):
            self.prompt_for_update(latest_release)
    except (OSError, ValueError, KeyError, TypeError) as e:
        # OSError covers URLError/HTTPError and socket timeouts; ValueError
        # covers invalid JSON or bytes; KeyError/TypeError cover a payload
        # that does not have the expected shape.
        self.logger.warning(f"{ERROR_CHECKING_FOR_UPDATES} {e}")
def prompt_for_update(self, release) -> None:
    """Show a Yes/No dialog announcing a new release; open its page on Yes.

    ``release`` is indexed with the keys ``"tag_name"`` (shown in the dialog
    text) and ``"html_url"`` (opened via ``QDesktopServices``).
    """
    yes_button = QMessageBox.StandardButton.Yes
    no_button = QMessageBox.StandardButton.No

    dialog = QMessageBox()
    dialog.setIcon(QMessageBox.Information)
    dialog.setWindowTitle(UPDATE_AVAILABLE)
    dialog.setText(NEW_VERSION_AVAILABLE.format(release["tag_name"]))
    dialog.setInformativeText(DOWNLOAD_NEW_VERSION)
    for button in (yes_button, no_button):
        dialog.addButton(button)
    dialog.setDefaultButton(yes_button)

    if dialog.exec() != yes_button:
        return
    QDesktopServices.openUrl(QUrl(release["html_url"]))
def keyPressEvent(self, event) -> None:
    """Handle the Ctrl+= / Ctrl+- / Ctrl+0 window-size shortcuts.

    The event is always forwarded to the base-class handler afterwards.
    """
    if event.modifiers() == Qt.ControlModifier:
        pressed = event.key()
        # Key_Equal stands in for "plus": Qt.Key_Plus doesn't work on some
        # keyboards.
        if pressed == Qt.Key_Equal:
            self.resize_window(larger=True)
        elif pressed == Qt.Key_Minus:
            self.resize_window(larger=False)
        elif pressed == Qt.Key_0:
            self.reset_size()
    super().keyPressEvent(event)
def refresh_backends(self) -> None:
    """Repopulate ``backend_combo`` from the sub-directories of ``llama_bin``.

    The ``llama_bin`` directory (relative to the current working directory)
    is created if missing. Every sub-directory whose name does not contain
    ``cudart-llama`` (case-insensitive) is added with its full path as item
    data. When none qualify, a disabled placeholder entry is shown instead.
    """
    self.logger.info(REFRESHING_BACKENDS)
    backends_root = os.path.abspath("llama_bin")
    os.makedirs(backends_root, exist_ok=True)
    self.backend_combo.clear()

    backends = []
    for entry in os.listdir(backends_root):
        candidate = os.path.join(backends_root, entry)
        if not os.path.isdir(candidate):
            continue
        if "cudart-llama" in entry.lower():
            continue
        backends.append((entry, candidate))

    if not backends:
        self.backend_combo.addItem(NO_BACKENDS_AVAILABLE)
        self.backend_combo.setEnabled(False)
    else:
        for backend_name, backend_dir in backends:
            self.backend_combo.addItem(backend_name, userData=backend_dir)
        # Enable the combo box now that there is something to choose from
        self.backend_combo.setEnabled(True)

    self.logger.info(FOUND_VALID_BACKENDS.format(len(backends)))
def save_task_preset(self, task_item) -> None:
    """Save the command of the thread behind ``task_item`` as a JSON preset.

    The thread is located by matching ``log_file`` against the entries of
    ``quant_threads``. If one is found, the user is asked for a destination
    file; the preset (command, backend path, log file) is written there and a
    confirmation box is shown. Nothing happens when no thread matches or the
    file dialog is cancelled.
    """
    self.logger.info(SAVING_TASK_PRESET.format(task_item.task_name))

    owner = next(
        (
            candidate
            for candidate in self.quant_threads
            if candidate.log_file == task_item.log_file
        ),
        None,
    )
    if owner is None:
        return

    preset = {
        "command": owner.command,
        "backend_path": owner.cwd,
        "log_file": owner.log_file,
    }
    destination, _ = QFileDialog.getSaveFileName(
        self, SAVE_TASK_PRESET, "", JSON_FILES
    )
    if not destination:
        return

    with open(destination, "w") as handle:
        json.dump(preset, handle, indent=4)
    QMessageBox.information(
        self, TASK_PRESET_SAVED, TASK_PRESET_SAVED_TO.format(destination)
    )
def download_finished(self, extract_dir) -> None:
    """Finish a backend download: notify the user and refresh the backend UI.

    Re-enables the download button and fills the progress bar. If the CUDA
    extraction checkbox is both checked and visible, the CUDA files are
    copied into the selected CUDA backend (or a warning is shown when no
    suitable backend is selected). Afterwards the backend list and CUDA
    options are refreshed and the freshly extracted backend is selected if it
    appears in ``backend_combo``.
    """
    self.logger.info(DOWNLOAD_FINISHED_EXTRACTED_TO.format(extract_dir))
    self.download_button.setEnabled(True)
    self.download_progress.setValue(100)

    cuda_box = self.cuda_extract_checkbox
    cuda_requested = cuda_box.isChecked() and cuda_box.isVisible()

    if not cuda_requested:
        QMessageBox.information(
            self,
            DOWNLOAD_COMPLETE,
            LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED.format(extract_dir),
        )
    else:
        cuda_backend = self.backend_combo_cuda.currentData()
        if not cuda_backend or cuda_backend == NO_SUITABLE_CUDA_BACKENDS:
            QMessageBox.warning(
                self, CUDA_EXTRACTION_FAILED, NO_SUITABLE_CUDA_BACKEND_FOUND
            )
        else:
            self.extract_cuda_files(extract_dir, cuda_backend)
            QMessageBox.information(
                self,
                DOWNLOAD_COMPLETE,
                LLAMACPP_DOWNLOADED_AND_EXTRACTED.format(extract_dir, cuda_backend),
            )

    # Pick up the new backend, and any CUDA capability it brings
    self.refresh_backends()
    self.update_cuda_option()

    # Select the newly downloaded backend when it is listed
    position = self.backend_combo.findText(os.path.basename(extract_dir))
    if position >= 0:
        self.backend_combo.setCurrentIndex(position)
def validate_quantization_inputs(self) -> None:
    """Check that everything needed for quantization has been provided.

    Raises:
        ValueError: if a backend, models path, output path, logs path or
            model selection is missing. The message lists every missing
            item, one per line.
    """
    self.logger.debug(VALIDATING_QUANTIZATION_INPUTS)
    # (current value, message to report when the value is empty)
    requirements = (
        (self.backend_combo.currentData(), NO_BACKEND_SELECTED),
        (self.models_input.text(), MODELS_PATH_REQUIRED),
        (self.output_input.text(), OUTPUT_PATH_REQUIRED),
        (self.logs_input.text(), LOGS_PATH_REQUIRED),
        (self.model_tree.currentItem(), NO_MODEL_SELECTED),
    )
    problems = [message for value, message in requirements if not value]
    if problems:
        raise ValueError("\n".join(problems))
def load_models(self) -> None:
    """Rebuild the model tree from the models directory and imported models.

    Files ending in ``.gguf`` are checked with ``verify_gguf`` and grouped
    into sharded sets (``<base>-<n>-of-<total>.gguf``) or single models.
    Names matching ``<base>.gguf.part<n>of<total>`` are listed as
    concatenated files, shown in gray with a warning tooltip. Imported models
    whose base name is not already listed are verified and added as single
    models.
    """
    self.logger.info(LOADING_MODELS)
    models_dir = self.models_input.text()
    ensure_directory(models_dir)
    self.model_tree.clear()

    shard_groups = {}
    singles = []
    concatenated = []
    shard_re = re.compile(r"(.*)-(\d+)-of-(\d+)\.gguf$")
    part_re = re.compile(r"(.*)\.gguf\.part(\d+)of(\d+)$")

    # Classify everything found in the models directory
    for entry in os.listdir(models_dir):
        if not entry.endswith(".gguf"):
            if part_re.match(entry):
                concatenated.append(entry)
            continue
        if not verify_gguf(os.path.join(models_dir, entry)):
            show_error(self.logger, INVALID_GGUF_FILE.format(entry))
            continue
        shard_match = shard_re.match(entry)
        if not shard_match:
            singles.append(entry)
            continue
        base, shard_index, _total = shard_match.groups()
        shard_groups.setdefault(base, []).append((int(shard_index), entry))

    # Add externally imported models that are not already listed by name
    for imported_path in getattr(self, "imported_models", ()):
        imported_name = os.path.basename(imported_path)
        if imported_name in singles or imported_name in concatenated:
            continue
        if verify_gguf(imported_path):
            singles.append(imported_name)
        else:
            show_error(self.logger, INVALID_GGUF_FILE.format(imported_path))

    # Sharded models: one parent node per base name, one child per shard
    user_role = Qt.ItemDataRole.UserRole
    for base, shards in shard_groups.items():
        group_item = QTreeWidgetItem(self.model_tree)
        group_item.setText(0, SHARDED_MODEL_NAME.format(base))
        # The parent carries the lowest-numbered shard as its data
        lowest_shard = min(shards, key=lambda shard: shard[0])[1]
        group_item.setData(0, user_role, lowest_shard)
        for _, shard_name in sorted(shards):
            shard_item = QTreeWidgetItem(group_item)
            shard_item.setText(0, shard_name)
            shard_item.setData(0, user_role, shard_name)

    for single_name in sorted(singles):
        self.add_model_to_tree(single_name)

    for part_name in sorted(concatenated):
        part_item = self.add_model_to_tree(part_name)
        part_item.setForeground(0, Qt.gray)
        part_item.setToolTip(0, CONCATENATED_FILE_WARNING)

    self.model_tree.expandAll()
    total_listed = len(singles) + len(shard_groups) + len(concatenated)
    self.logger.info(LOADED_MODELS.format(total_listed))
    if concatenated:
        self.logger.warning(CONCATENATED_FILES_FOUND.format(len(concatenated)))
def add_model_to_tree(self, model) -> QTreeWidgetItem:
    """Append ``model`` as a top-level item of ``model_tree`` and return it.

    If ``model`` is the base name of an imported model, the item stores the
    imported file's full path as its data and gets a tooltip showing it;
    otherwise the item's data is ``model`` itself.
    """
    entry = QTreeWidgetItem(self.model_tree)
    entry.setText(0, model)

    # First imported path whose file name equals ``model``, if any
    imported_path = next(
        (
            candidate
            for candidate in getattr(self, "imported_models", ())
            if os.path.basename(candidate) == model
        ),
        None,
    )
    if imported_path is None:
        entry.setData(0, Qt.ItemDataRole.UserRole, model)
    else:
        entry.setData(0, Qt.ItemDataRole.UserRole, imported_path)
        entry.setToolTip(0, IMPORTED_MODEL_TOOLTIP.format(imported_path))
    return entry
def extract_cuda_files(self, extract_dir, destination) -> None:
    """Copy every ``.dll`` found under ``extract_dir`` into ``destination``.

    The directory tree is walked recursively. Files are copied flat (their
    sub-directory is not recreated) with ``shutil.copy2``. The extension
    match is case-insensitive.
    """
    self.logger.info(EXTRACTING_CUDA_FILES.format(extract_dir, destination))
    for folder, _subdirs, filenames in os.walk(extract_dir):
        for filename in filenames:
            if not filename.lower().endswith(".dll"):
                continue
            shutil.copy2(
                os.path.join(folder, filename),
                os.path.join(destination, filename),
            )
def download_error(self, error_message) -> None:
    """Report a failed download, reset the download controls and clean up.

    After logging and showing the error, the file named after the currently
    selected asset is removed from the ``llama_bin`` directory if it exists.
    """
    self.logger.error(DOWNLOAD_ERROR.format(error_message))
    self.download_button.setEnabled(True)
    self.download_progress.setValue(0)
    show_error(self.logger, DOWNLOAD_FAILED.format(error_message))

    # Clean up any partially downloaded file for the selected asset
    selected_asset = self.asset_combo.currentData()
    if not selected_asset:
        return
    leftover = os.path.join(os.path.abspath("llama_bin"), selected_asset["name"])
    if os.path.exists(leftover):
        os.remove(leftover)
def browse_local_path(self) -> None:
    """Let the user pick the local file or folder to upload.

    A file dialog is shown when the "file" upload type is checked and a
    directory dialog when the "folder" type is checked. A non-empty choice is
    written to ``hf_local_path_input``.
    """
    if self.upload_type_file.isChecked():
        chosen, _ = QFileDialog.getOpenFileName(self, SELECT_FILE)
    elif self.upload_type_folder.isChecked():
        chosen = QFileDialog.getExistingDirectory(self, SELECT_FOLDER)
    else:
        return
    if chosen:
        self.hf_local_path_input.setText(chosen)
@validate_input("hf_repo_input", "hf_local_path_input", "hf_remote_path_input")
def transfer_to_hf(self) -> None:
    """Start a ``huggingface-cli upload`` task for the selected file/folder.

    Reads the repository, local path and optional remote path from the
    corresponding inputs, appends the selected ``--repo-type`` flag, and runs
    the command in a ``QuantizationThread`` that is tracked in
    ``quant_threads`` and shown in the task list.

    A warning box is shown (and nothing is started) when the repository or
    local path is empty. Any exception raised while setting up the task is
    reported via ``show_error``; the "started" message is only logged when
    the thread was actually started.
    """
    hf_repo = self.hf_repo_input.text()
    local_path = self.hf_local_path_input.text()
    remote_path = self.hf_remote_path_input.text()
    # Only uploads are issued here. Named ``transfer_type`` so the ``type``
    # builtin is not shadowed.
    transfer_type = "upload"

    if not hf_repo or not local_path:
        QMessageBox.warning(self, ERROR, ALL_FIELDS_REQUIRED)
        return

    try:
        command = ["huggingface-cli", transfer_type, hf_repo, local_path]
        if remote_path:
            command.append(remote_path)

        # Add repo type argument if selected
        if self.repo_type_model.isChecked():
            command.append("--repo-type=model")
        elif self.repo_type_dataset.isChecked():
            command.append("--repo-type=dataset")
        elif self.repo_type_space.isChecked():
            command.append("--repo-type=space")

        logs_path = self.logs_input.text()
        ensure_directory(logs_path)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(logs_path, f"hf_transfer_{timestamp}.log")

        # Log command
        command_str = " ".join(command)
        self.logger.info(HUGGINGFACE_UPLOAD_COMMAND + command_str)

        thread = QuantizationThread(command, os.getcwd(), log_file)
        self.quant_threads.append(thread)

        task_name_temp = (
            UPLOADING if self.upload_type_file.isChecked() else UPLOADING_FOLDER
        )
        task_name = HF_TRANSFER_TASK_NAME.format(
            task_name_temp, local_path, hf_repo, local_path
        )
        task_item = TaskListItem(
            task_name,
            log_file,
            show_progress_bar=False,
            logger=self.logger,
            quant_threads=self.quant_threads,
        )
        list_item = QListWidgetItem(self.task_list)
        list_item.setSizeHint(task_item.sizeHint())
        self.task_list.addItem(list_item)
        self.task_list.setItemWidget(list_item, task_item)

        thread.status_signal.connect(task_item.update_status)
        thread.finished_signal.connect(
            lambda: self.task_finished(thread, task_item)
        )
        thread.error_signal.connect(
            lambda err: handle_error(self.logger, err, task_item)
        )
        thread.start()
    except Exception as e:
        show_error(self.logger, ERROR_STARTING_HF_TRANSFER.format(str(e)))
    else:
        # Only claim the transfer has started when no exception was raised.
        self.logger.info(STARTED_HUGGINGFACE_TRANSFER.format(transfer_type))
def quantize_to_fp8_dynamic(self, model_dir: str, output_dir: str) -> None:
    """Start an AutoFP8 dynamic quantization task.

    Runs ``python src/quantize_to_fp8_dynamic.py <model_dir> <output_dir>``
    in a ``QuantizationThread`` (working directory: the current one), tracks
    it in ``quant_threads`` and adds an entry to the task list.

    Args:
        model_dir: Directory of the model to quantize; must be non-empty.
        output_dir: Directory to write the result to; must be non-empty.

    An error is shown and nothing is started when either argument is empty.
    Exceptions raised while setting up the task are reported through
    ``show_error``.
    """
    # Both paths are required. (The previous test, ``model_dir or
    # output_dir == ""``, rejected every call that supplied a model.)
    if not model_dir or not output_dir:
        show_error(
            self.logger,
            f"{ERROR_STARTING_AUTOFP8_QUANTIZATION}: {NO_MODEL_SELECTED}",
        )
        return

    self.logger.info(
        QUANTIZING_TO_WITH_AUTOFP8.format(os.path.basename(model_dir), output_dir)
    )
    try:
        command = [
            "python",
            "src/quantize_to_fp8_dynamic.py",
            model_dir,
            output_dir,
        ]

        logs_path = self.logs_input.text()
        ensure_directory(logs_path)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(logs_path, f"autofp8_{timestamp}.log")

        thread = QuantizationThread(command, os.getcwd(), log_file)
        self.quant_threads.append(thread)

        task_name = QUANTIZING_WITH_AUTOFP8.format(os.path.basename(model_dir))
        task_item = TaskListItem(
            task_name,
            log_file,
            show_progress_bar=False,
            logger=self.logger,
            quant_threads=self.quant_threads,
        )
        list_item = QListWidgetItem(self.task_list)
        list_item.setSizeHint(task_item.sizeHint())
        self.task_list.addItem(list_item)
        self.task_list.setItemWidget(list_item, task_item)

        thread.status_signal.connect(task_item.update_status)
        thread.finished_signal.connect(
            lambda: self.task_finished(thread, task_item)
        )
        thread.error_signal.connect(
            lambda err: handle_error(self.logger, err, task_item)
        )
        thread.start()
    except Exception as e:
        show_error(self.logger, f"{ERROR_STARTING_AUTOFP8_QUANTIZATION}: {e}")
    else:
        # Only report the task as started when setup did not fail.
        self.logger.info(AUTOFP8_QUANTIZATION_TASK_STARTED)
@validate_input(
    "hf_model_input",
    "hf_outfile",
    "hf_split_max_size",
    "hf_model_name",
    "logs_input",
)
def convert_hf_to_gguf(self) -> None:
    """Start a HuggingFace-to-GGUF conversion task from the form's settings.

    Builds a ``python src/convert_hf_to_gguf.py`` command line from the
    conversion widgets (output file/type, flags, model name, split size),
    runs it in a ``QuantizationThread`` tracked in ``quant_threads`` and adds
    an entry to the task list.

    A missing model directory, or any other exception raised during setup, is
    reported through ``show_error``. The "task started" message is only
    logged when the thread was actually started.
    """
    self.logger.info(STARTING_HF_TO_GGUF_CONVERSION)
    try:
        model_dir = self.hf_model_input.text()
        if not model_dir:
            raise ValueError(MODEL_DIRECTORY_REQUIRED)

        command = ["python", "src/convert_hf_to_gguf.py"]
        if self.hf_vocab_only.isChecked():
            command.append("--vocab-only")
        if self.hf_outfile.text():
            command.extend(["--outfile", self.hf_outfile.text()])
        command.extend(["--outtype", self.hf_outtype.currentText()])
        if self.hf_use_temp_file.isChecked():
            command.append("--use-temp-file")
        if self.hf_no_lazy.isChecked():
            command.append("--no-lazy")
        if self.hf_model_name.text():
            command.extend(["--model-name", self.hf_model_name.text()])
        if self.hf_verbose.isChecked():
            command.append("--verbose")
        if self.hf_split_max_size.text():
            command.extend(["--split-max-size", self.hf_split_max_size.text()])
        if self.hf_dry_run.isChecked():
            command.append("--dry-run")
        # The model directory is the trailing positional argument
        command.append(model_dir)

        logs_path = self.logs_input.text()
        ensure_directory(logs_path)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(logs_path, f"hf_to_gguf_{timestamp}.log")

        # Log command
        command_str = " ".join(command)
        self.logger.info(HF_TO_GGUF_CONVERSION_COMMAND.format(command_str))

        thread = QuantizationThread(command, os.getcwd(), log_file)
        self.quant_threads.append(thread)

        task_name = CONVERTING_TO_GGUF.format(os.path.basename(model_dir))
        task_item = TaskListItem(
            task_name,
            log_file,
            show_progress_bar=False,
            logger=self.logger,
            quant_threads=self.quant_threads,
        )
        list_item = QListWidgetItem(self.task_list)
        list_item.setSizeHint(task_item.sizeHint())
        self.task_list.addItem(list_item)
        self.task_list.setItemWidget(list_item, task_item)

        thread.status_signal.connect(task_item.update_status)
        thread.finished_signal.connect(
            lambda: self.task_finished(thread, task_item)
        )
        thread.error_signal.connect(
            lambda err: handle_error(self.logger, err, task_item)
        )
        thread.start()
    except Exception as e:
        show_error(self.logger, ERROR_STARTING_HF_TO_GGUF_CONVERSION.format(str(e)))
    else:
        # Only report the task as started when setup did not fail.
        self.logger.info(HF_TO_GGUF_CONVERSION_TASK_STARTED)
def split_gguf(
    self, model_dir: str, output_dir: str, max_size: str, max_tensors: str
) -> None:
    """Start a ``llama-gguf-split`` task.

    Args:
        model_dir: Input path passed to ``llama-gguf-split``; required.
        output_dir: Output path passed to ``llama-gguf-split``; required.
        max_size: Value for ``--split-max-size``; omitted when empty.
        max_tensors: Value for ``--split-max-tensors``; omitted when empty.

    An error is shown and nothing is started when ``model_dir`` or
    ``output_dir`` is empty. Exceptions raised while setting up the task are
    reported through ``show_error``.
    """
    if not model_dir or not output_dir:
        show_error(self.logger, f"{SPLIT_GGUF_ERROR}: {NO_MODEL_SELECTED}")
        return

    self.logger.info(SPLIT_GGUF_TASK_STARTED)
    try:
        command = [
            "llama-gguf-split",
        ]
        if max_size:
            command.extend(["--split-max-size", max_size])
        if max_tensors:
            command.extend(["--split-max-tensors", max_tensors])
        command.extend([model_dir, output_dir])

        logs_path = self.logs_input.text()
        ensure_directory(logs_path)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(logs_path, f"gguf_split_{timestamp}.log")

        thread = QuantizationThread(command, os.getcwd(), log_file)
        self.quant_threads.append(thread)

        task_name = SPLIT_GGUF_DYNAMIC.format(os.path.basename(model_dir))
        task_item = TaskListItem(
            task_name,
            log_file,
            show_progress_bar=False,
            logger=self.logger,
            quant_threads=self.quant_threads,
        )
        list_item = QListWidgetItem(self.task_list)
        list_item.setSizeHint(task_item.sizeHint())
        self.task_list.addItem(list_item)
        self.task_list.setItemWidget(list_item, task_item)

        thread.status_signal.connect(task_item.update_status)
        thread.finished_signal.connect(
            lambda: self.task_finished(thread, task_item)
        )
        thread.error_signal.connect(
            lambda err: handle_error(self.logger, err, task_item)
        )
        thread.start()
    except Exception as e:
        show_error(self.logger, SPLIT_GGUF_ERROR.format(e))
    else:
        # Skipped when setup failed, so an error is not followed by a
        # success-looking log line.
        # NOTE(review): this is emitted as soon as the worker thread has been
        # started, not when the split completes, despite the constant's name.
        self.logger.info(SPLIT_GGUF_TASK_FINISHED)
def merge_gguf(self, model_dir: str, output_dir: str) -> None:
    """Start a ``llama-gguf-split --merge`` task.

    Args:
        model_dir: Input path passed to ``llama-gguf-split``; required.
        output_dir: Output path passed to ``llama-gguf-split``; required.

    An error is shown and nothing is started when either argument is empty.
    Exceptions raised while setting up the task are reported through
    ``show_error``.
    """
    if not model_dir or not output_dir:
        show_error(self.logger, "Error merging GGUF: No model selected")
        return

    self.logger.info("Merge GGUF task started.")
    try:
        command = ["llama-gguf-split", "--merge"]
        command.extend([model_dir, output_dir])

        logs_path = self.logs_input.text()
        ensure_directory(logs_path)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(logs_path, f"gguf_merge_{timestamp}.log")

        thread = QuantizationThread(command, os.getcwd(), log_file)
        self.quant_threads.append(thread)

        task_name = "Merging GGUFs {}".format(os.path.basename(model_dir))
        task_item = TaskListItem(
            task_name,
            log_file,
            show_progress_bar=False,
            logger=self.logger,
            quant_threads=self.quant_threads,
        )
        list_item = QListWidgetItem(self.task_list)
        list_item.setSizeHint(task_item.sizeHint())
        self.task_list.addItem(list_item)
        self.task_list.setItemWidget(list_item, task_item)

        thread.status_signal.connect(task_item.update_status)
        thread.finished_signal.connect(
            lambda: self.task_finished(thread, task_item)
        )
        thread.error_signal.connect(
            lambda err: handle_error(self.logger, err, task_item)
        )
        thread.start()
    except Exception as e:
        show_error(self.logger, "Error starting merge GGUF task: {}".format(e))
    else:
        # The message previously said "Split" (copied from split_gguf) and
        # was logged even after a failed start.
        # NOTE(review): this is emitted right after the worker thread is
        # started, not when the merge actually completes.
        self.logger.info("Merge GGUF task finished.")
def quantize_model(self) -> None:
    """Start one ``llama-quantize`` task per selected quantization type.

    Validates the inputs, then for every selected type builds the output
    file name and command line from the current option widgets, registers a
    ``QuantizationThread`` plus a ``TaskListItem`` for it, and wires up the
    thread's signals. All threads are started only after every task has been
    set up.

    ``ValueError`` and ``FileNotFoundError`` messages are shown as-is; any
    other exception is wrapped in ``ERROR_STARTING_QUANTIZATION``.
    """
    self.logger.info(STARTING_MODEL_QUANTIZATION)
    try:
        self.validate_quantization_inputs()

        selected_item = self.model_tree.currentItem()
        if not selected_item:
            raise ValueError(NO_MODEL_SELECTED)
        model_file = selected_item.data(0, Qt.ItemDataRole.UserRole)
        model_name = selected_item.text(0).replace(" (sharded)", "")

        backend_path = self.backend_combo.currentData()
        if not backend_path:
            raise ValueError(NO_BACKEND_SELECTED)

        selected_quant_types = [
            item.text() for item in self.quant_type.selectedItems()
        ]
        if not selected_quant_types:
            raise ValueError(NO_QUANTIZATION_TYPE_SELECTED)

        input_path = os.path.join(self.models_input.text(), model_file)
        if not os.path.exists(input_path):
            raise FileNotFoundError(INPUT_FILE_NOT_EXIST.format(input_path))

        tasks = []  # List to keep track of all tasks
        for quant_type in selected_quant_types:
            # Start building the output name
            output_name_parts = [
                os.path.splitext(model_name)[0],
                "converted",
                quant_type,
            ]

            # Check for output tensor options
            if (
                self.use_output_tensor_type.isChecked()
                or self.leave_output_tensor.isChecked()
            ):
                output_tensor_part = "o"
                if self.use_output_tensor_type.isChecked():
                    output_tensor_part += (
                        "." + self.output_tensor_type.currentText()
                    )
                output_name_parts.append(output_tensor_part)

            # Check for embedding tensor options
            if self.use_token_embedding_type.isChecked():
                embd_tensor_part = "t." + self.token_embedding_type.currentText()
                output_name_parts.append(embd_tensor_part)

            # Check for pure option
            if self.pure.isChecked():
                output_name_parts.append("pure")

            # Check for requantize option
            if self.allow_requantize.isChecked():
                output_name_parts.append("rq")

            # Check for KV override. This must be a real boolean: it was
            # previously initialised to the ``bool`` type itself, which is
            # truthy even when no override is set.
            kv_used = any(
                entry.get_override_string() for entry in self.kv_override_entries
            )
            if kv_used:
                output_name_parts.append("kv")

            # Join all parts with underscores and add .gguf extension
            output_name = "_".join(output_name_parts) + ".gguf"
            output_path = os.path.join(self.output_input.text(), output_name)

            command = [os.path.join(backend_path, "llama-quantize")]
            if self.allow_requantize.isChecked():
                command.append("--allow-requantize")
            if self.leave_output_tensor.isChecked():
                command.append("--leave-output-tensor")
            if self.pure.isChecked():
                command.append("--pure")
            if self.imatrix.text():
                command.extend(["--imatrix", self.imatrix.text()])
            if self.include_weights.text():
                command.extend(["--include-weights", self.include_weights.text()])
            if self.exclude_weights.text():
                command.extend(["--exclude-weights", self.exclude_weights.text()])
            if self.use_output_tensor_type.isChecked():
                command.extend(
                    [
                        "--output-tensor-type",
                        self.output_tensor_type.currentText().lower(),
                    ]
                )
            if self.use_token_embedding_type.isChecked():
                command.extend(
                    [
                        "--token-embedding-type",
                        self.token_embedding_type.currentText().lower(),
                    ]
                )
            if self.keep_split.isChecked():
                command.append("--keep-split")
            if self.kv_override_entries:
                for entry in self.kv_override_entries:
                    override_string = entry.get_override_string(
                        model_name=model_name,
                        quant_type=quant_type,
                        output_path=output_path,
                        quantization_parameters=[
                            kv_used,  # If KV overrides are used
                            self.allow_requantize.isChecked(),  # If requantize is used
                            self.pure.isChecked(),  # If pure tensors option is used
                            self.leave_output_tensor.isChecked(),  # If leave output tensor option is used
                        ],
                    )
                    if override_string:
                        command.extend(["--override-kv", override_string])

            command.extend([input_path, output_path, quant_type])

            # Add extra arguments
            if self.extra_arguments.text():
                command.extend(self.extra_arguments.text().split())

            logs_path = self.logs_input.text()
            ensure_directory(logs_path)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            log_file = os.path.join(
                logs_path, f"{model_name}_{timestamp}_{quant_type}.log"
            )

            # Log quant command
            command_str = " ".join(command)
            self.logger.info(f"{QUANTIZATION_COMMAND}: {command_str}")

            thread = QuantizationThread(command, backend_path, log_file)
            self.quant_threads.append(thread)

            task_item = TaskListItem(
                QUANTIZING_MODEL_TO.format(model_name, quant_type),
                log_file,
                show_properties=True,
                logger=self.logger,
            )
            list_item = QListWidgetItem(self.task_list)
            list_item.setSizeHint(task_item.sizeHint())
            self.task_list.addItem(list_item)
            self.task_list.setItemWidget(list_item, task_item)

            # Add the thread and task_item to our list
            tasks.append((thread, task_item))

            # Default arguments bind this iteration's thread/task_item; a
            # plain closure would see the values from the last iteration.
            thread.output_signal.connect(
                lambda line, ti=task_item: self.parse_progress(line, ti)
            )
            thread.status_signal.connect(task_item.update_status)
            thread.finished_signal.connect(
                lambda t=thread, ti=task_item: self.task_finished(t, ti)
            )
            thread.error_signal.connect(
                lambda err, ti=task_item: handle_error(self.logger, err, ti)
            )
            thread.model_info_signal.connect(self.update_model_info)

        # Start all threads after setting them up
        for thread, _ in tasks:
            thread.start()
        self.logger.info(QUANTIZATION_TASK_STARTED.format(model_name))
    except (ValueError, FileNotFoundError) as e:
        show_error(self.logger, str(e))
    except Exception as e:
        show_error(self.logger, ERROR_STARTING_QUANTIZATION.format(str(e)))
def task_finished(self, thread, task_item) -> None:
    """Record that a worker thread has finished.

    Logs the thread's log file, stops tracking the thread in
    ``quant_threads`` (if it is still listed), marks the task item as
    completed and sets the window's modified flag.
    """
    self.logger.info(TASK_FINISHED.format(thread.log_file))
    still_tracked = thread in self.quant_threads
    if still_tracked:
        self.quant_threads.remove(thread)
    task_item.update_status(COMPLETED)
    # Set modified flag
    self.setAttribute(Qt.WA_WindowModified, True)
def show_task_details(self, item) -> None:
    """Open a modal dialog showing the log of the task behind ``item``.

    The dialog is pre-filled with the current content of the task's log file
    (when it exists). If a thread in ``quant_threads`` writes to the same log
    file, its output signal is connected so new lines are appended live.
    Nothing is shown when the list item has no widget.
    """
    self.logger.debug(SHOWING_TASK_DETAILS_FOR.format(item.text()))
    task_widget = self.task_list.itemWidget(item)
    if not task_widget:
        return

    dialog = QDialog(self)
    dialog.setWindowTitle(LOG_FOR.format(task_widget.task_name))
    dialog.setGeometry(200, 200, 800, 600)

    log_view = QPlainTextEdit()
    log_view.setReadOnly(True)
    dialog_layout = QVBoxLayout()
    dialog_layout.addWidget(log_view)
    dialog.setLayout(dialog_layout)

    # Load existing content
    if os.path.exists(task_widget.log_file):
        with open_file_safe(task_widget.log_file, "r") as log_handle:
            log_view.setPlainText(log_handle.read())

    # Connect to the thread if it's still running
    live_thread = next(
        (
            candidate
            for candidate in self.quant_threads
            if candidate.log_file == task_widget.log_file
        ),
        None,
    )
    if live_thread is not None:
        live_thread.output_signal.connect(log_view.appendPlainText)

    dialog.exec()
def import_model(self) -> None:
    """Let the user pick an external GGUF file and add it to the model list.

    The chosen file is checked with ``verify_gguf`` and the user is asked to
    confirm. On confirmation its path is appended to ``imported_models`` and
    the model tree is reloaded. Cancelling the file dialog or declining the
    confirmation leaves everything unchanged.
    """
    self.logger.info(IMPORTING_MODEL)
    chosen_path, _ = QFileDialog.getOpenFileName(
        self, SELECT_MODEL_TO_IMPORT, "", GGUF_FILES
    )
    if not chosen_path:
        return

    chosen_name = os.path.basename(chosen_path)

    # Verify GGUF file
    if not verify_gguf(chosen_path):
        show_error(self.logger, INVALID_GGUF_FILE.format(chosen_name))
        return

    buttons = QMessageBox.StandardButton
    answer = QMessageBox.question(
        self,
        CONFIRM_IMPORT,
        IMPORT_MODEL_CONFIRMATION.format(chosen_name),
        buttons.Yes | buttons.No,
        buttons.No,
    )
    if answer != buttons.Yes:
        return

    self.imported_models.append(chosen_path)
    self.load_models()
    self.logger.info(MODEL_IMPORTED_SUCCESSFULLY.format(chosen_name))
@validate_input("imatrix_model", "imatrix_datafile", "imatrix_output")
def generate_imatrix(self) -> None:
    """Start a ``llama-imatrix`` task using the selected backend.

    Builds the command line from the imatrix widgets (data file, model,
    output, output frequency, context size, threads, GPU offload), runs it in
    a ``QuantizationThread`` whose working directory is the backend path, and
    adds an entry with a progress bar to the task list.

    A missing backend or model path, or any other exception raised during
    setup, is reported through ``show_error``. The "task started" message is
    only logged when the thread was actually started.
    """
    self.logger.info(STARTING_IMATRIX_GENERATION)
    try:
        backend_path = self.backend_combo.currentData()
        # currentData() can be empty when no backend is selected; check that
        # first so os.path.exists() is never handed None.
        if not backend_path or not os.path.exists(backend_path):
            raise FileNotFoundError(BACKEND_PATH_NOT_EXIST.format(backend_path))

        # Check if the Model area is empty
        if not self.imatrix_model.text():
            raise ValueError(MODEL_PATH_REQUIRED_FOR_IMATRIX)

        command = [
            os.path.join(backend_path, "llama-imatrix"),
            "-f",
            self.imatrix_datafile.text(),
            "-m",
            self.imatrix_model.text(),
            "-o",
            self.imatrix_output.text(),
            "--output-frequency",
            str(self.imatrix_frequency.value()),
            "--ctx-size",
            str(self.imatrix_ctx_size.value()),
            "--threads",
            str(self.threads_spinbox.value()),
        ]
        if self.gpu_offload_auto.isChecked():
            command.extend(["-ngl", "99"])
        elif self.gpu_offload_spinbox.value() > 0:
            command.extend(["-ngl", str(self.gpu_offload_spinbox.value())])

        # Make sure the log directory exists, as the other task launchers do.
        logs_path = self.logs_input.text()
        ensure_directory(logs_path)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(logs_path, f"imatrix_{timestamp}.log")

        # Log command
        command_str = " ".join(command)
        self.logger.info(f"{IMATRIX_GENERATION_COMMAND}: {command_str}")

        thread = QuantizationThread(command, backend_path, log_file)
        self.quant_threads.append(thread)

        task_name = GENERATING_IMATRIX_FOR.format(
            os.path.basename(self.imatrix_model.text())
        )
        task_item = TaskListItem(
            task_name,
            log_file,
            show_progress_bar=True,
            logger=self.logger,
            quant_threads=self.quant_threads,
        )
        list_item = QListWidgetItem(self.task_list)
        list_item.setSizeHint(task_item.sizeHint())
        self.task_list.addItem(list_item)
        self.task_list.setItemWidget(list_item, task_item)

        imatrix_chunks = None
        thread.status_signal.connect(task_item.update_status)
        thread.output_signal.connect(
            lambda line, ti=task_item: self.parse_progress(line, ti, imatrix_chunks)
        )
        thread.finished_signal.connect(
            lambda: self.task_finished(thread, task_item)
        )
        thread.error_signal.connect(
            lambda err: handle_error(self.logger, err, task_item)
        )
        thread.start()
    except Exception as e:
        show_error(self.logger, ERROR_STARTING_IMATRIX_GENERATION.format(str(e)))
    else:
        # Only report the task as started when setup did not fail.
        self.logger.info(IMATRIX_GENERATION_TASK_STARTED)
def closeEvent(self, event: QCloseEvent) -> None:
    """Confirm closing while tasks are tracked, then accept or ignore.

    When ``quant_threads`` is non-empty the user is asked to confirm. On
    "Yes" every tracked thread is terminated and the event is accepted;
    otherwise the event is ignored and the window stays open. With no tracked
    threads the event is accepted immediately. ``APPLICATION_CLOSED`` is
    logged only when the close is actually accepted.
    """
    self.logger.info(APPLICATION_CLOSING)

    close_confirmed = True
    if self.quant_threads:
        reply = QMessageBox.question(
            self,
            WARNING,
            TASK_RUNNING_WARNING,
            QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
            QMessageBox.StandardButton.No,
        )
        close_confirmed = reply == QMessageBox.StandardButton.Yes
        if close_confirmed:
            for thread in self.quant_threads:
                thread.terminate()

    if close_confirmed:
        event.accept()
        self.logger.info(APPLICATION_CLOSED)
    else:
        # The user chose to keep the application open.
        event.ignore()