fix kv load bug + input validation, add imatrix options

imatrix and ui changes, bugfix code
This commit is contained in:
BuildTools 2024-08-03 21:59:59 -07:00
parent 00660727d2
commit 294fe6ea5b
3 changed files with 174 additions and 34 deletions

View File

@ -215,6 +215,10 @@ def __init__(self):
quant_options_scroll.setWidgetResizable(True) quant_options_scroll.setWidgetResizable(True)
left_layout.addWidget(quant_options_scroll) left_layout.addWidget(quant_options_scroll)
# Extra command-line arguments field (placed after the KV override section)
self.extra_arguments = QLineEdit()
quant_options_layout.addRow(self.create_label(EXTRA_ARGUMENTS, "Additional command-line arguments"), self.extra_arguments)
# Quantize button layout # Quantize button layout
quantize_layout = QHBoxLayout() quantize_layout = QHBoxLayout()
quantize_button = QPushButton(QUANTIZE_MODEL) quantize_button = QPushButton(QUANTIZE_MODEL)
@ -263,10 +267,33 @@ def __init__(self):
imatrix_output_layout.addWidget(self.imatrix_output_button) imatrix_output_layout.addWidget(self.imatrix_output_button)
imatrix_layout.addRow(self.create_label(OUTPUT, OUTPUT_PATH_FOR_GENERATED_IMATRIX), imatrix_output_layout) imatrix_layout.addRow(self.create_label(OUTPUT, OUTPUT_PATH_FOR_GENERATED_IMATRIX), imatrix_output_layout)
self.imatrix_frequency = QLineEdit() self.imatrix_frequency = QSpinBox()
self.imatrix_frequency.setRange(1, 100) # Set the range from 1 to 100
self.imatrix_frequency.setValue(1) # Set a default value
imatrix_layout.addRow(self.create_label(OUTPUT_FREQUENCY, HOW_OFTEN_TO_SAVE_IMATRIX), self.imatrix_frequency) imatrix_layout.addRow(self.create_label(OUTPUT_FREQUENCY, HOW_OFTEN_TO_SAVE_IMATRIX), self.imatrix_frequency)
# GPU Offload for IMatrix # Context size input (now a spinbox)
self.imatrix_ctx_size = QSpinBox()
self.imatrix_ctx_size.setRange(1, 1048576) # Up to one million tokens
self.imatrix_ctx_size.setValue(512) # Set a default value
imatrix_layout.addRow(self.create_label(CONTEXT_SIZE, CONTEXT_SIZE_FOR_IMATRIX), self.imatrix_ctx_size)
# Threads input with slider and spinbox
threads_layout = QHBoxLayout()
self.threads_slider = QSlider(Qt.Orientation.Horizontal)
self.threads_slider.setRange(1, 64)
self.threads_slider.valueChanged.connect(self.update_threads_spinbox)
self.threads_spinbox = QSpinBox()
self.threads_spinbox.setRange(1, 128)
self.threads_spinbox.valueChanged.connect(self.update_threads_slider)
self.threads_spinbox.setMinimumWidth(75)
threads_layout.addWidget(self.threads_slider)
threads_layout.addWidget(self.threads_spinbox)
imatrix_layout.addRow(self.create_label(THREADS, NUMBER_OF_THREADS_FOR_IMATRIX), threads_layout)
# GPU Offload for IMatrix
gpu_offload_layout = QHBoxLayout() gpu_offload_layout = QHBoxLayout()
self.gpu_offload_slider = QSlider(Qt.Orientation.Horizontal) self.gpu_offload_slider = QSlider(Qt.Orientation.Horizontal)
self.gpu_offload_slider.setRange(0, 200) self.gpu_offload_slider.setRange(0, 200)
@ -275,7 +302,7 @@ def __init__(self):
self.gpu_offload_spinbox = QSpinBox() self.gpu_offload_spinbox = QSpinBox()
self.gpu_offload_spinbox.setRange(0, 1000) self.gpu_offload_spinbox.setRange(0, 1000)
self.gpu_offload_spinbox.valueChanged.connect(self.update_gpu_offload_slider) self.gpu_offload_spinbox.valueChanged.connect(self.update_gpu_offload_slider)
self.gpu_offload_spinbox.setMinimumWidth(75) # Set the minimum width to 75 pixels self.gpu_offload_spinbox.setMinimumWidth(75)
self.gpu_offload_auto = QCheckBox(AUTO) self.gpu_offload_auto = QCheckBox(AUTO)
self.gpu_offload_auto.stateChanged.connect(self.toggle_gpu_offload_auto) self.gpu_offload_auto.stateChanged.connect(self.toggle_gpu_offload_auto)
@ -349,7 +376,8 @@ def save_preset(self):
"use_token_embedding_type": self.use_token_embedding_type.isChecked(), "use_token_embedding_type": self.use_token_embedding_type.isChecked(),
"token_embedding_type": self.token_embedding_type.currentText(), "token_embedding_type": self.token_embedding_type.currentText(),
"keep_split": self.keep_split.isChecked(), "keep_split": self.keep_split.isChecked(),
"kv_overrides": [entry.get_override_string() for entry in self.kv_override_entries] "kv_overrides": [entry.get_override_string() for entry in self.kv_override_entries],
"extra_arguments": self.extra_arguments.text()
} }
file_name, _ = QFileDialog.getSaveFileName(self, SAVE_PRESET, "", JSON_FILES) file_name, _ = QFileDialog.getSaveFileName(self, SAVE_PRESET, "", JSON_FILES)
@ -379,6 +407,7 @@ def load_preset(self):
self.use_token_embedding_type.setChecked(preset.get("use_token_embedding_type", False)) self.use_token_embedding_type.setChecked(preset.get("use_token_embedding_type", False))
self.token_embedding_type.setCurrentText(preset.get("token_embedding_type", "")) self.token_embedding_type.setCurrentText(preset.get("token_embedding_type", ""))
self.keep_split.setChecked(preset.get("keep_split", False)) self.keep_split.setChecked(preset.get("keep_split", False))
self.extra_arguments.setText(preset.get("extra_arguments", ""))
# Clear existing KV overrides and add new ones # Clear existing KV overrides and add new ones
for entry in self.kv_override_entries: for entry in self.kv_override_entries:
@ -391,19 +420,6 @@ def load_preset(self):
QMessageBox.critical(self, ERROR, FAILED_TO_LOAD_PRESET.format(str(e))) QMessageBox.critical(self, ERROR, FAILED_TO_LOAD_PRESET.format(str(e)))
self.logger.info(PRESET_LOADED_FROM.format(file_name)) self.logger.info(PRESET_LOADED_FROM.format(file_name))
def add_kv_override(self, override_string=None):
    """Append a KV override row to the UI, optionally pre-filled.

    Args:
        override_string: Optional ``key=type:value`` text. When falsy, an
            empty row is added.

    Raises:
        ValueError: If ``override_string`` lacks the ``=`` or ``:`` separator.
    """
    self.logger.debug(ADDING_KV_OVERRIDE.format(override_string))
    entry = KVOverrideEntry()
    entry.deleted.connect(self.remove_kv_override)
    if override_string:
        # Split on the FIRST separator only: a string value may itself
        # contain '=' or ':' (paths, URLs, templates), and an unbounded
        # split would then fail with "too many values to unpack".
        key, value = override_string.split('=', 1)
        type_, val = value.split(':', 1)
        entry.key_input.setText(key)
        entry.type_combo.setCurrentText(type_)
        entry.value_input.setText(val)
    self.kv_override_layout.addWidget(entry)
    self.kv_override_entries.append(entry)
def save_task_preset(self, task_item): def save_task_preset(self, task_item):
self.logger.info(SAVING_TASK_PRESET.format(task_item.task_name)) self.logger.info(SAVING_TASK_PRESET.format(task_item.task_name))
for thread in self.quant_threads: for thread in self.quant_threads:
@ -611,6 +627,23 @@ def show_task_properties(self, item):
model_info_dialog.exec() model_info_dialog.exec()
break break
def update_threads_spinbox(self, value):
    """Copy the thread count chosen on the slider into the threads spinbox.

    Args:
        value: Value to show in ``self.threads_spinbox``.
    """
    spinbox = self.threads_spinbox
    spinbox.setValue(value)
def update_threads_slider(self, value):
    """Move the threads slider to follow the threads spinbox.

    The spinbox accepts a wider range than the slider, so the slider clamps
    larger values to its own maximum. Without suppressing its signals, that
    clamped value would be echoed back through ``valueChanged`` and would
    overwrite the number the user just typed into the spinbox.

    Args:
        value: New value reported by ``self.threads_spinbox``.
    """
    slider = self.threads_slider
    # blockSignals() returns the previous blocked state; restore exactly that
    # state afterwards instead of unconditionally unblocking.
    was_blocked = slider.blockSignals(True)
    try:
        slider.setValue(value)
    finally:
        slider.blockSignals(was_blocked)
def update_gpu_offload_spinbox(self, value):
    """Show ``value`` in the GPU-offload spinbox.

    Args:
        value: Value to set on ``self.gpu_offload_spinbox``.
    """
    spinbox = self.gpu_offload_spinbox
    spinbox.setValue(value)
def update_gpu_offload_slider(self, value):
    """Move the GPU-offload slider to follow the GPU-offload spinbox.

    The spinbox accepts a wider range than the slider, so the slider clamps
    larger values to its own maximum. Its signals are blocked during the
    update so that the clamped value cannot be echoed back and overwrite the
    spinbox (presumably the slider's ``valueChanged`` feeds
    ``update_gpu_offload_spinbox`` -- TODO confirm the connection).

    Args:
        value: New value reported by ``self.gpu_offload_spinbox``.
    """
    slider = self.gpu_offload_slider
    # blockSignals() returns the previous blocked state; restore exactly that
    # state afterwards instead of unconditionally unblocking.
    was_blocked = slider.blockSignals(True)
    try:
        slider.setValue(value)
    finally:
        slider.blockSignals(was_blocked)
def toggle_gpu_offload_auto(self, state):
    """Enable or disable the manual GPU-offload controls when "Auto" toggles.

    ``stateChanged`` delivers the new state as a plain ``int``. PyQt6 enums
    are ordinary ``enum.Enum`` members, so comparing that int directly with
    ``Qt.CheckState.Checked`` is always ``False`` and the controls would
    never be disabled. The value is therefore converted to ``Qt.CheckState``
    before comparing.

    Args:
        state: New check state, as an ``int`` or a ``Qt.CheckState`` member.
    """
    is_auto = Qt.CheckState(state) == Qt.CheckState.Checked
    manual_enabled = not is_auto
    self.gpu_offload_slider.setEnabled(manual_enabled)
    self.gpu_offload_spinbox.setEnabled(manual_enabled)
def cancel_task(self, item): def cancel_task(self, item):
self.logger.info(CANCELLING_TASK.format(item.text())) self.logger.info(CANCELLING_TASK.format(item.text()))
task_item = self.task_list.itemWidget(item) task_item = self.task_list.itemWidget(item)
@ -710,9 +743,15 @@ def validate_quantization_inputs(self):
if errors: if errors:
raise ValueError("\n".join(errors)) raise ValueError("\n".join(errors))
def add_kv_override(self, override_string=None):
    """Append a KV override row to the UI, optionally pre-filled.

    Args:
        override_string: Optional ``key=type:value`` text. When falsy, an
            empty row is added.

    Raises:
        ValueError: If ``override_string`` lacks the ``=`` or ``:`` separator.
    """
    entry = KVOverrideEntry()
    entry.deleted.connect(self.remove_kv_override)
    if override_string:
        # Split on the FIRST separator only: a string value may itself
        # contain '=' or ':' (paths, URLs, templates), and an unbounded
        # split would then fail with "too many values to unpack".
        key, value = override_string.split('=', 1)
        type_, val = value.split(':', 1)
        entry.key_input.setText(key)
        entry.type_combo.setCurrentText(type_)
        entry.value_input.setText(val)
    self.kv_override_layout.addWidget(entry)
    self.kv_override_entries.append(entry)
@ -736,9 +775,40 @@ def quantize_model(self):
quant_type = self.quant_type.currentText() quant_type = self.quant_type.currentText()
input_path = os.path.join(self.models_input.text(), model_name) input_path = os.path.join(self.models_input.text(), model_name)
output_name = f"{os.path.splitext(model_name)[0]}_{quant_type}.gguf" model_name = selected_model.text()
output_path = os.path.join(self.output_input.text(), output_name) quant_type = self.quant_type.currentText()
# Start building the output name
output_name_parts = [os.path.splitext(model_name)[0], "converted", quant_type]
# Check for output tensor options
if self.use_output_tensor_type.isChecked() or self.leave_output_tensor.isChecked():
output_tensor_part = "o"
if self.use_output_tensor_type.isChecked():
output_tensor_part += "." + self.output_tensor_type.currentText()
output_name_parts.append(output_tensor_part)
# Check for embedding tensor options
if self.use_token_embedding_type.isChecked():
embd_tensor_part = "t." + self.token_embedding_type.currentText()
output_name_parts.append(embd_tensor_part)
# Check for pure option
if self.pure.isChecked():
output_name_parts.append("pure")
# Check for requantize option
if self.allow_requantize.isChecked():
output_name_parts.append("rq")
# Check for KV override
if any(entry.get_override_string() for entry in self.kv_override_entries):
output_name_parts.append("kv")
# Join all parts with underscores and add .gguf extension
output_name = "_".join(output_name_parts) + ".gguf"
output_path = os.path.join(self.output_input.text(), output_name)
if not os.path.exists(input_path): if not os.path.exists(input_path):
raise FileNotFoundError(INPUT_FILE_NOT_EXIST.format(input_path)) raise FileNotFoundError(INPUT_FILE_NOT_EXIST.format(input_path))
@ -770,6 +840,10 @@ def quantize_model(self):
command.extend([input_path, output_path, quant_type]) command.extend([input_path, output_path, quant_type])
# Add extra arguments
if self.extra_arguments.text():
command.extend(self.extra_arguments.text().split())
logs_path = self.logs_input.text() logs_path = self.logs_input.text()
ensure_directory(logs_path) ensure_directory(logs_path)
@ -875,7 +949,9 @@ def generate_imatrix(self):
"-f", self.imatrix_datafile.text(), "-f", self.imatrix_datafile.text(),
"-m", self.imatrix_model.text(), "-m", self.imatrix_model.text(),
"-o", self.imatrix_output.text(), "-o", self.imatrix_output.text(),
"--output-frequency", self.imatrix_frequency.text() "--output-frequency", str(self.imatrix_frequency.value()),
"--ctx-size", str(self.imatrix_ctx_size.value()),
"--threads", str(self.threads_spinbox.value())
] ]
if self.gpu_offload_auto.isChecked(): if self.gpu_offload_auto.isChecked():

View File

@ -1,5 +1,6 @@
from PyQt6.QtWidgets import QWidget, QHBoxLayout, QLineEdit, QComboBox, QPushButton from PyQt6.QtWidgets import QWidget, QHBoxLayout, QLineEdit, QComboBox, QPushButton
from PyQt6.QtCore import pyqtSignal from PyQt6.QtCore import pyqtSignal, QRegularExpression
from PyQt6.QtGui import QDoubleValidator, QIntValidator, QRegularExpressionValidator
class KVOverrideEntry(QWidget): class KVOverrideEntry(QWidget):
deleted = pyqtSignal(QWidget) deleted = pyqtSignal(QWidget)
@ -11,6 +12,9 @@ def __init__(self, parent=None):
self.key_input = QLineEdit() self.key_input = QLineEdit()
self.key_input.setPlaceholderText("Key") self.key_input.setPlaceholderText("Key")
# Set validator for key input (letters and dots only)
key_validator = QRegularExpressionValidator(QRegularExpression(r'[A-Za-z.]+'))
self.key_input.setValidator(key_validator)
layout.addWidget(self.key_input) layout.addWidget(self.key_input)
self.type_combo = QComboBox() self.type_combo = QComboBox()
@ -26,8 +30,22 @@ def __init__(self, parent=None):
delete_button.clicked.connect(self.delete_clicked) delete_button.clicked.connect(self.delete_clicked)
layout.addWidget(delete_button) layout.addWidget(delete_button)
# Connect type change to validator update
self.type_combo.currentTextChanged.connect(self.update_validator)
# Initialize validator
self.update_validator(self.type_combo.currentText())
def delete_clicked(self):
    """Emit the ``deleted`` signal, passing this widget as its argument."""
    signal = self.deleted
    signal.emit(self)
def get_override_string(self):
    """Return this row serialized as ``key=type:value``.

    The three parts are read from the key line edit, the type combo box and
    the value line edit, in that order.
    """
    key = self.key_input.text()
    kind = self.type_combo.currentText()
    value = self.value_input.text()
    return f"{key}={kind}:{value}"
def update_validator(self, type_):
    """Install the value-field validator that matches the selected type.

    Args:
        type_: Selected type name. ``"int"`` installs a ``QIntValidator``,
            ``"float"`` a ``QDoubleValidator``; any other value (the string
            type) removes validation.
    """
    if type_ == "int":
        validator = QIntValidator()
    elif type_ == "float":
        validator = QDoubleValidator()
    else:
        # Free-form string values: no validator.
        validator = None
    self.value_input.setValidator(validator)

View File

@ -164,6 +164,12 @@ def __init__(self):
self.SET_GPU_OFFLOAD_VALUE = "" self.SET_GPU_OFFLOAD_VALUE = ""
self.COMPLETED = "" self.COMPLETED = ""
self.REFRESH_MODELS = "" self.REFRESH_MODELS = ""
self.EXTRA_ARGUMENTS = ""
self.EXTRA_ARGUMENTS_LABEL = ""
self.CONTEXT_SIZE = ""
self.CONTEXT_SIZE_FOR_IMATRIX = ""
self.THREADS = ""
self.NUMBER_OF_THREADS_FOR_IMATRIX = ""
class _English(_Localization): class _English(_Localization):
def __init__(self): def __init__(self):
@ -330,6 +336,13 @@ def __init__(self):
self.SET_GPU_OFFLOAD_VALUE = "Set GPU offload value (-ngl)" self.SET_GPU_OFFLOAD_VALUE = "Set GPU offload value (-ngl)"
self.COMPLETED = "Completed" self.COMPLETED = "Completed"
self.REFRESH_MODELS = "Refresh Models" self.REFRESH_MODELS = "Refresh Models"
self.EXTRA_ARGUMENTS = "Extra Arguments:"
self.EXTRA_ARGUMENTS_LABEL = "Additional command-line arguments"
self.CONTEXT_SIZE = "Context Size:"
self.CONTEXT_SIZE_FOR_IMATRIX = "Context size for IMatrix generation"
self.THREADS = "Threads:"
self.NUMBER_OF_THREADS_FOR_IMATRIX = "Number of threads for IMatrix generation"
class _French: class _French:
# French localization # French localization
def __init__(self): def __init__(self):
@ -4889,12 +4902,45 @@ def __init__(self):
} }
def set_language(lang_code):
    """Publish the strings of the selected language as module-level globals.

    The localization class registered under ``lang_code`` in ``_languages``
    is instantiated (``_English`` when the code is unknown). Every public
    attribute name of an ``_English`` instance is then written into this
    module's globals, taking the value from the selected language and
    falling back to the English text when the language does not provide it.

    Args:
        lang_code: Language key looked up in ``_languages``.
    """
    # NOTE(review): these declarations have no runtime effect here, because
    # every assignment below goes through globals(); presumably they are kept
    # so that static tools see the names -- confirm before removing.
    global WINDOW_TITLE, RAM_USAGE, CPU_USAGE, BACKEND, REFRESH_BACKENDS, MODELS_PATH, OUTPUT_PATH, LOGS_PATH
    global BROWSE, AVAILABLE_MODELS, QUANTIZATION_TYPE, ALLOW_REQUANTIZE, LEAVE_OUTPUT_TENSOR, PURE, IMATRIX
    global INCLUDE_WEIGHTS, EXCLUDE_WEIGHTS, USE_OUTPUT_TENSOR_TYPE, USE_TOKEN_EMBEDDING_TYPE, KEEP_SPLIT
    global KV_OVERRIDES, ADD_NEW_OVERRIDE, QUANTIZE_MODEL, SAVE_PRESET, LOAD_PRESET, TASKS, DOWNLOAD_LLAMACPP
    global SELECT_RELEASE, SELECT_ASSET, EXTRACT_CUDA_FILES, SELECT_CUDA_BACKEND, DOWNLOAD, IMATRIX_GENERATION
    global DATA_FILE, MODEL, OUTPUT, OUTPUT_FREQUENCY, GPU_OFFLOAD, AUTO, GENERATE_IMATRIX, ERROR, WARNING
    global PROPERTIES, CANCEL, RESTART, DELETE, CONFIRM_DELETION, TASK_RUNNING_WARNING, YES, NO, DOWNLOAD_COMPLETE
    global CUDA_EXTRACTION_FAILED, PRESET_SAVED, PRESET_LOADED, NO_ASSET_SELECTED, DOWNLOAD_FAILED, NO_BACKEND_SELECTED
    global NO_MODEL_SELECTED, REFRESH_RELEASES, NO_SUITABLE_CUDA_BACKENDS, LLAMACPP_DOWNLOADED_EXTRACTED, CUDA_FILES_EXTRACTED
    global NO_SUITABLE_CUDA_BACKEND_EXTRACTION, ERROR_FETCHING_RELEASES, CONFIRM_DELETION_TITLE, LOG_FOR, ALL_FILES
    global GGUF_FILES, DAT_FILES, JSON_FILES, FAILED_LOAD_PRESET, INITIALIZING_AUTOGGUF, AUTOGGUF_INITIALIZATION_COMPLETE
    global REFRESHING_BACKENDS, NO_BACKENDS_AVAILABLE, FOUND_VALID_BACKENDS, SAVING_PRESET, PRESET_SAVED_TO, LOADING_PRESET
    global PRESET_LOADED_FROM, ADDING_KV_OVERRIDE, SAVING_TASK_PRESET, TASK_PRESET_SAVED, TASK_PRESET_SAVED_TO, RESTARTING_TASK
    global IN_PROGRESS, DOWNLOAD_FINISHED_EXTRACTED_TO, LLAMACPP_DOWNLOADED_AND_EXTRACTED, NO_SUITABLE_CUDA_BACKEND_FOUND
    global LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED, REFRESHING_LLAMACPP_RELEASES, UPDATING_ASSET_LIST, UPDATING_CUDA_OPTIONS
    global STARTING_LLAMACPP_DOWNLOAD, UPDATING_CUDA_BACKENDS, NO_CUDA_BACKEND_SELECTED, EXTRACTING_CUDA_FILES, DOWNLOAD_ERROR
    global SHOWING_TASK_CONTEXT_MENU, SHOWING_PROPERTIES_FOR_TASK, CANCELLING_TASK, CANCELED, DELETING_TASK, LOADING_MODELS, LOADED_MODELS
    global BROWSING_FOR_MODELS_DIRECTORY, SELECT_MODELS_DIRECTORY, BROWSING_FOR_OUTPUT_DIRECTORY, SELECT_OUTPUT_DIRECTORY
    global BROWSING_FOR_LOGS_DIRECTORY, SELECT_LOGS_DIRECTORY, BROWSING_FOR_IMATRIX_FILE, SELECT_IMATRIX_FILE, RAM_USAGE_FORMAT
    global CPU_USAGE_FORMAT, VALIDATING_QUANTIZATION_INPUTS, MODELS_PATH_REQUIRED, OUTPUT_PATH_REQUIRED, LOGS_PATH_REQUIRED
    global STARTING_MODEL_QUANTIZATION, INPUT_FILE_NOT_EXIST, QUANTIZING_MODEL_TO, QUANTIZATION_TASK_STARTED, ERROR_STARTING_QUANTIZATION
    global UPDATING_MODEL_INFO, TASK_FINISHED, SHOWING_TASK_DETAILS_FOR, BROWSING_FOR_IMATRIX_DATA_FILE, SELECT_DATA_FILE
    global BROWSING_FOR_IMATRIX_MODEL_FILE, SELECT_MODEL_FILE, BROWSING_FOR_IMATRIX_OUTPUT_FILE, SELECT_OUTPUT_FILE
    global STARTING_IMATRIX_GENERATION, BACKEND_PATH_NOT_EXIST, GENERATING_IMATRIX, ERROR_STARTING_IMATRIX_GENERATION
    global IMATRIX_GENERATION_TASK_STARTED, ERROR_MESSAGE, TASK_ERROR, APPLICATION_CLOSING, APPLICATION_CLOSED, SELECT_QUANTIZATION_TYPE
    global ALLOWS_REQUANTIZING, LEAVE_OUTPUT_WEIGHT, DISABLE_K_QUANT_MIXTURES, USE_DATA_AS_IMPORTANCE_MATRIX, USE_IMPORTANCE_MATRIX_FOR_TENSORS
    global DONT_USE_IMPORTANCE_MATRIX_FOR_TENSORS, OUTPUT_TENSOR_TYPE, USE_THIS_TYPE_FOR_OUTPUT_WEIGHT, TOKEN_EMBEDDING_TYPE, USE_THIS_TYPE_FOR_TOKEN_EMBEDDINGS
    global WILL_GENERATE_QUANTIZED_MODEL_IN_SAME_SHARDS, OVERRIDE_MODEL_METADATA, INPUT_DATA_FILE_FOR_IMATRIX, MODEL_TO_BE_QUANTIZED
    global OUTPUT_PATH_FOR_GENERATED_IMATRIX, HOW_OFTEN_TO_SAVE_IMATRIX, SET_GPU_OFFLOAD_VALUE, COMPLETED, REFRESH_MODELS
    global CONTEXT_SIZE, CONTEXT_SIZE_FOR_IMATRIX, THREADS, NUMBER_OF_THREADS_FOR_IMATRIX, EXTRA_ARGUMENTS, EXTRA_ARGUMENTS_LABEL

    loc = _languages.get(lang_code, _English)()
    english_loc = _English()  # Fallback source for strings the language lacks
    for key in dir(english_loc):
        if key.startswith('_'):
            continue
        fallback = getattr(english_loc, key)
        value = getattr(loc, key, fallback)
        # `_Localization` initializes every string to "" as a placeholder, so
        # a language built on it that has not translated a key exposes ""
        # rather than a missing attribute. Treat that as "not provided" too,
        # otherwise the UI would show a blank label instead of English.
        if value == "":
            value = fallback
        globals()[key] = value
# Get the language from the AUTOGGUF_LANGUAGE environment variable, default to 'en' # Get the language from the AUTOGGUF_LANGUAGE environment variable, default to 'en'
language_code = os.getenv('AUTOGGUF_LANGUAGE', 'en-US') language_code = os.getenv('AUTOGGUF_LANGUAGE', 'en-US')