fix kv load bug+input validaton, add imatrix options

imatrix and ui changes, bugfix code
This commit is contained in:
BuildTools 2024-08-03 21:59:59 -07:00
parent 00660727d2
commit 294fe6ea5b
3 changed files with 174 additions and 34 deletions

View File

@ -215,6 +215,10 @@ def __init__(self):
quant_options_scroll.setWidgetResizable(True)
left_layout.addWidget(quant_options_scroll)
# Add this after the KV override section
self.extra_arguments = QLineEdit()
quant_options_layout.addRow(self.create_label(EXTRA_ARGUMENTS, "Additional command-line arguments"), self.extra_arguments)
# Quantize button layout
quantize_layout = QHBoxLayout()
quantize_button = QPushButton(QUANTIZE_MODEL)
@ -263,10 +267,33 @@ def __init__(self):
imatrix_output_layout.addWidget(self.imatrix_output_button)
imatrix_layout.addRow(self.create_label(OUTPUT, OUTPUT_PATH_FOR_GENERATED_IMATRIX), imatrix_output_layout)
self.imatrix_frequency = QLineEdit()
self.imatrix_frequency = QSpinBox()
self.imatrix_frequency.setRange(1, 100) # Set the range from 1 to 100
self.imatrix_frequency.setValue(1) # Set a default value
imatrix_layout.addRow(self.create_label(OUTPUT_FREQUENCY, HOW_OFTEN_TO_SAVE_IMATRIX), self.imatrix_frequency)
# GPU Offload for IMatrix
# Context size input (now a spinbox)
self.imatrix_ctx_size = QSpinBox()
self.imatrix_ctx_size.setRange(1, 1048576) # Up to one million tokens
self.imatrix_ctx_size.setValue(512) # Set a default value
imatrix_layout.addRow(self.create_label(CONTEXT_SIZE, CONTEXT_SIZE_FOR_IMATRIX), self.imatrix_ctx_size)
# Threads input with slider and spinbox
threads_layout = QHBoxLayout()
self.threads_slider = QSlider(Qt.Orientation.Horizontal)
self.threads_slider.setRange(1, 64)
self.threads_slider.valueChanged.connect(self.update_threads_spinbox)
self.threads_spinbox = QSpinBox()
self.threads_spinbox.setRange(1, 128)
self.threads_spinbox.valueChanged.connect(self.update_threads_slider)
self.threads_spinbox.setMinimumWidth(75)
threads_layout.addWidget(self.threads_slider)
threads_layout.addWidget(self.threads_spinbox)
imatrix_layout.addRow(self.create_label(THREADS, NUMBER_OF_THREADS_FOR_IMATRIX), threads_layout)
# GPU Offload for IMatrix (corrected version)
gpu_offload_layout = QHBoxLayout()
self.gpu_offload_slider = QSlider(Qt.Orientation.Horizontal)
self.gpu_offload_slider.setRange(0, 200)
@ -275,7 +302,7 @@ def __init__(self):
self.gpu_offload_spinbox = QSpinBox()
self.gpu_offload_spinbox.setRange(0, 1000)
self.gpu_offload_spinbox.valueChanged.connect(self.update_gpu_offload_slider)
self.gpu_offload_spinbox.setMinimumWidth(75) # Set the minimum width to 75 pixels
self.gpu_offload_spinbox.setMinimumWidth(75)
self.gpu_offload_auto = QCheckBox(AUTO)
self.gpu_offload_auto.stateChanged.connect(self.toggle_gpu_offload_auto)
@ -349,7 +376,8 @@ def save_preset(self):
"use_token_embedding_type": self.use_token_embedding_type.isChecked(),
"token_embedding_type": self.token_embedding_type.currentText(),
"keep_split": self.keep_split.isChecked(),
"kv_overrides": [entry.get_override_string() for entry in self.kv_override_entries]
"kv_overrides": [entry.get_override_string() for entry in self.kv_override_entries],
"extra_arguments": self.extra_arguments.text()
}
file_name, _ = QFileDialog.getSaveFileName(self, SAVE_PRESET, "", JSON_FILES)
@ -379,6 +407,7 @@ def load_preset(self):
self.use_token_embedding_type.setChecked(preset.get("use_token_embedding_type", False))
self.token_embedding_type.setCurrentText(preset.get("token_embedding_type", ""))
self.keep_split.setChecked(preset.get("keep_split", False))
self.extra_arguments.setText(preset.get("extra_arguments", ""))
# Clear existing KV overrides and add new ones
for entry in self.kv_override_entries:
@ -391,19 +420,6 @@ def load_preset(self):
QMessageBox.critical(self, ERROR, FAILED_TO_LOAD_PRESET.format(str(e)))
self.logger.info(PRESET_LOADED_FROM.format(file_name))
def add_kv_override(self, override_string=None):
self.logger.debug(ADDING_KV_OVERRIDE.format(override_string))
entry = KVOverrideEntry()
entry.deleted.connect(self.remove_kv_override)
if override_string:
key, value = override_string.split('=')
type_, val = value.split(':')
entry.key_input.setText(key)
entry.type_combo.setCurrentText(type_)
entry.value_input.setText(val)
self.kv_override_layout.addWidget(entry)
self.kv_override_entries.append(entry)
def save_task_preset(self, task_item):
self.logger.info(SAVING_TASK_PRESET.format(task_item.task_name))
for thread in self.quant_threads:
@ -611,6 +627,23 @@ def show_task_properties(self, item):
model_info_dialog.exec()
break
def update_threads_spinbox(self, value):
self.threads_spinbox.setValue(value)
def update_threads_slider(self, value):
self.threads_slider.setValue(value)
def update_gpu_offload_spinbox(self, value):
self.gpu_offload_spinbox.setValue(value)
def update_gpu_offload_slider(self, value):
self.gpu_offload_slider.setValue(value)
def toggle_gpu_offload_auto(self, state):
is_auto = state == Qt.CheckState.Checked
self.gpu_offload_slider.setEnabled(not is_auto)
self.gpu_offload_spinbox.setEnabled(not is_auto)
def cancel_task(self, item):
self.logger.info(CANCELLING_TASK.format(item.text()))
task_item = self.task_list.itemWidget(item)
@ -710,9 +743,15 @@ def validate_quantization_inputs(self):
if errors:
raise ValueError("\n".join(errors))
def add_kv_override(self):
def add_kv_override(self, override_string=None):
entry = KVOverrideEntry()
entry.deleted.connect(self.remove_kv_override)
if override_string:
key, value = override_string.split('=')
type_, val = value.split(':')
entry.key_input.setText(key)
entry.type_combo.setCurrentText(type_)
entry.value_input.setText(val)
self.kv_override_layout.addWidget(entry)
self.kv_override_entries.append(entry)
@ -736,9 +775,40 @@ def quantize_model(self):
quant_type = self.quant_type.currentText()
input_path = os.path.join(self.models_input.text(), model_name)
output_name = f"{os.path.splitext(model_name)[0]}_{quant_type}.gguf"
output_path = os.path.join(self.output_input.text(), output_name)
model_name = selected_model.text()
quant_type = self.quant_type.currentText()
# Start building the output name
output_name_parts = [os.path.splitext(model_name)[0], "converted", quant_type]
# Check for output tensor options
if self.use_output_tensor_type.isChecked() or self.leave_output_tensor.isChecked():
output_tensor_part = "o"
if self.use_output_tensor_type.isChecked():
output_tensor_part += "." + self.output_tensor_type.currentText()
output_name_parts.append(output_tensor_part)
# Check for embedding tensor options
if self.use_token_embedding_type.isChecked():
embd_tensor_part = "t." + self.token_embedding_type.currentText()
output_name_parts.append(embd_tensor_part)
# Check for pure option
if self.pure.isChecked():
output_name_parts.append("pure")
# Check for requantize option
if self.allow_requantize.isChecked():
output_name_parts.append("rq")
# Check for KV override
if any(entry.get_override_string() for entry in self.kv_override_entries):
output_name_parts.append("kv")
# Join all parts with underscores and add .gguf extension
output_name = "_".join(output_name_parts) + ".gguf"
output_path = os.path.join(self.output_input.text(), output_name)
if not os.path.exists(input_path):
raise FileNotFoundError(INPUT_FILE_NOT_EXIST.format(input_path))
@ -770,6 +840,10 @@ def quantize_model(self):
command.extend([input_path, output_path, quant_type])
# Add extra arguments
if self.extra_arguments.text():
command.extend(self.extra_arguments.text().split())
logs_path = self.logs_input.text()
ensure_directory(logs_path)
@ -875,7 +949,9 @@ def generate_imatrix(self):
"-f", self.imatrix_datafile.text(),
"-m", self.imatrix_model.text(),
"-o", self.imatrix_output.text(),
"--output-frequency", self.imatrix_frequency.text()
"--output-frequency", str(self.imatrix_frequency.value()),
"--ctx-size", str(self.imatrix_ctx_size.value()),
"--threads", str(self.threads_spinbox.value())
]
if self.gpu_offload_auto.isChecked():

View File

@ -1,5 +1,6 @@
from PyQt6.QtWidgets import QWidget, QHBoxLayout, QLineEdit, QComboBox, QPushButton
from PyQt6.QtCore import pyqtSignal
from PyQt6.QtCore import pyqtSignal, QRegularExpression
from PyQt6.QtGui import QDoubleValidator, QIntValidator, QRegularExpressionValidator
class KVOverrideEntry(QWidget):
deleted = pyqtSignal(QWidget)
@ -11,6 +12,9 @@ def __init__(self, parent=None):
self.key_input = QLineEdit()
self.key_input.setPlaceholderText("Key")
# Set validator for key input (letters and dots only)
key_validator = QRegularExpressionValidator(QRegularExpression(r'[A-Za-z.]+'))
self.key_input.setValidator(key_validator)
layout.addWidget(self.key_input)
self.type_combo = QComboBox()
@ -26,8 +30,22 @@ def __init__(self, parent=None):
delete_button.clicked.connect(self.delete_clicked)
layout.addWidget(delete_button)
# Connect type change to validator update
self.type_combo.currentTextChanged.connect(self.update_validator)
# Initialize validator
self.update_validator(self.type_combo.currentText())
def delete_clicked(self):
self.deleted.emit(self)
def get_override_string(self):
return f"{self.key_input.text()}={self.type_combo.currentText()}:{self.value_input.text()}"
def update_validator(self, type_):
if type_ == "int":
self.value_input.setValidator(QIntValidator())
elif type_ == "float":
self.value_input.setValidator(QDoubleValidator())
else: # str
self.value_input.setValidator(None)

View File

@ -164,6 +164,12 @@ def __init__(self):
self.SET_GPU_OFFLOAD_VALUE = ""
self.COMPLETED = ""
self.REFRESH_MODELS = ""
self.EXTRA_ARGUMENTS = ""
self.EXTRA_ARGUMENTS_LABEL = ""
self.CONTEXT_SIZE = ""
self.CONTEXT_SIZE_FOR_IMATRIX = ""
self.THREADS = ""
self.NUMBER_OF_THREADS_FOR_IMATRIX = ""
class _English(_Localization):
def __init__(self):
@ -330,6 +336,13 @@ def __init__(self):
self.SET_GPU_OFFLOAD_VALUE = "Set GPU offload value (-ngl)"
self.COMPLETED = "Completed"
self.REFRESH_MODELS = "Refresh Models"
self.EXTRA_ARGUMENTS = "Extra Arguments:"
self.EXTRA_ARGUMENTS_LABEL = "Additional command-line arguments"
self.CONTEXT_SIZE = "Context Size:"
self.CONTEXT_SIZE_FOR_IMATRIX = "Context size for IMatrix generation"
self.THREADS = "Threads:"
self.NUMBER_OF_THREADS_FOR_IMATRIX = "Number of threads for IMatrix generation"
class _French:
# French localization
def __init__(self):
@ -4889,12 +4902,45 @@ def __init__(self):
}
def set_language(lang_code):
global WINDOW_TITLE, RAM_USAGE, CPU_USAGE, BACKEND, REFRESH_BACKENDS, MODELS_PATH, OUTPUT_PATH, LOGS_PATH, BROWSE, AVAILABLE_MODELS, QUANTIZATION_TYPE, ALLOW_REQUANTIZE, LEAVE_OUTPUT_TENSOR, PURE, IMATRIX, INCLUDE_WEIGHTS, EXCLUDE_WEIGHTS, USE_OUTPUT_TENSOR_TYPE, USE_TOKEN_EMBEDDING_TYPE, KEEP_SPLIT, KV_OVERRIDES, ADD_NEW_OVERRIDE, QUANTIZE_MODEL, SAVE_PRESET, LOAD_PRESET, TASKS, DOWNLOAD_LLAMACPP, SELECT_RELEASE, SELECT_ASSET, EXTRACT_CUDA_FILES, SELECT_CUDA_BACKEND, DOWNLOAD, IMATRIX_GENERATION, DATA_FILE, MODEL, OUTPUT, OUTPUT_FREQUENCY, GPU_OFFLOAD, AUTO, GENERATE_IMATRIX, ERROR, WARNING, PROPERTIES, CANCEL, RESTART, DELETE, CONFIRM_DELETION, TASK_RUNNING_WARNING, YES, NO, DOWNLOAD_COMPLETE, CUDA_EXTRACTION_FAILED, PRESET_SAVED, PRESET_LOADED, NO_ASSET_SELECTED, DOWNLOAD_FAILED, NO_BACKEND_SELECTED, NO_MODEL_SELECTED, REFRESH_RELEASES, NO_SUITABLE_CUDA_BACKENDS, LLAMACPP_DOWNLOADED_EXTRACTED, CUDA_FILES_EXTRACTED, NO_SUITABLE_CUDA_BACKEND_EXTRACTION, ERROR_FETCHING_RELEASES, CONFIRM_DELETION_TITLE, LOG_FOR, ALL_FILES, GGUF_FILES, DAT_FILES, JSON_FILES, FAILED_LOAD_PRESET, INITIALIZING_AUTOGGUF, AUTOGGUF_INITIALIZATION_COMPLETE, REFRESHING_BACKENDS, NO_BACKENDS_AVAILABLE, FOUND_VALID_BACKENDS, SAVING_PRESET, PRESET_SAVED_TO, LOADING_PRESET, PRESET_LOADED_FROM, ADDING_KV_OVERRIDE, SAVING_TASK_PRESET, TASK_PRESET_SAVED, TASK_PRESET_SAVED_TO, RESTARTING_TASK, IN_PROGRESS, DOWNLOAD_FINISHED_EXTRACTED_TO, LLAMACPP_DOWNLOADED_AND_EXTRACTED, NO_SUITABLE_CUDA_BACKEND_FOUND, LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED, REFRESHING_LLAMACPP_RELEASES, UPDATING_ASSET_LIST, UPDATING_CUDA_OPTIONS, STARTING_LLAMACPP_DOWNLOAD, UPDATING_CUDA_BACKENDS, NO_CUDA_BACKEND_SELECTED, EXTRACTING_CUDA_FILES, DOWNLOAD_ERROR, SHOWING_TASK_CONTEXT_MENU, SHOWING_PROPERTIES_FOR_TASK, CANCELLING_TASK, CANCELED, DELETING_TASK, LOADING_MODELS, LOADED_MODELS, BROWSING_FOR_MODELS_DIRECTORY, SELECT_MODELS_DIRECTORY, BROWSING_FOR_OUTPUT_DIRECTORY, SELECT_OUTPUT_DIRECTORY, BROWSING_FOR_LOGS_DIRECTORY, SELECT_LOGS_DIRECTORY, BROWSING_FOR_IMATRIX_FILE, SELECT_IMATRIX_FILE, RAM_USAGE_FORMAT, CPU_USAGE_FORMAT, VALIDATING_QUANTIZATION_INPUTS, MODELS_PATH_REQUIRED, OUTPUT_PATH_REQUIRED, LOGS_PATH_REQUIRED, STARTING_MODEL_QUANTIZATION, INPUT_FILE_NOT_EXIST, QUANTIZING_MODEL_TO, QUANTIZATION_TASK_STARTED, ERROR_STARTING_QUANTIZATION, UPDATING_MODEL_INFO, TASK_FINISHED, SHOWING_TASK_DETAILS_FOR, BROWSING_FOR_IMATRIX_DATA_FILE, SELECT_DATA_FILE, BROWSING_FOR_IMATRIX_MODEL_FILE, SELECT_MODEL_FILE, BROWSING_FOR_IMATRIX_OUTPUT_FILE, SELECT_OUTPUT_FILE, STARTING_IMATRIX_GENERATION, BACKEND_PATH_NOT_EXIST, GENERATING_IMATRIX, ERROR_STARTING_IMATRIX_GENERATION, IMATRIX_GENERATION_TASK_STARTED, ERROR_MESSAGE, TASK_ERROR, APPLICATION_CLOSING, APPLICATION_CLOSED, SELECT_QUANTIZATION_TYPE, ALLOWS_REQUANTIZING, LEAVE_OUTPUT_WEIGHT, DISABLE_K_QUANT_MIXTURES, USE_DATA_AS_IMPORTANCE_MATRIX, USE_IMPORTANCE_MATRIX_FOR_TENSORS, DONT_USE_IMPORTANCE_MATRIX_FOR_TENSORS, OUTPUT_TENSOR_TYPE, USE_THIS_TYPE_FOR_OUTPUT_WEIGHT, TOKEN_EMBEDDING_TYPE, USE_THIS_TYPE_FOR_TOKEN_EMBEDDINGS, WILL_GENERATE_QUANTIZED_MODEL_IN_SAME_SHARDS, OVERRIDE_MODEL_METADATA, INPUT_DATA_FILE_FOR_IMATRIX, MODEL_TO_BE_QUANTIZED, OUTPUT_PATH_FOR_GENERATED_IMATRIX, HOW_OFTEN_TO_SAVE_IMATRIX, SET_GPU_OFFLOAD_VALUE, COMPLETED, REFRESH_MODELS
# Globals
global WINDOW_TITLE, RAM_USAGE, CPU_USAGE, BACKEND, REFRESH_BACKENDS, MODELS_PATH, OUTPUT_PATH, LOGS_PATH
global BROWSE, AVAILABLE_MODELS, QUANTIZATION_TYPE, ALLOW_REQUANTIZE, LEAVE_OUTPUT_TENSOR, PURE, IMATRIX
global INCLUDE_WEIGHTS, EXCLUDE_WEIGHTS, USE_OUTPUT_TENSOR_TYPE, USE_TOKEN_EMBEDDING_TYPE, KEEP_SPLIT
global KV_OVERRIDES, ADD_NEW_OVERRIDE, QUANTIZE_MODEL, SAVE_PRESET, LOAD_PRESET, TASKS, DOWNLOAD_LLAMACPP
global SELECT_RELEASE, SELECT_ASSET, EXTRACT_CUDA_FILES, SELECT_CUDA_BACKEND, DOWNLOAD, IMATRIX_GENERATION
global DATA_FILE, MODEL, OUTPUT, OUTPUT_FREQUENCY, GPU_OFFLOAD, AUTO, GENERATE_IMATRIX, ERROR, WARNING
global PROPERTIES, CANCEL, RESTART, DELETE, CONFIRM_DELETION, TASK_RUNNING_WARNING, YES, NO, DOWNLOAD_COMPLETE
global CUDA_EXTRACTION_FAILED, PRESET_SAVED, PRESET_LOADED, NO_ASSET_SELECTED, DOWNLOAD_FAILED, NO_BACKEND_SELECTED
global NO_MODEL_SELECTED, REFRESH_RELEASES, NO_SUITABLE_CUDA_BACKENDS, LLAMACPP_DOWNLOADED_EXTRACTED, CUDA_FILES_EXTRACTED
global NO_SUITABLE_CUDA_BACKEND_EXTRACTION, ERROR_FETCHING_RELEASES, CONFIRM_DELETION_TITLE, LOG_FOR, ALL_FILES
global GGUF_FILES, DAT_FILES, JSON_FILES, FAILED_LOAD_PRESET, INITIALIZING_AUTOGGUF, AUTOGGUF_INITIALIZATION_COMPLETE
global REFRESHING_BACKENDS, NO_BACKENDS_AVAILABLE, FOUND_VALID_BACKENDS, SAVING_PRESET, PRESET_SAVED_TO, LOADING_PRESET
global PRESET_LOADED_FROM, ADDING_KV_OVERRIDE, SAVING_TASK_PRESET, TASK_PRESET_SAVED, TASK_PRESET_SAVED_TO, RESTARTING_TASK
global IN_PROGRESS, DOWNLOAD_FINISHED_EXTRACTED_TO, LLAMACPP_DOWNLOADED_AND_EXTRACTED, NO_SUITABLE_CUDA_BACKEND_FOUND
global LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED, REFRESHING_LLAMACPP_RELEASES, UPDATING_ASSET_LIST, UPDATING_CUDA_OPTIONS
global STARTING_LLAMACPP_DOWNLOAD, UPDATING_CUDA_BACKENDS, NO_CUDA_BACKEND_SELECTED, EXTRACTING_CUDA_FILES, DOWNLOAD_ERROR
global SHOWING_TASK_CONTEXT_MENU, SHOWING_PROPERTIES_FOR_TASK, CANCELLING_TASK, CANCELED, DELETING_TASK, LOADING_MODELS, LOADED_MODELS
global BROWSING_FOR_MODELS_DIRECTORY, SELECT_MODELS_DIRECTORY, BROWSING_FOR_OUTPUT_DIRECTORY, SELECT_OUTPUT_DIRECTORY
global BROWSING_FOR_LOGS_DIRECTORY, SELECT_LOGS_DIRECTORY, BROWSING_FOR_IMATRIX_FILE, SELECT_IMATRIX_FILE, RAM_USAGE_FORMAT
global CPU_USAGE_FORMAT, VALIDATING_QUANTIZATION_INPUTS, MODELS_PATH_REQUIRED, OUTPUT_PATH_REQUIRED, LOGS_PATH_REQUIRED
global STARTING_MODEL_QUANTIZATION, INPUT_FILE_NOT_EXIST, QUANTIZING_MODEL_TO, QUANTIZATION_TASK_STARTED, ERROR_STARTING_QUANTIZATION
global UPDATING_MODEL_INFO, TASK_FINISHED, SHOWING_TASK_DETAILS_FOR, BROWSING_FOR_IMATRIX_DATA_FILE, SELECT_DATA_FILE
global BROWSING_FOR_IMATRIX_MODEL_FILE, SELECT_MODEL_FILE, BROWSING_FOR_IMATRIX_OUTPUT_FILE, SELECT_OUTPUT_FILE
global STARTING_IMATRIX_GENERATION, BACKEND_PATH_NOT_EXIST, GENERATING_IMATRIX, ERROR_STARTING_IMATRIX_GENERATION
global IMATRIX_GENERATION_TASK_STARTED, ERROR_MESSAGE, TASK_ERROR, APPLICATION_CLOSING, APPLICATION_CLOSED, SELECT_QUANTIZATION_TYPE
global ALLOWS_REQUANTIZING, LEAVE_OUTPUT_WEIGHT, DISABLE_K_QUANT_MIXTURES, USE_DATA_AS_IMPORTANCE_MATRIX, USE_IMPORTANCE_MATRIX_FOR_TENSORS
global DONT_USE_IMPORTANCE_MATRIX_FOR_TENSORS, OUTPUT_TENSOR_TYPE, USE_THIS_TYPE_FOR_OUTPUT_WEIGHT, TOKEN_EMBEDDING_TYPE, USE_THIS_TYPE_FOR_TOKEN_EMBEDDINGS
global WILL_GENERATE_QUANTIZED_MODEL_IN_SAME_SHARDS, OVERRIDE_MODEL_METADATA, INPUT_DATA_FILE_FOR_IMATRIX, MODEL_TO_BE_QUANTIZED
global OUTPUT_PATH_FOR_GENERATED_IMATRIX, HOW_OFTEN_TO_SAVE_IMATRIX, SET_GPU_OFFLOAD_VALUE, COMPLETED, REFRESH_MODELS
global CONTEXT_SIZE, CONTEXT_SIZE_FOR_IMATRIX, THREADS, NUMBER_OF_THREADS_FOR_IMATRIX, EXTRA_ARGUMENTS, EXTRA_ARGUMENTS_LABEL
loc = _languages.get(lang_code, _English)()
for key, value in loc.__dict__.items():
english_loc = _English() # Create an instance of English localization for fallback
for key in dir(english_loc):
if not key.startswith('_'):
globals()[key] = value
globals()[key] = getattr(loc, key, getattr(english_loc, key))
# Get the language from the AUTOGGUF_LANGUAGE environment variable, default to 'en'
language_code = os.getenv('AUTOGGUF_LANGUAGE', 'en-US')