From dc9a1c897120f935eacb6fa3b41bce265197f6f2 Mon Sep 17 00:00:00 2001
From: BuildTools
Date: Mon, 5 Aug 2024 12:11:08 -0700
Subject: [PATCH] feat: support multiple quantization types

This commit adds the ability to select and run multiple quantization types
simultaneously. It includes:

- Replacing the quantization type dropdown with a multi-select list
- Updating preset saving and loading to handle multiple quantization types
- Modifying the quantize_model function to process all selected types
- Fixing a formatting issue from the previous commit
- Using error and in-progress messages from localizations in QuantizationThread

---
 docs/AutoGGUF.py          |  21 ++-
 src/AutoGGUF.py           | 339 +++++++++++++++++++++-----------------
 src/QuantizationThread.py |   5 +-
 src/localizations.py      | 188 +++++++++++++--------
 4 files changed, 327 insertions(+), 226 deletions(-)

diff --git a/docs/AutoGGUF.py b/docs/AutoGGUF.py
index d2d3cfd..5d39648 100644
--- a/docs/AutoGGUF.py
+++ b/docs/AutoGGUF.py
@@ -125,10 +125,25 @@ def load_models(self):
 
     def quantize_model(self):
         """
-        Start the model quantization process.
+        Initiates the quantization process for the selected model with the chosen quantization types.
 
-        This method collects all the quantization settings, constructs the
-        quantization command, and starts a new thread to run the quantization process.
+        This function performs the following steps:
+        1. Validates the input parameters and selected model.
+        2. Retrieves the backend path and selected quantization types.
+        3. For each selected quantization type:
+           a. Constructs the output file name based on the quantization options.
+           b. Builds the quantization command with all selected options.
+           c. Creates a new thread for the quantization process.
+           d. Sets up a task item in the GUI to display progress and status.
+           e. Connects the thread signals to update the GUI and handle errors.
+           f. Starts the quantization thread.
+
+        The function handles various exceptions and displays error messages if any step fails.
+
+        Raises:
+            ValueError: If required inputs are missing or invalid.
+            FileNotFoundError: If the input model file doesn't exist.
+            Exception: For any other unexpected errors during the process.
""" def update_model_info(self, model_info): diff --git a/src/AutoGGUF.py b/src/AutoGGUF.py index ae14b65..6d0bbc8 100644 --- a/src/AutoGGUF.py +++ b/src/AutoGGUF.py @@ -144,48 +144,50 @@ def __init__(self): quant_options_widget = QWidget() quant_options_layout = QFormLayout() - self.quant_type = QComboBox() - self.quant_type.addItems( - [ - "IQ2_XXS", - "IQ2_XS", - "IQ2_S", - "IQ2_M", - "IQ1_S", - "IQ1_M", - "Q2_K", - "Q2_K_S", - "IQ3_XXS", - "IQ3_S", - "IQ3_M", - "Q3_K", - "IQ3_XS", - "Q3_K_S", - "Q3_K_M", - "Q3_K_L", - "IQ4_NL", - "IQ4_XS", - "Q4_K", - "Q4_K_S", - "Q4_K_M", - "Q5_K", - "Q5_K_S", - "Q5_K_M", - "Q6_K", - "Q8_0", - "Q4_0", - "Q4_1", - "Q5_0", - "Q5_1", - "Q4_0_4_4", - "Q4_0_4_8", - "Q4_0_8_8", - "BF16", - "F16", - "F32", - "COPY", - ] - ) + self.quant_type = QListWidget() + self.quant_type.setMinimumHeight(100) + self.quant_type.setMinimumWidth(150) + self.quant_type.setSelectionMode(QListWidget.SelectionMode.MultiSelection) + quant_types = [ + "IQ2_XXS", + "IQ2_XS", + "IQ2_S", + "IQ2_M", + "IQ1_S", + "IQ1_M", + "Q2_K", + "Q2_K_S", + "IQ3_XXS", + "IQ3_S", + "IQ3_M", + "Q3_K", + "IQ3_XS", + "Q3_K_S", + "Q3_K_M", + "Q3_K_L", + "IQ4_NL", + "IQ4_XS", + "Q4_K", + "Q4_K_S", + "Q4_K_M", + "Q5_K", + "Q5_K_S", + "Q5_K_M", + "Q6_K", + "Q8_0", + "Q4_0", + "Q4_1", + "Q5_0", + "Q5_1", + "Q4_0_4_4", + "Q4_0_4_8", + "Q4_0_8_8", + "BF16", + "F16", + "F32", + "COPY", + ] + self.quant_type.addItems(quant_types) quant_options_layout.addRow( self.create_label(QUANTIZATION_TYPE, SELECT_QUANTIZATION_TYPE), self.quant_type, @@ -565,6 +567,9 @@ def __init__(self): # Initialize threads self.quant_threads = [] + # Load models + self.load_models() + self.logger.info(AUTOGGUF_INITIALIZATION_COMPLETE) def refresh_backends(self): @@ -598,7 +603,7 @@ def update_base_model_visibility(self, index): def save_preset(self): self.logger.info(SAVING_PRESET) preset = { - "quant_type": self.quant_type.currentText(), + "quant_types": [item.text() for item in self.quant_type.selectedItems()], "allow_requantize": self.allow_requantize.isChecked(), "leave_output_tensor": self.leave_output_tensor.isChecked(), "pure": self.pure.isChecked(), @@ -633,7 +638,11 @@ def load_preset(self): with open(file_name, "r") as f: preset = json.load(f) - self.quant_type.setCurrentText(preset.get("quant_type", "")) + self.quant_type.clearSelection() + for quant_type in preset.get("quant_types", []): + items = self.quant_type.findItems(quant_type, Qt.MatchExactly) + if items: + items[0].setSelected(True) self.allow_requantize.setChecked(preset.get("allow_requantize", False)) self.leave_output_tensor.setChecked( preset.get("leave_output_tensor", False) @@ -1364,126 +1373,154 @@ def quantize_model(self): backend_path = self.backend_combo.currentData() if not backend_path: raise ValueError(NO_BACKEND_SELECTED) - quant_type = self.quant_type.currentText() + + selected_quant_types = [ + item.text() for item in self.quant_type.selectedItems() + ] + if not selected_quant_types: + raise ValueError(NO_QUANTIZATION_TYPE_SELECTED) input_path = os.path.join(self.models_input.text(), model_file) - - # Start building the output name - output_name_parts = [ - os.path.splitext(model_name)[0], - "converted", - quant_type, - ] - - # Check for output tensor options - if ( - self.use_output_tensor_type.isChecked() - or self.leave_output_tensor.isChecked() - ): - output_tensor_part = "o" - if self.use_output_tensor_type.isChecked(): - output_tensor_part += "." 
+ self.output_tensor_type.currentText() - output_name_parts.append(output_tensor_part) - - # Check for embedding tensor options - if self.use_token_embedding_type.isChecked(): - embd_tensor_part = "t." + self.token_embedding_type.currentText() - output_name_parts.append(embd_tensor_part) - - # Check for pure option - if self.pure.isChecked(): - output_name_parts.append("pure") - - # Check for requantize option - if self.allow_requantize.isChecked(): - output_name_parts.append("rq") - - # Check for KV override - if any(entry.get_override_string() for entry in self.kv_override_entries): - output_name_parts.append("kv") - - # Join all parts with underscores and add .gguf extension - output_name = "_".join(output_name_parts) + ".gguf" - - output_path = os.path.join(self.output_input.text(), output_name) if not os.path.exists(input_path): raise FileNotFoundError(INPUT_FILE_NOT_EXIST.format(input_path)) - command = [os.path.join(backend_path, "llama-quantize")] + tasks = [] # List to keep track of all tasks - if self.allow_requantize.isChecked(): - command.append("--allow-requantize") - if self.leave_output_tensor.isChecked(): - command.append("--leave-output-tensor") - if self.pure.isChecked(): - command.append("--pure") - if self.imatrix.text(): - command.extend(["--imatrix", self.imatrix.text()]) - if self.include_weights.text(): - command.extend(["--include-weights", self.include_weights.text()]) - if self.exclude_weights.text(): - command.extend(["--exclude-weights", self.exclude_weights.text()]) - if self.use_output_tensor_type.isChecked(): - command.extend( - ["--output-tensor-type", self.output_tensor_type.currentText()] - ) - if self.use_token_embedding_type.isChecked(): - command.extend( - ["--token-embedding-type", self.token_embedding_type.currentText()] - ) - if self.keep_split.isChecked(): - command.append("--keep-split") - if self.kv_override_entries: - for entry in self.kv_override_entries: - override_string = entry.get_override_string( - model_name=model_name, - quant_type=quant_type, - output_path=output_path, + for quant_type in selected_quant_types: + # Start building the output name + output_name_parts = [ + os.path.splitext(model_name)[0], + "converted", + quant_type, + ] + + # Check for output tensor options + if ( + self.use_output_tensor_type.isChecked() + or self.leave_output_tensor.isChecked() + ): + output_tensor_part = "o" + if self.use_output_tensor_type.isChecked(): + output_tensor_part += ( + "." + self.output_tensor_type.currentText() + ) + output_name_parts.append(output_tensor_part) + + # Check for embedding tensor options + if self.use_token_embedding_type.isChecked(): + embd_tensor_part = "t." 
+ self.token_embedding_type.currentText() + output_name_parts.append(embd_tensor_part) + + # Check for pure option + if self.pure.isChecked(): + output_name_parts.append("pure") + + # Check for requantize option + if self.allow_requantize.isChecked(): + output_name_parts.append("rq") + + # Check for KV override + if any( + entry.get_override_string() for entry in self.kv_override_entries + ): + output_name_parts.append("kv") + + # Join all parts with underscores and add .gguf extension + output_name = "_".join(output_name_parts) + ".gguf" + output_path = os.path.join(self.output_input.text(), output_name) + + command = [os.path.join(backend_path, "llama-quantize")] + + if self.allow_requantize.isChecked(): + command.append("--allow-requantize") + if self.leave_output_tensor.isChecked(): + command.append("--leave-output-tensor") + if self.pure.isChecked(): + command.append("--pure") + if self.imatrix.text(): + command.extend(["--imatrix", self.imatrix.text()]) + if self.include_weights.text(): + command.extend(["--include-weights", self.include_weights.text()]) + if self.exclude_weights.text(): + command.extend(["--exclude-weights", self.exclude_weights.text()]) + if self.use_output_tensor_type.isChecked(): + command.extend( + ["--output-tensor-type", self.output_tensor_type.currentText()] ) - if override_string: - command.extend(["--override-kv", override_string]) + if self.use_token_embedding_type.isChecked(): + command.extend( + [ + "--token-embedding-type", + self.token_embedding_type.currentText(), + ] + ) + if self.keep_split.isChecked(): + command.append("--keep-split") + if self.kv_override_entries: + for entry in self.kv_override_entries: + override_string = entry.get_override_string( + model_name=model_name, + quant_type=quant_type, + output_path=output_path, + ) + if override_string: + command.extend(["--override-kv", override_string]) - command.extend([input_path, output_path, quant_type]) + command.extend([input_path, output_path, quant_type]) - # Add extra arguments - if self.extra_arguments.text(): - command.extend(self.extra_arguments.text().split()) + # Add extra arguments + if self.extra_arguments.text(): + command.extend(self.extra_arguments.text().split()) - logs_path = self.logs_input.text() - ensure_directory(logs_path) + logs_path = self.logs_input.text() + ensure_directory(logs_path) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - log_file = os.path.join( - logs_path, f"{model_name}_{timestamp}_{quant_type}.log" - ) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = os.path.join( + logs_path, f"{model_name}_{timestamp}_{quant_type}.log" + ) - # Log quant command - command_str = " ".join(command) - self.logger.info(f"{QUANTIZATION_COMMAND}: {command_str}") + # Log quant command + command_str = " ".join(command) + self.logger.info(f"{QUANTIZATION_COMMAND}: {command_str}") - thread = QuantizationThread(command, backend_path, log_file) - self.quant_threads.append(thread) + thread = QuantizationThread(command, backend_path, log_file) + self.quant_threads.append(thread) - task_item = TaskListItem( - QUANTIZING_MODEL_TO.format(model_name, quant_type), log_file - ) - list_item = QListWidgetItem(self.task_list) - list_item.setSizeHint(task_item.sizeHint()) - self.task_list.addItem(list_item) - self.task_list.setItemWidget(list_item, task_item) + task_item = TaskListItem( + QUANTIZING_MODEL_TO.format(model_name, quant_type), log_file + ) + list_item = QListWidgetItem(self.task_list) + list_item.setSizeHint(task_item.sizeHint()) + 
self.task_list.addItem(list_item) + self.task_list.setItemWidget(list_item, task_item) + + tasks.append( + (thread, task_item) + ) # Add the thread and task_item to our list + + # Connect the output signal to the new progress parsing function + thread.output_signal.connect( + lambda line, ti=task_item: self.parse_progress(line, ti) + ) + thread.status_signal.connect(task_item.update_status) + thread.finished_signal.connect( + lambda t=thread, ti=task_item: self.task_finished(t, ti) + ) + thread.error_signal.connect( + lambda err, ti=task_item: self.handle_error(err, ti) + ) + thread.model_info_signal.connect(self.update_model_info) + + # Start all threads after setting them up + for thread, _ in tasks: + thread.start() + self.logger.info(QUANTIZATION_TASK_STARTED.format(model_name)) - # Connect the output signal to the new progress parsing function - thread.output_signal.connect( - lambda line: self.parse_progress(line, task_item) - ) - thread.status_signal.connect(task_item.update_status) - thread.finished_signal.connect(lambda: self.task_finished(thread)) - thread.error_signal.connect(lambda err: self.handle_error(err, task_item)) - thread.model_info_signal.connect(self.update_model_info) - thread.start() - self.logger.info(QUANTIZATION_TASK_STARTED.format(model_name)) except ValueError as e: self.show_error(str(e)) + except FileNotFoundError as e: + self.show_error(str(e)) except Exception as e: self.show_error(ERROR_STARTING_QUANTIZATION.format(str(e))) @@ -1501,10 +1538,11 @@ def parse_progress(self, line, task_item): progress = int((current / total) * 100) task_item.update_progress(progress) - def task_finished(self, thread): + def task_finished(self, thread, task_item): self.logger.info(TASK_FINISHED.format(thread.log_file)) if thread in self.quant_threads: self.quant_threads.remove(thread) + task_item.update_status(COMPLETED) def show_task_details(self, item): self.logger.debug(SHOWING_TASK_DETAILS_FOR.format(item.text())) @@ -1630,11 +1668,10 @@ def show_error(self, message): self.logger.error(ERROR_MESSAGE.format(message)) QMessageBox.critical(self, ERROR, message) - def handle_error(self, error_message, task_item, task_exists=True): + def handle_error(self, error_message, task_item): self.logger.error(TASK_ERROR.format(error_message)) self.show_error(error_message) - if task_exists: - task_item.set_error() + task_item.update_status(ERROR) def closeEvent(self, event: QCloseEvent): self.logger.info(APPLICATION_CLOSING) diff --git a/src/QuantizationThread.py b/src/QuantizationThread.py index aa798f2..e7c12c3 100644 --- a/src/QuantizationThread.py +++ b/src/QuantizationThread.py @@ -14,6 +14,7 @@ import traceback from datetime import datetime from imports_and_globals import open_file_safe +from localizations import * class QuantizationThread(QThread): @@ -49,13 +50,13 @@ def run(self): self.output_signal.emit(line) log.write(line + "\n") log.flush() - self.status_signal.emit("In Progress") + self.status_signal.emit(IN_PROGRESS) self.parse_model_info(line) # Wait for process to complete self.process.wait() if self.process.returncode == 0: - self.status_signal.emit("Completed") + self.status_signal.emit(COMPLETED) self.model_info_signal.emit(self.model_info) else: self.error_signal.emit( diff --git a/src/localizations.py b/src/localizations.py index 4ac0b52..1061bdb 100644 --- a/src/localizations.py +++ b/src/localizations.py @@ -230,6 +230,7 @@ def __init__(self): self.IMATRIX_GENERATION_COMMAND = "" self.LORA_CONVERSION_COMMAND = "" self.LORA_EXPORT_COMMAND = "" + 
self.NO_QUANTIZATION_TYPE_SELECTED = "" class _English(_Localization): @@ -502,6 +503,7 @@ def __init__(self): self.IMATRIX_GENERATION_COMMAND = "IMatrix generation command" self.LORA_CONVERSION_COMMAND = "LoRA conversion command" self.LORA_EXPORT_COMMAND = "LoRA export command" + self.NO_QUANTIZATION_TYPE_SELECTED = "No quantization type selected. Please select at least one quantization type." class _French: @@ -857,7 +859,9 @@ def __init__(self): self.NO_MODEL_SELECTED = "未选择模型" self.REFRESH_RELEASES = "刷新发布版本" self.NO_SUITABLE_CUDA_BACKENDS = "未找到合适的CUDA后端" - self.LLAMACPP_DOWNLOADED_EXTRACTED = "llama.cpp二进制文件已下载并提取到{0}\nCUDA文件已提取到{1}" + self.LLAMACPP_DOWNLOADED_EXTRACTED = ( + "llama.cpp二进制文件已下载并提取到{0}\nCUDA文件已提取到{1}" + ) self.CUDA_FILES_EXTRACTED = "CUDA文件已提取到" self.NO_SUITABLE_CUDA_BACKEND_EXTRACTION = "未找到适合提取的CUDA后端" self.ERROR_FETCHING_RELEASES = "获取发布版本时出错:{0}" @@ -888,7 +892,9 @@ def __init__(self): "llama.cpp二进制文件已下载并提取到{0}\nCUDA文件已提取到{1}" ) self.NO_SUITABLE_CUDA_BACKEND_FOUND = "未找到适合提取的CUDA后端" - self.LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED = "llama.cpp二进制文件已下载并提取到{0}" + self.LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED = ( + "llama.cpp二进制文件已下载并提取到{0}" + ) self.REFRESHING_LLAMACPP_RELEASES = "刷新llama.cpp发布版本" self.UPDATING_ASSET_LIST = "更新资源列表" self.UPDATING_CUDA_OPTIONS = "更新CUDA选项" @@ -952,7 +958,9 @@ def __init__(self): self.USE_THIS_TYPE_FOR_OUTPUT_WEIGHT = "对output.weight张量使用此类型" self.TOKEN_EMBEDDING_TYPE = "令牌嵌入类型:" self.USE_THIS_TYPE_FOR_TOKEN_EMBEDDINGS = "对令牌嵌入张量使用此类型" - self.WILL_GENERATE_QUANTIZED_MODEL_IN_SAME_SHARDS = "将在与输入相同的分片中生成量化模型" + self.WILL_GENERATE_QUANTIZED_MODEL_IN_SAME_SHARDS = ( + "将在与输入相同的分片中生成量化模型" + ) self.OVERRIDE_MODEL_METADATA = "覆盖模型元数据" self.INPUT_DATA_FILE_FOR_IMATRIX = "IMatrix生成的输入数据文件" self.MODEL_TO_BE_QUANTIZED = "要量化的模型" @@ -1400,7 +1408,9 @@ def __init__(self): self.NO_MODEL_SELECTED = "कोई मॉडल चयनित नहीं" self.REFRESH_RELEASES = "रिलीज़ रीफ्रेश करें" self.NO_SUITABLE_CUDA_BACKENDS = "कोई उपयुक्त CUDA बैकएंड नहीं मिला" - self.LLAMACPP_DOWNLOADED_EXTRACTED = "llama.cpp बाइनरी डाउनलोड और {0} में निकाली गई\nCUDA फ़ाइलें {1} में निकाली गईं" + self.LLAMACPP_DOWNLOADED_EXTRACTED = ( + "llama.cpp बाइनरी डाउनलोड और {0} में निकाली गई\nCUDA फ़ाइलें {1} में निकाली गईं" + ) self.CUDA_FILES_EXTRACTED = "CUDA फ़ाइलें निकाली गईं" self.NO_SUITABLE_CUDA_BACKEND_EXTRACTION = ( "निष्कर्षण के लिए कोई उपयुक्त CUDA बैकएंड नहीं मिला" @@ -1429,7 +1439,9 @@ def __init__(self): self.RESTARTING_TASK = "कार्य पुनः आरंभ हो रहा है: {0}" self.IN_PROGRESS = "प्रगति में" self.DOWNLOAD_FINISHED_EXTRACTED_TO = "डाउनलोड समाप्त। निकाला गया: {0}" - self.LLAMACPP_DOWNLOADED_AND_EXTRACTED = "llama.cpp बाइनरी डाउनलोड और {0} में निकाली गई\nCUDA फ़ाइलें {1} में निकाली गईं" + self.LLAMACPP_DOWNLOADED_AND_EXTRACTED = ( + "llama.cpp बाइनरी डाउनलोड और {0} में निकाली गई\nCUDA फ़ाइलें {1} में निकाली गईं" + ) self.NO_SUITABLE_CUDA_BACKEND_FOUND = ( "निष्कर्षण के लिए कोई उपयुक्त CUDA बैकएंड नहीं मिला" ) @@ -1451,25 +1463,17 @@ def __init__(self): self.DELETING_TASK = "कार्य हटाया जा रहा है: {0}" self.LOADING_MODELS = "मॉडल लोड हो रहे हैं" self.LOADED_MODELS = "{0} मॉडल लोड किए गए" - self.BROWSING_FOR_MODELS_DIRECTORY = ( - "मॉडल निर्देशिका के लिए ब्राउज़ किया जा रहा है" - ) + self.BROWSING_FOR_MODELS_DIRECTORY = "मॉडल निर्देशिका के लिए ब्राउज़ किया जा रहा है" self.SELECT_MODELS_DIRECTORY = "मॉडल निर्देशिका चुनें" - self.BROWSING_FOR_OUTPUT_DIRECTORY = ( - "आउटपुट निर्देशिका के लिए ब्राउज़ किया जा रहा है" - ) + self.BROWSING_FOR_OUTPUT_DIRECTORY = "आउटपुट निर्देशिका के लिए ब्राउज़ किया 
जा रहा है" self.SELECT_OUTPUT_DIRECTORY = "आउटपुट निर्देशिका चुनें" - self.BROWSING_FOR_LOGS_DIRECTORY = ( - "लॉग निर्देशिका के लिए ब्राउज़ किया जा रहा है" - ) + self.BROWSING_FOR_LOGS_DIRECTORY = "लॉग निर्देशिका के लिए ब्राउज़ किया जा रहा है" self.SELECT_LOGS_DIRECTORY = "लॉग निर्देशिका चुनें" self.BROWSING_FOR_IMATRIX_FILE = "IMatrix फ़ाइल के लिए ब्राउज़ किया जा रहा है" self.SELECT_IMATRIX_FILE = "IMatrix फ़ाइल चुनें" self.RAM_USAGE_FORMAT = "{0:.1f}% ({1} MB / {2} MB)" self.CPU_USAGE_FORMAT = "CPU उपयोग: {0:.1f}%" - self.VALIDATING_QUANTIZATION_INPUTS = ( - "क्वांटाइजेशन इनपुट सत्यापित किए जा रहे हैं" - ) + self.VALIDATING_QUANTIZATION_INPUTS = "क्वांटाइजेशन इनपुट सत्यापित किए जा रहे हैं" self.MODELS_PATH_REQUIRED = "मॉडल पथ आवश्यक है" self.OUTPUT_PATH_REQUIRED = "आउटपुट पथ आवश्यक है" self.LOGS_PATH_REQUIRED = "लॉग पथ आवश्यक है" @@ -1496,9 +1500,7 @@ def __init__(self): self.STARTING_IMATRIX_GENERATION = "IMatrix उत्पादन शुरू हो रहा है" self.BACKEND_PATH_NOT_EXIST = "बैकएंड पथ मौजूद नहीं है: {0}" self.GENERATING_IMATRIX = "IMatrix उत्पन्न किया जा रहा है" - self.ERROR_STARTING_IMATRIX_GENERATION = ( - "IMatrix उत्पादन शुरू करने में त्रुटि: {0}" - ) + self.ERROR_STARTING_IMATRIX_GENERATION = "IMatrix उत्पादन शुरू करने में त्रुटि: {0}" self.IMATRIX_GENERATION_TASK_STARTED = "IMatrix उत्पादन कार्य शुरू हुआ" self.ERROR_MESSAGE = "त्रुटि: {0}" self.TASK_ERROR = "कार्य त्रुटि: {0}" @@ -1508,14 +1510,14 @@ def __init__(self): self.ALLOWS_REQUANTIZING = ( "पहले से क्वांटाइज़ किए गए टेंसर को पुनः क्वांटाइज़ करने की अनुमति देता है" ) - self.LEAVE_OUTPUT_WEIGHT = ( - "output.weight को अक्वांटाइज़ (या पुनः क्वांटाइज़) छोड़ देगा" + self.LEAVE_OUTPUT_WEIGHT = "output.weight को अक्वांटाइज़ (या पुनः क्वांटाइज़) छोड़ देगा" + self.DISABLE_K_QUANT_MIXTURES = ( + "k-quant मिश्रण को अक्षम करें और सभी टेंसर को एक ही प्रकार में क्वांटाइज़ करें" ) - self.DISABLE_K_QUANT_MIXTURES = "k-quant मिश्रण को अक्षम करें और सभी टेंसर को एक ही प्रकार में क्वांटाइज़ करें" - self.USE_DATA_AS_IMPORTANCE_MATRIX = "क्वांट अनुकूलन के लिए फ़ाइल में डेटा को महत्व मैट्रिक्स के रूप में उपयोग करें" - self.USE_IMPORTANCE_MATRIX_FOR_TENSORS = ( - "इन टेंसर के लिए महत्व मैट्रिक्स का उपयोग करें" + self.USE_DATA_AS_IMPORTANCE_MATRIX = ( + "क्वांट अनुकूलन के लिए फ़ाइल में डेटा को महत्व मैट्रिक्स के रूप में उपयोग करें" ) + self.USE_IMPORTANCE_MATRIX_FOR_TENSORS = "इन टेंसर के लिए महत्व मैट्रिक्स का उपयोग करें" self.DONT_USE_IMPORTANCE_MATRIX_FOR_TENSORS = ( "इन टेंसर के लिए महत्व मैट्रिक्स का उपयोग न करें" ) @@ -1972,7 +1974,9 @@ def __init__(self): self.RESTART = "再起動" self.DELETE = "削除" self.CONFIRM_DELETION = "このタスクを削除してもよろしいですか?" - self.TASK_RUNNING_WARNING = "一部のタスクはまだ実行中です。終了してもよろしいですか?" + self.TASK_RUNNING_WARNING = ( + "一部のタスクはまだ実行中です。終了してもよろしいですか?" 
+ ) self.YES = "はい" self.NO = "いいえ" self.DOWNLOAD_COMPLETE = "ダウンロード完了" @@ -1985,11 +1989,11 @@ def __init__(self): self.NO_MODEL_SELECTED = "モデルが選択されていません" self.REFRESH_RELEASES = "リリースを更新" self.NO_SUITABLE_CUDA_BACKENDS = "適切なCUDAバックエンドが見つかりませんでした" - self.LLAMACPP_DOWNLOADED_EXTRACTED = ( - "llama.cppバイナリがダウンロードされ、{0}に抽出されました\nCUDAファイルは{1}に抽出されました" - ) + self.LLAMACPP_DOWNLOADED_EXTRACTED = "llama.cppバイナリがダウンロードされ、{0}に抽出されました\nCUDAファイルは{1}に抽出されました" self.CUDA_FILES_EXTRACTED = "CUDAファイルはに抽出されました" - self.NO_SUITABLE_CUDA_BACKEND_EXTRACTION = "抽出に適したCUDAバックエンドが見つかりませんでした" + self.NO_SUITABLE_CUDA_BACKEND_EXTRACTION = ( + "抽出に適したCUDAバックエンドが見つかりませんでした" + ) self.ERROR_FETCHING_RELEASES = "リリースの取得中にエラーが発生しました: {0}" self.CONFIRM_DELETION_TITLE = "削除の確認" self.LOG_FOR = "{0}のログ" @@ -2014,10 +2018,10 @@ def __init__(self): self.RESTARTING_TASK = "タスクを再起動しています: {0}" self.IN_PROGRESS = "処理中" self.DOWNLOAD_FINISHED_EXTRACTED_TO = "ダウンロードが完了しました。抽出先: {0}" - self.LLAMACPP_DOWNLOADED_AND_EXTRACTED = ( - "llama.cppバイナリがダウンロードされ、{0}に抽出されました\nCUDAファイルは{1}に抽出されました" + self.LLAMACPP_DOWNLOADED_AND_EXTRACTED = "llama.cppバイナリがダウンロードされ、{0}に抽出されました\nCUDAファイルは{1}に抽出されました" + self.NO_SUITABLE_CUDA_BACKEND_FOUND = ( + "抽出に適したCUDAバックエンドが見つかりませんでした" ) - self.NO_SUITABLE_CUDA_BACKEND_FOUND = "抽出に適したCUDAバックエンドが見つかりませんでした" self.LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED = ( "llama.cppバイナリがダウンロードされ、{0}に抽出されました" ) @@ -2067,24 +2071,42 @@ def __init__(self): self.STARTING_IMATRIX_GENERATION = "IMatrixの生成を開始しています" self.BACKEND_PATH_NOT_EXIST = "バックエンドパスが存在しません: {0}" self.GENERATING_IMATRIX = "IMatrixを生成しています" - self.ERROR_STARTING_IMATRIX_GENERATION = "IMatrixの生成を開始中にエラーが発生しました: {0}" + self.ERROR_STARTING_IMATRIX_GENERATION = ( + "IMatrixの生成を開始中にエラーが発生しました: {0}" + ) self.IMATRIX_GENERATION_TASK_STARTED = "IMatrix生成タスクが開始されました" self.ERROR_MESSAGE = "エラー: {0}" self.TASK_ERROR = "タスクエラー: {0}" self.APPLICATION_CLOSING = "アプリケーションを終了しています" self.APPLICATION_CLOSED = "アプリケーションが終了しました" self.SELECT_QUANTIZATION_TYPE = "量子化タイプを選択してください" - self.ALLOWS_REQUANTIZING = "すでに量子化されているテンソルの再量子化を許可します" + self.ALLOWS_REQUANTIZING = ( + "すでに量子化されているテンソルの再量子化を許可します" + ) self.LEAVE_OUTPUT_WEIGHT = "output.weightは(再)量子化されません" - self.DISABLE_K_QUANT_MIXTURES = "k-quant混合を無効にし、すべてのテンソルを同じタイプに量子化します" - self.USE_DATA_AS_IMPORTANCE_MATRIX = "量子化最適化の重要度マトリックスとしてファイル内のデータを使用します" - self.USE_IMPORTANCE_MATRIX_FOR_TENSORS = "これらのテンソルに重要度マトリックスを使用します" - self.DONT_USE_IMPORTANCE_MATRIX_FOR_TENSORS = "これらのテンソルに重要度マトリックスを使用しません" + self.DISABLE_K_QUANT_MIXTURES = ( + "k-quant混合を無効にし、すべてのテンソルを同じタイプに量子化します" + ) + self.USE_DATA_AS_IMPORTANCE_MATRIX = ( + "量子化最適化の重要度マトリックスとしてファイル内のデータを使用します" + ) + self.USE_IMPORTANCE_MATRIX_FOR_TENSORS = ( + "これらのテンソルに重要度マトリックスを使用します" + ) + self.DONT_USE_IMPORTANCE_MATRIX_FOR_TENSORS = ( + "これらのテンソルに重要度マトリックスを使用しません" + ) self.OUTPUT_TENSOR_TYPE = "出力テンソルタイプ:" - self.USE_THIS_TYPE_FOR_OUTPUT_WEIGHT = "output.weightテンソルにこのタイプを使用します" + self.USE_THIS_TYPE_FOR_OUTPUT_WEIGHT = ( + "output.weightテンソルにこのタイプを使用します" + ) self.TOKEN_EMBEDDING_TYPE = "トークン埋め込みタイプ:" - self.USE_THIS_TYPE_FOR_TOKEN_EMBEDDINGS = "トークン埋め込みテンソルにこのタイプを使用します" - self.WILL_GENERATE_QUANTIZED_MODEL_IN_SAME_SHARDS = "入力と同じシャードで量子化されたモデルを生成します" + self.USE_THIS_TYPE_FOR_TOKEN_EMBEDDINGS = ( + "トークン埋め込みテンソルにこのタイプを使用します" + ) + self.WILL_GENERATE_QUANTIZED_MODEL_IN_SAME_SHARDS = ( + "入力と同じシャードで量子化されたモデルを生成します" + ) self.OVERRIDE_MODEL_METADATA = "モデルメタデータを上書きする" self.INPUT_DATA_FILE_FOR_IMATRIX = "IMatrix生成用の入力データファイル" self.MODEL_TO_BE_QUANTIZED = "量子化されるモデル" @@ -2741,11 +2763,11 @@ 
def __init__(self): self.NO_MODEL_SELECTED = "모델이 선택되지 않았습니다" self.REFRESH_RELEASES = "릴리스 새로 고침" self.NO_SUITABLE_CUDA_BACKENDS = "적합한 CUDA 백엔드를 찾을 수 없습니다" - self.LLAMACPP_DOWNLOADED_EXTRACTED = ( - "llama.cpp 바이너리가 다운로드되어 {0}에 추출되었습니다.\nCUDA 파일이 {1}에 추출되었습니다." - ) + self.LLAMACPP_DOWNLOADED_EXTRACTED = "llama.cpp 바이너리가 다운로드되어 {0}에 추출되었습니다.\nCUDA 파일이 {1}에 추출되었습니다." self.CUDA_FILES_EXTRACTED = "CUDA 파일이 에 추출되었습니다." - self.NO_SUITABLE_CUDA_BACKEND_EXTRACTION = "추출에 적합한 CUDA 백엔드를 찾을 수 없습니다." + self.NO_SUITABLE_CUDA_BACKEND_EXTRACTION = ( + "추출에 적합한 CUDA 백엔드를 찾을 수 없습니다." + ) self.ERROR_FETCHING_RELEASES = "릴리스를 가져오는 중 오류가 발생했습니다: {0}" self.CONFIRM_DELETION_TITLE = "삭제 확인" self.LOG_FOR = "{0}에 대한 로그" @@ -2769,11 +2791,13 @@ def __init__(self): self.TASK_PRESET_SAVED_TO = "작업 프리셋이 {0}에 저장되었습니다." self.RESTARTING_TASK = "작업을 다시 시작하는 중입니다: {0}" self.IN_PROGRESS = "진행 중" - self.DOWNLOAD_FINISHED_EXTRACTED_TO = "다운로드가 완료되었습니다. 추출 위치: {0}" - self.LLAMACPP_DOWNLOADED_AND_EXTRACTED = ( - "llama.cpp 바이너리가 다운로드되어 {0}에 추출되었습니다.\nCUDA 파일이 {1}에 추출되었습니다." + self.DOWNLOAD_FINISHED_EXTRACTED_TO = ( + "다운로드가 완료되었습니다. 추출 위치: {0}" + ) + self.LLAMACPP_DOWNLOADED_AND_EXTRACTED = "llama.cpp 바이너리가 다운로드되어 {0}에 추출되었습니다.\nCUDA 파일이 {1}에 추출되었습니다." + self.NO_SUITABLE_CUDA_BACKEND_FOUND = ( + "추출에 적합한 CUDA 백엔드를 찾을 수 없습니다." ) - self.NO_SUITABLE_CUDA_BACKEND_FOUND = "추출에 적합한 CUDA 백엔드를 찾을 수 없습니다." self.LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED = ( "llama.cpp 바이너리가 다운로드되어 {0}에 추출되었습니다." ) @@ -2810,10 +2834,14 @@ def __init__(self): self.INPUT_FILE_NOT_EXIST = "입력 파일 '{0}'이 존재하지 않습니다." self.QUANTIZING_MODEL_TO = "{0}을 {1}(으)로 양자화하는 중입니다." self.QUANTIZATION_TASK_STARTED = "{0}에 대한 양자화 작업이 시작되었습니다." - self.ERROR_STARTING_QUANTIZATION = "양자화를 시작하는 중 오류가 발생했습니다: {0}" + self.ERROR_STARTING_QUANTIZATION = ( + "양자화를 시작하는 중 오류가 발생했습니다: {0}" + ) self.UPDATING_MODEL_INFO = "모델 정보를 업데이트하는 중입니다: {0}" self.TASK_FINISHED = "작업이 완료되었습니다: {0}" - self.SHOWING_TASK_DETAILS_FOR = "다음에 대한 작업 세부 정보를 표시하는 중입니다: {0}" + self.SHOWING_TASK_DETAILS_FOR = ( + "다음에 대한 작업 세부 정보를 표시하는 중입니다: {0}" + ) self.BROWSING_FOR_IMATRIX_DATA_FILE = "IMatrix 데이터 파일을 찾아보는 중입니다." self.SELECT_DATA_FILE = "데이터 파일 선택" self.BROWSING_FOR_IMATRIX_MODEL_FILE = "IMatrix 모델 파일을 찾아보는 중입니다." @@ -2823,7 +2851,9 @@ def __init__(self): self.STARTING_IMATRIX_GENERATION = "IMatrix 생성을 시작하는 중입니다." self.BACKEND_PATH_NOT_EXIST = "백엔드 경로가 존재하지 않습니다: {0}" self.GENERATING_IMATRIX = "IMatrix를 생성하는 중입니다." - self.ERROR_STARTING_IMATRIX_GENERATION = "IMatrix 생성을 시작하는 중 오류가 발생했습니다: {0}" + self.ERROR_STARTING_IMATRIX_GENERATION = ( + "IMatrix 생성을 시작하는 중 오류가 발생했습니다: {0}" + ) self.IMATRIX_GENERATION_TASK_STARTED = "IMatrix 생성 작업이 시작되었습니다." self.ERROR_MESSAGE = "오류: {0}" self.TASK_ERROR = "작업 오류: {0}" @@ -2832,14 +2862,26 @@ def __init__(self): self.SELECT_QUANTIZATION_TYPE = "양자화 유형을 선택하세요." self.ALLOWS_REQUANTIZING = "이미 양자화된 텐서의 재양자화를 허용합니다." self.LEAVE_OUTPUT_WEIGHT = "output.weight를 (재)양자화하지 않은 상태로 둡니다." - self.DISABLE_K_QUANT_MIXTURES = "k-양자 혼합을 비활성화하고 모든 텐서를 동일한 유형으로 양자화합니다." - self.USE_DATA_AS_IMPORTANCE_MATRIX = "양자 최적화를 위한 중요도 행렬로 파일의 데이터를 사용합니다." - self.USE_IMPORTANCE_MATRIX_FOR_TENSORS = "이러한 텐서에 중요도 행렬을 사용합니다." - self.DONT_USE_IMPORTANCE_MATRIX_FOR_TENSORS = "이러한 텐서에 중요도 행렬을 사용하지 않습니다." + self.DISABLE_K_QUANT_MIXTURES = ( + "k-양자 혼합을 비활성화하고 모든 텐서를 동일한 유형으로 양자화합니다." + ) + self.USE_DATA_AS_IMPORTANCE_MATRIX = ( + "양자 최적화를 위한 중요도 행렬로 파일의 데이터를 사용합니다." + ) + self.USE_IMPORTANCE_MATRIX_FOR_TENSORS = ( + "이러한 텐서에 중요도 행렬을 사용합니다." 
+ ) + self.DONT_USE_IMPORTANCE_MATRIX_FOR_TENSORS = ( + "이러한 텐서에 중요도 행렬을 사용하지 않습니다." + ) self.OUTPUT_TENSOR_TYPE = "출력 텐서 유형:" - self.USE_THIS_TYPE_FOR_OUTPUT_WEIGHT = "output.weight 텐서에 이 유형을 사용합니다." + self.USE_THIS_TYPE_FOR_OUTPUT_WEIGHT = ( + "output.weight 텐서에 이 유형을 사용합니다." + ) self.TOKEN_EMBEDDING_TYPE = "토큰 임베딩 유형:" - self.USE_THIS_TYPE_FOR_TOKEN_EMBEDDINGS = "토큰 임베딩 텐서에 이 유형을 사용합니다." + self.USE_THIS_TYPE_FOR_TOKEN_EMBEDDINGS = ( + "토큰 임베딩 텐서에 이 유형을 사용합니다." + ) self.WILL_GENERATE_QUANTIZED_MODEL_IN_SAME_SHARDS = ( "입력과 동일한 샤드에 양자화된 모델을 생성합니다." ) @@ -3794,9 +3836,7 @@ def __init__(self): self.STARTING_IMATRIX_GENERATION = "IMatrix জেনারেশন শুরু হচ্ছে" self.BACKEND_PATH_NOT_EXIST = "ব্যাকএন্ড পাথ বিদ্যমান নেই: {0}" self.GENERATING_IMATRIX = "IMatrix তৈরি করা হচ্ছে" - self.ERROR_STARTING_IMATRIX_GENERATION = ( - "IMatrix জেনারেশন শুরু করতে ত্রুটি: {0}" - ) + self.ERROR_STARTING_IMATRIX_GENERATION = "IMatrix জেনারেশন শুরু করতে ত্রুটি: {0}" self.IMATRIX_GENERATION_TASK_STARTED = "IMatrix জেনারেশন টাস্ক শুরু হয়েছে" self.ERROR_MESSAGE = "ত্রুটি: {0}" self.TASK_ERROR = "টাস্ক ত্রুটি: {0}" @@ -3804,11 +3844,13 @@ def __init__(self): self.APPLICATION_CLOSED = "অ্যাপ্লিকেশন বন্ধ" self.SELECT_QUANTIZATION_TYPE = "কোয়ান্টাইজেশন ধরণ নির্বাচন করুন" self.ALLOWS_REQUANTIZING = "যে টেন্সরগুলি ইতিমধ্যে কোয়ান্টাইজ করা হয়েছে তাদের পুনরায় কোয়ান্টাইজ করার অনুমতি দেয়" - self.LEAVE_OUTPUT_WEIGHT = ( - "output.weight কে (পুনরায়) কোয়ান্টাইজ না করে রেখে দেবে" + self.LEAVE_OUTPUT_WEIGHT = "output.weight কে (পুনরায়) কোয়ান্টাইজ না করে রেখে দেবে" + self.DISABLE_K_QUANT_MIXTURES = ( + "k-কোয়ান্ট মিশ্রণগুলি অক্ষম করুন এবং সমস্ত টেন্সরকে একই ধরণের কোয়ান্টাইজ করুন" + ) + self.USE_DATA_AS_IMPORTANCE_MATRIX = ( + "কোয়ান্ট অপ্টিমাইজেশনের জন্য ফাইলের ডেটা গুরুত্বপূর্ণ ম্যাট্রিক্স হিসাবে ব্যবহার করুন" ) - self.DISABLE_K_QUANT_MIXTURES = "k-কোয়ান্ট মিশ্রণগুলি অক্ষম করুন এবং সমস্ত টেন্সরকে একই ধরণের কোয়ান্টাইজ করুন" - self.USE_DATA_AS_IMPORTANCE_MATRIX = "কোয়ান্ট অপ্টিমাইজেশনের জন্য ফাইলের ডেটা গুরুত্বপূর্ণ ম্যাট্রিক্স হিসাবে ব্যবহার করুন" self.USE_IMPORTANCE_MATRIX_FOR_TENSORS = ( "এই টেন্সরগুলির জন্য গুরুত্বপূর্ণ ম্যাট্রিক্স ব্যবহার করুন" ) @@ -5912,7 +5954,9 @@ def __init__(self): "llama.cpp 二進位檔案已下載並解壓縮至 {0}\nCUDA 檔案已解壓縮至 {1}" ) self.NO_SUITABLE_CUDA_BACKEND_FOUND = "找不到合適的 CUDA 後端進行解壓縮" - self.LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED = "llama.cpp 二進位檔案已下載並解壓縮至 {0}" + self.LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED = ( + "llama.cpp 二進位檔案已下載並解壓縮至 {0}" + ) self.REFRESHING_LLAMACPP_RELEASES = "正在重新整理 llama.cpp 版本" self.UPDATING_ASSET_LIST = "正在更新資源清單" self.UPDATING_CUDA_OPTIONS = "正在更新 CUDA 選項" @@ -5969,14 +6013,18 @@ def __init__(self): self.ALLOWS_REQUANTIZING = "允許重新量化已量化的張量" self.LEAVE_OUTPUT_WEIGHT = "將保留 output.weight 不被(重新)量化" self.DISABLE_K_QUANT_MIXTURES = "停用 k-quant 混合並將所有張量量化為相同類型" - self.USE_DATA_AS_IMPORTANCE_MATRIX = "使用檔案中的資料作為量化最佳化的重要性矩陣" + self.USE_DATA_AS_IMPORTANCE_MATRIX = ( + "使用檔案中的資料作為量化最佳化的重要性矩陣" + ) self.USE_IMPORTANCE_MATRIX_FOR_TENSORS = "對這些張量使用重要性矩陣" self.DONT_USE_IMPORTANCE_MATRIX_FOR_TENSORS = "不要對這些張量使用重要性矩陣" self.OUTPUT_TENSOR_TYPE = "輸出張量類型:" self.USE_THIS_TYPE_FOR_OUTPUT_WEIGHT = "對 output.weight 張量使用此類型" self.TOKEN_EMBEDDING_TYPE = "權杖嵌入類型:" self.USE_THIS_TYPE_FOR_TOKEN_EMBEDDINGS = "對權杖嵌入張量使用此類型" - self.WILL_GENERATE_QUANTIZED_MODEL_IN_SAME_SHARDS = "將在與輸入相同的分片中產生量化模型" + self.WILL_GENERATE_QUANTIZED_MODEL_IN_SAME_SHARDS = ( + "將在與輸入相同的分片中產生量化模型" + ) self.OVERRIDE_MODEL_METADATA = "覆蓋模型中繼資料" self.INPUT_DATA_FILE_FOR_IMATRIX = "IMatrix 產生的輸入資料檔案" self.MODEL_TO_BE_QUANTIZED = 
"要量化的模型"