import json import os import shutil import urllib.error import urllib.request from datetime import datetime from functools import partial, wraps from typing import List from PySide6.QtCore import * from PySide6.QtGui import * from PySide6.QtWidgets import * import lora_conversion import presets import ui_update import utils from CustomTitleBar import CustomTitleBar from GPUMonitor import GPUMonitor from Localizations import * from Logger import Logger from Plugins import Plugins from QuantizationThread import QuantizationThread from TaskListItem import TaskListItem from error_handling import handle_error, show_error from globals import ( ensure_directory, load_dotenv, open_file_safe, process_args, resource_path, show_about, verify_gguf, ) class AutoGGUF(QMainWindow): def validate_input(*fields): def decorator(func): @wraps(func) def wrapper(self, *args, **kwargs): for field in fields: value = getattr(self, field).text().strip() # Length check if len(value) > 1024: show_error(self.logger, f"{field} exceeds maximum length") # Normalize path normalized_path = os.path.normpath(value) # Check for path traversal attempts if ".." in normalized_path: show_error(self.logger, f"Invalid path in {field}") # Disallow control characters and null bytes if re.search(r"[\x00-\x1f\x7f]", value): show_error(self.logger, f"Invalid characters in {field}") return func(self, *args, **kwargs) return wrapper return decorator def __init__(self, args: List[str]) -> None: super().__init__() init_timer = QElapsedTimer() init_timer.start() self.parse_resolution = ui_update.parse_resolution.__get__(self) self.log_dir_name = os.environ.get("AUTOGGUF_LOG_DIR_NAME", "logs") width, height = self.parse_resolution() self.logger = Logger("AutoGGUF", self.log_dir_name) self.logger.info(INITIALIZING_AUTOGGUF) self.setWindowTitle(WINDOW_TITLE) self.setWindowIcon(QIcon(resource_path("assets/favicon.ico"))) self.setGeometry(100, 100, width, height) self.setWindowFlag(Qt.FramelessWindowHint) load_dotenv(self) # Loads the .env file process_args(args) # Load any command line parameters # Configuration self.model_dir_name = os.environ.get("AUTOGGUF_MODEL_DIR_NAME", "models") self.output_dir_name = os.environ.get( "AUTOGGUF_OUTPUT_DIR_NAME", "quantized_models" ) self.resize_factor = float( os.environ.get("AUTOGGUF_RESIZE_FACTOR", 1.1) ) # 10% increase/decrease self.default_width, self.default_height = width, height self.resize(self.default_width, self.default_height) ensure_directory(os.path.abspath(self.output_dir_name)) ensure_directory(os.path.abspath(self.model_dir_name)) # References self.update_base_model_visibility = partial( ui_update.update_base_model_visibility, self ) self.update_assets = ui_update.update_assets.__get__(self) self.update_cuda_option = ui_update.update_cuda_option.__get__(self) self.update_cuda_backends = ui_update.update_cuda_backends.__get__(self) self.download_llama_cpp = utils.download_llama_cpp.__get__(self) self.refresh_releases = utils.refresh_releases.__get__(self) self.browse_lora_input = utils.browse_lora_input.__get__(self) self.browse_lora_output = utils.browse_lora_output.__get__(self) self.convert_lora = lora_conversion.convert_lora.__get__(self) self.show_about = show_about.__get__(self) self.save_preset = presets.save_preset.__get__(self) self.load_preset = presets.load_preset.__get__(self) self.browse_export_lora_model = ( lora_conversion.browse_export_lora_model.__get__(self) ) self.browse_export_lora_output = ( lora_conversion.browse_export_lora_output.__get__(self) ) self.add_lora_adapter = lora_conversion.add_lora_adapter.__get__(self) self.export_lora = lora_conversion.export_lora.__get__(self) self.browse_models = utils.browse_models.__get__(self) self.browse_output = utils.browse_output.__get__(self) self.browse_logs = utils.browse_logs.__get__(self) self.browse_imatrix = utils.browse_imatrix.__get__(self) self.get_models_data = utils.get_models_data.__get__(self) self.get_tasks_data = utils.get_tasks_data.__get__(self) self.add_kv_override = partial(utils.add_kv_override, self) self.remove_kv_override = partial(utils.remove_kv_override, self) self.cancel_task = partial(TaskListItem.cancel_task, self) self.delete_task = partial(TaskListItem.delete_task, self) self.show_task_context_menu = partial(TaskListItem.show_task_context_menu, self) self.show_task_properties = partial(TaskListItem.show_task_properties, self) self.toggle_gpu_offload_auto = partial(ui_update.toggle_gpu_offload_auto, self) self.update_threads_spinbox = partial(ui_update.update_threads_spinbox, self) self.update_threads_slider = partial(ui_update.update_threads_slider, self) self.update_gpu_offload_spinbox = partial( ui_update.update_gpu_offload_spinbox, self ) self.update_gpu_offload_slider = partial( ui_update.update_gpu_offload_slider, self ) self.update_model_info = partial(ui_update.update_model_info, self.logger) self.update_system_info = partial(ui_update.update_system_info, self) self.update_download_progress = partial( ui_update.update_download_progress, self ) self.delete_lora_adapter_item = partial( lora_conversion.delete_lora_adapter_item, self ) self.lora_conversion_finished = partial( lora_conversion.lora_conversion_finished, self ) self.parse_progress = partial(QuantizationThread.parse_progress, self) self.create_label = partial(ui_update.create_label, self) self.browse_imatrix_datafile = ui_update.browse_imatrix_datafile.__get__(self) self.browse_imatrix_model = ui_update.browse_imatrix_model.__get__(self) self.browse_imatrix_output = ui_update.browse_imatrix_output.__get__(self) self.restart_task = partial(TaskListItem.restart_task, self) self.browse_hf_outfile = ui_update.browse_hf_outfile.__get__(self) self.browse_hf_model_input = ui_update.browse_hf_model_input.__get__(self) self.browse_base_model = ui_update.browse_base_model.__get__(self) self.reset_size = ui_update.reset_size.__get__(self) self.resize_window = partial(ui_update.resize_window, self) self.show_detailed_stats_std = partial(GPUMonitor.show_detailed_stats_std, self) self.show_cpu_graph = partial(GPUMonitor.show_cpu_graph, self) self.show_ram_graph = partial(GPUMonitor.show_ram_graph, self) self.rename_model = partial(utils.rename_model, self) self.show_model_context_menu = partial(utils.show_model_context_menu, self) # Set up main widget and layout main_widget = QWidget() main_layout = QVBoxLayout(main_widget) main_layout.setContentsMargins(0, 0, 0, 0) main_layout.setSpacing(0) # Custom title bar self.title_bar = CustomTitleBar(self) main_layout.addWidget(self.title_bar) # Menu bar self.menubar = QMenuBar() self.title_bar.layout().insertWidget(1, self.menubar) # File menu file_menu = self.menubar.addMenu(f"&{FILE}") close_action = QAction(f"&{CLOSE}", self) close_action.setShortcut(QKeySequence("Alt+F4")) close_action.triggered.connect(self.close) save_preset_action = QAction(f"&{SAVE_PRESET}", self) save_preset_action.setShortcut(QKeySequence("Ctrl+S")) save_preset_action.triggered.connect(self.save_preset) load_preset_action = QAction(f"&{SAVE_PRESET}", self) load_preset_action.setShortcut(QKeySequence("Ctrl+S")) load_preset_action.triggered.connect(self.load_preset) file_menu.addAction(close_action) file_menu.addAction(save_preset_action) file_menu.addAction(load_preset_action) # AutoFP8 Window self.fp8_dialog = QDialog(self) self.fp8_dialog.setWindowTitle(QUANTIZE_TO_FP8_DYNAMIC) self.fp8_dialog.setFixedWidth(500) self.fp8_layout = QVBoxLayout() # Input path input_layout = QHBoxLayout() self.fp8_input = QLineEdit() input_button = QPushButton(BROWSE) input_button.clicked.connect( lambda: self.fp8_input.setText( QFileDialog.getExistingDirectory(self, OPEN_MODEL_FOLDER) ) ) input_layout.addWidget(QLabel(INPUT_MODEL)) input_layout.addWidget(self.fp8_input) input_layout.addWidget(input_button) self.fp8_layout.addLayout(input_layout) # Output path output_layout = QHBoxLayout() self.fp8_output = QLineEdit() output_button = QPushButton(BROWSE) output_button.clicked.connect( lambda: self.fp8_output.setText( QFileDialog.getExistingDirectory(self, OPEN_MODEL_FOLDER) ) ) output_layout.addWidget(QLabel(OUTPUT)) output_layout.addWidget(self.fp8_output) output_layout.addWidget(output_button) self.fp8_layout.addLayout(output_layout) # Quantize button quantize_button = QPushButton(QUANTIZE) quantize_button.clicked.connect( lambda: self.quantize_to_fp8_dynamic( self.fp8_input.text(), self.fp8_output.text() ) ) self.fp8_layout.addWidget(quantize_button) self.fp8_dialog.setLayout(self.fp8_layout) # Split GGUF Window self.split_gguf_dialog = QDialog(self) self.split_gguf_dialog.setWindowTitle(SPLIT_GGUF) self.split_gguf_dialog.setFixedWidth(500) self.split_gguf_layout = QVBoxLayout() # Input path input_layout = QHBoxLayout() self.split_gguf_input = QLineEdit() input_button = QPushButton(BROWSE) input_button.clicked.connect( lambda: self.split_gguf_input.setText( QFileDialog.getOpenFileName(self, SELECT_FILE, filter=GGUF_FILES)[0] ) ) input_layout.addWidget(QLabel(INPUT_MODEL)) input_layout.addWidget(self.split_gguf_input) input_layout.addWidget(input_button) self.split_gguf_layout.addLayout(input_layout) # Output path output_layout = QHBoxLayout() self.split_gguf_output = QLineEdit() output_button = QPushButton(BROWSE) output_button.clicked.connect( lambda: self.split_gguf_output.setText( QFileDialog.getOpenFileName(self, SELECT_FILE, filter=GGUF_FILES)[0] ) ) output_layout.addWidget(QLabel(OUTPUT)) output_layout.addWidget(self.split_gguf_output) output_layout.addWidget(output_button) self.split_gguf_layout.addLayout(output_layout) # Split options split_options_layout = QHBoxLayout() self.split_max_size = QLineEdit() self.split_max_size.setPlaceholderText(SIZE_IN_UNITS) self.split_max_tensors = QLineEdit() self.split_max_tensors.setPlaceholderText(NUMBER_OF_TENSORS) split_options_layout.addWidget(QLabel(SPLIT_MAX_SIZE)) split_options_layout.addWidget(self.split_max_size) split_options_layout.addWidget(QLabel(SPLIT_MAX_TENSORS)) split_options_layout.addWidget(self.split_max_tensors) self.split_gguf_layout.addLayout(split_options_layout) # Split button split_button = QPushButton(SPLIT_GGUF) split_button.clicked.connect( lambda: self.split_gguf( self.split_gguf_input.text(), self.split_gguf_output.text(), self.split_max_size.text(), self.split_max_tensors.text(), ) ) self.split_gguf_layout.addWidget(split_button) self.split_gguf_dialog.setLayout(self.split_gguf_layout) # Merge GGUF Window self.merge_gguf_dialog = QDialog(self) self.merge_gguf_dialog.setWindowTitle(MERGE_GGUF) self.merge_gguf_dialog.setFixedWidth(500) self.merge_gguf_layout = QVBoxLayout() # Input path input_layout = QHBoxLayout() self.merge_gguf_input = QLineEdit() input_button = QPushButton(BROWSE) input_button.clicked.connect( lambda: self.merge_gguf_input.setText( QFileDialog.getOpenFileName(self, SELECT_FILE, filter=GGUF_FILES)[0] ) ) input_layout.addWidget(QLabel(INPUT_MODEL)) input_layout.addWidget(self.merge_gguf_input) input_layout.addWidget(input_button) self.merge_gguf_layout.addLayout(input_layout) # Output path output_layout = QHBoxLayout() self.merge_gguf_output = QLineEdit() output_button = QPushButton(BROWSE) output_button.clicked.connect( lambda: self.merge_gguf_output.setText( QFileDialog.getOpenFileName(self, SELECT_FILE, filter=GGUF_FILES)[0] ) ) output_layout.addWidget(QLabel(OUTPUT)) output_layout.addWidget(self.merge_gguf_output) output_layout.addWidget(output_button) self.merge_gguf_layout.addLayout(output_layout) # Split button split_button = QPushButton(MERGE_GGUF) split_button.clicked.connect( lambda: self.merge_gguf( self.merge_gguf_input.text(), self.merge_gguf_output.text(), ) ) self.merge_gguf_layout.addWidget(split_button) self.merge_gguf_dialog.setLayout(self.merge_gguf_layout) # HF Upload Window self.hf_upload_dialog = QDialog(self) self.hf_upload_dialog.setWindowTitle(HF_UPLOAD) self.hf_upload_dialog.setFixedWidth(500) self.hf_upload_layout = QVBoxLayout() # Form layout for inputs form_layout = QFormLayout() # Repo input self.hf_repo_input = QLineEdit() form_layout.addRow(HF_REPOSITORY, self.hf_repo_input) # Remote path input self.hf_remote_path_input = QLineEdit() form_layout.addRow(HF_REMOTE_PATH, self.hf_remote_path_input) # Local file/folder input local_path_layout = QHBoxLayout() self.hf_local_path_input = QLineEdit() local_path_button = QPushButton(BROWSE) local_path_button.clicked.connect(self.browse_local_path) local_path_layout.addWidget(self.hf_local_path_input) local_path_layout.addWidget(local_path_button) form_layout.addRow(HF_LOCAL_PATH, local_path_layout) self.hf_upload_layout.addLayout(form_layout) # Upload type (file or folder) upload_type_group = QGroupBox(UPLOAD_TYPE) upload_type_layout = QHBoxLayout() self.upload_type_group = QButtonGroup() self.upload_type_file = QRadioButton(FILE) self.upload_type_folder = QRadioButton(FOLDER) self.upload_type_group.addButton(self.upload_type_file) self.upload_type_group.addButton(self.upload_type_folder) upload_type_layout.addWidget(self.upload_type_file) upload_type_layout.addWidget(self.upload_type_folder) upload_type_group.setLayout(upload_type_layout) self.hf_upload_layout.addWidget(upload_type_group) # Repo type (dataset/space/model) repo_type_group = QGroupBox(HF_REPOSITORY_TYPE) repo_type_layout = QHBoxLayout() self.repo_type_group = QButtonGroup() self.repo_type_model = QRadioButton(MODEL) self.repo_type_dataset = QRadioButton(DATASET) self.repo_type_space = QRadioButton(SPACE) self.repo_type_group.addButton(self.repo_type_model) self.repo_type_group.addButton(self.repo_type_dataset) self.repo_type_group.addButton(self.repo_type_space) repo_type_layout.addWidget(self.repo_type_model) repo_type_layout.addWidget(self.repo_type_dataset) repo_type_layout.addWidget(self.repo_type_space) repo_type_group.setLayout(repo_type_layout) self.hf_upload_layout.addWidget(repo_type_group) # Upload button upload_button = QPushButton(UPLOAD) upload_button.clicked.connect(self.transfer_to_hf) self.hf_upload_layout.addWidget(upload_button) self.hf_upload_dialog.setLayout(self.hf_upload_layout) # Tools menu tools_menu = self.menubar.addMenu(f"&{TOOLS}") autofp8_action = QAction(f"&{AUTOFP8}", self) autofp8_action.setShortcut(QKeySequence("Shift+Q")) autofp8_action.triggered.connect(self.fp8_dialog.exec) split_gguf_action = QAction(f"&{SPLIT_GGUF}", self) split_gguf_action.setShortcut(QKeySequence("Shift+G")) split_gguf_action.triggered.connect(self.split_gguf_dialog.exec) merge_gguf_action = QAction(f"&{MERGE_GGUF}", self) merge_gguf_action.setShortcut(QKeySequence("Shift+M")) merge_gguf_action.triggered.connect(self.merge_gguf_dialog.exec) hf_transfer_action = QAction(f"&{HF_TRANSFER}", self) hf_transfer_action.setShortcut(QKeySequence("Shift+H")) hf_transfer_action.triggered.connect(self.hf_upload_dialog.exec) tools_menu.addAction(hf_transfer_action) tools_menu.addAction(autofp8_action) tools_menu.addAction(split_gguf_action) tools_menu.addAction(merge_gguf_action) # Help menu help_menu = self.menubar.addMenu(f"&{HELP}") about_action = QAction(f"&{ABOUT}", self) about_action.setShortcut(QKeySequence("Ctrl+Q")) about_action.triggered.connect(self.show_about) help_menu.addAction(about_action) # Content widget content_widget = QWidget() content_layout = QHBoxLayout(content_widget) # Wrap content in a scroll area scroll_area = QScrollArea() scroll_area.setWidgetResizable(True) # Allow content to resize scroll_area.setWidget(content_widget) # Add scroll area to main layout main_layout.addWidget(scroll_area) self.setCentralWidget(main_widget) # Styling self.setStyleSheet( """ AutoGGUF { background-color: #2b2b2b; border-radius: 10px; } """ ) # Initialize threads self.quant_threads = [] # Add all widgets to content_layout left_widget = QWidget() right_widget = QWidget() left_widget.setMinimumWidth(1100) right_widget.setMinimumWidth(400) left_layout = QVBoxLayout(left_widget) right_layout = QVBoxLayout(right_widget) content_layout.addWidget(left_widget) content_layout.addWidget(right_widget) # System info self.ram_bar = QProgressBar() self.cpu_bar = QProgressBar() self.cpu_label = QLabel() self.gpu_monitor = GPUMonitor() left_layout.addWidget(QLabel(RAM_USAGE)) left_layout.addWidget(self.ram_bar) left_layout.addWidget(QLabel(CPU_USAGE)) left_layout.addWidget(self.cpu_bar) left_layout.addWidget(QLabel(GPU_USAGE)) left_layout.addWidget(self.gpu_monitor) # Add mouse click event handlers for RAM and CPU bars self.ram_bar.mouseDoubleClickEvent = self.show_ram_graph self.cpu_bar.mouseDoubleClickEvent = self.show_cpu_graph # Initialize data lists for CPU and RAM usage self.cpu_data = [] self.ram_data = [] # Timer for updating system info self.timer = QTimer() self.timer.timeout.connect(self.update_system_info) self.timer.start(200) # Backend selection backend_layout = QHBoxLayout() self.backend_combo = QComboBox() self.refresh_backends_button = QPushButton(REFRESH_BACKENDS) self.refresh_backends_button.clicked.connect(self.refresh_backends) backend_layout.addWidget(QLabel(BACKEND)) backend_layout.addWidget(self.backend_combo) backend_layout.addWidget(self.refresh_backends_button) left_layout.addLayout(backend_layout) # Download llama.cpp section download_group = QGroupBox(DOWNLOAD_LLAMACPP) download_layout = QFormLayout() self.release_combo = QComboBox() self.refresh_releases_button = QPushButton(REFRESH_RELEASES) self.refresh_releases_button.clicked.connect(self.refresh_releases) release_layout = QHBoxLayout() release_layout.addWidget(self.release_combo) release_layout.addWidget(self.refresh_releases_button) download_layout.addRow(SELECT_RELEASE, release_layout) self.asset_combo = QComboBox() self.asset_combo.currentIndexChanged.connect(self.update_cuda_option) download_layout.addRow(SELECT_ASSET, self.asset_combo) self.cuda_extract_checkbox = QCheckBox(EXTRACT_CUDA_FILES) self.cuda_extract_checkbox.setVisible(False) download_layout.addRow(self.cuda_extract_checkbox) self.cuda_backend_label = QLabel(SELECT_CUDA_BACKEND) self.cuda_backend_label.setVisible(False) self.backend_combo_cuda = QComboBox() self.backend_combo_cuda.setVisible(False) download_layout.addRow(self.cuda_backend_label, self.backend_combo_cuda) self.download_progress = QProgressBar() self.download_button = QPushButton(DOWNLOAD) self.download_button.clicked.connect(self.download_llama_cpp) download_layout.addRow(self.download_progress) download_layout.addRow(self.download_button) download_group.setLayout(download_layout) right_layout.addWidget(download_group) # Models path models_layout = QHBoxLayout() self.models_input = QLineEdit(os.path.abspath(self.model_dir_name)) models_button = QPushButton(BROWSE) models_button.clicked.connect(self.browse_models) models_layout.addWidget(QLabel(MODELS_PATH)) models_layout.addWidget(self.models_input) models_layout.addWidget(models_button) left_layout.addLayout(models_layout) # Output path output_layout = QHBoxLayout() self.output_input = QLineEdit(os.path.abspath(self.output_dir_name)) output_button = QPushButton(BROWSE) output_button.clicked.connect(self.browse_output) output_layout.addWidget(QLabel(OUTPUT_PATH)) output_layout.addWidget(self.output_input) output_layout.addWidget(output_button) left_layout.addLayout(output_layout) # Logs path logs_layout = QHBoxLayout() self.logs_input = QLineEdit(os.path.abspath(self.log_dir_name)) logs_button = QPushButton(BROWSE) logs_button.clicked.connect(self.browse_logs) logs_layout.addWidget(QLabel(LOGS_PATH)) logs_layout.addWidget(self.logs_input) logs_layout.addWidget(logs_button) left_layout.addLayout(logs_layout) # Model list self.model_tree = QTreeWidget() self.model_tree.setHeaderHidden(True) left_layout.addWidget(QLabel(AVAILABLE_MODELS)) left_layout.addWidget(self.model_tree) # Ssupport right-click menu self.model_tree.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu) self.model_tree.customContextMenuRequested.connect(self.show_model_context_menu) # Refresh models button refresh_models_button = QPushButton(REFRESH_MODELS) refresh_models_button.clicked.connect(self.load_models) left_layout.addWidget(refresh_models_button) # Import Model button import_model_button = QPushButton(IMPORT_MODEL) import_model_button.clicked.connect(self.import_model) left_layout.addWidget(import_model_button) # Quantization options quant_options_scroll = QScrollArea() quant_options_widget = QWidget() quant_options_layout = QFormLayout() self.quant_type = QListWidget() self.quant_type.setMinimumHeight(100) self.quant_type.setMinimumWidth(150) self.quant_type.setSelectionMode(QListWidget.SelectionMode.MultiSelection) quant_types = [ "IQ2_XXS", "IQ2_XS", "IQ2_S", "IQ2_M", "IQ1_S", "IQ1_M", "Q2_K", "Q2_K_S", "IQ3_XXS", "IQ3_S", "IQ3_M", "IQ3_XS", "Q3_K_S", "Q3_K_M", "Q3_K_L", "IQ4_NL", "IQ4_XS", "Q4_K_S", "Q4_K_M", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0", "Q4_0", "Q4_1", "Q5_0", "Q5_1", "Q4_0_4_4", "Q4_0_4_8", "Q4_0_8_8", "BF16", "F16", "F32", "COPY", ] self.quant_type.addItems(quant_types) quant_options_layout.addRow( self.create_label(QUANTIZATION_TYPE, SELECT_QUANTIZATION_TYPE), self.quant_type, ) self.allow_requantize = QCheckBox(ALLOW_REQUANTIZE) self.leave_output_tensor = QCheckBox(LEAVE_OUTPUT_TENSOR) self.pure = QCheckBox(PURE) quant_options_layout.addRow( self.create_label("", ALLOWS_REQUANTIZING), self.allow_requantize ) quant_options_layout.addRow( self.create_label("", LEAVE_OUTPUT_WEIGHT), self.leave_output_tensor ) quant_options_layout.addRow( self.create_label("", DISABLE_K_QUANT_MIXTURES), self.pure ) self.imatrix = QLineEdit() self.imatrix_button = QPushButton(BROWSE) self.imatrix_button.clicked.connect(self.browse_imatrix) imatrix_layout = QHBoxLayout() imatrix_layout.addWidget(self.imatrix) imatrix_layout.addWidget(self.imatrix_button) quant_options_layout.addRow( self.create_label(IMATRIX, USE_DATA_AS_IMPORTANCE_MATRIX), imatrix_layout ) self.include_weights = QLineEdit() self.exclude_weights = QLineEdit() quant_options_layout.addRow( self.create_label(INCLUDE_WEIGHTS, USE_IMPORTANCE_MATRIX_FOR_TENSORS), self.include_weights, ) quant_options_layout.addRow( self.create_label(EXCLUDE_WEIGHTS, DONT_USE_IMPORTANCE_MATRIX_FOR_TENSORS), self.exclude_weights, ) tensor_types = [ "Q2_K", "Q2_K_S", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_K_S", "Q4_K_M", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0", "Q4_0", "Q4_1", "Q5_0", "Q5_1", "BF16", "F16", "F32", ] self.use_output_tensor_type = QCheckBox(USE_OUTPUT_TENSOR_TYPE) self.output_tensor_type = QComboBox() self.output_tensor_type.addItems(tensor_types) self.output_tensor_type.setEnabled(False) self.use_output_tensor_type.toggled.connect( lambda checked: self.output_tensor_type.setEnabled(checked) ) output_tensor_layout = QHBoxLayout() output_tensor_layout.addWidget(self.use_output_tensor_type) output_tensor_layout.addWidget(self.output_tensor_type) quant_options_layout.addRow( self.create_label(OUTPUT_TENSOR_TYPE, USE_THIS_TYPE_FOR_OUTPUT_WEIGHT), output_tensor_layout, ) self.use_token_embedding_type = QCheckBox(USE_TOKEN_EMBEDDING_TYPE) self.token_embedding_type = QComboBox() self.token_embedding_type.addItems(tensor_types) self.token_embedding_type.setEnabled(False) self.use_token_embedding_type.toggled.connect( lambda checked: self.token_embedding_type.setEnabled(checked) ) token_embedding_layout = QHBoxLayout() token_embedding_layout.addWidget(self.use_token_embedding_type) token_embedding_layout.addWidget(self.token_embedding_type) quant_options_layout.addRow( self.create_label(TOKEN_EMBEDDING_TYPE, USE_THIS_TYPE_FOR_TOKEN_EMBEDDINGS), token_embedding_layout, ) self.keep_split = QCheckBox(KEEP_SPLIT) self.override_kv = QLineEdit() quant_options_layout.addRow( self.create_label("", WILL_GENERATE_QUANTIZED_MODEL_IN_SAME_SHARDS), self.keep_split, ) # KV Override section self.kv_override_widget = QWidget() self.kv_override_layout = QVBoxLayout(self.kv_override_widget) self.kv_override_entries = [] add_override_button = QPushButton(ADD_NEW_OVERRIDE) add_override_button.clicked.connect(self.add_kv_override) kv_override_scroll = QScrollArea() kv_override_scroll.setWidgetResizable(True) kv_override_scroll.setWidget(self.kv_override_widget) kv_override_scroll.setMinimumHeight(200) kv_override_main_layout = QVBoxLayout() kv_override_main_layout.addWidget(kv_override_scroll) kv_override_main_layout.addWidget(add_override_button) quant_options_layout.addRow( self.create_label(KV_OVERRIDES, OVERRIDE_MODEL_METADATA), kv_override_main_layout, ) self.extra_arguments = QLineEdit() quant_options_layout.addRow( self.create_label(EXTRA_ARGUMENTS, EXTRA_COMMAND_ARGUMENTS), self.extra_arguments, ) quant_options_widget.setLayout(quant_options_layout) quant_options_scroll.setWidget(quant_options_widget) quant_options_scroll.setWidgetResizable(True) left_layout.addWidget(quant_options_scroll) # Quantize button layout quantize_layout = QHBoxLayout() quantize_button = QPushButton(QUANTIZE_MODEL) quantize_button.clicked.connect(self.quantize_model) save_preset_button = QPushButton(SAVE_PRESET) save_preset_button.clicked.connect(self.save_preset) load_preset_button = QPushButton(LOAD_PRESET) load_preset_button.clicked.connect(self.load_preset) quantize_layout.addWidget(quantize_button) quantize_layout.addWidget(save_preset_button) quantize_layout.addWidget(load_preset_button) left_layout.addLayout(quantize_layout) # Task list self.task_list = QListWidget() self.task_list.setSelectionMode(QListWidget.SelectionMode.NoSelection) self.task_list.itemDoubleClicked.connect(self.show_task_details) left_layout.addWidget(QLabel(TASKS)) left_layout.addWidget(self.task_list) # IMatrix section imatrix_group = QGroupBox(IMATRIX_GENERATION) imatrix_layout = QFormLayout() self.imatrix_datafile = QLineEdit() self.imatrix_datafile_button = QPushButton(BROWSE) self.imatrix_datafile_button.clicked.connect(self.browse_imatrix_datafile) imatrix_datafile_layout = QHBoxLayout() imatrix_datafile_layout.addWidget(self.imatrix_datafile) imatrix_datafile_layout.addWidget(self.imatrix_datafile_button) imatrix_layout.addRow( self.create_label(DATA_FILE, INPUT_DATA_FILE_FOR_IMATRIX), imatrix_datafile_layout, ) self.imatrix_model = QLineEdit() self.imatrix_model_button = QPushButton(BROWSE) self.imatrix_model_button.clicked.connect(self.browse_imatrix_model) imatrix_model_layout = QHBoxLayout() imatrix_model_layout.addWidget(self.imatrix_model) imatrix_model_layout.addWidget(self.imatrix_model_button) imatrix_layout.addRow( self.create_label(MODEL, MODEL_TO_BE_QUANTIZED), imatrix_model_layout ) self.imatrix_output = QLineEdit() self.imatrix_output_button = QPushButton(BROWSE) self.imatrix_output_button.clicked.connect(self.browse_imatrix_output) imatrix_output_layout = QHBoxLayout() imatrix_output_layout.addWidget(self.imatrix_output) imatrix_output_layout.addWidget(self.imatrix_output_button) imatrix_layout.addRow( self.create_label(OUTPUT, OUTPUT_PATH_FOR_GENERATED_IMATRIX), imatrix_output_layout, ) self.imatrix_frequency = QSpinBox() self.imatrix_frequency.setRange(1, 100) self.imatrix_frequency.setValue(1) imatrix_layout.addRow( self.create_label(OUTPUT_FREQUENCY, HOW_OFTEN_TO_SAVE_IMATRIX), self.imatrix_frequency, ) self.imatrix_ctx_size = QSpinBox() self.imatrix_ctx_size.setRange(1, 1048576) self.imatrix_ctx_size.setValue(512) imatrix_layout.addRow( self.create_label(CONTEXT_SIZE, CONTEXT_SIZE_FOR_IMATRIX), self.imatrix_ctx_size, ) threads_layout = QHBoxLayout() self.threads_slider = QSlider(Qt.Orientation.Horizontal) self.threads_slider.setRange(1, 64) self.threads_slider.valueChanged.connect(self.update_threads_spinbox) self.threads_spinbox = QSpinBox() self.threads_spinbox.setRange(1, 128) self.threads_spinbox.valueChanged.connect(self.update_threads_slider) self.threads_spinbox.setMinimumWidth(75) threads_layout.addWidget(self.threads_slider) threads_layout.addWidget(self.threads_spinbox) imatrix_layout.addRow( self.create_label(THREADS, NUMBER_OF_THREADS_FOR_IMATRIX), threads_layout ) gpu_offload_layout = QHBoxLayout() self.gpu_offload_slider = QSlider(Qt.Orientation.Horizontal) self.gpu_offload_slider.setRange(0, 200) self.gpu_offload_slider.valueChanged.connect(self.update_gpu_offload_spinbox) self.gpu_offload_spinbox = QSpinBox() self.gpu_offload_spinbox.setRange(0, 1000) self.gpu_offload_spinbox.valueChanged.connect(self.update_gpu_offload_slider) self.gpu_offload_spinbox.setMinimumWidth(75) self.gpu_offload_auto = QCheckBox(AUTO) self.gpu_offload_auto.stateChanged.connect(self.toggle_gpu_offload_auto) gpu_offload_layout.addWidget(self.gpu_offload_slider) gpu_offload_layout.addWidget(self.gpu_offload_spinbox) gpu_offload_layout.addWidget(self.gpu_offload_auto) imatrix_layout.addRow( self.create_label(GPU_OFFLOAD, SET_GPU_OFFLOAD_VALUE), gpu_offload_layout ) imatrix_generate_button = QPushButton(GENERATE_IMATRIX) imatrix_generate_button.clicked.connect(self.generate_imatrix) imatrix_layout.addRow(imatrix_generate_button) imatrix_group.setLayout(imatrix_layout) right_layout.addWidget(imatrix_group) # LoRA Conversion Section lora_group = QGroupBox(LORA_CONVERSION) lora_layout = QFormLayout() self.lora_input = QLineEdit() lora_input_button = QPushButton(BROWSE) lora_input_button.clicked.connect(self.browse_lora_input) lora_input_layout = QHBoxLayout() lora_input_layout.addWidget(self.lora_input) lora_input_layout.addWidget(lora_input_button) lora_layout.addRow( self.create_label(LORA_INPUT_PATH, SELECT_LORA_INPUT_DIRECTORY), lora_input_layout, ) self.lora_output = QLineEdit() lora_output_button = QPushButton(BROWSE) lora_output_button.clicked.connect(self.browse_lora_output) lora_output_layout = QHBoxLayout() lora_output_layout.addWidget(self.lora_output) lora_output_layout.addWidget(lora_output_button) lora_layout.addRow( self.create_label(LORA_OUTPUT_PATH, SELECT_LORA_OUTPUT_FILE), lora_output_layout, ) self.lora_output_type_combo = QComboBox() self.lora_output_type_combo.addItems(["GGML", "GGUF"]) self.lora_output_type_combo.currentIndexChanged.connect( self.update_base_model_visibility ) lora_layout.addRow( self.create_label(OUTPUT_TYPE, SELECT_OUTPUT_TYPE), self.lora_output_type_combo, ) self.base_model_label = self.create_label(BASE_MODEL, SELECT_BASE_MODEL_FILE) self.base_model_path = QLineEdit() base_model_button = QPushButton(BROWSE) base_model_button.clicked.connect(self.browse_base_model) base_model_layout = QHBoxLayout() base_model_layout.addWidget(self.base_model_path, 1) base_model_layout.addWidget(base_model_button) self.base_model_widget = QWidget() self.base_model_widget.setLayout(base_model_layout) self.base_model_wrapper = QWidget() wrapper_layout = QHBoxLayout(self.base_model_wrapper) wrapper_layout.addWidget(self.base_model_label) wrapper_layout.addWidget(self.base_model_widget, 1) wrapper_layout.setContentsMargins(0, 0, 0, 0) lora_layout.addRow(self.base_model_wrapper) self.update_base_model_visibility(self.lora_output_type_combo.currentIndex()) lora_convert_button = QPushButton(CONVERT_LORA) lora_convert_button.clicked.connect(self.convert_lora) lora_layout.addRow(lora_convert_button) lora_group.setLayout(lora_layout) right_layout.addWidget(lora_group) # Export LoRA export_lora_group = QGroupBox(EXPORT_LORA) export_lora_layout = QFormLayout() self.export_lora_model = QLineEdit() export_lora_model_button = QPushButton(BROWSE) export_lora_model_button.clicked.connect(self.browse_export_lora_model) export_lora_model_layout = QHBoxLayout() export_lora_model_layout.addWidget(self.export_lora_model) export_lora_model_layout.addWidget(export_lora_model_button) export_lora_layout.addRow( self.create_label(MODEL, SELECT_MODEL_FILE), export_lora_model_layout ) self.export_lora_output = QLineEdit() export_lora_output_button = QPushButton(BROWSE) export_lora_output_button.clicked.connect(self.browse_export_lora_output) export_lora_output_layout = QHBoxLayout() export_lora_output_layout.addWidget(self.export_lora_output) export_lora_output_layout.addWidget(export_lora_output_button) export_lora_layout.addRow( self.create_label(OUTPUT, SELECT_OUTPUT_FILE), export_lora_output_layout ) self.export_lora_adapters = QListWidget() add_adapter_button = QPushButton(ADD_ADAPTER) add_adapter_button.clicked.connect(self.add_lora_adapter) adapters_layout = QVBoxLayout() adapters_layout.addWidget(self.export_lora_adapters) buttons_layout = QHBoxLayout() buttons_layout.addWidget(add_adapter_button) adapters_layout.addLayout(buttons_layout) export_lora_layout.addRow( self.create_label(GGML_LORA_ADAPTERS, SELECT_LORA_ADAPTER_FILES), adapters_layout, ) self.export_lora_threads = QSpinBox() self.export_lora_threads.setRange(1, 64) self.export_lora_threads.setValue(8) export_lora_layout.addRow( self.create_label(THREADS, NUMBER_OF_THREADS_FOR_LORA_EXPORT), self.export_lora_threads, ) export_lora_button = QPushButton(EXPORT_LORA) export_lora_button.clicked.connect(self.export_lora) export_lora_layout.addRow(export_lora_button) export_lora_group.setLayout(export_lora_layout) right_layout.addWidget(export_lora_group) # HuggingFace to GGUF Conversion hf_to_gguf_group = QGroupBox(HF_TO_GGUF_CONVERSION) hf_to_gguf_layout = QFormLayout() self.hf_model_input = QLineEdit() hf_model_input_button = QPushButton(BROWSE) hf_model_input_button.clicked.connect(self.browse_hf_model_input) hf_model_input_layout = QHBoxLayout() hf_model_input_layout.addWidget(self.hf_model_input) hf_model_input_layout.addWidget(hf_model_input_button) hf_to_gguf_layout.addRow(MODEL_DIRECTORY, hf_model_input_layout) self.hf_outfile = QLineEdit() hf_outfile_button = QPushButton(BROWSE) hf_outfile_button.clicked.connect(self.browse_hf_outfile) hf_outfile_layout = QHBoxLayout() hf_outfile_layout.addWidget(self.hf_outfile) hf_outfile_layout.addWidget(hf_outfile_button) hf_to_gguf_layout.addRow(OUTPUT_FILE, hf_outfile_layout) self.hf_outtype = QComboBox() self.hf_outtype.addItems(["f32", "f16", "bf16", "q8_0", "auto"]) hf_to_gguf_layout.addRow(OUTPUT_TYPE, self.hf_outtype) self.hf_vocab_only = QCheckBox(VOCAB_ONLY) hf_to_gguf_layout.addRow(self.hf_vocab_only) self.hf_use_temp_file = QCheckBox(USE_TEMP_FILE) hf_to_gguf_layout.addRow(self.hf_use_temp_file) self.hf_no_lazy = QCheckBox(NO_LAZY_EVALUATION) hf_to_gguf_layout.addRow(self.hf_no_lazy) self.hf_verbose = QCheckBox(VERBOSE) hf_to_gguf_layout.addRow(self.hf_verbose) self.hf_dry_run = QCheckBox(DRY_RUN) hf_to_gguf_layout.addRow(self.hf_dry_run) self.hf_model_name = QLineEdit() hf_to_gguf_layout.addRow(MODEL_NAME, self.hf_model_name) self.hf_split_max_size = QLineEdit() hf_to_gguf_layout.addRow(SPLIT_MAX_SIZE, self.hf_split_max_size) hf_to_gguf_convert_button = QPushButton(CONVERT_HF_TO_GGUF) hf_to_gguf_convert_button.clicked.connect(self.convert_hf_to_gguf) hf_to_gguf_layout.addRow(hf_to_gguf_convert_button) hf_to_gguf_group.setLayout(hf_to_gguf_layout) right_layout.addWidget(hf_to_gguf_group) # Modify the task list to support right-click menu self.task_list.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu) self.task_list.customContextMenuRequested.connect(self.show_task_context_menu) # Set initial state self.update_base_model_visibility(self.lora_output_type_combo.currentIndex()) # Initialize releases and backends if os.environ.get("AUTOGGUF_CHECK_BACKEND", "").lower() == "enabled": self.refresh_releases() self.refresh_backends() if os.environ.get("AUTOGGUF_CHECK_UPDATE", "").lower() == "enabled": self.logger.info(CHECKING_FOR_UPDATES) self.check_for_updates() # Load theme based on environment variable theme_path = os.environ.get("AUTOGGUF_THEME") if theme_path: try: with open(theme_path, "r") as f: theme = f.read() self.setStyleSheet(theme) except (FileNotFoundError, OSError): # If the specified theme file is not found or inaccessible, # fall back to the default theme with open(resource_path("assets/default.css"), "r") as f: default_theme = f.read() self.setStyleSheet(default_theme) else: # If the environment variable is not set, use the default theme with open(resource_path("assets/default.css"), "r") as f: default_theme = f.read() self.setStyleSheet(default_theme) # Imported models from external paths self.imported_models = [] # Load models self.load_models() # Load plugins self.plugins = Plugins.load_plugins(self) Plugins.apply_plugins(self) # Finish initialization self.logger.info(AUTOGGUF_INITIALIZATION_COMPLETE) self.logger.info(STARTUP_ELASPED_TIME.format(init_timer.elapsed())) def resizeEvent(self, event) -> None: super().resizeEvent(event) path = QPainterPath() path.addRoundedRect(self.rect(), 10, 10) mask = QRegion(path.toFillPolygon().toPolygon()) self.setMask(mask) def delete_model(self, item): model_name = item.text(0) reply = QMessageBox.question( self, CONFIRM_DELETE, DELETE_WARNING.format(model_name), QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, QMessageBox.StandardButton.No, ) if reply == QMessageBox.StandardButton.Yes: model_path = os.path.join(self.models_input.text(), model_name) try: os.remove(model_path) self.model_tree.takeTopLevelItem( self.model_tree.indexOfTopLevelItem(item) ) self.logger.info(MODEL_DELETED_SUCCESSFULLY.format(model_name)) except Exception as e: show_error(self.logger, f"Error deleting model: {e}") def check_for_updates(self) -> None: try: url = "https://api.github.com/repos/leafspark/AutoGGUF/releases/latest" req = urllib.request.Request(url) with urllib.request.urlopen(req) as response: if response.status != 200: raise urllib.error.HTTPError( url, response.status, "HTTP Error", response.headers, None ) latest_release = json.loads(response.read().decode("utf-8")) latest_version = latest_release["tag_name"].replace("v", "") if latest_version > AUTOGGUF_VERSION.replace("v", ""): self.prompt_for_update(latest_release) except urllib.error.URLError as e: self.logger.warning(f"{ERROR_CHECKING_FOR_UPDATES} {e}") def prompt_for_update(self, release) -> None: update_message = QMessageBox() update_message.setIcon(QMessageBox.Information) update_message.setWindowTitle(UPDATE_AVAILABLE) update_message.setText(NEW_VERSION_AVAILABLE.format(release["tag_name"])) update_message.setInformativeText(DOWNLOAD_NEW_VERSION) update_message.addButton(QMessageBox.StandardButton.Yes) update_message.addButton(QMessageBox.StandardButton.No) update_message.setDefaultButton(QMessageBox.StandardButton.Yes) if update_message.exec() == QMessageBox.StandardButton.Yes: QDesktopServices.openUrl(QUrl(release["html_url"])) def keyPressEvent(self, event) -> None: if event.modifiers() == Qt.ControlModifier: if ( event.key() == Qt.Key_Equal ): # Qt.Key_Plus doesn't work on some keyboards self.resize_window(larger=True) elif event.key() == Qt.Key_Minus: self.resize_window(larger=False) elif event.key() == Qt.Key_0: self.reset_size() super().keyPressEvent(event) def refresh_backends(self) -> None: self.logger.info(REFRESHING_BACKENDS) llama_bin = os.path.abspath("llama_bin") os.makedirs(llama_bin, exist_ok=True) self.backend_combo.clear() valid_backends = [ (item, os.path.join(llama_bin, item)) for item in os.listdir(llama_bin) if os.path.isdir(os.path.join(llama_bin, item)) and "cudart-llama" not in item.lower() ] if valid_backends: for name, path in valid_backends: self.backend_combo.addItem(name, userData=path) self.backend_combo.setEnabled( True ) # Enable the combo box if there are valid backends else: self.backend_combo.addItem(NO_BACKENDS_AVAILABLE) self.backend_combo.setEnabled(False) self.logger.info(FOUND_VALID_BACKENDS.format(len(valid_backends))) def save_task_preset(self, task_item) -> None: self.logger.info(SAVING_TASK_PRESET.format(task_item.task_name)) for thread in self.quant_threads: if thread.log_file == task_item.log_file: preset = { "command": thread.command, "backend_path": thread.cwd, "log_file": thread.log_file, } file_name, _ = QFileDialog.getSaveFileName( self, SAVE_TASK_PRESET, "", JSON_FILES ) if file_name: with open(file_name, "w") as f: json.dump(preset, f, indent=4) QMessageBox.information( self, TASK_PRESET_SAVED, TASK_PRESET_SAVED_TO.format(file_name) ) break def download_finished(self, extract_dir) -> None: self.logger.info(DOWNLOAD_FINISHED_EXTRACTED_TO.format(extract_dir)) self.download_button.setEnabled(True) self.download_progress.setValue(100) if ( self.cuda_extract_checkbox.isChecked() and self.cuda_extract_checkbox.isVisible() ): cuda_backend = self.backend_combo_cuda.currentData() if cuda_backend and cuda_backend != NO_SUITABLE_CUDA_BACKENDS: self.extract_cuda_files(extract_dir, cuda_backend) QMessageBox.information( self, DOWNLOAD_COMPLETE, LLAMACPP_DOWNLOADED_AND_EXTRACTED.format(extract_dir, cuda_backend), ) else: QMessageBox.warning( self, CUDA_EXTRACTION_FAILED, NO_SUITABLE_CUDA_BACKEND_FOUND ) else: QMessageBox.information( self, DOWNLOAD_COMPLETE, LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED.format(extract_dir), ) self.refresh_backends() # Refresh the backends after successful download self.update_cuda_option() # Update CUDA options in case a CUDA-capable backend was downloaded # Select the newly downloaded backend new_backend_name = os.path.basename(extract_dir) index = self.backend_combo.findText(new_backend_name) if index >= 0: self.backend_combo.setCurrentIndex(index) def validate_quantization_inputs(self) -> None: self.logger.debug(VALIDATING_QUANTIZATION_INPUTS) errors = [] if not self.backend_combo.currentData(): errors.append(NO_BACKEND_SELECTED) if not self.models_input.text(): errors.append(MODELS_PATH_REQUIRED) if not self.output_input.text(): errors.append(OUTPUT_PATH_REQUIRED) if not self.logs_input.text(): errors.append(LOGS_PATH_REQUIRED) if not self.model_tree.currentItem(): errors.append(NO_MODEL_SELECTED) if errors: raise ValueError("\n".join(errors)) def load_models(self) -> None: self.logger.info(LOADING_MODELS) models_dir = self.models_input.text() ensure_directory(models_dir) self.model_tree.clear() sharded_models = {} single_models = [] concatenated_models = [] shard_pattern = re.compile(r"(.*)-(\d+)-of-(\d+)\.gguf$") concat_pattern = re.compile(r"(.*)\.gguf\.part(\d+)of(\d+)$") for file in os.listdir(models_dir): full_path = os.path.join(models_dir, file) if file.endswith(".gguf"): if not verify_gguf(full_path): show_error(self.logger, INVALID_GGUF_FILE.format(file)) continue match = shard_pattern.match(file) if match: base_name, shard_num, total_shards = match.groups() if base_name not in sharded_models: sharded_models[base_name] = [] sharded_models[base_name].append((int(shard_num), file)) else: single_models.append(file) else: match = concat_pattern.match(file) if match: concatenated_models.append(file) if hasattr(self, "imported_models"): for imported_model in self.imported_models: file_name = os.path.basename(imported_model) if ( file_name not in single_models and file_name not in concatenated_models ): if verify_gguf(imported_model): single_models.append(file_name) else: show_error( self.logger, INVALID_GGUF_FILE.format(imported_model) ) for base_name, shards in sharded_models.items(): parent_item = QTreeWidgetItem(self.model_tree) parent_item.setText(0, SHARDED_MODEL_NAME.format(base_name)) first_shard = sorted(shards, key=lambda x: x[0])[0][1] parent_item.setData(0, Qt.ItemDataRole.UserRole, first_shard) for _, shard_file in sorted(shards): child_item = QTreeWidgetItem(parent_item) child_item.setText(0, shard_file) child_item.setData(0, Qt.ItemDataRole.UserRole, shard_file) for model in sorted(single_models): self.add_model_to_tree(model) for model in sorted(concatenated_models): item = self.add_model_to_tree(model) item.setForeground(0, Qt.gray) item.setToolTip(0, CONCATENATED_FILE_WARNING) self.model_tree.expandAll() self.logger.info( LOADED_MODELS.format( len(single_models) + len(sharded_models) + len(concatenated_models) ) ) if concatenated_models: self.logger.warning( CONCATENATED_FILES_FOUND.format(len(concatenated_models)) ) def add_model_to_tree(self, model) -> QTreeWidgetItem: item = QTreeWidgetItem(self.model_tree) item.setText(0, model) if hasattr(self, "imported_models") and model in [ os.path.basename(m) for m in self.imported_models ]: full_path = next( m for m in self.imported_models if os.path.basename(m) == model ) item.setData(0, Qt.ItemDataRole.UserRole, full_path) item.setToolTip(0, IMPORTED_MODEL_TOOLTIP.format(full_path)) else: item.setData(0, Qt.ItemDataRole.UserRole, model) return item def extract_cuda_files(self, extract_dir, destination) -> None: self.logger.info(EXTRACTING_CUDA_FILES.format(extract_dir, destination)) for root, dirs, files in os.walk(extract_dir): for file in files: if file.lower().endswith(".dll"): source_path = os.path.join(root, file) dest_path = os.path.join(destination, file) shutil.copy2(source_path, dest_path) def download_error(self, error_message) -> None: self.logger.error(DOWNLOAD_ERROR.format(error_message)) self.download_button.setEnabled(True) self.download_progress.setValue(0) show_error(self.logger, DOWNLOAD_FAILED.format(error_message)) # Clean up any partially downloaded files asset = self.asset_combo.currentData() if asset: partial_file = os.path.join(os.path.abspath("llama_bin"), asset["name"]) if os.path.exists(partial_file): os.remove(partial_file) def browse_local_path(self) -> None: if self.upload_type_file.isChecked(): file_path, _ = QFileDialog.getOpenFileName(self, SELECT_FILE) if file_path: self.hf_local_path_input.setText(file_path) elif self.upload_type_folder.isChecked(): folder_path = QFileDialog.getExistingDirectory(self, SELECT_FOLDER) if folder_path: self.hf_local_path_input.setText(folder_path) @validate_input("hf_repo_input", "hf_local_path_input", "hf_remote_path_input") def transfer_to_hf(self) -> None: hf_repo = self.hf_repo_input.text() local_path = self.hf_local_path_input.text() remote_path = self.hf_remote_path_input.text() type = "upload" if not hf_repo or not local_path or not type: QMessageBox.warning(self, ERROR, ALL_FIELDS_REQUIRED) return try: command = ["huggingface-cli", type, hf_repo, local_path] if remote_path: command.append(remote_path) # Add repo type argument if selected if self.repo_type_model.isChecked(): command.append("--repo-type=model") elif self.repo_type_dataset.isChecked(): command.append("--repo-type=dataset") elif self.repo_type_space.isChecked(): command.append("--repo-type=space") logs_path = self.logs_input.text() ensure_directory(logs_path) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(logs_path, f"hf_transfer_{timestamp}.log") # Log command command_str = " ".join(command) self.logger.info(HUGGINGFACE_UPLOAD_COMMAND + command_str) thread = QuantizationThread(command, os.getcwd(), log_file) self.quant_threads.append(thread) task_name_temp = ( UPLOADING if self.upload_type_file.isChecked() else UPLOADING_FOLDER ) task_name = HF_TRANSFER_TASK_NAME.format( task_name_temp, local_path, hf_repo, local_path ) task_item = TaskListItem( task_name, log_file, show_progress_bar=False, logger=self.logger, quant_threads=self.quant_threads, ) list_item = QListWidgetItem(self.task_list) list_item.setSizeHint(task_item.sizeHint()) self.task_list.addItem(list_item) self.task_list.setItemWidget(list_item, task_item) thread.status_signal.connect(task_item.update_status) thread.finished_signal.connect( lambda: self.task_finished(thread, task_item) ) thread.error_signal.connect( lambda err: handle_error(self.logger, err, task_item) ) thread.start() except Exception as e: show_error(self.logger, ERROR_STARTING_HF_TRANSFER.format(str(e))) self.logger.info(STARTED_HUGGINGFACE_TRANSFER.format(type)) def quantize_to_fp8_dynamic(self, model_dir: str, output_dir: str) -> None: if model_dir or output_dir == "": show_error( self.logger, f"{ERROR_STARTING_AUTOFP8_QUANTIZATION}: {NO_MODEL_SELECTED}", ) return self.logger.info( QUANTIZING_TO_WITH_AUTOFP8.format(os.path.basename(model_dir), output_dir) ) try: command = [ "python", "src/quantize_to_fp8_dynamic.py", model_dir, output_dir, ] logs_path = self.logs_input.text() ensure_directory(logs_path) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(logs_path, f"autofp8_{timestamp}.log") thread = QuantizationThread(command, os.getcwd(), log_file) self.quant_threads.append(thread) task_name = QUANTIZING_WITH_AUTOFP8.format(os.path.basename(model_dir)) task_item = TaskListItem( task_name, log_file, show_progress_bar=False, logger=self.logger, quant_threads=self.quant_threads, ) list_item = QListWidgetItem(self.task_list) list_item.setSizeHint(task_item.sizeHint()) self.task_list.addItem(list_item) self.task_list.setItemWidget(list_item, task_item) thread.status_signal.connect(task_item.update_status) thread.finished_signal.connect( lambda: self.task_finished(thread, task_item) ) thread.error_signal.connect( lambda err: handle_error(self.logger, err, task_item) ) thread.start() except Exception as e: show_error(self.logger, f"{ERROR_STARTING_AUTOFP8_QUANTIZATION}: {e}") self.logger.info(AUTOFP8_QUANTIZATION_TASK_STARTED) @validate_input( "hf_model_input", "hf_outfile", "hf_split_max_size", "hf_model_name", "logs_input", ) def convert_hf_to_gguf(self) -> None: self.logger.info(STARTING_HF_TO_GGUF_CONVERSION) try: model_dir = self.hf_model_input.text() if not model_dir: raise ValueError(MODEL_DIRECTORY_REQUIRED) command = ["python", "src/convert_hf_to_gguf.py"] if self.hf_vocab_only.isChecked(): command.append("--vocab-only") if self.hf_outfile.text(): command.extend(["--outfile", self.hf_outfile.text()]) command.extend(["--outtype", self.hf_outtype.currentText()]) if self.hf_use_temp_file.isChecked(): command.append("--use-temp-file") if self.hf_no_lazy.isChecked(): command.append("--no-lazy") if self.hf_model_name.text(): command.extend(["--model-name", self.hf_model_name.text()]) if self.hf_verbose.isChecked(): command.append("--verbose") if self.hf_split_max_size.text(): command.extend(["--split-max-size", self.hf_split_max_size.text()]) if self.hf_dry_run.isChecked(): command.append("--dry-run") command.append(model_dir) logs_path = self.logs_input.text() ensure_directory(logs_path) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(logs_path, f"hf_to_gguf_{timestamp}.log") # Log command command_str = " ".join(command) self.logger.info(HF_TO_GGUF_CONVERSION_COMMAND.format(command_str)) thread = QuantizationThread(command, os.getcwd(), log_file) self.quant_threads.append(thread) task_name = CONVERTING_TO_GGUF.format(os.path.basename(model_dir)) task_item = TaskListItem( task_name, log_file, show_progress_bar=False, logger=self.logger, quant_threads=self.quant_threads, ) list_item = QListWidgetItem(self.task_list) list_item.setSizeHint(task_item.sizeHint()) self.task_list.addItem(list_item) self.task_list.setItemWidget(list_item, task_item) thread.status_signal.connect(task_item.update_status) thread.finished_signal.connect( lambda: self.task_finished(thread, task_item) ) thread.error_signal.connect( lambda err: handle_error(self.logger, err, task_item) ) thread.start() except Exception as e: show_error(self.logger, ERROR_STARTING_HF_TO_GGUF_CONVERSION.format(str(e))) self.logger.info(HF_TO_GGUF_CONVERSION_TASK_STARTED) def split_gguf( self, model_dir: str, output_dir: str, max_size: str, max_tensors: str ) -> None: if not model_dir or not output_dir: show_error(self.logger, f"{SPLIT_GGUF_ERROR}: {NO_MODEL_SELECTED}") return self.logger.info(SPLIT_GGUF_TASK_STARTED) try: command = [ "llama-gguf-split", ] if max_size: command.extend(["--split-max-size", max_size]) if max_tensors: command.extend(["--split-max-tensors", max_tensors]) command.extend([model_dir, output_dir]) logs_path = self.logs_input.text() ensure_directory(logs_path) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(logs_path, f"gguf_split_{timestamp}.log") thread = QuantizationThread(command, os.getcwd(), log_file) self.quant_threads.append(thread) task_name = SPLIT_GGUF_DYNAMIC.format(os.path.basename(model_dir)) task_item = TaskListItem( task_name, log_file, show_progress_bar=False, logger=self.logger, quant_threads=self.quant_threads, ) list_item = QListWidgetItem(self.task_list) list_item.setSizeHint(task_item.sizeHint()) self.task_list.addItem(list_item) self.task_list.setItemWidget(list_item, task_item) thread.status_signal.connect(task_item.update_status) thread.finished_signal.connect( lambda: self.task_finished(thread, task_item) ) thread.error_signal.connect( lambda err: handle_error(self.logger, err, task_item) ) thread.start() except Exception as e: show_error(self.logger, SPLIT_GGUF_ERROR.format(e)) self.logger.info(SPLIT_GGUF_TASK_FINISHED) def merge_gguf(self, model_dir: str, output_dir: str) -> None: if not model_dir or not output_dir: show_error(self.logger, f"Error merging GGUF: No model selected") return self.logger.info("Merge GGUF task started.") try: command = ["llama-gguf-split", "--merge"] command.extend([model_dir, output_dir]) logs_path = self.logs_input.text() ensure_directory(logs_path) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(logs_path, f"gguf_merge_{timestamp}.log") thread = QuantizationThread(command, os.getcwd(), log_file) self.quant_threads.append(thread) task_name = "Merging GGUFs {}".format(os.path.basename(model_dir)) task_item = TaskListItem( task_name, log_file, show_progress_bar=False, logger=self.logger, quant_threads=self.quant_threads, ) list_item = QListWidgetItem(self.task_list) list_item.setSizeHint(task_item.sizeHint()) self.task_list.addItem(list_item) self.task_list.setItemWidget(list_item, task_item) thread.status_signal.connect(task_item.update_status) thread.finished_signal.connect( lambda: self.task_finished(thread, task_item) ) thread.error_signal.connect( lambda err: handle_error(self.logger, err, task_item) ) thread.start() except Exception as e: show_error(self.logger, "Error starting merge GGUF task: {}".format(e)) self.logger.info("Split GGUF task finished.") def quantize_model(self) -> None: self.logger.info(STARTING_MODEL_QUANTIZATION) try: self.validate_quantization_inputs() selected_item = self.model_tree.currentItem() if not selected_item: raise ValueError(NO_MODEL_SELECTED) model_file = selected_item.data(0, Qt.ItemDataRole.UserRole) model_name = selected_item.text(0).replace(" (sharded)", "") backend_path = self.backend_combo.currentData() if not backend_path: raise ValueError(NO_BACKEND_SELECTED) selected_quant_types = [ item.text() for item in self.quant_type.selectedItems() ] if not selected_quant_types: raise ValueError(NO_QUANTIZATION_TYPE_SELECTED) input_path = os.path.join(self.models_input.text(), model_file) if not os.path.exists(input_path): raise FileNotFoundError(INPUT_FILE_NOT_EXIST.format(input_path)) tasks = [] # List to keep track of all tasks for quant_type in selected_quant_types: # Start building the output name output_name_parts = [ os.path.splitext(model_name)[0], "converted", quant_type, ] # Check for output tensor options if ( self.use_output_tensor_type.isChecked() or self.leave_output_tensor.isChecked() ): output_tensor_part = "o" if self.use_output_tensor_type.isChecked(): output_tensor_part += ( "." + self.output_tensor_type.currentText() ) output_name_parts.append(output_tensor_part) # Check for embedding tensor options if self.use_token_embedding_type.isChecked(): embd_tensor_part = "t." + self.token_embedding_type.currentText() output_name_parts.append(embd_tensor_part) # Check for pure option if self.pure.isChecked(): output_name_parts.append("pure") # Check for requantize option if self.allow_requantize.isChecked(): output_name_parts.append("rq") # Check for KV override kv_used = bool if any( entry.get_override_string() for entry in self.kv_override_entries ): output_name_parts.append("kv") kv_used = True # Join all parts with underscores and add .gguf extension output_name = "_".join(output_name_parts) + ".gguf" output_path = os.path.join(self.output_input.text(), output_name) command = [os.path.join(backend_path, "llama-quantize")] if self.allow_requantize.isChecked(): command.append("--allow-requantize") if self.leave_output_tensor.isChecked(): command.append("--leave-output-tensor") if self.pure.isChecked(): command.append("--pure") if self.imatrix.text(): command.extend(["--imatrix", self.imatrix.text()]) if self.include_weights.text(): command.extend(["--include-weights", self.include_weights.text()]) if self.exclude_weights.text(): command.extend(["--exclude-weights", self.exclude_weights.text()]) if self.use_output_tensor_type.isChecked(): command.extend( [ "--output-tensor-type", self.output_tensor_type.currentText().lower(), ] ) if self.use_token_embedding_type.isChecked(): command.extend( [ "--token-embedding-type", self.token_embedding_type.currentText().lower(), ] ) if self.keep_split.isChecked(): command.append("--keep-split") if self.kv_override_entries: for entry in self.kv_override_entries: override_string = entry.get_override_string( model_name=model_name, quant_type=quant_type, output_path=output_path, quantization_parameters=[ kv_used, # If KV overrides are used self.allow_requantize.isChecked(), # If requantize is used self.pure.isChecked(), # If pure tensors option is used self.leave_output_tensor.isChecked(), # If leave output tensor option is used ], ) if override_string: command.extend(["--override-kv", override_string]) command.extend([input_path, output_path, quant_type]) # Add extra arguments if self.extra_arguments.text(): command.extend(self.extra_arguments.text().split()) logs_path = self.logs_input.text() ensure_directory(logs_path) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join( logs_path, f"{model_name}_{timestamp}_{quant_type}.log" ) # Log quant command command_str = " ".join(command) self.logger.info(f"{QUANTIZATION_COMMAND}: {command_str}") thread = QuantizationThread(command, backend_path, log_file) self.quant_threads.append(thread) task_item = TaskListItem( QUANTIZING_MODEL_TO.format(model_name, quant_type), log_file, show_properties=True, logger=self.logger, ) list_item = QListWidgetItem(self.task_list) list_item.setSizeHint(task_item.sizeHint()) self.task_list.addItem(list_item) self.task_list.setItemWidget(list_item, task_item) tasks.append( (thread, task_item) ) # Add the thread and task_item to our list # Connect the output signal to the new progress parsing function thread.output_signal.connect( lambda line, ti=task_item: self.parse_progress(line, ti) ) thread.status_signal.connect(task_item.update_status) thread.finished_signal.connect( lambda t=thread, ti=task_item: self.task_finished(t, ti) ) thread.error_signal.connect( lambda err, ti=task_item: handle_error(self.logger, err, ti) ) thread.model_info_signal.connect(self.update_model_info) # Start all threads after setting them up for thread, _ in tasks: thread.start() self.logger.info(QUANTIZATION_TASK_STARTED.format(model_name)) except ValueError as e: show_error(self.logger, str(e)) except FileNotFoundError as e: show_error(self.logger, str(e)) except Exception as e: show_error(self.logger, ERROR_STARTING_QUANTIZATION.format(str(e))) def task_finished(self, thread, task_item) -> None: self.logger.info(TASK_FINISHED.format(thread.log_file)) if thread in self.quant_threads: self.quant_threads.remove(thread) task_item.update_status(COMPLETED) self.setAttribute(Qt.WA_WindowModified, True) # Set modified flag def show_task_details(self, item) -> None: self.logger.debug(SHOWING_TASK_DETAILS_FOR.format(item.text())) task_item = self.task_list.itemWidget(item) if task_item: log_dialog = QDialog(self) log_dialog.setWindowTitle(LOG_FOR.format(task_item.task_name)) log_dialog.setGeometry(200, 200, 800, 600) log_text = QPlainTextEdit() log_text.setReadOnly(True) layout = QVBoxLayout() layout.addWidget(log_text) log_dialog.setLayout(layout) # Load existing content if os.path.exists(task_item.log_file): with open_file_safe(task_item.log_file, "r") as f: log_text.setPlainText(f.read()) # Connect to the thread if it's still running for thread in self.quant_threads: if thread.log_file == task_item.log_file: thread.output_signal.connect(log_text.appendPlainText) break log_dialog.exec() def import_model(self) -> None: self.logger.info(IMPORTING_MODEL) file_path, _ = QFileDialog.getOpenFileName( self, SELECT_MODEL_TO_IMPORT, "", GGUF_FILES ) if file_path: file_name = os.path.basename(file_path) # Verify GGUF file if not verify_gguf(file_path): show_error(self.logger, INVALID_GGUF_FILE.format(file_name)) return reply = QMessageBox.question( self, CONFIRM_IMPORT, IMPORT_MODEL_CONFIRMATION.format(file_name), QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, QMessageBox.StandardButton.No, ) if reply == QMessageBox.StandardButton.Yes: self.imported_models.append(file_path) self.load_models() self.logger.info(MODEL_IMPORTED_SUCCESSFULLY.format(file_name)) @validate_input( "imatrix_model", "imatrix_datafile", "imatrix_model", "imatrix_output" ) def generate_imatrix(self) -> None: self.logger.info(STARTING_IMATRIX_GENERATION) try: backend_path = self.backend_combo.currentData() if not os.path.exists(backend_path): raise FileNotFoundError(BACKEND_PATH_NOT_EXIST.format(backend_path)) # Check if the Model area is empty if not self.imatrix_model.text(): raise ValueError(MODEL_PATH_REQUIRED_FOR_IMATRIX) command = [ os.path.join(backend_path, "llama-imatrix"), "-f", self.imatrix_datafile.text(), "-m", self.imatrix_model.text(), "-o", self.imatrix_output.text(), "--output-frequency", str(self.imatrix_frequency.value()), "--ctx-size", str(self.imatrix_ctx_size.value()), "--threads", str(self.threads_spinbox.value()), ] if self.gpu_offload_auto.isChecked(): command.extend(["-ngl", "99"]) elif self.gpu_offload_spinbox.value() > 0: command.extend(["-ngl", str(self.gpu_offload_spinbox.value())]) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(self.logs_input.text(), f"imatrix_{timestamp}.log") # Log command command_str = " ".join(command) self.logger.info(f"{IMATRIX_GENERATION_COMMAND}: {command_str}") thread = QuantizationThread(command, backend_path, log_file) self.quant_threads.append(thread) task_name = GENERATING_IMATRIX_FOR.format( os.path.basename(self.imatrix_model.text()) ) task_item = TaskListItem( task_name, log_file, show_progress_bar=True, logger=self.logger, quant_threads=self.quant_threads, ) list_item = QListWidgetItem(self.task_list) list_item.setSizeHint(task_item.sizeHint()) self.task_list.addItem(list_item) self.task_list.setItemWidget(list_item, task_item) imatrix_chunks = None thread.status_signal.connect(task_item.update_status) thread.output_signal.connect( lambda line, ti=task_item: self.parse_progress(line, ti, imatrix_chunks) ) thread.finished_signal.connect( lambda: self.task_finished(thread, task_item) ) thread.error_signal.connect( lambda err: handle_error(self.logger, err, task_item) ) thread.start() except Exception as e: show_error(self.logger, ERROR_STARTING_IMATRIX_GENERATION.format(str(e))) self.logger.info(IMATRIX_GENERATION_TASK_STARTED) def closeEvent(self, event: QCloseEvent) -> None: self.logger.info(APPLICATION_CLOSING) if self.quant_threads: reply = QMessageBox.question( self, WARNING, TASK_RUNNING_WARNING, QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, QMessageBox.StandardButton.No, ) if reply == QMessageBox.StandardButton.Yes: for thread in self.quant_threads: thread.terminate() event.accept() else: event.ignore() else: event.accept() self.logger.info(APPLICATION_CLOSED)