style: format code with Black

BuildTools 2024-08-04 19:50:34 -07:00
parent 2dc5bd9e8a
commit fa51f7cdb8
21 changed files with 8215 additions and 6922 deletions
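The hunks below are what a default Black run over the source tree produces: double quotes, 88-column line wrapping, trailing commas in exploded calls, and two blank lines around top-level definitions. A minimal sketch of the invocation and configuration, assuming Black's defaults (the repository's actual settings and layout are not shown in this diff, so the paths and pyproject section here are hypothetical):

    # format every Python file in place (assumes Black is installed and code lives under src/)
    pip install black
    black src/

    # optional pyproject.toml section pinning the defaults assumed above
    [tool.black]
    line-length = 88
    target-version = ["py310"]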

View File

@@ -31,7 +31,7 @@ def __init__(self):
        self.logger.info(INITIALIZING_AUTOGGUF)
        self.setWindowTitle(WINDOW_TITLE)
        self.setWindowIcon(QIcon(resource_path("assets/favicon.ico")))
        self.setGeometry(100, 100, 1600, 1200)
        ensure_directory(os.path.abspath("quantized_models"))
@@ -171,7 +171,7 @@ def __init__(self):
            "Q5_K_S",
            "Q5_K_M",
            "Q6_K",
            "Q8_0",
            "Q4_0",
            "Q4_1",
            "Q5_0",
@@ -180,7 +180,7 @@ def __init__(self):
            "Q4_0_4_8",
            "Q4_0_8_8",
            "BF16",
            "F16",
            "F32",
            "COPY",
        ]
@@ -452,8 +452,13 @@ def __init__(self):
        # Output Type Dropdown
        self.lora_output_type_combo = QComboBox()
        self.lora_output_type_combo.addItems(["GGML", "GGUF"])
        self.lora_output_type_combo.currentIndexChanged.connect(
            self.update_base_model_visibility
        )
        lora_layout.addRow(
            self.create_label(OUTPUT_TYPE, SELECT_OUTPUT_TYPE),
            self.lora_output_type_combo,
        )

        # Base Model Path (initially hidden)
        self.base_model_label = self.create_label(BASE_MODEL, SELECT_BASE_MODEL_FILE)
@@ -471,7 +476,9 @@ def __init__(self):
        wrapper_layout = QHBoxLayout(self.base_model_wrapper)
        wrapper_layout.addWidget(self.base_model_label)
        wrapper_layout.addWidget(self.base_model_widget, 1)  # Give it a stretch factor
        wrapper_layout.setContentsMargins(
            0, 0, 0, 0
        )  # Remove margins for better alignment

        # Add the wrapper to the layout
        lora_layout.addRow(self.base_model_wrapper)
@@ -545,7 +552,7 @@ def __init__(self):
        # Modify the task list to support right-click menu
        self.task_list.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu)
        self.task_list.customContextMenuRequested.connect(self.show_task_context_menu)

        # Set inital state
        self.update_base_model_visibility(self.lora_output_type_combo.currentIndex())
@@ -1200,19 +1207,19 @@ def delete_task(self, item):
        if reply == QMessageBox.StandardButton.Yes:
            # Retrieve the task_item before removing it from the list
            task_item = self.task_list.itemWidget(item)

            # Remove the item from the list
            row = self.task_list.row(item)
            self.task_list.takeItem(row)

            # If the task is still running, terminate it
            if task_item and task_item.log_file:
                for thread in self.quant_threads:
                    if thread.log_file == task_item.log_file:
                        thread.terminate()
                        self.quant_threads.remove(thread)
                        break

            # Delete the task_item widget
            if task_item:
                task_item.deleteLater()
@@ -1395,7 +1402,7 @@ def quantize_model(self):
            override_string = entry.get_override_string(
                model_name=model_name,
                quant_type=quant_type,
                output_path=output_path,
            )
            if override_string:
                command.extend(["--override-kv", override_string])
@@ -1413,7 +1420,7 @@ def quantize_model(self):
            log_file = os.path.join(
                logs_path, f"{model_name}_{timestamp}_{quant_type}.log"
            )

            # Log quant command
            command_str = " ".join(command)
            self.logger.info(f"{QUANTIZATION_COMMAND}: {command_str}")
@@ -1430,7 +1437,9 @@ def quantize_model(self):
            self.task_list.setItemWidget(list_item, task_item)

            # Connect the output signal to the new progress parsing function
            thread.output_signal.connect(
                lambda line: self.parse_progress(line, task_item)
            )
            thread.status_signal.connect(task_item.update_status)
            thread.finished_signal.connect(lambda: self.task_finished(thread))
            thread.error_signal.connect(lambda err: self.handle_error(err, task_item))
@@ -1556,7 +1565,7 @@ def generate_imatrix(self):
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(self.logs_input.text(), f"imatrix_{timestamp}.log")

        # Log command
        command_str = " ".join(command)
        self.logger.info(f"{IMATRIX_GENERATION_COMMAND}: {command_str}")
@@ -1580,7 +1589,7 @@ def generate_imatrix(self):
        except Exception as e:
            self.show_error(ERROR_STARTING_IMATRIX_GENERATION.format(str(e)))
        self.logger.info(IMATRIX_GENERATION_TASK_STARTED)

    def show_error(self, message):
        self.logger.error(ERROR_MESSAGE.format(message))
        QMessageBox.critical(self, ERROR, message)
@@ -1617,4 +1626,4 @@ def closeEvent(self, event: QCloseEvent):
    app = QApplication(sys.argv)
    window = AutoGGUF()
    window.show()
    sys.exit(app.exec())

View File

@@ -1,54 +1,55 @@
from PyQt6.QtWidgets import *
from PyQt6.QtCore import *
from PyQt6.QtGui import *
import os
import sys
import psutil
import subprocess
import time
import signal
import json
import platform
import requests
import zipfile
from datetime import datetime


class DownloadThread(QThread):
    progress_signal = pyqtSignal(int)
    finished_signal = pyqtSignal(str)
    error_signal = pyqtSignal(str)

    def __init__(self, url, save_path):
        super().__init__()
        self.url = url
        self.save_path = save_path

    def run(self):
        try:
            response = requests.get(self.url, stream=True)
            response.raise_for_status()
            total_size = int(response.headers.get("content-length", 0))
            block_size = 8192
            downloaded = 0

            with open(self.save_path, "wb") as file:
                for data in response.iter_content(block_size):
                    size = file.write(data)
                    downloaded += size
                    if total_size:
                        progress = int((downloaded / total_size) * 100)
                        self.progress_signal.emit(progress)

            # Extract the downloaded zip file
            extract_dir = os.path.splitext(self.save_path)[0]
            with zipfile.ZipFile(self.save_path, "r") as zip_ref:
                zip_ref.extractall(extract_dir)

            # Remove the zip file after extraction
            os.remove(self.save_path)

            self.finished_signal.emit(extract_dir)
        except Exception as e:
            self.error_signal.emit(str(e))
            if os.path.exists(self.save_path):
                os.remove(self.save_path)

View File

@@ -1,83 +1,92 @@
from PyQt6.QtWidgets import QWidget, QHBoxLayout, QLineEdit, QComboBox, QPushButton
from PyQt6.QtCore import pyqtSignal, QRegularExpression
from PyQt6.QtGui import QDoubleValidator, QIntValidator, QRegularExpressionValidator
from datetime import datetime
import time
import os
import socket
import platform


class KVOverrideEntry(QWidget):
    deleted = pyqtSignal(QWidget)

    def __init__(self, parent=None):
        super().__init__(parent)
        layout = QHBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)

        self.key_input = QLineEdit()
        self.key_input.setPlaceholderText("Key")
        # Set validator for key input (letters and dots only)
        key_validator = QRegularExpressionValidator(QRegularExpression(r"[A-Za-z.]+"))
        self.key_input.setValidator(key_validator)
        layout.addWidget(self.key_input)

        self.type_combo = QComboBox()
        self.type_combo.addItems(["int", "str", "float"])
        layout.addWidget(self.type_combo)

        self.value_input = QLineEdit()
        self.value_input.setPlaceholderText("Value")
        layout.addWidget(self.value_input)

        delete_button = QPushButton("X")
        delete_button.setFixedSize(30, 30)
        delete_button.clicked.connect(self.delete_clicked)
        layout.addWidget(delete_button)

        # Connect type change to validator update
        self.type_combo.currentTextChanged.connect(self.update_validator)

        # Initialize validator
        self.update_validator(self.type_combo.currentText())

    def delete_clicked(self):
        self.deleted.emit(self)

    def get_override_string(
        self, model_name=None, quant_type=None, output_path=None
    ):  # Add arguments
        key = self.key_input.text()
        type_ = self.type_combo.currentText()
        value = self.value_input.text()

        dynamic_params = {
            "{system.time.milliseconds}": lambda: str(int(time.time() * 1000)),
            "{system.time.seconds}": lambda: str(int(time.time())),
            "{system.date.iso}": lambda: datetime.now().strftime("%Y-%m-%d"),
            "{system.datetime.iso}": lambda: datetime.now().isoformat(),
            "{system.username}": lambda: os.getlogin(),
            "{system.hostname}": lambda: socket.gethostname(),
            "{system.platform}": lambda: platform.system(),
            "{system.python.version}": lambda: platform.python_version(),
            "{system.time.milliseconds}": lambda: str(int(time.time() * 1000)),
            "{system.date}": lambda: datetime.now().strftime("%Y-%m-%d"),
            "{model.name}": lambda: (
                model_name if model_name is not None else "Unknown Model"
            ),
            "{quant.type}": lambda: (
                quant_type if quant_type is not None else "Unknown Quant"
            ),
            "{output.path}": lambda: (
                output_path if output_path is not None else "Unknown Output Path"
            ),
        }

        for param, func in dynamic_params.items():
            value = value.replace(param, func())

        return f"{key}={type_}:{value}"

    def get_raw_override_string(self):
        # Return the raw override string with placeholders intact
        return f"{self.key_input.text()}={self.type_combo.currentText()}:{self.value_input.text()}"

    def update_validator(self, type_):
        if type_ == "int":
            self.value_input.setValidator(QIntValidator())
        elif type_ == "float":
            self.value_input.setValidator(QDoubleValidator())
        else:  # str
            self.value_input.setValidator(None)

View File

@@ -1,46 +1,51 @@
import logging
from logging.handlers import RotatingFileHandler
import os
import sys
from datetime import datetime


class Logger:
    def __init__(self, name, log_dir):
        self.logger = logging.getLogger(name)
        self.logger.setLevel(logging.DEBUG)

        # Create logs directory if it doesn't exist
        os.makedirs(log_dir, exist_ok=True)

        # Console handler
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        console_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        console_handler.setFormatter(console_format)

        # File handler
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(log_dir, f"latest_{timestamp}.log")
        file_handler = RotatingFileHandler(
            log_file, maxBytes=10 * 1024 * 1024, backupCount=5, encoding="utf-8"
        )
        file_handler.setLevel(logging.DEBUG)
        file_format = logging.Formatter(
            "%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
        )
        file_handler.setFormatter(file_format)

        # Add handlers to logger
        self.logger.addHandler(console_handler)
        self.logger.addHandler(file_handler)

    def debug(self, message):
        self.logger.debug(message)

    def info(self, message):
        self.logger.info(message)

    def warning(self, message):
        self.logger.warning(message)

    def error(self, message):
        self.logger.error(message)

    def critical(self, message):
        self.logger.critical(message)

View File

@@ -1,48 +1,48 @@
from PyQt6.QtWidgets import *
from PyQt6.QtCore import *
from PyQt6.QtGui import *
import os
import sys
import psutil
import subprocess
import time
import signal
import json
import platform
import requests
import zipfile
from datetime import datetime


class ModelInfoDialog(QDialog):
    def __init__(self, model_info, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Model Information")
        self.setGeometry(200, 200, 600, 400)

        layout = QVBoxLayout()

        info_text = QTextEdit()
        info_text.setReadOnly(True)
        info_text.setHtml(self.format_model_info(model_info))

        layout.addWidget(info_text)

        close_button = QPushButton("Close")
        close_button.clicked.connect(self.accept)
        layout.addWidget(close_button)

        self.setLayout(layout)

    def format_model_info(self, model_info):
        html = "<h2>Model Information</h2>"
        html += f"<p><b>Architecture:</b> {model_info.get('architecture', 'N/A')}</p>"
        html += f"<p><b>Quantization Type:</b> {model_info.get('quantization_type', 'N/A')}</p>"
        html += f"<p><b>KV Pairs:</b> {model_info.get('kv_pairs', 'N/A')}</p>"
        html += f"<p><b>Tensors:</b> {model_info.get('tensors', 'N/A')}</p>"

        html += "<h3>Key-Value Pairs:</h3>"
        for key, value in model_info.get("kv_data", {}).items():
            html += f"<p><b>{key}:</b> {value}</p>"

        return html

View File

@@ -1,94 +1,95 @@
from PyQt6.QtWidgets import *
from PyQt6.QtCore import *
from PyQt6.QtGui import *
import os
import sys
import psutil
import subprocess
import time
import signal
import json
import platform
import requests
import zipfile
import traceback
from datetime import datetime

from imports_and_globals import open_file_safe


class QuantizationThread(QThread):
    # Define custom signals for communication with the main thread
    output_signal = pyqtSignal(str)
    status_signal = pyqtSignal(str)
    finished_signal = pyqtSignal()
    error_signal = pyqtSignal(str)
    model_info_signal = pyqtSignal(dict)

    def __init__(self, command, cwd, log_file):
        super().__init__()
        self.command = command
        self.cwd = cwd
        self.log_file = log_file
        self.process = None
        self.model_info = {}

    def run(self):
        try:
            # Start the subprocess
            self.process = subprocess.Popen(
                self.command,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                cwd=self.cwd,
            )
            # Open log file and process output
            with open_file_safe(self.log_file, "w") as log:
                for line in self.process.stdout:
                    line = line.strip()
                    self.output_signal.emit(line)
                    log.write(line + "\n")
                    log.flush()
                    self.status_signal.emit("In Progress")
                    self.parse_model_info(line)

            # Wait for process to complete
            self.process.wait()
            if self.process.returncode == 0:
                self.status_signal.emit("Completed")
                self.model_info_signal.emit(self.model_info)
            else:
                self.error_signal.emit(
                    f"Process exited with code {self.process.returncode}"
                )
            self.finished_signal.emit()
        except Exception as e:
            self.error_signal.emit(str(e))

    def parse_model_info(self, line):
        # Parse output for model information
        if "llama_model_loader: loaded meta data with" in line:
            parts = line.split()
            self.model_info["kv_pairs"] = parts[6]
            self.model_info["tensors"] = parts[9]
        elif "general.architecture" in line:
            self.model_info["architecture"] = line.split("=")[-1].strip()
        elif line.startswith("llama_model_loader: - kv"):
            key = line.split(":")[2].strip()
            value = line.split("=")[-1].strip()
            self.model_info.setdefault("kv_data", {})[key] = value
        elif line.startswith("llama_model_loader: - type"):
            parts = line.split(":")
            if len(parts) > 1:
                quant_type = parts[1].strip()
                tensors = parts[2].strip().split()[0]
                self.model_info.setdefault("quantization_type", []).append(
                    f"{quant_type}: {tensors} tensors"
                )

    def terminate(self):
        # Terminate the subprocess if it's still running
        if self.process:
            os.kill(self.process.pid, signal.SIGTERM)
            self.process.wait(timeout=5)
            if self.process.poll() is None:
                os.kill(self.process.pid, signal.SIGKILL)

View File

@@ -1,72 +1,73 @@
from PyQt6.QtWidgets import *
from PyQt6.QtCore import *
from PyQt6.QtGui import *
import os
import sys
import psutil
import subprocess
import time
import signal
import json
import platform
import requests
import zipfile
from datetime import datetime


class TaskListItem(QWidget):
    def __init__(self, task_name, log_file, show_progress_bar=True, parent=None):
        super().__init__(parent)
        self.task_name = task_name
        self.log_file = log_file
        self.status = "Pending"
        layout = QHBoxLayout(self)
        self.task_label = QLabel(task_name)
        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.status_label = QLabel(self.status)
        layout.addWidget(self.task_label)
        layout.addWidget(self.progress_bar)
        layout.addWidget(self.status_label)

        # Hide progress bar if show_progress_bar is False
        self.progress_bar.setVisible(show_progress_bar)

        # Use indeterminate progress bar if not showing percentage
        if not show_progress_bar:
            self.progress_bar.setRange(0, 0)

        self.progress_timer = QTimer(self)
        self.progress_timer.timeout.connect(self.update_progress)
        self.progress_value = 0

    def update_status(self, status):
        self.status = status
        self.status_label.setText(status)
        if status == "In Progress":
            # Only start timer if showing percentage progress
            if self.progress_bar.isVisible():
                self.progress_bar.setRange(0, 100)
                self.progress_timer.start(100)
        elif status == "Completed":
            self.progress_timer.stop()
            self.progress_bar.setValue(100)
        elif status == "Canceled":
            self.progress_timer.stop()
            self.progress_bar.setValue(0)

    def set_error(self):
        self.status = "Error"
        self.status_label.setText("Error")
        self.status_label.setStyleSheet("color: red;")
        self.progress_bar.setRange(0, 100)
        self.progress_timer.stop()

    def update_progress(self, value=None):
        if value is not None:
            # Update progress bar with specific value
            self.progress_value = value
            self.progress_bar.setValue(self.progress_value)
        else:
            # Increment progress bar for indeterminate progress
            self.progress_value = (self.progress_value + 1) % 101
            self.progress_bar.setValue(self.progress_value)

View File

@@ -4392,4 +4392,4 @@ def main() -> None:
if __name__ == "__main__":
    main()

View File

@@ -12,8 +12,8 @@
import numpy as np
import torch

if "NO_LOCAL_GGUF" not in os.environ:
    sys.path.insert(1, str(Path(__file__).parent / "gguf-py" / "gguf"))
import gguf

logging.basicConfig(level=logging.DEBUG)
@@ -35,7 +35,9 @@ def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
    fout.write(struct.pack("i", int(params["lora_alpha"])))


def write_tensor_header(
    fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]
) -> None:
    sname = name.encode("utf-8")
    fout.write(
        struct.pack(
@@ -49,15 +51,21 @@ def write_tensor_header(
    fout.write(sname)
    fout.seek((fout.tell() + 31) & -32)


def pyinstaller_include():
    # PyInstaller import
    pass


if __name__ == "__main__":
    if len(sys.argv) < 2:
        logger.info(f"Usage: python {sys.argv[0]} <path> [arch]")
        logger.info(
            "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
        )
        logger.info(
            f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)"
        )
        sys.exit(1)

    input_json = os.path.join(sys.argv[1], "adapter_config.json")
@@ -70,6 +78,7 @@ def pyinstaller_include():
        input_model = os.path.join(sys.argv[1], "adapter_model.safetensors")
        # lazy import load_file only if lora is in safetensors format.
        from safetensors.torch import load_file

        model = load_file(input_model, device="cpu")

    arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
@@ -78,14 +87,18 @@ def pyinstaller_include():
        logger.error(f"Error: unsupported architecture {arch_name}")
        sys.exit(1)

    arch = list(gguf.MODEL_ARCH_NAMES.keys())[
        list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)
    ]
    name_map = gguf.TensorNameMap(arch, 200)  # 200 layers ought to be enough for anyone

    with open(input_json, "r") as f:
        params = json.load(f)

    if params["peft_type"] != "LORA":
        logger.error(
            f"Error: unsupported adapter type {params['peft_type']}, expected LORA"
        )
        sys.exit(1)

    if params["fan_in_fan_out"] is True:
@@ -127,7 +140,7 @@ def pyinstaller_include():
        lora_suffixes = (".lora_A.weight", ".lora_B.weight")
        if k.endswith(lora_suffixes):
            suffix = k[-len(lora_suffixes[0]) :]
            k = k[: -len(lora_suffixes[0])]
        else:
            logger.error(f"Error: unrecognized tensor name {orig_k}")
@@ -136,7 +149,9 @@ def pyinstaller_include():
        tname = name_map.get_name(k)
        if tname is None:
            logger.error(f"Error: could not map tensor name {orig_k}")
            logger.error(
                " Note: the arch parameter must be specified if the model is not llama"
            )
            sys.exit(1)

        if suffix == ".lora_A.weight":
@@ -146,7 +161,9 @@ def pyinstaller_include():
        else:
            assert False

        logger.info(
            f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB"
        )
        write_tensor_header(fout, tname, t.shape, t.dtype)
        t.tofile(fout)

File diff suppressed because it is too large

View File

@@ -67,32 +67,34 @@ class ReaderTensor(NamedTuple):
class GGUFReader:
    # I - same as host, S - swapped
    byte_order: Literal["I", "S"] = "I"
    alignment: int = GGUF_DEFAULT_ALIGNMENT
    data_offset: int

    # Note: Internal helper, API may change.
    gguf_scalar_to_np: dict[GGUFValueType, type[np.generic]] = {
        GGUFValueType.UINT8: np.uint8,
        GGUFValueType.INT8: np.int8,
        GGUFValueType.UINT16: np.uint16,
        GGUFValueType.INT16: np.int16,
        GGUFValueType.UINT32: np.uint32,
        GGUFValueType.INT32: np.int32,
        GGUFValueType.FLOAT32: np.float32,
        GGUFValueType.UINT64: np.uint64,
        GGUFValueType.INT64: np.int64,
        GGUFValueType.FLOAT64: np.float64,
        GGUFValueType.BOOL: np.bool_,
    }

    def __init__(
        self, path: os.PathLike[str] | str, mode: Literal["r", "r+", "c"] = "r"
    ):
        self.data = np.memmap(path, mode=mode)
        offs = 0

        # Check for GGUF magic
        if self._get(offs, np.uint32, override_order="<")[0] != GGUF_MAGIC:
            raise ValueError("GGUF magic invalid")
        offs += 4

        # Check GGUF version
@@ -100,28 +102,46 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
        if temp_version[0] & 65535 == 0:
            # If we get 0 here that means it's (probably) a GGUF file created for
            # the opposite byte order of the machine this script is running on.
            self.byte_order = "S"
            temp_version = temp_version.newbyteorder(self.byte_order)
        version = temp_version[0]
        if version not in READER_SUPPORTED_VERSIONS:
            raise ValueError(
                f"Sorry, file appears to be version {version} which we cannot handle"
            )
        self.fields: OrderedDict[str, ReaderField] = OrderedDict()
        self.tensors: list[ReaderTensor] = []
        offs += self._push_field(
            ReaderField(
                offs, "GGUF.version", [temp_version], [0], [GGUFValueType.UINT32]
            )
        )

        # Check tensor count and kv count
        temp_counts = self._get(offs, np.uint64, 2)
        offs += self._push_field(
            ReaderField(
                offs,
                "GGUF.tensor_count",
                [temp_counts[:1]],
                [0],
                [GGUFValueType.UINT64],
            )
        )
        offs += self._push_field(
            ReaderField(
                offs, "GGUF.kv_count", [temp_counts[1:]], [0], [GGUFValueType.UINT64]
            )
        )
        tensor_count, kv_count = temp_counts
        offs = self._build_fields(offs, kv_count)

        # Build Tensor Info Fields
        offs, tensors_fields = self._build_tensor_info(offs, tensor_count)
        new_align = self.fields.get("general.alignment")
        if new_align is not None:
            if new_align.types != [GGUFValueType.UINT32]:
                raise ValueError("Bad type for general.alignment field")
            self.alignment = new_align.parts[-1][0]
        padding = offs % self.alignment
        if padding != 0:
@@ -129,7 +149,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
        self.data_offset = offs
        self._build_tensors(offs, tensors_fields)

    _DT = TypeVar("_DT", bound=npt.DTypeLike)

    # Fetch a key/value metadata field by key.
    def get_field(self, key: str) -> Union[ReaderField, None]:
@@ -140,14 +160,18 @@ def get_tensor(self, idx: int) -> ReaderTensor:
        return self.tensors[idx]

    def _get(
        self,
        offset: int,
        dtype: npt.DTypeLike,
        count: int = 1,
        override_order: None | Literal["I", "S", "<"] = None,
    ) -> npt.NDArray[Any]:
        count = int(count)
        itemsize = int(np.empty([], dtype=dtype).itemsize)
        end_offs = offset + itemsize * count
        return (
            self.data[offset:end_offs]
            .view(dtype=dtype)[:count]
            .newbyteorder(override_order or self.byte_order)
        )
@@ -156,18 +180,22 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
            # TODO: add option to generate error on duplicate keys
            # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
            logger.warning(f"Duplicate key {field.name} at offset {field.offset}")
            self.fields[field.name + "_{}".format(field.offset)] = field
        else:
            self.fields[field.name] = field
        return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)

    def _get_str(
        self, offset: int
    ) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
        slen = self._get(offset, np.uint64)
        return slen, self._get(offset + 8, np.uint8, slen[0])

    def _get_field_parts(
        self,
        orig_offs: int,
        raw_type: int,
    ) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
        offs = orig_offs
        types: list[GGUFValueType] = []
@@ -192,7 +220,9 @@ def _get_field_parts(
            aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
            data_idxs: list[int] = []
            for idx in range(alen[0]):
                curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(
                    offs, raw_itype[0]
                )
                if idx == 0:
                    types += curr_types
                    idxs_offs = len(aparts)
@@ -201,7 +231,7 @@ def _get_field_parts(
                offs += curr_size
            return offs - orig_offs, aparts, data_idxs, types

        # We can't deal with this one.
        raise ValueError("Unknown/unhandled field type {gtype}")

    def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
        offs = orig_offs
@@ -228,7 +258,7 @@ def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
        return ReaderField(
            orig_offs,
            str(bytes(name_data), encoding="utf-8"),
            [name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
            [1, 3, 4, 5],
        )
@@ -242,19 +272,26 @@ def _build_fields(self, offs: int, count: int) -> int:
            offs += int(raw_kv_type.nbytes)
            parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
            idxs_offs = len(parts)
            field_size, field_parts, field_idxs, field_types = self._get_field_parts(
                offs, raw_kv_type[0]
            )
            parts += field_parts
            self._push_field(
                ReaderField(
                    orig_offs,
                    str(bytes(kv_kdata), encoding="utf-8"),
                    parts,
                    [idx + idxs_offs for idx in field_idxs],
                    field_types,
                ),
                skip_sum=True,
            )
            offs += field_size
        return offs

    def _build_tensor_info(
        self, offs: int, count: int
    ) -> tuple[int, list[ReaderField]]:
        tensor_fields = []
        for _ in range(count):
            field = self._get_tensor_info_field(offs)
@@ -264,13 +301,13 @@ def _build_tensor_info(self, offs: int, count: int) -> tuple[int, list[ReaderFie
    def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
        tensors = []
        tensor_names = set()  # keep track of name to prevent duplicated tensors
        for field in fields:
            _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
            # check if there's any tensor having same name already in the list
            tensor_name = str(bytes(name_data), encoding="utf-8")
            if tensor_name in tensor_names:
                raise ValueError(f"Found duplicated tensor with name {tensor_name}")
            tensor_names.add(tensor_name)
            ggml_type = GGMLQuantizationType(raw_dtype[0])
            n_elems = int(np.prod(dims))
@@ -304,14 +341,16 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
                item_count = n_bytes
                item_type = np.uint8
                np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)
            tensors.append(
                ReaderTensor(
                    name=tensor_name,
                    tensor_type=ggml_type,
                    shape=dims,
                    n_elements=n_elems,
                    n_bytes=n_bytes,
                    data_offset=data_offs,
                    data=self._get(data_offs, item_type, item_count).reshape(np_dims),
                    field=field,
                )
            )
        self.tensors = tensors

View File

@ -52,8 +52,8 @@ class GGUFValue:
class WriterState(Enum): class WriterState(Enum):
NO_FILE = auto() NO_FILE = auto()
EMPTY = auto() EMPTY = auto()
HEADER = auto() HEADER = auto()
KV_DATA = auto() KV_DATA = auto()
TI_DATA = auto() TI_DATA = auto()
WEIGHTS = auto() WEIGHTS = auto()
@ -67,22 +67,29 @@ class GGUFWriter:
kv_data: list[dict[str, GGUFValue]] kv_data: list[dict[str, GGUFValue]]
state: WriterState state: WriterState
_simple_value_packing = { _simple_value_packing = {
GGUFValueType.UINT8: "B", GGUFValueType.UINT8: "B",
GGUFValueType.INT8: "b", GGUFValueType.INT8: "b",
GGUFValueType.UINT16: "H", GGUFValueType.UINT16: "H",
GGUFValueType.INT16: "h", GGUFValueType.INT16: "h",
GGUFValueType.UINT32: "I", GGUFValueType.UINT32: "I",
GGUFValueType.INT32: "i", GGUFValueType.INT32: "i",
GGUFValueType.FLOAT32: "f", GGUFValueType.FLOAT32: "f",
GGUFValueType.UINT64: "Q", GGUFValueType.UINT64: "Q",
GGUFValueType.INT64: "q", GGUFValueType.INT64: "q",
GGUFValueType.FLOAT64: "d", GGUFValueType.FLOAT64: "d",
GGUFValueType.BOOL: "?", GGUFValueType.BOOL: "?",
} }
def __init__( def __init__(
self, path: os.PathLike[str] | str | None, arch: str, use_temp_file: bool = False, endianess: GGUFEndian = GGUFEndian.LITTLE, self,
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False path: os.PathLike[str] | str | None,
arch: str,
use_temp_file: bool = False,
endianess: GGUFEndian = GGUFEndian.LITTLE,
split_max_tensors: int = 0,
split_max_size: int = 0,
dry_run: bool = False,
small_first_shard: bool = False,
): ):
self.fout = None self.fout = None
self.path = Path(path) if path else None self.path = Path(path) if path else None
@ -97,9 +104,11 @@ def __init__(
self.split_max_size = split_max_size self.split_max_size = split_max_size
self.dry_run = dry_run self.dry_run = dry_run
self.small_first_shard = small_first_shard self.small_first_shard = small_first_shard
logger.info("gguf: This GGUF file is for {0} Endian only".format( logger.info(
"Big" if self.endianess == GGUFEndian.BIG else "Little", "gguf: This GGUF file is for {0} Endian only".format(
)) "Big" if self.endianess == GGUFEndian.BIG else "Little",
)
)
self.state = WriterState.NO_FILE self.state = WriterState.NO_FILE
if self.small_first_shard: if self.small_first_shard:
@ -128,7 +137,9 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
elif name.endswith(".lora_b"): elif name.endswith(".lora_b"):
if last_lora_a is None or last_lora_a[0] != name[:-1] + "a": if last_lora_a is None or last_lora_a[0] != name[:-1] + "a":
# Bail when the LoRA pair can't be found trivially # Bail when the LoRA pair can't be found trivially
logger.warning("can't measure LoRA size correctly, tensor order is unusual") logger.warning(
"can't measure LoRA size correctly, tensor order is unusual"
)
return 0, 0, 0, 0 return 0, 0, 0, 0
else: else:
shape = (*shape[:-1], last_lora_a[1].shape[-1]) shape = (*shape[:-1], last_lora_a[1].shape[-1])
@ -136,7 +147,7 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
size = prod(shape) size = prod(shape)
if "_exps." in name: if "_exps." in name:
expert_params += (size // shape[-3]) expert_params += size // shape[-3]
expert_sum += shape[-3] expert_sum += shape[-3]
n_expert_tensors += 1 n_expert_tensors += 1
else: else:
@ -157,15 +168,26 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
def format_shard_names(self, path: Path) -> list[Path]: def format_shard_names(self, path: Path) -> list[Path]:
if len(self.tensors) == 1: if len(self.tensors) == 1:
return [path] return [path]
return [path.with_name(SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))) for i in range(len(self.tensors))] return [
path.with_name(
SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))
)
for i in range(len(self.tensors))
]
def open_output_file(self, path: Path | None = None) -> None: def open_output_file(self, path: Path | None = None) -> None:
if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path): if (
self.state is WriterState.EMPTY
and self.fout is not None
and (path is None or path == self.path)
):
# allow calling this multiple times as long as the path is the same # allow calling this multiple times as long as the path is the same
return return
if self.state is not WriterState.NO_FILE: if self.state is not WriterState.NO_FILE:
raise ValueError(f'Expected output file to be not yet opened, got {self.state}') raise ValueError(
f"Expected output file to be not yet opened, got {self.state}"
)
if path is not None: if path is not None:
self.path = path self.path = path
@ -181,7 +203,9 @@ def print_plan(self) -> list[Path]:
filenames = self.format_shard_names(self.path) filenames = self.format_shard_names(self.path)
assert len(filenames) == len(self.tensors) assert len(filenames) == len(self.tensors)
for name, tensors in zip(filenames, self.tensors): for name, tensors in zip(filenames, self.tensors):
logger.info(f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}") logger.info(
f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}"
)
if self.dry_run: if self.dry_run:
logger.info("Dry run, not writing files") logger.info("Dry run, not writing files")
@ -201,17 +225,23 @@ def add_shard_kv_data(self) -> None:
self.kv_data.extend({} for _ in range(len(self.kv_data), total_splits)) self.kv_data.extend({} for _ in range(len(self.kv_data), total_splits))
for i, kv_data in enumerate(self.kv_data): for i, kv_data in enumerate(self.kv_data):
kv_data[Keys.Split.LLM_KV_SPLIT_NO] = GGUFValue(i, GGUFValueType.UINT16) kv_data[Keys.Split.LLM_KV_SPLIT_NO] = GGUFValue(i, GGUFValueType.UINT16)
kv_data[Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(total_splits, GGUFValueType.UINT16) kv_data[Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(
kv_data[Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(total_tensors, GGUFValueType.INT32) total_splits, GGUFValueType.UINT16
)
kv_data[Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(
total_tensors, GGUFValueType.INT32
)
def write_header_to_file(self, path: Path | None = None) -> None: def write_header_to_file(self, path: Path | None = None) -> None:
if len(self.tensors) == 1 and (self.split_max_tensors != 0 or self.split_max_size != 0): if len(self.tensors) == 1 and (
self.split_max_tensors != 0 or self.split_max_size != 0
):
logger.warning("Model fails split requirements, not splitting") logger.warning("Model fails split requirements, not splitting")
self.open_output_file(path) self.open_output_file(path)
if self.state is not WriterState.EMPTY: if self.state is not WriterState.EMPTY:
raise ValueError(f'Expected output file to be empty, got {self.state}') raise ValueError(f"Expected output file to be empty, got {self.state}")
assert self.fout is not None assert self.fout is not None
assert len(self.fout) == len(self.tensors) assert len(self.fout) == len(self.tensors)
@ -220,7 +250,7 @@ def write_header_to_file(self, path: Path | None = None) -> None:
self.add_shard_kv_data() self.add_shard_kv_data()
for fout, tensors, kv_data in zip(self.fout, self.tensors, self.kv_data): for fout, tensors, kv_data in zip(self.fout, self.tensors, self.kv_data):
fout.write(self._pack("<I", GGUF_MAGIC, skip_pack_prefix = True)) fout.write(self._pack("<I", GGUF_MAGIC, skip_pack_prefix=True))
fout.write(self._pack("I", GGUF_VERSION)) fout.write(self._pack("I", GGUF_VERSION))
fout.write(self._pack("Q", len(tensors))) fout.write(self._pack("Q", len(tensors)))
fout.write(self._pack("Q", len(kv_data))) fout.write(self._pack("Q", len(kv_data)))
@ -229,7 +259,9 @@ def write_header_to_file(self, path: Path | None = None) -> None:
def write_kv_data_to_file(self) -> None: def write_kv_data_to_file(self) -> None:
if self.state is not WriterState.HEADER: if self.state is not WriterState.HEADER:
raise ValueError(f'Expected output file to contain the header, got {self.state}') raise ValueError(
f"Expected output file to contain the header, got {self.state}"
)
assert self.fout is not None assert self.fout is not None
for fout, kv_data in zip(self.fout, self.kv_data): for fout, kv_data in zip(self.fout, self.kv_data):
@ -246,7 +278,9 @@ def write_kv_data_to_file(self) -> None:
def write_ti_data_to_file(self) -> None: def write_ti_data_to_file(self) -> None:
if self.state is not WriterState.KV_DATA: if self.state is not WriterState.KV_DATA:
raise ValueError(f'Expected output file to contain KV data, got {self.state}') raise ValueError(
f"Expected output file to contain KV data, got {self.state}"
)
assert self.fout is not None assert self.fout is not None
for fout, tensors in zip(self.fout, self.tensors): for fout, tensors in zip(self.fout, self.tensors):
@ -269,12 +303,12 @@ def write_ti_data_to_file(self) -> None:
def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None: def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
if any(key in kv_data for kv_data in self.kv_data): if any(key in kv_data for kv_data in self.kv_data):
raise ValueError(f'Duplicated key name {key!r}') raise ValueError(f"Duplicated key name {key!r}")
self.kv_data[0][key] = GGUFValue(value=val, type=vtype) self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
def add_uint8(self, key: str, val: int) -> None: def add_uint8(self, key: str, val: int) -> None:
self.add_key_value(key,val, GGUFValueType.UINT8) self.add_key_value(key, val, GGUFValueType.UINT8)
def add_int8(self, key: str, val: int) -> None: def add_int8(self, key: str, val: int) -> None:
self.add_key_value(key, val, GGUFValueType.INT8) self.add_key_value(key, val, GGUFValueType.INT8)
@@ -321,14 +355,20 @@ def ggml_pad(x: int, n: int) -> int:
return ((x + n - 1) // n) * n return ((x + n - 1) // n) * n
def add_tensor_info( def add_tensor_info(
self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype, self,
tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None, name: str,
tensor_shape: Sequence[int],
tensor_dtype: np.dtype,
tensor_nbytes: int,
raw_dtype: GGMLQuantizationType | None = None,
) -> None: ) -> None:
if self.state is not WriterState.NO_FILE: if self.state is not WriterState.NO_FILE:
raise ValueError(f'Expected output file to be not yet opened, got {self.state}') raise ValueError(
f"Expected output file to be not yet opened, got {self.state}"
)
if any(name in tensors for tensors in self.tensors): if any(name in tensors for tensors in self.tensors):
raise ValueError(f'Duplicated tensor name {name!r}') raise ValueError(f"Duplicated tensor name {name!r}")
if raw_dtype is None: if raw_dtype is None:
if tensor_dtype == np.float16: if tensor_dtype == np.float16:
@@ -346,7 +386,9 @@ def add_tensor_info(
elif tensor_dtype == np.int64: elif tensor_dtype == np.int64:
dtype = GGMLQuantizationType.I64 dtype = GGMLQuantizationType.I64
else: else:
raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now") raise ValueError(
"Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now"
)
else: else:
dtype = raw_dtype dtype = raw_dtype
if tensor_dtype == np.uint8: if tensor_dtype == np.uint8:
@@ -357,16 +399,22 @@ def add_tensor_info(
if ( # split when over tensor limit if ( # split when over tensor limit
self.split_max_tensors != 0 self.split_max_tensors != 0
and len(self.tensors[-1]) >= self.split_max_tensors and len(self.tensors[-1]) >= self.split_max_tensors
) or ( # split when over size limit ) or ( # split when over size limit
self.split_max_size != 0 self.split_max_size != 0
and sum(ti.nbytes for ti in self.tensors[-1].values()) + tensor_nbytes > self.split_max_size and sum(ti.nbytes for ti in self.tensors[-1].values()) + tensor_nbytes
> self.split_max_size
): ):
self.tensors.append({}) self.tensors.append({})
self.tensors[-1][name] = TensorInfo(shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes) self.tensors[-1][name] = TensorInfo(
shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes
)
def add_tensor( def add_tensor(
self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, self,
name: str,
tensor: np.ndarray[Any, Any],
raw_shape: Sequence[int] | None = None,
raw_dtype: GGMLQuantizationType | None = None, raw_dtype: GGMLQuantizationType | None = None,
) -> None: ) -> None:
if self.endianess == GGUFEndian.BIG: if self.endianess == GGUFEndian.BIG:
@@ -377,7 +425,9 @@ def add_tensor(
self.temp_file = fp self.temp_file = fp
shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype) self.add_tensor_info(
name, shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype
)
if self.temp_file is None: if self.temp_file is None:
self.tensors[-1][name].tensor = tensor self.tensors[-1][name].tensor = tensor
@@ -387,13 +437,21 @@ def add_tensor(
self.write_padding(self.temp_file, tensor.nbytes) self.write_padding(self.temp_file, tensor.nbytes)
def write_padding(self, fp: IO[bytes], n: int, align: int | None = None) -> None: def write_padding(self, fp: IO[bytes], n: int, align: int | None = None) -> None:
pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n pad = (
GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment)
- n
)
if pad != 0: if pad != 0:
fp.write(bytes([0] * pad)) fp.write(bytes([0] * pad))
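
write_padding() only ever emits the bytes needed to reach the next alignment boundary; a small worked example using the formula from ggml_pad above (32 is assumed here as the default data alignment):

    def ggml_pad(x: int, n: int) -> int:
        return ((x + n - 1) // n) * n            # round x up to a multiple of n

    assert ggml_pad(100, 32) == 128              # a 100-byte tensor gets 28 zero bytes of padding
    assert ggml_pad(128, 32) == 128              # already aligned: no padding written
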
def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None: def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
if self.state is not WriterState.TI_DATA and self.state is not WriterState.WEIGHTS: if (
raise ValueError(f'Expected output file to contain tensor info or weights, got {self.state}') self.state is not WriterState.TI_DATA
and self.state is not WriterState.WEIGHTS
):
raise ValueError(
f"Expected output file to contain tensor info or weights, got {self.state}"
)
assert self.fout is not None assert self.fout is not None
if self.endianess == GGUFEndian.BIG: if self.endianess == GGUFEndian.BIG:
@@ -409,7 +467,9 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
# pop the first tensor info # pop the first tensor info
# TODO: cleaner way to get the first key # TODO: cleaner way to get the first key
first_tensor_name = [name for name, _ in zip(self.tensors[file_id].keys(), range(1))][0] first_tensor_name = [
name for name, _ in zip(self.tensors[file_id].keys(), range(1))
][0]
ti = self.tensors[file_id].pop(first_tensor_name) ti = self.tensors[file_id].pop(first_tensor_name)
assert ti.nbytes == tensor.nbytes assert ti.nbytes == tensor.nbytes
@@ -437,8 +497,15 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
total_bytes = sum(ti.nbytes for t in self.tensors for ti in t.values()) total_bytes = sum(ti.nbytes for t in self.tensors for ti in t.values())
if len(self.fout) > 1: if len(self.fout) > 1:
shard_bar = tqdm(desc=f"Shard (0/{len(self.fout)})", total=None, unit="byte", unit_scale=True) shard_bar = tqdm(
bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True) desc=f"Shard (0/{len(self.fout)})",
total=None,
unit="byte",
unit_scale=True,
)
bar = tqdm(
desc="Writing", total=total_bytes, unit="byte", unit_scale=True
)
for i, (fout, tensors) in enumerate(zip(self.fout, self.tensors)): for i, (fout, tensors) in enumerate(zip(self.fout, self.tensors)):
if shard_bar is not None: if shard_bar is not None:
@@ -448,7 +515,9 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
# relying on the fact that Python dicts preserve insertion order (since 3.7) # relying on the fact that Python dicts preserve insertion order (since 3.7)
for ti in tensors.values(): for ti in tensors.values():
assert ti.tensor is not None # can only iterate once over the tensors assert (
ti.tensor is not None
) # can only iterate once over the tensors
assert ti.tensor.nbytes == ti.nbytes assert ti.tensor.nbytes == ti.nbytes
ti.tensor.tofile(fout) ti.tensor.tofile(fout)
if shard_bar is not None: if shard_bar is not None:
@@ -460,7 +529,9 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
else: else:
self.temp_file.seek(0) self.temp_file.seek(0)
shutil.copyfileobj(self.temp_file, self.fout[0 if not self.small_first_shard else 1]) shutil.copyfileobj(
self.temp_file, self.fout[0 if not self.small_first_shard else 1]
)
self.flush() self.flush()
self.temp_file.close() self.temp_file.close()
@@ -566,7 +637,9 @@ def add_base_model_version(self, source_id: int, version: str) -> None:
self.add_string(Keys.General.BASE_MODEL_VERSION.format(id=source_id), version) self.add_string(Keys.General.BASE_MODEL_VERSION.format(id=source_id), version)
def add_base_model_organization(self, source_id: int, organization: str) -> None: def add_base_model_organization(self, source_id: int, organization: str) -> None:
self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization) self.add_string(
Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization
)
def add_base_model_url(self, source_id: int, url: str) -> None: def add_base_model_url(self, source_id: int, url: str) -> None:
self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url) self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
@@ -605,7 +678,9 @@ def add_block_count(self, length: int) -> None:
self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length) self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
def add_leading_dense_block_count(self, length: int) -> None: def add_leading_dense_block_count(self, length: int) -> None:
self.add_uint32(Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch), length) self.add_uint32(
Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch), length
)
def add_feed_forward_length(self, length: int | Sequence[int]) -> None: def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
if isinstance(length, int): if isinstance(length, int):
@@ -614,10 +689,14 @@ def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
self.add_array(Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length) self.add_array(Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length)
def add_expert_feed_forward_length(self, length: int) -> None: def add_expert_feed_forward_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length) self.add_uint32(
Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length
)
def add_expert_shared_feed_forward_length(self, length: int) -> None: def add_expert_shared_feed_forward_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length) self.add_uint32(
Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length
)
def add_parallel_residual(self, use: bool) -> None: def add_parallel_residual(self, use: bool) -> None:
self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use) self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
@@ -736,10 +815,14 @@ def add_tokenizer_model(self, model: str) -> None:
def add_tokenizer_pre(self, pre: str) -> None: def add_tokenizer_pre(self, pre: str) -> None:
self.add_string(Keys.Tokenizer.PRE, pre) self.add_string(Keys.Tokenizer.PRE, pre)
def add_token_list(self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None: def add_token_list(
self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]
) -> None:
self.add_array(Keys.Tokenizer.LIST, tokens) self.add_array(Keys.Tokenizer.LIST, tokens)
def add_token_merges(self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None: def add_token_merges(
self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]
) -> None:
self.add_array(Keys.Tokenizer.MERGES, merges) self.add_array(Keys.Tokenizer.MERGES, merges)
def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None: def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None:
@@ -793,18 +876,22 @@ def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
template_names = set() template_names = set()
for choice in value: for choice in value:
name = choice.get('name', '') name = choice.get("name", "")
template = choice.get('template') template = choice.get("template")
# Allowing non-alphanumerical characters in template name is probably not a good idea, so filter it # Allowing non-alphanumerical characters in template name is probably not a good idea, so filter it
name = ''.join((c if c in ascii_letters + digits else '_' for c in name)) name = "".join(
(c if c in ascii_letters + digits else "_" for c in name)
)
if name and template is not None: if name and template is not None:
if name == 'default': if name == "default":
template_default = template template_default = template
else: else:
template_names.add(name) template_names.add(name)
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template) self.add_string(
Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template
)
if template_names: if template_names:
self.add_array(Keys.Tokenizer.CHAT_TEMPLATES, list(template_names)) self.add_array(Keys.Tokenizer.CHAT_TEMPLATES, list(template_names))
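
To illustrate the filtering above (the key layout is assumed from the gguf constants, where named templates live under tokenizer.chat_template.{name}):

    # {"name": "rag-v1", "template": "..."}   -> name sanitized to "rag_v1",
    #                                            stored as "tokenizer.chat_template.rag_v1"
    # {"name": "default", "template": "..."}  -> kept back as the main chat template value
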
@@ -829,10 +916,10 @@ def add_eot_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.EOT_ID, id) self.add_uint32(Keys.Tokenizer.EOT_ID, id)
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes: def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
pack_prefix = '' pack_prefix = ""
if not skip_pack_prefix: if not skip_pack_prefix:
pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>' pack_prefix = "<" if self.endianess == GGUFEndian.LITTLE else ">"
return struct.pack(f'{pack_prefix}{fmt}', value) return struct.pack(f"{pack_prefix}{fmt}", value)
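
The prefix only selects byte order; a quick sketch of what that means for the struct formats used here (the BOOL case in _pack_val below skips the prefix because a single byte has no byte order):

    import struct

    assert struct.pack("<I", 3) == b"\x03\x00\x00\x00"   # little-endian uint32
    assert struct.pack(">I", 3) == b"\x00\x00\x00\x03"    # big-endian uint32
    assert struct.pack("?", True) == b"\x01"               # bool: one byte, order irrelevant
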
def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes: def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
kv_data = bytearray() kv_data = bytearray()
@@ -842,7 +929,9 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
pack_fmt = self._simple_value_packing.get(vtype) pack_fmt = self._simple_value_packing.get(vtype)
if pack_fmt is not None: if pack_fmt is not None:
kv_data += self._pack(pack_fmt, val, skip_pack_prefix = vtype == GGUFValueType.BOOL) kv_data += self._pack(
pack_fmt, val, skip_pack_prefix=vtype == GGUFValueType.BOOL
)
elif vtype == GGUFValueType.STRING: elif vtype == GGUFValueType.STRING:
encoded_val = val.encode("utf-8") if isinstance(val, str) else val encoded_val = val.encode("utf-8") if isinstance(val, str) else val
kv_data += self._pack("Q", len(encoded_val)) kv_data += self._pack("Q", len(encoded_val))
@@ -860,7 +949,9 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
else: else:
ltype = GGUFValueType.get_type(val[0]) ltype = GGUFValueType.get_type(val[0])
if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]): if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
raise ValueError("All items in a GGUF array should be of the same type") raise ValueError(
"All items in a GGUF array should be of the same type"
)
kv_data += self._pack("I", ltype) kv_data += self._pack("I", ltype)
kv_data += self._pack("Q", len(val)) kv_data += self._pack("Q", len(val))
for item in val: for item in val:
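
For reference, a hedged sketch of the on-disk layout _pack_val produces for the string and array branches, as far as the code above shows (strings are length-prefixed UTF-8; arrays carry an element type and a count before the elements):

    # string "llama":      uint64 length (5)  +  b"llama"
    # array ["a", "b"]:    uint32 element type (STRING)  +  uint64 count (2)
    #                      +  each element encoded as a length-prefixed string
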


@ -13,7 +13,9 @@
class LazyMeta(ABCMeta): class LazyMeta(ABCMeta):
def __new__(cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs): def __new__(
cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs
):
def __getattr__(self, name: str) -> Any: def __getattr__(self, name: str) -> Any:
meta_attr = getattr(self._meta, name) meta_attr = getattr(self._meta, name)
if callable(meta_attr): if callable(meta_attr):
@@ -41,6 +43,7 @@ def wrapped_special_op(self, *args, **kwargs):
getattr(type(self)._tensor_type, op_name), getattr(type(self)._tensor_type, op_name),
meta_noop=meta_noop, meta_noop=meta_noop,
)(self, *args, **kwargs) )(self, *args, **kwargs)
return wrapped_special_op return wrapped_special_op
# special methods bypass __getattr__, so they need to be added manually # special methods bypass __getattr__, so they need to be added manually
@@ -48,11 +51,48 @@ def wrapped_special_op(self, *args, **kwargs):
# NOTE: doing this from a metaclass is very convenient # NOTE: doing this from a metaclass is very convenient
# TODO: make this even more comprehensive # TODO: make this even more comprehensive
for binary_op in ( for binary_op in (
"lt", "le", "eq", "ne", "ge", "gt", "not" "lt",
"abs", "add", "and", "floordiv", "invert", "lshift", "mod", "mul", "matmul", "le",
"neg", "or", "pos", "pow", "rshift", "sub", "truediv", "xor", "eq",
"iadd", "iand", "ifloordiv", "ilshift", "imod", "imul", "ior", "irshift", "isub", "ixor", "ne",
"radd", "rand", "rfloordiv", "rmul", "ror", "rpow", "rsub", "rtruediv", "rxor", "ge",
"gt",
"not" "abs",
"add",
"and",
"floordiv",
"invert",
"lshift",
"mod",
"mul",
"matmul",
"neg",
"or",
"pos",
"pow",
"rshift",
"sub",
"truediv",
"xor",
"iadd",
"iand",
"ifloordiv",
"ilshift",
"imod",
"imul",
"ior",
"irshift",
"isub",
"ixor",
"radd",
"rand",
"rfloordiv",
"rmul",
"ror",
"rpow",
"rsub",
"rtruediv",
"rxor",
): ):
attr_name = f"__{binary_op}__" attr_name = f"__{binary_op}__"
# the result of these operators usually has the same shape and dtype as the input, # the result of these operators usually has the same shape and dtype as the input,
@@ -60,7 +100,9 @@ def wrapped_special_op(self, *args, **kwargs):
namespace[attr_name] = mk_wrap(attr_name, meta_noop=True) namespace[attr_name] = mk_wrap(attr_name, meta_noop=True)
for special_op in ( for special_op in (
"getitem", "setitem", "len", "getitem",
"setitem",
"len",
): ):
attr_name = f"__{special_op}__" attr_name = f"__{special_op}__"
namespace[attr_name] = mk_wrap(attr_name, meta_noop=False) namespace[attr_name] = mk_wrap(attr_name, meta_noop=False)
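
The reason these dunders have to be installed on the class is that special-method lookup bypasses instance __getattr__; a small self-contained demonstration of that rule (my own example, unrelated to the classes in this file):

    class Proxy:
        def __init__(self, value):
            self._value = value

        def __getattr__(self, name):             # only consulted for attributes not found normally
            return getattr(self._value, name)

    p = Proxy(3)
    print(p.bit_length())                        # 2: ordinary attribute access is forwarded
    try:
        p + 1                                    # __add__ is looked up on type(p), not the instance
    except TypeError:
        print("special methods bypass __getattr__")
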
@@ -77,7 +119,15 @@ class LazyBase(ABC, metaclass=LazyMeta):
_kwargs: dict[str, Any] _kwargs: dict[str, Any]
_func: Callable[[Any], Any] | None _func: Callable[[Any], Any] | None
def __init__(self, *, meta: Any, data: Any | None = None, args: tuple = (), kwargs: dict[str, Any] | None = None, func: Callable[[Any], Any] | None = None): def __init__(
self,
*,
meta: Any,
data: Any | None = None,
args: tuple = (),
kwargs: dict[str, Any] | None = None,
func: Callable[[Any], Any] | None = None,
):
super().__init__() super().__init__()
self._meta = meta self._meta = meta
self._data = data self._data = data
@@ -107,7 +157,17 @@ def _recurse_apply(o: Any, fn: Callable[[Any], Any]) -> Any:
return o return o
@classmethod @classmethod
def _wrap_fn(cls, fn: Callable, *, use_self: LazyBase | None = None, meta_noop: bool | DTypeLike | tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]] = False) -> Callable[[Any], Any]: def _wrap_fn(
cls,
fn: Callable,
*,
use_self: LazyBase | None = None,
meta_noop: (
bool
| DTypeLike
| tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]]
) = False,
) -> Callable[[Any], Any]:
def wrapped_fn(*args, **kwargs): def wrapped_fn(*args, **kwargs):
if kwargs is None: if kwargs is None:
kwargs = {} kwargs = {}
@@ -138,13 +198,16 @@ def wrapped_fn(*args, **kwargs):
res = cls.meta_with_dtype_and_shape(meta_noop, res.shape) res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)
if isinstance(res, cls._tensor_type): if isinstance(res, cls._tensor_type):
return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn) return cls(
meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn
)
else: else:
del res # not needed del res # not needed
# non-tensor return likely relies on the contents of the args # non-tensor return likely relies on the contents of the args
# (e.g. the result of torch.equal) # (e.g. the result of torch.equal)
eager_args = cls.to_eager(args) eager_args = cls.to_eager(args)
return fn(*eager_args, **kwargs) return fn(*eager_args, **kwargs)
return wrapped_fn return wrapped_fn
@classmethod @classmethod
@@ -175,7 +238,8 @@ def eager_to_meta(cls, t: Any) -> Any:
# must be overridden, meta tensor init is backend-specific # must be overridden, meta tensor init is backend-specific
@classmethod @classmethod
@abstractmethod @abstractmethod
def meta_with_dtype_and_shape(cls, dtype: Any, shape: Any) -> Any: pass def meta_with_dtype_and_shape(cls, dtype: Any, shape: Any) -> Any:
pass
@classmethod @classmethod
def from_eager(cls, t: Any) -> Any: def from_eager(cls, t: Any) -> Any:
@@ -192,7 +256,9 @@ class LazyNumpyTensor(LazyBase):
_tensor_type = np.ndarray _tensor_type = np.ndarray
@classmethod @classmethod
def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) -> np.ndarray[Any, Any]: def meta_with_dtype_and_shape(
cls, dtype: DTypeLike, shape: tuple[int, ...]
) -> np.ndarray[Any, Any]:
# The initial idea was to use np.nan as the fill value, # The initial idea was to use np.nan as the fill value,
# but non-float types like np.int16 can't use that. # but non-float types like np.int16 can't use that.
# So zero it is. # So zero it is.
@@ -201,8 +267,16 @@ def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) ->
def astype(self, dtype, *args, **kwargs): def astype(self, dtype, *args, **kwargs):
meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape) meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
full_args = (self, dtype,) + args full_args = (
return type(self)(meta=meta, args=full_args, kwargs=kwargs, func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs))) self,
dtype,
) + args
return type(self)(
meta=meta,
args=full_args,
kwargs=kwargs,
func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)),
)
def tofile(self, *args, **kwargs): def tofile(self, *args, **kwargs):
eager = LazyNumpyTensor.to_eager(self) eager = LazyNumpyTensor.to_eager(self)


@@ -44,7 +44,12 @@ class Metadata:
datasets: Optional[list[str]] = None datasets: Optional[list[str]] = None
@staticmethod @staticmethod
def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata: def load(
metadata_override_path: Optional[Path] = None,
model_path: Optional[Path] = None,
model_name: Optional[str] = None,
total_params: int = 0,
) -> Metadata:
# This grabs as many contextual authorship metadata as possible from the model repository # This grabs as many contextual authorship metadata as possible from the model repository
# making any conversion as required to match the gguf kv store metadata format # making any conversion as required to match the gguf kv store metadata format
# as well as giving users the ability to override any authorship metadata that may be incorrect # as well as giving users the ability to override any authorship metadata that may be incorrect
@@ -57,43 +62,77 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
# TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter # TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter
# heuristics # heuristics
metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params) metadata = Metadata.apply_metadata_heuristic(
metadata, model_card, hf_params, model_path, total_params
)
# Metadata Override File Provided # Metadata Override File Provided
# This is based on LLM_KV_NAMES mapping in llama.cpp # This is based on LLM_KV_NAMES mapping in llama.cpp
metadata_override = Metadata.load_metadata_override(metadata_override_path) metadata_override = Metadata.load_metadata_override(metadata_override_path)
metadata.name = metadata_override.get(Keys.General.NAME, metadata.name) metadata.name = metadata_override.get(Keys.General.NAME, metadata.name)
metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author) metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author)
metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version) metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version)
metadata.organization = metadata_override.get(Keys.General.ORGANIZATION, metadata.organization) metadata.organization = metadata_override.get(
Keys.General.ORGANIZATION, metadata.organization
)
metadata.finetune = metadata_override.get(Keys.General.FINETUNE, metadata.finetune) metadata.finetune = metadata_override.get(
metadata.basename = metadata_override.get(Keys.General.BASENAME, metadata.basename) Keys.General.FINETUNE, metadata.finetune
)
metadata.basename = metadata_override.get(
Keys.General.BASENAME, metadata.basename
)
metadata.description = metadata_override.get(Keys.General.DESCRIPTION, metadata.description) metadata.description = metadata_override.get(
metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY, metadata.quantized_by) Keys.General.DESCRIPTION, metadata.description
)
metadata.quantized_by = metadata_override.get(
Keys.General.QUANTIZED_BY, metadata.quantized_by
)
metadata.size_label = metadata_override.get(Keys.General.SIZE_LABEL, metadata.size_label) metadata.size_label = metadata_override.get(
metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME, metadata.license_name) Keys.General.SIZE_LABEL, metadata.size_label
metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK, metadata.license_link) )
metadata.license_name = metadata_override.get(
Keys.General.LICENSE_NAME, metadata.license_name
)
metadata.license_link = metadata_override.get(
Keys.General.LICENSE_LINK, metadata.license_link
)
metadata.url = metadata_override.get(Keys.General.URL, metadata.url) metadata.url = metadata_override.get(Keys.General.URL, metadata.url)
metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi) metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi)
metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid) metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid)
metadata.repo_url = metadata_override.get(Keys.General.REPO_URL, metadata.repo_url) metadata.repo_url = metadata_override.get(
Keys.General.REPO_URL, metadata.repo_url
)
metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL, metadata.source_url) metadata.source_url = metadata_override.get(
metadata.source_doi = metadata_override.get(Keys.General.SOURCE_DOI, metadata.source_doi) Keys.General.SOURCE_URL, metadata.source_url
metadata.source_uuid = metadata_override.get(Keys.General.SOURCE_UUID, metadata.source_uuid) )
metadata.source_repo_url = metadata_override.get(Keys.General.SOURCE_REPO_URL, metadata.source_repo_url) metadata.source_doi = metadata_override.get(
Keys.General.SOURCE_DOI, metadata.source_doi
)
metadata.source_uuid = metadata_override.get(
Keys.General.SOURCE_UUID, metadata.source_uuid
)
metadata.source_repo_url = metadata_override.get(
Keys.General.SOURCE_REPO_URL, metadata.source_repo_url
)
# Base Models is received here as an array of models # Base Models is received here as an array of models
metadata.base_models = metadata_override.get("general.base_models", metadata.base_models) metadata.base_models = metadata_override.get(
"general.base_models", metadata.base_models
)
metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags) metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages) metadata.languages = metadata_override.get(
metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets) Keys.General.LANGUAGES, metadata.languages
)
metadata.datasets = metadata_override.get(
Keys.General.DATASETS, metadata.datasets
)
# Direct Metadata Override (via direct cli argument) # Direct Metadata Override (via direct cli argument)
if model_name is not None: if model_name is not None:
@@ -102,7 +141,9 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
return metadata return metadata
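
As a hedged example of what such an override file can contain (the key strings are assumed to match gguf.Keys.General; the values are invented):

    # metadata_override.json
    # {
    #     "general.name": "My Finetune",
    #     "general.organization": "Example Org",
    #     "general.quantized_by": "example-user"
    # }
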
@staticmethod @staticmethod
def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, Any]: def load_metadata_override(
metadata_override_path: Optional[Path] = None,
) -> dict[str, Any]:
if metadata_override_path is None or not metadata_override_path.is_file(): if metadata_override_path is None or not metadata_override_path.is_file():
return {} return {}
@@ -128,7 +169,9 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]:
if isinstance(data, dict): if isinstance(data, dict):
return data return data
else: else:
logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict") logger.error(
f"while reading YAML model card frontmatter, data is {type(data)} instead of dict"
)
return {} return {}
else: else:
return {} return {}
@@ -149,10 +192,21 @@ def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]:
@staticmethod @staticmethod
def id_to_title(string): def id_to_title(string):
# Convert capitalization into title form unless acronym or version number # Convert capitalization into title form unless acronym or version number
return ' '.join([w.title() if w.islower() and not re.match(r'^(v\d+(?:\.\d+)*|\d.*)$', w) else w for w in string.strip().replace('-', ' ').split()]) return " ".join(
[
(
w.title()
if w.islower() and not re.match(r"^(v\d+(?:\.\d+)*|\d.*)$", w)
else w
)
for w in string.strip().replace("-", " ").split()
]
)
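
A quick illustration of how this heuristic behaves (my own example; the exact output depends on the regex above): lowercase words get title-cased while version-like and digit-leading parts are left alone, so something like:

    # id_to_title("mistral-7b-instruct-v0.2")  ->  "Mistral 7b Instruct v0.2"   (expected)
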
@staticmethod @staticmethod
def get_model_id_components(model_id: Optional[str] = None, total_params: int = 0) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]: def get_model_id_components(
model_id: Optional[str] = None, total_params: int = 0
) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
# Huggingface often store model id as '<org>/<model name>' # Huggingface often store model id as '<org>/<model name>'
# so let's parse it and apply some heuristics if possible for model name components # so let's parse it and apply some heuristics if possible for model name components
@@ -160,24 +214,24 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
# model ID missing # model ID missing
return None, None, None, None, None, None return None, None, None, None, None, None
if ' ' in model_id: if " " in model_id:
# model ID is actually a normal human sentence # model ID is actually a normal human sentence
# which means its most likely a normal model name only # which means its most likely a normal model name only
# not part of the hugging face naming standard, but whatever # not part of the hugging face naming standard, but whatever
return model_id, None, None, None, None, None return model_id, None, None, None, None, None
if '/' in model_id: if "/" in model_id:
# model ID (huggingface style) # model ID (huggingface style)
org_component, model_full_name_component = model_id.split('/', 1) org_component, model_full_name_component = model_id.split("/", 1)
else: else:
# model ID but missing org components # model ID but missing org components
org_component, model_full_name_component = None, model_id org_component, model_full_name_component = None, model_id
# Check if we erroneously matched against './' or '../' etc... # Check if we erroneously matched against './' or '../' etc...
if org_component is not None and org_component[0] == '.': if org_component is not None and org_component[0] == ".":
org_component = None org_component = None
name_parts: list[str] = model_full_name_component.split('-') name_parts: list[str] = model_full_name_component.split("-")
# Remove empty parts # Remove empty parts
for i in reversed(range(len(name_parts))): for i in reversed(range(len(name_parts))):
@@ -191,14 +245,18 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
# Annotate the name # Annotate the name
for i, part in enumerate(name_parts): for i, part in enumerate(name_parts):
# Version # Version
if re.fullmatch(r'(v|iter)?\d+([.]\d+)*', part, re.IGNORECASE): if re.fullmatch(r"(v|iter)?\d+([.]\d+)*", part, re.IGNORECASE):
name_types[i].add("version") name_types[i].add("version")
# Quant type (should not be there for base models, but still annotated) # Quant type (should not be there for base models, but still annotated)
elif re.fullmatch(r'i?q\d(_\w)*|b?fp?(16|32)', part, re.IGNORECASE): elif re.fullmatch(r"i?q\d(_\w)*|b?fp?(16|32)", part, re.IGNORECASE):
name_types[i].add("type") name_types[i].add("type")
name_parts[i] = part.upper() name_parts[i] = part.upper()
# Model size # Model size
elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)', part, re.IGNORECASE): elif i > 0 and re.fullmatch(
r"(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)",
part,
re.IGNORECASE,
):
part = part.replace("_", ".") part = part.replace("_", ".")
# Handle weird bloom-7b1 notation # Handle weird bloom-7b1 notation
if part[-1].isdecimal(): if part[-1].isdecimal():
@@ -209,14 +267,19 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
part = part[:-1] + part[-1].upper() part = part[:-1] + part[-1].upper()
if total_params != 0: if total_params != 0:
try: try:
label_params = float(part[:-1]) * pow(1000, " KMBT".find(part[-1])) label_params = float(part[:-1]) * pow(
1000, " KMBT".find(part[-1])
)
# Only use it as a size label if it's close or bigger than the model size # Only use it as a size label if it's close or bigger than the model size
# Note that LoRA adapters don't necessarily include all layers, # Note that LoRA adapters don't necessarily include all layers,
# so this is why bigger label sizes are accepted. # so this is why bigger label sizes are accepted.
# Do not use the size label when it's smaller than 1/8 of the model size # Do not use the size label when it's smaller than 1/8 of the model size
if (total_params < 0 and label_params < abs(total_params) // 8) or ( if (
total_params < 0 and label_params < abs(total_params) // 8
) or (
# Check both directions when the current model isn't a LoRA adapter # Check both directions when the current model isn't a LoRA adapter
total_params > 0 and abs(label_params - total_params) > 7 * total_params // 8 total_params > 0
and abs(label_params - total_params) > 7 * total_params // 8
): ):
# Likely a context length # Likely a context length
name_types[i].add("finetune") name_types[i].add("finetune")
@@ -229,7 +292,9 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
name_types[i].add("size_label") name_types[i].add("size_label")
name_parts[i] = part name_parts[i] = part
# Some easy to recognize finetune names # Some easy to recognize finetune names
elif i > 0 and re.fullmatch(r'chat|instruct|vision|lora', part, re.IGNORECASE): elif i > 0 and re.fullmatch(
r"chat|instruct|vision|lora", part, re.IGNORECASE
):
if total_params < 0 and part.lower() == "lora": if total_params < 0 and part.lower() == "lora":
# ignore redundant "lora" in the finetune part when the output is a lora adapter # ignore redundant "lora" in the finetune part when the output is a lora adapter
name_types[i].add("type") name_types[i].add("type")
@@ -238,7 +303,12 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
# Ignore word-based size labels when there is at least a number-based one present # Ignore word-based size labels when there is at least a number-based one present
# TODO: should word-based size labels always be removed instead? # TODO: should word-based size labels always be removed instead?
if any(c.isdecimal() for n, t in zip(name_parts, name_types) if "size_label" in t for c in n): if any(
c.isdecimal()
for n, t in zip(name_parts, name_types)
if "size_label" in t
for c in n
):
for n, t in zip(name_parts, name_types): for n, t in zip(name_parts, name_types):
if "size_label" in t: if "size_label" in t:
if all(c.isalpha() for c in n): if all(c.isalpha() for c in n):
@@ -262,22 +332,55 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
else: else:
break break
basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None basename = (
"-".join(n for n, t in zip(name_parts, name_types) if "basename" in t)
or None
)
# Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys) # Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys)
size_label = "-".join(dict.fromkeys(s for s, t in zip(name_parts, name_types) if "size_label" in t).keys()) or None size_label = (
finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None "-".join(
dict.fromkeys(
s for s, t in zip(name_parts, name_types) if "size_label" in t
).keys()
)
or None
)
finetune = (
"-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t)
or None
)
# TODO: should the basename version always be excluded? # TODO: should the basename version always be excluded?
# NOTE: multiple finetune versions are joined together # NOTE: multiple finetune versions are joined together
version = "-".join(v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t) or None version = (
"-".join(
v
for v, t, in zip(name_parts, name_types)
if "version" in t and "basename" not in t
)
or None
)
if size_label is None and finetune is None and version is None: if size_label is None and finetune is None and version is None:
# Too ambiguous, output nothing # Too ambiguous, output nothing
basename = None basename = None
return model_full_name_component, org_component, basename, finetune, version, size_label return (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
)
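
As a hedged illustration of what the whole heuristic is expected to return for a typical Hugging Face id (my own example, not taken from this file):

    # Metadata.get_model_id_components("mistralai/Mistral-7B-Instruct-v0.2") should yield roughly:
    #   ("Mistral-7B-Instruct-v0.2",   # model_full_name_component
    #    "mistralai",                  # org_component
    #    "Mistral",                    # basename
    #    "Instruct",                   # finetune
    #    "v0.2",                       # version
    #    "7B")                         # size_label
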
@staticmethod @staticmethod
def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None, total_params: int = 0) -> Metadata: def apply_metadata_heuristic(
metadata: Metadata,
model_card: Optional[dict] = None,
hf_params: Optional[dict] = None,
model_path: Optional[Path] = None,
total_params: int = 0,
) -> Metadata:
# Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Model Card Heuristics # Model Card Heuristics
@@ -317,16 +420,30 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
for model_id in metadata_base_models: for model_id in metadata_base_models:
# NOTE: model size of base model is assumed to be similar to the size of the current model # NOTE: model size of base model is assumed to be similar to the size of the current model
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
) = Metadata.get_model_id_components(model_id, total_params)
base_model = {} base_model = {}
if model_full_name_component is not None: if model_full_name_component is not None:
base_model["name"] = Metadata.id_to_title(model_full_name_component) base_model["name"] = Metadata.id_to_title(
model_full_name_component
)
if org_component is not None: if org_component is not None:
base_model["organization"] = Metadata.id_to_title(org_component) base_model["organization"] = Metadata.id_to_title(org_component)
if version is not None: if version is not None:
base_model["version"] = version base_model["version"] = version
if org_component is not None and model_full_name_component is not None: if (
base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}" org_component is not None
and model_full_name_component is not None
):
base_model["repo_url"] = (
f"https://huggingface.co/{org_component}/{model_full_name_component}"
)
metadata.base_models.append(base_model) metadata.base_models.append(base_model)
if "license" in model_card and metadata.license is None: if "license" in model_card and metadata.license is None:
@@ -360,7 +477,9 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
elif isinstance(pipeline_tags_value, list): elif isinstance(pipeline_tags_value, list):
metadata.tags.extend(pipeline_tags_value) metadata.tags.extend(pipeline_tags_value)
language_value = model_card.get("languages", model_card.get("language", None)) language_value = model_card.get(
"languages", model_card.get("language", None)
)
if language_value is not None: if language_value is not None:
if metadata.languages is None: if metadata.languages is None:
@@ -388,11 +507,18 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
if hf_params is not None: if hf_params is not None:
hf_name_or_path = hf_params.get("_name_or_path") hf_name_or_path = hf_params.get("_name_or_path")
if hf_name_or_path is not None and hf_name_or_path.count('/') <= 1: if hf_name_or_path is not None and hf_name_or_path.count("/") <= 1:
# Use _name_or_path only if its actually a model name and not some computer path # Use _name_or_path only if its actually a model name and not some computer path
# e.g. 'meta-llama/Llama-2-7b-hf' # e.g. 'meta-llama/Llama-2-7b-hf'
model_id = hf_name_or_path model_id = hf_name_or_path
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
) = Metadata.get_model_id_components(model_id, total_params)
if metadata.name is None and model_full_name_component is not None: if metadata.name is None and model_full_name_component is not None:
metadata.name = Metadata.id_to_title(model_full_name_component) metadata.name = Metadata.id_to_title(model_full_name_component)
if metadata.organization is None and org_component is not None: if metadata.organization is None and org_component is not None:
@@ -410,7 +536,14 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
############################################ ############################################
if model_path is not None: if model_path is not None:
model_id = model_path.name model_id = model_path.name
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
) = Metadata.get_model_id_components(model_id, total_params)
if metadata.name is None and model_full_name_component is not None: if metadata.name is None and model_full_name_component is not None:
metadata.name = Metadata.id_to_title(model_full_name_component) metadata.name = Metadata.id_to_title(model_full_name_component)
if metadata.organization is None and org_component is not None: if metadata.organization is None and org_component is not None:
@@ -485,7 +618,9 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
if "version" in base_model_entry: if "version" in base_model_entry:
gguf_writer.add_base_model_version(key, base_model_entry["version"]) gguf_writer.add_base_model_version(key, base_model_entry["version"])
if "organization" in base_model_entry: if "organization" in base_model_entry:
gguf_writer.add_base_model_organization(key, base_model_entry["organization"]) gguf_writer.add_base_model_organization(
key, base_model_entry["organization"]
)
if "url" in base_model_entry: if "url" in base_model_entry:
gguf_writer.add_base_model_url(key, base_model_entry["url"]) gguf_writer.add_base_model_url(key, base_model_entry["url"])
if "doi" in base_model_entry: if "doi" in base_model_entry:
@@ -493,7 +628,9 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
if "uuid" in base_model_entry: if "uuid" in base_model_entry:
gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"]) gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"])
if "repo_url" in base_model_entry: if "repo_url" in base_model_entry:
gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"]) gguf_writer.add_base_model_repo_url(
key, base_model_entry["repo_url"]
)
if self.tags is not None: if self.tags is not None:
gguf_writer.add_tags(self.tags) gguf_writer.add_tags(self.tags)


@@ -12,14 +12,18 @@
def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType): def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
block_size, type_size = GGML_QUANT_SIZES[quant_type] block_size, type_size = GGML_QUANT_SIZES[quant_type]
if shape[-1] % block_size != 0: if shape[-1] % block_size != 0:
raise ValueError(f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})") raise ValueError(
f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})"
)
return (*shape[:-1], shape[-1] // block_size * type_size) return (*shape[:-1], shape[-1] // block_size * type_size)
def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType): def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
block_size, type_size = GGML_QUANT_SIZES[quant_type] block_size, type_size = GGML_QUANT_SIZES[quant_type]
if shape[-1] % type_size != 0: if shape[-1] % type_size != 0:
raise ValueError(f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})") raise ValueError(
f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})"
)
return (*shape[:-1], shape[-1] // type_size * block_size) return (*shape[:-1], shape[-1] // type_size * block_size)
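
A worked example of the two helpers above, assuming the Q8_0 entry of GGML_QUANT_SIZES is (32, 34), i.e. 32 weights per block stored in 34 bytes (a float16 scale plus 32 int8 values):

    # quant_shape_to_byte_shape((4096, 4096), Q8_0)    ->  (4096, 4352)   # 4096 // 32 * 34
    # quant_shape_from_byte_shape((4096, 4352), Q8_0)  ->  (4096, 4096)   # 4352 // 34 * 32
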
@@ -27,14 +31,23 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati
def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray: def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
n = n.astype(np.float32, copy=False).view(np.uint32) n = n.astype(np.float32, copy=False).view(np.uint32)
# force nan to quiet # force nan to quiet
n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n) n = np.where(
(n & 0x7FFFFFFF) > 0x7F800000,
(n & np.uint32(0xFFFF0000)) | np.uint32(64 << 16),
n,
)
# round to nearest even # round to nearest even
n = (np.uint64(n) + (0x7fff + ((n >> 16) & 1))) >> 16 n = (np.uint64(n) + (0x7FFF + ((n >> 16) & 1))) >> 16
return n.astype(np.uint16) return n.astype(np.uint16)
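
A minimal standalone sketch of the same conversion on single values, useful for checking the quiet-NaN and round-to-nearest-even behaviour (plain Python ints are used here to sidestep NumPy's scalar promotion rules; the constants mirror the array code above):

    import numpy as np

    def fp32_to_bf16_bits(x: float) -> int:
        n = int(np.float32(x).view(np.uint32))        # raw IEEE-754 bits of the float32
        if (n & 0x7FFFFFFF) > 0x7F800000:              # NaN: force the quiet bit
            n = (n & 0xFFFF0000) | (64 << 16)
        return (n + 0x7FFF + ((n >> 16) & 1)) >> 16    # round to nearest even, keep top 16 bits

    assert fp32_to_bf16_bits(1.0) == 0x3F80            # exactly representable values are unchanged
    assert fp32_to_bf16_bits(1.00390625) == 0x3F80     # halfway case rounds to the even mantissa
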
# This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time # This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray: def __apply_over_grouped_rows(
func: Callable[[np.ndarray], np.ndarray],
arr: np.ndarray,
otype: DTypeLike,
oshape: tuple[int, ...],
) -> np.ndarray:
rows = arr.reshape((-1, arr.shape[-1])) rows = arr.reshape((-1, arr.shape[-1]))
osize = 1 osize = 1
for dim in oshape: for dim in oshape:
@@ -42,15 +55,23 @@ def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.
out = np.empty(shape=osize, dtype=otype) out = np.empty(shape=osize, dtype=otype)
# compute over groups of 16 rows (arbitrary, but seems good for performance) # compute over groups of 16 rows (arbitrary, but seems good for performance)
n_groups = (rows.shape[0] // 16) or 1 n_groups = (rows.shape[0] // 16) or 1
np.concatenate([func(group).ravel() for group in np.array_split(rows, n_groups)], axis=0, out=out) np.concatenate(
[func(group).ravel() for group in np.array_split(rows, n_groups)],
axis=0,
out=out,
)
return out.reshape(oshape) return out.reshape(oshape)
def __quantize_bf16_array(n: np.ndarray) -> np.ndarray: def __quantize_bf16_array(n: np.ndarray) -> np.ndarray:
return __apply_over_grouped_rows(__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape) return __apply_over_grouped_rows(
__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape
)
__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(__quantize_bf16_array, meta_noop=np.uint16) __quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(
__quantize_bf16_array, meta_noop=np.uint16
)
def quantize_bf16(n: np.ndarray): def quantize_bf16(n: np.ndarray):
@@ -105,7 +126,12 @@ def __quantize_q8_0_rows(n: np.ndarray) -> np.ndarray:
def __quantize_q8_0_array(n: np.ndarray) -> np.ndarray: def __quantize_q8_0_array(n: np.ndarray) -> np.ndarray:
return __apply_over_grouped_rows(__quantize_q8_0_rows, arr=n, otype=np.uint8, oshape=__quantize_q8_0_shape_change(n.shape)) return __apply_over_grouped_rows(
__quantize_q8_0_rows,
arr=n,
otype=np.uint8,
oshape=__quantize_q8_0_shape_change(n.shape),
)
__quantize_q8_0_lazy = LazyNumpyTensor._wrap_fn( __quantize_q8_0_lazy = LazyNumpyTensor._wrap_fn(


@@ -9,74 +9,68 @@ class TensorNameMap:
mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = { mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
# Token embeddings # Token embeddings
MODEL_TENSOR.TOKEN_EMBD: ( MODEL_TENSOR.TOKEN_EMBD: (
"gpt_neox.embed_in", # gptneox "gpt_neox.embed_in", # gptneox
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
"transformer.word_embeddings", # falcon "transformer.word_embeddings", # falcon
"word_embeddings", # bloom "word_embeddings", # bloom
"model.embed_tokens", # llama-hf "model.embed_tokens", # llama-hf
"tok_embeddings", # llama-pth "tok_embeddings", # llama-pth
"embeddings.word_embeddings", # bert nomic-bert "embeddings.word_embeddings", # bert nomic-bert
"language_model.embedding.word_embeddings", # persimmon "language_model.embedding.word_embeddings", # persimmon
"wte", # gpt2 "wte", # gpt2
"transformer.embd.wte", # phi2 "transformer.embd.wte", # phi2
"model.tok_embeddings", # internlm2 "model.tok_embeddings", # internlm2
"model.embedding", # mamba-qbert "model.embedding", # mamba-qbert
"backbone.embedding", # mamba "backbone.embedding", # mamba
"backbone.embeddings", # mamba-hf "backbone.embeddings", # mamba-hf
"transformer.in_out_embed", # Grok "transformer.in_out_embed", # Grok
"embedding.word_embeddings", # chatglm "embedding.word_embeddings", # chatglm
"transformer.token_embeddings", # openelm "transformer.token_embeddings", # openelm
"shared", # t5 "shared", # t5
), ),
# Token type embeddings # Token type embeddings
MODEL_TENSOR.TOKEN_TYPES: ( MODEL_TENSOR.TOKEN_TYPES: (
"embeddings.token_type_embeddings", # bert nomic-bert "embeddings.token_type_embeddings", # bert nomic-bert
), ),
# Normalization of token embeddings # Normalization of token embeddings
MODEL_TENSOR.TOKEN_EMBD_NORM: ( MODEL_TENSOR.TOKEN_EMBD_NORM: (
"word_embeddings_layernorm", # bloom "word_embeddings_layernorm", # bloom
"embeddings.LayerNorm", # bert "embeddings.LayerNorm", # bert
"emb_ln", # nomic-bert "emb_ln", # nomic-bert
"transformer.norm", # openelm "transformer.norm", # openelm
), ),
# Position embeddings # Position embeddings
MODEL_TENSOR.POS_EMBD: ( MODEL_TENSOR.POS_EMBD: (
"transformer.wpe", # gpt2 "transformer.wpe", # gpt2
"embeddings.position_embeddings", # bert "embeddings.position_embeddings", # bert
"wpe", # gpt2 "wpe", # gpt2
), ),
# Output # Output
MODEL_TENSOR.OUTPUT: ( MODEL_TENSOR.OUTPUT: (
"embed_out", # gptneox "embed_out", # gptneox
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
"output", # llama-pth bloom internlm2 "output", # llama-pth bloom internlm2
"word_embeddings_for_head", # persimmon "word_embeddings_for_head", # persimmon
"lm_head.linear", # phi2 "lm_head.linear", # phi2
"output_layer", # chatglm "output_layer", # chatglm
), ),
# Output norm # Output norm
MODEL_TENSOR.OUTPUT_NORM: ( MODEL_TENSOR.OUTPUT_NORM: (
"gpt_neox.final_layer_norm", # gptneox "gpt_neox.final_layer_norm", # gptneox
"transformer.ln_f", # gpt2 gpt-j falcon jais "transformer.ln_f", # gpt2 gpt-j falcon jais
"model.norm", # llama-hf baichuan internlm2 "model.norm", # llama-hf baichuan internlm2
"norm", # llama-pth "norm", # llama-pth
"transformer.norm_f", # mpt dbrx "transformer.norm_f", # mpt dbrx
"ln_f", # refact bloom qwen gpt2 "ln_f", # refact bloom qwen gpt2
"language_model.encoder.final_layernorm", # persimmon "language_model.encoder.final_layernorm", # persimmon
"model.final_layernorm", # persimmon "model.final_layernorm", # persimmon
"lm_head.ln", # phi2 "lm_head.ln", # phi2
"model.norm_f", # mamba-qbert "model.norm_f", # mamba-qbert
"backbone.norm_f", # mamba "backbone.norm_f", # mamba
"transformer.rms_norm", # Grok "transformer.rms_norm", # Grok
"encoder.final_layernorm", # chatglm "encoder.final_layernorm", # chatglm
"transformer.norm", # openelm "transformer.norm", # openelm
), ),
# Rope frequencies # Rope frequencies
MODEL_TENSOR.ROPE_FREQS: ( MODEL_TENSOR.ROPE_FREQS: (
"rope.freqs", # llama-pth "rope.freqs", # llama-pth
@@ -87,501 +81,394 @@ class TensorNameMap:
block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = { block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
# Attention norm # Attention norm
MODEL_TENSOR.ATTN_NORM: ( MODEL_TENSOR.ATTN_NORM: (
"gpt_neox.layers.{bid}.input_layernorm", # gptneox "gpt_neox.layers.{bid}.input_layernorm", # gptneox
"transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
"transformer.blocks.{bid}.norm_1", # mpt "transformer.blocks.{bid}.norm_1", # mpt
"transformer.h.{bid}.input_layernorm", # falcon7b "transformer.h.{bid}.input_layernorm", # falcon7b
"h.{bid}.input_layernorm", # bloom "h.{bid}.input_layernorm", # bloom
"transformer.h.{bid}.ln_mlp", # falcon40b "transformer.h.{bid}.ln_mlp", # falcon40b
"model.layers.{bid}.input_layernorm", # llama-hf "model.layers.{bid}.input_layernorm", # llama-hf
"layers.{bid}.attention_norm", # llama-pth "layers.{bid}.attention_norm", # llama-pth
"language_model.encoder.layers.{bid}.input_layernorm", # persimmon "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
"model.layers.{bid}.ln1", # yi "model.layers.{bid}.ln1", # yi
"h.{bid}.ln_1", # gpt2 "h.{bid}.ln_1", # gpt2
"transformer.h.{bid}.ln", # phi2 "transformer.h.{bid}.ln", # phi2
"model.layers.layers.{bid}.norm", # plamo "model.layers.layers.{bid}.norm", # plamo
"model.layers.{bid}.attention_norm", # internlm2 "model.layers.{bid}.attention_norm", # internlm2
"model.layers.{bid}.norm", # mamba-qbert "model.layers.{bid}.norm", # mamba-qbert
"backbone.layers.{bid}.norm", # mamba "backbone.layers.{bid}.norm", # mamba
"transformer.decoder_layer.{bid}.rms_norm", # Grok "transformer.decoder_layer.{bid}.rms_norm", # Grok
"transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx "transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
"encoder.layers.{bid}.input_layernorm", # chatglm "encoder.layers.{bid}.input_layernorm", # chatglm
"transformer.layers.{bid}.attn_norm", # openelm "transformer.layers.{bid}.attn_norm", # openelm
), ),
# Attention norm 2 # Attention norm 2
MODEL_TENSOR.ATTN_NORM_2: ( MODEL_TENSOR.ATTN_NORM_2: (
"transformer.h.{bid}.ln_attn", # falcon40b "transformer.h.{bid}.ln_attn", # falcon40b
"encoder.layer.{bid}.layer_norm_1", # jina-v2-code "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
), ),
# Attention query-key-value # Attention query-key-value
MODEL_TENSOR.ATTN_QKV: ( MODEL_TENSOR.ATTN_QKV: (
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
"transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais "transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
"transformer.blocks.{bid}.attn.Wqkv", # mpt "transformer.blocks.{bid}.attn.Wqkv", # mpt
"transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx "transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
"transformer.h.{bid}.self_attention.query_key_value", # falcon "transformer.h.{bid}.self_attention.query_key_value", # falcon
"h.{bid}.self_attention.query_key_value", # bloom "h.{bid}.self_attention.query_key_value", # bloom
"language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon "language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
"model.layers.{bid}.self_attn.query_key_value", # persimmon "model.layers.{bid}.self_attn.query_key_value", # persimmon
"h.{bid}.attn.c_attn", # gpt2 "h.{bid}.attn.c_attn", # gpt2
"transformer.h.{bid}.mixer.Wqkv", # phi2 "transformer.h.{bid}.mixer.Wqkv", # phi2
"encoder.layers.{bid}.attn.Wqkv", # nomic-bert "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
"model.layers.{bid}.self_attn.qkv_proj", # phi3 "model.layers.{bid}.self_attn.qkv_proj", # phi3
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
"transformer.layers.{bid}.attn.qkv_proj", # openelm "transformer.layers.{bid}.attn.qkv_proj", # openelm
), ),
# Attention query # Attention query
MODEL_TENSOR.ATTN_Q: ( MODEL_TENSOR.ATTN_Q: (
"model.layers.{bid}.self_attn.q_proj", # llama-hf "model.layers.{bid}.self_attn.q_proj", # llama-hf
"layers.{bid}.attention.wq", # llama-pth "layers.{bid}.attention.wq", # llama-pth
"encoder.layer.{bid}.attention.self.query", # bert "encoder.layer.{bid}.attention.self.query", # bert
"transformer.h.{bid}.attn.q_proj", # gpt-j "transformer.h.{bid}.attn.q_proj", # gpt-j
"model.layers.layers.{bid}.self_attn.q_proj", # plamo "model.layers.layers.{bid}.self_attn.q_proj", # plamo
"model.layers.{bid}.attention.wq", # internlm2 "model.layers.{bid}.attention.wq", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.query",# Grok "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
), ),
# Attention key # Attention key
MODEL_TENSOR.ATTN_K: ( MODEL_TENSOR.ATTN_K: (
"model.layers.{bid}.self_attn.k_proj", # llama-hf "model.layers.{bid}.self_attn.k_proj", # llama-hf
"layers.{bid}.attention.wk", # llama-pth "layers.{bid}.attention.wk", # llama-pth
"encoder.layer.{bid}.attention.self.key", # bert "encoder.layer.{bid}.attention.self.key", # bert
"transformer.h.{bid}.attn.k_proj", # gpt-j "transformer.h.{bid}.attn.k_proj", # gpt-j
"transformer.h.{bid}.attn.k", # refact "transformer.h.{bid}.attn.k", # refact
"model.layers.layers.{bid}.self_attn.k_proj", # plamo "model.layers.layers.{bid}.self_attn.k_proj", # plamo
"model.layers.{bid}.attention.wk", # internlm2 "model.layers.{bid}.attention.wk", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.key",# Grok "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
), ),
# Attention value # Attention value
MODEL_TENSOR.ATTN_V: ( MODEL_TENSOR.ATTN_V: (
"model.layers.{bid}.self_attn.v_proj", # llama-hf "model.layers.{bid}.self_attn.v_proj", # llama-hf
"layers.{bid}.attention.wv", # llama-pth "layers.{bid}.attention.wv", # llama-pth
"encoder.layer.{bid}.attention.self.value", # bert "encoder.layer.{bid}.attention.self.value", # bert
"transformer.h.{bid}.attn.v_proj", # gpt-j "transformer.h.{bid}.attn.v_proj", # gpt-j
"transformer.h.{bid}.attn.v", # refact "transformer.h.{bid}.attn.v", # refact
"model.layers.layers.{bid}.self_attn.v_proj", # plamo "model.layers.layers.{bid}.self_attn.v_proj", # plamo
"model.layers.{bid}.attention.wv", # internlm2 "model.layers.{bid}.attention.wv", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.value" # Grok "transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
), ),
# Attention output # Attention output
MODEL_TENSOR.ATTN_OUT: ( MODEL_TENSOR.ATTN_OUT: (
"gpt_neox.layers.{bid}.attention.dense", # gptneox "gpt_neox.layers.{bid}.attention.dense", # gptneox
"transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
"transformer.blocks.{bid}.attn.out_proj", # mpt "transformer.blocks.{bid}.attn.out_proj", # mpt
"transformer.h.{bid}.self_attention.dense", # falcon "transformer.h.{bid}.self_attention.dense", # falcon
"h.{bid}.self_attention.dense", # bloom "h.{bid}.self_attention.dense", # bloom
"model.layers.{bid}.self_attn.o_proj", # llama-hf "model.layers.{bid}.self_attn.o_proj", # llama-hf
"layers.{bid}.attention.wo", # llama-pth "layers.{bid}.attention.wo", # llama-pth
"encoder.layer.{bid}.attention.output.dense", # bert "encoder.layer.{bid}.attention.output.dense", # bert
"transformer.h.{bid}.attn.out_proj", # gpt-j "transformer.h.{bid}.attn.out_proj", # gpt-j
"language_model.encoder.layers.{bid}.self_attention.dense", # persimmon "language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
"model.layers.{bid}.self_attn.dense", # persimmon "model.layers.{bid}.self_attn.dense", # persimmon
"h.{bid}.attn.c_proj", # gpt2 "h.{bid}.attn.c_proj", # gpt2
"transformer.h.{bid}.mixer.out_proj", # phi2 "transformer.h.{bid}.mixer.out_proj", # phi2
"model.layers.layers.{bid}.self_attn.o_proj", # plamo "model.layers.layers.{bid}.self_attn.o_proj", # plamo
"model.layers.{bid}.attention.wo", # internlm2 "model.layers.{bid}.attention.wo", # internlm2
"encoder.layers.{bid}.attn.out_proj", # nomic-bert "encoder.layers.{bid}.attn.out_proj", # nomic-bert
"transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok "transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
"transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
"encoder.layers.{bid}.self_attention.dense", # chatglm "encoder.layers.{bid}.self_attention.dense", # chatglm
"transformer.layers.{bid}.attn.out_proj", # openelm "transformer.layers.{bid}.attn.out_proj", # openelm
), ),
# Attention output norm # Attention output norm
MODEL_TENSOR.ATTN_OUT_NORM: ( MODEL_TENSOR.ATTN_OUT_NORM: (
"encoder.layer.{bid}.attention.output.LayerNorm", # bert "encoder.layer.{bid}.attention.output.LayerNorm", # bert
"encoder.layers.{bid}.norm1", # nomic-bert "encoder.layers.{bid}.norm1", # nomic-bert
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok "transformer.decoder_layer.{bid}.rms_norm_1", # Grok
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx "transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
), ),
MODEL_TENSOR.ATTN_POST_NORM: ( MODEL_TENSOR.ATTN_POST_NORM: (
"model.layers.{bid}.post_attention_layernorm", # gemma2 "model.layers.{bid}.post_attention_layernorm", # gemma2
), ),
# Rotary embeddings # Rotary embeddings
MODEL_TENSOR.ATTN_ROT_EMBD: ( MODEL_TENSOR.ATTN_ROT_EMBD: (
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
"layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth "layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
"model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo "model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
"transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell "transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell
), ),
# Feed-forward norm # Feed-forward norm
MODEL_TENSOR.FFN_NORM: ( MODEL_TENSOR.FFN_NORM: (
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
"transformer.h.{bid}.ln_2", # gpt2 refact qwen jais "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
"h.{bid}.post_attention_layernorm", # bloom "h.{bid}.post_attention_layernorm", # bloom
"transformer.blocks.{bid}.norm_2", # mpt "transformer.blocks.{bid}.norm_2", # mpt
"model.layers.{bid}.post_attention_layernorm", # llama-hf "model.layers.{bid}.post_attention_layernorm", # llama-hf
"layers.{bid}.ffn_norm", # llama-pth "layers.{bid}.ffn_norm", # llama-pth
"language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
"model.layers.{bid}.ln2", # yi "model.layers.{bid}.ln2", # yi
"h.{bid}.ln_2", # gpt2 "h.{bid}.ln_2", # gpt2
"model.layers.{bid}.ffn_norm", # internlm2 "model.layers.{bid}.ffn_norm", # internlm2
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok "transformer.decoder_layer.{bid}.rms_norm_2", # Grok
"encoder.layers.{bid}.post_attention_layernorm", # chatglm "encoder.layers.{bid}.post_attention_layernorm", # chatglm
"transformer.layers.{bid}.ffn_norm", # openelm "transformer.layers.{bid}.ffn_norm", # openelm
), ),
# Pre feed-forward norm # Pre feed-forward norm
MODEL_TENSOR.FFN_PRE_NORM: ( MODEL_TENSOR.FFN_PRE_NORM: (
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2 "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
), ),
# Post feed-forward norm # Post feed-forward norm
MODEL_TENSOR.FFN_POST_NORM: ( MODEL_TENSOR.FFN_POST_NORM: (
"model.layers.{bid}.post_feedforward_layernorm", # gemma2 "model.layers.{bid}.post_feedforward_layernorm", # gemma2
), ),
MODEL_TENSOR.FFN_GATE_INP: ( MODEL_TENSOR.FFN_GATE_INP: (
"layers.{bid}.feed_forward.gate", # mixtral "layers.{bid}.feed_forward.gate", # mixtral
"model.layers.{bid}.block_sparse_moe.gate", # mixtral "model.layers.{bid}.block_sparse_moe.gate", # mixtral
"model.layers.{bid}.mlp.gate", # qwen2moe "model.layers.{bid}.mlp.gate", # qwen2moe
"transformer.decoder_layer.{bid}.router", # Grok "transformer.decoder_layer.{bid}.router", # Grok
"transformer.blocks.{bid}.ffn.router.layer", # dbrx "transformer.blocks.{bid}.ffn.router.layer", # dbrx
), ),
MODEL_TENSOR.FFN_GATE_INP_SHEXP: ( MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
), ),
# Feed-forward up # Feed-forward up
MODEL_TENSOR.FFN_UP: ( MODEL_TENSOR.FFN_UP: (
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
"transformer.h.{bid}.mlp.c_fc", # gpt2 jais "transformer.h.{bid}.mlp.c_fc", # gpt2 jais
"transformer.blocks.{bid}.ffn.up_proj", # mpt "transformer.blocks.{bid}.ffn.up_proj", # mpt
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
"h.{bid}.mlp.dense_h_to_4h", # bloom "h.{bid}.mlp.dense_h_to_4h", # bloom
"model.layers.{bid}.mlp.up_proj", # llama-hf refact "model.layers.{bid}.mlp.up_proj", # llama-hf refact
"layers.{bid}.feed_forward.w3", # llama-pth "layers.{bid}.feed_forward.w3", # llama-pth
"encoder.layer.{bid}.intermediate.dense", # bert "encoder.layer.{bid}.intermediate.dense", # bert
"transformer.h.{bid}.mlp.fc_in", # gpt-j "transformer.h.{bid}.mlp.fc_in", # gpt-j
"transformer.h.{bid}.mlp.linear_3", # refact "transformer.h.{bid}.mlp.linear_3", # refact
"language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
"model.layers.{bid}.mlp.dense_h_to_4h", # persimmon "model.layers.{bid}.mlp.dense_h_to_4h", # persimmon
"transformer.h.{bid}.mlp.w1", # qwen "transformer.h.{bid}.mlp.w1", # qwen
"h.{bid}.mlp.c_fc", # gpt2 "h.{bid}.mlp.c_fc", # gpt2
"transformer.h.{bid}.mlp.fc1", # phi2 "transformer.h.{bid}.mlp.fc1", # phi2
"model.layers.{bid}.mlp.fc1", # phi2 "model.layers.{bid}.mlp.fc1", # phi2
"model.layers.{bid}.mlp.gate_up_proj", # phi3 "model.layers.{bid}.mlp.gate_up_proj", # phi3
"model.layers.layers.{bid}.mlp.up_proj", # plamo "model.layers.layers.{bid}.mlp.up_proj", # plamo
"model.layers.{bid}.feed_forward.w3", # internlm2 "model.layers.{bid}.feed_forward.w3", # internlm2
"encoder.layers.{bid}.mlp.fc11", # nomic-bert "encoder.layers.{bid}.mlp.fc11", # nomic-bert
"model.layers.{bid}.mlp.c_fc", # starcoder2 "model.layers.{bid}.mlp.c_fc", # starcoder2
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2 "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
"model.layers.{bid}.residual_mlp.w3", # arctic "model.layers.{bid}.residual_mlp.w3", # arctic
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
), ),
MODEL_TENSOR.FFN_UP_EXP: ( MODEL_TENSOR.FFN_UP_EXP: (
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged) "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged) "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged) "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
), ),
MODEL_TENSOR.FFN_UP_SHEXP: ( MODEL_TENSOR.FFN_UP_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2 "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
), ),
# AWQ-activation gate # AWQ-activation gate
MODEL_TENSOR.FFN_ACT: ( MODEL_TENSOR.FFN_ACT: ("transformer.blocks.{bid}.ffn.act",), # mpt
"transformer.blocks.{bid}.ffn.act", # mpt
),
# Feed-forward gate # Feed-forward gate
MODEL_TENSOR.FFN_GATE: ( MODEL_TENSOR.FFN_GATE: (
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
"layers.{bid}.feed_forward.w1", # llama-pth "layers.{bid}.feed_forward.w1", # llama-pth
"transformer.h.{bid}.mlp.w2", # qwen "transformer.h.{bid}.mlp.w2", # qwen
"transformer.h.{bid}.mlp.c_fc2", # jais "transformer.h.{bid}.mlp.c_fc2", # jais
"model.layers.layers.{bid}.mlp.gate_proj", # plamo "model.layers.layers.{bid}.mlp.gate_proj", # plamo
"model.layers.{bid}.feed_forward.w1", # internlm2 "model.layers.{bid}.feed_forward.w1", # internlm2
"encoder.layers.{bid}.mlp.fc12", # nomic-bert "encoder.layers.{bid}.mlp.fc12", # nomic-bert
"encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2 "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
"transformer.h.{bid}.mlp.linear_1", # refact "transformer.h.{bid}.mlp.linear_1", # refact
"model.layers.{bid}.residual_mlp.w1", # arctic "model.layers.{bid}.residual_mlp.w1", # arctic
), ),
MODEL_TENSOR.FFN_GATE_EXP: ( MODEL_TENSOR.FFN_GATE_EXP: (
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged) "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged) "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged) "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
), ),
MODEL_TENSOR.FFN_GATE_SHEXP: ( MODEL_TENSOR.FFN_GATE_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2 "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
), ),
# Feed-forward down # Feed-forward down
MODEL_TENSOR.FFN_DOWN: ( MODEL_TENSOR.FFN_DOWN: (
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
"transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
"transformer.blocks.{bid}.ffn.down_proj", # mpt "transformer.blocks.{bid}.ffn.down_proj", # mpt
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
"h.{bid}.mlp.dense_4h_to_h", # bloom "h.{bid}.mlp.dense_4h_to_h", # bloom
"model.layers.{bid}.mlp.down_proj", # llama-hf "model.layers.{bid}.mlp.down_proj", # llama-hf
"layers.{bid}.feed_forward.w2", # llama-pth "layers.{bid}.feed_forward.w2", # llama-pth
"encoder.layer.{bid}.output.dense", # bert "encoder.layer.{bid}.output.dense", # bert
"transformer.h.{bid}.mlp.fc_out", # gpt-j "transformer.h.{bid}.mlp.fc_out", # gpt-j
"language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
"model.layers.{bid}.mlp.dense_4h_to_h", # persimmon "model.layers.{bid}.mlp.dense_4h_to_h", # persimmon
"h.{bid}.mlp.c_proj", # gpt2 "h.{bid}.mlp.c_proj", # gpt2
"transformer.h.{bid}.mlp.fc2", # phi2 "transformer.h.{bid}.mlp.fc2", # phi2
"model.layers.{bid}.mlp.fc2", # phi2 "model.layers.{bid}.mlp.fc2", # phi2
"model.layers.layers.{bid}.mlp.down_proj", # plamo "model.layers.layers.{bid}.mlp.down_proj", # plamo
"model.layers.{bid}.feed_forward.w2", # internlm2 "model.layers.{bid}.feed_forward.w2", # internlm2
"encoder.layers.{bid}.mlp.fc2", # nomic-bert "encoder.layers.{bid}.mlp.fc2", # nomic-bert
"model.layers.{bid}.mlp.c_proj", # starcoder2 "model.layers.{bid}.mlp.c_proj", # starcoder2
"encoder.layer.{bid}.mlp.wo", # jina-bert-v2 "encoder.layer.{bid}.mlp.wo", # jina-bert-v2
"transformer.layers.{bid}.ffn.proj_2", # openelm "transformer.layers.{bid}.ffn.proj_2", # openelm
"model.layers.{bid}.residual_mlp.w2", # arctic "model.layers.{bid}.residual_mlp.w2", # arctic
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2 "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
), ),
MODEL_TENSOR.FFN_DOWN_EXP: ( MODEL_TENSOR.FFN_DOWN_EXP: (
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged) "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged) "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged) "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
), ),
MODEL_TENSOR.FFN_DOWN_SHEXP: ( MODEL_TENSOR.FFN_DOWN_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2 "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
), ),
MODEL_TENSOR.ATTN_Q_NORM: ( MODEL_TENSOR.ATTN_Q_NORM: (
"language_model.encoder.layers.{bid}.self_attention.q_layernorm", "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
"model.layers.{bid}.self_attn.q_layernorm", # persimmon "model.layers.{bid}.self_attn.q_layernorm", # persimmon
"model.layers.{bid}.self_attn.q_norm", # cohere "model.layers.{bid}.self_attn.q_norm", # cohere
"transformer.blocks.{bid}.attn.q_ln", # sea-lion "transformer.blocks.{bid}.attn.q_ln", # sea-lion
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2 "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
"transformer.layers.{bid}.attn.q_norm", # openelm "transformer.layers.{bid}.attn.q_norm", # openelm
), ),
MODEL_TENSOR.ATTN_K_NORM: ( MODEL_TENSOR.ATTN_K_NORM: (
"language_model.encoder.layers.{bid}.self_attention.k_layernorm", "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
"model.layers.{bid}.self_attn.k_layernorm", # persimmon "model.layers.{bid}.self_attn.k_layernorm", # persimmon
"model.layers.{bid}.self_attn.k_norm", # cohere "model.layers.{bid}.self_attn.k_norm", # cohere
"transformer.blocks.{bid}.attn.k_ln", # sea-lion "transformer.blocks.{bid}.attn.k_ln", # sea-lion
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2 "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
"transformer.layers.{bid}.attn.k_norm", # openelm "transformer.layers.{bid}.attn.k_norm", # openelm
), ),
MODEL_TENSOR.ROPE_FREQS: ( MODEL_TENSOR.ROPE_FREQS: (
"language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
), ),
MODEL_TENSOR.LAYER_OUT_NORM: ( MODEL_TENSOR.LAYER_OUT_NORM: (
"encoder.layer.{bid}.output.LayerNorm", # bert "encoder.layer.{bid}.output.LayerNorm", # bert
"encoder.layers.{bid}.norm2", # nomic-bert "encoder.layers.{bid}.norm2", # nomic-bert
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2 "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
"encoder.layer.{bid}.layer_norm_2" # jina-v2-code "encoder.layer.{bid}.layer_norm_2", # jina-v2-code
), ),
MODEL_TENSOR.SSM_IN: ( MODEL_TENSOR.SSM_IN: (
"model.layers.{bid}.in_proj", "model.layers.{bid}.in_proj",
"backbone.layers.{bid}.mixer.in_proj", "backbone.layers.{bid}.mixer.in_proj",
), ),
MODEL_TENSOR.SSM_CONV1D: ( MODEL_TENSOR.SSM_CONV1D: (
"model.layers.{bid}.conv1d", "model.layers.{bid}.conv1d",
"backbone.layers.{bid}.mixer.conv1d", "backbone.layers.{bid}.mixer.conv1d",
), ),
MODEL_TENSOR.SSM_X: ( MODEL_TENSOR.SSM_X: (
"model.layers.{bid}.x_proj", "model.layers.{bid}.x_proj",
"backbone.layers.{bid}.mixer.x_proj", "backbone.layers.{bid}.mixer.x_proj",
), ),
MODEL_TENSOR.SSM_DT: ( MODEL_TENSOR.SSM_DT: (
"model.layers.{bid}.dt_proj", "model.layers.{bid}.dt_proj",
"backbone.layers.{bid}.mixer.dt_proj", "backbone.layers.{bid}.mixer.dt_proj",
), ),
MODEL_TENSOR.SSM_A: ( MODEL_TENSOR.SSM_A: (
"model.layers.{bid}.A_log", "model.layers.{bid}.A_log",
"backbone.layers.{bid}.mixer.A_log", "backbone.layers.{bid}.mixer.A_log",
), ),
MODEL_TENSOR.SSM_D: ( MODEL_TENSOR.SSM_D: (
"model.layers.{bid}.D", "model.layers.{bid}.D",
"backbone.layers.{bid}.mixer.D", "backbone.layers.{bid}.mixer.D",
), ),
MODEL_TENSOR.SSM_OUT: ( MODEL_TENSOR.SSM_OUT: (
"model.layers.{bid}.out_proj", "model.layers.{bid}.out_proj",
"backbone.layers.{bid}.mixer.out_proj", "backbone.layers.{bid}.mixer.out_proj",
), ),
MODEL_TENSOR.ATTN_Q_A: ("model.layers.{bid}.self_attn.q_a_proj",), # deepseek2
MODEL_TENSOR.ATTN_Q_A: ( MODEL_TENSOR.ATTN_Q_B: ("model.layers.{bid}.self_attn.q_b_proj",), # deepseek2
"model.layers.{bid}.self_attn.q_a_proj", # deepseek2
),
MODEL_TENSOR.ATTN_Q_B: (
"model.layers.{bid}.self_attn.q_b_proj", # deepseek2
),
MODEL_TENSOR.ATTN_KV_A_MQA: ( MODEL_TENSOR.ATTN_KV_A_MQA: (
"model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2 "model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
), ),
MODEL_TENSOR.ATTN_KV_B: ( MODEL_TENSOR.ATTN_KV_B: (
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2 "model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
), ),
MODEL_TENSOR.ATTN_Q_A_NORM: ( MODEL_TENSOR.ATTN_Q_A_NORM: (
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2 "model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
), ),
MODEL_TENSOR.ATTN_KV_A_NORM: ( MODEL_TENSOR.ATTN_KV_A_NORM: (
"model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2 "model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
), ),
MODEL_TENSOR.ATTN_SUB_NORM: ( MODEL_TENSOR.ATTN_SUB_NORM: (
"model.layers.{bid}.self_attn.inner_attn_ln", # bitnet "model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
), ),
MODEL_TENSOR.FFN_SUB_NORM: ("model.layers.{bid}.mlp.ffn_layernorm",), # bitnet
MODEL_TENSOR.FFN_SUB_NORM: ( MODEL_TENSOR.DEC_ATTN_NORM: ("decoder.block.{bid}.layer.0.layer_norm",), # t5
"model.layers.{bid}.mlp.ffn_layernorm", # bitnet MODEL_TENSOR.DEC_ATTN_Q: ("decoder.block.{bid}.layer.0.SelfAttention.q",), # t5
), MODEL_TENSOR.DEC_ATTN_K: ("decoder.block.{bid}.layer.0.SelfAttention.k",), # t5
MODEL_TENSOR.DEC_ATTN_V: ("decoder.block.{bid}.layer.0.SelfAttention.v",), # t5
MODEL_TENSOR.DEC_ATTN_NORM: (
"decoder.block.{bid}.layer.0.layer_norm", # t5
),
MODEL_TENSOR.DEC_ATTN_Q: (
"decoder.block.{bid}.layer.0.SelfAttention.q", # t5
),
MODEL_TENSOR.DEC_ATTN_K: (
"decoder.block.{bid}.layer.0.SelfAttention.k", # t5
),
MODEL_TENSOR.DEC_ATTN_V: (
"decoder.block.{bid}.layer.0.SelfAttention.v", # t5
),
MODEL_TENSOR.DEC_ATTN_OUT: ( MODEL_TENSOR.DEC_ATTN_OUT: (
"decoder.block.{bid}.layer.0.SelfAttention.o", # t5 "decoder.block.{bid}.layer.0.SelfAttention.o", # t5
), ),
MODEL_TENSOR.DEC_ATTN_REL_B: ( MODEL_TENSOR.DEC_ATTN_REL_B: (
"decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5 "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_NORM: ( MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
"decoder.block.{bid}.layer.1.layer_norm", # t5 "decoder.block.{bid}.layer.1.layer_norm", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_Q: ( MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
"decoder.block.{bid}.layer.1.EncDecAttention.q", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_K: ( MODEL_TENSOR.DEC_CROSS_ATTN_K: (
"decoder.block.{bid}.layer.1.EncDecAttention.k", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_V: ( MODEL_TENSOR.DEC_CROSS_ATTN_V: (
"decoder.block.{bid}.layer.1.EncDecAttention.v", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_OUT: ( MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
"decoder.block.{bid}.layer.1.EncDecAttention.o", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: ( MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
"decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
), ),
MODEL_TENSOR.DEC_FFN_NORM: ("decoder.block.{bid}.layer.2.layer_norm",), # t5
MODEL_TENSOR.DEC_FFN_NORM: (
"decoder.block.{bid}.layer.2.layer_norm", # t5
),
MODEL_TENSOR.DEC_FFN_GATE: ( MODEL_TENSOR.DEC_FFN_GATE: (
"decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5 "decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
), ),
MODEL_TENSOR.DEC_FFN_UP: ( MODEL_TENSOR.DEC_FFN_UP: (
"decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5 "decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
"decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5 "decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
), ),
MODEL_TENSOR.DEC_FFN_DOWN: ( MODEL_TENSOR.DEC_FFN_DOWN: (
"decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5 "decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
), ),
MODEL_TENSOR.DEC_OUTPUT_NORM: ("decoder.final_layer_norm",), # t5
MODEL_TENSOR.DEC_OUTPUT_NORM: ( MODEL_TENSOR.ENC_ATTN_NORM: ("encoder.block.{bid}.layer.0.layer_norm",), # t5
"decoder.final_layer_norm", # t5 MODEL_TENSOR.ENC_ATTN_Q: ("encoder.block.{bid}.layer.0.SelfAttention.q",), # t5
), MODEL_TENSOR.ENC_ATTN_K: ("encoder.block.{bid}.layer.0.SelfAttention.k",), # t5
MODEL_TENSOR.ENC_ATTN_V: ("encoder.block.{bid}.layer.0.SelfAttention.v",), # t5
MODEL_TENSOR.ENC_ATTN_NORM: (
"encoder.block.{bid}.layer.0.layer_norm", # t5
),
MODEL_TENSOR.ENC_ATTN_Q: (
"encoder.block.{bid}.layer.0.SelfAttention.q", # t5
),
MODEL_TENSOR.ENC_ATTN_K: (
"encoder.block.{bid}.layer.0.SelfAttention.k", # t5
),
MODEL_TENSOR.ENC_ATTN_V: (
"encoder.block.{bid}.layer.0.SelfAttention.v", # t5
),
MODEL_TENSOR.ENC_ATTN_OUT: ( MODEL_TENSOR.ENC_ATTN_OUT: (
"encoder.block.{bid}.layer.0.SelfAttention.o", # t5 "encoder.block.{bid}.layer.0.SelfAttention.o", # t5
), ),
MODEL_TENSOR.ENC_ATTN_REL_B: ( MODEL_TENSOR.ENC_ATTN_REL_B: (
"encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5 "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
), ),
MODEL_TENSOR.ENC_FFN_NORM: ("encoder.block.{bid}.layer.1.layer_norm",), # t5
MODEL_TENSOR.ENC_FFN_NORM: (
"encoder.block.{bid}.layer.1.layer_norm", # t5
),
MODEL_TENSOR.ENC_FFN_GATE: ( MODEL_TENSOR.ENC_FFN_GATE: (
"encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5 "encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
), ),
MODEL_TENSOR.ENC_FFN_UP: ( MODEL_TENSOR.ENC_FFN_UP: (
"encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5 "encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
"encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5 "encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
), ),
MODEL_TENSOR.ENC_FFN_DOWN: ( MODEL_TENSOR.ENC_FFN_DOWN: (
"encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5 "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
),
MODEL_TENSOR.ENC_OUTPUT_NORM: (
"encoder.final_layer_norm", # t5
), ),
MODEL_TENSOR.ENC_OUTPUT_NORM: ("encoder.final_layer_norm",), # t5
} }
# architecture-specific block mappings # architecture-specific block mappings
arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = { arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = {
MODEL_ARCH.ARCTIC: { MODEL_ARCH.ARCTIC: {
MODEL_TENSOR.FFN_NORM: ( MODEL_TENSOR.FFN_NORM: ("model.layers.{bid}.residual_layernorm",),
"model.layers.{bid}.residual_layernorm", MODEL_TENSOR.FFN_NORM_EXP: ("model.layers.{bid}.post_attention_layernorm",),
),
MODEL_TENSOR.FFN_NORM_EXP: (
"model.layers.{bid}.post_attention_layernorm",
),
}, },
} }
@@ -603,31 +490,35 @@ def __init__(self, arch: MODEL_ARCH, n_blocks: int):
if tensor not in MODEL_TENSORS[arch]: if tensor not in MODEL_TENSORS[arch]:
continue continue
tensor_name = TENSOR_NAMES[tensor].format(bid = bid) tensor_name = TENSOR_NAMES[tensor].format(bid=bid)
self.mapping[tensor_name] = (tensor, tensor_name) self.mapping[tensor_name] = (tensor, tensor_name)
for key in keys: for key in keys:
key = key.format(bid = bid) key = key.format(bid=bid)
self.mapping[key] = (tensor, tensor_name) self.mapping[key] = (tensor, tensor_name)
def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None: def get_type_and_name(
self, key: str, try_suffixes: Sequence[str] = ()
) -> tuple[MODEL_TENSOR, str] | None:
result = self.mapping.get(key) result = self.mapping.get(key)
if result is not None: if result is not None:
return result return result
for suffix in try_suffixes: for suffix in try_suffixes:
if key.endswith(suffix): if key.endswith(suffix):
result = self.mapping.get(key[:-len(suffix)]) result = self.mapping.get(key[: -len(suffix)])
if result is not None: if result is not None:
return result[0], result[1] + suffix return result[0], result[1] + suffix
return None return None
def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None: def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
result = self.get_type_and_name(key, try_suffixes = try_suffixes) result = self.get_type_and_name(key, try_suffixes=try_suffixes)
if result is None: if result is None:
return None return None
return result[1] return result[1]
def get_type(self, key: str, try_suffixes: Sequence[str] = ()) -> MODEL_TENSOR | None: def get_type(
result = self.get_type_and_name(key, try_suffixes = try_suffixes) self, key: str, try_suffixes: Sequence[str] = ()
) -> MODEL_TENSOR | None:
result = self.get_type_and_name(key, try_suffixes=try_suffixes)
if result is None: if result is None:
return None return None
return result[0] return result[0]
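For orientation, a minimal usage sketch of the suffix-aware lookup defined above, assuming the package is importable as gguf (the upstream gguf-py layout; within this repository the import path may differ). The architecture, block count, and printed names are illustrative, not taken from the commit.

# Illustrative sketch only; assumes gguf-py is importable as `gguf`.
import gguf

tmap = gguf.TensorNameMap(gguf.MODEL_ARCH.LLAMA, 32)

# Exact-key lookup: an HF-style tensor name resolves to its canonical GGUF name.
print(tmap.get_name("model.layers.0.self_attn.q_proj"))  # e.g. "blk.0.attn_q"

# Suffix-aware lookup: the suffix is stripped for matching and re-appended to the result.
print(
    tmap.get_name(
        "model.layers.0.self_attn.q_proj.weight", try_suffixes=(".weight", ".bias")
    )
)  # e.g. "blk.0.attn_q.weight"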

View File

@@ -7,21 +7,27 @@ def fill_templated_filename(filename: str, output_type: str | None) -> str:
# Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf' # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
ftype_lowercase: str = output_type.lower() if output_type is not None else "" ftype_lowercase: str = output_type.lower() if output_type is not None else ""
ftype_uppercase: str = output_type.upper() if output_type is not None else "" ftype_uppercase: str = output_type.upper() if output_type is not None else ""
return filename.format(ftype_lowercase, return filename.format(
outtype=ftype_lowercase, ftype=ftype_lowercase, ftype_lowercase,
OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase) outtype=ftype_lowercase,
ftype=ftype_lowercase,
OUTTYPE=ftype_uppercase,
FTYPE=ftype_uppercase,
)
def model_weight_count_rounded_notation(model_params_count: int, min_digits: int = 2) -> str: def model_weight_count_rounded_notation(
if model_params_count > 1e12 : model_params_count: int, min_digits: int = 2
) -> str:
if model_params_count > 1e12:
# Trillions Of Parameters # Trillions Of Parameters
scaled_model_params = model_params_count * 1e-12 scaled_model_params = model_params_count * 1e-12
scale_suffix = "T" scale_suffix = "T"
elif model_params_count > 1e9 : elif model_params_count > 1e9:
# Billions Of Parameters # Billions Of Parameters
scaled_model_params = model_params_count * 1e-9 scaled_model_params = model_params_count * 1e-9
scale_suffix = "B" scale_suffix = "B"
elif model_params_count > 1e6 : elif model_params_count > 1e6:
# Millions Of Parameters # Millions Of Parameters
scaled_model_params = model_params_count * 1e-6 scaled_model_params = model_params_count * 1e-6
scale_suffix = "M" scale_suffix = "M"
@@ -30,39 +36,65 @@ def model_weight_count_rounded_notation(model_params_count: int, min_digits: int
scaled_model_params = model_params_count * 1e-3 scaled_model_params = model_params_count * 1e-3
scale_suffix = "K" scale_suffix = "K"
fix = max(min_digits - len(str(round(scaled_model_params)).lstrip('0')), 0) fix = max(min_digits - len(str(round(scaled_model_params)).lstrip("0")), 0)
return f"{scaled_model_params:.{fix}f}{scale_suffix}" return f"{scaled_model_params:.{fix}f}{scale_suffix}"
def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str: def size_label(
total_params: int, shared_params: int, expert_params: int, expert_count: int
) -> str:
if expert_count > 0: if expert_count > 0:
pretty_size = model_weight_count_rounded_notation(abs(shared_params) + abs(expert_params), min_digits=2) pretty_size = model_weight_count_rounded_notation(
abs(shared_params) + abs(expert_params), min_digits=2
)
size_class = f"{expert_count}x{pretty_size}" size_class = f"{expert_count}x{pretty_size}"
else: else:
size_class = model_weight_count_rounded_notation(abs(total_params), min_digits=2) size_class = model_weight_count_rounded_notation(
abs(total_params), min_digits=2
)
return size_class return size_class
def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str: def naming_convention(
model_name: str | None,
base_name: str | None,
finetune_string: str | None,
version_string: str | None,
size_label: str | None,
output_type: str | None,
model_type: Literal["vocab", "LoRA"] | None = None,
) -> str:
# Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
if base_name is not None: if base_name is not None:
name = base_name.strip().replace(' ', '-').replace('/', '-') name = base_name.strip().replace(" ", "-").replace("/", "-")
elif model_name is not None: elif model_name is not None:
name = model_name.strip().replace(' ', '-').replace('/', '-') name = model_name.strip().replace(" ", "-").replace("/", "-")
else: else:
name = "ggml-model" name = "ggml-model"
parameters = f"-{size_label}" if size_label is not None else "" parameters = f"-{size_label}" if size_label is not None else ""
finetune = f"-{finetune_string.strip().replace(' ', '-')}" if finetune_string is not None else "" finetune = (
f"-{finetune_string.strip().replace(' ', '-')}"
if finetune_string is not None
else ""
)
version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else "" version = (
f"-{version_string.strip().replace(' ', '-')}"
if version_string is not None
else ""
)
encoding = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else "" encoding = (
f"-{output_type.strip().replace(' ', '-').upper()}"
if output_type is not None
else ""
)
kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else "" kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""

View File

@@ -5,7 +5,16 @@
import json import json
import os import os
from pathlib import Path from pathlib import Path
from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable from typing import (
Any,
Callable,
Sequence,
Mapping,
Iterable,
Protocol,
ClassVar,
runtime_checkable,
)
from sentencepiece import SentencePieceProcessor from sentencepiece import SentencePieceProcessor
@@ -23,7 +32,9 @@ class SpecialVocab:
chat_template: str | Sequence[Mapping[str, str]] | None chat_template: str | Sequence[Mapping[str, str]] | None
def __init__( def __init__(
self, path: str | os.PathLike[str], load_merges: bool = False, self,
path: str | os.PathLike[str],
load_merges: bool = False,
special_token_types: Iterable[str] | None = None, special_token_types: Iterable[str] | None = None,
n_vocab: int | None = None, n_vocab: int | None = None,
): ):
@@ -36,40 +47,60 @@ def __init__(
if special_token_types is not None: if special_token_types is not None:
self.special_token_types = special_token_types self.special_token_types = special_token_types
else: else:
self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad', 'cls', 'mask') self.special_token_types = (
"bos",
"eos",
"unk",
"sep",
"pad",
"cls",
"mask",
)
self._load(Path(path)) self._load(Path(path))
def __repr__(self) -> str: def __repr__(self) -> str:
return '<SpecialVocab with {} merges, special tokens {}, add special tokens {}>'.format( return "<SpecialVocab with {} merges, special tokens {}, add special tokens {}>".format(
len(self.merges), self.special_token_ids or "unset", self.add_special_token or "unset", len(self.merges),
self.special_token_ids or "unset",
self.add_special_token or "unset",
) )
def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None: def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
if self.merges: if self.merges:
if not quiet: if not quiet:
logger.info(f'Adding {len(self.merges)} merge(s).') logger.info(f"Adding {len(self.merges)} merge(s).")
gw.add_token_merges(self.merges) gw.add_token_merges(self.merges)
elif self.load_merges: elif self.load_merges:
logger.warning('Adding merges requested but no merges found, output may be non-functional.') logger.warning(
"Adding merges requested but no merges found, output may be non-functional."
)
for typ, tokid in self.special_token_ids.items(): for typ, tokid in self.special_token_ids.items():
id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None) id_handler: Callable[[int], None] | None = getattr(
gw, f"add_{typ}_token_id", None
)
if id_handler is None: if id_handler is None:
logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping') logger.warning(
f"No handler for special token type {typ} with id {tokid} - skipping"
)
continue continue
if not quiet: if not quiet:
logger.info(f'Setting special token type {typ} to {tokid}') logger.info(f"Setting special token type {typ} to {tokid}")
id_handler(tokid) id_handler(tokid)
for typ, value in self.add_special_token.items(): for typ, value in self.add_special_token.items():
add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None) add_handler: Callable[[bool], None] | None = getattr(
gw, f"add_add_{typ}_token", None
)
if add_handler is None: if add_handler is None:
logger.warning(f'No handler for add_{typ}_token with value {value} - skipping') logger.warning(
f"No handler for add_{typ}_token with value {value} - skipping"
)
continue continue
if not quiet: if not quiet:
logger.info(f'Setting add_{typ}_token to {value}') logger.info(f"Setting add_{typ}_token to {value}")
add_handler(value) add_handler(value)
if self.chat_template is not None: if self.chat_template is not None:
if not quiet: if not quiet:
logger.info(f'Setting chat_template to {self.chat_template}') logger.info(f"Setting chat_template to {self.chat_template}")
gw.add_chat_template(self.chat_template) gw.add_chat_template(self.chat_template)
def _load(self, path: Path) -> None: def _load(self, path: Path) -> None:
@@ -79,12 +110,12 @@ def _load(self, path: Path) -> None:
self._try_load_merges_txt(path) self._try_load_merges_txt(path)
def _try_load_merges_txt(self, path: Path) -> bool: def _try_load_merges_txt(self, path: Path) -> bool:
merges_file = path / 'merges.txt' merges_file = path / "merges.txt"
if not merges_file.is_file(): if not merges_file.is_file():
return False return False
with open(merges_file, 'r', encoding = 'utf-8') as fp: with open(merges_file, "r", encoding="utf-8") as fp:
first_line = next(fp, '').strip() first_line = next(fp, "").strip()
if not first_line.startswith('#'): if not first_line.startswith("#"):
fp.seek(0) fp.seek(0)
line_num = 0 line_num = 0
else: else:
@@ -97,9 +128,11 @@ def _try_load_merges_txt(self, path: Path) -> bool:
continue continue
parts = line.split(None, 3) parts = line.split(None, 3)
if len(parts) != 2: if len(parts) != 2:
logger.warning(f'{merges_file.name}: Line {line_num}: Entry malformed, ignoring') logger.warning(
f"{merges_file.name}: Line {line_num}: Entry malformed, ignoring"
)
continue continue
merges.append(f'{parts[0]} {parts[1]}') merges.append(f"{parts[0]} {parts[1]}")
self.merges = merges self.merges = merges
return True return True
@@ -107,45 +140,49 @@ def _set_special_token(self, typ: str, tid: Any) -> None:
if not isinstance(tid, int): if not isinstance(tid, int):
return return
if tid < 0: if tid < 0:
raise ValueError(f'invalid value for special token type {typ}: {tid}') raise ValueError(f"invalid value for special token type {typ}: {tid}")
if self.n_vocab is None or tid < self.n_vocab: if self.n_vocab is None or tid < self.n_vocab:
if typ in self.special_token_ids: if typ in self.special_token_ids:
return return
self.special_token_ids[typ] = tid self.special_token_ids[typ] = tid
return return
logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping') logger.warning(
f"Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping"
)
def _try_load_from_tokenizer_json(self, path: Path) -> bool: def _try_load_from_tokenizer_json(self, path: Path) -> bool:
tokenizer_file = path / 'tokenizer.json' tokenizer_file = path / "tokenizer.json"
if tokenizer_file.is_file(): if tokenizer_file.is_file():
with open(tokenizer_file, encoding = 'utf-8') as f: with open(tokenizer_file, encoding="utf-8") as f:
tokenizer = json.load(f) tokenizer = json.load(f)
if self.load_merges: if self.load_merges:
merges = tokenizer.get('model', {}).get('merges') merges = tokenizer.get("model", {}).get("merges")
if isinstance(merges, list) and merges and isinstance(merges[0], str): if isinstance(merges, list) and merges and isinstance(merges[0], str):
self.merges = merges self.merges = merges
added_tokens = tokenizer.get('added_tokens', {}) added_tokens = tokenizer.get("added_tokens", {})
else: else:
added_tokens = {} added_tokens = {}
tokenizer_config_file = path / 'tokenizer_config.json' tokenizer_config_file = path / "tokenizer_config.json"
if not tokenizer_config_file.is_file(): if not tokenizer_config_file.is_file():
return True return True
with open(tokenizer_config_file, encoding = 'utf-8') as f: with open(tokenizer_config_file, encoding="utf-8") as f:
tokenizer_config = json.load(f) tokenizer_config = json.load(f)
chat_template = tokenizer_config.get('chat_template') chat_template = tokenizer_config.get("chat_template")
if chat_template is None or isinstance(chat_template, (str, list)): if chat_template is None or isinstance(chat_template, (str, list)):
self.chat_template = chat_template self.chat_template = chat_template
else: else:
logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring') logger.warning(
f"Bad type for chat_template field in {tokenizer_config_file!r} - ignoring"
)
for typ in self.special_token_types: for typ in self.special_token_types:
add_entry = tokenizer_config.get(f'add_{typ}_token') add_entry = tokenizer_config.get(f"add_{typ}_token")
if isinstance(add_entry, bool): if isinstance(add_entry, bool):
self.add_special_token[typ] = add_entry self.add_special_token[typ] = add_entry
entry = tokenizer_config.get(f'{typ}_token') entry = tokenizer_config.get(f"{typ}_token")
if isinstance(entry, str): if isinstance(entry, str):
tc_content = entry tc_content = entry
elif isinstance(entry, dict): elif isinstance(entry, dict):
entry_content = entry.get('content') entry_content = entry.get("content")
if not isinstance(entry_content, str): if not isinstance(entry_content, str):
continue continue
tc_content = entry_content tc_content = entry_content
@@ -153,20 +190,24 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
continue continue
# We only need the first match here. # We only need the first match here.
maybe_token_id = next( maybe_token_id = next(
(atok.get('id') for atok in added_tokens if atok.get('content') == tc_content), (
atok.get("id")
for atok in added_tokens
if atok.get("content") == tc_content
),
None, None,
) )
self._set_special_token(typ, maybe_token_id) self._set_special_token(typ, maybe_token_id)
return True return True
def _try_load_from_config_json(self, path: Path) -> bool: def _try_load_from_config_json(self, path: Path) -> bool:
config_file = path / 'config.json' config_file = path / "config.json"
if not config_file.is_file(): if not config_file.is_file():
return False return False
with open(config_file, encoding = 'utf-8') as f: with open(config_file, encoding="utf-8") as f:
config = json.load(f) config = json.load(f)
for typ in self.special_token_types: for typ in self.special_token_types:
self._set_special_token(typ, config.get(f'{typ}_token_id')) self._set_special_token(typ, config.get(f"{typ}_token_id"))
return True return True
@@ -202,54 +243,59 @@ class BpeVocab(Vocab):
def __init__(self, base_path: Path): def __init__(self, base_path: Path):
added_tokens: dict[str, int] = {} added_tokens: dict[str, int] = {}
if (fname_tokenizer := base_path / 'vocab.json').exists(): if (fname_tokenizer := base_path / "vocab.json").exists():
# "slow" tokenizer # "slow" tokenizer
with open(fname_tokenizer, encoding="utf-8") as f: with open(fname_tokenizer, encoding="utf-8") as f:
self.vocab = json.load(f) self.vocab = json.load(f)
try: try:
# FIXME: Verify that added tokens here _cannot_ overlap with the main vocab. # FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
with open(base_path / 'added_tokens.json', encoding="utf-8") as f: with open(base_path / "added_tokens.json", encoding="utf-8") as f:
added_tokens = json.load(f) added_tokens = json.load(f)
except FileNotFoundError: except FileNotFoundError:
pass pass
else: else:
# "fast" tokenizer # "fast" tokenizer
fname_tokenizer = base_path / 'tokenizer.json' fname_tokenizer = base_path / "tokenizer.json"
# if this fails, FileNotFoundError propagates to caller # if this fails, FileNotFoundError propagates to caller
with open(fname_tokenizer, encoding="utf-8") as f: with open(fname_tokenizer, encoding="utf-8") as f:
tokenizer_json = json.load(f) tokenizer_json = json.load(f)
tokenizer_model: dict[str, Any] = tokenizer_json['model'] tokenizer_model: dict[str, Any] = tokenizer_json["model"]
if ( if (
tokenizer_model['type'] != 'BPE' or tokenizer_model.get('byte_fallback', False) tokenizer_model["type"] != "BPE"
or tokenizer_json['decoder']['type'] != 'ByteLevel' or tokenizer_model.get("byte_fallback", False)
or tokenizer_json["decoder"]["type"] != "ByteLevel"
): ):
raise FileNotFoundError('Cannot find GPT-2 BPE tokenizer') raise FileNotFoundError("Cannot find GPT-2 BPE tokenizer")
self.vocab = tokenizer_model["vocab"] self.vocab = tokenizer_model["vocab"]
if (added := tokenizer_json.get('added_tokens')) is not None: if (added := tokenizer_json.get("added_tokens")) is not None:
# Added tokens here can be duplicates of the main vocabulary. # Added tokens here can be duplicates of the main vocabulary.
added_tokens = {item['content']: item['id'] added_tokens = {
for item in added item["content"]: item["id"]
if item['content'] not in self.vocab} for item in added
if item["content"] not in self.vocab
}
vocab_size = len(self.vocab) vocab_size = len(self.vocab)
expected_ids = list(range(vocab_size, vocab_size + len(added_tokens))) expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
actual_ids = sorted(added_tokens.values()) actual_ids = sorted(added_tokens.values())
if expected_ids != actual_ids: if expected_ids != actual_ids:
expected_end_id = vocab_size + len(actual_ids) - 1 expected_end_id = vocab_size + len(actual_ids) - 1
raise ValueError(f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range " raise ValueError(
f"{vocab_size} - {expected_end_id}; got {actual_ids}") f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
f"{vocab_size} - {expected_end_id}; got {actual_ids}"
)
items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1]) items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
self.added_tokens_dict = added_tokens self.added_tokens_dict = added_tokens
self.added_tokens_list = [text for (text, idx) in items] self.added_tokens_list = [text for (text, idx) in items]
self.vocab_size_base = vocab_size self.vocab_size_base = vocab_size
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list) self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
self.fname_tokenizer = fname_tokenizer self.fname_tokenizer = fname_tokenizer
def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
reverse_vocab = {id: encoded_tok for encoded_tok, id in self.vocab.items()} reverse_vocab = {id: encoded_tok for encoded_tok, id in self.vocab.items()}
@@ -276,40 +322,44 @@ class SentencePieceVocab(Vocab):
def __init__(self, base_path: Path): def __init__(self, base_path: Path):
added_tokens: dict[str, int] = {} added_tokens: dict[str, int] = {}
if (fname_tokenizer := base_path / 'tokenizer.model').exists(): if (fname_tokenizer := base_path / "tokenizer.model").exists():
# normal location # normal location
try: try:
with open(base_path / 'added_tokens.json', encoding="utf-8") as f: with open(base_path / "added_tokens.json", encoding="utf-8") as f:
added_tokens = json.load(f) added_tokens = json.load(f)
except FileNotFoundError: except FileNotFoundError:
pass pass
elif not (fname_tokenizer := base_path.parent / 'tokenizer.model').exists(): elif not (fname_tokenizer := base_path.parent / "tokenizer.model").exists():
# not found in alternate location either # not found in alternate location either
raise FileNotFoundError('Cannot find tokenizer.model') raise FileNotFoundError("Cannot find tokenizer.model")
self.sentencepiece_tokenizer = SentencePieceProcessor() self.sentencepiece_tokenizer = SentencePieceProcessor()
self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer)) self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer))
vocab_size = self.sentencepiece_tokenizer.vocab_size() vocab_size = self.sentencepiece_tokenizer.vocab_size()
new_tokens = {id: piece for piece, id in added_tokens.items() if id >= vocab_size} new_tokens = {
id: piece for piece, id in added_tokens.items() if id >= vocab_size
}
expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens))) expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens)))
actual_new_ids = sorted(new_tokens.keys()) actual_new_ids = sorted(new_tokens.keys())
if expected_new_ids != actual_new_ids: if expected_new_ids != actual_new_ids:
raise ValueError(f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}") raise ValueError(
f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}"
)
# Token pieces that were added to the base vocabulary. # Token pieces that were added to the base vocabulary.
self.added_tokens_dict = added_tokens self.added_tokens_dict = added_tokens
self.added_tokens_list = [new_tokens[id] for id in actual_new_ids] self.added_tokens_list = [new_tokens[id] for id in actual_new_ids]
self.vocab_size_base = vocab_size self.vocab_size_base = vocab_size
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list) self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
self.fname_tokenizer = fname_tokenizer self.fname_tokenizer = fname_tokenizer
def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
tokenizer = self.sentencepiece_tokenizer tokenizer = self.sentencepiece_tokenizer
for i in range(tokenizer.vocab_size()): for i in range(tokenizer.vocab_size()):
piece = tokenizer.IdToPiece(i) piece = tokenizer.IdToPiece(i)
text = piece.encode("utf-8") text = piece.encode("utf-8")
score: float = tokenizer.GetScore(i) score: float = tokenizer.GetScore(i)
toktype = gguf.TokenType.NORMAL toktype = gguf.TokenType.NORMAL
@@ -347,25 +397,27 @@ class LlamaHfVocab(Vocab):
name = "hfft" name = "hfft"
def __init__(self, base_path: Path): def __init__(self, base_path: Path):
fname_tokenizer = base_path / 'tokenizer.json' fname_tokenizer = base_path / "tokenizer.json"
# if this fails, FileNotFoundError propagates to caller # if this fails, FileNotFoundError propagates to caller
with open(fname_tokenizer, encoding='utf-8') as f: with open(fname_tokenizer, encoding="utf-8") as f:
tokenizer_json = json.load(f) tokenizer_json = json.load(f)
# pre-check so we know if we need transformers # pre-check so we know if we need transformers
tokenizer_model: dict[str, Any] = tokenizer_json['model'] tokenizer_model: dict[str, Any] = tokenizer_json["model"]
is_llama3 = ( is_llama3 = (
tokenizer_model['type'] == 'BPE' and tokenizer_model.get('ignore_merges', False) tokenizer_model["type"] == "BPE"
and not tokenizer_model.get('byte_fallback', True) and tokenizer_model.get("ignore_merges", False)
and not tokenizer_model.get("byte_fallback", True)
) )
if is_llama3: if is_llama3:
raise TypeError('Llama 3 must be converted with BpeVocab') raise TypeError("Llama 3 must be converted with BpeVocab")
if not is_llama3 and ( if not is_llama3 and (
tokenizer_model['type'] != 'BPE' or not tokenizer_model.get('byte_fallback', False) tokenizer_model["type"] != "BPE"
or tokenizer_json['decoder']['type'] != 'Sequence' or not tokenizer_model.get("byte_fallback", False)
or tokenizer_json["decoder"]["type"] != "Sequence"
): ):
raise FileNotFoundError('Cannot find Llama BPE tokenizer') raise FileNotFoundError("Cannot find Llama BPE tokenizer")
try: try:
from transformers import AutoTokenizer from transformers import AutoTokenizer
@@ -387,7 +439,7 @@ def __init__(self, base_path: Path):
# Initialize lists and dictionaries for added tokens # Initialize lists and dictionaries for added tokens
self.added_tokens_list = [] self.added_tokens_list = []
self.added_tokens_dict = dict() self.added_tokens_dict = dict()
self.added_tokens_ids = set() self.added_tokens_ids = set()
# Process added tokens # Process added tokens
for tok, tokidx in sorted( for tok, tokidx in sorted(
@@ -408,7 +460,7 @@ def __init__(self, base_path: Path):
# Set vocabulary sizes # Set vocabulary sizes
self.vocab_size_base = self.tokenizer.vocab_size self.vocab_size_base = self.tokenizer.vocab_size
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list) self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
self.fname_tokenizer = fname_tokenizer self.fname_tokenizer = fname_tokenizer
@@ -427,16 +479,22 @@ def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
# Yield token text, score, and type # Yield token text, score, and type
yield token_text, self.get_token_score(token_id), self.get_token_type( yield token_text, self.get_token_score(token_id), self.get_token_type(
token_id, token_text, self.special_ids # Reuse already stored special IDs token_id,
token_text,
self.special_ids, # Reuse already stored special IDs
) )
def get_token_type(self, token_id: int, token_text: bytes, special_ids: set[int]) -> gguf.TokenType: def get_token_type(
self, token_id: int, token_text: bytes, special_ids: set[int]
) -> gguf.TokenType:
# Special case for byte tokens # Special case for byte tokens
if re.fullmatch(br"<0x[0-9A-Fa-f]{2}>", token_text): if re.fullmatch(rb"<0x[0-9A-Fa-f]{2}>", token_text):
return gguf.TokenType.BYTE return gguf.TokenType.BYTE
# Determine token type based on whether it's a special token # Determine token type based on whether it's a special token
return gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL return (
gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL
)
def get_token_score(self, token_id: int) -> float: def get_token_score(self, token_id: int) -> float:
# Placeholder for actual logic to determine the token's score # Placeholder for actual logic to determine the token's score
@@ -446,7 +504,9 @@ def get_token_score(self, token_id: int) -> float:
def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
for text in self.added_tokens_list: for text in self.added_tokens_list:
if text in self.specials: if text in self.specials:
toktype = self.get_token_type(self.specials[text], b'', self.special_ids) toktype = self.get_token_type(
self.specials[text], b"", self.special_ids
)
score = self.get_token_score(self.specials[text]) score = self.get_token_score(self.specials[text])
else: else:
toktype = gguf.TokenType.USER_DEFINED toktype = gguf.TokenType.USER_DEFINED
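A small, hedged sketch of how the SpecialVocab class shown earlier in this file's diff is typically driven; the model directory and vocabulary size below are placeholders, and the writer in the commented line stands for a gguf GGUFWriter instance created elsewhere.

# Illustrative sketch only; the path and n_vocab are placeholders.
from pathlib import Path

sv = SpecialVocab(Path("./my-model"), load_merges=True, n_vocab=32000)
print(sv)  # <SpecialVocab with N merges, special tokens {...}, add special tokens {...}>
# sv.add_to_gguf(writer)  # with a GGUFWriter, records merges, special token ids and chat template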

View File

@@ -1,39 +1,66 @@
import os import os
import sys import sys
import psutil import psutil
import subprocess import subprocess
import time import time
import signal import signal
import json import json
import platform import platform
import requests import requests
import zipfile import zipfile
from datetime import datetime from datetime import datetime
from PyQt6.QtWidgets import (QApplication, QMainWindow, QVBoxLayout, QHBoxLayout, QWidget, QPushButton, from PyQt6.QtWidgets import (
QListWidget, QLineEdit, QLabel, QFileDialog, QProgressBar, QComboBox, QTextEdit, QApplication,
QCheckBox, QGroupBox, QFormLayout, QScrollArea, QSlider, QSpinBox, QListWidgetItem, QMainWindow,
QMessageBox, QDialog, QPlainTextEdit, QMenu) QVBoxLayout,
from PyQt6.QtCore import QTimer, QThread, pyqtSignal, Qt, QSize QHBoxLayout,
from PyQt6.QtGui import QCloseEvent, QAction QWidget,
QPushButton,
def ensure_directory(path): QListWidget,
if not os.path.exists(path): QLineEdit,
os.makedirs(path) QLabel,
QFileDialog,
def open_file_safe(file_path, mode='r'): QProgressBar,
encodings = ['utf-8', 'latin-1', 'ascii', 'utf-16'] QComboBox,
for encoding in encodings: QTextEdit,
try: QCheckBox,
return open(file_path, mode, encoding=encoding) QGroupBox,
except UnicodeDecodeError: QFormLayout,
continue QScrollArea,
raise ValueError(f"Unable to open file {file_path} with any of the encodings: {encodings}") QSlider,
QSpinBox,
def resource_path(relative_path): QListWidgetItem,
try: QMessageBox,
# PyInstaller creates a temp folder and stores path in _MEIPASS QDialog,
base_path = sys._MEIPASS QPlainTextEdit,
except Exception: QMenu,
base_path = os.path.abspath(".") )
from PyQt6.QtCore import QTimer, QThread, pyqtSignal, Qt, QSize
return os.path.join(base_path, relative_path) from PyQt6.QtGui import QCloseEvent, QAction
def ensure_directory(path):
if not os.path.exists(path):
os.makedirs(path)
def open_file_safe(file_path, mode="r"):
encodings = ["utf-8", "latin-1", "ascii", "utf-16"]
for encoding in encodings:
try:
return open(file_path, mode, encoding=encoding)
except UnicodeDecodeError:
continue
raise ValueError(
f"Unable to open file {file_path} with any of the encodings: {encodings}"
)
def resource_path(relative_path):
try:
# PyInstaller creates a temp folder and stores path in _MEIPASS
base_path = sys._MEIPASS
except Exception:
base_path = os.path.abspath(".")
return os.path.join(base_path, relative_path)
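To make the intent of these helpers concrete, a brief hedged sketch of how they might be called; the file and directory names are illustrative, and the functions are assumed to be in scope (run in the same module or imported from it).

# Illustrative usage; the paths below are placeholders.
ensure_directory("quantized_models")  # creates the folder if it does not already exist

with open_file_safe("quantized_models/output.log") as f:  # tries utf-8, latin-1, ascii, utf-16 in turn
    text = f.read()

icon = resource_path("assets/icon.png")  # resolves inside a PyInstaller bundle, else relative to the CWD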

File diff suppressed because it is too large

View File

@@ -1,9 +1,9 @@
import sys import sys
from PyQt6.QtWidgets import QApplication from PyQt6.QtWidgets import QApplication
from AutoGGUF import AutoGGUF from AutoGGUF import AutoGGUF
if __name__ == "__main__": if __name__ == "__main__":
app = QApplication(sys.argv) app = QApplication(sys.argv)
window = AutoGGUF() window = AutoGGUF()
window.show() window.show()
sys.exit(app.exec()) sys.exit(app.exec())