style: format code with Black

BuildTools 2024-08-04 19:50:34 -07:00
parent 2dc5bd9e8a
commit fa51f7cdb8
21 changed files with 8215 additions and 6922 deletions
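The reformatting below is what Black produces with its defaults (88-character lines, double quotes, trailing commas in exploded calls). As a rough illustration only — the black package, its format_str helper, and Mode defaults are assumptions here, not something recorded in this commit — one of the long lines from the Logger.py diff below can be re-wrapped like this:

import black

src = (
    "file_handler = RotatingFileHandler(log_file, "
    "maxBytes=10*1024*1024, backupCount=5, encoding='utf-8')\n"
)
# Prints the same exploded three-line layout that appears in the Logger.py hunk below.
print(black.format_str(src, mode=black.Mode()), end="")

A whole-repository pass like this one is typically produced by running black . at the project root.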

View File

@@ -452,8 +452,13 @@ def __init__(self):
        # Output Type Dropdown
        self.lora_output_type_combo = QComboBox()
        self.lora_output_type_combo.addItems(["GGML", "GGUF"])
        self.lora_output_type_combo.currentIndexChanged.connect(
            self.update_base_model_visibility
        )
        lora_layout.addRow(
            self.create_label(OUTPUT_TYPE, SELECT_OUTPUT_TYPE),
            self.lora_output_type_combo,
        )

        # Base Model Path (initially hidden)
        self.base_model_label = self.create_label(BASE_MODEL, SELECT_BASE_MODEL_FILE)
@@ -471,7 +476,9 @@ def __init__(self):
        wrapper_layout = QHBoxLayout(self.base_model_wrapper)
        wrapper_layout.addWidget(self.base_model_label)
        wrapper_layout.addWidget(self.base_model_widget, 1)  # Give it a stretch factor
        wrapper_layout.setContentsMargins(
            0, 0, 0, 0
        )  # Remove margins for better alignment

        # Add the wrapper to the layout
        lora_layout.addRow(self.base_model_wrapper)
@@ -1395,7 +1402,7 @@ def quantize_model(self):
            override_string = entry.get_override_string(
                model_name=model_name,
                quant_type=quant_type,
                output_path=output_path,
            )
            if override_string:
                command.extend(["--override-kv", override_string])
@@ -1430,7 +1437,9 @@ def quantize_model(self):
        self.task_list.setItemWidget(list_item, task_item)

        # Connect the output signal to the new progress parsing function
        thread.output_signal.connect(
            lambda line: self.parse_progress(line, task_item)
        )
        thread.status_signal.connect(task_item.update_status)
        thread.finished_signal.connect(lambda: self.task_finished(thread))
        thread.error_signal.connect(lambda err: self.handle_error(err, task_item))

View File

@@ -13,6 +13,7 @@
import zipfile
from datetime import datetime


class DownloadThread(QThread):
    progress_signal = pyqtSignal(int)
    finished_signal = pyqtSignal(str)
@@ -27,11 +28,11 @@ def run(self):
        try:
            response = requests.get(self.url, stream=True)
            response.raise_for_status()
            total_size = int(response.headers.get("content-length", 0))
            block_size = 8192
            downloaded = 0

            with open(self.save_path, "wb") as file:
                for data in response.iter_content(block_size):
                    size = file.write(data)
                    downloaded += size
@@ -41,7 +42,7 @@ def run(self):
            # Extract the downloaded zip file
            extract_dir = os.path.splitext(self.save_path)[0]
            with zipfile.ZipFile(self.save_path, "r") as zip_ref:
                zip_ref.extractall(extract_dir)

            # Remove the zip file after extraction

View File

@@ -7,6 +7,7 @@
import socket
import platform


class KVOverrideEntry(QWidget):
    deleted = pyqtSignal(QWidget)
@@ -44,7 +45,9 @@ def __init__(self, parent=None):
    def delete_clicked(self):
        self.deleted.emit(self)

    def get_override_string(
        self, model_name=None, quant_type=None, output_path=None
    ):  # Add arguments
        key = self.key_input.text()
        type_ = self.type_combo.currentText()
        value = self.value_input.text()
@@ -60,9 +63,15 @@ def get_override_string(self, model_name=None, quant_type=None, output_path=None
            "{system.python.version}": lambda: platform.python_version(),
            "{system.time.milliseconds}": lambda: str(int(time.time() * 1000)),
            "{system.date}": lambda: datetime.now().strftime("%Y-%m-%d"),
            "{model.name}": lambda: (
                model_name if model_name is not None else "Unknown Model"
            ),
            "{quant.type}": lambda: (
                quant_type if quant_type is not None else "Unknown Quant"
            ),
            "{output.path}": lambda: (
                output_path if output_path is not None else "Unknown Output Path"
            ),
        }

        for param, func in dynamic_params.items():
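Aside from the re-wrapping, the get_override_string logic above is unchanged: each "{...}" placeholder maps to a callable whose result is substituted into the override value. A minimal, self-contained sketch of that pattern (simplified names, not the project's exact code):

from datetime import datetime

def resolve_placeholders(template, model_name=None):
    # Placeholder -> callable, mirroring the dynamic_params table in the diff above.
    dynamic_params = {
        "{system.date}": lambda: datetime.now().strftime("%Y-%m-%d"),
        "{model.name}": lambda: (
            model_name if model_name is not None else "Unknown Model"
        ),
    }
    for param, func in dynamic_params.items():
        template = template.replace(param, func())
    return template

print(resolve_placeholders("{model.name}-{system.date}", model_name="llama-3-8b"))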

View File

@@ -4,6 +4,7 @@
import sys
from datetime import datetime


class Logger:
    def __init__(self, name, log_dir):
        self.logger = logging.getLogger(name)
@@ -15,15 +16,19 @@ def __init__(self, name, log_dir):
        # Console handler
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        console_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        console_handler.setFormatter(console_format)

        # File handler
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(log_dir, f"latest_{timestamp}.log")
        file_handler = RotatingFileHandler(
            log_file, maxBytes=10 * 1024 * 1024, backupCount=5, encoding="utf-8"
        )
        file_handler.setLevel(logging.DEBUG)
        file_format = logging.Formatter(
            "%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
        )
        file_handler.setFormatter(file_format)

        # Add handlers to logger

View File

@@ -13,6 +13,7 @@
import zipfile
from datetime import datetime


class ModelInfoDialog(QDialog):
    def __init__(self, model_info, parent=None):
        super().__init__(parent)
@@ -41,8 +42,7 @@ def format_model_info(self, model_info):
        html += f"<p><b>Tensors:</b> {model_info.get('tensors', 'N/A')}</p>"

        html += "<h3>Key-Value Pairs:</h3>"
        for key, value in model_info.get("kv_data", {}).items():
            html += f"<p><b>{key}:</b> {value}</p>"

        return html

View File

@@ -15,6 +15,7 @@
from datetime import datetime
from imports_and_globals import open_file_safe


class QuantizationThread(QThread):
    # Define custom signals for communication with the main thread
    output_signal = pyqtSignal(str)

View File

@@ -13,6 +13,7 @@
import zipfile
from datetime import datetime


class TaskListItem(QWidget):
    def __init__(self, task_name, log_file, show_progress_bar=True, parent=None):
        super().__init__(parent)

View File

@@ -12,8 +12,8 @@
import numpy as np
import torch

if "NO_LOCAL_GGUF" not in os.environ:
    sys.path.insert(1, str(Path(__file__).parent / "gguf-py" / "gguf"))
import gguf

logging.basicConfig(level=logging.DEBUG)
@@ -35,7 +35,9 @@ def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
    fout.write(struct.pack("i", int(params["lora_alpha"])))


def write_tensor_header(
    fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]
) -> None:
    sname = name.encode("utf-8")
    fout.write(
        struct.pack(
@@ -49,15 +51,21 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
    fout.write(sname)
    fout.seek((fout.tell() + 31) & -32)


def pyinstaller_include():
    # PyInstaller import
    pass


if __name__ == "__main__":
    if len(sys.argv) < 2:
        logger.info(f"Usage: python {sys.argv[0]} <path> [arch]")
        logger.info(
            "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
        )
        logger.info(
            f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)"
        )
        sys.exit(1)

    input_json = os.path.join(sys.argv[1], "adapter_config.json")
@@ -70,6 +78,7 @@ def pyinstaller_include():
        input_model = os.path.join(sys.argv[1], "adapter_model.safetensors")
        # lazy import load_file only if lora is in safetensors format.
        from safetensors.torch import load_file

        model = load_file(input_model, device="cpu")

    arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
@@ -78,14 +87,18 @@ def pyinstaller_include():
        logger.error(f"Error: unsupported architecture {arch_name}")
        sys.exit(1)

    arch = list(gguf.MODEL_ARCH_NAMES.keys())[
        list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)
    ]
    name_map = gguf.TensorNameMap(arch, 200)  # 200 layers ought to be enough for anyone

    with open(input_json, "r") as f:
        params = json.load(f)

    if params["peft_type"] != "LORA":
        logger.error(
            f"Error: unsupported adapter type {params['peft_type']}, expected LORA"
        )
        sys.exit(1)

    if params["fan_in_fan_out"] is True:
@@ -136,7 +149,9 @@ def pyinstaller_include():
        tname = name_map.get_name(k)
        if tname is None:
            logger.error(f"Error: could not map tensor name {orig_k}")
            logger.error(
                " Note: the arch parameter must be specified if the model is not llama"
            )
            sys.exit(1)

        if suffix == ".lora_A.weight":
@@ -146,7 +161,9 @@ def pyinstaller_include():
        else:
            assert False

        logger.info(
            f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB"
        )

        write_tensor_header(fout, tname, t.shape, t.dtype)
        t.tofile(fout)

View File

@@ -54,7 +54,9 @@ class General:
        SOURCE_URL = "general.source.url"  # Model Website/Paper
        SOURCE_DOI = "general.source.doi"
        SOURCE_UUID = "general.source.uuid"
        SOURCE_REPO_URL = (
            "general.source.repo_url"  # Model Source Repository (git/svn/etc...)
        )

        # Base Model Source. There can be more than one source if it's a merged
        # model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in
@@ -136,7 +138,9 @@ class Tokenizer:
        PRE = "tokenizer.ggml.pre"
        LIST = "tokenizer.ggml.tokens"
        TOKEN_TYPE = "tokenizer.ggml.token_type"
        TOKEN_TYPE_COUNT = (
            "tokenizer.ggml.token_type_count"  # for BERT-style token types
        )
        SCORES = "tokenizer.ggml.scores"
        MERGES = "tokenizer.ggml.merges"
        BOS_ID = "tokenizer.ggml.bos_token_id"
@@ -166,6 +170,7 @@ class Adapter:
        TYPE = "adapter.type"
        LORA_ALPHA = "adapter.lora.alpha"


#
# recommended mapping of model tensor names for storage in gguf
#
@@ -1104,9 +1109,9 @@ class TokenType(IntEnum):

class RopeScalingType(Enum):
    NONE = "none"
    LINEAR = "linear"
    YARN = "yarn"


class PoolingType(IntEnum):

View File

@@ -67,7 +67,7 @@ class ReaderTensor(NamedTuple):

class GGUFReader:
    # I - same as host, S - swapped
    byte_order: Literal["I", "S"] = "I"
    alignment: int = GGUF_DEFAULT_ALIGNMENT
    data_offset: int
@@ -86,13 +86,15 @@ class GGUFReader:
        GGUFValueType.BOOL: np.bool_,
    }

    def __init__(
        self, path: os.PathLike[str] | str, mode: Literal["r", "r+", "c"] = "r"
    ):
        self.data = np.memmap(path, mode=mode)
        offs = 0

        # Check for GGUF magic
        if self._get(offs, np.uint32, override_order="<")[0] != GGUF_MAGIC:
            raise ValueError("GGUF magic invalid")
        offs += 4

        # Check GGUF version
@@ -100,28 +102,46 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
        if temp_version[0] & 65535 == 0:
            # If we get 0 here that means it's (probably) a GGUF file created for
            # the opposite byte order of the machine this script is running on.
            self.byte_order = "S"
            temp_version = temp_version.newbyteorder(self.byte_order)
        version = temp_version[0]
        if version not in READER_SUPPORTED_VERSIONS:
            raise ValueError(
                f"Sorry, file appears to be version {version} which we cannot handle"
            )
        self.fields: OrderedDict[str, ReaderField] = OrderedDict()
        self.tensors: list[ReaderTensor] = []
        offs += self._push_field(
            ReaderField(
                offs, "GGUF.version", [temp_version], [0], [GGUFValueType.UINT32]
            )
        )

        # Check tensor count and kv count
        temp_counts = self._get(offs, np.uint64, 2)
        offs += self._push_field(
            ReaderField(
                offs,
                "GGUF.tensor_count",
                [temp_counts[:1]],
                [0],
                [GGUFValueType.UINT64],
            )
        )
        offs += self._push_field(
            ReaderField(
                offs, "GGUF.kv_count", [temp_counts[1:]], [0], [GGUFValueType.UINT64]
            )
        )
        tensor_count, kv_count = temp_counts
        offs = self._build_fields(offs, kv_count)

        # Build Tensor Info Fields
        offs, tensors_fields = self._build_tensor_info(offs, tensor_count)
        new_align = self.fields.get("general.alignment")
        if new_align is not None:
            if new_align.types != [GGUFValueType.UINT32]:
                raise ValueError("Bad type for general.alignment field")
            self.alignment = new_align.parts[-1][0]
        padding = offs % self.alignment
        if padding != 0:
@@ -129,7 +149,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
        self.data_offset = offs
        self._build_tensors(offs, tensors_fields)

    _DT = TypeVar("_DT", bound=npt.DTypeLike)

    # Fetch a key/value metadata field by key.
    def get_field(self, key: str) -> Union[ReaderField, None]:
@@ -140,7 +160,11 @@ def get_tensor(self, idx: int) -> ReaderTensor:
        return self.tensors[idx]

    def _get(
        self,
        offset: int,
        dtype: npt.DTypeLike,
        count: int = 1,
        override_order: None | Literal["I", "S", "<"] = None,
    ) -> npt.NDArray[Any]:
        count = int(count)
        itemsize = int(np.empty([], dtype=dtype).itemsize)
@@ -156,18 +180,22 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
            # TODO: add option to generate error on duplicate keys
            # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
            logger.warning(f"Duplicate key {field.name} at offset {field.offset}")
            self.fields[field.name + "_{}".format(field.offset)] = field
        else:
            self.fields[field.name] = field
        return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)

    def _get_str(
        self, offset: int
    ) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
        slen = self._get(offset, np.uint64)
        return slen, self._get(offset + 8, np.uint8, slen[0])

    def _get_field_parts(
        self,
        orig_offs: int,
        raw_type: int,
    ) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
        offs = orig_offs
        types: list[GGUFValueType] = []
@@ -192,7 +220,9 @@ def _get_field_parts(
            aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
            data_idxs: list[int] = []
            for idx in range(alen[0]):
                curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(
                    offs, raw_itype[0]
                )
                if idx == 0:
                    types += curr_types
                    idxs_offs = len(aparts)
@@ -201,7 +231,7 @@ def _get_field_parts(
                offs += curr_size
            return offs - orig_offs, aparts, data_idxs, types

        # We can't deal with this one.
        raise ValueError("Unknown/unhandled field type {gtype}")

    def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
        offs = orig_offs
@@ -228,7 +258,7 @@ def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
        return ReaderField(
            orig_offs,
            str(bytes(name_data), encoding="utf-8"),
            [name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
            [1, 3, 4, 5],
        )
@@ -242,19 +272,26 @@ def _build_fields(self, offs: int, count: int) -> int:
            offs += int(raw_kv_type.nbytes)
            parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
            idxs_offs = len(parts)
            field_size, field_parts, field_idxs, field_types = self._get_field_parts(
                offs, raw_kv_type[0]
            )
            parts += field_parts
            self._push_field(
                ReaderField(
                    orig_offs,
                    str(bytes(kv_kdata), encoding="utf-8"),
                    parts,
                    [idx + idxs_offs for idx in field_idxs],
                    field_types,
                ),
                skip_sum=True,
            )
            offs += field_size
        return offs

    def _build_tensor_info(
        self, offs: int, count: int
    ) -> tuple[int, list[ReaderField]]:
        tensor_fields = []
        for _ in range(count):
            field = self._get_tensor_info_field(offs)
@@ -268,9 +305,9 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
        for field in fields:
            _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
            # check if there's any tensor having same name already in the list
            tensor_name = str(bytes(name_data), encoding="utf-8")
            if tensor_name in tensor_names:
                raise ValueError(f"Found duplicated tensor with name {tensor_name}")
            tensor_names.add(tensor_name)
            ggml_type = GGMLQuantizationType(raw_dtype[0])
            n_elems = int(np.prod(dims))
@@ -304,7 +341,8 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
                item_count = n_bytes
                item_type = np.uint8
                np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)
            tensors.append(
                ReaderTensor(
                    name=tensor_name,
                    tensor_type=ggml_type,
                    shape=dims,
@@ -313,5 +351,6 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
                    data_offset=data_offs,
                    data=self._get(data_offs, item_type, item_count).reshape(np_dims),
                    field=field,
                )
            )
        self.tensors = tensors
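The reader changes above are purely cosmetic; the surface used elsewhere (fields, tensors, get_field, get_tensor, alignment, data_offset) is untouched. A hedged usage sketch — the gguf-py import path and the local model.gguf filename are assumptions, not part of this commit:

from gguf import GGUFReader

reader = GGUFReader("model.gguf")  # hypothetical path
print(reader.byte_order, reader.alignment, reader.data_offset)
alignment_field = reader.get_field("general.alignment")  # None if the key is absent
print(len(reader.tensors), "tensors")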

View File

@@ -81,8 +81,15 @@ class GGUFWriter:
    }

    def __init__(
        self,
        path: os.PathLike[str] | str | None,
        arch: str,
        use_temp_file: bool = False,
        endianess: GGUFEndian = GGUFEndian.LITTLE,
        split_max_tensors: int = 0,
        split_max_size: int = 0,
        dry_run: bool = False,
        small_first_shard: bool = False,
    ):
        self.fout = None
        self.path = Path(path) if path else None
@@ -97,9 +104,11 @@ def __init__(
        self.split_max_size = split_max_size
        self.dry_run = dry_run
        self.small_first_shard = small_first_shard
        logger.info(
            "gguf: This GGUF file is for {0} Endian only".format(
                "Big" if self.endianess == GGUFEndian.BIG else "Little",
            )
        )
        self.state = WriterState.NO_FILE

        if self.small_first_shard:
@@ -128,7 +137,9 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
            elif name.endswith(".lora_b"):
                if last_lora_a is None or last_lora_a[0] != name[:-1] + "a":
                    # Bail when the LoRA pair can't be found trivially
                    logger.warning(
                        "can't measure LoRA size correctly, tensor order is unusual"
                    )
                    return 0, 0, 0, 0
                else:
                    shape = (*shape[:-1], last_lora_a[1].shape[-1])
@@ -136,7 +147,7 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
            size = prod(shape)

            if "_exps." in name:
                expert_params += size // shape[-3]
                expert_sum += shape[-3]
                n_expert_tensors += 1
            else:
@@ -157,15 +168,26 @@ def format_shard_names(self, path: Path) -> list[Path]:
    def format_shard_names(self, path: Path) -> list[Path]:
        if len(self.tensors) == 1:
            return [path]
        return [
            path.with_name(
                SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))
            )
            for i in range(len(self.tensors))
        ]

    def open_output_file(self, path: Path | None = None) -> None:
        if (
            self.state is WriterState.EMPTY
            and self.fout is not None
            and (path is None or path == self.path)
        ):
            # allow calling this multiple times as long as the path is the same
            return

        if self.state is not WriterState.NO_FILE:
            raise ValueError(
                f"Expected output file to be not yet opened, got {self.state}"
            )

        if path is not None:
            self.path = path
@@ -181,7 +203,9 @@ def print_plan(self) -> list[Path]:
        filenames = self.format_shard_names(self.path)
        assert len(filenames) == len(self.tensors)
        for name, tensors in zip(filenames, self.tensors):
            logger.info(
                f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}"
            )

        if self.dry_run:
            logger.info("Dry run, not writing files")
@@ -201,17 +225,23 @@ def add_shard_kv_data(self) -> None:
        self.kv_data.extend({} for _ in range(len(self.kv_data), total_splits))
        for i, kv_data in enumerate(self.kv_data):
            kv_data[Keys.Split.LLM_KV_SPLIT_NO] = GGUFValue(i, GGUFValueType.UINT16)
            kv_data[Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(
                total_splits, GGUFValueType.UINT16
            )
            kv_data[Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(
                total_tensors, GGUFValueType.INT32
            )

    def write_header_to_file(self, path: Path | None = None) -> None:
        if len(self.tensors) == 1 and (
            self.split_max_tensors != 0 or self.split_max_size != 0
        ):
            logger.warning("Model fails split requirements, not splitting")

        self.open_output_file(path)

        if self.state is not WriterState.EMPTY:
            raise ValueError(f"Expected output file to be empty, got {self.state}")

        assert self.fout is not None
        assert len(self.fout) == len(self.tensors)
@@ -229,7 +259,9 @@ def write_header_to_file(self, path: Path | None = None) -> None:
    def write_kv_data_to_file(self) -> None:
        if self.state is not WriterState.HEADER:
            raise ValueError(
                f"Expected output file to contain the header, got {self.state}"
            )
        assert self.fout is not None

        for fout, kv_data in zip(self.fout, self.kv_data):
@@ -246,7 +278,9 @@ def write_kv_data_to_file(self) -> None:
    def write_ti_data_to_file(self) -> None:
        if self.state is not WriterState.KV_DATA:
            raise ValueError(
                f"Expected output file to contain KV data, got {self.state}"
            )
        assert self.fout is not None

        for fout, tensors in zip(self.fout, self.tensors):
@@ -269,7 +303,7 @@ def write_ti_data_to_file(self) -> None:
    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
        if any(key in kv_data for kv_data in self.kv_data):
            raise ValueError(f"Duplicated key name {key!r}")

        self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
@@ -321,14 +355,20 @@ def ggml_pad(x: int, n: int) -> int:
        return ((x + n - 1) // n) * n

    def add_tensor_info(
        self,
        name: str,
        tensor_shape: Sequence[int],
        tensor_dtype: np.dtype,
        tensor_nbytes: int,
        raw_dtype: GGMLQuantizationType | None = None,
    ) -> None:
        if self.state is not WriterState.NO_FILE:
            raise ValueError(
                f"Expected output file to be not yet opened, got {self.state}"
            )

        if any(name in tensors for tensors in self.tensors):
            raise ValueError(f"Duplicated tensor name {name!r}")

        if raw_dtype is None:
            if tensor_dtype == np.float16:
@@ -346,7 +386,9 @@ def add_tensor_info(
            elif tensor_dtype == np.int64:
                dtype = GGMLQuantizationType.I64
            else:
                raise ValueError(
                    "Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now"
                )
        else:
            dtype = raw_dtype
            if tensor_dtype == np.uint8:
@@ -359,14 +401,20 @@ def add_tensor_info(
            and len(self.tensors[-1]) >= self.split_max_tensors
        ) or (  # split when over size limit
            self.split_max_size != 0
            and sum(ti.nbytes for ti in self.tensors[-1].values()) + tensor_nbytes
            > self.split_max_size
        ):
            self.tensors.append({})

        self.tensors[-1][name] = TensorInfo(
            shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes
        )

    def add_tensor(
        self,
        name: str,
        tensor: np.ndarray[Any, Any],
        raw_shape: Sequence[int] | None = None,
        raw_dtype: GGMLQuantizationType | None = None,
    ) -> None:
        if self.endianess == GGUFEndian.BIG:
@@ -377,7 +425,9 @@ def add_tensor(
            self.temp_file = fp

        shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
        self.add_tensor_info(
            name, shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype
        )

        if self.temp_file is None:
            self.tensors[-1][name].tensor = tensor
@@ -387,13 +437,21 @@ def add_tensor(
            self.write_padding(self.temp_file, tensor.nbytes)

    def write_padding(self, fp: IO[bytes], n: int, align: int | None = None) -> None:
        pad = (
            GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment)
            - n
        )
        if pad != 0:
            fp.write(bytes([0] * pad))

    def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
        if (
            self.state is not WriterState.TI_DATA
            and self.state is not WriterState.WEIGHTS
        ):
            raise ValueError(
                f"Expected output file to contain tensor info or weights, got {self.state}"
            )
        assert self.fout is not None

        if self.endianess == GGUFEndian.BIG:
@@ -409,7 +467,9 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
        # pop the first tensor info
        # TODO: cleaner way to get the first key
        first_tensor_name = [
            name for name, _ in zip(self.tensors[file_id].keys(), range(1))
        ][0]
        ti = self.tensors[file_id].pop(first_tensor_name)

        assert ti.nbytes == tensor.nbytes
@@ -437,8 +497,15 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
            total_bytes = sum(ti.nbytes for t in self.tensors for ti in t.values())

            if len(self.fout) > 1:
                shard_bar = tqdm(
                    desc=f"Shard (0/{len(self.fout)})",
                    total=None,
                    unit="byte",
                    unit_scale=True,
                )
            bar = tqdm(
                desc="Writing", total=total_bytes, unit="byte", unit_scale=True
            )

        for i, (fout, tensors) in enumerate(zip(self.fout, self.tensors)):
            if shard_bar is not None:
@@ -448,7 +515,9 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
            # relying on the fact that Python dicts preserve insertion order (since 3.7)
            for ti in tensors.values():
                assert (
                    ti.tensor is not None
                )  # can only iterate once over the tensors
                assert ti.tensor.nbytes == ti.nbytes
                ti.tensor.tofile(fout)
                if shard_bar is not None:
@@ -460,7 +529,9 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
        else:
            self.temp_file.seek(0)

            shutil.copyfileobj(
                self.temp_file, self.fout[0 if not self.small_first_shard else 1]
            )
            self.flush()
            self.temp_file.close()
@@ -566,7 +637,9 @@ def add_base_model_version(self, source_id: int, version: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_VERSION.format(id=source_id), version)

    def add_base_model_organization(self, source_id: int, organization: str) -> None:
        self.add_string(
            Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization
        )

    def add_base_model_url(self, source_id: int, url: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
@@ -605,7 +678,9 @@ def add_block_count(self, length: int) -> None:
        self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)

    def add_leading_dense_block_count(self, length: int) -> None:
        self.add_uint32(
            Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch), length
        )

    def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
        if isinstance(length, int):
@@ -614,10 +689,14 @@ def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
            self.add_array(Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length)

    def add_expert_feed_forward_length(self, length: int) -> None:
        self.add_uint32(
            Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length
        )

    def add_expert_shared_feed_forward_length(self, length: int) -> None:
        self.add_uint32(
            Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length
        )

    def add_parallel_residual(self, use: bool) -> None:
        self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
@@ -736,10 +815,14 @@ def add_tokenizer_model(self, model: str) -> None:
    def add_tokenizer_pre(self, pre: str) -> None:
        self.add_string(Keys.Tokenizer.PRE, pre)

    def add_token_list(
        self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]
    ) -> None:
        self.add_array(Keys.Tokenizer.LIST, tokens)

    def add_token_merges(
        self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]
    ) -> None:
        self.add_array(Keys.Tokenizer.MERGES, merges)

    def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None:
@@ -793,18 +876,22 @@ def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
            template_names = set()

            for choice in value:
                name = choice.get("name", "")
                template = choice.get("template")

                # Allowing non-alphanumerical characters in template name is probably not a good idea, so filter it
                name = "".join(
                    (c if c in ascii_letters + digits else "_" for c in name)
                )

                if name and template is not None:
                    if name == "default":
                        template_default = template
                    else:
                        template_names.add(name)
                        self.add_string(
                            Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template
                        )

            if template_names:
                self.add_array(Keys.Tokenizer.CHAT_TEMPLATES, list(template_names))
@@ -829,10 +916,10 @@ def add_eot_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.EOT_ID, id)

    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
        pack_prefix = ""
        if not skip_pack_prefix:
            pack_prefix = "<" if self.endianess == GGUFEndian.LITTLE else ">"
        return struct.pack(f"{pack_prefix}{fmt}", value)

    def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
        kv_data = bytearray()
@@ -842,7 +929,9 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
        pack_fmt = self._simple_value_packing.get(vtype)
        if pack_fmt is not None:
            kv_data += self._pack(
                pack_fmt, val, skip_pack_prefix=vtype == GGUFValueType.BOOL
            )
        elif vtype == GGUFValueType.STRING:
            encoded_val = val.encode("utf-8") if isinstance(val, str) else val
            kv_data += self._pack("Q", len(encoded_val))
@@ -860,7 +949,9 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
            else:
                ltype = GGUFValueType.get_type(val[0])
                if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                    raise ValueError(
                        "All items in a GGUF array should be of the same type"
                    )
            kv_data += self._pack("I", ltype)
            kv_data += self._pack("Q", len(val))
            for item in val:
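The ggml_pad helper and write_padding shown earlier in this file's diff round sizes up to the next multiple of the alignment; a small self-contained check of that arithmetic (a standalone sketch, not the writer's own tests):

def ggml_pad(x: int, n: int) -> int:
    # Round x up to the nearest multiple of n, same formula as GGUFWriter.ggml_pad above.
    return ((x + n - 1) // n) * n

assert ggml_pad(100, 32) == 128  # 100 bytes padded out to a 32-byte boundary
assert ggml_pad(128, 32) == 128  # already aligned, nothing added
print(ggml_pad(100, 32) - 100)   # 28 -> the pad bytes write_padding would emit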

View File

@@ -13,7 +13,9 @@

class LazyMeta(ABCMeta):

    def __new__(
        cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs
    ):
        def __getattr__(self, name: str) -> Any:
            meta_attr = getattr(self._meta, name)
            if callable(meta_attr):
@@ -41,6 +43,7 @@ def wrapped_special_op(self, *args, **kwargs):
                    getattr(type(self)._tensor_type, op_name),
                    meta_noop=meta_noop,
                )(self, *args, **kwargs)

            return wrapped_special_op

        # special methods bypass __getattr__, so they need to be added manually
@@ -48,11 +51,48 @@ def wrapped_special_op(self, *args, **kwargs):
        # NOTE: doing this from a metaclass is very convenient
        # TODO: make this even more comprehensive
        for binary_op in (
            "lt",
            "le",
            "eq",
            "ne",
            "ge",
            "gt",
            "not" "abs",
            "add",
            "and",
            "floordiv",
            "invert",
            "lshift",
            "mod",
            "mul",
            "matmul",
            "neg",
            "or",
            "pos",
            "pow",
            "rshift",
            "sub",
            "truediv",
            "xor",
            "iadd",
            "iand",
            "ifloordiv",
            "ilshift",
            "imod",
            "imul",
            "ior",
            "irshift",
            "isub",
            "ixor",
            "radd",
            "rand",
            "rfloordiv",
            "rmul",
            "ror",
            "rpow",
            "rsub",
            "rtruediv",
            "rxor",
        ):
            attr_name = f"__{binary_op}__"
            # the result of these operators usually has the same shape and dtype as the input,
@@ -60,7 +100,9 @@ def wrapped_special_op(self, *args, **kwargs):
            namespace[attr_name] = mk_wrap(attr_name, meta_noop=True)

        for special_op in (
            "getitem",
            "setitem",
            "len",
        ):
            attr_name = f"__{special_op}__"
            namespace[attr_name] = mk_wrap(attr_name, meta_noop=False)
@@ -77,7 +119,15 @@ class LazyBase(ABC, metaclass=LazyMeta):
    _kwargs: dict[str, Any]
    _func: Callable[[Any], Any] | None

    def __init__(
        self,
        *,
        meta: Any,
        data: Any | None = None,
        args: tuple = (),
        kwargs: dict[str, Any] | None = None,
        func: Callable[[Any], Any] | None = None,
    ):
        super().__init__()
        self._meta = meta
        self._data = data
@@ -107,7 +157,17 @@ def _recurse_apply(o: Any, fn: Callable[[Any], Any]) -> Any:
        return o

    @classmethod
    def _wrap_fn(
        cls,
        fn: Callable,
        *,
        use_self: LazyBase | None = None,
        meta_noop: (
            bool
            | DTypeLike
            | tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]]
        ) = False,
    ) -> Callable[[Any], Any]:
        def wrapped_fn(*args, **kwargs):
            if kwargs is None:
                kwargs = {}
@@ -138,13 +198,16 @@ def wrapped_fn(*args, **kwargs):
                    res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)

            if isinstance(res, cls._tensor_type):
                return cls(
                    meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn
                )
            else:
                del res  # not needed
                # non-tensor return likely relies on the contents of the args
                # (e.g. the result of torch.equal)
                eager_args = cls.to_eager(args)
                return fn(*eager_args, **kwargs)

        return wrapped_fn

    @classmethod
@@ -175,7 +238,8 @@ def eager_to_meta(cls, t: Any) -> Any:
    # must be overridden, meta tensor init is backend-specific
    @classmethod
    @abstractmethod
    def meta_with_dtype_and_shape(cls, dtype: Any, shape: Any) -> Any:
        pass

    @classmethod
    def from_eager(cls, t: Any) -> Any:
@@ -192,7 +256,9 @@ class LazyNumpyTensor(LazyBase):
    _tensor_type = np.ndarray

    @classmethod
    def meta_with_dtype_and_shape(
        cls, dtype: DTypeLike, shape: tuple[int, ...]
    ) -> np.ndarray[Any, Any]:
        # The initial idea was to use np.nan as the fill value,
        # but non-float types like np.int16 can't use that.
        # So zero it is.
@@ -201,8 +267,16 @@ def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) ->
    def astype(self, dtype, *args, **kwargs):
        meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
        full_args = (
            self,
            dtype,
        ) + args
        return type(self)(
            meta=meta,
            args=full_args,
            kwargs=kwargs,
            func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)),
        )

    def tofile(self, *args, **kwargs):
        eager = LazyNumpyTensor.to_eager(self)

View File

@@ -44,7 +44,12 @@ class Metadata:
    datasets: Optional[list[str]] = None

    @staticmethod
    def load(
        metadata_override_path: Optional[Path] = None,
        model_path: Optional[Path] = None,
        model_name: Optional[str] = None,
        total_params: int = 0,
    ) -> Metadata:
        # This grabs as many contextual authorship metadata as possible from the model repository
        # making any conversion as required to match the gguf kv store metadata format
        # as well as giving users the ability to override any authorship metadata that may be incorrect
@@ -57,7 +62,9 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
        # TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter

        # heuristics
        metadata = Metadata.apply_metadata_heuristic(
            metadata, model_card, hf_params, model_path, total_params
        )

        # Metadata Override File Provided
        # This is based on LLM_KV_NAMES mapping in llama.cpp
@@ -66,34 +73,66 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
        metadata.name = metadata_override.get(Keys.General.NAME, metadata.name)
        metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author)
        metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version)
        metadata.organization = metadata_override.get(
            Keys.General.ORGANIZATION, metadata.organization
        )

        metadata.finetune = metadata_override.get(
            Keys.General.FINETUNE, metadata.finetune
        )
        metadata.basename = metadata_override.get(
            Keys.General.BASENAME, metadata.basename
        )

        metadata.description = metadata_override.get(
            Keys.General.DESCRIPTION, metadata.description
        )
        metadata.quantized_by = metadata_override.get(
            Keys.General.QUANTIZED_BY, metadata.quantized_by
        )

        metadata.size_label = metadata_override.get(
            Keys.General.SIZE_LABEL, metadata.size_label
        )
        metadata.license_name = metadata_override.get(
            Keys.General.LICENSE_NAME, metadata.license_name
        )
        metadata.license_link = metadata_override.get(
            Keys.General.LICENSE_LINK, metadata.license_link
        )

        metadata.url = metadata_override.get(Keys.General.URL, metadata.url)
        metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi)
        metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid)
        metadata.repo_url = metadata_override.get(
            Keys.General.REPO_URL, metadata.repo_url
        )

        metadata.source_url = metadata_override.get(
            Keys.General.SOURCE_URL, metadata.source_url
        )
        metadata.source_doi = metadata_override.get(
            Keys.General.SOURCE_DOI, metadata.source_doi
        )
        metadata.source_uuid = metadata_override.get(
            Keys.General.SOURCE_UUID, metadata.source_uuid
        )
        metadata.source_repo_url = metadata_override.get(
            Keys.General.SOURCE_REPO_URL, metadata.source_repo_url
        )

        # Base Models is received here as an array of models
        metadata.base_models = metadata_override.get(
            "general.base_models", metadata.base_models
        )

        metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages) metadata.languages = metadata_override.get(
metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets) Keys.General.LANGUAGES, metadata.languages
)
metadata.datasets = metadata_override.get(
Keys.General.DATASETS, metadata.datasets
)
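For orientation, a hypothetical override file that the lookups above would consume; the "general.*" key strings are assumptions inferred from the Keys.General names used here (only "general.base_models" appears literally in this code).

import json

override = {
    "general.name": "My Model",           # Keys.General.NAME
    "general.author": "Example Lab",      # Keys.General.AUTHOR
    "general.version": "v1.0",            # Keys.General.VERSION
    "general.base_models": [              # consumed as a list of dicts, as above
        {"name": "Some Base Model", "organization": "Some Org", "version": "v0.1"}
    ],
    "general.tags": ["text-generation"],  # Keys.General.TAGS
}
with open("metadata_override.json", "w", encoding="utf-8") as f:
    json.dump(override, f, indent=2)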
# Direct Metadata Override (via direct cli argument) # Direct Metadata Override (via direct cli argument)
if model_name is not None: if model_name is not None:
@ -102,7 +141,9 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
return metadata return metadata
@staticmethod @staticmethod
def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, Any]: def load_metadata_override(
metadata_override_path: Optional[Path] = None,
) -> dict[str, Any]:
if metadata_override_path is None or not metadata_override_path.is_file(): if metadata_override_path is None or not metadata_override_path.is_file():
return {} return {}
@ -128,7 +169,9 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]:
if isinstance(data, dict): if isinstance(data, dict):
return data return data
else: else:
logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict") logger.error(
f"while reading YAML model card frontmatter, data is {type(data)} instead of dict"
)
return {} return {}
else: else:
return {} return {}
@ -149,10 +192,21 @@ def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]:
@staticmethod @staticmethod
def id_to_title(string): def id_to_title(string):
# Convert capitalization into title form unless acronym or version number # Convert capitalization into title form unless acronym or version number
return ' '.join([w.title() if w.islower() and not re.match(r'^(v\d+(?:\.\d+)*|\d.*)$', w) else w for w in string.strip().replace('-', ' ').split()]) return " ".join(
[
(
w.title()
if w.islower() and not re.match(r"^(v\d+(?:\.\d+)*|\d.*)$", w)
else w
)
for w in string.strip().replace("-", " ").split()
]
)
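Restated standalone so it can be run directly (behaviour assumed to match the method above), the title-casing rule leaves acronyms and version-like tokens untouched:

import re

def id_to_title(string: str) -> str:
    # Title-case lowercase words, but keep acronyms and version/number tokens as-is.
    return " ".join(
        w.title() if w.islower() and not re.match(r"^(v\d+(?:\.\d+)*|\d.*)$", w) else w
        for w in string.strip().replace("-", " ").split()
    )

print(id_to_title("mistral-7b-instruct-v0.2"))  # Mistral 7b Instruct v0.2
print(id_to_title("CodeLlama-13B"))             # CodeLlama 13B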
@staticmethod @staticmethod
def get_model_id_components(model_id: Optional[str] = None, total_params: int = 0) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]: def get_model_id_components(
model_id: Optional[str] = None, total_params: int = 0
) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
# Huggingface often store model id as '<org>/<model name>' # Huggingface often store model id as '<org>/<model name>'
# so let's parse it and apply some heuristics if possible for model name components # so let's parse it and apply some heuristics if possible for model name components
@ -160,24 +214,24 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
# model ID missing # model ID missing
return None, None, None, None, None, None return None, None, None, None, None, None
if ' ' in model_id: if " " in model_id:
# model ID is actually a normal human sentence # model ID is actually a normal human sentence
# which means its most likely a normal model name only # which means its most likely a normal model name only
# not part of the hugging face naming standard, but whatever # not part of the hugging face naming standard, but whatever
return model_id, None, None, None, None, None return model_id, None, None, None, None, None
if '/' in model_id: if "/" in model_id:
# model ID (huggingface style) # model ID (huggingface style)
org_component, model_full_name_component = model_id.split('/', 1) org_component, model_full_name_component = model_id.split("/", 1)
else: else:
# model ID but missing org components # model ID but missing org components
org_component, model_full_name_component = None, model_id org_component, model_full_name_component = None, model_id
# Check if we erroneously matched against './' or '../' etc... # Check if we erroneously matched against './' or '../' etc...
if org_component is not None and org_component[0] == '.': if org_component is not None and org_component[0] == ".":
org_component = None org_component = None
name_parts: list[str] = model_full_name_component.split('-') name_parts: list[str] = model_full_name_component.split("-")
# Remove empty parts # Remove empty parts
for i in reversed(range(len(name_parts))): for i in reversed(range(len(name_parts))):
@ -191,14 +245,18 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
# Annotate the name # Annotate the name
for i, part in enumerate(name_parts): for i, part in enumerate(name_parts):
# Version # Version
if re.fullmatch(r'(v|iter)?\d+([.]\d+)*', part, re.IGNORECASE): if re.fullmatch(r"(v|iter)?\d+([.]\d+)*", part, re.IGNORECASE):
name_types[i].add("version") name_types[i].add("version")
# Quant type (should not be there for base models, but still annotated) # Quant type (should not be there for base models, but still annotated)
elif re.fullmatch(r'i?q\d(_\w)*|b?fp?(16|32)', part, re.IGNORECASE): elif re.fullmatch(r"i?q\d(_\w)*|b?fp?(16|32)", part, re.IGNORECASE):
name_types[i].add("type") name_types[i].add("type")
name_parts[i] = part.upper() name_parts[i] = part.upper()
# Model size # Model size
elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)', part, re.IGNORECASE): elif i > 0 and re.fullmatch(
r"(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)",
part,
re.IGNORECASE,
):
part = part.replace("_", ".") part = part.replace("_", ".")
# Handle weird bloom-7b1 notation # Handle weird bloom-7b1 notation
if part[-1].isdecimal(): if part[-1].isdecimal():
@ -209,14 +267,19 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
part = part[:-1] + part[-1].upper() part = part[:-1] + part[-1].upper()
if total_params != 0: if total_params != 0:
try: try:
label_params = float(part[:-1]) * pow(1000, " KMBT".find(part[-1])) label_params = float(part[:-1]) * pow(
1000, " KMBT".find(part[-1])
)
# Only use it as a size label if it's close or bigger than the model size # Only use it as a size label if it's close or bigger than the model size
# Note that LoRA adapters don't necessarily include all layers, # Note that LoRA adapters don't necessarily include all layers,
# so this is why bigger label sizes are accepted. # so this is why bigger label sizes are accepted.
# Do not use the size label when it's smaller than 1/8 of the model size # Do not use the size label when it's smaller than 1/8 of the model size
if (total_params < 0 and label_params < abs(total_params) // 8) or ( if (
total_params < 0 and label_params < abs(total_params) // 8
) or (
# Check both directions when the current model isn't a LoRA adapter # Check both directions when the current model isn't a LoRA adapter
total_params > 0 and abs(label_params - total_params) > 7 * total_params // 8 total_params > 0
and abs(label_params - total_params) > 7 * total_params // 8
): ):
# Likely a context length # Likely a context length
name_types[i].add("finetune") name_types[i].add("finetune")
@ -229,7 +292,9 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
name_types[i].add("size_label") name_types[i].add("size_label")
name_parts[i] = part name_parts[i] = part
# Some easy to recognize finetune names # Some easy to recognize finetune names
elif i > 0 and re.fullmatch(r'chat|instruct|vision|lora', part, re.IGNORECASE): elif i > 0 and re.fullmatch(
r"chat|instruct|vision|lora", part, re.IGNORECASE
):
if total_params < 0 and part.lower() == "lora": if total_params < 0 and part.lower() == "lora":
# ignore redundant "lora" in the finetune part when the output is a lora adapter # ignore redundant "lora" in the finetune part when the output is a lora adapter
name_types[i].add("type") name_types[i].add("type")
@ -238,7 +303,12 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
# Ignore word-based size labels when there is at least a number-based one present # Ignore word-based size labels when there is at least a number-based one present
# TODO: should word-based size labels always be removed instead? # TODO: should word-based size labels always be removed instead?
if any(c.isdecimal() for n, t in zip(name_parts, name_types) if "size_label" in t for c in n): if any(
c.isdecimal()
for n, t in zip(name_parts, name_types)
if "size_label" in t
for c in n
):
for n, t in zip(name_parts, name_types): for n, t in zip(name_parts, name_types):
if "size_label" in t: if "size_label" in t:
if all(c.isalpha() for c in n): if all(c.isalpha() for c in n):
@ -262,22 +332,55 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
else: else:
break break
basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None basename = (
"-".join(n for n, t in zip(name_parts, name_types) if "basename" in t)
or None
)
# Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys) # Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys)
size_label = "-".join(dict.fromkeys(s for s, t in zip(name_parts, name_types) if "size_label" in t).keys()) or None size_label = (
finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None "-".join(
dict.fromkeys(
s for s, t in zip(name_parts, name_types) if "size_label" in t
).keys()
)
or None
)
finetune = (
"-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t)
or None
)
# TODO: should the basename version always be excluded? # TODO: should the basename version always be excluded?
# NOTE: multiple finetune versions are joined together # NOTE: multiple finetune versions are joined together
version = "-".join(v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t) or None version = (
"-".join(
v
for v, t, in zip(name_parts, name_types)
if "version" in t and "basename" not in t
)
or None
)
if size_label is None and finetune is None and version is None: if size_label is None and finetune is None and version is None:
# Too ambiguous, output nothing # Too ambiguous, output nothing
basename = None basename = None
return model_full_name_component, org_component, basename, finetune, version, size_label return (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
)
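A hedged usage sketch of the parser above; the import path and the exact outputs are assumptions inferred from the heuristics shown (e.g. "7B" is read as 7 * 1000**3 parameters when checked against total_params):

from gguf.metadata import Metadata  # assumed import path

name, org, basename, finetune, version, size_label = Metadata.get_model_id_components(
    "mistralai/Mistral-7B-Instruct-v0.2"
)
# Heuristically expected to be roughly:
#   name == "Mistral-7B-Instruct-v0.2", org == "mistralai", basename == "Mistral",
#   finetune == "Instruct", version == "v0.2", size_label == "7B"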
@staticmethod @staticmethod
def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None, total_params: int = 0) -> Metadata: def apply_metadata_heuristic(
metadata: Metadata,
model_card: Optional[dict] = None,
hf_params: Optional[dict] = None,
model_path: Optional[Path] = None,
total_params: int = 0,
) -> Metadata:
# Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Model Card Heuristics # Model Card Heuristics
@ -317,16 +420,30 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
for model_id in metadata_base_models: for model_id in metadata_base_models:
# NOTE: model size of base model is assumed to be similar to the size of the current model # NOTE: model size of base model is assumed to be similar to the size of the current model
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
) = Metadata.get_model_id_components(model_id, total_params)
base_model = {} base_model = {}
if model_full_name_component is not None: if model_full_name_component is not None:
base_model["name"] = Metadata.id_to_title(model_full_name_component) base_model["name"] = Metadata.id_to_title(
model_full_name_component
)
if org_component is not None: if org_component is not None:
base_model["organization"] = Metadata.id_to_title(org_component) base_model["organization"] = Metadata.id_to_title(org_component)
if version is not None: if version is not None:
base_model["version"] = version base_model["version"] = version
if org_component is not None and model_full_name_component is not None: if (
base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}" org_component is not None
and model_full_name_component is not None
):
base_model["repo_url"] = (
f"https://huggingface.co/{org_component}/{model_full_name_component}"
)
metadata.base_models.append(base_model) metadata.base_models.append(base_model)
if "license" in model_card and metadata.license is None: if "license" in model_card and metadata.license is None:
@ -360,7 +477,9 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
elif isinstance(pipeline_tags_value, list): elif isinstance(pipeline_tags_value, list):
metadata.tags.extend(pipeline_tags_value) metadata.tags.extend(pipeline_tags_value)
language_value = model_card.get("languages", model_card.get("language", None)) language_value = model_card.get(
"languages", model_card.get("language", None)
)
if language_value is not None: if language_value is not None:
if metadata.languages is None: if metadata.languages is None:
@ -388,11 +507,18 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
if hf_params is not None: if hf_params is not None:
hf_name_or_path = hf_params.get("_name_or_path") hf_name_or_path = hf_params.get("_name_or_path")
if hf_name_or_path is not None and hf_name_or_path.count('/') <= 1: if hf_name_or_path is not None and hf_name_or_path.count("/") <= 1:
# Use _name_or_path only if its actually a model name and not some computer path # Use _name_or_path only if its actually a model name and not some computer path
# e.g. 'meta-llama/Llama-2-7b-hf' # e.g. 'meta-llama/Llama-2-7b-hf'
model_id = hf_name_or_path model_id = hf_name_or_path
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
) = Metadata.get_model_id_components(model_id, total_params)
if metadata.name is None and model_full_name_component is not None: if metadata.name is None and model_full_name_component is not None:
metadata.name = Metadata.id_to_title(model_full_name_component) metadata.name = Metadata.id_to_title(model_full_name_component)
if metadata.organization is None and org_component is not None: if metadata.organization is None and org_component is not None:
@ -410,7 +536,14 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
############################################ ############################################
if model_path is not None: if model_path is not None:
model_id = model_path.name model_id = model_path.name
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
) = Metadata.get_model_id_components(model_id, total_params)
if metadata.name is None and model_full_name_component is not None: if metadata.name is None and model_full_name_component is not None:
metadata.name = Metadata.id_to_title(model_full_name_component) metadata.name = Metadata.id_to_title(model_full_name_component)
if metadata.organization is None and org_component is not None: if metadata.organization is None and org_component is not None:
@ -485,7 +618,9 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
if "version" in base_model_entry: if "version" in base_model_entry:
gguf_writer.add_base_model_version(key, base_model_entry["version"]) gguf_writer.add_base_model_version(key, base_model_entry["version"])
if "organization" in base_model_entry: if "organization" in base_model_entry:
gguf_writer.add_base_model_organization(key, base_model_entry["organization"]) gguf_writer.add_base_model_organization(
key, base_model_entry["organization"]
)
if "url" in base_model_entry: if "url" in base_model_entry:
gguf_writer.add_base_model_url(key, base_model_entry["url"]) gguf_writer.add_base_model_url(key, base_model_entry["url"])
if "doi" in base_model_entry: if "doi" in base_model_entry:
@ -493,7 +628,9 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
if "uuid" in base_model_entry: if "uuid" in base_model_entry:
gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"]) gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"])
if "repo_url" in base_model_entry: if "repo_url" in base_model_entry:
gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"]) gguf_writer.add_base_model_repo_url(
key, base_model_entry["repo_url"]
)
if self.tags is not None: if self.tags is not None:
gguf_writer.add_tags(self.tags) gguf_writer.add_tags(self.tags)

@ -12,14 +12,18 @@
def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType): def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
block_size, type_size = GGML_QUANT_SIZES[quant_type] block_size, type_size = GGML_QUANT_SIZES[quant_type]
if shape[-1] % block_size != 0: if shape[-1] % block_size != 0:
raise ValueError(f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})") raise ValueError(
f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})"
)
return (*shape[:-1], shape[-1] // block_size * type_size) return (*shape[:-1], shape[-1] // block_size * type_size)
def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType): def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
block_size, type_size = GGML_QUANT_SIZES[quant_type] block_size, type_size = GGML_QUANT_SIZES[quant_type]
if shape[-1] % type_size != 0: if shape[-1] % type_size != 0:
raise ValueError(f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})") raise ValueError(
f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})"
)
return (*shape[:-1], shape[-1] // type_size * block_size) return (*shape[:-1], shape[-1] // type_size * block_size)
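A small worked example of the shape arithmetic above; the Q8_0 figures (32 weights per block, 34 bytes per block: 32 int8 values plus a 2-byte scale) are stated from memory and should be treated as assumptions, as is the import path:

from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType  # assumed import path

block_size, type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q8_0]  # expected (32, 34)
n_weights = 4096                            # logical row length in weights
print(n_weights // block_size * type_size)  # 4096 // 32 * 34 == 4352 bytes per quantized row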
@ -27,14 +31,23 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati
def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray: def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
n = n.astype(np.float32, copy=False).view(np.uint32) n = n.astype(np.float32, copy=False).view(np.uint32)
# force nan to quiet # force nan to quiet
n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n) n = np.where(
(n & 0x7FFFFFFF) > 0x7F800000,
(n & np.uint32(0xFFFF0000)) | np.uint32(64 << 16),
n,
)
# round to nearest even # round to nearest even
n = (np.uint64(n) + (0x7fff + ((n >> 16) & 1))) >> 16 n = (np.uint64(n) + (0x7FFF + ((n >> 16) & 1))) >> 16
return n.astype(np.uint16) return n.astype(np.uint16)
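To make the bit manipulation above concrete, a standalone sketch that applies the same two steps (quiet any NaN, then round to nearest even) before dropping the low 16 mantissa bits:

import numpy as np

def fp32_to_bf16_bits(values) -> np.ndarray:
    n = np.asarray(values, dtype=np.float32).view(np.uint32)
    # force NaN to a quiet-NaN bit pattern
    n = np.where((n & 0x7FFFFFFF) > 0x7F800000,
                 (n & np.uint32(0xFFFF0000)) | np.uint32(64 << 16), n)
    # round to nearest even, then keep the high 16 bits
    n = (n.astype(np.uint64) + (0x7FFF + ((n >> 16) & 1))) >> 16
    return n.astype(np.uint16)

bits = fp32_to_bf16_bits([1.0, 1.00390625])
print([hex(int(b)) for b in bits])  # ['0x3f80', '0x3f80'] -- the halfway case rounds to even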
# This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time # This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray: def __apply_over_grouped_rows(
func: Callable[[np.ndarray], np.ndarray],
arr: np.ndarray,
otype: DTypeLike,
oshape: tuple[int, ...],
) -> np.ndarray:
rows = arr.reshape((-1, arr.shape[-1])) rows = arr.reshape((-1, arr.shape[-1]))
osize = 1 osize = 1
for dim in oshape: for dim in oshape:
@ -42,15 +55,23 @@ def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.
out = np.empty(shape=osize, dtype=otype) out = np.empty(shape=osize, dtype=otype)
# compute over groups of 16 rows (arbitrary, but seems good for performance) # compute over groups of 16 rows (arbitrary, but seems good for performance)
n_groups = (rows.shape[0] // 16) or 1 n_groups = (rows.shape[0] // 16) or 1
np.concatenate([func(group).ravel() for group in np.array_split(rows, n_groups)], axis=0, out=out) np.concatenate(
[func(group).ravel() for group in np.array_split(rows, n_groups)],
axis=0,
out=out,
)
return out.reshape(oshape) return out.reshape(oshape)
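Restated as a standalone helper (names here are illustrative) so the row-grouping pattern above can be exercised on its own:

import numpy as np

def apply_over_grouped_rows(func, arr, otype, oshape):
    # Reshape to rows, process ~16 rows at a time, write straight into the output buffer.
    rows = arr.reshape((-1, arr.shape[-1]))
    osize = 1
    for dim in oshape:
        osize *= dim
    out = np.empty(shape=osize, dtype=otype)
    n_groups = (rows.shape[0] // 16) or 1
    np.concatenate([func(group).ravel() for group in np.array_split(rows, n_groups)],
                   axis=0, out=out)
    return out.reshape(oshape)

a = np.arange(64, dtype=np.float32).reshape(8, 8)
assert np.array_equal(apply_over_grouped_rows(lambda g: g * 2, a, np.float32, a.shape), a * 2)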
def __quantize_bf16_array(n: np.ndarray) -> np.ndarray: def __quantize_bf16_array(n: np.ndarray) -> np.ndarray:
return __apply_over_grouped_rows(__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape) return __apply_over_grouped_rows(
__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape
)
__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(__quantize_bf16_array, meta_noop=np.uint16) __quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(
__quantize_bf16_array, meta_noop=np.uint16
)
def quantize_bf16(n: np.ndarray): def quantize_bf16(n: np.ndarray):
@ -105,7 +126,12 @@ def __quantize_q8_0_rows(n: np.ndarray) -> np.ndarray:
def __quantize_q8_0_array(n: np.ndarray) -> np.ndarray: def __quantize_q8_0_array(n: np.ndarray) -> np.ndarray:
return __apply_over_grouped_rows(__quantize_q8_0_rows, arr=n, otype=np.uint8, oshape=__quantize_q8_0_shape_change(n.shape)) return __apply_over_grouped_rows(
__quantize_q8_0_rows,
arr=n,
otype=np.uint8,
oshape=__quantize_q8_0_shape_change(n.shape),
)
__quantize_q8_0_lazy = LazyNumpyTensor._wrap_fn( __quantize_q8_0_lazy = LazyNumpyTensor._wrap_fn(

@ -28,12 +28,10 @@ class TensorNameMap:
"transformer.token_embeddings", # openelm "transformer.token_embeddings", # openelm
"shared", # t5 "shared", # t5
), ),
# Token type embeddings # Token type embeddings
MODEL_TENSOR.TOKEN_TYPES: ( MODEL_TENSOR.TOKEN_TYPES: (
"embeddings.token_type_embeddings", # bert nomic-bert "embeddings.token_type_embeddings", # bert nomic-bert
), ),
# Normalization of token embeddings # Normalization of token embeddings
MODEL_TENSOR.TOKEN_EMBD_NORM: ( MODEL_TENSOR.TOKEN_EMBD_NORM: (
"word_embeddings_layernorm", # bloom "word_embeddings_layernorm", # bloom
@ -41,14 +39,12 @@ class TensorNameMap:
"emb_ln", # nomic-bert "emb_ln", # nomic-bert
"transformer.norm", # openelm "transformer.norm", # openelm
), ),
# Position embeddings # Position embeddings
MODEL_TENSOR.POS_EMBD: ( MODEL_TENSOR.POS_EMBD: (
"transformer.wpe", # gpt2 "transformer.wpe", # gpt2
"embeddings.position_embeddings", # bert "embeddings.position_embeddings", # bert
"wpe", # gpt2 "wpe", # gpt2
), ),
# Output # Output
MODEL_TENSOR.OUTPUT: ( MODEL_TENSOR.OUTPUT: (
"embed_out", # gptneox "embed_out", # gptneox
@ -58,7 +54,6 @@ class TensorNameMap:
"lm_head.linear", # phi2 "lm_head.linear", # phi2
"output_layer", # chatglm "output_layer", # chatglm
), ),
# Output norm # Output norm
MODEL_TENSOR.OUTPUT_NORM: ( MODEL_TENSOR.OUTPUT_NORM: (
"gpt_neox.final_layer_norm", # gptneox "gpt_neox.final_layer_norm", # gptneox
@ -76,7 +71,6 @@ class TensorNameMap:
"encoder.final_layernorm", # chatglm "encoder.final_layernorm", # chatglm
"transformer.norm", # openelm "transformer.norm", # openelm
), ),
# Rope frequencies # Rope frequencies
MODEL_TENSOR.ROPE_FREQS: ( MODEL_TENSOR.ROPE_FREQS: (
"rope.freqs", # llama-pth "rope.freqs", # llama-pth
@ -108,13 +102,11 @@ class TensorNameMap:
"encoder.layers.{bid}.input_layernorm", # chatglm "encoder.layers.{bid}.input_layernorm", # chatglm
"transformer.layers.{bid}.attn_norm", # openelm "transformer.layers.{bid}.attn_norm", # openelm
), ),
# Attention norm 2 # Attention norm 2
MODEL_TENSOR.ATTN_NORM_2: ( MODEL_TENSOR.ATTN_NORM_2: (
"transformer.h.{bid}.ln_attn", # falcon40b "transformer.h.{bid}.ln_attn", # falcon40b
"encoder.layer.{bid}.layer_norm_1", # jina-v2-code "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
), ),
# Attention query-key-value # Attention query-key-value
MODEL_TENSOR.ATTN_QKV: ( MODEL_TENSOR.ATTN_QKV: (
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
@ -132,7 +124,6 @@ class TensorNameMap:
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
"transformer.layers.{bid}.attn.qkv_proj", # openelm "transformer.layers.{bid}.attn.qkv_proj", # openelm
), ),
# Attention query # Attention query
MODEL_TENSOR.ATTN_Q: ( MODEL_TENSOR.ATTN_Q: (
"model.layers.{bid}.self_attn.q_proj", # llama-hf "model.layers.{bid}.self_attn.q_proj", # llama-hf
@ -143,7 +134,6 @@ class TensorNameMap:
"model.layers.{bid}.attention.wq", # internlm2 "model.layers.{bid}.attention.wq", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
), ),
# Attention key # Attention key
MODEL_TENSOR.ATTN_K: ( MODEL_TENSOR.ATTN_K: (
"model.layers.{bid}.self_attn.k_proj", # llama-hf "model.layers.{bid}.self_attn.k_proj", # llama-hf
@ -155,7 +145,6 @@ class TensorNameMap:
"model.layers.{bid}.attention.wk", # internlm2 "model.layers.{bid}.attention.wk", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
), ),
# Attention value # Attention value
MODEL_TENSOR.ATTN_V: ( MODEL_TENSOR.ATTN_V: (
"model.layers.{bid}.self_attn.v_proj", # llama-hf "model.layers.{bid}.self_attn.v_proj", # llama-hf
@ -165,9 +154,8 @@ class TensorNameMap:
"transformer.h.{bid}.attn.v", # refact "transformer.h.{bid}.attn.v", # refact
"model.layers.layers.{bid}.self_attn.v_proj", # plamo "model.layers.layers.{bid}.self_attn.v_proj", # plamo
"model.layers.{bid}.attention.wv", # internlm2 "model.layers.{bid}.attention.wv", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.value" # Grok "transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
), ),
# Attention output # Attention output
MODEL_TENSOR.ATTN_OUT: ( MODEL_TENSOR.ATTN_OUT: (
"gpt_neox.layers.{bid}.attention.dense", # gptneox "gpt_neox.layers.{bid}.attention.dense", # gptneox
@ -191,7 +179,6 @@ class TensorNameMap:
"encoder.layers.{bid}.self_attention.dense", # chatglm "encoder.layers.{bid}.self_attention.dense", # chatglm
"transformer.layers.{bid}.attn.out_proj", # openelm "transformer.layers.{bid}.attn.out_proj", # openelm
), ),
# Attention output norm # Attention output norm
MODEL_TENSOR.ATTN_OUT_NORM: ( MODEL_TENSOR.ATTN_OUT_NORM: (
"encoder.layer.{bid}.attention.output.LayerNorm", # bert "encoder.layer.{bid}.attention.output.LayerNorm", # bert
@ -199,11 +186,9 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok "transformer.decoder_layer.{bid}.rms_norm_1", # Grok
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx "transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
), ),
MODEL_TENSOR.ATTN_POST_NORM: ( MODEL_TENSOR.ATTN_POST_NORM: (
"model.layers.{bid}.post_attention_layernorm", # gemma2 "model.layers.{bid}.post_attention_layernorm", # gemma2
), ),
# Rotary embeddings # Rotary embeddings
MODEL_TENSOR.ATTN_ROT_EMBD: ( MODEL_TENSOR.ATTN_ROT_EMBD: (
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
@ -211,7 +196,6 @@ class TensorNameMap:
"model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo "model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
"transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell "transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell
), ),
# Feed-forward norm # Feed-forward norm
MODEL_TENSOR.FFN_NORM: ( MODEL_TENSOR.FFN_NORM: (
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
@ -228,17 +212,14 @@ class TensorNameMap:
"encoder.layers.{bid}.post_attention_layernorm", # chatglm "encoder.layers.{bid}.post_attention_layernorm", # chatglm
"transformer.layers.{bid}.ffn_norm", # openelm "transformer.layers.{bid}.ffn_norm", # openelm
), ),
# Post feed-forward norm # Post feed-forward norm
MODEL_TENSOR.FFN_PRE_NORM: ( MODEL_TENSOR.FFN_PRE_NORM: (
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2 "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
), ),
# Post feed-forward norm # Post feed-forward norm
MODEL_TENSOR.FFN_POST_NORM: ( MODEL_TENSOR.FFN_POST_NORM: (
"model.layers.{bid}.post_feedforward_layernorm", # gemma2 "model.layers.{bid}.post_feedforward_layernorm", # gemma2
), ),
MODEL_TENSOR.FFN_GATE_INP: ( MODEL_TENSOR.FFN_GATE_INP: (
"layers.{bid}.feed_forward.gate", # mixtral "layers.{bid}.feed_forward.gate", # mixtral
"model.layers.{bid}.block_sparse_moe.gate", # mixtral "model.layers.{bid}.block_sparse_moe.gate", # mixtral
@ -246,11 +227,9 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.router", # Grok "transformer.decoder_layer.{bid}.router", # Grok
"transformer.blocks.{bid}.ffn.router.layer", # dbrx "transformer.blocks.{bid}.ffn.router.layer", # dbrx
), ),
MODEL_TENSOR.FFN_GATE_INP_SHEXP: ( MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
), ),
# Feed-forward up # Feed-forward up
MODEL_TENSOR.FFN_UP: ( MODEL_TENSOR.FFN_UP: (
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
@ -278,24 +257,18 @@ class TensorNameMap:
"model.layers.{bid}.residual_mlp.w3", # arctic "model.layers.{bid}.residual_mlp.w3", # arctic
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
), ),
MODEL_TENSOR.FFN_UP_EXP: ( MODEL_TENSOR.FFN_UP_EXP: (
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged) "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged) "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged) "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
), ),
MODEL_TENSOR.FFN_UP_SHEXP: ( MODEL_TENSOR.FFN_UP_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2 "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
), ),
# AWQ-activation gate # AWQ-activation gate
MODEL_TENSOR.FFN_ACT: ( MODEL_TENSOR.FFN_ACT: ("transformer.blocks.{bid}.ffn.act",), # mpt
"transformer.blocks.{bid}.ffn.act", # mpt
),
# Feed-forward gate # Feed-forward gate
MODEL_TENSOR.FFN_GATE: ( MODEL_TENSOR.FFN_GATE: (
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
@ -309,19 +282,16 @@ class TensorNameMap:
"transformer.h.{bid}.mlp.linear_1", # refact "transformer.h.{bid}.mlp.linear_1", # refact
"model.layers.{bid}.residual_mlp.w1", # arctic "model.layers.{bid}.residual_mlp.w1", # arctic
), ),
MODEL_TENSOR.FFN_GATE_EXP: ( MODEL_TENSOR.FFN_GATE_EXP: (
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged) "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged) "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged) "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
), ),
MODEL_TENSOR.FFN_GATE_SHEXP: ( MODEL_TENSOR.FFN_GATE_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2 "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
), ),
# Feed-forward down # Feed-forward down
MODEL_TENSOR.FFN_DOWN: ( MODEL_TENSOR.FFN_DOWN: (
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
@ -348,19 +318,16 @@ class TensorNameMap:
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2 "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
), ),
MODEL_TENSOR.FFN_DOWN_EXP: ( MODEL_TENSOR.FFN_DOWN_EXP: (
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged) "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged) "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged) "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
), ),
MODEL_TENSOR.FFN_DOWN_SHEXP: ( MODEL_TENSOR.FFN_DOWN_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2 "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
), ),
MODEL_TENSOR.ATTN_Q_NORM: ( MODEL_TENSOR.ATTN_Q_NORM: (
"language_model.encoder.layers.{bid}.self_attention.q_layernorm", "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
"model.layers.{bid}.self_attn.q_layernorm", # persimmon "model.layers.{bid}.self_attn.q_layernorm", # persimmon
@ -369,7 +336,6 @@ class TensorNameMap:
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2 "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
"transformer.layers.{bid}.attn.q_norm", # openelm "transformer.layers.{bid}.attn.q_norm", # openelm
), ),
MODEL_TENSOR.ATTN_K_NORM: ( MODEL_TENSOR.ATTN_K_NORM: (
"language_model.encoder.layers.{bid}.self_attention.k_layernorm", "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
"model.layers.{bid}.self_attn.k_layernorm", # persimmon "model.layers.{bid}.self_attn.k_layernorm", # persimmon
@ -378,210 +344,131 @@ class TensorNameMap:
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2 "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
"transformer.layers.{bid}.attn.k_norm", # openelm "transformer.layers.{bid}.attn.k_norm", # openelm
), ),
MODEL_TENSOR.ROPE_FREQS: ( MODEL_TENSOR.ROPE_FREQS: (
"language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
), ),
MODEL_TENSOR.LAYER_OUT_NORM: ( MODEL_TENSOR.LAYER_OUT_NORM: (
"encoder.layer.{bid}.output.LayerNorm", # bert "encoder.layer.{bid}.output.LayerNorm", # bert
"encoder.layers.{bid}.norm2", # nomic-bert "encoder.layers.{bid}.norm2", # nomic-bert
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2 "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
"encoder.layer.{bid}.layer_norm_2" # jina-v2-code "encoder.layer.{bid}.layer_norm_2", # jina-v2-code
), ),
MODEL_TENSOR.SSM_IN: ( MODEL_TENSOR.SSM_IN: (
"model.layers.{bid}.in_proj", "model.layers.{bid}.in_proj",
"backbone.layers.{bid}.mixer.in_proj", "backbone.layers.{bid}.mixer.in_proj",
), ),
MODEL_TENSOR.SSM_CONV1D: ( MODEL_TENSOR.SSM_CONV1D: (
"model.layers.{bid}.conv1d", "model.layers.{bid}.conv1d",
"backbone.layers.{bid}.mixer.conv1d", "backbone.layers.{bid}.mixer.conv1d",
), ),
MODEL_TENSOR.SSM_X: ( MODEL_TENSOR.SSM_X: (
"model.layers.{bid}.x_proj", "model.layers.{bid}.x_proj",
"backbone.layers.{bid}.mixer.x_proj", "backbone.layers.{bid}.mixer.x_proj",
), ),
MODEL_TENSOR.SSM_DT: ( MODEL_TENSOR.SSM_DT: (
"model.layers.{bid}.dt_proj", "model.layers.{bid}.dt_proj",
"backbone.layers.{bid}.mixer.dt_proj", "backbone.layers.{bid}.mixer.dt_proj",
), ),
MODEL_TENSOR.SSM_A: ( MODEL_TENSOR.SSM_A: (
"model.layers.{bid}.A_log", "model.layers.{bid}.A_log",
"backbone.layers.{bid}.mixer.A_log", "backbone.layers.{bid}.mixer.A_log",
), ),
MODEL_TENSOR.SSM_D: ( MODEL_TENSOR.SSM_D: (
"model.layers.{bid}.D", "model.layers.{bid}.D",
"backbone.layers.{bid}.mixer.D", "backbone.layers.{bid}.mixer.D",
), ),
MODEL_TENSOR.SSM_OUT: ( MODEL_TENSOR.SSM_OUT: (
"model.layers.{bid}.out_proj", "model.layers.{bid}.out_proj",
"backbone.layers.{bid}.mixer.out_proj", "backbone.layers.{bid}.mixer.out_proj",
), ),
MODEL_TENSOR.ATTN_Q_A: ("model.layers.{bid}.self_attn.q_a_proj",), # deepseek2
MODEL_TENSOR.ATTN_Q_A: ( MODEL_TENSOR.ATTN_Q_B: ("model.layers.{bid}.self_attn.q_b_proj",), # deepseek2
"model.layers.{bid}.self_attn.q_a_proj", # deepseek2
),
MODEL_TENSOR.ATTN_Q_B: (
"model.layers.{bid}.self_attn.q_b_proj", # deepseek2
),
MODEL_TENSOR.ATTN_KV_A_MQA: ( MODEL_TENSOR.ATTN_KV_A_MQA: (
"model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2 "model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
), ),
MODEL_TENSOR.ATTN_KV_B: ( MODEL_TENSOR.ATTN_KV_B: (
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2 "model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
), ),
MODEL_TENSOR.ATTN_Q_A_NORM: ( MODEL_TENSOR.ATTN_Q_A_NORM: (
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2 "model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
), ),
MODEL_TENSOR.ATTN_KV_A_NORM: ( MODEL_TENSOR.ATTN_KV_A_NORM: (
"model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2 "model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
), ),
MODEL_TENSOR.ATTN_SUB_NORM: ( MODEL_TENSOR.ATTN_SUB_NORM: (
"model.layers.{bid}.self_attn.inner_attn_ln", # bitnet "model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
), ),
MODEL_TENSOR.FFN_SUB_NORM: ("model.layers.{bid}.mlp.ffn_layernorm",), # bitnet
MODEL_TENSOR.FFN_SUB_NORM: ( MODEL_TENSOR.DEC_ATTN_NORM: ("decoder.block.{bid}.layer.0.layer_norm",), # t5
"model.layers.{bid}.mlp.ffn_layernorm", # bitnet MODEL_TENSOR.DEC_ATTN_Q: ("decoder.block.{bid}.layer.0.SelfAttention.q",), # t5
), MODEL_TENSOR.DEC_ATTN_K: ("decoder.block.{bid}.layer.0.SelfAttention.k",), # t5
MODEL_TENSOR.DEC_ATTN_V: ("decoder.block.{bid}.layer.0.SelfAttention.v",), # t5
MODEL_TENSOR.DEC_ATTN_NORM: (
"decoder.block.{bid}.layer.0.layer_norm", # t5
),
MODEL_TENSOR.DEC_ATTN_Q: (
"decoder.block.{bid}.layer.0.SelfAttention.q", # t5
),
MODEL_TENSOR.DEC_ATTN_K: (
"decoder.block.{bid}.layer.0.SelfAttention.k", # t5
),
MODEL_TENSOR.DEC_ATTN_V: (
"decoder.block.{bid}.layer.0.SelfAttention.v", # t5
),
MODEL_TENSOR.DEC_ATTN_OUT: ( MODEL_TENSOR.DEC_ATTN_OUT: (
"decoder.block.{bid}.layer.0.SelfAttention.o", # t5 "decoder.block.{bid}.layer.0.SelfAttention.o", # t5
), ),
MODEL_TENSOR.DEC_ATTN_REL_B: ( MODEL_TENSOR.DEC_ATTN_REL_B: (
"decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5 "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_NORM: ( MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
"decoder.block.{bid}.layer.1.layer_norm", # t5 "decoder.block.{bid}.layer.1.layer_norm", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_Q: ( MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
"decoder.block.{bid}.layer.1.EncDecAttention.q", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_K: ( MODEL_TENSOR.DEC_CROSS_ATTN_K: (
"decoder.block.{bid}.layer.1.EncDecAttention.k", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_V: ( MODEL_TENSOR.DEC_CROSS_ATTN_V: (
"decoder.block.{bid}.layer.1.EncDecAttention.v", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_OUT: ( MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
"decoder.block.{bid}.layer.1.EncDecAttention.o", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: ( MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
"decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
), ),
MODEL_TENSOR.DEC_FFN_NORM: ("decoder.block.{bid}.layer.2.layer_norm",), # t5
MODEL_TENSOR.DEC_FFN_NORM: (
"decoder.block.{bid}.layer.2.layer_norm", # t5
),
MODEL_TENSOR.DEC_FFN_GATE: ( MODEL_TENSOR.DEC_FFN_GATE: (
"decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5 "decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
), ),
MODEL_TENSOR.DEC_FFN_UP: ( MODEL_TENSOR.DEC_FFN_UP: (
"decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5 "decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
"decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5 "decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
), ),
MODEL_TENSOR.DEC_FFN_DOWN: ( MODEL_TENSOR.DEC_FFN_DOWN: (
"decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5 "decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
), ),
MODEL_TENSOR.DEC_OUTPUT_NORM: ("decoder.final_layer_norm",), # t5
MODEL_TENSOR.DEC_OUTPUT_NORM: ( MODEL_TENSOR.ENC_ATTN_NORM: ("encoder.block.{bid}.layer.0.layer_norm",), # t5
"decoder.final_layer_norm", # t5 MODEL_TENSOR.ENC_ATTN_Q: ("encoder.block.{bid}.layer.0.SelfAttention.q",), # t5
), MODEL_TENSOR.ENC_ATTN_K: ("encoder.block.{bid}.layer.0.SelfAttention.k",), # t5
MODEL_TENSOR.ENC_ATTN_V: ("encoder.block.{bid}.layer.0.SelfAttention.v",), # t5
MODEL_TENSOR.ENC_ATTN_NORM: (
"encoder.block.{bid}.layer.0.layer_norm", # t5
),
MODEL_TENSOR.ENC_ATTN_Q: (
"encoder.block.{bid}.layer.0.SelfAttention.q", # t5
),
MODEL_TENSOR.ENC_ATTN_K: (
"encoder.block.{bid}.layer.0.SelfAttention.k", # t5
),
MODEL_TENSOR.ENC_ATTN_V: (
"encoder.block.{bid}.layer.0.SelfAttention.v", # t5
),
MODEL_TENSOR.ENC_ATTN_OUT: ( MODEL_TENSOR.ENC_ATTN_OUT: (
"encoder.block.{bid}.layer.0.SelfAttention.o", # t5 "encoder.block.{bid}.layer.0.SelfAttention.o", # t5
), ),
MODEL_TENSOR.ENC_ATTN_REL_B: ( MODEL_TENSOR.ENC_ATTN_REL_B: (
"encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5 "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
), ),
MODEL_TENSOR.ENC_FFN_NORM: ("encoder.block.{bid}.layer.1.layer_norm",), # t5
MODEL_TENSOR.ENC_FFN_NORM: (
"encoder.block.{bid}.layer.1.layer_norm", # t5
),
MODEL_TENSOR.ENC_FFN_GATE: ( MODEL_TENSOR.ENC_FFN_GATE: (
"encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5 "encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
), ),
MODEL_TENSOR.ENC_FFN_UP: ( MODEL_TENSOR.ENC_FFN_UP: (
"encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5 "encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
"encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5 "encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
), ),
MODEL_TENSOR.ENC_FFN_DOWN: ( MODEL_TENSOR.ENC_FFN_DOWN: (
"encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5 "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
), ),
MODEL_TENSOR.ENC_OUTPUT_NORM: ("encoder.final_layer_norm",), # t5
MODEL_TENSOR.ENC_OUTPUT_NORM: (
"encoder.final_layer_norm", # t5
),
} }
# architecture-specific block mappings # architecture-specific block mappings
arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = { arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = {
MODEL_ARCH.ARCTIC: { MODEL_ARCH.ARCTIC: {
MODEL_TENSOR.FFN_NORM: ( MODEL_TENSOR.FFN_NORM: ("model.layers.{bid}.residual_layernorm",),
"model.layers.{bid}.residual_layernorm", MODEL_TENSOR.FFN_NORM_EXP: ("model.layers.{bid}.post_attention_layernorm",),
),
MODEL_TENSOR.FFN_NORM_EXP: (
"model.layers.{bid}.post_attention_layernorm",
),
}, },
} }
@ -609,7 +496,9 @@ def __init__(self, arch: MODEL_ARCH, n_blocks: int):
key = key.format(bid=bid) key = key.format(bid=bid)
self.mapping[key] = (tensor, tensor_name) self.mapping[key] = (tensor, tensor_name)
def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None: def get_type_and_name(
self, key: str, try_suffixes: Sequence[str] = ()
) -> tuple[MODEL_TENSOR, str] | None:
result = self.mapping.get(key) result = self.mapping.get(key)
if result is not None: if result is not None:
return result return result
@ -626,7 +515,9 @@ def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
return None return None
return result[1] return result[1]
def get_type(self, key: str, try_suffixes: Sequence[str] = ()) -> MODEL_TENSOR | None: def get_type(
self, key: str, try_suffixes: Sequence[str] = ()
) -> MODEL_TENSOR | None:
result = self.get_type_and_name(key, try_suffixes=try_suffixes) result = self.get_type_and_name(key, try_suffixes=try_suffixes)
if result is None: if result is None:
return None return None
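A hedged usage sketch of this mapping table; the get_tensor_name_map helper, the LLAMA architecture constant and the expected "blk.0.attn_q" result are assumptions about the surrounding gguf package rather than something shown in this diff:

import gguf  # assumed to expose get_tensor_name_map, MODEL_ARCH and MODEL_TENSOR

tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, 32)
result = tmap.get_type_and_name(
    "model.layers.0.self_attn.q_proj.weight", try_suffixes=(".weight", ".bias")
)
# Expected to be roughly (MODEL_TENSOR.ATTN_Q, "blk.0.attn_q")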

@ -7,12 +7,18 @@ def fill_templated_filename(filename: str, output_type: str | None) -> str:
# Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf' # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
ftype_lowercase: str = output_type.lower() if output_type is not None else "" ftype_lowercase: str = output_type.lower() if output_type is not None else ""
ftype_uppercase: str = output_type.upper() if output_type is not None else "" ftype_uppercase: str = output_type.upper() if output_type is not None else ""
return filename.format(ftype_lowercase, return filename.format(
outtype=ftype_lowercase, ftype=ftype_lowercase, ftype_lowercase,
OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase) outtype=ftype_lowercase,
ftype=ftype_lowercase,
OUTTYPE=ftype_uppercase,
FTYPE=ftype_uppercase,
)
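A quick usage illustration, restated standalone (behaviour assumed identical to the function above):

def fill_templated_filename(filename: str, output_type: str | None) -> str:
    ftype_lowercase = output_type.lower() if output_type is not None else ""
    ftype_uppercase = output_type.upper() if output_type is not None else ""
    return filename.format(ftype_lowercase,
                           outtype=ftype_lowercase, ftype=ftype_lowercase,
                           OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)

print(fill_templated_filename("some-model-name-{ftype}.gguf", "Q8_0"))  # some-model-name-q8_0.gguf
print(fill_templated_filename("some-model-name-{FTYPE}.gguf", "q8_0"))  # some-model-name-Q8_0.gguf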
def model_weight_count_rounded_notation(model_params_count: int, min_digits: int = 2) -> str: def model_weight_count_rounded_notation(
model_params_count: int, min_digits: int = 2
) -> str:
if model_params_count > 1e12: if model_params_count > 1e12:
# Trillions Of Parameters # Trillions Of Parameters
scaled_model_params = model_params_count * 1e-12 scaled_model_params = model_params_count * 1e-12
@ -30,39 +36,65 @@ def model_weight_count_rounded_notation(model_params_count: int, min_digits: int
scaled_model_params = model_params_count * 1e-3 scaled_model_params = model_params_count * 1e-3
scale_suffix = "K" scale_suffix = "K"
fix = max(min_digits - len(str(round(scaled_model_params)).lstrip('0')), 0) fix = max(min_digits - len(str(round(scaled_model_params)).lstrip("0")), 0)
return f"{scaled_model_params:.{fix}f}{scale_suffix}" return f"{scaled_model_params:.{fix}f}{scale_suffix}"
def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str: def size_label(
total_params: int, shared_params: int, expert_params: int, expert_count: int
) -> str:
if expert_count > 0: if expert_count > 0:
pretty_size = model_weight_count_rounded_notation(abs(shared_params) + abs(expert_params), min_digits=2) pretty_size = model_weight_count_rounded_notation(
abs(shared_params) + abs(expert_params), min_digits=2
)
size_class = f"{expert_count}x{pretty_size}" size_class = f"{expert_count}x{pretty_size}"
else: else:
size_class = model_weight_count_rounded_notation(abs(total_params), min_digits=2) size_class = model_weight_count_rounded_notation(
abs(total_params), min_digits=2
)
return size_class return size_class
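Two illustrative calls (import path assumed, parameter counts made up for the arithmetic):

from gguf.utility import size_label  # assumed import path

# Dense model: labelled from the total parameter count.
print(size_label(6_738_415_616, 0, 0, 0))                           # "6.7B"
# Mixture-of-experts: "<expert count>x<shared + per-expert size>".
print(size_label(46_000_000_000, 1_593_835_520, 5_622_128_640, 8))  # "8x7.2B"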
def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str: def naming_convention(
model_name: str | None,
base_name: str | None,
finetune_string: str | None,
version_string: str | None,
size_label: str | None,
output_type: str | None,
model_type: Literal["vocab", "LoRA"] | None = None,
) -> str:
# Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
if base_name is not None: if base_name is not None:
name = base_name.strip().replace(' ', '-').replace('/', '-') name = base_name.strip().replace(" ", "-").replace("/", "-")
elif model_name is not None: elif model_name is not None:
name = model_name.strip().replace(' ', '-').replace('/', '-') name = model_name.strip().replace(" ", "-").replace("/", "-")
else: else:
name = "ggml-model" name = "ggml-model"
parameters = f"-{size_label}" if size_label is not None else "" parameters = f"-{size_label}" if size_label is not None else ""
finetune = f"-{finetune_string.strip().replace(' ', '-')}" if finetune_string is not None else "" finetune = (
f"-{finetune_string.strip().replace(' ', '-')}"
if finetune_string is not None
else ""
)
version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else "" version = (
f"-{version_string.strip().replace(' ', '-')}"
if version_string is not None
else ""
)
encoding = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else "" encoding = (
f"-{output_type.strip().replace(' ', '-').upper()}"
if output_type is not None
else ""
)
kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else "" kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""

@ -5,7 +5,16 @@
import json import json
import os import os
from pathlib import Path from pathlib import Path
from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable from typing import (
Any,
Callable,
Sequence,
Mapping,
Iterable,
Protocol,
ClassVar,
runtime_checkable,
)
from sentencepiece import SentencePieceProcessor from sentencepiece import SentencePieceProcessor
@ -23,7 +32,9 @@ class SpecialVocab:
chat_template: str | Sequence[Mapping[str, str]] | None chat_template: str | Sequence[Mapping[str, str]] | None
def __init__( def __init__(
self, path: str | os.PathLike[str], load_merges: bool = False, self,
path: str | os.PathLike[str],
load_merges: bool = False,
special_token_types: Iterable[str] | None = None, special_token_types: Iterable[str] | None = None,
n_vocab: int | None = None, n_vocab: int | None = None,
): ):
@ -36,40 +47,60 @@ def __init__(
if special_token_types is not None: if special_token_types is not None:
self.special_token_types = special_token_types self.special_token_types = special_token_types
else: else:
self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad', 'cls', 'mask') self.special_token_types = (
"bos",
"eos",
"unk",
"sep",
"pad",
"cls",
"mask",
)
self._load(Path(path)) self._load(Path(path))
def __repr__(self) -> str: def __repr__(self) -> str:
return '<SpecialVocab with {} merges, special tokens {}, add special tokens {}>'.format( return "<SpecialVocab with {} merges, special tokens {}, add special tokens {}>".format(
len(self.merges), self.special_token_ids or "unset", self.add_special_token or "unset", len(self.merges),
self.special_token_ids or "unset",
self.add_special_token or "unset",
) )
def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None: def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
if self.merges: if self.merges:
if not quiet: if not quiet:
logger.info(f'Adding {len(self.merges)} merge(s).') logger.info(f"Adding {len(self.merges)} merge(s).")
gw.add_token_merges(self.merges) gw.add_token_merges(self.merges)
elif self.load_merges: elif self.load_merges:
logger.warning('Adding merges requested but no merges found, output may be non-functional.') logger.warning(
"Adding merges requested but no merges found, output may be non-functional."
)
for typ, tokid in self.special_token_ids.items(): for typ, tokid in self.special_token_ids.items():
id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None) id_handler: Callable[[int], None] | None = getattr(
gw, f"add_{typ}_token_id", None
)
if id_handler is None: if id_handler is None:
logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping') logger.warning(
f"No handler for special token type {typ} with id {tokid} - skipping"
)
continue continue
if not quiet: if not quiet:
logger.info(f'Setting special token type {typ} to {tokid}') logger.info(f"Setting special token type {typ} to {tokid}")
id_handler(tokid) id_handler(tokid)
for typ, value in self.add_special_token.items(): for typ, value in self.add_special_token.items():
add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None) add_handler: Callable[[bool], None] | None = getattr(
gw, f"add_add_{typ}_token", None
)
if add_handler is None: if add_handler is None:
logger.warning(f'No handler for add_{typ}_token with value {value} - skipping') logger.warning(
f"No handler for add_{typ}_token with value {value} - skipping"
)
continue continue
if not quiet: if not quiet:
logger.info(f'Setting add_{typ}_token to {value}') logger.info(f"Setting add_{typ}_token to {value}")
add_handler(value) add_handler(value)
if self.chat_template is not None: if self.chat_template is not None:
if not quiet: if not quiet:
logger.info(f'Setting chat_template to {self.chat_template}') logger.info(f"Setting chat_template to {self.chat_template}")
gw.add_chat_template(self.chat_template) gw.add_chat_template(self.chat_template)
def _load(self, path: Path) -> None: def _load(self, path: Path) -> None:
@@ -79,12 +110,12 @@ def _load(self, path: Path) -> None:
        self._try_load_merges_txt(path)

    def _try_load_merges_txt(self, path: Path) -> bool:
        merges_file = path / "merges.txt"
        if not merges_file.is_file():
            return False
        with open(merges_file, "r", encoding="utf-8") as fp:
            first_line = next(fp, "").strip()
            if not first_line.startswith("#"):
                fp.seek(0)
                line_num = 0
            else:
@@ -97,9 +128,11 @@ def _try_load_merges_txt(self, path: Path) -> bool:
                    continue
                parts = line.split(None, 3)
                if len(parts) != 2:
                    logger.warning(
                        f"{merges_file.name}: Line {line_num}: Entry malformed, ignoring"
                    )
                    continue
                merges.append(f"{parts[0]} {parts[1]}")
        self.merges = merges
        return True
@@ -107,45 +140,49 @@ def _set_special_token(self, typ: str, tid: Any) -> None:
        if not isinstance(tid, int):
            return
        if tid < 0:
            raise ValueError(f"invalid value for special token type {typ}: {tid}")
        if self.n_vocab is None or tid < self.n_vocab:
            if typ in self.special_token_ids:
                return
            self.special_token_ids[typ] = tid
            return
        logger.warning(
            f"Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping"
        )
    def _try_load_from_tokenizer_json(self, path: Path) -> bool:
        tokenizer_file = path / "tokenizer.json"
        if tokenizer_file.is_file():
            with open(tokenizer_file, encoding="utf-8") as f:
                tokenizer = json.load(f)
            if self.load_merges:
                merges = tokenizer.get("model", {}).get("merges")
                if isinstance(merges, list) and merges and isinstance(merges[0], str):
                    self.merges = merges
            added_tokens = tokenizer.get("added_tokens", {})
        else:
            added_tokens = {}
        tokenizer_config_file = path / "tokenizer_config.json"
        if not tokenizer_config_file.is_file():
            return True
        with open(tokenizer_config_file, encoding="utf-8") as f:
            tokenizer_config = json.load(f)
        chat_template = tokenizer_config.get("chat_template")
        if chat_template is None or isinstance(chat_template, (str, list)):
            self.chat_template = chat_template
        else:
            logger.warning(
                f"Bad type for chat_template field in {tokenizer_config_file!r} - ignoring"
            )
        for typ in self.special_token_types:
            add_entry = tokenizer_config.get(f"add_{typ}_token")
            if isinstance(add_entry, bool):
                self.add_special_token[typ] = add_entry
            entry = tokenizer_config.get(f"{typ}_token")
            if isinstance(entry, str):
                tc_content = entry
            elif isinstance(entry, dict):
                entry_content = entry.get("content")
                if not isinstance(entry_content, str):
                    continue
                tc_content = entry_content
@@ -153,20 +190,24 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
                continue
            # We only need the first match here.
            maybe_token_id = next(
                (
                    atok.get("id")
                    for atok in added_tokens
                    if atok.get("content") == tc_content
                ),
                None,
            )
            self._set_special_token(typ, maybe_token_id)
        return True

    def _try_load_from_config_json(self, path: Path) -> bool:
        config_file = path / "config.json"
        if not config_file.is_file():
            return False
        with open(config_file, encoding="utf-8") as f:
            config = json.load(f)
        for typ in self.special_token_types:
            self._set_special_token(typ, config.get(f"{typ}_token_id"))
        return True
@@ -202,47 +243,52 @@ class BpeVocab(Vocab):
    def __init__(self, base_path: Path):
        added_tokens: dict[str, int] = {}

        if (fname_tokenizer := base_path / "vocab.json").exists():
            # "slow" tokenizer
            with open(fname_tokenizer, encoding="utf-8") as f:
                self.vocab = json.load(f)

            try:
                # FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
                with open(base_path / "added_tokens.json", encoding="utf-8") as f:
                    added_tokens = json.load(f)
            except FileNotFoundError:
                pass
        else:
            # "fast" tokenizer
            fname_tokenizer = base_path / "tokenizer.json"

            # if this fails, FileNotFoundError propagates to caller
            with open(fname_tokenizer, encoding="utf-8") as f:
                tokenizer_json = json.load(f)

            tokenizer_model: dict[str, Any] = tokenizer_json["model"]
            if (
                tokenizer_model["type"] != "BPE"
                or tokenizer_model.get("byte_fallback", False)
                or tokenizer_json["decoder"]["type"] != "ByteLevel"
            ):
                raise FileNotFoundError("Cannot find GPT-2 BPE tokenizer")

            self.vocab = tokenizer_model["vocab"]

            if (added := tokenizer_json.get("added_tokens")) is not None:
                # Added tokens here can be duplicates of the main vocabulary.
                added_tokens = {
                    item["content"]: item["id"]
                    for item in added
                    if item["content"] not in self.vocab
                }

        vocab_size = len(self.vocab)
        expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
        actual_ids = sorted(added_tokens.values())
        if expected_ids != actual_ids:
            expected_end_id = vocab_size + len(actual_ids) - 1
            raise ValueError(
                f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
                f"{vocab_size} - {expected_end_id}; got {actual_ids}"
            )

        items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
        self.added_tokens_dict = added_tokens
@@ -276,27 +322,31 @@ class SentencePieceVocab(Vocab):
    def __init__(self, base_path: Path):
        added_tokens: dict[str, int] = {}
        if (fname_tokenizer := base_path / "tokenizer.model").exists():
            # normal location
            try:
                with open(base_path / "added_tokens.json", encoding="utf-8") as f:
                    added_tokens = json.load(f)
            except FileNotFoundError:
                pass
        elif not (fname_tokenizer := base_path.parent / "tokenizer.model").exists():
            # not found in alternate location either
            raise FileNotFoundError("Cannot find tokenizer.model")

        self.sentencepiece_tokenizer = SentencePieceProcessor()
        self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer))
        vocab_size = self.sentencepiece_tokenizer.vocab_size()

        new_tokens = {
            id: piece for piece, id in added_tokens.items() if id >= vocab_size
        }
        expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens)))
        actual_new_ids = sorted(new_tokens.keys())

        if expected_new_ids != actual_new_ids:
            raise ValueError(
                f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}"
            )

        # Token pieces that were added to the base vocabulary.
        self.added_tokens_dict = added_tokens
@@ -347,25 +397,27 @@ class LlamaHfVocab(Vocab):
    name = "hfft"

    def __init__(self, base_path: Path):
        fname_tokenizer = base_path / "tokenizer.json"
        # if this fails, FileNotFoundError propagates to caller
        with open(fname_tokenizer, encoding="utf-8") as f:
            tokenizer_json = json.load(f)

        # pre-check so we know if we need transformers
        tokenizer_model: dict[str, Any] = tokenizer_json["model"]
        is_llama3 = (
            tokenizer_model["type"] == "BPE"
            and tokenizer_model.get("ignore_merges", False)
            and not tokenizer_model.get("byte_fallback", True)
        )
        if is_llama3:
            raise TypeError("Llama 3 must be converted with BpeVocab")

        if not is_llama3 and (
            tokenizer_model["type"] != "BPE"
            or not tokenizer_model.get("byte_fallback", False)
            or tokenizer_json["decoder"]["type"] != "Sequence"
        ):
            raise FileNotFoundError("Cannot find Llama BPE tokenizer")

        try:
            from transformers import AutoTokenizer
@@ -427,16 +479,22 @@ def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
            # Yield token text, score, and type
            yield token_text, self.get_token_score(token_id), self.get_token_type(
                token_id,
                token_text,
                self.special_ids,  # Reuse already stored special IDs
            )

    def get_token_type(
        self, token_id: int, token_text: bytes, special_ids: set[int]
    ) -> gguf.TokenType:
        # Special case for byte tokens
        if re.fullmatch(rb"<0x[0-9A-Fa-f]{2}>", token_text):
            return gguf.TokenType.BYTE

        # Determine token type based on whether it's a special token
        return (
            gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL
        )

    def get_token_score(self, token_id: int) -> float:
        # Placeholder for actual logic to determine the token's score
@@ -446,7 +504,9 @@ def get_token_score(self, token_id: int) -> float:
    def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        for text in self.added_tokens_list:
            if text in self.specials:
                toktype = self.get_token_type(
                    self.specials[text], b"", self.special_ids
                )
                score = self.get_token_score(self.specials[text])
            else:
                toktype = gguf.TokenType.USER_DEFINED
View File

@@ -9,25 +9,52 @@
import requests
import zipfile
from datetime import datetime
from PyQt6.QtWidgets import (
    QApplication,
    QMainWindow,
    QVBoxLayout,
    QHBoxLayout,
    QWidget,
    QPushButton,
    QListWidget,
    QLineEdit,
    QLabel,
    QFileDialog,
    QProgressBar,
    QComboBox,
    QTextEdit,
    QCheckBox,
    QGroupBox,
    QFormLayout,
    QScrollArea,
    QSlider,
    QSpinBox,
    QListWidgetItem,
    QMessageBox,
    QDialog,
    QPlainTextEdit,
    QMenu,
)
from PyQt6.QtCore import QTimer, QThread, pyqtSignal, Qt, QSize
from PyQt6.QtGui import QCloseEvent, QAction


def ensure_directory(path):
    if not os.path.exists(path):
        os.makedirs(path)


def open_file_safe(file_path, mode="r"):
    encodings = ["utf-8", "latin-1", "ascii", "utf-16"]
    for encoding in encodings:
        try:
            return open(file_path, mode, encoding=encoding)
        except UnicodeDecodeError:
            continue
    raise ValueError(
        f"Unable to open file {file_path} with any of the encodings: {encodings}"
    )


def resource_path(relative_path):
    try:
File diff suppressed because it is too large