style: format code with Black

BuildTools 2024-08-04 19:50:34 -07:00
parent 2dc5bd9e8a
commit fa51f7cdb8
21 changed files with 8215 additions and 6922 deletions
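
For context: a pass like this is normally produced by running `black .` over the repository. The same normalization can be reproduced through Black's Python API; a minimal sketch, assuming Black's default mode (88-column lines, double-quote preference), since the project's actual configuration is not part of this commit:

import black

# One of the one-liners reformatted below, run through Black's defaults:
# single quotes are normalized to double quotes, line length permitting.
src = "total_size = int(response.headers.get('content-length', 0))\n"
print(black.format_str(src, mode=black.Mode()), end="")
# total_size = int(response.headers.get("content-length", 0))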

View File

@@ -452,8 +452,13 @@ def __init__(self):
# Output Type Dropdown
self.lora_output_type_combo = QComboBox()
self.lora_output_type_combo.addItems(["GGML", "GGUF"])
self.lora_output_type_combo.currentIndexChanged.connect(self.update_base_model_visibility)
lora_layout.addRow(self.create_label(OUTPUT_TYPE, SELECT_OUTPUT_TYPE), self.lora_output_type_combo)
self.lora_output_type_combo.currentIndexChanged.connect(
self.update_base_model_visibility
)
lora_layout.addRow(
self.create_label(OUTPUT_TYPE, SELECT_OUTPUT_TYPE),
self.lora_output_type_combo,
)
# Base Model Path (initially hidden)
self.base_model_label = self.create_label(BASE_MODEL, SELECT_BASE_MODEL_FILE)
@@ -471,7 +476,9 @@ def __init__(self):
wrapper_layout = QHBoxLayout(self.base_model_wrapper)
wrapper_layout.addWidget(self.base_model_label)
wrapper_layout.addWidget(self.base_model_widget, 1) # Give it a stretch factor
wrapper_layout.setContentsMargins(0, 0, 0, 0) # Remove margins for better alignment
wrapper_layout.setContentsMargins(
0, 0, 0, 0
) # Remove margins for better alignment
# Add the wrapper to the layout
lora_layout.addRow(self.base_model_wrapper)
@@ -1395,7 +1402,7 @@ def quantize_model(self):
override_string = entry.get_override_string(
model_name=model_name,
quant_type=quant_type,
output_path=output_path
output_path=output_path,
)
if override_string:
command.extend(["--override-kv", override_string])
@@ -1430,7 +1437,9 @@ def quantize_model(self):
self.task_list.setItemWidget(list_item, task_item)
# Connect the output signal to the new progress parsing function
thread.output_signal.connect(lambda line: self.parse_progress(line, task_item))
thread.output_signal.connect(
lambda line: self.parse_progress(line, task_item)
)
thread.status_signal.connect(task_item.update_status)
thread.finished_signal.connect(lambda: self.task_finished(thread))
thread.error_signal.connect(lambda err: self.handle_error(err, task_item))

View File

@@ -13,6 +13,7 @@
import zipfile
from datetime import datetime
class DownloadThread(QThread):
progress_signal = pyqtSignal(int)
finished_signal = pyqtSignal(str)
@@ -27,11 +28,11 @@ def run(self):
try:
response = requests.get(self.url, stream=True)
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
total_size = int(response.headers.get("content-length", 0))
block_size = 8192
downloaded = 0
with open(self.save_path, 'wb') as file:
with open(self.save_path, "wb") as file:
for data in response.iter_content(block_size):
size = file.write(data)
downloaded += size
@@ -41,7 +42,7 @@ def run(self):
# Extract the downloaded zip file
extract_dir = os.path.splitext(self.save_path)[0]
with zipfile.ZipFile(self.save_path, 'r') as zip_ref:
with zipfile.ZipFile(self.save_path, "r") as zip_ref:
zip_ref.extractall(extract_dir)
# Remove the zip file after extraction

View File

@@ -7,6 +7,7 @@
import socket
import platform
class KVOverrideEntry(QWidget):
deleted = pyqtSignal(QWidget)
@@ -44,7 +45,9 @@ def __init__(self, parent=None):
def delete_clicked(self):
self.deleted.emit(self)
def get_override_string(self, model_name=None, quant_type=None, output_path=None): # Add arguments
def get_override_string(
self, model_name=None, quant_type=None, output_path=None
): # Add arguments
key = self.key_input.text()
type_ = self.type_combo.currentText()
value = self.value_input.text()
@@ -60,9 +63,15 @@ def get_override_string(self, model_name=None, quant_type=None, output_path=None
"{system.python.version}": lambda: platform.python_version(),
"{system.time.milliseconds}": lambda: str(int(time.time() * 1000)),
"{system.date}": lambda: datetime.now().strftime("%Y-%m-%d"),
"{model.name}": lambda: model_name if model_name is not None else "Unknown Model",
"{quant.type}": lambda: quant_type if quant_type is not None else "Unknown Quant",
"{output.path}": lambda: output_path if output_path is not None else "Unknown Output Path",
"{model.name}": lambda: (
model_name if model_name is not None else "Unknown Model"
),
"{quant.type}": lambda: (
quant_type if quant_type is not None else "Unknown Quant"
),
"{output.path}": lambda: (
output_path if output_path is not None else "Unknown Output Path"
),
}
for param, func in dynamic_params.items():

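The dynamic_params table above maps placeholder tokens to zero-argument callables, and the substitution loop that follows it is truncated by the hunk boundary. A hedged sketch of how such a loop plausibly completes (the replace-based body and the returned key=type:value shape are assumptions, not shown in this diff):

for param, func in dynamic_params.items():
    if param in value:
        # Assumed body: swap each placeholder for its computed value.
        value = value.replace(param, func())
return f"{key}={type_}:{value}"  # hypothetical llama.cpp --override-kv shape
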
View File

@@ -4,6 +4,7 @@
import sys
from datetime import datetime
class Logger:
def __init__(self, name, log_dir):
self.logger = logging.getLogger(name)
@@ -15,15 +16,19 @@ def __init__(self, name, log_dir):
# Console handler
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
console_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
console_handler.setFormatter(console_format)
# File handler
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = os.path.join(log_dir, f"latest_{timestamp}.log")
file_handler = RotatingFileHandler(log_file, maxBytes=10*1024*1024, backupCount=5, encoding='utf-8')
file_handler = RotatingFileHandler(
log_file, maxBytes=10 * 1024 * 1024, backupCount=5, encoding="utf-8"
)
file_handler.setLevel(logging.DEBUG)
file_format = logging.Formatter('%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s')
file_format = logging.Formatter(
"%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
)
file_handler.setFormatter(file_format)
# Add handlers to logger

View File

@@ -13,6 +13,7 @@
import zipfile
from datetime import datetime
class ModelInfoDialog(QDialog):
def __init__(self, model_info, parent=None):
super().__init__(parent)
@@ -41,8 +42,7 @@ def format_model_info(self, model_info):
html += f"<p><b>Tensors:</b> {model_info.get('tensors', 'N/A')}</p>"
html += "<h3>Key-Value Pairs:</h3>"
for key, value in model_info.get('kv_data', {}).items():
for key, value in model_info.get("kv_data", {}).items():
html += f"<p><b>{key}:</b> {value}</p>"
return html

View File

@@ -15,6 +15,7 @@
from datetime import datetime
from imports_and_globals import open_file_safe
class QuantizationThread(QThread):
# Define custom signals for communication with the main thread
output_signal = pyqtSignal(str)

View File

@@ -13,6 +13,7 @@
import zipfile
from datetime import datetime
class TaskListItem(QWidget):
def __init__(self, task_name, log_file, show_progress_bar=True, parent=None):
super().__init__(parent)

View File

@@ -12,8 +12,8 @@
import numpy as np
import torch
if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
if "NO_LOCAL_GGUF" not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / "gguf-py" / "gguf"))
import gguf
logging.basicConfig(level=logging.DEBUG)
@@ -35,7 +35,9 @@ def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
fout.write(struct.pack("i", int(params["lora_alpha"])))
def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]) -> None:
def write_tensor_header(
fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]
) -> None:
sname = name.encode("utf-8")
fout.write(
struct.pack(
@@ -49,15 +51,21 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
fout.write(sname)
fout.seek((fout.tell() + 31) & -32)
def pyinstaller_include():
# PyInstaller import
pass
if __name__ == '__main__':
if __name__ == "__main__":
if len(sys.argv) < 2:
logger.info(f"Usage: python {sys.argv[0]} <path> [arch]")
logger.info("Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'")
logger.info(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
logger.info(
"Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
)
logger.info(
f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)"
)
sys.exit(1)
input_json = os.path.join(sys.argv[1], "adapter_config.json")
@@ -70,6 +78,7 @@ def pyinstaller_include():
input_model = os.path.join(sys.argv[1], "adapter_model.safetensors")
# lazy import load_file only if lora is in safetensors format.
from safetensors.torch import load_file
model = load_file(input_model, device="cpu")
arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
@@ -78,14 +87,18 @@ def pyinstaller_include():
logger.error(f"Error: unsupported architecture {arch_name}")
sys.exit(1)
arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
arch = list(gguf.MODEL_ARCH_NAMES.keys())[
list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)
]
name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone
with open(input_json, "r") as f:
params = json.load(f)
if params["peft_type"] != "LORA":
logger.error(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
logger.error(
f"Error: unsupported adapter type {params['peft_type']}, expected LORA"
)
sys.exit(1)
if params["fan_in_fan_out"] is True:
@@ -136,7 +149,9 @@ def pyinstaller_include():
tname = name_map.get_name(k)
if tname is None:
logger.error(f"Error: could not map tensor name {orig_k}")
logger.error(" Note: the arch parameter must be specified if the model is not llama")
logger.error(
" Note: the arch parameter must be specified if the model is not llama"
)
sys.exit(1)
if suffix == ".lora_A.weight":
@@ -146,7 +161,9 @@ def pyinstaller_include():
else:
assert False
logger.info(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
logger.info(
f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB"
)
write_tensor_header(fout, tname, t.shape, t.dtype)
t.tofile(fout)

View File

@@ -54,7 +54,9 @@ class General:
SOURCE_URL = "general.source.url" # Model Website/Paper
SOURCE_DOI = "general.source.doi"
SOURCE_UUID = "general.source.uuid"
SOURCE_REPO_URL = "general.source.repo_url" # Model Source Repository (git/svn/etc...)
SOURCE_REPO_URL = (
"general.source.repo_url" # Model Source Repository (git/svn/etc...)
)
# Base Model Source. There can be more than one source if it's a merged
# model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in
@@ -136,7 +138,9 @@ class Tokenizer:
PRE = "tokenizer.ggml.pre"
LIST = "tokenizer.ggml.tokens"
TOKEN_TYPE = "tokenizer.ggml.token_type"
TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count" # for BERT-style token types
TOKEN_TYPE_COUNT = (
"tokenizer.ggml.token_type_count" # for BERT-style token types
)
SCORES = "tokenizer.ggml.scores"
MERGES = "tokenizer.ggml.merges"
BOS_ID = "tokenizer.ggml.bos_token_id"
@@ -166,6 +170,7 @@ class Adapter:
TYPE = "adapter.type"
LORA_ALPHA = "adapter.lora.alpha"
#
# recommended mapping of model tensor names for storage in gguf
#
@@ -1104,9 +1109,9 @@ class TokenType(IntEnum):
class RopeScalingType(Enum):
NONE = 'none'
LINEAR = 'linear'
YARN = 'yarn'
NONE = "none"
LINEAR = "linear"
YARN = "yarn"
class PoolingType(IntEnum):

View File

@@ -67,7 +67,7 @@ class ReaderTensor(NamedTuple):
class GGUFReader:
# I - same as host, S - swapped
byte_order: Literal['I', 'S'] = 'I'
byte_order: Literal["I", "S"] = "I"
alignment: int = GGUF_DEFAULT_ALIGNMENT
data_offset: int
@@ -86,13 +86,15 @@ class GGUFReader:
GGUFValueType.BOOL: np.bool_,
}
def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'):
def __init__(
self, path: os.PathLike[str] | str, mode: Literal["r", "r+", "c"] = "r"
):
self.data = np.memmap(path, mode=mode)
offs = 0
# Check for GGUF magic
if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
raise ValueError('GGUF magic invalid')
if self._get(offs, np.uint32, override_order="<")[0] != GGUF_MAGIC:
raise ValueError("GGUF magic invalid")
offs += 4
# Check GGUF version
@@ -100,28 +102,46 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
if temp_version[0] & 65535 == 0:
# If we get 0 here that means it's (probably) a GGUF file created for
# the opposite byte order of the machine this script is running on.
self.byte_order = 'S'
self.byte_order = "S"
temp_version = temp_version.newbyteorder(self.byte_order)
version = temp_version[0]
if version not in READER_SUPPORTED_VERSIONS:
raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
raise ValueError(
f"Sorry, file appears to be version {version} which we cannot handle"
)
self.fields: OrderedDict[str, ReaderField] = OrderedDict()
self.tensors: list[ReaderTensor] = []
offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
offs += self._push_field(
ReaderField(
offs, "GGUF.version", [temp_version], [0], [GGUFValueType.UINT32]
)
)
# Check tensor count and kv count
temp_counts = self._get(offs, np.uint64, 2)
offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
offs += self._push_field(
ReaderField(
offs,
"GGUF.tensor_count",
[temp_counts[:1]],
[0],
[GGUFValueType.UINT64],
)
)
offs += self._push_field(
ReaderField(
offs, "GGUF.kv_count", [temp_counts[1:]], [0], [GGUFValueType.UINT64]
)
)
tensor_count, kv_count = temp_counts
offs = self._build_fields(offs, kv_count)
# Build Tensor Info Fields
offs, tensors_fields = self._build_tensor_info(offs, tensor_count)
new_align = self.fields.get('general.alignment')
new_align = self.fields.get("general.alignment")
if new_align is not None:
if new_align.types != [GGUFValueType.UINT32]:
raise ValueError('Bad type for general.alignment field')
raise ValueError("Bad type for general.alignment field")
self.alignment = new_align.parts[-1][0]
padding = offs % self.alignment
if padding != 0:
@@ -129,7 +149,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
self.data_offset = offs
self._build_tensors(offs, tensors_fields)
_DT = TypeVar('_DT', bound = npt.DTypeLike)
_DT = TypeVar("_DT", bound=npt.DTypeLike)
# Fetch a key/value metadata field by key.
def get_field(self, key: str) -> Union[ReaderField, None]:
@@ -140,7 +160,11 @@ def get_tensor(self, idx: int) -> ReaderTensor:
return self.tensors[idx]
def _get(
self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
self,
offset: int,
dtype: npt.DTypeLike,
count: int = 1,
override_order: None | Literal["I", "S", "<"] = None,
) -> npt.NDArray[Any]:
count = int(count)
itemsize = int(np.empty([], dtype=dtype).itemsize)
@@ -156,18 +180,22 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
# TODO: add option to generate error on duplicate keys
# raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
logger.warning(f'Duplicate key {field.name} at offset {field.offset}')
self.fields[field.name + '_{}'.format(field.offset)] = field
logger.warning(f"Duplicate key {field.name} at offset {field.offset}")
self.fields[field.name + "_{}".format(field.offset)] = field
else:
self.fields[field.name] = field
return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)
def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
def _get_str(
self, offset: int
) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
slen = self._get(offset, np.uint64)
return slen, self._get(offset + 8, np.uint8, slen[0])
def _get_field_parts(
self, orig_offs: int, raw_type: int,
self,
orig_offs: int,
raw_type: int,
) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
offs = orig_offs
types: list[GGUFValueType] = []
@@ -192,7 +220,9 @@ def _get_field_parts(
aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
data_idxs: list[int] = []
for idx in range(alen[0]):
curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(
offs, raw_itype[0]
)
if idx == 0:
types += curr_types
idxs_offs = len(aparts)
@@ -201,7 +231,7 @@ def _get_field_parts(
offs += curr_size
return offs - orig_offs, aparts, data_idxs, types
# We can't deal with this one.
raise ValueError('Unknown/unhandled field type {gtype}')
raise ValueError("Unknown/unhandled field type {gtype}")
def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
offs = orig_offs
@@ -228,7 +258,7 @@ def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
return ReaderField(
orig_offs,
str(bytes(name_data), encoding = 'utf-8'),
str(bytes(name_data), encoding="utf-8"),
[name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
[1, 3, 4, 5],
)
@@ -242,19 +272,26 @@ def _build_fields(self, offs: int, count: int) -> int:
offs += int(raw_kv_type.nbytes)
parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
idxs_offs = len(parts)
field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
field_size, field_parts, field_idxs, field_types = self._get_field_parts(
offs, raw_kv_type[0]
)
parts += field_parts
self._push_field(ReaderField(
self._push_field(
ReaderField(
orig_offs,
str(bytes(kv_kdata), encoding = 'utf-8'),
str(bytes(kv_kdata), encoding="utf-8"),
parts,
[idx + idxs_offs for idx in field_idxs],
field_types,
), skip_sum = True)
),
skip_sum=True,
)
offs += field_size
return offs
def _build_tensor_info(self, offs: int, count: int) -> tuple[int, list[ReaderField]]:
def _build_tensor_info(
self, offs: int, count: int
) -> tuple[int, list[ReaderField]]:
tensor_fields = []
for _ in range(count):
field = self._get_tensor_info_field(offs)
@@ -268,9 +305,9 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
for field in fields:
_name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
# check if there's any tensor having same name already in the list
tensor_name = str(bytes(name_data), encoding = 'utf-8')
tensor_name = str(bytes(name_data), encoding="utf-8")
if tensor_name in tensor_names:
raise ValueError(f'Found duplicated tensor with name {tensor_name}')
raise ValueError(f"Found duplicated tensor with name {tensor_name}")
tensor_names.add(tensor_name)
ggml_type = GGMLQuantizationType(raw_dtype[0])
n_elems = int(np.prod(dims))
@@ -304,7 +341,8 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
item_count = n_bytes
item_type = np.uint8
np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)
tensors.append(ReaderTensor(
tensors.append(
ReaderTensor(
name=tensor_name,
tensor_type=ggml_type,
shape=dims,
@@ -313,5 +351,6 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
data_offset=data_offs,
data=self._get(data_offs, item_type, item_count).reshape(np_dims),
field=field,
))
)
)
self.tensors = tensors

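Taken together, the constructor above validates the magic and version, then parses KV fields and tensor info into self.fields and self.tensors. A short usage sketch using only names defined in this file (the model path is illustrative):

reader = GGUFReader("model.gguf")  # memory-mapped, default mode "r"
arch = reader.get_field("general.architecture")  # ReaderField or None
for t in reader.tensors:
    print(t.name, t.tensor_type.name, tuple(t.shape))
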
View File

@@ -81,8 +81,15 @@ class GGUFWriter:
}
def __init__(
self, path: os.PathLike[str] | str | None, arch: str, use_temp_file: bool = False, endianess: GGUFEndian = GGUFEndian.LITTLE,
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False
self,
path: os.PathLike[str] | str | None,
arch: str,
use_temp_file: bool = False,
endianess: GGUFEndian = GGUFEndian.LITTLE,
split_max_tensors: int = 0,
split_max_size: int = 0,
dry_run: bool = False,
small_first_shard: bool = False,
):
self.fout = None
self.path = Path(path) if path else None
@@ -97,9 +104,11 @@ def __init__(
self.split_max_size = split_max_size
self.dry_run = dry_run
self.small_first_shard = small_first_shard
logger.info("gguf: This GGUF file is for {0} Endian only".format(
logger.info(
"gguf: This GGUF file is for {0} Endian only".format(
"Big" if self.endianess == GGUFEndian.BIG else "Little",
))
)
)
self.state = WriterState.NO_FILE
if self.small_first_shard:
@@ -128,7 +137,9 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
elif name.endswith(".lora_b"):
if last_lora_a is None or last_lora_a[0] != name[:-1] + "a":
# Bail when the LoRA pair can't be found trivially
logger.warning("can't measure LoRA size correctly, tensor order is unusual")
logger.warning(
"can't measure LoRA size correctly, tensor order is unusual"
)
return 0, 0, 0, 0
else:
shape = (*shape[:-1], last_lora_a[1].shape[-1])
@@ -136,7 +147,7 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
size = prod(shape)
if "_exps." in name:
expert_params += (size // shape[-3])
expert_params += size // shape[-3]
expert_sum += shape[-3]
n_expert_tensors += 1
else:
@@ -157,15 +168,26 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
def format_shard_names(self, path: Path) -> list[Path]:
if len(self.tensors) == 1:
return [path]
return [path.with_name(SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))) for i in range(len(self.tensors))]
return [
path.with_name(
SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))
)
for i in range(len(self.tensors))
]
def open_output_file(self, path: Path | None = None) -> None:
if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path):
if (
self.state is WriterState.EMPTY
and self.fout is not None
and (path is None or path == self.path)
):
# allow calling this multiple times as long as the path is the same
return
if self.state is not WriterState.NO_FILE:
raise ValueError(f'Expected output file to be not yet opened, got {self.state}')
raise ValueError(
f"Expected output file to be not yet opened, got {self.state}"
)
if path is not None:
self.path = path
@@ -181,7 +203,9 @@ def print_plan(self) -> list[Path]:
filenames = self.format_shard_names(self.path)
assert len(filenames) == len(self.tensors)
for name, tensors in zip(filenames, self.tensors):
logger.info(f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}")
logger.info(
f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}"
)
if self.dry_run:
logger.info("Dry run, not writing files")
@@ -201,17 +225,23 @@ def add_shard_kv_data(self) -> None:
self.kv_data.extend({} for _ in range(len(self.kv_data), total_splits))
for i, kv_data in enumerate(self.kv_data):
kv_data[Keys.Split.LLM_KV_SPLIT_NO] = GGUFValue(i, GGUFValueType.UINT16)
kv_data[Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(total_splits, GGUFValueType.UINT16)
kv_data[Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(total_tensors, GGUFValueType.INT32)
kv_data[Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(
total_splits, GGUFValueType.UINT16
)
kv_data[Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(
total_tensors, GGUFValueType.INT32
)
def write_header_to_file(self, path: Path | None = None) -> None:
if len(self.tensors) == 1 and (self.split_max_tensors != 0 or self.split_max_size != 0):
if len(self.tensors) == 1 and (
self.split_max_tensors != 0 or self.split_max_size != 0
):
logger.warning("Model fails split requirements, not splitting")
self.open_output_file(path)
if self.state is not WriterState.EMPTY:
raise ValueError(f'Expected output file to be empty, got {self.state}')
raise ValueError(f"Expected output file to be empty, got {self.state}")
assert self.fout is not None
assert len(self.fout) == len(self.tensors)
@@ -229,7 +259,9 @@ def write_header_to_file(self, path: Path | None = None) -> None:
def write_kv_data_to_file(self) -> None:
if self.state is not WriterState.HEADER:
raise ValueError(f'Expected output file to contain the header, got {self.state}')
raise ValueError(
f"Expected output file to contain the header, got {self.state}"
)
assert self.fout is not None
for fout, kv_data in zip(self.fout, self.kv_data):
@@ -246,7 +278,9 @@ def write_kv_data_to_file(self) -> None:
def write_ti_data_to_file(self) -> None:
if self.state is not WriterState.KV_DATA:
raise ValueError(f'Expected output file to contain KV data, got {self.state}')
raise ValueError(
f"Expected output file to contain KV data, got {self.state}"
)
assert self.fout is not None
for fout, tensors in zip(self.fout, self.tensors):
@@ -269,7 +303,7 @@ def write_ti_data_to_file(self) -> None:
def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
if any(key in kv_data for kv_data in self.kv_data):
raise ValueError(f'Duplicated key name {key!r}')
raise ValueError(f"Duplicated key name {key!r}")
self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
@@ -321,14 +355,20 @@ def ggml_pad(x: int, n: int) -> int:
return ((x + n - 1) // n) * n
def add_tensor_info(
self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype,
tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None,
self,
name: str,
tensor_shape: Sequence[int],
tensor_dtype: np.dtype,
tensor_nbytes: int,
raw_dtype: GGMLQuantizationType | None = None,
) -> None:
if self.state is not WriterState.NO_FILE:
raise ValueError(f'Expected output file to be not yet opened, got {self.state}')
raise ValueError(
f"Expected output file to be not yet opened, got {self.state}"
)
if any(name in tensors for tensors in self.tensors):
raise ValueError(f'Duplicated tensor name {name!r}')
raise ValueError(f"Duplicated tensor name {name!r}")
if raw_dtype is None:
if tensor_dtype == np.float16:
@@ -346,7 +386,9 @@ def add_tensor_info(
elif tensor_dtype == np.int64:
dtype = GGMLQuantizationType.I64
else:
raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
raise ValueError(
"Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now"
)
else:
dtype = raw_dtype
if tensor_dtype == np.uint8:
@@ -359,14 +401,20 @@ def add_tensor_info(
and len(self.tensors[-1]) >= self.split_max_tensors
) or ( # split when over size limit
self.split_max_size != 0
and sum(ti.nbytes for ti in self.tensors[-1].values()) + tensor_nbytes > self.split_max_size
and sum(ti.nbytes for ti in self.tensors[-1].values()) + tensor_nbytes
> self.split_max_size
):
self.tensors.append({})
self.tensors[-1][name] = TensorInfo(shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes)
self.tensors[-1][name] = TensorInfo(
shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes
)
def add_tensor(
self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
self,
name: str,
tensor: np.ndarray[Any, Any],
raw_shape: Sequence[int] | None = None,
raw_dtype: GGMLQuantizationType | None = None,
) -> None:
if self.endianess == GGUFEndian.BIG:
@@ -377,7 +425,9 @@ def add_tensor(
self.temp_file = fp
shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype)
self.add_tensor_info(
name, shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype
)
if self.temp_file is None:
self.tensors[-1][name].tensor = tensor
@@ -387,13 +437,21 @@ def add_tensor(
self.write_padding(self.temp_file, tensor.nbytes)
def write_padding(self, fp: IO[bytes], n: int, align: int | None = None) -> None:
pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
pad = (
GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment)
- n
)
if pad != 0:
fp.write(bytes([0] * pad))
def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
if self.state is not WriterState.TI_DATA and self.state is not WriterState.WEIGHTS:
raise ValueError(f'Expected output file to contain tensor info or weights, got {self.state}')
if (
self.state is not WriterState.TI_DATA
and self.state is not WriterState.WEIGHTS
):
raise ValueError(
f"Expected output file to contain tensor info or weights, got {self.state}"
)
assert self.fout is not None
if self.endianess == GGUFEndian.BIG:
@@ -409,7 +467,9 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
# pop the first tensor info
# TODO: cleaner way to get the first key
first_tensor_name = [name for name, _ in zip(self.tensors[file_id].keys(), range(1))][0]
first_tensor_name = [
name for name, _ in zip(self.tensors[file_id].keys(), range(1))
][0]
ti = self.tensors[file_id].pop(first_tensor_name)
assert ti.nbytes == tensor.nbytes
@@ -437,8 +497,15 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
total_bytes = sum(ti.nbytes for t in self.tensors for ti in t.values())
if len(self.fout) > 1:
shard_bar = tqdm(desc=f"Shard (0/{len(self.fout)})", total=None, unit="byte", unit_scale=True)
bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)
shard_bar = tqdm(
desc=f"Shard (0/{len(self.fout)})",
total=None,
unit="byte",
unit_scale=True,
)
bar = tqdm(
desc="Writing", total=total_bytes, unit="byte", unit_scale=True
)
for i, (fout, tensors) in enumerate(zip(self.fout, self.tensors)):
if shard_bar is not None:
@@ -448,7 +515,9 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
# relying on the fact that Python dicts preserve insertion order (since 3.7)
for ti in tensors.values():
assert ti.tensor is not None # can only iterate once over the tensors
assert (
ti.tensor is not None
) # can only iterate once over the tensors
assert ti.tensor.nbytes == ti.nbytes
ti.tensor.tofile(fout)
if shard_bar is not None:
@@ -460,7 +529,9 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
else:
self.temp_file.seek(0)
shutil.copyfileobj(self.temp_file, self.fout[0 if not self.small_first_shard else 1])
shutil.copyfileobj(
self.temp_file, self.fout[0 if not self.small_first_shard else 1]
)
self.flush()
self.temp_file.close()
@@ -566,7 +637,9 @@ def add_base_model_version(self, source_id: int, version: str) -> None:
self.add_string(Keys.General.BASE_MODEL_VERSION.format(id=source_id), version)
def add_base_model_organization(self, source_id: int, organization: str) -> None:
self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
self.add_string(
Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization
)
def add_base_model_url(self, source_id: int, url: str) -> None:
self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
@@ -605,7 +678,9 @@ def add_block_count(self, length: int) -> None:
self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
def add_leading_dense_block_count(self, length: int) -> None:
self.add_uint32(Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch), length)
self.add_uint32(
Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch), length
)
def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
if isinstance(length, int):
@@ -614,10 +689,14 @@ def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
self.add_array(Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length)
def add_expert_feed_forward_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
self.add_uint32(
Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length
)
def add_expert_shared_feed_forward_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
self.add_uint32(
Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length
)
def add_parallel_residual(self, use: bool) -> None:
self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
@@ -736,10 +815,14 @@ def add_tokenizer_model(self, model: str) -> None:
def add_tokenizer_pre(self, pre: str) -> None:
self.add_string(Keys.Tokenizer.PRE, pre)
def add_token_list(self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
def add_token_list(
self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]
) -> None:
self.add_array(Keys.Tokenizer.LIST, tokens)
def add_token_merges(self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
def add_token_merges(
self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]
) -> None:
self.add_array(Keys.Tokenizer.MERGES, merges)
def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None:
@@ -793,18 +876,22 @@ def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
template_names = set()
for choice in value:
name = choice.get('name', '')
template = choice.get('template')
name = choice.get("name", "")
template = choice.get("template")
# Allowing non-alphanumerical characters in template name is probably not a good idea, so filter it
name = ''.join((c if c in ascii_letters + digits else '_' for c in name))
name = "".join(
(c if c in ascii_letters + digits else "_" for c in name)
)
if name and template is not None:
if name == 'default':
if name == "default":
template_default = template
else:
template_names.add(name)
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template)
self.add_string(
Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template
)
if template_names:
self.add_array(Keys.Tokenizer.CHAT_TEMPLATES, list(template_names))
@@ -829,10 +916,10 @@ def add_eot_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
pack_prefix = ''
pack_prefix = ""
if not skip_pack_prefix:
pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
return struct.pack(f'{pack_prefix}{fmt}', value)
pack_prefix = "<" if self.endianess == GGUFEndian.LITTLE else ">"
return struct.pack(f"{pack_prefix}{fmt}", value)
def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
kv_data = bytearray()
@@ -842,7 +929,9 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
pack_fmt = self._simple_value_packing.get(vtype)
if pack_fmt is not None:
kv_data += self._pack(pack_fmt, val, skip_pack_prefix = vtype == GGUFValueType.BOOL)
kv_data += self._pack(
pack_fmt, val, skip_pack_prefix=vtype == GGUFValueType.BOOL
)
elif vtype == GGUFValueType.STRING:
encoded_val = val.encode("utf-8") if isinstance(val, str) else val
kv_data += self._pack("Q", len(encoded_val))
@@ -860,7 +949,9 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
else:
ltype = GGUFValueType.get_type(val[0])
if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
raise ValueError("All items in a GGUF array should be of the same type")
raise ValueError(
"All items in a GGUF array should be of the same type"
)
kv_data += self._pack("I", ltype)
kv_data += self._pack("Q", len(val))
for item in val:

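The WriterState checks reformatted above imply a strict call order: metadata and tensor registration happen while the state is NO_FILE, then the header, KV data, tensor info, and weights are written in sequence. A hedged sketch of that order, using only methods that appear in this diff (values are illustrative; in gguf-py, write_tensors_to_file is expected to flush the tensor info section before the weights):

import numpy as np

writer = GGUFWriter("out.gguf", arch="llama")
writer.add_block_count(32)  # KV data, recorded while state is NO_FILE
writer.add_tensor("output.weight", np.zeros((4, 4), dtype=np.float32))
writer.write_header_to_file()  # opens the file(s), writes the header
writer.write_kv_data_to_file()  # HEADER -> KV_DATA
writer.write_tensors_to_file()  # tensor info, then the weights
writer.close()
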
View File

@@ -13,7 +13,9 @@
class LazyMeta(ABCMeta):
def __new__(cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs):
def __new__(
cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs
):
def __getattr__(self, name: str) -> Any:
meta_attr = getattr(self._meta, name)
if callable(meta_attr):
@@ -41,6 +43,7 @@ def wrapped_special_op(self, *args, **kwargs):
getattr(type(self)._tensor_type, op_name),
meta_noop=meta_noop,
)(self, *args, **kwargs)
return wrapped_special_op
# special methods bypass __getattr__, so they need to be added manually
@@ -48,11 +51,48 @@ def wrapped_special_op(self, *args, **kwargs):
# NOTE: doing this from a metaclass is very convenient
# TODO: make this even more comprehensive
for binary_op in (
"lt", "le", "eq", "ne", "ge", "gt", "not"
"abs", "add", "and", "floordiv", "invert", "lshift", "mod", "mul", "matmul",
"neg", "or", "pos", "pow", "rshift", "sub", "truediv", "xor",
"iadd", "iand", "ifloordiv", "ilshift", "imod", "imul", "ior", "irshift", "isub", "ixor",
"radd", "rand", "rfloordiv", "rmul", "ror", "rpow", "rsub", "rtruediv", "rxor",
"lt",
"le",
"eq",
"ne",
"ge",
"gt",
"not" "abs",
"add",
"and",
"floordiv",
"invert",
"lshift",
"mod",
"mul",
"matmul",
"neg",
"or",
"pos",
"pow",
"rshift",
"sub",
"truediv",
"xor",
"iadd",
"iand",
"ifloordiv",
"ilshift",
"imod",
"imul",
"ior",
"irshift",
"isub",
"ixor",
"radd",
"rand",
"rfloordiv",
"rmul",
"ror",
"rpow",
"rsub",
"rtruediv",
"rxor",
):
attr_name = f"__{binary_op}__"
# the result of these operators usually has the same shape and dtype as the input,
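
A pitfall this hunk surfaces: in the pre-format tuple, the line ending `"gt", "not"` had no trailing comma, so Python's implicit string-literal concatenation fused `"not"` with the `"abs"` that began the next line into a single element, `"notabs"`, silently dropping an operator name. The one-element-per-line layout above, with the comma restored, makes that kind of slip visible. A two-line demonstration:

ops = ("gt", "not" "abs", "add")  # missing comma after "not"
assert ops == ("gt", "notabs", "add")  # adjacent literals concatenate
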
@@ -60,7 +100,9 @@ def wrapped_special_op(self, *args, **kwargs):
namespace[attr_name] = mk_wrap(attr_name, meta_noop=True)
for special_op in (
"getitem", "setitem", "len",
"getitem",
"setitem",
"len",
):
attr_name = f"__{special_op}__"
namespace[attr_name] = mk_wrap(attr_name, meta_noop=False)
@@ -77,7 +119,15 @@ class LazyBase(ABC, metaclass=LazyMeta):
_kwargs: dict[str, Any]
_func: Callable[[Any], Any] | None
def __init__(self, *, meta: Any, data: Any | None = None, args: tuple = (), kwargs: dict[str, Any] | None = None, func: Callable[[Any], Any] | None = None):
def __init__(
self,
*,
meta: Any,
data: Any | None = None,
args: tuple = (),
kwargs: dict[str, Any] | None = None,
func: Callable[[Any], Any] | None = None,
):
super().__init__()
self._meta = meta
self._data = data
@@ -107,7 +157,17 @@ def _recurse_apply(o: Any, fn: Callable[[Any], Any]) -> Any:
return o
@classmethod
def _wrap_fn(cls, fn: Callable, *, use_self: LazyBase | None = None, meta_noop: bool | DTypeLike | tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]] = False) -> Callable[[Any], Any]:
def _wrap_fn(
cls,
fn: Callable,
*,
use_self: LazyBase | None = None,
meta_noop: (
bool
| DTypeLike
| tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]]
) = False,
) -> Callable[[Any], Any]:
def wrapped_fn(*args, **kwargs):
if kwargs is None:
kwargs = {}
@@ -138,13 +198,16 @@ def wrapped_fn(*args, **kwargs):
res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)
if isinstance(res, cls._tensor_type):
return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
return cls(
meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn
)
else:
del res # not needed
# non-tensor return likely relies on the contents of the args
# (e.g. the result of torch.equal)
eager_args = cls.to_eager(args)
return fn(*eager_args, **kwargs)
return wrapped_fn
@classmethod
@@ -175,7 +238,8 @@ def eager_to_meta(cls, t: Any) -> Any:
# must be overridden, meta tensor init is backend-specific
@classmethod
@abstractmethod
def meta_with_dtype_and_shape(cls, dtype: Any, shape: Any) -> Any: pass
def meta_with_dtype_and_shape(cls, dtype: Any, shape: Any) -> Any:
pass
@classmethod
def from_eager(cls, t: Any) -> Any:
@@ -192,7 +256,9 @@ class LazyNumpyTensor(LazyBase):
_tensor_type = np.ndarray
@classmethod
def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
def meta_with_dtype_and_shape(
cls, dtype: DTypeLike, shape: tuple[int, ...]
) -> np.ndarray[Any, Any]:
# The initial idea was to use np.nan as the fill value,
# but non-float types like np.int16 can't use that.
# So zero it is.
@@ -201,8 +267,16 @@ def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) ->
def astype(self, dtype, *args, **kwargs):
meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
full_args = (self, dtype,) + args
return type(self)(meta=meta, args=full_args, kwargs=kwargs, func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)))
full_args = (
self,
dtype,
) + args
return type(self)(
meta=meta,
args=full_args,
kwargs=kwargs,
func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)),
)
def tofile(self, *args, **kwargs):
eager = LazyNumpyTensor.to_eager(self)

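A hedged sketch of the laziness these hunks implement: wrapped operators record a func/args graph against cheap meta tensors, and nothing is computed until to_eager materializes the chain (class and method names as defined in this file; the values are illustrative):

import numpy as np

lazy = LazyNumpyTensor.from_eager(np.ones((2, 2), dtype=np.float32))
doubled = lazy * 2  # recorded via the wrapped __mul__, not computed yet
real = LazyNumpyTensor.to_eager(doubled)  # the graph is evaluated here
assert (real == 2.0).all()
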
View File

@@ -44,7 +44,12 @@ class Metadata:
datasets: Optional[list[str]] = None
@staticmethod
def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
def load(
metadata_override_path: Optional[Path] = None,
model_path: Optional[Path] = None,
model_name: Optional[str] = None,
total_params: int = 0,
) -> Metadata:
# This grabs as many contextual authorship metadata as possible from the model repository
# making any conversion as required to match the gguf kv store metadata format
# as well as giving users the ability to override any authorship metadata that may be incorrect
@@ -57,7 +62,9 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
# TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter
# heuristics
metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params)
metadata = Metadata.apply_metadata_heuristic(
metadata, model_card, hf_params, model_path, total_params
)
# Metadata Override File Provided
# This is based on LLM_KV_NAMES mapping in llama.cpp
@@ -66,34 +73,66 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
metadata.name = metadata_override.get(Keys.General.NAME, metadata.name)
metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author)
metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version)
metadata.organization = metadata_override.get(Keys.General.ORGANIZATION, metadata.organization)
metadata.organization = metadata_override.get(
Keys.General.ORGANIZATION, metadata.organization
)
metadata.finetune = metadata_override.get(Keys.General.FINETUNE, metadata.finetune)
metadata.basename = metadata_override.get(Keys.General.BASENAME, metadata.basename)
metadata.finetune = metadata_override.get(
Keys.General.FINETUNE, metadata.finetune
)
metadata.basename = metadata_override.get(
Keys.General.BASENAME, metadata.basename
)
metadata.description = metadata_override.get(Keys.General.DESCRIPTION, metadata.description)
metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY, metadata.quantized_by)
metadata.description = metadata_override.get(
Keys.General.DESCRIPTION, metadata.description
)
metadata.quantized_by = metadata_override.get(
Keys.General.QUANTIZED_BY, metadata.quantized_by
)
metadata.size_label = metadata_override.get(Keys.General.SIZE_LABEL, metadata.size_label)
metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME, metadata.license_name)
metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK, metadata.license_link)
metadata.size_label = metadata_override.get(
Keys.General.SIZE_LABEL, metadata.size_label
)
metadata.license_name = metadata_override.get(
Keys.General.LICENSE_NAME, metadata.license_name
)
metadata.license_link = metadata_override.get(
Keys.General.LICENSE_LINK, metadata.license_link
)
metadata.url = metadata_override.get(Keys.General.URL, metadata.url)
metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi)
metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid)
metadata.repo_url = metadata_override.get(Keys.General.REPO_URL, metadata.repo_url)
metadata.repo_url = metadata_override.get(
Keys.General.REPO_URL, metadata.repo_url
)
metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL, metadata.source_url)
metadata.source_doi = metadata_override.get(Keys.General.SOURCE_DOI, metadata.source_doi)
metadata.source_uuid = metadata_override.get(Keys.General.SOURCE_UUID, metadata.source_uuid)
metadata.source_repo_url = metadata_override.get(Keys.General.SOURCE_REPO_URL, metadata.source_repo_url)
metadata.source_url = metadata_override.get(
Keys.General.SOURCE_URL, metadata.source_url
)
metadata.source_doi = metadata_override.get(
Keys.General.SOURCE_DOI, metadata.source_doi
)
metadata.source_uuid = metadata_override.get(
Keys.General.SOURCE_UUID, metadata.source_uuid
)
metadata.source_repo_url = metadata_override.get(
Keys.General.SOURCE_REPO_URL, metadata.source_repo_url
)
# Base Models is received here as an array of models
metadata.base_models = metadata_override.get("general.base_models", metadata.base_models)
metadata.base_models = metadata_override.get(
"general.base_models", metadata.base_models
)
metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages)
metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets)
metadata.languages = metadata_override.get(
Keys.General.LANGUAGES, metadata.languages
)
metadata.datasets = metadata_override.get(
Keys.General.DATASETS, metadata.datasets
)
# Direct Metadata Override (via direct cli argument)
if model_name is not None:
@@ -102,7 +141,9 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
return metadata
@staticmethod
def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, Any]:
def load_metadata_override(
metadata_override_path: Optional[Path] = None,
) -> dict[str, Any]:
if metadata_override_path is None or not metadata_override_path.is_file():
return {}
@@ -128,7 +169,9 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]:
if isinstance(data, dict):
return data
else:
logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict")
logger.error(
f"while reading YAML model card frontmatter, data is {type(data)} instead of dict"
)
return {}
else:
return {}
@@ -149,10 +192,21 @@ def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]:
@staticmethod
def id_to_title(string):
# Convert capitalization into title form unless acronym or version number
return ' '.join([w.title() if w.islower() and not re.match(r'^(v\d+(?:\.\d+)*|\d.*)$', w) else w for w in string.strip().replace('-', ' ').split()])
return " ".join(
[
(
w.title()
if w.islower() and not re.match(r"^(v\d+(?:\.\d+)*|\d.*)$", w)
else w
)
for w in string.strip().replace("-", " ").split()
]
)
@staticmethod
def get_model_id_components(model_id: Optional[str] = None, total_params: int = 0) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
def get_model_id_components(
model_id: Optional[str] = None, total_params: int = 0
) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
# Huggingface often store model id as '<org>/<model name>'
# so let's parse it and apply some heuristics if possible for model name components
@@ -160,24 +214,24 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
# model ID missing
return None, None, None, None, None, None
if ' ' in model_id:
if " " in model_id:
# model ID is actually a normal human sentence
# which means its most likely a normal model name only
# not part of the hugging face naming standard, but whatever
return model_id, None, None, None, None, None
if '/' in model_id:
if "/" in model_id:
# model ID (huggingface style)
org_component, model_full_name_component = model_id.split('/', 1)
org_component, model_full_name_component = model_id.split("/", 1)
else:
# model ID but missing org components
org_component, model_full_name_component = None, model_id
# Check if we erroneously matched against './' or '../' etc...
if org_component is not None and org_component[0] == '.':
if org_component is not None and org_component[0] == ".":
org_component = None
name_parts: list[str] = model_full_name_component.split('-')
name_parts: list[str] = model_full_name_component.split("-")
# Remove empty parts
for i in reversed(range(len(name_parts))):
@@ -191,14 +245,18 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
# Annotate the name
for i, part in enumerate(name_parts):
# Version
if re.fullmatch(r'(v|iter)?\d+([.]\d+)*', part, re.IGNORECASE):
if re.fullmatch(r"(v|iter)?\d+([.]\d+)*", part, re.IGNORECASE):
name_types[i].add("version")
# Quant type (should not be there for base models, but still annotated)
elif re.fullmatch(r'i?q\d(_\w)*|b?fp?(16|32)', part, re.IGNORECASE):
elif re.fullmatch(r"i?q\d(_\w)*|b?fp?(16|32)", part, re.IGNORECASE):
name_types[i].add("type")
name_parts[i] = part.upper()
# Model size
elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)', part, re.IGNORECASE):
elif i > 0 and re.fullmatch(
r"(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)",
part,
re.IGNORECASE,
):
part = part.replace("_", ".")
# Handle weird bloom-7b1 notation
if part[-1].isdecimal():
@@ -209,14 +267,19 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
part = part[:-1] + part[-1].upper()
if total_params != 0:
try:
label_params = float(part[:-1]) * pow(1000, " KMBT".find(part[-1]))
label_params = float(part[:-1]) * pow(
1000, " KMBT".find(part[-1])
)
# Only use it as a size label if it's close or bigger than the model size
# Note that LoRA adapters don't necessarily include all layers,
# so this is why bigger label sizes are accepted.
# Do not use the size label when it's smaller than 1/8 of the model size
if (total_params < 0 and label_params < abs(total_params) // 8) or (
if (
total_params < 0 and label_params < abs(total_params) // 8
) or (
# Check both directions when the current model isn't a LoRA adapter
total_params > 0 and abs(label_params - total_params) > 7 * total_params // 8
total_params > 0
and abs(label_params - total_params) > 7 * total_params // 8
):
# Likely a context length
name_types[i].add("finetune")
@@ -229,7 +292,9 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
name_types[i].add("size_label")
name_parts[i] = part
# Some easy to recognize finetune names
elif i > 0 and re.fullmatch(r'chat|instruct|vision|lora', part, re.IGNORECASE):
elif i > 0 and re.fullmatch(
r"chat|instruct|vision|lora", part, re.IGNORECASE
):
if total_params < 0 and part.lower() == "lora":
# ignore redundant "lora" in the finetune part when the output is a lora adapter
name_types[i].add("type")
@@ -238,7 +303,12 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
# Ignore word-based size labels when there is at least a number-based one present
# TODO: should word-based size labels always be removed instead?
if any(c.isdecimal() for n, t in zip(name_parts, name_types) if "size_label" in t for c in n):
if any(
c.isdecimal()
for n, t in zip(name_parts, name_types)
if "size_label" in t
for c in n
):
for n, t in zip(name_parts, name_types):
if "size_label" in t:
if all(c.isalpha() for c in n):
@@ -262,22 +332,55 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
else:
break
basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None
basename = (
"-".join(n for n, t in zip(name_parts, name_types) if "basename" in t)
or None
)
# Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys)
size_label = "-".join(dict.fromkeys(s for s, t in zip(name_parts, name_types) if "size_label" in t).keys()) or None
finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None
size_label = (
"-".join(
dict.fromkeys(
s for s, t in zip(name_parts, name_types) if "size_label" in t
).keys()
)
or None
)
finetune = (
"-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t)
or None
)
# TODO: should the basename version always be excluded?
# NOTE: multiple finetune versions are joined together
version = "-".join(v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t) or None
version = (
"-".join(
v
for v, t, in zip(name_parts, name_types)
if "version" in t and "basename" not in t
)
or None
)
if size_label is None and finetune is None and version is None:
# Too ambiguous, output nothing
basename = None
return model_full_name_component, org_component, basename, finetune, version, size_label
return (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
)
@staticmethod
def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None, total_params: int = 0) -> Metadata:
def apply_metadata_heuristic(
metadata: Metadata,
model_card: Optional[dict] = None,
hf_params: Optional[dict] = None,
model_path: Optional[Path] = None,
total_params: int = 0,
) -> Metadata:
# Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Model Card Heuristics
@@ -317,16 +420,30 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
for model_id in metadata_base_models:
# NOTE: model size of base model is assumed to be similar to the size of the current model
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
(
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
) = Metadata.get_model_id_components(model_id, total_params)
base_model = {}
if model_full_name_component is not None:
base_model["name"] = Metadata.id_to_title(model_full_name_component)
base_model["name"] = Metadata.id_to_title(
model_full_name_component
)
if org_component is not None:
base_model["organization"] = Metadata.id_to_title(org_component)
if version is not None:
base_model["version"] = version
if org_component is not None and model_full_name_component is not None:
base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
if (
org_component is not None
and model_full_name_component is not None
):
base_model["repo_url"] = (
f"https://huggingface.co/{org_component}/{model_full_name_component}"
)
metadata.base_models.append(base_model)
if "license" in model_card and metadata.license is None:
@@ -360,7 +477,9 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
elif isinstance(pipeline_tags_value, list):
metadata.tags.extend(pipeline_tags_value)
language_value = model_card.get("languages", model_card.get("language", None))
language_value = model_card.get(
"languages", model_card.get("language", None)
)
if language_value is not None:
if metadata.languages is None:
@@ -388,11 +507,18 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
if hf_params is not None:
hf_name_or_path = hf_params.get("_name_or_path")
if hf_name_or_path is not None and hf_name_or_path.count('/') <= 1:
if hf_name_or_path is not None and hf_name_or_path.count("/") <= 1:
# Use _name_or_path only if its actually a model name and not some computer path
# e.g. 'meta-llama/Llama-2-7b-hf'
model_id = hf_name_or_path
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
(
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
) = Metadata.get_model_id_components(model_id, total_params)
if metadata.name is None and model_full_name_component is not None:
metadata.name = Metadata.id_to_title(model_full_name_component)
if metadata.organization is None and org_component is not None:
@@ -410,7 +536,14 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
############################################
if model_path is not None:
model_id = model_path.name
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
(
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
) = Metadata.get_model_id_components(model_id, total_params)
if metadata.name is None and model_full_name_component is not None:
metadata.name = Metadata.id_to_title(model_full_name_component)
if metadata.organization is None and org_component is not None:
@@ -485,7 +618,9 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
if "version" in base_model_entry:
gguf_writer.add_base_model_version(key, base_model_entry["version"])
if "organization" in base_model_entry:
gguf_writer.add_base_model_organization(key, base_model_entry["organization"])
gguf_writer.add_base_model_organization(
key, base_model_entry["organization"]
)
if "url" in base_model_entry:
gguf_writer.add_base_model_url(key, base_model_entry["url"])
if "doi" in base_model_entry:
@@ -493,7 +628,9 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
if "uuid" in base_model_entry:
gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"])
if "repo_url" in base_model_entry:
gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"])
gguf_writer.add_base_model_repo_url(
key, base_model_entry["repo_url"]
)
if self.tags is not None:
gguf_writer.add_tags(self.tags)

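A hedged illustration of what the name heuristics above aim to extract from a HuggingFace-style model id (the expected values follow from the regexes shown, but are not asserted output of this exact revision):

name, org, basename, finetune, version, size_label = (
    Metadata.get_model_id_components(
        "mistralai/Mistral-7B-Instruct-v0.2", total_params=7_240_000_000
    )
)
# Roughly: name="Mistral-7B-Instruct-v0.2", org="mistralai",
# basename="Mistral", finetune="Instruct", version="v0.2", size_label="7B"
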
View File

@@ -12,14 +12,18 @@
def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
block_size, type_size = GGML_QUANT_SIZES[quant_type]
if shape[-1] % block_size != 0:
raise ValueError(f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})")
raise ValueError(
f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})"
)
return (*shape[:-1], shape[-1] // block_size * type_size)
def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
block_size, type_size = GGML_QUANT_SIZES[quant_type]
if shape[-1] % type_size != 0:
raise ValueError(f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})")
raise ValueError(
f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})"
)
return (*shape[:-1], shape[-1] // type_size * block_size)
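# Worked example (hedged sketch, assuming Q8_0's GGML layout of 32 weights
# per block stored in 34 bytes: a 2-byte fp16 scale plus 32 int8 values):
example_shape = (4096, 4096)
example_bytes = quant_shape_to_byte_shape(example_shape, GGMLQuantizationType.Q8_0)
assert example_bytes == (4096, 4352)  # 4096 // 32 * 34
assert quant_shape_from_byte_shape(example_bytes, GGMLQuantizationType.Q8_0) == example_shape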
@ -27,14 +31,23 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati
def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
n = n.astype(np.float32, copy=False).view(np.uint32)
# force any NaN to a quiet NaN
n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n)
n = np.where(
(n & 0x7FFFFFFF) > 0x7F800000,
(n & np.uint32(0xFFFF0000)) | np.uint32(64 << 16),
n,
)
# round to nearest even
n = (np.uint64(n) + (0x7fff + ((n >> 16) & 1))) >> 16
n = (np.uint64(n) + (0x7FFF + ((n >> 16) & 1))) >> 16
return n.astype(np.uint16)
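# Quick check of the round-to-nearest-even step (hedged, illustrative only;
# it inlines the same arithmetic rather than calling the private helper):
import numpy as np

bits = np.float32(1.00390625).view(np.uint32)  # 0x3F808000, exactly halfway
rounded = (np.uint64(bits) + (0x7FFF + ((bits >> 16) & 1))) >> 16
assert rounded == 0x3F80  # the tie rounds down to the even pattern (1.0 in bf16)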
# This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
def __apply_over_grouped_rows(
func: Callable[[np.ndarray], np.ndarray],
arr: np.ndarray,
otype: DTypeLike,
oshape: tuple[int, ...],
) -> np.ndarray:
rows = arr.reshape((-1, arr.shape[-1]))
osize = 1
for dim in oshape:
@ -42,15 +55,23 @@ def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.
out = np.empty(shape=osize, dtype=otype)
# compute over groups of 16 rows (arbitrary, but seems good for performance)
n_groups = (rows.shape[0] // 16) or 1
np.concatenate([func(group).ravel() for group in np.array_split(rows, n_groups)], axis=0, out=out)
np.concatenate(
[func(group).ravel() for group in np.array_split(rows, n_groups)],
axis=0,
out=out,
)
return out.reshape(oshape)
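# Sanity sketch (hedged; the driver is module-private, so this is purely
# illustrative): with an identity function the grouped-row machinery must
# reproduce its input bit for bit.
import numpy as np

demo = np.arange(64, dtype=np.float32).reshape(4, 16)
roundtrip = __apply_over_grouped_rows(lambda g: g, demo, np.float32, demo.shape)
assert (roundtrip == demo).all()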
def __quantize_bf16_array(n: np.ndarray) -> np.ndarray:
return __apply_over_grouped_rows(__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape)
return __apply_over_grouped_rows(
__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape
)
__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(__quantize_bf16_array, meta_noop=np.uint16)
__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(
__quantize_bf16_array, meta_noop=np.uint16
)
def quantize_bf16(n: np.ndarray):
@ -105,7 +126,12 @@ def __quantize_q8_0_rows(n: np.ndarray) -> np.ndarray:
def __quantize_q8_0_array(n: np.ndarray) -> np.ndarray:
return __apply_over_grouped_rows(__quantize_q8_0_rows, arr=n, otype=np.uint8, oshape=__quantize_q8_0_shape_change(n.shape))
return __apply_over_grouped_rows(
__quantize_q8_0_rows,
arr=n,
otype=np.uint8,
oshape=__quantize_q8_0_shape_change(n.shape),
)
__quantize_q8_0_lazy = LazyNumpyTensor._wrap_fn(

View File

@ -28,12 +28,10 @@ class TensorNameMap:
"transformer.token_embeddings", # openelm
"shared", # t5
),
# Token type embeddings
MODEL_TENSOR.TOKEN_TYPES: (
"embeddings.token_type_embeddings", # bert nomic-bert
),
# Normalization of token embeddings
MODEL_TENSOR.TOKEN_EMBD_NORM: (
"word_embeddings_layernorm", # bloom
@ -41,14 +39,12 @@ class TensorNameMap:
"emb_ln", # nomic-bert
"transformer.norm", # openelm
),
# Position embeddings
MODEL_TENSOR.POS_EMBD: (
"transformer.wpe", # gpt2
"embeddings.position_embeddings", # bert
"wpe", # gpt2
),
# Output
MODEL_TENSOR.OUTPUT: (
"embed_out", # gptneox
@ -58,7 +54,6 @@ class TensorNameMap:
"lm_head.linear", # phi2
"output_layer", # chatglm
),
# Output norm
MODEL_TENSOR.OUTPUT_NORM: (
"gpt_neox.final_layer_norm", # gptneox
@ -76,7 +71,6 @@ class TensorNameMap:
"encoder.final_layernorm", # chatglm
"transformer.norm", # openelm
),
# Rope frequencies
MODEL_TENSOR.ROPE_FREQS: (
"rope.freqs", # llama-pth
@ -108,13 +102,11 @@ class TensorNameMap:
"encoder.layers.{bid}.input_layernorm", # chatglm
"transformer.layers.{bid}.attn_norm", # openelm
),
# Attention norm 2
MODEL_TENSOR.ATTN_NORM_2: (
"transformer.h.{bid}.ln_attn", # falcon40b
"encoder.layer.{bid}.layer_norm_1", # jina-v2-code
),
# Attention query-key-value
MODEL_TENSOR.ATTN_QKV: (
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
@ -132,7 +124,6 @@ class TensorNameMap:
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
"transformer.layers.{bid}.attn.qkv_proj", # openelm
),
# Attention query
MODEL_TENSOR.ATTN_Q: (
"model.layers.{bid}.self_attn.q_proj", # llama-hf
@ -143,7 +134,6 @@ class TensorNameMap:
"model.layers.{bid}.attention.wq", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
),
# Attention key
MODEL_TENSOR.ATTN_K: (
"model.layers.{bid}.self_attn.k_proj", # llama-hf
@ -155,7 +145,6 @@ class TensorNameMap:
"model.layers.{bid}.attention.wk", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
),
# Attention value
MODEL_TENSOR.ATTN_V: (
"model.layers.{bid}.self_attn.v_proj", # llama-hf
@ -165,9 +154,8 @@ class TensorNameMap:
"transformer.h.{bid}.attn.v", # refact
"model.layers.layers.{bid}.self_attn.v_proj", # plamo
"model.layers.{bid}.attention.wv", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.value" # Grok
"transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
),
# Attention output
MODEL_TENSOR.ATTN_OUT: (
"gpt_neox.layers.{bid}.attention.dense", # gptneox
@ -191,7 +179,6 @@ class TensorNameMap:
"encoder.layers.{bid}.self_attention.dense", # chatglm
"transformer.layers.{bid}.attn.out_proj", # openelm
),
# Attention output norm
MODEL_TENSOR.ATTN_OUT_NORM: (
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
@ -199,11 +186,9 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
),
MODEL_TENSOR.ATTN_POST_NORM: (
"model.layers.{bid}.post_attention_layernorm", # gemma2
),
# Rotary embeddings
MODEL_TENSOR.ATTN_ROT_EMBD: (
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
@ -211,7 +196,6 @@ class TensorNameMap:
"model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
"transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell
),
# Feed-forward norm
MODEL_TENSOR.FFN_NORM: (
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
@ -228,17 +212,14 @@ class TensorNameMap:
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
"transformer.layers.{bid}.ffn_norm", # openelm
),
# Pre feed-forward norm
MODEL_TENSOR.FFN_PRE_NORM: (
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2
),
# Post feed-forward norm
MODEL_TENSOR.FFN_POST_NORM: (
"model.layers.{bid}.post_feedforward_layernorm", # gemma2
),
MODEL_TENSOR.FFN_GATE_INP: (
"layers.{bid}.feed_forward.gate", # mixtral
"model.layers.{bid}.block_sparse_moe.gate", # mixtral
@ -246,11 +227,9 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.router", # Grok
"transformer.blocks.{bid}.ffn.router.layer", # dbrx
),
MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
),
# Feed-forward up
MODEL_TENSOR.FFN_UP: (
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
@ -278,24 +257,18 @@ class TensorNameMap:
"model.layers.{bid}.residual_mlp.w3", # arctic
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
),
MODEL_TENSOR.FFN_UP_EXP: (
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
),
MODEL_TENSOR.FFN_UP_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
),
# AWQ-activation gate
MODEL_TENSOR.FFN_ACT: (
"transformer.blocks.{bid}.ffn.act", # mpt
),
MODEL_TENSOR.FFN_ACT: ("transformer.blocks.{bid}.ffn.act",), # mpt
# Feed-forward gate
MODEL_TENSOR.FFN_GATE: (
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
@ -309,19 +282,16 @@ class TensorNameMap:
"transformer.h.{bid}.mlp.linear_1", # refact
"model.layers.{bid}.residual_mlp.w1", # arctic
),
MODEL_TENSOR.FFN_GATE_EXP: (
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
),
MODEL_TENSOR.FFN_GATE_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
),
# Feed-forward down
MODEL_TENSOR.FFN_DOWN: (
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
@ -348,19 +318,16 @@ class TensorNameMap:
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
),
MODEL_TENSOR.FFN_DOWN_EXP: (
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
),
MODEL_TENSOR.FFN_DOWN_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
),
MODEL_TENSOR.ATTN_Q_NORM: (
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
@ -369,7 +336,6 @@ class TensorNameMap:
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
"transformer.layers.{bid}.attn.q_norm", # openelm
),
MODEL_TENSOR.ATTN_K_NORM: (
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
@ -378,210 +344,131 @@ class TensorNameMap:
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
"transformer.layers.{bid}.attn.k_norm", # openelm
),
MODEL_TENSOR.ROPE_FREQS: (
"language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
),
MODEL_TENSOR.LAYER_OUT_NORM: (
"encoder.layer.{bid}.output.LayerNorm", # bert
"encoder.layers.{bid}.norm2", # nomic-bert
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
"encoder.layer.{bid}.layer_norm_2" # jina-v2-code
"encoder.layer.{bid}.layer_norm_2", # jina-v2-code
),
MODEL_TENSOR.SSM_IN: (
"model.layers.{bid}.in_proj",
"backbone.layers.{bid}.mixer.in_proj",
),
MODEL_TENSOR.SSM_CONV1D: (
"model.layers.{bid}.conv1d",
"backbone.layers.{bid}.mixer.conv1d",
),
MODEL_TENSOR.SSM_X: (
"model.layers.{bid}.x_proj",
"backbone.layers.{bid}.mixer.x_proj",
),
MODEL_TENSOR.SSM_DT: (
"model.layers.{bid}.dt_proj",
"backbone.layers.{bid}.mixer.dt_proj",
),
MODEL_TENSOR.SSM_A: (
"model.layers.{bid}.A_log",
"backbone.layers.{bid}.mixer.A_log",
),
MODEL_TENSOR.SSM_D: (
"model.layers.{bid}.D",
"backbone.layers.{bid}.mixer.D",
),
MODEL_TENSOR.SSM_OUT: (
"model.layers.{bid}.out_proj",
"backbone.layers.{bid}.mixer.out_proj",
),
MODEL_TENSOR.ATTN_Q_A: (
"model.layers.{bid}.self_attn.q_a_proj", # deepseek2
),
MODEL_TENSOR.ATTN_Q_B: (
"model.layers.{bid}.self_attn.q_b_proj", # deepseek2
),
MODEL_TENSOR.ATTN_Q_A: ("model.layers.{bid}.self_attn.q_a_proj",), # deepseek2
MODEL_TENSOR.ATTN_Q_B: ("model.layers.{bid}.self_attn.q_b_proj",), # deepseek2
MODEL_TENSOR.ATTN_KV_A_MQA: (
"model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
),
MODEL_TENSOR.ATTN_KV_B: (
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
),
MODEL_TENSOR.ATTN_Q_A_NORM: (
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
),
MODEL_TENSOR.ATTN_KV_A_NORM: (
"model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
),
MODEL_TENSOR.ATTN_SUB_NORM: (
"model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
),
MODEL_TENSOR.FFN_SUB_NORM: (
"model.layers.{bid}.mlp.ffn_layernorm", # bitnet
),
MODEL_TENSOR.DEC_ATTN_NORM: (
"decoder.block.{bid}.layer.0.layer_norm", # t5
),
MODEL_TENSOR.DEC_ATTN_Q: (
"decoder.block.{bid}.layer.0.SelfAttention.q", # t5
),
MODEL_TENSOR.DEC_ATTN_K: (
"decoder.block.{bid}.layer.0.SelfAttention.k", # t5
),
MODEL_TENSOR.DEC_ATTN_V: (
"decoder.block.{bid}.layer.0.SelfAttention.v", # t5
),
MODEL_TENSOR.FFN_SUB_NORM: ("model.layers.{bid}.mlp.ffn_layernorm",), # bitnet
MODEL_TENSOR.DEC_ATTN_NORM: ("decoder.block.{bid}.layer.0.layer_norm",), # t5
MODEL_TENSOR.DEC_ATTN_Q: ("decoder.block.{bid}.layer.0.SelfAttention.q",), # t5
MODEL_TENSOR.DEC_ATTN_K: ("decoder.block.{bid}.layer.0.SelfAttention.k",), # t5
MODEL_TENSOR.DEC_ATTN_V: ("decoder.block.{bid}.layer.0.SelfAttention.v",), # t5
MODEL_TENSOR.DEC_ATTN_OUT: (
"decoder.block.{bid}.layer.0.SelfAttention.o", # t5
),
MODEL_TENSOR.DEC_ATTN_REL_B: (
"decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
),
MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
"decoder.block.{bid}.layer.1.layer_norm", # t5
),
MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
"decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
),
MODEL_TENSOR.DEC_CROSS_ATTN_K: (
"decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
),
MODEL_TENSOR.DEC_CROSS_ATTN_V: (
"decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
),
MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
"decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
),
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
"decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
),
MODEL_TENSOR.DEC_FFN_NORM: (
"decoder.block.{bid}.layer.2.layer_norm", # t5
),
MODEL_TENSOR.DEC_FFN_NORM: ("decoder.block.{bid}.layer.2.layer_norm",), # t5
MODEL_TENSOR.DEC_FFN_GATE: (
"decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
),
MODEL_TENSOR.DEC_FFN_UP: (
"decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
"decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
),
MODEL_TENSOR.DEC_FFN_DOWN: (
"decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
),
MODEL_TENSOR.DEC_OUTPUT_NORM: (
"decoder.final_layer_norm", # t5
),
MODEL_TENSOR.ENC_ATTN_NORM: (
"encoder.block.{bid}.layer.0.layer_norm", # t5
),
MODEL_TENSOR.ENC_ATTN_Q: (
"encoder.block.{bid}.layer.0.SelfAttention.q", # t5
),
MODEL_TENSOR.ENC_ATTN_K: (
"encoder.block.{bid}.layer.0.SelfAttention.k", # t5
),
MODEL_TENSOR.ENC_ATTN_V: (
"encoder.block.{bid}.layer.0.SelfAttention.v", # t5
),
MODEL_TENSOR.DEC_OUTPUT_NORM: ("decoder.final_layer_norm",), # t5
MODEL_TENSOR.ENC_ATTN_NORM: ("encoder.block.{bid}.layer.0.layer_norm",), # t5
MODEL_TENSOR.ENC_ATTN_Q: ("encoder.block.{bid}.layer.0.SelfAttention.q",), # t5
MODEL_TENSOR.ENC_ATTN_K: ("encoder.block.{bid}.layer.0.SelfAttention.k",), # t5
MODEL_TENSOR.ENC_ATTN_V: ("encoder.block.{bid}.layer.0.SelfAttention.v",), # t5
MODEL_TENSOR.ENC_ATTN_OUT: (
"encoder.block.{bid}.layer.0.SelfAttention.o", # t5
),
MODEL_TENSOR.ENC_ATTN_REL_B: (
"encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
),
MODEL_TENSOR.ENC_FFN_NORM: (
"encoder.block.{bid}.layer.1.layer_norm", # t5
),
MODEL_TENSOR.ENC_FFN_NORM: ("encoder.block.{bid}.layer.1.layer_norm",), # t5
MODEL_TENSOR.ENC_FFN_GATE: (
"encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
),
MODEL_TENSOR.ENC_FFN_UP: (
"encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
"encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
),
MODEL_TENSOR.ENC_FFN_DOWN: (
"encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
),
MODEL_TENSOR.ENC_OUTPUT_NORM: (
"encoder.final_layer_norm", # t5
),
MODEL_TENSOR.ENC_OUTPUT_NORM: ("encoder.final_layer_norm",), # t5
}
# architecture-specific block mappings
arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = {
MODEL_ARCH.ARCTIC: {
MODEL_TENSOR.FFN_NORM: (
"model.layers.{bid}.residual_layernorm",
),
MODEL_TENSOR.FFN_NORM_EXP: (
"model.layers.{bid}.post_attention_layernorm",
),
MODEL_TENSOR.FFN_NORM: ("model.layers.{bid}.residual_layernorm",),
MODEL_TENSOR.FFN_NORM_EXP: ("model.layers.{bid}.post_attention_layernorm",),
},
}
@ -609,7 +496,9 @@ def __init__(self, arch: MODEL_ARCH, n_blocks: int):
key = key.format(bid=bid)
self.mapping[key] = (tensor, tensor_name)
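# Illustrative sketch (hedged): each template above carries a "{bid}" slot
# that this loop fills with the layer index, e.g.:
example_key = "model.layers.{bid}.residual_layernorm".format(bid=3)
assert example_key == "model.layers.3.residual_layernorm"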
def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
def get_type_and_name(
self, key: str, try_suffixes: Sequence[str] = ()
) -> tuple[MODEL_TENSOR, str] | None:
result = self.mapping.get(key)
if result is not None:
return result
@ -626,7 +515,9 @@ def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
return None
return result[1]
def get_type(self, key: str, try_suffixes: Sequence[str] = ()) -> MODEL_TENSOR | None:
def get_type(
self, key: str, try_suffixes: Sequence[str] = ()
) -> MODEL_TENSOR | None:
result = self.get_type_and_name(key, try_suffixes=try_suffixes)
if result is None:
return None

View File

@ -7,12 +7,18 @@ def fill_templated_filename(filename: str, output_type: str | None) -> str:
# Given a file name, fill in any type templates, e.g. 'some-model-name.{ftype}.gguf'
ftype_lowercase: str = output_type.lower() if output_type is not None else ""
ftype_uppercase: str = output_type.upper() if output_type is not None else ""
return filename.format(ftype_lowercase,
outtype=ftype_lowercase, ftype=ftype_lowercase,
OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)
return filename.format(
ftype_lowercase,
outtype=ftype_lowercase,
ftype=ftype_lowercase,
OUTTYPE=ftype_uppercase,
FTYPE=ftype_uppercase,
)
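# Examples (hedged sketch): both the lowercase and uppercase template slots
# are filled from the same output type.
assert fill_templated_filename("model.{ftype}.gguf", "F16") == "model.f16.gguf"
assert fill_templated_filename("model.{FTYPE}.gguf", "f16") == "model.F16.gguf"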
def model_weight_count_rounded_notation(model_params_count: int, min_digits: int = 2) -> str:
def model_weight_count_rounded_notation(
model_params_count: int, min_digits: int = 2
) -> str:
if model_params_count > 1e12:
# Trillions Of Parameters
scaled_model_params = model_params_count * 1e-12
@ -30,39 +36,65 @@ def model_weight_count_rounded_notation(model_params_count: int, min_digits: int
scaled_model_params = model_params_count * 1e-3
scale_suffix = "K"
fix = max(min_digits - len(str(round(scaled_model_params)).lstrip('0')), 0)
fix = max(min_digits - len(str(round(scaled_model_params)).lstrip("0")), 0)
return f"{scaled_model_params:.{fix}f}{scale_suffix}"
def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str:
def size_label(
total_params: int, shared_params: int, expert_params: int, expert_count: int
) -> str:
if expert_count > 0:
pretty_size = model_weight_count_rounded_notation(abs(shared_params) + abs(expert_params), min_digits=2)
pretty_size = model_weight_count_rounded_notation(
abs(shared_params) + abs(expert_params), min_digits=2
)
size_class = f"{expert_count}x{pretty_size}"
else:
size_class = model_weight_count_rounded_notation(abs(total_params), min_digits=2)
size_class = model_weight_count_rounded_notation(
abs(total_params), min_digits=2
)
return size_class
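# Examples (hedged): MoE sizes are reported per expert with an expert-count
# prefix, dense sizes from the total alone.
assert size_label(46_700_000_000, 1_600_000_000, 5_400_000_000, 8) == "8x7.0B"
assert size_label(6_738_415_616, 0, 0, 0) == "6.7B"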
def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
def naming_convention(
model_name: str | None,
base_name: str | None,
finetune_string: str | None,
version_string: str | None,
size_label: str | None,
output_type: str | None,
model_type: Literal["vocab", "LoRA"] | None = None,
) -> str:
# Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
if base_name is not None:
name = base_name.strip().replace(' ', '-').replace('/', '-')
name = base_name.strip().replace(" ", "-").replace("/", "-")
elif model_name is not None:
name = model_name.strip().replace(' ', '-').replace('/', '-')
name = model_name.strip().replace(" ", "-").replace("/", "-")
else:
name = "ggml-model"
parameters = f"-{size_label}" if size_label is not None else ""
finetune = f"-{finetune_string.strip().replace(' ', '-')}" if finetune_string is not None else ""
finetune = (
f"-{finetune_string.strip().replace(' ', '-')}"
if finetune_string is not None
else ""
)
version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""
version = (
f"-{version_string.strip().replace(' ', '-')}"
if version_string is not None
else ""
)
encoding = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else ""
encoding = (
f"-{output_type.strip().replace(' ', '-').upper()}"
if output_type is not None
else ""
)
kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""

View File

@ -5,7 +5,16 @@
import json
import os
from pathlib import Path
from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable
from typing import (
Any,
Callable,
Sequence,
Mapping,
Iterable,
Protocol,
ClassVar,
runtime_checkable,
)
from sentencepiece import SentencePieceProcessor
@ -23,7 +32,9 @@ class SpecialVocab:
chat_template: str | Sequence[Mapping[str, str]] | None
def __init__(
self, path: str | os.PathLike[str], load_merges: bool = False,
self,
path: str | os.PathLike[str],
load_merges: bool = False,
special_token_types: Iterable[str] | None = None,
n_vocab: int | None = None,
):
@ -36,40 +47,60 @@ def __init__(
if special_token_types is not None:
self.special_token_types = special_token_types
else:
self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad', 'cls', 'mask')
self.special_token_types = (
"bos",
"eos",
"unk",
"sep",
"pad",
"cls",
"mask",
)
self._load(Path(path))
def __repr__(self) -> str:
return '<SpecialVocab with {} merges, special tokens {}, add special tokens {}>'.format(
len(self.merges), self.special_token_ids or "unset", self.add_special_token or "unset",
return "<SpecialVocab with {} merges, special tokens {}, add special tokens {}>".format(
len(self.merges),
self.special_token_ids or "unset",
self.add_special_token or "unset",
)
def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
if self.merges:
if not quiet:
logger.info(f'Adding {len(self.merges)} merge(s).')
logger.info(f"Adding {len(self.merges)} merge(s).")
gw.add_token_merges(self.merges)
elif self.load_merges:
logger.warning('Adding merges requested but no merges found, output may be non-functional.')
logger.warning(
"Adding merges requested but no merges found, output may be non-functional."
)
for typ, tokid in self.special_token_ids.items():
id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
id_handler: Callable[[int], None] | None = getattr(
gw, f"add_{typ}_token_id", None
)
if id_handler is None:
logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping')
logger.warning(
f"No handler for special token type {typ} with id {tokid} - skipping"
)
continue
if not quiet:
logger.info(f'Setting special token type {typ} to {tokid}')
logger.info(f"Setting special token type {typ} to {tokid}")
id_handler(tokid)
for typ, value in self.add_special_token.items():
add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None)
add_handler: Callable[[bool], None] | None = getattr(
gw, f"add_add_{typ}_token", None
)
if add_handler is None:
logger.warning(f'No handler for add_{typ}_token with value {value} - skipping')
logger.warning(
f"No handler for add_{typ}_token with value {value} - skipping"
)
continue
if not quiet:
logger.info(f'Setting add_{typ}_token to {value}')
logger.info(f"Setting add_{typ}_token to {value}")
add_handler(value)
if self.chat_template is not None:
if not quiet:
logger.info(f'Setting chat_template to {self.chat_template}')
logger.info(f"Setting chat_template to {self.chat_template}")
gw.add_chat_template(self.chat_template)
def _load(self, path: Path) -> None:
@ -79,12 +110,12 @@ def _load(self, path: Path) -> None:
self._try_load_merges_txt(path)
def _try_load_merges_txt(self, path: Path) -> bool:
merges_file = path / 'merges.txt'
merges_file = path / "merges.txt"
if not merges_file.is_file():
return False
with open(merges_file, 'r', encoding = 'utf-8') as fp:
first_line = next(fp, '').strip()
if not first_line.startswith('#'):
with open(merges_file, "r", encoding="utf-8") as fp:
first_line = next(fp, "").strip()
if not first_line.startswith("#"):
fp.seek(0)
line_num = 0
else:
@ -97,9 +128,11 @@ def _try_load_merges_txt(self, path: Path) -> bool:
continue
parts = line.split(None, 3)
if len(parts) != 2:
logger.warning(f'{merges_file.name}: Line {line_num}: Entry malformed, ignoring')
logger.warning(
f"{merges_file.name}: Line {line_num}: Entry malformed, ignoring"
)
continue
merges.append(f'{parts[0]} {parts[1]}')
merges.append(f"{parts[0]} {parts[1]}")
self.merges = merges
return True
@ -107,45 +140,49 @@ def _set_special_token(self, typ: str, tid: Any) -> None:
if not isinstance(tid, int):
return
if tid < 0:
raise ValueError(f'invalid value for special token type {typ}: {tid}')
raise ValueError(f"invalid value for special token type {typ}: {tid}")
if self.n_vocab is None or tid < self.n_vocab:
if typ in self.special_token_ids:
return
self.special_token_ids[typ] = tid
return
logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')
logger.warning(
f"Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping"
)
def _try_load_from_tokenizer_json(self, path: Path) -> bool:
tokenizer_file = path / 'tokenizer.json'
tokenizer_file = path / "tokenizer.json"
if tokenizer_file.is_file():
with open(tokenizer_file, encoding = 'utf-8') as f:
with open(tokenizer_file, encoding="utf-8") as f:
tokenizer = json.load(f)
if self.load_merges:
merges = tokenizer.get('model', {}).get('merges')
merges = tokenizer.get("model", {}).get("merges")
if isinstance(merges, list) and merges and isinstance(merges[0], str):
self.merges = merges
added_tokens = tokenizer.get('added_tokens', {})
added_tokens = tokenizer.get("added_tokens", {})
else:
added_tokens = {}
tokenizer_config_file = path / 'tokenizer_config.json'
tokenizer_config_file = path / "tokenizer_config.json"
if not tokenizer_config_file.is_file():
return True
with open(tokenizer_config_file, encoding = 'utf-8') as f:
with open(tokenizer_config_file, encoding="utf-8") as f:
tokenizer_config = json.load(f)
chat_template = tokenizer_config.get('chat_template')
chat_template = tokenizer_config.get("chat_template")
if chat_template is None or isinstance(chat_template, (str, list)):
self.chat_template = chat_template
else:
logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring')
logger.warning(
f"Bad type for chat_template field in {tokenizer_config_file!r} - ignoring"
)
for typ in self.special_token_types:
add_entry = tokenizer_config.get(f'add_{typ}_token')
add_entry = tokenizer_config.get(f"add_{typ}_token")
if isinstance(add_entry, bool):
self.add_special_token[typ] = add_entry
entry = tokenizer_config.get(f'{typ}_token')
entry = tokenizer_config.get(f"{typ}_token")
if isinstance(entry, str):
tc_content = entry
elif isinstance(entry, dict):
entry_content = entry.get('content')
entry_content = entry.get("content")
if not isinstance(entry_content, str):
continue
tc_content = entry_content
@ -153,20 +190,24 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
continue
# We only need the first match here.
maybe_token_id = next(
(atok.get('id') for atok in added_tokens if atok.get('content') == tc_content),
(
atok.get("id")
for atok in added_tokens
if atok.get("content") == tc_content
),
None,
)
self._set_special_token(typ, maybe_token_id)
return True
def _try_load_from_config_json(self, path: Path) -> bool:
config_file = path / 'config.json'
config_file = path / "config.json"
if not config_file.is_file():
return False
with open(config_file, encoding = 'utf-8') as f:
with open(config_file, encoding="utf-8") as f:
config = json.load(f)
for typ in self.special_token_types:
self._set_special_token(typ, config.get(f'{typ}_token_id'))
self._set_special_token(typ, config.get(f"{typ}_token_id"))
return True
@ -202,47 +243,52 @@ class BpeVocab(Vocab):
def __init__(self, base_path: Path):
added_tokens: dict[str, int] = {}
if (fname_tokenizer := base_path / 'vocab.json').exists():
if (fname_tokenizer := base_path / "vocab.json").exists():
# "slow" tokenizer
with open(fname_tokenizer, encoding="utf-8") as f:
self.vocab = json.load(f)
try:
# FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
with open(base_path / 'added_tokens.json', encoding="utf-8") as f:
with open(base_path / "added_tokens.json", encoding="utf-8") as f:
added_tokens = json.load(f)
except FileNotFoundError:
pass
else:
# "fast" tokenizer
fname_tokenizer = base_path / 'tokenizer.json'
fname_tokenizer = base_path / "tokenizer.json"
# if this fails, FileNotFoundError propagates to caller
with open(fname_tokenizer, encoding="utf-8") as f:
tokenizer_json = json.load(f)
tokenizer_model: dict[str, Any] = tokenizer_json['model']
tokenizer_model: dict[str, Any] = tokenizer_json["model"]
if (
tokenizer_model['type'] != 'BPE' or tokenizer_model.get('byte_fallback', False)
or tokenizer_json['decoder']['type'] != 'ByteLevel'
tokenizer_model["type"] != "BPE"
or tokenizer_model.get("byte_fallback", False)
or tokenizer_json["decoder"]["type"] != "ByteLevel"
):
raise FileNotFoundError('Cannot find GPT-2 BPE tokenizer')
raise FileNotFoundError("Cannot find GPT-2 BPE tokenizer")
self.vocab = tokenizer_model["vocab"]
if (added := tokenizer_json.get('added_tokens')) is not None:
if (added := tokenizer_json.get("added_tokens")) is not None:
# Added tokens here can be duplicates of the main vocabulary.
added_tokens = {item['content']: item['id']
added_tokens = {
item["content"]: item["id"]
for item in added
if item['content'] not in self.vocab}
if item["content"] not in self.vocab
}
vocab_size = len(self.vocab)
expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
actual_ids = sorted(added_tokens.values())
if expected_ids != actual_ids:
expected_end_id = vocab_size + len(actual_ids) - 1
raise ValueError(f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
f"{vocab_size} - {expected_end_id}; got {actual_ids}")
raise ValueError(
f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
f"{vocab_size} - {expected_end_id}; got {actual_ids}"
)
items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
self.added_tokens_dict = added_tokens
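# Worked example (hedged; token names are hypothetical): added ids must
# continue the base vocabulary without gaps.
base_vocab_size = 32000
demo_added = {"<tok_a>": 32000, "<tok_b>": 32001}
assert sorted(demo_added.values()) == list(
    range(base_vocab_size, base_vocab_size + len(demo_added))
)  # an id of 32002 instead of 32001 would trip the ValueError above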
@ -276,27 +322,31 @@ class SentencePieceVocab(Vocab):
def __init__(self, base_path: Path):
added_tokens: dict[str, int] = {}
if (fname_tokenizer := base_path / 'tokenizer.model').exists():
if (fname_tokenizer := base_path / "tokenizer.model").exists():
# normal location
try:
with open(base_path / 'added_tokens.json', encoding="utf-8") as f:
with open(base_path / "added_tokens.json", encoding="utf-8") as f:
added_tokens = json.load(f)
except FileNotFoundError:
pass
elif not (fname_tokenizer := base_path.parent / 'tokenizer.model').exists():
elif not (fname_tokenizer := base_path.parent / "tokenizer.model").exists():
# not found in alternate location either
raise FileNotFoundError('Cannot find tokenizer.model')
raise FileNotFoundError("Cannot find tokenizer.model")
self.sentencepiece_tokenizer = SentencePieceProcessor()
self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer))
vocab_size = self.sentencepiece_tokenizer.vocab_size()
new_tokens = {id: piece for piece, id in added_tokens.items() if id >= vocab_size}
new_tokens = {
id: piece for piece, id in added_tokens.items() if id >= vocab_size
}
expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens)))
actual_new_ids = sorted(new_tokens.keys())
if expected_new_ids != actual_new_ids:
raise ValueError(f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}")
raise ValueError(
f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}"
)
# Token pieces that were added to the base vocabulary.
self.added_tokens_dict = added_tokens
@ -347,25 +397,27 @@ class LlamaHfVocab(Vocab):
name = "hfft"
def __init__(self, base_path: Path):
fname_tokenizer = base_path / 'tokenizer.json'
fname_tokenizer = base_path / "tokenizer.json"
# if this fails, FileNotFoundError propagates to caller
with open(fname_tokenizer, encoding='utf-8') as f:
with open(fname_tokenizer, encoding="utf-8") as f:
tokenizer_json = json.load(f)
# pre-check so we know if we need transformers
tokenizer_model: dict[str, Any] = tokenizer_json['model']
tokenizer_model: dict[str, Any] = tokenizer_json["model"]
is_llama3 = (
tokenizer_model['type'] == 'BPE' and tokenizer_model.get('ignore_merges', False)
and not tokenizer_model.get('byte_fallback', True)
tokenizer_model["type"] == "BPE"
and tokenizer_model.get("ignore_merges", False)
and not tokenizer_model.get("byte_fallback", True)
)
if is_llama3:
raise TypeError('Llama 3 must be converted with BpeVocab')
raise TypeError("Llama 3 must be converted with BpeVocab")
if not is_llama3 and (
tokenizer_model['type'] != 'BPE' or not tokenizer_model.get('byte_fallback', False)
or tokenizer_json['decoder']['type'] != 'Sequence'
tokenizer_model["type"] != "BPE"
or not tokenizer_model.get("byte_fallback", False)
or tokenizer_json["decoder"]["type"] != "Sequence"
):
raise FileNotFoundError('Cannot find Llama BPE tokenizer')
raise FileNotFoundError("Cannot find Llama BPE tokenizer")
try:
from transformers import AutoTokenizer
@ -427,16 +479,22 @@ def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
# Yield token text, score, and type
yield token_text, self.get_token_score(token_id), self.get_token_type(
token_id, token_text, self.special_ids # Reuse already stored special IDs
token_id,
token_text,
self.special_ids, # Reuse already stored special IDs
)
def get_token_type(self, token_id: int, token_text: bytes, special_ids: set[int]) -> gguf.TokenType:
def get_token_type(
self, token_id: int, token_text: bytes, special_ids: set[int]
) -> gguf.TokenType:
# Special case for byte tokens
if re.fullmatch(br"<0x[0-9A-Fa-f]{2}>", token_text):
if re.fullmatch(rb"<0x[0-9A-Fa-f]{2}>", token_text):
return gguf.TokenType.BYTE
# Determine token type based on whether it's a special token
return gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL
return (
gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL
)
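# Examples (hedged): byte tokens are recognised purely by their literal form.
import re

assert re.fullmatch(rb"<0x[0-9A-Fa-f]{2}>", b"<0x0A>")  # -> TokenType.BYTE
assert not re.fullmatch(rb"<0x[0-9A-Fa-f]{2}>", b"hello")  # CONTROL or NORMAL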
def get_token_score(self, token_id: int) -> float:
# Placeholder for actual logic to determine the token's score
@ -446,7 +504,9 @@ def get_token_score(self, token_id: int) -> float:
def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
for text in self.added_tokens_list:
if text in self.specials:
toktype = self.get_token_type(self.specials[text], b'', self.special_ids)
toktype = self.get_token_type(
self.specials[text], b"", self.special_ids
)
score = self.get_token_score(self.specials[text])
else:
toktype = gguf.TokenType.USER_DEFINED

View File

@ -9,25 +9,52 @@
import requests
import zipfile
from datetime import datetime
from PyQt6.QtWidgets import (QApplication, QMainWindow, QVBoxLayout, QHBoxLayout, QWidget, QPushButton,
QListWidget, QLineEdit, QLabel, QFileDialog, QProgressBar, QComboBox, QTextEdit,
QCheckBox, QGroupBox, QFormLayout, QScrollArea, QSlider, QSpinBox, QListWidgetItem,
QMessageBox, QDialog, QPlainTextEdit, QMenu)
from PyQt6.QtWidgets import (
QApplication,
QMainWindow,
QVBoxLayout,
QHBoxLayout,
QWidget,
QPushButton,
QListWidget,
QLineEdit,
QLabel,
QFileDialog,
QProgressBar,
QComboBox,
QTextEdit,
QCheckBox,
QGroupBox,
QFormLayout,
QScrollArea,
QSlider,
QSpinBox,
QListWidgetItem,
QMessageBox,
QDialog,
QPlainTextEdit,
QMenu,
)
from PyQt6.QtCore import QTimer, QThread, pyqtSignal, Qt, QSize
from PyQt6.QtGui import QCloseEvent, QAction
def ensure_directory(path):
if not os.path.exists(path):
os.makedirs(path)
def open_file_safe(file_path, mode='r'):
encodings = ['utf-8', 'latin-1', 'ascii', 'utf-16']
def open_file_safe(file_path, mode="r"):
encodings = ["utf-8", "latin-1", "ascii", "utf-16"]
for encoding in encodings:
try:
return open(file_path, mode, encoding=encoding)
except UnicodeDecodeError:
continue
raise ValueError(f"Unable to open file {file_path} with any of the encodings: {encodings}")
raise ValueError(
f"Unable to open file {file_path} with any of the encodings: {encodings}"
)
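# Usage sketch (hedged; "presets.json" is a hypothetical path):
#   with open_file_safe("presets.json") as f:
#       data = f.read()
# Caveat worth noting: open() defers decoding, so a UnicodeDecodeError
# usually surfaces at read() time rather than at open(), and "latin-1" can
# decode any byte sequence at all; callers that rely on the fallback may
# want to perform the read inside the loop.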
def resource_path(relative_path):
try:

File diff suppressed because it is too large