mirror of https://github.com/leafspark/AutoGGUF
style: format code with Black
This commit is contained in:
parent
2dc5bd9e8a
commit
fa51f7cdb8
|
@ -452,8 +452,13 @@ def __init__(self):
|
|||
# Output Type Dropdown
|
||||
self.lora_output_type_combo = QComboBox()
|
||||
self.lora_output_type_combo.addItems(["GGML", "GGUF"])
|
||||
self.lora_output_type_combo.currentIndexChanged.connect(self.update_base_model_visibility)
|
||||
lora_layout.addRow(self.create_label(OUTPUT_TYPE, SELECT_OUTPUT_TYPE), self.lora_output_type_combo)
|
||||
self.lora_output_type_combo.currentIndexChanged.connect(
|
||||
self.update_base_model_visibility
|
||||
)
|
||||
lora_layout.addRow(
|
||||
self.create_label(OUTPUT_TYPE, SELECT_OUTPUT_TYPE),
|
||||
self.lora_output_type_combo,
|
||||
)
|
||||
|
||||
# Base Model Path (initially hidden)
|
||||
self.base_model_label = self.create_label(BASE_MODEL, SELECT_BASE_MODEL_FILE)
|
||||
|
@ -471,7 +476,9 @@ def __init__(self):
|
|||
wrapper_layout = QHBoxLayout(self.base_model_wrapper)
|
||||
wrapper_layout.addWidget(self.base_model_label)
|
||||
wrapper_layout.addWidget(self.base_model_widget, 1) # Give it a stretch factor
|
||||
wrapper_layout.setContentsMargins(0, 0, 0, 0) # Remove margins for better alignment
|
||||
wrapper_layout.setContentsMargins(
|
||||
0, 0, 0, 0
|
||||
) # Remove margins for better alignment
|
||||
|
||||
# Add the wrapper to the layout
|
||||
lora_layout.addRow(self.base_model_wrapper)
|
||||
|
@ -1395,7 +1402,7 @@ def quantize_model(self):
|
|||
override_string = entry.get_override_string(
|
||||
model_name=model_name,
|
||||
quant_type=quant_type,
|
||||
output_path=output_path
|
||||
output_path=output_path,
|
||||
)
|
||||
if override_string:
|
||||
command.extend(["--override-kv", override_string])
|
||||
|
@ -1430,7 +1437,9 @@ def quantize_model(self):
|
|||
self.task_list.setItemWidget(list_item, task_item)
|
||||
|
||||
# Connect the output signal to the new progress parsing function
|
||||
thread.output_signal.connect(lambda line: self.parse_progress(line, task_item))
|
||||
thread.output_signal.connect(
|
||||
lambda line: self.parse_progress(line, task_item)
|
||||
)
|
||||
thread.status_signal.connect(task_item.update_status)
|
||||
thread.finished_signal.connect(lambda: self.task_finished(thread))
|
||||
thread.error_signal.connect(lambda err: self.handle_error(err, task_item))
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
import zipfile
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class DownloadThread(QThread):
|
||||
progress_signal = pyqtSignal(int)
|
||||
finished_signal = pyqtSignal(str)
|
||||
|
@ -27,11 +28,11 @@ def run(self):
|
|||
try:
|
||||
response = requests.get(self.url, stream=True)
|
||||
response.raise_for_status()
|
||||
total_size = int(response.headers.get('content-length', 0))
|
||||
total_size = int(response.headers.get("content-length", 0))
|
||||
block_size = 8192
|
||||
downloaded = 0
|
||||
|
||||
with open(self.save_path, 'wb') as file:
|
||||
with open(self.save_path, "wb") as file:
|
||||
for data in response.iter_content(block_size):
|
||||
size = file.write(data)
|
||||
downloaded += size
|
||||
|
@ -41,7 +42,7 @@ def run(self):
|
|||
|
||||
# Extract the downloaded zip file
|
||||
extract_dir = os.path.splitext(self.save_path)[0]
|
||||
with zipfile.ZipFile(self.save_path, 'r') as zip_ref:
|
||||
with zipfile.ZipFile(self.save_path, "r") as zip_ref:
|
||||
zip_ref.extractall(extract_dir)
|
||||
|
||||
# Remove the zip file after extraction
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
import socket
|
||||
import platform
|
||||
|
||||
|
||||
class KVOverrideEntry(QWidget):
|
||||
deleted = pyqtSignal(QWidget)
|
||||
|
||||
|
@ -44,7 +45,9 @@ def __init__(self, parent=None):
|
|||
def delete_clicked(self):
|
||||
self.deleted.emit(self)
|
||||
|
||||
def get_override_string(self, model_name=None, quant_type=None, output_path=None): # Add arguments
|
||||
def get_override_string(
|
||||
self, model_name=None, quant_type=None, output_path=None
|
||||
): # Add arguments
|
||||
key = self.key_input.text()
|
||||
type_ = self.type_combo.currentText()
|
||||
value = self.value_input.text()
|
||||
|
@ -60,9 +63,15 @@ def get_override_string(self, model_name=None, quant_type=None, output_path=None
|
|||
"{system.python.version}": lambda: platform.python_version(),
|
||||
"{system.time.milliseconds}": lambda: str(int(time.time() * 1000)),
|
||||
"{system.date}": lambda: datetime.now().strftime("%Y-%m-%d"),
|
||||
"{model.name}": lambda: model_name if model_name is not None else "Unknown Model",
|
||||
"{quant.type}": lambda: quant_type if quant_type is not None else "Unknown Quant",
|
||||
"{output.path}": lambda: output_path if output_path is not None else "Unknown Output Path",
|
||||
"{model.name}": lambda: (
|
||||
model_name if model_name is not None else "Unknown Model"
|
||||
),
|
||||
"{quant.type}": lambda: (
|
||||
quant_type if quant_type is not None else "Unknown Quant"
|
||||
),
|
||||
"{output.path}": lambda: (
|
||||
output_path if output_path is not None else "Unknown Output Path"
|
||||
),
|
||||
}
|
||||
|
||||
for param, func in dynamic_params.items():
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class Logger:
|
||||
def __init__(self, name, log_dir):
|
||||
self.logger = logging.getLogger(name)
|
||||
|
@ -15,15 +16,19 @@ def __init__(self, name, log_dir):
|
|||
# Console handler
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setLevel(logging.INFO)
|
||||
console_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
|
||||
console_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
|
||||
console_handler.setFormatter(console_format)
|
||||
|
||||
# File handler
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
log_file = os.path.join(log_dir, f"latest_{timestamp}.log")
|
||||
file_handler = RotatingFileHandler(log_file, maxBytes=10*1024*1024, backupCount=5, encoding='utf-8')
|
||||
file_handler = RotatingFileHandler(
|
||||
log_file, maxBytes=10 * 1024 * 1024, backupCount=5, encoding="utf-8"
|
||||
)
|
||||
file_handler.setLevel(logging.DEBUG)
|
||||
file_format = logging.Formatter('%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s')
|
||||
file_format = logging.Formatter(
|
||||
"%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
|
||||
)
|
||||
file_handler.setFormatter(file_format)
|
||||
|
||||
# Add handlers to logger
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
import zipfile
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class ModelInfoDialog(QDialog):
|
||||
def __init__(self, model_info, parent=None):
|
||||
super().__init__(parent)
|
||||
|
@ -41,8 +42,7 @@ def format_model_info(self, model_info):
|
|||
html += f"<p><b>Tensors:</b> {model_info.get('tensors', 'N/A')}</p>"
|
||||
|
||||
html += "<h3>Key-Value Pairs:</h3>"
|
||||
for key, value in model_info.get('kv_data', {}).items():
|
||||
for key, value in model_info.get("kv_data", {}).items():
|
||||
html += f"<p><b>{key}:</b> {value}</p>"
|
||||
|
||||
return html
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
from datetime import datetime
|
||||
from imports_and_globals import open_file_safe
|
||||
|
||||
|
||||
class QuantizationThread(QThread):
|
||||
# Define custom signals for communication with the main thread
|
||||
output_signal = pyqtSignal(str)
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
import zipfile
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class TaskListItem(QWidget):
|
||||
def __init__(self, task_name, log_file, show_progress_bar=True, parent=None):
|
||||
super().__init__(parent)
|
||||
|
|
|
@ -12,8 +12,8 @@
|
|||
import numpy as np
|
||||
import torch
|
||||
|
||||
if 'NO_LOCAL_GGUF' not in os.environ:
|
||||
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
|
||||
if "NO_LOCAL_GGUF" not in os.environ:
|
||||
sys.path.insert(1, str(Path(__file__).parent / "gguf-py" / "gguf"))
|
||||
import gguf
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
@ -35,7 +35,9 @@ def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
|
|||
fout.write(struct.pack("i", int(params["lora_alpha"])))
|
||||
|
||||
|
||||
def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]) -> None:
|
||||
def write_tensor_header(
|
||||
fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]
|
||||
) -> None:
|
||||
sname = name.encode("utf-8")
|
||||
fout.write(
|
||||
struct.pack(
|
||||
|
@ -49,15 +51,21 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
|
|||
fout.write(sname)
|
||||
fout.seek((fout.tell() + 31) & -32)
|
||||
|
||||
|
||||
def pyinstaller_include():
|
||||
# PyInstaller import
|
||||
pass
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) < 2:
|
||||
logger.info(f"Usage: python {sys.argv[0]} <path> [arch]")
|
||||
logger.info("Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'")
|
||||
logger.info(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
|
||||
logger.info(
|
||||
"Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
|
||||
)
|
||||
logger.info(
|
||||
f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
input_json = os.path.join(sys.argv[1], "adapter_config.json")
|
||||
|
@ -70,6 +78,7 @@ def pyinstaller_include():
|
|||
input_model = os.path.join(sys.argv[1], "adapter_model.safetensors")
|
||||
# lazy import load_file only if lora is in safetensors format.
|
||||
from safetensors.torch import load_file
|
||||
|
||||
model = load_file(input_model, device="cpu")
|
||||
|
||||
arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
|
||||
|
@ -78,14 +87,18 @@ def pyinstaller_include():
|
|||
logger.error(f"Error: unsupported architecture {arch_name}")
|
||||
sys.exit(1)
|
||||
|
||||
arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
|
||||
name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone
|
||||
arch = list(gguf.MODEL_ARCH_NAMES.keys())[
|
||||
list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)
|
||||
]
|
||||
name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone
|
||||
|
||||
with open(input_json, "r") as f:
|
||||
params = json.load(f)
|
||||
|
||||
if params["peft_type"] != "LORA":
|
||||
logger.error(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
|
||||
logger.error(
|
||||
f"Error: unsupported adapter type {params['peft_type']}, expected LORA"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if params["fan_in_fan_out"] is True:
|
||||
|
@ -127,7 +140,7 @@ def pyinstaller_include():
|
|||
|
||||
lora_suffixes = (".lora_A.weight", ".lora_B.weight")
|
||||
if k.endswith(lora_suffixes):
|
||||
suffix = k[-len(lora_suffixes[0]):]
|
||||
suffix = k[-len(lora_suffixes[0]) :]
|
||||
k = k[: -len(lora_suffixes[0])]
|
||||
else:
|
||||
logger.error(f"Error: unrecognized tensor name {orig_k}")
|
||||
|
@ -136,7 +149,9 @@ def pyinstaller_include():
|
|||
tname = name_map.get_name(k)
|
||||
if tname is None:
|
||||
logger.error(f"Error: could not map tensor name {orig_k}")
|
||||
logger.error(" Note: the arch parameter must be specified if the model is not llama")
|
||||
logger.error(
|
||||
" Note: the arch parameter must be specified if the model is not llama"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if suffix == ".lora_A.weight":
|
||||
|
@ -146,7 +161,9 @@ def pyinstaller_include():
|
|||
else:
|
||||
assert False
|
||||
|
||||
logger.info(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
|
||||
logger.info(
|
||||
f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB"
|
||||
)
|
||||
write_tensor_header(fout, tname, t.shape, t.dtype)
|
||||
t.tofile(fout)
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -67,32 +67,34 @@ class ReaderTensor(NamedTuple):
|
|||
|
||||
class GGUFReader:
|
||||
# I - same as host, S - swapped
|
||||
byte_order: Literal['I', 'S'] = 'I'
|
||||
byte_order: Literal["I", "S"] = "I"
|
||||
alignment: int = GGUF_DEFAULT_ALIGNMENT
|
||||
data_offset: int
|
||||
|
||||
# Note: Internal helper, API may change.
|
||||
gguf_scalar_to_np: dict[GGUFValueType, type[np.generic]] = {
|
||||
GGUFValueType.UINT8: np.uint8,
|
||||
GGUFValueType.INT8: np.int8,
|
||||
GGUFValueType.UINT16: np.uint16,
|
||||
GGUFValueType.INT16: np.int16,
|
||||
GGUFValueType.UINT32: np.uint32,
|
||||
GGUFValueType.INT32: np.int32,
|
||||
GGUFValueType.UINT8: np.uint8,
|
||||
GGUFValueType.INT8: np.int8,
|
||||
GGUFValueType.UINT16: np.uint16,
|
||||
GGUFValueType.INT16: np.int16,
|
||||
GGUFValueType.UINT32: np.uint32,
|
||||
GGUFValueType.INT32: np.int32,
|
||||
GGUFValueType.FLOAT32: np.float32,
|
||||
GGUFValueType.UINT64: np.uint64,
|
||||
GGUFValueType.INT64: np.int64,
|
||||
GGUFValueType.UINT64: np.uint64,
|
||||
GGUFValueType.INT64: np.int64,
|
||||
GGUFValueType.FLOAT64: np.float64,
|
||||
GGUFValueType.BOOL: np.bool_,
|
||||
GGUFValueType.BOOL: np.bool_,
|
||||
}
|
||||
|
||||
def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'):
|
||||
self.data = np.memmap(path, mode = mode)
|
||||
def __init__(
|
||||
self, path: os.PathLike[str] | str, mode: Literal["r", "r+", "c"] = "r"
|
||||
):
|
||||
self.data = np.memmap(path, mode=mode)
|
||||
offs = 0
|
||||
|
||||
# Check for GGUF magic
|
||||
if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
|
||||
raise ValueError('GGUF magic invalid')
|
||||
if self._get(offs, np.uint32, override_order="<")[0] != GGUF_MAGIC:
|
||||
raise ValueError("GGUF magic invalid")
|
||||
offs += 4
|
||||
|
||||
# Check GGUF version
|
||||
|
@ -100,28 +102,46 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
|
|||
if temp_version[0] & 65535 == 0:
|
||||
# If we get 0 here that means it's (probably) a GGUF file created for
|
||||
# the opposite byte order of the machine this script is running on.
|
||||
self.byte_order = 'S'
|
||||
self.byte_order = "S"
|
||||
temp_version = temp_version.newbyteorder(self.byte_order)
|
||||
version = temp_version[0]
|
||||
if version not in READER_SUPPORTED_VERSIONS:
|
||||
raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
|
||||
raise ValueError(
|
||||
f"Sorry, file appears to be version {version} which we cannot handle"
|
||||
)
|
||||
self.fields: OrderedDict[str, ReaderField] = OrderedDict()
|
||||
self.tensors: list[ReaderTensor] = []
|
||||
offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
|
||||
offs += self._push_field(
|
||||
ReaderField(
|
||||
offs, "GGUF.version", [temp_version], [0], [GGUFValueType.UINT32]
|
||||
)
|
||||
)
|
||||
|
||||
# Check tensor count and kv count
|
||||
temp_counts = self._get(offs, np.uint64, 2)
|
||||
offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
|
||||
offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
|
||||
offs += self._push_field(
|
||||
ReaderField(
|
||||
offs,
|
||||
"GGUF.tensor_count",
|
||||
[temp_counts[:1]],
|
||||
[0],
|
||||
[GGUFValueType.UINT64],
|
||||
)
|
||||
)
|
||||
offs += self._push_field(
|
||||
ReaderField(
|
||||
offs, "GGUF.kv_count", [temp_counts[1:]], [0], [GGUFValueType.UINT64]
|
||||
)
|
||||
)
|
||||
tensor_count, kv_count = temp_counts
|
||||
offs = self._build_fields(offs, kv_count)
|
||||
|
||||
# Build Tensor Info Fields
|
||||
offs, tensors_fields = self._build_tensor_info(offs, tensor_count)
|
||||
new_align = self.fields.get('general.alignment')
|
||||
new_align = self.fields.get("general.alignment")
|
||||
if new_align is not None:
|
||||
if new_align.types != [GGUFValueType.UINT32]:
|
||||
raise ValueError('Bad type for general.alignment field')
|
||||
raise ValueError("Bad type for general.alignment field")
|
||||
self.alignment = new_align.parts[-1][0]
|
||||
padding = offs % self.alignment
|
||||
if padding != 0:
|
||||
|
@ -129,7 +149,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
|
|||
self.data_offset = offs
|
||||
self._build_tensors(offs, tensors_fields)
|
||||
|
||||
_DT = TypeVar('_DT', bound = npt.DTypeLike)
|
||||
_DT = TypeVar("_DT", bound=npt.DTypeLike)
|
||||
|
||||
# Fetch a key/value metadata field by key.
|
||||
def get_field(self, key: str) -> Union[ReaderField, None]:
|
||||
|
@ -140,14 +160,18 @@ def get_tensor(self, idx: int) -> ReaderTensor:
|
|||
return self.tensors[idx]
|
||||
|
||||
def _get(
|
||||
self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
|
||||
self,
|
||||
offset: int,
|
||||
dtype: npt.DTypeLike,
|
||||
count: int = 1,
|
||||
override_order: None | Literal["I", "S", "<"] = None,
|
||||
) -> npt.NDArray[Any]:
|
||||
count = int(count)
|
||||
itemsize = int(np.empty([], dtype = dtype).itemsize)
|
||||
itemsize = int(np.empty([], dtype=dtype).itemsize)
|
||||
end_offs = offset + itemsize * count
|
||||
return (
|
||||
self.data[offset:end_offs]
|
||||
.view(dtype = dtype)[:count]
|
||||
.view(dtype=dtype)[:count]
|
||||
.newbyteorder(override_order or self.byte_order)
|
||||
)
|
||||
|
||||
|
@ -156,18 +180,22 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
|
|||
# TODO: add option to generate error on duplicate keys
|
||||
# raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
|
||||
|
||||
logger.warning(f'Duplicate key {field.name} at offset {field.offset}')
|
||||
self.fields[field.name + '_{}'.format(field.offset)] = field
|
||||
logger.warning(f"Duplicate key {field.name} at offset {field.offset}")
|
||||
self.fields[field.name + "_{}".format(field.offset)] = field
|
||||
else:
|
||||
self.fields[field.name] = field
|
||||
return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)
|
||||
|
||||
def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
|
||||
def _get_str(
|
||||
self, offset: int
|
||||
) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
|
||||
slen = self._get(offset, np.uint64)
|
||||
return slen, self._get(offset + 8, np.uint8, slen[0])
|
||||
|
||||
def _get_field_parts(
|
||||
self, orig_offs: int, raw_type: int,
|
||||
self,
|
||||
orig_offs: int,
|
||||
raw_type: int,
|
||||
) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
|
||||
offs = orig_offs
|
||||
types: list[GGUFValueType] = []
|
||||
|
@ -192,7 +220,9 @@ def _get_field_parts(
|
|||
aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
|
||||
data_idxs: list[int] = []
|
||||
for idx in range(alen[0]):
|
||||
curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
|
||||
curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(
|
||||
offs, raw_itype[0]
|
||||
)
|
||||
if idx == 0:
|
||||
types += curr_types
|
||||
idxs_offs = len(aparts)
|
||||
|
@ -201,7 +231,7 @@ def _get_field_parts(
|
|||
offs += curr_size
|
||||
return offs - orig_offs, aparts, data_idxs, types
|
||||
# We can't deal with this one.
|
||||
raise ValueError('Unknown/unhandled field type {gtype}')
|
||||
raise ValueError("Unknown/unhandled field type {gtype}")
|
||||
|
||||
def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
|
||||
offs = orig_offs
|
||||
|
@ -228,7 +258,7 @@ def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
|
|||
|
||||
return ReaderField(
|
||||
orig_offs,
|
||||
str(bytes(name_data), encoding = 'utf-8'),
|
||||
str(bytes(name_data), encoding="utf-8"),
|
||||
[name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
|
||||
[1, 3, 4, 5],
|
||||
)
|
||||
|
@ -242,19 +272,26 @@ def _build_fields(self, offs: int, count: int) -> int:
|
|||
offs += int(raw_kv_type.nbytes)
|
||||
parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
|
||||
idxs_offs = len(parts)
|
||||
field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
|
||||
field_size, field_parts, field_idxs, field_types = self._get_field_parts(
|
||||
offs, raw_kv_type[0]
|
||||
)
|
||||
parts += field_parts
|
||||
self._push_field(ReaderField(
|
||||
orig_offs,
|
||||
str(bytes(kv_kdata), encoding = 'utf-8'),
|
||||
parts,
|
||||
[idx + idxs_offs for idx in field_idxs],
|
||||
field_types,
|
||||
), skip_sum = True)
|
||||
self._push_field(
|
||||
ReaderField(
|
||||
orig_offs,
|
||||
str(bytes(kv_kdata), encoding="utf-8"),
|
||||
parts,
|
||||
[idx + idxs_offs for idx in field_idxs],
|
||||
field_types,
|
||||
),
|
||||
skip_sum=True,
|
||||
)
|
||||
offs += field_size
|
||||
return offs
|
||||
|
||||
def _build_tensor_info(self, offs: int, count: int) -> tuple[int, list[ReaderField]]:
|
||||
def _build_tensor_info(
|
||||
self, offs: int, count: int
|
||||
) -> tuple[int, list[ReaderField]]:
|
||||
tensor_fields = []
|
||||
for _ in range(count):
|
||||
field = self._get_tensor_info_field(offs)
|
||||
|
@ -264,13 +301,13 @@ def _build_tensor_info(self, offs: int, count: int) -> tuple[int, list[ReaderFie
|
|||
|
||||
def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
|
||||
tensors = []
|
||||
tensor_names = set() # keep track of name to prevent duplicated tensors
|
||||
tensor_names = set() # keep track of name to prevent duplicated tensors
|
||||
for field in fields:
|
||||
_name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
|
||||
# check if there's any tensor having same name already in the list
|
||||
tensor_name = str(bytes(name_data), encoding = 'utf-8')
|
||||
tensor_name = str(bytes(name_data), encoding="utf-8")
|
||||
if tensor_name in tensor_names:
|
||||
raise ValueError(f'Found duplicated tensor with name {tensor_name}')
|
||||
raise ValueError(f"Found duplicated tensor with name {tensor_name}")
|
||||
tensor_names.add(tensor_name)
|
||||
ggml_type = GGMLQuantizationType(raw_dtype[0])
|
||||
n_elems = int(np.prod(dims))
|
||||
|
@ -304,14 +341,16 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
|
|||
item_count = n_bytes
|
||||
item_type = np.uint8
|
||||
np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)
|
||||
tensors.append(ReaderTensor(
|
||||
name = tensor_name,
|
||||
tensor_type = ggml_type,
|
||||
shape = dims,
|
||||
n_elements = n_elems,
|
||||
n_bytes = n_bytes,
|
||||
data_offset = data_offs,
|
||||
data = self._get(data_offs, item_type, item_count).reshape(np_dims),
|
||||
field = field,
|
||||
))
|
||||
tensors.append(
|
||||
ReaderTensor(
|
||||
name=tensor_name,
|
||||
tensor_type=ggml_type,
|
||||
shape=dims,
|
||||
n_elements=n_elems,
|
||||
n_bytes=n_bytes,
|
||||
data_offset=data_offs,
|
||||
data=self._get(data_offs, item_type, item_count).reshape(np_dims),
|
||||
field=field,
|
||||
)
|
||||
)
|
||||
self.tensors = tensors
|
||||
|
|
|
@ -52,8 +52,8 @@ class GGUFValue:
|
|||
|
||||
class WriterState(Enum):
|
||||
NO_FILE = auto()
|
||||
EMPTY = auto()
|
||||
HEADER = auto()
|
||||
EMPTY = auto()
|
||||
HEADER = auto()
|
||||
KV_DATA = auto()
|
||||
TI_DATA = auto()
|
||||
WEIGHTS = auto()
|
||||
|
@ -67,22 +67,29 @@ class GGUFWriter:
|
|||
kv_data: list[dict[str, GGUFValue]]
|
||||
state: WriterState
|
||||
_simple_value_packing = {
|
||||
GGUFValueType.UINT8: "B",
|
||||
GGUFValueType.INT8: "b",
|
||||
GGUFValueType.UINT16: "H",
|
||||
GGUFValueType.INT16: "h",
|
||||
GGUFValueType.UINT32: "I",
|
||||
GGUFValueType.INT32: "i",
|
||||
GGUFValueType.UINT8: "B",
|
||||
GGUFValueType.INT8: "b",
|
||||
GGUFValueType.UINT16: "H",
|
||||
GGUFValueType.INT16: "h",
|
||||
GGUFValueType.UINT32: "I",
|
||||
GGUFValueType.INT32: "i",
|
||||
GGUFValueType.FLOAT32: "f",
|
||||
GGUFValueType.UINT64: "Q",
|
||||
GGUFValueType.INT64: "q",
|
||||
GGUFValueType.UINT64: "Q",
|
||||
GGUFValueType.INT64: "q",
|
||||
GGUFValueType.FLOAT64: "d",
|
||||
GGUFValueType.BOOL: "?",
|
||||
GGUFValueType.BOOL: "?",
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self, path: os.PathLike[str] | str | None, arch: str, use_temp_file: bool = False, endianess: GGUFEndian = GGUFEndian.LITTLE,
|
||||
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False
|
||||
self,
|
||||
path: os.PathLike[str] | str | None,
|
||||
arch: str,
|
||||
use_temp_file: bool = False,
|
||||
endianess: GGUFEndian = GGUFEndian.LITTLE,
|
||||
split_max_tensors: int = 0,
|
||||
split_max_size: int = 0,
|
||||
dry_run: bool = False,
|
||||
small_first_shard: bool = False,
|
||||
):
|
||||
self.fout = None
|
||||
self.path = Path(path) if path else None
|
||||
|
@ -97,9 +104,11 @@ def __init__(
|
|||
self.split_max_size = split_max_size
|
||||
self.dry_run = dry_run
|
||||
self.small_first_shard = small_first_shard
|
||||
logger.info("gguf: This GGUF file is for {0} Endian only".format(
|
||||
"Big" if self.endianess == GGUFEndian.BIG else "Little",
|
||||
))
|
||||
logger.info(
|
||||
"gguf: This GGUF file is for {0} Endian only".format(
|
||||
"Big" if self.endianess == GGUFEndian.BIG else "Little",
|
||||
)
|
||||
)
|
||||
self.state = WriterState.NO_FILE
|
||||
|
||||
if self.small_first_shard:
|
||||
|
@ -128,7 +137,9 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
|
|||
elif name.endswith(".lora_b"):
|
||||
if last_lora_a is None or last_lora_a[0] != name[:-1] + "a":
|
||||
# Bail when the LoRA pair can't be found trivially
|
||||
logger.warning("can't measure LoRA size correctly, tensor order is unusual")
|
||||
logger.warning(
|
||||
"can't measure LoRA size correctly, tensor order is unusual"
|
||||
)
|
||||
return 0, 0, 0, 0
|
||||
else:
|
||||
shape = (*shape[:-1], last_lora_a[1].shape[-1])
|
||||
|
@ -136,7 +147,7 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
|
|||
size = prod(shape)
|
||||
|
||||
if "_exps." in name:
|
||||
expert_params += (size // shape[-3])
|
||||
expert_params += size // shape[-3]
|
||||
expert_sum += shape[-3]
|
||||
n_expert_tensors += 1
|
||||
else:
|
||||
|
@ -157,15 +168,26 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
|
|||
def format_shard_names(self, path: Path) -> list[Path]:
|
||||
if len(self.tensors) == 1:
|
||||
return [path]
|
||||
return [path.with_name(SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))) for i in range(len(self.tensors))]
|
||||
return [
|
||||
path.with_name(
|
||||
SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))
|
||||
)
|
||||
for i in range(len(self.tensors))
|
||||
]
|
||||
|
||||
def open_output_file(self, path: Path | None = None) -> None:
|
||||
if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path):
|
||||
if (
|
||||
self.state is WriterState.EMPTY
|
||||
and self.fout is not None
|
||||
and (path is None or path == self.path)
|
||||
):
|
||||
# allow calling this multiple times as long as the path is the same
|
||||
return
|
||||
|
||||
if self.state is not WriterState.NO_FILE:
|
||||
raise ValueError(f'Expected output file to be not yet opened, got {self.state}')
|
||||
raise ValueError(
|
||||
f"Expected output file to be not yet opened, got {self.state}"
|
||||
)
|
||||
|
||||
if path is not None:
|
||||
self.path = path
|
||||
|
@ -181,7 +203,9 @@ def print_plan(self) -> list[Path]:
|
|||
filenames = self.format_shard_names(self.path)
|
||||
assert len(filenames) == len(self.tensors)
|
||||
for name, tensors in zip(filenames, self.tensors):
|
||||
logger.info(f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}")
|
||||
logger.info(
|
||||
f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}"
|
||||
)
|
||||
|
||||
if self.dry_run:
|
||||
logger.info("Dry run, not writing files")
|
||||
|
@ -201,17 +225,23 @@ def add_shard_kv_data(self) -> None:
|
|||
self.kv_data.extend({} for _ in range(len(self.kv_data), total_splits))
|
||||
for i, kv_data in enumerate(self.kv_data):
|
||||
kv_data[Keys.Split.LLM_KV_SPLIT_NO] = GGUFValue(i, GGUFValueType.UINT16)
|
||||
kv_data[Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(total_splits, GGUFValueType.UINT16)
|
||||
kv_data[Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(total_tensors, GGUFValueType.INT32)
|
||||
kv_data[Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(
|
||||
total_splits, GGUFValueType.UINT16
|
||||
)
|
||||
kv_data[Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(
|
||||
total_tensors, GGUFValueType.INT32
|
||||
)
|
||||
|
||||
def write_header_to_file(self, path: Path | None = None) -> None:
|
||||
if len(self.tensors) == 1 and (self.split_max_tensors != 0 or self.split_max_size != 0):
|
||||
if len(self.tensors) == 1 and (
|
||||
self.split_max_tensors != 0 or self.split_max_size != 0
|
||||
):
|
||||
logger.warning("Model fails split requirements, not splitting")
|
||||
|
||||
self.open_output_file(path)
|
||||
|
||||
if self.state is not WriterState.EMPTY:
|
||||
raise ValueError(f'Expected output file to be empty, got {self.state}')
|
||||
raise ValueError(f"Expected output file to be empty, got {self.state}")
|
||||
|
||||
assert self.fout is not None
|
||||
assert len(self.fout) == len(self.tensors)
|
||||
|
@ -220,7 +250,7 @@ def write_header_to_file(self, path: Path | None = None) -> None:
|
|||
self.add_shard_kv_data()
|
||||
|
||||
for fout, tensors, kv_data in zip(self.fout, self.tensors, self.kv_data):
|
||||
fout.write(self._pack("<I", GGUF_MAGIC, skip_pack_prefix = True))
|
||||
fout.write(self._pack("<I", GGUF_MAGIC, skip_pack_prefix=True))
|
||||
fout.write(self._pack("I", GGUF_VERSION))
|
||||
fout.write(self._pack("Q", len(tensors)))
|
||||
fout.write(self._pack("Q", len(kv_data)))
|
||||
|
@ -229,7 +259,9 @@ def write_header_to_file(self, path: Path | None = None) -> None:
|
|||
|
||||
def write_kv_data_to_file(self) -> None:
|
||||
if self.state is not WriterState.HEADER:
|
||||
raise ValueError(f'Expected output file to contain the header, got {self.state}')
|
||||
raise ValueError(
|
||||
f"Expected output file to contain the header, got {self.state}"
|
||||
)
|
||||
assert self.fout is not None
|
||||
|
||||
for fout, kv_data in zip(self.fout, self.kv_data):
|
||||
|
@ -246,7 +278,9 @@ def write_kv_data_to_file(self) -> None:
|
|||
|
||||
def write_ti_data_to_file(self) -> None:
|
||||
if self.state is not WriterState.KV_DATA:
|
||||
raise ValueError(f'Expected output file to contain KV data, got {self.state}')
|
||||
raise ValueError(
|
||||
f"Expected output file to contain KV data, got {self.state}"
|
||||
)
|
||||
assert self.fout is not None
|
||||
|
||||
for fout, tensors in zip(self.fout, self.tensors):
|
||||
|
@ -269,12 +303,12 @@ def write_ti_data_to_file(self) -> None:
|
|||
|
||||
def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
|
||||
if any(key in kv_data for kv_data in self.kv_data):
|
||||
raise ValueError(f'Duplicated key name {key!r}')
|
||||
raise ValueError(f"Duplicated key name {key!r}")
|
||||
|
||||
self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
|
||||
|
||||
def add_uint8(self, key: str, val: int) -> None:
|
||||
self.add_key_value(key,val, GGUFValueType.UINT8)
|
||||
self.add_key_value(key, val, GGUFValueType.UINT8)
|
||||
|
||||
def add_int8(self, key: str, val: int) -> None:
|
||||
self.add_key_value(key, val, GGUFValueType.INT8)
|
||||
|
@ -321,14 +355,20 @@ def ggml_pad(x: int, n: int) -> int:
|
|||
return ((x + n - 1) // n) * n
|
||||
|
||||
def add_tensor_info(
|
||||
self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype,
|
||||
tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None,
|
||||
self,
|
||||
name: str,
|
||||
tensor_shape: Sequence[int],
|
||||
tensor_dtype: np.dtype,
|
||||
tensor_nbytes: int,
|
||||
raw_dtype: GGMLQuantizationType | None = None,
|
||||
) -> None:
|
||||
if self.state is not WriterState.NO_FILE:
|
||||
raise ValueError(f'Expected output file to be not yet opened, got {self.state}')
|
||||
raise ValueError(
|
||||
f"Expected output file to be not yet opened, got {self.state}"
|
||||
)
|
||||
|
||||
if any(name in tensors for tensors in self.tensors):
|
||||
raise ValueError(f'Duplicated tensor name {name!r}')
|
||||
raise ValueError(f"Duplicated tensor name {name!r}")
|
||||
|
||||
if raw_dtype is None:
|
||||
if tensor_dtype == np.float16:
|
||||
|
@ -346,7 +386,9 @@ def add_tensor_info(
|
|||
elif tensor_dtype == np.int64:
|
||||
dtype = GGMLQuantizationType.I64
|
||||
else:
|
||||
raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
|
||||
raise ValueError(
|
||||
"Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now"
|
||||
)
|
||||
else:
|
||||
dtype = raw_dtype
|
||||
if tensor_dtype == np.uint8:
|
||||
|
@ -357,16 +399,22 @@ def add_tensor_info(
|
|||
if ( # split when over tensor limit
|
||||
self.split_max_tensors != 0
|
||||
and len(self.tensors[-1]) >= self.split_max_tensors
|
||||
) or ( # split when over size limit
|
||||
) or ( # split when over size limit
|
||||
self.split_max_size != 0
|
||||
and sum(ti.nbytes for ti in self.tensors[-1].values()) + tensor_nbytes > self.split_max_size
|
||||
and sum(ti.nbytes for ti in self.tensors[-1].values()) + tensor_nbytes
|
||||
> self.split_max_size
|
||||
):
|
||||
self.tensors.append({})
|
||||
|
||||
self.tensors[-1][name] = TensorInfo(shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes)
|
||||
self.tensors[-1][name] = TensorInfo(
|
||||
shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes
|
||||
)
|
||||
|
||||
def add_tensor(
|
||||
self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
|
||||
self,
|
||||
name: str,
|
||||
tensor: np.ndarray[Any, Any],
|
||||
raw_shape: Sequence[int] | None = None,
|
||||
raw_dtype: GGMLQuantizationType | None = None,
|
||||
) -> None:
|
||||
if self.endianess == GGUFEndian.BIG:
|
||||
|
@ -377,7 +425,9 @@ def add_tensor(
|
|||
self.temp_file = fp
|
||||
|
||||
shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
|
||||
self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype)
|
||||
self.add_tensor_info(
|
||||
name, shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype
|
||||
)
|
||||
|
||||
if self.temp_file is None:
|
||||
self.tensors[-1][name].tensor = tensor
|
||||
|
@ -387,13 +437,21 @@ def add_tensor(
|
|||
self.write_padding(self.temp_file, tensor.nbytes)
|
||||
|
||||
def write_padding(self, fp: IO[bytes], n: int, align: int | None = None) -> None:
|
||||
pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
|
||||
pad = (
|
||||
GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment)
|
||||
- n
|
||||
)
|
||||
if pad != 0:
|
||||
fp.write(bytes([0] * pad))
|
||||
|
||||
def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
|
||||
if self.state is not WriterState.TI_DATA and self.state is not WriterState.WEIGHTS:
|
||||
raise ValueError(f'Expected output file to contain tensor info or weights, got {self.state}')
|
||||
if (
|
||||
self.state is not WriterState.TI_DATA
|
||||
and self.state is not WriterState.WEIGHTS
|
||||
):
|
||||
raise ValueError(
|
||||
f"Expected output file to contain tensor info or weights, got {self.state}"
|
||||
)
|
||||
assert self.fout is not None
|
||||
|
||||
if self.endianess == GGUFEndian.BIG:
|
||||
|
@ -409,7 +467,9 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
|
|||
|
||||
# pop the first tensor info
|
||||
# TODO: cleaner way to get the first key
|
||||
first_tensor_name = [name for name, _ in zip(self.tensors[file_id].keys(), range(1))][0]
|
||||
first_tensor_name = [
|
||||
name for name, _ in zip(self.tensors[file_id].keys(), range(1))
|
||||
][0]
|
||||
ti = self.tensors[file_id].pop(first_tensor_name)
|
||||
assert ti.nbytes == tensor.nbytes
|
||||
|
||||
|
@ -437,8 +497,15 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
|
|||
total_bytes = sum(ti.nbytes for t in self.tensors for ti in t.values())
|
||||
|
||||
if len(self.fout) > 1:
|
||||
shard_bar = tqdm(desc=f"Shard (0/{len(self.fout)})", total=None, unit="byte", unit_scale=True)
|
||||
bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)
|
||||
shard_bar = tqdm(
|
||||
desc=f"Shard (0/{len(self.fout)})",
|
||||
total=None,
|
||||
unit="byte",
|
||||
unit_scale=True,
|
||||
)
|
||||
bar = tqdm(
|
||||
desc="Writing", total=total_bytes, unit="byte", unit_scale=True
|
||||
)
|
||||
|
||||
for i, (fout, tensors) in enumerate(zip(self.fout, self.tensors)):
|
||||
if shard_bar is not None:
|
||||
|
@ -448,7 +515,9 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
|
|||
|
||||
# relying on the fact that Python dicts preserve insertion order (since 3.7)
|
||||
for ti in tensors.values():
|
||||
assert ti.tensor is not None # can only iterate once over the tensors
|
||||
assert (
|
||||
ti.tensor is not None
|
||||
) # can only iterate once over the tensors
|
||||
assert ti.tensor.nbytes == ti.nbytes
|
||||
ti.tensor.tofile(fout)
|
||||
if shard_bar is not None:
|
||||
|
@ -460,7 +529,9 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
|
|||
else:
|
||||
self.temp_file.seek(0)
|
||||
|
||||
shutil.copyfileobj(self.temp_file, self.fout[0 if not self.small_first_shard else 1])
|
||||
shutil.copyfileobj(
|
||||
self.temp_file, self.fout[0 if not self.small_first_shard else 1]
|
||||
)
|
||||
self.flush()
|
||||
self.temp_file.close()
|
||||
|
||||
|
@ -566,7 +637,9 @@ def add_base_model_version(self, source_id: int, version: str) -> None:
|
|||
self.add_string(Keys.General.BASE_MODEL_VERSION.format(id=source_id), version)
|
||||
|
||||
def add_base_model_organization(self, source_id: int, organization: str) -> None:
|
||||
self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
|
||||
self.add_string(
|
||||
Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization
|
||||
)
|
||||
|
||||
def add_base_model_url(self, source_id: int, url: str) -> None:
|
||||
self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
|
||||
|
@ -605,7 +678,9 @@ def add_block_count(self, length: int) -> None:
|
|||
self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
|
||||
|
||||
def add_leading_dense_block_count(self, length: int) -> None:
|
||||
self.add_uint32(Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch), length)
|
||||
self.add_uint32(
|
||||
Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch), length
|
||||
)
|
||||
|
||||
def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
|
||||
if isinstance(length, int):
|
||||
|
@ -614,10 +689,14 @@ def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
|
|||
self.add_array(Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length)
|
||||
|
||||
def add_expert_feed_forward_length(self, length: int) -> None:
|
||||
self.add_uint32(Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
|
||||
self.add_uint32(
|
||||
Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length
|
||||
)
|
||||
|
||||
def add_expert_shared_feed_forward_length(self, length: int) -> None:
|
||||
self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
|
||||
self.add_uint32(
|
||||
Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length
|
||||
)
|
||||
|
||||
def add_parallel_residual(self, use: bool) -> None:
|
||||
self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
|
||||
|
@ -736,10 +815,14 @@ def add_tokenizer_model(self, model: str) -> None:
|
|||
def add_tokenizer_pre(self, pre: str) -> None:
|
||||
self.add_string(Keys.Tokenizer.PRE, pre)
|
||||
|
||||
def add_token_list(self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
|
||||
def add_token_list(
|
||||
self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]
|
||||
) -> None:
|
||||
self.add_array(Keys.Tokenizer.LIST, tokens)
|
||||
|
||||
def add_token_merges(self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
|
||||
def add_token_merges(
|
||||
self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]
|
||||
) -> None:
|
||||
self.add_array(Keys.Tokenizer.MERGES, merges)
|
||||
|
||||
def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None:
|
||||
|
@ -793,18 +876,22 @@ def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
|
|||
template_names = set()
|
||||
|
||||
for choice in value:
|
||||
name = choice.get('name', '')
|
||||
template = choice.get('template')
|
||||
name = choice.get("name", "")
|
||||
template = choice.get("template")
|
||||
|
||||
# Allowing non-alphanumerical characters in template name is probably not a good idea, so filter it
|
||||
name = ''.join((c if c in ascii_letters + digits else '_' for c in name))
|
||||
name = "".join(
|
||||
(c if c in ascii_letters + digits else "_" for c in name)
|
||||
)
|
||||
|
||||
if name and template is not None:
|
||||
if name == 'default':
|
||||
if name == "default":
|
||||
template_default = template
|
||||
else:
|
||||
template_names.add(name)
|
||||
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template)
|
||||
self.add_string(
|
||||
Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template
|
||||
)
|
||||
|
||||
if template_names:
|
||||
self.add_array(Keys.Tokenizer.CHAT_TEMPLATES, list(template_names))
|
||||
|
@ -829,10 +916,10 @@ def add_eot_token_id(self, id: int) -> None:
|
|||
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
|
||||
|
||||
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
|
||||
pack_prefix = ''
|
||||
pack_prefix = ""
|
||||
if not skip_pack_prefix:
|
||||
pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
|
||||
return struct.pack(f'{pack_prefix}{fmt}', value)
|
||||
pack_prefix = "<" if self.endianess == GGUFEndian.LITTLE else ">"
|
||||
return struct.pack(f"{pack_prefix}{fmt}", value)
|
||||
|
||||
def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
|
||||
kv_data = bytearray()
|
||||
|
@ -842,7 +929,9 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
|
|||
|
||||
pack_fmt = self._simple_value_packing.get(vtype)
|
||||
if pack_fmt is not None:
|
||||
kv_data += self._pack(pack_fmt, val, skip_pack_prefix = vtype == GGUFValueType.BOOL)
|
||||
kv_data += self._pack(
|
||||
pack_fmt, val, skip_pack_prefix=vtype == GGUFValueType.BOOL
|
||||
)
|
||||
elif vtype == GGUFValueType.STRING:
|
||||
encoded_val = val.encode("utf-8") if isinstance(val, str) else val
|
||||
kv_data += self._pack("Q", len(encoded_val))
|
||||
|
@ -860,7 +949,9 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
|
|||
else:
|
||||
ltype = GGUFValueType.get_type(val[0])
|
||||
if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
|
||||
raise ValueError("All items in a GGUF array should be of the same type")
|
||||
raise ValueError(
|
||||
"All items in a GGUF array should be of the same type"
|
||||
)
|
||||
kv_data += self._pack("I", ltype)
|
||||
kv_data += self._pack("Q", len(val))
|
||||
for item in val:
|
||||
|
|
|
@ -13,7 +13,9 @@
|
|||
|
||||
class LazyMeta(ABCMeta):
|
||||
|
||||
def __new__(cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs):
|
||||
def __new__(
|
||||
cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs
|
||||
):
|
||||
def __getattr__(self, name: str) -> Any:
|
||||
meta_attr = getattr(self._meta, name)
|
||||
if callable(meta_attr):
|
||||
|
@ -41,6 +43,7 @@ def wrapped_special_op(self, *args, **kwargs):
|
|||
getattr(type(self)._tensor_type, op_name),
|
||||
meta_noop=meta_noop,
|
||||
)(self, *args, **kwargs)
|
||||
|
||||
return wrapped_special_op
|
||||
|
||||
# special methods bypass __getattr__, so they need to be added manually
|
||||
|
@ -48,11 +51,48 @@ def wrapped_special_op(self, *args, **kwargs):
|
|||
# NOTE: doing this from a metaclass is very convenient
|
||||
# TODO: make this even more comprehensive
|
||||
for binary_op in (
|
||||
"lt", "le", "eq", "ne", "ge", "gt", "not"
|
||||
"abs", "add", "and", "floordiv", "invert", "lshift", "mod", "mul", "matmul",
|
||||
"neg", "or", "pos", "pow", "rshift", "sub", "truediv", "xor",
|
||||
"iadd", "iand", "ifloordiv", "ilshift", "imod", "imul", "ior", "irshift", "isub", "ixor",
|
||||
"radd", "rand", "rfloordiv", "rmul", "ror", "rpow", "rsub", "rtruediv", "rxor",
|
||||
"lt",
|
||||
"le",
|
||||
"eq",
|
||||
"ne",
|
||||
"ge",
|
||||
"gt",
|
||||
"not" "abs",
|
||||
"add",
|
||||
"and",
|
||||
"floordiv",
|
||||
"invert",
|
||||
"lshift",
|
||||
"mod",
|
||||
"mul",
|
||||
"matmul",
|
||||
"neg",
|
||||
"or",
|
||||
"pos",
|
||||
"pow",
|
||||
"rshift",
|
||||
"sub",
|
||||
"truediv",
|
||||
"xor",
|
||||
"iadd",
|
||||
"iand",
|
||||
"ifloordiv",
|
||||
"ilshift",
|
||||
"imod",
|
||||
"imul",
|
||||
"ior",
|
||||
"irshift",
|
||||
"isub",
|
||||
"ixor",
|
||||
"radd",
|
||||
"rand",
|
||||
"rfloordiv",
|
||||
"rmul",
|
||||
"ror",
|
||||
"rpow",
|
||||
"rsub",
|
||||
"rtruediv",
|
||||
"rxor",
|
||||
):
|
||||
attr_name = f"__{binary_op}__"
|
||||
# the result of these operators usually has the same shape and dtype as the input,
|
||||
|
@ -60,7 +100,9 @@ def wrapped_special_op(self, *args, **kwargs):
|
|||
namespace[attr_name] = mk_wrap(attr_name, meta_noop=True)
|
||||
|
||||
for special_op in (
|
||||
"getitem", "setitem", "len",
|
||||
"getitem",
|
||||
"setitem",
|
||||
"len",
|
||||
):
|
||||
attr_name = f"__{special_op}__"
|
||||
namespace[attr_name] = mk_wrap(attr_name, meta_noop=False)
|
||||
|
@ -77,7 +119,15 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
|||
_kwargs: dict[str, Any]
|
||||
_func: Callable[[Any], Any] | None
|
||||
|
||||
def __init__(self, *, meta: Any, data: Any | None = None, args: tuple = (), kwargs: dict[str, Any] | None = None, func: Callable[[Any], Any] | None = None):
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
meta: Any,
|
||||
data: Any | None = None,
|
||||
args: tuple = (),
|
||||
kwargs: dict[str, Any] | None = None,
|
||||
func: Callable[[Any], Any] | None = None,
|
||||
):
|
||||
super().__init__()
|
||||
self._meta = meta
|
||||
self._data = data
|
||||
|
@ -107,7 +157,17 @@ def _recurse_apply(o: Any, fn: Callable[[Any], Any]) -> Any:
|
|||
return o
|
||||
|
||||
@classmethod
|
||||
def _wrap_fn(cls, fn: Callable, *, use_self: LazyBase | None = None, meta_noop: bool | DTypeLike | tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]] = False) -> Callable[[Any], Any]:
|
||||
def _wrap_fn(
|
||||
cls,
|
||||
fn: Callable,
|
||||
*,
|
||||
use_self: LazyBase | None = None,
|
||||
meta_noop: (
|
||||
bool
|
||||
| DTypeLike
|
||||
| tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]]
|
||||
) = False,
|
||||
) -> Callable[[Any], Any]:
|
||||
def wrapped_fn(*args, **kwargs):
|
||||
if kwargs is None:
|
||||
kwargs = {}
|
||||
|
@ -138,13 +198,16 @@ def wrapped_fn(*args, **kwargs):
|
|||
res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)
|
||||
|
||||
if isinstance(res, cls._tensor_type):
|
||||
return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
|
||||
return cls(
|
||||
meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn
|
||||
)
|
||||
else:
|
||||
del res # not needed
|
||||
# non-tensor return likely relies on the contents of the args
|
||||
# (e.g. the result of torch.equal)
|
||||
eager_args = cls.to_eager(args)
|
||||
return fn(*eager_args, **kwargs)
|
||||
|
||||
return wrapped_fn
|
||||
|
||||
@classmethod
|
||||
|
@ -175,7 +238,8 @@ def eager_to_meta(cls, t: Any) -> Any:
|
|||
# must be overridden, meta tensor init is backend-specific
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def meta_with_dtype_and_shape(cls, dtype: Any, shape: Any) -> Any: pass
|
||||
def meta_with_dtype_and_shape(cls, dtype: Any, shape: Any) -> Any:
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def from_eager(cls, t: Any) -> Any:
|
||||
|
@ -192,7 +256,9 @@ class LazyNumpyTensor(LazyBase):
|
|||
_tensor_type = np.ndarray
|
||||
|
||||
@classmethod
|
||||
def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
|
||||
def meta_with_dtype_and_shape(
|
||||
cls, dtype: DTypeLike, shape: tuple[int, ...]
|
||||
) -> np.ndarray[Any, Any]:
|
||||
# The initial idea was to use np.nan as the fill value,
|
||||
# but non-float types like np.int16 can't use that.
|
||||
# So zero it is.
|
||||
|
@ -201,8 +267,16 @@ def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) ->
|
|||
|
||||
def astype(self, dtype, *args, **kwargs):
|
||||
meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
|
||||
full_args = (self, dtype,) + args
|
||||
return type(self)(meta=meta, args=full_args, kwargs=kwargs, func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)))
|
||||
full_args = (
|
||||
self,
|
||||
dtype,
|
||||
) + args
|
||||
return type(self)(
|
||||
meta=meta,
|
||||
args=full_args,
|
||||
kwargs=kwargs,
|
||||
func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)),
|
||||
)
|
||||
|
||||
def tofile(self, *args, **kwargs):
|
||||
eager = LazyNumpyTensor.to_eager(self)
|
||||
|
|
|
@ -44,7 +44,12 @@ class Metadata:
|
|||
datasets: Optional[list[str]] = None
|
||||
|
||||
@staticmethod
|
||||
def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
|
||||
def load(
|
||||
metadata_override_path: Optional[Path] = None,
|
||||
model_path: Optional[Path] = None,
|
||||
model_name: Optional[str] = None,
|
||||
total_params: int = 0,
|
||||
) -> Metadata:
|
||||
# This grabs as many contextual authorship metadata as possible from the model repository
|
||||
# making any conversion as required to match the gguf kv store metadata format
|
||||
# as well as giving users the ability to override any authorship metadata that may be incorrect
|
||||
|
@ -57,43 +62,77 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
|
|||
# TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter
|
||||
|
||||
# heuristics
|
||||
metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params)
|
||||
metadata = Metadata.apply_metadata_heuristic(
|
||||
metadata, model_card, hf_params, model_path, total_params
|
||||
)
|
||||
|
||||
# Metadata Override File Provided
|
||||
# This is based on LLM_KV_NAMES mapping in llama.cpp
|
||||
metadata_override = Metadata.load_metadata_override(metadata_override_path)
|
||||
|
||||
metadata.name = metadata_override.get(Keys.General.NAME, metadata.name)
|
||||
metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author)
|
||||
metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version)
|
||||
metadata.organization = metadata_override.get(Keys.General.ORGANIZATION, metadata.organization)
|
||||
metadata.name = metadata_override.get(Keys.General.NAME, metadata.name)
|
||||
metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author)
|
||||
metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version)
|
||||
metadata.organization = metadata_override.get(
|
||||
Keys.General.ORGANIZATION, metadata.organization
|
||||
)
|
||||
|
||||
metadata.finetune = metadata_override.get(Keys.General.FINETUNE, metadata.finetune)
|
||||
metadata.basename = metadata_override.get(Keys.General.BASENAME, metadata.basename)
|
||||
metadata.finetune = metadata_override.get(
|
||||
Keys.General.FINETUNE, metadata.finetune
|
||||
)
|
||||
metadata.basename = metadata_override.get(
|
||||
Keys.General.BASENAME, metadata.basename
|
||||
)
|
||||
|
||||
metadata.description = metadata_override.get(Keys.General.DESCRIPTION, metadata.description)
|
||||
metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY, metadata.quantized_by)
|
||||
metadata.description = metadata_override.get(
|
||||
Keys.General.DESCRIPTION, metadata.description
|
||||
)
|
||||
metadata.quantized_by = metadata_override.get(
|
||||
Keys.General.QUANTIZED_BY, metadata.quantized_by
|
||||
)
|
||||
|
||||
metadata.size_label = metadata_override.get(Keys.General.SIZE_LABEL, metadata.size_label)
|
||||
metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME, metadata.license_name)
|
||||
metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK, metadata.license_link)
|
||||
metadata.size_label = metadata_override.get(
|
||||
Keys.General.SIZE_LABEL, metadata.size_label
|
||||
)
|
||||
metadata.license_name = metadata_override.get(
|
||||
Keys.General.LICENSE_NAME, metadata.license_name
|
||||
)
|
||||
metadata.license_link = metadata_override.get(
|
||||
Keys.General.LICENSE_LINK, metadata.license_link
|
||||
)
|
||||
|
||||
metadata.url = metadata_override.get(Keys.General.URL, metadata.url)
|
||||
metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi)
|
||||
metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid)
|
||||
metadata.repo_url = metadata_override.get(Keys.General.REPO_URL, metadata.repo_url)
|
||||
metadata.url = metadata_override.get(Keys.General.URL, metadata.url)
|
||||
metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi)
|
||||
metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid)
|
||||
metadata.repo_url = metadata_override.get(
|
||||
Keys.General.REPO_URL, metadata.repo_url
|
||||
)
|
||||
|
||||
metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL, metadata.source_url)
|
||||
metadata.source_doi = metadata_override.get(Keys.General.SOURCE_DOI, metadata.source_doi)
|
||||
metadata.source_uuid = metadata_override.get(Keys.General.SOURCE_UUID, metadata.source_uuid)
|
||||
metadata.source_repo_url = metadata_override.get(Keys.General.SOURCE_REPO_URL, metadata.source_repo_url)
|
||||
metadata.source_url = metadata_override.get(
|
||||
Keys.General.SOURCE_URL, metadata.source_url
|
||||
)
|
||||
metadata.source_doi = metadata_override.get(
|
||||
Keys.General.SOURCE_DOI, metadata.source_doi
|
||||
)
|
||||
metadata.source_uuid = metadata_override.get(
|
||||
Keys.General.SOURCE_UUID, metadata.source_uuid
|
||||
)
|
||||
metadata.source_repo_url = metadata_override.get(
|
||||
Keys.General.SOURCE_REPO_URL, metadata.source_repo_url
|
||||
)
|
||||
|
||||
# Base Models is received here as an array of models
|
||||
metadata.base_models = metadata_override.get("general.base_models", metadata.base_models)
|
||||
metadata.base_models = metadata_override.get(
|
||||
"general.base_models", metadata.base_models
|
||||
)
|
||||
|
||||
metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
|
||||
metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages)
|
||||
metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets)
|
||||
metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
|
||||
metadata.languages = metadata_override.get(
|
||||
Keys.General.LANGUAGES, metadata.languages
|
||||
)
|
||||
metadata.datasets = metadata_override.get(
|
||||
Keys.General.DATASETS, metadata.datasets
|
||||
)
|
||||
|
||||
# Direct Metadata Override (via direct cli argument)
|
||||
if model_name is not None:
|
||||
|
@@ -102,7 +141,9 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
        return metadata

    @staticmethod
    def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, Any]:
    def load_metadata_override(
        metadata_override_path: Optional[Path] = None,
    ) -> dict[str, Any]:
        if metadata_override_path is None or not metadata_override_path.is_file():
            return {}

@@ -128,7 +169,9 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]:
                if isinstance(data, dict):
                    return data
                else:
                    logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict")
                    logger.error(
                        f"while reading YAML model card frontmatter, data is {type(data)} instead of dict"
                    )
                    return {}
            else:
                return {}
@@ -149,10 +192,21 @@ def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]:
    @staticmethod
    def id_to_title(string):
        # Convert capitalization into title form unless acronym or version number
        return ' '.join([w.title() if w.islower() and not re.match(r'^(v\d+(?:\.\d+)*|\d.*)$', w) else w for w in string.strip().replace('-', ' ').split()])
        return " ".join(
            [
                (
                    w.title()
                    if w.islower() and not re.match(r"^(v\d+(?:\.\d+)*|\d.*)$", w)
                    else w
                )
                for w in string.strip().replace("-", " ").split()
            ]
        )

    @staticmethod
    def get_model_id_components(model_id: Optional[str] = None, total_params: int = 0) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
    def get_model_id_components(
        model_id: Optional[str] = None, total_params: int = 0
    ) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
        # Huggingface often store model id as '<org>/<model name>'
        # so let's parse it and apply some heuristics if possible for model name components

@@ -160,24 +214,24 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
            # model ID missing
            return None, None, None, None, None, None

        if ' ' in model_id:
        if " " in model_id:
            # model ID is actually a normal human sentence
            # which means its most likely a normal model name only
            # not part of the hugging face naming standard, but whatever
            return model_id, None, None, None, None, None

        if '/' in model_id:
        if "/" in model_id:
            # model ID (huggingface style)
            org_component, model_full_name_component = model_id.split('/', 1)
            org_component, model_full_name_component = model_id.split("/", 1)
        else:
            # model ID but missing org components
            org_component, model_full_name_component = None, model_id

        # Check if we erroneously matched against './' or '../' etc...
        if org_component is not None and org_component[0] == '.':
        if org_component is not None and org_component[0] == ".":
            org_component = None

        name_parts: list[str] = model_full_name_component.split('-')
        name_parts: list[str] = model_full_name_component.split("-")

        # Remove empty parts
        for i in reversed(range(len(name_parts))):
@@ -191,14 +245,18 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
        # Annotate the name
        for i, part in enumerate(name_parts):
            # Version
            if re.fullmatch(r'(v|iter)?\d+([.]\d+)*', part, re.IGNORECASE):
            if re.fullmatch(r"(v|iter)?\d+([.]\d+)*", part, re.IGNORECASE):
                name_types[i].add("version")
            # Quant type (should not be there for base models, but still annotated)
            elif re.fullmatch(r'i?q\d(_\w)*|b?fp?(16|32)', part, re.IGNORECASE):
            elif re.fullmatch(r"i?q\d(_\w)*|b?fp?(16|32)", part, re.IGNORECASE):
                name_types[i].add("type")
                name_parts[i] = part.upper()
            # Model size
            elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)', part, re.IGNORECASE):
            elif i > 0 and re.fullmatch(
                r"(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)",
                part,
                re.IGNORECASE,
            ):
                part = part.replace("_", ".")
                # Handle weird bloom-7b1 notation
                if part[-1].isdecimal():
@@ -209,14 +267,19 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
                part = part[:-1] + part[-1].upper()
                if total_params != 0:
                    try:
                        label_params = float(part[:-1]) * pow(1000, " KMBT".find(part[-1]))
                        label_params = float(part[:-1]) * pow(
                            1000, " KMBT".find(part[-1])
                        )
                        # Only use it as a size label if it's close or bigger than the model size
                        # Note that LoRA adapters don't necessarily include all layers,
                        # so this is why bigger label sizes are accepted.
                        # Do not use the size label when it's smaller than 1/8 of the model size
                        if (total_params < 0 and label_params < abs(total_params) // 8) or (
                        if (
                            total_params < 0 and label_params < abs(total_params) // 8
                        ) or (
                            # Check both directions when the current model isn't a LoRA adapter
                            total_params > 0 and abs(label_params - total_params) > 7 * total_params // 8
                            total_params > 0
                            and abs(label_params - total_params) > 7 * total_params // 8
                        ):
                            # Likely a context length
                            name_types[i].add("finetune")
@@ -229,7 +292,9 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
                name_types[i].add("size_label")
                name_parts[i] = part
            # Some easy to recognize finetune names
            elif i > 0 and re.fullmatch(r'chat|instruct|vision|lora', part, re.IGNORECASE):
            elif i > 0 and re.fullmatch(
                r"chat|instruct|vision|lora", part, re.IGNORECASE
            ):
                if total_params < 0 and part.lower() == "lora":
                    # ignore redundant "lora" in the finetune part when the output is a lora adapter
                    name_types[i].add("type")
@@ -238,7 +303,12 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =

        # Ignore word-based size labels when there is at least a number-based one present
        # TODO: should word-based size labels always be removed instead?
        if any(c.isdecimal() for n, t in zip(name_parts, name_types) if "size_label" in t for c in n):
        if any(
            c.isdecimal()
            for n, t in zip(name_parts, name_types)
            if "size_label" in t
            for c in n
        ):
            for n, t in zip(name_parts, name_types):
                if "size_label" in t:
                    if all(c.isalpha() for c in n):
@@ -262,22 +332,55 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
            else:
                break

        basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None
        basename = (
            "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t)
            or None
        )
        # Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys)
        size_label = "-".join(dict.fromkeys(s for s, t in zip(name_parts, name_types) if "size_label" in t).keys()) or None
        finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None
        size_label = (
            "-".join(
                dict.fromkeys(
                    s for s, t in zip(name_parts, name_types) if "size_label" in t
                ).keys()
            )
            or None
        )
        finetune = (
            "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t)
            or None
        )
        # TODO: should the basename version always be excluded?
        # NOTE: multiple finetune versions are joined together
        version = "-".join(v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t) or None
        version = (
            "-".join(
                v
                for v, t, in zip(name_parts, name_types)
                if "version" in t and "basename" not in t
            )
            or None
        )

        if size_label is None and finetune is None and version is None:
            # Too ambiguous, output nothing
            basename = None

        return model_full_name_component, org_component, basename, finetune, version, size_label
        return (
            model_full_name_component,
            org_component,
            basename,
            finetune,
            version,
            size_label,
        )

    @staticmethod
    def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None, total_params: int = 0) -> Metadata:
    def apply_metadata_heuristic(
        metadata: Metadata,
        model_card: Optional[dict] = None,
        hf_params: Optional[dict] = None,
        model_path: Optional[Path] = None,
        total_params: int = 0,
    ) -> Metadata:
        # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1

        # Model Card Heuristics
@@ -317,16 +420,30 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No

            for model_id in metadata_base_models:
                # NOTE: model size of base model is assumed to be similar to the size of the current model
                model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
                (
                    model_full_name_component,
                    org_component,
                    basename,
                    finetune,
                    version,
                    size_label,
                ) = Metadata.get_model_id_components(model_id, total_params)
                base_model = {}
                if model_full_name_component is not None:
                    base_model["name"] = Metadata.id_to_title(model_full_name_component)
                    base_model["name"] = Metadata.id_to_title(
                        model_full_name_component
                    )
                if org_component is not None:
                    base_model["organization"] = Metadata.id_to_title(org_component)
                if version is not None:
                    base_model["version"] = version
                if org_component is not None and model_full_name_component is not None:
                    base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
                if (
                    org_component is not None
                    and model_full_name_component is not None
                ):
                    base_model["repo_url"] = (
                        f"https://huggingface.co/{org_component}/{model_full_name_component}"
                    )
                metadata.base_models.append(base_model)

            if "license" in model_card and metadata.license is None:
@@ -360,7 +477,9 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
                elif isinstance(pipeline_tags_value, list):
                    metadata.tags.extend(pipeline_tags_value)

            language_value = model_card.get("languages", model_card.get("language", None))
            language_value = model_card.get(
                "languages", model_card.get("language", None)
            )
            if language_value is not None:

                if metadata.languages is None:
@@ -388,11 +507,18 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
        if hf_params is not None:

            hf_name_or_path = hf_params.get("_name_or_path")
            if hf_name_or_path is not None and hf_name_or_path.count('/') <= 1:
            if hf_name_or_path is not None and hf_name_or_path.count("/") <= 1:
                # Use _name_or_path only if its actually a model name and not some computer path
                # e.g. 'meta-llama/Llama-2-7b-hf'
                model_id = hf_name_or_path
                model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
                (
                    model_full_name_component,
                    org_component,
                    basename,
                    finetune,
                    version,
                    size_label,
                ) = Metadata.get_model_id_components(model_id, total_params)
                if metadata.name is None and model_full_name_component is not None:
                    metadata.name = Metadata.id_to_title(model_full_name_component)
                if metadata.organization is None and org_component is not None:
@@ -410,7 +536,14 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
        ############################################
        if model_path is not None:
            model_id = model_path.name
            model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
            (
                model_full_name_component,
                org_component,
                basename,
                finetune,
                version,
                size_label,
            ) = Metadata.get_model_id_components(model_id, total_params)
            if metadata.name is None and model_full_name_component is not None:
                metadata.name = Metadata.id_to_title(model_full_name_component)
            if metadata.organization is None and org_component is not None:
@@ -485,7 +618,9 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
                if "version" in base_model_entry:
                    gguf_writer.add_base_model_version(key, base_model_entry["version"])
                if "organization" in base_model_entry:
                    gguf_writer.add_base_model_organization(key, base_model_entry["organization"])
                    gguf_writer.add_base_model_organization(
                        key, base_model_entry["organization"]
                    )
                if "url" in base_model_entry:
                    gguf_writer.add_base_model_url(key, base_model_entry["url"])
                if "doi" in base_model_entry:
@@ -493,7 +628,9 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
                if "uuid" in base_model_entry:
                    gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"])
                if "repo_url" in base_model_entry:
                    gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"])
                    gguf_writer.add_base_model_repo_url(
                        key, base_model_entry["repo_url"]
                    )

        if self.tags is not None:
            gguf_writer.add_tags(self.tags)
@@ -12,14 +12,18 @@
def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
    block_size, type_size = GGML_QUANT_SIZES[quant_type]
    if shape[-1] % block_size != 0:
        raise ValueError(f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})")
        raise ValueError(
            f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})"
        )
    return (*shape[:-1], shape[-1] // block_size * type_size)


def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
    block_size, type_size = GGML_QUANT_SIZES[quant_type]
    if shape[-1] % type_size != 0:
        raise ValueError(f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})")
        raise ValueError(
            f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})"
        )
    return (*shape[:-1], shape[-1] // type_size * block_size)

@@ -27,14 +31,23 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati
def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
    n = n.astype(np.float32, copy=False).view(np.uint32)
    # force nan to quiet
    n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n)
    n = np.where(
        (n & 0x7FFFFFFF) > 0x7F800000,
        (n & np.uint32(0xFFFF0000)) | np.uint32(64 << 16),
        n,
    )
    # round to nearest even
    n = (np.uint64(n) + (0x7fff + ((n >> 16) & 1))) >> 16
    n = (np.uint64(n) + (0x7FFF + ((n >> 16) & 1))) >> 16
    return n.astype(np.uint16)


# This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
def __apply_over_grouped_rows(
    func: Callable[[np.ndarray], np.ndarray],
    arr: np.ndarray,
    otype: DTypeLike,
    oshape: tuple[int, ...],
) -> np.ndarray:
    rows = arr.reshape((-1, arr.shape[-1]))
    osize = 1
    for dim in oshape:
@@ -42,15 +55,23 @@ def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.
    out = np.empty(shape=osize, dtype=otype)
    # compute over groups of 16 rows (arbitrary, but seems good for performance)
    n_groups = (rows.shape[0] // 16) or 1
    np.concatenate([func(group).ravel() for group in np.array_split(rows, n_groups)], axis=0, out=out)
    np.concatenate(
        [func(group).ravel() for group in np.array_split(rows, n_groups)],
        axis=0,
        out=out,
    )
    return out.reshape(oshape)


def __quantize_bf16_array(n: np.ndarray) -> np.ndarray:
    return __apply_over_grouped_rows(__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape)
    return __apply_over_grouped_rows(
        __compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape
    )


__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(__quantize_bf16_array, meta_noop=np.uint16)
__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(
    __quantize_bf16_array, meta_noop=np.uint16
)


def quantize_bf16(n: np.ndarray):
@@ -105,7 +126,12 @@ def __quantize_q8_0_rows(n: np.ndarray) -> np.ndarray:


def __quantize_q8_0_array(n: np.ndarray) -> np.ndarray:
    return __apply_over_grouped_rows(__quantize_q8_0_rows, arr=n, otype=np.uint8, oshape=__quantize_q8_0_shape_change(n.shape))
    return __apply_over_grouped_rows(
        __quantize_q8_0_rows,
        arr=n,
        otype=np.uint8,
        oshape=__quantize_q8_0_shape_change(n.shape),
    )


__quantize_q8_0_lazy = LazyNumpyTensor._wrap_fn(
@ -9,74 +9,68 @@ class TensorNameMap:
|
|||
mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
|
||||
# Token embeddings
|
||||
MODEL_TENSOR.TOKEN_EMBD: (
|
||||
"gpt_neox.embed_in", # gptneox
|
||||
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
|
||||
"transformer.word_embeddings", # falcon
|
||||
"word_embeddings", # bloom
|
||||
"model.embed_tokens", # llama-hf
|
||||
"tok_embeddings", # llama-pth
|
||||
"embeddings.word_embeddings", # bert nomic-bert
|
||||
"gpt_neox.embed_in", # gptneox
|
||||
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
|
||||
"transformer.word_embeddings", # falcon
|
||||
"word_embeddings", # bloom
|
||||
"model.embed_tokens", # llama-hf
|
||||
"tok_embeddings", # llama-pth
|
||||
"embeddings.word_embeddings", # bert nomic-bert
|
||||
"language_model.embedding.word_embeddings", # persimmon
|
||||
"wte", # gpt2
|
||||
"transformer.embd.wte", # phi2
|
||||
"model.tok_embeddings", # internlm2
|
||||
"model.embedding", # mamba-qbert
|
||||
"backbone.embedding", # mamba
|
||||
"backbone.embeddings", # mamba-hf
|
||||
"transformer.in_out_embed", # Grok
|
||||
"embedding.word_embeddings", # chatglm
|
||||
"transformer.token_embeddings", # openelm
|
||||
"shared", # t5
|
||||
"wte", # gpt2
|
||||
"transformer.embd.wte", # phi2
|
||||
"model.tok_embeddings", # internlm2
|
||||
"model.embedding", # mamba-qbert
|
||||
"backbone.embedding", # mamba
|
||||
"backbone.embeddings", # mamba-hf
|
||||
"transformer.in_out_embed", # Grok
|
||||
"embedding.word_embeddings", # chatglm
|
||||
"transformer.token_embeddings", # openelm
|
||||
"shared", # t5
|
||||
),
|
||||
|
||||
# Token type embeddings
|
||||
MODEL_TENSOR.TOKEN_TYPES: (
|
||||
"embeddings.token_type_embeddings", # bert nomic-bert
|
||||
),
|
||||
|
||||
# Normalization of token embeddings
|
||||
MODEL_TENSOR.TOKEN_EMBD_NORM: (
|
||||
"word_embeddings_layernorm", # bloom
|
||||
"embeddings.LayerNorm", # bert
|
||||
"emb_ln", # nomic-bert
|
||||
"transformer.norm", # openelm
|
||||
"embeddings.LayerNorm", # bert
|
||||
"emb_ln", # nomic-bert
|
||||
"transformer.norm", # openelm
|
||||
),
|
||||
|
||||
# Position embeddings
|
||||
MODEL_TENSOR.POS_EMBD: (
|
||||
"transformer.wpe", # gpt2
|
||||
"transformer.wpe", # gpt2
|
||||
"embeddings.position_embeddings", # bert
|
||||
"wpe", # gpt2
|
||||
"wpe", # gpt2
|
||||
),
|
||||
|
||||
# Output
|
||||
MODEL_TENSOR.OUTPUT: (
|
||||
"embed_out", # gptneox
|
||||
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
|
||||
"output", # llama-pth bloom internlm2
|
||||
"embed_out", # gptneox
|
||||
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
|
||||
"output", # llama-pth bloom internlm2
|
||||
"word_embeddings_for_head", # persimmon
|
||||
"lm_head.linear", # phi2
|
||||
"output_layer", # chatglm
|
||||
"lm_head.linear", # phi2
|
||||
"output_layer", # chatglm
|
||||
),
|
||||
|
||||
# Output norm
|
||||
MODEL_TENSOR.OUTPUT_NORM: (
|
||||
"gpt_neox.final_layer_norm", # gptneox
|
||||
"transformer.ln_f", # gpt2 gpt-j falcon jais
|
||||
"model.norm", # llama-hf baichuan internlm2
|
||||
"norm", # llama-pth
|
||||
"transformer.norm_f", # mpt dbrx
|
||||
"ln_f", # refact bloom qwen gpt2
|
||||
"gpt_neox.final_layer_norm", # gptneox
|
||||
"transformer.ln_f", # gpt2 gpt-j falcon jais
|
||||
"model.norm", # llama-hf baichuan internlm2
|
||||
"norm", # llama-pth
|
||||
"transformer.norm_f", # mpt dbrx
|
||||
"ln_f", # refact bloom qwen gpt2
|
||||
"language_model.encoder.final_layernorm", # persimmon
|
||||
"model.final_layernorm", # persimmon
|
||||
"lm_head.ln", # phi2
|
||||
"model.norm_f", # mamba-qbert
|
||||
"backbone.norm_f", # mamba
|
||||
"transformer.rms_norm", # Grok
|
||||
"encoder.final_layernorm", # chatglm
|
||||
"transformer.norm", # openelm
|
||||
"model.final_layernorm", # persimmon
|
||||
"lm_head.ln", # phi2
|
||||
"model.norm_f", # mamba-qbert
|
||||
"backbone.norm_f", # mamba
|
||||
"transformer.rms_norm", # Grok
|
||||
"encoder.final_layernorm", # chatglm
|
||||
"transformer.norm", # openelm
|
||||
),
|
||||
|
||||
# Rope frequencies
|
||||
MODEL_TENSOR.ROPE_FREQS: (
|
||||
"rope.freqs", # llama-pth
|
||||
|
@ -87,501 +81,394 @@ class TensorNameMap:
|
|||
block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
|
||||
# Attention norm
|
||||
MODEL_TENSOR.ATTN_NORM: (
|
||||
"gpt_neox.layers.{bid}.input_layernorm", # gptneox
|
||||
"transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
|
||||
"transformer.blocks.{bid}.norm_1", # mpt
|
||||
"transformer.h.{bid}.input_layernorm", # falcon7b
|
||||
"h.{bid}.input_layernorm", # bloom
|
||||
"transformer.h.{bid}.ln_mlp", # falcon40b
|
||||
"model.layers.{bid}.input_layernorm", # llama-hf
|
||||
"layers.{bid}.attention_norm", # llama-pth
|
||||
"gpt_neox.layers.{bid}.input_layernorm", # gptneox
|
||||
"transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
|
||||
"transformer.blocks.{bid}.norm_1", # mpt
|
||||
"transformer.h.{bid}.input_layernorm", # falcon7b
|
||||
"h.{bid}.input_layernorm", # bloom
|
||||
"transformer.h.{bid}.ln_mlp", # falcon40b
|
||||
"model.layers.{bid}.input_layernorm", # llama-hf
|
||||
"layers.{bid}.attention_norm", # llama-pth
|
||||
"language_model.encoder.layers.{bid}.input_layernorm", # persimmon
|
||||
"model.layers.{bid}.ln1", # yi
|
||||
"h.{bid}.ln_1", # gpt2
|
||||
"transformer.h.{bid}.ln", # phi2
|
||||
"model.layers.layers.{bid}.norm", # plamo
|
||||
"model.layers.{bid}.attention_norm", # internlm2
|
||||
"model.layers.{bid}.norm", # mamba-qbert
|
||||
"backbone.layers.{bid}.norm", # mamba
|
||||
"transformer.decoder_layer.{bid}.rms_norm", # Grok
|
||||
"transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
|
||||
"encoder.layers.{bid}.input_layernorm", # chatglm
|
||||
"transformer.layers.{bid}.attn_norm", # openelm
|
||||
"model.layers.{bid}.ln1", # yi
|
||||
"h.{bid}.ln_1", # gpt2
|
||||
"transformer.h.{bid}.ln", # phi2
|
||||
"model.layers.layers.{bid}.norm", # plamo
|
||||
"model.layers.{bid}.attention_norm", # internlm2
|
||||
"model.layers.{bid}.norm", # mamba-qbert
|
||||
"backbone.layers.{bid}.norm", # mamba
|
||||
"transformer.decoder_layer.{bid}.rms_norm", # Grok
|
||||
"transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
|
||||
"encoder.layers.{bid}.input_layernorm", # chatglm
|
||||
"transformer.layers.{bid}.attn_norm", # openelm
|
||||
),
|
||||
|
||||
# Attention norm 2
|
||||
MODEL_TENSOR.ATTN_NORM_2: (
|
||||
"transformer.h.{bid}.ln_attn", # falcon40b
|
||||
"encoder.layer.{bid}.layer_norm_1", # jina-v2-code
|
||||
"encoder.layer.{bid}.layer_norm_1", # jina-v2-code
|
||||
),
|
||||
|
||||
# Attention query-key-value
|
||||
MODEL_TENSOR.ATTN_QKV: (
|
||||
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
|
||||
"transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
|
||||
"transformer.blocks.{bid}.attn.Wqkv", # mpt
|
||||
"transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
|
||||
"transformer.h.{bid}.self_attention.query_key_value", # falcon
|
||||
"h.{bid}.self_attention.query_key_value", # bloom
|
||||
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
|
||||
"transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
|
||||
"transformer.blocks.{bid}.attn.Wqkv", # mpt
|
||||
"transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
|
||||
"transformer.h.{bid}.self_attention.query_key_value", # falcon
|
||||
"h.{bid}.self_attention.query_key_value", # bloom
|
||||
"language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
|
||||
"model.layers.{bid}.self_attn.query_key_value", # persimmon
|
||||
"h.{bid}.attn.c_attn", # gpt2
|
||||
"transformer.h.{bid}.mixer.Wqkv", # phi2
|
||||
"encoder.layers.{bid}.attn.Wqkv", # nomic-bert
|
||||
"model.layers.{bid}.self_attn.qkv_proj", # phi3
|
||||
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
|
||||
"transformer.layers.{bid}.attn.qkv_proj", # openelm
|
||||
"model.layers.{bid}.self_attn.query_key_value", # persimmon
|
||||
"h.{bid}.attn.c_attn", # gpt2
|
||||
"transformer.h.{bid}.mixer.Wqkv", # phi2
|
||||
"encoder.layers.{bid}.attn.Wqkv", # nomic-bert
|
||||
"model.layers.{bid}.self_attn.qkv_proj", # phi3
|
||||
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
|
||||
"transformer.layers.{bid}.attn.qkv_proj", # openelm
|
||||
),
|
||||
|
||||
# Attention query
|
||||
MODEL_TENSOR.ATTN_Q: (
|
||||
"model.layers.{bid}.self_attn.q_proj", # llama-hf
|
||||
"layers.{bid}.attention.wq", # llama-pth
|
||||
"encoder.layer.{bid}.attention.self.query", # bert
|
||||
"transformer.h.{bid}.attn.q_proj", # gpt-j
|
||||
"model.layers.layers.{bid}.self_attn.q_proj", # plamo
|
||||
"model.layers.{bid}.attention.wq", # internlm2
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.query",# Grok
|
||||
"model.layers.{bid}.self_attn.q_proj", # llama-hf
|
||||
"layers.{bid}.attention.wq", # llama-pth
|
||||
"encoder.layer.{bid}.attention.self.query", # bert
|
||||
"transformer.h.{bid}.attn.q_proj", # gpt-j
|
||||
"model.layers.layers.{bid}.self_attn.q_proj", # plamo
|
||||
"model.layers.{bid}.attention.wq", # internlm2
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
|
||||
),
|
||||
|
||||
# Attention key
|
||||
MODEL_TENSOR.ATTN_K: (
|
||||
"model.layers.{bid}.self_attn.k_proj", # llama-hf
|
||||
"layers.{bid}.attention.wk", # llama-pth
|
||||
"encoder.layer.{bid}.attention.self.key", # bert
|
||||
"transformer.h.{bid}.attn.k_proj", # gpt-j
|
||||
"transformer.h.{bid}.attn.k", # refact
|
||||
"model.layers.layers.{bid}.self_attn.k_proj", # plamo
|
||||
"model.layers.{bid}.attention.wk", # internlm2
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.key",# Grok
|
||||
"model.layers.{bid}.self_attn.k_proj", # llama-hf
|
||||
"layers.{bid}.attention.wk", # llama-pth
|
||||
"encoder.layer.{bid}.attention.self.key", # bert
|
||||
"transformer.h.{bid}.attn.k_proj", # gpt-j
|
||||
"transformer.h.{bid}.attn.k", # refact
|
||||
"model.layers.layers.{bid}.self_attn.k_proj", # plamo
|
||||
"model.layers.{bid}.attention.wk", # internlm2
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
|
||||
),
|
||||
|
||||
# Attention value
|
||||
MODEL_TENSOR.ATTN_V: (
|
||||
"model.layers.{bid}.self_attn.v_proj", # llama-hf
|
||||
"layers.{bid}.attention.wv", # llama-pth
|
||||
"encoder.layer.{bid}.attention.self.value", # bert
|
||||
"transformer.h.{bid}.attn.v_proj", # gpt-j
|
||||
"transformer.h.{bid}.attn.v", # refact
|
||||
"model.layers.layers.{bid}.self_attn.v_proj", # plamo
|
||||
"model.layers.{bid}.attention.wv", # internlm2
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.value" # Grok
|
||||
"model.layers.{bid}.self_attn.v_proj", # llama-hf
|
||||
"layers.{bid}.attention.wv", # llama-pth
|
||||
"encoder.layer.{bid}.attention.self.value", # bert
|
||||
"transformer.h.{bid}.attn.v_proj", # gpt-j
|
||||
"transformer.h.{bid}.attn.v", # refact
|
||||
"model.layers.layers.{bid}.self_attn.v_proj", # plamo
|
||||
"model.layers.{bid}.attention.wv", # internlm2
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
|
||||
),
|
||||
|
||||
# Attention output
|
||||
MODEL_TENSOR.ATTN_OUT: (
|
||||
"gpt_neox.layers.{bid}.attention.dense", # gptneox
|
||||
"transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
|
||||
"transformer.blocks.{bid}.attn.out_proj", # mpt
|
||||
"transformer.h.{bid}.self_attention.dense", # falcon
|
||||
"h.{bid}.self_attention.dense", # bloom
|
||||
"model.layers.{bid}.self_attn.o_proj", # llama-hf
|
||||
"layers.{bid}.attention.wo", # llama-pth
|
||||
"encoder.layer.{bid}.attention.output.dense", # bert
|
||||
"transformer.h.{bid}.attn.out_proj", # gpt-j
|
||||
"language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
|
||||
"model.layers.{bid}.self_attn.dense", # persimmon
|
||||
"h.{bid}.attn.c_proj", # gpt2
|
||||
"transformer.h.{bid}.mixer.out_proj", # phi2
|
||||
"model.layers.layers.{bid}.self_attn.o_proj", # plamo
|
||||
"model.layers.{bid}.attention.wo", # internlm2
|
||||
"encoder.layers.{bid}.attn.out_proj", # nomic-bert
|
||||
"gpt_neox.layers.{bid}.attention.dense", # gptneox
|
||||
"transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
|
||||
"transformer.blocks.{bid}.attn.out_proj", # mpt
|
||||
"transformer.h.{bid}.self_attention.dense", # falcon
|
||||
"h.{bid}.self_attention.dense", # bloom
|
||||
"model.layers.{bid}.self_attn.o_proj", # llama-hf
|
||||
"layers.{bid}.attention.wo", # llama-pth
|
||||
"encoder.layer.{bid}.attention.output.dense", # bert
|
||||
"transformer.h.{bid}.attn.out_proj", # gpt-j
|
||||
"language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
|
||||
"model.layers.{bid}.self_attn.dense", # persimmon
|
||||
"h.{bid}.attn.c_proj", # gpt2
|
||||
"transformer.h.{bid}.mixer.out_proj", # phi2
|
||||
"model.layers.layers.{bid}.self_attn.o_proj", # plamo
|
||||
"model.layers.{bid}.attention.wo", # internlm2
|
||||
"encoder.layers.{bid}.attn.out_proj", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
|
||||
"transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
|
||||
"encoder.layers.{bid}.self_attention.dense", # chatglm
|
||||
"transformer.layers.{bid}.attn.out_proj", # openelm
|
||||
"transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
|
||||
"encoder.layers.{bid}.self_attention.dense", # chatglm
|
||||
"transformer.layers.{bid}.attn.out_proj", # openelm
|
||||
),
|
||||
|
||||
# Attention output norm
|
||||
MODEL_TENSOR.ATTN_OUT_NORM: (
|
||||
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
|
||||
"encoder.layers.{bid}.norm1", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok
|
||||
"encoder.layers.{bid}.norm1", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok
|
||||
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_POST_NORM: (
|
||||
"model.layers.{bid}.post_attention_layernorm", # gemma2
|
||||
"model.layers.{bid}.post_attention_layernorm", # gemma2
|
||||
),
|
||||
|
||||
# Rotary embeddings
|
||||
MODEL_TENSOR.ATTN_ROT_EMBD: (
|
||||
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
|
||||
"layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
|
||||
"model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
|
||||
"transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell
|
||||
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
|
||||
"layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
|
||||
"model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
|
||||
"transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell
|
||||
),
|
||||
|
||||
# Feed-forward norm
|
||||
MODEL_TENSOR.FFN_NORM: (
|
||||
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
|
||||
"transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
|
||||
"h.{bid}.post_attention_layernorm", # bloom
|
||||
"transformer.blocks.{bid}.norm_2", # mpt
|
||||
"model.layers.{bid}.post_attention_layernorm", # llama-hf
|
||||
"layers.{bid}.ffn_norm", # llama-pth
|
||||
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
|
||||
"transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
|
||||
"h.{bid}.post_attention_layernorm", # bloom
|
||||
"transformer.blocks.{bid}.norm_2", # mpt
|
||||
"model.layers.{bid}.post_attention_layernorm", # llama-hf
|
||||
"layers.{bid}.ffn_norm", # llama-pth
|
||||
"language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
|
||||
"model.layers.{bid}.ln2", # yi
|
||||
"h.{bid}.ln_2", # gpt2
|
||||
"model.layers.{bid}.ffn_norm", # internlm2
|
||||
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
|
||||
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
||||
"transformer.layers.{bid}.ffn_norm", # openelm
|
||||
"model.layers.{bid}.ln2", # yi
|
||||
"h.{bid}.ln_2", # gpt2
|
||||
"model.layers.{bid}.ffn_norm", # internlm2
|
||||
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
|
||||
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
||||
"transformer.layers.{bid}.ffn_norm", # openelm
|
||||
),
|
||||
|
||||
# Post feed-forward norm
|
||||
MODEL_TENSOR.FFN_PRE_NORM: (
|
||||
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2
|
||||
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2
|
||||
),
|
||||
|
||||
# Post feed-forward norm
|
||||
MODEL_TENSOR.FFN_POST_NORM: (
|
||||
"model.layers.{bid}.post_feedforward_layernorm", # gemma2
|
||||
"model.layers.{bid}.post_feedforward_layernorm", # gemma2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_GATE_INP: (
|
||||
"layers.{bid}.feed_forward.gate", # mixtral
|
||||
"model.layers.{bid}.block_sparse_moe.gate", # mixtral
|
||||
"model.layers.{bid}.mlp.gate", # qwen2moe
|
||||
"transformer.decoder_layer.{bid}.router", # Grok
|
||||
"layers.{bid}.feed_forward.gate", # mixtral
|
||||
"model.layers.{bid}.block_sparse_moe.gate", # mixtral
|
||||
"model.layers.{bid}.mlp.gate", # qwen2moe
|
||||
"transformer.decoder_layer.{bid}.router", # Grok
|
||||
"transformer.blocks.{bid}.ffn.router.layer", # dbrx
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
|
||||
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
|
||||
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
|
||||
),
|
||||
|
||||
# Feed-forward up
|
||||
MODEL_TENSOR.FFN_UP: (
|
||||
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
|
||||
"transformer.h.{bid}.mlp.c_fc", # gpt2 jais
|
||||
"transformer.blocks.{bid}.ffn.up_proj", # mpt
|
||||
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
|
||||
"h.{bid}.mlp.dense_h_to_4h", # bloom
|
||||
"model.layers.{bid}.mlp.up_proj", # llama-hf refact
|
||||
"layers.{bid}.feed_forward.w3", # llama-pth
|
||||
"encoder.layer.{bid}.intermediate.dense", # bert
|
||||
"transformer.h.{bid}.mlp.fc_in", # gpt-j
|
||||
"transformer.h.{bid}.mlp.linear_3", # refact
|
||||
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
|
||||
"transformer.h.{bid}.mlp.c_fc", # gpt2 jais
|
||||
"transformer.blocks.{bid}.ffn.up_proj", # mpt
|
||||
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
|
||||
"h.{bid}.mlp.dense_h_to_4h", # bloom
|
||||
"model.layers.{bid}.mlp.up_proj", # llama-hf refact
|
||||
"layers.{bid}.feed_forward.w3", # llama-pth
|
||||
"encoder.layer.{bid}.intermediate.dense", # bert
|
||||
"transformer.h.{bid}.mlp.fc_in", # gpt-j
|
||||
"transformer.h.{bid}.mlp.linear_3", # refact
|
||||
"language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
|
||||
"model.layers.{bid}.mlp.dense_h_to_4h", # persimmon
|
||||
"transformer.h.{bid}.mlp.w1", # qwen
|
||||
"h.{bid}.mlp.c_fc", # gpt2
|
||||
"transformer.h.{bid}.mlp.fc1", # phi2
|
||||
"model.layers.{bid}.mlp.fc1", # phi2
|
||||
"model.layers.{bid}.mlp.gate_up_proj", # phi3
|
||||
"model.layers.layers.{bid}.mlp.up_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w3", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
|
||||
"model.layers.{bid}.mlp.c_fc", # starcoder2
|
||||
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
|
||||
"model.layers.{bid}.residual_mlp.w3", # arctic
|
||||
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
||||
"model.layers.{bid}.mlp.dense_h_to_4h", # persimmon
|
||||
"transformer.h.{bid}.mlp.w1", # qwen
|
||||
"h.{bid}.mlp.c_fc", # gpt2
|
||||
"transformer.h.{bid}.mlp.fc1", # phi2
|
||||
"model.layers.{bid}.mlp.fc1", # phi2
|
||||
"model.layers.{bid}.mlp.gate_up_proj", # phi3
|
||||
"model.layers.layers.{bid}.mlp.up_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w3", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
|
||||
"model.layers.{bid}.mlp.c_fc", # starcoder2
|
||||
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
|
||||
"model.layers.{bid}.residual_mlp.w3", # arctic
|
||||
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_UP_EXP: (
|
||||
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
|
||||
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
|
||||
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
|
||||
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
|
||||
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
|
||||
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
|
||||
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_UP_SHEXP: (
|
||||
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
|
||||
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
|
||||
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
|
||||
),
|
||||
|
||||
# AWQ-activation gate
|
||||
MODEL_TENSOR.FFN_ACT: (
|
||||
"transformer.blocks.{bid}.ffn.act", # mpt
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_ACT: ("transformer.blocks.{bid}.ffn.act",), # mpt
|
||||
# Feed-forward gate
|
||||
MODEL_TENSOR.FFN_GATE: (
|
||||
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
|
||||
"layers.{bid}.feed_forward.w1", # llama-pth
|
||||
"transformer.h.{bid}.mlp.w2", # qwen
|
||||
"transformer.h.{bid}.mlp.c_fc2", # jais
|
||||
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w1", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
|
||||
"encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
|
||||
"transformer.h.{bid}.mlp.linear_1", # refact
|
||||
"model.layers.{bid}.residual_mlp.w1", # arctic
|
||||
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
|
||||
"layers.{bid}.feed_forward.w1", # llama-pth
|
||||
"transformer.h.{bid}.mlp.w2", # qwen
|
||||
"transformer.h.{bid}.mlp.c_fc2", # jais
|
||||
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w1", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
|
||||
"encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
|
||||
"transformer.h.{bid}.mlp.linear_1", # refact
|
||||
"model.layers.{bid}.residual_mlp.w1", # arctic
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_GATE_EXP: (
|
||||
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
|
||||
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
|
||||
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
|
||||
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
|
||||
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
|
||||
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
|
||||
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_GATE_SHEXP: (
|
||||
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
|
||||
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
|
||||
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
|
||||
),
|
||||
|
||||
# Feed-forward down
|
||||
MODEL_TENSOR.FFN_DOWN: (
|
||||
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
|
||||
"transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
|
||||
"transformer.blocks.{bid}.ffn.down_proj", # mpt
|
||||
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
|
||||
"h.{bid}.mlp.dense_4h_to_h", # bloom
|
||||
"model.layers.{bid}.mlp.down_proj", # llama-hf
|
||||
"layers.{bid}.feed_forward.w2", # llama-pth
|
||||
"encoder.layer.{bid}.output.dense", # bert
|
||||
"transformer.h.{bid}.mlp.fc_out", # gpt-j
|
||||
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
|
||||
"transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
|
||||
"transformer.blocks.{bid}.ffn.down_proj", # mpt
|
||||
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
|
||||
"h.{bid}.mlp.dense_4h_to_h", # bloom
|
||||
"model.layers.{bid}.mlp.down_proj", # llama-hf
|
||||
"layers.{bid}.feed_forward.w2", # llama-pth
|
||||
"encoder.layer.{bid}.output.dense", # bert
|
||||
"transformer.h.{bid}.mlp.fc_out", # gpt-j
|
||||
"language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
|
||||
"model.layers.{bid}.mlp.dense_4h_to_h", # persimmon
|
||||
"h.{bid}.mlp.c_proj", # gpt2
|
||||
"transformer.h.{bid}.mlp.fc2", # phi2
|
||||
"model.layers.{bid}.mlp.fc2", # phi2
|
||||
"model.layers.layers.{bid}.mlp.down_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w2", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc2", # nomic-bert
|
||||
"model.layers.{bid}.mlp.c_proj", # starcoder2
|
||||
"encoder.layer.{bid}.mlp.wo", # jina-bert-v2
|
||||
"transformer.layers.{bid}.ffn.proj_2", # openelm
|
||||
"model.layers.{bid}.residual_mlp.w2", # arctic
|
||||
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
||||
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
||||
"model.layers.{bid}.mlp.dense_4h_to_h", # persimmon
|
||||
"h.{bid}.mlp.c_proj", # gpt2
|
||||
"transformer.h.{bid}.mlp.fc2", # phi2
|
||||
"model.layers.{bid}.mlp.fc2", # phi2
|
||||
"model.layers.layers.{bid}.mlp.down_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w2", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc2", # nomic-bert
|
||||
"model.layers.{bid}.mlp.c_proj", # starcoder2
|
||||
"encoder.layer.{bid}.mlp.wo", # jina-bert-v2
|
||||
"transformer.layers.{bid}.ffn.proj_2", # openelm
|
||||
"model.layers.{bid}.residual_mlp.w2", # arctic
|
||||
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
||||
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_DOWN_EXP: (
|
||||
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
|
||||
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
|
||||
"transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
|
||||
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
|
||||
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
|
||||
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
|
||||
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_DOWN_SHEXP: (
|
||||
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
|
||||
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
|
||||
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_Q_NORM: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
||||
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
||||
"model.layers.{bid}.self_attn.q_norm", # cohere
|
||||
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
||||
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
||||
"transformer.layers.{bid}.attn.q_norm", # openelm
|
||||
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
||||
"model.layers.{bid}.self_attn.q_norm", # cohere
|
||||
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
||||
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
||||
"transformer.layers.{bid}.attn.q_norm", # openelm
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_K_NORM: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
||||
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
||||
"model.layers.{bid}.self_attn.k_norm", # cohere
|
||||
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
||||
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
|
||||
"transformer.layers.{bid}.attn.k_norm", # openelm
|
||||
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
||||
"model.layers.{bid}.self_attn.k_norm", # cohere
|
||||
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
||||
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
|
||||
"transformer.layers.{bid}.attn.k_norm", # openelm
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ROPE_FREQS: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
|
||||
),
|
||||
|
||||
MODEL_TENSOR.LAYER_OUT_NORM: (
|
||||
"encoder.layer.{bid}.output.LayerNorm", # bert
|
||||
"encoder.layers.{bid}.norm2", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
|
||||
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
|
||||
"encoder.layer.{bid}.layer_norm_2" # jina-v2-code
|
||||
"encoder.layer.{bid}.output.LayerNorm", # bert
|
||||
"encoder.layers.{bid}.norm2", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
|
||||
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
|
||||
"encoder.layer.{bid}.layer_norm_2", # jina-v2-code
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_IN: (
|
||||
"model.layers.{bid}.in_proj",
|
||||
"backbone.layers.{bid}.mixer.in_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_CONV1D: (
|
||||
"model.layers.{bid}.conv1d",
|
||||
"backbone.layers.{bid}.mixer.conv1d",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_X: (
|
||||
"model.layers.{bid}.x_proj",
|
||||
"backbone.layers.{bid}.mixer.x_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_DT: (
|
||||
"model.layers.{bid}.dt_proj",
|
||||
"backbone.layers.{bid}.mixer.dt_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_A: (
|
||||
"model.layers.{bid}.A_log",
|
||||
"backbone.layers.{bid}.mixer.A_log",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_D: (
|
||||
"model.layers.{bid}.D",
|
||||
"backbone.layers.{bid}.mixer.D",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_OUT: (
|
||||
"model.layers.{bid}.out_proj",
|
||||
"backbone.layers.{bid}.mixer.out_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_Q_A: (
|
||||
"model.layers.{bid}.self_attn.q_a_proj", # deepseek2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_Q_B: (
|
||||
"model.layers.{bid}.self_attn.q_b_proj", # deepseek2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_Q_A: ("model.layers.{bid}.self_attn.q_a_proj",), # deepseek2
|
||||
MODEL_TENSOR.ATTN_Q_B: ("model.layers.{bid}.self_attn.q_b_proj",), # deepseek2
|
||||
MODEL_TENSOR.ATTN_KV_A_MQA: (
|
||||
"model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
|
||||
"model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_KV_B: (
|
||||
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
|
||||
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_Q_A_NORM: (
|
||||
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
|
||||
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_KV_A_NORM: (
|
||||
"model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
|
||||
"model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_SUB_NORM: (
|
||||
"model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_SUB_NORM: (
|
||||
"model.layers.{bid}.mlp.ffn_layernorm", # bitnet
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_ATTN_NORM: (
|
||||
"decoder.block.{bid}.layer.0.layer_norm", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_ATTN_Q: (
|
||||
"decoder.block.{bid}.layer.0.SelfAttention.q", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_ATTN_K: (
|
||||
"decoder.block.{bid}.layer.0.SelfAttention.k", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_ATTN_V: (
|
||||
"decoder.block.{bid}.layer.0.SelfAttention.v", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_SUB_NORM: ("model.layers.{bid}.mlp.ffn_layernorm",), # bitnet
|
||||
MODEL_TENSOR.DEC_ATTN_NORM: ("decoder.block.{bid}.layer.0.layer_norm",), # t5
|
||||
MODEL_TENSOR.DEC_ATTN_Q: ("decoder.block.{bid}.layer.0.SelfAttention.q",), # t5
|
||||
MODEL_TENSOR.DEC_ATTN_K: ("decoder.block.{bid}.layer.0.SelfAttention.k",), # t5
|
||||
MODEL_TENSOR.DEC_ATTN_V: ("decoder.block.{bid}.layer.0.SelfAttention.v",), # t5
|
||||
MODEL_TENSOR.DEC_ATTN_OUT: (
|
||||
"decoder.block.{bid}.layer.0.SelfAttention.o", # t5
|
||||
"decoder.block.{bid}.layer.0.SelfAttention.o", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_ATTN_REL_B: (
|
||||
"decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
|
||||
"decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
|
||||
"decoder.block.{bid}.layer.1.layer_norm", # t5
|
||||
"decoder.block.{bid}.layer.1.layer_norm", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_CROSS_ATTN_K: (
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_CROSS_ATTN_V: (
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_FFN_NORM: (
|
||||
"decoder.block.{bid}.layer.2.layer_norm", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_FFN_NORM: ("decoder.block.{bid}.layer.2.layer_norm",), # t5
|
||||
MODEL_TENSOR.DEC_FFN_GATE: (
|
||||
"decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
|
||||
"decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_FFN_UP: (
|
||||
"decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
|
||||
"decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
|
||||
"decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
|
||||
"decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_FFN_DOWN: (
|
||||
"decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
|
||||
"decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_OUTPUT_NORM: (
|
||||
"decoder.final_layer_norm", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ENC_ATTN_NORM: (
|
||||
"encoder.block.{bid}.layer.0.layer_norm", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ENC_ATTN_Q: (
|
||||
"encoder.block.{bid}.layer.0.SelfAttention.q", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ENC_ATTN_K: (
|
||||
"encoder.block.{bid}.layer.0.SelfAttention.k", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ENC_ATTN_V: (
|
||||
"encoder.block.{bid}.layer.0.SelfAttention.v", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_OUTPUT_NORM: ("decoder.final_layer_norm",), # t5
|
||||
MODEL_TENSOR.ENC_ATTN_NORM: ("encoder.block.{bid}.layer.0.layer_norm",), # t5
|
||||
MODEL_TENSOR.ENC_ATTN_Q: ("encoder.block.{bid}.layer.0.SelfAttention.q",), # t5
|
||||
MODEL_TENSOR.ENC_ATTN_K: ("encoder.block.{bid}.layer.0.SelfAttention.k",), # t5
|
||||
MODEL_TENSOR.ENC_ATTN_V: ("encoder.block.{bid}.layer.0.SelfAttention.v",), # t5
|
||||
MODEL_TENSOR.ENC_ATTN_OUT: (
|
||||
"encoder.block.{bid}.layer.0.SelfAttention.o", # t5
|
||||
"encoder.block.{bid}.layer.0.SelfAttention.o", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ENC_ATTN_REL_B: (
|
||||
"encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
|
||||
"encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ENC_FFN_NORM: (
|
||||
"encoder.block.{bid}.layer.1.layer_norm", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ENC_FFN_NORM: ("encoder.block.{bid}.layer.1.layer_norm",), # t5
|
||||
MODEL_TENSOR.ENC_FFN_GATE: (
|
||||
"encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
|
||||
"encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ENC_FFN_UP: (
|
||||
"encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
|
||||
"encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
|
||||
"encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
|
||||
"encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ENC_FFN_DOWN: (
|
||||
"encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ENC_OUTPUT_NORM: (
|
||||
"encoder.final_layer_norm", # t5
|
||||
"encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
|
||||
),
|
||||
MODEL_TENSOR.ENC_OUTPUT_NORM: ("encoder.final_layer_norm",), # t5
|
||||
}
|
||||
|
||||
# architecture-specific block mappings
|
||||
arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = {
|
||||
MODEL_ARCH.ARCTIC: {
|
||||
MODEL_TENSOR.FFN_NORM: (
|
||||
"model.layers.{bid}.residual_layernorm",
|
||||
),
|
||||
MODEL_TENSOR.FFN_NORM_EXP: (
|
||||
"model.layers.{bid}.post_attention_layernorm",
|
||||
),
|
||||
MODEL_TENSOR.FFN_NORM: ("model.layers.{bid}.residual_layernorm",),
|
||||
MODEL_TENSOR.FFN_NORM_EXP: ("model.layers.{bid}.post_attention_layernorm",),
|
||||
},
|
||||
}
|
||||
|
||||
|
@@ -603,31 +490,35 @@ def __init__(self, arch: MODEL_ARCH, n_blocks: int):
                if tensor not in MODEL_TENSORS[arch]:
                    continue

                tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
                tensor_name = TENSOR_NAMES[tensor].format(bid=bid)
                self.mapping[tensor_name] = (tensor, tensor_name)
                for key in keys:
                    key = key.format(bid = bid)
                    key = key.format(bid=bid)
                    self.mapping[key] = (tensor, tensor_name)

    def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
    def get_type_and_name(
        self, key: str, try_suffixes: Sequence[str] = ()
    ) -> tuple[MODEL_TENSOR, str] | None:
        result = self.mapping.get(key)
        if result is not None:
            return result
        for suffix in try_suffixes:
            if key.endswith(suffix):
                result = self.mapping.get(key[:-len(suffix)])
                result = self.mapping.get(key[: -len(suffix)])
                if result is not None:
                    return result[0], result[1] + suffix
        return None

    def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
        result = self.get_type_and_name(key, try_suffixes = try_suffixes)
        result = self.get_type_and_name(key, try_suffixes=try_suffixes)
        if result is None:
            return None
        return result[1]

    def get_type(self, key: str, try_suffixes: Sequence[str] = ()) -> MODEL_TENSOR | None:
        result = self.get_type_and_name(key, try_suffixes = try_suffixes)
    def get_type(
        self, key: str, try_suffixes: Sequence[str] = ()
    ) -> MODEL_TENSOR | None:
        result = self.get_type_and_name(key, try_suffixes=try_suffixes)
        if result is None:
            return None
        return result[0]
@@ -7,21 +7,27 @@ def fill_templated_filename(filename: str, output_type: str | None) -> str:
    # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
    ftype_lowercase: str = output_type.lower() if output_type is not None else ""
    ftype_uppercase: str = output_type.upper() if output_type is not None else ""
    return filename.format(ftype_lowercase,
                           outtype=ftype_lowercase, ftype=ftype_lowercase,
                           OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)
    return filename.format(
        ftype_lowercase,
        outtype=ftype_lowercase,
        ftype=ftype_lowercase,
        OUTTYPE=ftype_uppercase,
        FTYPE=ftype_uppercase,
    )


def model_weight_count_rounded_notation(model_params_count: int, min_digits: int = 2) -> str:
    if model_params_count > 1e12 :
def model_weight_count_rounded_notation(
    model_params_count: int, min_digits: int = 2
) -> str:
    if model_params_count > 1e12:
        # Trillions Of Parameters
        scaled_model_params = model_params_count * 1e-12
        scale_suffix = "T"
    elif model_params_count > 1e9 :
    elif model_params_count > 1e9:
        # Billions Of Parameters
        scaled_model_params = model_params_count * 1e-9
        scale_suffix = "B"
    elif model_params_count > 1e6 :
    elif model_params_count > 1e6:
        # Millions Of Parameters
        scaled_model_params = model_params_count * 1e-6
        scale_suffix = "M"
@ -30,39 +36,65 @@ def model_weight_count_rounded_notation(model_params_count: int, min_digits: int
scaled_model_params = model_params_count * 1e-3
scale_suffix = "K"

fix = max(min_digits - len(str(round(scaled_model_params)).lstrip('0')), 0)
fix = max(min_digits - len(str(round(scaled_model_params)).lstrip("0")), 0)

return f"{scaled_model_params:.{fix}f}{scale_suffix}"


def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str:
def size_label(
total_params: int, shared_params: int, expert_params: int, expert_count: int
) -> str:

if expert_count > 0:
pretty_size = model_weight_count_rounded_notation(abs(shared_params) + abs(expert_params), min_digits=2)
pretty_size = model_weight_count_rounded_notation(
abs(shared_params) + abs(expert_params), min_digits=2
)
size_class = f"{expert_count}x{pretty_size}"
else:
size_class = model_weight_count_rounded_notation(abs(total_params), min_digits=2)
size_class = model_weight_count_rounded_notation(
abs(total_params), min_digits=2
)

return size_class


def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
def naming_convention(
model_name: str | None,
base_name: str | None,
finetune_string: str | None,
version_string: str | None,
size_label: str | None,
output_type: str | None,
model_type: Literal["vocab", "LoRA"] | None = None,
) -> str:
# Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention

if base_name is not None:
name = base_name.strip().replace(' ', '-').replace('/', '-')
name = base_name.strip().replace(" ", "-").replace("/", "-")
elif model_name is not None:
name = model_name.strip().replace(' ', '-').replace('/', '-')
name = model_name.strip().replace(" ", "-").replace("/", "-")
else:
name = "ggml-model"

parameters = f"-{size_label}" if size_label is not None else ""

finetune = f"-{finetune_string.strip().replace(' ', '-')}" if finetune_string is not None else ""
finetune = (
f"-{finetune_string.strip().replace(' ', '-')}"
if finetune_string is not None
else ""
)

version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""
version = (
f"-{version_string.strip().replace(' ', '-')}"
if version_string is not None
else ""
)

encoding = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else ""
encoding = (
f"-{output_type.strip().replace(' ', '-').upper()}"
if output_type is not None
else ""
)

kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""

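Quick checks of the size helpers above, assuming the functions shown in this hunk are in scope (the parameter counts are hypothetical; naming_convention's final return, which joins the name, size, finetune, version, output-type, and kind pieces, falls outside this hunk):

print(model_weight_count_rounded_notation(8_030_000_000))           # -> "8.0B"
print(size_label(8_030_000_000, 0, 0, expert_count=0))              # -> "8.0B"
print(size_label(46_700_000_000, 1_300_000_000, 5_600_000_000, 8))  # -> "8x6.9B"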
@ -5,7 +5,16 @@
import json
import os
from pathlib import Path
from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable
from typing import (
Any,
Callable,
Sequence,
Mapping,
Iterable,
Protocol,
ClassVar,
runtime_checkable,
)

from sentencepiece import SentencePieceProcessor

@ -23,7 +32,9 @@ class SpecialVocab:
chat_template: str | Sequence[Mapping[str, str]] | None

def __init__(
self, path: str | os.PathLike[str], load_merges: bool = False,
self,
path: str | os.PathLike[str],
load_merges: bool = False,
special_token_types: Iterable[str] | None = None,
n_vocab: int | None = None,
):

@ -36,40 +47,60 @@ def __init__(
if special_token_types is not None:
self.special_token_types = special_token_types
else:
self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad', 'cls', 'mask')
self.special_token_types = (
"bos",
"eos",
"unk",
"sep",
"pad",
"cls",
"mask",
)
self._load(Path(path))

def __repr__(self) -> str:
return '<SpecialVocab with {} merges, special tokens {}, add special tokens {}>'.format(
len(self.merges), self.special_token_ids or "unset", self.add_special_token or "unset",
return "<SpecialVocab with {} merges, special tokens {}, add special tokens {}>".format(
len(self.merges),
self.special_token_ids or "unset",
self.add_special_token or "unset",
)

def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
if self.merges:
if not quiet:
logger.info(f'Adding {len(self.merges)} merge(s).')
logger.info(f"Adding {len(self.merges)} merge(s).")
gw.add_token_merges(self.merges)
elif self.load_merges:
logger.warning('Adding merges requested but no merges found, output may be non-functional.')
logger.warning(
"Adding merges requested but no merges found, output may be non-functional."
)
for typ, tokid in self.special_token_ids.items():
id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
id_handler: Callable[[int], None] | None = getattr(
gw, f"add_{typ}_token_id", None
)
if id_handler is None:
logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping')
logger.warning(
f"No handler for special token type {typ} with id {tokid} - skipping"
)
continue
if not quiet:
logger.info(f'Setting special token type {typ} to {tokid}')
logger.info(f"Setting special token type {typ} to {tokid}")
id_handler(tokid)
for typ, value in self.add_special_token.items():
add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None)
add_handler: Callable[[bool], None] | None = getattr(
gw, f"add_add_{typ}_token", None
)
if add_handler is None:
logger.warning(f'No handler for add_{typ}_token with value {value} - skipping')
logger.warning(
f"No handler for add_{typ}_token with value {value} - skipping"
)
continue
if not quiet:
logger.info(f'Setting add_{typ}_token to {value}')
logger.info(f"Setting add_{typ}_token to {value}")
add_handler(value)
if self.chat_template is not None:
if not quiet:
logger.info(f'Setting chat_template to {self.chat_template}')
logger.info(f"Setting chat_template to {self.chat_template}")
gw.add_chat_template(self.chat_template)

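A minimal usage sketch of the class above, based only on the constructor and add_to_gguf signatures shown in this hunk (the model directory is hypothetical, and gw is assumed to be an already-configured GGUFWriter):

from pathlib import Path

special_vocab = SpecialVocab(
    Path("models/my-model"),   # directory containing tokenizer*.json / config.json
    load_merges=True,
    special_token_types=("bos", "eos", "unk", "pad"),
    n_vocab=32000,
)
special_vocab.add_to_gguf(gw)  # writes merges, special token ids, and chat template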
def _load(self, path: Path) -> None:
@ -79,12 +110,12 @@ def _load(self, path: Path) -> None:
self._try_load_merges_txt(path)

def _try_load_merges_txt(self, path: Path) -> bool:
merges_file = path / 'merges.txt'
merges_file = path / "merges.txt"
if not merges_file.is_file():
return False
with open(merges_file, 'r', encoding = 'utf-8') as fp:
first_line = next(fp, '').strip()
if not first_line.startswith('#'):
with open(merges_file, "r", encoding="utf-8") as fp:
first_line = next(fp, "").strip()
if not first_line.startswith("#"):
fp.seek(0)
line_num = 0
else:

@ -97,9 +128,11 @@ def _try_load_merges_txt(self, path: Path) -> bool:
continue
parts = line.split(None, 3)
if len(parts) != 2:
logger.warning(f'{merges_file.name}: Line {line_num}: Entry malformed, ignoring')
logger.warning(
f"{merges_file.name}: Line {line_num}: Entry malformed, ignoring"
)
continue
merges.append(f'{parts[0]} {parts[1]}')
merges.append(f"{parts[0]} {parts[1]}")
self.merges = merges
return True

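For reference, how a typical merges.txt entry passes through the normalization above (the sample line is illustrative):

line = "Ġ t"                     # a common GPT-2 style BPE merge entry
parts = line.split(None, 3)      # whitespace split, as in the loop above
assert len(parts) == 2           # malformed entries are skipped with a warning
print(f"{parts[0]} {parts[1]}")  # -> "Ġ t", the form appended to self.merges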
@ -107,45 +140,49 @@ def _set_special_token(self, typ: str, tid: Any) -> None:
if not isinstance(tid, int):
return
if tid < 0:
raise ValueError(f'invalid value for special token type {typ}: {tid}')
raise ValueError(f"invalid value for special token type {typ}: {tid}")
if self.n_vocab is None or tid < self.n_vocab:
if typ in self.special_token_ids:
return
self.special_token_ids[typ] = tid
return
logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')
logger.warning(
f"Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping"
)

def _try_load_from_tokenizer_json(self, path: Path) -> bool:
tokenizer_file = path / 'tokenizer.json'
tokenizer_file = path / "tokenizer.json"
if tokenizer_file.is_file():
with open(tokenizer_file, encoding = 'utf-8') as f:
with open(tokenizer_file, encoding="utf-8") as f:
tokenizer = json.load(f)
if self.load_merges:
merges = tokenizer.get('model', {}).get('merges')
merges = tokenizer.get("model", {}).get("merges")
if isinstance(merges, list) and merges and isinstance(merges[0], str):
self.merges = merges
added_tokens = tokenizer.get('added_tokens', {})
added_tokens = tokenizer.get("added_tokens", {})
else:
added_tokens = {}
tokenizer_config_file = path / 'tokenizer_config.json'
tokenizer_config_file = path / "tokenizer_config.json"
if not tokenizer_config_file.is_file():
return True
with open(tokenizer_config_file, encoding = 'utf-8') as f:
with open(tokenizer_config_file, encoding="utf-8") as f:
tokenizer_config = json.load(f)
chat_template = tokenizer_config.get('chat_template')
chat_template = tokenizer_config.get("chat_template")
if chat_template is None or isinstance(chat_template, (str, list)):
self.chat_template = chat_template
else:
logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring')
logger.warning(
f"Bad type for chat_template field in {tokenizer_config_file!r} - ignoring"
)
for typ in self.special_token_types:
add_entry = tokenizer_config.get(f'add_{typ}_token')
add_entry = tokenizer_config.get(f"add_{typ}_token")
if isinstance(add_entry, bool):
self.add_special_token[typ] = add_entry
entry = tokenizer_config.get(f'{typ}_token')
entry = tokenizer_config.get(f"{typ}_token")
if isinstance(entry, str):
tc_content = entry
elif isinstance(entry, dict):
entry_content = entry.get('content')
entry_content = entry.get("content")
if not isinstance(entry_content, str):
continue
tc_content = entry_content

@ -153,20 +190,24 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
continue
# We only need the first match here.
maybe_token_id = next(
(atok.get('id') for atok in added_tokens if atok.get('content') == tc_content),
(
atok.get("id")
for atok in added_tokens
if atok.get("content") == tc_content
),
None,
)
self._set_special_token(typ, maybe_token_id)
return True

def _try_load_from_config_json(self, path: Path) -> bool:
config_file = path / 'config.json'
config_file = path / "config.json"
if not config_file.is_file():
return False
with open(config_file, encoding = 'utf-8') as f:
with open(config_file, encoding="utf-8") as f:
config = json.load(f)
for typ in self.special_token_types:
self._set_special_token(typ, config.get(f'{typ}_token_id'))
self._set_special_token(typ, config.get(f"{typ}_token_id"))
return True

@ -202,54 +243,59 @@ class BpeVocab(Vocab):
def __init__(self, base_path: Path):
added_tokens: dict[str, int] = {}

if (fname_tokenizer := base_path / 'vocab.json').exists():
if (fname_tokenizer := base_path / "vocab.json").exists():
# "slow" tokenizer
with open(fname_tokenizer, encoding="utf-8") as f:
self.vocab = json.load(f)

try:
# FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
with open(base_path / 'added_tokens.json', encoding="utf-8") as f:
with open(base_path / "added_tokens.json", encoding="utf-8") as f:
added_tokens = json.load(f)
except FileNotFoundError:
pass
else:
# "fast" tokenizer
fname_tokenizer = base_path / 'tokenizer.json'
fname_tokenizer = base_path / "tokenizer.json"

# if this fails, FileNotFoundError propagates to caller
with open(fname_tokenizer, encoding="utf-8") as f:
tokenizer_json = json.load(f)

tokenizer_model: dict[str, Any] = tokenizer_json['model']
tokenizer_model: dict[str, Any] = tokenizer_json["model"]
if (
tokenizer_model['type'] != 'BPE' or tokenizer_model.get('byte_fallback', False)
or tokenizer_json['decoder']['type'] != 'ByteLevel'
tokenizer_model["type"] != "BPE"
or tokenizer_model.get("byte_fallback", False)
or tokenizer_json["decoder"]["type"] != "ByteLevel"
):
raise FileNotFoundError('Cannot find GPT-2 BPE tokenizer')
raise FileNotFoundError("Cannot find GPT-2 BPE tokenizer")

self.vocab = tokenizer_model["vocab"]

if (added := tokenizer_json.get('added_tokens')) is not None:
if (added := tokenizer_json.get("added_tokens")) is not None:
# Added tokens here can be duplicates of the main vocabulary.
added_tokens = {item['content']: item['id']
for item in added
if item['content'] not in self.vocab}
added_tokens = {
item["content"]: item["id"]
for item in added
if item["content"] not in self.vocab
}

vocab_size = len(self.vocab)
vocab_size = len(self.vocab)
expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
actual_ids = sorted(added_tokens.values())
actual_ids = sorted(added_tokens.values())
if expected_ids != actual_ids:
expected_end_id = vocab_size + len(actual_ids) - 1
raise ValueError(f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
f"{vocab_size} - {expected_end_id}; got {actual_ids}")
raise ValueError(
f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
f"{vocab_size} - {expected_end_id}; got {actual_ids}"
)

items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
self.added_tokens_dict = added_tokens
self.added_tokens_list = [text for (text, idx) in items]
self.vocab_size_base = vocab_size
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
self.fname_tokenizer = fname_tokenizer
self.added_tokens_dict = added_tokens
self.added_tokens_list = [text for (text, idx) in items]
self.vocab_size_base = vocab_size
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
self.fname_tokenizer = fname_tokenizer

def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
reverse_vocab = {id: encoded_tok for encoded_tok, id in self.vocab.items()}

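The sequential-ID check in the BpeVocab constructor above requires added tokens to extend the base vocabulary contiguously. A standalone illustration of the same rule (token names and IDs are hypothetical):

vocab_size = 32000
added_tokens = {"<tool_call>": 32000, "</tool_call>": 32001}

expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
actual_ids = sorted(added_tokens.values())
print(expected_ids == actual_ids)   # True: IDs follow the base vocab with no gaps

added_tokens["<pad>"] = 40000        # a gap like this trips the ValueError above
expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
actual_ids = sorted(added_tokens.values())
print(expected_ids == actual_ids)   # False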
@ -276,40 +322,44 @@ class SentencePieceVocab(Vocab):

def __init__(self, base_path: Path):
added_tokens: dict[str, int] = {}
if (fname_tokenizer := base_path / 'tokenizer.model').exists():
if (fname_tokenizer := base_path / "tokenizer.model").exists():
# normal location
try:
with open(base_path / 'added_tokens.json', encoding="utf-8") as f:
with open(base_path / "added_tokens.json", encoding="utf-8") as f:
added_tokens = json.load(f)
except FileNotFoundError:
pass
elif not (fname_tokenizer := base_path.parent / 'tokenizer.model').exists():
elif not (fname_tokenizer := base_path.parent / "tokenizer.model").exists():
# not found in alternate location either
raise FileNotFoundError('Cannot find tokenizer.model')
raise FileNotFoundError("Cannot find tokenizer.model")

self.sentencepiece_tokenizer = SentencePieceProcessor()
self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer))
vocab_size = self.sentencepiece_tokenizer.vocab_size()

new_tokens = {id: piece for piece, id in added_tokens.items() if id >= vocab_size}
new_tokens = {
id: piece for piece, id in added_tokens.items() if id >= vocab_size
}
expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens)))
actual_new_ids = sorted(new_tokens.keys())
actual_new_ids = sorted(new_tokens.keys())

if expected_new_ids != actual_new_ids:
raise ValueError(f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}")
raise ValueError(
f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}"
)

# Token pieces that were added to the base vocabulary.
self.added_tokens_dict = added_tokens
self.added_tokens_list = [new_tokens[id] for id in actual_new_ids]
self.vocab_size_base = vocab_size
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
self.fname_tokenizer = fname_tokenizer
self.added_tokens_dict = added_tokens
self.added_tokens_list = [new_tokens[id] for id in actual_new_ids]
self.vocab_size_base = vocab_size
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
self.fname_tokenizer = fname_tokenizer

def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
tokenizer = self.sentencepiece_tokenizer
for i in range(tokenizer.vocab_size()):
piece = tokenizer.IdToPiece(i)
text = piece.encode("utf-8")
text = piece.encode("utf-8")
score: float = tokenizer.GetScore(i)

toktype = gguf.TokenType.NORMAL

@ -347,25 +397,27 @@ class LlamaHfVocab(Vocab):
name = "hfft"

def __init__(self, base_path: Path):
fname_tokenizer = base_path / 'tokenizer.json'
fname_tokenizer = base_path / "tokenizer.json"
# if this fails, FileNotFoundError propagates to caller
with open(fname_tokenizer, encoding='utf-8') as f:
with open(fname_tokenizer, encoding="utf-8") as f:
tokenizer_json = json.load(f)

# pre-check so we know if we need transformers
tokenizer_model: dict[str, Any] = tokenizer_json['model']
tokenizer_model: dict[str, Any] = tokenizer_json["model"]
is_llama3 = (
tokenizer_model['type'] == 'BPE' and tokenizer_model.get('ignore_merges', False)
and not tokenizer_model.get('byte_fallback', True)
tokenizer_model["type"] == "BPE"
and tokenizer_model.get("ignore_merges", False)
and not tokenizer_model.get("byte_fallback", True)
)
if is_llama3:
raise TypeError('Llama 3 must be converted with BpeVocab')
raise TypeError("Llama 3 must be converted with BpeVocab")

if not is_llama3 and (
tokenizer_model['type'] != 'BPE' or not tokenizer_model.get('byte_fallback', False)
or tokenizer_json['decoder']['type'] != 'Sequence'
tokenizer_model["type"] != "BPE"
or not tokenizer_model.get("byte_fallback", False)
or tokenizer_json["decoder"]["type"] != "Sequence"
):
raise FileNotFoundError('Cannot find Llama BPE tokenizer')
raise FileNotFoundError("Cannot find Llama BPE tokenizer")

try:
from transformers import AutoTokenizer

@ -387,7 +439,7 @@ def __init__(self, base_path: Path):
# Initialize lists and dictionaries for added tokens
self.added_tokens_list = []
self.added_tokens_dict = dict()
self.added_tokens_ids = set()
self.added_tokens_ids = set()

# Process added tokens
for tok, tokidx in sorted(

@ -408,7 +460,7 @@ def __init__(self, base_path: Path):

# Set vocabulary sizes
self.vocab_size_base = self.tokenizer.vocab_size
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)

self.fname_tokenizer = fname_tokenizer

@ -427,16 +479,22 @@ def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:

# Yield token text, score, and type
yield token_text, self.get_token_score(token_id), self.get_token_type(
token_id, token_text, self.special_ids # Reuse already stored special IDs
token_id,
token_text,
self.special_ids, # Reuse already stored special IDs
)

def get_token_type(self, token_id: int, token_text: bytes, special_ids: set[int]) -> gguf.TokenType:
def get_token_type(
self, token_id: int, token_text: bytes, special_ids: set[int]
) -> gguf.TokenType:
# Special case for byte tokens
if re.fullmatch(br"<0x[0-9A-Fa-f]{2}>", token_text):
if re.fullmatch(rb"<0x[0-9A-Fa-f]{2}>", token_text):
return gguf.TokenType.BYTE

# Determine token type based on whether it's a special token
return gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL
return (
gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL
)

def get_token_score(self, token_id: int) -> float:
# Placeholder for actual logic to determine the token's score

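The byte-token pattern used in get_token_type above, applied to two sample token texts (values illustrative):

import re

print(bool(re.fullmatch(rb"<0x[0-9A-Fa-f]{2}>", b"<0x0A>")))  # True  -> TokenType.BYTE
print(bool(re.fullmatch(rb"<0x[0-9A-Fa-f]{2}>", b"hello")))   # False -> CONTROL or NORMAL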
@ -446,7 +504,9 @@ def get_token_score(self, token_id: int) -> float:
def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
for text in self.added_tokens_list:
if text in self.specials:
toktype = self.get_token_type(self.specials[text], b'', self.special_ids)
toktype = self.get_token_type(
self.specials[text], b"", self.special_ids
)
score = self.get_token_score(self.specials[text])
else:
toktype = gguf.TokenType.USER_DEFINED

@ -9,25 +9,52 @@
import requests
import zipfile
from datetime import datetime
from PyQt6.QtWidgets import (QApplication, QMainWindow, QVBoxLayout, QHBoxLayout, QWidget, QPushButton,
QListWidget, QLineEdit, QLabel, QFileDialog, QProgressBar, QComboBox, QTextEdit,
QCheckBox, QGroupBox, QFormLayout, QScrollArea, QSlider, QSpinBox, QListWidgetItem,
QMessageBox, QDialog, QPlainTextEdit, QMenu)
from PyQt6.QtWidgets import (
QApplication,
QMainWindow,
QVBoxLayout,
QHBoxLayout,
QWidget,
QPushButton,
QListWidget,
QLineEdit,
QLabel,
QFileDialog,
QProgressBar,
QComboBox,
QTextEdit,
QCheckBox,
QGroupBox,
QFormLayout,
QScrollArea,
QSlider,
QSpinBox,
QListWidgetItem,
QMessageBox,
QDialog,
QPlainTextEdit,
QMenu,
)
from PyQt6.QtCore import QTimer, QThread, pyqtSignal, Qt, QSize
from PyQt6.QtGui import QCloseEvent, QAction


def ensure_directory(path):
if not os.path.exists(path):
os.makedirs(path)

def open_file_safe(file_path, mode='r'):
encodings = ['utf-8', 'latin-1', 'ascii', 'utf-16']

def open_file_safe(file_path, mode="r"):
encodings = ["utf-8", "latin-1", "ascii", "utf-16"]
for encoding in encodings:
try:
return open(file_path, mode, encoding=encoding)
except UnicodeDecodeError:
continue
raise ValueError(f"Unable to open file {file_path} with any of the encodings: {encodings}")
raise ValueError(
f"Unable to open file {file_path} with any of the encodings: {encodings}"
)

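A usage sketch for the fallback-encoding helper above (the path is hypothetical); closing the returned file object is left to the caller, e.g. via a context manager:

with open_file_safe("models/quantize_output.log") as f:
    contents = f.read()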
def resource_path(relative_path):
try:

src/localizations.py (1844 changes): file diff suppressed because it is too large.