mirror of https://github.com/leafspark/AutoGGUF
chore: update for new year and improve compliance
- updated copyright year in LICENSE file to 2025
- bundled llama.cpp licensing text in About menu to maintain MIT compliance
- updated the bundled llama.cpp conversion scripts and the gguf Python library
- increased the system/GPU monitoring intervals from 0.2s to 0.5s
- updated Python requirements to latest compatible versions
- added new HF to GGUF conversion types: `tq1_0` and `tq2_0`
Happy New Year 🎉!
This commit is contained in: commit 102e3a14fd (parent ddbf96c8e9)

LICENSE | 2 changed lines
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.

-   Copyright 2024 leafspark
+   Copyright (c) 2024-2025 leafspark

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
@@ -1,13 +1,13 @@
 PyYAML~=6.0.2
-psutil~=6.1.0
+psutil~=6.1.1
 pynvml~=12.0.0
 PySide6~=6.8.1
-safetensors~=0.4.5
+safetensors~=0.5.0
 numpy<2.0.0
 torch~=2.5.1
 sentencepiece~=0.2.0
-setuptools~=75.5.0
+setuptools~=75.6.0
-huggingface-hub~=0.26.5
+huggingface-hub~=0.27.0
-transformers~=4.47.0
+transformers~=4.47.1
 fastapi~=0.115.6
 uvicorn~=0.34.0
@@ -500,7 +500,7 @@ def __init__(self, args: List[str]) -> None:
         # Timer for updating system info
         self.timer = QTimer()
         self.timer.timeout.connect(self.update_system_info)
-        self.timer.start(200)
+        self.timer.start(500)

         # Backend selection
         backend_layout = QHBoxLayout()
@@ -1023,7 +1023,9 @@ def __init__(self, args: List[str]) -> None:
         hf_to_gguf_layout.addRow(OUTPUT_FILE, hf_outfile_layout)

         self.hf_outtype = QComboBox()
-        self.hf_outtype.addItems(["f32", "f16", "bf16", "q8_0", "auto"])
+        self.hf_outtype.addItems(
+            ["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"]
+        )
         hf_to_gguf_layout.addRow(OUTPUT_TYPE, self.hf_outtype)

         self.hf_vocab_only = QCheckBox(VOCAB_ONLY)
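For reference, `tq1_0` and `tq2_0` are the ternary quantization output types accepted by recent versions of llama.cpp's `convert_hf_to_gguf.py` via `--outtype`. A minimal sketch of how a selected type could feed the conversion command follows; the helper name and paths are illustrative, not AutoGGUF's actual code.

```python
# Illustrative sketch (hypothetical helper, placeholder paths): build the
# convert_hf_to_gguf.py command line for a chosen output type, including the
# new ternary types tq1_0 / tq2_0.
import sys

OUTPUT_TYPES = ["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"]


def build_hf_to_gguf_command(model_dir: str, outfile: str, outtype: str) -> list[str]:
    if outtype not in OUTPUT_TYPES:
        raise ValueError(f"unsupported output type: {outtype}")
    return [
        sys.executable,
        "convert_hf_to_gguf.py",  # bundled llama.cpp conversion script
        model_dir,
        "--outfile", outfile,     # recent script versions expand {ftype} in the name
        "--outtype", outtype,
    ]


print(build_hf_to_gguf_command("./my-hf-model", "my-model-{ftype}.gguf", "tq2_0"))
```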
@@ -95,7 +95,7 @@ def __init__(self, parent=None) -> None:

         self.timer = QTimer(self)
         self.timer.timeout.connect(self.update_gpu_info)
-        self.timer.start(200)  # Update every 0.2 seconds
+        self.timer.start(500)  # Update every 0.5 seconds

         self.gpu_data = []
         self.vram_data = []
@@ -192,7 +192,7 @@ def update_graph_data() -> None:

         timer = QTimer(dialog)
         timer.timeout.connect(update_graph_data)
-        timer.start(200)  # Update every 0.2 seconds
+        timer.start(500)  # Update every 0.5 seconds

         dialog.exec()

@@ -227,7 +227,7 @@ def update_graph_data() -> None:

         timer = QTimer(dialog)
         timer.timeout.connect(update_graph_data)
-        timer.start(200)  # Update every 0.2 seconds
+        timer.start(500)  # Update every 0.5 seconds

         tab_widget.addTab(gpu_graph, GPU_USAGE_OVER_TIME)
         tab_widget.addTab(vram_graph, VRAM_USAGE_OVER_TIME)
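All three polling timers above move from 200 ms to 500 ms, so the psutil/pynvml queries and widget repaints run 2.5x less often at the cost of slightly coarser graphs. A self-contained sketch of the same pattern, with illustrative names rather than the actual AutoGGUF classes:

```python
# Minimal standalone sketch of the 500 ms polling pattern (names are illustrative).
import psutil
from PySide6.QtCore import QTimer
from PySide6.QtWidgets import QApplication, QLabel

app = QApplication([])
label = QLabel("collecting...")
label.show()


def update_system_info() -> None:
    # cpu_percent(interval=None) reports usage since the previous call,
    # so a 0.5 s timer doubles as the averaging window
    cpu = psutil.cpu_percent(interval=None)
    ram = psutil.virtual_memory().percent
    label.setText(f"CPU {cpu:.0f}% | RAM {ram:.0f}%")


timer = QTimer()
timer.timeout.connect(update_system_info)
timer.start(500)  # poll every 0.5 s instead of 0.2 s to cut overhead

app.exec()
```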
File diff suppressed because it is too large
|
@@ -18,14 +18,16 @@
     SupportsIndex,
     cast,
 )
+from transformers import AutoConfig

 import torch

 if TYPE_CHECKING:
     from torch import Tensor

-from gguf.constants import *
+import gguf

+# reuse model definitions from convert_hf_to_gguf.py
 from convert_hf_to_gguf import LazyTorchTensor, Model

 logger = logging.getLogger("lora-to-gguf")
|
@@ -37,9 +39,10 @@ class PartialLoraTensor:
     B: Tensor | None = None


+# magic to support tensor shape modifications and splitting
 class LoraTorchTensor:
-    _lora_A: Tensor
-    _lora_B: Tensor
+    _lora_A: Tensor  # (n_rank, row_size)
+    _lora_B: Tensor  # (col_size, n_rank)
     _rank: int

     def __init__(self, A: Tensor, B: Tensor):
|
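The two new shape comments pin down the factorization used throughout this class: for a base weight of shape (col_size, row_size), the adapter stores A as (n_rank, row_size) and B as (col_size, n_rank), so the low-rank update is B @ A. A toy torch check of that relationship (sizes are made up):

```python
# Toy illustration of the LoRA factor shapes noted above; not part of the script.
import torch

col_size, row_size, n_rank = 8, 16, 4
A = torch.randn(n_rank, row_size)   # _lora_A: (n_rank, row_size)
B = torch.randn(col_size, n_rank)   # _lora_B: (col_size, n_rank)

delta_w = B @ A                     # low-rank weight update
assert delta_w.shape == (col_size, row_size)
```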
@ -57,14 +60,20 @@ def get_lora_A_B(self) -> tuple[Tensor, Tensor]:
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
indices: SupportsIndex | slice | tuple[SupportsIndex | slice | Tensor, ...],
|
indices: (
|
||||||
|
SupportsIndex
|
||||||
|
| slice
|
||||||
|
| tuple[
|
||||||
|
SupportsIndex | slice | Tensor, ...
|
||||||
|
] # TODO: add ellipsis in the type signature
|
||||||
|
),
|
||||||
) -> LoraTorchTensor:
|
) -> LoraTorchTensor:
|
||||||
shape = self.shape
|
shape = self.shape
|
||||||
if isinstance(indices, SupportsIndex):
|
if isinstance(indices, SupportsIndex):
|
||||||
if len(shape) > 2:
|
if len(shape) > 2:
|
||||||
return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
|
return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError # can't return a vector
|
||||||
elif isinstance(indices, slice):
|
elif isinstance(indices, slice):
|
||||||
if len(shape) > 2:
|
if len(shape) > 2:
|
||||||
return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
|
return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
|
||||||
|
@ -74,7 +83,7 @@ def __getitem__(
|
||||||
assert len(indices) > 0
|
assert len(indices) > 0
|
||||||
if indices[-1] is Ellipsis:
|
if indices[-1] is Ellipsis:
|
||||||
return self[indices[:-1]]
|
return self[indices[:-1]]
|
||||||
|
# expand ellipsis
|
||||||
indices = tuple(
|
indices = tuple(
|
||||||
u
|
u
|
||||||
for v in (
|
for v in (
|
||||||
|
@ -94,6 +103,7 @@ def __getitem__(
|
||||||
*(slice(None, None) for _ in range(len(indices), len(shape))),
|
*(slice(None, None) for _ in range(len(indices), len(shape))),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# TODO: make sure this is correct
|
||||||
indices_A = (
|
indices_A = (
|
||||||
*(
|
*(
|
||||||
(
|
(
|
||||||
|
@ -109,7 +119,7 @@ def __getitem__(
|
||||||
indices_B = indices[:-1]
|
indices_B = indices[:-1]
|
||||||
return LoraTorchTensor(self._lora_A[indices_A], self._lora_B[indices_B])
|
return LoraTorchTensor(self._lora_A[indices_A], self._lora_B[indices_B])
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError # unknown indice type
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def dtype(self) -> torch.dtype:
|
def dtype(self) -> torch.dtype:
|
||||||
|
@ -132,8 +142,9 @@ def reshape(self, *shape: int | tuple[int, ...]) -> LoraTorchTensor:
|
||||||
new_shape = cast(tuple[int, ...], shape)
|
new_shape = cast(tuple[int, ...], shape)
|
||||||
orig_shape = self.shape
|
orig_shape = self.shape
|
||||||
if len(new_shape) < 2:
|
if len(new_shape) < 2:
|
||||||
raise NotImplementedError
|
raise NotImplementedError # can't become a vector
|
||||||
|
|
||||||
|
# expand -1 in the shape
|
||||||
if any(dim == -1 for dim in new_shape):
|
if any(dim == -1 for dim in new_shape):
|
||||||
n_elems = prod(orig_shape)
|
n_elems = prod(orig_shape)
|
||||||
n_new_elems = prod(dim if dim != -1 else 1 for dim in new_shape)
|
n_new_elems = prod(dim if dim != -1 else 1 for dim in new_shape)
|
||||||
|
@ -143,7 +154,7 @@ def reshape(self, *shape: int | tuple[int, ...]) -> LoraTorchTensor:
|
||||||
)
|
)
|
||||||
|
|
||||||
if new_shape[-1] != orig_shape[-1]:
|
if new_shape[-1] != orig_shape[-1]:
|
||||||
raise NotImplementedError
|
raise NotImplementedError # can't reshape the row size trivially
|
||||||
|
|
||||||
shape_A = (*(1 for _ in new_shape[:-2]), self._rank, orig_shape[-1])
|
shape_A = (*(1 for _ in new_shape[:-2]), self._rank, orig_shape[-1])
|
||||||
shape_B = (*new_shape[:-1], self._rank)
|
shape_B = (*new_shape[:-1], self._rank)
|
||||||
|
@ -162,7 +173,7 @@ def permute(self, *dims: int) -> LoraTorchTensor:
|
||||||
shape = self.shape
|
shape = self.shape
|
||||||
dims = tuple(dim - len(shape) if dim >= 0 else dim for dim in dims)
|
dims = tuple(dim - len(shape) if dim >= 0 else dim for dim in dims)
|
||||||
if dims[-1] == -1:
|
if dims[-1] == -1:
|
||||||
|
# TODO: support higher dimensional A shapes bigger than 1
|
||||||
assert all(dim == 1 for dim in self._lora_A.shape[:-2])
|
assert all(dim == 1 for dim in self._lora_A.shape[:-2])
|
||||||
return LoraTorchTensor(self._lora_A, self._lora_B.permute(*dims))
|
return LoraTorchTensor(self._lora_A, self._lora_B.permute(*dims))
|
||||||
if len(shape) == 2 and dims[-1] == -2 and dims[-2] == -1:
|
if len(shape) == 2 and dims[-1] == -2 and dims[-2] == -1:
|
||||||
|
@ -170,7 +181,7 @@ def permute(self, *dims: int) -> LoraTorchTensor:
|
||||||
self._lora_B.permute(*dims), self._lora_A.permute(*dims)
|
self._lora_B.permute(*dims), self._lora_A.permute(*dims)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
# TODO: compose the above two
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def transpose(self, dim0: int, dim1: int) -> LoraTorchTensor:
|
def transpose(self, dim0: int, dim1: int) -> LoraTorchTensor:
|
||||||
|
@ -189,7 +200,7 @@ def to(self, *args, **kwargs):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def __torch_function__(cls, func: Callable, types, args=(), kwargs=None):
|
def __torch_function__(cls, func: Callable, types, args=(), kwargs=None):
|
||||||
del types
|
del types # unused
|
||||||
|
|
||||||
if kwargs is None:
|
if kwargs is None:
|
||||||
kwargs = {}
|
kwargs = {}
|
||||||
|
@@ -230,28 +241,73 @@ def get_base_tensor_name(lora_tensor_name: str) -> str:
     base_name = lora_tensor_name.replace("base_model.model.", "")
     base_name = base_name.replace(".lora_A.weight", ".weight")
     base_name = base_name.replace(".lora_B.weight", ".weight")
+    # models produced by mergekit-extract-lora have token embeddings in the adapter
+    base_name = base_name.replace(".lora_embedding_A", ".weight")
+    base_name = base_name.replace(".lora_embedding_B", ".weight")
     return base_name


 def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--outfile", type=Path)
+    parser = argparse.ArgumentParser(
+        description="Convert a Hugging Face PEFT LoRA adapter to a GGUF file"
+    )
+    parser.add_argument(
+        "--outfile",
+        type=Path,
+        help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
+    )
     parser.add_argument(
         "--outtype",
         type=str,
         choices=["f32", "f16", "bf16", "q8_0", "auto"],
         default="f16",
+        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
+    )
+    parser.add_argument(
+        "--bigendian",
+        action="store_true",
+        help="model is executed on big endian machine",
+    )
+    parser.add_argument(
+        "--no-lazy",
+        action="store_true",
+        help="use more RAM by computing all outputs before writing (use in case lazy evaluation is broken)",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="increase output verbosity",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="only print out what will be done, without writing any new files",
+    )
+    parser.add_argument(
+        "--base",
+        type=Path,
+        help="directory containing Hugging Face model config files (config.json, tokenizer.json) for the base model that the adapter is based on - only config is needed, actual model weights are not required. If base model is unspecified, it will be loaded from Hugging Face hub based on the adapter config",
+    )
+    parser.add_argument(
+        "--base-model-id",
+        type=str,
+        help="the model ID of the base model, if it is not available locally or in the adapter config. If specified, it will ignore --base and load the base model config from the Hugging Face hub (Example: 'meta-llama/Llama-3.2-1B-Instruct')",
+    )
+    parser.add_argument(
+        "lora_path",
+        type=Path,
+        help="directory containing Hugging Face PEFT LoRA config (adapter_model.json) and weights (adapter_model.safetensors or adapter_model.bin)",
     )
-    parser.add_argument("--bigendian", action="store_true")
-    parser.add_argument("--no-lazy", action="store_true")
-    parser.add_argument("--verbose", action="store_true")
-    parser.add_argument("--dry-run", action="store_true")
-    parser.add_argument("--base", type=Path, required=True)
-    parser.add_argument("lora_path", type=Path)

     return parser.parse_args()


+def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
+    # normally, adapter does not come with base model config, we need to load it from AutoConfig
+    config = AutoConfig.from_pretrained(hf_model_id)
+    return config.to_dict()
+
+
 if __name__ == "__main__":
     args = parse_args()
     logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
|
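With `--base` now optional and `--base-model-id` added, only the base model's config is needed, and it can be fetched from the Hugging Face Hub when the adapter directory does not point to a local copy. A hedged invocation sketch (the adapter directory and output name are placeholders; the model ID is the example from the help text above):

```python
# Placeholder paths; runs the bundled conversion script with the new flag.
import subprocess
import sys

subprocess.run(
    [
        sys.executable,
        "convert_lora_to_gguf.py",
        "./my-lora-adapter",  # dir with adapter_config.json + adapter weights
        "--outfile", "my-lora-{ftype}.gguf",
        "--outtype", "f16",
        "--base-model-id", "meta-llama/Llama-3.2-1B-Instruct",
    ],
    check=True,
)
```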
@ -266,19 +322,20 @@ def parse_args() -> argparse.Namespace:
|
||||||
|
|
||||||
ftype = ftype_map[args.outtype]
|
ftype = ftype_map[args.outtype]
|
||||||
|
|
||||||
dir_base_model: Path = args.base
|
dir_base_model: Path | None = args.base
|
||||||
dir_lora: Path = args.lora_path
|
dir_lora: Path = args.lora_path
|
||||||
|
base_model_id: str | None = args.base_model_id
|
||||||
lora_config = dir_lora / "adapter_config.json"
|
lora_config = dir_lora / "adapter_config.json"
|
||||||
input_model = dir_lora / "adapter_model.safetensors"
|
input_model = dir_lora / "adapter_model.safetensors"
|
||||||
|
|
||||||
if args.outfile is not None:
|
if args.outfile is not None:
|
||||||
fname_out = args.outfile
|
fname_out = args.outfile
|
||||||
else:
|
else:
|
||||||
|
# output in the same directory as the model by default
|
||||||
fname_out = dir_lora
|
fname_out = dir_lora
|
||||||
|
|
||||||
if os.path.exists(input_model):
|
if os.path.exists(input_model):
|
||||||
|
# lazy import load_file only if lora is in safetensors format.
|
||||||
from safetensors.torch import load_file
|
from safetensors.torch import load_file
|
||||||
|
|
||||||
lora_model = load_file(input_model, device="cpu")
|
lora_model = load_file(input_model, device="cpu")
|
||||||
|
@ -286,8 +343,38 @@ def parse_args() -> argparse.Namespace:
|
||||||
input_model = os.path.join(dir_lora, "adapter_model.bin")
|
input_model = os.path.join(dir_lora, "adapter_model.bin")
|
||||||
lora_model = torch.load(input_model, map_location="cpu", weights_only=True)
|
lora_model = torch.load(input_model, map_location="cpu", weights_only=True)
|
||||||
|
|
||||||
logger.info(f"Loading base model: {dir_base_model.name}")
|
# load LoRA config
|
||||||
hparams = Model.load_hparams(dir_base_model)
|
with open(lora_config, "r") as f:
|
||||||
|
lparams: dict[str, Any] = json.load(f)
|
||||||
|
|
||||||
|
# load base model
|
||||||
|
if base_model_id is not None:
|
||||||
|
logger.info(f"Loading base model from Hugging Face: {base_model_id}")
|
||||||
|
hparams = load_hparams_from_hf(base_model_id)
|
||||||
|
elif dir_base_model is None:
|
||||||
|
if "base_model_name_or_path" in lparams:
|
||||||
|
model_id = lparams["base_model_name_or_path"]
|
||||||
|
logger.info(f"Loading base model from Hugging Face: {model_id}")
|
||||||
|
try:
|
||||||
|
hparams = load_hparams_from_hf(model_id)
|
||||||
|
except OSError as e:
|
||||||
|
logger.error(f"Failed to load base model config: {e}")
|
||||||
|
logger.error(
|
||||||
|
"Please try downloading the base model and add its path to --base"
|
||||||
|
)
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
logger.error(
|
||||||
|
"'base_model_name_or_path' is not found in adapter_config.json"
|
||||||
|
)
|
||||||
|
logger.error(
|
||||||
|
"Base model config is required. Please download the base model and add its path to --base"
|
||||||
|
)
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
logger.info(f"Loading base model: {dir_base_model.name}")
|
||||||
|
hparams = Model.load_hparams(dir_base_model)
|
||||||
|
|
||||||
with torch.inference_mode():
|
with torch.inference_mode():
|
||||||
try:
|
try:
|
||||||
model_class = Model.from_model_architecture(hparams["architectures"][0])
|
model_class = Model.from_model_architecture(hparams["architectures"][0])
|
||||||
|
@ -309,6 +396,9 @@ def __init__(
|
||||||
self.dir_model_card = dir_lora_model
|
self.dir_model_card = dir_lora_model
|
||||||
self.lora_alpha = float(lora_alpha)
|
self.lora_alpha = float(lora_alpha)
|
||||||
|
|
||||||
|
def set_vocab(self):
|
||||||
|
pass
|
||||||
|
|
||||||
def set_type(self):
|
def set_type(self):
|
||||||
self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
|
self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
|
||||||
self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
|
self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
|
||||||
|
@ -317,7 +407,10 @@ def set_gguf_parameters(self):
|
||||||
self.gguf_writer.add_float32(
|
self.gguf_writer.add_float32(
|
||||||
gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha
|
gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha
|
||||||
)
|
)
|
||||||
super().set_gguf_parameters()
|
|
||||||
|
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
|
||||||
|
# Never add extra tensors (e.g. rope_freqs) for LoRA adapters
|
||||||
|
return ()
|
||||||
|
|
||||||
def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
|
def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
|
||||||
tensor_map: dict[str, PartialLoraTensor] = {}
|
tensor_map: dict[str, PartialLoraTensor] = {}
|
||||||
|
@ -326,14 +419,26 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
|
||||||
if self.lazy:
|
if self.lazy:
|
||||||
tensor = LazyTorchTensor.from_eager(tensor)
|
tensor = LazyTorchTensor.from_eager(tensor)
|
||||||
base_name = get_base_tensor_name(name)
|
base_name = get_base_tensor_name(name)
|
||||||
is_lora_a = ".lora_A.weight" in name
|
# note: mergekit-extract-lora also adds token embeddings to the adapter
|
||||||
is_lora_b = ".lora_B.weight" in name
|
is_lora_a = ".lora_A.weight" in name or ".lora_embedding_A" in name
|
||||||
|
is_lora_b = ".lora_B.weight" in name or ".lora_embedding_B" in name
|
||||||
if not is_lora_a and not is_lora_b:
|
if not is_lora_a and not is_lora_b:
|
||||||
if ".base_layer.weight" in name:
|
if ".base_layer.weight" in name:
|
||||||
continue
|
continue
|
||||||
|
# mergekit-extract-lora add these layernorm to the adapter, we need to keep them
|
||||||
|
if "_layernorm" in name or ".norm" in name:
|
||||||
|
yield (base_name, tensor)
|
||||||
|
continue
|
||||||
logger.error(
|
logger.error(
|
||||||
f"Unexpected name '{name}': Not a lora_A or lora_B tensor"
|
f"Unexpected name '{name}': Not a lora_A or lora_B tensor"
|
||||||
)
|
)
|
||||||
|
if ".embed_tokens.weight" in name or ".lm_head.weight" in name:
|
||||||
|
logger.error(
|
||||||
|
"Embeddings is present in the adapter. This can be due to new tokens added during fine tuning"
|
||||||
|
)
|
||||||
|
logger.error(
|
||||||
|
"Please refer to https://github.com/ggerganov/llama.cpp/pull/9948"
|
||||||
|
)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
if base_name in tensor_map:
|
if base_name in tensor_map:
|
||||||
|
@ -358,17 +463,34 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
|
||||||
def modify_tensors(
|
def modify_tensors(
|
||||||
self, data_torch: Tensor, name: str, bid: int | None
|
self, data_torch: Tensor, name: str, bid: int | None
|
||||||
) -> Iterable[tuple[str, Tensor]]:
|
) -> Iterable[tuple[str, Tensor]]:
|
||||||
dest = super().modify_tensors(data_torch, name, bid)
|
dest = list(super().modify_tensors(data_torch, name, bid))
|
||||||
|
# some archs may have the same tensor for lm_head and output (tie word embeddings)
|
||||||
|
# in this case, adapters targeting lm_head will fail when using llama-export-lora
|
||||||
|
# therefore, we ignore them for now
|
||||||
|
# see: https://github.com/ggerganov/llama.cpp/issues/9065
|
||||||
|
if name == "lm_head.weight" and len(dest) == 0:
|
||||||
|
raise ValueError(
|
||||||
|
"lm_head is present in adapter, but is ignored in base model"
|
||||||
|
)
|
||||||
for dest_name, dest_data in dest:
|
for dest_name, dest_data in dest:
|
||||||
|
# mergekit-extract-lora add these layernorm to the adapter
|
||||||
|
if "_norm" in dest_name:
|
||||||
|
assert dest_data.dim() == 1
|
||||||
|
yield (dest_name, dest_data)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# otherwise, we must get the lora_A and lora_B tensors
|
||||||
assert isinstance(dest_data, LoraTorchTensor)
|
assert isinstance(dest_data, LoraTorchTensor)
|
||||||
lora_a, lora_b = dest_data.get_lora_A_B()
|
lora_a, lora_b = dest_data.get_lora_A_B()
|
||||||
|
|
||||||
|
# note: mergekit-extract-lora flip and transpose A and B
|
||||||
|
# here we only need to transpose token_embd.lora_a, see llm_build_inp_embd()
|
||||||
|
if "token_embd.weight" in dest_name:
|
||||||
|
lora_a = lora_a.T
|
||||||
|
|
||||||
yield (dest_name + ".lora_a", lora_a)
|
yield (dest_name + ".lora_a", lora_a)
|
||||||
yield (dest_name + ".lora_b", lora_b)
|
yield (dest_name + ".lora_b", lora_b)
|
||||||
|
|
||||||
with open(lora_config, "r") as f:
|
|
||||||
lparams: dict[str, Any] = json.load(f)
|
|
||||||
|
|
||||||
alpha: float = lparams["lora_alpha"]
|
alpha: float = lparams["lora_alpha"]
|
||||||
|
|
||||||
model_instance = LoraModel(
|
model_instance = LoraModel(
|
||||||
|
@ -381,7 +503,7 @@ def modify_tensors(
|
||||||
dry_run=args.dry_run,
|
dry_run=args.dry_run,
|
||||||
dir_lora_model=dir_lora,
|
dir_lora_model=dir_lora,
|
||||||
lora_alpha=alpha,
|
lora_alpha=alpha,
|
||||||
is_lora=True,
|
hparams=hparams,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info("Exporting model...")
|
logger.info("Exporting model...")
|
||||||
|
|
|
@@ -3,10 +3,18 @@
 from enum import Enum, IntEnum, auto
 from typing import Any

-GGUF_MAGIC = 0x46554747
+#
+# constants
+#
+
+GGUF_MAGIC = 0x46554747  # "GGUF"
 GGUF_VERSION = 3
 GGUF_DEFAULT_ALIGNMENT = 32
-GGML_QUANT_VERSION = 2
+GGML_QUANT_VERSION = 2  # GGML_QNT_VERSION from ggml.h
+
+#
+# metadata keys
+#


 class Keys:
|
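The new `# "GGUF"` comment is literal: the magic value is the ASCII bytes "GGUF" packed little-endian, and it is the first field of every GGUF file, followed by the format version. A small sketch of checking it (file path is a placeholder):

```python
# Sketch: read and validate the GGUF magic and version from a file header.
import struct

GGUF_MAGIC = 0x46554747  # b"GGUF" interpreted as a little-endian uint32


def read_gguf_header(path: str) -> tuple[int, int]:
    with open(path, "rb") as f:
        magic, version = struct.unpack("<II", f.read(8))
    if magic != GGUF_MAGIC:
        raise ValueError(f"{path} is not a GGUF file (magic=0x{magic:08X})")
    return magic, version


# magic, version = read_gguf_header("model.gguf")  # placeholder path
```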
@ -17,6 +25,7 @@ class General:
|
||||||
ALIGNMENT = "general.alignment"
|
ALIGNMENT = "general.alignment"
|
||||||
FILE_TYPE = "general.file_type"
|
FILE_TYPE = "general.file_type"
|
||||||
|
|
||||||
|
# Authorship Metadata
|
||||||
NAME = "general.name"
|
NAME = "general.name"
|
||||||
AUTHOR = "general.author"
|
AUTHOR = "general.author"
|
||||||
VERSION = "general.version"
|
VERSION = "general.version"
|
||||||
|
@ -30,38 +39,62 @@ class General:
|
||||||
|
|
||||||
SIZE_LABEL = "general.size_label"
|
SIZE_LABEL = "general.size_label"
|
||||||
|
|
||||||
|
# Licensing details
|
||||||
LICENSE = "general.license"
|
LICENSE = "general.license"
|
||||||
LICENSE_NAME = "general.license.name"
|
LICENSE_NAME = "general.license.name"
|
||||||
LICENSE_LINK = "general.license.link"
|
LICENSE_LINK = "general.license.link"
|
||||||
|
|
||||||
URL = "general.url"
|
# Typically represents the converted GGUF repo (Unless native)
|
||||||
|
URL = "general.url" # Model Website/Paper
|
||||||
DOI = "general.doi"
|
DOI = "general.doi"
|
||||||
UUID = "general.uuid"
|
UUID = "general.uuid"
|
||||||
REPO_URL = "general.repo_url"
|
REPO_URL = "general.repo_url" # Model Source Repository (git/svn/etc...)
|
||||||
|
|
||||||
SOURCE_URL = "general.source.url"
|
# Model Source during conversion
|
||||||
|
SOURCE_URL = "general.source.url" # Model Website/Paper
|
||||||
SOURCE_DOI = "general.source.doi"
|
SOURCE_DOI = "general.source.doi"
|
||||||
SOURCE_UUID = "general.source.uuid"
|
SOURCE_UUID = "general.source.uuid"
|
||||||
SOURCE_REPO_URL = "general.source.repo_url"
|
SOURCE_REPO_URL = (
|
||||||
|
"general.source.repo_url" # Model Source Repository (git/svn/etc...)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Base Model Source. There can be more than one source if it's a merged
|
||||||
|
# model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in
|
||||||
|
# tracing linage of models as it is finetuned or merged over time.
|
||||||
BASE_MODEL_COUNT = "general.base_model.count"
|
BASE_MODEL_COUNT = "general.base_model.count"
|
||||||
BASE_MODEL_NAME = "general.base_model.{id}.name"
|
BASE_MODEL_NAME = "general.base_model.{id}.name"
|
||||||
BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
|
BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
|
||||||
BASE_MODEL_VERSION = "general.base_model.{id}.version"
|
BASE_MODEL_VERSION = "general.base_model.{id}.version"
|
||||||
BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
|
BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
|
||||||
BASE_MODEL_URL = "general.base_model.{id}.url"
|
BASE_MODEL_DESCRIPTION = "general.base_model.{id}.description"
|
||||||
|
BASE_MODEL_URL = "general.base_model.{id}.url" # Model Website/Paper
|
||||||
BASE_MODEL_DOI = "general.base_model.{id}.doi"
|
BASE_MODEL_DOI = "general.base_model.{id}.doi"
|
||||||
BASE_MODEL_UUID = "general.base_model.{id}.uuid"
|
BASE_MODEL_UUID = "general.base_model.{id}.uuid"
|
||||||
BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url"
|
BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)
|
||||||
|
|
||||||
|
# Dataset Source
|
||||||
|
DATASET_COUNT = "general.dataset.count"
|
||||||
|
DATASET_NAME = "general.dataset.{id}.name"
|
||||||
|
DATASET_AUTHOR = "general.dataset.{id}.author"
|
||||||
|
DATASET_VERSION = "general.dataset.{id}.version"
|
||||||
|
DATASET_ORGANIZATION = "general.dataset.{id}.organization"
|
||||||
|
DATASET_DESCRIPTION = "general.dataset.{id}.description"
|
||||||
|
DATASET_URL = "general.dataset.{id}.url" # Model Website/Paper
|
||||||
|
DATASET_DOI = "general.dataset.{id}.doi"
|
||||||
|
DATASET_UUID = "general.dataset.{id}.uuid"
|
||||||
|
DATASET_REPO_URL = (
|
||||||
|
"general.dataset.{id}.repo_url" # Model Source Repository (git/svn/etc...)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Array based KV stores
|
||||||
TAGS = "general.tags"
|
TAGS = "general.tags"
|
||||||
LANGUAGES = "general.languages"
|
LANGUAGES = "general.languages"
|
||||||
DATASETS = "general.datasets"
|
|
||||||
|
|
||||||
class LLM:
|
class LLM:
|
||||||
VOCAB_SIZE = "{arch}.vocab_size"
|
VOCAB_SIZE = "{arch}.vocab_size"
|
||||||
CONTEXT_LENGTH = "{arch}.context_length"
|
CONTEXT_LENGTH = "{arch}.context_length"
|
||||||
EMBEDDING_LENGTH = "{arch}.embedding_length"
|
EMBEDDING_LENGTH = "{arch}.embedding_length"
|
||||||
|
FEATURES_LENGTH = "{arch}.features_length"
|
||||||
BLOCK_COUNT = "{arch}.block_count"
|
BLOCK_COUNT = "{arch}.block_count"
|
||||||
LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
|
LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
|
||||||
FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
|
FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
|
||||||
|
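The `general.dataset.*` keys added above follow the same `{id}`-templated pattern as the base-model keys, so one set of keys is emitted per dataset index. A tiny illustration with made-up values:

```python
# Illustration only: how the {id}-templated metadata keys expand per entry.
DATASET_NAME = "general.dataset.{id}.name"
DATASET_URL = "general.dataset.{id}.url"

datasets = [{"name": "example-dataset", "url": "https://example.org/example-dataset"}]
for i, ds in enumerate(datasets):
    print(DATASET_NAME.format(id=i), "=", ds["name"])
    print(DATASET_URL.format(id=i), "=", ds["url"])
```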
@ -73,11 +106,14 @@ class LLM:
|
||||||
EXPERT_USED_COUNT = "{arch}.expert_used_count"
|
EXPERT_USED_COUNT = "{arch}.expert_used_count"
|
||||||
EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
|
EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
|
||||||
EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
|
EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
|
||||||
|
EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
|
||||||
|
EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
|
||||||
POOLING_TYPE = "{arch}.pooling_type"
|
POOLING_TYPE = "{arch}.pooling_type"
|
||||||
LOGIT_SCALE = "{arch}.logit_scale"
|
LOGIT_SCALE = "{arch}.logit_scale"
|
||||||
DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
|
DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
|
||||||
ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
|
ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
|
||||||
FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
|
FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
|
||||||
|
SWIN_NORM = "{arch}.swin_norm"
|
||||||
RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
|
RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
|
||||||
TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
|
TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
|
||||||
TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
|
TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
|
||||||
|
@ -93,6 +129,8 @@ class Attention:
|
||||||
VALUE_LENGTH = "{arch}.attention.value_length"
|
VALUE_LENGTH = "{arch}.attention.value_length"
|
||||||
LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
|
LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
|
||||||
LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
|
LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
|
||||||
|
GROUPNORM_EPS = "{arch}.attention.group_norm_epsilon"
|
||||||
|
GROUPNORM_GROUPS = "{arch}.attention.group_norm_groups"
|
||||||
CAUSAL = "{arch}.attention.causal"
|
CAUSAL = "{arch}.attention.causal"
|
||||||
Q_LORA_RANK = "{arch}.attention.q_lora_rank"
|
Q_LORA_RANK = "{arch}.attention.q_lora_rank"
|
||||||
KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
|
KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
|
||||||
|
@ -102,6 +140,7 @@ class Attention:
|
||||||
|
|
||||||
class Rope:
|
class Rope:
|
||||||
DIMENSION_COUNT = "{arch}.rope.dimension_count"
|
DIMENSION_COUNT = "{arch}.rope.dimension_count"
|
||||||
|
DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
|
||||||
FREQ_BASE = "{arch}.rope.freq_base"
|
FREQ_BASE = "{arch}.rope.freq_base"
|
||||||
SCALING_TYPE = "{arch}.rope.scaling.type"
|
SCALING_TYPE = "{arch}.rope.scaling.type"
|
||||||
SCALING_FACTOR = "{arch}.rope.scaling.factor"
|
SCALING_FACTOR = "{arch}.rope.scaling.factor"
|
||||||
|
@ -125,16 +164,28 @@ class SSM:
|
||||||
class WKV:
|
class WKV:
|
||||||
HEAD_SIZE = "{arch}.wkv.head_size"
|
HEAD_SIZE = "{arch}.wkv.head_size"
|
||||||
|
|
||||||
|
class PosNet:
|
||||||
|
EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
|
||||||
|
BLOCK_COUNT = "{arch}.posnet.block_count"
|
||||||
|
|
||||||
|
class ConvNext:
|
||||||
|
EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
|
||||||
|
BLOCK_COUNT = "{arch}.convnext.block_count"
|
||||||
|
|
||||||
class Tokenizer:
|
class Tokenizer:
|
||||||
MODEL = "tokenizer.ggml.model"
|
MODEL = "tokenizer.ggml.model"
|
||||||
PRE = "tokenizer.ggml.pre"
|
PRE = "tokenizer.ggml.pre"
|
||||||
LIST = "tokenizer.ggml.tokens"
|
LIST = "tokenizer.ggml.tokens"
|
||||||
TOKEN_TYPE = "tokenizer.ggml.token_type"
|
TOKEN_TYPE = "tokenizer.ggml.token_type"
|
||||||
TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count"
|
TOKEN_TYPE_COUNT = (
|
||||||
|
"tokenizer.ggml.token_type_count" # for BERT-style token types
|
||||||
|
)
|
||||||
SCORES = "tokenizer.ggml.scores"
|
SCORES = "tokenizer.ggml.scores"
|
||||||
MERGES = "tokenizer.ggml.merges"
|
MERGES = "tokenizer.ggml.merges"
|
||||||
BOS_ID = "tokenizer.ggml.bos_token_id"
|
BOS_ID = "tokenizer.ggml.bos_token_id"
|
||||||
EOS_ID = "tokenizer.ggml.eos_token_id"
|
EOS_ID = "tokenizer.ggml.eos_token_id"
|
||||||
|
EOT_ID = "tokenizer.ggml.eot_token_id"
|
||||||
|
EOM_ID = "tokenizer.ggml.eom_token_id"
|
||||||
UNK_ID = "tokenizer.ggml.unknown_token_id"
|
UNK_ID = "tokenizer.ggml.unknown_token_id"
|
||||||
SEP_ID = "tokenizer.ggml.seperator_token_id"
|
SEP_ID = "tokenizer.ggml.seperator_token_id"
|
||||||
PAD_ID = "tokenizer.ggml.padding_token_id"
|
PAD_ID = "tokenizer.ggml.padding_token_id"
|
||||||
|
@ -150,18 +201,28 @@ class Tokenizer:
|
||||||
CHAT_TEMPLATE = "tokenizer.chat_template"
|
CHAT_TEMPLATE = "tokenizer.chat_template"
|
||||||
CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
|
CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
|
||||||
CHAT_TEMPLATES = "tokenizer.chat_templates"
|
CHAT_TEMPLATES = "tokenizer.chat_templates"
|
||||||
|
# FIM/Infill special tokens constants
|
||||||
|
FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
|
||||||
|
FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
|
||||||
|
FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
|
||||||
|
FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
|
||||||
|
FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
|
||||||
|
FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
|
||||||
|
# deprecated:
|
||||||
PREFIX_ID = "tokenizer.ggml.prefix_token_id"
|
PREFIX_ID = "tokenizer.ggml.prefix_token_id"
|
||||||
SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
|
SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
|
||||||
MIDDLE_ID = "tokenizer.ggml.middle_token_id"
|
MIDDLE_ID = "tokenizer.ggml.middle_token_id"
|
||||||
EOT_ID = "tokenizer.ggml.eot_token_id"
|
|
||||||
EOM_ID = "tokenizer.ggml.eom_token_id"
|
|
||||||
|
|
||||||
class Adapter:
|
class Adapter:
|
||||||
TYPE = "adapter.type"
|
TYPE = "adapter.type"
|
||||||
LORA_ALPHA = "adapter.lora.alpha"
|
LORA_ALPHA = "adapter.lora.alpha"
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# recommended mapping of model tensor names for storage in gguf
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
class GGUFType:
|
class GGUFType:
|
||||||
MODEL = "model"
|
MODEL = "model"
|
||||||
ADAPTER = "adapter"
|
ADAPTER = "adapter"
|
||||||
|
@ -169,6 +230,7 @@ class GGUFType:
|
||||||
|
|
||||||
class MODEL_ARCH(IntEnum):
|
class MODEL_ARCH(IntEnum):
|
||||||
LLAMA = auto()
|
LLAMA = auto()
|
||||||
|
DECI = auto()
|
||||||
FALCON = auto()
|
FALCON = auto()
|
||||||
BAICHUAN = auto()
|
BAICHUAN = auto()
|
||||||
GROK = auto()
|
GROK = auto()
|
||||||
|
@ -186,6 +248,7 @@ class MODEL_ARCH(IntEnum):
|
||||||
QWEN = auto()
|
QWEN = auto()
|
||||||
QWEN2 = auto()
|
QWEN2 = auto()
|
||||||
QWEN2MOE = auto()
|
QWEN2MOE = auto()
|
||||||
|
QWEN2VL = auto()
|
||||||
PHI2 = auto()
|
PHI2 = auto()
|
||||||
PHI3 = auto()
|
PHI3 = auto()
|
||||||
PLAMO = auto()
|
PLAMO = auto()
|
||||||
|
@ -199,14 +262,16 @@ class MODEL_ARCH(IntEnum):
|
||||||
STARCODER2 = auto()
|
STARCODER2 = auto()
|
||||||
RWKV6 = auto()
|
RWKV6 = auto()
|
||||||
MAMBA = auto()
|
MAMBA = auto()
|
||||||
JAMBA = auto()
|
|
||||||
XVERSE = auto()
|
XVERSE = auto()
|
||||||
COMMAND_R = auto()
|
COMMAND_R = auto()
|
||||||
|
COHERE2 = auto()
|
||||||
DBRX = auto()
|
DBRX = auto()
|
||||||
OLMO = auto()
|
OLMO = auto()
|
||||||
|
OLMO2 = auto()
|
||||||
OLMOE = auto()
|
OLMOE = auto()
|
||||||
OPENELM = auto()
|
OPENELM = auto()
|
||||||
ARCTIC = auto()
|
ARCTIC = auto()
|
||||||
|
DEEPSEEK = auto()
|
||||||
DEEPSEEK2 = auto()
|
DEEPSEEK2 = auto()
|
||||||
CHATGLM = auto()
|
CHATGLM = auto()
|
||||||
BITNET = auto()
|
BITNET = auto()
|
||||||
|
@ -216,6 +281,9 @@ class MODEL_ARCH(IntEnum):
|
||||||
NEMOTRON = auto()
|
NEMOTRON = auto()
|
||||||
EXAONE = auto()
|
EXAONE = auto()
|
||||||
GRANITE = auto()
|
GRANITE = auto()
|
||||||
|
GRANITE_MOE = auto()
|
||||||
|
CHAMELEON = auto()
|
||||||
|
WAVTOKENIZER_DEC = auto()
|
||||||
|
|
||||||
|
|
||||||
class MODEL_TENSOR(IntEnum):
|
class MODEL_TENSOR(IntEnum):
|
||||||
|
@ -254,6 +322,7 @@ class MODEL_TENSOR(IntEnum):
|
||||||
FFN_GATE_SHEXP = auto()
|
FFN_GATE_SHEXP = auto()
|
||||||
FFN_DOWN_SHEXP = auto()
|
FFN_DOWN_SHEXP = auto()
|
||||||
FFN_UP_SHEXP = auto()
|
FFN_UP_SHEXP = auto()
|
||||||
|
FFN_EXP_PROBS_B = auto()
|
||||||
ATTN_Q_NORM = auto()
|
ATTN_Q_NORM = auto()
|
||||||
ATTN_K_NORM = auto()
|
ATTN_K_NORM = auto()
|
||||||
LAYER_OUT_NORM = auto()
|
LAYER_OUT_NORM = auto()
|
||||||
|
@ -261,10 +330,7 @@ class MODEL_TENSOR(IntEnum):
|
||||||
SSM_CONV1D = auto()
|
SSM_CONV1D = auto()
|
||||||
SSM_X = auto()
|
SSM_X = auto()
|
||||||
SSM_DT = auto()
|
SSM_DT = auto()
|
||||||
SSM_DT_NORM = auto()
|
|
||||||
SSM_A = auto()
|
SSM_A = auto()
|
||||||
SSM_B_NORM = auto()
|
|
||||||
SSM_C_NORM = auto()
|
|
||||||
SSM_D = auto()
|
SSM_D = auto()
|
||||||
SSM_OUT = auto()
|
SSM_OUT = auto()
|
||||||
TIME_MIX_W1 = auto()
|
TIME_MIX_W1 = auto()
|
||||||
|
@ -326,10 +392,29 @@ class MODEL_TENSOR(IntEnum):
|
||||||
ENC_FFN_DOWN = auto()
|
ENC_FFN_DOWN = auto()
|
||||||
ENC_FFN_UP = auto()
|
ENC_FFN_UP = auto()
|
||||||
ENC_OUTPUT_NORM = auto()
|
ENC_OUTPUT_NORM = auto()
|
||||||
|
CLS = auto() # classifier
|
||||||
|
CLS_OUT = auto() # classifier output projection
|
||||||
|
CONV1D = auto()
|
||||||
|
CONVNEXT_DW = auto()
|
||||||
|
CONVNEXT_NORM = auto()
|
||||||
|
CONVNEXT_PW1 = auto()
|
||||||
|
CONVNEXT_PW2 = auto()
|
||||||
|
CONVNEXT_GAMMA = auto()
|
||||||
|
POSNET_CONV1 = auto()
|
||||||
|
POSNET_CONV2 = auto()
|
||||||
|
POSNET_NORM = auto()
|
||||||
|
POSNET_NORM1 = auto()
|
||||||
|
POSNET_NORM2 = auto()
|
||||||
|
POSNET_ATTN_NORM = auto()
|
||||||
|
POSNET_ATTN_Q = auto()
|
||||||
|
POSNET_ATTN_K = auto()
|
||||||
|
POSNET_ATTN_V = auto()
|
||||||
|
POSNET_ATTN_OUT = auto()
|
||||||
|
|
||||||
|
|
||||||
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||||
MODEL_ARCH.LLAMA: "llama",
|
MODEL_ARCH.LLAMA: "llama",
|
||||||
|
MODEL_ARCH.DECI: "deci",
|
||||||
MODEL_ARCH.FALCON: "falcon",
|
MODEL_ARCH.FALCON: "falcon",
|
||||||
MODEL_ARCH.BAICHUAN: "baichuan",
|
MODEL_ARCH.BAICHUAN: "baichuan",
|
||||||
MODEL_ARCH.GROK: "grok",
|
MODEL_ARCH.GROK: "grok",
|
||||||
|
@ -347,6 +432,7 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_ARCH.QWEN: "qwen",
|
MODEL_ARCH.QWEN: "qwen",
|
||||||
MODEL_ARCH.QWEN2: "qwen2",
|
MODEL_ARCH.QWEN2: "qwen2",
|
||||||
MODEL_ARCH.QWEN2MOE: "qwen2moe",
|
MODEL_ARCH.QWEN2MOE: "qwen2moe",
|
||||||
|
MODEL_ARCH.QWEN2VL: "qwen2vl",
|
||||||
MODEL_ARCH.PHI2: "phi2",
|
MODEL_ARCH.PHI2: "phi2",
|
||||||
MODEL_ARCH.PHI3: "phi3",
|
MODEL_ARCH.PHI3: "phi3",
|
||||||
MODEL_ARCH.PLAMO: "plamo",
|
MODEL_ARCH.PLAMO: "plamo",
|
||||||
|
@ -360,14 +446,16 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_ARCH.STARCODER2: "starcoder2",
|
MODEL_ARCH.STARCODER2: "starcoder2",
|
||||||
MODEL_ARCH.RWKV6: "rwkv6",
|
MODEL_ARCH.RWKV6: "rwkv6",
|
||||||
MODEL_ARCH.MAMBA: "mamba",
|
MODEL_ARCH.MAMBA: "mamba",
|
||||||
MODEL_ARCH.JAMBA: "jamba",
|
|
||||||
MODEL_ARCH.XVERSE: "xverse",
|
MODEL_ARCH.XVERSE: "xverse",
|
||||||
MODEL_ARCH.COMMAND_R: "command-r",
|
MODEL_ARCH.COMMAND_R: "command-r",
|
||||||
|
MODEL_ARCH.COHERE2: "cohere2",
|
||||||
MODEL_ARCH.DBRX: "dbrx",
|
MODEL_ARCH.DBRX: "dbrx",
|
||||||
MODEL_ARCH.OLMO: "olmo",
|
MODEL_ARCH.OLMO: "olmo",
|
||||||
|
MODEL_ARCH.OLMO2: "olmo2",
|
||||||
MODEL_ARCH.OLMOE: "olmoe",
|
MODEL_ARCH.OLMOE: "olmoe",
|
||||||
MODEL_ARCH.OPENELM: "openelm",
|
MODEL_ARCH.OPENELM: "openelm",
|
||||||
MODEL_ARCH.ARCTIC: "arctic",
|
MODEL_ARCH.ARCTIC: "arctic",
|
||||||
|
MODEL_ARCH.DEEPSEEK: "deepseek",
|
||||||
MODEL_ARCH.DEEPSEEK2: "deepseek2",
|
MODEL_ARCH.DEEPSEEK2: "deepseek2",
|
||||||
MODEL_ARCH.CHATGLM: "chatglm",
|
MODEL_ARCH.CHATGLM: "chatglm",
|
||||||
MODEL_ARCH.BITNET: "bitnet",
|
MODEL_ARCH.BITNET: "bitnet",
|
||||||
|
@ -377,6 +465,9 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_ARCH.NEMOTRON: "nemotron",
|
MODEL_ARCH.NEMOTRON: "nemotron",
|
||||||
MODEL_ARCH.EXAONE: "exaone",
|
MODEL_ARCH.EXAONE: "exaone",
|
||||||
MODEL_ARCH.GRANITE: "granite",
|
MODEL_ARCH.GRANITE: "granite",
|
||||||
|
MODEL_ARCH.GRANITE_MOE: "granitemoe",
|
||||||
|
MODEL_ARCH.CHAMELEON: "chameleon",
|
||||||
|
MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
|
||||||
}
|
}
|
||||||
|
|
||||||
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||||
|
@ -417,15 +508,13 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
|
MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
|
||||||
MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
|
MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
|
||||||
MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
|
MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
|
||||||
|
MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
|
||||||
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
|
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
|
||||||
MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
|
MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
|
||||||
MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
|
MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
|
||||||
MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
|
MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
|
||||||
MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
|
MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
|
||||||
MODEL_TENSOR.SSM_DT_NORM: "blk.{bid}.ssm_dt_norm",
|
|
||||||
MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
|
MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
|
||||||
MODEL_TENSOR.SSM_B_NORM: "blk.{bid}.ssm_b_norm",
|
|
||||||
MODEL_TENSOR.SSM_C_NORM: "blk.{bid}.ssm_c_norm",
|
|
||||||
MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
|
MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
|
||||||
MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
|
MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
|
||||||
MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
|
MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
|
||||||
|
@ -487,6 +576,24 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
|
MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
|
||||||
MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
|
MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
|
||||||
MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
|
MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
|
||||||
|
MODEL_TENSOR.CLS: "cls",
|
||||||
|
MODEL_TENSOR.CLS_OUT: "cls.output",
|
||||||
|
MODEL_TENSOR.CONV1D: "conv1d",
|
||||||
|
MODEL_TENSOR.CONVNEXT_DW: "convnext.{bid}.dw",
|
||||||
|
MODEL_TENSOR.CONVNEXT_NORM: "convnext.{bid}.norm",
|
||||||
|
MODEL_TENSOR.CONVNEXT_PW1: "convnext.{bid}.pw1",
|
||||||
|
MODEL_TENSOR.CONVNEXT_PW2: "convnext.{bid}.pw2",
|
||||||
|
MODEL_TENSOR.CONVNEXT_GAMMA: "convnext.{bid}.gamma",
|
||||||
|
MODEL_TENSOR.POSNET_CONV1: "posnet.{bid}.conv1",
|
||||||
|
MODEL_TENSOR.POSNET_CONV2: "posnet.{bid}.conv2",
|
||||||
|
MODEL_TENSOR.POSNET_NORM: "posnet.{bid}.norm",
|
||||||
|
MODEL_TENSOR.POSNET_NORM1: "posnet.{bid}.norm1",
|
||||||
|
MODEL_TENSOR.POSNET_NORM2: "posnet.{bid}.norm2",
|
||||||
|
MODEL_TENSOR.POSNET_ATTN_NORM: "posnet.{bid}.attn_norm",
|
||||||
|
MODEL_TENSOR.POSNET_ATTN_Q: "posnet.{bid}.attn_q",
|
||||||
|
MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
|
||||||
|
MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
|
||||||
|
MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
|
||||||
}
|
}
|
||||||
|
|
||||||
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
|
@ -510,6 +617,26 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.FFN_DOWN_EXP,
|
MODEL_TENSOR.FFN_DOWN_EXP,
|
||||||
MODEL_TENSOR.FFN_UP_EXP,
|
MODEL_TENSOR.FFN_UP_EXP,
|
||||||
],
|
],
|
||||||
|
MODEL_ARCH.DECI: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ROPE_FREQS,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
MODEL_TENSOR.ATTN_V,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
||||||
|
MODEL_TENSOR.FFN_GATE_INP,
|
||||||
|
MODEL_TENSOR.FFN_NORM,
|
||||||
|
MODEL_TENSOR.FFN_GATE,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
MODEL_TENSOR.FFN_GATE_EXP,
|
||||||
|
MODEL_TENSOR.FFN_DOWN_EXP,
|
||||||
|
MODEL_TENSOR.FFN_UP_EXP,
|
||||||
|
],
|
||||||
MODEL_ARCH.GROK: [
|
MODEL_ARCH.GROK: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
MODEL_TENSOR.OUTPUT_NORM,
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
@ -596,6 +723,8 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.FFN_DOWN,
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
MODEL_TENSOR.FFN_UP,
|
MODEL_TENSOR.FFN_UP,
|
||||||
MODEL_TENSOR.LAYER_OUT_NORM,
|
MODEL_TENSOR.LAYER_OUT_NORM,
|
||||||
|
MODEL_TENSOR.CLS,
|
||||||
|
MODEL_TENSOR.CLS_OUT,
|
||||||
],
|
],
|
||||||
MODEL_ARCH.NOMIC_BERT: [
|
MODEL_ARCH.NOMIC_BERT: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
@ -627,6 +756,7 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.FFN_GATE,
|
MODEL_TENSOR.FFN_GATE,
|
||||||
MODEL_TENSOR.FFN_DOWN,
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
MODEL_TENSOR.LAYER_OUT_NORM,
|
MODEL_TENSOR.LAYER_OUT_NORM,
|
||||||
|
MODEL_TENSOR.CLS,
|
||||||
],
|
],
|
||||||
MODEL_ARCH.MPT: [
|
MODEL_ARCH.MPT: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
@ -713,6 +843,21 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.FFN_UP,
|
MODEL_TENSOR.FFN_UP,
|
||||||
],
|
],
|
||||||
MODEL_ARCH.QWEN2: [
|
MODEL_ARCH.QWEN2: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ROPE_FREQS,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
MODEL_TENSOR.ATTN_V,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.FFN_NORM,
|
||||||
|
MODEL_TENSOR.FFN_GATE,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
|
MODEL_ARCH.QWEN2VL: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
MODEL_TENSOR.OUTPUT_NORM,
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
MODEL_TENSOR.OUTPUT,
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
@ -790,6 +935,8 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
MODEL_TENSOR.OUTPUT_NORM,
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
MODEL_TENSOR.OUTPUT,
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ROPE_FACTORS_LONG,
|
||||||
|
MODEL_TENSOR.ROPE_FACTORS_SHORT,
|
||||||
MODEL_TENSOR.ATTN_NORM,
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
MODEL_TENSOR.ATTN_QKV,
|
MODEL_TENSOR.ATTN_QKV,
|
||||||
MODEL_TENSOR.ATTN_Q,
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
@ -849,6 +996,8 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.OUTPUT,
|
MODEL_TENSOR.OUTPUT,
|
||||||
MODEL_TENSOR.OUTPUT_NORM,
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
MODEL_TENSOR.ROPE_FREQS,
|
MODEL_TENSOR.ROPE_FREQS,
|
||||||
|
MODEL_TENSOR.ROPE_FACTORS_LONG,
|
||||||
|
MODEL_TENSOR.ROPE_FACTORS_SHORT,
|
||||||
MODEL_TENSOR.ATTN_NORM,
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
MODEL_TENSOR.ATTN_Q,
|
MODEL_TENSOR.ATTN_Q,
|
||||||
MODEL_TENSOR.ATTN_K,
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
@ -868,6 +1017,8 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
MODEL_TENSOR.OUTPUT_NORM,
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
MODEL_TENSOR.OUTPUT,
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ROPE_FACTORS_LONG,
|
||||||
|
MODEL_TENSOR.ROPE_FACTORS_SHORT,
|
||||||
MODEL_TENSOR.ATTN_NORM,
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
MODEL_TENSOR.ATTN_Q_A,
|
MODEL_TENSOR.ATTN_Q_A,
|
||||||
MODEL_TENSOR.ATTN_Q_B,
|
MODEL_TENSOR.ATTN_Q_B,
|
||||||
|
@ -968,34 +1119,6 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.SSM_D,
|
MODEL_TENSOR.SSM_D,
|
||||||
MODEL_TENSOR.SSM_OUT,
|
MODEL_TENSOR.SSM_OUT,
|
||||||
],
|
],
|
||||||
MODEL_ARCH.JAMBA: [
|
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
|
||||||
MODEL_TENSOR.OUTPUT_NORM,
|
|
||||||
MODEL_TENSOR.OUTPUT,
|
|
||||||
MODEL_TENSOR.ATTN_NORM,
|
|
||||||
MODEL_TENSOR.ATTN_Q,
|
|
||||||
MODEL_TENSOR.ATTN_K,
|
|
||||||
MODEL_TENSOR.ATTN_V,
|
|
||||||
MODEL_TENSOR.ATTN_OUT,
|
|
||||||
MODEL_TENSOR.SSM_IN,
|
|
||||||
MODEL_TENSOR.SSM_CONV1D,
|
|
||||||
MODEL_TENSOR.SSM_X,
|
|
||||||
MODEL_TENSOR.SSM_DT,
|
|
||||||
MODEL_TENSOR.SSM_DT_NORM,
|
|
||||||
MODEL_TENSOR.SSM_A,
|
|
||||||
MODEL_TENSOR.SSM_B_NORM,
|
|
||||||
MODEL_TENSOR.SSM_C_NORM,
|
|
||||||
MODEL_TENSOR.SSM_D,
|
|
||||||
MODEL_TENSOR.SSM_OUT,
|
|
||||||
MODEL_TENSOR.FFN_GATE_INP,
|
|
||||||
MODEL_TENSOR.FFN_NORM,
|
|
||||||
MODEL_TENSOR.FFN_GATE,
|
|
||||||
MODEL_TENSOR.FFN_DOWN,
|
|
||||||
MODEL_TENSOR.FFN_UP,
|
|
||||||
MODEL_TENSOR.FFN_GATE_EXP,
|
|
||||||
MODEL_TENSOR.FFN_DOWN_EXP,
|
|
||||||
MODEL_TENSOR.FFN_UP_EXP,
|
|
||||||
],
|
|
||||||
MODEL_ARCH.XVERSE: [
|
MODEL_ARCH.XVERSE: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
MODEL_TENSOR.OUTPUT_NORM,
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
@ -1026,6 +1149,18 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.ATTN_K_NORM,
|
MODEL_TENSOR.ATTN_K_NORM,
|
||||||
MODEL_TENSOR.ATTN_Q_NORM,
|
MODEL_TENSOR.ATTN_Q_NORM,
|
||||||
],
|
],
|
||||||
|
MODEL_ARCH.COHERE2: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
MODEL_TENSOR.ATTN_V,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.FFN_GATE,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
MODEL_ARCH.DBRX: [
|
MODEL_ARCH.DBRX: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
MODEL_TENSOR.OUTPUT_NORM,
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
@ -1050,6 +1185,22 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.FFN_DOWN,
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
MODEL_TENSOR.FFN_UP,
|
MODEL_TENSOR.FFN_UP,
|
||||||
],
|
],
|
||||||
|
MODEL_ARCH.OLMO2: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
MODEL_TENSOR.ATTN_V,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.ATTN_POST_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_K_NORM,
|
||||||
|
MODEL_TENSOR.FFN_POST_NORM,
|
||||||
|
MODEL_TENSOR.FFN_GATE,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
MODEL_ARCH.OLMOE: [
|
MODEL_ARCH.OLMOE: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
MODEL_TENSOR.OUTPUT_NORM,
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
@ -1101,6 +1252,29 @@ class MODEL_TENSOR(IntEnum):
|
||||||
MODEL_TENSOR.FFN_DOWN_EXP,
|
MODEL_TENSOR.FFN_DOWN_EXP,
|
||||||
MODEL_TENSOR.FFN_UP_EXP,
|
MODEL_TENSOR.FFN_UP_EXP,
|
||||||
],
|
],
|
||||||
|
MODEL_ARCH.DEEPSEEK: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ROPE_FREQS,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
MODEL_TENSOR.ATTN_V,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
||||||
|
MODEL_TENSOR.FFN_GATE_INP,
|
||||||
|
MODEL_TENSOR.FFN_NORM,
|
||||||
|
MODEL_TENSOR.FFN_GATE,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
MODEL_TENSOR.FFN_GATE_EXP,
|
||||||
|
MODEL_TENSOR.FFN_DOWN_EXP,
|
||||||
|
MODEL_TENSOR.FFN_UP_EXP,
|
||||||
|
MODEL_TENSOR.FFN_GATE_SHEXP,
|
||||||
|
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
     MODEL_ARCH.DEEPSEEK2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1127,6 +1301,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_GATE_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
     ],
     MODEL_ARCH.CHATGLM: [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -1248,6 +1423,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.GRANITE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
@@ -1258,13 +1434,72 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.GRANITE_MOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
+    MODEL_ARCH.CHAMELEON: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.WAVTOKENIZER_DEC: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.CONV1D,
+        MODEL_TENSOR.CONVNEXT_DW,
+        MODEL_TENSOR.CONVNEXT_NORM,
+        MODEL_TENSOR.CONVNEXT_PW1,
+        MODEL_TENSOR.CONVNEXT_PW2,
+        MODEL_TENSOR.CONVNEXT_GAMMA,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.POSNET_CONV1,
+        MODEL_TENSOR.POSNET_CONV2,
+        MODEL_TENSOR.POSNET_NORM,
+        MODEL_TENSOR.POSNET_NORM1,
+        MODEL_TENSOR.POSNET_NORM2,
+        MODEL_TENSOR.POSNET_ATTN_NORM,
+        MODEL_TENSOR.POSNET_ATTN_Q,
+        MODEL_TENSOR.POSNET_ATTN_K,
+        MODEL_TENSOR.POSNET_ATTN_V,
+        MODEL_TENSOR.POSNET_ATTN_OUT,
+    ],
+    # TODO
 }


+# tensors that will not be serialized
 MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
     MODEL_ARCH.LLAMA: [
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
+    MODEL_ARCH.DECI: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
     MODEL_ARCH.BAICHUAN: [
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
@@ -1289,6 +1524,10 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
+    MODEL_ARCH.DEEPSEEK: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
     MODEL_ARCH.DEEPSEEK2: [
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
@@ -1302,6 +1541,10 @@ class MODEL_TENSOR(IntEnum):
     ],
 }


+#
+# types
+#


 class TokenType(IntEnum):
     NORMAL = 1
@@ -1316,6 +1559,7 @@ class RopeScalingType(Enum):
     NONE = "none"
     LINEAR = "linear"
     YARN = "yarn"
+    LONGROPE = "longrope"


 class PoolingType(IntEnum):
@@ -1354,52 +1598,61 @@ class GGMLQuantizationType(IntEnum):
    F64 = 28
    IQ1_M = 29
    BF16 = 30
-   Q4_0_4_4 = 31
-   Q4_0_4_8 = 32
-   Q4_0_8_8 = 33
    TQ1_0 = 34
    TQ2_0 = 35


+class ExpertGatingFuncType(IntEnum):
+    SOFTMAX = 1
+    SIGMOID = 2
+
+
+# TODO: add GGMLFileType from ggml_ftype in ggml.h
+
+
+# from llama_ftype in llama.h
+# ALL VALUES SHOULD BE THE SAME HERE AS THEY ARE OVER THERE.
 class LlamaFileType(IntEnum):
     ALL_F32 = 0
-    MOSTLY_F16 = 1
-    MOSTLY_Q4_0 = 2
-    MOSTLY_Q4_1 = 3
-    MOSTLY_Q8_0 = 7
-    MOSTLY_Q5_0 = 8
-    MOSTLY_Q5_1 = 9
-    MOSTLY_Q2_K = 10
-    MOSTLY_Q3_K_S = 11
-    MOSTLY_Q3_K_M = 12
-    MOSTLY_Q3_K_L = 13
-    MOSTLY_Q4_K_S = 14
-    MOSTLY_Q4_K_M = 15
-    MOSTLY_Q5_K_S = 16
-    MOSTLY_Q5_K_M = 17
-    MOSTLY_Q6_K = 18
-    MOSTLY_IQ2_XXS = 19
-    MOSTLY_IQ2_XS = 20
-    MOSTLY_Q2_K_S = 21
-    MOSTLY_IQ3_XS = 22
-    MOSTLY_IQ3_XXS = 23
-    MOSTLY_IQ1_S = 24
-    MOSTLY_IQ4_NL = 25
-    MOSTLY_IQ3_S = 26
-    MOSTLY_IQ3_M = 27
-    MOSTLY_IQ2_S = 28
-    MOSTLY_IQ2_M = 29
-    MOSTLY_IQ4_XS = 30
-    MOSTLY_IQ1_M = 31
-    MOSTLY_BF16 = 32
-    MOSTLY_Q4_0_4_4 = 33
-    MOSTLY_Q4_0_4_8 = 34
-    MOSTLY_Q4_0_8_8 = 35
-    MOSTLY_TQ1_0 = 36
-    MOSTLY_TQ2_0 = 37
-
-    GUESSED = 1024
+    MOSTLY_F16 = 1  # except 1d tensors
+    MOSTLY_Q4_0 = 2  # except 1d tensors
+    MOSTLY_Q4_1 = 3  # except 1d tensors
+    # MOSTLY_Q4_1_SOME_F16 = 4  # tok_embeddings.weight and output.weight are F16
+    # MOSTLY_Q4_2 = 5  # support has been removed
+    # MOSTLY_Q4_3 = 6  # support has been removed
+    MOSTLY_Q8_0 = 7  # except 1d tensors
+    MOSTLY_Q5_0 = 8  # except 1d tensors
+    MOSTLY_Q5_1 = 9  # except 1d tensors
+    MOSTLY_Q2_K = 10  # except 1d tensors
+    MOSTLY_Q3_K_S = 11  # except 1d tensors
+    MOSTLY_Q3_K_M = 12  # except 1d tensors
+    MOSTLY_Q3_K_L = 13  # except 1d tensors
+    MOSTLY_Q4_K_S = 14  # except 1d tensors
+    MOSTLY_Q4_K_M = 15  # except 1d tensors
+    MOSTLY_Q5_K_S = 16  # except 1d tensors
+    MOSTLY_Q5_K_M = 17  # except 1d tensors
+    MOSTLY_Q6_K = 18  # except 1d tensors
+    MOSTLY_IQ2_XXS = 19  # except 1d tensors
+    MOSTLY_IQ2_XS = 20  # except 1d tensors
+    MOSTLY_Q2_K_S = 21  # except 1d tensors
+    MOSTLY_IQ3_XS = 22  # except 1d tensors
+    MOSTLY_IQ3_XXS = 23  # except 1d tensors
+    MOSTLY_IQ1_S = 24  # except 1d tensors
+    MOSTLY_IQ4_NL = 25  # except 1d tensors
+    MOSTLY_IQ3_S = 26  # except 1d tensors
+    MOSTLY_IQ3_M = 27  # except 1d tensors
+    MOSTLY_IQ2_S = 28  # except 1d tensors
+    MOSTLY_IQ2_M = 29  # except 1d tensors
+    MOSTLY_IQ4_XS = 30  # except 1d tensors
+    MOSTLY_IQ1_M = 31  # except 1d tensors
+    MOSTLY_BF16 = 32  # except 1d tensors
+    # MOSTLY_Q4_0_4_4 = 33  # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_4_8 = 34  # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_8_8 = 35  # removed from gguf files, use Q4_0 and runtime repack
+    MOSTLY_TQ1_0 = 36  # except 1d tensors
+    MOSTLY_TQ2_0 = 37  # except 1d tensors
+
+    GUESSED = 1024  # not specified in the model file


 class GGUFEndian(IntEnum):
@@ -1434,11 +1687,12 @@ def get_type(val: Any) -> GGUFValueType:
         return GGUFValueType.BOOL
     elif isinstance(val, int):
         return GGUFValueType.INT32
+    # TODO: need help with 64-bit types in Python
     else:
         raise ValueError(f"Unknown type: {type(val)}")


+# Items here are (block size, type size)
 QK_K = 256
 GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
     GGMLQuantizationType.F32: (1, 4),
@@ -1470,13 +1724,14 @@ def get_type(val: Any) -> GGUFValueType:
     GGMLQuantizationType.F64: (1, 8),
     GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
     GGMLQuantizationType.BF16: (1, 2),
-    GGMLQuantizationType.Q4_0_4_4: (32, 2 + 16),
-    GGMLQuantizationType.Q4_0_4_8: (32, 2 + 16),
-    GGMLQuantizationType.Q4_0_8_8: (32, 2 + 16),
     GGMLQuantizationType.TQ1_0: (256, 2 + 4 * 13),
     GGMLQuantizationType.TQ2_0: (256, 2 + 64),
 }


+# Aliases for backward compatibility.
+
+# general
 KEY_GENERAL_ARCHITECTURE = Keys.General.ARCHITECTURE
 KEY_GENERAL_QUANTIZATION_VERSION = Keys.General.QUANTIZATION_VERSION
 KEY_GENERAL_ALIGNMENT = Keys.General.ALIGNMENT
@@ -1488,6 +1743,7 @@ def get_type(val: Any) -> GGUFValueType:
 KEY_GENERAL_SOURCE_URL = Keys.General.SOURCE_URL
 KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE

+# LLM
 KEY_VOCAB_SIZE = Keys.LLM.VOCAB_SIZE
 KEY_CONTEXT_LENGTH = Keys.LLM.CONTEXT_LENGTH
 KEY_EMBEDDING_LENGTH = Keys.LLM.EMBEDDING_LENGTH
@@ -1496,6 +1752,7 @@ def get_type(val: Any) -> GGUFValueType:
 KEY_USE_PARALLEL_RESIDUAL = Keys.LLM.USE_PARALLEL_RESIDUAL
 KEY_TENSOR_DATA_LAYOUT = Keys.LLM.TENSOR_DATA_LAYOUT

+# attention
 KEY_ATTENTION_HEAD_COUNT = Keys.Attention.HEAD_COUNT
 KEY_ATTENTION_HEAD_COUNT_KV = Keys.Attention.HEAD_COUNT_KV
 KEY_ATTENTION_MAX_ALIBI_BIAS = Keys.Attention.MAX_ALIBI_BIAS
@@ -1503,6 +1760,7 @@ def get_type(val: Any) -> GGUFValueType:
 KEY_ATTENTION_LAYERNORM_EPS = Keys.Attention.LAYERNORM_EPS
 KEY_ATTENTION_LAYERNORM_RMS_EPS = Keys.Attention.LAYERNORM_RMS_EPS

+# RoPE
 KEY_ROPE_DIMENSION_COUNT = Keys.Rope.DIMENSION_COUNT
 KEY_ROPE_FREQ_BASE = Keys.Rope.FREQ_BASE
 KEY_ROPE_SCALING_TYPE = Keys.Rope.SCALING_TYPE
@@ -1510,12 +1768,14 @@ def get_type(val: Any) -> GGUFValueType:
 KEY_ROPE_SCALING_ORIG_CTX_LEN = Keys.Rope.SCALING_ORIG_CTX_LEN
 KEY_ROPE_SCALING_FINETUNED = Keys.Rope.SCALING_FINETUNED

+# SSM
 KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
 KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
 KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
 KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
 KEY_SSM_DT_B_C_RMS = Keys.SSM.DT_B_C_RMS

+# tokenization
 KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
 KEY_TOKENIZER_PRE = Keys.Tokenizer.PRE
 KEY_TOKENIZER_LIST = Keys.Tokenizer.LIST
@@ -1524,6 +1784,8 @@ def get_type(val: Any) -> GGUFValueType:
 KEY_TOKENIZER_MERGES = Keys.Tokenizer.MERGES
 KEY_TOKENIZER_BOS_ID = Keys.Tokenizer.BOS_ID
 KEY_TOKENIZER_EOS_ID = Keys.Tokenizer.EOS_ID
+KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
+KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
 KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
 KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
 KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
@@ -1531,8 +1793,15 @@ def get_type(val: Any) -> GGUFValueType:
 KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
 KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
 KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
-KEY_TOKENIZER_PRIFIX_ID = Keys.Tokenizer.PREFIX_ID
+
+KEY_TOKENIZER_FIM_PRE_ID = Keys.Tokenizer.FIM_PRE_ID
+KEY_TOKENIZER_FIM_SUF_ID = Keys.Tokenizer.FIM_SUF_ID
+KEY_TOKENIZER_FIM_MID_ID = Keys.Tokenizer.FIM_MID_ID
+KEY_TOKENIZER_FIM_PAD_ID = Keys.Tokenizer.FIM_PAD_ID
+KEY_TOKENIZER_FIM_REP_ID = Keys.Tokenizer.FIM_REP_ID
+KEY_TOKENIZER_FIM_SEP_ID = Keys.Tokenizer.FIM_SEP_ID
+
+# deprecated
+KEY_TOKENIZER_PREFIX_ID = Keys.Tokenizer.PREFIX_ID
 KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
 KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
-KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
-KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
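For reference, GGML_QUANT_SIZES maps each quantization type to a (block size, type size) pair, so the on-disk size of a quantized tensor follows directly from its element count. A minimal sketch under that assumption (the helper name is illustrative, not part of the library, and the import path assumes the bundled gguf package is importable as gguf):

    from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType

    def quantized_nbytes(n_elements: int, qtype: GGMLQuantizationType) -> int:
        block_size, type_size = GGML_QUANT_SIZES[qtype]
        # rows are stored as whole blocks; assume an exact multiple here
        assert n_elements % block_size == 0
        return (n_elements // block_size) * type_size

    # e.g. a 4096 x 4096 weight in TQ2_0 (256-element blocks of 2 + 64 bytes)
    print(quantized_nbytes(4096 * 4096, GGMLQuantizationType.TQ2_0))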
src/gguf/gguf_reader.py
@@ -169,11 +169,10 @@ def _get(
         count = int(count)
         itemsize = int(np.empty([], dtype=dtype).itemsize)
         end_offs = offset + itemsize * count
-        return (
-            self.data[offset:end_offs]
-            .view(dtype=dtype)[:count]
-            .newbyteorder(override_order or self.byte_order)
-        )
+        arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
+        if override_order is None:
+            return arr
+        return arr.view(arr.dtype.newbyteorder(override_order))

     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
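The rewritten _get above stops calling newbyteorder on the array object and instead re-views the data with a byte-swapped dtype, and only when an override is actually requested. A small standalone sketch of the same idea (the function name is illustrative, not the reader's API):

    import numpy as np

    def with_byte_order(arr: np.ndarray, override_order: str | None) -> np.ndarray:
        if override_order is None:
            return arr
        # dtype.newbyteorder() is available even where ndarray.newbyteorder() is not
        return arr.view(arr.dtype.newbyteorder(override_order))

    data = np.arange(4, dtype=np.uint32)
    print(with_byte_order(data, ">").dtype)  # >u4 on a little-endian machine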
src/gguf/gguf_writer.py
@@ -26,12 +26,14 @@
     RopeScalingType,
     PoolingType,
     TokenType,
+    ExpertGatingFuncType,
 )

 from .quants import quant_shape_from_byte_shape

 logger = logging.getLogger(__name__)


 SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"
@@ -135,7 +137,7 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
                 continue
             elif name.endswith(".lora_b"):
                 if last_lora_a is None or last_lora_a[0] != name[:-1] + "a":
+                    # Bail when the LoRA pair can't be found trivially
                     logger.warning(
                         "can't measure LoRA size correctly, tensor order is unusual"
                     )
@@ -154,11 +156,14 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:

             total_params += size

+        # Hopefully this should work even for variable-expert-count models
         expert_count = (expert_sum // n_expert_tensors) if n_expert_tensors > 0 else 0

+        # Negate the total to signal it's likely not exact
         if last_lora_a is not None:
             total_params = -total_params

+        # NOTE: keep the output in the same order as accepted by 'size_label' in gguf-py/gguf/utility.py
         return total_params, shared_params, expert_params, expert_count

     def format_shard_names(self, path: Path) -> list[Path]:
@@ -177,7 +182,7 @@ def open_output_file(self, path: Path | None = None) -> None:
             and self.fout is not None
             and (path is None or path == self.path)
         ):
+            # allow calling this multiple times as long as the path is the same
             return

         if self.state is not WriterState.NO_FILE:
@@ -206,7 +211,7 @@ def print_plan(self) -> list[Path]:
         if self.dry_run:
             logger.info("Dry run, not writing files")
             for name in filenames:
-                print(name)
+                print(name)  # noqa: NP100
             exit()

         return filenames
@@ -390,11 +395,12 @@ def add_tensor_info(
         if tensor_dtype == np.uint8:
             tensor_shape = quant_shape_from_byte_shape(tensor_shape, raw_dtype)

+        # make sure there is at least one tensor before splitting
         if len(self.tensors[-1]) > 0:
-            if (
+            if (  # split when over tensor limit
                 self.split_max_tensors != 0
                 and len(self.tensors[-1]) >= self.split_max_tensors
-            ) or (
+            ) or (  # split when over size limit
                 self.split_max_size != 0
                 and sum(ti.nbytes for ti in self.tensors[-1].values()) + tensor_nbytes
                 > self.split_max_size
@@ -460,6 +466,8 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:

         fout = self.fout[file_id]

+        # pop the first tensor info
+        # TODO: cleaner way to get the first key
         first_tensor_name = [
             name for name, _ in zip(self.tensors[file_id].keys(), range(1))
         ][0]
@@ -506,8 +514,11 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
                     total = sum(ti.nbytes for ti in tensors.values())
                     shard_bar.reset(total=(total if total > 0 else None))

+                # relying on the fact that Python dicts preserve insertion order (since 3.7)
                 for ti in tensors.values():
-                    assert ti.tensor is not None
+                    assert (
+                        ti.tensor is not None
+                    )  # can only iterate once over the tensors
                     assert ti.tensor.nbytes == ti.nbytes
                     ti.tensor.tofile(fout)
                     if shard_bar is not None:
@@ -631,6 +642,11 @@ def add_base_model_organization(self, source_id: int, organization: str) -> None:
             Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization
         )

+    def add_base_model_description(self, source_id: int, description: str) -> None:
+        self.add_string(
+            Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description
+        )
+
     def add_base_model_url(self, source_id: int, url: str) -> None:
         self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
@ -643,15 +659,46 @@ def add_base_model_uuid(self, source_id: int, uuid: str) -> None:
|
||||||
def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
|
def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
|
||||||
self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
|
self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
|
||||||
|
|
||||||
|
def add_dataset_count(self, source_count: int) -> None:
|
||||||
|
self.add_uint32(Keys.General.DATASET_COUNT, source_count)
|
||||||
|
|
||||||
|
def add_dataset_name(self, source_id: int, name: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name)
|
||||||
|
|
||||||
|
def add_dataset_author(self, source_id: int, author: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author)
|
||||||
|
|
||||||
|
def add_dataset_version(self, source_id: int, version: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version)
|
||||||
|
|
||||||
|
def add_dataset_organization(self, source_id: int, organization: str) -> None:
|
||||||
|
self.add_string(
|
||||||
|
Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization
|
||||||
|
)
|
||||||
|
|
||||||
|
def add_dataset_description(self, source_id: int, description: str) -> None:
|
||||||
|
self.add_string(
|
||||||
|
Keys.General.DATASET_DESCRIPTION.format(id=source_id), description
|
||||||
|
)
|
||||||
|
|
||||||
|
def add_dataset_url(self, source_id: int, url: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_URL.format(id=source_id), url)
|
||||||
|
|
||||||
|
def add_dataset_doi(self, source_id: int, doi: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi)
|
||||||
|
|
||||||
|
def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid)
|
||||||
|
|
||||||
|
def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url)
|
||||||
|
|
||||||
def add_tags(self, tags: Sequence[str]) -> None:
|
def add_tags(self, tags: Sequence[str]) -> None:
|
||||||
self.add_array(Keys.General.TAGS, tags)
|
self.add_array(Keys.General.TAGS, tags)
|
||||||
|
|
||||||
def add_languages(self, languages: Sequence[str]) -> None:
|
def add_languages(self, languages: Sequence[str]) -> None:
|
||||||
self.add_array(Keys.General.LANGUAGES, languages)
|
self.add_array(Keys.General.LANGUAGES, languages)
|
||||||
|
|
||||||
def add_datasets(self, datasets: Sequence[str]) -> None:
|
|
||||||
self.add_array(Keys.General.DATASETS, datasets)
|
|
||||||
|
|
||||||
def add_tensor_data_layout(self, layout: str) -> None:
|
def add_tensor_data_layout(self, layout: str) -> None:
|
||||||
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
|
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
|
||||||
|
|
||||||
|
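A rough sketch of how the new per-dataset writer methods might be driven from a list of dataset dicts, mirroring what metadata.py does further down (the writer construction is assumed, and only a couple of fields are shown):

    from gguf import GGUFWriter  # assumes the bundled gguf package is importable

    def write_datasets(writer: GGUFWriter, datasets: list[dict]) -> None:
        writer.add_dataset_count(len(datasets))
        for idx, entry in enumerate(datasets):
            if "name" in entry:
                writer.add_dataset_name(idx, entry["name"])
            if "repo_url" in entry:
                writer.add_dataset_repo_url(idx, entry["repo_url"])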
@ -664,6 +711,21 @@ def add_context_length(self, length: int) -> None:
|
||||||
def add_embedding_length(self, length: int) -> None:
|
def add_embedding_length(self, length: int) -> None:
|
||||||
self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
|
self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_features_length(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_posnet_embedding_length(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_posnet_block_count(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_convnext_embedding_length(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_convnext_block_count(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
|
||||||
|
|
||||||
def add_block_count(self, length: int) -> None:
|
def add_block_count(self, length: int) -> None:
|
||||||
self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
|
self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
@ -739,6 +801,15 @@ def add_expert_shared_count(self, count: int) -> None:
|
||||||
def add_expert_weights_scale(self, value: float) -> None:
|
def add_expert_weights_scale(self, value: float) -> None:
|
||||||
self.add_float32(Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch), value)
|
self.add_float32(Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_expert_weights_norm(self, value: bool) -> None:
|
||||||
|
self.add_bool(Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
|
||||||
|
|
||||||
|
def add_swin_norm(self, value: bool) -> None:
|
||||||
|
self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_rescale_every_n_layers(self, count: int) -> None:
|
def add_rescale_every_n_layers(self, count: int) -> None:
|
||||||
self.add_uint32(Keys.LLM.RESCALE_EVERY_N_LAYERS.format(arch=self.arch), count)
|
self.add_uint32(Keys.LLM.RESCALE_EVERY_N_LAYERS.format(arch=self.arch), count)
|
||||||
|
|
||||||
|
@ -763,6 +834,12 @@ def add_layer_norm_eps(self, value: float) -> None:
|
||||||
def add_layer_norm_rms_eps(self, value: float) -> None:
|
def add_layer_norm_rms_eps(self, value: float) -> None:
|
||||||
self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
|
self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_group_norm_eps(self, value: float) -> None:
|
||||||
|
self.add_float32(Keys.Attention.GROUPNORM_EPS.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_group_norm_groups(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_causal_attention(self, value: bool) -> None:
|
def add_causal_attention(self, value: bool) -> None:
|
||||||
self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
|
self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
@ -787,6 +864,9 @@ def add_pooling_type(self, value: PoolingType) -> None:
|
||||||
def add_rope_dimension_count(self, count: int) -> None:
|
def add_rope_dimension_count(self, count: int) -> None:
|
||||||
self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
|
self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
|
||||||
|
|
||||||
|
def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
|
||||||
|
self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)
|
||||||
|
|
||||||
def add_rope_freq_base(self, value: float) -> None:
|
def add_rope_freq_base(self, value: float) -> None:
|
||||||
self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
|
self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
@ -893,6 +973,7 @@ def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
|
||||||
name = choice.get("name", "")
|
name = choice.get("name", "")
|
||||||
template = choice.get("template")
|
template = choice.get("template")
|
||||||
|
|
||||||
|
# Allowing non-alphanumerical characters in template name is probably not a good idea, so filter it
|
||||||
name = "".join(
|
name = "".join(
|
||||||
(c if c in ascii_letters + digits else "_" for c in name)
|
(c if c in ascii_letters + digits else "_" for c in name)
|
||||||
)
|
)
|
||||||
|
@ -916,15 +997,6 @@ def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
|
||||||
|
|
||||||
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
|
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
|
||||||
|
|
||||||
def add_prefix_token_id(self, id: int) -> None:
|
|
||||||
self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
|
|
||||||
|
|
||||||
def add_suffix_token_id(self, id: int) -> None:
|
|
||||||
self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
|
|
||||||
|
|
||||||
def add_middle_token_id(self, id: int) -> None:
|
|
||||||
self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
|
|
||||||
|
|
||||||
def add_eot_token_id(self, id: int) -> None:
|
def add_eot_token_id(self, id: int) -> None:
|
||||||
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
|
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
|
||||||
|
|
||||||
|
|
|
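The add_expert_weights_norm and add_expert_gating_func methods added above pair with the new ExpertGatingFuncType enum from constants.py. A minimal usage sketch, assuming writer is an already-initialized GGUFWriter for a MoE model:

    from gguf.constants import ExpertGatingFuncType

    writer.add_expert_weights_norm(True)
    writer.add_expert_gating_func(ExpertGatingFuncType.SIGMOID)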
src/gguf/lazy.py
@@ -12,6 +12,7 @@


 class LazyMeta(ABCMeta):
+
     def __new__(
         cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs
     ):
@@ -34,7 +35,7 @@ def __getattr__(self, name: str) -> Any:

         # need to make a builder for the wrapped wrapper to copy the name,
         # or else it fails with very cryptic error messages,
-        # because somehow the same string would end up in every closure
+        # because somehow the same string would end up in every closures
         def mk_wrap(op_name: str, *, meta_noop: bool = False):
             # need to wrap the wrapper to get self
             def wrapped_special_op(self, *args, **kwargs):
@@ -254,6 +255,8 @@ def from_eager(cls, t: Any) -> Any:
 class LazyNumpyTensor(LazyBase):
     _tensor_type = np.ndarray

+    shape: tuple[int, ...]  # Makes the type checker happy in quants.py
+
     @classmethod
     def meta_with_dtype_and_shape(
         cls, dtype: DTypeLike, shape: tuple[int, ...]
src/gguf/metadata.py
@@ -41,7 +41,7 @@ class Metadata:
     base_models: Optional[list[dict]] = None
     tags: Optional[list[str]] = None
     languages: Optional[list[str]] = None
-    datasets: Optional[list[str]] = None
+    datasets: Optional[list[dict]] = None

     @staticmethod
     def load(
@@ -50,7 +50,7 @@ def load(
         model_name: Optional[str] = None,
         total_params: int = 0,
     ) -> Metadata:
-        # This grabs as much contextual authorship metadata as possible from the model repository
+        # This grabs as many contextual authorship metadata as possible from the model repository
         # making any conversion as required to match the gguf kv store metadata format
         # as well as giving users the ability to override any authorship metadata that may be incorrect
@@ -126,13 +126,13 @@ def load(
             "general.base_models", metadata.base_models
         )

+        # Datasets is received here as an array of datasets
+        metadata.datasets = metadata_override.get("general.datasets", metadata.datasets)
+
         metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
         metadata.languages = metadata_override.get(
             Keys.General.LANGUAGES, metadata.languages
         )
-        metadata.datasets = metadata_override.get(
-            Keys.General.DATASETS, metadata.datasets
-        )

         # Direct Metadata Override (via direct cli argument)
         if model_name is not None:
@ -228,7 +228,11 @@ def get_model_id_components(
|
||||||
org_component, model_full_name_component = None, model_id
|
org_component, model_full_name_component = None, model_id
|
||||||
|
|
||||||
# Check if we erroneously matched against './' or '../' etc...
|
# Check if we erroneously matched against './' or '../' etc...
|
||||||
if org_component is not None and org_component[0] == ".":
|
if (
|
||||||
|
org_component is not None
|
||||||
|
and len(org_component) > 0
|
||||||
|
and org_component[0] == "."
|
||||||
|
):
|
||||||
org_component = None
|
org_component = None
|
||||||
|
|
||||||
name_parts: list[str] = model_full_name_component.split("-")
|
name_parts: list[str] = model_full_name_component.split("-")
|
||||||
|
@ -387,27 +391,86 @@ def apply_metadata_heuristic(
|
||||||
########################
|
########################
|
||||||
if model_card is not None:
|
if model_card is not None:
|
||||||
|
|
||||||
if "model_name" in model_card and metadata.name is None:
|
def use_model_card_metadata(metadata_key: str, model_card_key: str):
|
||||||
# Not part of huggingface model card standard but notice some model creator using it
|
if (
|
||||||
# such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
|
model_card_key in model_card
|
||||||
metadata.name = model_card.get("model_name")
|
and getattr(metadata, metadata_key, None) is None
|
||||||
|
):
|
||||||
|
setattr(metadata, metadata_key, model_card.get(model_card_key))
|
||||||
|
|
||||||
if "model_creator" in model_card and metadata.author is None:
|
def use_array_model_card_metadata(metadata_key: str, model_card_key: str):
|
||||||
# Not part of huggingface model card standard but notice some model creator using it
|
# Note: Will append rather than replace if already exist
|
||||||
# such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
|
tags_value = model_card.get(model_card_key, None)
|
||||||
metadata.author = model_card.get("model_creator")
|
if tags_value is None:
|
||||||
|
return
|
||||||
|
|
||||||
if "model_type" in model_card and metadata.basename is None:
|
current_value = getattr(metadata, metadata_key, None)
|
||||||
# Not part of huggingface model card standard but notice some model creator using it
|
if current_value is None:
|
||||||
# such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
|
current_value = []
|
||||||
metadata.basename = model_card.get("model_type")
|
|
||||||
|
|
||||||
if "base_model" in model_card:
|
if isinstance(tags_value, str):
|
||||||
|
current_value.append(tags_value)
|
||||||
|
elif isinstance(tags_value, list):
|
||||||
|
current_value.extend(tags_value)
|
||||||
|
|
||||||
|
setattr(metadata, metadata_key, current_value)
|
||||||
|
|
||||||
|
# LLAMA.cpp's direct internal convention
|
||||||
|
# (Definitely not part of hugging face formal/informal standard)
|
||||||
|
#########################################
|
||||||
|
use_model_card_metadata("name", "name")
|
||||||
|
use_model_card_metadata("author", "author")
|
||||||
|
use_model_card_metadata("version", "version")
|
||||||
|
use_model_card_metadata("organization", "organization")
|
||||||
|
use_model_card_metadata("description", "description")
|
||||||
|
use_model_card_metadata("finetune", "finetune")
|
||||||
|
use_model_card_metadata("basename", "basename")
|
||||||
|
use_model_card_metadata("size_label", "size_label")
|
||||||
|
use_model_card_metadata("source_url", "url")
|
||||||
|
use_model_card_metadata("source_doi", "doi")
|
||||||
|
use_model_card_metadata("source_uuid", "uuid")
|
||||||
|
use_model_card_metadata("source_repo_url", "repo_url")
|
||||||
|
|
||||||
|
# LLAMA.cpp's huggingface style convention
|
||||||
|
# (Definitely not part of hugging face formal/informal standard... but with model_ appended to match their style)
|
||||||
|
###########################################
|
||||||
|
use_model_card_metadata("name", "model_name")
|
||||||
|
use_model_card_metadata("author", "model_author")
|
||||||
|
use_model_card_metadata("version", "model_version")
|
||||||
|
use_model_card_metadata("organization", "model_organization")
|
||||||
|
use_model_card_metadata("description", "model_description")
|
||||||
|
use_model_card_metadata("finetune", "model_finetune")
|
||||||
|
use_model_card_metadata("basename", "model_basename")
|
||||||
|
use_model_card_metadata("size_label", "model_size_label")
|
||||||
|
use_model_card_metadata("source_url", "model_url")
|
||||||
|
use_model_card_metadata("source_doi", "model_doi")
|
||||||
|
use_model_card_metadata("source_uuid", "model_uuid")
|
||||||
|
use_model_card_metadata("source_repo_url", "model_repo_url")
|
||||||
|
|
||||||
|
# Hugging Face Direct Convention
|
||||||
|
#################################
|
||||||
|
|
||||||
|
# Not part of huggingface model card standard but notice some model creator using it
|
||||||
|
# such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
|
||||||
|
use_model_card_metadata("name", "model_name")
|
||||||
|
use_model_card_metadata("author", "model_creator")
|
||||||
|
use_model_card_metadata("basename", "model_type")
|
||||||
|
|
||||||
|
if (
|
||||||
|
"base_model" in model_card
|
||||||
|
or "base_models" in model_card
|
||||||
|
or "base_model_sources" in model_card
|
||||||
|
):
|
||||||
# This represents the parent models that this is based on
|
# This represents the parent models that this is based on
|
||||||
# Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges)
|
# Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges)
|
||||||
# Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md
|
# Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md
|
||||||
metadata_base_models = []
|
metadata_base_models = []
|
||||||
base_model_value = model_card.get("base_model", None)
|
base_model_value = model_card.get(
|
||||||
|
"base_model",
|
||||||
|
model_card.get(
|
||||||
|
"base_models", model_card.get("base_model_sources", None)
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
if base_model_value is not None:
|
if base_model_value is not None:
|
||||||
if isinstance(base_model_value, str):
|
if isinstance(base_model_value, str):
|
||||||
|
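The two nested helpers introduced in this hunk implement "fill only if unset" and "append to array" semantics over the model card. A tiny standalone illustration of the same pattern, with plain dicts standing in for the Metadata object and made-up values:

    def use_scalar(meta: dict, card: dict, meta_key: str, card_key: str) -> None:
        # only fill fields the user has not already set
        if card_key in card and meta.get(meta_key) is None:
            meta[meta_key] = card[card_key]

    def use_array(meta: dict, card: dict, meta_key: str, card_key: str) -> None:
        # append rather than replace, accepting either a string or a list
        value = card.get(card_key)
        if value is None:
            return
        current = meta.get(meta_key) or []
        current.extend([value] if isinstance(value, str) else value)
        meta[meta_key] = current

    meta = {"name": None, "tags": None}
    use_scalar(meta, {"model_name": "Example-7B"}, "name", "model_name")
    use_array(meta, {"pipeline_tag": "text-generation"}, "tags", "pipeline_tag")
    # meta == {"name": "Example-7B", "tags": ["text-generation"]}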
@ -420,86 +483,195 @@ def apply_metadata_heuristic(
|
||||||
|
|
||||||
for model_id in metadata_base_models:
|
for model_id in metadata_base_models:
|
||||||
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
||||||
(
|
|
||||||
model_full_name_component,
|
|
||||||
org_component,
|
|
||||||
basename,
|
|
||||||
finetune,
|
|
||||||
version,
|
|
||||||
size_label,
|
|
||||||
) = Metadata.get_model_id_components(model_id, total_params)
|
|
||||||
base_model = {}
|
base_model = {}
|
||||||
if model_full_name_component is not None:
|
if isinstance(model_id, str):
|
||||||
base_model["name"] = Metadata.id_to_title(
|
if (
|
||||||
model_full_name_component
|
model_id.startswith("http://")
|
||||||
)
|
or model_id.startswith("https://")
|
||||||
if org_component is not None:
|
or model_id.startswith("ssh://")
|
||||||
base_model["organization"] = Metadata.id_to_title(org_component)
|
):
|
||||||
if version is not None:
|
base_model["repo_url"] = model_id
|
||||||
base_model["version"] = version
|
|
||||||
if (
|
# Check if Hugging Face ID is present in URL
|
||||||
org_component is not None
|
if "huggingface.co" in model_id:
|
||||||
and model_full_name_component is not None
|
match = re.match(
|
||||||
):
|
r"https?://huggingface.co/([^/]+/[^/]+)$", model_id
|
||||||
base_model["repo_url"] = (
|
)
|
||||||
f"https://huggingface.co/{org_component}/{model_full_name_component}"
|
if match:
|
||||||
|
model_id_component = match.group(1)
|
||||||
|
(
|
||||||
|
model_full_name_component,
|
||||||
|
org_component,
|
||||||
|
basename,
|
||||||
|
finetune,
|
||||||
|
version,
|
||||||
|
size_label,
|
||||||
|
) = Metadata.get_model_id_components(
|
||||||
|
model_id_component, total_params
|
||||||
|
)
|
||||||
|
|
||||||
|
# Populate model dictionary with extracted components
|
||||||
|
if model_full_name_component is not None:
|
||||||
|
base_model["name"] = Metadata.id_to_title(
|
||||||
|
model_full_name_component
|
||||||
|
)
|
||||||
|
if org_component is not None:
|
||||||
|
base_model["organization"] = (
|
||||||
|
Metadata.id_to_title(org_component)
|
||||||
|
)
|
||||||
|
if version is not None:
|
||||||
|
base_model["version"] = version
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Likely a Hugging Face ID
|
||||||
|
(
|
||||||
|
model_full_name_component,
|
||||||
|
org_component,
|
||||||
|
basename,
|
||||||
|
finetune,
|
||||||
|
version,
|
||||||
|
size_label,
|
||||||
|
) = Metadata.get_model_id_components(model_id, total_params)
|
||||||
|
|
||||||
|
# Populate model dictionary with extracted components
|
||||||
|
if model_full_name_component is not None:
|
||||||
|
base_model["name"] = Metadata.id_to_title(
|
||||||
|
model_full_name_component
|
||||||
|
)
|
||||||
|
if org_component is not None:
|
||||||
|
base_model["organization"] = Metadata.id_to_title(
|
||||||
|
org_component
|
||||||
|
)
|
||||||
|
if version is not None:
|
||||||
|
base_model["version"] = version
|
||||||
|
if (
|
||||||
|
org_component is not None
|
||||||
|
and model_full_name_component is not None
|
||||||
|
):
|
||||||
|
base_model["repo_url"] = (
|
||||||
|
f"https://huggingface.co/{org_component}/{model_full_name_component}"
|
||||||
|
)
|
||||||
|
|
||||||
|
elif isinstance(model_id, dict):
|
||||||
|
base_model = model_id
|
||||||
|
|
||||||
|
else:
|
||||||
|
logger.error(
|
||||||
|
f"base model entry '{str(model_id)}' not in a known format"
|
||||||
)
|
)
|
||||||
|
|
||||||
metadata.base_models.append(base_model)
|
metadata.base_models.append(base_model)
|
||||||
|
|
||||||
if "license" in model_card and metadata.license is None:
|
if (
|
||||||
metadata.license = model_card.get("license")
|
"datasets" in model_card
|
||||||
|
or "dataset" in model_card
|
||||||
|
or "dataset_sources" in model_card
|
||||||
|
):
|
||||||
|
# This represents the datasets that this was trained from
|
||||||
|
metadata_datasets = []
|
||||||
|
dataset_value = model_card.get(
|
||||||
|
"datasets",
|
||||||
|
model_card.get("dataset", model_card.get("dataset_sources", None)),
|
||||||
|
)
|
||||||
|
|
||||||
if "license_name" in model_card and metadata.license_name is None:
|
if dataset_value is not None:
|
||||||
metadata.license_name = model_card.get("license_name")
|
if isinstance(dataset_value, str):
|
||||||
|
metadata_datasets.append(dataset_value)
|
||||||
if "license_link" in model_card and metadata.license_link is None:
|
elif isinstance(dataset_value, list):
|
||||||
metadata.license_link = model_card.get("license_link")
|
metadata_datasets.extend(dataset_value)
|
||||||
|
|
||||||
tags_value = model_card.get("tags", None)
|
|
||||||
if tags_value is not None:
|
|
||||||
|
|
||||||
if metadata.tags is None:
|
|
||||||
metadata.tags = []
|
|
||||||
|
|
||||||
if isinstance(tags_value, str):
|
|
||||||
metadata.tags.append(tags_value)
|
|
||||||
elif isinstance(tags_value, list):
|
|
||||||
metadata.tags.extend(tags_value)
|
|
||||||
|
|
||||||
pipeline_tags_value = model_card.get("pipeline_tag", None)
|
|
||||||
if pipeline_tags_value is not None:
|
|
||||||
|
|
||||||
if metadata.tags is None:
|
|
||||||
metadata.tags = []
|
|
||||||
|
|
||||||
if isinstance(pipeline_tags_value, str):
|
|
||||||
metadata.tags.append(pipeline_tags_value)
|
|
||||||
elif isinstance(pipeline_tags_value, list):
|
|
||||||
metadata.tags.extend(pipeline_tags_value)
|
|
||||||
|
|
||||||
language_value = model_card.get(
|
|
||||||
"languages", model_card.get("language", None)
|
|
||||||
)
|
|
||||||
if language_value is not None:
|
|
||||||
|
|
||||||
if metadata.languages is None:
|
|
||||||
metadata.languages = []
|
|
||||||
|
|
||||||
if isinstance(language_value, str):
|
|
||||||
metadata.languages.append(language_value)
|
|
||||||
elif isinstance(language_value, list):
|
|
||||||
metadata.languages.extend(language_value)
|
|
||||||
|
|
||||||
dataset_value = model_card.get("datasets", model_card.get("dataset", None))
|
|
||||||
if dataset_value is not None:
|
|
||||||
|
|
||||||
if metadata.datasets is None:
|
if metadata.datasets is None:
|
||||||
metadata.datasets = []
|
metadata.datasets = []
|
||||||
|
|
||||||
if isinstance(dataset_value, str):
|
for dataset_id in metadata_datasets:
|
||||||
metadata.datasets.append(dataset_value)
|
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
||||||
elif isinstance(dataset_value, list):
|
dataset = {}
|
||||||
metadata.datasets.extend(dataset_value)
|
if isinstance(dataset_id, str):
|
||||||
|
if dataset_id.startswith(("http://", "https://", "ssh://")):
|
||||||
|
dataset["repo_url"] = dataset_id
|
||||||
|
|
||||||
|
# Check if Hugging Face ID is present in URL
|
||||||
|
if "huggingface.co" in dataset_id:
|
||||||
|
match = re.match(
|
||||||
|
r"https?://huggingface.co/([^/]+/[^/]+)$",
|
||||||
|
dataset_id,
|
||||||
|
)
|
||||||
|
if match:
|
||||||
|
dataset_id_component = match.group(1)
|
||||||
|
(
|
||||||
|
dataset_name_component,
|
||||||
|
org_component,
|
||||||
|
basename,
|
||||||
|
finetune,
|
||||||
|
version,
|
||||||
|
size_label,
|
||||||
|
) = Metadata.get_model_id_components(
|
||||||
|
dataset_id_component, total_params
|
||||||
|
)
|
||||||
|
|
||||||
|
# Populate dataset dictionary with extracted components
|
||||||
|
if dataset_name_component is not None:
|
||||||
|
dataset["name"] = Metadata.id_to_title(
|
||||||
|
dataset_name_component
|
||||||
|
)
|
||||||
|
if org_component is not None:
|
||||||
|
dataset["organization"] = Metadata.id_to_title(
|
||||||
|
org_component
|
||||||
|
)
|
||||||
|
if version is not None:
|
||||||
|
dataset["version"] = version
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Likely a Hugging Face ID
|
||||||
|
(
|
||||||
|
dataset_name_component,
|
||||||
|
org_component,
|
||||||
|
basename,
|
||||||
|
finetune,
|
||||||
|
version,
|
||||||
|
size_label,
|
||||||
|
) = Metadata.get_model_id_components(
|
||||||
|
dataset_id, total_params
|
||||||
|
)
|
||||||
|
|
||||||
|
# Populate dataset dictionary with extracted components
|
||||||
|
if dataset_name_component is not None:
|
||||||
|
dataset["name"] = Metadata.id_to_title(
|
||||||
|
dataset_name_component
|
||||||
|
)
|
||||||
|
if org_component is not None:
|
||||||
|
dataset["organization"] = Metadata.id_to_title(
|
||||||
|
org_component
|
||||||
|
)
|
||||||
|
if version is not None:
|
||||||
|
dataset["version"] = version
|
||||||
|
if (
|
||||||
|
org_component is not None
|
||||||
|
and dataset_name_component is not None
|
||||||
|
):
|
||||||
|
dataset["repo_url"] = (
|
||||||
|
f"https://huggingface.co/{org_component}/{dataset_name_component}"
|
||||||
|
)
|
||||||
|
|
||||||
|
elif isinstance(dataset_id, dict):
|
||||||
|
dataset = dataset_id
|
||||||
|
|
||||||
|
else:
|
||||||
|
logger.error(
|
||||||
|
f"dataset entry '{str(dataset_id)}' not in a known format"
|
||||||
|
)
|
||||||
|
|
||||||
|
metadata.datasets.append(dataset)
|
||||||
|
|
||||||
|
use_model_card_metadata("license", "license")
|
||||||
|
use_model_card_metadata("license_name", "license_name")
|
||||||
|
use_model_card_metadata("license_link", "license_link")
|
||||||
|
|
||||||
|
use_array_model_card_metadata("tags", "tags")
|
||||||
|
use_array_model_card_metadata("tags", "pipeline_tag")
|
||||||
|
|
||||||
|
use_array_model_card_metadata("languages", "languages")
|
||||||
|
use_array_model_card_metadata("languages", "language")
|
||||||
|
|
||||||
# Hugging Face Parameter Heuristics
|
# Hugging Face Parameter Heuristics
|
||||||
####################################
|
####################################
|
||||||
|
@ -508,7 +680,7 @@ def apply_metadata_heuristic(
|
||||||
|
|
||||||
hf_name_or_path = hf_params.get("_name_or_path")
|
hf_name_or_path = hf_params.get("_name_or_path")
|
||||||
if hf_name_or_path is not None and hf_name_or_path.count("/") <= 1:
|
if hf_name_or_path is not None and hf_name_or_path.count("/") <= 1:
|
||||||
# Use _name_or_path only if it's actually a model name and not some computer path
|
# Use _name_or_path only if its actually a model name and not some computer path
|
||||||
# e.g. 'meta-llama/Llama-2-7b-hf'
|
# e.g. 'meta-llama/Llama-2-7b-hf'
|
||||||
model_id = hf_name_or_path
|
model_id = hf_name_or_path
|
||||||
(
|
(
|
||||||
|
@ -584,7 +756,10 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
|
||||||
gguf_writer.add_size_label(self.size_label)
|
gguf_writer.add_size_label(self.size_label)
|
||||||
|
|
||||||
if self.license is not None:
|
if self.license is not None:
|
||||||
gguf_writer.add_license(self.license)
|
if isinstance(self.license, list):
|
||||||
|
gguf_writer.add_license(",".join(self.license))
|
||||||
|
else:
|
||||||
|
gguf_writer.add_license(self.license)
|
||||||
if self.license_name is not None:
|
if self.license_name is not None:
|
||||||
gguf_writer.add_license_name(self.license_name)
|
gguf_writer.add_license_name(self.license_name)
|
||||||
if self.license_link is not None:
|
if self.license_link is not None:
|
||||||
|
@ -621,6 +796,10 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
|
||||||
gguf_writer.add_base_model_organization(
|
gguf_writer.add_base_model_organization(
|
||||||
key, base_model_entry["organization"]
|
key, base_model_entry["organization"]
|
||||||
)
|
)
|
||||||
|
if "description" in base_model_entry:
|
||||||
|
gguf_writer.add_base_model_description(
|
||||||
|
key, base_model_entry["description"]
|
||||||
|
)
|
||||||
if "url" in base_model_entry:
|
if "url" in base_model_entry:
|
||||||
gguf_writer.add_base_model_url(key, base_model_entry["url"])
|
gguf_writer.add_base_model_url(key, base_model_entry["url"])
|
||||||
if "doi" in base_model_entry:
|
if "doi" in base_model_entry:
|
||||||
|
@ -632,9 +811,33 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
|
||||||
key, base_model_entry["repo_url"]
|
key, base_model_entry["repo_url"]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if self.datasets is not None:
|
||||||
|
gguf_writer.add_dataset_count(len(self.datasets))
|
||||||
|
for key, dataset_entry in enumerate(self.datasets):
|
||||||
|
if "name" in dataset_entry:
|
||||||
|
gguf_writer.add_dataset_name(key, dataset_entry["name"])
|
||||||
|
if "author" in dataset_entry:
|
||||||
|
gguf_writer.add_dataset_author(key, dataset_entry["author"])
|
||||||
|
if "version" in dataset_entry:
|
||||||
|
gguf_writer.add_dataset_version(key, dataset_entry["version"])
|
||||||
|
if "organization" in dataset_entry:
|
||||||
|
gguf_writer.add_dataset_organization(
|
||||||
|
key, dataset_entry["organization"]
|
||||||
|
)
|
||||||
|
if "description" in dataset_entry:
|
||||||
|
gguf_writer.add_dataset_description(
|
||||||
|
key, dataset_entry["description"]
|
||||||
|
)
|
||||||
|
if "url" in dataset_entry:
|
||||||
|
gguf_writer.add_dataset_url(key, dataset_entry["url"])
|
||||||
|
if "doi" in dataset_entry:
|
||||||
|
gguf_writer.add_dataset_doi(key, dataset_entry["doi"])
|
||||||
|
if "uuid" in dataset_entry:
|
||||||
|
gguf_writer.add_dataset_uuid(key, dataset_entry["uuid"])
|
||||||
|
if "repo_url" in dataset_entry:
|
||||||
|
gguf_writer.add_dataset_repo_url(key, dataset_entry["repo_url"])
|
||||||
|
|
||||||
if self.tags is not None:
|
if self.tags is not None:
|
||||||
gguf_writer.add_tags(self.tags)
|
gguf_writer.add_tags(self.tags)
|
||||||
if self.languages is not None:
|
if self.languages is not None:
|
||||||
gguf_writer.add_languages(self.languages)
|
gguf_writer.add_languages(self.languages)
|
||||||
if self.datasets is not None:
|
|
||||||
gguf_writer.add_datasets(self.datasets)
|
|
||||||
|
|
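Both the base-model and dataset heuristics above normalize huggingface.co URLs back into "org/name" IDs with the same regular expression before splitting them into components. That step in isolation (the helper name is illustrative):

    import re

    def hf_id_from_url(url: str) -> str | None:
        match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", url)
        return match.group(1) if match else None

    print(hf_id_from_url("https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1"))
    # EmbeddedLLM/Mistral-7B-Merge-14-v0.1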
1471 src/gguf/quants.py
File diff suppressed because it is too large.
@ -7,463 +7,574 @@
|
||||||
|
|
||||||
class TensorNameMap:
|
class TensorNameMap:
|
||||||
mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
|
mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
|
||||||
|
# Token embeddings
|
||||||
MODEL_TENSOR.TOKEN_EMBD: (
|
MODEL_TENSOR.TOKEN_EMBD: (
|
||||||
"gpt_neox.embed_in",
|
"gpt_neox.embed_in", # gptneox
|
||||||
"transformer.wte",
|
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
|
||||||
"transformer.word_embeddings",
|
"transformer.word_embeddings", # falcon
|
||||||
"word_embeddings",
|
"word_embeddings", # bloom
|
||||||
"model.embed_tokens",
|
"model.embed_tokens", # llama-hf nemotron olmoe olmo2
|
||||||
"tok_embeddings",
|
"tok_embeddings", # llama-pth
|
||||||
"embeddings.word_embeddings",
|
"embeddings.word_embeddings", # bert nomic-bert
|
||||||
"language_model.embedding.word_embeddings",
|
"language_model.embedding.word_embeddings", # persimmon
|
||||||
"wte",
|
"wte", # gpt2
|
||||||
"transformer.embd.wte",
|
"transformer.embd.wte", # phi2
|
||||||
"model.tok_embeddings",
|
"model.tok_embeddings", # internlm2
|
||||||
"model.embedding",
|
"model.embedding", # mamba-qbert
|
||||||
"backbone.embedding",
|
"backbone.embedding", # mamba
|
||||||
"backbone.embeddings",
|
"backbone.embeddings", # mamba-hf
|
||||||
"transformer.in_out_embed",
|
"transformer.in_out_embed", # Grok
|
||||||
"embedding.word_embeddings",
|
"embedding.word_embeddings", # chatglm
|
||||||
"transformer.token_embeddings",
|
"transformer.token_embeddings", # openelm
|
||||||
"shared",
|
"shared", # t5
|
||||||
"rwkv.embeddings",
|
"rwkv.embeddings", # rwkv
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TOKEN_TYPES: ("embeddings.token_type_embeddings",),
|
# Token type embeddings
|
||||||
|
MODEL_TENSOR.TOKEN_TYPES: (
|
||||||
|
"embeddings.token_type_embeddings", # bert nomic-bert
|
||||||
|
),
|
||||||
|
# Normalization of token embeddings
|
||||||
MODEL_TENSOR.TOKEN_EMBD_NORM: (
|
MODEL_TENSOR.TOKEN_EMBD_NORM: (
|
||||||
"word_embeddings_layernorm",
|
"word_embeddings_layernorm", # bloom
|
||||||
"embeddings.LayerNorm",
|
"embeddings.LayerNorm", # bert
|
||||||
"emb_ln",
|
"emb_ln", # nomic-bert
|
||||||
"transformer.norm",
|
"transformer.norm", # openelm
|
||||||
"rwkv.blocks.0.pre_ln",
|
"rwkv.blocks.0.pre_ln", # rwkv
|
||||||
|
"backbone.norm", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
# Position embeddings
|
||||||
MODEL_TENSOR.POS_EMBD: (
|
MODEL_TENSOR.POS_EMBD: (
|
||||||
"transformer.wpe",
|
"transformer.wpe", # gpt2
|
||||||
"embeddings.position_embeddings",
|
"embeddings.position_embeddings", # bert
|
||||||
"wpe",
|
"wpe", # gpt2
|
||||||
),
|
),
|
||||||
|
# Output
|
||||||
MODEL_TENSOR.OUTPUT: (
|
MODEL_TENSOR.OUTPUT: (
|
||||||
"embed_out",
|
"embed_out", # gptneox
|
||||||
"lm_head",
|
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2
|
||||||
"output",
|
"output", # llama-pth bloom internlm2
|
||||||
"word_embeddings_for_head",
|
"word_embeddings_for_head", # persimmon
|
||||||
"lm_head.linear",
|
"lm_head.linear", # phi2
|
||||||
"output_layer",
|
"output_layer", # chatglm
|
||||||
"head",
|
"head", # rwkv
|
||||||
|
"head.out", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
# Output norm
|
||||||
MODEL_TENSOR.OUTPUT_NORM: (
|
MODEL_TENSOR.OUTPUT_NORM: (
|
||||||
"gpt_neox.final_layer_norm",
|
"gpt_neox.final_layer_norm", # gptneox
|
||||||
"transformer.ln_f",
|
"transformer.ln_f", # gpt2 gpt-j falcon jais exaone
|
||||||
"model.norm",
|
"model.norm", # llama-hf baichuan internlm2 olmoe olmo2
|
||||||
"norm",
|
"norm", # llama-pth
|
||||||
"transformer.norm_f",
|
"transformer.norm_f", # mpt dbrx
|
||||||
"ln_f",
|
"ln_f", # refact bloom qwen gpt2
|
||||||
"language_model.encoder.final_layernorm",
|
"language_model.encoder.final_layernorm", # persimmon
|
||||||
"model.final_layernorm",
|
"model.final_layernorm", # persimmon
|
||||||
"lm_head.ln",
|
"lm_head.ln", # phi2
|
||||||
"model.norm_f",
|
"model.norm_f", # mamba-qbert
|
||||||
"backbone.norm_f",
|
"backbone.norm_f", # mamba
|
||||||
"transformer.rms_norm",
|
"transformer.rms_norm", # Grok
|
||||||
"encoder.final_layernorm",
|
"encoder.final_layernorm", # chatglm
|
||||||
"transformer.norm",
|
"transformer.norm", # openelm
|
||||||
"model.norm",
|
"model.norm", # nemotron
|
||||||
"rwkv.ln_out",
|
"rwkv.ln_out", # rwkv
|
||||||
|
"backbone.final_layer_norm", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
# Rope frequencies
|
||||||
MODEL_TENSOR.ROPE_FREQS: (
|
MODEL_TENSOR.ROPE_FREQS: (
|
||||||
"rope.freqs",
|
"rope.freqs", # llama-pth
|
||||||
"rotary_pos_emb.inv_freq",
|
"rotary_pos_emb.inv_freq", # chatglm
|
||||||
),
|
),
|
||||||
|
MODEL_TENSOR.ROPE_FACTORS_LONG: (),
|
||||||
|
MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
|
||||||
|
MODEL_TENSOR.CONV1D: ("backbone.embed",), # roberta
|
||||||
}
|
}
|
||||||
|
|
||||||
block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
    # Attention norm
    MODEL_TENSOR.ATTN_NORM: (
        "gpt_neox.layers.{bid}.input_layernorm", # gptneox
        "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais exaone
        "transformer.blocks.{bid}.norm_1", # mpt
        "transformer.h.{bid}.input_layernorm", # falcon7b
        "h.{bid}.input_layernorm", # bloom
        "transformer.h.{bid}.ln_mlp", # falcon40b
        "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe
        "layers.{bid}.attention_norm", # llama-pth
        "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
        "model.layers.{bid}.ln1", # yi
        "h.{bid}.ln_1", # gpt2
        "transformer.h.{bid}.ln", # phi2
        "model.layers.layers.{bid}.norm", # plamo
        "model.layers.{bid}.attention_norm", # internlm2
        "model.layers.{bid}.norm", # mamba-qbert
        "backbone.layers.{bid}.norm", # mamba
        "transformer.decoder_layer.{bid}.rms_norm", # Grok
        "transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
        "encoder.layers.{bid}.input_layernorm", # chatglm
        "transformer.layers.{bid}.attn_norm", # openelm
        "rwkv.blocks.{bid}.ln1", # rwkv
    ),

    # Attention norm 2
    MODEL_TENSOR.ATTN_NORM_2: (
        "transformer.h.{bid}.ln_attn", # falcon40b
        "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
        "rwkv.blocks.{bid}.ln2", # rwkv
    ),

    # Attention query-key-value
    MODEL_TENSOR.ATTN_QKV: (
        "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
        "transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
        "transformer.blocks.{bid}.attn.Wqkv", # mpt
        "transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
        "transformer.h.{bid}.self_attention.query_key_value", # falcon
        "h.{bid}.self_attention.query_key_value", # bloom
        "language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
        "model.layers.{bid}.self_attn.query_key_value", # persimmon
        "h.{bid}.attn.c_attn", # gpt2
        "transformer.h.{bid}.mixer.Wqkv", # phi2
        "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
        "model.layers.{bid}.self_attn.qkv_proj", # phi3
        "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
        "transformer.layers.{bid}.attn.qkv_proj", # openelm
    ),

    # Attention query
    MODEL_TENSOR.ATTN_Q: (
        "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2
        "model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
        "layers.{bid}.attention.wq", # llama-pth
        "encoder.layer.{bid}.attention.self.query", # bert
        "transformer.h.{bid}.attn.q_proj", # gpt-j
        "model.layers.layers.{bid}.self_attn.q_proj", # plamo
        "model.layers.{bid}.attention.wq", # internlm2
        "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
        "transformer.h.{bid}.attn.attention.q_proj", # exaone
    ),

    # Attention key
    MODEL_TENSOR.ATTN_K: (
        "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2
        "model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
        "layers.{bid}.attention.wk", # llama-pth
        "encoder.layer.{bid}.attention.self.key", # bert
        "transformer.h.{bid}.attn.k_proj", # gpt-j
        "transformer.h.{bid}.attn.k", # refact
        "model.layers.layers.{bid}.self_attn.k_proj", # plamo
        "model.layers.{bid}.attention.wk", # internlm2
        "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
        "transformer.h.{bid}.attn.attention.k_proj", # exaone
    ),

    # Attention value
    MODEL_TENSOR.ATTN_V: (
        "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2
        "layers.{bid}.attention.wv", # llama-pth
        "encoder.layer.{bid}.attention.self.value", # bert
        "transformer.h.{bid}.attn.v_proj", # gpt-j
        "transformer.h.{bid}.attn.v", # refact
        "model.layers.layers.{bid}.self_attn.v_proj", # plamo
        "model.layers.{bid}.attention.wv", # internlm2
        "transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
        "transformer.h.{bid}.attn.attention.v_proj", # exaone
    ),

    # Attention output
    MODEL_TENSOR.ATTN_OUT: (
        "gpt_neox.layers.{bid}.attention.dense", # gptneox
        "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
        "transformer.blocks.{bid}.attn.out_proj", # mpt
        "transformer.h.{bid}.self_attention.dense", # falcon
        "h.{bid}.self_attention.dense", # bloom
        "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2
        "model.layers.{bid}.self_attn.linear_attn", # deci
        "layers.{bid}.attention.wo", # llama-pth
        "encoder.layer.{bid}.attention.output.dense", # bert
        "transformer.h.{bid}.attn.out_proj", # gpt-j
        "language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
        "model.layers.{bid}.self_attn.dense", # persimmon
        "h.{bid}.attn.c_proj", # gpt2
        "transformer.h.{bid}.mixer.out_proj", # phi2
        "model.layers.layers.{bid}.self_attn.o_proj", # plamo
        "model.layers.{bid}.attention.wo", # internlm2
        "encoder.layers.{bid}.attn.out_proj", # nomic-bert
        "transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
        "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
        "encoder.layers.{bid}.self_attention.dense", # chatglm
        "transformer.layers.{bid}.attn.out_proj", # openelm
        "transformer.h.{bid}.attn.attention.out_proj", # exaone
    ),
    # Attention output norm
    MODEL_TENSOR.ATTN_OUT_NORM: (
        "encoder.layer.{bid}.attention.output.LayerNorm", # bert
        "encoder.layers.{bid}.norm1", # nomic-bert
        "transformer.decoder_layer.{bid}.rms_norm_1", # Grok
        "transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
    ),

    MODEL_TENSOR.ATTN_POST_NORM: (
        "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2
    ),

    # Rotary embeddings
    MODEL_TENSOR.ATTN_ROT_EMBD: (
        "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
        "layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
        "model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
        "transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell
    ),

    # Feed-forward norm
    MODEL_TENSOR.FFN_NORM: (
        "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
        "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais exaone
        "h.{bid}.post_attention_layernorm", # bloom
        "transformer.blocks.{bid}.norm_2", # mpt
        "model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron olmoe
        "layers.{bid}.ffn_norm", # llama-pth
        "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
        "model.layers.{bid}.ln2", # yi
        "h.{bid}.ln_2", # gpt2
        "model.layers.{bid}.ffn_norm", # internlm2
        "transformer.decoder_layer.{bid}.rms_norm_2", # Grok
        "encoder.layers.{bid}.post_attention_layernorm", # chatglm
        "transformer.layers.{bid}.ffn_norm", # openelm
    ),

    # Post feed-forward norm
    MODEL_TENSOR.FFN_PRE_NORM: (
        "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
    ),

    # Post feed-forward norm
    MODEL_TENSOR.FFN_POST_NORM: (
        "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
    ),

    MODEL_TENSOR.FFN_GATE_INP: (
        "layers.{bid}.feed_forward.gate", # mixtral
        "model.layers.{bid}.block_sparse_moe.gate", # mixtral
        "model.layers.{bid}.mlp.gate", # qwen2moe olmoe
        "transformer.decoder_layer.{bid}.router", # Grok
        "transformer.blocks.{bid}.ffn.router.layer", # dbrx
        "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
    ),

    MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
        "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
    ),

    MODEL_TENSOR.FFN_EXP_PROBS_B: (
        "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
    ),

    # Feed-forward up
    MODEL_TENSOR.FFN_UP: (
        "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
        "transformer.h.{bid}.mlp.c_fc", # gpt2 jais
        "transformer.blocks.{bid}.ffn.up_proj", # mpt
        "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
        "h.{bid}.mlp.dense_h_to_4h", # bloom
        "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
        "layers.{bid}.feed_forward.w3", # llama-pth
        "encoder.layer.{bid}.intermediate.dense", # bert
        "transformer.h.{bid}.mlp.fc_in", # gpt-j
        "transformer.h.{bid}.mlp.linear_3", # refact
        "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
        "model.layers.{bid}.mlp.dense_h_to_4h", # persimmon
        "transformer.h.{bid}.mlp.w1", # qwen
        "h.{bid}.mlp.c_fc", # gpt2
        "transformer.h.{bid}.mlp.fc1", # phi2
        "model.layers.{bid}.mlp.fc1", # phi2
        "model.layers.{bid}.mlp.gate_up_proj", # phi3
        "model.layers.layers.{bid}.mlp.up_proj", # plamo
        "model.layers.{bid}.feed_forward.w3", # internlm2
        "encoder.layers.{bid}.mlp.fc11", # nomic-bert
        "model.layers.{bid}.mlp.c_fc", # starcoder2
        "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
        "model.layers.{bid}.residual_mlp.w3", # arctic
        "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
        "transformer.h.{bid}.mlp.c_fc_1", # exaone
    ),

    MODEL_TENSOR.FFN_UP_EXP: (
        "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
        "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
        "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
        "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
    ),

    MODEL_TENSOR.FFN_UP_SHEXP: (
        "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
        "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
    ),

    # AWQ-activation gate
    MODEL_TENSOR.FFN_ACT: ("transformer.blocks.{bid}.ffn.act",), # mpt

    # Feed-forward gate
    MODEL_TENSOR.FFN_GATE: (
        "model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
        "layers.{bid}.feed_forward.w1", # llama-pth
        "transformer.h.{bid}.mlp.w2", # qwen
        "transformer.h.{bid}.mlp.c_fc2", # jais
        "model.layers.layers.{bid}.mlp.gate_proj", # plamo
        "model.layers.{bid}.feed_forward.w1", # internlm2
        "encoder.layers.{bid}.mlp.fc12", # nomic-bert
        "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
        "transformer.h.{bid}.mlp.linear_1", # refact
        "model.layers.{bid}.residual_mlp.w1", # arctic
        "transformer.h.{bid}.mlp.c_fc_0", # exaone
    ),

    MODEL_TENSOR.FFN_GATE_EXP: (
        "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
        "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
        "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
        "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
    ),

    MODEL_TENSOR.FFN_GATE_SHEXP: (
        "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
        "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
    ),

    # Feed-forward down
    MODEL_TENSOR.FFN_DOWN: (
        "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
        "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
        "transformer.blocks.{bid}.ffn.down_proj", # mpt
        "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
        "h.{bid}.mlp.dense_4h_to_h", # bloom
        "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
        "layers.{bid}.feed_forward.w2", # llama-pth
        "encoder.layer.{bid}.output.dense", # bert
        "transformer.h.{bid}.mlp.fc_out", # gpt-j
        "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
        "model.layers.{bid}.mlp.dense_4h_to_h", # persimmon
        "h.{bid}.mlp.c_proj", # gpt2
        "transformer.h.{bid}.mlp.fc2", # phi2
        "model.layers.{bid}.mlp.fc2", # phi2
        "model.layers.layers.{bid}.mlp.down_proj", # plamo
        "model.layers.{bid}.feed_forward.w2", # internlm2
        "encoder.layers.{bid}.mlp.fc2", # nomic-bert
        "model.layers.{bid}.mlp.c_proj", # starcoder2
        "encoder.layer.{bid}.mlp.wo", # jina-bert-v2
        "transformer.layers.{bid}.ffn.proj_2", # openelm
        "model.layers.{bid}.residual_mlp.w2", # arctic
        "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
        "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
        "model.layers.h.{bid}.mlp.c_proj", # exaone
    ),

    MODEL_TENSOR.FFN_DOWN_EXP: (
        "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
        "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
        "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
        "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
        "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
    ),
    MODEL_TENSOR.FFN_DOWN_SHEXP: (
        "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
        "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
    ),

    MODEL_TENSOR.ATTN_Q_NORM: (
        "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
        "model.layers.{bid}.self_attn.q_layernorm", # persimmon
        "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
        "transformer.blocks.{bid}.attn.q_ln", # sea-lion
        "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
        "transformer.layers.{bid}.attn.q_norm", # openelm
    ),

    MODEL_TENSOR.ATTN_K_NORM: (
        "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
        "model.layers.{bid}.self_attn.k_layernorm", # persimmon
        "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
        "transformer.blocks.{bid}.attn.k_ln", # sea-lion
        "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
        "transformer.layers.{bid}.attn.k_norm", # openelm
    ),

    MODEL_TENSOR.ROPE_FREQS: (
        "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
    ),

    MODEL_TENSOR.LAYER_OUT_NORM: (
        "encoder.layer.{bid}.output.LayerNorm", # bert
        "encoder.layers.{bid}.norm2", # nomic-bert
        "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
        "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
        "encoder.layer.{bid}.layer_norm_2", # jina-v2-code
    ),

    MODEL_TENSOR.SSM_IN: (
        "model.layers.{bid}.in_proj",
        "backbone.layers.{bid}.mixer.in_proj",
    ),

    MODEL_TENSOR.SSM_CONV1D: (
        "model.layers.{bid}.conv1d",
        "backbone.layers.{bid}.mixer.conv1d",
    ),

    MODEL_TENSOR.SSM_X: (
        "model.layers.{bid}.x_proj",
        "backbone.layers.{bid}.mixer.x_proj",
    ),

    MODEL_TENSOR.SSM_DT: (
        "model.layers.{bid}.dt_proj",
        "backbone.layers.{bid}.mixer.dt_proj",
    ),

    MODEL_TENSOR.SSM_A: (
        "model.layers.{bid}.A_log",
        "backbone.layers.{bid}.mixer.A_log",
    ),

    MODEL_TENSOR.SSM_D: (
        "model.layers.{bid}.D",
        "backbone.layers.{bid}.mixer.D",
    ),

    MODEL_TENSOR.SSM_OUT: (
        "model.layers.{bid}.out_proj",
        "backbone.layers.{bid}.mixer.out_proj",
    ),
    MODEL_TENSOR.TIME_MIX_W1: (
        "rwkv.blocks.{bid}.attention.time_maa_w1", # rwkv v6
    ),

    MODEL_TENSOR.TIME_MIX_W2: (
        "rwkv.blocks.{bid}.attention.time_maa_w2", # rwkv v6
    ),

    MODEL_TENSOR.TIME_MIX_LERP_X: (
        "rwkv.blocks.{bid}.attention.time_maa_x", # rwkv v6
    ),

    MODEL_TENSOR.TIME_MIX_LERP_K: (
        "rwkv.blocks.{bid}.attention.time_maa_k", # rwkv v6
    ),

    MODEL_TENSOR.TIME_MIX_LERP_V: (
        "rwkv.blocks.{bid}.attention.time_maa_v", # rwkv v6
    ),

    MODEL_TENSOR.TIME_MIX_LERP_R: (
        "rwkv.blocks.{bid}.attention.time_maa_r", # rwkv v6
    ),

    MODEL_TENSOR.TIME_MIX_LERP_G: (
        "rwkv.blocks.{bid}.attention.time_maa_g", # rwkv v6
    ),

    MODEL_TENSOR.TIME_MIX_LERP_W: (
        "rwkv.blocks.{bid}.attention.time_maa_w", # rwkv v6
    ),

    MODEL_TENSOR.TIME_MIX_FIRST: (
        "rwkv.blocks.{bid}.attention.time_faaaa", # rwkv v6
    ),

    MODEL_TENSOR.TIME_MIX_DECAY: (
        "rwkv.blocks.{bid}.attention.time_decay", # rwkv v6
    ),

    MODEL_TENSOR.TIME_MIX_DECAY_W1: (
        "rwkv.blocks.{bid}.attention.time_decay_w1", # rwkv v6
    ),

    MODEL_TENSOR.TIME_MIX_DECAY_W2: (
        "rwkv.blocks.{bid}.attention.time_decay_w2", # rwkv v6
    ),

    MODEL_TENSOR.TIME_MIX_KEY: ("rwkv.blocks.{bid}.attention.key",), # rwkv
    MODEL_TENSOR.TIME_MIX_VALUE: ("rwkv.blocks.{bid}.attention.value",), # rwkv

    MODEL_TENSOR.TIME_MIX_RECEPTANCE: (
        "rwkv.blocks.{bid}.attention.receptance", # rwkv
    ),

    MODEL_TENSOR.TIME_MIX_GATE: ("rwkv.blocks.{bid}.attention.gate",), # rwkv
    MODEL_TENSOR.TIME_MIX_LN: ("rwkv.blocks.{bid}.attention.ln_x",), # rwkv
    MODEL_TENSOR.TIME_MIX_OUTPUT: ("rwkv.blocks.{bid}.attention.output",), # rwkv

    MODEL_TENSOR.CHANNEL_MIX_LERP_K: (
        "rwkv.blocks.{bid}.feed_forward.time_maa_k", # rwkv v6
    ),

    MODEL_TENSOR.CHANNEL_MIX_LERP_R: (
        "rwkv.blocks.{bid}.feed_forward.time_maa_r", # rwkv v6
    ),

    MODEL_TENSOR.CHANNEL_MIX_KEY: ("rwkv.blocks.{bid}.feed_forward.key",), # rwkv

    MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: (
        "rwkv.blocks.{bid}.feed_forward.receptance", # rwkv
    ),

    MODEL_TENSOR.CHANNEL_MIX_VALUE: (
        "rwkv.blocks.{bid}.feed_forward.value", # rwkv
    ),

    MODEL_TENSOR.ATTN_Q_A: ("model.layers.{bid}.self_attn.q_a_proj",), # deepseek2
    MODEL_TENSOR.ATTN_Q_B: ("model.layers.{bid}.self_attn.q_b_proj",), # deepseek2

    MODEL_TENSOR.ATTN_KV_A_MQA: (
        "model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
    ),

    MODEL_TENSOR.ATTN_KV_B: (
        "model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
    ),

    MODEL_TENSOR.ATTN_Q_A_NORM: (
        "model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
    ),

    MODEL_TENSOR.ATTN_KV_A_NORM: (
        "model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
    ),

    MODEL_TENSOR.ATTN_SUB_NORM: (
        "model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
    ),

    MODEL_TENSOR.FFN_SUB_NORM: ("model.layers.{bid}.mlp.ffn_layernorm",), # bitnet

    MODEL_TENSOR.DEC_ATTN_NORM: ("decoder.block.{bid}.layer.0.layer_norm",), # t5
    MODEL_TENSOR.DEC_ATTN_Q: ("decoder.block.{bid}.layer.0.SelfAttention.q",), # t5
    MODEL_TENSOR.DEC_ATTN_K: ("decoder.block.{bid}.layer.0.SelfAttention.k",), # t5
    MODEL_TENSOR.DEC_ATTN_V: ("decoder.block.{bid}.layer.0.SelfAttention.v",), # t5

    MODEL_TENSOR.DEC_ATTN_OUT: (
        "decoder.block.{bid}.layer.0.SelfAttention.o", # t5
    ),

    MODEL_TENSOR.DEC_ATTN_REL_B: (
        "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
    ),

    MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
        "decoder.block.{bid}.layer.1.layer_norm", # t5
    ),

    MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
        "decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
    ),

    MODEL_TENSOR.DEC_CROSS_ATTN_K: (
        "decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
    ),

    MODEL_TENSOR.DEC_CROSS_ATTN_V: (
        "decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
    ),

    MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
        "decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
    ),

    MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
        "decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
    ),

    MODEL_TENSOR.DEC_FFN_NORM: ("decoder.block.{bid}.layer.2.layer_norm",), # t5

    MODEL_TENSOR.DEC_FFN_GATE: (
        "decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
    ),

    MODEL_TENSOR.DEC_FFN_UP: (
        "decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
        "decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
    ),

    MODEL_TENSOR.DEC_FFN_DOWN: (
        "decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
    ),

    MODEL_TENSOR.DEC_OUTPUT_NORM: ("decoder.final_layer_norm",), # t5

    MODEL_TENSOR.ENC_ATTN_NORM: ("encoder.block.{bid}.layer.0.layer_norm",), # t5
    MODEL_TENSOR.ENC_ATTN_Q: ("encoder.block.{bid}.layer.0.SelfAttention.q",), # t5
    MODEL_TENSOR.ENC_ATTN_K: ("encoder.block.{bid}.layer.0.SelfAttention.k",), # t5
    MODEL_TENSOR.ENC_ATTN_V: ("encoder.block.{bid}.layer.0.SelfAttention.v",), # t5

    MODEL_TENSOR.ENC_ATTN_OUT: (
        "encoder.block.{bid}.layer.0.SelfAttention.o", # t5
    ),

    MODEL_TENSOR.ENC_ATTN_REL_B: (
        "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
    ),

    MODEL_TENSOR.ENC_FFN_NORM: ("encoder.block.{bid}.layer.1.layer_norm",), # t5

    MODEL_TENSOR.ENC_FFN_GATE: (
        "encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
    ),

    MODEL_TENSOR.ENC_FFN_UP: (
        "encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
        "encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
    ),

    MODEL_TENSOR.ENC_FFN_DOWN: (
        "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
    ),

    ############################################################################
    # TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
    MODEL_TENSOR.ENC_OUTPUT_NORM: ("encoder.final_layer_norm",), # t5

    MODEL_TENSOR.CLS: (
        "classifier", # jina
        "classifier.dense", # roberta
    ),

    MODEL_TENSOR.CLS_OUT: ("classifier.out_proj",), # roberta
    #############################################################################

    MODEL_TENSOR.CONVNEXT_DW: ("backbone.convnext.{bid}.dwconv",), # wavtokenizer
    MODEL_TENSOR.CONVNEXT_NORM: ("backbone.convnext.{bid}.norm",), # wavtokenizer
    MODEL_TENSOR.CONVNEXT_PW1: ("backbone.convnext.{bid}.pwconv1",), # wavtokenizer
    MODEL_TENSOR.CONVNEXT_PW2: ("backbone.convnext.{bid}.pwconv2",), # wavtokenizer
    MODEL_TENSOR.CONVNEXT_GAMMA: ("backbone.convnext.{bid}.gamma",), # wavtokenizer

    MODEL_TENSOR.POSNET_CONV1: ("backbone.posnet.{bid}.conv1",), # wavtokenizer
    MODEL_TENSOR.POSNET_CONV2: ("backbone.posnet.{bid}.conv2",), # wavtokenizer
    MODEL_TENSOR.POSNET_NORM: ("backbone.posnet.{bid}.norm",), # wavtokenizer
    MODEL_TENSOR.POSNET_NORM1: ("backbone.posnet.{bid}.norm1",), # wavtokenizer
    MODEL_TENSOR.POSNET_NORM2: ("backbone.posnet.{bid}.norm2",), # wavtokenizer
    MODEL_TENSOR.POSNET_ATTN_NORM: ("backbone.posnet.{bid}.norm",), # wavtokenizer
    MODEL_TENSOR.POSNET_ATTN_Q: ("backbone.posnet.{bid}.q",), # wavtokenizer
    MODEL_TENSOR.POSNET_ATTN_K: ("backbone.posnet.{bid}.k",), # wavtokenizer
    MODEL_TENSOR.POSNET_ATTN_V: ("backbone.posnet.{bid}.v",), # wavtokenizer

    MODEL_TENSOR.POSNET_ATTN_OUT: (
        "backbone.posnet.{bid}.proj_out", # wavtokenizer
    ),
}

# architecture-specific block mappings
arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = {
    MODEL_ARCH.ARCTIC: {
        MODEL_TENSOR.FFN_NORM: ("model.layers.{bid}.residual_layernorm",),
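For readers skimming these tables: `{bid}` stands for the block (layer) index, and each tuple lists the source-model tensor names that feed a single GGUF tensor. A minimal sketch of how such templates are typically expanded during conversion follows; the `lookup` helper, the two-entry table, and the `blk.{bid}.*` output names are illustrative assumptions, not code from this diff.

    # Illustrative only: expand "{bid}" templates and map a source tensor name
    # to a GGUF-style base name. The real TensorNameMap in gguf-py precomputes
    # a reverse index over every template instead of scanning like this.
    block_templates = {
        "attn_q": ("model.layers.{bid}.self_attn.q_proj", "layers.{bid}.attention.wq"),
        "ffn_down": ("model.layers.{bid}.mlp.down_proj", "layers.{bid}.feed_forward.w2"),
    }

    def lookup(name: str, n_blocks: int) -> str | None:
        for gguf_suffix, templates in block_templates.items():
            for bid in range(n_blocks):
                for template in templates:
                    if template.format(bid=bid) == name:
                        return f"blk.{bid}.{gguf_suffix}"
        return None

    print(lookup("model.layers.3.self_attn.q_proj", n_blocks=32))  # blk.3.attn_q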
@@ -157,8 +157,36 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
            tokenizer = json.load(f)
        if self.load_merges:
            merges = tokenizer.get("model", {}).get("merges")
            if isinstance(merges, list) and merges:
                if isinstance(merges[0], str):
                    self.merges = merges
                elif (
                    isinstance(merges[0], list)
                    and len(merges[0]) == 2
                    and isinstance(merges[0][0], str)
                ):
                    # New format since transformers 4.45 to support spaces in merges
                    # ref: https://github.com/ggerganov/llama.cpp/issues/9692
                    # TODO: internally store as the new format instead of converting to old
                    if any(" " in s for pair in merges for s in pair):
                        logger.warning(
                            f'Spaces in merges detected, encoding as {chr(ord(" ") + 256)!r}'
                        )
                    self.merges = [
                        " ".join(
                            [
                                # ensure the spaces are properly encoded
                                "".join(
                                    chr(ord(c) + 256) if c == " " else c
                                    for c in part
                                )
                                for part in pair
                            ]
                        )
                        for pair in merges
                    ]
                else:
                    raise ValueError("Unknown tokenizer merges format")
            added_tokens = tokenizer.get("added_tokens", {})
        else:
            added_tokens = {}
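For context on the merges handling above: when a tokenizer ships merges in the list-of-pairs format introduced in transformers 4.45, the converter joins each pair back into one space-separated string and shifts any literal space inside a token up by 256 code points, so the single separating space stays unambiguous. A small standalone sketch with invented sample pairs:

    # Sketch of the re-encoding shown above; the sample merges are made up.
    merges = [["i n", "g"], ["th", "e"]]

    encoded = [
        " ".join(
            "".join(chr(ord(c) + 256) if c == " " else c for c in part)
            for part in pair
        )
        for pair in merges
    ]

    print(encoded)  # ['iĠn g', 'th e']  (the space inside "i n" became chr(0x120))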
@@ -225,7 +253,6 @@ class Vocab(BaseVocab, Protocol):
    fname_tokenizer: Path

    def __init__(self, base_path: Path): ...

    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: ...
@@ -80,11 +80,15 @@ def load_dotenv(self=Any) -> None:


def show_about(self) -> None:
    about_text = f"""AutoGGUF

Version: {AUTOGGUF_VERSION}

A tool for managing and converting GGUF models.
This application is licensed under the Apache License 2.0.
Copyright (c) 2025 leafspark.
It also utilizes llama.cpp, licensed under the MIT License.
Copyright (c) 2023-2024 The ggml authors."""
    QMessageBox.about(self, "About AutoGGUF", about_text)