style: format code with Black

BuildTools 2024-08-04 19:50:34 -07:00
parent 2dc5bd9e8a
commit fa51f7cdb8
21 changed files with 8215 additions and 6922 deletions
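The reformatting below is what Black produces with its defaults (88-character lines, double quotes, trailing commas in exploded calls). As a rough illustration only — the black package, its format_str helper, and Mode defaults are assumptions here, not something recorded in this commit — one of the long lines from the Logger.py diff below can be re-wrapped like this:

import black

src = (
    "file_handler = RotatingFileHandler(log_file, "
    "maxBytes=10*1024*1024, backupCount=5, encoding='utf-8')\n"
)
# Prints the same exploded three-line layout that appears in the Logger.py hunk below.
print(black.format_str(src, mode=black.Mode()), end="")

A whole-repository pass like this one is typically produced by running black . at the project root.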

View File

@@ -452,8 +452,13 @@ def __init__(self):
        # Output Type Dropdown
        self.lora_output_type_combo = QComboBox()
        self.lora_output_type_combo.addItems(["GGML", "GGUF"])
        self.lora_output_type_combo.currentIndexChanged.connect(
            self.update_base_model_visibility
        )
        lora_layout.addRow(
            self.create_label(OUTPUT_TYPE, SELECT_OUTPUT_TYPE),
            self.lora_output_type_combo,
        )

        # Base Model Path (initially hidden)
        self.base_model_label = self.create_label(BASE_MODEL, SELECT_BASE_MODEL_FILE)
@@ -471,7 +476,9 @@ def __init__(self):
        wrapper_layout = QHBoxLayout(self.base_model_wrapper)
        wrapper_layout.addWidget(self.base_model_label)
        wrapper_layout.addWidget(self.base_model_widget, 1)  # Give it a stretch factor
        wrapper_layout.setContentsMargins(
            0, 0, 0, 0
        )  # Remove margins for better alignment

        # Add the wrapper to the layout
        lora_layout.addRow(self.base_model_wrapper)
@@ -1395,7 +1402,7 @@ def quantize_model(self):
            override_string = entry.get_override_string(
                model_name=model_name,
                quant_type=quant_type,
                output_path=output_path,
            )
            if override_string:
                command.extend(["--override-kv", override_string])
@@ -1430,7 +1437,9 @@ def quantize_model(self):
        self.task_list.setItemWidget(list_item, task_item)

        # Connect the output signal to the new progress parsing function
        thread.output_signal.connect(
            lambda line: self.parse_progress(line, task_item)
        )
        thread.status_signal.connect(task_item.update_status)
        thread.finished_signal.connect(lambda: self.task_finished(thread))
        thread.error_signal.connect(lambda err: self.handle_error(err, task_item))

View File

@@ -13,6 +13,7 @@
import zipfile
from datetime import datetime


class DownloadThread(QThread):
    progress_signal = pyqtSignal(int)
    finished_signal = pyqtSignal(str)
@@ -27,11 +28,11 @@ def run(self):
        try:
            response = requests.get(self.url, stream=True)
            response.raise_for_status()
            total_size = int(response.headers.get("content-length", 0))
            block_size = 8192
            downloaded = 0

            with open(self.save_path, "wb") as file:
                for data in response.iter_content(block_size):
                    size = file.write(data)
                    downloaded += size
@@ -41,7 +42,7 @@ def run(self):
            # Extract the downloaded zip file
            extract_dir = os.path.splitext(self.save_path)[0]
            with zipfile.ZipFile(self.save_path, "r") as zip_ref:
                zip_ref.extractall(extract_dir)

            # Remove the zip file after extraction

View File

@@ -7,6 +7,7 @@
import socket
import platform


class KVOverrideEntry(QWidget):
    deleted = pyqtSignal(QWidget)
@@ -44,7 +45,9 @@ def __init__(self, parent=None):
    def delete_clicked(self):
        self.deleted.emit(self)

    def get_override_string(
        self, model_name=None, quant_type=None, output_path=None
    ):  # Add arguments
        key = self.key_input.text()
        type_ = self.type_combo.currentText()
        value = self.value_input.text()
@@ -60,9 +63,15 @@ def get_override_string(self, model_name=None, quant_type=None, output_path=None
            "{system.python.version}": lambda: platform.python_version(),
            "{system.time.milliseconds}": lambda: str(int(time.time() * 1000)),
            "{system.date}": lambda: datetime.now().strftime("%Y-%m-%d"),
            "{model.name}": lambda: (
                model_name if model_name is not None else "Unknown Model"
            ),
            "{quant.type}": lambda: (
                quant_type if quant_type is not None else "Unknown Quant"
            ),
            "{output.path}": lambda: (
                output_path if output_path is not None else "Unknown Output Path"
            ),
        }

        for param, func in dynamic_params.items():
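Aside from the re-wrapping, the get_override_string logic above is unchanged: each "{...}" placeholder maps to a callable whose result is substituted into the override value. A minimal, self-contained sketch of that pattern (simplified names, not the project's exact code):

from datetime import datetime

def resolve_placeholders(template, model_name=None):
    # Placeholder -> callable, mirroring the dynamic_params table in the diff above.
    dynamic_params = {
        "{system.date}": lambda: datetime.now().strftime("%Y-%m-%d"),
        "{model.name}": lambda: (
            model_name if model_name is not None else "Unknown Model"
        ),
    }
    for param, func in dynamic_params.items():
        template = template.replace(param, func())
    return template

print(resolve_placeholders("{model.name}-{system.date}", model_name="llama-3-8b"))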

View File

@@ -4,6 +4,7 @@
import sys
from datetime import datetime


class Logger:
    def __init__(self, name, log_dir):
        self.logger = logging.getLogger(name)
@@ -15,15 +16,19 @@ def __init__(self, name, log_dir):
        # Console handler
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        console_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        console_handler.setFormatter(console_format)

        # File handler
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(log_dir, f"latest_{timestamp}.log")
        file_handler = RotatingFileHandler(
            log_file, maxBytes=10 * 1024 * 1024, backupCount=5, encoding="utf-8"
        )
        file_handler.setLevel(logging.DEBUG)
        file_format = logging.Formatter(
            "%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
        )
        file_handler.setFormatter(file_format)

        # Add handlers to logger

View File

@@ -13,6 +13,7 @@
import zipfile
from datetime import datetime


class ModelInfoDialog(QDialog):
    def __init__(self, model_info, parent=None):
        super().__init__(parent)
@@ -41,8 +42,7 @@ def format_model_info(self, model_info):
        html += f"<p><b>Tensors:</b> {model_info.get('tensors', 'N/A')}</p>"

        html += "<h3>Key-Value Pairs:</h3>"
        for key, value in model_info.get("kv_data", {}).items():
            html += f"<p><b>{key}:</b> {value}</p>"

        return html

View File

@@ -15,6 +15,7 @@
from datetime import datetime
from imports_and_globals import open_file_safe


class QuantizationThread(QThread):
    # Define custom signals for communication with the main thread
    output_signal = pyqtSignal(str)

View File

@@ -13,6 +13,7 @@
import zipfile
from datetime import datetime


class TaskListItem(QWidget):
    def __init__(self, task_name, log_file, show_progress_bar=True, parent=None):
        super().__init__(parent)

View File

@@ -12,8 +12,8 @@
import numpy as np
import torch

if "NO_LOCAL_GGUF" not in os.environ:
    sys.path.insert(1, str(Path(__file__).parent / "gguf-py" / "gguf"))
import gguf

logging.basicConfig(level=logging.DEBUG)
@@ -35,7 +35,9 @@ def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
    fout.write(struct.pack("i", int(params["lora_alpha"])))


def write_tensor_header(
    fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]
) -> None:
    sname = name.encode("utf-8")
    fout.write(
        struct.pack(
@@ -49,15 +51,21 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
    fout.write(sname)
    fout.seek((fout.tell() + 31) & -32)


def pyinstaller_include():
    # PyInstaller import
    pass


if __name__ == "__main__":
    if len(sys.argv) < 2:
        logger.info(f"Usage: python {sys.argv[0]} <path> [arch]")
        logger.info(
            "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
        )
        logger.info(
            f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)"
        )
        sys.exit(1)

    input_json = os.path.join(sys.argv[1], "adapter_config.json")
@@ -70,6 +78,7 @@ def pyinstaller_include():
        input_model = os.path.join(sys.argv[1], "adapter_model.safetensors")
        # lazy import load_file only if lora is in safetensors format.
        from safetensors.torch import load_file

        model = load_file(input_model, device="cpu")

    arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
@@ -78,14 +87,18 @@ def pyinstaller_include():
        logger.error(f"Error: unsupported architecture {arch_name}")
        sys.exit(1)

    arch = list(gguf.MODEL_ARCH_NAMES.keys())[
        list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)
    ]
    name_map = gguf.TensorNameMap(arch, 200)  # 200 layers ought to be enough for anyone

    with open(input_json, "r") as f:
        params = json.load(f)

    if params["peft_type"] != "LORA":
        logger.error(
            f"Error: unsupported adapter type {params['peft_type']}, expected LORA"
        )
        sys.exit(1)

    if params["fan_in_fan_out"] is True:
@@ -136,7 +149,9 @@ def pyinstaller_include():
        tname = name_map.get_name(k)
        if tname is None:
            logger.error(f"Error: could not map tensor name {orig_k}")
            logger.error(
                " Note: the arch parameter must be specified if the model is not llama"
            )
            sys.exit(1)

        if suffix == ".lora_A.weight":
@@ -146,7 +161,9 @@ def pyinstaller_include():
        else:
            assert False

        logger.info(
            f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB"
        )

        write_tensor_header(fout, tname, t.shape, t.dtype)
        t.tofile(fout)

View File

@@ -54,7 +54,9 @@ class General:
        SOURCE_URL = "general.source.url"  # Model Website/Paper
        SOURCE_DOI = "general.source.doi"
        SOURCE_UUID = "general.source.uuid"
        SOURCE_REPO_URL = (
            "general.source.repo_url"  # Model Source Repository (git/svn/etc...)
        )

        # Base Model Source. There can be more than one source if it's a merged
        # model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in
@@ -136,7 +138,9 @@ class Tokenizer:
        PRE = "tokenizer.ggml.pre"
        LIST = "tokenizer.ggml.tokens"
        TOKEN_TYPE = "tokenizer.ggml.token_type"
        TOKEN_TYPE_COUNT = (
            "tokenizer.ggml.token_type_count"  # for BERT-style token types
        )
        SCORES = "tokenizer.ggml.scores"
        MERGES = "tokenizer.ggml.merges"
        BOS_ID = "tokenizer.ggml.bos_token_id"
@@ -166,6 +170,7 @@ class Adapter:
        TYPE = "adapter.type"
        LORA_ALPHA = "adapter.lora.alpha"


#
# recommended mapping of model tensor names for storage in gguf
#
@@ -1104,9 +1109,9 @@ class TokenType(IntEnum):

class RopeScalingType(Enum):
    NONE = "none"
    LINEAR = "linear"
    YARN = "yarn"


class PoolingType(IntEnum):

View File

@@ -67,7 +67,7 @@ class ReaderTensor(NamedTuple):

class GGUFReader:
    # I - same as host, S - swapped
    byte_order: Literal["I", "S"] = "I"
    alignment: int = GGUF_DEFAULT_ALIGNMENT
    data_offset: int
@@ -86,13 +86,15 @@ class GGUFReader:
        GGUFValueType.BOOL: np.bool_,
    }

    def __init__(
        self, path: os.PathLike[str] | str, mode: Literal["r", "r+", "c"] = "r"
    ):
        self.data = np.memmap(path, mode=mode)
        offs = 0

        # Check for GGUF magic
        if self._get(offs, np.uint32, override_order="<")[0] != GGUF_MAGIC:
            raise ValueError("GGUF magic invalid")
        offs += 4

        # Check GGUF version
@@ -100,28 +102,46 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
        if temp_version[0] & 65535 == 0:
            # If we get 0 here that means it's (probably) a GGUF file created for
            # the opposite byte order of the machine this script is running on.
            self.byte_order = "S"
            temp_version = temp_version.newbyteorder(self.byte_order)
        version = temp_version[0]
        if version not in READER_SUPPORTED_VERSIONS:
            raise ValueError(
                f"Sorry, file appears to be version {version} which we cannot handle"
            )
        self.fields: OrderedDict[str, ReaderField] = OrderedDict()
        self.tensors: list[ReaderTensor] = []
        offs += self._push_field(
            ReaderField(
                offs, "GGUF.version", [temp_version], [0], [GGUFValueType.UINT32]
            )
        )

        # Check tensor count and kv count
        temp_counts = self._get(offs, np.uint64, 2)
        offs += self._push_field(
            ReaderField(
                offs,
                "GGUF.tensor_count",
                [temp_counts[:1]],
                [0],
                [GGUFValueType.UINT64],
            )
        )
        offs += self._push_field(
            ReaderField(
                offs, "GGUF.kv_count", [temp_counts[1:]], [0], [GGUFValueType.UINT64]
            )
        )
        tensor_count, kv_count = temp_counts
        offs = self._build_fields(offs, kv_count)

        # Build Tensor Info Fields
        offs, tensors_fields = self._build_tensor_info(offs, tensor_count)
        new_align = self.fields.get("general.alignment")
        if new_align is not None:
            if new_align.types != [GGUFValueType.UINT32]:
                raise ValueError("Bad type for general.alignment field")
            self.alignment = new_align.parts[-1][0]
        padding = offs % self.alignment
        if padding != 0:
@@ -129,7 +149,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
        self.data_offset = offs
        self._build_tensors(offs, tensors_fields)

    _DT = TypeVar("_DT", bound=npt.DTypeLike)

    # Fetch a key/value metadata field by key.
    def get_field(self, key: str) -> Union[ReaderField, None]:
@@ -140,7 +160,11 @@ def get_tensor(self, idx: int) -> ReaderTensor:
        return self.tensors[idx]

    def _get(
        self,
        offset: int,
        dtype: npt.DTypeLike,
        count: int = 1,
        override_order: None | Literal["I", "S", "<"] = None,
    ) -> npt.NDArray[Any]:
        count = int(count)
        itemsize = int(np.empty([], dtype=dtype).itemsize)
@@ -156,18 +180,22 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
            # TODO: add option to generate error on duplicate keys
            # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
            logger.warning(f"Duplicate key {field.name} at offset {field.offset}")
            self.fields[field.name + "_{}".format(field.offset)] = field
        else:
            self.fields[field.name] = field
        return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)

    def _get_str(
        self, offset: int
    ) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
        slen = self._get(offset, np.uint64)
        return slen, self._get(offset + 8, np.uint8, slen[0])

    def _get_field_parts(
        self,
        orig_offs: int,
        raw_type: int,
    ) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
        offs = orig_offs
        types: list[GGUFValueType] = []
@@ -192,7 +220,9 @@ def _get_field_parts(
            aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
            data_idxs: list[int] = []
            for idx in range(alen[0]):
                curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(
                    offs, raw_itype[0]
                )
                if idx == 0:
                    types += curr_types
                    idxs_offs = len(aparts)
@@ -201,7 +231,7 @@ def _get_field_parts(
                offs += curr_size
            return offs - orig_offs, aparts, data_idxs, types

        # We can't deal with this one.
        raise ValueError("Unknown/unhandled field type {gtype}")

    def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
        offs = orig_offs
@@ -228,7 +258,7 @@ def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
        return ReaderField(
            orig_offs,
            str(bytes(name_data), encoding="utf-8"),
            [name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
            [1, 3, 4, 5],
        )
@@ -242,19 +272,26 @@ def _build_fields(self, offs: int, count: int) -> int:
            offs += int(raw_kv_type.nbytes)
            parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
            idxs_offs = len(parts)
            field_size, field_parts, field_idxs, field_types = self._get_field_parts(
                offs, raw_kv_type[0]
            )
            parts += field_parts
            self._push_field(
                ReaderField(
                    orig_offs,
                    str(bytes(kv_kdata), encoding="utf-8"),
                    parts,
                    [idx + idxs_offs for idx in field_idxs],
                    field_types,
                ),
                skip_sum=True,
            )
            offs += field_size
        return offs

    def _build_tensor_info(
        self, offs: int, count: int
    ) -> tuple[int, list[ReaderField]]:
        tensor_fields = []
        for _ in range(count):
            field = self._get_tensor_info_field(offs)
@@ -268,9 +305,9 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
        for field in fields:
            _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
            # check if there's any tensor having same name already in the list
            tensor_name = str(bytes(name_data), encoding="utf-8")
            if tensor_name in tensor_names:
                raise ValueError(f"Found duplicated tensor with name {tensor_name}")
            tensor_names.add(tensor_name)
            ggml_type = GGMLQuantizationType(raw_dtype[0])
            n_elems = int(np.prod(dims))
@@ -304,7 +341,8 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
                item_count = n_bytes
                item_type = np.uint8
                np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)
            tensors.append(
                ReaderTensor(
                    name=tensor_name,
                    tensor_type=ggml_type,
                    shape=dims,
@@ -313,5 +351,6 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
                    data_offset=data_offs,
                    data=self._get(data_offs, item_type, item_count).reshape(np_dims),
                    field=field,
                )
            )
        self.tensors = tensors
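The reader changes above are purely cosmetic; the surface used elsewhere (fields, tensors, get_field, get_tensor, alignment, data_offset) is untouched. A hedged usage sketch — the gguf-py import path and the local model.gguf filename are assumptions, not part of this commit:

from gguf import GGUFReader

reader = GGUFReader("model.gguf")  # hypothetical path
print(reader.byte_order, reader.alignment, reader.data_offset)
alignment_field = reader.get_field("general.alignment")  # None if the key is absent
print(len(reader.tensors), "tensors")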

View File

@@ -81,8 +81,15 @@ class GGUFWriter:
    }

    def __init__(
        self,
        path: os.PathLike[str] | str | None,
        arch: str,
        use_temp_file: bool = False,
        endianess: GGUFEndian = GGUFEndian.LITTLE,
        split_max_tensors: int = 0,
        split_max_size: int = 0,
        dry_run: bool = False,
        small_first_shard: bool = False,
    ):
        self.fout = None
        self.path = Path(path) if path else None
@@ -97,9 +104,11 @@ def __init__(
        self.split_max_size = split_max_size
        self.dry_run = dry_run
        self.small_first_shard = small_first_shard
        logger.info(
            "gguf: This GGUF file is for {0} Endian only".format(
                "Big" if self.endianess == GGUFEndian.BIG else "Little",
            )
        )
        self.state = WriterState.NO_FILE

        if self.small_first_shard:
@@ -128,7 +137,9 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
            elif name.endswith(".lora_b"):
                if last_lora_a is None or last_lora_a[0] != name[:-1] + "a":
                    # Bail when the LoRA pair can't be found trivially
                    logger.warning(
                        "can't measure LoRA size correctly, tensor order is unusual"
                    )
                    return 0, 0, 0, 0
                else:
                    shape = (*shape[:-1], last_lora_a[1].shape[-1])
@@ -136,7 +147,7 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
            size = prod(shape)

            if "_exps." in name:
                expert_params += size // shape[-3]
                expert_sum += shape[-3]
                n_expert_tensors += 1
            else:
@@ -157,15 +168,26 @@ def format_shard_names(self, path: Path) -> list[Path]:
    def format_shard_names(self, path: Path) -> list[Path]:
        if len(self.tensors) == 1:
            return [path]
        return [
            path.with_name(
                SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))
            )
            for i in range(len(self.tensors))
        ]

    def open_output_file(self, path: Path | None = None) -> None:
        if (
            self.state is WriterState.EMPTY
            and self.fout is not None
            and (path is None or path == self.path)
        ):
            # allow calling this multiple times as long as the path is the same
            return

        if self.state is not WriterState.NO_FILE:
            raise ValueError(
                f"Expected output file to be not yet opened, got {self.state}"
            )

        if path is not None:
            self.path = path
@@ -181,7 +203,9 @@ def print_plan(self) -> list[Path]:
        filenames = self.format_shard_names(self.path)
        assert len(filenames) == len(self.tensors)
        for name, tensors in zip(filenames, self.tensors):
            logger.info(
                f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}"
            )

        if self.dry_run:
            logger.info("Dry run, not writing files")
@@ -201,17 +225,23 @@ def add_shard_kv_data(self) -> None:
        self.kv_data.extend({} for _ in range(len(self.kv_data), total_splits))
        for i, kv_data in enumerate(self.kv_data):
            kv_data[Keys.Split.LLM_KV_SPLIT_NO] = GGUFValue(i, GGUFValueType.UINT16)
            kv_data[Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(
                total_splits, GGUFValueType.UINT16
            )
            kv_data[Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(
                total_tensors, GGUFValueType.INT32
            )

    def write_header_to_file(self, path: Path | None = None) -> None:
        if len(self.tensors) == 1 and (
            self.split_max_tensors != 0 or self.split_max_size != 0
        ):
            logger.warning("Model fails split requirements, not splitting")

        self.open_output_file(path)

        if self.state is not WriterState.EMPTY:
            raise ValueError(f"Expected output file to be empty, got {self.state}")

        assert self.fout is not None
        assert len(self.fout) == len(self.tensors)
@@ -229,7 +259,9 @@ def write_header_to_file(self, path: Path | None = None) -> None:
    def write_kv_data_to_file(self) -> None:
        if self.state is not WriterState.HEADER:
            raise ValueError(
                f"Expected output file to contain the header, got {self.state}"
            )
        assert self.fout is not None

        for fout, kv_data in zip(self.fout, self.kv_data):
@@ -246,7 +278,9 @@ def write_kv_data_to_file(self) -> None:
    def write_ti_data_to_file(self) -> None:
        if self.state is not WriterState.KV_DATA:
            raise ValueError(
                f"Expected output file to contain KV data, got {self.state}"
            )
        assert self.fout is not None

        for fout, tensors in zip(self.fout, self.tensors):
@@ -269,7 +303,7 @@ def write_ti_data_to_file(self) -> None:
    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
        if any(key in kv_data for kv_data in self.kv_data):
            raise ValueError(f"Duplicated key name {key!r}")

        self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
@@ -321,14 +355,20 @@ def ggml_pad(x: int, n: int) -> int:
        return ((x + n - 1) // n) * n

    def add_tensor_info(
        self,
        name: str,
        tensor_shape: Sequence[int],
        tensor_dtype: np.dtype,
        tensor_nbytes: int,
        raw_dtype: GGMLQuantizationType | None = None,
    ) -> None:
        if self.state is not WriterState.NO_FILE:
            raise ValueError(
                f"Expected output file to be not yet opened, got {self.state}"
            )

        if any(name in tensors for tensors in self.tensors):
            raise ValueError(f"Duplicated tensor name {name!r}")

        if raw_dtype is None:
            if tensor_dtype == np.float16:
@@ -346,7 +386,9 @@ def add_tensor_info(
            elif tensor_dtype == np.int64:
                dtype = GGMLQuantizationType.I64
            else:
                raise ValueError(
                    "Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now"
                )
        else:
            dtype = raw_dtype
            if tensor_dtype == np.uint8:
@@ -359,14 +401,20 @@ def add_tensor_info(
            and len(self.tensors[-1]) >= self.split_max_tensors
        ) or (  # split when over size limit
            self.split_max_size != 0
            and sum(ti.nbytes for ti in self.tensors[-1].values()) + tensor_nbytes
            > self.split_max_size
        ):
            self.tensors.append({})

        self.tensors[-1][name] = TensorInfo(
            shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes
        )

    def add_tensor(
        self,
        name: str,
        tensor: np.ndarray[Any, Any],
        raw_shape: Sequence[int] | None = None,
        raw_dtype: GGMLQuantizationType | None = None,
    ) -> None:
        if self.endianess == GGUFEndian.BIG:
@@ -377,7 +425,9 @@ def add_tensor(
            self.temp_file = fp

        shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
        self.add_tensor_info(
            name, shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype
        )

        if self.temp_file is None:
            self.tensors[-1][name].tensor = tensor
@@ -387,13 +437,21 @@ def add_tensor(
            self.write_padding(self.temp_file, tensor.nbytes)

    def write_padding(self, fp: IO[bytes], n: int, align: int | None = None) -> None:
        pad = (
            GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment)
            - n
        )
        if pad != 0:
            fp.write(bytes([0] * pad))

    def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
        if (
            self.state is not WriterState.TI_DATA
            and self.state is not WriterState.WEIGHTS
        ):
            raise ValueError(
                f"Expected output file to contain tensor info or weights, got {self.state}"
            )
        assert self.fout is not None

        if self.endianess == GGUFEndian.BIG:
@@ -409,7 +467,9 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
        # pop the first tensor info
        # TODO: cleaner way to get the first key
        first_tensor_name = [
            name for name, _ in zip(self.tensors[file_id].keys(), range(1))
        ][0]
        ti = self.tensors[file_id].pop(first_tensor_name)

        assert ti.nbytes == tensor.nbytes
@@ -437,8 +497,15 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
            total_bytes = sum(ti.nbytes for t in self.tensors for ti in t.values())

            if len(self.fout) > 1:
                shard_bar = tqdm(
                    desc=f"Shard (0/{len(self.fout)})",
                    total=None,
                    unit="byte",
                    unit_scale=True,
                )
            bar = tqdm(
                desc="Writing", total=total_bytes, unit="byte", unit_scale=True
            )

        for i, (fout, tensors) in enumerate(zip(self.fout, self.tensors)):
            if shard_bar is not None:
@@ -448,7 +515,9 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
            # relying on the fact that Python dicts preserve insertion order (since 3.7)
            for ti in tensors.values():
                assert (
                    ti.tensor is not None
                )  # can only iterate once over the tensors
                assert ti.tensor.nbytes == ti.nbytes
                ti.tensor.tofile(fout)
                if shard_bar is not None:
@@ -460,7 +529,9 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
        else:
            self.temp_file.seek(0)

            shutil.copyfileobj(
                self.temp_file, self.fout[0 if not self.small_first_shard else 1]
            )
            self.flush()
            self.temp_file.close()
@@ -566,7 +637,9 @@ def add_base_model_version(self, source_id: int, version: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_VERSION.format(id=source_id), version)

    def add_base_model_organization(self, source_id: int, organization: str) -> None:
        self.add_string(
            Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization
        )

    def add_base_model_url(self, source_id: int, url: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
@@ -605,7 +678,9 @@ def add_block_count(self, length: int) -> None:
        self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)

    def add_leading_dense_block_count(self, length: int) -> None:
        self.add_uint32(
            Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch), length
        )

    def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
        if isinstance(length, int):
@@ -614,10 +689,14 @@ def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
            self.add_array(Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length)

    def add_expert_feed_forward_length(self, length: int) -> None:
        self.add_uint32(
            Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length
        )

    def add_expert_shared_feed_forward_length(self, length: int) -> None:
        self.add_uint32(
            Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length
        )

    def add_parallel_residual(self, use: bool) -> None:
        self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
@@ -736,10 +815,14 @@ def add_tokenizer_model(self, model: str) -> None:
    def add_tokenizer_pre(self, pre: str) -> None:
        self.add_string(Keys.Tokenizer.PRE, pre)

    def add_token_list(
        self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]
    ) -> None:
        self.add_array(Keys.Tokenizer.LIST, tokens)

    def add_token_merges(
        self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]
    ) -> None:
        self.add_array(Keys.Tokenizer.MERGES, merges)

    def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None:
@@ -793,18 +876,22 @@ def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
            template_names = set()

            for choice in value:
                name = choice.get("name", "")
                template = choice.get("template")

                # Allowing non-alphanumerical characters in template name is probably not a good idea, so filter it
                name = "".join(
                    (c if c in ascii_letters + digits else "_" for c in name)
                )

                if name and template is not None:
                    if name == "default":
                        template_default = template
                    else:
                        template_names.add(name)
                        self.add_string(
                            Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template
                        )

            if template_names:
                self.add_array(Keys.Tokenizer.CHAT_TEMPLATES, list(template_names))
@@ -829,10 +916,10 @@ def add_eot_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.EOT_ID, id)

    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
        pack_prefix = ""
        if not skip_pack_prefix:
            pack_prefix = "<" if self.endianess == GGUFEndian.LITTLE else ">"
        return struct.pack(f"{pack_prefix}{fmt}", value)

    def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
        kv_data = bytearray()
@@ -842,7 +929,9 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
        pack_fmt = self._simple_value_packing.get(vtype)
        if pack_fmt is not None:
            kv_data += self._pack(
                pack_fmt, val, skip_pack_prefix=vtype == GGUFValueType.BOOL
            )
        elif vtype == GGUFValueType.STRING:
            encoded_val = val.encode("utf-8") if isinstance(val, str) else val
            kv_data += self._pack("Q", len(encoded_val))
@@ -860,7 +949,9 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
            else:
                ltype = GGUFValueType.get_type(val[0])
                if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                    raise ValueError(
                        "All items in a GGUF array should be of the same type"
                    )
            kv_data += self._pack("I", ltype)
            kv_data += self._pack("Q", len(val))
            for item in val:
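The ggml_pad helper and write_padding shown earlier in this file's diff round sizes up to the next multiple of the alignment; a small self-contained check of that arithmetic (a standalone sketch, not the writer's own tests):

def ggml_pad(x: int, n: int) -> int:
    # Round x up to the nearest multiple of n, same formula as GGUFWriter.ggml_pad above.
    return ((x + n - 1) // n) * n

assert ggml_pad(100, 32) == 128  # 100 bytes padded out to a 32-byte boundary
assert ggml_pad(128, 32) == 128  # already aligned, nothing added
print(ggml_pad(100, 32) - 100)   # 28 -> the pad bytes write_padding would emit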

View File

@@ -13,7 +13,9 @@

class LazyMeta(ABCMeta):

    def __new__(
        cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs
    ):
        def __getattr__(self, name: str) -> Any:
            meta_attr = getattr(self._meta, name)
            if callable(meta_attr):
@@ -41,6 +43,7 @@ def wrapped_special_op(self, *args, **kwargs):
                    getattr(type(self)._tensor_type, op_name),
                    meta_noop=meta_noop,
                )(self, *args, **kwargs)

            return wrapped_special_op

        # special methods bypass __getattr__, so they need to be added manually
@@ -48,11 +51,48 @@ def wrapped_special_op(self, *args, **kwargs):
        # NOTE: doing this from a metaclass is very convenient
        # TODO: make this even more comprehensive
        for binary_op in (
            "lt",
            "le",
            "eq",
            "ne",
            "ge",
            "gt",
            "not" "abs",
            "add",
            "and",
            "floordiv",
            "invert",
            "lshift",
            "mod",
            "mul",
            "matmul",
            "neg",
            "or",
            "pos",
            "pow",
            "rshift",
            "sub",
            "truediv",
            "xor",
            "iadd",
            "iand",
            "ifloordiv",
            "ilshift",
            "imod",
            "imul",
            "ior",
            "irshift",
            "isub",
            "ixor",
            "radd",
            "rand",
            "rfloordiv",
            "rmul",
            "ror",
            "rpow",
            "rsub",
            "rtruediv",
            "rxor",
        ):
            attr_name = f"__{binary_op}__"
            # the result of these operators usually has the same shape and dtype as the input,
@@ -60,7 +100,9 @@ def wrapped_special_op(self, *args, **kwargs):
            namespace[attr_name] = mk_wrap(attr_name, meta_noop=True)

        for special_op in (
            "getitem",
            "setitem",
            "len",
        ):
            attr_name = f"__{special_op}__"
            namespace[attr_name] = mk_wrap(attr_name, meta_noop=False)
@@ -77,7 +119,15 @@ class LazyBase(ABC, metaclass=LazyMeta):
    _kwargs: dict[str, Any]
    _func: Callable[[Any], Any] | None

    def __init__(
        self,
        *,
        meta: Any,
        data: Any | None = None,
        args: tuple = (),
        kwargs: dict[str, Any] | None = None,
        func: Callable[[Any], Any] | None = None,
    ):
        super().__init__()
        self._meta = meta
        self._data = data
@@ -107,7 +157,17 @@ def _recurse_apply(o: Any, fn: Callable[[Any], Any]) -> Any:
        return o

    @classmethod
    def _wrap_fn(
        cls,
        fn: Callable,
        *,
        use_self: LazyBase | None = None,
        meta_noop: (
            bool
            | DTypeLike
            | tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]]
        ) = False,
    ) -> Callable[[Any], Any]:
        def wrapped_fn(*args, **kwargs):
            if kwargs is None:
                kwargs = {}
@@ -138,13 +198,16 @@ def wrapped_fn(*args, **kwargs):
                    res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)

            if isinstance(res, cls._tensor_type):
                return cls(
                    meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn
                )
            else:
                del res  # not needed
                # non-tensor return likely relies on the contents of the args
                # (e.g. the result of torch.equal)
                eager_args = cls.to_eager(args)
                return fn(*eager_args, **kwargs)

        return wrapped_fn

    @classmethod
@@ -175,7 +238,8 @@ def eager_to_meta(cls, t: Any) -> Any:
    # must be overridden, meta tensor init is backend-specific
    @classmethod
    @abstractmethod
    def meta_with_dtype_and_shape(cls, dtype: Any, shape: Any) -> Any:
        pass

    @classmethod
    def from_eager(cls, t: Any) -> Any:
@@ -192,7 +256,9 @@ class LazyNumpyTensor(LazyBase):
    _tensor_type = np.ndarray

    @classmethod
    def meta_with_dtype_and_shape(
        cls, dtype: DTypeLike, shape: tuple[int, ...]
    ) -> np.ndarray[Any, Any]:
        # The initial idea was to use np.nan as the fill value,
        # but non-float types like np.int16 can't use that.
        # So zero it is.
@@ -201,8 +267,16 @@ def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) ->
    def astype(self, dtype, *args, **kwargs):
        meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
        full_args = (
            self,
            dtype,
        ) + args
        return type(self)(
            meta=meta,
            args=full_args,
            kwargs=kwargs,
            func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)),
        )

    def tofile(self, *args, **kwargs):
        eager = LazyNumpyTensor.to_eager(self)

View File

@@ -44,7 +44,12 @@ class Metadata:
    datasets: Optional[list[str]] = None

    @staticmethod
    def load(
        metadata_override_path: Optional[Path] = None,
        model_path: Optional[Path] = None,
        model_name: Optional[str] = None,
        total_params: int = 0,
    ) -> Metadata:
        # This grabs as many contextual authorship metadata as possible from the model repository
        # making any conversion as required to match the gguf kv store metadata format
        # as well as giving users the ability to override any authorship metadata that may be incorrect
@@ -57,7 +62,9 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
        # TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter

        # heuristics
        metadata = Metadata.apply_metadata_heuristic(
            metadata, model_card, hf_params, model_path, total_params
        )

        # Metadata Override File Provided
        # This is based on LLM_KV_NAMES mapping in llama.cpp
@@ -66,34 +73,66 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
        metadata.name = metadata_override.get(Keys.General.NAME, metadata.name)
        metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author)
        metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version)
        metadata.organization = metadata_override.get(
            Keys.General.ORGANIZATION, metadata.organization
        )

        metadata.finetune = metadata_override.get(
            Keys.General.FINETUNE, metadata.finetune
        )
        metadata.basename = metadata_override.get(
            Keys.General.BASENAME, metadata.basename
        )

        metadata.description = metadata_override.get(
            Keys.General.DESCRIPTION, metadata.description
        )
        metadata.quantized_by = metadata_override.get(
            Keys.General.QUANTIZED_BY, metadata.quantized_by
        )

        metadata.size_label = metadata_override.get(
            Keys.General.SIZE_LABEL, metadata.size_label
        )
        metadata.license_name = metadata_override.get(
            Keys.General.LICENSE_NAME, metadata.license_name
        )
        metadata.license_link = metadata_override.get(
            Keys.General.LICENSE_LINK, metadata.license_link
        )

        metadata.url = metadata_override.get(Keys.General.URL, metadata.url)
        metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi)
        metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid)
        metadata.repo_url = metadata_override.get(
            Keys.General.REPO_URL, metadata.repo_url
        )

        metadata.source_url = metadata_override.get(
            Keys.General.SOURCE_URL, metadata.source_url
        )
        metadata.source_doi = metadata_override.get(
            Keys.General.SOURCE_DOI, metadata.source_doi
        )
        metadata.source_uuid = metadata_override.get(
            Keys.General.SOURCE_UUID, metadata.source_uuid
        )
        metadata.source_repo_url = metadata_override.get(
            Keys.General.SOURCE_REPO_URL, metadata.source_repo_url
        )

        # Base Models is received here as an array of models
        metadata.base_models = metadata_override.get(
            "general.base_models", metadata.base_models
        )

        metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages) metadata.languages = metadata_override.get(
metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets) Keys.General.LANGUAGES, metadata.languages
)
metadata.datasets = metadata_override.get(
Keys.General.DATASETS, metadata.datasets
)
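For orientation, a hypothetical override file that the lookups above would consume; the "general.*" key strings are assumptions inferred from the Keys.General names used here (only "general.base_models" appears literally in this code).

import json

override = {
    "general.name": "My Model",           # Keys.General.NAME
    "general.author": "Example Lab",      # Keys.General.AUTHOR
    "general.version": "v1.0",            # Keys.General.VERSION
    "general.base_models": [              # consumed as a list of dicts, as above
        {"name": "Some Base Model", "organization": "Some Org", "version": "v0.1"}
    ],
    "general.tags": ["text-generation"],  # Keys.General.TAGS
}
with open("metadata_override.json", "w", encoding="utf-8") as f:
    json.dump(override, f, indent=2)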
# Direct Metadata Override (via direct cli argument) # Direct Metadata Override (via direct cli argument)
if model_name is not None: if model_name is not None:
@ -102,7 +141,9 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
return metadata return metadata
@staticmethod @staticmethod
def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, Any]: def load_metadata_override(
metadata_override_path: Optional[Path] = None,
) -> dict[str, Any]:
if metadata_override_path is None or not metadata_override_path.is_file(): if metadata_override_path is None or not metadata_override_path.is_file():
return {} return {}
@ -128,7 +169,9 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]:
if isinstance(data, dict): if isinstance(data, dict):
return data return data
else: else:
logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict") logger.error(
f"while reading YAML model card frontmatter, data is {type(data)} instead of dict"
)
return {} return {}
else: else:
return {} return {}
@ -149,10 +192,21 @@ def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]:
@staticmethod @staticmethod
def id_to_title(string): def id_to_title(string):
# Convert capitalization into title form unless acronym or version number # Convert capitalization into title form unless acronym or version number
return ' '.join([w.title() if w.islower() and not re.match(r'^(v\d+(?:\.\d+)*|\d.*)$', w) else w for w in string.strip().replace('-', ' ').split()]) return " ".join(
[
(
w.title()
if w.islower() and not re.match(r"^(v\d+(?:\.\d+)*|\d.*)$", w)
else w
)
for w in string.strip().replace("-", " ").split()
]
)
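Restated standalone so it can be run directly (behaviour assumed to match the method above), the title-casing rule leaves acronyms and version-like tokens untouched:

import re

def id_to_title(string: str) -> str:
    # Title-case lowercase words, but keep acronyms and version/number tokens as-is.
    return " ".join(
        w.title() if w.islower() and not re.match(r"^(v\d+(?:\.\d+)*|\d.*)$", w) else w
        for w in string.strip().replace("-", " ").split()
    )

print(id_to_title("mistral-7b-instruct-v0.2"))  # Mistral 7b Instruct v0.2
print(id_to_title("CodeLlama-13B"))             # CodeLlama 13B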
@staticmethod @staticmethod
def get_model_id_components(model_id: Optional[str] = None, total_params: int = 0) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]: def get_model_id_components(
model_id: Optional[str] = None, total_params: int = 0
) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
# Huggingface often store model id as '<org>/<model name>' # Huggingface often store model id as '<org>/<model name>'
# so let's parse it and apply some heuristics if possible for model name components # so let's parse it and apply some heuristics if possible for model name components
@ -160,24 +214,24 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
# model ID missing # model ID missing
return None, None, None, None, None, None return None, None, None, None, None, None
if ' ' in model_id: if " " in model_id:
# model ID is actually a normal human sentence # model ID is actually a normal human sentence
# which means its most likely a normal model name only # which means its most likely a normal model name only
# not part of the hugging face naming standard, but whatever # not part of the hugging face naming standard, but whatever
return model_id, None, None, None, None, None return model_id, None, None, None, None, None
if '/' in model_id: if "/" in model_id:
# model ID (huggingface style) # model ID (huggingface style)
org_component, model_full_name_component = model_id.split('/', 1) org_component, model_full_name_component = model_id.split("/", 1)
else: else:
# model ID but missing org components # model ID but missing org components
org_component, model_full_name_component = None, model_id org_component, model_full_name_component = None, model_id
# Check if we erroneously matched against './' or '../' etc... # Check if we erroneously matched against './' or '../' etc...
if org_component is not None and org_component[0] == '.': if org_component is not None and org_component[0] == ".":
org_component = None org_component = None
name_parts: list[str] = model_full_name_component.split('-') name_parts: list[str] = model_full_name_component.split("-")
# Remove empty parts # Remove empty parts
for i in reversed(range(len(name_parts))): for i in reversed(range(len(name_parts))):
@ -191,14 +245,18 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
# Annotate the name # Annotate the name
for i, part in enumerate(name_parts): for i, part in enumerate(name_parts):
# Version # Version
if re.fullmatch(r'(v|iter)?\d+([.]\d+)*', part, re.IGNORECASE): if re.fullmatch(r"(v|iter)?\d+([.]\d+)*", part, re.IGNORECASE):
name_types[i].add("version") name_types[i].add("version")
# Quant type (should not be there for base models, but still annotated) # Quant type (should not be there for base models, but still annotated)
elif re.fullmatch(r'i?q\d(_\w)*|b?fp?(16|32)', part, re.IGNORECASE): elif re.fullmatch(r"i?q\d(_\w)*|b?fp?(16|32)", part, re.IGNORECASE):
name_types[i].add("type") name_types[i].add("type")
name_parts[i] = part.upper() name_parts[i] = part.upper()
# Model size # Model size
elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)', part, re.IGNORECASE): elif i > 0 and re.fullmatch(
r"(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)",
part,
re.IGNORECASE,
):
part = part.replace("_", ".") part = part.replace("_", ".")
# Handle weird bloom-7b1 notation # Handle weird bloom-7b1 notation
if part[-1].isdecimal(): if part[-1].isdecimal():
@ -209,14 +267,19 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
part = part[:-1] + part[-1].upper() part = part[:-1] + part[-1].upper()
if total_params != 0: if total_params != 0:
try: try:
label_params = float(part[:-1]) * pow(1000, " KMBT".find(part[-1])) label_params = float(part[:-1]) * pow(
1000, " KMBT".find(part[-1])
)
# Only use it as a size label if it's close or bigger than the model size # Only use it as a size label if it's close or bigger than the model size
# Note that LoRA adapters don't necessarily include all layers, # Note that LoRA adapters don't necessarily include all layers,
# so this is why bigger label sizes are accepted. # so this is why bigger label sizes are accepted.
# Do not use the size label when it's smaller than 1/8 of the model size # Do not use the size label when it's smaller than 1/8 of the model size
if (total_params < 0 and label_params < abs(total_params) // 8) or ( if (
total_params < 0 and label_params < abs(total_params) // 8
) or (
# Check both directions when the current model isn't a LoRA adapter # Check both directions when the current model isn't a LoRA adapter
total_params > 0 and abs(label_params - total_params) > 7 * total_params // 8 total_params > 0
and abs(label_params - total_params) > 7 * total_params // 8
): ):
# Likely a context length # Likely a context length
name_types[i].add("finetune") name_types[i].add("finetune")
@ -229,7 +292,9 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
name_types[i].add("size_label") name_types[i].add("size_label")
name_parts[i] = part name_parts[i] = part
# Some easy to recognize finetune names # Some easy to recognize finetune names
elif i > 0 and re.fullmatch(r'chat|instruct|vision|lora', part, re.IGNORECASE): elif i > 0 and re.fullmatch(
r"chat|instruct|vision|lora", part, re.IGNORECASE
):
if total_params < 0 and part.lower() == "lora": if total_params < 0 and part.lower() == "lora":
# ignore redundant "lora" in the finetune part when the output is a lora adapter # ignore redundant "lora" in the finetune part when the output is a lora adapter
name_types[i].add("type") name_types[i].add("type")
@ -238,7 +303,12 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
# Ignore word-based size labels when there is at least a number-based one present # Ignore word-based size labels when there is at least a number-based one present
# TODO: should word-based size labels always be removed instead? # TODO: should word-based size labels always be removed instead?
if any(c.isdecimal() for n, t in zip(name_parts, name_types) if "size_label" in t for c in n): if any(
c.isdecimal()
for n, t in zip(name_parts, name_types)
if "size_label" in t
for c in n
):
for n, t in zip(name_parts, name_types): for n, t in zip(name_parts, name_types):
if "size_label" in t: if "size_label" in t:
if all(c.isalpha() for c in n): if all(c.isalpha() for c in n):
@ -262,22 +332,55 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
else: else:
break break
basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None basename = (
"-".join(n for n, t in zip(name_parts, name_types) if "basename" in t)
or None
)
# Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys) # Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys)
size_label = "-".join(dict.fromkeys(s for s, t in zip(name_parts, name_types) if "size_label" in t).keys()) or None size_label = (
finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None "-".join(
dict.fromkeys(
s for s, t in zip(name_parts, name_types) if "size_label" in t
).keys()
)
or None
)
finetune = (
"-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t)
or None
)
# TODO: should the basename version always be excluded? # TODO: should the basename version always be excluded?
# NOTE: multiple finetune versions are joined together # NOTE: multiple finetune versions are joined together
version = "-".join(v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t) or None version = (
"-".join(
v
for v, t, in zip(name_parts, name_types)
if "version" in t and "basename" not in t
)
or None
)
if size_label is None and finetune is None and version is None: if size_label is None and finetune is None and version is None:
# Too ambiguous, output nothing # Too ambiguous, output nothing
basename = None basename = None
return model_full_name_component, org_component, basename, finetune, version, size_label return (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
)
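A hedged usage sketch of the parser above; the import path and the exact outputs are assumptions inferred from the heuristics shown (e.g. "7B" is read as 7 * 1000**3 parameters when checked against total_params):

from gguf.metadata import Metadata  # assumed import path

name, org, basename, finetune, version, size_label = Metadata.get_model_id_components(
    "mistralai/Mistral-7B-Instruct-v0.2"
)
# Heuristically expected to be roughly:
#   name == "Mistral-7B-Instruct-v0.2", org == "mistralai", basename == "Mistral",
#   finetune == "Instruct", version == "v0.2", size_label == "7B"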
@staticmethod @staticmethod
def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None, total_params: int = 0) -> Metadata: def apply_metadata_heuristic(
metadata: Metadata,
model_card: Optional[dict] = None,
hf_params: Optional[dict] = None,
model_path: Optional[Path] = None,
total_params: int = 0,
) -> Metadata:
# Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Model Card Heuristics # Model Card Heuristics
@ -317,16 +420,30 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
for model_id in metadata_base_models: for model_id in metadata_base_models:
# NOTE: model size of base model is assumed to be similar to the size of the current model # NOTE: model size of base model is assumed to be similar to the size of the current model
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
) = Metadata.get_model_id_components(model_id, total_params)
base_model = {} base_model = {}
if model_full_name_component is not None: if model_full_name_component is not None:
base_model["name"] = Metadata.id_to_title(model_full_name_component) base_model["name"] = Metadata.id_to_title(
model_full_name_component
)
if org_component is not None: if org_component is not None:
base_model["organization"] = Metadata.id_to_title(org_component) base_model["organization"] = Metadata.id_to_title(org_component)
if version is not None: if version is not None:
base_model["version"] = version base_model["version"] = version
if org_component is not None and model_full_name_component is not None: if (
base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}" org_component is not None
and model_full_name_component is not None
):
base_model["repo_url"] = (
f"https://huggingface.co/{org_component}/{model_full_name_component}"
)
metadata.base_models.append(base_model) metadata.base_models.append(base_model)
if "license" in model_card and metadata.license is None: if "license" in model_card and metadata.license is None:
@ -360,7 +477,9 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
elif isinstance(pipeline_tags_value, list): elif isinstance(pipeline_tags_value, list):
metadata.tags.extend(pipeline_tags_value) metadata.tags.extend(pipeline_tags_value)
language_value = model_card.get("languages", model_card.get("language", None)) language_value = model_card.get(
"languages", model_card.get("language", None)
)
if language_value is not None: if language_value is not None:
if metadata.languages is None: if metadata.languages is None:
@ -388,11 +507,18 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
if hf_params is not None: if hf_params is not None:
hf_name_or_path = hf_params.get("_name_or_path") hf_name_or_path = hf_params.get("_name_or_path")
if hf_name_or_path is not None and hf_name_or_path.count('/') <= 1: if hf_name_or_path is not None and hf_name_or_path.count("/") <= 1:
# Use _name_or_path only if its actually a model name and not some computer path # Use _name_or_path only if its actually a model name and not some computer path
# e.g. 'meta-llama/Llama-2-7b-hf' # e.g. 'meta-llama/Llama-2-7b-hf'
model_id = hf_name_or_path model_id = hf_name_or_path
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
) = Metadata.get_model_id_components(model_id, total_params)
if metadata.name is None and model_full_name_component is not None: if metadata.name is None and model_full_name_component is not None:
metadata.name = Metadata.id_to_title(model_full_name_component) metadata.name = Metadata.id_to_title(model_full_name_component)
if metadata.organization is None and org_component is not None: if metadata.organization is None and org_component is not None:
@ -410,7 +536,14 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
############################################ ############################################
if model_path is not None: if model_path is not None:
model_id = model_path.name model_id = model_path.name
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) (
model_full_name_component,
org_component,
basename,
finetune,
version,
size_label,
) = Metadata.get_model_id_components(model_id, total_params)
if metadata.name is None and model_full_name_component is not None: if metadata.name is None and model_full_name_component is not None:
metadata.name = Metadata.id_to_title(model_full_name_component) metadata.name = Metadata.id_to_title(model_full_name_component)
if metadata.organization is None and org_component is not None: if metadata.organization is None and org_component is not None:
@ -485,7 +618,9 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
if "version" in base_model_entry: if "version" in base_model_entry:
gguf_writer.add_base_model_version(key, base_model_entry["version"]) gguf_writer.add_base_model_version(key, base_model_entry["version"])
if "organization" in base_model_entry: if "organization" in base_model_entry:
gguf_writer.add_base_model_organization(key, base_model_entry["organization"]) gguf_writer.add_base_model_organization(
key, base_model_entry["organization"]
)
if "url" in base_model_entry: if "url" in base_model_entry:
gguf_writer.add_base_model_url(key, base_model_entry["url"]) gguf_writer.add_base_model_url(key, base_model_entry["url"])
if "doi" in base_model_entry: if "doi" in base_model_entry:
@ -493,7 +628,9 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
if "uuid" in base_model_entry: if "uuid" in base_model_entry:
gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"]) gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"])
if "repo_url" in base_model_entry: if "repo_url" in base_model_entry:
gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"]) gguf_writer.add_base_model_repo_url(
key, base_model_entry["repo_url"]
)
if self.tags is not None: if self.tags is not None:
gguf_writer.add_tags(self.tags) gguf_writer.add_tags(self.tags)

@ -12,14 +12,18 @@
def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType): def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
block_size, type_size = GGML_QUANT_SIZES[quant_type] block_size, type_size = GGML_QUANT_SIZES[quant_type]
if shape[-1] % block_size != 0: if shape[-1] % block_size != 0:
raise ValueError(f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})") raise ValueError(
f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})"
)
return (*shape[:-1], shape[-1] // block_size * type_size) return (*shape[:-1], shape[-1] // block_size * type_size)
def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType): def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
block_size, type_size = GGML_QUANT_SIZES[quant_type] block_size, type_size = GGML_QUANT_SIZES[quant_type]
if shape[-1] % type_size != 0: if shape[-1] % type_size != 0:
raise ValueError(f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})") raise ValueError(
f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})"
)
return (*shape[:-1], shape[-1] // type_size * block_size) return (*shape[:-1], shape[-1] // type_size * block_size)
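A small worked example of the shape arithmetic above; the Q8_0 figures (32 weights per block, 34 bytes per block: 32 int8 values plus a 2-byte scale) are stated from memory and should be treated as assumptions, as is the import path:

from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType  # assumed import path

block_size, type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q8_0]  # expected (32, 34)
n_weights = 4096                            # logical row length in weights
print(n_weights // block_size * type_size)  # 4096 // 32 * 34 == 4352 bytes per quantized row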
@ -27,14 +31,23 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati
def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray: def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
n = n.astype(np.float32, copy=False).view(np.uint32) n = n.astype(np.float32, copy=False).view(np.uint32)
# force nan to quiet # force nan to quiet
n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n) n = np.where(
(n & 0x7FFFFFFF) > 0x7F800000,
(n & np.uint32(0xFFFF0000)) | np.uint32(64 << 16),
n,
)
# round to nearest even # round to nearest even
n = (np.uint64(n) + (0x7fff + ((n >> 16) & 1))) >> 16 n = (np.uint64(n) + (0x7FFF + ((n >> 16) & 1))) >> 16
return n.astype(np.uint16) return n.astype(np.uint16)
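To make the bit manipulation above concrete, a standalone sketch that applies the same two steps (quiet any NaN, then round to nearest even) before dropping the low 16 mantissa bits:

import numpy as np

def fp32_to_bf16_bits(values) -> np.ndarray:
    n = np.asarray(values, dtype=np.float32).view(np.uint32)
    # force NaN to a quiet-NaN bit pattern
    n = np.where((n & 0x7FFFFFFF) > 0x7F800000,
                 (n & np.uint32(0xFFFF0000)) | np.uint32(64 << 16), n)
    # round to nearest even, then keep the high 16 bits
    n = (n.astype(np.uint64) + (0x7FFF + ((n >> 16) & 1))) >> 16
    return n.astype(np.uint16)

bits = fp32_to_bf16_bits([1.0, 1.00390625])
print([hex(int(b)) for b in bits])  # ['0x3f80', '0x3f80'] -- the halfway case rounds to even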
# This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time # This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray: def __apply_over_grouped_rows(
func: Callable[[np.ndarray], np.ndarray],
arr: np.ndarray,
otype: DTypeLike,
oshape: tuple[int, ...],
) -> np.ndarray:
rows = arr.reshape((-1, arr.shape[-1])) rows = arr.reshape((-1, arr.shape[-1]))
osize = 1 osize = 1
for dim in oshape: for dim in oshape:
@ -42,15 +55,23 @@ def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.
out = np.empty(shape=osize, dtype=otype) out = np.empty(shape=osize, dtype=otype)
# compute over groups of 16 rows (arbitrary, but seems good for performance) # compute over groups of 16 rows (arbitrary, but seems good for performance)
n_groups = (rows.shape[0] // 16) or 1 n_groups = (rows.shape[0] // 16) or 1
np.concatenate([func(group).ravel() for group in np.array_split(rows, n_groups)], axis=0, out=out) np.concatenate(
[func(group).ravel() for group in np.array_split(rows, n_groups)],
axis=0,
out=out,
)
return out.reshape(oshape) return out.reshape(oshape)
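Restated as a standalone helper (names here are illustrative) so the row-grouping pattern above can be exercised on its own:

import numpy as np

def apply_over_grouped_rows(func, arr, otype, oshape):
    # Reshape to rows, process ~16 rows at a time, write straight into the output buffer.
    rows = arr.reshape((-1, arr.shape[-1]))
    osize = 1
    for dim in oshape:
        osize *= dim
    out = np.empty(shape=osize, dtype=otype)
    n_groups = (rows.shape[0] // 16) or 1
    np.concatenate([func(group).ravel() for group in np.array_split(rows, n_groups)],
                   axis=0, out=out)
    return out.reshape(oshape)

a = np.arange(64, dtype=np.float32).reshape(8, 8)
assert np.array_equal(apply_over_grouped_rows(lambda g: g * 2, a, np.float32, a.shape), a * 2)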
def __quantize_bf16_array(n: np.ndarray) -> np.ndarray: def __quantize_bf16_array(n: np.ndarray) -> np.ndarray:
return __apply_over_grouped_rows(__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape) return __apply_over_grouped_rows(
__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape
)
__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(__quantize_bf16_array, meta_noop=np.uint16) __quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(
__quantize_bf16_array, meta_noop=np.uint16
)
def quantize_bf16(n: np.ndarray): def quantize_bf16(n: np.ndarray):
@ -105,7 +126,12 @@ def __quantize_q8_0_rows(n: np.ndarray) -> np.ndarray:
def __quantize_q8_0_array(n: np.ndarray) -> np.ndarray: def __quantize_q8_0_array(n: np.ndarray) -> np.ndarray:
return __apply_over_grouped_rows(__quantize_q8_0_rows, arr=n, otype=np.uint8, oshape=__quantize_q8_0_shape_change(n.shape)) return __apply_over_grouped_rows(
__quantize_q8_0_rows,
arr=n,
otype=np.uint8,
oshape=__quantize_q8_0_shape_change(n.shape),
)
__quantize_q8_0_lazy = LazyNumpyTensor._wrap_fn( __quantize_q8_0_lazy = LazyNumpyTensor._wrap_fn(

@ -28,12 +28,10 @@ class TensorNameMap:
"transformer.token_embeddings", # openelm "transformer.token_embeddings", # openelm
"shared", # t5 "shared", # t5
), ),
# Token type embeddings # Token type embeddings
MODEL_TENSOR.TOKEN_TYPES: ( MODEL_TENSOR.TOKEN_TYPES: (
"embeddings.token_type_embeddings", # bert nomic-bert "embeddings.token_type_embeddings", # bert nomic-bert
), ),
# Normalization of token embeddings # Normalization of token embeddings
MODEL_TENSOR.TOKEN_EMBD_NORM: ( MODEL_TENSOR.TOKEN_EMBD_NORM: (
"word_embeddings_layernorm", # bloom "word_embeddings_layernorm", # bloom
@ -41,14 +39,12 @@ class TensorNameMap:
"emb_ln", # nomic-bert "emb_ln", # nomic-bert
"transformer.norm", # openelm "transformer.norm", # openelm
), ),
# Position embeddings # Position embeddings
MODEL_TENSOR.POS_EMBD: ( MODEL_TENSOR.POS_EMBD: (
"transformer.wpe", # gpt2 "transformer.wpe", # gpt2
"embeddings.position_embeddings", # bert "embeddings.position_embeddings", # bert
"wpe", # gpt2 "wpe", # gpt2
), ),
# Output # Output
MODEL_TENSOR.OUTPUT: ( MODEL_TENSOR.OUTPUT: (
"embed_out", # gptneox "embed_out", # gptneox
@ -58,7 +54,6 @@ class TensorNameMap:
"lm_head.linear", # phi2 "lm_head.linear", # phi2
"output_layer", # chatglm "output_layer", # chatglm
), ),
# Output norm # Output norm
MODEL_TENSOR.OUTPUT_NORM: ( MODEL_TENSOR.OUTPUT_NORM: (
"gpt_neox.final_layer_norm", # gptneox "gpt_neox.final_layer_norm", # gptneox
@ -76,7 +71,6 @@ class TensorNameMap:
"encoder.final_layernorm", # chatglm "encoder.final_layernorm", # chatglm
"transformer.norm", # openelm "transformer.norm", # openelm
), ),
# Rope frequencies # Rope frequencies
MODEL_TENSOR.ROPE_FREQS: ( MODEL_TENSOR.ROPE_FREQS: (
"rope.freqs", # llama-pth "rope.freqs", # llama-pth
@ -108,13 +102,11 @@ class TensorNameMap:
"encoder.layers.{bid}.input_layernorm", # chatglm "encoder.layers.{bid}.input_layernorm", # chatglm
"transformer.layers.{bid}.attn_norm", # openelm "transformer.layers.{bid}.attn_norm", # openelm
), ),
# Attention norm 2 # Attention norm 2
MODEL_TENSOR.ATTN_NORM_2: ( MODEL_TENSOR.ATTN_NORM_2: (
"transformer.h.{bid}.ln_attn", # falcon40b "transformer.h.{bid}.ln_attn", # falcon40b
"encoder.layer.{bid}.layer_norm_1", # jina-v2-code "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
), ),
# Attention query-key-value # Attention query-key-value
MODEL_TENSOR.ATTN_QKV: ( MODEL_TENSOR.ATTN_QKV: (
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
@ -132,7 +124,6 @@ class TensorNameMap:
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
"transformer.layers.{bid}.attn.qkv_proj", # openelm "transformer.layers.{bid}.attn.qkv_proj", # openelm
), ),
# Attention query # Attention query
MODEL_TENSOR.ATTN_Q: ( MODEL_TENSOR.ATTN_Q: (
"model.layers.{bid}.self_attn.q_proj", # llama-hf "model.layers.{bid}.self_attn.q_proj", # llama-hf
@ -143,7 +134,6 @@ class TensorNameMap:
"model.layers.{bid}.attention.wq", # internlm2 "model.layers.{bid}.attention.wq", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
), ),
# Attention key # Attention key
MODEL_TENSOR.ATTN_K: ( MODEL_TENSOR.ATTN_K: (
"model.layers.{bid}.self_attn.k_proj", # llama-hf "model.layers.{bid}.self_attn.k_proj", # llama-hf
@ -155,7 +145,6 @@ class TensorNameMap:
"model.layers.{bid}.attention.wk", # internlm2 "model.layers.{bid}.attention.wk", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
), ),
# Attention value # Attention value
MODEL_TENSOR.ATTN_V: ( MODEL_TENSOR.ATTN_V: (
"model.layers.{bid}.self_attn.v_proj", # llama-hf "model.layers.{bid}.self_attn.v_proj", # llama-hf
@ -165,9 +154,8 @@ class TensorNameMap:
"transformer.h.{bid}.attn.v", # refact "transformer.h.{bid}.attn.v", # refact
"model.layers.layers.{bid}.self_attn.v_proj", # plamo "model.layers.layers.{bid}.self_attn.v_proj", # plamo
"model.layers.{bid}.attention.wv", # internlm2 "model.layers.{bid}.attention.wv", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.value" # Grok "transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
), ),
# Attention output # Attention output
MODEL_TENSOR.ATTN_OUT: ( MODEL_TENSOR.ATTN_OUT: (
"gpt_neox.layers.{bid}.attention.dense", # gptneox "gpt_neox.layers.{bid}.attention.dense", # gptneox
@ -191,7 +179,6 @@ class TensorNameMap:
"encoder.layers.{bid}.self_attention.dense", # chatglm "encoder.layers.{bid}.self_attention.dense", # chatglm
"transformer.layers.{bid}.attn.out_proj", # openelm "transformer.layers.{bid}.attn.out_proj", # openelm
), ),
# Attention output norm # Attention output norm
MODEL_TENSOR.ATTN_OUT_NORM: ( MODEL_TENSOR.ATTN_OUT_NORM: (
"encoder.layer.{bid}.attention.output.LayerNorm", # bert "encoder.layer.{bid}.attention.output.LayerNorm", # bert
@ -199,11 +186,9 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok "transformer.decoder_layer.{bid}.rms_norm_1", # Grok
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx "transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
), ),
MODEL_TENSOR.ATTN_POST_NORM: ( MODEL_TENSOR.ATTN_POST_NORM: (
"model.layers.{bid}.post_attention_layernorm", # gemma2 "model.layers.{bid}.post_attention_layernorm", # gemma2
), ),
# Rotary embeddings # Rotary embeddings
MODEL_TENSOR.ATTN_ROT_EMBD: ( MODEL_TENSOR.ATTN_ROT_EMBD: (
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
@ -211,7 +196,6 @@ class TensorNameMap:
"model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo "model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
"transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell "transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell
), ),
# Feed-forward norm # Feed-forward norm
MODEL_TENSOR.FFN_NORM: ( MODEL_TENSOR.FFN_NORM: (
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
@ -228,17 +212,14 @@ class TensorNameMap:
"encoder.layers.{bid}.post_attention_layernorm", # chatglm "encoder.layers.{bid}.post_attention_layernorm", # chatglm
"transformer.layers.{bid}.ffn_norm", # openelm "transformer.layers.{bid}.ffn_norm", # openelm
), ),
# Post feed-forward norm # Post feed-forward norm
MODEL_TENSOR.FFN_PRE_NORM: ( MODEL_TENSOR.FFN_PRE_NORM: (
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2 "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
), ),
# Post feed-forward norm # Post feed-forward norm
MODEL_TENSOR.FFN_POST_NORM: ( MODEL_TENSOR.FFN_POST_NORM: (
"model.layers.{bid}.post_feedforward_layernorm", # gemma2 "model.layers.{bid}.post_feedforward_layernorm", # gemma2
), ),
MODEL_TENSOR.FFN_GATE_INP: ( MODEL_TENSOR.FFN_GATE_INP: (
"layers.{bid}.feed_forward.gate", # mixtral "layers.{bid}.feed_forward.gate", # mixtral
"model.layers.{bid}.block_sparse_moe.gate", # mixtral "model.layers.{bid}.block_sparse_moe.gate", # mixtral
@ -246,11 +227,9 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.router", # Grok "transformer.decoder_layer.{bid}.router", # Grok
"transformer.blocks.{bid}.ffn.router.layer", # dbrx "transformer.blocks.{bid}.ffn.router.layer", # dbrx
), ),
MODEL_TENSOR.FFN_GATE_INP_SHEXP: ( MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
), ),
# Feed-forward up # Feed-forward up
MODEL_TENSOR.FFN_UP: ( MODEL_TENSOR.FFN_UP: (
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
@ -278,24 +257,18 @@ class TensorNameMap:
"model.layers.{bid}.residual_mlp.w3", # arctic "model.layers.{bid}.residual_mlp.w3", # arctic
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
), ),
MODEL_TENSOR.FFN_UP_EXP: ( MODEL_TENSOR.FFN_UP_EXP: (
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged) "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged) "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged) "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
), ),
MODEL_TENSOR.FFN_UP_SHEXP: ( MODEL_TENSOR.FFN_UP_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2 "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
), ),
# AWQ-activation gate # AWQ-activation gate
MODEL_TENSOR.FFN_ACT: ( MODEL_TENSOR.FFN_ACT: ("transformer.blocks.{bid}.ffn.act",), # mpt
"transformer.blocks.{bid}.ffn.act", # mpt
),
# Feed-forward gate # Feed-forward gate
MODEL_TENSOR.FFN_GATE: ( MODEL_TENSOR.FFN_GATE: (
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
@ -309,19 +282,16 @@ class TensorNameMap:
"transformer.h.{bid}.mlp.linear_1", # refact "transformer.h.{bid}.mlp.linear_1", # refact
"model.layers.{bid}.residual_mlp.w1", # arctic "model.layers.{bid}.residual_mlp.w1", # arctic
), ),
MODEL_TENSOR.FFN_GATE_EXP: ( MODEL_TENSOR.FFN_GATE_EXP: (
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged) "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged) "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged) "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
), ),
MODEL_TENSOR.FFN_GATE_SHEXP: ( MODEL_TENSOR.FFN_GATE_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2 "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
), ),
# Feed-forward down # Feed-forward down
MODEL_TENSOR.FFN_DOWN: ( MODEL_TENSOR.FFN_DOWN: (
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
@ -348,19 +318,16 @@ class TensorNameMap:
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2 "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
), ),
MODEL_TENSOR.FFN_DOWN_EXP: ( MODEL_TENSOR.FFN_DOWN_EXP: (
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged) "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged) "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged) "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
), ),
MODEL_TENSOR.FFN_DOWN_SHEXP: ( MODEL_TENSOR.FFN_DOWN_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2 "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
), ),
MODEL_TENSOR.ATTN_Q_NORM: ( MODEL_TENSOR.ATTN_Q_NORM: (
"language_model.encoder.layers.{bid}.self_attention.q_layernorm", "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
"model.layers.{bid}.self_attn.q_layernorm", # persimmon "model.layers.{bid}.self_attn.q_layernorm", # persimmon
@ -369,7 +336,6 @@ class TensorNameMap:
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2 "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
"transformer.layers.{bid}.attn.q_norm", # openelm "transformer.layers.{bid}.attn.q_norm", # openelm
), ),
MODEL_TENSOR.ATTN_K_NORM: ( MODEL_TENSOR.ATTN_K_NORM: (
"language_model.encoder.layers.{bid}.self_attention.k_layernorm", "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
"model.layers.{bid}.self_attn.k_layernorm", # persimmon "model.layers.{bid}.self_attn.k_layernorm", # persimmon
@ -378,210 +344,131 @@ class TensorNameMap:
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2 "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
"transformer.layers.{bid}.attn.k_norm", # openelm "transformer.layers.{bid}.attn.k_norm", # openelm
), ),
MODEL_TENSOR.ROPE_FREQS: ( MODEL_TENSOR.ROPE_FREQS: (
"language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
), ),
MODEL_TENSOR.LAYER_OUT_NORM: ( MODEL_TENSOR.LAYER_OUT_NORM: (
"encoder.layer.{bid}.output.LayerNorm", # bert "encoder.layer.{bid}.output.LayerNorm", # bert
"encoder.layers.{bid}.norm2", # nomic-bert "encoder.layers.{bid}.norm2", # nomic-bert
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2 "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
"encoder.layer.{bid}.layer_norm_2" # jina-v2-code "encoder.layer.{bid}.layer_norm_2", # jina-v2-code
), ),
MODEL_TENSOR.SSM_IN: ( MODEL_TENSOR.SSM_IN: (
"model.layers.{bid}.in_proj", "model.layers.{bid}.in_proj",
"backbone.layers.{bid}.mixer.in_proj", "backbone.layers.{bid}.mixer.in_proj",
), ),
MODEL_TENSOR.SSM_CONV1D: ( MODEL_TENSOR.SSM_CONV1D: (
"model.layers.{bid}.conv1d", "model.layers.{bid}.conv1d",
"backbone.layers.{bid}.mixer.conv1d", "backbone.layers.{bid}.mixer.conv1d",
), ),
MODEL_TENSOR.SSM_X: ( MODEL_TENSOR.SSM_X: (
"model.layers.{bid}.x_proj", "model.layers.{bid}.x_proj",
"backbone.layers.{bid}.mixer.x_proj", "backbone.layers.{bid}.mixer.x_proj",
), ),
MODEL_TENSOR.SSM_DT: ( MODEL_TENSOR.SSM_DT: (
"model.layers.{bid}.dt_proj", "model.layers.{bid}.dt_proj",
"backbone.layers.{bid}.mixer.dt_proj", "backbone.layers.{bid}.mixer.dt_proj",
), ),
MODEL_TENSOR.SSM_A: ( MODEL_TENSOR.SSM_A: (
"model.layers.{bid}.A_log", "model.layers.{bid}.A_log",
"backbone.layers.{bid}.mixer.A_log", "backbone.layers.{bid}.mixer.A_log",
), ),
MODEL_TENSOR.SSM_D: ( MODEL_TENSOR.SSM_D: (
"model.layers.{bid}.D", "model.layers.{bid}.D",
"backbone.layers.{bid}.mixer.D", "backbone.layers.{bid}.mixer.D",
), ),
MODEL_TENSOR.SSM_OUT: ( MODEL_TENSOR.SSM_OUT: (
"model.layers.{bid}.out_proj", "model.layers.{bid}.out_proj",
"backbone.layers.{bid}.mixer.out_proj", "backbone.layers.{bid}.mixer.out_proj",
), ),
MODEL_TENSOR.ATTN_Q_A: ("model.layers.{bid}.self_attn.q_a_proj",), # deepseek2
MODEL_TENSOR.ATTN_Q_A: ( MODEL_TENSOR.ATTN_Q_B: ("model.layers.{bid}.self_attn.q_b_proj",), # deepseek2
"model.layers.{bid}.self_attn.q_a_proj", # deepseek2
),
MODEL_TENSOR.ATTN_Q_B: (
"model.layers.{bid}.self_attn.q_b_proj", # deepseek2
),
MODEL_TENSOR.ATTN_KV_A_MQA: ( MODEL_TENSOR.ATTN_KV_A_MQA: (
"model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2 "model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
), ),
MODEL_TENSOR.ATTN_KV_B: ( MODEL_TENSOR.ATTN_KV_B: (
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2 "model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
), ),
MODEL_TENSOR.ATTN_Q_A_NORM: ( MODEL_TENSOR.ATTN_Q_A_NORM: (
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2 "model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
), ),
MODEL_TENSOR.ATTN_KV_A_NORM: ( MODEL_TENSOR.ATTN_KV_A_NORM: (
"model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2 "model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
), ),
MODEL_TENSOR.ATTN_SUB_NORM: ( MODEL_TENSOR.ATTN_SUB_NORM: (
"model.layers.{bid}.self_attn.inner_attn_ln", # bitnet "model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
), ),
MODEL_TENSOR.FFN_SUB_NORM: ("model.layers.{bid}.mlp.ffn_layernorm",), # bitnet
MODEL_TENSOR.FFN_SUB_NORM: ( MODEL_TENSOR.DEC_ATTN_NORM: ("decoder.block.{bid}.layer.0.layer_norm",), # t5
"model.layers.{bid}.mlp.ffn_layernorm", # bitnet MODEL_TENSOR.DEC_ATTN_Q: ("decoder.block.{bid}.layer.0.SelfAttention.q",), # t5
), MODEL_TENSOR.DEC_ATTN_K: ("decoder.block.{bid}.layer.0.SelfAttention.k",), # t5
MODEL_TENSOR.DEC_ATTN_V: ("decoder.block.{bid}.layer.0.SelfAttention.v",), # t5
MODEL_TENSOR.DEC_ATTN_NORM: (
"decoder.block.{bid}.layer.0.layer_norm", # t5
),
MODEL_TENSOR.DEC_ATTN_Q: (
"decoder.block.{bid}.layer.0.SelfAttention.q", # t5
),
MODEL_TENSOR.DEC_ATTN_K: (
"decoder.block.{bid}.layer.0.SelfAttention.k", # t5
),
MODEL_TENSOR.DEC_ATTN_V: (
"decoder.block.{bid}.layer.0.SelfAttention.v", # t5
),
MODEL_TENSOR.DEC_ATTN_OUT: ( MODEL_TENSOR.DEC_ATTN_OUT: (
"decoder.block.{bid}.layer.0.SelfAttention.o", # t5 "decoder.block.{bid}.layer.0.SelfAttention.o", # t5
), ),
MODEL_TENSOR.DEC_ATTN_REL_B: ( MODEL_TENSOR.DEC_ATTN_REL_B: (
"decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5 "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_NORM: ( MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
"decoder.block.{bid}.layer.1.layer_norm", # t5 "decoder.block.{bid}.layer.1.layer_norm", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_Q: ( MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
"decoder.block.{bid}.layer.1.EncDecAttention.q", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_K: ( MODEL_TENSOR.DEC_CROSS_ATTN_K: (
"decoder.block.{bid}.layer.1.EncDecAttention.k", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_V: ( MODEL_TENSOR.DEC_CROSS_ATTN_V: (
"decoder.block.{bid}.layer.1.EncDecAttention.v", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_OUT: ( MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
"decoder.block.{bid}.layer.1.EncDecAttention.o", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
), ),
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: ( MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
"decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5 "decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
), ),
MODEL_TENSOR.DEC_FFN_NORM: ("decoder.block.{bid}.layer.2.layer_norm",), # t5
MODEL_TENSOR.DEC_FFN_NORM: (
"decoder.block.{bid}.layer.2.layer_norm", # t5
),
MODEL_TENSOR.DEC_FFN_GATE: ( MODEL_TENSOR.DEC_FFN_GATE: (
"decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5 "decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
), ),
MODEL_TENSOR.DEC_FFN_UP: ( MODEL_TENSOR.DEC_FFN_UP: (
"decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5 "decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
"decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5 "decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
), ),
MODEL_TENSOR.DEC_FFN_DOWN: ( MODEL_TENSOR.DEC_FFN_DOWN: (
"decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5 "decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
), ),
MODEL_TENSOR.DEC_OUTPUT_NORM: ("decoder.final_layer_norm",), # t5
MODEL_TENSOR.DEC_OUTPUT_NORM: ( MODEL_TENSOR.ENC_ATTN_NORM: ("encoder.block.{bid}.layer.0.layer_norm",), # t5
"decoder.final_layer_norm", # t5 MODEL_TENSOR.ENC_ATTN_Q: ("encoder.block.{bid}.layer.0.SelfAttention.q",), # t5
), MODEL_TENSOR.ENC_ATTN_K: ("encoder.block.{bid}.layer.0.SelfAttention.k",), # t5
MODEL_TENSOR.ENC_ATTN_V: ("encoder.block.{bid}.layer.0.SelfAttention.v",), # t5
MODEL_TENSOR.ENC_ATTN_NORM: (
"encoder.block.{bid}.layer.0.layer_norm", # t5
),
MODEL_TENSOR.ENC_ATTN_Q: (
"encoder.block.{bid}.layer.0.SelfAttention.q", # t5
),
MODEL_TENSOR.ENC_ATTN_K: (
"encoder.block.{bid}.layer.0.SelfAttention.k", # t5
),
MODEL_TENSOR.ENC_ATTN_V: (
"encoder.block.{bid}.layer.0.SelfAttention.v", # t5
),
MODEL_TENSOR.ENC_ATTN_OUT: ( MODEL_TENSOR.ENC_ATTN_OUT: (
"encoder.block.{bid}.layer.0.SelfAttention.o", # t5 "encoder.block.{bid}.layer.0.SelfAttention.o", # t5
), ),
MODEL_TENSOR.ENC_ATTN_REL_B: ( MODEL_TENSOR.ENC_ATTN_REL_B: (
"encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5 "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
), ),
MODEL_TENSOR.ENC_FFN_NORM: ("encoder.block.{bid}.layer.1.layer_norm",), # t5
MODEL_TENSOR.ENC_FFN_NORM: (
"encoder.block.{bid}.layer.1.layer_norm", # t5
),
MODEL_TENSOR.ENC_FFN_GATE: ( MODEL_TENSOR.ENC_FFN_GATE: (
"encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5 "encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
), ),
MODEL_TENSOR.ENC_FFN_UP: ( MODEL_TENSOR.ENC_FFN_UP: (
"encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5 "encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
"encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5 "encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
), ),
MODEL_TENSOR.ENC_FFN_DOWN: ( MODEL_TENSOR.ENC_FFN_DOWN: (
"encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5 "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
), ),
MODEL_TENSOR.ENC_OUTPUT_NORM: ("encoder.final_layer_norm",), # t5
MODEL_TENSOR.ENC_OUTPUT_NORM: (
"encoder.final_layer_norm", # t5
),
} }
# architecture-specific block mappings # architecture-specific block mappings
arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = { arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = {
MODEL_ARCH.ARCTIC: { MODEL_ARCH.ARCTIC: {
MODEL_TENSOR.FFN_NORM: ( MODEL_TENSOR.FFN_NORM: ("model.layers.{bid}.residual_layernorm",),
"model.layers.{bid}.residual_layernorm", MODEL_TENSOR.FFN_NORM_EXP: ("model.layers.{bid}.post_attention_layernorm",),
),
MODEL_TENSOR.FFN_NORM_EXP: (
"model.layers.{bid}.post_attention_layernorm",
),
}, },
} }
@ -609,7 +496,9 @@ def __init__(self, arch: MODEL_ARCH, n_blocks: int):
key = key.format(bid=bid) key = key.format(bid=bid)
self.mapping[key] = (tensor, tensor_name) self.mapping[key] = (tensor, tensor_name)
def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None: def get_type_and_name(
self, key: str, try_suffixes: Sequence[str] = ()
) -> tuple[MODEL_TENSOR, str] | None:
result = self.mapping.get(key) result = self.mapping.get(key)
if result is not None: if result is not None:
return result return result
@ -626,7 +515,9 @@ def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
return None return None
return result[1] return result[1]
def get_type(self, key: str, try_suffixes: Sequence[str] = ()) -> MODEL_TENSOR | None: def get_type(
self, key: str, try_suffixes: Sequence[str] = ()
) -> MODEL_TENSOR | None:
result = self.get_type_and_name(key, try_suffixes=try_suffixes) result = self.get_type_and_name(key, try_suffixes=try_suffixes)
if result is None: if result is None:
return None return None
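A hedged usage sketch of this mapping table; the get_tensor_name_map helper, the LLAMA architecture constant and the expected "blk.0.attn_q" result are assumptions about the surrounding gguf package rather than something shown in this diff:

import gguf  # assumed to expose get_tensor_name_map, MODEL_ARCH and MODEL_TENSOR

tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, 32)
result = tmap.get_type_and_name(
    "model.layers.0.self_attn.q_proj.weight", try_suffixes=(".weight", ".bias")
)
# Expected to be roughly (MODEL_TENSOR.ATTN_Q, "blk.0.attn_q")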

@ -7,12 +7,18 @@ def fill_templated_filename(filename: str, output_type: str | None) -> str:
# Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf' # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
ftype_lowercase: str = output_type.lower() if output_type is not None else "" ftype_lowercase: str = output_type.lower() if output_type is not None else ""
ftype_uppercase: str = output_type.upper() if output_type is not None else "" ftype_uppercase: str = output_type.upper() if output_type is not None else ""
return filename.format(ftype_lowercase, return filename.format(
outtype=ftype_lowercase, ftype=ftype_lowercase, ftype_lowercase,
OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase) outtype=ftype_lowercase,
ftype=ftype_lowercase,
OUTTYPE=ftype_uppercase,
FTYPE=ftype_uppercase,
)
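A quick usage illustration, restated standalone (behaviour assumed identical to the function above):

def fill_templated_filename(filename: str, output_type: str | None) -> str:
    ftype_lowercase = output_type.lower() if output_type is not None else ""
    ftype_uppercase = output_type.upper() if output_type is not None else ""
    return filename.format(ftype_lowercase,
                           outtype=ftype_lowercase, ftype=ftype_lowercase,
                           OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)

print(fill_templated_filename("some-model-name-{ftype}.gguf", "Q8_0"))  # some-model-name-q8_0.gguf
print(fill_templated_filename("some-model-name-{FTYPE}.gguf", "q8_0"))  # some-model-name-Q8_0.gguf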
def model_weight_count_rounded_notation(model_params_count: int, min_digits: int = 2) -> str: def model_weight_count_rounded_notation(
model_params_count: int, min_digits: int = 2
) -> str:
if model_params_count > 1e12: if model_params_count > 1e12:
# Trillions Of Parameters # Trillions Of Parameters
scaled_model_params = model_params_count * 1e-12 scaled_model_params = model_params_count * 1e-12
@ -30,39 +36,65 @@ def model_weight_count_rounded_notation(model_params_count: int, min_digits: int
scaled_model_params = model_params_count * 1e-3 scaled_model_params = model_params_count * 1e-3
scale_suffix = "K" scale_suffix = "K"
fix = max(min_digits - len(str(round(scaled_model_params)).lstrip('0')), 0) fix = max(min_digits - len(str(round(scaled_model_params)).lstrip("0")), 0)
return f"{scaled_model_params:.{fix}f}{scale_suffix}" return f"{scaled_model_params:.{fix}f}{scale_suffix}"
def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str: def size_label(
total_params: int, shared_params: int, expert_params: int, expert_count: int
) -> str:
if expert_count > 0: if expert_count > 0:
pretty_size = model_weight_count_rounded_notation(abs(shared_params) + abs(expert_params), min_digits=2) pretty_size = model_weight_count_rounded_notation(
abs(shared_params) + abs(expert_params), min_digits=2
)
size_class = f"{expert_count}x{pretty_size}" size_class = f"{expert_count}x{pretty_size}"
else: else:
size_class = model_weight_count_rounded_notation(abs(total_params), min_digits=2) size_class = model_weight_count_rounded_notation(
abs(total_params), min_digits=2
)
return size_class return size_class
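Two illustrative calls (import path assumed, parameter counts made up for the arithmetic):

from gguf.utility import size_label  # assumed import path

# Dense model: labelled from the total parameter count.
print(size_label(6_738_415_616, 0, 0, 0))                           # "6.7B"
# Mixture-of-experts: "<expert count>x<shared + per-expert size>".
print(size_label(46_000_000_000, 1_593_835_520, 5_622_128_640, 8))  # "8x7.2B"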
def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str: def naming_convention(
model_name: str | None,
base_name: str | None,
finetune_string: str | None,
version_string: str | None,
size_label: str | None,
output_type: str | None,
model_type: Literal["vocab", "LoRA"] | None = None,
) -> str:
# Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
if base_name is not None: if base_name is not None:
name = base_name.strip().replace(' ', '-').replace('/', '-') name = base_name.strip().replace(" ", "-").replace("/", "-")
elif model_name is not None: elif model_name is not None:
name = model_name.strip().replace(' ', '-').replace('/', '-') name = model_name.strip().replace(" ", "-").replace("/", "-")
else: else:
name = "ggml-model" name = "ggml-model"
parameters = f"-{size_label}" if size_label is not None else "" parameters = f"-{size_label}" if size_label is not None else ""
finetune = f"-{finetune_string.strip().replace(' ', '-')}" if finetune_string is not None else "" finetune = (
f"-{finetune_string.strip().replace(' ', '-')}"
if finetune_string is not None
else ""
)
version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else "" version = (
f"-{version_string.strip().replace(' ', '-')}"
if version_string is not None
else ""
)
encoding = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else "" encoding = (
f"-{output_type.strip().replace(' ', '-').upper()}"
if output_type is not None
else ""
)
kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else "" kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""

@ -5,7 +5,16 @@
import json import json
import os import os
from pathlib import Path from pathlib import Path
from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable from typing import (
Any,
Callable,
Sequence,
Mapping,
Iterable,
Protocol,
ClassVar,
runtime_checkable,
)
from sentencepiece import SentencePieceProcessor from sentencepiece import SentencePieceProcessor
@ -23,7 +32,9 @@ class SpecialVocab:
chat_template: str | Sequence[Mapping[str, str]] | None chat_template: str | Sequence[Mapping[str, str]] | None
def __init__( def __init__(
self, path: str | os.PathLike[str], load_merges: bool = False, self,
path: str | os.PathLike[str],
load_merges: bool = False,
special_token_types: Iterable[str] | None = None, special_token_types: Iterable[str] | None = None,
n_vocab: int | None = None, n_vocab: int | None = None,
): ):
@ -36,40 +47,60 @@ def __init__(
if special_token_types is not None: if special_token_types is not None:
self.special_token_types = special_token_types self.special_token_types = special_token_types
else: else:
self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad', 'cls', 'mask') self.special_token_types = (
"bos",
"eos",
"unk",
"sep",
"pad",
"cls",
"mask",
)
self._load(Path(path)) self._load(Path(path))
def __repr__(self) -> str: def __repr__(self) -> str:
return '<SpecialVocab with {} merges, special tokens {}, add special tokens {}>'.format( return "<SpecialVocab with {} merges, special tokens {}, add special tokens {}>".format(
len(self.merges), self.special_token_ids or "unset", self.add_special_token or "unset", len(self.merges),
self.special_token_ids or "unset",
self.add_special_token or "unset",
) )
def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None: def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
if self.merges: if self.merges:
if not quiet: if not quiet:
logger.info(f'Adding {len(self.merges)} merge(s).') logger.info(f"Adding {len(self.merges)} merge(s).")
gw.add_token_merges(self.merges) gw.add_token_merges(self.merges)
elif self.load_merges: elif self.load_merges:
logger.warning('Adding merges requested but no merges found, output may be non-functional.') logger.warning(
"Adding merges requested but no merges found, output may be non-functional."
)
for typ, tokid in self.special_token_ids.items(): for typ, tokid in self.special_token_ids.items():
id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None) id_handler: Callable[[int], None] | None = getattr(
gw, f"add_{typ}_token_id", None
)
if id_handler is None: if id_handler is None:
logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping') logger.warning(
f"No handler for special token type {typ} with id {tokid} - skipping"
)
continue continue
if not quiet: if not quiet:
logger.info(f'Setting special token type {typ} to {tokid}') logger.info(f"Setting special token type {typ} to {tokid}")
id_handler(tokid) id_handler(tokid)
for typ, value in self.add_special_token.items(): for typ, value in self.add_special_token.items():
add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None) add_handler: Callable[[bool], None] | None = getattr(
gw, f"add_add_{typ}_token", None
)
if add_handler is None: if add_handler is None:
logger.warning(f'No handler for add_{typ}_token with value {value} - skipping') logger.warning(
f"No handler for add_{typ}_token with value {value} - skipping"
)
continue continue
if not quiet: if not quiet:
logger.info(f'Setting add_{typ}_token to {value}') logger.info(f"Setting add_{typ}_token to {value}")
add_handler(value) add_handler(value)
if self.chat_template is not None: if self.chat_template is not None:
if not quiet: if not quiet:
logger.info(f'Setting chat_template to {self.chat_template}') logger.info(f"Setting chat_template to {self.chat_template}")
gw.add_chat_template(self.chat_template) gw.add_chat_template(self.chat_template)
def _load(self, path: Path) -> None: def _load(self, path: Path) -> None:
@@ -79,12 +110,12 @@ def _load(self, path: Path) -> None:
        self._try_load_merges_txt(path)

    def _try_load_merges_txt(self, path: Path) -> bool:
        merges_file = path / "merges.txt"
        if not merges_file.is_file():
            return False
        with open(merges_file, "r", encoding="utf-8") as fp:
            first_line = next(fp, "").strip()
            if not first_line.startswith("#"):
                fp.seek(0)
                line_num = 0
            else:
@@ -97,9 +128,11 @@ def _try_load_merges_txt(self, path: Path) -> bool:
                    continue
                parts = line.split(None, 3)
                if len(parts) != 2:
                    logger.warning(
                        f"{merges_file.name}: Line {line_num}: Entry malformed, ignoring"
                    )
                    continue
                merges.append(f"{parts[0]} {parts[1]}")
        self.merges = merges
        return True
@@ -107,45 +140,49 @@ def _set_special_token(self, typ: str, tid: Any) -> None:
        if not isinstance(tid, int):
            return
        if tid < 0:
            raise ValueError(f"invalid value for special token type {typ}: {tid}")
        if self.n_vocab is None or tid < self.n_vocab:
            if typ in self.special_token_ids:
                return
            self.special_token_ids[typ] = tid
            return
        logger.warning(
            f"Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping"
        )
    def _try_load_from_tokenizer_json(self, path: Path) -> bool:
        tokenizer_file = path / "tokenizer.json"
        if tokenizer_file.is_file():
            with open(tokenizer_file, encoding="utf-8") as f:
                tokenizer = json.load(f)
            if self.load_merges:
                merges = tokenizer.get("model", {}).get("merges")
                if isinstance(merges, list) and merges and isinstance(merges[0], str):
                    self.merges = merges
            added_tokens = tokenizer.get("added_tokens", {})
        else:
            added_tokens = {}
        tokenizer_config_file = path / "tokenizer_config.json"
        if not tokenizer_config_file.is_file():
            return True
        with open(tokenizer_config_file, encoding="utf-8") as f:
            tokenizer_config = json.load(f)
        chat_template = tokenizer_config.get("chat_template")
        if chat_template is None or isinstance(chat_template, (str, list)):
            self.chat_template = chat_template
        else:
            logger.warning(
                f"Bad type for chat_template field in {tokenizer_config_file!r} - ignoring"
            )
        for typ in self.special_token_types:
            add_entry = tokenizer_config.get(f"add_{typ}_token")
            if isinstance(add_entry, bool):
                self.add_special_token[typ] = add_entry
            entry = tokenizer_config.get(f"{typ}_token")
            if isinstance(entry, str):
                tc_content = entry
            elif isinstance(entry, dict):
                entry_content = entry.get("content")
                if not isinstance(entry_content, str):
                    continue
                tc_content = entry_content
@@ -153,20 +190,24 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
                continue
            # We only need the first match here.
            maybe_token_id = next(
                (
                    atok.get("id")
                    for atok in added_tokens
                    if atok.get("content") == tc_content
                ),
                None,
            )
            self._set_special_token(typ, maybe_token_id)
        return True

    def _try_load_from_config_json(self, path: Path) -> bool:
        config_file = path / "config.json"
        if not config_file.is_file():
            return False
        with open(config_file, encoding="utf-8") as f:
            config = json.load(f)
        for typ in self.special_token_types:
            self._set_special_token(typ, config.get(f"{typ}_token_id"))
        return True
@@ -202,47 +243,52 @@ class BpeVocab(Vocab):
    def __init__(self, base_path: Path):
        added_tokens: dict[str, int] = {}

        if (fname_tokenizer := base_path / "vocab.json").exists():
            # "slow" tokenizer
            with open(fname_tokenizer, encoding="utf-8") as f:
                self.vocab = json.load(f)

            try:
                # FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
                with open(base_path / "added_tokens.json", encoding="utf-8") as f:
                    added_tokens = json.load(f)
            except FileNotFoundError:
                pass
        else:
            # "fast" tokenizer
            fname_tokenizer = base_path / "tokenizer.json"

            # if this fails, FileNotFoundError propagates to caller
            with open(fname_tokenizer, encoding="utf-8") as f:
                tokenizer_json = json.load(f)

            tokenizer_model: dict[str, Any] = tokenizer_json["model"]
            if (
                tokenizer_model["type"] != "BPE"
                or tokenizer_model.get("byte_fallback", False)
                or tokenizer_json["decoder"]["type"] != "ByteLevel"
            ):
                raise FileNotFoundError("Cannot find GPT-2 BPE tokenizer")

            self.vocab = tokenizer_model["vocab"]

            if (added := tokenizer_json.get("added_tokens")) is not None:
                # Added tokens here can be duplicates of the main vocabulary.
                added_tokens = {
                    item["content"]: item["id"]
                    for item in added
                    if item["content"] not in self.vocab
                }

        vocab_size = len(self.vocab)
        expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
        actual_ids = sorted(added_tokens.values())
        if expected_ids != actual_ids:
            expected_end_id = vocab_size + len(actual_ids) - 1
            raise ValueError(
                f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
                f"{vocab_size} - {expected_end_id}; got {actual_ids}"
            )

        items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
        self.added_tokens_dict = added_tokens
@@ -276,27 +322,31 @@ class SentencePieceVocab(Vocab):
    def __init__(self, base_path: Path):
        added_tokens: dict[str, int] = {}
        if (fname_tokenizer := base_path / "tokenizer.model").exists():
            # normal location
            try:
                with open(base_path / "added_tokens.json", encoding="utf-8") as f:
                    added_tokens = json.load(f)
            except FileNotFoundError:
                pass
        elif not (fname_tokenizer := base_path.parent / "tokenizer.model").exists():
            # not found in alternate location either
            raise FileNotFoundError("Cannot find tokenizer.model")

        self.sentencepiece_tokenizer = SentencePieceProcessor()
        self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer))
        vocab_size = self.sentencepiece_tokenizer.vocab_size()

        new_tokens = {
            id: piece for piece, id in added_tokens.items() if id >= vocab_size
        }
        expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens)))
        actual_new_ids = sorted(new_tokens.keys())

        if expected_new_ids != actual_new_ids:
            raise ValueError(
                f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}"
            )

        # Token pieces that were added to the base vocabulary.
        self.added_tokens_dict = added_tokens
@@ -347,25 +397,27 @@ class LlamaHfVocab(Vocab):
    name = "hfft"

    def __init__(self, base_path: Path):
        fname_tokenizer = base_path / "tokenizer.json"
        # if this fails, FileNotFoundError propagates to caller
        with open(fname_tokenizer, encoding="utf-8") as f:
            tokenizer_json = json.load(f)

        # pre-check so we know if we need transformers
        tokenizer_model: dict[str, Any] = tokenizer_json["model"]
        is_llama3 = (
            tokenizer_model["type"] == "BPE"
            and tokenizer_model.get("ignore_merges", False)
            and not tokenizer_model.get("byte_fallback", True)
        )
        if is_llama3:
            raise TypeError("Llama 3 must be converted with BpeVocab")

        if not is_llama3 and (
            tokenizer_model["type"] != "BPE"
            or not tokenizer_model.get("byte_fallback", False)
            or tokenizer_json["decoder"]["type"] != "Sequence"
        ):
            raise FileNotFoundError("Cannot find Llama BPE tokenizer")

        try:
            from transformers import AutoTokenizer
@@ -427,16 +479,22 @@ def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
            # Yield token text, score, and type
            yield token_text, self.get_token_score(token_id), self.get_token_type(
                token_id,
                token_text,
                self.special_ids,  # Reuse already stored special IDs
            )

    def get_token_type(
        self, token_id: int, token_text: bytes, special_ids: set[int]
    ) -> gguf.TokenType:
        # Special case for byte tokens
        if re.fullmatch(rb"<0x[0-9A-Fa-f]{2}>", token_text):
            return gguf.TokenType.BYTE

        # Determine token type based on whether it's a special token
        return (
            gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL
        )

    def get_token_score(self, token_id: int) -> float:
        # Placeholder for actual logic to determine the token's score
@@ -446,7 +504,9 @@ def get_token_score(self, token_id: int) -> float:
    def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        for text in self.added_tokens_list:
            if text in self.specials:
                toktype = self.get_token_type(
                    self.specials[text], b"", self.special_ids
                )
                score = self.get_token_score(self.specials[text])
            else:
                toktype = gguf.TokenType.USER_DEFINED
View File

@@ -9,25 +9,52 @@
import requests
import zipfile
from datetime import datetime
from PyQt6.QtWidgets import (
    QApplication,
    QMainWindow,
    QVBoxLayout,
    QHBoxLayout,
    QWidget,
    QPushButton,
    QListWidget,
    QLineEdit,
    QLabel,
    QFileDialog,
    QProgressBar,
    QComboBox,
    QTextEdit,
    QCheckBox,
    QGroupBox,
    QFormLayout,
    QScrollArea,
    QSlider,
    QSpinBox,
    QListWidgetItem,
    QMessageBox,
    QDialog,
    QPlainTextEdit,
    QMenu,
)
from PyQt6.QtCore import QTimer, QThread, pyqtSignal, Qt, QSize
from PyQt6.QtGui import QCloseEvent, QAction


def ensure_directory(path):
    if not os.path.exists(path):
        os.makedirs(path)


def open_file_safe(file_path, mode="r"):
    encodings = ["utf-8", "latin-1", "ascii", "utf-16"]
    for encoding in encodings:
        try:
            return open(file_path, mode, encoding=encoding)
        except UnicodeDecodeError:
            continue
    raise ValueError(
        f"Unable to open file {file_path} with any of the encodings: {encodings}"
    )


def resource_path(relative_path):
    try:
File diff suppressed because it is too large