mirror of https://github.com/leafspark/AutoGGUF
refactor: optimize GGUF imports
- optimize imports in GGUF conversion utilities
- rename gguf library modules
- update .gitignore and build workflow
parent 3804da0a3f
commit 747aa7b9a8
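For orientation, here is a minimal sketch of the import pattern this commit moves toward. It is illustrative only (not code from the diff) and assumes the vendored gguf package is importable; the hunks below show the same shift in the conversion scripts and build files.

```python
# Illustrative sketch only, not part of the diff.
import sys
from pathlib import Path

# Before: put the vendored gguf-py checkout on sys.path and go through the
# top-level namespace for everything.
sys.path.insert(1, str(Path(__file__).parent / "gguf-py"))
import gguf

arch_names = gguf.MODEL_ARCH_NAMES  # attribute access through the package

# After: import the needed names directly from the local gguf modules.
from gguf.constants import MODEL_ARCH_NAMES
from gguf.tensor_mapping import TensorNameMap

arch_names = MODEL_ARCH_NAMES
```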
@@ -61,8 +61,8 @@ jobs:
if: matrix.os == 'windows-latest'
run: |
$distPath = if ("${{ github.event.inputs.build_type }}" -eq "RELEASE") { "build\release\dist" } else { "build\dev\dist" }
New-Item -ItemType Directory -Force -Path "$distPath\src\gguf-py"
Copy-Item -Path "src\gguf-py\*" -Destination "$distPath\src\gguf-py" -Recurse
New-Item -ItemType Directory -Force -Path "$distPath\src\gguf"
Copy-Item -Path "src\gguf\*" -Destination "$distPath\src\gguf" -Recurse
Copy-Item -Path "src\convert_hf_to_gguf.py" -Destination "$distPath\src"
Copy-Item -Path "src\convert_lora_to_gguf.py" -Destination "$distPath\src"
Copy-Item -Path "src\convert_lora_to_ggml.py" -Destination "$distPath\src"
@@ -72,8 +72,8 @@ jobs:
if: matrix.os != 'windows-latest'
run: |
distPath=$(if [ "${{ github.event.inputs.build_type }}" = "RELEASE" ]; then echo "build/release/dist"; else echo "build/dev/dist"; fi)
mkdir -p $distPath/src/gguf-py
cp -R src/gguf-py/* $distPath/src/gguf-py/
mkdir -p $distPath/src/gguf
cp -R src/gguf/* $distPath/src/gguf/
cp src/convert_hf_to_gguf.py $distPath/src/
cp src/convert_lora_to_gguf.py $distPath/src/
cp src/convert_lora_to_ggml.py $distPath/src/
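These two steps copy the vendored gguf sources and the conversion scripts next to the built distribution. For readers who prefer to see the gist outside PowerShell/bash, here is a hedged Python equivalent (a hypothetical helper, not part of the workflow; it only mirrors the new src/gguf layout):

```python
# Hypothetical helper, not in the repository: the same copy step the workflow
# performs, expressed in Python for clarity.
import shutil
from pathlib import Path


def copy_gguf_sources(build_type: str) -> None:
    dist = Path("build/release/dist" if build_type == "RELEASE" else "build/dev/dist")
    dest = dist / "src"
    dest.mkdir(parents=True, exist_ok=True)
    # Bundle the local gguf package and the conversion entry points.
    shutil.copytree("src/gguf", dest / "gguf", dirs_exist_ok=True)
    for script in (
        "convert_hf_to_gguf.py",
        "convert_lora_to_gguf.py",
        "convert_lora_to_ggml.py",
    ):
        shutil.copy2(Path("src") / script, dest)


copy_gguf_sources("RELEASE")
```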
@@ -20,6 +20,9 @@ __pycache__/
!src/
src/*
!src/*.py
!src/gguf
src/gguf/*
!src/gguf/*.py

# Allow docs folder and its .py files
!docs/
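The rules ignore everything under src/ except Python files, then re-include the src/gguf package and its .py files. As a rough sanity check, here is a sketch using the third-party pathspec library (an assumption for illustration; the project does not depend on it, and git itself is the authority on these rules):

```python
# Illustrative check of the whitelist rules with `pathspec` (assumed dependency
# for this sketch only).
import pathspec

rules = [
    "!src/",
    "src/*",
    "!src/*.py",
    "!src/gguf",
    "src/gguf/*",
    "!src/gguf/*.py",
]

spec = pathspec.PathSpec.from_lines("gitwildmatch", rules)
print(spec.match_file("src/model.bin"))              # True: ignored by src/*
print(spec.match_file("src/convert_hf_to_gguf.py"))  # False: kept by !src/*.py
print(spec.match_file("src/gguf/constants.py"))      # False: kept by !src/gguf/*.py
```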
@@ -1,8 +1,8 @@
import importlib
import json
import shutil
import urllib.request
import urllib.error
import urllib.request
from datetime import datetime
from functools import partial, wraps
from typing import Any, Dict, List, Tuple
@@ -24,10 +24,10 @@
from error_handling import handle_error, show_error
from imports_and_globals import (
ensure_directory,
load_dotenv,
open_file_safe,
resource_path,
show_about,
load_dotenv,
)
@@ -41,21 +41,18 @@ def wrapper(self, *args, **kwargs):

# Length check
if len(value) > 1024:
show_error(f"{field} exceeds maximum length")
show_error(self.logger, f"{field} exceeds maximum length")

# Normalize path
normalized_path = os.path.normpath(value)

# Check for path traversal attempts
if ".." in normalized_path:
show_error(f"Invalid path in {field}")
show_error(self.logger, f"Invalid path in {field}")

# Disallow control characters and null bytes
if re.search(r"[\x00-\x1f\x7f]", value):
show_error(f"Invalid characters in {field}")

# Update the field with normalized path
getattr(self, field).setText(normalized_path)
show_error(self.logger, f"Invalid characters in {field}")

return func(self, *args, **kwargs)
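This hunk routes every show_error call through the instance logger. For context, here is a minimal sketch of the validation-decorator pattern the wrapper implements (the field list, the .text() accessor, and the decorator wiring are assumptions, not the project's exact code):

```python
# Minimal sketch of the validation pattern above; names other than show_error
# are assumptions made for illustration.
import os
import re
from functools import wraps

from error_handling import show_error  # imported the same way as in the diff


def validate_input(*fields):
    def decorator(func):
        @wraps(func)
        def wrapper(self, *args, **kwargs):
            for field in fields:
                value = getattr(self, field).text()  # assumes Qt-style line edits
                # Length check
                if len(value) > 1024:
                    show_error(self.logger, f"{field} exceeds maximum length")
                # Check for path traversal attempts
                if ".." in os.path.normpath(value):
                    show_error(self.logger, f"Invalid path in {field}")
                # Disallow control characters and null bytes
                if re.search(r"[\x00-\x1f\x7f]", value):
                    show_error(self.logger, f"Invalid characters in {field}")
            return func(self, *args, **kwargs)

        return wrapper

    return decorator
```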
@@ -30,8 +30,6 @@
if TYPE_CHECKING:
from torch import Tensor

if "NO_LOCAL_GGUF" not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / "gguf-py"))
import gguf

logger = logging.getLogger("hf-to-gguf")
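The removed lines follow llama.cpp's convention of preferring a vendored gguf-py checkout over an installed package unless NO_LOCAL_GGUF is set; with the package now shipped under src/gguf, that shim appears unnecessary here. A small sketch of the resolution order (illustrative only):

```python
# Sketch of the import-resolution convention shown above (illustrative only).
import os
import sys
from pathlib import Path

# Unless NO_LOCAL_GGUF is set, prefer the vendored package that sits next to
# this script over any gguf installed in site-packages.
if "NO_LOCAL_GGUF" not in os.environ:
    sys.path.insert(1, str(Path(__file__).parent / "gguf-py"))

import gguf  # resolves to the local copy first, otherwise the installed package

print(gguf.__file__)  # shows which copy actually won
```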
@@ -1,19 +1,17 @@
from __future__ import annotations

import logging
import json
import logging
import os
import struct
import sys
from pathlib import Path
from typing import Any, BinaryIO, Sequence
from typing import BinaryIO

import numpy as np
import torch

if "NO_LOCAL_GGUF" not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / "gguf-py" / "gguf"))
import gguf
from gguf.constants import *
from gguf.tensor_mapping import *

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("lora-to-gguf")
@@ -51,11 +49,6 @@ def write_tensor_header(
fout.seek((fout.tell() + 31) & -32)


def pyinstaller_include():
# PyInstaller import
pass


if __name__ == "__main__":
if len(sys.argv) < 2:
logger.info(f"Usage: python {sys.argv[0]} <path> <output_path> [arch]")
@@ -63,7 +56,7 @@ def pyinstaller_include():
"Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
)
logger.info(
f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)"
f"Arch must be one of {list(MODEL_ARCH_NAMES.values())} (default: llama)"
)
sys.exit(1)
@@ -82,14 +75,14 @@ def pyinstaller_include():

arch_name = sys.argv[3] if len(sys.argv) == 4 else "llama"

if arch_name not in gguf.MODEL_ARCH_NAMES.values():
if arch_name not in MODEL_ARCH_NAMES.values():
logger.error(f"Error: unsupported architecture {arch_name}")
sys.exit(1)

arch = list(gguf.MODEL_ARCH_NAMES.keys())[
list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)
arch = list(MODEL_ARCH_NAMES.keys())[
list(MODEL_ARCH_NAMES.values()).index(arch_name)
]
name_map = gguf.TensorNameMap(arch, 500)
name_map = TensorNameMap(arch, 500)

with open(input_json, "r") as f:
params = json.load(f)
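The script resolves the user-supplied architecture name back to its MODEL_ARCH key by indexing two parallel lists. An equivalent dict-based reverse lookup, shown only for clarity (the script itself keeps the list()/index() form):

```python
# Equivalent reverse lookup over MODEL_ARCH_NAMES, shown for clarity only.
from gguf.constants import MODEL_ARCH_NAMES  # maps MODEL_ARCH members to names

arch_name = "llama"

# Invert the enum-to-name mapping once, then look the name up directly.
name_to_arch = {name: arch for arch, name in MODEL_ARCH_NAMES.items()}
arch = name_to_arch.get(arch_name)
if arch is None:
    raise SystemExit(f"Error: unsupported architecture {arch_name}")
```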
@@ -24,9 +24,7 @@
if TYPE_CHECKING:
from torch import Tensor

if "NO_LOCAL_GGUF" not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / "gguf-py"))
import gguf
from gguf.constants import *

from convert_hf_to_gguf import LazyTorchTensor, Model
@@ -1,15 +0,0 @@
# This file left for compatibility. If you want to use the GGUF API from Python
# then don't import gguf/gguf.py directly. If you're looking for examples, see the
# examples/ directory for gguf-py

import importlib
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))

# Compatibility for people trying to import gguf/gguf.py directly instead of as a package.
importlib.invalidate_caches()
import gguf  # noqa: E402

importlib.reload(gguf)
File diff suppressed because it is too large
@@ -441,9 +441,9 @@ def apply_metadata_heuristic(
org_component is not None
and model_full_name_component is not None
):
base_model[
"repo_url"
] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
base_model["repo_url"] = (
f"https://huggingface.co/{org_component}/{model_full_name_component}"
)
metadata.base_models.append(base_model)

if "license" in model_card and metadata.license is None:
@@ -4,9 +4,9 @@
from .constants import MODEL_ARCH, MODEL_TENSOR, MODEL_TENSORS, TENSOR_NAMES


class TensorNameMap:
mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {

MODEL_TENSOR.TOKEN_EMBD: (
"gpt_neox.embed_in",
"transformer.wte",
@@ -27,24 +27,18 @@ class TensorNameMap:
"transformer.token_embeddings",
"shared",
),

MODEL_TENSOR.TOKEN_TYPES: (
"embeddings.token_type_embeddings",
),

MODEL_TENSOR.TOKEN_TYPES: ("embeddings.token_type_embeddings",),
MODEL_TENSOR.TOKEN_EMBD_NORM: (
"word_embeddings_layernorm",
"embeddings.LayerNorm",
"emb_ln",
"transformer.norm",
),

MODEL_TENSOR.POS_EMBD: (
"transformer.wpe",
"embeddings.position_embeddings",
"wpe",
),

MODEL_TENSOR.OUTPUT: (
"embed_out",
"lm_head",
@@ -53,7 +47,6 @@ class TensorNameMap:
"lm_head.linear",
"output_layer",
),

MODEL_TENSOR.OUTPUT_NORM: (
"gpt_neox.final_layer_norm",
"transformer.ln_f",
@@ -71,7 +64,6 @@ class TensorNameMap:
"transformer.norm",
"model.norm",
),

MODEL_TENSOR.ROPE_FREQS: (
"rope.freqs",
"rotary_pos_emb.inv_freq",
@@ -79,7 +71,6 @@ class TensorNameMap:
}

block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {

MODEL_TENSOR.ATTN_NORM: (
"gpt_neox.layers.{bid}.input_layernorm",
"transformer.h.{bid}.ln_1",
@@ -102,12 +93,10 @@ class TensorNameMap:
"encoder.layers.{bid}.input_layernorm",
"transformer.layers.{bid}.attn_norm",
),

MODEL_TENSOR.ATTN_NORM_2: (
"transformer.h.{bid}.ln_attn",
"encoder.layer.{bid}.layer_norm_1",
),

MODEL_TENSOR.ATTN_QKV: (
"gpt_neox.layers.{bid}.attention.query_key_value",
"transformer.h.{bid}.attn.c_attn",
@@ -124,7 +113,6 @@ class TensorNameMap:
"encoder.layers.{bid}.self_attention.query_key_value",
"transformer.layers.{bid}.attn.qkv_proj",
),

MODEL_TENSOR.ATTN_Q: (
"model.layers.{bid}.self_attn.q_proj",
"layers.{bid}.attention.wq",
@@ -135,7 +123,6 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.multi_head_attention.query",
"transformer.h.{bid}.attn.attention.q_proj",
),

MODEL_TENSOR.ATTN_K: (
"model.layers.{bid}.self_attn.k_proj",
"layers.{bid}.attention.wk",
@@ -147,7 +134,6 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.multi_head_attention.key",
"transformer.h.{bid}.attn.attention.k_proj",
),

MODEL_TENSOR.ATTN_V: (
"model.layers.{bid}.self_attn.v_proj",
"layers.{bid}.attention.wv",
@@ -159,7 +145,6 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.multi_head_attention.value",
"transformer.h.{bid}.attn.attention.v_proj",
),

MODEL_TENSOR.ATTN_OUT: (
"gpt_neox.layers.{bid}.attention.dense",
"transformer.h.{bid}.attn.c_proj",
@@ -183,25 +168,19 @@ class TensorNameMap:
"transformer.layers.{bid}.attn.out_proj",
"transformer.h.{bid}.attn.attention.out_proj",
),

MODEL_TENSOR.ATTN_OUT_NORM: (
"encoder.layer.{bid}.attention.output.LayerNorm",
"encoder.layers.{bid}.norm1",
"transformer.decoder_layer.{bid}.rms_norm_1",
"transformer.blocks.{bid}.norm_attn_norm.norm_2",
),

MODEL_TENSOR.ATTN_POST_NORM: (
"model.layers.{bid}.post_attention_layernorm",
),

MODEL_TENSOR.ATTN_POST_NORM: ("model.layers.{bid}.post_attention_layernorm",),
MODEL_TENSOR.ATTN_ROT_EMBD: (
"model.layers.{bid}.self_attn.rotary_emb.inv_freq",
"layers.{bid}.attention.inner_attention.rope.freqs",
"model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq",
"transformer.h.{bid}.attn.rotary_emb.inv_freq",
),

MODEL_TENSOR.FFN_NORM: (
"gpt_neox.layers.{bid}.post_attention_layernorm",
"transformer.h.{bid}.ln_2",
@@ -217,15 +196,8 @@ class TensorNameMap:
"encoder.layers.{bid}.post_attention_layernorm",
"transformer.layers.{bid}.ffn_norm",
),

MODEL_TENSOR.FFN_PRE_NORM: (
"model.layers.{bid}.pre_feedforward_layernorm",
),

MODEL_TENSOR.FFN_POST_NORM: (
"model.layers.{bid}.post_feedforward_layernorm",
),

MODEL_TENSOR.FFN_PRE_NORM: ("model.layers.{bid}.pre_feedforward_layernorm",),
MODEL_TENSOR.FFN_POST_NORM: ("model.layers.{bid}.post_feedforward_layernorm",),
MODEL_TENSOR.FFN_GATE_INP: (
"layers.{bid}.feed_forward.gate",
"model.layers.{bid}.block_sparse_moe.gate",
@@ -233,11 +205,7 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.router",
"transformer.blocks.{bid}.ffn.router.layer",
),

MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
"model.layers.{bid}.mlp.shared_expert_gate",
),

MODEL_TENSOR.FFN_GATE_INP_SHEXP: ("model.layers.{bid}.mlp.shared_expert_gate",),
MODEL_TENSOR.FFN_UP: (
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h",
"transformer.h.{bid}.mlp.c_fc",
@@ -265,23 +233,17 @@ class TensorNameMap:
"encoder.layers.{bid}.mlp.dense_h_to_4h",
"transformer.h.{bid}.mlp.c_fc_1",
),

MODEL_TENSOR.FFN_UP_EXP: (
"layers.{bid}.feed_forward.experts.w3",
"transformer.decoder_layer.{bid}.moe.linear_v",
"transformer.blocks.{bid}.ffn.experts.mlp.v1",
"model.layers.{bid}.mlp.experts.up_proj",
),

MODEL_TENSOR.FFN_UP_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.up_proj",
"model.layers.{bid}.mlp.shared_experts.up_proj",
),

MODEL_TENSOR.FFN_ACT: (
"transformer.blocks.{bid}.ffn.act",
),

MODEL_TENSOR.FFN_ACT: ("transformer.blocks.{bid}.ffn.act",),
MODEL_TENSOR.FFN_GATE: (
"model.layers.{bid}.mlp.gate_proj",
"layers.{bid}.feed_forward.w1",
@@ -295,19 +257,16 @@ class TensorNameMap:
"model.layers.{bid}.residual_mlp.w1",
"transformer.h.{bid}.mlp.c_fc_0",
),

MODEL_TENSOR.FFN_GATE_EXP: (
"layers.{bid}.feed_forward.experts.w1",
"transformer.decoder_layer.{bid}.moe.linear",
"transformer.blocks.{bid}.ffn.experts.mlp.w1",
"model.layers.{bid}.mlp.experts.gate_proj",
),

MODEL_TENSOR.FFN_GATE_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.gate_proj",
"model.layers.{bid}.mlp.shared_experts.gate_proj",
),

MODEL_TENSOR.FFN_DOWN: (
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h",
"transformer.h.{bid}.mlp.c_proj",
@@ -334,19 +293,16 @@ class TensorNameMap:
"encoder.layers.{bid}.mlp.dense_4h_to_h",
"model.layers.h.{bid}.mlp.c_proj",
),

MODEL_TENSOR.FFN_DOWN_EXP: (
"layers.{bid}.feed_forward.experts.w2",
"transformer.decoder_layer.{bid}.moe.linear_1",
"transformer.blocks.{bid}.ffn.experts.mlp.w2",
"model.layers.{bid}.mlp.experts.down_proj",
),

MODEL_TENSOR.FFN_DOWN_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.down_proj",
"model.layers.{bid}.mlp.shared_experts.down_proj",
),

MODEL_TENSOR.ATTN_Q_NORM: (
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
"model.layers.{bid}.self_attn.q_layernorm",
@@ -355,7 +311,6 @@ class TensorNameMap:
"encoder.layer.{bid}.attention.self.layer_norm_q",
"transformer.layers.{bid}.attn.q_norm",
),

MODEL_TENSOR.ATTN_K_NORM: (
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
"model.layers.{bid}.self_attn.k_layernorm",
@@ -364,209 +319,108 @@ class TensorNameMap:
"encoder.layer.{bid}.attention.self.layer_norm_k",
"transformer.layers.{bid}.attn.k_norm",
),

MODEL_TENSOR.ROPE_FREQS: (
"language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq",
),

MODEL_TENSOR.LAYER_OUT_NORM: (
"encoder.layer.{bid}.output.LayerNorm",
"encoder.layers.{bid}.norm2",
"transformer.decoder_layer.{bid}.rms_norm_3",
"encoder.layer.{bid}.mlp.layernorm",
"encoder.layer.{bid}.layer_norm_2"
"encoder.layer.{bid}.layer_norm_2",
),

MODEL_TENSOR.SSM_IN: (
"model.layers.{bid}.in_proj",
"backbone.layers.{bid}.mixer.in_proj",
),

MODEL_TENSOR.SSM_CONV1D: (
"model.layers.{bid}.conv1d",
"backbone.layers.{bid}.mixer.conv1d",
),

MODEL_TENSOR.SSM_X: (
"model.layers.{bid}.x_proj",
"backbone.layers.{bid}.mixer.x_proj",
),

MODEL_TENSOR.SSM_DT: (
"model.layers.{bid}.dt_proj",
"backbone.layers.{bid}.mixer.dt_proj",
),

MODEL_TENSOR.SSM_A: (
"model.layers.{bid}.A_log",
"backbone.layers.{bid}.mixer.A_log",
),

MODEL_TENSOR.SSM_D: (
"model.layers.{bid}.D",
"backbone.layers.{bid}.mixer.D",
),

MODEL_TENSOR.SSM_OUT: (
"model.layers.{bid}.out_proj",
"backbone.layers.{bid}.mixer.out_proj",
),

MODEL_TENSOR.ATTN_Q_A: (
"model.layers.{bid}.self_attn.q_a_proj",
),

MODEL_TENSOR.ATTN_Q_B: (
"model.layers.{bid}.self_attn.q_b_proj",
),

MODEL_TENSOR.ATTN_Q_A: ("model.layers.{bid}.self_attn.q_a_proj",),
MODEL_TENSOR.ATTN_Q_B: ("model.layers.{bid}.self_attn.q_b_proj",),
MODEL_TENSOR.ATTN_KV_A_MQA: (
"model.layers.{bid}.self_attn.kv_a_proj_with_mqa",
),

MODEL_TENSOR.ATTN_KV_B: (
"model.layers.{bid}.self_attn.kv_b_proj",
),

MODEL_TENSOR.ATTN_Q_A_NORM: (
"model.layers.{bid}.self_attn.q_a_layernorm",
),

MODEL_TENSOR.ATTN_KV_A_NORM: (
"model.layers.{bid}.self_attn.kv_a_layernorm",
),

MODEL_TENSOR.ATTN_SUB_NORM: (
"model.layers.{bid}.self_attn.inner_attn_ln",
),

MODEL_TENSOR.FFN_SUB_NORM: (
"model.layers.{bid}.mlp.ffn_layernorm",
),

MODEL_TENSOR.DEC_ATTN_NORM: (
"decoder.block.{bid}.layer.0.layer_norm",
),

MODEL_TENSOR.DEC_ATTN_Q: (
"decoder.block.{bid}.layer.0.SelfAttention.q",
),

MODEL_TENSOR.DEC_ATTN_K: (
"decoder.block.{bid}.layer.0.SelfAttention.k",
),

MODEL_TENSOR.DEC_ATTN_V: (
"decoder.block.{bid}.layer.0.SelfAttention.v",
),

MODEL_TENSOR.DEC_ATTN_OUT: (
"decoder.block.{bid}.layer.0.SelfAttention.o",
),

MODEL_TENSOR.ATTN_KV_B: ("model.layers.{bid}.self_attn.kv_b_proj",),
MODEL_TENSOR.ATTN_Q_A_NORM: ("model.layers.{bid}.self_attn.q_a_layernorm",),
MODEL_TENSOR.ATTN_KV_A_NORM: ("model.layers.{bid}.self_attn.kv_a_layernorm",),
MODEL_TENSOR.ATTN_SUB_NORM: ("model.layers.{bid}.self_attn.inner_attn_ln",),
MODEL_TENSOR.FFN_SUB_NORM: ("model.layers.{bid}.mlp.ffn_layernorm",),
MODEL_TENSOR.DEC_ATTN_NORM: ("decoder.block.{bid}.layer.0.layer_norm",),
MODEL_TENSOR.DEC_ATTN_Q: ("decoder.block.{bid}.layer.0.SelfAttention.q",),
MODEL_TENSOR.DEC_ATTN_K: ("decoder.block.{bid}.layer.0.SelfAttention.k",),
MODEL_TENSOR.DEC_ATTN_V: ("decoder.block.{bid}.layer.0.SelfAttention.v",),
MODEL_TENSOR.DEC_ATTN_OUT: ("decoder.block.{bid}.layer.0.SelfAttention.o",),
MODEL_TENSOR.DEC_ATTN_REL_B: (
"decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias",
),

MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
"decoder.block.{bid}.layer.1.layer_norm",
),

MODEL_TENSOR.DEC_CROSS_ATTN_NORM: ("decoder.block.{bid}.layer.1.layer_norm",),
MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
"decoder.block.{bid}.layer.1.EncDecAttention.q",
),

MODEL_TENSOR.DEC_CROSS_ATTN_K: (
"decoder.block.{bid}.layer.1.EncDecAttention.k",
),

MODEL_TENSOR.DEC_CROSS_ATTN_V: (
"decoder.block.{bid}.layer.1.EncDecAttention.v",
),

MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
"decoder.block.{bid}.layer.1.EncDecAttention.o",
),

MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
"decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias",
),

MODEL_TENSOR.DEC_FFN_NORM: (
"decoder.block.{bid}.layer.2.layer_norm",
),

MODEL_TENSOR.DEC_FFN_GATE: (
"decoder.block.{bid}.layer.2.DenseReluDense.wi_0",
),

MODEL_TENSOR.DEC_FFN_NORM: ("decoder.block.{bid}.layer.2.layer_norm",),
MODEL_TENSOR.DEC_FFN_GATE: ("decoder.block.{bid}.layer.2.DenseReluDense.wi_0",),
MODEL_TENSOR.DEC_FFN_UP: (
"decoder.block.{bid}.layer.2.DenseReluDense.wi",
"decoder.block.{bid}.layer.2.DenseReluDense.wi_1",
),

MODEL_TENSOR.DEC_FFN_DOWN: (
"decoder.block.{bid}.layer.2.DenseReluDense.wo",
),

MODEL_TENSOR.DEC_OUTPUT_NORM: (
"decoder.final_layer_norm",
),

MODEL_TENSOR.ENC_ATTN_NORM: (
"encoder.block.{bid}.layer.0.layer_norm",
),

MODEL_TENSOR.ENC_ATTN_Q: (
"encoder.block.{bid}.layer.0.SelfAttention.q",
),

MODEL_TENSOR.ENC_ATTN_K: (
"encoder.block.{bid}.layer.0.SelfAttention.k",
),

MODEL_TENSOR.ENC_ATTN_V: (
"encoder.block.{bid}.layer.0.SelfAttention.v",
),

MODEL_TENSOR.ENC_ATTN_OUT: (
"encoder.block.{bid}.layer.0.SelfAttention.o",
),

MODEL_TENSOR.DEC_FFN_DOWN: ("decoder.block.{bid}.layer.2.DenseReluDense.wo",),
MODEL_TENSOR.DEC_OUTPUT_NORM: ("decoder.final_layer_norm",),
MODEL_TENSOR.ENC_ATTN_NORM: ("encoder.block.{bid}.layer.0.layer_norm",),
MODEL_TENSOR.ENC_ATTN_Q: ("encoder.block.{bid}.layer.0.SelfAttention.q",),
MODEL_TENSOR.ENC_ATTN_K: ("encoder.block.{bid}.layer.0.SelfAttention.k",),
MODEL_TENSOR.ENC_ATTN_V: ("encoder.block.{bid}.layer.0.SelfAttention.v",),
MODEL_TENSOR.ENC_ATTN_OUT: ("encoder.block.{bid}.layer.0.SelfAttention.o",),
MODEL_TENSOR.ENC_ATTN_REL_B: (
"encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias",
),

MODEL_TENSOR.ENC_FFN_NORM: (
"encoder.block.{bid}.layer.1.layer_norm",
),

MODEL_TENSOR.ENC_FFN_GATE: (
"encoder.block.{bid}.layer.1.DenseReluDense.wi_0",
),

MODEL_TENSOR.ENC_FFN_NORM: ("encoder.block.{bid}.layer.1.layer_norm",),
MODEL_TENSOR.ENC_FFN_GATE: ("encoder.block.{bid}.layer.1.DenseReluDense.wi_0",),
MODEL_TENSOR.ENC_FFN_UP: (
"encoder.block.{bid}.layer.1.DenseReluDense.wi",
"encoder.block.{bid}.layer.1.DenseReluDense.wi_1",
),

MODEL_TENSOR.ENC_FFN_DOWN: (
"encoder.block.{bid}.layer.1.DenseReluDense.wo",
),

MODEL_TENSOR.ENC_OUTPUT_NORM: (
"encoder.final_layer_norm",
),
MODEL_TENSOR.ENC_FFN_DOWN: ("encoder.block.{bid}.layer.1.DenseReluDense.wo",),
MODEL_TENSOR.ENC_OUTPUT_NORM: ("encoder.final_layer_norm",),
}

arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = {
MODEL_ARCH.ARCTIC: {
MODEL_TENSOR.FFN_NORM: (
"model.layers.{bid}.residual_layernorm",
),
MODEL_TENSOR.FFN_NORM_EXP: (
"model.layers.{bid}.post_attention_layernorm",
),
MODEL_TENSOR.FFN_NORM: ("model.layers.{bid}.residual_layernorm",),
MODEL_TENSOR.FFN_NORM_EXP: ("model.layers.{bid}.post_attention_layernorm",),
},
}
@@ -588,31 +442,35 @@ def __init__(self, arch: MODEL_ARCH, n_blocks: int):
if tensor not in MODEL_TENSORS[arch]:
continue

tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
tensor_name = TENSOR_NAMES[tensor].format(bid=bid)
self.mapping[tensor_name] = (tensor, tensor_name)
for key in keys:
key = key.format(bid = bid)
key = key.format(bid=bid)
self.mapping[key] = (tensor, tensor_name)

def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
def get_type_and_name(
self, key: str, try_suffixes: Sequence[str] = ()
) -> tuple[MODEL_TENSOR, str] | None:
result = self.mapping.get(key)
if result is not None:
return result
for suffix in try_suffixes:
if key.endswith(suffix):
result = self.mapping.get(key[:-len(suffix)])
result = self.mapping.get(key[: -len(suffix)])
if result is not None:
return result[0], result[1] + suffix
return None

def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
result = self.get_type_and_name(key, try_suffixes = try_suffixes)
result = self.get_type_and_name(key, try_suffixes=try_suffixes)
if result is None:
return None
return result[1]

def get_type(self, key: str, try_suffixes: Sequence[str] = ()) -> MODEL_TENSOR | None:
result = self.get_type_and_name(key, try_suffixes = try_suffixes)
def get_type(
self, key: str, try_suffixes: Sequence[str] = ()
) -> MODEL_TENSOR | None:
result = self.get_type_and_name(key, try_suffixes=try_suffixes)
if result is None:
return None
return result[0]
@@ -629,5 +487,6 @@ def __contains__(self, key: str) -> bool:
def __repr__(self) -> str:
return repr(self.mapping)


def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
return TensorNameMap(arch, n_blocks)
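The hunks above only reformat TensorNameMap, which maps framework-specific checkpoint tensor names onto canonical GGUF names via the {bid} block-index templates. A short usage sketch (illustrative; it assumes the local gguf package is importable):

```python
# Usage sketch for TensorNameMap, shown for context only.
from gguf.constants import MODEL_ARCH
from gguf.tensor_mapping import get_tensor_name_map

name_map = get_tensor_name_map(MODEL_ARCH.LLAMA, n_blocks=32)

# Checkpoint names from different frameworks resolve to one canonical GGUF name.
print(name_map.get_name("model.layers.0.self_attn.q_proj"))  # e.g. "blk.0.attn_q"

# try_suffixes lets a suffixed key resolve and keeps the suffix on the result.
print(name_map.get_name("model.layers.0.self_attn.q_proj.weight",
                        try_suffixes=(".weight", ".bias")))  # e.g. "blk.0.attn_q.weight"
```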
@@ -224,11 +224,9 @@ class Vocab(BaseVocab, Protocol):
added_tokens_list: list[str]
fname_tokenizer: Path

def __init__(self, base_path: Path):
...
def __init__(self, base_path: Path): ...

def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
...
def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: ...


class NoVocab(BaseVocab):
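This hunk only collapses ellipsis-only method bodies onto the signature line, which is how black formats Protocol stubs; behavior is unchanged. A tiny illustration with a hypothetical protocol:

```python
# Hypothetical protocol, shown only to illustrate the one-line stub style.
from typing import Protocol


class Greeter(Protocol):
    def greet(self, name: str) -> str: ...


class Friendly:
    # Satisfies Greeter structurally, without inheriting from it.
    def greet(self, name: str) -> str:
        return f"hello, {name}"
```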