mirror of https://github.com/leafspark/AutoGGUF
style: format code with Black
parent 2dc5bd9e8a
commit fa51f7cdb8
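The hunks below are the output of running the Black auto-formatter over the repository: single-quoted strings become double-quoted, binary operators gain surrounding spaces, and long calls or signatures are wrapped inside their parentheses. As a minimal illustrative sketch of that transformation (not a hunk from this commit; the file name is hypothetical, and the handler call simply mirrors the Logger change further down):

    import logging
    from logging.handlers import RotatingFileHandler

    log_file = "latest.log"  # hypothetical path, for illustration only

    # Before Black: single quotes, no spaces around *, one long line
    file_handler = RotatingFileHandler(log_file, maxBytes=10*1024*1024, backupCount=5, encoding='utf-8')

    # After Black: double quotes, spaced operators, call wrapped inside its parentheses
    file_handler = RotatingFileHandler(
        log_file, maxBytes=10 * 1024 * 1024, backupCount=5, encoding="utf-8"
    )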
@@ -452,8 +452,13 @@ def __init__(self):
         # Output Type Dropdown
         self.lora_output_type_combo = QComboBox()
         self.lora_output_type_combo.addItems(["GGML", "GGUF"])
-        self.lora_output_type_combo.currentIndexChanged.connect(self.update_base_model_visibility)
-        lora_layout.addRow(self.create_label(OUTPUT_TYPE, SELECT_OUTPUT_TYPE), self.lora_output_type_combo)
+        self.lora_output_type_combo.currentIndexChanged.connect(
+            self.update_base_model_visibility
+        )
+        lora_layout.addRow(
+            self.create_label(OUTPUT_TYPE, SELECT_OUTPUT_TYPE),
+            self.lora_output_type_combo,
+        )

         # Base Model Path (initially hidden)
         self.base_model_label = self.create_label(BASE_MODEL, SELECT_BASE_MODEL_FILE)
@@ -471,7 +476,9 @@ def __init__(self):
         wrapper_layout = QHBoxLayout(self.base_model_wrapper)
         wrapper_layout.addWidget(self.base_model_label)
         wrapper_layout.addWidget(self.base_model_widget, 1)  # Give it a stretch factor
-        wrapper_layout.setContentsMargins(0, 0, 0, 0)  # Remove margins for better alignment
+        wrapper_layout.setContentsMargins(
+            0, 0, 0, 0
+        )  # Remove margins for better alignment

         # Add the wrapper to the layout
         lora_layout.addRow(self.base_model_wrapper)
@@ -1395,7 +1402,7 @@ def quantize_model(self):
            override_string = entry.get_override_string(
                model_name=model_name,
                quant_type=quant_type,
-               output_path=output_path
+               output_path=output_path,
            )
            if override_string:
                command.extend(["--override-kv", override_string])
@@ -1430,7 +1437,9 @@ def quantize_model(self):
        self.task_list.setItemWidget(list_item, task_item)

        # Connect the output signal to the new progress parsing function
-       thread.output_signal.connect(lambda line: self.parse_progress(line, task_item))
+       thread.output_signal.connect(
+           lambda line: self.parse_progress(line, task_item)
+       )
        thread.status_signal.connect(task_item.update_status)
        thread.finished_signal.connect(lambda: self.task_finished(thread))
        thread.error_signal.connect(lambda err: self.handle_error(err, task_item))
@@ -13,6 +13,7 @@
 import zipfile
 from datetime import datetime

+
 class DownloadThread(QThread):
     progress_signal = pyqtSignal(int)
     finished_signal = pyqtSignal(str)
@@ -27,11 +28,11 @@ def run(self):
        try:
            response = requests.get(self.url, stream=True)
            response.raise_for_status()
-           total_size = int(response.headers.get('content-length', 0))
+           total_size = int(response.headers.get("content-length", 0))
            block_size = 8192
            downloaded = 0

-           with open(self.save_path, 'wb') as file:
+           with open(self.save_path, "wb") as file:
                for data in response.iter_content(block_size):
                    size = file.write(data)
                    downloaded += size
@@ -41,7 +42,7 @@ def run(self):

            # Extract the downloaded zip file
            extract_dir = os.path.splitext(self.save_path)[0]
-           with zipfile.ZipFile(self.save_path, 'r') as zip_ref:
+           with zipfile.ZipFile(self.save_path, "r") as zip_ref:
                zip_ref.extractall(extract_dir)

            # Remove the zip file after extraction
@@ -7,6 +7,7 @@
 import socket
 import platform

+
 class KVOverrideEntry(QWidget):
     deleted = pyqtSignal(QWidget)

@@ -44,7 +45,9 @@ def __init__(self, parent=None):
    def delete_clicked(self):
        self.deleted.emit(self)

-   def get_override_string(self, model_name=None, quant_type=None, output_path=None):  # Add arguments
+   def get_override_string(
+       self, model_name=None, quant_type=None, output_path=None
+   ):  # Add arguments
        key = self.key_input.text()
        type_ = self.type_combo.currentText()
        value = self.value_input.text()
@@ -60,9 +63,15 @@ def get_override_string(self, model_name=None, quant_type=None, output_path=None
            "{system.python.version}": lambda: platform.python_version(),
            "{system.time.milliseconds}": lambda: str(int(time.time() * 1000)),
            "{system.date}": lambda: datetime.now().strftime("%Y-%m-%d"),
-           "{model.name}": lambda: model_name if model_name is not None else "Unknown Model",
-           "{quant.type}": lambda: quant_type if quant_type is not None else "Unknown Quant",
-           "{output.path}": lambda: output_path if output_path is not None else "Unknown Output Path",
+           "{model.name}": lambda: (
+               model_name if model_name is not None else "Unknown Model"
+           ),
+           "{quant.type}": lambda: (
+               quant_type if quant_type is not None else "Unknown Quant"
+           ),
+           "{output.path}": lambda: (
+               output_path if output_path is not None else "Unknown Output Path"
+           ),
        }

        for param, func in dynamic_params.items():
@@ -4,6 +4,7 @@
 import sys
 from datetime import datetime

+
 class Logger:
     def __init__(self, name, log_dir):
         self.logger = logging.getLogger(name)
@@ -15,15 +16,19 @@ def __init__(self, name, log_dir):
        # Console handler
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
-       console_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+       console_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        console_handler.setFormatter(console_format)

        # File handler
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(log_dir, f"latest_{timestamp}.log")
-       file_handler = RotatingFileHandler(log_file, maxBytes=10*1024*1024, backupCount=5, encoding='utf-8')
+       file_handler = RotatingFileHandler(
+           log_file, maxBytes=10 * 1024 * 1024, backupCount=5, encoding="utf-8"
+       )
        file_handler.setLevel(logging.DEBUG)
-       file_format = logging.Formatter('%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s')
+       file_format = logging.Formatter(
+           "%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
+       )
        file_handler.setFormatter(file_format)

        # Add handlers to logger
@@ -13,6 +13,7 @@
 import zipfile
 from datetime import datetime

+
 class ModelInfoDialog(QDialog):
     def __init__(self, model_info, parent=None):
         super().__init__(parent)
@@ -41,8 +42,7 @@ def format_model_info(self, model_info):
        html += f"<p><b>Tensors:</b> {model_info.get('tensors', 'N/A')}</p>"

        html += "<h3>Key-Value Pairs:</h3>"
-       for key, value in model_info.get('kv_data', {}).items():
+       for key, value in model_info.get("kv_data", {}).items():
            html += f"<p><b>{key}:</b> {value}</p>"

        return html
-
@@ -15,6 +15,7 @@
 from datetime import datetime
 from imports_and_globals import open_file_safe

+
 class QuantizationThread(QThread):
     # Define custom signals for communication with the main thread
     output_signal = pyqtSignal(str)
@@ -13,6 +13,7 @@
 import zipfile
 from datetime import datetime

+
 class TaskListItem(QWidget):
     def __init__(self, task_name, log_file, show_progress_bar=True, parent=None):
         super().__init__(parent)
@@ -12,8 +12,8 @@
 import numpy as np
 import torch

-if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+if "NO_LOCAL_GGUF" not in os.environ:
+    sys.path.insert(1, str(Path(__file__).parent / "gguf-py" / "gguf"))
 import gguf

 logging.basicConfig(level=logging.DEBUG)
@@ -35,7 +35,9 @@ def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
    fout.write(struct.pack("i", int(params["lora_alpha"])))


-def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]) -> None:
+def write_tensor_header(
+    fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]
+) -> None:
    sname = name.encode("utf-8")
    fout.write(
        struct.pack(
@@ -49,15 +51,21 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
    fout.write(sname)
    fout.seek((fout.tell() + 31) & -32)

+
 def pyinstaller_include():
    # PyInstaller import
    pass

-if __name__ == '__main__':
+
+if __name__ == "__main__":
    if len(sys.argv) < 2:
        logger.info(f"Usage: python {sys.argv[0]} <path> [arch]")
-       logger.info("Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'")
-       logger.info(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
+       logger.info(
+           "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
+       )
+       logger.info(
+           f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)"
+       )
        sys.exit(1)

    input_json = os.path.join(sys.argv[1], "adapter_config.json")
@@ -70,6 +78,7 @@ def pyinstaller_include():
        input_model = os.path.join(sys.argv[1], "adapter_model.safetensors")
        # lazy import load_file only if lora is in safetensors format.
        from safetensors.torch import load_file
+
        model = load_file(input_model, device="cpu")

    arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
@@ -78,14 +87,18 @@ def pyinstaller_include():
        logger.error(f"Error: unsupported architecture {arch_name}")
        sys.exit(1)

-   arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
+   arch = list(gguf.MODEL_ARCH_NAMES.keys())[
+       list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)
+   ]
    name_map = gguf.TensorNameMap(arch, 200)  # 200 layers ought to be enough for anyone

    with open(input_json, "r") as f:
        params = json.load(f)

    if params["peft_type"] != "LORA":
-       logger.error(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
+       logger.error(
+           f"Error: unsupported adapter type {params['peft_type']}, expected LORA"
+       )
        sys.exit(1)

    if params["fan_in_fan_out"] is True:
@@ -136,7 +149,9 @@ def pyinstaller_include():
        tname = name_map.get_name(k)
        if tname is None:
            logger.error(f"Error: could not map tensor name {orig_k}")
-           logger.error(" Note: the arch parameter must be specified if the model is not llama")
+           logger.error(
+               " Note: the arch parameter must be specified if the model is not llama"
+           )
            sys.exit(1)

        if suffix == ".lora_A.weight":
@@ -146,7 +161,9 @@ def pyinstaller_include():
        else:
            assert False

-       logger.info(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
+       logger.info(
+           f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB"
+       )
        write_tensor_header(fout, tname, t.shape, t.dtype)
        t.tofile(fout)
@@ -54,7 +54,9 @@ class General:
        SOURCE_URL = "general.source.url"  # Model Website/Paper
        SOURCE_DOI = "general.source.doi"
        SOURCE_UUID = "general.source.uuid"
-       SOURCE_REPO_URL = "general.source.repo_url"  # Model Source Repository (git/svn/etc...)
+       SOURCE_REPO_URL = (
+           "general.source.repo_url"  # Model Source Repository (git/svn/etc...)
+       )

        # Base Model Source. There can be more than one source if it's a merged
        # model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in
@@ -136,7 +138,9 @@ class Tokenizer:
        PRE = "tokenizer.ggml.pre"
        LIST = "tokenizer.ggml.tokens"
        TOKEN_TYPE = "tokenizer.ggml.token_type"
-       TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count"  # for BERT-style token types
+       TOKEN_TYPE_COUNT = (
+           "tokenizer.ggml.token_type_count"  # for BERT-style token types
+       )
        SCORES = "tokenizer.ggml.scores"
        MERGES = "tokenizer.ggml.merges"
        BOS_ID = "tokenizer.ggml.bos_token_id"
@@ -166,6 +170,7 @@ class Adapter:
        TYPE = "adapter.type"
        LORA_ALPHA = "adapter.lora.alpha"

+
#
# recommended mapping of model tensor names for storage in gguf
#
@@ -1104,9 +1109,9 @@ class TokenType(IntEnum):


class RopeScalingType(Enum):
-   NONE = 'none'
-   LINEAR = 'linear'
-   YARN = 'yarn'
+   NONE = "none"
+   LINEAR = "linear"
+   YARN = "yarn"


class PoolingType(IntEnum):
@@ -67,7 +67,7 @@ class ReaderTensor(NamedTuple):

class GGUFReader:
    # I - same as host, S - swapped
-   byte_order: Literal['I', 'S'] = 'I'
+   byte_order: Literal["I", "S"] = "I"
    alignment: int = GGUF_DEFAULT_ALIGNMENT
    data_offset: int

@@ -86,13 +86,15 @@ class GGUFReader:
        GGUFValueType.BOOL: np.bool_,
    }

-   def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'):
+   def __init__(
+       self, path: os.PathLike[str] | str, mode: Literal["r", "r+", "c"] = "r"
+   ):
        self.data = np.memmap(path, mode=mode)
        offs = 0

        # Check for GGUF magic
-       if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
-           raise ValueError('GGUF magic invalid')
+       if self._get(offs, np.uint32, override_order="<")[0] != GGUF_MAGIC:
+           raise ValueError("GGUF magic invalid")
        offs += 4

        # Check GGUF version
@@ -100,28 +102,46 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
        if temp_version[0] & 65535 == 0:
            # If we get 0 here that means it's (probably) a GGUF file created for
            # the opposite byte order of the machine this script is running on.
-           self.byte_order = 'S'
+           self.byte_order = "S"
            temp_version = temp_version.newbyteorder(self.byte_order)
        version = temp_version[0]
        if version not in READER_SUPPORTED_VERSIONS:
-           raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
+           raise ValueError(
+               f"Sorry, file appears to be version {version} which we cannot handle"
+           )
        self.fields: OrderedDict[str, ReaderField] = OrderedDict()
        self.tensors: list[ReaderTensor] = []
-       offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
+       offs += self._push_field(
+           ReaderField(
+               offs, "GGUF.version", [temp_version], [0], [GGUFValueType.UINT32]
+           )
+       )

        # Check tensor count and kv count
        temp_counts = self._get(offs, np.uint64, 2)
-       offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
-       offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
+       offs += self._push_field(
+           ReaderField(
+               offs,
+               "GGUF.tensor_count",
+               [temp_counts[:1]],
+               [0],
+               [GGUFValueType.UINT64],
+           )
+       )
+       offs += self._push_field(
+           ReaderField(
+               offs, "GGUF.kv_count", [temp_counts[1:]], [0], [GGUFValueType.UINT64]
+           )
+       )
        tensor_count, kv_count = temp_counts
        offs = self._build_fields(offs, kv_count)

        # Build Tensor Info Fields
        offs, tensors_fields = self._build_tensor_info(offs, tensor_count)
-       new_align = self.fields.get('general.alignment')
+       new_align = self.fields.get("general.alignment")
        if new_align is not None:
            if new_align.types != [GGUFValueType.UINT32]:
-               raise ValueError('Bad type for general.alignment field')
+               raise ValueError("Bad type for general.alignment field")
            self.alignment = new_align.parts[-1][0]
        padding = offs % self.alignment
        if padding != 0:
@@ -129,7 +149,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
        self.data_offset = offs
        self._build_tensors(offs, tensors_fields)

-   _DT = TypeVar('_DT', bound = npt.DTypeLike)
+   _DT = TypeVar("_DT", bound=npt.DTypeLike)

    # Fetch a key/value metadata field by key.
    def get_field(self, key: str) -> Union[ReaderField, None]:
@@ -140,7 +160,11 @@ def get_tensor(self, idx: int) -> ReaderTensor:
        return self.tensors[idx]

    def _get(
-       self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
+       self,
+       offset: int,
+       dtype: npt.DTypeLike,
+       count: int = 1,
+       override_order: None | Literal["I", "S", "<"] = None,
    ) -> npt.NDArray[Any]:
        count = int(count)
        itemsize = int(np.empty([], dtype=dtype).itemsize)
@@ -156,18 +180,22 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
            # TODO: add option to generate error on duplicate keys
            # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')

-           logger.warning(f'Duplicate key {field.name} at offset {field.offset}')
-           self.fields[field.name + '_{}'.format(field.offset)] = field
+           logger.warning(f"Duplicate key {field.name} at offset {field.offset}")
+           self.fields[field.name + "_{}".format(field.offset)] = field
        else:
            self.fields[field.name] = field
        return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)

-   def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
+   def _get_str(
+       self, offset: int
+   ) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
        slen = self._get(offset, np.uint64)
        return slen, self._get(offset + 8, np.uint8, slen[0])

    def _get_field_parts(
-       self, orig_offs: int, raw_type: int,
+       self,
+       orig_offs: int,
+       raw_type: int,
    ) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
        offs = orig_offs
        types: list[GGUFValueType] = []
@@ -192,7 +220,9 @@ def _get_field_parts(
            aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
            data_idxs: list[int] = []
            for idx in range(alen[0]):
-               curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
+               curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(
+                   offs, raw_itype[0]
+               )
                if idx == 0:
                    types += curr_types
                idxs_offs = len(aparts)
@@ -201,7 +231,7 @@ def _get_field_parts(
                offs += curr_size
            return offs - orig_offs, aparts, data_idxs, types
        # We can't deal with this one.
-       raise ValueError('Unknown/unhandled field type {gtype}')
+       raise ValueError("Unknown/unhandled field type {gtype}")

    def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
        offs = orig_offs
@@ -228,7 +258,7 @@ def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:

        return ReaderField(
            orig_offs,
-           str(bytes(name_data), encoding = 'utf-8'),
+           str(bytes(name_data), encoding="utf-8"),
            [name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
            [1, 3, 4, 5],
        )
@@ -242,19 +272,26 @@ def _build_fields(self, offs: int, count: int) -> int:
            offs += int(raw_kv_type.nbytes)
            parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
            idxs_offs = len(parts)
-           field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
+           field_size, field_parts, field_idxs, field_types = self._get_field_parts(
+               offs, raw_kv_type[0]
+           )
            parts += field_parts
-           self._push_field(ReaderField(
+           self._push_field(
+               ReaderField(
                    orig_offs,
-                   str(bytes(kv_kdata), encoding = 'utf-8'),
+                   str(bytes(kv_kdata), encoding="utf-8"),
                    parts,
                    [idx + idxs_offs for idx in field_idxs],
                    field_types,
-               ), skip_sum = True)
+               ),
+               skip_sum=True,
+           )
            offs += field_size
        return offs

-   def _build_tensor_info(self, offs: int, count: int) -> tuple[int, list[ReaderField]]:
+   def _build_tensor_info(
+       self, offs: int, count: int
+   ) -> tuple[int, list[ReaderField]]:
        tensor_fields = []
        for _ in range(count):
            field = self._get_tensor_info_field(offs)
@@ -268,9 +305,9 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
        for field in fields:
            _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
            # check if there's any tensor having same name already in the list
-           tensor_name = str(bytes(name_data), encoding = 'utf-8')
+           tensor_name = str(bytes(name_data), encoding="utf-8")
            if tensor_name in tensor_names:
-               raise ValueError(f'Found duplicated tensor with name {tensor_name}')
+               raise ValueError(f"Found duplicated tensor with name {tensor_name}")
            tensor_names.add(tensor_name)
            ggml_type = GGMLQuantizationType(raw_dtype[0])
            n_elems = int(np.prod(dims))
@@ -304,7 +341,8 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
                item_count = n_bytes
                item_type = np.uint8
                np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)
-           tensors.append(ReaderTensor(
+           tensors.append(
+               ReaderTensor(
                    name=tensor_name,
                    tensor_type=ggml_type,
                    shape=dims,
@@ -313,5 +351,6 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
                    data_offset=data_offs,
                    data=self._get(data_offs, item_type, item_count).reshape(np_dims),
                    field=field,
-               ))
+               )
+           )
        self.tensors = tensors
@@ -81,8 +81,15 @@ class GGUFWriter:
    }

    def __init__(
-       self, path: os.PathLike[str] | str | None, arch: str, use_temp_file: bool = False, endianess: GGUFEndian = GGUFEndian.LITTLE,
-       split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False
+       self,
+       path: os.PathLike[str] | str | None,
+       arch: str,
+       use_temp_file: bool = False,
+       endianess: GGUFEndian = GGUFEndian.LITTLE,
+       split_max_tensors: int = 0,
+       split_max_size: int = 0,
+       dry_run: bool = False,
+       small_first_shard: bool = False,
    ):
        self.fout = None
        self.path = Path(path) if path else None
@@ -97,9 +104,11 @@ def __init__(
        self.split_max_size = split_max_size
        self.dry_run = dry_run
        self.small_first_shard = small_first_shard
-       logger.info("gguf: This GGUF file is for {0} Endian only".format(
+       logger.info(
+           "gguf: This GGUF file is for {0} Endian only".format(
                "Big" if self.endianess == GGUFEndian.BIG else "Little",
-       ))
+           )
+       )
        self.state = WriterState.NO_FILE

        if self.small_first_shard:
@@ -128,7 +137,9 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
            elif name.endswith(".lora_b"):
                if last_lora_a is None or last_lora_a[0] != name[:-1] + "a":
                    # Bail when the LoRA pair can't be found trivially
-                   logger.warning("can't measure LoRA size correctly, tensor order is unusual")
+                   logger.warning(
+                       "can't measure LoRA size correctly, tensor order is unusual"
+                   )
                    return 0, 0, 0, 0
                else:
                    shape = (*shape[:-1], last_lora_a[1].shape[-1])
@@ -136,7 +147,7 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
            size = prod(shape)

            if "_exps." in name:
-               expert_params += (size // shape[-3])
+               expert_params += size // shape[-3]
                expert_sum += shape[-3]
                n_expert_tensors += 1
            else:
@@ -157,15 +168,26 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]:
    def format_shard_names(self, path: Path) -> list[Path]:
        if len(self.tensors) == 1:
            return [path]
-       return [path.with_name(SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))) for i in range(len(self.tensors))]
+       return [
+           path.with_name(
+               SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))
+           )
+           for i in range(len(self.tensors))
+       ]

    def open_output_file(self, path: Path | None = None) -> None:
-       if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path):
+       if (
+           self.state is WriterState.EMPTY
+           and self.fout is not None
+           and (path is None or path == self.path)
+       ):
            # allow calling this multiple times as long as the path is the same
            return

        if self.state is not WriterState.NO_FILE:
-           raise ValueError(f'Expected output file to be not yet opened, got {self.state}')
+           raise ValueError(
+               f"Expected output file to be not yet opened, got {self.state}"
+           )

        if path is not None:
            self.path = path
@@ -181,7 +203,9 @@ def print_plan(self) -> list[Path]:
        filenames = self.format_shard_names(self.path)
        assert len(filenames) == len(self.tensors)
        for name, tensors in zip(filenames, self.tensors):
-           logger.info(f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}")
+           logger.info(
+               f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}"
+           )

        if self.dry_run:
            logger.info("Dry run, not writing files")
@@ -201,17 +225,23 @@ def add_shard_kv_data(self) -> None:
        self.kv_data.extend({} for _ in range(len(self.kv_data), total_splits))
        for i, kv_data in enumerate(self.kv_data):
            kv_data[Keys.Split.LLM_KV_SPLIT_NO] = GGUFValue(i, GGUFValueType.UINT16)
-           kv_data[Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(total_splits, GGUFValueType.UINT16)
-           kv_data[Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(total_tensors, GGUFValueType.INT32)
+           kv_data[Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(
+               total_splits, GGUFValueType.UINT16
+           )
+           kv_data[Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(
+               total_tensors, GGUFValueType.INT32
+           )

    def write_header_to_file(self, path: Path | None = None) -> None:
-       if len(self.tensors) == 1 and (self.split_max_tensors != 0 or self.split_max_size != 0):
+       if len(self.tensors) == 1 and (
+           self.split_max_tensors != 0 or self.split_max_size != 0
+       ):
            logger.warning("Model fails split requirements, not splitting")

        self.open_output_file(path)

        if self.state is not WriterState.EMPTY:
-           raise ValueError(f'Expected output file to be empty, got {self.state}')
+           raise ValueError(f"Expected output file to be empty, got {self.state}")

        assert self.fout is not None
        assert len(self.fout) == len(self.tensors)
@@ -229,7 +259,9 @@ def write_header_to_file(self, path: Path | None = None) -> None:

    def write_kv_data_to_file(self) -> None:
        if self.state is not WriterState.HEADER:
-           raise ValueError(f'Expected output file to contain the header, got {self.state}')
+           raise ValueError(
+               f"Expected output file to contain the header, got {self.state}"
+           )
        assert self.fout is not None

        for fout, kv_data in zip(self.fout, self.kv_data):
@@ -246,7 +278,9 @@ def write_kv_data_to_file(self) -> None:

    def write_ti_data_to_file(self) -> None:
        if self.state is not WriterState.KV_DATA:
-           raise ValueError(f'Expected output file to contain KV data, got {self.state}')
+           raise ValueError(
+               f"Expected output file to contain KV data, got {self.state}"
+           )
        assert self.fout is not None

        for fout, tensors in zip(self.fout, self.tensors):
@@ -269,7 +303,7 @@ def write_ti_data_to_file(self) -> None:

    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
        if any(key in kv_data for kv_data in self.kv_data):
-           raise ValueError(f'Duplicated key name {key!r}')
+           raise ValueError(f"Duplicated key name {key!r}")

        self.kv_data[0][key] = GGUFValue(value=val, type=vtype)

@@ -321,14 +355,20 @@ def ggml_pad(x: int, n: int) -> int:
        return ((x + n - 1) // n) * n

    def add_tensor_info(
-       self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype,
-       tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None,
+       self,
+       name: str,
+       tensor_shape: Sequence[int],
+       tensor_dtype: np.dtype,
+       tensor_nbytes: int,
+       raw_dtype: GGMLQuantizationType | None = None,
    ) -> None:
        if self.state is not WriterState.NO_FILE:
-           raise ValueError(f'Expected output file to be not yet opened, got {self.state}')
+           raise ValueError(
+               f"Expected output file to be not yet opened, got {self.state}"
+           )

        if any(name in tensors for tensors in self.tensors):
-           raise ValueError(f'Duplicated tensor name {name!r}')
+           raise ValueError(f"Duplicated tensor name {name!r}")

        if raw_dtype is None:
            if tensor_dtype == np.float16:
@@ -346,7 +386,9 @@ def add_tensor_info(
            elif tensor_dtype == np.int64:
                dtype = GGMLQuantizationType.I64
            else:
-               raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
+               raise ValueError(
+                   "Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now"
+               )
        else:
            dtype = raw_dtype
            if tensor_dtype == np.uint8:
@@ -359,14 +401,20 @@ def add_tensor_info(
            and len(self.tensors[-1]) >= self.split_max_tensors
        ) or (  # split when over size limit
            self.split_max_size != 0
-           and sum(ti.nbytes for ti in self.tensors[-1].values()) + tensor_nbytes > self.split_max_size
+           and sum(ti.nbytes for ti in self.tensors[-1].values()) + tensor_nbytes
+           > self.split_max_size
        ):
            self.tensors.append({})

-       self.tensors[-1][name] = TensorInfo(shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes)
+       self.tensors[-1][name] = TensorInfo(
+           shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes
+       )

    def add_tensor(
-       self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
+       self,
+       name: str,
+       tensor: np.ndarray[Any, Any],
+       raw_shape: Sequence[int] | None = None,
        raw_dtype: GGMLQuantizationType | None = None,
    ) -> None:
        if self.endianess == GGUFEndian.BIG:
@@ -377,7 +425,9 @@ def add_tensor(
            self.temp_file = fp

        shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
-       self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype)
+       self.add_tensor_info(
+           name, shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype
+       )

        if self.temp_file is None:
            self.tensors[-1][name].tensor = tensor
@@ -387,13 +437,21 @@ def add_tensor(
            self.write_padding(self.temp_file, tensor.nbytes)

    def write_padding(self, fp: IO[bytes], n: int, align: int | None = None) -> None:
-       pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
+       pad = (
+           GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment)
+           - n
+       )
        if pad != 0:
            fp.write(bytes([0] * pad))

    def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
-       if self.state is not WriterState.TI_DATA and self.state is not WriterState.WEIGHTS:
-           raise ValueError(f'Expected output file to contain tensor info or weights, got {self.state}')
+       if (
+           self.state is not WriterState.TI_DATA
+           and self.state is not WriterState.WEIGHTS
+       ):
+           raise ValueError(
+               f"Expected output file to contain tensor info or weights, got {self.state}"
+           )
        assert self.fout is not None

        if self.endianess == GGUFEndian.BIG:
@@ -409,7 +467,9 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:

        # pop the first tensor info
        # TODO: cleaner way to get the first key
-       first_tensor_name = [name for name, _ in zip(self.tensors[file_id].keys(), range(1))][0]
+       first_tensor_name = [
+           name for name, _ in zip(self.tensors[file_id].keys(), range(1))
+       ][0]
        ti = self.tensors[file_id].pop(first_tensor_name)
        assert ti.nbytes == tensor.nbytes

@@ -437,8 +497,15 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
            total_bytes = sum(ti.nbytes for t in self.tensors for ti in t.values())

            if len(self.fout) > 1:
-               shard_bar = tqdm(desc=f"Shard (0/{len(self.fout)})", total=None, unit="byte", unit_scale=True)
-           bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)
+               shard_bar = tqdm(
+                   desc=f"Shard (0/{len(self.fout)})",
+                   total=None,
+                   unit="byte",
+                   unit_scale=True,
+               )
+           bar = tqdm(
+               desc="Writing", total=total_bytes, unit="byte", unit_scale=True
+           )

        for i, (fout, tensors) in enumerate(zip(self.fout, self.tensors)):
            if shard_bar is not None:
@@ -448,7 +515,9 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:

            # relying on the fact that Python dicts preserve insertion order (since 3.7)
            for ti in tensors.values():
-               assert ti.tensor is not None  # can only iterate once over the tensors
+               assert (
+                   ti.tensor is not None
+               )  # can only iterate once over the tensors
                assert ti.tensor.nbytes == ti.nbytes
                ti.tensor.tofile(fout)
                if shard_bar is not None:
@@ -460,7 +529,9 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
        else:
            self.temp_file.seek(0)

-           shutil.copyfileobj(self.temp_file, self.fout[0 if not self.small_first_shard else 1])
+           shutil.copyfileobj(
+               self.temp_file, self.fout[0 if not self.small_first_shard else 1]
+           )
            self.flush()
            self.temp_file.close()

@@ -566,7 +637,9 @@ def add_base_model_version(self, source_id: int, version: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_VERSION.format(id=source_id), version)

    def add_base_model_organization(self, source_id: int, organization: str) -> None:
-       self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
+       self.add_string(
+           Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization
+       )

    def add_base_model_url(self, source_id: int, url: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
@@ -605,7 +678,9 @@ def add_block_count(self, length: int) -> None:
        self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)

    def add_leading_dense_block_count(self, length: int) -> None:
-       self.add_uint32(Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch), length)
+       self.add_uint32(
+           Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch), length
+       )

    def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
        if isinstance(length, int):
@@ -614,10 +689,14 @@ def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
        self.add_array(Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length)

    def add_expert_feed_forward_length(self, length: int) -> None:
-       self.add_uint32(Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
+       self.add_uint32(
+           Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length
+       )

    def add_expert_shared_feed_forward_length(self, length: int) -> None:
-       self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
+       self.add_uint32(
+           Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length
+       )

    def add_parallel_residual(self, use: bool) -> None:
        self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
@@ -736,10 +815,14 @@ def add_tokenizer_model(self, model: str) -> None:
    def add_tokenizer_pre(self, pre: str) -> None:
        self.add_string(Keys.Tokenizer.PRE, pre)

-   def add_token_list(self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
+   def add_token_list(
+       self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]
+   ) -> None:
        self.add_array(Keys.Tokenizer.LIST, tokens)

-   def add_token_merges(self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
+   def add_token_merges(
+       self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]
+   ) -> None:
        self.add_array(Keys.Tokenizer.MERGES, merges)

    def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None:
@@ -793,18 +876,22 @@ def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
            template_names = set()

            for choice in value:
-               name = choice.get('name', '')
-               template = choice.get('template')
+               name = choice.get("name", "")
+               template = choice.get("template")

                # Allowing non-alphanumerical characters in template name is probably not a good idea, so filter it
-               name = ''.join((c if c in ascii_letters + digits else '_' for c in name))
+               name = "".join(
+                   (c if c in ascii_letters + digits else "_" for c in name)
+               )

                if name and template is not None:
-                   if name == 'default':
+                   if name == "default":
                        template_default = template
                    else:
                        template_names.add(name)
-                       self.add_string(Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template)
+                       self.add_string(
+                           Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template
+                       )

            if template_names:
                self.add_array(Keys.Tokenizer.CHAT_TEMPLATES, list(template_names))
@@ -829,10 +916,10 @@ def add_eot_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.EOT_ID, id)

    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
-       pack_prefix = ''
+       pack_prefix = ""
        if not skip_pack_prefix:
-           pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
-       return struct.pack(f'{pack_prefix}{fmt}', value)
+           pack_prefix = "<" if self.endianess == GGUFEndian.LITTLE else ">"
+       return struct.pack(f"{pack_prefix}{fmt}", value)

    def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
        kv_data = bytearray()
@@ -842,7 +929,9 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:

        pack_fmt = self._simple_value_packing.get(vtype)
        if pack_fmt is not None:
-           kv_data += self._pack(pack_fmt, val, skip_pack_prefix = vtype == GGUFValueType.BOOL)
+           kv_data += self._pack(
+               pack_fmt, val, skip_pack_prefix=vtype == GGUFValueType.BOOL
+           )
        elif vtype == GGUFValueType.STRING:
            encoded_val = val.encode("utf-8") if isinstance(val, str) else val
            kv_data += self._pack("Q", len(encoded_val))
@@ -860,7 +949,9 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
else:
ltype = GGUFValueType.get_type(val[0])
if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
raise ValueError("All items in a GGUF array should be of the same type")
raise ValueError(
"All items in a GGUF array should be of the same type"
)
kv_data += self._pack("I", ltype)
kv_data += self._pack("Q", len(val))
for item in val:
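(A minimal, self-contained sketch of the byte-order trick used by _pack above; the LITTLE/BIG constants stand in for GGUFEndian and are assumptions of this example, not part of the diff.)

import struct

LITTLE, BIG = "<", ">"  # stand-ins for GGUFEndian.LITTLE / GGUFEndian.BIG

def pack_value(fmt: str, value, endianess: str = LITTLE) -> bytes:
    # Prefixing the struct format with "<" or ">" selects the byte order,
    # which is exactly the role pack_prefix plays in _pack above.
    return struct.pack(f"{endianess}{fmt}", value)

assert pack_value("I", 1, LITTLE) == b"\x01\x00\x00\x00"
assert pack_value("I", 1, BIG) == b"\x00\x00\x00\x01"
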
@@ -13,7 +13,9 @@

class LazyMeta(ABCMeta):

def __new__(cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs):
def __new__(
cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs
):
def __getattr__(self, name: str) -> Any:
meta_attr = getattr(self._meta, name)
if callable(meta_attr):
@@ -41,6 +43,7 @@ def wrapped_special_op(self, *args, **kwargs):
getattr(type(self)._tensor_type, op_name),
meta_noop=meta_noop,
)(self, *args, **kwargs)

return wrapped_special_op

# special methods bypass __getattr__, so they need to be added manually
@@ -48,11 +51,48 @@ def wrapped_special_op(self, *args, **kwargs):
# NOTE: doing this from a metaclass is very convenient
# TODO: make this even more comprehensive
for binary_op in (
"lt", "le", "eq", "ne", "ge", "gt", "not"
"abs", "add", "and", "floordiv", "invert", "lshift", "mod", "mul", "matmul",
"neg", "or", "pos", "pow", "rshift", "sub", "truediv", "xor",
"iadd", "iand", "ifloordiv", "ilshift", "imod", "imul", "ior", "irshift", "isub", "ixor",
"radd", "rand", "rfloordiv", "rmul", "ror", "rpow", "rsub", "rtruediv", "rxor",
"lt",
"le",
"eq",
"ne",
"ge",
"gt",
"not" "abs",
"add",
"and",
"floordiv",
"invert",
"lshift",
"mod",
"mul",
"matmul",
"neg",
"or",
"pos",
"pow",
"rshift",
"sub",
"truediv",
"xor",
"iadd",
"iand",
"ifloordiv",
"ilshift",
"imod",
"imul",
"ior",
"irshift",
"isub",
"ixor",
"radd",
"rand",
"rfloordiv",
"rmul",
"ror",
"rpow",
"rsub",
"rtruediv",
"rxor",
):
attr_name = f"__{binary_op}__"
# the result of these operators usually has the same shape and dtype as the input,
@@ -60,7 +100,9 @@ def wrapped_special_op(self, *args, **kwargs):
namespace[attr_name] = mk_wrap(attr_name, meta_noop=True)

for special_op in (
"getitem", "setitem", "len",
"getitem",
"setitem",
"len",
):
attr_name = f"__{special_op}__"
namespace[attr_name] = mk_wrap(attr_name, meta_noop=False)
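(Hedged illustration of the metaclass pattern reformatted above: special methods bypass __getattr__, so wrappers are generated by name and injected into the class namespace. EchoMeta, Echo and the short op list are invented for this sketch only.)

from abc import ABCMeta
from typing import Any

class EchoMeta(ABCMeta):
    def __new__(cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs):
        def mk_wrap(attr_name: str):
            def wrapped(self, *args, **kw):
                return (attr_name, args)
            return wrapped

        # Same idea as the loops above, with a much shorter list of dunder names.
        for binary_op in ("add", "sub", "mul"):
            namespace[f"__{binary_op}__"] = mk_wrap(f"__{binary_op}__")
        return super().__new__(cls, name, bases, namespace, **kwargs)

class Echo(metaclass=EchoMeta):
    pass

assert (Echo() + 1) == ("__add__", (1,))
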
@@ -77,7 +119,15 @@ class LazyBase(ABC, metaclass=LazyMeta):
_kwargs: dict[str, Any]
_func: Callable[[Any], Any] | None

def __init__(self, *, meta: Any, data: Any | None = None, args: tuple = (), kwargs: dict[str, Any] | None = None, func: Callable[[Any], Any] | None = None):
def __init__(
self,
*,
meta: Any,
data: Any | None = None,
args: tuple = (),
kwargs: dict[str, Any] | None = None,
func: Callable[[Any], Any] | None = None,
):
super().__init__()
self._meta = meta
self._data = data
@ -107,7 +157,17 @@ def _recurse_apply(o: Any, fn: Callable[[Any], Any]) -> Any:
|
||||||
return o
|
return o
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _wrap_fn(cls, fn: Callable, *, use_self: LazyBase | None = None, meta_noop: bool | DTypeLike | tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]] = False) -> Callable[[Any], Any]:
|
def _wrap_fn(
|
||||||
|
cls,
|
||||||
|
fn: Callable,
|
||||||
|
*,
|
||||||
|
use_self: LazyBase | None = None,
|
||||||
|
meta_noop: (
|
||||||
|
bool
|
||||||
|
| DTypeLike
|
||||||
|
| tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]]
|
||||||
|
) = False,
|
||||||
|
) -> Callable[[Any], Any]:
|
||||||
def wrapped_fn(*args, **kwargs):
|
def wrapped_fn(*args, **kwargs):
|
||||||
if kwargs is None:
|
if kwargs is None:
|
||||||
kwargs = {}
|
kwargs = {}
|
||||||
|
@ -138,13 +198,16 @@ def wrapped_fn(*args, **kwargs):
|
||||||
res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)
|
res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)
|
||||||
|
|
||||||
if isinstance(res, cls._tensor_type):
|
if isinstance(res, cls._tensor_type):
|
||||||
return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
|
return cls(
|
||||||
|
meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
del res # not needed
|
del res # not needed
|
||||||
# non-tensor return likely relies on the contents of the args
|
# non-tensor return likely relies on the contents of the args
|
||||||
# (e.g. the result of torch.equal)
|
# (e.g. the result of torch.equal)
|
||||||
eager_args = cls.to_eager(args)
|
eager_args = cls.to_eager(args)
|
||||||
return fn(*eager_args, **kwargs)
|
return fn(*eager_args, **kwargs)
|
||||||
|
|
||||||
return wrapped_fn
|
return wrapped_fn
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -175,7 +238,8 @@ def eager_to_meta(cls, t: Any) -> Any:
|
||||||
# must be overridden, meta tensor init is backend-specific
|
# must be overridden, meta tensor init is backend-specific
|
||||||
@classmethod
|
@classmethod
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def meta_with_dtype_and_shape(cls, dtype: Any, shape: Any) -> Any: pass
|
def meta_with_dtype_and_shape(cls, dtype: Any, shape: Any) -> Any:
|
||||||
|
pass
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_eager(cls, t: Any) -> Any:
|
def from_eager(cls, t: Any) -> Any:
|
||||||
|
@ -192,7 +256,9 @@ class LazyNumpyTensor(LazyBase):
|
||||||
_tensor_type = np.ndarray
|
_tensor_type = np.ndarray
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
|
def meta_with_dtype_and_shape(
|
||||||
|
cls, dtype: DTypeLike, shape: tuple[int, ...]
|
||||||
|
) -> np.ndarray[Any, Any]:
|
||||||
# The initial idea was to use np.nan as the fill value,
|
# The initial idea was to use np.nan as the fill value,
|
||||||
# but non-float types like np.int16 can't use that.
|
# but non-float types like np.int16 can't use that.
|
||||||
# So zero it is.
|
# So zero it is.
|
||||||
|
@ -201,8 +267,16 @@ def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) ->
|
||||||
|
|
||||||
def astype(self, dtype, *args, **kwargs):
|
def astype(self, dtype, *args, **kwargs):
|
||||||
meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
|
meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
|
||||||
full_args = (self, dtype,) + args
|
full_args = (
|
||||||
return type(self)(meta=meta, args=full_args, kwargs=kwargs, func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)))
|
self,
|
||||||
|
dtype,
|
||||||
|
) + args
|
||||||
|
return type(self)(
|
||||||
|
meta=meta,
|
||||||
|
args=full_args,
|
||||||
|
kwargs=kwargs,
|
||||||
|
func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)),
|
||||||
|
)
|
||||||
|
|
||||||
def tofile(self, *args, **kwargs):
|
def tofile(self, *args, **kwargs):
|
||||||
eager = LazyNumpyTensor.to_eager(self)
|
eager = LazyNumpyTensor.to_eager(self)
|
||||||
|
|
|
@ -44,7 +44,12 @@ class Metadata:
|
||||||
datasets: Optional[list[str]] = None
|
datasets: Optional[list[str]] = None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
|
def load(
|
||||||
|
metadata_override_path: Optional[Path] = None,
|
||||||
|
model_path: Optional[Path] = None,
|
||||||
|
model_name: Optional[str] = None,
|
||||||
|
total_params: int = 0,
|
||||||
|
) -> Metadata:
|
||||||
# This grabs as many contextual authorship metadata as possible from the model repository
|
# This grabs as many contextual authorship metadata as possible from the model repository
|
||||||
# making any conversion as required to match the gguf kv store metadata format
|
# making any conversion as required to match the gguf kv store metadata format
|
||||||
# as well as giving users the ability to override any authorship metadata that may be incorrect
|
# as well as giving users the ability to override any authorship metadata that may be incorrect
|
||||||
|
@ -57,7 +62,9 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
|
||||||
# TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter
|
# TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter
|
||||||
|
|
||||||
# heuristics
|
# heuristics
|
||||||
metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params)
|
metadata = Metadata.apply_metadata_heuristic(
|
||||||
|
metadata, model_card, hf_params, model_path, total_params
|
||||||
|
)
|
||||||
|
|
||||||
# Metadata Override File Provided
|
# Metadata Override File Provided
|
||||||
# This is based on LLM_KV_NAMES mapping in llama.cpp
|
# This is based on LLM_KV_NAMES mapping in llama.cpp
|
||||||
|
@ -66,34 +73,66 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
|
||||||
metadata.name = metadata_override.get(Keys.General.NAME, metadata.name)
|
metadata.name = metadata_override.get(Keys.General.NAME, metadata.name)
|
||||||
metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author)
|
metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author)
|
||||||
metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version)
|
metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version)
|
||||||
metadata.organization = metadata_override.get(Keys.General.ORGANIZATION, metadata.organization)
|
metadata.organization = metadata_override.get(
|
||||||
|
Keys.General.ORGANIZATION, metadata.organization
|
||||||
|
)
|
||||||
|
|
||||||
metadata.finetune = metadata_override.get(Keys.General.FINETUNE, metadata.finetune)
|
metadata.finetune = metadata_override.get(
|
||||||
metadata.basename = metadata_override.get(Keys.General.BASENAME, metadata.basename)
|
Keys.General.FINETUNE, metadata.finetune
|
||||||
|
)
|
||||||
|
metadata.basename = metadata_override.get(
|
||||||
|
Keys.General.BASENAME, metadata.basename
|
||||||
|
)
|
||||||
|
|
||||||
metadata.description = metadata_override.get(Keys.General.DESCRIPTION, metadata.description)
|
metadata.description = metadata_override.get(
|
||||||
metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY, metadata.quantized_by)
|
Keys.General.DESCRIPTION, metadata.description
|
||||||
|
)
|
||||||
|
metadata.quantized_by = metadata_override.get(
|
||||||
|
Keys.General.QUANTIZED_BY, metadata.quantized_by
|
||||||
|
)
|
||||||
|
|
||||||
metadata.size_label = metadata_override.get(Keys.General.SIZE_LABEL, metadata.size_label)
|
metadata.size_label = metadata_override.get(
|
||||||
metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME, metadata.license_name)
|
Keys.General.SIZE_LABEL, metadata.size_label
|
||||||
metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK, metadata.license_link)
|
)
|
||||||
|
metadata.license_name = metadata_override.get(
|
||||||
|
Keys.General.LICENSE_NAME, metadata.license_name
|
||||||
|
)
|
||||||
|
metadata.license_link = metadata_override.get(
|
||||||
|
Keys.General.LICENSE_LINK, metadata.license_link
|
||||||
|
)
|
||||||
|
|
||||||
metadata.url = metadata_override.get(Keys.General.URL, metadata.url)
|
metadata.url = metadata_override.get(Keys.General.URL, metadata.url)
|
||||||
metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi)
|
metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi)
|
||||||
metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid)
|
metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid)
|
||||||
metadata.repo_url = metadata_override.get(Keys.General.REPO_URL, metadata.repo_url)
|
metadata.repo_url = metadata_override.get(
|
||||||
|
Keys.General.REPO_URL, metadata.repo_url
|
||||||
|
)
|
||||||
|
|
||||||
metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL, metadata.source_url)
|
metadata.source_url = metadata_override.get(
|
||||||
metadata.source_doi = metadata_override.get(Keys.General.SOURCE_DOI, metadata.source_doi)
|
Keys.General.SOURCE_URL, metadata.source_url
|
||||||
metadata.source_uuid = metadata_override.get(Keys.General.SOURCE_UUID, metadata.source_uuid)
|
)
|
||||||
metadata.source_repo_url = metadata_override.get(Keys.General.SOURCE_REPO_URL, metadata.source_repo_url)
|
metadata.source_doi = metadata_override.get(
|
||||||
|
Keys.General.SOURCE_DOI, metadata.source_doi
|
||||||
|
)
|
||||||
|
metadata.source_uuid = metadata_override.get(
|
||||||
|
Keys.General.SOURCE_UUID, metadata.source_uuid
|
||||||
|
)
|
||||||
|
metadata.source_repo_url = metadata_override.get(
|
||||||
|
Keys.General.SOURCE_REPO_URL, metadata.source_repo_url
|
||||||
|
)
|
||||||
|
|
||||||
# Base Models is received here as an array of models
|
# Base Models is received here as an array of models
|
||||||
metadata.base_models = metadata_override.get("general.base_models", metadata.base_models)
|
metadata.base_models = metadata_override.get(
|
||||||
|
"general.base_models", metadata.base_models
|
||||||
|
)
|
||||||
|
|
||||||
metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
|
metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
|
||||||
metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages)
|
metadata.languages = metadata_override.get(
|
||||||
metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets)
|
Keys.General.LANGUAGES, metadata.languages
|
||||||
|
)
|
||||||
|
metadata.datasets = metadata_override.get(
|
||||||
|
Keys.General.DATASETS, metadata.datasets
|
||||||
|
)
|
||||||
|
|
||||||
# Direct Metadata Override (via direct cli argument)
|
# Direct Metadata Override (via direct cli argument)
|
||||||
if model_name is not None:
|
if model_name is not None:
|
||||||
|
@ -102,7 +141,9 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat
|
||||||
return metadata
|
return metadata
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, Any]:
|
def load_metadata_override(
|
||||||
|
metadata_override_path: Optional[Path] = None,
|
||||||
|
) -> dict[str, Any]:
|
||||||
if metadata_override_path is None or not metadata_override_path.is_file():
|
if metadata_override_path is None or not metadata_override_path.is_file():
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
@ -128,7 +169,9 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]:
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
return data
|
return data
|
||||||
else:
|
else:
|
||||||
logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict")
|
logger.error(
|
||||||
|
f"while reading YAML model card frontmatter, data is {type(data)} instead of dict"
|
||||||
|
)
|
||||||
return {}
|
return {}
|
||||||
else:
|
else:
|
||||||
return {}
|
return {}
|
||||||
|
@ -149,10 +192,21 @@ def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def id_to_title(string):
|
def id_to_title(string):
|
||||||
# Convert capitalization into title form unless acronym or version number
|
# Convert capitalization into title form unless acronym or version number
|
||||||
return ' '.join([w.title() if w.islower() and not re.match(r'^(v\d+(?:\.\d+)*|\d.*)$', w) else w for w in string.strip().replace('-', ' ').split()])
|
return " ".join(
|
||||||
|
[
|
||||||
|
(
|
||||||
|
w.title()
|
||||||
|
if w.islower() and not re.match(r"^(v\d+(?:\.\d+)*|\d.*)$", w)
|
||||||
|
else w
|
||||||
|
)
|
||||||
|
for w in string.strip().replace("-", " ").split()
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_model_id_components(model_id: Optional[str] = None, total_params: int = 0) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
|
def get_model_id_components(
|
||||||
|
model_id: Optional[str] = None, total_params: int = 0
|
||||||
|
) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
|
||||||
# Huggingface often store model id as '<org>/<model name>'
|
# Huggingface often store model id as '<org>/<model name>'
|
||||||
# so let's parse it and apply some heuristics if possible for model name components
|
# so let's parse it and apply some heuristics if possible for model name components
|
||||||
|
|
||||||
|
@@ -160,24 +214,24 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int = 0) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
# model ID missing
return None, None, None, None, None, None

if ' ' in model_id:
if " " in model_id:
# model ID is actually a normal human sentence
# which means its most likely a normal model name only
# not part of the hugging face naming standard, but whatever
return model_id, None, None, None, None, None

if '/' in model_id:
if "/" in model_id:
# model ID (huggingface style)
org_component, model_full_name_component = model_id.split('/', 1)
org_component, model_full_name_component = model_id.split("/", 1)
else:
# model ID but missing org components
org_component, model_full_name_component = None, model_id

# Check if we erroneously matched against './' or '../' etc...
if org_component is not None and org_component[0] == '.':
if org_component is not None and org_component[0] == ".":
org_component = None

name_parts: list[str] = model_full_name_component.split('-')
name_parts: list[str] = model_full_name_component.split("-")

# Remove empty parts
for i in reversed(range(len(name_parts))):
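(Rough sketch of just the org/name split performed above; the size-label, finetune and version heuristics that follow in get_model_id_components are omitted, and split_model_id is a name invented for this example.)

from typing import Optional

def split_model_id(model_id: Optional[str]):
    if model_id is None or " " in model_id:
        # no Hugging Face style '<org>/<model name>' structure to parse
        return None, model_id
    if "/" in model_id:
        org, name = model_id.split("/", 1)
    else:
        org, name = None, model_id
    if org is not None and org.startswith("."):
        org = None  # matched './' or '../', not a real organization
    return org, name

assert split_model_id("meta-llama/Llama-2-7b-hf") == ("meta-llama", "Llama-2-7b-hf")
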
@ -191,14 +245,18 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
|
||||||
# Annotate the name
|
# Annotate the name
|
||||||
for i, part in enumerate(name_parts):
|
for i, part in enumerate(name_parts):
|
||||||
# Version
|
# Version
|
||||||
if re.fullmatch(r'(v|iter)?\d+([.]\d+)*', part, re.IGNORECASE):
|
if re.fullmatch(r"(v|iter)?\d+([.]\d+)*", part, re.IGNORECASE):
|
||||||
name_types[i].add("version")
|
name_types[i].add("version")
|
||||||
# Quant type (should not be there for base models, but still annotated)
|
# Quant type (should not be there for base models, but still annotated)
|
||||||
elif re.fullmatch(r'i?q\d(_\w)*|b?fp?(16|32)', part, re.IGNORECASE):
|
elif re.fullmatch(r"i?q\d(_\w)*|b?fp?(16|32)", part, re.IGNORECASE):
|
||||||
name_types[i].add("type")
|
name_types[i].add("type")
|
||||||
name_parts[i] = part.upper()
|
name_parts[i] = part.upper()
|
||||||
# Model size
|
# Model size
|
||||||
elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)', part, re.IGNORECASE):
|
elif i > 0 and re.fullmatch(
|
||||||
|
r"(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)",
|
||||||
|
part,
|
||||||
|
re.IGNORECASE,
|
||||||
|
):
|
||||||
part = part.replace("_", ".")
|
part = part.replace("_", ".")
|
||||||
# Handle weird bloom-7b1 notation
|
# Handle weird bloom-7b1 notation
|
||||||
if part[-1].isdecimal():
|
if part[-1].isdecimal():
|
||||||
|
@ -209,14 +267,19 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
|
||||||
part = part[:-1] + part[-1].upper()
|
part = part[:-1] + part[-1].upper()
|
||||||
if total_params != 0:
|
if total_params != 0:
|
||||||
try:
|
try:
|
||||||
label_params = float(part[:-1]) * pow(1000, " KMBT".find(part[-1]))
|
label_params = float(part[:-1]) * pow(
|
||||||
|
1000, " KMBT".find(part[-1])
|
||||||
|
)
|
||||||
# Only use it as a size label if it's close or bigger than the model size
|
# Only use it as a size label if it's close or bigger than the model size
|
||||||
# Note that LoRA adapters don't necessarily include all layers,
|
# Note that LoRA adapters don't necessarily include all layers,
|
||||||
# so this is why bigger label sizes are accepted.
|
# so this is why bigger label sizes are accepted.
|
||||||
# Do not use the size label when it's smaller than 1/8 of the model size
|
# Do not use the size label when it's smaller than 1/8 of the model size
|
||||||
if (total_params < 0 and label_params < abs(total_params) // 8) or (
|
if (
|
||||||
|
total_params < 0 and label_params < abs(total_params) // 8
|
||||||
|
) or (
|
||||||
# Check both directions when the current model isn't a LoRA adapter
|
# Check both directions when the current model isn't a LoRA adapter
|
||||||
total_params > 0 and abs(label_params - total_params) > 7 * total_params // 8
|
total_params > 0
|
||||||
|
and abs(label_params - total_params) > 7 * total_params // 8
|
||||||
):
|
):
|
||||||
# Likely a context length
|
# Likely a context length
|
||||||
name_types[i].add("finetune")
|
name_types[i].add("finetune")
|
||||||
|
@ -229,7 +292,9 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
|
||||||
name_types[i].add("size_label")
|
name_types[i].add("size_label")
|
||||||
name_parts[i] = part
|
name_parts[i] = part
|
||||||
# Some easy to recognize finetune names
|
# Some easy to recognize finetune names
|
||||||
elif i > 0 and re.fullmatch(r'chat|instruct|vision|lora', part, re.IGNORECASE):
|
elif i > 0 and re.fullmatch(
|
||||||
|
r"chat|instruct|vision|lora", part, re.IGNORECASE
|
||||||
|
):
|
||||||
if total_params < 0 and part.lower() == "lora":
|
if total_params < 0 and part.lower() == "lora":
|
||||||
# ignore redundant "lora" in the finetune part when the output is a lora adapter
|
# ignore redundant "lora" in the finetune part when the output is a lora adapter
|
||||||
name_types[i].add("type")
|
name_types[i].add("type")
|
||||||
|
@ -238,7 +303,12 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
|
||||||
|
|
||||||
# Ignore word-based size labels when there is at least a number-based one present
|
# Ignore word-based size labels when there is at least a number-based one present
|
||||||
# TODO: should word-based size labels always be removed instead?
|
# TODO: should word-based size labels always be removed instead?
|
||||||
if any(c.isdecimal() for n, t in zip(name_parts, name_types) if "size_label" in t for c in n):
|
if any(
|
||||||
|
c.isdecimal()
|
||||||
|
for n, t in zip(name_parts, name_types)
|
||||||
|
if "size_label" in t
|
||||||
|
for c in n
|
||||||
|
):
|
||||||
for n, t in zip(name_parts, name_types):
|
for n, t in zip(name_parts, name_types):
|
||||||
if "size_label" in t:
|
if "size_label" in t:
|
||||||
if all(c.isalpha() for c in n):
|
if all(c.isalpha() for c in n):
|
||||||
|
@ -262,22 +332,55 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None
|
basename = (
|
||||||
|
"-".join(n for n, t in zip(name_parts, name_types) if "basename" in t)
|
||||||
|
or None
|
||||||
|
)
|
||||||
# Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys)
|
# Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys)
|
||||||
size_label = "-".join(dict.fromkeys(s for s, t in zip(name_parts, name_types) if "size_label" in t).keys()) or None
|
size_label = (
|
||||||
finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None
|
"-".join(
|
||||||
|
dict.fromkeys(
|
||||||
|
s for s, t in zip(name_parts, name_types) if "size_label" in t
|
||||||
|
).keys()
|
||||||
|
)
|
||||||
|
or None
|
||||||
|
)
|
||||||
|
finetune = (
|
||||||
|
"-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t)
|
||||||
|
or None
|
||||||
|
)
|
||||||
# TODO: should the basename version always be excluded?
|
# TODO: should the basename version always be excluded?
|
||||||
# NOTE: multiple finetune versions are joined together
|
# NOTE: multiple finetune versions are joined together
|
||||||
version = "-".join(v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t) or None
|
version = (
|
||||||
|
"-".join(
|
||||||
|
v
|
||||||
|
for v, t, in zip(name_parts, name_types)
|
||||||
|
if "version" in t and "basename" not in t
|
||||||
|
)
|
||||||
|
or None
|
||||||
|
)
|
||||||
|
|
||||||
if size_label is None and finetune is None and version is None:
|
if size_label is None and finetune is None and version is None:
|
||||||
# Too ambiguous, output nothing
|
# Too ambiguous, output nothing
|
||||||
basename = None
|
basename = None
|
||||||
|
|
||||||
return model_full_name_component, org_component, basename, finetune, version, size_label
|
return (
|
||||||
|
model_full_name_component,
|
||||||
|
org_component,
|
||||||
|
basename,
|
||||||
|
finetune,
|
||||||
|
version,
|
||||||
|
size_label,
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None, total_params: int = 0) -> Metadata:
|
def apply_metadata_heuristic(
|
||||||
|
metadata: Metadata,
|
||||||
|
model_card: Optional[dict] = None,
|
||||||
|
hf_params: Optional[dict] = None,
|
||||||
|
model_path: Optional[Path] = None,
|
||||||
|
total_params: int = 0,
|
||||||
|
) -> Metadata:
|
||||||
# Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
|
# Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
|
||||||
|
|
||||||
# Model Card Heuristics
|
# Model Card Heuristics
|
||||||
|
@ -317,16 +420,30 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
|
||||||
|
|
||||||
for model_id in metadata_base_models:
|
for model_id in metadata_base_models:
|
||||||
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
||||||
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
|
(
|
||||||
|
model_full_name_component,
|
||||||
|
org_component,
|
||||||
|
basename,
|
||||||
|
finetune,
|
||||||
|
version,
|
||||||
|
size_label,
|
||||||
|
) = Metadata.get_model_id_components(model_id, total_params)
|
||||||
base_model = {}
|
base_model = {}
|
||||||
if model_full_name_component is not None:
|
if model_full_name_component is not None:
|
||||||
base_model["name"] = Metadata.id_to_title(model_full_name_component)
|
base_model["name"] = Metadata.id_to_title(
|
||||||
|
model_full_name_component
|
||||||
|
)
|
||||||
if org_component is not None:
|
if org_component is not None:
|
||||||
base_model["organization"] = Metadata.id_to_title(org_component)
|
base_model["organization"] = Metadata.id_to_title(org_component)
|
||||||
if version is not None:
|
if version is not None:
|
||||||
base_model["version"] = version
|
base_model["version"] = version
|
||||||
if org_component is not None and model_full_name_component is not None:
|
if (
|
||||||
base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
|
org_component is not None
|
||||||
|
and model_full_name_component is not None
|
||||||
|
):
|
||||||
|
base_model["repo_url"] = (
|
||||||
|
f"https://huggingface.co/{org_component}/{model_full_name_component}"
|
||||||
|
)
|
||||||
metadata.base_models.append(base_model)
|
metadata.base_models.append(base_model)
|
||||||
|
|
||||||
if "license" in model_card and metadata.license is None:
|
if "license" in model_card and metadata.license is None:
|
||||||
|
@ -360,7 +477,9 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
|
||||||
elif isinstance(pipeline_tags_value, list):
|
elif isinstance(pipeline_tags_value, list):
|
||||||
metadata.tags.extend(pipeline_tags_value)
|
metadata.tags.extend(pipeline_tags_value)
|
||||||
|
|
||||||
language_value = model_card.get("languages", model_card.get("language", None))
|
language_value = model_card.get(
|
||||||
|
"languages", model_card.get("language", None)
|
||||||
|
)
|
||||||
if language_value is not None:
|
if language_value is not None:
|
||||||
|
|
||||||
if metadata.languages is None:
|
if metadata.languages is None:
|
||||||
|
@ -388,11 +507,18 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
|
||||||
if hf_params is not None:
|
if hf_params is not None:
|
||||||
|
|
||||||
hf_name_or_path = hf_params.get("_name_or_path")
|
hf_name_or_path = hf_params.get("_name_or_path")
|
||||||
if hf_name_or_path is not None and hf_name_or_path.count('/') <= 1:
|
if hf_name_or_path is not None and hf_name_or_path.count("/") <= 1:
|
||||||
# Use _name_or_path only if its actually a model name and not some computer path
|
# Use _name_or_path only if its actually a model name and not some computer path
|
||||||
# e.g. 'meta-llama/Llama-2-7b-hf'
|
# e.g. 'meta-llama/Llama-2-7b-hf'
|
||||||
model_id = hf_name_or_path
|
model_id = hf_name_or_path
|
||||||
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
|
(
|
||||||
|
model_full_name_component,
|
||||||
|
org_component,
|
||||||
|
basename,
|
||||||
|
finetune,
|
||||||
|
version,
|
||||||
|
size_label,
|
||||||
|
) = Metadata.get_model_id_components(model_id, total_params)
|
||||||
if metadata.name is None and model_full_name_component is not None:
|
if metadata.name is None and model_full_name_component is not None:
|
||||||
metadata.name = Metadata.id_to_title(model_full_name_component)
|
metadata.name = Metadata.id_to_title(model_full_name_component)
|
||||||
if metadata.organization is None and org_component is not None:
|
if metadata.organization is None and org_component is not None:
|
||||||
|
@ -410,7 +536,14 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
|
||||||
############################################
|
############################################
|
||||||
if model_path is not None:
|
if model_path is not None:
|
||||||
model_id = model_path.name
|
model_id = model_path.name
|
||||||
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
|
(
|
||||||
|
model_full_name_component,
|
||||||
|
org_component,
|
||||||
|
basename,
|
||||||
|
finetune,
|
||||||
|
version,
|
||||||
|
size_label,
|
||||||
|
) = Metadata.get_model_id_components(model_id, total_params)
|
||||||
if metadata.name is None and model_full_name_component is not None:
|
if metadata.name is None and model_full_name_component is not None:
|
||||||
metadata.name = Metadata.id_to_title(model_full_name_component)
|
metadata.name = Metadata.id_to_title(model_full_name_component)
|
||||||
if metadata.organization is None and org_component is not None:
|
if metadata.organization is None and org_component is not None:
|
||||||
|
@ -485,7 +618,9 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
|
||||||
if "version" in base_model_entry:
|
if "version" in base_model_entry:
|
||||||
gguf_writer.add_base_model_version(key, base_model_entry["version"])
|
gguf_writer.add_base_model_version(key, base_model_entry["version"])
|
||||||
if "organization" in base_model_entry:
|
if "organization" in base_model_entry:
|
||||||
gguf_writer.add_base_model_organization(key, base_model_entry["organization"])
|
gguf_writer.add_base_model_organization(
|
||||||
|
key, base_model_entry["organization"]
|
||||||
|
)
|
||||||
if "url" in base_model_entry:
|
if "url" in base_model_entry:
|
||||||
gguf_writer.add_base_model_url(key, base_model_entry["url"])
|
gguf_writer.add_base_model_url(key, base_model_entry["url"])
|
||||||
if "doi" in base_model_entry:
|
if "doi" in base_model_entry:
|
||||||
|
@ -493,7 +628,9 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
|
||||||
if "uuid" in base_model_entry:
|
if "uuid" in base_model_entry:
|
||||||
gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"])
|
gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"])
|
||||||
if "repo_url" in base_model_entry:
|
if "repo_url" in base_model_entry:
|
||||||
gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"])
|
gguf_writer.add_base_model_repo_url(
|
||||||
|
key, base_model_entry["repo_url"]
|
||||||
|
)
|
||||||
|
|
||||||
if self.tags is not None:
|
if self.tags is not None:
|
||||||
gguf_writer.add_tags(self.tags)
|
gguf_writer.add_tags(self.tags)
|
||||||
|
|
|
@@ -12,14 +12,18 @@
def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
block_size, type_size = GGML_QUANT_SIZES[quant_type]
if shape[-1] % block_size != 0:
raise ValueError(f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})")
raise ValueError(
f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})"
)
return (*shape[:-1], shape[-1] // block_size * type_size)


def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
block_size, type_size = GGML_QUANT_SIZES[quant_type]
if shape[-1] % type_size != 0:
raise ValueError(f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})")
raise ValueError(
f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})"
)
return (*shape[:-1], shape[-1] // type_size * block_size)
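(Worked example of the shape arithmetic above, assuming Q8_0's usual layout of 32 weights per block stored in 34 bytes; the real numbers come from GGML_QUANT_SIZES, so treat these constants as illustrative.)

block_size, type_size = 32, 34  # assumed Q8_0 values

row = 4096                                  # float elements per row
assert row % block_size == 0
byte_row = row // block_size * type_size    # quant_shape_to_byte_shape step
assert byte_row == 4352
assert byte_row // type_size * block_size == row  # quant_shape_from_byte_shape round-trip
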
@@ -27,14 +31,23 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
n = n.astype(np.float32, copy=False).view(np.uint32)
# force nan to quiet
n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n)
n = np.where(
(n & 0x7FFFFFFF) > 0x7F800000,
(n & np.uint32(0xFFFF0000)) | np.uint32(64 << 16),
n,
)
# round to nearest even
n = (np.uint64(n) + (0x7fff + ((n >> 16) & 1))) >> 16
n = (np.uint64(n) + (0x7FFF + ((n >> 16) & 1))) >> 16
return n.astype(np.uint16)


# This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
def __apply_over_grouped_rows(
func: Callable[[np.ndarray], np.ndarray],
arr: np.ndarray,
otype: DTypeLike,
oshape: tuple[int, ...],
) -> np.ndarray:
rows = arr.reshape((-1, arr.shape[-1]))
osize = 1
for dim in oshape:
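(The conversion above quiets NaNs and then rounds the high 16 bits to nearest-even; a standalone numpy sketch of the same recipe, kept outside the gguf package for illustration.)

import numpy as np

def fp32_to_bf16(a: np.ndarray) -> np.ndarray:
    n = a.astype(np.float32, copy=False).view(np.uint32)
    # quiet any NaN, mirroring the np.where above
    n = np.where((n & 0x7FFFFFFF) > 0x7F800000,
                 (n & np.uint32(0xFFFF0000)) | np.uint32(64 << 16), n)
    # round to nearest even and keep the top 16 bits
    n = (np.uint64(n) + (0x7FFF + ((n >> 16) & 1))) >> 16
    return n.astype(np.uint16)

assert fp32_to_bf16(np.array([1.0], dtype=np.float32))[0] == 0x3F80
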
@@ -42,15 +55,23 @@ def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
out = np.empty(shape=osize, dtype=otype)
# compute over groups of 16 rows (arbitrary, but seems good for performance)
n_groups = (rows.shape[0] // 16) or 1
np.concatenate([func(group).ravel() for group in np.array_split(rows, n_groups)], axis=0, out=out)
np.concatenate(
[func(group).ravel() for group in np.array_split(rows, n_groups)],
axis=0,
out=out,
)
return out.reshape(oshape)


def __quantize_bf16_array(n: np.ndarray) -> np.ndarray:
return __apply_over_grouped_rows(__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape)
return __apply_over_grouped_rows(
__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape
)


__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(__quantize_bf16_array, meta_noop=np.uint16)
__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(
__quantize_bf16_array, meta_noop=np.uint16
)


def quantize_bf16(n: np.ndarray):
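(Sketch of the grouped-row batching used by __apply_over_grouped_rows above: flatten to rows, call the row function on chunks of roughly 16 rows, and concatenate into one preallocated buffer. The toy no-op quantizer is an assumption of this example.)

import numpy as np

def apply_over_grouped_rows(func, arr: np.ndarray, otype, oshape) -> np.ndarray:
    rows = arr.reshape((-1, arr.shape[-1]))
    out = np.empty(shape=int(np.prod(oshape)), dtype=otype)
    n_groups = (rows.shape[0] // 16) or 1
    np.concatenate(
        [func(group).ravel() for group in np.array_split(rows, n_groups)],
        axis=0,
        out=out,
    )
    return out.reshape(oshape)

x = np.random.rand(64, 32).astype(np.float32)
assert np.array_equal(x, apply_over_grouped_rows(lambda g: g, x, np.float32, x.shape))
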
@@ -105,7 +126,12 @@ def __quantize_q8_0_rows(n: np.ndarray) -> np.ndarray:


def __quantize_q8_0_array(n: np.ndarray) -> np.ndarray:
return __apply_over_grouped_rows(__quantize_q8_0_rows, arr=n, otype=np.uint8, oshape=__quantize_q8_0_shape_change(n.shape))
return __apply_over_grouped_rows(
__quantize_q8_0_rows,
arr=n,
otype=np.uint8,
oshape=__quantize_q8_0_shape_change(n.shape),
)


__quantize_q8_0_lazy = LazyNumpyTensor._wrap_fn(
@ -28,12 +28,10 @@ class TensorNameMap:
|
||||||
"transformer.token_embeddings", # openelm
|
"transformer.token_embeddings", # openelm
|
||||||
"shared", # t5
|
"shared", # t5
|
||||||
),
|
),
|
||||||
|
|
||||||
# Token type embeddings
|
# Token type embeddings
|
||||||
MODEL_TENSOR.TOKEN_TYPES: (
|
MODEL_TENSOR.TOKEN_TYPES: (
|
||||||
"embeddings.token_type_embeddings", # bert nomic-bert
|
"embeddings.token_type_embeddings", # bert nomic-bert
|
||||||
),
|
),
|
||||||
|
|
||||||
# Normalization of token embeddings
|
# Normalization of token embeddings
|
||||||
MODEL_TENSOR.TOKEN_EMBD_NORM: (
|
MODEL_TENSOR.TOKEN_EMBD_NORM: (
|
||||||
"word_embeddings_layernorm", # bloom
|
"word_embeddings_layernorm", # bloom
|
||||||
|
@ -41,14 +39,12 @@ class TensorNameMap:
|
||||||
"emb_ln", # nomic-bert
|
"emb_ln", # nomic-bert
|
||||||
"transformer.norm", # openelm
|
"transformer.norm", # openelm
|
||||||
),
|
),
|
||||||
|
|
||||||
# Position embeddings
|
# Position embeddings
|
||||||
MODEL_TENSOR.POS_EMBD: (
|
MODEL_TENSOR.POS_EMBD: (
|
||||||
"transformer.wpe", # gpt2
|
"transformer.wpe", # gpt2
|
||||||
"embeddings.position_embeddings", # bert
|
"embeddings.position_embeddings", # bert
|
||||||
"wpe", # gpt2
|
"wpe", # gpt2
|
||||||
),
|
),
|
||||||
|
|
||||||
# Output
|
# Output
|
||||||
MODEL_TENSOR.OUTPUT: (
|
MODEL_TENSOR.OUTPUT: (
|
||||||
"embed_out", # gptneox
|
"embed_out", # gptneox
|
||||||
|
@ -58,7 +54,6 @@ class TensorNameMap:
|
||||||
"lm_head.linear", # phi2
|
"lm_head.linear", # phi2
|
||||||
"output_layer", # chatglm
|
"output_layer", # chatglm
|
||||||
),
|
),
|
||||||
|
|
||||||
# Output norm
|
# Output norm
|
||||||
MODEL_TENSOR.OUTPUT_NORM: (
|
MODEL_TENSOR.OUTPUT_NORM: (
|
||||||
"gpt_neox.final_layer_norm", # gptneox
|
"gpt_neox.final_layer_norm", # gptneox
|
||||||
|
@ -76,7 +71,6 @@ class TensorNameMap:
|
||||||
"encoder.final_layernorm", # chatglm
|
"encoder.final_layernorm", # chatglm
|
||||||
"transformer.norm", # openelm
|
"transformer.norm", # openelm
|
||||||
),
|
),
|
||||||
|
|
||||||
# Rope frequencies
|
# Rope frequencies
|
||||||
MODEL_TENSOR.ROPE_FREQS: (
|
MODEL_TENSOR.ROPE_FREQS: (
|
||||||
"rope.freqs", # llama-pth
|
"rope.freqs", # llama-pth
|
||||||
|
@ -108,13 +102,11 @@ class TensorNameMap:
|
||||||
"encoder.layers.{bid}.input_layernorm", # chatglm
|
"encoder.layers.{bid}.input_layernorm", # chatglm
|
||||||
"transformer.layers.{bid}.attn_norm", # openelm
|
"transformer.layers.{bid}.attn_norm", # openelm
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention norm 2
|
# Attention norm 2
|
||||||
MODEL_TENSOR.ATTN_NORM_2: (
|
MODEL_TENSOR.ATTN_NORM_2: (
|
||||||
"transformer.h.{bid}.ln_attn", # falcon40b
|
"transformer.h.{bid}.ln_attn", # falcon40b
|
||||||
"encoder.layer.{bid}.layer_norm_1", # jina-v2-code
|
"encoder.layer.{bid}.layer_norm_1", # jina-v2-code
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention query-key-value
|
# Attention query-key-value
|
||||||
MODEL_TENSOR.ATTN_QKV: (
|
MODEL_TENSOR.ATTN_QKV: (
|
||||||
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
|
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
|
||||||
|
@ -132,7 +124,6 @@ class TensorNameMap:
|
||||||
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
|
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
|
||||||
"transformer.layers.{bid}.attn.qkv_proj", # openelm
|
"transformer.layers.{bid}.attn.qkv_proj", # openelm
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention query
|
# Attention query
|
||||||
MODEL_TENSOR.ATTN_Q: (
|
MODEL_TENSOR.ATTN_Q: (
|
||||||
"model.layers.{bid}.self_attn.q_proj", # llama-hf
|
"model.layers.{bid}.self_attn.q_proj", # llama-hf
|
||||||
|
@ -143,7 +134,6 @@ class TensorNameMap:
|
||||||
"model.layers.{bid}.attention.wq", # internlm2
|
"model.layers.{bid}.attention.wq", # internlm2
|
||||||
"transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
|
"transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention key
|
# Attention key
|
||||||
MODEL_TENSOR.ATTN_K: (
|
MODEL_TENSOR.ATTN_K: (
|
||||||
"model.layers.{bid}.self_attn.k_proj", # llama-hf
|
"model.layers.{bid}.self_attn.k_proj", # llama-hf
|
||||||
|
@ -155,7 +145,6 @@ class TensorNameMap:
|
||||||
"model.layers.{bid}.attention.wk", # internlm2
|
"model.layers.{bid}.attention.wk", # internlm2
|
||||||
"transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
|
"transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention value
|
# Attention value
|
||||||
MODEL_TENSOR.ATTN_V: (
|
MODEL_TENSOR.ATTN_V: (
|
||||||
"model.layers.{bid}.self_attn.v_proj", # llama-hf
|
"model.layers.{bid}.self_attn.v_proj", # llama-hf
|
||||||
|
@ -165,9 +154,8 @@ class TensorNameMap:
|
||||||
"transformer.h.{bid}.attn.v", # refact
|
"transformer.h.{bid}.attn.v", # refact
|
||||||
"model.layers.layers.{bid}.self_attn.v_proj", # plamo
|
"model.layers.layers.{bid}.self_attn.v_proj", # plamo
|
||||||
"model.layers.{bid}.attention.wv", # internlm2
|
"model.layers.{bid}.attention.wv", # internlm2
|
||||||
"transformer.decoder_layer.{bid}.multi_head_attention.value" # Grok
|
"transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention output
|
# Attention output
|
||||||
MODEL_TENSOR.ATTN_OUT: (
|
MODEL_TENSOR.ATTN_OUT: (
|
||||||
"gpt_neox.layers.{bid}.attention.dense", # gptneox
|
"gpt_neox.layers.{bid}.attention.dense", # gptneox
|
||||||
|
@ -191,7 +179,6 @@ class TensorNameMap:
|
||||||
"encoder.layers.{bid}.self_attention.dense", # chatglm
|
"encoder.layers.{bid}.self_attention.dense", # chatglm
|
||||||
"transformer.layers.{bid}.attn.out_proj", # openelm
|
"transformer.layers.{bid}.attn.out_proj", # openelm
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention output norm
|
# Attention output norm
|
||||||
MODEL_TENSOR.ATTN_OUT_NORM: (
|
MODEL_TENSOR.ATTN_OUT_NORM: (
|
||||||
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
|
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
|
||||||
|
@ -199,11 +186,9 @@ class TensorNameMap:
|
||||||
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok
|
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok
|
||||||
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
|
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.ATTN_POST_NORM: (
|
MODEL_TENSOR.ATTN_POST_NORM: (
|
||||||
"model.layers.{bid}.post_attention_layernorm", # gemma2
|
"model.layers.{bid}.post_attention_layernorm", # gemma2
|
||||||
),
|
),
|
||||||
|
|
||||||
# Rotary embeddings
|
# Rotary embeddings
|
||||||
MODEL_TENSOR.ATTN_ROT_EMBD: (
|
MODEL_TENSOR.ATTN_ROT_EMBD: (
|
||||||
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
|
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
|
||||||
|
@ -211,7 +196,6 @@ class TensorNameMap:
|
||||||
"model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
|
"model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
|
||||||
"transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell
|
"transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell
|
||||||
),
|
),
|
||||||
|
|
||||||
# Feed-forward norm
|
# Feed-forward norm
|
||||||
MODEL_TENSOR.FFN_NORM: (
|
MODEL_TENSOR.FFN_NORM: (
|
||||||
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
|
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
|
||||||
|
@ -228,17 +212,14 @@ class TensorNameMap:
|
||||||
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
||||||
"transformer.layers.{bid}.ffn_norm", # openelm
|
"transformer.layers.{bid}.ffn_norm", # openelm
|
||||||
),
|
),
|
||||||
|
|
||||||
# Post feed-forward norm
|
# Post feed-forward norm
|
||||||
MODEL_TENSOR.FFN_PRE_NORM: (
|
MODEL_TENSOR.FFN_PRE_NORM: (
|
||||||
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2
|
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2
|
||||||
),
|
),
|
||||||
|
|
||||||
# Post feed-forward norm
|
# Post feed-forward norm
|
||||||
MODEL_TENSOR.FFN_POST_NORM: (
|
MODEL_TENSOR.FFN_POST_NORM: (
|
||||||
"model.layers.{bid}.post_feedforward_layernorm", # gemma2
|
"model.layers.{bid}.post_feedforward_layernorm", # gemma2
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.FFN_GATE_INP: (
|
MODEL_TENSOR.FFN_GATE_INP: (
|
||||||
"layers.{bid}.feed_forward.gate", # mixtral
|
"layers.{bid}.feed_forward.gate", # mixtral
|
||||||
"model.layers.{bid}.block_sparse_moe.gate", # mixtral
|
"model.layers.{bid}.block_sparse_moe.gate", # mixtral
|
||||||
|
@ -246,11 +227,9 @@ class TensorNameMap:
|
||||||
"transformer.decoder_layer.{bid}.router", # Grok
|
"transformer.decoder_layer.{bid}.router", # Grok
|
||||||
"transformer.blocks.{bid}.ffn.router.layer", # dbrx
|
"transformer.blocks.{bid}.ffn.router.layer", # dbrx
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
|
MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
|
||||||
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
|
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
|
||||||
),
|
),
|
||||||
|
|
||||||
# Feed-forward up
|
# Feed-forward up
|
||||||
MODEL_TENSOR.FFN_UP: (
|
MODEL_TENSOR.FFN_UP: (
|
||||||
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
|
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
|
||||||
|
@ -278,24 +257,18 @@ class TensorNameMap:
|
||||||
"model.layers.{bid}.residual_mlp.w3", # arctic
|
"model.layers.{bid}.residual_mlp.w3", # arctic
|
||||||
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.FFN_UP_EXP: (
|
MODEL_TENSOR.FFN_UP_EXP: (
|
||||||
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
|
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
|
||||||
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
|
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
|
||||||
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
|
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
|
||||||
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
|
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.FFN_UP_SHEXP: (
|
MODEL_TENSOR.FFN_UP_SHEXP: (
|
||||||
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
|
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
|
||||||
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
|
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
|
||||||
),
|
),
|
||||||
|
|
||||||
# AWQ-activation gate
|
# AWQ-activation gate
|
||||||
MODEL_TENSOR.FFN_ACT: (
"transformer.blocks.{bid}.ffn.act", # mpt
),
MODEL_TENSOR.FFN_ACT: ("transformer.blocks.{bid}.ffn.act",), # mpt

# Feed-forward gate
|
# Feed-forward gate
|
||||||
MODEL_TENSOR.FFN_GATE: (
|
MODEL_TENSOR.FFN_GATE: (
|
||||||
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
|
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
|
||||||
|
@ -309,19 +282,16 @@ class TensorNameMap:
|
||||||
"transformer.h.{bid}.mlp.linear_1", # refact
|
"transformer.h.{bid}.mlp.linear_1", # refact
|
||||||
"model.layers.{bid}.residual_mlp.w1", # arctic
|
"model.layers.{bid}.residual_mlp.w1", # arctic
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.FFN_GATE_EXP: (
|
MODEL_TENSOR.FFN_GATE_EXP: (
|
||||||
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
|
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
|
||||||
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
|
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
|
||||||
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
|
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
|
||||||
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
|
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.FFN_GATE_SHEXP: (
|
MODEL_TENSOR.FFN_GATE_SHEXP: (
|
||||||
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
|
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
|
||||||
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
|
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
|
||||||
),
|
),
|
||||||
|
|
||||||
# Feed-forward down
|
# Feed-forward down
|
||||||
MODEL_TENSOR.FFN_DOWN: (
|
MODEL_TENSOR.FFN_DOWN: (
|
||||||
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
|
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
|
||||||
|
@ -348,19 +318,16 @@ class TensorNameMap:
|
||||||
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
||||||
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.FFN_DOWN_EXP: (
|
MODEL_TENSOR.FFN_DOWN_EXP: (
|
||||||
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
|
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
|
||||||
"transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
|
"transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
|
||||||
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
|
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
|
||||||
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
|
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.FFN_DOWN_SHEXP: (
|
MODEL_TENSOR.FFN_DOWN_SHEXP: (
|
||||||
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
|
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
|
||||||
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
|
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.ATTN_Q_NORM: (
|
MODEL_TENSOR.ATTN_Q_NORM: (
|
||||||
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
||||||
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
||||||
|
@ -369,7 +336,6 @@ class TensorNameMap:
|
||||||
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
||||||
"transformer.layers.{bid}.attn.q_norm", # openelm
|
"transformer.layers.{bid}.attn.q_norm", # openelm
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.ATTN_K_NORM: (
|
MODEL_TENSOR.ATTN_K_NORM: (
|
||||||
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
||||||
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
||||||
|
@@ -378,210 +344,131 @@ class TensorNameMap:
            "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2
            "transformer.layers.{bid}.attn.k_norm",  # openelm
        ),

        MODEL_TENSOR.ROPE_FREQS: (
            "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq",  # persimmon
        ),

        MODEL_TENSOR.LAYER_OUT_NORM: (
            "encoder.layer.{bid}.output.LayerNorm",  # bert
            "encoder.layers.{bid}.norm2",  # nomic-bert
            "transformer.decoder_layer.{bid}.rms_norm_3",  # Grok
            "encoder.layer.{bid}.mlp.layernorm",  # jina-bert-v2
            "encoder.layer.{bid}.layer_norm_2",  # jina-v2-code
        ),

        MODEL_TENSOR.SSM_IN: (
            "model.layers.{bid}.in_proj",
            "backbone.layers.{bid}.mixer.in_proj",
        ),

        MODEL_TENSOR.SSM_CONV1D: (
            "model.layers.{bid}.conv1d",
            "backbone.layers.{bid}.mixer.conv1d",
        ),

        MODEL_TENSOR.SSM_X: (
            "model.layers.{bid}.x_proj",
            "backbone.layers.{bid}.mixer.x_proj",
        ),

        MODEL_TENSOR.SSM_DT: (
            "model.layers.{bid}.dt_proj",
            "backbone.layers.{bid}.mixer.dt_proj",
        ),

        MODEL_TENSOR.SSM_A: (
            "model.layers.{bid}.A_log",
            "backbone.layers.{bid}.mixer.A_log",
        ),

        MODEL_TENSOR.SSM_D: (
            "model.layers.{bid}.D",
            "backbone.layers.{bid}.mixer.D",
        ),

        MODEL_TENSOR.SSM_OUT: (
            "model.layers.{bid}.out_proj",
            "backbone.layers.{bid}.mixer.out_proj",
        ),

        MODEL_TENSOR.ATTN_Q_A: ("model.layers.{bid}.self_attn.q_a_proj",),  # deepseek2
        MODEL_TENSOR.ATTN_Q_B: ("model.layers.{bid}.self_attn.q_b_proj",),  # deepseek2

        MODEL_TENSOR.ATTN_KV_A_MQA: (
            "model.layers.{bid}.self_attn.kv_a_proj_with_mqa",  # deepseek2
        ),

        MODEL_TENSOR.ATTN_KV_B: (
            "model.layers.{bid}.self_attn.kv_b_proj",  # deepseek2
        ),

        MODEL_TENSOR.ATTN_Q_A_NORM: (
            "model.layers.{bid}.self_attn.q_a_layernorm",  # deepseek2
        ),

        MODEL_TENSOR.ATTN_KV_A_NORM: (
            "model.layers.{bid}.self_attn.kv_a_layernorm",  # deepseek2
        ),

        MODEL_TENSOR.ATTN_SUB_NORM: (
            "model.layers.{bid}.self_attn.inner_attn_ln",  # bitnet
        ),

        MODEL_TENSOR.FFN_SUB_NORM: ("model.layers.{bid}.mlp.ffn_layernorm",),  # bitnet

        MODEL_TENSOR.DEC_ATTN_NORM: ("decoder.block.{bid}.layer.0.layer_norm",),  # t5
        MODEL_TENSOR.DEC_ATTN_Q: ("decoder.block.{bid}.layer.0.SelfAttention.q",),  # t5
        MODEL_TENSOR.DEC_ATTN_K: ("decoder.block.{bid}.layer.0.SelfAttention.k",),  # t5
        MODEL_TENSOR.DEC_ATTN_V: ("decoder.block.{bid}.layer.0.SelfAttention.v",),  # t5

        MODEL_TENSOR.DEC_ATTN_OUT: (
            "decoder.block.{bid}.layer.0.SelfAttention.o",  # t5
        ),

        MODEL_TENSOR.DEC_ATTN_REL_B: (
            "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias",  # t5
        ),

        MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
            "decoder.block.{bid}.layer.1.layer_norm",  # t5
        ),

        MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
            "decoder.block.{bid}.layer.1.EncDecAttention.q",  # t5
        ),

        MODEL_TENSOR.DEC_CROSS_ATTN_K: (
            "decoder.block.{bid}.layer.1.EncDecAttention.k",  # t5
        ),

        MODEL_TENSOR.DEC_CROSS_ATTN_V: (
            "decoder.block.{bid}.layer.1.EncDecAttention.v",  # t5
        ),

        MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
            "decoder.block.{bid}.layer.1.EncDecAttention.o",  # t5
        ),

        MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
            "decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias",  # t5
        ),

        MODEL_TENSOR.DEC_FFN_NORM: ("decoder.block.{bid}.layer.2.layer_norm",),  # t5

        MODEL_TENSOR.DEC_FFN_GATE: (
            "decoder.block.{bid}.layer.2.DenseReluDense.wi_0",  # flan-t5
        ),

        MODEL_TENSOR.DEC_FFN_UP: (
            "decoder.block.{bid}.layer.2.DenseReluDense.wi",  # t5
            "decoder.block.{bid}.layer.2.DenseReluDense.wi_1",  # flan-t5
        ),

        MODEL_TENSOR.DEC_FFN_DOWN: (
            "decoder.block.{bid}.layer.2.DenseReluDense.wo",  # t5
        ),

        MODEL_TENSOR.DEC_OUTPUT_NORM: ("decoder.final_layer_norm",),  # t5

        MODEL_TENSOR.ENC_ATTN_NORM: ("encoder.block.{bid}.layer.0.layer_norm",),  # t5
        MODEL_TENSOR.ENC_ATTN_Q: ("encoder.block.{bid}.layer.0.SelfAttention.q",),  # t5
        MODEL_TENSOR.ENC_ATTN_K: ("encoder.block.{bid}.layer.0.SelfAttention.k",),  # t5
        MODEL_TENSOR.ENC_ATTN_V: ("encoder.block.{bid}.layer.0.SelfAttention.v",),  # t5

        MODEL_TENSOR.ENC_ATTN_OUT: (
            "encoder.block.{bid}.layer.0.SelfAttention.o",  # t5
        ),

        MODEL_TENSOR.ENC_ATTN_REL_B: (
            "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias",  # t5
        ),

        MODEL_TENSOR.ENC_FFN_NORM: ("encoder.block.{bid}.layer.1.layer_norm",),  # t5

        MODEL_TENSOR.ENC_FFN_GATE: (
            "encoder.block.{bid}.layer.1.DenseReluDense.wi_0",  # flan-t5
        ),

        MODEL_TENSOR.ENC_FFN_UP: (
            "encoder.block.{bid}.layer.1.DenseReluDense.wi",  # t5
            "encoder.block.{bid}.layer.1.DenseReluDense.wi_1",  # flan-t5
        ),

        MODEL_TENSOR.ENC_FFN_DOWN: (
            "encoder.block.{bid}.layer.1.DenseReluDense.wo",  # t5
        ),

        MODEL_TENSOR.ENC_OUTPUT_NORM: ("encoder.final_layer_norm",),  # t5
    }

    # architecture-specific block mappings
    arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = {
        MODEL_ARCH.ARCTIC: {
            MODEL_TENSOR.FFN_NORM: ("model.layers.{bid}.residual_layernorm",),
            MODEL_TENSOR.FFN_NORM_EXP: ("model.layers.{bid}.post_attention_layernorm",),
        },
    }
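Editor's note, not part of the diff: a minimal sketch of how the mapping tables above are typically consumed. The import paths and the choice of MODEL_ARCH.ARCTIC are assumptions made for illustration only.

# Illustration only - module paths are assumed, not taken from this commit.
from gguf.constants import MODEL_ARCH, MODEL_TENSOR
from gguf.tensor_mapping import TensorNameMap

# __init__(arch, n_blocks) expands every "{bid}" placeholder per block,
# so a concrete checkpoint tensor name can be looked up directly.
tmap = TensorNameMap(MODEL_ARCH.ARCTIC, n_blocks=2)
hit = tmap.get_type_and_name("model.layers.0.residual_layernorm")
# Expected to be (MODEL_TENSOR.FFN_NORM, <gguf name for block 0>) if the
# Arctic-specific mapping shown above is registered for this architecture.
print(hit)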
@@ -609,7 +496,9 @@ def __init__(self, arch: MODEL_ARCH, n_blocks: int):
                    key = key.format(bid=bid)
                    self.mapping[key] = (tensor, tensor_name)

    def get_type_and_name(
        self, key: str, try_suffixes: Sequence[str] = ()
    ) -> tuple[MODEL_TENSOR, str] | None:
        result = self.mapping.get(key)
        if result is not None:
            return result
@@ -626,7 +515,9 @@ def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
            return None
        return result[1]

    def get_type(
        self, key: str, try_suffixes: Sequence[str] = ()
    ) -> MODEL_TENSOR | None:
        result = self.get_type_and_name(key, try_suffixes=try_suffixes)
        if result is None:
            return None
@@ -7,12 +7,18 @@ def fill_templated_filename(filename: str, output_type: str | None) -> str:
    # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
    ftype_lowercase: str = output_type.lower() if output_type is not None else ""
    ftype_uppercase: str = output_type.upper() if output_type is not None else ""
    return filename.format(
        ftype_lowercase,
        outtype=ftype_lowercase,
        ftype=ftype_lowercase,
        OUTTYPE=ftype_uppercase,
        FTYPE=ftype_uppercase,
    )


def model_weight_count_rounded_notation(
    model_params_count: int, min_digits: int = 2
) -> str:
    if model_params_count > 1e12:
        # Trillions Of Parameters
        scaled_model_params = model_params_count * 1e-12
@@ -30,39 +36,65 @@ def model_weight_count_rounded_notation(model_params_count: int, min_digits: int
        scaled_model_params = model_params_count * 1e-3
        scale_suffix = "K"

    fix = max(min_digits - len(str(round(scaled_model_params)).lstrip("0")), 0)

    return f"{scaled_model_params:.{fix}f}{scale_suffix}"


def size_label(
    total_params: int, shared_params: int, expert_params: int, expert_count: int
) -> str:

    if expert_count > 0:
        pretty_size = model_weight_count_rounded_notation(
            abs(shared_params) + abs(expert_params), min_digits=2
        )
        size_class = f"{expert_count}x{pretty_size}"
    else:
        size_class = model_weight_count_rounded_notation(
            abs(total_params), min_digits=2
        )

    return size_class


def naming_convention(
    model_name: str | None,
    base_name: str | None,
    finetune_string: str | None,
    version_string: str | None,
    size_label: str | None,
    output_type: str | None,
    model_type: Literal["vocab", "LoRA"] | None = None,
) -> str:
    # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention

    if base_name is not None:
        name = base_name.strip().replace(" ", "-").replace("/", "-")
    elif model_name is not None:
        name = model_name.strip().replace(" ", "-").replace("/", "-")
    else:
        name = "ggml-model"

    parameters = f"-{size_label}" if size_label is not None else ""

    finetune = (
        f"-{finetune_string.strip().replace(' ', '-')}"
        if finetune_string is not None
        else ""
    )

    version = (
        f"-{version_string.strip().replace(' ', '-')}"
        if version_string is not None
        else ""
    )

    encoding = (
        f"-{output_type.strip().replace(' ', '-').upper()}"
        if output_type is not None
        else ""
    )

    kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""
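Editor's note, not part of the diff: a small worked example of what the helpers above produce. The import path is assumed, and the final assembly order of naming_convention is taken from the GGUF naming convention it references, since the return statement itself is outside this hunk.

# Illustration only - import path and final output string are assumptions.
from gguf.utility import (
    model_weight_count_rounded_notation,
    naming_convention,
    size_label,
)

print(model_weight_count_rounded_notation(125_000))  # "125K" (thousands branch above)
print(size_label(0, 3_000, 1_000, 8))                # "8x4.0K" (per-expert size, 8 experts)

name = naming_convention(
    model_name="Example Model",  # hypothetical values
    base_name=None,
    finetune_string="chat",
    version_string="v1.0",
    size_label="8x4.0K",
    output_type="q8_0",
)
# Per the referenced GGUF naming convention this should come out roughly as
# "Example-Model-8x4.0K-chat-v1.0-Q8_0".
print(name)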
@@ -5,7 +5,16 @@
import json
import os
from pathlib import Path
from typing import (
    Any,
    Callable,
    Sequence,
    Mapping,
    Iterable,
    Protocol,
    ClassVar,
    runtime_checkable,
)

from sentencepiece import SentencePieceProcessor
@@ -23,7 +32,9 @@ class SpecialVocab:
    chat_template: str | Sequence[Mapping[str, str]] | None

    def __init__(
        self,
        path: str | os.PathLike[str],
        load_merges: bool = False,
        special_token_types: Iterable[str] | None = None,
        n_vocab: int | None = None,
    ):
@@ -36,40 +47,60 @@ def __init__(
        if special_token_types is not None:
            self.special_token_types = special_token_types
        else:
            self.special_token_types = (
                "bos",
                "eos",
                "unk",
                "sep",
                "pad",
                "cls",
                "mask",
            )
        self._load(Path(path))

    def __repr__(self) -> str:
        return "<SpecialVocab with {} merges, special tokens {}, add special tokens {}>".format(
            len(self.merges),
            self.special_token_ids or "unset",
            self.add_special_token or "unset",
        )

    def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
        if self.merges:
            if not quiet:
                logger.info(f"Adding {len(self.merges)} merge(s).")
            gw.add_token_merges(self.merges)
        elif self.load_merges:
            logger.warning(
                "Adding merges requested but no merges found, output may be non-functional."
            )
        for typ, tokid in self.special_token_ids.items():
            id_handler: Callable[[int], None] | None = getattr(
                gw, f"add_{typ}_token_id", None
            )
            if id_handler is None:
                logger.warning(
                    f"No handler for special token type {typ} with id {tokid} - skipping"
                )
                continue
            if not quiet:
                logger.info(f"Setting special token type {typ} to {tokid}")
            id_handler(tokid)
        for typ, value in self.add_special_token.items():
            add_handler: Callable[[bool], None] | None = getattr(
                gw, f"add_add_{typ}_token", None
            )
            if add_handler is None:
                logger.warning(
                    f"No handler for add_{typ}_token with value {value} - skipping"
                )
                continue
            if not quiet:
                logger.info(f"Setting add_{typ}_token to {value}")
            add_handler(value)
        if self.chat_template is not None:
            if not quiet:
                logger.info(f"Setting chat_template to {self.chat_template}")
            gw.add_chat_template(self.chat_template)

    def _load(self, path: Path) -> None:
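Editor's note, not part of the diff: a usage sketch for the class above. The import locations, the GGUFWriter constructor arguments, and the model path are all assumptions for illustration.

# Illustration only - paths, imports and writer arguments are placeholders.
from gguf.gguf_writer import GGUFWriter  # assumed import location
from gguf.vocab import SpecialVocab      # assumed import location

special_vocab = SpecialVocab(
    "path/to/hf-model",  # directory holding tokenizer.json / tokenizer_config.json / config.json
    load_merges=True,
    n_vocab=32000,
)
writer = GGUFWriter("out.gguf", "llama")  # assumed constructor signature
special_vocab.add_to_gguf(writer)         # writes merges, special token ids and chat template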
@@ -79,12 +110,12 @@ def _load(self, path: Path) -> None:
        self._try_load_merges_txt(path)

    def _try_load_merges_txt(self, path: Path) -> bool:
        merges_file = path / "merges.txt"
        if not merges_file.is_file():
            return False
        with open(merges_file, "r", encoding="utf-8") as fp:
            first_line = next(fp, "").strip()
            if not first_line.startswith("#"):
                fp.seek(0)
                line_num = 0
            else:
@@ -97,9 +128,11 @@ def _try_load_merges_txt(self, path: Path) -> bool:
                    continue
                parts = line.split(None, 3)
                if len(parts) != 2:
                    logger.warning(
                        f"{merges_file.name}: Line {line_num}: Entry malformed, ignoring"
                    )
                    continue
                merges.append(f"{parts[0]} {parts[1]}")
        self.merges = merges
        return True
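Editor's note, not part of the diff: a concrete example of what the parsing above yields, inferred from the visible checks.

# Illustration only. Given a merges.txt such as:
#
#     #version: 0.2
#     h e
#     he llo
#
# _try_load_merges_txt skips the leading "#..." header (per the
# startswith("#") check earlier in the method), splits each remaining
# line into its two fields, and stores them re-joined with one space:
#
#     self.merges == ["h e", "he llo"]
#
# A line with more or fewer than two fields is logged as malformed and skipped.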
@@ -107,45 +140,49 @@ def _set_special_token(self, typ: str, tid: Any) -> None:
        if not isinstance(tid, int):
            return
        if tid < 0:
            raise ValueError(f"invalid value for special token type {typ}: {tid}")
        if self.n_vocab is None or tid < self.n_vocab:
            if typ in self.special_token_ids:
                return
            self.special_token_ids[typ] = tid
            return
        logger.warning(
            f"Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping"
        )

    def _try_load_from_tokenizer_json(self, path: Path) -> bool:
        tokenizer_file = path / "tokenizer.json"
        if tokenizer_file.is_file():
            with open(tokenizer_file, encoding="utf-8") as f:
                tokenizer = json.load(f)
            if self.load_merges:
                merges = tokenizer.get("model", {}).get("merges")
                if isinstance(merges, list) and merges and isinstance(merges[0], str):
                    self.merges = merges
            added_tokens = tokenizer.get("added_tokens", {})
        else:
            added_tokens = {}
        tokenizer_config_file = path / "tokenizer_config.json"
        if not tokenizer_config_file.is_file():
            return True
        with open(tokenizer_config_file, encoding="utf-8") as f:
            tokenizer_config = json.load(f)
        chat_template = tokenizer_config.get("chat_template")
        if chat_template is None or isinstance(chat_template, (str, list)):
            self.chat_template = chat_template
        else:
            logger.warning(
                f"Bad type for chat_template field in {tokenizer_config_file!r} - ignoring"
            )
        for typ in self.special_token_types:
            add_entry = tokenizer_config.get(f"add_{typ}_token")
            if isinstance(add_entry, bool):
                self.add_special_token[typ] = add_entry
            entry = tokenizer_config.get(f"{typ}_token")
            if isinstance(entry, str):
                tc_content = entry
            elif isinstance(entry, dict):
                entry_content = entry.get("content")
                if not isinstance(entry_content, str):
                    continue
                tc_content = entry_content
@@ -153,20 +190,24 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
                continue
            # We only need the first match here.
            maybe_token_id = next(
                (
                    atok.get("id")
                    for atok in added_tokens
                    if atok.get("content") == tc_content
                ),
                None,
            )
            self._set_special_token(typ, maybe_token_id)
        return True

    def _try_load_from_config_json(self, path: Path) -> bool:
        config_file = path / "config.json"
        if not config_file.is_file():
            return False
        with open(config_file, encoding="utf-8") as f:
            config = json.load(f)
        for typ in self.special_token_types:
            self._set_special_token(typ, config.get(f"{typ}_token_id"))
        return True
@@ -202,47 +243,52 @@ class BpeVocab(Vocab):
    def __init__(self, base_path: Path):
        added_tokens: dict[str, int] = {}

        if (fname_tokenizer := base_path / "vocab.json").exists():
            # "slow" tokenizer
            with open(fname_tokenizer, encoding="utf-8") as f:
                self.vocab = json.load(f)

            try:
                # FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
                with open(base_path / "added_tokens.json", encoding="utf-8") as f:
                    added_tokens = json.load(f)
            except FileNotFoundError:
                pass
        else:
            # "fast" tokenizer
            fname_tokenizer = base_path / "tokenizer.json"

            # if this fails, FileNotFoundError propagates to caller
            with open(fname_tokenizer, encoding="utf-8") as f:
                tokenizer_json = json.load(f)

            tokenizer_model: dict[str, Any] = tokenizer_json["model"]
            if (
                tokenizer_model["type"] != "BPE"
                or tokenizer_model.get("byte_fallback", False)
                or tokenizer_json["decoder"]["type"] != "ByteLevel"
            ):
                raise FileNotFoundError("Cannot find GPT-2 BPE tokenizer")

            self.vocab = tokenizer_model["vocab"]

            if (added := tokenizer_json.get("added_tokens")) is not None:
                # Added tokens here can be duplicates of the main vocabulary.
                added_tokens = {
                    item["content"]: item["id"]
                    for item in added
                    if item["content"] not in self.vocab
                }

        vocab_size = len(self.vocab)
        expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
        actual_ids = sorted(added_tokens.values())
        if expected_ids != actual_ids:
            expected_end_id = vocab_size + len(actual_ids) - 1
            raise ValueError(
                f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
                f"{vocab_size} - {expected_end_id}; got {actual_ids}"
            )

        items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
        self.added_tokens_dict = added_tokens
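Editor's note, not part of the diff: a worked example of the sequential-ID check above, using made-up numbers.

# Illustration only - hypothetical vocab size and token ids.
vocab_size = 4                           # len(self.vocab)
added_tokens = {"<pad>": 4, "<eos>": 6}  # hypothetical added_tokens.json content
expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))  # [4, 5]
actual_ids = sorted(added_tokens.values())                              # [4, 6]
# expected_ids != actual_ids, so __init__ raises:
#   ValueError: Expected the 2 added token ID(s) to be sequential in the range
#   4 - 5; got [4, 6]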
@@ -276,27 +322,31 @@ class SentencePieceVocab(Vocab):

    def __init__(self, base_path: Path):
        added_tokens: dict[str, int] = {}
        if (fname_tokenizer := base_path / "tokenizer.model").exists():
            # normal location
            try:
                with open(base_path / "added_tokens.json", encoding="utf-8") as f:
                    added_tokens = json.load(f)
            except FileNotFoundError:
                pass
        elif not (fname_tokenizer := base_path.parent / "tokenizer.model").exists():
            # not found in alternate location either
            raise FileNotFoundError("Cannot find tokenizer.model")

        self.sentencepiece_tokenizer = SentencePieceProcessor()
        self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer))
        vocab_size = self.sentencepiece_tokenizer.vocab_size()

        new_tokens = {
            id: piece for piece, id in added_tokens.items() if id >= vocab_size
        }
        expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens)))
        actual_new_ids = sorted(new_tokens.keys())

        if expected_new_ids != actual_new_ids:
            raise ValueError(
                f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}"
            )

        # Token pieces that were added to the base vocabulary.
        self.added_tokens_dict = added_tokens
@@ -347,25 +397,27 @@ class LlamaHfVocab(Vocab):
    name = "hfft"

    def __init__(self, base_path: Path):
        fname_tokenizer = base_path / "tokenizer.json"
        # if this fails, FileNotFoundError propagates to caller
        with open(fname_tokenizer, encoding="utf-8") as f:
            tokenizer_json = json.load(f)

        # pre-check so we know if we need transformers
        tokenizer_model: dict[str, Any] = tokenizer_json["model"]
        is_llama3 = (
            tokenizer_model["type"] == "BPE"
            and tokenizer_model.get("ignore_merges", False)
            and not tokenizer_model.get("byte_fallback", True)
        )
        if is_llama3:
            raise TypeError("Llama 3 must be converted with BpeVocab")

        if not is_llama3 and (
            tokenizer_model["type"] != "BPE"
            or not tokenizer_model.get("byte_fallback", False)
            or tokenizer_json["decoder"]["type"] != "Sequence"
        ):
            raise FileNotFoundError("Cannot find Llama BPE tokenizer")

        try:
            from transformers import AutoTokenizer
@@ -427,16 +479,22 @@ def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:

            # Yield token text, score, and type
            yield token_text, self.get_token_score(token_id), self.get_token_type(
                token_id,
                token_text,
                self.special_ids,  # Reuse already stored special IDs
            )

    def get_token_type(
        self, token_id: int, token_text: bytes, special_ids: set[int]
    ) -> gguf.TokenType:
        # Special case for byte tokens
        if re.fullmatch(rb"<0x[0-9A-Fa-f]{2}>", token_text):
            return gguf.TokenType.BYTE

        # Determine token type based on whether it's a special token
        return (
            gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL
        )

    def get_token_score(self, token_id: int) -> float:
        # Placeholder for actual logic to determine the token's score
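Editor's note, not part of the diff: how the classification above behaves on a few inputs; the ids and byte strings are made up for illustration.

# Illustration only - example inputs and outputs for get_token_type.
# b"<0x0A>" matches the byte-token pattern rb"<0x[0-9A-Fa-f]{2}>":
#   get_token_type(5, b"<0x0A>", {2})   -> gguf.TokenType.BYTE
# id 2 is in special_ids:
#   get_token_type(2, b"</s>", {2})     -> gguf.TokenType.CONTROL
# everything else:
#   get_token_type(100, b"hello", {2})  -> gguf.TokenType.NORMAL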
@@ -446,7 +504,9 @@ def get_token_score(self, token_id: int) -> float:
    def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        for text in self.added_tokens_list:
            if text in self.specials:
                toktype = self.get_token_type(
                    self.specials[text], b"", self.special_ids
                )
                score = self.get_token_score(self.specials[text])
            else:
                toktype = gguf.TokenType.USER_DEFINED
@@ -9,25 +9,52 @@
import requests
import zipfile
from datetime import datetime
from PyQt6.QtWidgets import (
    QApplication,
    QMainWindow,
    QVBoxLayout,
    QHBoxLayout,
    QWidget,
    QPushButton,
    QListWidget,
    QLineEdit,
    QLabel,
    QFileDialog,
    QProgressBar,
    QComboBox,
    QTextEdit,
    QCheckBox,
    QGroupBox,
    QFormLayout,
    QScrollArea,
    QSlider,
    QSpinBox,
    QListWidgetItem,
    QMessageBox,
    QDialog,
    QPlainTextEdit,
    QMenu,
)
from PyQt6.QtCore import QTimer, QThread, pyqtSignal, Qt, QSize
from PyQt6.QtGui import QCloseEvent, QAction


def ensure_directory(path):
    if not os.path.exists(path):
        os.makedirs(path)


def open_file_safe(file_path, mode="r"):
    encodings = ["utf-8", "latin-1", "ascii", "utf-16"]
    for encoding in encodings:
        try:
            return open(file_path, mode, encoding=encoding)
        except UnicodeDecodeError:
            continue
    raise ValueError(
        f"Unable to open file {file_path} with any of the encodings: {encodings}"
    )


def resource_path(relative_path):
    try:
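Editor's note, not part of the diff: a short usage sketch for open_file_safe defined above; the file name is a placeholder.

# Illustration only - file name is a placeholder.
# open_file_safe() tries utf-8, latin-1, ascii and utf-16 in turn and returns
# the first handle whose open() call does not raise UnicodeDecodeError.
with open_file_safe("some_log.txt") as f:
    contents = f.read()
print(len(contents))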
src/localizations.py: 1764 changed lines (file diff suppressed because it is too large).