mirror of https://github.com/leafspark/AutoGGUF
Compare commits
317 Commits
@@ -0,0 +1,13 @@
AUTOGGUF_RESOLUTION=1650x1100
AUTOGGUF_THEME=
AUTOGGUF_CHECK_BACKEND=disabled
AUTOGGUF_CHECK_UPDATE=disabled
AUTOGGUF_SERVER_API_KEY=
AUTOGGUF_MODEL_DIR_NAME=models
AUTOGGUF_OUTPUT_DIR_NAME=quantized_models
AUTOGGUF_RESIZE_FACTOR=1.1
AUTOGGUF_SERVER=enabled
AUTOGGUF_SERVER_PORT=7001
AUTOGGUF_SERVER_API_KEY=
AUTOGGUF_LANGUAGE=en-US
AUTOGGUF_BACKEND_REPO=ggerganov/llama.cpp

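These keys mirror the configuration options documented later in this mirror (server port, model and output directory names, resize factor, backend repository). As a rough illustration of how such variables are typically consumed, a reader could load them as below; the helper function and its defaults are assumptions for demonstration only, not AutoGGUF's actual implementation.

```python
import os

def read_autogguf_env() -> dict:
    """Illustrative only: collect AutoGGUF-style settings from the environment.

    The variable names come from the example configuration above; the defaults
    and this function itself are assumptions, not the real application code.
    """
    return {
        "resolution": os.getenv("AUTOGGUF_RESOLUTION", "1650x1100"),
        "server_enabled": os.getenv("AUTOGGUF_SERVER", "enabled") == "enabled",
        "server_port": int(os.getenv("AUTOGGUF_SERVER_PORT", "7001")),
        "model_dir": os.getenv("AUTOGGUF_MODEL_DIR_NAME", "models"),
        "output_dir": os.getenv("AUTOGGUF_OUTPUT_DIR_NAME", "quantized_models"),
        "backend_repo": os.getenv("AUTOGGUF_BACKEND_REPO", "ggerganov/llama.cpp"),
    }

if __name__ == "__main__":
    print(read_autogguf_env())
```
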
@@ -0,0 +1,193 @@
# Set default behavior to automatically normalize line endings.
* text=auto eol=lf

# Explicitly declare text files you want to always be normalized and converted
# to native line endings on checkout.
*.c text
*.h text
*.cpp text
*.hpp text
*.py text
*.js text
*.css text
*.html text
*.xml text
*.json text
*.md text
*.txt text
*.yml text
*.yaml text
*.ini text
*.cfg text
*.conf text
*.toml text
*.sh text eol=lf
*.bat text eol=crlf
*.cmd text eol=crlf
*.ps1 text eol=crlf
Makefile text eol=lf

# Declare files that will always have CRLF line endings on checkout.
*.sln text eol=crlf
*.vcxproj text eol=crlf
*.vcxproj.filters text eol=crlf
*.dbproj text eol=crlf

# Denote all files that are truly binary and should not be modified.
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.ico binary
*.mov binary
*.mp4 binary
*.mp3 binary
*.flv binary
*.fla binary
*.swf binary
*.gz binary
*.zip binary
*.7z binary
*.ttf binary
*.eot binary
*.woff binary
*.woff2 binary
*.pyc binary
*.pdf binary
*.exe binary
*.dll binary
*.so binary
*.dylib binary

# Specific file types for development
*.sdf binary
*.db binary
*.pkl binary
*.npy binary
*.npz binary
*.pyd binary

# Exclude files from exporting
.gitattributes export-ignore
.gitignore export-ignore
.gitkeep export-ignore

# Linguist language overrides
*.ipynb linguist-language=Python
*.qml linguist-language=QML
*.glsl linguist-language=GLSL
*.frag linguist-language=GLSL
*.vert linguist-language=GLSL
*.geom linguist-language=GLSL
*.comp linguist-language=GLSL
*.metal linguist-language=Metal
*.hlsl linguist-language=HLSL
*.shader linguist-language=ShaderLab

# Treat certain files as binary to prevent merge conflicts
*.pbxproj binary merge=union
*.svg text

# Collapse Unity-generated files on GitHub
*.asset linguist-generated
*.mat linguist-generated
*.meta linguist-generated
*.prefab linguist-generated
*.unity linguist-generated

# Treat notebook checkpoints as generated
.ipynb_checkpoints/* linguist-generated

# Custom diff drivers
*.md diff=markdown
*.php diff=php
*.py diff=python
*.rb diff=ruby
*.tex diff=tex

# LFS
*.psd filter=lfs diff=lfs merge=lfs -text
*.ai filter=lfs diff=lfs merge=lfs -text
*.tif filter=lfs diff=lfs merge=lfs -text
*.cubemap filter=lfs diff=lfs merge=lfs -text
*.tga filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.resS filter=lfs diff=lfs merge=lfs -text
*.assets filter=lfs diff=lfs merge=lfs -text
*.wav filter=lfs diff=lfs merge=lfs -text
*.fbx filter=lfs diff=lfs merge=lfs -text
*.3ds filter=lfs diff=lfs merge=lfs -text
*.pdb filter=lfs diff=lfs merge=lfs -text
*.mdb filter=lfs diff=lfs merge=lfs -text

# Ignore files (like .gitignore)
.dockerignore text
.env text
.editorconfig text
.gitconfig text

# Treat patches as text
*.patch text diff

# Windows files
*.bat text eol=crlf
*.cmd text eol=crlf

# Unix files
*.bash text eol=lf
*.sh text eol=lf

# Python files
*.pxd text diff=python
*.py3 text diff=python
*.pyw text diff=python
*.pyx text diff=python

# Ruby files
*.rb text diff=ruby
*.rbw text diff=ruby
*.gemspec text diff=ruby
*.rake text diff=ruby
Rakefile text diff=ruby

# SQL files
*.sql text

# Java files
*.java text diff=java
*.gradle text diff=java
*.gradle.kts text diff=kotlin

# Kotlin files
*.kt text diff=kotlin
*.kts text diff=kotlin

# Scala files
*.scala text diff=scala
*.sc text diff=scala

# C# files
*.cs text diff=csharp
*.cshtml text diff=html
*.csx text diff=csharp

# Visual Studio files
*.sln text eol=crlf merge=union
*.csproj merge=union
*.vbproj merge=union
*.vcxproj merge=union
*.vcproj merge=union
*.dbproj merge=union
*.fsproj merge=union
*.lsproj merge=union
*.wixproj merge=union
*.modelproj merge=union
*.sqlproj merge=union
*.wwaproj merge=union

# Xcode files
*.pbxproj merge=union

# Android files
*.gradle text diff=java
*.xml text

@@ -0,0 +1,38 @@
---
name: Bug report
about: Create a report to help us improve AutoGGUF
title: '[BUG] '
labels: bug
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Environment (please complete the following information):**
- OS: [e.g. Windows, macOS, Linux]
- AutoGGUF Version: [e.g. v1.4.2]
- Python Version (if running from source): [e.g. 3.9]
- llama.cpp backend version: [e.g. 3601]

**Additional context**
Add any other context about the problem here. Include any relevant log outputs or error messages.

**Checklist:**
- [ ] I have checked the existing issues to make sure this is not a duplicate
- [ ] I have included all relevant information to reproduce the issue
- [ ] I am running the latest version of AutoGGUF

@@ -0,0 +1,8 @@
version: 2
updates:
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "weekly"
      day: "sunday"
    open-pull-requests-limit: 10

@@ -0,0 +1,20 @@
name: Black

on:
  push:
    paths:
      - '**.py'
  pull_request:
    paths:
      - '**.py'

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
      - uses: psf/black@stable
        with:
          options: "--check --verbose"
          src: "./src"

@@ -0,0 +1,135 @@
name: Build AutoGGUF (PyInstaller)

on:
  workflow_dispatch:
    inputs:
      build_type:
        description: 'Build type (RELEASE or DEV)'
        required: true
        default: 'RELEASE'
        type: choice
        options:
          - RELEASE
          - DEV

jobs:
  build:
    strategy:
      matrix:
        os: [windows-latest, ubuntu-latest, macos-latest]
        arch: [x64]
    runs-on: ${{ matrix.os }}
    outputs:
      artifact-names: ${{ steps.set-outputs.outputs.artifact-names }}

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          architecture: ${{ matrix.arch }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install $(grep -v "^torch" requirements.txt)
          pip install pyinstaller pillow

      - name: Build with PyInstaller (Windows)
        if: matrix.os == 'windows-latest'
        run: |
          $archSuffix = if ("${{ matrix.arch }}" -eq "x86") { "-x86" } else { "-x64" }
          if ("${{ github.event.inputs.build_type }}" -eq "RELEASE") {
            pyinstaller --windowed --onefile --name=AutoGGUF$archSuffix --icon=../../assets/favicon_large.png --add-data "../../assets;assets" --distpath=build\release\dist --workpath=build\release\build --specpath=build\release src\main.py
          } else {
            pyinstaller --onefile --name=AutoGGUF$archSuffix --icon=../../assets/favicon_large.png --add-data "../../assets;assets" --distpath=build\dev\dist --workpath=build\dev\build --specpath=build\dev src\main.py
          }

      - name: Build with PyInstaller (Linux/macOS)
        if: matrix.os != 'windows-latest'
        run: |
          if [ "${{ github.event.inputs.build_type }}" = "RELEASE" ]; then
            pyinstaller --windowed --onefile --name=AutoGGUF-x64 --icon=../../assets/favicon_large.png --add-data "../../assets:assets" --distpath=build/release/dist --workpath=build/release/build --specpath=build/release src/main.py
          else
            pyinstaller --onefile --name=AutoGGUF-x64 --icon=../../assets/favicon_large.png --add-data "../../assets:assets" --distpath=build/dev/dist --workpath=build/dev/build --specpath=build/dev src/main.py
          fi

      - name: Copy additional files (Windows)
        if: matrix.os == 'windows-latest'
        run: |
          $distPath = if ("${{ github.event.inputs.build_type }}" -eq "RELEASE") { "build\release\dist" } else { "build\dev\dist" }
          New-Item -ItemType Directory -Force -Path "$distPath\src\gguf"
          Copy-Item -Path "src\gguf\*" -Destination "$distPath\src\gguf" -Recurse
          Copy-Item -Path "src\convert_hf_to_gguf.py" -Destination "$distPath\src"
          Copy-Item -Path "src\convert_lora_to_gguf.py" -Destination "$distPath\src"
          Copy-Item -Path "src\convert_lora_to_ggml.py" -Destination "$distPath\src"
          Copy-Item -Path "src\quantize_to_fp8_dynamic.py" -Destination "$distPath\src"
          Copy-Item -Path ".env.example" -Destination "$distPath\"

      - name: Copy additional files (Linux/macOS)
        if: matrix.os != 'windows-latest'
        run: |
          distPath=$(if [ "${{ github.event.inputs.build_type }}" = "RELEASE" ]; then echo "build/release/dist"; else echo "build/dev/dist"; fi)
          mkdir -p $distPath/src/gguf
          cp -R src/gguf/* $distPath/src/gguf/
          cp src/convert_hf_to_gguf.py $distPath/src/
          cp src/convert_lora_to_gguf.py $distPath/src/
          cp src/convert_lora_to_ggml.py $distPath/src/
          cp src/quantize_to_fp8_dynamic.py $distPath/src/
          cp .env.example $distPath/

      - name: Set outputs for artifact name
        id: set-outputs
        run: echo "artifact-name=AutoGGUF-${{ matrix.os }}-${{ matrix.arch }}-${{ github.event.inputs.build_type }}-${{ github.sha }}" >> $GITHUB_OUTPUT

      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          name: AutoGGUF-${{ matrix.os }}-${{ matrix.arch }}-${{ github.event.inputs.build_type }}-${{ github.sha }}
          path: build/${{ github.event.inputs.build_type == 'RELEASE' && 'release' || 'dev' }}/dist

  generate-checksums:
    needs: build
    runs-on: ubuntu-latest
    steps:
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts

      - name: Generate SHA256 checksums for all artifacts
        run: |
          cd artifacts
          versionHash=$(echo ${{ github.sha }} | cut -c1-7)
          echo "# AutoGGUF Build Checksums" > ../checksums.txt
          echo "Build: ${{ github.event.inputs.build_type }}" >> ../checksums.txt
          echo "Commit: ${{ github.sha }}" >> ../checksums.txt
          echo "Date: $(date -u)" >> ../checksums.txt
          echo "" >> ../checksums.txt

          # Find all artifact directories and generate checksums of their zip equivalents
          for artifact_dir in AutoGGUF-*-${{ github.event.inputs.build_type }}-${{ github.sha }}; do
            if [ -d "$artifact_dir" ]; then
              echo "Processing $artifact_dir..."
              cd "$artifact_dir"

              # Create a temporary zip to calculate hash (simulating what GitHub creates)
              zip -r "../temp_${artifact_dir}.zip" .
              cd ..

              # Generate SHA256 of the zip file
              hash=$(sha256sum "temp_${artifact_dir}.zip" | cut -d' ' -f1)
              echo "${hash} ${artifact_dir}.zip" >> ../checksums.txt

              # Clean up the temporary zip
              rm "temp_${artifact_dir}.zip"
            fi
          done

      - name: Upload checksums
        uses: actions/upload-artifact@v4
        with:
          name: AutoGGUF-${{ github.sha }}-SHA256
          path: checksums.txt

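The checksums.txt produced above records, after a short header, one line per artifact containing the SHA-256 hash followed by the zip name. A downloaded artifact can be checked against it with a few lines of Python; this is an illustrative sketch (the file paths are placeholders), not part of the workflow itself:

```python
import hashlib
import sys

def sha256_of(path: str) -> str:
    """Stream a file through SHA-256 so large artifacts don't need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

def verify(zip_path: str, checksums_path: str = "checksums.txt") -> bool:
    """Return True if zip_path's hash matches the entry recorded in checksums.txt."""
    actual = sha256_of(zip_path)
    name = zip_path.rsplit("/", 1)[-1]
    with open(checksums_path) as f:
        for line in f:
            parts = line.split()
            # Hash lines have exactly two fields: <sha256> <artifact>.zip
            if len(parts) == 2 and parts[1] == name:
                return parts[0] == actual
    return False

if __name__ == "__main__":
    ok = verify(sys.argv[1])
    print("OK" if ok else "MISMATCH OR NOT LISTED")
    sys.exit(0 if ok else 1)
```
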
@@ -0,0 +1,99 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

on:
  push:
    branches: [ "main" ]
    paths-ignore:
      - '**/*.md'
      - '**/*.txt'
  pull_request:
    branches: [ "main" ]
    paths-ignore:
      - '**/*.md'
      - '**/*.txt'
  schedule:
    - cron: '21 20 * * 6'

jobs:
  analyze:
    name: Analyze (${{ matrix.language }})
    # Runner size impacts CodeQL analysis time. To learn more, please see:
    #   - https://gh.io/recommended-hardware-resources-for-running-codeql
    #   - https://gh.io/supported-runners-and-hardware-resources
    #   - https://gh.io/using-larger-runners (GitHub.com only)
    # Consider using larger runners or machines with greater resources for possible analysis time improvements.
    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
    permissions:
      # required for all workflows
      security-events: write

      # required to fetch internal or private CodeQL packs
      packages: read

      # only required for workflows in private repositories
      actions: read
      contents: read

    strategy:
      fail-fast: false
      matrix:
        include:
          - language: python
            build-mode: none
        # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift'
        # Use `c-cpp` to analyze code written in C, C++ or both
        # Use 'java-kotlin' to analyze code written in Java, Kotlin or both
        # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
        # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
        # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
        # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
        # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
          build-mode: ${{ matrix.build-mode }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.

          # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
          # queries: security-extended,security-and-quality

      # If the analysis step fails for one of the languages you are analyzing with
      # "We were unable to automatically build your code", modify the matrix above
      # to set the build mode to "manual" for that language. Then modify this step
      # to build your code.
      # ℹ️ Command-line programs to run using the OS shell.
      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
      - if: matrix.build-mode == 'manual'
        shell: bash
        run: |
          echo 'If you are using a "manual" build mode for one or more of the' \
            'languages you are analyzing, replace this with the commands to build' \
            'your code, for example:'
          echo '  make bootstrap'
          echo '  make release'
          exit 1

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:${{matrix.language}}"

@@ -0,0 +1,59 @@
name: Dependency Audit

on:
  push:
    paths:
      - '**/requirements.txt'
  pull_request:
    paths:
      - '**/requirements.txt'
  schedule:
    - cron: '0 0 * * *'  # Run daily at midnight UTC

jobs:
  audit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pip-audit

      - name: Run pip-audit
        run: |
          pip-audit -r requirements.txt > audit_output.txt
        continue-on-error: true

      - name: Display audit results
        run: cat audit_output.txt

      - name: Create detailed report
        run: |
          echo "Pip Audit Report" > detailed_report.txt
          echo "==================" >> detailed_report.txt
          echo "" >> detailed_report.txt
          echo "Date: $(date)" >> detailed_report.txt
          echo "" >> detailed_report.txt
          echo "Audit Results:" >> detailed_report.txt
          cat audit_output.txt >> detailed_report.txt
          echo "" >> detailed_report.txt
          echo "Environment:" >> detailed_report.txt
          python --version >> detailed_report.txt
          pip --version >> detailed_report.txt
          echo "" >> detailed_report.txt
          echo "Requirements:" >> detailed_report.txt
          cat requirements.txt >> detailed_report.txt

      - name: Upload audit results
        uses: actions/upload-artifact@v4
        with:
          name: pip-audit-report
          path: detailed_report.txt

@@ -0,0 +1,17 @@
name: pre-commit

on: [push, pull_request]

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install pre-commit
        run: pip install pre-commit
      - name: Run pre-commit
        run: pre-commit run --all-files

@@ -0,0 +1,28 @@
name: Pylint
on:
  push:
    paths:
      - '**.py'
  pull_request:
    paths:
      - '**.py'
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.10"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install $(grep -v "^torch" requirements.txt | tr '\n' ' ')
          pip install pylint
      - name: Analysing the code with pylint
        run: |
          pylint $(git ls-files '*.py') --disable=all --enable=E0001,E0100,E0101,E0102,E0103,E0104,E0105,E0107,E0108,E0110,E0111,E0112,E0113,E0114,E0115,E0116,E0117,E0118,E0202,E0203,E0211,E0213,E0236,E0237,E0238,E0239,E0240,E0241,E0301,E0302,E0303,E0401,E0402,E0701,E0702,E0703,E0704,E0710,E0711,E0712,E1003,E1101,E1102,E1111,E1120,E1121,E1123,E1124,E1125,E1126,E1127,E1128,E1129,E1130,E1131,E1132,E1133,E1134,E1135,E1136,E1137,E1138,E1139,E1200,E1201,E1205,E1206,E1300,E1301,E1302,E1303,E1304,E1305,E1306,E1310,E1700,E1701,W0311,W0312,W0611,W0612,W0613,W0702,W1401,W1402,C0123,C0200,C0325,C0411,C0412 --fail-under=5

@@ -0,0 +1,72 @@
name: Radon Code Metrics

on:
  workflow_dispatch:
  push:
    paths:
      - '**.py'
  pull_request:
    paths:
      - '**.py'

jobs:
  radon:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install radon
        run: pip install radon

      - name: Run radon
        run: |
          if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
            CHANGED_FILES=$(git ls-files '*.py')
          else
            CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep '\.py$' || echo "")
          fi

          echo "Files to be analyzed:"
          echo "$CHANGED_FILES"

          if [ -n "$CHANGED_FILES" ]; then
            echo "Running Cyclomatic Complexity check..."
            radon cc $CHANGED_FILES -a -s -n F --exclude "AutoGGUF.quantize_model"

            echo "Running Maintainability Index check..."
            radon mi $CHANGED_FILES -s -n F
          else
            echo "No Python files to analyze."
          fi
        continue-on-error: true

      - name: Check radon output
        run: |
          if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
            CHANGED_FILES=$(git ls-files '*.py')
          else
            CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep '\.py$' || echo "")
          fi

          if [ -n "$CHANGED_FILES" ]; then
            CC_OUTPUT=$(radon cc $CHANGED_FILES -a -s -n F --exclude "AutoGGUF.quantize_model")
            MI_OUTPUT=$(radon mi $CHANGED_FILES -s -n F)

            if [ -n "$CC_OUTPUT" ] || [ -n "$MI_OUTPUT" ]; then
              echo "Radon detected code complexity or maintainability issues:"
              [ -n "$CC_OUTPUT" ] && echo "$CC_OUTPUT"
              [ -n "$MI_OUTPUT" ] && echo "$MI_OUTPUT"
              exit 1
            else
              echo "No code complexity or maintainability issues detected."
            fi
          else
            echo "No Python files to analyze."
          fi

@@ -0,0 +1,53 @@
# Ignore __pycache__ directories
__pycache__/

# Ignore everything
*

# Allow specific file types globally
!*.bat
!*.txt
!*.md
!*.sh
!LICENSE

# Allow these files
!.pre-commit-config.yaml
!.env.example
!setup.py

# Allow src folder and its .py files
!src/
src/*
!src/*.py
!src/gguf
src/gguf/*
!src/gguf/*.py

# Allow docs folder and its .py files
!docs/
docs/*
!docs/*.py

# Allow plugins folder and its .py files
!plugins/
plugins/*
!plugins/*.py

# Allow assets folder, but only .svg, .png, .rc, .css, .iss and .ico files
!assets/
assets/*
!assets/*.svg
!assets/*.png
!assets/*.ico
!assets/*.rc
!assets/*.res
!assets/*.css
!assets/*.iss

# Allow .github folder and its contents
!.github/
!.github/**

# Don't ignore .gitignore
!.gitignore

@@ -0,0 +1,10 @@
repos:
  - repo: https://github.com/psf/black
    rev: 22.10.0
    hooks:
      - id: black
        language_version: python3
  - repo: https://github.com/Lucas-C/pre-commit-hooks
    rev: v1.1.9
    hooks:
      - id: remove-crlf

@@ -0,0 +1,433 @@
# Changelog

## [v2.0.1] - 2025-05-24

### Added
- Human readable mappings from KV pairs into model properties
- certifi library for backend download and update checking
- Automated checksums in CI process

### Changed
- Updated llama.cpp backend
- Improved backend UI, logging, and task handling
- Enhanced display of model properties and cleaner formatting of KV pairs
- Updated tensor data formatting and removed redundant KV pairs property
- Updated CUDA backend check for latest llama.cpp release format
- Global urllib usage implementation
- Updated README with more information about patches and updates
- Edited quick start instructions
- Small file formatting improvements

### Fixed
- Type hints corrections
- Build errors in CI
- `@upload-artifact` updated to v4

## [v2.0.0] - 2025-01-27

### Added
- Clipboard support for save/load preset functionality with shift-click option
- Support for shift-clicking to get quantization command
- AUTOGGUF_BACKEND_REPO environment variable for custom GitHub repository fetching
- New HF to GGUF conversion types: `tq1_0` and `tq2_0`

### Changed
- Updated multiple dependencies:
  - PySide6, PyTorch, Transformers, FastAPI, uvicorn, and other core libraries to their latest compatible versions
- Adjusted monitoring intervals from 0.2s to 0.5s
- Updated copyright year to 2025
- Bundled llama.cpp licensing text in About menu
- Removed x86 build matrix from CI
- Removed Import Model confirmation dialog

### Fixed
- Resolved PySide6 segfault issue
- Fixed error when deleting models from list
- Corrected incorrect menu bar name for Load Preset

## [v1.9.1] - 2024-10-13

### Added
- Support for specifying log directory name using AUTOGGUF_LOG_DIR_NAME environment variable
- Work in progress GGUF merge window
- Support for repository types in HF Transfer utility
- New `dequantize_gguf.py` script
- Support for MiniCPM3, RWKVv6, OLMoE, IBM Granite, and Jamba in llama.cpp convert scripts (conversion only)
- Add Nuitka build script for Linux

### Changed
- Updated Finnish and Russian localizations using Claude 3 Opus
- Improved layout of HF Upload window
- Updated gguf library from upstream
- Refactored code to use localizations for menubar
- Renamed imports_and_globals.py to globals.py
- Moved general functions verify_gguf and process_args to globals.py
- Created Plugins class for extensibility
- Updated dependencies:
  - huggingface-hub
  - fastapi (~=0.115.0)
  - setuptools (~=75.1.0)
  - pyside6 (~=6.7.3)
  - uvicorn (~=0.31.0)

### Fixed
- Corrected localization strings and file select types for GGUF merging
- Fix minor errors in build scripts

## [v1.9.0] - 2024-09-15

### Added
- Implemented Hugging Face (HF) upload functionality with GUI definitions
- Added RAM and CPU usage graphs to UI
- Input validation using wraps added to UI
- Right-click context menu added to the models list in UI
- Support for iMatrix generation tracking
- GGUF splitting feature added
- Japanese and German localizations updated

### Changed
- Refactored to move functions out of `AutoGGUF` to reduce bloat
- Localized GGUF split strings
- Optimized GGUF imports and renamed related modules
- Removed old `HFTransfer` class
- Adjusted logging strings and updated French and Dutch localizations
- Improved startup time by optimizing default configuration, disabling network fetches for backends/updates
- Removed `requests` and `python-dotenv` to reduce size
- Updated `fastapi` requirement from `~=0.112.2` to `~=0.114.2`
- Updated `torch` requirement from `~=2.4.0` to `~=2.4.1`
- Updated `setuptools` requirement from `~=74.0.0` to `~=74.1.2`
- Updated `safetensors` requirement from `~=0.4.4` to `~=0.4.5`
- Updated `huggingface-hub` requirement from `~=0.24.6` to `~=0.24.7`

### Fixed
- Adjusted indeterminate progress bar behavior
- Removed comments in `requirements.txt` and updated its formatting

## [v1.8.1] - 2024-09-04

### Added
- AutoFP8 quantization classes and window (currently WIP)
- Minimize/maximize buttons to title bar
- API key authentication support for the local server
- HuggingFace upload/download class
- OpenAPI docs for endpoints
- Added new showcase image

### Changed
- Replaced Flask with FastAPI and Uvicorn for improved performance
- Moved functions out of AutoGGUF.py into utils.py and TaskListItem.py
- Updated llama.cpp convert scripts
- Improved LoRA conversion process:
  - Allow specifying output path in arguments
  - Removed shutil.move operation
  - Increased max number of LoRA layers
- Changed default port to 7001
- Now binding to localhost (127.0.0.1) instead of 0.0.0.0
- Updated Spanish localizations
- Updated setuptools requirement from ~=68.2.0 to ~=74.0.0
- Updated .env.example with new configuration parameters

### Fixed
- Web page not found error
- Use of proper status in TaskListItem
- Passing of quant_threads and Logger to TaskListItem
- Improved window moving smoothness
- Prevention of moving window below taskbar
- Optimized imports in various files
- Remove aliased quant types

## [v1.8.0] - 2024-08-26

### Added
- .env.example file added
- Sha256 generation support added to build.yml
- Allow importing models from any directory on the system
- Added manual model import functionality
- Verification for manual imports and support for concatenated files
- Implemented plugins feature using importlib
- Configuration options for AUTOGGUF_MODEL_DIR_NAME, AUTOGGUF_OUTPUT_DIR_NAME, and AUTOGGUF_RESIZE_FACTOR added

### Changed
- Moved get helper functions to utils.py
- Added type hints
- Reformat TaskListItem.py for better readability
- Separate macOS and Linux runs in CI/CD
- Updated .gitignore for better file management
- Updated numpy requirement from <2.0.0 to <3.0.0

### Fixed
- Fixed sha256 file format and avoided overwriting
- Updated regex for progress tracking
- Arabic and French localizations fixed
- Only count valid backends instead of total backend combos
- Import missing modules

## [v1.7.2] - 2024-08-19

### Added
- Update checking support (controlled by AUTOGGUF_CHECK_UPDATE environment variable)
- Live update support for GPU monitor graphs
- Smoother usage bar changes in monitor
- Unicode X button in KV Overrides box
- PyPI setup script
- Inno Setup build file
- Missing requirements and dotenv file loading

### Changed
- Moved functions out of AutoGGUF.py
- Relocated CustomTitleBar to separate file
- Updated torch requirement from ~=2.2.0 to ~=2.4.0
- Updated showcase image
- Version bumped to v1.7.2 in Localizations.py

### Fixed
- setup.py issues

## [v1.7.1] - 2024-08-16

### Added
- Modern UI with seamless title bar
- Window resizing shortcuts (Ctrl+, Ctrl-, Ctrl+0)
- Theming support
- CPU usage bar
- Save Preset and Load Preset options in File menu
- Support for EXAONE model type
- Window size configuration through environment variables

### Changed
- Refactored window to be scrollable
- Moved save/load preset logic to presets.py
- Updated docstrings for AutoGGUF.py, lora_conversion.py, and Logger.py
- Adapted gguf library to project standards

### Fixed
- Updated version to v1.7.0
- Fixed IDE-detected code typos and errors

## [v1.7.0] - 2024-08-16

### Added
- Menu bar with Close and About options
- Program version in localizations.py
- Support for 32-bit builds
- Added dependency audit
- Implemented radon, dependabot, and pre-commit workflows

### Changed
- Updated torch requirement from `~=1.13.1` to `~=2.4.0`
- Updated psutil requirement from `~=5.9.8` to `~=6.0.0`
- Refactored functions out of AutoGGUF.py and moved to ui_update.py
- Changed filenames to follow PEP 8 conventions
- Disabled .md and .txt CodeQL analysis

### Fixed
- Optimized imports in AutoGGUF.py
- Updated README with new version and styled screenshot
- Fixed image blur in documentation

## [v1.6.2] - 2024-08-15

### Added
- Server functionality with new endpoints:
  - `/v1/backends`: Lists all backends and their paths
  - `/v1/health`: Heartbeat endpoint
  - `/v1/tasks`: Provides current task info (name, status, progress, log file)
  - `/v1/models`: Retrieves model details (name, type, path, shard status)
- Environment variable support for server configuration:
  - `AUTOGGUF_SERVER`: Enable/disable server (true/false)
  - `AUTOGGUF_SERVER_PORT`: Set server port (integer)

### Changed
- Updated AutoGGUF docstrings
- Refactored build scripts

### Fixed
- Set GGML types to lowercase in command builder

## [v1.6.1] - 2024-08-12

### Added
- Optimized build scripts
- Nuitka for building

### Changed
- Updated .gitignore

### Fixed
- Bug where deletion while a task is running crashes the program

### Notes
- Fast build: Higher unzipped size (97MB), smaller download (38MB)
- Standard build: Created with PyInstaller, medium download and unzipped size (50MB), potentially slower

## [v1.6.0] - 2024-08-08

### Changed
- Resolve licensing issues by using PySide6

### Added
- Add GPU monitoring support for NVIDIA GPUs

## [v1.5.1] - 2024-08-08

### Changed
- Refactor localizations to use them in HF conversion area
- Rename FAILED_LOAD_PRESET to FAILED_TO_LOAD_PRESET localization key

### Removed
- Remove Save Preset context menu action

### Added
- Support loading *.gguf file types

## [v1.5.0] - 2024-08-06

### Changed
- Refactor localizations to use them in HF conversion area
- Organize localizations

### Added
- Add sha256 and PGP signatures (same as commit ones)
- Add HuggingFace to GGUF conversion support

### Fixed
- Fix scaling on low resolution screens, interface now scrolls

## [v1.4.3] - 2024-08-05

### Changed
- Updated src file in release to be Black formatted
- Modifying the quantize_model function to process all selected types
- Updating preset saving and loading to handle multiple quantization types
- Use ERROR and IN_PROGRESS constants from localizations in QuantizationThread
- Minor repository changes

### Added
- Added model sharding management support
- Allow multiple quantization types to be selected and started simultaneously

## [v1.4.2] - 2024-08-04

### Fixed
- Resolves bug where Base Model text was shown even when GGML type was selected
- Improved alignment

### Changed
- Minor repository changes

## [v1.4.1] - 2024-08-04

### Added
- Dynamic KV Overrides (see wiki: AutoGGUF/wiki/Dynamic-KV-Overrides)
- Quantization commands are now printed and logged

## [v1.4.0] - 2024-08-04

### Added
- LoRA Conversion:
  - New section for converting HuggingFace PEFT LoRA adapters to GGML/GGUF
  - Output type selection (GGML or GGUF)
  - Base model selection for GGUF output
  - LoRA adapter list with individual scaling factors
  - Export LoRA section for merging adapters into base model
- UI Improvements:
  - Updated task names in task list
  - IMatrix generation check
  - Larger window size
  - Added exe favicon
- Localization:
  - French and Simplified Chinese support for LoRA and "Refresh Models" strings
- Code and Build:
  - Code organization improvements
  - Added build script
  - .gitignore file
- Misc:
  - Currently includes src folder with conversion tools
  - No console window popup

## [v1.3.1] - 2024-08-04

### Added
- AUTOGGUF_CHECK_BACKEND environment variable to disable backend check on start

### Changed
- --onefile build with PyInstaller, _internal directory is no longer required

## [v1.3.0] - 2024-08-03

### Added
- Support for new llama-imatrix parameters:
  - Context size (--ctx-size) input
  - Threads (--threads) control
- New parameters to IMatrix section layout
- Slider-spinbox combination for thread count selection
- QSpinBox for output frequency input (1-100 range with percentage suffix)

### Changed
- Converted context size input to a QSpinBox
- Updated generate_imatrix() method to use new UI element values
- Improved error handling in preset loading
- Enhanced localization support for new UI elements

### Fixed
- Error when loading presets containing KV overrides

### Removed
- Duplicated functions

## [v1.2.1] - 2024-08-03

### Added
- Refresh Models button
- Linux build (built on Ubuntu 24.04 LTS)

### Fixed
- iostream llama.cpp issue, quantized_models directory created on launch

## [v1.2.0] - 2024-08-03

### Added
- More robust logging (find logs at latest_<timestamp>.log in logs folder)
- Localizations with support for 28 languages (machine translated using Gemini Experimental 0801)

## [v1.1.0] - 2024-08-03

### Added
- Dynamic KV override functionality
- Improved CUDA checking ability and extraction to the backend folder
- Scrollable area for KV overrides with add/delete capabilities

### Changed
- Enhanced visibility and usability of Output Tensor Type and Token Embedding Type options
- Refactored code for better modularity and reduced circular dependencies

### Fixed
- Behavior of Output Tensor Type and Token Embedding Type dropdown menus
- Various minor UI inconsistencies

## [1.0.1] - 2024-08-02

### Added
- Windows binary (created using PyInstaller)

### Fixed
- Issue where quantization errored with "AutoGGUF does not have x attribute"

## [v1.0.0] - 2024-08-02

### Added
- Initial release
- GUI interface for automated GGUF model quantization
- System resource monitoring (RAM and CPU usage)
- Llama.cpp backend selection and management
- Automatic download of llama.cpp releases from GitHub
- Model selection from local directory
- Comprehensive quantization options
- Task list for managing multiple quantization jobs
- Real-time log viewing for quantization tasks
- IMatrix generation feature with customizable settings
- GPU offload settings for IMatrix generation
- Context menu for task management
- Detailed model information dialog
- Error handling and user notifications
- Confirmation dialogs for task deletion and application exit

@@ -0,0 +1,127 @@
# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
  overall community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or
  advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
  address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement in the Discussions tab.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series
of actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within
the community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.

Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.

@@ -0,0 +1,61 @@
# Contributing to AutoGGUF

First off, thanks for taking the time to contribute! 🎉👍

### Reporting Bugs

- Use the issue tracker to report bugs
- Describe the bug in detail
- Include screenshots if possible

### Suggesting Enhancements

- Use the issue tracker to suggest enhancements
- Explain why this enhancement would be useful

### Code Contributions

You can find issues labeled with "good first issue" in the Issues tab as a starting point. Code refactors and optimizations are also appreciated, although if there's a vulnerability please report it privately in the Security tab. For feature PRs, please open a discussion first to make sure your feature can be added and continuously maintained.

1. Fork the repo
2. Clone your fork (`git clone https://github.com/your-username/AutoGGUF.git && cd AutoGGUF`)
3. Create your feature branch (`git checkout -b feature/AmazingFeature`)
4. Install pre-commit: (`pip install pre-commit`)
5. Set up the git hook scripts: (`pre-commit install`)
6. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
7. Push to the branch (`git push origin feature/AmazingFeature`)
8. Open a Pull Request on GitHub

## Styleguides

### Git Commit Messages

- Use the present tense ("Add feature" not "Added feature")
- Use the imperative mood ("Move cursor to..." not "Moves cursor to...")
- Limit the first line to 72 characters or fewer

### Commit Types:

```
feat: Added new feature
fix: Fixed a bug
docs: Updated documentation
style: Code style changes (formatting, etc.)
refactor: Code refactoring
perf: Performance improvements
test: Added or modified tests
build: Changes to build system or external dependencies
ci: Changes to CI configuration files and scripts
chore: Other changes that don't modify src or test files
```

### Python Styleguide

- Follow PEP 8
- Please use Black to format your code first
- Use meaningful variable names
- Comment your code, but don't overdo it

## Questions?

Feel free to contact the project maintainers if you have any questions.

LICENSE (2 changes)
@@ -186,7 +186,7 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]
Copyright (c) 2024-2025 leafspark

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

README.md (184 changes)
@ -1,26 +1,170 @@
|
|||

|
||||
|
||||
# AutoGGUF - automated GGUF model quantizer
|
||||
|
||||
This application provides a graphical user interface for quantizing GGUF models
|
||||
using the llama.cpp library. It allows users to download different versions of
|
||||
llama.cpp, manage multiple backends, and perform quantization tasks with various
|
||||
options.
|
||||
<!-- Project Status -->
|
||||
[](https://github.com/leafspark/AutoGGUF/releases)
|
||||
[](https://github.com/leafspark/AutoGGUF/commits)
|
||||
[]()
|
||||
|
||||
**Main features**:
|
||||
1. Download and manage llama.cpp backends
|
||||
2. Select and quantize GGUF models
|
||||
3. Configure quantization parameters
|
||||
4. Monitor system resources during quantization
|
||||
<!-- Project Info -->
|
||||
[](https://github.com/ggerganov/llama.cpp)
|
||||
[]()
|
||||
[](https://github.com/leafspark/AutoGGUF/blob/main/LICENSE)
|
||||

|
||||
|
||||
**Usage**:
|
||||
1. Install dependencies, either using the `requirements.txt` file or `pip install PyQt6 requests psutil`.
|
||||
2. Run the `run.bat` script to start the application, or run the command `python src/main.py`.
|
||||
<!-- Repository Stats -->
|
||||

|
||||

|
||||

|
||||

|
||||
<!--  -->
|
||||
|
||||
**Dependencies**:
|
||||
- PyQt6
|
||||
- requests
|
||||
- psutil
|
||||
<!-- Contribution -->
|
||||
[](https://github.com/leafspark/AutoGGUF/issues)
|
||||
[](https://github.com/psf/black)
|
||||
[](https://github.com/leafspark/AutoGGUF/pulls)
|
||||
|
||||
**To be implemented:**
|
||||
- Actual progress bar tracking
|
||||
- Download safetensors from HF and convert to unquanted GGUF
|
||||
- Specify multiple KV overrides
|
||||
The most comprehensive GUI tool for GGUF model quantization. Stop wrestling with command lines - quantize, merge, and optimize your models with just a few clicks.
|
||||
|
||||
## Features
|
||||
|
||||
- 📩 Update and manage llama.cpp backends
|
||||
- 🗃️ Download and quantize GGUF/safetensors models
|
||||
- 📐 Configure quantization parameters
|
||||
- 💻 Monitor system resources in real time during quantization
|
||||
- ⏳ Parallel quantization + imatrix generation
|
||||
- 🎉 LoRA conversion and merging
|
||||
- 📁 Preset saving and loading
|
||||
- 8️⃣ AutoFP8 quantization
|
||||
- 🪓 GGUF splitting and merging
|
||||
- 🌐 HTTP API for automation and monitoring
|
||||
|
||||
## Why AutoGGUF?
|
||||
- Fast: Saves time on manual configuration
|
||||
- Simple: Clean UI, no terminal needed
|
||||
- Powerful: Handles arbitrarily large models, limited only by your available RAM
|
||||
- Resource-aware: Optimized memory management and efficient UI library
|
||||
|
||||

|
||||
|
||||
## Quick Start
|
||||
|
||||
### Cross-platform (recommended)
|
||||
1. `git clone https://github.com/leafspark/AutoGGUF`
|
||||
2. `cd AutoGGUF`
|
||||
3. Install dependencies:
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
4. Run the application:
|
||||
```
|
||||
python src/main.py
|
||||
```
|
||||
or use the `run.bat` script.
|
||||
|
||||
macOS and Ubuntu builds are produced by GitHub Actions; you can download the binaries from the releases section.
|
||||
|
||||
### Windows (for the impatient)
|
||||
Standard builds:
|
||||
1. Download the latest release
|
||||
2. Extract all files to a folder
|
||||
3. Run `AutoGGUF-x64.exe`
|
||||
4. Any necessary folders will be automatically created
|
||||
|
||||
Setup builds:
|
||||
1. Download the setup variant of the latest release
|
||||
2. Extract all files to a folder
|
||||
3. Run the setup program
|
||||
4. The .gguf extension will be registered with the program automatically
|
||||
5. Run the program from the Start Menu or desktop shortcuts
|
||||
|
||||
After launching the program, you may access its local server at port 7001 (set the `AUTOGGUF_SERVER` environment variable to "enabled" first).
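
Once the server is enabled, you can check that it is reachable from a script. The snippet below is a minimal sketch using the `requests` dependency; it only probes the root URL, since the specific endpoints are not listed here:

```python
import requests

# Assumes AUTOGGUF_SERVER was set to "enabled" before the app was launched.
base_url = "http://localhost:7001"

try:
    response = requests.get(base_url, timeout=5)
    print(f"AutoGGUF server responded with HTTP {response.status_code}")
except requests.ConnectionError:
    print("AutoGGUF server is not reachable on port 7001")
```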
|
||||
|
||||
### Verifying Releases
|
||||
|
||||
#### Linux/macOS:
|
||||
```bash
|
||||
gpg --import AutoGGUF-v1.5.0-prerel.asc
|
||||
gpg --verify AutoGGUF-v1.9.1-Windows-avx2.zip.sig AutoGGUF-v1.9.1-Windows-avx2.zip
|
||||
sha256sum -c AutoGGUF-v1.9.1.sha256
|
||||
```
|
||||
|
||||
#### Windows (PowerShell):
|
||||
```powershell
|
||||
# Import the public key
|
||||
gpg --import AutoGGUF-v1.5.0-prerel.asc
|
||||
|
||||
# Verify the signature
|
||||
gpg --verify AutoGGUF-v1.9.1-Windows-avx2.zip.sig AutoGGUF-v1.9.1-Windows-avx2.zip
|
||||
|
||||
# Check SHA256
|
||||
$fileHash = (Get-FileHash -Algorithm SHA256 AutoGGUF-v1.9.1-Windows-avx2.zip).Hash.ToLower()
|
||||
$storedHash = (Get-Content AutoGGUF-v1.9.1.sha256 | Select-String AutoGGUF-v1.9.1-Windows-avx2.zip).Line.Split()[0]
|
||||
if ($fileHash -eq $storedHash) { "SHA256 Match" } else { "SHA256 Mismatch" }
|
||||
```
|
||||
|
||||
Release keys are identical to the ones used for commit signing.
|
||||
|
||||
## Building
|
||||
|
||||
### Cross-platform
|
||||
```bash
|
||||
pip install -U pyinstaller
|
||||
./build.sh RELEASE | DEV
|
||||
cd build/<type>/dist/
|
||||
./AutoGGUF
|
||||
```
|
||||
|
||||
### Windows
|
||||
```bash
|
||||
pip install -U pyinstaller
|
||||
build RELEASE | DEV
|
||||
```
|
||||
Find the executable in `build/<type>/dist/AutoGGUF-x64.exe`.
|
||||
|
||||
You can also use Nuitka, which may result in a slower build but a faster output executable:
|
||||
```bash
|
||||
build_optimized RELEASE | DEV
|
||||
```
|
||||
|
||||
## Localizations
|
||||
|
||||
View the list of supported languages at [AutoGGUF/wiki/Installation#configuration](https://github.com/leafspark/AutoGGUF/wiki/Installation#configuration) (LLM translated, except for English).
|
||||
|
||||
Languages are updated as part of each release, or as soon as possible afterwards.
|
||||
|
||||
To use a specific language, set the `AUTOGGUF_LANGUAGE` environment variable to one of the listed language codes (note: some languages may not be fully supported yet, in which case the UI elements will fall back to English).
|
||||
|
||||
## Issues
|
||||
|
||||
- Some inconsistent logging and signal handling
|
||||
- Missing or duplicated translations (priority)
|
||||
- Buggy/incomplete API interfaces
|
||||
- Code review and formatting (priority)
|
||||
|
||||
## Planned Features
|
||||
|
||||
- [ ] Time estimation for quantization
|
||||
- [ ] Quantization file size estimate
|
||||
- [ ] Perplexity testing
|
||||
- [ ] bitsandbytes support
|
||||
|
||||
#### Project Status
|
||||
|
||||
AutoGGUF has now entered maintenance mode. It's considered stable and feature-complete for most use cases, so I'm not actively developing new features, but I’ll continue to publish occasional builds, update dependencies regularly, and fix critical bugs as needed. If you encounter issues or have suggestions, feel free to open an issue.
|
||||
|
||||
## Support
|
||||
|
||||
- SSL module cannot be found error: install OpenSSL, or run from source using `python src/main.py` (for example via the `run.bat` script) after installing dependencies (`pip install requests`)
|
||||
- Check out the [Wiki](https://github.com/leafspark/AutoGGUF/wiki) for advanced usage and configuration
|
||||
|
||||
## Contributing
|
||||
|
||||
Fork the repo, make your changes, and ensure you have the latest commits when merging. Include a changelog of new features in your pull request description. Read `CONTRIBUTING.md` for more information.
|
||||
|
||||
## Stargazers
|
||||
|
||||
[](https://star-history.com/#leafspark/AutoGGUF&Date)
|
||||
|
||||
`Last Updated: May 24, 2025`
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
# Security Policy
|
||||
|
||||
## Supported Versions
|
||||
|
||||
| Version | Supported |
|
||||
|-----------------|--------------------|
|
||||
| stable (v2.0.x) | :white_check_mark: |
|
||||
|
||||
Beta versions are not officially supported and may contain unknown security vulnerabilities. Use them at your own risk.
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
Use the Issues tab, or for severe vulnerabilities, please contact the maintainers via email.
|
|
@ -0,0 +1,81 @@
|
|||
#define MyAppName "AutoGGUF"
|
||||
#define MyAppVersion "v1.7.1"
|
||||
#define MyAppPublisher "leafspark"
|
||||
#define MyAppURL "https://github.com/leafspark/AutoGGUF"
|
||||
#define MyAppExeName "AutoGGUF-x64.exe"
|
||||
#define MyAppAssocName MyAppName + " File"
|
||||
#define MyAppAssocExt ".gguf"
|
||||
#define MyAppAssocKey StringChange(MyAppAssocName, " ", "") + MyAppAssocExt
|
||||
|
||||
[Setup]
|
||||
; NOTE: The value of AppId uniquely identifies this application. Do not use the same AppId value in installers for other applications.
|
||||
; (To generate a new GUID, click Tools | Generate GUID inside the IDE.)
|
||||
AppId={{9753D5EB-05A8-489B-86A4-FCE6341FDE0E}
|
||||
AppName={#MyAppName}
|
||||
AppVersion={#MyAppVersion}
|
||||
;AppVerName={#MyAppName} {#MyAppVersion}
|
||||
AppPublisher={#MyAppPublisher}
|
||||
AppPublisherURL={#MyAppURL}
|
||||
AppSupportURL={#MyAppURL}
|
||||
AppUpdatesURL={#MyAppURL}
|
||||
DefaultDirName={autopf}\{#MyAppName}
|
||||
; "ArchitecturesAllowed=x64compatible" specifies that Setup cannot run
|
||||
; on anything but x64 and Windows 11 on Arm.
|
||||
ArchitecturesAllowed=x64compatible
|
||||
; "ArchitecturesInstallIn64BitMode=x64compatible" requests that the
|
||||
; install be done in "64-bit mode" on x64 or Windows 11 on Arm,
|
||||
; meaning it should use the native 64-bit Program Files directory and
|
||||
; the 64-bit view of the registry.
|
||||
ArchitecturesInstallIn64BitMode=x64compatible
|
||||
ChangesAssociations=yes
|
||||
DisableProgramGroupPage=yes
|
||||
LicenseFile=F:\autogguf-release\LICENSE.txt
|
||||
; Remove the following line to run in administrative install mode (install for all users.)
|
||||
PrivilegesRequired=lowest
|
||||
PrivilegesRequiredOverridesAllowed=dialog
|
||||
OutputDir=E:\Downloads\autogguf-inno
|
||||
OutputBaseFilename=autogguf
|
||||
Compression=lzma
|
||||
SolidCompression=yes
|
||||
WizardStyle=modern
|
||||
|
||||
[Languages]
|
||||
Name: "english"; MessagesFile: "compiler:Default.isl"
|
||||
Name: "brazilianportuguese"; MessagesFile: "compiler:Languages\BrazilianPortuguese.isl"
|
||||
Name: "dutch"; MessagesFile: "compiler:Languages\Dutch.isl"
|
||||
Name: "finnish"; MessagesFile: "compiler:Languages\Finnish.isl"
|
||||
Name: "french"; MessagesFile: "compiler:Languages\French.isl"
|
||||
Name: "german"; MessagesFile: "compiler:Languages\German.isl"
|
||||
Name: "hungarian"; MessagesFile: "compiler:Languages\Hungarian.isl"
|
||||
Name: "italian"; MessagesFile: "compiler:Languages\Italian.isl"
|
||||
Name: "japanese"; MessagesFile: "compiler:Languages\Japanese.isl"
|
||||
Name: "korean"; MessagesFile: "compiler:Languages\Korean.isl"
|
||||
Name: "polish"; MessagesFile: "compiler:Languages\Polish.isl"
|
||||
Name: "portuguese"; MessagesFile: "compiler:Languages\Portuguese.isl"
|
||||
Name: "russian"; MessagesFile: "compiler:Languages\Russian.isl"
|
||||
Name: "spanish"; MessagesFile: "compiler:Languages\Spanish.isl"
|
||||
Name: "turkish"; MessagesFile: "compiler:Languages\Turkish.isl"
|
||||
Name: "ukrainian"; MessagesFile: "compiler:Languages\Ukrainian.isl"
|
||||
|
||||
[Tasks]
|
||||
Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked
|
||||
|
||||
[Files]
|
||||
Source: "F:\autogguf-release\AutoGGUF-v1.7.1-Windows-avx2-standard\{#MyAppExeName}"; DestDir: "{app}"; Flags: ignoreversion
|
||||
Source: "F:\autogguf-release\AutoGGUF-v1.7.1-Windows-avx2-standard\src\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs
|
||||
; NOTE: Don't use "Flags: ignoreversion" on any shared system files
|
||||
|
||||
[Registry]
|
||||
Root: HKA; Subkey: "Software\Classes\{#MyAppAssocExt}\OpenWithProgids"; ValueType: string; ValueName: "{#MyAppAssocKey}"; ValueData: ""; Flags: uninsdeletevalue
|
||||
Root: HKA; Subkey: "Software\Classes\{#MyAppAssocKey}"; ValueType: string; ValueName: ""; ValueData: "{#MyAppAssocName}"; Flags: uninsdeletekey
|
||||
Root: HKA; Subkey: "Software\Classes\{#MyAppAssocKey}\DefaultIcon"; ValueType: string; ValueName: ""; ValueData: "{app}\{#MyAppExeName},0"
|
||||
Root: HKA; Subkey: "Software\Classes\{#MyAppAssocKey}\shell\open\command"; ValueType: string; ValueName: ""; ValueData: """{app}\{#MyAppExeName}"" ""%1"""
|
||||
Root: HKA; Subkey: "Software\Classes\Applications\{#MyAppExeName}\SupportedTypes"; ValueType: string; ValueName: ".myp"; ValueData: ""
|
||||
|
||||
[Icons]
|
||||
Name: "{autoprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"
|
||||
Name: "{autodesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon
|
||||
|
||||
[Run]
|
||||
Filename: "{app}\{#MyAppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(MyAppName, '&', '&&')}}"; Flags: nowait postinstall skipifsilent
|
||||
|
|
@ -0,0 +1 @@
|
|||
/* Leave this file blank for default theme */
|
Binary file not shown.
After Width: | Height: | Size: 4.2 KiB |
Binary file not shown.
After Width: | Height: | Size: 92 KiB |
Binary file not shown.
After Width: | Height: | Size: 165 KiB |
Binary file not shown.
After Width: | Height: | Size: 166 KiB |
|
@ -0,0 +1,24 @@
|
|||
@echo off
|
||||
|
||||
if "%1"=="" (
|
||||
echo Usage: build.bat [RELEASE^|DEV]
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if /I "%1"=="RELEASE" (
|
||||
echo Building RELEASE version...
|
||||
pyinstaller --windowed --onefile --name=AutoGGUF --icon=../../assets/favicon_large.png --add-data "../../assets;assets" --distpath=build\release\dist --workpath=build\release\build --specpath=build\release src\main.py
|
||||
) else if /I "%1"=="DEV" (
|
||||
echo Building DEV version...
|
||||
pyinstaller --onefile --name=AutoGGUF --icon=../../assets/favicon_large.png --add-data "../../assets;assets" --distpath=build\dev\dist --workpath=build\dev\build --specpath=build\dev src\main.py
|
||||
) else (
|
||||
echo Invalid argument. Use RELEASE or DEV.
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if errorlevel 1 (
|
||||
echo Build failed.
|
||||
exit /b 1
|
||||
) else (
|
||||
echo Build completed successfully.
|
||||
)
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
|
||||
|
||||
if [ $# -eq 0 ]; then
|
||||
echo "Usage: build.sh [RELEASE|DEV]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "${1,,}" = "release" ]; then
|
||||
echo "Building RELEASE version..."
|
||||
pyinstaller --windowed --onefile --name=AutoGGUF --icon=../../assets/favicon_large.png --add-data "../../assets:assets" --distpath=build/release/dist --workpath=build/release/build --specpath=build/release src/main.py
|
||||
elif [ "${1,,}" = "dev" ]; then
|
||||
echo "Building DEV version..."
|
||||
pyinstaller --onefile --name=AutoGGUF --icon=../../assets/favicon_large.png --add-data "../../assets:assets" --distpath=build/dev/dist --workpath=build/dev/build --specpath=build/dev src/main.py
|
||||
else
|
||||
echo "Invalid argument. Use RELEASE or DEV."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Build failed."
|
||||
exit 1
|
||||
else
|
||||
echo "Build completed successfully."
|
||||
fi
|
|
@ -0,0 +1,26 @@
|
|||
@echo off
|
||||
|
||||
if "%1"=="" (
|
||||
echo Usage: build_optimized.bat [RELEASE^|DEV]
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
set COMMON_FLAGS=--standalone --enable-plugin=pyside6 --include-data-dir=assets=assets
|
||||
|
||||
if /I "%1"=="RELEASE" (
|
||||
echo Building RELEASE version...
|
||||
python -m nuitka %COMMON_FLAGS% --windows-console-mode=disable --output-dir=build\release src\main.py --lto=yes
|
||||
) else if /I "%1"=="DEV" (
|
||||
echo Building DEV version...
|
||||
python -m nuitka %COMMON_FLAGS% --output-dir=build\dev src\main.py
|
||||
) else (
|
||||
echo Invalid argument. Use RELEASE or DEV.
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if errorlevel 1 (
|
||||
echo Build failed.
|
||||
exit /b 1
|
||||
) else (
|
||||
echo Build completed successfully.
|
||||
)
|
|
@ -0,0 +1,26 @@
|
|||
#!/bin/bash
|
||||
|
||||
if [ -z "$1" ]; then
|
||||
echo "Usage: build_fast.sh [RELEASE|DEV]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
COMMON_FLAGS="--standalone --enable-plugin=pyside6 --include-data-dir=assets=assets"
|
||||
|
||||
if [ "$1" == "RELEASE" ]; then
|
||||
echo "Building RELEASE version..."
|
||||
python -m nuitka $COMMON_FLAGS --windows-console-mode=disable --output-dir=build/release src/main.py --lto=yes
|
||||
elif [ "$1" == "DEV" ]; then
|
||||
echo "Building DEV version..."
|
||||
python -m nuitka $COMMON_FLAGS --output-dir=build/dev src/main.py
|
||||
else
|
||||
echo "Invalid argument. Use RELEASE or DEV."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Build failed."
|
||||
exit 1
|
||||
else
|
||||
echo "Build completed successfully."
|
||||
fi
|
|
@ -0,0 +1,319 @@
|
|||
import importlib
|
||||
import json
|
||||
import re
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from functools import partial
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
import requests
|
||||
from PySide6.QtCore import *
|
||||
from PySide6.QtGui import *
|
||||
from PySide6.QtWidgets import *
|
||||
from dotenv import load_dotenv
|
||||
|
||||
import lora_conversion
|
||||
import presets
|
||||
import ui_update
|
||||
import utils
|
||||
from CustomTitleBar import CustomTitleBar
|
||||
from GPUMonitor import GPUMonitor
|
||||
from Localizations import *
|
||||
from Logger import Logger
|
||||
from QuantizationThread import QuantizationThread
|
||||
from TaskListItem import TaskListItem
|
||||
from error_handling import handle_error, show_error
|
||||
from imports_and_globals import (
|
||||
ensure_directory,
|
||||
open_file_safe,
|
||||
resource_path,
|
||||
show_about,
|
||||
)
|
||||
|
||||
|
||||
class CustomTitleBar(QWidget):
|
||||
"""
|
||||
Custom title bar for the main window, providing drag-and-drop functionality
|
||||
and minimize/close buttons.
|
||||
"""
|
||||
|
||||
def __init__(self, parent=None):
|
||||
"""
|
||||
Initializes the custom title bar.
|
||||
|
||||
Args:
|
||||
parent (QWidget, optional): The parent widget. Defaults to None.
|
||||
"""
|
||||
|
||||
|
||||
class AutoGGUF(QMainWindow):
|
||||
"""
|
||||
Main application window for AutoGGUF, providing a user interface for
|
||||
quantizing and converting large language models.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Initializes the main window, setting up the UI, logger, and other
|
||||
necessary components.
|
||||
"""
|
||||
|
||||
def keyPressEvent(self, event):
|
||||
"""
|
||||
Handles key press events for window resizing.
|
||||
|
||||
Args:
|
||||
event (QKeyEvent): The key press event.
|
||||
"""
|
||||
|
||||
def resize_window(self, larger):
|
||||
"""
|
||||
Resizes the window by a specified factor.
|
||||
|
||||
Args:
|
||||
larger (bool): Whether to make the window larger or smaller.
|
||||
"""
|
||||
|
||||
def reset_size(self):
|
||||
"""Resets the window to its default size."""
|
||||
|
||||
def parse_resolution(self):
|
||||
"""
|
||||
Parses the resolution from the AUTOGGUF_RESOLUTION environment variable.
|
||||
|
||||
Returns:
|
||||
tuple: The width and height of the window.
|
||||
"""
|
||||
|
||||
def resizeEvent(self, event):
|
||||
"""
|
||||
Handles resize events to maintain rounded corners.
|
||||
|
||||
Args:
|
||||
event (QResizeEvent): The resize event.
|
||||
"""
|
||||
|
||||
def refresh_backends(self):
|
||||
"""Refreshes the list of available backends."""
|
||||
|
||||
def save_task_preset(self, task_item):
|
||||
"""
|
||||
Saves the preset for a specific task.
|
||||
|
||||
Args:
|
||||
task_item (TaskListItem): The task item to save the preset for.
|
||||
"""
|
||||
|
||||
def browse_export_lora_model(self):
|
||||
"""Opens a file dialog to browse for the export LORA model file."""
|
||||
|
||||
def browse_export_lora_output(self):
|
||||
"""Opens a file dialog to browse for the export LORA output file."""
|
||||
|
||||
def add_lora_adapter(self):
|
||||
"""Adds a LORA adapter to the export LORA list."""
|
||||
|
||||
def browse_base_model(self):
|
||||
"""Opens a file dialog to browse for the base model folder."""
|
||||
|
||||
def delete_lora_adapter_item(self, adapter_widget):
|
||||
"""
|
||||
Deletes a LORA adapter item from the export LORA list.
|
||||
|
||||
Args:
|
||||
adapter_widget (QWidget): The widget containing the adapter information.
|
||||
"""
|
||||
|
||||
def browse_hf_model_input(self):
|
||||
"""Opens a file dialog to browse for the HuggingFace model directory."""
|
||||
|
||||
def browse_hf_outfile(self):
|
||||
"""Opens a file dialog to browse for the HuggingFace to GGUF output file."""
|
||||
|
||||
def convert_hf_to_gguf(self):
|
||||
"""Converts a HuggingFace model to GGUF format."""
|
||||
|
||||
def export_lora(self):
|
||||
"""Exports a LORA from a GGML model."""
|
||||
|
||||
def restart_task(self, task_item):
|
||||
"""
|
||||
Restarts a specific task.
|
||||
|
||||
Args:
|
||||
task_item (TaskListItem): The task item to restart.
|
||||
"""
|
||||
|
||||
def lora_conversion_finished(self, thread, input_path, output_path):
|
||||
"""
|
||||
Handles the completion of a LORA conversion task.
|
||||
|
||||
Args:
|
||||
thread (QuantizationThread): The thread that handled the conversion.
|
||||
input_path (str): The path to the input LORA file.
|
||||
output_path (str): The path to the output GGML file.
|
||||
"""
|
||||
|
||||
def download_finished(self, extract_dir):
|
||||
"""
|
||||
Handles the completion of a download, extracting files and updating the UI.
|
||||
|
||||
Args:
|
||||
extract_dir (str): The directory where the downloaded files were extracted.
|
||||
"""
|
||||
|
||||
def extract_cuda_files(self, extract_dir, destination):
|
||||
"""
|
||||
Extracts CUDA files from a downloaded archive.
|
||||
|
||||
Args:
|
||||
extract_dir (str): The directory where the downloaded files were extracted.
|
||||
destination (str): The destination directory for the CUDA files.
|
||||
"""
|
||||
|
||||
def download_error(self, error_message):
|
||||
"""
|
||||
Handles download errors, displaying an error message and cleaning up.
|
||||
|
||||
Args:
|
||||
error_message (str): The error message.
|
||||
"""
|
||||
|
||||
def show_task_context_menu(self, position):
|
||||
"""
|
||||
Shows the context menu for a task item in the task list.
|
||||
|
||||
Args:
|
||||
position (QPoint): The position of the context menu.
|
||||
"""
|
||||
|
||||
def show_task_properties(self, item):
|
||||
"""
|
||||
Shows the properties dialog for a specific task.
|
||||
|
||||
Args:
|
||||
item (QListWidgetItem): The task item.
|
||||
"""
|
||||
|
||||
def toggle_gpu_offload_auto(self, state):
|
||||
"""
|
||||
Toggles the automatic GPU offload option.
|
||||
|
||||
Args:
|
||||
state (Qt.CheckState): The state of the checkbox.
|
||||
"""
|
||||
|
||||
def cancel_task_by_item(self, item):
|
||||
"""
|
||||
Cancels a task by its item in the task list.
|
||||
|
||||
Args:
|
||||
item (QListWidgetItem): The task item.
|
||||
"""
|
||||
|
||||
def cancel_task(self, item):
|
||||
"""
|
||||
Cancels a specific task.
|
||||
|
||||
Args:
|
||||
item (QListWidgetItem): The task item.
|
||||
"""
|
||||
|
||||
def delete_task(self, item):
|
||||
"""
|
||||
Deletes a specific task.
|
||||
|
||||
Args:
|
||||
item (QListWidgetItem): The task item.
|
||||
"""
|
||||
|
||||
def create_label(self, text, tooltip):
|
||||
"""
|
||||
Creates a QLabel with a tooltip.
|
||||
|
||||
Args:
|
||||
text (str): The text for the label.
|
||||
tooltip (str): The tooltip for the label.
|
||||
|
||||
Returns:
|
||||
QLabel: The created label.
|
||||
"""
|
||||
|
||||
def load_models(self):
|
||||
"""Loads the available models and displays them in the model tree."""
|
||||
|
||||
def browse_models(self):
|
||||
"""Opens a file dialog to browse for the models directory."""
|
||||
|
||||
def browse_output(self):
|
||||
"""Opens a file dialog to browse for the output directory."""
|
||||
|
||||
def browse_logs(self):
|
||||
"""Opens a file dialog to browse for the logs directory."""
|
||||
|
||||
def browse_imatrix(self):
|
||||
"""Opens a file dialog to browse for the imatrix file."""
|
||||
|
||||
def validate_quantization_inputs(self):
|
||||
"""Validates the inputs for quantization."""
|
||||
|
||||
def add_kv_override(self, override_string=None):
|
||||
"""Adds a KV override entry to the list."""
|
||||
|
||||
def remove_kv_override(self, entry):
|
||||
"""Removes a KV override entry from the list."""
|
||||
|
||||
def quantize_model(self):
|
||||
"""Quantizes the selected model."""
|
||||
|
||||
def parse_progress(self, line, task_item):
|
||||
"""
|
||||
Parses the progress from the output line and updates the task item.
|
||||
|
||||
Args:
|
||||
line (str): The output line.
|
||||
task_item (TaskListItem): The task item.
|
||||
"""
|
||||
|
||||
def task_finished(self, thread, task_item):
|
||||
"""
|
||||
Handles the completion of a task.
|
||||
|
||||
Args:
|
||||
thread (QuantizationThread): The thread that handled the task.
|
||||
task_item (TaskListItem): The task item.
|
||||
"""
|
||||
|
||||
def show_task_details(self, item):
|
||||
"""
|
||||
Shows the details of a specific task.
|
||||
|
||||
Args:
|
||||
item (QListWidgetItem): The task item.
|
||||
"""
|
||||
|
||||
def browse_imatrix_datafile(self):
|
||||
"""Opens a file dialog to browse for the imatrix data file."""
|
||||
|
||||
def browse_imatrix_model(self):
|
||||
"""Opens a file dialog to browse for the imatrix model file."""
|
||||
|
||||
def browse_imatrix_output(self):
|
||||
"""Opens a file dialog to browse for the imatrix output file."""
|
||||
|
||||
def get_models_data(self):
|
||||
"""Retrieves data for all loaded models."""
|
||||
|
||||
def get_tasks_data(self):
|
||||
"""Retrieves data for all tasks in the task list."""
|
||||
|
||||
def generate_imatrix(self):
|
||||
"""Generates an imatrix file."""
|
||||
|
||||
def closeEvent(self, event: QCloseEvent):
|
||||
"""
|
||||
Handles close events, prompting the user if there are running tasks.
|
||||
|
||||
Args:
|
||||
event (QCloseEvent): The close event.
|
||||
"""
|
|
@ -0,0 +1,44 @@
|
|||
import os
|
||||
import zipfile
|
||||
|
||||
import requests
|
||||
from PySide6.QtCore import QThread, Signal
|
||||
|
||||
|
||||
class DownloadThread(QThread):
|
||||
"""
|
||||
A QThread subclass for downloading and extracting zip files.
|
||||
|
||||
This thread downloads a file from a given URL, saves it to a specified path,
|
||||
extracts its contents if it's a zip file, and then removes the original zip file.
|
||||
|
||||
Signals:
|
||||
progress_signal (int): Emits the download progress as a percentage.
|
||||
finished_signal (str): Emits the path of the extracted directory upon successful completion.
|
||||
error_signal (str): Emits an error message if an exception occurs during the process.
|
||||
"""
|
||||
|
||||
def __init__(self, url: str, save_path: str) -> None:
|
||||
"""
|
||||
Initialize the DownloadThread.
|
||||
|
||||
Args:
|
||||
url (str): The URL of the file to download.
|
||||
save_path (str): The local path where the file will be saved.
|
||||
"""
|
||||
|
||||
def run(self) -> None:
|
||||
"""
|
||||
Execute the download, extraction, and cleanup process.
|
||||
|
||||
This method performs the following steps:
|
||||
1. Downloads the file from the specified URL.
|
||||
2. Saves the file to the specified path.
|
||||
3. Extracts the contents if it's a zip file.
|
||||
4. Removes the original zip file after extraction.
|
||||
5. Emits signals for progress updates, completion, or errors.
|
||||
|
||||
Raises:
|
||||
Exception: Any exception that occurs during the process is caught
|
||||
and emitted through the error_signal.
|
||||
"""
|
|
@ -0,0 +1,87 @@
|
|||
class KVOverrideEntry(QWidget):
|
||||
"""
|
||||
KVOverrideEntry is a PyQt6-based widget for creating and managing key-value override entries.
|
||||
|
||||
This class provides functionality for:
|
||||
- Inputting keys and values with type specification
|
||||
- Dynamic value substitution using predefined placeholders
|
||||
- Validating inputs based on selected data types
|
||||
- Generating formatted override strings
|
||||
|
||||
The widget includes input fields for keys and values, a type selector,
|
||||
and a delete button. It supports various system-related and custom placeholders
|
||||
for dynamic value generation.
|
||||
|
||||
Attributes:
|
||||
deleted (pyqtSignal): Signal emitted when the entry is deleted.
|
||||
key_input (QLineEdit): Input field for the key.
|
||||
type_combo (QComboBox): Dropdown for selecting the value type.
|
||||
value_input (QLineEdit): Input field for the value.
|
||||
|
||||
Supported dynamic placeholders:
|
||||
{system.time.milliseconds}: Current time in milliseconds
|
||||
{system.time.seconds}: Current time in seconds
|
||||
{system.date.iso}: Current date in ISO format
|
||||
{system.datetime.iso}: Current date and time in ISO format
|
||||
{system.username}: Current system username
|
||||
{system.hostname}: Current system hostname
|
||||
{system.platform}: Current operating system platform
|
||||
{system.python.version}: Python version
|
||||
{system.date}: Current date in YYYY-MM-DD format
|
||||
{model.name}: Model name (if provided)
|
||||
{quant.type}: Quantization type (if provided)
|
||||
{output.path}: Output path (if provided)
|
||||
"""
|
||||
|
||||
def __init__(self, parent=None):
|
||||
"""
|
||||
Initialize the KVOverrideEntry widget.
|
||||
|
||||
This method sets up the widget layout, creates and configures input fields,
|
||||
sets up validators, and connects signals to their respective slots.
|
||||
|
||||
Args:
|
||||
parent (QWidget, optional): The parent widget. Defaults to None.
|
||||
"""
|
||||
|
||||
def delete_clicked(self):
|
||||
"""
|
||||
Handle the delete button click event.
|
||||
|
||||
Emits the 'deleted' signal to notify the parent widget that this entry
|
||||
should be removed.
|
||||
"""
|
||||
|
||||
def get_override_string(self, model_name=None, quant_type=None, output_path=None):
|
||||
"""
|
||||
Generate a formatted override string with dynamic value substitution.
|
||||
|
||||
This method processes the input fields and replaces any placeholders
|
||||
in the value with their corresponding dynamic values.
|
||||
|
||||
Args:
|
||||
model_name (str, optional): Model name for substitution.
|
||||
quant_type (str, optional): Quantization type for substitution.
|
||||
output_path (str, optional): Output path for substitution.
|
||||
|
||||
Returns:
|
||||
str: Formatted override string in the format "key=type:value".
|
||||
"""
|
||||
|
||||
def get_raw_override_string(self):
|
||||
"""
|
||||
Generate a raw override string without dynamic substitution.
|
||||
|
||||
Returns:
|
||||
str: Raw override string with placeholders intact, in the format "key=type:value".
|
||||
"""
|
||||
|
||||
def update_validator(self, type_):
|
||||
"""
|
||||
Update the validator for the value input field based on the selected type.
|
||||
|
||||
This method ensures that the value input adheres to the chosen data type.
|
||||
|
||||
Args:
|
||||
type_ (str): The selected data type ('int', 'float', or 'str').
|
||||
"""
|
|
@ -0,0 +1,56 @@
|
|||
class Logger:
|
||||
"""
|
||||
This module provides a custom logger class for logging messages to both the console and a rotating log file.
|
||||
|
||||
The log file will be created in the specified `log_dir` with a timestamp in the filename.
|
||||
The file will rotate when it reaches 10MB, keeping a maximum of 5 backup files.
|
||||
"""
|
||||
|
||||
def __init__(self, name, log_dir):
|
||||
"""
|
||||
Initializes the logger with a specified name and log directory.
|
||||
|
||||
Args:
|
||||
name (str): The name of the logger.
|
||||
log_dir (str): The directory where log files will be stored.
|
||||
"""
|
||||
|
||||
def debug(self, message):
|
||||
"""
|
||||
Logs a message with the DEBUG level.
|
||||
|
||||
Args:
|
||||
message (str): The message to log.
|
||||
"""
|
||||
|
||||
def info(self, message):
|
||||
"""
|
||||
Logs a message with the INFO level.
|
||||
|
||||
Args:
|
||||
message (str): The message to log.
|
||||
"""
|
||||
|
||||
def warning(self, message):
|
||||
"""
|
||||
Logs a message with the WARNING level.
|
||||
|
||||
Args:
|
||||
message (str): The message to log.
|
||||
"""
|
||||
|
||||
def error(self, message):
|
||||
"""
|
||||
Logs a message with the ERROR level.
|
||||
|
||||
Args:
|
||||
message (str): The message to log.
|
||||
"""
|
||||
|
||||
def critical(self, message):
|
||||
"""
|
||||
Logs a message with the CRITICAL level.
|
||||
|
||||
Args:
|
||||
message (str): The message to log.
|
||||
"""
|
|
@ -0,0 +1,28 @@
|
|||
class ModelInfoDialog(QDialog):
|
||||
"""
|
||||
A dialog window for displaying model information.
|
||||
|
||||
This class creates a dialog that shows detailed information about a machine learning model,
|
||||
including its architecture, quantization type, and other relevant data.
|
||||
|
||||
Attributes:
|
||||
None
|
||||
|
||||
Args:
|
||||
model_info (dict): A dictionary containing the model's information.
|
||||
parent (QWidget, optional): The parent widget of this dialog. Defaults to None.
|
||||
"""
|
||||
|
||||
def format_model_info(self, model_info) -> str:
|
||||
"""
|
||||
Formats the model information into HTML for display.
|
||||
|
||||
This method takes the raw model information and converts it into a formatted HTML string,
|
||||
which can be displayed in the dialog's QTextEdit widget.
|
||||
|
||||
Args:
|
||||
model_info (dict): A dictionary containing the model's information.
|
||||
|
||||
Returns:
|
||||
str: Formatted HTML string containing the model information.
|
||||
"""
|
|
@ -0,0 +1,64 @@
|
|||
class QuantizationThread(QThread):
|
||||
"""
|
||||
QuantizationThread is a PyQt6-based thread for managing model quantization processes.
|
||||
|
||||
This class provides functionality for:
|
||||
- Running quantization commands as subprocesses
|
||||
- Parsing and emitting model information during quantization
|
||||
- Logging quantization output to a file
|
||||
- Communicating process status, output, and errors to the main thread
|
||||
|
||||
The thread manages the execution of quantization commands, monitors their output,
|
||||
and parses relevant model information. It uses Qt signals to communicate various
|
||||
events and data back to the main application thread.
|
||||
|
||||
Attributes:
|
||||
output_signal (pyqtSignal): Signal emitting subprocess output lines.
|
||||
status_signal (pyqtSignal): Signal for updating quantization status.
|
||||
finished_signal (pyqtSignal): Signal emitted when quantization is complete.
|
||||
error_signal (pyqtSignal): Signal for reporting errors during quantization.
|
||||
model_info_signal (pyqtSignal): Signal for sending parsed model information.
|
||||
|
||||
Methods:
|
||||
run(): Executes the quantization process and manages its lifecycle.
|
||||
parse_model_info(line: str): Parses output lines for model information.
|
||||
terminate(): Safely terminates the running subprocess.
|
||||
"""
|
||||
|
||||
def __init__(self, command, cwd, log_file):
|
||||
"""
|
||||
Initialize the QuantizationThread.
|
||||
|
||||
Args:
|
||||
command (list): The command to execute for quantization.
|
||||
cwd (str): The working directory for the subprocess.
|
||||
log_file (str): Path to the file where output will be logged.
|
||||
"""
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
Execute the quantization process.
|
||||
|
||||
This method runs the subprocess, captures its output, logs it,
|
||||
parses model information, and emits signals for status updates.
|
||||
It handles process completion and any exceptions that occur.
|
||||
"""
|
||||
|
||||
def parse_model_info(self, line):
|
||||
"""
|
||||
Parse a line of subprocess output for model information.
|
||||
|
||||
This method extracts various pieces of model information from
|
||||
the output lines and stores them in the model_info dictionary.
|
||||
|
||||
Args:
|
||||
line (str): A line of output from the quantization process.
|
||||
"""
|
||||
|
||||
def terminate(self):
|
||||
"""
|
||||
Terminate the running subprocess.
|
||||
|
||||
This method safely terminates the quantization process if it's
|
||||
still running, using SIGTERM first and SIGKILL if necessary.
|
||||
"""
|
|
@ -0,0 +1,32 @@
|
|||
"""
|
||||
Convert PEFT LoRA adapters to GGML format.
|
||||
|
||||
This script converts Hugging Face PEFT LoRA adapter files to the GGML format
|
||||
used by llama.cpp and related projects. It reads the adapter configuration
|
||||
from 'adapter_config.json' and the model weights from 'adapter_model.bin'
|
||||
or 'adapter_model.safetensors', then writes the converted model to
|
||||
'ggml-adapter-model.bin' in the same directory.
|
||||
|
||||
Usage:
|
||||
python lora_to_gguf.py <path> [arch]
|
||||
|
||||
Arguments:
|
||||
path: Directory containing the PEFT LoRA files
|
||||
arch: Model architecture (default: llama)
|
||||
|
||||
The script supports various model architectures and handles both PyTorch
|
||||
and safetensors formats for input weights. It performs necessary tensor
|
||||
transformations and writes the output in the GGML binary format.
|
||||
|
||||
Requirements:
|
||||
- Python 3.6+
|
||||
- numpy
|
||||
- torch
|
||||
- safetensors (optional, for safetensors input)
|
||||
|
||||
The script also requires the GGUF Python module, which should be in the
|
||||
'gguf-py/gguf' subdirectory relative to this script's location.
|
||||
|
||||
Note: This script is designed for use with llama.cpp and related projects.
|
||||
Ensure compatibility with your target application when using the output.
|
||||
"""
|
|
@ -0,0 +1,40 @@
|
|||
"""
|
||||
LoRA to GGUF Converter
|
||||
|
||||
This script converts a Hugging Face PEFT LoRA adapter to a GGML-compatible file format.
|
||||
|
||||
Key features:
|
||||
- Supports various output formats (f32, f16, bf16, q8_0, auto)
|
||||
- Handles big-endian and little-endian architectures
|
||||
- Provides options for lazy evaluation and verbose output
|
||||
- Combines base model information with LoRA adapters
|
||||
|
||||
Classes:
|
||||
PartialLoraTensor: Dataclass for storing partial LoRA tensor information.
|
||||
LoraTorchTensor: Custom tensor class for LoRA operations and transformations.
|
||||
LoraModel: Extends the base model class to incorporate LoRA-specific functionality.
|
||||
|
||||
Functions:
|
||||
get_base_tensor_name: Extracts the base tensor name from a LoRA tensor name.
|
||||
pyinstaller_include: Placeholder for PyInstaller import handling.
|
||||
parse_args: Parses command-line arguments for the script.
|
||||
|
||||
Usage:
|
||||
python lora_to_gguf.py --base <base_model_path> <lora_adapter_path> [options]
|
||||
|
||||
Arguments:
|
||||
--base: Path to the directory containing the base model file (required)
|
||||
lora_path: Path to the directory containing the LoRA adapter file (required)
|
||||
--outfile: Path to write the output file (optional)
|
||||
--outtype: Output format (f32, f16, bf16, q8_0, auto; default: f16)
|
||||
--bigendian: Flag to indicate big-endian machine execution
|
||||
--no-lazy: Disable lazy evaluation (uses more RAM)
|
||||
--verbose: Increase output verbosity
|
||||
--dry-run: Perform a dry run without writing files
|
||||
|
||||
The script processes LoRA adapters, combines them with base model information,
|
||||
and generates a GGML-compatible file for use in various applications.
|
||||
|
||||
Note: This script requires specific dependencies like torch, gguf, and safetensors.
|
||||
Ensure all required libraries are installed before running the script.
|
||||
"""
|
|
@ -0,0 +1,17 @@
|
|||
def convert_lora(self):
|
||||
"""Converts a LORA file to either GGML or GGUF format.
|
||||
|
||||
This function initiates the conversion process based on user input,
|
||||
utilizing a separate thread for the actual conversion and providing
|
||||
progress updates in the UI.
|
||||
|
||||
It validates input paths, constructs the conversion command, creates
|
||||
a log file, manages the conversion thread, and handles errors.
|
||||
|
||||
Args:
|
||||
self: The object instance.
|
||||
|
||||
Raises:
|
||||
ValueError: If required input paths are missing.
|
||||
|
||||
"""
|
|
@ -0,0 +1,13 @@
|
|||
class ExamplePlugin:
|
||||
def init(self, autogguf_instance):
|
||||
# This gets called after the plugin is loaded
|
||||
print("Plugin initialized")
|
||||
|
||||
def __data__(self):
|
||||
return {
|
||||
"name": "ExamplePlugin",
|
||||
"description": "This is an example plugin.",
|
||||
"compatible_versions": ["*"],
|
||||
"author": "leafspark",
|
||||
"version": "v1.0.0",
|
||||
}
|
|
@ -1,3 +1,14 @@
|
|||
PyQt6==6.5.2
|
||||
psutil==5.9.5
|
||||
requests==2.31.0
|
||||
PyYAML~=6.0.2
|
||||
psutil~=7.0.0
|
||||
pynvml~=12.0.0
|
||||
PySide6~=6.9.1
|
||||
safetensors~=0.5.3
|
||||
numpy<2.0.0
|
||||
torch~=2.7.0
|
||||
sentencepiece~=0.2.0
|
||||
setuptools~=80.7.1
|
||||
huggingface-hub~=0.33.1
|
||||
transformers~=4.51.3
|
||||
fastapi~=0.115.12
|
||||
uvicorn~=0.34.2
|
||||
certifi~=2025.4.26
|
||||
|
|
3
run.bat
3
run.bat
|
@ -1,2 +1,5 @@
|
|||
@echo off
|
||||
set PYTHONIOENCODING=utf-8
|
||||
set AUTOGGUF_LANGUAGE=en-US
|
||||
set AUTOGGUF_CHECK_BACKEND=disabled
|
||||
python src/main.py
|
|
@ -0,0 +1,31 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Check if Python is installed
|
||||
if ! command -v python3 >/dev/null 2>&1; then
|
||||
echo "Error: Python 3 is not installed or not in the PATH."
|
||||
echo "Please install Python 3 and try again."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Set environment variables
|
||||
export PYTHONIOENCODING=utf-8
|
||||
export AUTOGGUF_LANGUAGE=en-US
|
||||
|
||||
# Try to run main.py in the current directory
|
||||
if [ -f "main.py" ]; then
|
||||
echo "Running main.py in the current directory..."
|
||||
python3 main.py
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# If main.py doesn't exist in the current directory, try src/main.py
|
||||
if [ -f "src/main.py" ]; then
|
||||
echo "Running src/main.py..."
|
||||
python3 src/main.py
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# If neither file is found, display an error message
|
||||
echo "Error: Neither main.py nor src/main.py found."
|
||||
echo "Please make sure the script is in the correct directory."
|
||||
exit 1
|
|
@ -0,0 +1,17 @@
|
|||
from setuptools import setup
|
||||
|
||||
with open("requirements.txt") as f:
|
||||
required = f.read().splitlines()
|
||||
|
||||
setup(
|
||||
name="AutoGGUF",
|
||||
version="v2.0.1",
|
||||
packages=[""],
|
||||
url="https://github.com/leafspark/AutoGGUF",
|
||||
license="apache-2.0",
|
||||
author="leafspark",
|
||||
author_email="leafspark@proton.me",
|
||||
description="automatically quant GGUF models",
|
||||
install_requires=required,
|
||||
entry_points={"console_scripts": ["autogguf-gui = main:main"]},
|
||||
)
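With this setup script, the project can be installed from a source checkout with `pip install .`, which should also register the `autogguf-gui` console script declared in `entry_points`.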
|
2156
src/AutoGGUF.py
2156
src/AutoGGUF.py
File diff suppressed because it is too large
|
@ -0,0 +1,112 @@
|
|||
from PySide6.QtCore import QPoint, Qt
|
||||
from PySide6.QtWidgets import QHBoxLayout, QLabel, QMenuBar, QPushButton, QWidget
|
||||
|
||||
|
||||
class CustomTitleBar(QWidget):
|
||||
def __init__(self, parent=None) -> None:
|
||||
super().__init__(parent)
|
||||
self.parent = parent
|
||||
layout = QHBoxLayout(self)
|
||||
layout.setContentsMargins(10, 5, 10, 5)
|
||||
|
||||
# Add the favicon
|
||||
# TODO: uncomment this
|
||||
# self.icon_label = QLabel()
|
||||
# self.icon_label.setPixmap(QPixmap(resource_path("assets/favicon.ico")))
|
||||
# layout.addWidget(self.icon_label)
|
||||
|
||||
# Add app title (bolded)
|
||||
self.title = QLabel("<b>AutoGGUF</b>") # Use HTML tags for bolding
|
||||
layout.addWidget(self.title)
|
||||
|
||||
# Add menubar here
|
||||
self.menubar = QMenuBar()
|
||||
layout.addWidget(self.menubar) # Add menubar to the layout
|
||||
|
||||
layout.addStretch(1) # This pushes the buttons to the right
|
||||
|
||||
# Add minimize and close buttons
|
||||
self.minimize_button = QPushButton("—")
|
||||
self.close_button = QPushButton("✕")
|
||||
|
||||
for button in (self.minimize_button, self.close_button):
|
||||
button.setFixedSize(30, 30)
|
||||
button.setStyleSheet(
|
||||
"""
|
||||
QPushButton {
|
||||
border: none;
|
||||
background-color: transparent;
|
||||
}
|
||||
QPushButton:hover {
|
||||
background-color: rgba(255, 255, 255, 0.1);
|
||||
}
|
||||
"""
|
||||
)
|
||||
|
||||
# Enable mouse tracking for smoother movement
|
||||
self.setMouseTracking(True)
|
||||
|
||||
# Add maximize button
|
||||
self.maximize_button = QPushButton("□")
|
||||
self.maximize_button.setFixedSize(30, 30)
|
||||
self.maximize_button.setStyleSheet(
|
||||
"""
|
||||
QPushButton {
|
||||
border: none;
|
||||
background-color: transparent;
|
||||
padding: 2px;
|
||||
font-size: 15px;
|
||||
}
|
||||
QPushButton:hover {
|
||||
background-color: rgba(255, 255, 255, 0.1);
|
||||
}
|
||||
"""
|
||||
)
|
||||
self.maximize_button.clicked.connect(self.toggle_maximize)
|
||||
|
||||
layout.addWidget(self.minimize_button)
|
||||
layout.addWidget(self.maximize_button)
|
||||
layout.addWidget(self.close_button)
|
||||
|
||||
self.minimize_button.clicked.connect(self.parent.showMinimized)
|
||||
self.close_button.clicked.connect(self.parent.close)
|
||||
|
||||
self.start = QPoint(0, 0)
|
||||
self.pressing = False
|
||||
self.isMaximized = False # Flag to track maximization state
|
||||
self.normal_size = None # Store the normal window size
|
||||
|
||||
def mousePressEvent(self, event) -> None:
|
||||
if event.button() == Qt.LeftButton:
|
||||
self.start = event.globalPos() - self.parent.frameGeometry().topLeft()
|
||||
self.pressing = True
|
||||
|
||||
def mouseMoveEvent(self, event) -> None:
|
||||
if self.pressing:
|
||||
new_pos = event.globalPos() - self.start
|
||||
screen = self.parent.screen()
|
||||
screen_geo = screen.availableGeometry()
|
||||
|
||||
# Check if the new position would put the titlebar below the taskbar
|
||||
if (
|
||||
new_pos.y() + self.parent.height() > screen_geo.bottom()
|
||||
): # Use screen_geo.bottom()
|
||||
new_pos.setY(screen_geo.bottom() - self.parent.height())
|
||||
|
||||
self.parent.move(new_pos)
|
||||
|
||||
def mouseReleaseEvent(self, event) -> None:
|
||||
self.pressing = False
|
||||
|
||||
def toggle_maximize(self) -> None:
|
||||
if self.isMaximized:
|
||||
self.parent.showNormal()
|
||||
if self.normal_size:
|
||||
self.parent.resize(self.normal_size)
|
||||
self.maximize_button.setText("□") # Change back to maximize symbol
|
||||
self.isMaximized = False
|
||||
else:
|
||||
self.normal_size = self.parent.size() # Store the current size
|
||||
self.parent.showMaximized()
|
||||
self.maximize_button.setText("❐") # Change to restore symbol
|
||||
self.isMaximized = True
|
|
@ -1,38 +1,44 @@
|
|||
from PyQt6.QtWidgets import *
|
||||
from PyQt6.QtCore import *
|
||||
from PyQt6.QtGui import *
|
||||
import os
|
||||
import sys
|
||||
import psutil
|
||||
import subprocess
|
||||
import time
|
||||
import signal
|
||||
import json
|
||||
import platform
|
||||
import requests
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
import ssl
|
||||
import certifi
|
||||
from PySide6.QtCore import QThread, Signal
|
||||
|
||||
|
||||
class DownloadThread(QThread):
|
||||
progress_signal = pyqtSignal(int)
|
||||
finished_signal = pyqtSignal(str)
|
||||
error_signal = pyqtSignal(str)
|
||||
progress_signal = Signal(int)
|
||||
finished_signal = Signal(str)
|
||||
error_signal = Signal(str)
|
||||
|
||||
def __init__(self, url, save_path):
|
||||
def __init__(self, url, save_path) -> None:
|
||||
super().__init__()
|
||||
self.url = url
|
||||
self.save_path = save_path
|
||||
|
||||
def run(self):
|
||||
def run(self) -> None:
|
||||
try:
|
||||
response = requests.get(self.url, stream=True)
|
||||
response.raise_for_status()
|
||||
total_size = int(response.headers.get('content-length', 0))
|
||||
req = urllib.request.Request(self.url)
|
||||
|
||||
# Create SSL context with certifi certificates
|
||||
ssl_context = ssl.create_default_context(cafile=certifi.where())
|
||||
|
||||
with urllib.request.urlopen(req, context=ssl_context) as response:
|
||||
if response.status != 200:
|
||||
raise urllib.error.HTTPError(
|
||||
self.url, response.status, "HTTP Error", response.headers, None
|
||||
)
|
||||
|
||||
total_size = int(response.headers.get("Content-Length", 0))
|
||||
block_size = 8192
|
||||
downloaded = 0
|
||||
|
||||
with open(self.save_path, 'wb') as file:
|
||||
for data in response.iter_content(block_size):
|
||||
with open(self.save_path, "wb") as file:
|
||||
while True:
|
||||
data = response.read(block_size)
|
||||
if not data:
|
||||
break
|
||||
size = file.write(data)
|
||||
downloaded += size
|
||||
if total_size:
|
||||
|
@ -41,7 +47,7 @@ def run(self):
|
|||
|
||||
# Extract the downloaded zip file
|
||||
extract_dir = os.path.splitext(self.save_path)[0]
|
||||
with zipfile.ZipFile(self.save_path, 'r') as zip_ref:
|
||||
with zipfile.ZipFile(self.save_path, "r") as zip_ref:
|
||||
zip_ref.extractall(extract_dir)
|
||||
|
||||
# Remove the zip file after extraction
|
||||
|
@ -50,4 +56,5 @@ def run(self):
|
|||
self.finished_signal.emit(extract_dir)
|
||||
except Exception as e:
|
||||
self.error_signal.emit(str(e))
|
||||
|
||||
if os.path.exists(self.save_path):
|
||||
os.remove(self.save_path)
|
||||
|
|
|
@ -0,0 +1,240 @@
|
|||
import pynvml
|
||||
from PySide6.QtCore import QTimer
|
||||
from PySide6.QtGui import QPainter, QPen, QColor
|
||||
from PySide6.QtWidgets import (
|
||||
QWidget,
|
||||
QHBoxLayout,
|
||||
QVBoxLayout,
|
||||
QProgressBar,
|
||||
QLabel,
|
||||
QDialog,
|
||||
QTabWidget,
|
||||
QGraphicsView,
|
||||
QGraphicsScene,
|
||||
QGraphicsLineItem,
|
||||
QComboBox,
|
||||
)
|
||||
|
||||
from Localizations import (
|
||||
GPU_USAGE_FORMAT,
|
||||
GPU_DETAILS,
|
||||
GPU_USAGE_OVER_TIME,
|
||||
VRAM_USAGE_OVER_TIME,
|
||||
NO_GPU_DETECTED,
|
||||
AMD_GPU_NOT_SUPPORTED,
|
||||
CPU_USAGE_OVER_TIME,
|
||||
RAM_USAGE_OVER_TIME,
|
||||
)
|
||||
|
||||
from ui_update import animate_bar
|
||||
|
||||
|
||||
class SimpleGraph(QGraphicsView):
|
||||
def __init__(self, title, parent=None) -> None:
|
||||
super().__init__(parent)
|
||||
self.setScene(QGraphicsScene(self))
|
||||
self.setRenderHint(QPainter.RenderHint.Antialiasing)
|
||||
|
||||
self.setMinimumHeight(200)
|
||||
self.title = title
|
||||
self.data = []
|
||||
|
||||
def update_data(self, data) -> None:
|
||||
self.data = data
|
||||
self.scene().clear()
|
||||
if not self.data:
|
||||
return
|
||||
|
||||
width = self.width() - 40
|
||||
height = self.height() - 40
|
||||
max_value = 100 # Fixed to 100% for GPU usage
|
||||
|
||||
# Draw axes
|
||||
self.scene().addLine(20, height + 20, width + 20, height + 20)
|
||||
self.scene().addLine(20, 20, 20, height + 20)
|
||||
|
||||
# Draw title
|
||||
self.scene().addText(self.title).setPos(width // 2, 0)
|
||||
|
||||
# Draw graph
|
||||
path = QPen(QColor(0, 120, 212), 2) # Blue color, 2px width
|
||||
for i in range(1, len(self.data)):
|
||||
x1 = 20 + (i - 1) * width / (len(self.data) - 1)
|
||||
y1 = 20 + height - (self.data[i - 1] * height / max_value)
|
||||
x2 = 20 + i * width / (len(self.data) - 1)
|
||||
y2 = 20 + height - (self.data[i] * height / max_value)
|
||||
line = QGraphicsLineItem(x1, y1, x2, y2)
|
||||
line.setPen(path)
|
||||
self.scene().addItem(line)
|
||||
|
||||
def resizeEvent(self, event) -> None:
|
||||
super().resizeEvent(event)
|
||||
self.update_data(self.data)
|
||||
|
||||
|
||||
class GPUMonitor(QWidget):
|
||||
def __init__(self, parent=None) -> None:
|
||||
super().__init__(parent)
|
||||
self.setMinimumHeight(30)
|
||||
self.setMaximumHeight(30)
|
||||
|
||||
layout = QHBoxLayout(self)
|
||||
layout.setContentsMargins(0, 0, 0, 0)
|
||||
|
||||
self.gpu_selector = QComboBox()
|
||||
self.gpu_selector.setVisible(False)
|
||||
self.gpu_selector.currentIndexChanged.connect(self.change_gpu)
|
||||
layout.addWidget(self.gpu_selector)
|
||||
|
||||
self.gpu_bar = QProgressBar()
|
||||
self.gpu_bar.setTextVisible(False)
|
||||
layout.addWidget(self.gpu_bar)
|
||||
|
||||
self.gpu_label = QLabel()
|
||||
layout.addWidget(self.gpu_label)
|
||||
|
||||
self.timer = QTimer(self)
|
||||
self.timer.timeout.connect(self.update_gpu_info)
|
||||
self.timer.start(500) # Update every 0.5 seconds
|
||||
|
||||
self.gpu_data = []
|
||||
self.vram_data = []
|
||||
|
||||
self.handles = []
|
||||
self.current_gpu = 0
|
||||
|
||||
try:
|
||||
pynvml.nvmlInit()
|
||||
device_count = pynvml.nvmlDeviceGetCount()
|
||||
for i in range(device_count):
|
||||
handle = pynvml.nvmlDeviceGetHandleByIndex(i)
|
||||
name = pynvml.nvmlDeviceGetName(handle)
|
||||
# Handle both string and bytes cases
|
||||
if isinstance(name, bytes):
|
||||
name = name.decode("utf-8")
|
||||
self.handles.append(handle)
|
||||
self.gpu_selector.addItem(f"NVIDIA GPU {i}: {name}")
|
||||
|
||||
if device_count > 1:
|
||||
self.gpu_selector.setVisible(True)
|
||||
|
||||
if device_count == 0:
|
||||
self.check_for_amd_gpu()
|
||||
|
||||
except pynvml.NVMLError:
|
||||
self.check_for_amd_gpu()
|
||||
|
||||
if not self.handles:
|
||||
self.gpu_label.setText(NO_GPU_DETECTED)
|
||||
|
||||
def check_for_amd_gpu(self) -> None:
|
||||
# This is a placeholder. Implementing AMD GPU detection would require
|
||||
# platform-specific methods or additional libraries.
|
||||
self.gpu_label.setText(AMD_GPU_NOT_SUPPORTED)
|
||||
|
||||
def change_gpu(self, index) -> None:
|
||||
self.current_gpu = index
|
||||
self.gpu_data.clear()
|
||||
self.vram_data.clear()
|
||||
|
||||
def update_gpu_info(self) -> None:
|
||||
if self.handles:
|
||||
try:
|
||||
handle = self.handles[self.current_gpu]
|
||||
utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
|
||||
memory = pynvml.nvmlDeviceGetMemoryInfo(handle)
|
||||
|
||||
gpu_usage = utilization.gpu
|
||||
vram_usage = (memory.used / memory.total) * 100
|
||||
|
||||
animate_bar(self, self.gpu_bar, int(vram_usage))
|
||||
self.gpu_label.setText(
|
||||
GPU_USAGE_FORMAT.format(
|
||||
gpu_usage,
|
||||
vram_usage,
|
||||
memory.used // 1024 // 1024,
|
||||
memory.total // 1024 // 1024,
|
||||
)
|
||||
)
|
||||
|
||||
self.gpu_data.append(gpu_usage)
|
||||
self.vram_data.append(vram_usage)
|
||||
|
||||
if len(self.gpu_data) > 60:
|
||||
self.gpu_data.pop(0)
|
||||
self.vram_data.pop(0)
|
||||
except pynvml.NVMLError:
|
||||
self.gpu_bar.setValue(0)
|
||||
self.gpu_label.setText(GPU_USAGE_FORMAT.format(0, 0, 0, 0))
|
||||
|
||||
def mouseDoubleClickEvent(self, event) -> None:
|
||||
if self.handles:
|
||||
self.show_detailed_stats()
|
||||
|
||||
def show_ram_graph(self, event) -> None:
|
||||
self.show_detailed_stats_std(RAM_USAGE_OVER_TIME, self.ram_data)
|
||||
|
||||
def show_cpu_graph(self, event) -> None:
|
||||
self.show_detailed_stats_std(CPU_USAGE_OVER_TIME, self.cpu_data)
|
||||
|
||||
def show_detailed_stats_std(self, title, data) -> None:
|
||||
dialog = QDialog(self)
|
||||
dialog.setWindowTitle(title)
|
||||
dialog.setMinimumSize(800, 600)
|
||||
|
||||
layout = QVBoxLayout(dialog)
|
||||
|
||||
graph = SimpleGraph(title)
|
||||
layout.addWidget(graph)
|
||||
|
||||
def update_graph_data() -> None:
|
||||
graph.update_data(data)
|
||||
|
||||
timer = QTimer(dialog)
|
||||
timer.timeout.connect(update_graph_data)
|
||||
timer.start(500) # Update every 0.5 seconds
|
||||
|
||||
dialog.exec()
|
||||
|
||||
def show_detailed_stats(self) -> None:
|
||||
dialog = QDialog(self)
|
||||
dialog.setWindowTitle(GPU_DETAILS)
|
||||
dialog.setMinimumSize(800, 600)
|
||||
|
||||
layout = QVBoxLayout(dialog)
|
||||
|
||||
if len(self.handles) > 1:
|
||||
gpu_selector = QComboBox()
|
||||
gpu_selector.addItems(
|
||||
[
|
||||
self.gpu_selector.itemText(i)
|
||||
for i in range(self.gpu_selector.count())
|
||||
]
|
||||
)
|
||||
gpu_selector.setCurrentIndex(self.current_gpu)
|
||||
gpu_selector.currentIndexChanged.connect(self.change_gpu)
|
||||
layout.addWidget(gpu_selector)
|
||||
|
||||
tab_widget = QTabWidget()
|
||||
layout.addWidget(tab_widget)
|
||||
|
||||
gpu_graph = SimpleGraph(GPU_USAGE_OVER_TIME)
|
||||
vram_graph = SimpleGraph(VRAM_USAGE_OVER_TIME)
|
||||
|
||||
def update_graph_data() -> None:
|
||||
gpu_graph.update_data(self.gpu_data)
|
||||
vram_graph.update_data(self.vram_data)
|
||||
|
||||
timer = QTimer(dialog)
|
||||
timer.timeout.connect(update_graph_data)
|
||||
timer.start(500) # Update every 0.5 seconds
|
||||
|
||||
tab_widget.addTab(gpu_graph, GPU_USAGE_OVER_TIME)
|
||||
tab_widget.addTab(vram_graph, VRAM_USAGE_OVER_TIME)
|
||||
|
||||
dialog.exec()
|
||||
|
||||
def closeEvent(self, event) -> None:
|
||||
if self.handles:
|
||||
pynvml.nvmlShutdown()
|
||||
super().closeEvent(event)
|
|
@@ -0,0 +1,122 @@
import locale
import os
import platform
import shutil
import socket
import time
from datetime import datetime

import psutil
from PySide6.QtCore import QRegularExpression, Signal
from PySide6.QtGui import QDoubleValidator, QIntValidator, QRegularExpressionValidator
from PySide6.QtWidgets import QComboBox, QHBoxLayout, QLineEdit, QPushButton, QWidget


class KVOverrideEntry(QWidget):
    deleted = Signal(QWidget)

    def __init__(self, parent=None) -> None:
        super().__init__(parent)
        layout = QHBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)

        self.key_input = QLineEdit()
        self.key_input.setPlaceholderText("Key")

        # Set validator for key input (letters and dots only)
        key_validator = QRegularExpressionValidator(QRegularExpression(r"[A-Za-z.]+"))
        self.key_input.setValidator(key_validator)
        layout.addWidget(self.key_input)

        self.type_combo = QComboBox()
        self.type_combo.addItems(["int", "str", "float", "u32", "i32"])
        layout.addWidget(self.type_combo)

        self.value_input = QLineEdit()
        self.value_input.setPlaceholderText("Value")
        layout.addWidget(self.value_input)

        delete_button = QPushButton("✕")
        delete_button.setFixedSize(30, 30)
        delete_button.clicked.connect(self.delete_clicked)
        layout.addWidget(delete_button)

        # Connect type change to validator update
        self.type_combo.currentTextChanged.connect(self.update_validator)

        # Initialize validator
        self.update_validator(self.type_combo.currentText())

    def delete_clicked(self) -> None:
        self.deleted.emit(self)

    def get_override_string(
        self,
        model_name=None,
        quant_type=None,
        output_path=None,
        quantization_parameters=None,
    ) -> str:  # Add arguments
        key = self.key_input.text()
        type_ = self.type_combo.currentText()
        value = self.value_input.text()

        dynamic_params = {
            "{system.time.milliseconds}": lambda: str(int(time.time() * 1000)),
            "{system.time.seconds}": lambda: str(int(time.time())),
            "{system.date.iso}": lambda: datetime.now().strftime("%Y-%m-%d"),
            "{system.datetime.iso}": lambda: datetime.now().isoformat(),
            "{system.username}": lambda: os.getlogin(),
            "{system.hostname}": lambda: socket.gethostname(),
            "{system.platform}": lambda: platform.system(),
            "{system.python.version}": lambda: platform.python_version(),
            "{system.timezone}": lambda: time.tzname[time.daylight],
            "{system.cpus}": lambda: str(os.cpu_count()),
            "{system.memory.total}": lambda: str(psutil.virtual_memory().total),
            "{system.memory.free}": lambda: str(psutil.virtual_memory().free),
            "{system.filesystem.used}": lambda: str(shutil.disk_usage("/").used),
            "{system.kernel.version}": lambda: platform.release(),
            "{system.locale}": lambda: locale.getdefaultlocale()[0],
            "{process.nice}": lambda: str(os.nice(0)),
            "{model.name}": lambda: (
                model_name if model_name is not None else "Unknown Model"
            ),
            "{quant.type}": lambda: (
                quant_type if quant_type is not None else "Unknown Quant"
            ),
            "{output.path}": lambda: (
                output_path if output_path is not None else "Unknown Output Path"
            ),
            "{quant.kv}": lambda: (
                quantization_parameters[0]
                if quantization_parameters is not None
                else False
            ),
            "{quant.requantized}": lambda: (
                quantization_parameters[1]
                if quantization_parameters is not None
                else False
            ),
            "{quant.leave_output_tensor}": lambda: (
                quantization_parameters[2]
                if quantization_parameters is not None
                else False
            ),
        }

        for param, func in dynamic_params.items():
            value = value.replace(param, func())

        return f"{key}={type_}:{value}"

    def get_raw_override_string(self) -> str:
        # Return the raw override string with placeholders intact
        return f"{self.key_input.text()}={self.type_combo.currentText()}:{self.value_input.text()}"

    def update_validator(self, type_) -> None:
        if type_ == "int":
            self.value_input.setValidator(QIntValidator())
        elif type_ == "float":
            self.value_input.setValidator(QDoubleValidator())
        else:  # str
            self.value_input.setValidator(None)

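For orientation, a rough sketch of driving this widget outside the main UI follows. KVOverrideEntry and the placeholder expansion in get_override_string come from the file above; the QApplication scaffolding and the values typed into the fields are only illustrative.

# Illustrative only: exercise KVOverrideEntry's placeholder expansion.
# Assumes the class above is importable; the key and value below are hypothetical.
import sys

from PySide6.QtWidgets import QApplication

app = QApplication(sys.argv)  # a QApplication must exist before creating widgets

entry = KVOverrideEntry()
entry.key_input.setText("general.quantized_by")
entry.type_combo.setCurrentText("str")
entry.value_input.setText("{system.username} on {system.date.iso}")

# The raw string keeps the placeholders; the resolved string substitutes them.
print(entry.get_raw_override_string())
print(entry.get_override_string(model_name="example-model"))
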
File diff suppressed because it is too large
@@ -0,0 +1,50 @@
import logging
from logging.handlers import RotatingFileHandler
import os
from datetime import datetime


class Logger:
    def __init__(self, name, log_dir) -> None:
        self.logger = logging.getLogger(name)
        self.logger.setLevel(logging.DEBUG)

        # Create logs directory if it doesn't exist
        os.makedirs(log_dir, exist_ok=True)

        # Console handler
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        console_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        console_handler.setFormatter(console_format)

        # File handler
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(log_dir, f"latest_{timestamp}.log")
        file_handler = RotatingFileHandler(
            log_file, maxBytes=10 * 1024 * 1024, backupCount=5, encoding="utf-8"
        )
        file_handler.setLevel(logging.DEBUG)
        file_format = logging.Formatter(
            "%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
        )
        file_handler.setFormatter(file_format)

        # Add handlers to logger
        self.logger.addHandler(console_handler)
        self.logger.addHandler(file_handler)

    def debug(self, message) -> None:
        self.logger.debug(message)

    def info(self, message) -> None:
        self.logger.info(message)

    def warning(self, message) -> None:
        self.logger.warning(message)

    def error(self, message) -> None:
        self.logger.error(message)

    def critical(self, message) -> None:
        self.logger.critical(message)

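A minimal usage sketch for the Logger wrapper above; the "logs" directory name is an arbitrary choice for illustration, not something the class requires.

# Illustrative only: console gets INFO and above, the rotating file gets DEBUG and above.
logger = Logger("AutoGGUF", "logs")
logger.info("application started")
logger.debug("verbose detail, file only")
logger.error("something went wrong")
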
@@ -1,20 +1,8 @@
-from PyQt6.QtWidgets import *
-from PyQt6.QtCore import *
-from PyQt6.QtGui import *
-import os
-import sys
-import psutil
-import subprocess
-import time
-import signal
-import json
-import platform
-import requests
-import zipfile
-from datetime import datetime
+from PySide6.QtWidgets import QVBoxLayout, QTextEdit, QDialog, QPushButton
 
 
 class ModelInfoDialog(QDialog):
-    def __init__(self, model_info, parent=None):
+    def __init__(self, model_info, parent=None) -> None:
         super().__init__(parent)
         self.setWindowTitle("Model Information")
         self.setGeometry(200, 200, 600, 400)

@@ -33,16 +21,28 @@ def __init__(self, model_info, parent=None):
 
         self.setLayout(layout)
 
-    def format_model_info(self, model_info):
+    def format_model_info(self, model_info) -> str:
         html = "<h2>Model Information</h2>"
         html += f"<p><b>Architecture:</b> {model_info.get('architecture', 'N/A')}</p>"
-        html += f"<p><b>Quantization Type:</b> {model_info.get('quantization_type', 'N/A')}</p>"
         html += f"<p><b>KV Pairs:</b> {model_info.get('kv_pairs', 'N/A')}</p>"
 
+        # Format quantization types
+        quant_types = model_info.get("quantization_type", [])
+        if quant_types:
+            # Clean up the format: remove "- type " prefix and join with " | "
+            formatted_types = []
+            for qtype in quant_types:
+                # Remove "- type " prefix if present
+                clean_type = qtype.replace("- type ", "").strip()
+                formatted_types.append(clean_type)
+            quant_display = " | ".join(formatted_types)
+        else:
+            quant_display = "N/A"
+
+        html += f"<p><b>Quantization Type:</b> {quant_display}</p>"
         html += f"<p><b>Tensors:</b> {model_info.get('tensors', 'N/A')}</p>"
 
         html += "<h3>Key-Value Pairs:</h3>"
-        for key, value in model_info.get('kv_data', {}).items():
+        for key, value in model_info.get("kv_data", {}).items():
             html += f"<p><b>{key}:</b> {value}</p>"
 
         return html

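To make the cleanup above concrete, here is a small standalone sketch of the same transformation applied to sample loader lines; the input strings are hypothetical, not captured llama.cpp output.

# Illustrative only: mimics the quantization-type cleanup in format_model_info.
quant_types = [
    "- type  f32:   65 tensors",
    "- type q4_K:  193 tensors",
    "- type q6_K:   33 tensors",
]

formatted_types = [q.replace("- type ", "").strip() for q in quant_types]
quant_display = " | ".join(formatted_types)
print(quant_display)
# f32:   65 tensors | q4_K:  193 tensors | q6_K:   33 tensors
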
@@ -0,0 +1,81 @@
import importlib
import os
from typing import Any, Dict
from Localizations import *


class Plugins:

    def load_plugins(self) -> Dict[str, Dict[str, Any]]:
        plugins = {}
        plugin_dir = "plugins"

        if not os.path.exists(plugin_dir):
            self.logger.info(PLUGINS_DIR_NOT_EXIST.format(plugin_dir))
            return plugins

        if not os.path.isdir(plugin_dir):
            self.logger.warning(PLUGINS_DIR_NOT_DIRECTORY.format(plugin_dir))
            return plugins

        for file in os.listdir(plugin_dir):
            if file.endswith(".py") and not file.endswith(".disabled.py"):
                name = file[:-3]
                path = os.path.join(plugin_dir, file)

                try:
                    spec = importlib.util.spec_from_file_location(name, path)
                    module = importlib.util.module_from_spec(spec)
                    spec.loader.exec_module(module)

                    for item_name in dir(module):
                        item = getattr(module, item_name)
                        if isinstance(item, type) and hasattr(item, "__data__"):
                            plugin_instance = item()
                            plugin_data = plugin_instance.__data__()

                            compatible_versions = plugin_data.get(
                                "compatible_versions", []
                            )
                            if (
                                "*" in compatible_versions
                                or AUTOGGUF_VERSION in compatible_versions
                            ):
                                plugins[name] = {
                                    "instance": plugin_instance,
                                    "data": plugin_data,
                                }
                                self.logger.info(
                                    PLUGIN_LOADED.format(
                                        plugin_data["name"], plugin_data["version"]
                                    )
                                )
                            else:
                                self.logger.warning(
                                    PLUGIN_INCOMPATIBLE.format(
                                        plugin_data["name"],
                                        plugin_data["version"],
                                        AUTOGGUF_VERSION,
                                        ", ".join(compatible_versions),
                                    )
                                )
                            break
                except Exception as e:
                    self.logger.error(PLUGIN_LOAD_FAILED.format(name, str(e)))

        return plugins

    def apply_plugins(self) -> None:
        if not self.plugins:
            self.logger.info(NO_PLUGINS_LOADED)
            return

        for plugin_name, plugin_info in self.plugins.items():
            plugin_instance = plugin_info["instance"]
            for attr_name in dir(plugin_instance):
                if not attr_name.startswith("__") and attr_name != "init":
                    attr_value = getattr(plugin_instance, attr_name)
                    setattr(self, attr_name, attr_value)

            if hasattr(plugin_instance, "init") and callable(plugin_instance.init):
                plugin_instance.init(self)

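Based on the loader above (it instantiates any class exposing __data__ and later calls an optional init), a minimal plugin dropped into plugins/ might look like the sketch below; the file name, plugin name, and version strings are invented for illustration.

# plugins/example_plugin.py (hypothetical example that satisfies load_plugins())
class ExamplePlugin:
    def __data__(self):
        # "*" marks the plugin as compatible with any AutoGGUF version, per the loader check.
        return {
            "name": "ExamplePlugin",
            "version": "0.1.0",
            "compatible_versions": ["*"],
        }

    def init(self, app):
        # Called with the main application object after its attributes are copied over.
        app.logger.info("ExamplePlugin initialized")

    def example_action(self) -> None:
        # Methods not named "init" are attached to the application by apply_plugins().
        print("hello from ExamplePlugin")
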
@ -1,26 +1,23 @@
|
|||
from PyQt6.QtWidgets import *
|
||||
from PyQt6.QtCore import *
|
||||
from PyQt6.QtGui import *
|
||||
import os
|
||||
import sys
|
||||
import psutil
|
||||
import subprocess
|
||||
import time
|
||||
import re
|
||||
import signal
|
||||
import json
|
||||
import platform
|
||||
import requests
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
import subprocess
|
||||
|
||||
from PySide6.QtCore import Signal, QThread
|
||||
|
||||
from globals import open_file_safe
|
||||
from Localizations import IN_PROGRESS, COMPLETED
|
||||
|
||||
|
||||
class QuantizationThread(QThread):
|
||||
output_signal = pyqtSignal(str)
|
||||
status_signal = pyqtSignal(str)
|
||||
finished_signal = pyqtSignal()
|
||||
error_signal = pyqtSignal(str)
|
||||
model_info_signal = pyqtSignal(dict)
|
||||
# Define custom signals for communication with the main thread
|
||||
output_signal = Signal(str)
|
||||
status_signal = Signal(str)
|
||||
finished_signal = Signal()
|
||||
error_signal = Signal(str)
|
||||
model_info_signal = Signal(dict)
|
||||
|
||||
def __init__(self, command, cwd, log_file):
|
||||
def __init__(self, command, cwd, log_file) -> None:
|
||||
super().__init__()
|
||||
self.command = command
|
||||
self.cwd = cwd
|
||||
|
@ -28,51 +25,131 @@ def __init__(self, command, cwd, log_file):
|
|||
self.process = None
|
||||
self.model_info = {}
|
||||
|
||||
def run(self):
|
||||
def run(self) -> None:
|
||||
try:
|
||||
self.process = subprocess.Popen(self.command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
text=True, cwd=self.cwd)
|
||||
with open(self.log_file, 'w') as log:
|
||||
# Start the subprocess
|
||||
self.process = subprocess.Popen(
|
||||
self.command,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
cwd=self.cwd,
|
||||
)
|
||||
# Open log file and process output
|
||||
with open_file_safe(self.log_file, "w") as log:
|
||||
for line in self.process.stdout:
|
||||
line = line.strip()
|
||||
self.output_signal.emit(line)
|
||||
log.write(line + '\n')
|
||||
log.write(line + "\n")
|
||||
log.flush()
|
||||
self.status_signal.emit("In Progress")
|
||||
self.status_signal.emit(IN_PROGRESS)
|
||||
self.parse_model_info(line)
|
||||
|
||||
# Wait for process to complete
|
||||
self.process.wait()
|
||||
if self.process.returncode == 0:
|
||||
self.status_signal.emit("Completed")
|
||||
self.status_signal.emit(COMPLETED)
|
||||
self.model_info_signal.emit(self.model_info)
|
||||
else:
|
||||
self.error_signal.emit(f"Process exited with code {self.process.returncode}")
|
||||
self.error_signal.emit(
|
||||
f"Process exited with code {self.process.returncode}"
|
||||
)
|
||||
self.finished_signal.emit()
|
||||
except Exception as e:
|
||||
self.error_signal.emit(str(e))
|
||||
|
||||
def parse_model_info(self, line):
|
||||
def parse_model_info(self, line) -> None:
|
||||
# Mapping of technical keys to human-readable names
|
||||
key_mappings = {
|
||||
"general.architecture": "Architecture",
|
||||
"general.name": "Model Name",
|
||||
"general.file_type": "File Type",
|
||||
"general.quantization_version": "Quantization Version",
|
||||
"llama.block_count": "Layers",
|
||||
"llama.context_length": "Context Length",
|
||||
"llama.embedding_length": "Embedding Size",
|
||||
"llama.feed_forward_length": "Feed Forward Length",
|
||||
"llama.attention.head_count": "Attention Heads",
|
||||
"llama.attention.head_count_kv": "Key-Value Heads",
|
||||
"llama.attention.layer_norm_rms_epsilon": "RMS Norm Epsilon",
|
||||
"llama.rope.freq_base": "RoPE Frequency Base",
|
||||
"llama.rope.dimension_count": "RoPE Dimensions",
|
||||
"llama.vocab_size": "Vocabulary Size",
|
||||
"tokenizer.ggml.model": "Tokenizer Model",
|
||||
"tokenizer.ggml.pre": "Tokenizer Preprocessing",
|
||||
"tokenizer.ggml.tokens": "Tokens",
|
||||
"tokenizer.ggml.token_type": "Token Types",
|
||||
"tokenizer.ggml.merges": "BPE Merges",
|
||||
"tokenizer.ggml.bos_token_id": "Begin of Sequence Token ID",
|
||||
"tokenizer.ggml.eos_token_id": "End of Sequence Token ID",
|
||||
"tokenizer.chat_template": "Chat Template",
|
||||
"tokenizer.ggml.padding_token_id": "Padding Token ID",
|
||||
"tokenizer.ggml.unk_token_id": "Unknown Token ID",
|
||||
}
|
||||
|
||||
# Parse output for model information
|
||||
if "llama_model_loader: loaded meta data with" in line:
|
||||
parts = line.split()
|
||||
self.model_info['kv_pairs'] = parts[6]
|
||||
self.model_info['tensors'] = parts[9]
|
||||
self.model_info["kv_pairs"] = parts[6]
|
||||
self.model_info["tensors"] = parts[9]
|
||||
elif "general.architecture" in line:
|
||||
self.model_info['architecture'] = line.split('=')[-1].strip()
|
||||
elif line.startswith("llama_model_loader: - kv"):
|
||||
key = line.split(':')[2].strip()
|
||||
value = line.split('=')[-1].strip()
|
||||
self.model_info.setdefault('kv_data', {})[key] = value
|
||||
self.model_info["architecture"] = line.split("=")[-1].strip()
|
||||
elif line.startswith("llama_model_loader: - kv") and "=" in line:
|
||||
# Split on '=' and take the parts
|
||||
parts = line.split("=", 1) # Split only on first '='
|
||||
left_part = parts[0].strip()
|
||||
value = parts[1].strip()
|
||||
|
||||
# Extract key and type from left part
|
||||
# Format: "llama_model_loader: - kv N: key type"
|
||||
kv_parts = left_part.split(":")
|
||||
if len(kv_parts) >= 3:
|
||||
key_type_part = kv_parts[2].strip() # This is "key type"
|
||||
key = key_type_part.rsplit(" ", 1)[
|
||||
0
|
||||
] # Everything except last word (type)
|
||||
|
||||
# Use human-readable name if available, otherwise use original key
|
||||
display_key = key_mappings.get(key, key)
|
||||
|
||||
self.model_info.setdefault("kv_data", {})[display_key] = value
|
||||
elif line.startswith("llama_model_loader: - type"):
|
||||
parts = line.split(':')
|
||||
parts = line.split(":")
|
||||
if len(parts) > 1:
|
||||
quant_type = parts[1].strip()
|
||||
tensors = parts[2].strip().split()[0]
|
||||
self.model_info.setdefault('quantization_type', []).append(f"{quant_type}: {tensors} tensors")
|
||||
self.model_info.setdefault("quantization_type", []).append(
|
||||
f"{quant_type}: {tensors} tensors"
|
||||
)
|
||||
|
||||
def terminate(self):
|
||||
def parse_progress(self, line, task_item, imatrix_chunks=None) -> None:
|
||||
# Parses the output line for progress information and updates the task item.
|
||||
match = re.search(r"\[\s*(\d+)\s*/\s*(\d+)\s*].*", line)
|
||||
|
||||
if match:
|
||||
current = int(match.group(1))
|
||||
total = int(match.group(2))
|
||||
progress = int((current / total) * 100)
|
||||
task_item.update_progress(progress)
|
||||
else:
|
||||
imatrix_match = re.search(
|
||||
r"compute_imatrix: computing over (\d+) chunks with batch_size \d+",
|
||||
line,
|
||||
)
|
||||
if imatrix_match:
|
||||
imatrix_chunks = int(imatrix_match.group(1))
|
||||
elif imatrix_chunks is not None:
|
||||
if "save_imatrix: stored collected data" in line:
|
||||
save_match = re.search(r"collected data after (\d+) chunks", line)
|
||||
if save_match:
|
||||
saved_chunks = int(save_match.group(1))
|
||||
progress = int((saved_chunks / self.imatrix_chunks) * 100)
|
||||
task_item.update_progress(progress)
|
||||
|
||||
def terminate(self) -> None:
|
||||
# Terminate the subprocess if it's still running
|
||||
if self.process:
|
||||
os.kill(self.process.pid, signal.SIGTERM)
|
||||
self.process.wait(timeout=5)
|
||||
if self.process.poll() is None:
|
||||
os.kill(self.process.pid, signal.SIGKILL)
|
||||
|
||||
|
|
|
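The progress parsing in parse_progress keys off bracketed "[ current / total ]" counters in the tool output; a tiny standalone sketch of that regex follows, using hypothetical sample lines rather than captured output.

# Illustrative only: same pattern parse_progress() uses for "[ current / total ]" lines.
import re

for line in [
    "[   12 /  291 ] blk.0.attn_q.weight",
    "[  291 /  291 ] output.weight",
]:
    match = re.search(r"\[\s*(\d+)\s*/\s*(\d+)\s*].*", line)
    if match:
        current, total = int(match.group(1)), int(match.group(2))
        print(f"{current}/{total} -> {int((current / total) * 100)}%")
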
@ -1,25 +1,52 @@
|
|||
from PyQt6.QtWidgets import *
|
||||
from PyQt6.QtCore import *
|
||||
from PyQt6.QtGui import *
|
||||
import os
|
||||
import sys
|
||||
import psutil
|
||||
import subprocess
|
||||
import time
|
||||
import signal
|
||||
import json
|
||||
import platform
|
||||
import requests
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
|
||||
from PySide6.QtCore import *
|
||||
from PySide6.QtGui import QAction
|
||||
from PySide6.QtWidgets import *
|
||||
|
||||
from Localizations import (
|
||||
DELETING_TASK,
|
||||
CANCELLING_TASK,
|
||||
CONFIRM_DELETION_TITLE,
|
||||
CONFIRM_DELETION,
|
||||
SHOWING_TASK_CONTEXT_MENU,
|
||||
CANCELED,
|
||||
CANCEL,
|
||||
PROPERTIES,
|
||||
COMPLETED,
|
||||
SHOWING_PROPERTIES_FOR_TASK,
|
||||
DELETE,
|
||||
RESTART,
|
||||
IN_PROGRESS,
|
||||
ERROR,
|
||||
RESTARTING_TASK,
|
||||
)
|
||||
from ModelInfoDialog import ModelInfoDialog
|
||||
from QuantizationThread import QuantizationThread
|
||||
from Logger import Logger
|
||||
from error_handling import handle_error
|
||||
|
||||
|
||||
class TaskListItem(QWidget):
|
||||
def __init__(self, task_name, log_file, parent=None):
|
||||
def __init__(
|
||||
self,
|
||||
task_name,
|
||||
log_file,
|
||||
show_progress_bar=True,
|
||||
parent=None,
|
||||
show_properties=False,
|
||||
logger=Logger,
|
||||
quant_threads=List[QuantizationThread],
|
||||
) -> None:
|
||||
super().__init__(parent)
|
||||
self.quant_threads = quant_threads
|
||||
self.task_name = task_name
|
||||
self.log_file = log_file
|
||||
self.logger = logger
|
||||
self.show_properties = show_properties
|
||||
self.status = "Pending"
|
||||
layout = QHBoxLayout(self)
|
||||
|
||||
self.task_label = QLabel(task_name)
|
||||
self.progress_bar = QProgressBar()
|
||||
self.progress_bar.setRange(0, 100)
|
||||
|
@ -27,31 +54,148 @@ def __init__(self, task_name, log_file, parent=None):
|
|||
layout.addWidget(self.task_label)
|
||||
layout.addWidget(self.progress_bar)
|
||||
layout.addWidget(self.status_label)
|
||||
|
||||
# Hide progress bar if show_progress_bar is False
|
||||
self.progress_bar.setVisible(show_progress_bar)
|
||||
|
||||
# Use indeterminate progress bar if not showing percentage
|
||||
if not show_progress_bar:
|
||||
self.progress_bar.setRange(0, 0)
|
||||
|
||||
self.progress_timer = QTimer(self)
|
||||
self.progress_timer.timeout.connect(self.update_progress)
|
||||
self.progress_value = 0
|
||||
|
||||
def update_status(self, status):
|
||||
def show_task_context_menu(self, position) -> None:
|
||||
self.logger.debug(SHOWING_TASK_CONTEXT_MENU)
|
||||
item = self.task_list.itemAt(position)
|
||||
if item is not None:
|
||||
context_menu = QMenu(self)
|
||||
|
||||
properties_action = QAction(PROPERTIES, self)
|
||||
properties_action.triggered.connect(lambda: self.show_task_properties(item))
|
||||
context_menu.addAction(properties_action)
|
||||
|
||||
task_item = self.task_list.itemWidget(item)
|
||||
if task_item.status != COMPLETED:
|
||||
cancel_action = QAction(CANCEL, self)
|
||||
cancel_action.triggered.connect(lambda: self.cancel_task(item))
|
||||
context_menu.addAction(cancel_action)
|
||||
|
||||
if task_item.status == CANCELED:
|
||||
restart_action = QAction(RESTART, self)
|
||||
restart_action.triggered.connect(lambda: self.restart_task(task_item))
|
||||
context_menu.addAction(restart_action)
|
||||
|
||||
delete_action = QAction(DELETE, self)
|
||||
delete_action.triggered.connect(lambda: self.delete_task(item))
|
||||
context_menu.addAction(delete_action)
|
||||
|
||||
context_menu.exec(self.task_list.viewport().mapToGlobal(position))
|
||||
|
||||
def show_task_properties(self, item) -> None:
|
||||
self.logger.debug(SHOWING_PROPERTIES_FOR_TASK.format(item.text()))
|
||||
for thread in self.quant_threads:
|
||||
model_info_dialog = ModelInfoDialog(thread.model_info, self)
|
||||
model_info_dialog.exec()
|
||||
break
|
||||
|
||||
def cancel_task(self, item) -> None:
|
||||
# TODO: fix possibly buggy signal behavior
|
||||
task_item = self.task_list.itemWidget(item)
|
||||
if task_item:
|
||||
task_name = task_item.task_name # Store the name before any changes
|
||||
self.logger.info(CANCELLING_TASK.format(task_name))
|
||||
|
||||
# Find the thread and disconnect signals before terminating
|
||||
for thread in self.quant_threads:
|
||||
if thread.log_file == task_item.log_file:
|
||||
# Disconnect all signals from this thread first
|
||||
try:
|
||||
thread.error_signal.disconnect() # Disconnect all error signal connections
|
||||
thread.output_signal.disconnect() # Disconnect all output signal connections
|
||||
except TypeError:
|
||||
# No connections to disconnect
|
||||
pass
|
||||
|
||||
# Now terminate the thread
|
||||
thread.terminate()
|
||||
self.quant_threads.remove(thread)
|
||||
break
|
||||
|
||||
def delete_task(self, item) -> None:
|
||||
task_item = self.task_list.itemWidget(item)
|
||||
if not task_item:
|
||||
return
|
||||
|
||||
task_name = task_item.task_name # Store task_name before deletion
|
||||
self.logger.info(DELETING_TASK.format(task_name))
|
||||
|
||||
reply = QMessageBox.question(
|
||||
self,
|
||||
CONFIRM_DELETION_TITLE,
|
||||
CONFIRM_DELETION,
|
||||
QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
|
||||
QMessageBox.StandardButton.No,
|
||||
)
|
||||
|
||||
if reply == QMessageBox.StandardButton.Yes:
|
||||
# Cancel the task first (which disconnects signals)
|
||||
self.cancel_task(item)
|
||||
|
||||
# Now remove from list and delete
|
||||
row = self.task_list.row(item)
|
||||
self.task_list.takeItem(row)
|
||||
|
||||
# Delete the widget after removing from list
|
||||
task_item.deleteLater()
|
||||
|
||||
def update_status(self, status) -> None:
|
||||
self.status = status
|
||||
self.status_label.setText(status)
|
||||
if status == "In Progress":
|
||||
if status == IN_PROGRESS:
|
||||
# Only start timer if showing percentage progress
|
||||
if self.progress_bar.isVisible():
|
||||
self.progress_bar.setRange(0, 100)
|
||||
self.progress_timer.start(100)
|
||||
elif status == "Completed":
|
||||
elif status == COMPLETED:
|
||||
self.progress_timer.stop()
|
||||
self.progress_bar.setValue(100)
|
||||
elif status == "Canceled":
|
||||
elif status == CANCELED:
|
||||
self.progress_timer.stop()
|
||||
self.progress_bar.setValue(0)
|
||||
|
||||
def set_error(self):
|
||||
self.status = "Error"
|
||||
self.status_label.setText("Error")
|
||||
def set_error(self) -> None:
|
||||
self.status = ERROR
|
||||
self.status_label.setText(ERROR)
|
||||
self.status_label.setStyleSheet("color: red;")
|
||||
self.progress_bar.setRange(0, 100)
|
||||
self.progress_timer.stop()
|
||||
|
||||
def update_progress(self):
|
||||
self.progress_value = (self.progress_value + 1) % 101
|
||||
def update_progress(self, value=None) -> None:
|
||||
if value is not None:
|
||||
# Update progress bar with specific value
|
||||
self.progress_value = value
|
||||
self.progress_bar.setValue(self.progress_value)
|
||||
else:
|
||||
return
|
||||
|
||||
def restart_task(self, task_item) -> None:
|
||||
self.logger.info(RESTARTING_TASK.format(task_item.task_name))
|
||||
for thread in self.quant_threads:
|
||||
if thread.log_file == task_item.log_file:
|
||||
new_thread = QuantizationThread(
|
||||
thread.command, thread.cwd, thread.log_file
|
||||
)
|
||||
self.quant_threads.append(new_thread)
|
||||
new_thread.status_signal.connect(task_item.update_status)
|
||||
new_thread.finished_signal.connect(
|
||||
lambda: self.task_finished(new_thread, task_item)
|
||||
)
|
||||
new_thread.error_signal.connect(
|
||||
lambda err: handle_error(self.logger, err, task_item)
|
||||
)
|
||||
new_thread.model_info_signal.connect(self.update_model_info)
|
||||
new_thread.start()
|
||||
task_item.update_status(IN_PROGRESS)
|
||||
break
|
||||
|
|
File diff suppressed because it is too large
@ -0,0 +1,162 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import struct
|
||||
import sys
|
||||
from typing import BinaryIO
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from gguf.constants import *
|
||||
from gguf.tensor_mapping import *
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
logger = logging.getLogger("lora-to-gguf")
|
||||
|
||||
NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1}
|
||||
|
||||
|
||||
def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
|
||||
fout.write(b"ggla"[::-1]) # magic (ggml lora)
|
||||
fout.write(struct.pack("i", 1)) # file version
|
||||
fout.write(struct.pack("i", params["r"]))
|
||||
# https://opendelta.readthedocs.io/en/latest/modules/deltas.html says that `lora_alpha` is an int
|
||||
# but some models ship a float value instead
|
||||
# let's convert to int, but fail if lossless conversion is not possible
|
||||
assert (
|
||||
int(params["lora_alpha"]) == params["lora_alpha"]
|
||||
), "cannot convert float to int losslessly"
|
||||
fout.write(struct.pack("i", int(params["lora_alpha"])))
|
||||
|
||||
|
||||
def write_tensor_header(
|
||||
fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]
|
||||
) -> None:
|
||||
sname = name.encode("utf-8")
|
||||
fout.write(
|
||||
struct.pack(
|
||||
"iii",
|
||||
len(shape),
|
||||
len(sname),
|
||||
NUMPY_TYPE_TO_FTYPE[data_type.name],
|
||||
)
|
||||
)
|
||||
fout.write(struct.pack("i" * len(shape), *shape[::-1]))
|
||||
fout.write(sname)
|
||||
fout.seek((fout.tell() + 31) & -32)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) < 2:
|
||||
logger.info(f"Usage: python {sys.argv[0]} <path> <output_path> [arch]")
|
||||
logger.info(
|
||||
"Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
|
||||
)
|
||||
logger.info(
|
||||
f"Arch must be one of {list(MODEL_ARCH_NAMES.values())} (default: llama)"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
input_json = os.path.join(sys.argv[1], "adapter_config.json")
|
||||
input_model = os.path.join(sys.argv[1], "adapter_model.bin")
|
||||
output_path = sys.argv[2]
|
||||
|
||||
if os.path.exists(input_model):
|
||||
model = torch.load(input_model, map_location="cpu")
|
||||
else:
|
||||
input_model = os.path.join(sys.argv[1], "adapter_model.safetensors")
|
||||
# lazy import load_file only if lora is in safetensors format.
|
||||
from safetensors.torch import load_file
|
||||
|
||||
model = load_file(input_model, device="cpu")
|
||||
|
||||
arch_name = sys.argv[3] if len(sys.argv) == 4 else "llama"
|
||||
|
||||
if arch_name not in MODEL_ARCH_NAMES.values():
|
||||
logger.error(f"Error: unsupported architecture {arch_name}")
|
||||
sys.exit(1)
|
||||
|
||||
arch = list(MODEL_ARCH_NAMES.keys())[
|
||||
list(MODEL_ARCH_NAMES.values()).index(arch_name)
|
||||
]
|
||||
name_map = TensorNameMap(arch, 500)
|
||||
|
||||
with open(input_json, "r") as f:
|
||||
params = json.load(f)
|
||||
|
||||
if params["peft_type"] != "LORA":
|
||||
logger.error(
|
||||
f"Error: unsupported adapter type {params['peft_type']}, expected LORA"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if params["fan_in_fan_out"] is True:
|
||||
logger.error("Error: param fan_in_fan_out is not supported")
|
||||
sys.exit(1)
|
||||
|
||||
if params["bias"] is not None and params["bias"] != "none":
|
||||
logger.error("Error: param bias is not supported")
|
||||
sys.exit(1)
|
||||
|
||||
# TODO: these seem to be layers that have been trained but without lora.
|
||||
# doesn't seem widely used but eventually should be supported
|
||||
if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
|
||||
logger.error("Error: param modules_to_save is not supported")
|
||||
sys.exit(1)
|
||||
|
||||
with open(output_path, "wb") as fout:
|
||||
fout.truncate()
|
||||
|
||||
write_file_header(fout, params)
|
||||
for k, v in model.items():
|
||||
orig_k = k
|
||||
if k.endswith(".default.weight"):
|
||||
k = k.replace(".default.weight", ".weight")
|
||||
if k in ["llama_proj.weight", "llama_proj.bias"]:
|
||||
continue
|
||||
if k.endswith("lora_A.weight"):
|
||||
if v.dtype != torch.float16 and v.dtype != torch.float32:
|
||||
v = v.float()
|
||||
v = v.T
|
||||
else:
|
||||
v = v.float()
|
||||
|
||||
t = v.detach().numpy()
|
||||
|
||||
prefix = "base_model.model."
|
||||
if k.startswith(prefix):
|
||||
k = k[len(prefix) :]
|
||||
|
||||
lora_suffixes = (".lora_A.weight", ".lora_B.weight")
|
||||
if k.endswith(lora_suffixes):
|
||||
suffix = k[-len(lora_suffixes[0]) :]
|
||||
k = k[: -len(lora_suffixes[0])]
|
||||
else:
|
||||
logger.error(f"Error: unrecognized tensor name {orig_k}")
|
||||
sys.exit(1)
|
||||
|
||||
tname = name_map.get_name(k)
|
||||
if tname is None:
|
||||
logger.error(f"Error: could not map tensor name {orig_k}")
|
||||
logger.error(
|
||||
" Note: the arch parameter must be specified if the model is not llama"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if suffix == ".lora_A.weight":
|
||||
tname += ".weight.loraA"
|
||||
elif suffix == ".lora_B.weight":
|
||||
tname += ".weight.loraB"
|
||||
else:
|
||||
assert False
|
||||
|
||||
logger.info(
|
||||
f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB"
|
||||
)
|
||||
write_tensor_header(fout, tname, t.shape, t.dtype)
|
||||
t.tofile(fout)
|
||||
|
||||
logger.info(f"Converted {input_json} and {input_model} to {output_path}")
|
|
@ -0,0 +1,510 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import logging
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from math import prod
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Iterable,
|
||||
Iterator,
|
||||
Sequence,
|
||||
SupportsIndex,
|
||||
cast,
|
||||
)
|
||||
from transformers import AutoConfig
|
||||
|
||||
import torch
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from torch import Tensor
|
||||
import gguf
|
||||
|
||||
# reuse model definitions from convert_hf_to_gguf.py
|
||||
from convert_hf_to_gguf import LazyTorchTensor, ModelBase
|
||||
|
||||
logger = logging.getLogger("lora-to-gguf")
|
||||
|
||||
|
||||
@dataclass
|
||||
class PartialLoraTensor:
|
||||
A: Tensor | None = None
|
||||
B: Tensor | None = None
|
||||
|
||||
|
||||
# magic to support tensor shape modifications and splitting
|
||||
class LoraTorchTensor:
|
||||
_lora_A: Tensor # (n_rank, row_size)
|
||||
_lora_B: Tensor # (col_size, n_rank)
|
||||
_rank: int
|
||||
|
||||
def __init__(self, A: Tensor, B: Tensor):
|
||||
assert len(A.shape) == len(B.shape)
|
||||
assert A.shape[-2] == B.shape[-1]
|
||||
if A.dtype != B.dtype:
|
||||
A = A.to(torch.float32)
|
||||
B = B.to(torch.float32)
|
||||
self._lora_A = A
|
||||
self._lora_B = B
|
||||
self._rank = B.shape[-1]
|
||||
|
||||
def get_lora_A_B(self) -> tuple[Tensor, Tensor]:
|
||||
return (self._lora_A, self._lora_B)
|
||||
|
||||
def __getitem__(
|
||||
self,
|
||||
indices: (
|
||||
SupportsIndex
|
||||
| slice
|
||||
| tuple[
|
||||
SupportsIndex | slice | Tensor, ...
|
||||
] # TODO: add ellipsis in the type signature
|
||||
),
|
||||
) -> LoraTorchTensor:
|
||||
shape = self.shape
|
||||
if isinstance(indices, SupportsIndex):
|
||||
if len(shape) > 2:
|
||||
return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
|
||||
else:
|
||||
raise NotImplementedError # can't return a vector
|
||||
elif isinstance(indices, slice):
|
||||
if len(shape) > 2:
|
||||
return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
|
||||
else:
|
||||
return LoraTorchTensor(self._lora_A, self._lora_B[indices])
|
||||
elif isinstance(indices, tuple):
|
||||
assert len(indices) > 0
|
||||
if indices[-1] is Ellipsis:
|
||||
return self[indices[:-1]]
|
||||
# expand ellipsis
|
||||
indices = tuple(
|
||||
u
|
||||
for v in (
|
||||
(
|
||||
(slice(None, None) for _ in range(len(indices) - 1))
|
||||
if i is Ellipsis
|
||||
else (i,)
|
||||
)
|
||||
for i in indices
|
||||
)
|
||||
for u in v
|
||||
)
|
||||
|
||||
if len(indices) < len(shape):
|
||||
indices = (
|
||||
*indices,
|
||||
*(slice(None, None) for _ in range(len(indices), len(shape))),
|
||||
)
|
||||
|
||||
# TODO: make sure this is correct
|
||||
indices_A = (
|
||||
*(
|
||||
(
|
||||
j.__index__() % self._lora_A.shape[i]
|
||||
if isinstance(j, SupportsIndex)
|
||||
else slice(None, None)
|
||||
)
|
||||
for i, j in enumerate(indices[:-2])
|
||||
),
|
||||
slice(None, None),
|
||||
indices[-1],
|
||||
)
|
||||
indices_B = indices[:-1]
|
||||
return LoraTorchTensor(self._lora_A[indices_A], self._lora_B[indices_B])
|
||||
else:
|
||||
raise NotImplementedError # unknown indice type
|
||||
|
||||
@property
|
||||
def dtype(self) -> torch.dtype:
|
||||
assert self._lora_A.dtype == self._lora_B.dtype
|
||||
return self._lora_A.dtype
|
||||
|
||||
@property
|
||||
def shape(self) -> tuple[int, ...]:
|
||||
assert len(self._lora_A.shape) == len(self._lora_B.shape)
|
||||
return (*self._lora_B.shape[:-1], self._lora_A.shape[-1])
|
||||
|
||||
def size(self, dim=None):
|
||||
assert dim is None
|
||||
return self.shape
|
||||
|
||||
def reshape(self, *shape: int | tuple[int, ...]) -> LoraTorchTensor:
|
||||
if isinstance(shape[0], tuple):
|
||||
new_shape: tuple[int, ...] = shape[0]
|
||||
else:
|
||||
new_shape = cast(tuple[int, ...], shape)
|
||||
orig_shape = self.shape
|
||||
if len(new_shape) < 2:
|
||||
raise NotImplementedError # can't become a vector
|
||||
|
||||
# expand -1 in the shape
|
||||
if any(dim == -1 for dim in new_shape):
|
||||
n_elems = prod(orig_shape)
|
||||
n_new_elems = prod(dim if dim != -1 else 1 for dim in new_shape)
|
||||
assert n_elems % n_new_elems == 0
|
||||
new_shape = (
|
||||
*(dim if dim != -1 else n_elems // n_new_elems for dim in new_shape),
|
||||
)
|
||||
|
||||
if new_shape[-1] != orig_shape[-1]:
|
||||
raise NotImplementedError # can't reshape the row size trivially
|
||||
|
||||
shape_A = (*(1 for _ in new_shape[:-2]), self._rank, orig_shape[-1])
|
||||
shape_B = (*new_shape[:-1], self._rank)
|
||||
return LoraTorchTensor(
|
||||
self._lora_A.reshape(shape_A),
|
||||
self._lora_B.reshape(shape_B),
|
||||
)
|
||||
|
||||
def reshape_as(self, other: Tensor) -> LoraTorchTensor:
|
||||
return self.reshape(*other.shape)
|
||||
|
||||
def view(self, *size: int) -> LoraTorchTensor:
|
||||
return self.reshape(*size)
|
||||
|
||||
def permute(self, *dims: int) -> LoraTorchTensor:
|
||||
shape = self.shape
|
||||
dims = tuple(dim - len(shape) if dim >= 0 else dim for dim in dims)
|
||||
if dims[-1] == -1:
|
||||
# TODO: support higher dimensional A shapes bigger than 1
|
||||
assert all(dim == 1 for dim in self._lora_A.shape[:-2])
|
||||
return LoraTorchTensor(self._lora_A, self._lora_B.permute(*dims))
|
||||
if len(shape) == 2 and dims[-1] == -2 and dims[-2] == -1:
|
||||
return LoraTorchTensor(
|
||||
self._lora_B.permute(*dims), self._lora_A.permute(*dims)
|
||||
)
|
||||
else:
|
||||
# TODO: compose the above two
|
||||
raise NotImplementedError
|
||||
|
||||
def transpose(self, dim0: int, dim1: int) -> LoraTorchTensor:
|
||||
shape = self.shape
|
||||
dims = [i for i in range(len(shape))]
|
||||
dims[dim0], dims[dim1] = dims[dim1], dims[dim0]
|
||||
return self.permute(*dims)
|
||||
|
||||
def swapaxes(self, axis0: int, axis1: int) -> LoraTorchTensor:
|
||||
return self.transpose(axis0, axis1)
|
||||
|
||||
def to(self, *args, **kwargs):
|
||||
return LoraTorchTensor(
|
||||
self._lora_A.to(*args, **kwargs), self._lora_B.to(*args, **kwargs)
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def __torch_function__(cls, func: Callable, types, args=(), kwargs=None):
|
||||
del types # unused
|
||||
|
||||
if kwargs is None:
|
||||
kwargs = {}
|
||||
|
||||
if func is torch.permute:
|
||||
return type(args[0]).permute(*args, **kwargs)
|
||||
elif func is torch.reshape:
|
||||
return type(args[0]).reshape(*args, **kwargs)
|
||||
elif func is torch.stack:
|
||||
assert isinstance(args[0], Sequence)
|
||||
dim = kwargs.get("dim", 0)
|
||||
assert dim == 0
|
||||
return LoraTorchTensor(
|
||||
torch.stack([a._lora_A for a in args[0]], dim),
|
||||
torch.stack([b._lora_B for b in args[0]], dim),
|
||||
)
|
||||
elif func is torch.cat:
|
||||
assert isinstance(args[0], Sequence)
|
||||
dim = kwargs.get("dim", 0)
|
||||
assert dim == 0
|
||||
if len(args[0][0].shape) > 2:
|
||||
return LoraTorchTensor(
|
||||
torch.cat([a._lora_A for a in args[0]], dim),
|
||||
torch.cat([b._lora_B for b in args[0]], dim),
|
||||
)
|
||||
elif all(torch.equal(args[0][0]._lora_A, t._lora_A) for t in args[0][1:]):
|
||||
return LoraTorchTensor(
|
||||
args[0][0]._lora_A,
|
||||
torch.cat([b._lora_B for b in args[0]], dim),
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def get_base_tensor_name(lora_tensor_name: str) -> str:
|
||||
base_name = lora_tensor_name.replace("base_model.model.", "")
|
||||
base_name = base_name.replace(".lora_A.weight", ".weight")
|
||||
base_name = base_name.replace(".lora_B.weight", ".weight")
|
||||
# models produced by mergekit-extract-lora have token embeddings in the adapter
|
||||
base_name = base_name.replace(".lora_embedding_A", ".weight")
|
||||
base_name = base_name.replace(".lora_embedding_B", ".weight")
|
||||
return base_name
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Convert a Hugging Face PEFT LoRA adapter to a GGUF file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--outfile",
|
||||
type=Path,
|
||||
help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--outtype",
|
||||
type=str,
|
||||
choices=["f32", "f16", "bf16", "q8_0", "auto"],
|
||||
default="f16",
|
||||
help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bigendian",
|
||||
action="store_true",
|
||||
help="model is executed on big endian machine",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-lazy",
|
||||
action="store_true",
|
||||
help="use more RAM by computing all outputs before writing (use in case lazy evaluation is broken)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose",
|
||||
action="store_true",
|
||||
help="increase output verbosity",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="only print out what will be done, without writing any new files",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--base",
|
||||
type=Path,
|
||||
help="directory containing Hugging Face model config files (config.json, tokenizer.json) for the base model that the adapter is based on - only config is needed, actual model weights are not required. If base model is unspecified, it will be loaded from Hugging Face hub based on the adapter config",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--base-model-id",
|
||||
type=str,
|
||||
help="the model ID of the base model, if it is not available locally or in the adapter config. If specified, it will ignore --base and load the base model config from the Hugging Face hub (Example: 'meta-llama/Llama-3.2-1B-Instruct')",
|
||||
)
|
||||
parser.add_argument(
|
||||
"lora_path",
|
||||
type=Path,
|
||||
help="directory containing Hugging Face PEFT LoRA config (adapter_model.json) and weights (adapter_model.safetensors or adapter_model.bin)",
|
||||
)
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
|
||||
# normally, adapter does not come with base model config, we need to load it from AutoConfig
|
||||
config = AutoConfig.from_pretrained(hf_model_id)
|
||||
return config.to_dict()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
|
||||
|
||||
ftype_map: dict[str, gguf.LlamaFileType] = {
|
||||
"f32": gguf.LlamaFileType.ALL_F32,
|
||||
"f16": gguf.LlamaFileType.MOSTLY_F16,
|
||||
"bf16": gguf.LlamaFileType.MOSTLY_BF16,
|
||||
"q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
|
||||
"auto": gguf.LlamaFileType.GUESSED,
|
||||
}
|
||||
|
||||
ftype = ftype_map[args.outtype]
|
||||
|
||||
dir_base_model: Path | None = args.base
|
||||
dir_lora: Path = args.lora_path
|
||||
base_model_id: str | None = args.base_model_id
|
||||
lora_config = dir_lora / "adapter_config.json"
|
||||
input_model = dir_lora / "adapter_model.safetensors"
|
||||
|
||||
if args.outfile is not None:
|
||||
fname_out = args.outfile
|
||||
else:
|
||||
# output in the same directory as the model by default
|
||||
fname_out = dir_lora
|
||||
|
||||
if os.path.exists(input_model):
|
||||
# lazy import load_file only if lora is in safetensors format.
|
||||
from safetensors.torch import load_file
|
||||
|
||||
lora_model = load_file(input_model, device="cpu")
|
||||
else:
|
||||
input_model = os.path.join(dir_lora, "adapter_model.bin")
|
||||
lora_model = torch.load(input_model, map_location="cpu", weights_only=True)
|
||||
|
||||
# load LoRA config
|
||||
with open(lora_config, "r") as f:
|
||||
lparams: dict[str, Any] = json.load(f)
|
||||
|
||||
# load base model
|
||||
if base_model_id is not None:
|
||||
logger.info(f"Loading base model from Hugging Face: {base_model_id}")
|
||||
hparams = load_hparams_from_hf(base_model_id)
|
||||
elif dir_base_model is None:
|
||||
if "base_model_name_or_path" in lparams:
|
||||
model_id = lparams["base_model_name_or_path"]
|
||||
logger.info(f"Loading base model from Hugging Face: {model_id}")
|
||||
try:
|
||||
hparams = load_hparams_from_hf(model_id)
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to load base model config: {e}")
|
||||
logger.error(
|
||||
"Please try downloading the base model and add its path to --base"
|
||||
)
|
||||
sys.exit(1)
|
||||
else:
|
||||
logger.error(
|
||||
"'base_model_name_or_path' is not found in adapter_config.json"
|
||||
)
|
||||
logger.error(
|
||||
"Base model config is required. Please download the base model and add its path to --base"
|
||||
)
|
||||
sys.exit(1)
|
||||
else:
|
||||
logger.info(f"Loading base model: {dir_base_model.name}")
|
||||
hparams = ModelBase.load_hparams(dir_base_model)
|
||||
|
||||
with torch.inference_mode():
|
||||
try:
|
||||
model_class = ModelBase.from_model_architecture(hparams["architectures"][0])
|
||||
except NotImplementedError:
|
||||
logger.error(f"Model {hparams['architectures'][0]} is not supported")
|
||||
sys.exit(1)
|
||||
|
||||
class LoraModel(model_class):
|
||||
model_arch = model_class.model_arch
|
||||
|
||||
lora_alpha: float
|
||||
|
||||
def __init__(
|
||||
self, *args, dir_lora_model: Path, lora_alpha: float, **kwargs
|
||||
):
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
self.dir_model_card = dir_lora_model
|
||||
self.lora_alpha = float(lora_alpha)
|
||||
|
||||
def set_vocab(self):
|
||||
pass
|
||||
|
||||
def set_type(self):
|
||||
self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
|
||||
self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
self.gguf_writer.add_float32(
|
||||
gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha
|
||||
)
|
||||
|
||||
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
|
||||
# Never add extra tensors (e.g. rope_freqs) for LoRA adapters
|
||||
return ()
|
||||
|
||||
def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
|
||||
tensor_map: dict[str, PartialLoraTensor] = {}
|
||||
|
||||
for name, tensor in lora_model.items():
|
||||
if self.lazy:
|
||||
tensor = LazyTorchTensor.from_eager(tensor)
|
||||
base_name = get_base_tensor_name(name)
|
||||
# note: mergekit-extract-lora also adds token embeddings to the adapter
|
||||
is_lora_a = ".lora_A.weight" in name or ".lora_embedding_A" in name
|
||||
is_lora_b = ".lora_B.weight" in name or ".lora_embedding_B" in name
|
||||
if not is_lora_a and not is_lora_b:
|
||||
if ".base_layer.weight" in name:
|
||||
continue
|
||||
# mergekit-extract-lora add these layernorm to the adapter, we need to keep them
|
||||
if "_layernorm" in name or ".norm" in name:
|
||||
yield (base_name, tensor)
|
||||
continue
|
||||
logger.error(
|
||||
f"Unexpected name '{name}': Not a lora_A or lora_B tensor"
|
||||
)
|
||||
if ".embed_tokens.weight" in name or ".lm_head.weight" in name:
|
||||
logger.error(
|
||||
"Embeddings is present in the adapter. This can be due to new tokens added during fine tuning"
|
||||
)
|
||||
logger.error(
|
||||
"Please refer to https://github.com/ggml-org/llama.cpp/pull/9948"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if base_name in tensor_map:
|
||||
if is_lora_a:
|
||||
tensor_map[base_name].A = tensor
|
||||
else:
|
||||
tensor_map[base_name].B = tensor
|
||||
else:
|
||||
if is_lora_a:
|
||||
tensor_map[base_name] = PartialLoraTensor(A=tensor)
|
||||
else:
|
||||
tensor_map[base_name] = PartialLoraTensor(B=tensor)
|
||||
|
||||
for name, tensor in tensor_map.items():
|
||||
assert tensor.A is not None
|
||||
assert tensor.B is not None
|
||||
yield (
|
||||
name,
|
||||
cast(torch.Tensor, LoraTorchTensor(tensor.A, tensor.B)),
|
||||
)
|
||||
|
||||
def modify_tensors(
|
||||
self, data_torch: Tensor, name: str, bid: int | None
|
||||
) -> Iterable[tuple[str, Tensor]]:
|
||||
dest = list(super().modify_tensors(data_torch, name, bid))
|
||||
# some archs may have the same tensor for lm_head and output (tie word embeddings)
|
||||
# in this case, adapters targeting lm_head will fail when using llama-export-lora
|
||||
# therefore, we ignore them for now
|
||||
# see: https://github.com/ggml-org/llama.cpp/issues/9065
|
||||
if name == "lm_head.weight" and len(dest) == 0:
|
||||
raise ValueError(
|
||||
"lm_head is present in adapter, but is ignored in base model"
|
||||
)
|
||||
for dest_name, dest_data in dest:
|
||||
# mergekit-extract-lora add these layernorm to the adapter
|
||||
if "_norm" in dest_name:
|
||||
assert dest_data.dim() == 1
|
||||
yield (dest_name, dest_data)
|
||||
continue
|
||||
|
||||
# otherwise, we must get the lora_A and lora_B tensors
|
||||
assert isinstance(dest_data, LoraTorchTensor)
|
||||
lora_a, lora_b = dest_data.get_lora_A_B()
|
||||
|
||||
# note: mergekit-extract-lora flip and transpose A and B
|
||||
# here we only need to transpose token_embd.lora_a, see llm_build_inp_embd()
|
||||
if "token_embd.weight" in dest_name:
|
||||
lora_a = lora_a.T
|
||||
|
||||
yield (dest_name + ".lora_a", lora_a)
|
||||
yield (dest_name + ".lora_b", lora_b)
|
||||
|
||||
alpha: float = lparams["lora_alpha"]
|
||||
|
||||
model_instance = LoraModel(
|
||||
dir_base_model,
|
||||
ftype,
|
||||
fname_out,
|
||||
is_big_endian=args.bigendian,
|
||||
use_temp_file=False,
|
||||
eager=args.no_lazy,
|
||||
dry_run=args.dry_run,
|
||||
dir_lora_model=dir_lora,
|
||||
lora_alpha=alpha,
|
||||
hparams=hparams,
|
||||
)
|
||||
|
||||
logger.info("Exporting model...")
|
||||
model_instance.write()
|
||||
logger.info(f"Model successfully exported to {model_instance.fname_out}")
|
|
@ -0,0 +1,105 @@
|
|||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from safetensors.torch import save_file
|
||||
|
||||
import gguf
|
||||
|
||||
|
||||
def dequantize_tensor(tensor):
|
||||
if tensor.tensor_type in [
|
||||
gguf.GGMLQuantizationType.F32,
|
||||
gguf.GGMLQuantizationType.F16,
|
||||
gguf.GGMLQuantizationType.BF16,
|
||||
]:
|
||||
return np.array(tensor.data)
|
||||
else:
|
||||
return tensor.data.astype(np.float32)
|
||||
|
||||
|
||||
def gguf_to_safetensors(gguf_path, safetensors_path, metadata_path=None):
|
||||
try:
|
||||
reader = gguf.GGUFReader(gguf_path)
|
||||
except Exception as e:
|
||||
print(f"Error reading GGUF file: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
tensors = {}
|
||||
metadata = {}
|
||||
|
||||
for tensor in reader.tensors:
|
||||
try:
|
||||
dequantized_data = dequantize_tensor(tensor)
|
||||
tensors[tensor.name] = torch.from_numpy(
|
||||
dequantized_data.reshape(tuple(reversed(tensor.shape)))
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error processing tensor {tensor.name}: {e}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
for field_name, field in reader.fields.items():
|
||||
if field.data:
|
||||
metadata[field_name] = field.parts[field.data[0]].tolist()
|
||||
|
||||
try:
|
||||
save_file(tensors, safetensors_path)
|
||||
except Exception as e:
|
||||
print(f"Error saving SafeTensors file: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
decoded_metadata = {}
|
||||
for key, value in metadata.items():
|
||||
if isinstance(value, list) and all(isinstance(item, int) for item in value):
|
||||
decoded_value = ""
|
||||
for item in value:
|
||||
if 48 <= item <= 57:
|
||||
decoded_value += str(item - 48)
|
||||
elif 32 <= item <= 126:
|
||||
decoded_value += chr(item)
|
||||
else:
|
||||
decoded_value += str(item)
|
||||
decoded_metadata[key] = decoded_value
|
||||
else:
|
||||
decoded_metadata[key] = value
|
||||
|
||||
if metadata_path:
|
||||
try:
|
||||
with open(metadata_path, "w") as f:
|
||||
json.dump(decoded_metadata, f, indent=4)
|
||||
except Exception as e:
|
||||
print(f"Error saving metadata file: {e}", file=sys.stderr)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Convert GGUF to SafeTensors format")
|
||||
parser.add_argument("gguf_path", type=str, help="Path to the input GGUF file")
|
||||
parser.add_argument(
|
||||
"safetensors_path", type=str, help="Path to save the SafeTensors file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--metadata_path",
|
||||
type=str,
|
||||
help="Optional path to save metadata as a JSON file",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
gguf_path = Path(args.gguf_path)
|
||||
safetensors_path = Path(args.safetensors_path)
|
||||
metadata_path = Path(args.metadata_path) if args.metadata_path else None
|
||||
|
||||
if not gguf_path.exists():
|
||||
print(f"Error: GGUF file '{gguf_path}' does not exist.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
print(f"Converting {gguf_path} to {safetensors_path}")
|
||||
gguf_to_safetensors(gguf_path, safetensors_path, metadata_path)
|
||||
print("Conversion complete.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
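The converter above is written as a command-line script, but the core function can also be called directly; a short sketch follows, with placeholder file names rather than paths from this repository.

# Illustrative only: file names are placeholders.
from pathlib import Path

gguf_to_safetensors(
    Path("model-Q8_0.gguf"),          # input GGUF; non-float tensors are cast to float32 by dequantize_tensor
    Path("model.safetensors"),        # output SafeTensors file
    metadata_path=Path("model-metadata.json"),  # optional JSON dump of GGUF KV metadata
)
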
@@ -0,0 +1,13 @@
from PySide6.QtWidgets import QMessageBox
from Localizations import ERROR_MESSAGE, ERROR, TASK_ERROR


def show_error(logger, message) -> None:
    logger.error(message)
    QMessageBox.critical(None, ERROR, message)


def handle_error(logger, error_message, task_item) -> None:
    logger.error(TASK_ERROR.format(error_message))
    show_error(logger, error_message)
    task_item.update_status(ERROR)

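For orientation, this is roughly how the helpers above are wired to a task elsewhere in the application; the snippet mirrors the error_signal connection used by the task list, and the logger, thread, and task_item names are placeholders here.

# Illustrative only: route a quantization thread's error signal through handle_error.
thread.error_signal.connect(
    lambda err: handle_error(logger, err, task_item)
)
# handle_error() logs the error, shows a QMessageBox, and flips the task item to ERROR.
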
@@ -0,0 +1,9 @@
from .constants import *
from .lazy import *
from .gguf_reader import *
from .gguf_writer import *
from .quants import *
from .tensor_mapping import *
from .vocab import *
from .utility import *
from .metadata import *

File diff suppressed because it is too large
@@ -0,0 +1,11 @@
import importlib
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))

# Compatibility for people trying to import gguf/gguf.py directly instead of as a package.
importlib.invalidate_caches()
import gguf  # noqa: E402

importlib.reload(gguf)

@ -0,0 +1,412 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from collections import OrderedDict
|
||||
from typing import Any, Literal, NamedTuple, TypeVar, Union
|
||||
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
|
||||
from .quants import quant_shape_to_byte_shape
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pathlib import Path
|
||||
|
||||
# Allow running file in package as a script.
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from gguf.constants import (
|
||||
GGML_QUANT_SIZES,
|
||||
GGUF_DEFAULT_ALIGNMENT,
|
||||
GGUF_MAGIC,
|
||||
GGUF_VERSION,
|
||||
GGMLQuantizationType,
|
||||
GGUFValueType,
|
||||
GGUFEndian,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
|
||||
|
||||
|
||||
class ReaderField(NamedTuple):
|
||||
# Offset to start of this field.
|
||||
offset: int
|
||||
|
||||
# Name of the field (not necessarily from file data).
|
||||
name: str
|
||||
|
||||
# Data parts. Some types have multiple components, such as strings
|
||||
# that consist of a length followed by the string data.
|
||||
parts: list[npt.NDArray[Any]] = []
|
||||
|
||||
# Indexes into parts that we can call the actual data. For example
|
||||
# an array of strings will be populated with indexes to the actual
|
||||
# string data.
|
||||
data: list[int] = [-1]
|
||||
|
||||
types: list[GGUFValueType] = []
|
||||
|
||||
def contents(self, index_or_slice: int | slice = slice(None)) -> Any:
|
||||
if self.types:
|
||||
to_string = lambda x: str(x.tobytes(), encoding="utf-8") # noqa: E731
|
||||
main_type = self.types[0]
|
||||
|
||||
if main_type == GGUFValueType.ARRAY:
|
||||
sub_type = self.types[-1]
|
||||
|
||||
if sub_type == GGUFValueType.STRING:
|
||||
indices = self.data[index_or_slice]
|
||||
|
||||
if isinstance(index_or_slice, int):
|
||||
return to_string(self.parts[indices]) # type: ignore
|
||||
else:
|
||||
return [to_string(self.parts[idx]) for idx in indices] # type: ignore
|
||||
else:
|
||||
# FIXME: When/if _get_field_parts() support multi-dimensional arrays, this must do so too
|
||||
|
||||
# Check if it's unsafe to perform slice optimization on data
|
||||
# if any(True for idx in self.data if len(self.parts[idx]) != 1):
|
||||
# optim_slice = slice(None)
|
||||
# else:
|
||||
# optim_slice = index_or_slice
|
||||
# index_or_slice = slice(None)
|
||||
|
||||
# if isinstance(optim_slice, int):
|
||||
# return self.parts[self.data[optim_slice]].tolist()[0]
|
||||
# else:
|
||||
# return [pv for idx in self.data[optim_slice] for pv in self.parts[idx].tolist()][index_or_slice]
|
||||
|
||||
if isinstance(index_or_slice, int):
|
||||
return self.parts[self.data[index_or_slice]].tolist()[0]
|
||||
else:
|
||||
return [
|
||||
pv
|
||||
for idx in self.data[index_or_slice]
|
||||
for pv in self.parts[idx].tolist()
|
||||
]
|
||||
|
||||
if main_type == GGUFValueType.STRING:
|
||||
return to_string(self.parts[-1])
|
||||
else:
|
||||
return self.parts[-1].tolist()[0]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class ReaderTensor(NamedTuple):
|
||||
name: str
|
||||
tensor_type: GGMLQuantizationType
|
||||
shape: npt.NDArray[np.uint32]
|
||||
n_elements: int
|
||||
n_bytes: int
|
||||
data_offset: int
|
||||
data: npt.NDArray[Any]
|
||||
field: ReaderField
|
||||
|
||||
|
||||
class GGUFReader:
|
||||
# I - same as host, S - swapped
|
||||
byte_order: Literal["I", "S"] = "I"
|
||||
alignment: int = GGUF_DEFAULT_ALIGNMENT
|
||||
data_offset: int
|
||||
|
||||
# Note: Internal helper, API may change.
|
||||
gguf_scalar_to_np: dict[GGUFValueType, type[np.generic]] = {
|
||||
GGUFValueType.UINT8: np.uint8,
|
||||
GGUFValueType.INT8: np.int8,
|
||||
GGUFValueType.UINT16: np.uint16,
|
||||
GGUFValueType.INT16: np.int16,
|
||||
GGUFValueType.UINT32: np.uint32,
|
||||
GGUFValueType.INT32: np.int32,
|
||||
GGUFValueType.FLOAT32: np.float32,
|
||||
GGUFValueType.UINT64: np.uint64,
|
||||
GGUFValueType.INT64: np.int64,
|
||||
GGUFValueType.FLOAT64: np.float64,
|
||||
GGUFValueType.BOOL: np.bool_,
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self, path: os.PathLike[str] | str, mode: Literal["r", "r+", "c"] = "r"
|
||||
):
|
||||
self.data = np.memmap(path, mode=mode)
|
||||
offs = 0
|
||||
|
||||
# Check for GGUF magic
|
||||
if self._get(offs, np.uint32, override_order="<")[0] != GGUF_MAGIC:
|
||||
raise ValueError("GGUF magic invalid")
|
||||
offs += 4
|
||||
|
||||
# Check GGUF version
|
||||
temp_version = self._get(offs, np.uint32)
|
||||
if temp_version[0] & 65535 == 0:
|
||||
# If we get 0 here that means it's (probably) a GGUF file created for
|
||||
# the opposite byte order of the machine this script is running on.
|
||||
self.byte_order = "S"
|
||||
temp_version = temp_version.view(
|
||||
temp_version.dtype.newbyteorder(self.byte_order)
|
||||
)
|
||||
version = temp_version[0]
|
||||
if version not in READER_SUPPORTED_VERSIONS:
|
||||
raise ValueError(
|
||||
f"Sorry, file appears to be version {version} which we cannot handle"
|
||||
)
|
||||
if sys.byteorder == "little":
|
||||
# Host is little endian
|
||||
host_endian = GGUFEndian.LITTLE
|
||||
swapped_endian = GGUFEndian.BIG
|
||||
else:
|
||||
# Sorry PDP or other weird systems that don't use BE or LE.
|
||||
host_endian = GGUFEndian.BIG
|
||||
swapped_endian = GGUFEndian.LITTLE
|
||||
self.endianess = swapped_endian if self.byte_order == "S" else host_endian
|
||||
self.fields: OrderedDict[str, ReaderField] = OrderedDict()
|
||||
self.tensors: list[ReaderTensor] = []
|
||||
offs += self._push_field(
|
||||
ReaderField(
|
||||
offs, "GGUF.version", [temp_version], [0], [GGUFValueType.UINT32]
|
||||
)
|
||||
)
|
||||
|
||||
# Check tensor count and kv count
|
||||
temp_counts = self._get(offs, np.uint64, 2)
|
||||
offs += self._push_field(
|
||||
ReaderField(
|
||||
offs,
|
||||
"GGUF.tensor_count",
|
||||
[temp_counts[:1]],
|
||||
[0],
|
||||
[GGUFValueType.UINT64],
|
||||
)
|
||||
)
|
||||
offs += self._push_field(
|
||||
ReaderField(
|
||||
offs, "GGUF.kv_count", [temp_counts[1:]], [0], [GGUFValueType.UINT64]
|
||||
)
|
||||
)
|
||||
tensor_count, kv_count = temp_counts
|
||||
offs = self._build_fields(offs, kv_count)
|
||||
|
||||
# Build Tensor Info Fields
|
||||
offs, tensors_fields = self._build_tensor_info(offs, tensor_count)
|
||||
new_align = self.fields.get("general.alignment")
|
||||
if new_align is not None:
|
||||
if new_align.types != [GGUFValueType.UINT32]:
|
||||
raise ValueError("Bad type for general.alignment field")
|
||||
self.alignment = new_align.parts[-1][0]
|
||||
padding = offs % self.alignment
|
||||
if padding != 0:
|
||||
offs += self.alignment - padding
|
||||
self.data_offset = offs
|
||||
self._build_tensors(offs, tensors_fields)
|
||||
|
||||
_DT = TypeVar("_DT", bound=npt.DTypeLike)
|
||||
|
||||
# Fetch a key/value metadata field by key.
|
||||
def get_field(self, key: str) -> Union[ReaderField, None]:
|
||||
return self.fields.get(key, None)
|
||||
|
||||
# Fetch a tensor from the list by index.
|
||||
def get_tensor(self, idx: int) -> ReaderTensor:
|
||||
return self.tensors[idx]
|
||||
|
||||
def _get(
|
||||
self,
|
||||
offset: int,
|
||||
dtype: npt.DTypeLike,
|
||||
count: int = 1,
|
||||
override_order: None | Literal["I", "S", "<"] = None,
|
||||
) -> npt.NDArray[Any]:
|
||||
count = int(count)
|
||||
itemsize = int(np.empty([], dtype=dtype).itemsize)
|
||||
end_offs = offset + itemsize * count
|
||||
arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
|
||||
return arr.view(
|
||||
arr.dtype.newbyteorder(
|
||||
self.byte_order if override_order is None else override_order
|
||||
)
|
||||
)
|
||||
|
||||
def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
|
||||
if field.name in self.fields:
|
||||
# TODO: add option to generate error on duplicate keys
|
||||
# raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
|
||||
|
||||
logger.warning(f"Duplicate key {field.name} at offset {field.offset}")
|
||||
self.fields[field.name + "_{}".format(field.offset)] = field
|
||||
else:
|
||||
self.fields[field.name] = field
|
||||
return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)
|
||||
|
||||
def _get_str(
|
||||
self, offset: int
|
||||
) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
|
||||
slen = self._get(offset, np.uint64)
|
||||
return slen, self._get(offset + 8, np.uint8, slen[0])
|
||||
|
||||
def _get_field_parts(
|
||||
self,
|
||||
orig_offs: int,
|
||||
raw_type: int,
|
||||
) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
|
||||
offs = orig_offs
|
||||
types: list[GGUFValueType] = []
|
||||
gtype = GGUFValueType(raw_type)
|
||||
types.append(gtype)
|
||||
# Handle strings.
|
||||
if gtype == GGUFValueType.STRING:
|
||||
sparts: list[npt.NDArray[Any]] = list(self._get_str(offs))
|
||||
size = sum(int(part.nbytes) for part in sparts)
|
||||
return size, sparts, [1], types
|
||||
# Check if it's a simple scalar type.
|
||||
nptype = self.gguf_scalar_to_np.get(gtype)
|
||||
if nptype is not None:
|
||||
val = self._get(offs, nptype)
|
||||
return int(val.nbytes), [val], [0], types
|
||||
# Handle arrays.
|
||||
if gtype == GGUFValueType.ARRAY:
|
||||
raw_itype = self._get(offs, np.uint32)
|
||||
offs += int(raw_itype.nbytes)
|
||||
alen = self._get(offs, np.uint64)
|
||||
offs += int(alen.nbytes)
|
||||
aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
|
||||
data_idxs: list[int] = []
|
||||
# FIXME: Handle multi-dimensional arrays properly instead of flattening
|
||||
for idx in range(alen[0]):
|
||||
curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(
|
||||
offs, raw_itype[0]
|
||||
)
|
||||
if idx == 0:
|
||||
types += curr_types
|
||||
idxs_offs = len(aparts)
|
||||
aparts += curr_parts
|
||||
data_idxs += (idx + idxs_offs for idx in curr_idxs)
|
||||
offs += curr_size
|
||||
return offs - orig_offs, aparts, data_idxs, types
|
||||
# We can't deal with this one.
|
||||
raise ValueError("Unknown/unhandled field type {gtype}")
|
||||
|
||||
def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
|
||||
offs = orig_offs
|
||||
|
||||
# Get Tensor Name
|
||||
name_len, name_data = self._get_str(offs)
|
||||
offs += int(name_len.nbytes + name_data.nbytes)
|
||||
|
||||
# Get Tensor Dimensions Count
|
||||
n_dims = self._get(offs, np.uint32)
|
||||
offs += int(n_dims.nbytes)
|
||||
|
||||
# Get Tensor Dimension Array
|
||||
dims = self._get(offs, np.uint64, n_dims[0])
|
||||
offs += int(dims.nbytes)
|
||||
|
||||
# Get Tensor Encoding Scheme Type
|
||||
raw_dtype = self._get(offs, np.uint32)
|
||||
offs += int(raw_dtype.nbytes)
|
||||
|
||||
# Get Tensor Offset
|
||||
offset_tensor = self._get(offs, np.uint64)
|
||||
offs += int(offset_tensor.nbytes)
|
||||
|
||||
return ReaderField(
|
||||
orig_offs,
|
||||
str(bytes(name_data), encoding="utf-8"),
|
||||
[name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
|
||||
[1, 3, 4, 5],
|
||||
)
|
||||
|
||||
def _build_fields(self, offs: int, count: int) -> int:
|
||||
for _ in range(count):
|
||||
orig_offs = offs
|
||||
kv_klen, kv_kdata = self._get_str(offs)
|
||||
offs += int(kv_klen.nbytes + kv_kdata.nbytes)
|
||||
raw_kv_type = self._get(offs, np.uint32)
|
||||
offs += int(raw_kv_type.nbytes)
|
||||
parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
|
||||
idxs_offs = len(parts)
|
||||
field_size, field_parts, field_idxs, field_types = self._get_field_parts(
|
||||
offs, raw_kv_type[0]
|
||||
)
|
||||
parts += field_parts
|
||||
self._push_field(
|
||||
ReaderField(
|
||||
orig_offs,
|
||||
str(bytes(kv_kdata), encoding="utf-8"),
|
||||
parts,
|
||||
[idx + idxs_offs for idx in field_idxs],
|
||||
field_types,
|
||||
),
|
||||
skip_sum=True,
|
||||
)
|
||||
offs += field_size
|
||||
return offs
|
||||
|
||||
def _build_tensor_info(
|
||||
self, offs: int, count: int
|
||||
) -> tuple[int, list[ReaderField]]:
|
||||
tensor_fields = []
|
||||
for _ in range(count):
|
||||
field = self._get_tensor_info_field(offs)
|
||||
offs += sum(int(part.nbytes) for part in field.parts)
|
||||
tensor_fields.append(field)
|
||||
return offs, tensor_fields
|
||||
|
||||
def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
|
||||
tensors = []
|
||||
tensor_names = set() # keep track of name to prevent duplicated tensors
|
||||
for field in fields:
|
||||
_name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
|
||||
# check if there's any tensor having same name already in the list
|
||||
tensor_name = str(bytes(name_data), encoding="utf-8")
|
||||
if tensor_name in tensor_names:
|
||||
raise ValueError(f"Found duplicated tensor with name {tensor_name}")
|
||||
tensor_names.add(tensor_name)
|
||||
ggml_type = GGMLQuantizationType(raw_dtype[0])
|
||||
n_elems = int(np.prod(dims))
|
||||
np_dims = tuple(reversed(dims.tolist()))
|
||||
block_size, type_size = GGML_QUANT_SIZES[ggml_type]
|
||||
n_bytes = n_elems * type_size // block_size
|
||||
data_offs = int(start_offs + offset_tensor[0])
|
||||
item_type: npt.DTypeLike
|
||||
if ggml_type == GGMLQuantizationType.F16:
|
||||
item_count = n_elems
|
||||
item_type = np.float16
|
||||
elif ggml_type == GGMLQuantizationType.F32:
|
||||
item_count = n_elems
|
||||
item_type = np.float32
|
||||
elif ggml_type == GGMLQuantizationType.F64:
|
||||
item_count = n_elems
|
||||
item_type = np.float64
|
||||
elif ggml_type == GGMLQuantizationType.I8:
|
||||
item_count = n_elems
|
||||
item_type = np.int8
|
||||
elif ggml_type == GGMLQuantizationType.I16:
|
||||
item_count = n_elems
|
||||
item_type = np.int16
|
||||
elif ggml_type == GGMLQuantizationType.I32:
|
||||
item_count = n_elems
|
||||
item_type = np.int32
|
||||
elif ggml_type == GGMLQuantizationType.I64:
|
||||
item_count = n_elems
|
||||
item_type = np.int64
|
||||
else:
|
||||
item_count = n_bytes
|
||||
item_type = np.uint8
|
||||
np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)
|
||||
tensors.append(
|
||||
ReaderTensor(
|
||||
name=tensor_name,
|
||||
tensor_type=ggml_type,
|
||||
shape=dims,
|
||||
n_elements=n_elems,
|
||||
n_bytes=n_bytes,
|
||||
data_offset=data_offs,
|
||||
data=self._get(data_offs, item_type, item_count).reshape(np_dims),
|
||||
field=field,
|
||||
)
|
||||
)
|
||||
self.tensors = tensors
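For orientation, the following is a minimal usage sketch of the reader defined above; the import path, file name, and metadata key are illustrative assumptions rather than values taken from this diff.

# Minimal GGUFReader usage sketch (file name and key are hypothetical examples).
from gguf import GGUFReader  # assumes the package re-exports GGUFReader as upstream gguf-py does

reader = GGUFReader("model.gguf")  # memory-maps the file; mode defaults to "r"

# Key/value metadata: ReaderField.contents() decodes strings and scalar values.
arch = reader.get_field("general.architecture")
if arch is not None:
    print("architecture:", arch.contents())

# Tensor info parsed from the header; the data is viewed lazily through the memmap.
for tensor in reader.tensors[:5]:
    print(tensor.name, tensor.tensor_type.name, tensor.shape.tolist(), tensor.n_bytes)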
File diff suppressed because it is too large
@@ -0,0 +1,308 @@
from __future__ import annotations
|
||||
from abc import ABC, ABCMeta, abstractmethod
|
||||
|
||||
import logging
|
||||
from typing import Any, Callable
|
||||
|
||||
import numpy as np
|
||||
from numpy.typing import DTypeLike
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LazyMeta(ABCMeta):
|
||||
|
||||
def __new__(
|
||||
cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs
|
||||
):
|
||||
def __getattr__(self, name: str) -> Any:
|
||||
meta_attr = getattr(self._meta, name)
|
||||
if callable(meta_attr):
|
||||
return type(self)._wrap_fn(
|
||||
(lambda s, *args, **kwargs: getattr(s, name)(*args, **kwargs)),
|
||||
use_self=self,
|
||||
)
|
||||
elif isinstance(meta_attr, self._tensor_type):
|
||||
# e.g. self.T with torch.Tensor should still be wrapped
|
||||
return type(self)._wrap_fn(lambda s: getattr(s, name))(self)
|
||||
else:
|
||||
# no need to wrap non-tensor properties,
|
||||
# and they likely don't depend on the actual contents of the tensor
|
||||
return meta_attr
|
||||
|
||||
namespace["__getattr__"] = __getattr__
|
||||
|
||||
# need to make a builder for the wrapped wrapper to copy the name,
|
||||
# or else it fails with very cryptic error messages,
|
||||
# because somehow the same string would end up in every closures
|
||||
def mk_wrap(op_name: str, *, meta_noop: bool = False):
|
||||
# need to wrap the wrapper to get self
|
||||
def wrapped_special_op(self, *args, **kwargs):
|
||||
return type(self)._wrap_fn(
|
||||
getattr(type(self)._tensor_type, op_name),
|
||||
meta_noop=meta_noop,
|
||||
)(self, *args, **kwargs)
|
||||
|
||||
return wrapped_special_op
|
||||
|
||||
# special methods bypass __getattr__, so they need to be added manually
|
||||
# ref: https://docs.python.org/3/reference/datamodel.html#special-lookup
|
||||
# NOTE: doing this from a metaclass is very convenient
|
||||
# TODO: make this even more comprehensive
|
||||
for binary_op in (
|
||||
"lt",
|
||||
"le",
|
||||
"eq",
|
||||
"ne",
|
||||
"ge",
|
||||
"gt",
|
||||
"not" "abs",
|
||||
"add",
|
||||
"and",
|
||||
"floordiv",
|
||||
"invert",
|
||||
"lshift",
|
||||
"mod",
|
||||
"mul",
|
||||
"matmul",
|
||||
"neg",
|
||||
"or",
|
||||
"pos",
|
||||
"pow",
|
||||
"rshift",
|
||||
"sub",
|
||||
"truediv",
|
||||
"xor",
|
||||
"iadd",
|
||||
"iand",
|
||||
"ifloordiv",
|
||||
"ilshift",
|
||||
"imod",
|
||||
"imul",
|
||||
"ior",
|
||||
"irshift",
|
||||
"isub",
|
||||
"ixor",
|
||||
"radd",
|
||||
"rand",
|
||||
"rfloordiv",
|
||||
"rmul",
|
||||
"ror",
|
||||
"rpow",
|
||||
"rsub",
|
||||
"rtruediv",
|
||||
"rxor",
|
||||
):
|
||||
attr_name = f"__{binary_op}__"
|
||||
# the result of these operators usually has the same shape and dtype as the input,
|
||||
# so evaluation on the meta tensor can be skipped.
|
||||
namespace[attr_name] = mk_wrap(attr_name, meta_noop=True)
|
||||
|
||||
for special_op in (
|
||||
"getitem",
|
||||
"setitem",
|
||||
"len",
|
||||
):
|
||||
attr_name = f"__{special_op}__"
|
||||
namespace[attr_name] = mk_wrap(attr_name, meta_noop=False)
|
||||
|
||||
return super().__new__(cls, name, bases, namespace, **kwargs)
|
||||
|
||||
|
||||
# Tree of lazy tensors
|
||||
class LazyBase(ABC, metaclass=LazyMeta):
|
||||
_tensor_type: type
|
||||
_meta: Any
|
||||
_data: Any | None
|
||||
_args: tuple
|
||||
_kwargs: dict[str, Any]
|
||||
_func: Callable[[Any], Any] | None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
meta: Any,
|
||||
data: Any | None = None,
|
||||
args: tuple = (),
|
||||
kwargs: dict[str, Any] | None = None,
|
||||
func: Callable[[Any], Any] | None = None,
|
||||
):
|
||||
super().__init__()
|
||||
self._meta = meta
|
||||
self._data = data
|
||||
self._args = args
|
||||
self._kwargs = kwargs if kwargs is not None else {}
|
||||
self._func = func
|
||||
assert self._func is not None or self._data is not None
|
||||
|
||||
def __init_subclass__(cls) -> None:
|
||||
if "_tensor_type" not in cls.__dict__:
|
||||
raise TypeError(f"property '_tensor_type' must be defined for {cls!r}")
|
||||
return super().__init_subclass__()
|
||||
|
||||
@staticmethod
|
||||
def _recurse_apply(o: Any, fn: Callable[[Any], Any]) -> Any:
|
||||
# TODO: dict and set
|
||||
if isinstance(o, (list, tuple)):
|
||||
L = []
|
||||
for item in o:
|
||||
L.append(LazyBase._recurse_apply(item, fn))
|
||||
if isinstance(o, tuple):
|
||||
L = tuple(L)
|
||||
return L
|
||||
elif isinstance(o, LazyBase):
|
||||
return fn(o)
|
||||
else:
|
||||
return o
|
||||
|
||||
@classmethod
|
||||
def _wrap_fn(
|
||||
cls,
|
||||
fn: Callable,
|
||||
*,
|
||||
use_self: LazyBase | None = None,
|
||||
meta_noop: (
|
||||
bool
|
||||
| DTypeLike
|
||||
| tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]]
|
||||
) = False,
|
||||
) -> Callable[[Any], Any]:
|
||||
def wrapped_fn(*args, **kwargs):
|
||||
if kwargs is None:
|
||||
kwargs = {}
|
||||
args = ((use_self,) if use_self is not None else ()) + args
|
||||
|
||||
meta_args = LazyBase._recurse_apply(args, lambda t: t._meta)
|
||||
# TODO: maybe handle tensors in kwargs too
|
||||
|
||||
if isinstance(meta_noop, bool) and not meta_noop:
|
||||
try:
|
||||
res = fn(*meta_args, **kwargs)
|
||||
except NotImplementedError:
|
||||
# running some operations on PyTorch's Meta tensors can cause this exception
|
||||
res = None
|
||||
else:
|
||||
# some operators don't need to actually run on the meta tensors
|
||||
assert len(args) > 0
|
||||
res = args[0]
|
||||
assert isinstance(res, cls)
|
||||
res = res._meta
|
||||
# allow operations to override the dtype and shape
|
||||
if meta_noop is not True:
|
||||
if isinstance(meta_noop, tuple):
|
||||
dtype, shape = meta_noop
|
||||
assert callable(shape)
|
||||
res = cls.meta_with_dtype_and_shape(dtype, shape(res.shape))
|
||||
else:
|
||||
res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)
|
||||
|
||||
if isinstance(res, cls._tensor_type):
|
||||
return cls(
|
||||
meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn
|
||||
)
|
||||
elif isinstance(res, tuple) and all(
|
||||
isinstance(t, cls._tensor_type) for t in res
|
||||
):
|
||||
# share the evaluation between lazy tuple elements
|
||||
shared_args: list = [args, None]
|
||||
|
||||
def eager_tuple_element(a: list[Any], i: int = 0, /, **kw) -> LazyBase:
|
||||
assert len(a) == 2
|
||||
if a[1] is None:
|
||||
a[1] = fn(*a[0], **kw)
|
||||
return a[1][i]
|
||||
|
||||
return tuple(
|
||||
cls(
|
||||
meta=cls.eager_to_meta(res[i]),
|
||||
args=(shared_args, i),
|
||||
kwargs=kwargs,
|
||||
func=eager_tuple_element,
|
||||
)
|
||||
for i in range(len(res))
|
||||
)
|
||||
else:
|
||||
del res # not needed
|
||||
# non-tensor return likely relies on the contents of the args
|
||||
# (e.g. the result of torch.equal)
|
||||
eager_args = cls.to_eager(args)
|
||||
return fn(*eager_args, **kwargs)
|
||||
|
||||
return wrapped_fn
|
||||
|
||||
@classmethod
|
||||
def to_eager(cls, t: Any) -> Any:
|
||||
def simple_to_eager(_t: LazyBase) -> Any:
|
||||
if _t._data is not None:
|
||||
return _t._data
|
||||
|
||||
# NOTE: there's a recursion limit in Python (usually 1000)
|
||||
|
||||
assert _t._func is not None
|
||||
_t._args = cls._recurse_apply(_t._args, simple_to_eager)
|
||||
_t._data = _t._func(*_t._args, **_t._kwargs)
|
||||
# sanity check
|
||||
assert _t._data is not None
|
||||
assert _t._data.dtype == _t._meta.dtype
|
||||
assert _t._data.shape == _t._meta.shape
|
||||
|
||||
return _t._data
|
||||
|
||||
# recurse into lists and/or tuples, keeping their structure
|
||||
return cls._recurse_apply(t, simple_to_eager)
|
||||
|
||||
@classmethod
|
||||
def eager_to_meta(cls, t: Any) -> Any:
|
||||
return cls.meta_with_dtype_and_shape(t.dtype, t.shape)
|
||||
|
||||
# must be overridden, meta tensor init is backend-specific
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def meta_with_dtype_and_shape(cls, dtype: Any, shape: Any) -> Any:
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def from_eager(cls, t: Any) -> Any:
|
||||
if type(t) is cls:
|
||||
# already lazy
|
||||
return t
|
||||
elif isinstance(t, cls._tensor_type):
|
||||
return cls(meta=cls.eager_to_meta(t), data=t)
|
||||
else:
|
||||
raise TypeError(f"{type(t)!r} is not compatible with {cls._tensor_type!r}")
|
||||
|
||||
|
||||
class LazyNumpyTensor(LazyBase):
|
||||
_tensor_type = np.ndarray
|
||||
|
||||
shape: tuple[int, ...] # Makes the type checker happy in quants.py
|
||||
|
||||
@classmethod
|
||||
def meta_with_dtype_and_shape(
|
||||
cls, dtype: DTypeLike, shape: tuple[int, ...]
|
||||
) -> np.ndarray[Any, Any]:
|
||||
# The initial idea was to use np.nan as the fill value,
|
||||
# but non-float types like np.int16 can't use that.
|
||||
# So zero it is.
|
||||
cheat = np.zeros(1, dtype)
|
||||
return np.lib.stride_tricks.as_strided(cheat, shape, (0 for _ in shape))
|
||||
|
||||
def astype(self, dtype, *args, **kwargs):
|
||||
meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
|
||||
full_args = (
|
||||
self,
|
||||
dtype,
|
||||
) + args
|
||||
return type(self)(
|
||||
meta=meta,
|
||||
args=full_args,
|
||||
kwargs=kwargs,
|
||||
func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)),
|
||||
)
|
||||
|
||||
def tofile(self, *args, **kwargs):
|
||||
eager = LazyNumpyTensor.to_eager(self)
|
||||
return eager.tofile(*args, **kwargs)
|
||||
|
||||
# TODO: __array_function__
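To make the deferred evaluation above concrete, here is a small sketch of LazyNumpyTensor in use; the import path and the array contents are illustrative assumptions, not part of this diff.

# LazyNumpyTensor sketch: operations are recorded against a zero-strided meta
# array and only evaluated when the data is actually needed.
import numpy as np
from gguf.lazy import LazyNumpyTensor  # assumed module path

lazy = LazyNumpyTensor.from_eager(np.arange(6, dtype=np.float32).reshape(2, 3))
scaled = (lazy * 2).astype(np.float16)      # nothing is computed yet

print(scaled.shape)                         # shape/dtype come from the meta tensor
result = LazyNumpyTensor.to_eager(scaled)   # triggers the recorded computation
print(result.dtype, result.sum())           # float16 30.0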
@@ -0,0 +1,863 @@
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import json
|
||||
import yaml
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
from .constants import Keys
|
||||
|
||||
import gguf
|
||||
|
||||
logger = logging.getLogger("metadata")
|
||||
|
||||
|
||||
@dataclass
|
||||
class Metadata:
|
||||
# Authorship Metadata to be written to GGUF KV Store
|
||||
name: Optional[str] = None
|
||||
author: Optional[str] = None
|
||||
version: Optional[str] = None
|
||||
organization: Optional[str] = None
|
||||
finetune: Optional[str] = None
|
||||
basename: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
quantized_by: Optional[str] = None
|
||||
size_label: Optional[str] = None
|
||||
url: Optional[str] = None
|
||||
doi: Optional[str] = None
|
||||
uuid: Optional[str] = None
|
||||
repo_url: Optional[str] = None
|
||||
source_url: Optional[str] = None
|
||||
source_doi: Optional[str] = None
|
||||
source_uuid: Optional[str] = None
|
||||
source_repo_url: Optional[str] = None
|
||||
license: Optional[str] = None
|
||||
license_name: Optional[str] = None
|
||||
license_link: Optional[str] = None
|
||||
base_models: Optional[list[dict]] = None
|
||||
tags: Optional[list[str]] = None
|
||||
languages: Optional[list[str]] = None
|
||||
datasets: Optional[list[dict]] = None
|
||||
|
||||
@staticmethod
|
||||
def load(
|
||||
metadata_override_path: Optional[Path] = None,
|
||||
model_path: Optional[Path] = None,
|
||||
model_name: Optional[str] = None,
|
||||
total_params: int = 0,
|
||||
) -> Metadata:
|
||||
# This grabs as much contextual authorship metadata as possible from the model repository
|
||||
# making any conversion as required to match the gguf kv store metadata format
|
||||
# as well as giving users the ability to override any authorship metadata that may be incorrect
|
||||
|
||||
# Create a new Metadata instance
|
||||
metadata = Metadata()
|
||||
|
||||
model_card = Metadata.load_model_card(model_path)
|
||||
hf_params = Metadata.load_hf_parameters(model_path)
|
||||
# TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter
|
||||
|
||||
# heuristics
|
||||
metadata = Metadata.apply_metadata_heuristic(
|
||||
metadata, model_card, hf_params, model_path, total_params
|
||||
)
|
||||
|
||||
# Metadata Override File Provided
|
||||
# This is based on LLM_KV_NAMES mapping in llama.cpp
|
||||
metadata_override = Metadata.load_metadata_override(metadata_override_path)
|
||||
|
||||
metadata.name = metadata_override.get(Keys.General.NAME, metadata.name)
|
||||
metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author)
|
||||
metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version)
|
||||
metadata.organization = metadata_override.get(
|
||||
Keys.General.ORGANIZATION, metadata.organization
|
||||
)
|
||||
|
||||
metadata.finetune = metadata_override.get(
|
||||
Keys.General.FINETUNE, metadata.finetune
|
||||
)
|
||||
metadata.basename = metadata_override.get(
|
||||
Keys.General.BASENAME, metadata.basename
|
||||
)
|
||||
|
||||
metadata.description = metadata_override.get(
|
||||
Keys.General.DESCRIPTION, metadata.description
|
||||
)
|
||||
metadata.quantized_by = metadata_override.get(
|
||||
Keys.General.QUANTIZED_BY, metadata.quantized_by
|
||||
)
|
||||
|
||||
metadata.size_label = metadata_override.get(
|
||||
Keys.General.SIZE_LABEL, metadata.size_label
|
||||
)
|
||||
metadata.license_name = metadata_override.get(
|
||||
Keys.General.LICENSE_NAME, metadata.license_name
|
||||
)
|
||||
metadata.license_link = metadata_override.get(
|
||||
Keys.General.LICENSE_LINK, metadata.license_link
|
||||
)
|
||||
|
||||
metadata.url = metadata_override.get(Keys.General.URL, metadata.url)
|
||||
metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi)
|
||||
metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid)
|
||||
metadata.repo_url = metadata_override.get(
|
||||
Keys.General.REPO_URL, metadata.repo_url
|
||||
)
|
||||
|
||||
metadata.source_url = metadata_override.get(
|
||||
Keys.General.SOURCE_URL, metadata.source_url
|
||||
)
|
||||
metadata.source_doi = metadata_override.get(
|
||||
Keys.General.SOURCE_DOI, metadata.source_doi
|
||||
)
|
||||
metadata.source_uuid = metadata_override.get(
|
||||
Keys.General.SOURCE_UUID, metadata.source_uuid
|
||||
)
|
||||
metadata.source_repo_url = metadata_override.get(
|
||||
Keys.General.SOURCE_REPO_URL, metadata.source_repo_url
|
||||
)
|
||||
|
||||
# Base Models is received here as an array of models
|
||||
metadata.base_models = metadata_override.get(
|
||||
"general.base_models", metadata.base_models
|
||||
)
|
||||
|
||||
# Datasets is received here as an array of datasets
|
||||
metadata.datasets = metadata_override.get("general.datasets", metadata.datasets)
|
||||
|
||||
metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
|
||||
metadata.languages = metadata_override.get(
|
||||
Keys.General.LANGUAGES, metadata.languages
|
||||
)
|
||||
|
||||
# Direct Metadata Override (via direct cli argument)
|
||||
if model_name is not None:
|
||||
metadata.name = model_name
|
||||
|
||||
return metadata
|
||||
|
||||
@staticmethod
|
||||
def load_metadata_override(
|
||||
metadata_override_path: Optional[Path] = None,
|
||||
) -> dict[str, Any]:
|
||||
if metadata_override_path is None or not metadata_override_path.is_file():
|
||||
return {}
|
||||
|
||||
with open(metadata_override_path, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
|
||||
@staticmethod
|
||||
def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]:
|
||||
if model_path is None or not model_path.is_dir():
|
||||
return {}
|
||||
|
||||
model_card_path = model_path / "README.md"
|
||||
|
||||
if not model_card_path.is_file():
|
||||
return {}
|
||||
|
||||
# The model card metadata is assumed to always be in YAML (frontmatter)
|
||||
# ref: https://github.com/huggingface/transformers/blob/a5c642fe7a1f25d3bdcd76991443ba6ff7ee34b2/src/transformers/modelcard.py#L468-L473
|
||||
yaml_content: str = ""
|
||||
with open(model_card_path, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
lines = content.splitlines()
|
||||
lines_yaml = []
|
||||
if len(lines) == 0:
|
||||
# Empty file
|
||||
return {}
|
||||
if len(lines) > 0 and lines[0] != "---":
|
||||
# No frontmatter
|
||||
return {}
|
||||
for line in lines[1:]:
|
||||
if line == "---":
|
||||
break # End of frontmatter
|
||||
else:
|
||||
lines_yaml.append(line)
|
||||
yaml_content = "\n".join(lines_yaml) + "\n"
|
||||
|
||||
# Quick hack to fix the Norway problem
|
||||
# https://hitchdev.com/strictyaml/why/implicit-typing-removed/
|
||||
yaml_content = yaml_content.replace("- no\n", '- "no"\n')
|
||||
|
||||
if yaml_content:
|
||||
data = yaml.safe_load(yaml_content)
|
||||
if isinstance(data, dict):
|
||||
return data
|
||||
else:
|
||||
logger.error(
|
||||
f"while reading YAML model card frontmatter, data is {type(data)} instead of dict"
|
||||
)
|
||||
return {}
|
||||
else:
|
||||
return {}
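For context, a hypothetical model card illustrating what load_model_card actually consumes: only the YAML frontmatter between the two "---" markers is parsed, and the Markdown body below it is ignored.

# Hypothetical README.md model card; only the YAML frontmatter becomes the returned dict.
# ---
# license: apache-2.0
# language:
# - en
# - no            # the loader quotes a bare "- no" to dodge YAML's Norway problem
# base_model: mistralai/Mistral-7B-v0.1
# ---
# # Model card
# Free-form Markdown below the frontmatter is never read by load_model_card().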
|
||||
|
||||
@staticmethod
|
||||
def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]:
|
||||
if model_path is None or not model_path.is_dir():
|
||||
return {}
|
||||
|
||||
config_path = model_path / "config.json"
|
||||
|
||||
if not config_path.is_file():
|
||||
return {}
|
||||
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
|
||||
@staticmethod
|
||||
def id_to_title(string):
|
||||
# Convert capitalization into title form unless acronym or version number
|
||||
return " ".join(
|
||||
[
|
||||
(
|
||||
w.title()
|
||||
if w.islower() and not re.match(r"^(v\d+(?:\.\d+)*|\d.*)$", w)
|
||||
else w
|
||||
)
|
||||
for w in string.strip().replace("-", " ").split()
|
||||
]
|
||||
)
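As a quick worked example (the input string is hypothetical), lowercase words are title-cased while version-like and digit-led parts are preserved:

# id_to_title worked example (hypothetical input):
print(Metadata.id_to_title("mixtral-8x7b-instruct"))
# -> "Mixtral 8x7b Instruct": "mixtral"/"instruct" are title-cased,
#    while "8x7b" starts with a digit and is left unchanged.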
|
||||
|
||||
@staticmethod
|
||||
def get_model_id_components(
|
||||
model_id: Optional[str] = None, total_params: int = 0
|
||||
) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
|
||||
# Huggingface often stores the model id as '<org>/<model name>'
|
||||
# so let's parse it and apply some heuristics if possible for model name components
|
||||
|
||||
if model_id is None:
|
||||
# model ID missing
|
||||
return None, None, None, None, None, None
|
||||
|
||||
if " " in model_id:
|
||||
# model ID is actually a normal human sentence
|
||||
# which means it's most likely a normal model name only
|
||||
# not part of the hugging face naming standard, but whatever
|
||||
return model_id, None, None, None, None, None
|
||||
|
||||
if "/" in model_id:
|
||||
# model ID (huggingface style)
|
||||
org_component, model_full_name_component = model_id.split("/", 1)
|
||||
else:
|
||||
# model ID but missing org components
|
||||
org_component, model_full_name_component = None, model_id
|
||||
|
||||
# Check if we erroneously matched against './' or '../' etc...
|
||||
if (
|
||||
org_component is not None
|
||||
and len(org_component) > 0
|
||||
and org_component[0] == "."
|
||||
):
|
||||
org_component = None
|
||||
|
||||
name_parts: list[str] = model_full_name_component.split("-")
|
||||
|
||||
# Remove empty parts
|
||||
for i in reversed(range(len(name_parts))):
|
||||
if len(name_parts[i]) == 0:
|
||||
del name_parts[i]
|
||||
|
||||
name_types: list[
|
||||
set[Literal["basename", "size_label", "finetune", "version", "type"]]
|
||||
] = [set() for _ in name_parts]
|
||||
|
||||
# Annotate the name
|
||||
for i, part in enumerate(name_parts):
|
||||
# Version
|
||||
if re.fullmatch(r"(v|iter)?\d+([.]\d+)*", part, re.IGNORECASE):
|
||||
name_types[i].add("version")
|
||||
# Quant type (should not be there for base models, but still annotated)
|
||||
elif re.fullmatch(r"i?q\d(_\w)*|b?fp?(16|32)", part, re.IGNORECASE):
|
||||
name_types[i].add("type")
|
||||
name_parts[i] = part.upper()
|
||||
# Model size
|
||||
elif i > 0 and re.fullmatch(
|
||||
r"(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)",
|
||||
part,
|
||||
re.IGNORECASE,
|
||||
):
|
||||
part = part.replace("_", ".")
|
||||
# Handle weird bloom-7b1 notation
|
||||
if part[-1].isdecimal():
|
||||
part = part[:-2] + "." + part[-1] + part[-2]
|
||||
# Normalize the size suffixes
|
||||
if len(part) > 1 and part[-2].isdecimal():
|
||||
if part[-1] in "kmbt":
|
||||
part = part[:-1] + part[-1].upper()
|
||||
if total_params != 0:
|
||||
try:
|
||||
label_params = float(part[:-1]) * pow(
|
||||
1000, " KMBT".find(part[-1])
|
||||
)
|
||||
# Only use it as a size label if it's close or bigger than the model size
|
||||
# Note that LoRA adapters don't necessarily include all layers,
|
||||
# so this is why bigger label sizes are accepted.
|
||||
# Do not use the size label when it's smaller than 1/8 of the model size
|
||||
if (
|
||||
total_params < 0 and label_params < abs(total_params) // 8
|
||||
) or (
|
||||
# Check both directions when the current model isn't a LoRA adapter
|
||||
total_params > 0
|
||||
and abs(label_params - total_params) > 7 * total_params // 8
|
||||
):
|
||||
# Likely a context length
|
||||
name_types[i].add("finetune")
|
||||
# Lowercase the size when it's a context length
|
||||
part = part[:-1] + part[-1].lower()
|
||||
except ValueError:
|
||||
# Failed to convert the size label to float, use it anyway
|
||||
pass
|
||||
if len(name_types[i]) == 0:
|
||||
name_types[i].add("size_label")
|
||||
name_parts[i] = part
|
||||
# Some easy to recognize finetune names
|
||||
elif i > 0 and re.fullmatch(
|
||||
r"chat|instruct|vision|lora", part, re.IGNORECASE
|
||||
):
|
||||
if total_params < 0 and part.lower() == "lora":
|
||||
# ignore redundant "lora" in the finetune part when the output is a lora adapter
|
||||
name_types[i].add("type")
|
||||
else:
|
||||
name_types[i].add("finetune")
|
||||
|
||||
# Ignore word-based size labels when there is at least a number-based one present
|
||||
# TODO: should word-based size labels always be removed instead?
|
||||
if any(
|
||||
c.isdecimal()
|
||||
for n, t in zip(name_parts, name_types)
|
||||
if "size_label" in t
|
||||
for c in n
|
||||
):
|
||||
for n, t in zip(name_parts, name_types):
|
||||
if "size_label" in t:
|
||||
if all(c.isalpha() for c in n):
|
||||
t.remove("size_label")
|
||||
|
||||
at_start = True
|
||||
# Find the basename through the annotated name
|
||||
for part, t in zip(name_parts, name_types):
|
||||
if at_start and ((len(t) == 0 and part[0].isalpha()) or "version" in t):
|
||||
t.add("basename")
|
||||
else:
|
||||
if at_start:
|
||||
at_start = False
|
||||
if len(t) == 0:
|
||||
t.add("finetune")
|
||||
|
||||
# Remove the basename annotation from trailing version
|
||||
for part, t in zip(reversed(name_parts), reversed(name_types)):
|
||||
if "basename" in t and len(t) > 1:
|
||||
t.remove("basename")
|
||||
else:
|
||||
break
|
||||
|
||||
basename = (
|
||||
"-".join(n for n, t in zip(name_parts, name_types) if "basename" in t)
|
||||
or None
|
||||
)
|
||||
# Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys)
|
||||
size_label = (
|
||||
"-".join(
|
||||
dict.fromkeys(
|
||||
s for s, t in zip(name_parts, name_types) if "size_label" in t
|
||||
).keys()
|
||||
)
|
||||
or None
|
||||
)
|
||||
finetune = (
|
||||
"-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t)
|
||||
or None
|
||||
)
|
||||
# TODO: should the basename version always be excluded?
|
||||
# NOTE: multiple finetune versions are joined together
|
||||
version = (
|
||||
"-".join(
|
||||
v
|
||||
for v, t in zip(name_parts, name_types)
|
||||
if "version" in t and "basename" not in t
|
||||
)
|
||||
or None
|
||||
)
|
||||
|
||||
if size_label is None and finetune is None and version is None:
|
||||
# Too ambiguous, output nothing
|
||||
basename = None
|
||||
|
||||
return (
|
||||
model_full_name_component,
|
||||
org_component,
|
||||
basename,
|
||||
finetune,
|
||||
version,
|
||||
size_label,
|
||||
)
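As an illustration of the heuristics above (the model id is hypothetical, and the split shows how the code is expected to behave rather than a recorded output):

# Expected behaviour sketch for a Hugging Face style id:
# Metadata.get_model_id_components("mistralai/Mixtral-8x7B-Instruct-v0.1")
#   model_full_name_component -> "Mixtral-8x7B-Instruct-v0.1"
#   org_component             -> "mistralai"
#   basename                  -> "Mixtral"
#   finetune                  -> "Instruct"
#   version                   -> "v0.1"
#   size_label                -> "8x7B"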
|
||||
|
||||
@staticmethod
|
||||
def apply_metadata_heuristic(
|
||||
metadata: Metadata,
|
||||
model_card: Optional[dict] = None,
|
||||
hf_params: Optional[dict] = None,
|
||||
model_path: Optional[Path] = None,
|
||||
total_params: int = 0,
|
||||
) -> Metadata:
|
||||
# Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
|
||||
|
||||
# Model Card Heuristics
|
||||
########################
|
||||
if model_card is not None:
|
||||
|
||||
def use_model_card_metadata(metadata_key: str, model_card_key: str):
|
||||
if (
|
||||
model_card_key in model_card
|
||||
and getattr(metadata, metadata_key, None) is None
|
||||
):
|
||||
setattr(metadata, metadata_key, model_card.get(model_card_key))
|
||||
|
||||
def use_array_model_card_metadata(metadata_key: str, model_card_key: str):
|
||||
# Note: Will append rather than replace if the value already exists
|
||||
tags_value = model_card.get(model_card_key, None)
|
||||
if tags_value is None:
|
||||
return
|
||||
|
||||
current_value = getattr(metadata, metadata_key, None)
|
||||
if current_value is None:
|
||||
current_value = []
|
||||
|
||||
if isinstance(tags_value, str):
|
||||
current_value.append(tags_value)
|
||||
elif isinstance(tags_value, list):
|
||||
current_value.extend(tags_value)
|
||||
|
||||
setattr(metadata, metadata_key, current_value)
|
||||
|
||||
# LLAMA.cpp's direct internal convention
|
||||
# (Definitely not part of hugging face formal/informal standard)
|
||||
#########################################
|
||||
use_model_card_metadata("name", "name")
|
||||
use_model_card_metadata("author", "author")
|
||||
use_model_card_metadata("version", "version")
|
||||
use_model_card_metadata("organization", "organization")
|
||||
use_model_card_metadata("description", "description")
|
||||
use_model_card_metadata("finetune", "finetune")
|
||||
use_model_card_metadata("basename", "basename")
|
||||
use_model_card_metadata("size_label", "size_label")
|
||||
use_model_card_metadata("source_url", "url")
|
||||
use_model_card_metadata("source_doi", "doi")
|
||||
use_model_card_metadata("source_uuid", "uuid")
|
||||
use_model_card_metadata("source_repo_url", "repo_url")
|
||||
|
||||
# LLAMA.cpp's huggingface style convention
|
||||
# (Definitely not part of hugging face formal/informal standard... but with model_ appended to match their style)
|
||||
###########################################
|
||||
use_model_card_metadata("name", "model_name")
|
||||
use_model_card_metadata("author", "model_author")
|
||||
use_model_card_metadata("version", "model_version")
|
||||
use_model_card_metadata("organization", "model_organization")
|
||||
use_model_card_metadata("description", "model_description")
|
||||
use_model_card_metadata("finetune", "model_finetune")
|
||||
use_model_card_metadata("basename", "model_basename")
|
||||
use_model_card_metadata("size_label", "model_size_label")
|
||||
use_model_card_metadata("source_url", "model_url")
|
||||
use_model_card_metadata("source_doi", "model_doi")
|
||||
use_model_card_metadata("source_uuid", "model_uuid")
|
||||
use_model_card_metadata("source_repo_url", "model_repo_url")
|
||||
|
||||
# Hugging Face Direct Convention
|
||||
#################################
|
||||
|
||||
# Not part of the huggingface model card standard, but some model creators have been seen using it
|
||||
# such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
|
||||
use_model_card_metadata("name", "model_name")
|
||||
use_model_card_metadata("author", "model_creator")
|
||||
use_model_card_metadata("basename", "model_type")
|
||||
|
||||
if (
|
||||
"base_model" in model_card
|
||||
or "base_models" in model_card
|
||||
or "base_model_sources" in model_card
|
||||
):
|
||||
# This represents the parent models that this is based on
|
||||
# Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges)
|
||||
# Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md
|
||||
metadata_base_models = []
|
||||
base_model_value = model_card.get(
|
||||
"base_model",
|
||||
model_card.get(
|
||||
"base_models", model_card.get("base_model_sources", None)
|
||||
),
|
||||
)
|
||||
|
||||
if base_model_value is not None:
|
||||
if isinstance(base_model_value, str):
|
||||
metadata_base_models.append(base_model_value)
|
||||
elif isinstance(base_model_value, list):
|
||||
metadata_base_models.extend(base_model_value)
|
||||
|
||||
if metadata.base_models is None:
|
||||
metadata.base_models = []
|
||||
|
||||
for model_id in metadata_base_models:
|
||||
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
||||
base_model = {}
|
||||
if isinstance(model_id, str):
|
||||
if (
|
||||
model_id.startswith("http://")
|
||||
or model_id.startswith("https://")
|
||||
or model_id.startswith("ssh://")
|
||||
):
|
||||
base_model["repo_url"] = model_id
|
||||
|
||||
# Check if Hugging Face ID is present in URL
|
||||
if "huggingface.co" in model_id:
|
||||
match = re.match(
|
||||
r"https?://huggingface.co/([^/]+/[^/]+)$", model_id
|
||||
)
|
||||
if match:
|
||||
model_id_component = match.group(1)
|
||||
(
|
||||
model_full_name_component,
|
||||
org_component,
|
||||
basename,
|
||||
finetune,
|
||||
version,
|
||||
size_label,
|
||||
) = Metadata.get_model_id_components(
|
||||
model_id_component, total_params
|
||||
)
|
||||
|
||||
# Populate model dictionary with extracted components
|
||||
if model_full_name_component is not None:
|
||||
base_model["name"] = Metadata.id_to_title(
|
||||
model_full_name_component
|
||||
)
|
||||
if org_component is not None:
|
||||
base_model["organization"] = (
|
||||
Metadata.id_to_title(org_component)
|
||||
)
|
||||
if version is not None:
|
||||
base_model["version"] = version
|
||||
|
||||
else:
|
||||
# Likely a Hugging Face ID
|
||||
(
|
||||
model_full_name_component,
|
||||
org_component,
|
||||
basename,
|
||||
finetune,
|
||||
version,
|
||||
size_label,
|
||||
) = Metadata.get_model_id_components(model_id, total_params)
|
||||
|
||||
# Populate model dictionary with extracted components
|
||||
if model_full_name_component is not None:
|
||||
base_model["name"] = Metadata.id_to_title(
|
||||
model_full_name_component
|
||||
)
|
||||
if org_component is not None:
|
||||
base_model["organization"] = Metadata.id_to_title(
|
||||
org_component
|
||||
)
|
||||
if version is not None:
|
||||
base_model["version"] = version
|
||||
if (
|
||||
org_component is not None
|
||||
and model_full_name_component is not None
|
||||
):
|
||||
base_model["repo_url"] = (
|
||||
f"https://huggingface.co/{org_component}/{model_full_name_component}"
|
||||
)
|
||||
|
||||
elif isinstance(model_id, dict):
|
||||
base_model = model_id
|
||||
|
||||
else:
|
||||
logger.error(
|
||||
f"base model entry '{str(model_id)}' not in a known format"
|
||||
)
|
||||
|
||||
metadata.base_models.append(base_model)
|
||||
|
||||
if (
|
||||
"datasets" in model_card
|
||||
or "dataset" in model_card
|
||||
or "dataset_sources" in model_card
|
||||
):
|
||||
# This represents the datasets that this was trained from
|
||||
metadata_datasets = []
|
||||
dataset_value = model_card.get(
|
||||
"datasets",
|
||||
model_card.get("dataset", model_card.get("dataset_sources", None)),
|
||||
)
|
||||
|
||||
if dataset_value is not None:
|
||||
if isinstance(dataset_value, str):
|
||||
metadata_datasets.append(dataset_value)
|
||||
elif isinstance(dataset_value, list):
|
||||
metadata_datasets.extend(dataset_value)
|
||||
|
||||
if metadata.datasets is None:
|
||||
metadata.datasets = []
|
||||
|
||||
for dataset_id in metadata_datasets:
|
||||
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
||||
dataset = {}
|
||||
if isinstance(dataset_id, str):
|
||||
if dataset_id.startswith(("http://", "https://", "ssh://")):
|
||||
dataset["repo_url"] = dataset_id
|
||||
|
||||
# Check if Hugging Face ID is present in URL
|
||||
if "huggingface.co" in dataset_id:
|
||||
match = re.match(
|
||||
r"https?://huggingface.co/([^/]+/[^/]+)$",
|
||||
dataset_id,
|
||||
)
|
||||
if match:
|
||||
dataset_id_component = match.group(1)
|
||||
(
|
||||
dataset_name_component,
|
||||
org_component,
|
||||
basename,
|
||||
finetune,
|
||||
version,
|
||||
size_label,
|
||||
) = Metadata.get_model_id_components(
|
||||
dataset_id_component, total_params
|
||||
)
|
||||
|
||||
# Populate dataset dictionary with extracted components
|
||||
if dataset_name_component is not None:
|
||||
dataset["name"] = Metadata.id_to_title(
|
||||
dataset_name_component
|
||||
)
|
||||
if org_component is not None:
|
||||
dataset["organization"] = Metadata.id_to_title(
|
||||
org_component
|
||||
)
|
||||
if version is not None:
|
||||
dataset["version"] = version
|
||||
|
||||
else:
|
||||
# Likely a Hugging Face ID
|
||||
(
|
||||
dataset_name_component,
|
||||
org_component,
|
||||
basename,
|
||||
finetune,
|
||||
version,
|
||||
size_label,
|
||||
) = Metadata.get_model_id_components(
|
||||
dataset_id, total_params
|
||||
)
|
||||
|
||||
# Populate dataset dictionary with extracted components
|
||||
if dataset_name_component is not None:
|
||||
dataset["name"] = Metadata.id_to_title(
|
||||
dataset_name_component
|
||||
)
|
||||
if org_component is not None:
|
||||
dataset["organization"] = Metadata.id_to_title(
|
||||
org_component
|
||||
)
|
||||
if version is not None:
|
||||
dataset["version"] = version
|
||||
if (
|
||||
org_component is not None
|
||||
and dataset_name_component is not None
|
||||
):
|
||||
dataset["repo_url"] = (
|
||||
f"https://huggingface.co/{org_component}/{dataset_name_component}"
|
||||
)
|
||||
|
||||
elif isinstance(dataset_id, dict):
|
||||
dataset = dataset_id
|
||||
|
||||
else:
|
||||
logger.error(
|
||||
f"dataset entry '{str(dataset_id)}' not in a known format"
|
||||
)
|
||||
|
||||
metadata.datasets.append(dataset)
|
||||
|
||||
use_model_card_metadata("license", "license")
|
||||
use_model_card_metadata("license_name", "license_name")
|
||||
use_model_card_metadata("license_link", "license_link")
|
||||
|
||||
use_array_model_card_metadata("tags", "tags")
|
||||
use_array_model_card_metadata("tags", "pipeline_tag")
|
||||
|
||||
use_array_model_card_metadata("languages", "languages")
|
||||
use_array_model_card_metadata("languages", "language")
|
||||
|
||||
# Hugging Face Parameter Heuristics
|
||||
####################################
|
||||
|
||||
if hf_params is not None:
|
||||
|
||||
hf_name_or_path = hf_params.get("_name_or_path")
|
||||
if hf_name_or_path is not None and hf_name_or_path.count("/") <= 1:
|
||||
# Use _name_or_path only if it's actually a model name and not some filesystem path
|
||||
# e.g. 'meta-llama/Llama-2-7b-hf'
|
||||
model_id = hf_name_or_path
|
||||
(
|
||||
model_full_name_component,
|
||||
org_component,
|
||||
basename,
|
||||
finetune,
|
||||
version,
|
||||
size_label,
|
||||
) = Metadata.get_model_id_components(model_id, total_params)
|
||||
if metadata.name is None and model_full_name_component is not None:
|
||||
metadata.name = Metadata.id_to_title(model_full_name_component)
|
||||
if metadata.organization is None and org_component is not None:
|
||||
metadata.organization = Metadata.id_to_title(org_component)
|
||||
if metadata.basename is None and basename is not None:
|
||||
metadata.basename = basename
|
||||
if metadata.finetune is None and finetune is not None:
|
||||
metadata.finetune = finetune
|
||||
if metadata.version is None and version is not None:
|
||||
metadata.version = version
|
||||
if metadata.size_label is None and size_label is not None:
|
||||
metadata.size_label = size_label
|
||||
|
||||
# Directory Folder Name Fallback Heuristics
|
||||
############################################
|
||||
if model_path is not None:
|
||||
model_id = model_path.name
|
||||
(
|
||||
model_full_name_component,
|
||||
org_component,
|
||||
basename,
|
||||
finetune,
|
||||
version,
|
||||
size_label,
|
||||
) = Metadata.get_model_id_components(model_id, total_params)
|
||||
if metadata.name is None and model_full_name_component is not None:
|
||||
metadata.name = Metadata.id_to_title(model_full_name_component)
|
||||
if metadata.organization is None and org_component is not None:
|
||||
metadata.organization = Metadata.id_to_title(org_component)
|
||||
if metadata.basename is None and basename is not None:
|
||||
metadata.basename = basename
|
||||
if metadata.finetune is None and finetune is not None:
|
||||
metadata.finetune = finetune
|
||||
if metadata.version is None and version is not None:
|
||||
metadata.version = version
|
||||
if metadata.size_label is None and size_label is not None:
|
||||
metadata.size_label = size_label
|
||||
|
||||
return metadata
|
||||
|
||||
def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
|
||||
assert self.name is not None
|
||||
gguf_writer.add_name(self.name)
|
||||
|
||||
if self.author is not None:
|
||||
gguf_writer.add_author(self.author)
|
||||
if self.version is not None:
|
||||
gguf_writer.add_version(self.version)
|
||||
if self.organization is not None:
|
||||
gguf_writer.add_organization(self.organization)
|
||||
|
||||
if self.finetune is not None:
|
||||
gguf_writer.add_finetune(self.finetune)
|
||||
if self.basename is not None:
|
||||
gguf_writer.add_basename(self.basename)
|
||||
|
||||
if self.description is not None:
|
||||
gguf_writer.add_description(self.description)
|
||||
if self.quantized_by is not None:
|
||||
gguf_writer.add_quantized_by(self.quantized_by)
|
||||
|
||||
if self.size_label is not None:
|
||||
gguf_writer.add_size_label(self.size_label)
|
||||
|
||||
if self.license is not None:
|
||||
if isinstance(self.license, list):
|
||||
gguf_writer.add_license(",".join(self.license))
|
||||
else:
|
||||
gguf_writer.add_license(self.license)
|
||||
if self.license_name is not None:
|
||||
gguf_writer.add_license_name(self.license_name)
|
||||
if self.license_link is not None:
|
||||
gguf_writer.add_license_link(self.license_link)
|
||||
|
||||
if self.url is not None:
|
||||
gguf_writer.add_url(self.url)
|
||||
if self.doi is not None:
|
||||
gguf_writer.add_doi(self.doi)
|
||||
if self.uuid is not None:
|
||||
gguf_writer.add_uuid(self.uuid)
|
||||
if self.repo_url is not None:
|
||||
gguf_writer.add_repo_url(self.repo_url)
|
||||
|
||||
if self.source_url is not None:
|
||||
gguf_writer.add_source_url(self.source_url)
|
||||
if self.source_doi is not None:
|
||||
gguf_writer.add_source_doi(self.source_doi)
|
||||
if self.source_uuid is not None:
|
||||
gguf_writer.add_source_uuid(self.source_uuid)
|
||||
if self.source_repo_url is not None:
|
||||
gguf_writer.add_source_repo_url(self.source_repo_url)
|
||||
|
||||
if self.base_models is not None:
|
||||
gguf_writer.add_base_model_count(len(self.base_models))
|
||||
for key, base_model_entry in enumerate(self.base_models):
|
||||
if "name" in base_model_entry:
|
||||
gguf_writer.add_base_model_name(key, base_model_entry["name"])
|
||||
if "author" in base_model_entry:
|
||||
gguf_writer.add_base_model_author(key, base_model_entry["author"])
|
||||
if "version" in base_model_entry:
|
||||
gguf_writer.add_base_model_version(key, base_model_entry["version"])
|
||||
if "organization" in base_model_entry:
|
||||
gguf_writer.add_base_model_organization(
|
||||
key, base_model_entry["organization"]
|
||||
)
|
||||
if "description" in base_model_entry:
|
||||
gguf_writer.add_base_model_description(
|
||||
key, base_model_entry["description"]
|
||||
)
|
||||
if "url" in base_model_entry:
|
||||
gguf_writer.add_base_model_url(key, base_model_entry["url"])
|
||||
if "doi" in base_model_entry:
|
||||
gguf_writer.add_base_model_doi(key, base_model_entry["doi"])
|
||||
if "uuid" in base_model_entry:
|
||||
gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"])
|
||||
if "repo_url" in base_model_entry:
|
||||
gguf_writer.add_base_model_repo_url(
|
||||
key, base_model_entry["repo_url"]
|
||||
)
|
||||
|
||||
if self.datasets is not None:
|
||||
gguf_writer.add_dataset_count(len(self.datasets))
|
||||
for key, dataset_entry in enumerate(self.datasets):
|
||||
if "name" in dataset_entry:
|
||||
gguf_writer.add_dataset_name(key, dataset_entry["name"])
|
||||
if "author" in dataset_entry:
|
||||
gguf_writer.add_dataset_author(key, dataset_entry["author"])
|
||||
if "version" in dataset_entry:
|
||||
gguf_writer.add_dataset_version(key, dataset_entry["version"])
|
||||
if "organization" in dataset_entry:
|
||||
gguf_writer.add_dataset_organization(
|
||||
key, dataset_entry["organization"]
|
||||
)
|
||||
if "description" in dataset_entry:
|
||||
gguf_writer.add_dataset_description(
|
||||
key, dataset_entry["description"]
|
||||
)
|
||||
if "url" in dataset_entry:
|
||||
gguf_writer.add_dataset_url(key, dataset_entry["url"])
|
||||
if "doi" in dataset_entry:
|
||||
gguf_writer.add_dataset_doi(key, dataset_entry["doi"])
|
||||
if "uuid" in dataset_entry:
|
||||
gguf_writer.add_dataset_uuid(key, dataset_entry["uuid"])
|
||||
if "repo_url" in dataset_entry:
|
||||
gguf_writer.add_dataset_repo_url(key, dataset_entry["repo_url"])
|
||||
|
||||
if self.tags is not None:
|
||||
gguf_writer.add_tags(self.tags)
|
||||
if self.languages is not None:
|
||||
gguf_writer.add_languages(self.languages)
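Putting it together, a minimal sketch of how this metadata typically reaches a GGUF file; the paths, parameter count, architecture name, and the exact GGUFWriter constructor arguments are assumptions, not values taken from this diff.

# End-to-end sketch (paths, parameter count and writer arguments are hypothetical).
from pathlib import Path
import gguf

meta = Metadata.load(
    metadata_override_path=Path("metadata_override.json"),
    model_path=Path("./Mistral-7B-v0.1"),
    total_params=7_000_000_000,
)

writer = gguf.GGUFWriter("out.gguf", arch="llama")  # assumed constructor arguments
meta.set_gguf_meta_model(writer)  # emits the general.* key/value pairs shown above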
File diff suppressed because it is too large
@@ -0,0 +1,884 @@
from __future__ import annotations
|
||||
|
||||
from typing import Sequence
|
||||
|
||||
from .constants import MODEL_ARCH, MODEL_TENSOR, MODEL_TENSORS, TENSOR_NAMES
|
||||
|
||||
|
||||
class TensorNameMap:
|
||||
mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
|
||||
# Token embeddings
|
||||
MODEL_TENSOR.TOKEN_EMBD: (
|
||||
"gpt_neox.embed_in", # gptneox
|
||||
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
|
||||
"transformer.word_embeddings", # falcon
|
||||
"word_embeddings", # bloom
|
||||
"model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
|
||||
"tok_embeddings", # llama-pth
|
||||
"embeddings.word_embeddings", # bert nomic-bert
|
||||
"language_model.embedding.word_embeddings", # persimmon
|
||||
"wte", # gpt2
|
||||
"transformer.embd.wte", # phi2
|
||||
"model.tok_embeddings", # internlm2
|
||||
"model.embedding", # mamba-qbert
|
||||
"backbone.embedding", # mamba
|
||||
"backbone.embeddings", # mamba-hf
|
||||
"transformer.in_out_embed", # Grok
|
||||
"embedding.word_embeddings", # chatglm
|
||||
"transformer.token_embeddings", # openelm
|
||||
"shared", # t5
|
||||
"rwkv.embeddings", # rwkv6
|
||||
"model.embeddings", # rwkv7
|
||||
"model.word_embeddings", # bailingmoe
|
||||
"language_model.model.embed_tokens", # llama4
|
||||
),
|
||||
# Token type embeddings
|
||||
MODEL_TENSOR.TOKEN_TYPES: (
|
||||
"embeddings.token_type_embeddings", # bert nomic-bert
|
||||
),
|
||||
# Normalization of token embeddings
|
||||
MODEL_TENSOR.TOKEN_EMBD_NORM: (
|
||||
"word_embeddings_layernorm", # bloom
|
||||
"embeddings.LayerNorm", # bert
|
||||
"emb_ln", # nomic-bert
|
||||
"transformer.norm", # openelm
|
||||
"rwkv.blocks.0.pre_ln", # rwkv
|
||||
"rwkv.blocks.0.pre_ln", # rwkv6
|
||||
"model.pre_ln", # rwkv7
|
||||
"model.layers.0.pre_norm", # rwkv7
|
||||
"backbone.norm", # wavtokenizer
|
||||
),
|
||||
# Position embeddings
|
||||
MODEL_TENSOR.POS_EMBD: (
|
||||
"transformer.wpe", # gpt2
|
||||
"embeddings.position_embeddings", # bert
|
||||
"wpe", # gpt2
|
||||
),
|
||||
# Output
|
||||
MODEL_TENSOR.OUTPUT: (
|
||||
"embed_out", # gptneox
|
||||
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
|
||||
"output", # llama-pth bloom internlm2
|
||||
"word_embeddings_for_head", # persimmon
|
||||
"lm_head.linear", # phi2
|
||||
"output_layer", # chatglm
|
||||
"head", # rwkv
|
||||
"head.out", # wavtokenizer
|
||||
"lm_head", # llama4
|
||||
),
|
||||
# Output norm
|
||||
MODEL_TENSOR.OUTPUT_NORM: (
|
||||
"gpt_neox.final_layer_norm", # gptneox
|
||||
"transformer.ln_f", # gpt2 gpt-j falcon jais exaone
|
||||
"model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe
|
||||
"norm", # llama-pth
|
||||
"transformer.norm_f", # mpt dbrx
|
||||
"ln_f", # refact bloom qwen gpt2
|
||||
"language_model.encoder.final_layernorm", # persimmon
|
||||
"model.final_layernorm", # persimmon
|
||||
"lm_head.ln", # phi2
|
||||
"model.norm_f", # mamba-qbert
|
||||
"backbone.norm_f", # mamba
|
||||
"transformer.rms_norm", # Grok
|
||||
"encoder.final_layernorm", # chatglm
|
||||
"transformer.norm", # openelm
|
||||
"model.norm", # nemotron
|
||||
"rwkv.ln_out", # rwkv6
|
||||
"model.ln_out", # rwkv7
|
||||
"backbone.final_layer_norm", # wavtokenizer
|
||||
"model.norm", # llama4
|
||||
),
|
||||
# Rope frequencies
|
||||
MODEL_TENSOR.ROPE_FREQS: (
|
||||
"rope.freqs", # llama-pth
|
||||
"rotary_pos_emb.inv_freq", # chatglm
|
||||
),
|
||||
MODEL_TENSOR.ROPE_FACTORS_LONG: (),
|
||||
MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
|
||||
MODEL_TENSOR.CONV1D: ("backbone.embed",), # roberta
|
||||
}
|
||||
|
||||
block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
|
||||
# Attention norm
|
||||
MODEL_TENSOR.ATTN_NORM: (
|
||||
"gpt_neox.layers.{bid}.input_layernorm", # gptneox
|
||||
"transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais exaone
|
||||
"transformer.blocks.{bid}.norm_1", # mpt
|
||||
"transformer.h.{bid}.input_layernorm", # falcon7b
|
||||
"h.{bid}.input_layernorm", # bloom
|
||||
"transformer.h.{bid}.ln_mlp", # falcon40b
|
||||
"model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe
|
||||
"layers.{bid}.attention_norm", # llama-pth
|
||||
"language_model.encoder.layers.{bid}.input_layernorm", # persimmon
|
||||
"model.layers.{bid}.ln1", # yi
|
||||
"h.{bid}.ln_1", # gpt2
|
||||
"transformer.h.{bid}.ln", # phi2
|
||||
"model.layers.layers.{bid}.norm", # plamo
|
||||
"model.layers.{bid}.attention_norm", # internlm2
|
||||
"model.layers.{bid}.norm", # mamba-qbert
|
||||
"backbone.layers.{bid}.norm", # mamba
|
||||
"transformer.decoder_layer.{bid}.rms_norm", # Grok
|
||||
"transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
|
||||
"encoder.layers.{bid}.input_layernorm", # chatglm
|
||||
"transformer.layers.{bid}.attn_norm", # openelm
|
||||
"rwkv.blocks.{bid}.ln1", # rwkv6
|
||||
"model.layers.{bid}.ln1", # rwkv7
|
||||
"model.layers.{bid}.input_layernorm", # llama4
|
||||
),
|
||||
# Attention norm 2
|
||||
MODEL_TENSOR.ATTN_NORM_2: (
|
||||
"transformer.h.{bid}.ln_attn", # falcon40b
|
||||
"encoder.layer.{bid}.layer_norm_1", # jina-v2-code
|
||||
"rwkv.blocks.{bid}.ln2", # rwkv6
|
||||
"model.layers.{bid}.ln2", # rwkv7
|
||||
),
|
||||
# Attention query-key-value
|
||||
MODEL_TENSOR.ATTN_QKV: (
|
||||
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
|
||||
"transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
|
||||
"transformer.blocks.{bid}.attn.Wqkv", # mpt
|
||||
"transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
|
||||
"transformer.h.{bid}.self_attention.query_key_value", # falcon
|
||||
"h.{bid}.self_attention.query_key_value", # bloom
|
||||
"language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
|
||||
"model.layers.{bid}.self_attn.query_key_value", # persimmon
|
||||
"h.{bid}.attn.c_attn", # gpt2
|
||||
"transformer.h.{bid}.mixer.Wqkv", # phi2
|
||||
"encoder.layers.{bid}.attn.Wqkv", # nomic-bert
|
||||
"model.layers.{bid}.self_attn.qkv_proj", # phi3
|
||||
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
|
||||
"transformer.layers.{bid}.attn.qkv_proj", # openelm
|
||||
),
|
||||
# Attention query
|
||||
MODEL_TENSOR.ATTN_Q: (
|
||||
"model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2 phimoe
|
||||
"model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
|
||||
"layers.{bid}.attention.wq", # llama-pth
|
||||
"encoder.layer.{bid}.attention.self.query", # bert
|
||||
"transformer.h.{bid}.attn.q_proj", # gpt-j
|
||||
"model.layers.layers.{bid}.self_attn.q_proj", # plamo
|
||||
"model.layers.{bid}.attention.wq", # internlm2
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
|
||||
"transformer.h.{bid}.attn.attention.q_proj", # exaone
|
||||
"model.layers.{bid}.self_attn.q_proj", # llama4
|
||||
),
|
||||
# Attention key
|
||||
MODEL_TENSOR.ATTN_K: (
|
||||
"model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2 phimoe
|
||||
"model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
|
||||
"layers.{bid}.attention.wk", # llama-pth
|
||||
"encoder.layer.{bid}.attention.self.key", # bert
|
||||
"transformer.h.{bid}.attn.k_proj", # gpt-j
|
||||
"transformer.h.{bid}.attn.k", # refact
|
||||
"model.layers.layers.{bid}.self_attn.k_proj", # plamo
|
||||
"model.layers.{bid}.attention.wk", # internlm2
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
|
||||
"transformer.h.{bid}.attn.attention.k_proj", # exaone
|
||||
"model.layers.{bid}.self_attn.k_proj", # llama4
|
||||
),
|
||||
# Attention value
|
||||
MODEL_TENSOR.ATTN_V: (
|
||||
"model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2 phimoe
|
||||
"layers.{bid}.attention.wv", # llama-pth
|
||||
"encoder.layer.{bid}.attention.self.value", # bert
|
||||
"transformer.h.{bid}.attn.v_proj", # gpt-j
|
||||
"transformer.h.{bid}.attn.v", # refact
|
||||
"model.layers.layers.{bid}.self_attn.v_proj", # plamo
|
||||
"model.layers.{bid}.attention.wv", # internlm2
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
|
||||
"transformer.h.{bid}.attn.attention.v_proj", # exaone
|
||||
"model.layers.{bid}.self_attn.v_proj", # llama4
|
||||
),
|
||||
# Attention output
|
||||
MODEL_TENSOR.ATTN_OUT: (
|
||||
"gpt_neox.layers.{bid}.attention.dense", # gptneox
|
||||
"transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
|
||||
"transformer.blocks.{bid}.attn.out_proj", # mpt
|
||||
"transformer.h.{bid}.self_attention.dense", # falcon
|
||||
"h.{bid}.self_attention.dense", # bloom
|
||||
"model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe
|
||||
"model.layers.{bid}.self_attn.linear_attn", # deci
|
||||
"layers.{bid}.attention.wo", # llama-pth
|
||||
"encoder.layer.{bid}.attention.output.dense", # bert
|
||||
"transformer.h.{bid}.attn.out_proj", # gpt-j
|
||||
"language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
|
||||
"model.layers.{bid}.self_attn.dense", # persimmon
|
||||
"h.{bid}.attn.c_proj", # gpt2
|
||||
"transformer.h.{bid}.mixer.out_proj", # phi2
|
||||
"model.layers.layers.{bid}.self_attn.o_proj", # plamo
|
||||
"model.layers.{bid}.attention.wo", # internlm2
|
||||
"encoder.layers.{bid}.attn.out_proj", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
|
||||
"transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
|
||||
"encoder.layers.{bid}.self_attention.dense", # chatglm
|
||||
"transformer.layers.{bid}.attn.out_proj", # openelm
|
||||
"transformer.h.{bid}.attn.attention.out_proj", # exaone
|
||||
"model.layers.{bid}.self_attn.o_proj", # llama4
|
||||
),
|
||||
# Attention output norm
|
||||
MODEL_TENSOR.ATTN_OUT_NORM: (
|
||||
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
|
||||
"encoder.layers.{bid}.norm1", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok
|
||||
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
|
||||
),
|
||||
MODEL_TENSOR.ATTN_POST_NORM: (
|
||||
"model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge
|
||||
"model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414
|
||||
),
|
||||
# Rotary embeddings
|
||||
MODEL_TENSOR.ATTN_ROT_EMBD: (
|
||||
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
|
||||
"layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
|
||||
"model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
|
||||
"transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell
|
||||
),
|
||||
# Feed-forward norm
|
||||
MODEL_TENSOR.FFN_NORM: (
|
||||
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
|
||||
"transformer.h.{bid}.ln_2", # gpt2 refact qwen jais exaone
|
||||
"h.{bid}.post_attention_layernorm", # bloom
|
||||
"transformer.blocks.{bid}.norm_2", # mpt
|
||||
"model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron olmoe phimoe
|
||||
"layers.{bid}.ffn_norm", # llama-pth
|
||||
"language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
|
||||
"model.layers.{bid}.ln2", # yi
|
||||
"h.{bid}.ln_2", # gpt2
|
||||
"model.layers.{bid}.ffn_norm", # internlm2
|
||||
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
|
||||
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
||||
"transformer.layers.{bid}.ffn_norm", # openelm
|
||||
"model.layers.{bid}.post_attention_layernorm", # llama4
|
||||
),
|
||||
# Pre feed-forward norm
|
||||
MODEL_TENSOR.FFN_PRE_NORM: (
|
||||
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2
|
||||
),
|
||||
# Post feed-forward norm
|
||||
MODEL_TENSOR.FFN_POST_NORM: (
|
||||
"model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
|
||||
"model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
|
||||
),
|
||||
MODEL_TENSOR.FFN_GATE_INP: (
|
||||
"layers.{bid}.feed_forward.gate", # mixtral
|
||||
"model.layers.{bid}.block_sparse_moe.gate", # mixtral phimoe
|
||||
"model.layers.{bid}.mlp.gate", # qwen2moe olmoe
|
||||
"transformer.decoder_layer.{bid}.router", # Grok
|
||||
"transformer.blocks.{bid}.ffn.router.layer", # dbrx
|
||||
"model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
|
||||
"model.layers.{bid}.feed_forward.router", # llama4
|
||||
"encoder.layers.{bid}.mlp.router.layer", # nomic-bert-moe
|
||||
),
|
||||
MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
|
||||
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
|
||||
),
|
||||
MODEL_TENSOR.FFN_EXP_PROBS_B: (
|
||||
"model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
|
||||
),
|
||||
# Feed-forward up
|
||||
MODEL_TENSOR.FFN_UP: (
|
||||
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
|
||||
"transformer.h.{bid}.mlp.c_fc", # gpt2 jais
|
||||
"transformer.blocks.{bid}.ffn.up_proj", # mpt
|
||||
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
|
||||
"h.{bid}.mlp.dense_h_to_4h", # bloom
|
||||
"model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
|
||||
"layers.{bid}.feed_forward.w3", # llama-pth
|
||||
"encoder.layer.{bid}.intermediate.dense", # bert
|
||||
"transformer.h.{bid}.mlp.fc_in", # gpt-j
|
||||
"transformer.h.{bid}.mlp.linear_3", # refact
|
||||
"language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
|
||||
"model.layers.{bid}.mlp.dense_h_to_4h", # persimmon
|
||||
"transformer.h.{bid}.mlp.w1", # qwen
|
||||
"h.{bid}.mlp.c_fc", # gpt2
|
||||
"transformer.h.{bid}.mlp.fc1", # phi2
|
||||
"model.layers.{bid}.mlp.fc1", # phi2
|
||||
"model.layers.{bid}.mlp.gate_up_proj", # phi3 glm-4-0414
|
||||
"model.layers.layers.{bid}.mlp.up_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w3", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
|
||||
"encoder.layers.{bid}.mlp.fc1", # nomic-bert-moe
|
||||
"model.layers.{bid}.mlp.c_fc", # starcoder2
|
||||
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
|
||||
"model.layers.{bid}.residual_mlp.w3", # arctic
|
||||
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
||||
"transformer.h.{bid}.mlp.c_fc_1", # exaone
|
||||
"model.layers.{bid}.feed_forward.up_proj", # llama4
|
||||
),
|
||||
MODEL_TENSOR.FFN_UP_EXP: (
|
||||
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
|
||||
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
|
||||
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
|
||||
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
|
||||
"model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
|
||||
"model.layers.{bid}.feed_forward.experts.up_proj", # llama4
|
||||
"encoder.layers.{bid}.mlp.experts.mlp.w1", # nomic-bert-moe
|
||||
),
|
||||
MODEL_TENSOR.FFN_UP_SHEXP: (
|
||||
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
|
||||
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
|
||||
"model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
|
||||
),
|
||||
# AWQ-activation gate
|
||||
MODEL_TENSOR.FFN_ACT: ("transformer.blocks.{bid}.ffn.act",), # mpt
|
||||
# Feed-forward gate
|
||||
MODEL_TENSOR.FFN_GATE: (
|
||||
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
|
||||
"layers.{bid}.feed_forward.w1", # llama-pth
|
||||
"transformer.h.{bid}.mlp.w2", # qwen
|
||||
"transformer.h.{bid}.mlp.c_fc2", # jais
|
||||
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w1", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
|
||||
"encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
|
||||
"transformer.h.{bid}.mlp.linear_1", # refact
|
||||
"model.layers.{bid}.residual_mlp.w1", # arctic
|
||||
"transformer.h.{bid}.mlp.c_fc_0", # exaone
|
||||
"model.layers.{bid}.feed_forward.gate_proj", # llama4
|
||||
),
|
||||
MODEL_TENSOR.FFN_GATE_EXP: (
|
||||
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
|
||||
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
|
||||
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
|
||||
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
|
||||
"model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
|
||||
"model.layers.{bid}.feed_forward.experts.gate_proj", # llama4
|
||||
),
|
||||
MODEL_TENSOR.FFN_GATE_SHEXP: (
|
||||
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
|
||||
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
|
||||
"model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4
|
||||
),
|
||||
# Feed-forward down
|
||||
MODEL_TENSOR.FFN_DOWN: (
|
||||
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
|
||||
"transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
|
||||
"transformer.blocks.{bid}.ffn.down_proj", # mpt
|
||||
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
|
||||
"h.{bid}.mlp.dense_4h_to_h", # bloom
|
||||
"model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
|
||||
"layers.{bid}.feed_forward.w2", # llama-pth
|
||||
"encoder.layer.{bid}.output.dense", # bert
|
||||
"transformer.h.{bid}.mlp.fc_out", # gpt-j
|
||||
"language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
|
||||
"model.layers.{bid}.mlp.dense_4h_to_h", # persimmon
|
||||
"h.{bid}.mlp.c_proj", # gpt2
|
||||
"transformer.h.{bid}.mlp.fc2", # phi2
|
||||
"model.layers.{bid}.mlp.fc2", # phi2
|
||||
"model.layers.layers.{bid}.mlp.down_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w2", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc2", # nomic-bert
|
||||
"model.layers.{bid}.mlp.c_proj", # starcoder2
|
||||
"encoder.layer.{bid}.mlp.wo", # jina-bert-v2
|
||||
"transformer.layers.{bid}.ffn.proj_2", # openelm
|
||||
"model.layers.{bid}.residual_mlp.w2", # arctic
|
||||
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
||||
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
||||
"model.layers.h.{bid}.mlp.c_proj", # exaone
|
||||
"model.layers.{bid}.feed_forward.down_proj", # llama4
|
||||
),
|
||||
MODEL_TENSOR.FFN_DOWN_EXP: (
|
||||
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
|
||||
"transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
|
||||
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
|
||||
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
|
||||
"model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
|
||||
"model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
|
||||
"model.layers.{bid}.feed_forward.experts.down_proj", # llama4
|
||||
"encoder.layers.{bid}.mlp.experts.mlp.w2", # nomic-bert-moe
|
||||
),
|
||||
MODEL_TENSOR.FFN_DOWN_SHEXP: (
|
||||
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
|
||||
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
|
||||
"model.layers.{bid}.feed_forward.shared_expert.down_proj", # llama4
|
||||
"model.layers.{bid}.shared_mlp.output_linear", # granitemoe
|
||||
),
|
||||
MODEL_TENSOR.ATTN_Q_NORM: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
||||
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
||||
"model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
|
||||
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
||||
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
||||
"transformer.layers.{bid}.attn.q_norm", # openelm
|
||||
),
|
||||
MODEL_TENSOR.ATTN_K_NORM: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
||||
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
||||
"model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
|
||||
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
||||
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
|
||||
"transformer.layers.{bid}.attn.k_norm", # openelm
|
||||
),
|
||||
MODEL_TENSOR.ROPE_FREQS: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
|
||||
),
|
||||
MODEL_TENSOR.LAYER_OUT_NORM: (
|
||||
"encoder.layer.{bid}.output.LayerNorm", # bert
|
||||
"encoder.layers.{bid}.norm2", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
|
||||
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
|
||||
"encoder.layer.{bid}.layer_norm_2", # jina-v2-code
|
||||
),
|
||||
MODEL_TENSOR.SSM_IN: (
|
||||
"model.layers.{bid}.in_proj",
|
||||
"backbone.layers.{bid}.mixer.in_proj",
|
||||
),
|
||||
MODEL_TENSOR.SSM_CONV1D: (
|
||||
"model.layers.{bid}.conv1d",
|
||||
"backbone.layers.{bid}.mixer.conv1d",
|
||||
),
|
||||
MODEL_TENSOR.SSM_X: (
|
||||
"model.layers.{bid}.x_proj",
|
||||
"backbone.layers.{bid}.mixer.x_proj",
|
||||
),
|
||||
MODEL_TENSOR.SSM_DT: (
|
||||
"model.layers.{bid}.dt_proj",
|
||||
"backbone.layers.{bid}.mixer.dt_proj",
|
||||
),
|
||||
MODEL_TENSOR.SSM_A: (
|
||||
"model.layers.{bid}.A_log",
|
||||
"backbone.layers.{bid}.mixer.A_log",
|
||||
),
|
||||
MODEL_TENSOR.SSM_D: (
|
||||
"model.layers.{bid}.D",
|
||||
"backbone.layers.{bid}.mixer.D",
|
||||
),
|
||||
MODEL_TENSOR.SSM_OUT: (
|
||||
"model.layers.{bid}.out_proj",
|
||||
"backbone.layers.{bid}.mixer.out_proj",
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_W0: ("model.layers.{bid}.attention.w0",), # rwkv7
|
||||
MODEL_TENSOR.TIME_MIX_W1: (
|
||||
"rwkv.blocks.{bid}.attention.time_maa_w1", # rwkv6
|
||||
"model.layers.{bid}.self_attn.time_maa_w1", # rwkv6qwen2
|
||||
"model.layers.{bid}.attention.w1", # rwkv7
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_W2: (
|
||||
"rwkv.blocks.{bid}.attention.time_maa_w2", # rwkv6
|
||||
"model.layers.{bid}.self_attn.time_maa_w2", # rwkv6qwen2
|
||||
"model.layers.{bid}.attention.w2", # rwkv7
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_A0: ("model.layers.{bid}.attention.a0",), # rwkv7
|
||||
MODEL_TENSOR.TIME_MIX_A1: ("model.layers.{bid}.attention.a1",), # rwkv7
|
||||
MODEL_TENSOR.TIME_MIX_A2: ("model.layers.{bid}.attention.a2",), # rwkv7
|
||||
MODEL_TENSOR.TIME_MIX_V0: ("model.layers.{bid}.attention.v0",), # rwkv7
|
||||
MODEL_TENSOR.TIME_MIX_V1: ("model.layers.{bid}.attention.v1",), # rwkv7
|
||||
MODEL_TENSOR.TIME_MIX_V2: ("model.layers.{bid}.attention.v2",), # rwkv7
|
||||
MODEL_TENSOR.TIME_MIX_G1: ("model.layers.{bid}.attention.g1",), # rwkv7
|
||||
MODEL_TENSOR.TIME_MIX_G2: ("model.layers.{bid}.attention.g2",), # rwkv7
|
||||
MODEL_TENSOR.TIME_MIX_K_K: ("model.layers.{bid}.attention.k_k",), # rwkv7
|
||||
MODEL_TENSOR.TIME_MIX_K_A: ("model.layers.{bid}.attention.k_a",), # rwkv7
|
||||
MODEL_TENSOR.TIME_MIX_R_K: ("model.layers.{bid}.attention.r_k",), # rwkv7
|
||||
MODEL_TENSOR.TIME_MIX_LERP_X: (
|
||||
"rwkv.blocks.{bid}.attention.time_maa_x", # rwkv6
|
||||
"model.layers.{bid}.self_attn.time_maa_x", # rwkv6qwen2
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_LERP_K: (
|
||||
"rwkv.blocks.{bid}.attention.time_maa_k", # rwkv6
|
||||
"model.layers.{bid}.self_attn.time_maa_k", # rwkv6qwen2
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_LERP_V: (
|
||||
"rwkv.blocks.{bid}.attention.time_maa_v", # rwkv6
|
||||
"model.layers.{bid}.self_attn.time_maa_v", # rwkv6qwen2
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_LERP_R: (
|
||||
"rwkv.blocks.{bid}.attention.time_maa_r", # rwkv6
|
||||
"model.layers.{bid}.self_attn.time_maa_r", # rwkv6qwen2
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_LERP_G: (
|
||||
"rwkv.blocks.{bid}.attention.time_maa_g", # rwkv6
|
||||
"model.layers.{bid}.self_attn.time_maa_g", # rwkv6qwen2
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_LERP_W: (
|
||||
"rwkv.blocks.{bid}.attention.time_maa_w", # rwkv6
|
||||
"model.layers.{bid}.self_attn.time_maa_w", # rwkv6qwen2
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_FIRST: (
|
||||
"rwkv.blocks.{bid}.attention.time_faaaa", # rwkv6
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_DECAY: (
|
||||
"rwkv.blocks.{bid}.attention.time_decay", # rwkv6
|
||||
"model.layers.{bid}.self_attn.time_decay", # rwkv6qwen2
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_DECAY_W1: (
|
||||
"rwkv.blocks.{bid}.attention.time_decay_w1", # rwkv6
|
||||
"model.layers.{bid}.self_attn.time_decay_w1", # rwkv6qwen2
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_DECAY_W2: (
|
||||
"rwkv.blocks.{bid}.attention.time_decay_w2", # rwkv6
|
||||
"model.layers.{bid}.self_attn.time_decay_w2", # rwkv6qwen2
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_KEY: (
|
||||
"rwkv.blocks.{bid}.attention.key", # rwkv6
|
||||
"model.layers.{bid}.self_attn.k_proj", # rwkv6qwen2
|
||||
"model.layers.{bid}.attention.key", # rwkv7
|
||||
"model.layers.{bid}.attention.k_proj", # rwkv7
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_VALUE: (
|
||||
"rwkv.blocks.{bid}.attention.value", # rwkv6
|
||||
"model.layers.{bid}.self_attn.v_proj", # rwkv6qwen2
|
||||
"model.layers.{bid}.attention.value", # rwkv7
|
||||
"model.layers.{bid}.attention.v_proj", # rwkv7
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_RECEPTANCE: (
|
||||
"rwkv.blocks.{bid}.attention.receptance", # rwkv6
|
||||
"model.layers.{bid}.self_attn.q_proj", # rwkv6qwen2
|
||||
"model.layers.{bid}.attention.receptance", # rwkv7
|
||||
"model.layers.{bid}.attention.r_proj", # rwkv7
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_GATE: (
|
||||
"rwkv.blocks.{bid}.attention.gate", # rwkv6
|
||||
"model.layers.{bid}.self_attn.gate", # rwkv6qwen2
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_LN: (
|
||||
"rwkv.blocks.{bid}.attention.ln_x", # rwkv6
|
||||
"model.layers.{bid}.attention.ln_x", # rwkv7
|
||||
),
|
||||
MODEL_TENSOR.TIME_MIX_OUTPUT: (
|
||||
"rwkv.blocks.{bid}.attention.output", # rwkv6
|
||||
"model.layers.{bid}.self_attn.o_proj", # rwkv6qwen2
|
||||
"model.layers.{bid}.attention.output", # rwkv7
|
||||
"model.layers.{bid}.attention.o_proj", # rwkv7
|
||||
),
|
||||
MODEL_TENSOR.CHANNEL_MIX_LERP_K: (
|
||||
"rwkv.blocks.{bid}.feed_forward.time_maa_k", # rwkv6
|
||||
"model.layers.{bid}.feed_forward.x_k", # rwkv7
|
||||
),
|
||||
MODEL_TENSOR.CHANNEL_MIX_LERP_R: (
|
||||
"rwkv.blocks.{bid}.feed_forward.time_maa_r", # rwkv6
|
||||
),
|
||||
MODEL_TENSOR.CHANNEL_MIX_KEY: (
|
||||
"rwkv.blocks.{bid}.feed_forward.key", # rwkv6
|
||||
"model.layers.{bid}.feed_forward.key", # rwkv7
|
||||
),
|
||||
MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: (
|
||||
"rwkv.blocks.{bid}.feed_forward.receptance", # rwkv6
|
||||
),
|
||||
MODEL_TENSOR.CHANNEL_MIX_VALUE: (
|
||||
"rwkv.blocks.{bid}.feed_forward.value", # rwkv6
|
||||
"model.layers.{bid}.feed_forward.value", # rwkv7
|
||||
),
|
||||
MODEL_TENSOR.ATTN_Q_A: ("model.layers.{bid}.self_attn.q_a_proj",), # deepseek2
|
||||
MODEL_TENSOR.ATTN_Q_B: ("model.layers.{bid}.self_attn.q_b_proj",), # deepseek2
|
||||
MODEL_TENSOR.ATTN_KV_A_MQA: (
|
||||
"model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
|
||||
),
|
||||
MODEL_TENSOR.ATTN_KV_B: (
|
||||
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
|
||||
),
|
||||
MODEL_TENSOR.ATTN_K_B: ("model.layers.{bid}.self_attn.k_b_proj",), # deepseek2
|
||||
MODEL_TENSOR.ATTN_V_B: ("model.layers.{bid}.self_attn.v_b_proj",), # deepseek2
|
||||
MODEL_TENSOR.ATTN_Q_A_NORM: (
|
||||
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
|
||||
),
|
||||
MODEL_TENSOR.ATTN_KV_A_NORM: (
|
||||
"model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
|
||||
),
|
||||
MODEL_TENSOR.ATTN_SUB_NORM: (
|
||||
"model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
|
||||
),
|
||||
MODEL_TENSOR.FFN_SUB_NORM: ("model.layers.{bid}.mlp.ffn_layernorm",), # bitnet
|
||||
MODEL_TENSOR.DEC_ATTN_NORM: ("decoder.block.{bid}.layer.0.layer_norm",), # t5
|
||||
MODEL_TENSOR.DEC_ATTN_Q: ("decoder.block.{bid}.layer.0.SelfAttention.q",), # t5
|
||||
MODEL_TENSOR.DEC_ATTN_K: ("decoder.block.{bid}.layer.0.SelfAttention.k",), # t5
|
||||
MODEL_TENSOR.DEC_ATTN_V: ("decoder.block.{bid}.layer.0.SelfAttention.v",), # t5
|
||||
MODEL_TENSOR.DEC_ATTN_OUT: (
|
||||
"decoder.block.{bid}.layer.0.SelfAttention.o", # t5
|
||||
),
|
||||
MODEL_TENSOR.DEC_ATTN_REL_B: (
|
||||
"decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
|
||||
),
|
||||
MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
|
||||
"decoder.block.{bid}.layer.1.layer_norm", # t5
|
||||
),
|
||||
MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
|
||||
),
|
||||
MODEL_TENSOR.DEC_CROSS_ATTN_K: (
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
|
||||
),
|
||||
MODEL_TENSOR.DEC_CROSS_ATTN_V: (
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
|
||||
),
|
||||
MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
|
||||
),
|
||||
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
|
||||
"decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
|
||||
),
|
||||
MODEL_TENSOR.DEC_FFN_NORM: ("decoder.block.{bid}.layer.2.layer_norm",), # t5
|
||||
MODEL_TENSOR.DEC_FFN_GATE: (
|
||||
"decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
|
||||
),
|
||||
MODEL_TENSOR.DEC_FFN_UP: (
|
||||
"decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
|
||||
"decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
|
||||
),
|
||||
MODEL_TENSOR.DEC_FFN_DOWN: (
|
||||
"decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
|
||||
),
|
||||
MODEL_TENSOR.DEC_OUTPUT_NORM: ("decoder.final_layer_norm",), # t5
|
||||
MODEL_TENSOR.ENC_ATTN_NORM: ("encoder.block.{bid}.layer.0.layer_norm",), # t5
|
||||
MODEL_TENSOR.ENC_ATTN_Q: ("encoder.block.{bid}.layer.0.SelfAttention.q",), # t5
|
||||
MODEL_TENSOR.ENC_ATTN_K: ("encoder.block.{bid}.layer.0.SelfAttention.k",), # t5
|
||||
MODEL_TENSOR.ENC_ATTN_V: ("encoder.block.{bid}.layer.0.SelfAttention.v",), # t5
|
||||
MODEL_TENSOR.ENC_ATTN_OUT: (
|
||||
"encoder.block.{bid}.layer.0.SelfAttention.o", # t5
|
||||
),
|
||||
MODEL_TENSOR.ENC_ATTN_REL_B: (
|
||||
"encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
|
||||
),
|
||||
MODEL_TENSOR.ENC_FFN_NORM: ("encoder.block.{bid}.layer.1.layer_norm",), # t5
|
||||
MODEL_TENSOR.ENC_FFN_GATE: (
|
||||
"encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
|
||||
),
|
||||
MODEL_TENSOR.ENC_FFN_UP: (
|
||||
"encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
|
||||
"encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
|
||||
),
|
||||
MODEL_TENSOR.ENC_FFN_DOWN: (
|
||||
"encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
|
||||
),
|
||||
############################################################################
|
||||
# TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
|
||||
MODEL_TENSOR.ENC_OUTPUT_NORM: ("encoder.final_layer_norm",), # t5
|
||||
MODEL_TENSOR.CLS: (
|
||||
"classifier", # jina
|
||||
"classifier.dense", # roberta
|
||||
),
|
||||
MODEL_TENSOR.CLS_OUT: ("classifier.out_proj",), # roberta
|
||||
#############################################################################
|
||||
MODEL_TENSOR.CONVNEXT_DW: ("backbone.convnext.{bid}.dwconv",), # wavtokenizer
|
||||
MODEL_TENSOR.CONVNEXT_NORM: ("backbone.convnext.{bid}.norm",), # wavtokenizer
|
||||
MODEL_TENSOR.CONVNEXT_PW1: ("backbone.convnext.{bid}.pwconv1",), # wavtokenizer
|
||||
MODEL_TENSOR.CONVNEXT_PW2: ("backbone.convnext.{bid}.pwconv2",), # wavtokenizer
|
||||
MODEL_TENSOR.CONVNEXT_GAMMA: ("backbone.convnext.{bid}.gamma",), # wavtokenizer
|
||||
MODEL_TENSOR.POSNET_CONV1: ("backbone.posnet.{bid}.conv1",), # wavtokenizer
|
||||
MODEL_TENSOR.POSNET_CONV2: ("backbone.posnet.{bid}.conv2",), # wavtokenizer
|
||||
MODEL_TENSOR.POSNET_NORM: ("backbone.posnet.{bid}.norm",), # wavtokenizer
|
||||
MODEL_TENSOR.POSNET_NORM1: ("backbone.posnet.{bid}.norm1",), # wavtokenizer
|
||||
MODEL_TENSOR.POSNET_NORM2: ("backbone.posnet.{bid}.norm2",), # wavtokenizer
|
||||
MODEL_TENSOR.POSNET_ATTN_NORM: ("backbone.posnet.{bid}.norm",), # wavtokenizer
|
||||
MODEL_TENSOR.POSNET_ATTN_Q: ("backbone.posnet.{bid}.q",), # wavtokenizer
|
||||
MODEL_TENSOR.POSNET_ATTN_K: ("backbone.posnet.{bid}.k",), # wavtokenizer
|
||||
MODEL_TENSOR.POSNET_ATTN_V: ("backbone.posnet.{bid}.v",), # wavtokenizer
|
||||
MODEL_TENSOR.POSNET_ATTN_OUT: (
|
||||
"backbone.posnet.{bid}.proj_out", # wavtokenizer
|
||||
),
|
||||
#############################################################################
|
||||
## Vision encoder
|
||||
MODEL_TENSOR.V_MMPROJ: (
|
||||
"multi_modal_projector.linear_{bid}",
|
||||
"visual.merger.mlp.{bid}", # qwen2vl
|
||||
),
|
||||
MODEL_TENSOR.V_MMPROJ_FC: (
|
||||
"model.connector.modality_projection.proj", # SmolVLM
|
||||
),
|
||||
MODEL_TENSOR.V_MMPROJ_MLP: (
|
||||
"model.mm_projector.mlp.mlp.{bid}",
|
||||
"mlp1.{bid}", # InternVL
|
||||
),
|
||||
MODEL_TENSOR.V_MMPROJ_PEG: ("model.mm_projector.peg.peg.{bid}",),
|
||||
MODEL_TENSOR.V_ENC_EMBD_CLS: (
|
||||
"vision_tower.vision_model.embeddings.class_embedding",
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_EMBD_PATCH: (
|
||||
"vision_tower.vision_model.embeddings.patch_embedding",
|
||||
"vpm.embeddings.patch_embedding",
|
||||
"model.vision_model.embeddings.patch_embedding", # SmolVLM
|
||||
"vision_tower.patch_conv", # pixtral
|
||||
"visual.patch_embed.proj", # qwen2vl
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_EMBD_POS: (
|
||||
"vision_tower.vision_model.embeddings.position_embedding",
|
||||
"vpm.embeddings.position_embedding",
|
||||
"model.vision_model.embeddings.position_embedding", # SmolVLM
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_ATTN_Q: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
|
||||
"vpm.encoder.layers.{bid}.self_attn.q_proj",
|
||||
"model.vision_model.encoder.layers.{bid}.self_attn.q_proj", # SmolVLM
|
||||
"vision_tower.transformer.layers.{bid}.attention.q_proj", # pixtral
|
||||
"visual.blocks.{bid}.attn.q", # qwen2vl, generated
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_ATTN_Q_NORM: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.attn.q_norm", # InternVL
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_ATTN_K: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_proj",
|
||||
"vpm.encoder.layers.{bid}.self_attn.k_proj",
|
||||
"model.vision_model.encoder.layers.{bid}.self_attn.k_proj", # SmolVLM
|
||||
"vision_tower.transformer.layers.{bid}.attention.k_proj", # pixtral
|
||||
"visual.blocks.{bid}.attn.k", # qwen2vl, generated
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_ATTN_K_NORM: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.attn.k_norm", # InternVL
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_ATTN_V: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_proj",
|
||||
"vpm.encoder.layers.{bid}.self_attn.v_proj",
|
||||
"model.vision_model.encoder.layers.{bid}.self_attn.v_proj", # SmolVLM
|
||||
"vision_tower.transformer.layers.{bid}.attention.v_proj", # pixtral
|
||||
"visual.blocks.{bid}.attn.v", # qwen2vl, generated
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_INPUT_NORM: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.layer_norm1",
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.norm1", # InternVL
|
||||
"vpm.encoder.layers.{bid}.layer_norm1",
|
||||
"model.vision_model.encoder.layers.{bid}.layer_norm1", # SmolVLM
|
||||
"vision_tower.transformer.layers.{bid}.attention_norm", # pixtral
|
||||
"visual.blocks.{bid}.norm1", # qwen2vl
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_OUTPUT: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.attn.proj", # InternVL
|
||||
"vpm.encoder.layers.{bid}.self_attn.out_proj",
|
||||
"model.vision_model.encoder.layers.{bid}.self_attn.out_proj", # SmolVLM
|
||||
"vision_tower.transformer.layers.{bid}.attention.o_proj", # pixtral
|
||||
"visual.blocks.{bid}.attn.proj", # qwen2vl
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_OUTPUT_NORM: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.layer_norm2",
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.norm2", # InternVL
|
||||
"vpm.encoder.layers.{bid}.layer_norm2",
|
||||
"model.vision_model.encoder.layers.{bid}.layer_norm2", # SmolVLM
|
||||
"vision_tower.transformer.layers.{bid}.ffn_norm", # pixtral
|
||||
"visual.blocks.{bid}.norm2", # qwen2vl
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_FFN_UP: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1",
|
||||
"vpm.encoder.layers.{bid}.mlp.fc1",
|
||||
"model.vision_model.encoder.layers.{bid}.mlp.fc1", # SmolVLM, gemma3
|
||||
"vision_tower.transformer.layers.{bid}.feed_forward.up_proj", # pixtral
|
||||
"visual.blocks.{bid}.mlp.fc1", # qwen2vl
|
||||
"visual.blocks.{bid}.mlp.up_proj", # qwen2.5vl
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_FFN_GATE: (
|
||||
"vision_tower.transformer.layers.{bid}.feed_forward.gate_proj", # pixtral
|
||||
"visual.blocks.{bid}.mlp.gate_proj", # qwen2.5vl
|
||||
),
|
||||
MODEL_TENSOR.V_ENC_FFN_DOWN: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2",
|
||||
"vpm.encoder.layers.{bid}.mlp.fc2",
|
||||
"model.vision_model.encoder.layers.{bid}.mlp.fc2", # SmolVLM, gemma3
|
||||
"vision_tower.transformer.layers.{bid}.feed_forward.down_proj", # pixtral
|
||||
"visual.blocks.{bid}.mlp.fc2", # qwen2vl
|
||||
"visual.blocks.{bid}.mlp.down_proj", # qwen2.5vl
|
||||
),
|
||||
MODEL_TENSOR.V_LAYER_SCALE_1: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.ls1", # InternVL
|
||||
),
|
||||
MODEL_TENSOR.V_LAYER_SCALE_2: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.ls2", # InternVL
|
||||
),
|
||||
MODEL_TENSOR.V_PRE_NORM: (
|
||||
"vision_tower.vision_model.pre_layrnorm",
|
||||
"vision_tower.ln_pre", # pixtral
|
||||
),
|
||||
MODEL_TENSOR.V_POST_NORM: (
|
||||
"vision_tower.vision_model.post_layernorm",
|
||||
"model.vision_model.post_layernorm", # SmolVLM
|
||||
"visual.merger.ln_q", # qwen2vl
|
||||
),
|
||||
MODEL_TENSOR.V_MM_INP_PROJ: ("multi_modal_projector.mm_input_projection",),
|
||||
MODEL_TENSOR.V_MM_INP_NORM: ("multi_modal_projector.norm",),
|
||||
MODEL_TENSOR.V_MM_SOFT_EMB_NORM: ("multi_modal_projector.mm_soft_emb_norm",),
|
||||
MODEL_TENSOR.V_RESMPL_POS_EMBD_K: ("resampler.pos_embed_k",),
|
||||
MODEL_TENSOR.V_RESMPL_ATTN_Q: (
|
||||
"resampler.attn.in_proj_q", # tensor generated from resampler.attn.in_proj
|
||||
),
|
||||
MODEL_TENSOR.V_RESMPL_ATTN_K: (
|
||||
"resampler.attn.in_proj_k", # tensor generated from resampler.attn.in_proj
|
||||
),
|
||||
MODEL_TENSOR.V_RESMPL_ATTN_V: (
|
||||
"resampler.attn.in_proj_v", # tensor generated from resampler.attn.in_proj
|
||||
),
|
||||
MODEL_TENSOR.V_RESMPL_ATTN_OUT: ("resampler.attn.out_proj",),
|
||||
MODEL_TENSOR.V_RESMPL_KV: ("resampler.kv_proj",),
|
||||
MODEL_TENSOR.V_RESMPL_POST_NORM: ("resampler.ln_post",),
|
||||
MODEL_TENSOR.V_RESMPL_KV_NORM: ("resampler.ln_kv",),
|
||||
MODEL_TENSOR.V_RESMPL_Q_NORM: ("resampler.ln_q",),
|
||||
MODEL_TENSOR.V_RESMPL_PROJ: ("resampler.proj",),
|
||||
MODEL_TENSOR.V_RESMPL_QUERY: ("resampler.query",),
|
||||
MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK: (
|
||||
"v.token_embd.img_break", # for pixtral, this is a generated vector
|
||||
),
|
||||
MODEL_TENSOR.V_MM_PATCH_MERGER: (
|
||||
"multi_modal_projector.patch_merger.merging_layer", # mistral small 3.1
|
||||
),
|
||||
}
|
||||
|
||||
# architecture-specific block mappings
|
||||
arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = {
|
||||
MODEL_ARCH.ARCTIC: {
|
||||
MODEL_TENSOR.FFN_NORM: ("model.layers.{bid}.residual_layernorm",),
|
||||
MODEL_TENSOR.FFN_NORM_EXP: ("model.layers.{bid}.post_attention_layernorm",),
|
||||
},
|
||||
}
|
||||
|
||||
mapping: dict[str, tuple[MODEL_TENSOR, str]]
|
||||
|
||||
def __init__(self, arch: MODEL_ARCH, n_blocks: int):
|
||||
self.mapping = {}
|
||||
for tensor, keys in self.mappings_cfg.items():
|
||||
if tensor not in MODEL_TENSORS[arch]:
|
||||
continue
|
||||
tensor_name = TENSOR_NAMES[tensor]
|
||||
self.mapping[tensor_name] = (tensor, tensor_name)
|
||||
for key in keys:
|
||||
self.mapping[key] = (tensor, tensor_name)
|
||||
if arch in self.arch_block_mappings_cfg:
|
||||
self.block_mappings_cfg.update(self.arch_block_mappings_cfg[arch])
|
||||
for bid in range(n_blocks):
|
||||
for tensor, keys in self.block_mappings_cfg.items():
|
||||
if tensor not in MODEL_TENSORS[arch]:
|
||||
continue
|
||||
|
||||
tensor_name = TENSOR_NAMES[tensor].format(bid=bid)
|
||||
self.mapping[tensor_name] = (tensor, tensor_name)
|
||||
for key in keys:
|
||||
key = key.format(bid=bid)
|
||||
self.mapping[key] = (tensor, tensor_name)
|
||||
|
||||
def get_type_and_name(
|
||||
self, key: str, try_suffixes: Sequence[str] = ()
|
||||
) -> tuple[MODEL_TENSOR, str] | None:
|
||||
result = self.mapping.get(key)
|
||||
if result is not None:
|
||||
return result
|
||||
for suffix in try_suffixes:
|
||||
if key.endswith(suffix):
|
||||
result = self.mapping.get(key[: -len(suffix)])
|
||||
if result is not None:
|
||||
return result[0], result[1] + suffix
|
||||
return None
|
||||
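# e.g. with try_suffixes=(".weight", ".bias"), a lookup for
# "model.embed_tokens.weight" falls back to the base key "model.embed_tokens"
# and re-appends ".weight" to the canonical name it maps to.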
|
||||
def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
|
||||
result = self.get_type_and_name(key, try_suffixes=try_suffixes)
|
||||
if result is None:
|
||||
return None
|
||||
return result[1]
|
||||
|
||||
def get_type(
|
||||
self, key: str, try_suffixes: Sequence[str] = ()
|
||||
) -> MODEL_TENSOR | None:
|
||||
result = self.get_type_and_name(key, try_suffixes=try_suffixes)
|
||||
if result is None:
|
||||
return None
|
||||
return result[0]
|
||||
|
||||
def __getitem__(self, key: str) -> str:
|
||||
try:
|
||||
return self.mapping[key][1]
|
||||
except KeyError:
|
||||
raise KeyError(key)
|
||||
|
||||
def __contains__(self, key: str) -> bool:
|
||||
return key in self.mapping
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return repr(self.mapping)
|
||||
|
||||
|
||||
def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
|
||||
return TensorNameMap(arch, n_blocks)
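# Minimal usage sketch (illustrative, not part of the module): resolve a
# checkpoint tensor name to its canonical GGUF name for a given architecture.
# The architecture and tensor names below are examples; any MODEL_ARCH member
# works the same way.
#
#   tmap = get_tensor_name_map(MODEL_ARCH.LLAMA, n_blocks=32)
#   tmap.get_name("model.layers.0.self_attn.q_proj.weight", try_suffixes=(".weight", ".bias"))
#   # expected to resolve to "blk.0.attn_q.weight"
#   tmap.get_type("model.embed_tokens", try_suffixes=())
#   # expected to be MODEL_TENSOR.TOKEN_EMBD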
|
|
@@ -0,0 +1,316 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Literal
|
||||
|
||||
import os
|
||||
import json
|
||||
|
||||
|
||||
def fill_templated_filename(filename: str, output_type: str | None) -> str:
|
||||
# Given a file name, fill in any type templates, e.g. 'some-model-name.{ftype}.gguf'
|
||||
ftype_lowercase: str = output_type.lower() if output_type is not None else ""
|
||||
ftype_uppercase: str = output_type.upper() if output_type is not None else ""
|
||||
return filename.format(
|
||||
ftype_lowercase,
|
||||
outtype=ftype_lowercase,
|
||||
ftype=ftype_lowercase,
|
||||
OUTTYPE=ftype_uppercase,
|
||||
FTYPE=ftype_uppercase,
|
||||
)
|
||||
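# Example: fill_templated_filename("some-model-name-{ftype}.gguf", "Q8_0")
# yields "some-model-name-q8_0.gguf"; with output_type=None the template
# fields are filled with empty strings.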
|
||||
|
||||
def model_weight_count_rounded_notation(
|
||||
model_params_count: int, min_digits: int = 2
|
||||
) -> str:
|
||||
if model_params_count > 1e12:
|
||||
# Trillions Of Parameters
|
||||
scaled_model_params = model_params_count * 1e-12
|
||||
scale_suffix = "T"
|
||||
elif model_params_count > 1e9:
|
||||
# Billions Of Parameters
|
||||
scaled_model_params = model_params_count * 1e-9
|
||||
scale_suffix = "B"
|
||||
elif model_params_count > 1e6:
|
||||
# Millions Of Parameters
|
||||
scaled_model_params = model_params_count * 1e-6
|
||||
scale_suffix = "M"
|
||||
else:
|
||||
# Thousands Of Parameters
|
||||
scaled_model_params = model_params_count * 1e-3
|
||||
scale_suffix = "K"
|
||||
|
||||
fix = max(min_digits - len(str(round(scaled_model_params)).lstrip("0")), 0)
|
||||
|
||||
return f"{scaled_model_params:.{fix}f}{scale_suffix}"
|
||||
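# Examples: 7_241_732_096 -> "7.2B", 124_439_808 -> "124M", 568_000 -> "568K".
# min_digits only adds decimals when the rounded value has fewer digits than requested.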
|
||||
|
||||
def size_label(
|
||||
total_params: int, shared_params: int, expert_params: int, expert_count: int
|
||||
) -> str:
|
||||
|
||||
if expert_count > 0:
|
||||
pretty_size = model_weight_count_rounded_notation(
|
||||
abs(shared_params) + abs(expert_params), min_digits=2
|
||||
)
|
||||
size_class = f"{expert_count}x{pretty_size}"
|
||||
else:
|
||||
size_class = model_weight_count_rounded_notation(
|
||||
abs(total_params), min_digits=2
|
||||
)
|
||||
|
||||
return size_class
|
||||
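# Examples: a dense model with 6_738_415_616 total parameters labels as "6.7B";
# with expert_count=8 and shared+expert parameters summing to ~7e9 it labels as "8x7.0B".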
|
||||
|
||||
def naming_convention(
|
||||
model_name: str | None,
|
||||
base_name: str | None,
|
||||
finetune_string: str | None,
|
||||
version_string: str | None,
|
||||
size_label: str | None,
|
||||
output_type: str | None,
|
||||
model_type: Literal["vocab", "LoRA"] | None = None,
|
||||
) -> str:
|
||||
# Reference: https://github.com/ggml-org/ggml/blob/master/docs/gguf.md#gguf-naming-convention
|
||||
|
||||
if base_name is not None:
|
||||
name = base_name.strip().replace(" ", "-").replace("/", "-")
|
||||
elif model_name is not None:
|
||||
name = model_name.strip().replace(" ", "-").replace("/", "-")
|
||||
else:
|
||||
name = "ggml-model"
|
||||
|
||||
parameters = f"-{size_label}" if size_label is not None else ""
|
||||
|
||||
finetune = (
|
||||
f"-{finetune_string.strip().replace(' ', '-')}"
|
||||
if finetune_string is not None
|
||||
else ""
|
||||
)
|
||||
|
||||
version = (
|
||||
f"-{version_string.strip().replace(' ', '-')}"
|
||||
if version_string is not None
|
||||
else ""
|
||||
)
|
||||
|
||||
encoding = (
|
||||
f"-{output_type.strip().replace(' ', '-').upper()}"
|
||||
if output_type is not None
|
||||
else ""
|
||||
)
|
||||
|
||||
kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""
|
||||
|
||||
return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
|
||||
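# Example (names are illustrative):
#   naming_convention(None, "Qwen2.5 7B", "Instruct", "v1", "7.6B", "q4_k_m")
# is expected to produce "Qwen2.5-7B-7.6B-Instruct-v1-Q4_K_M", following the
# GGUF naming convention referenced above.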
|
||||
|
||||
@dataclass
|
||||
class RemoteTensor:
|
||||
dtype: str
|
||||
shape: tuple[int, ...]
|
||||
offset_start: int
|
||||
size: int
|
||||
url: str
|
||||
|
||||
def data(self) -> bytearray:
|
||||
# TODO: handle request errors (maybe with limited retries?)
|
||||
# NOTE: using a bytearray, otherwise PyTorch complains the buffer is not writeable
|
||||
data = bytearray(
|
||||
SafetensorRemote.get_data_by_range(
|
||||
url=self.url, start=self.offset_start, size=self.size
|
||||
)
|
||||
)
|
||||
return data
|
||||
|
||||
|
||||
class SafetensorRemote:
|
||||
"""
|
||||
Utility class to handle remote safetensor files.
|
||||
This class is designed to work with Hugging Face model repositories.
|
||||
|
||||
Example (one model has a single safetensor file, the other has multiple):
|
||||
for model_id in ["ngxson/TEST-Tiny-Llama4", "Qwen/Qwen2.5-7B-Instruct"]:
|
||||
tensors = SafetensorRemote.get_list_tensors_hf_model(model_id)
|
||||
print(tensors)
|
||||
|
||||
Example reading tensor data:
|
||||
tensors = SafetensorRemote.get_list_tensors_hf_model(model_id)
|
||||
for name, meta in tensors.items():
|
||||
# each value is a RemoteTensor; read the raw tensor bytes via its metadata
data = SafetensorRemote.get_data_by_range(meta.url, meta.offset_start, meta.size)
|
||||
print(data)
|
||||
"""
|
||||
|
||||
BASE_DOMAIN = "https://huggingface.co"
|
||||
ALIGNMENT = 8 # bytes
|
||||
|
||||
@classmethod
|
||||
def get_list_tensors_hf_model(cls, model_id: str) -> dict[str, RemoteTensor]:
|
||||
"""
|
||||
Get list of tensors from a Hugging Face model repository.
|
||||
|
||||
Returns a dictionary of tensor names and their metadata.
|
||||
Each tensor is represented as a RemoteTensor carrying (dtype, shape, offset_start, size, url)
|
||||
"""
|
||||
# case 1: model has a single model.safetensors file
|
||||
is_single_file = cls.check_file_exist(
|
||||
f"{cls.BASE_DOMAIN}/{model_id}/resolve/main/model.safetensors"
|
||||
)
|
||||
if is_single_file:
|
||||
url = f"{cls.BASE_DOMAIN}/{model_id}/resolve/main/model.safetensors"
|
||||
return cls.get_list_tensors(url)
|
||||
|
||||
# case 2: model has multiple files
|
||||
index_url = (
|
||||
f"{cls.BASE_DOMAIN}/{model_id}/resolve/main/model.safetensors.index.json"
|
||||
)
|
||||
is_multiple_files = cls.check_file_exist(index_url)
|
||||
if is_multiple_files:
|
||||
# read the index file
|
||||
index_data = cls.get_data_by_range(index_url, 0)
|
||||
index_str = index_data.decode("utf-8")
|
||||
index_json = json.loads(index_str)
|
||||
assert (
|
||||
index_json.get("weight_map") is not None
|
||||
), "weight_map not found in index file"
|
||||
weight_map = index_json["weight_map"]
|
||||
# get the list of files
|
||||
all_files = list(set(weight_map.values()))
|
||||
all_files.sort() # make sure we load shard files in order
|
||||
# get the list of tensors
|
||||
tensors: dict[str, RemoteTensor] = {}
|
||||
for file in all_files:
|
||||
url = f"{cls.BASE_DOMAIN}/{model_id}/resolve/main/{file}"
|
||||
for key, val in cls.get_list_tensors(url).items():
|
||||
tensors[key] = val
|
||||
return tensors
|
||||
|
||||
raise ValueError(f"Model {model_id} does not have any safetensor files")
|
||||
|
||||
@classmethod
|
||||
def get_list_tensors(cls, url: str) -> dict[str, RemoteTensor]:
|
||||
"""
|
||||
Get list of tensors from a remote safetensor file.
|
||||
|
||||
Returns a dictionary of tensor names and their metadata.
|
||||
Each tensor is represented as a RemoteTensor carrying (dtype, shape, offset_start, size, url)
|
||||
"""
|
||||
metadata, data_start_offset = cls.get_metadata(url)
|
||||
res: dict[str, RemoteTensor] = {}
|
||||
|
||||
for name, meta in metadata.items():
|
||||
if name == "__metadata__":
|
||||
continue
|
||||
if not isinstance(meta, dict):
|
||||
raise ValueError(f"Invalid metadata for tensor '{name}': {meta}")
|
||||
try:
|
||||
dtype = meta["dtype"]
|
||||
shape = meta["shape"]
|
||||
offset_start_relative, offset_end_relative = meta["data_offsets"]
|
||||
size = offset_end_relative - offset_start_relative
|
||||
offset_start = data_start_offset + offset_start_relative
|
||||
res[name] = RemoteTensor(
|
||||
dtype=dtype,
|
||||
shape=tuple(shape),
|
||||
offset_start=offset_start,
|
||||
size=size,
|
||||
url=url,
|
||||
)
|
||||
except KeyError as e:
|
||||
raise ValueError(
|
||||
f"Missing key in metadata for tensor '{name}': {e}, meta = {meta}"
|
||||
)
|
||||
|
||||
return res
|
||||
|
||||
@classmethod
|
||||
def get_metadata(cls, url: str) -> tuple[dict, int]:
|
||||
"""
|
||||
Get JSON metadata from a remote safetensor file.
|
||||
|
||||
Returns tuple of (metadata, data_start_offset)
|
||||
"""
|
||||
# Request first 5MB of the file (hopefully enough for metadata)
|
||||
read_size = 5 * 1024 * 1024
|
||||
raw_data = cls.get_data_by_range(url, 0, read_size)
|
||||
|
||||
# Parse header
|
||||
# First 8 bytes contain the metadata length as u64 little-endian
|
||||
if len(raw_data) < 8:
|
||||
raise ValueError("Not enough data to read metadata size")
|
||||
metadata_length = int.from_bytes(raw_data[:8], byteorder="little")
|
||||
|
||||
# Calculate the data start offset
|
||||
data_start_offset = 8 + metadata_length
|
||||
alignment = SafetensorRemote.ALIGNMENT
|
||||
if data_start_offset % alignment != 0:
|
||||
data_start_offset += alignment - (data_start_offset % alignment)
|
||||
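# e.g. metadata_length = 1234 gives 8 + 1234 = 1242, which is then padded up
# to the next multiple of ALIGNMENT (8), i.e. 1248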
|
||||
# Check if we have enough data to read the metadata
|
||||
if len(raw_data) < 8 + metadata_length:
|
||||
raise ValueError(
|
||||
f"Could not read complete metadata. Need {8 + metadata_length} bytes, got {len(raw_data)}"
|
||||
)
|
||||
|
||||
# Extract metadata bytes and parse as JSON
|
||||
metadata_bytes = raw_data[8 : 8 + metadata_length]
|
||||
metadata_str = metadata_bytes.decode("utf-8")
|
||||
try:
|
||||
metadata = json.loads(metadata_str)
|
||||
return metadata, data_start_offset
|
||||
except json.JSONDecodeError as e:
|
||||
raise ValueError(f"Failed to parse safetensor metadata as JSON: {e}")
|
||||
|
||||
@classmethod
|
||||
def get_data_by_range(cls, url: str, start: int, size: int = -1) -> bytes:
|
||||
"""
|
||||
Get raw byte data from a remote file by range.
|
||||
If size is not specified, it will read the entire file.
|
||||
"""
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
parsed_url = urlparse(url)
|
||||
if not parsed_url.scheme or not parsed_url.netloc:
|
||||
raise ValueError(f"Invalid URL: {url}")
|
||||
|
||||
headers = cls._get_request_headers()
|
||||
if size > -1:
|
||||
headers["Range"] = f"bytes={start}-{start + size}"
|
||||
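# HTTP byte ranges are inclusive, so this may fetch one extra byte;
# the [:size] slice on the returned content trims it back to `size`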
response = requests.get(url, allow_redirects=True, headers=headers)
|
||||
response.raise_for_status()
|
||||
|
||||
# Get raw byte data
|
||||
return response.content[:size]
|
||||
|
||||
@classmethod
|
||||
def check_file_exist(cls, url: str) -> bool:
|
||||
"""
|
||||
Check if a file exists at the given URL.
|
||||
Returns True if the file exists, False otherwise.
|
||||
"""
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
parsed_url = urlparse(url)
|
||||
if not parsed_url.scheme or not parsed_url.netloc:
|
||||
raise ValueError(f"Invalid URL: {url}")
|
||||
|
||||
try:
|
||||
headers = cls._get_request_headers()
|
||||
headers["Range"] = "bytes=0-0"
|
||||
response = requests.head(url, allow_redirects=True, headers=headers)
|
||||
# Success (2xx) or redirect (3xx)
|
||||
return 200 <= response.status_code < 400
|
||||
except requests.RequestException:
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def _get_request_headers(cls) -> dict[str, str]:
|
||||
"""Prepare common headers for requests."""
|
||||
headers = {"User-Agent": "convert_hf_to_gguf"}
|
||||
if os.environ.get("HF_TOKEN"):
|
||||
headers["Authorization"] = f"Bearer {os.environ['HF_TOKEN']}"
|
||||
return headers
|
|
@@ -0,0 +1,558 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import logging
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Sequence,
|
||||
Mapping,
|
||||
Iterable,
|
||||
Protocol,
|
||||
ClassVar,
|
||||
runtime_checkable,
|
||||
)
|
||||
|
||||
from sentencepiece import SentencePieceProcessor
|
||||
|
||||
import gguf
|
||||
|
||||
from .gguf_writer import GGUFWriter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SpecialVocab:
|
||||
merges: list[str]
|
||||
add_special_token: dict[str, bool]
|
||||
special_token_ids: dict[str, int]
|
||||
chat_template: str | Sequence[Mapping[str, str]] | None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
path: str | os.PathLike[str],
|
||||
load_merges: bool = False,
|
||||
special_token_types: Iterable[str] | None = None,
|
||||
n_vocab: int | None = None,
|
||||
):
|
||||
self.special_token_ids = {}
|
||||
self.add_special_token = {}
|
||||
self.n_vocab = n_vocab
|
||||
self.load_merges = load_merges
|
||||
self.merges = []
|
||||
self.chat_template = None
|
||||
if special_token_types is not None:
|
||||
self.special_token_types = special_token_types
|
||||
else:
|
||||
self.special_token_types = (
|
||||
"bos",
|
||||
"eos",
|
||||
"unk",
|
||||
"sep",
|
||||
"pad",
|
||||
"cls",
|
||||
"mask",
|
||||
)
|
||||
self._load(Path(path))
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return "<SpecialVocab with {} merges, special tokens {}, add special tokens {}>".format(
|
||||
len(self.merges),
|
||||
self.special_token_ids or "unset",
|
||||
self.add_special_token or "unset",
|
||||
)
|
||||
|
||||
def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
|
||||
if self.merges:
|
||||
if not quiet:
|
||||
logger.info(f"Adding {len(self.merges)} merge(s).")
|
||||
gw.add_token_merges(self.merges)
|
||||
elif self.load_merges:
|
||||
logger.warning(
|
||||
"Adding merges requested but no merges found, output may be non-functional."
|
||||
)
|
||||
for typ, tokid in self.special_token_ids.items():
|
||||
id_handler: Callable[[int], None] | None = getattr(
|
||||
gw, f"add_{typ}_token_id", None
|
||||
)
|
||||
if id_handler is None:
|
||||
logger.warning(
|
||||
f"No handler for special token type {typ} with id {tokid} - skipping"
|
||||
)
|
||||
continue
|
||||
if not quiet:
|
||||
logger.info(f"Setting special token type {typ} to {tokid}")
|
||||
id_handler(tokid)
|
||||
for typ, value in self.add_special_token.items():
|
||||
add_handler: Callable[[bool], None] | None = getattr(
|
||||
gw, f"add_add_{typ}_token", None
|
||||
)
|
||||
if add_handler is None:
|
||||
logger.warning(
|
||||
f"No handler for add_{typ}_token with value {value} - skipping"
|
||||
)
|
||||
continue
|
||||
if not quiet:
|
||||
logger.info(f"Setting add_{typ}_token to {value}")
|
||||
add_handler(value)
|
||||
if self.chat_template is not None:
|
||||
if not quiet:
|
||||
logger.info(f"Setting chat_template to {self.chat_template}")
|
||||
gw.add_chat_template(self.chat_template)
|
||||
|
||||
def _load(self, path: Path) -> None:
|
||||
self._try_load_from_tokenizer_json(path)
|
||||
self._try_load_from_config_json(path)
|
||||
if self.load_merges and not self.merges:
|
||||
self._try_load_merges_txt(path)
|
||||
|
||||
def _try_load_merges_txt(self, path: Path) -> bool:
|
||||
merges_file = path / "merges.txt"
|
||||
if not merges_file.is_file():
|
||||
return False
|
||||
with open(merges_file, "r", encoding="utf-8") as fp:
|
||||
first_line = next(fp, "").strip()
|
||||
if not first_line.startswith("#"):
|
||||
fp.seek(0)
|
||||
line_num = 0
|
||||
else:
|
||||
line_num = 1
|
||||
merges = []
|
||||
for line in fp:
|
||||
line_num += 1
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
parts = line.split(None, 3)
|
||||
if len(parts) != 2:
|
||||
logger.warning(
|
||||
f"{merges_file.name}: Line {line_num}: Entry malformed, ignoring"
|
||||
)
|
||||
continue
|
||||
merges.append(f"{parts[0]} {parts[1]}")
|
||||
self.merges = merges
|
||||
return True
|
||||
|
||||
def _set_special_token(self, typ: str, tid: Any) -> None:
|
||||
if not isinstance(tid, int):
|
||||
return
|
||||
if tid < 0:
|
||||
raise ValueError(f"invalid value for special token type {typ}: {tid}")
|
||||
if self.n_vocab is None or tid < self.n_vocab:
|
||||
if typ in self.special_token_ids:
|
||||
return
|
||||
self.special_token_ids[typ] = tid
|
||||
return
|
||||
logger.warning(
|
||||
f"Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping"
|
||||
)
|
||||
|
||||
def _try_load_from_tokenizer_json(self, path: Path) -> bool:
|
||||
tokenizer_file = path / "tokenizer.json"
|
||||
if tokenizer_file.is_file():
|
||||
with open(tokenizer_file, encoding="utf-8") as f:
|
||||
tokenizer = json.load(f)
|
||||
if self.load_merges:
|
||||
merges = tokenizer.get("model", {}).get("merges")
|
||||
if isinstance(merges, list) and merges:
|
||||
if isinstance(merges[0], str):
|
||||
self.merges = merges
|
||||
elif (
|
||||
isinstance(merges[0], list)
|
||||
and len(merges[0]) == 2
|
||||
and isinstance(merges[0][0], str)
|
||||
):
|
||||
# New format since transformers 4.45 to support spaces in merges
|
||||
# ref: https://github.com/ggml-org/llama.cpp/issues/9692
|
||||
# TODO: internally store as the new format instead of converting to old
|
||||
if any(" " in s for pair in merges for s in pair):
|
||||
logger.warning(
|
||||
f'Spaces in merges detected, encoding as {chr(ord(" ") + 256)!r}'
|
||||
)
|
||||
self.merges = [
|
||||
" ".join(
|
||||
[
|
||||
# ensure the spaces are properly encoded
|
||||
"".join(
|
||||
chr(ord(c) + 256) if c == " " else c
|
||||
for c in part
|
||||
)
|
||||
for part in pair
|
||||
]
|
||||
)
|
||||
for pair in merges
|
||||
]
|
||||
else:
|
||||
raise ValueError("Unknown tokenizer merges format")
|
||||
added_tokens = tokenizer.get("added_tokens", {})
|
||||
else:
|
||||
added_tokens = {}
|
||||
tokenizer_config_file = path / "tokenizer_config.json"
|
||||
if not tokenizer_config_file.is_file():
|
||||
return True
|
||||
with open(tokenizer_config_file, encoding="utf-8") as f:
|
||||
tokenizer_config = json.load(f)
|
||||
chat_template_alt = None
|
||||
chat_template_file = path / "chat_template.json"
|
||||
if chat_template_file.is_file():
|
||||
with open(chat_template_file, encoding="utf-8") as f:
|
||||
chat_template_alt = json.load(f).get("chat_template")
|
||||
chat_template = tokenizer_config.get("chat_template", chat_template_alt)
|
||||
if chat_template is None or isinstance(chat_template, (str, list)):
|
||||
self.chat_template = chat_template
|
||||
else:
|
||||
logger.warning(
|
||||
f"Bad type for chat_template field in {tokenizer_config_file!r} - ignoring"
|
||||
)
|
||||
for typ in self.special_token_types:
|
||||
add_entry = tokenizer_config.get(f"add_{typ}_token")
|
||||
if isinstance(add_entry, bool):
|
||||
self.add_special_token[typ] = add_entry
|
||||
entry = tokenizer_config.get(f"{typ}_token")
|
||||
if isinstance(entry, str):
|
||||
tc_content = entry
|
||||
elif isinstance(entry, dict):
|
||||
entry_content = entry.get("content")
|
||||
if not isinstance(entry_content, str):
|
||||
continue
|
||||
tc_content = entry_content
|
||||
else:
|
||||
continue
|
||||
# We only need the first match here.
|
||||
maybe_token_id = next(
|
||||
(
|
||||
atok.get("id")
|
||||
for atok in added_tokens
|
||||
if atok.get("content") == tc_content
|
||||
),
|
||||
None,
|
||||
)
|
||||
self._set_special_token(typ, maybe_token_id)
|
||||
return True
|
||||
|
||||
def _try_load_from_config_json(self, path: Path) -> bool:
|
||||
config_file = path / "config.json"
|
||||
if not config_file.is_file():
|
||||
return False
|
||||
with open(config_file, encoding="utf-8") as f:
|
||||
config = json.load(f)
|
||||
for typ in self.special_token_types:
|
||||
self._set_special_token(typ, config.get(f"{typ}_token_id"))
|
||||
return True
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class BaseVocab(Protocol):
|
||||
tokenizer_model: ClassVar[str]
|
||||
name: ClassVar[str]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Vocab(BaseVocab, Protocol):
|
||||
vocab_size: int
|
||||
added_tokens_dict: dict[str, int]
|
||||
added_tokens_list: list[str]
|
||||
fname_tokenizer: Path
|
||||
|
||||
def __init__(self, base_path: Path): ...
|
||||
def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: ...
|
||||
|
||||
|
||||
class NoVocab(BaseVocab):
|
||||
tokenizer_model = "no_vocab"
|
||||
name = "no_vocab"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return "<NoVocab for a model without integrated vocabulary>"
|
||||
|
||||
|
||||
class BpeVocab(Vocab):
|
||||
tokenizer_model = "gpt2"
|
||||
name = "bpe"
|
||||
|
||||
def __init__(self, base_path: Path):
|
||||
added_tokens: dict[str, int] = {}
|
||||
|
||||
if (fname_tokenizer := base_path / "vocab.json").exists():
|
||||
# "slow" tokenizer
|
||||
with open(fname_tokenizer, encoding="utf-8") as f:
|
||||
self.vocab = json.load(f)
|
||||
|
||||
try:
|
||||
# FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
|
||||
with open(base_path / "added_tokens.json", encoding="utf-8") as f:
|
||||
added_tokens = json.load(f)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
else:
|
||||
# "fast" tokenizer
|
||||
fname_tokenizer = base_path / "tokenizer.json"
|
||||
|
||||
# if this fails, FileNotFoundError propagates to caller
|
||||
with open(fname_tokenizer, encoding="utf-8") as f:
|
||||
tokenizer_json = json.load(f)
|
||||
|
||||
tokenizer_model: dict[str, Any] = tokenizer_json["model"]
|
||||
if (
|
||||
tokenizer_model["type"] != "BPE"
|
||||
or tokenizer_model.get("byte_fallback", False)
|
||||
or tokenizer_json["decoder"]["type"] != "ByteLevel"
|
||||
):
|
||||
raise FileNotFoundError("Cannot find GPT-2 BPE tokenizer")
|
||||
|
||||
self.vocab = tokenizer_model["vocab"]
|
||||
|
||||
if (added := tokenizer_json.get("added_tokens")) is not None:
|
||||
# Added tokens here can be duplicates of the main vocabulary.
|
||||
added_tokens = {
|
||||
item["content"]: item["id"]
|
||||
for item in added
|
||||
if item["content"] not in self.vocab
|
||||
}
|
||||
|
||||
vocab_size = len(self.vocab)
|
||||
expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
|
||||
actual_ids = sorted(added_tokens.values())
|
||||
if expected_ids != actual_ids:
|
||||
expected_end_id = vocab_size + len(actual_ids) - 1
|
||||
raise ValueError(
|
||||
f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
|
||||
f"{vocab_size} - {expected_end_id}; got {actual_ids}"
|
||||
)
|
||||
|
||||
items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
|
||||
self.added_tokens_dict = added_tokens
|
||||
self.added_tokens_list = [text for (text, idx) in items]
|
||||
self.vocab_size_base = vocab_size
|
||||
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
|
||||
self.fname_tokenizer = fname_tokenizer
|
||||
|
||||
def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
reverse_vocab = {id: encoded_tok for encoded_tok, id in self.vocab.items()}
|
||||
|
||||
for i, _ in enumerate(self.vocab):
|
||||
yield reverse_vocab[i], 0.0, gguf.TokenType.NORMAL
|
||||
|
||||
def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
for text in self.added_tokens_list:
|
||||
score = -1000.0
|
||||
yield text.encode("utf-8"), score, gguf.TokenType.CONTROL
|
||||
|
||||
def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
yield from self.bpe_tokens()
|
||||
yield from self.added_tokens()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<BpeVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
|
||||
|
||||
|
||||
class SentencePieceVocab(Vocab):
|
||||
tokenizer_model = "llama"
|
||||
name = "spm"
|
||||
|
||||
def __init__(self, base_path: Path):
|
||||
added_tokens: dict[str, int] = {}
|
||||
if (fname_tokenizer := base_path / "tokenizer.model").exists():
|
||||
# normal location
|
||||
try:
|
||||
with open(base_path / "added_tokens.json", encoding="utf-8") as f:
|
||||
added_tokens = json.load(f)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
elif not (fname_tokenizer := base_path.parent / "tokenizer.model").exists():
|
||||
# not found in alternate location either
|
||||
raise FileNotFoundError("Cannot find tokenizer.model")
|
||||
|
||||
self.sentencepiece_tokenizer = SentencePieceProcessor()
|
||||
self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer))
|
||||
vocab_size = self.sentencepiece_tokenizer.vocab_size()
|
||||
|
||||
new_tokens = {
|
||||
id: piece for piece, id in added_tokens.items() if id >= vocab_size
|
||||
}
|
||||
expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens)))
|
||||
actual_new_ids = sorted(new_tokens.keys())
|
||||
|
||||
if expected_new_ids != actual_new_ids:
|
||||
raise ValueError(
|
||||
f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}"
|
||||
)
|
||||
|
||||
# Token pieces that were added to the base vocabulary.
|
||||
self.added_tokens_dict = added_tokens
|
||||
self.added_tokens_list = [new_tokens[id] for id in actual_new_ids]
|
||||
self.vocab_size_base = vocab_size
|
||||
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
|
||||
self.fname_tokenizer = fname_tokenizer
|
||||
|
||||
def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
tokenizer = self.sentencepiece_tokenizer
|
||||
for i in range(tokenizer.vocab_size()):
|
||||
piece = tokenizer.IdToPiece(i)
|
||||
text = piece.encode("utf-8")
|
||||
score: float = tokenizer.GetScore(i)
|
||||
|
||||
toktype = gguf.TokenType.NORMAL
|
||||
if tokenizer.IsUnknown(i):
|
||||
toktype = gguf.TokenType.UNKNOWN
|
||||
if tokenizer.IsControl(i):
|
||||
toktype = gguf.TokenType.CONTROL
|
||||
|
||||
# NOTE: I think added_tokens are user defined.
|
||||
# ref: https://github.com/google/sentencepiece/blob/master/src/sentencepiece_model.proto
|
||||
# if tokenizer.is_user_defined(i): toktype = gguf.TokenType.USER_DEFINED
|
||||
|
||||
if tokenizer.IsUnused(i):
|
||||
toktype = gguf.TokenType.UNUSED
|
||||
if tokenizer.IsByte(i):
|
||||
toktype = gguf.TokenType.BYTE
|
||||
|
||||
yield text, score, toktype
|
||||
|
||||
def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
for text in self.added_tokens_list:
|
||||
score = -1000.0
|
||||
yield text.encode("utf-8"), score, gguf.TokenType.USER_DEFINED
|
||||
|
||||
def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
yield from self.sentencepiece_tokens()
|
||||
yield from self.added_tokens()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<SentencePieceVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
|
||||
|
||||
|
||||
class LlamaHfVocab(Vocab):
|
||||
tokenizer_model = "llama"
|
||||
name = "hfft"
|
||||
|
||||
def __init__(self, base_path: Path):
|
||||
fname_tokenizer = base_path / "tokenizer.json"
|
||||
# if this fails, FileNotFoundError propagates to caller
|
||||
with open(fname_tokenizer, encoding="utf-8") as f:
|
||||
tokenizer_json = json.load(f)
|
||||
|
||||
# pre-check so we know if we need transformers
|
||||
tokenizer_model: dict[str, Any] = tokenizer_json["model"]
|
||||
is_llama3 = (
|
||||
tokenizer_model["type"] == "BPE"
|
||||
and tokenizer_model.get("ignore_merges", False)
|
||||
and not tokenizer_model.get("byte_fallback", True)
|
||||
)
|
||||
if is_llama3:
|
||||
raise TypeError("Llama 3 must be converted with BpeVocab")
|
||||
|
||||
if not is_llama3 and (
|
||||
tokenizer_model["type"] != "BPE"
|
||||
or not tokenizer_model.get("byte_fallback", False)
|
||||
or tokenizer_json["decoder"]["type"] != "Sequence"
|
||||
):
|
||||
raise FileNotFoundError("Cannot find Llama BPE tokenizer")
|
||||
|
||||
try:
|
||||
from transformers import AutoTokenizer
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"To use LlamaHfVocab, please install the `transformers` package. "
|
||||
"You can install it with `pip install transformers`."
|
||||
) from e
|
||||
|
||||
# Allow the tokenizer to default to slow or fast versions.
|
||||
# Explicitly set tokenizer to use local paths.
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(
|
||||
base_path,
|
||||
cache_dir=base_path,
|
||||
local_files_only=True,
|
||||
)
|
||||
assert self.tokenizer.is_fast # assume tokenizer.json is used
|
||||
|
||||
# Initialize lists and dictionaries for added tokens
|
||||
self.added_tokens_list = []
|
||||
self.added_tokens_dict = dict()
|
||||
self.added_tokens_ids = set()
|
||||
|
||||
# Process added tokens
|
||||
for tok, tokidx in sorted(
|
||||
self.tokenizer.get_added_vocab().items(), key=lambda x: x[1]
|
||||
):
|
||||
# Only consider added tokens that are not in the base vocabulary
|
||||
if tokidx >= self.tokenizer.vocab_size:
|
||||
self.added_tokens_list.append(tok)
|
||||
self.added_tokens_dict[tok] = tokidx
|
||||
self.added_tokens_ids.add(tokidx)
|
||||
|
||||
# Store special tokens and their IDs
|
||||
self.specials = {
|
||||
tok: self.tokenizer.get_vocab()[tok]
|
||||
for tok in self.tokenizer.all_special_tokens
|
||||
}
|
||||
self.special_ids = set(self.tokenizer.all_special_ids)
|
||||
|
||||
# Set vocabulary sizes
|
||||
self.vocab_size_base = self.tokenizer.vocab_size
|
||||
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
|
||||
|
||||
self.fname_tokenizer = fname_tokenizer
|
||||
|
||||
def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
reverse_vocab = {
|
||||
id: encoded_tok for encoded_tok, id in self.tokenizer.get_vocab().items()
|
||||
}
|
||||
|
||||
for token_id in range(self.vocab_size_base):
|
||||
# Skip processing added tokens here
|
||||
if token_id in self.added_tokens_ids:
|
||||
continue
|
||||
|
||||
# Convert token text to bytes
|
||||
token_text = reverse_vocab[token_id].encode("utf-8")
|
||||
|
||||
# Yield token text, score, and type
|
||||
yield token_text, self.get_token_score(token_id), self.get_token_type(
|
||||
token_id,
|
||||
token_text,
|
||||
self.special_ids, # Reuse already stored special IDs
|
||||
)
|
||||
|
||||
def get_token_type(
|
||||
self, token_id: int, token_text: bytes, special_ids: set[int]
|
||||
) -> gguf.TokenType:
|
||||
# Special case for byte tokens
|
||||
if re.fullmatch(rb"<0x[0-9A-Fa-f]{2}>", token_text):
|
||||
return gguf.TokenType.BYTE
|
||||
|
||||
# Determine token type based on whether it's a special token
|
||||
return (
|
||||
gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL
|
||||
)
|
||||
|
||||
def get_token_score(self, token_id: int) -> float:
|
||||
# Placeholder for actual logic to determine the token's score
|
||||
# This needs to be implemented based on specific requirements
|
||||
return -1000.0 # Default score
|
||||
|
||||
def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
for text in self.added_tokens_list:
|
||||
if text in self.specials:
|
||||
toktype = self.get_token_type(
|
||||
self.specials[text], b"", self.special_ids
|
||||
)
|
||||
score = self.get_token_score(self.specials[text])
|
||||
else:
|
||||
toktype = gguf.TokenType.USER_DEFINED
|
||||
score = -1000.0
|
||||
|
||||
yield text.encode("utf-8"), score, toktype
|
||||
|
||||
def has_newline_token(self):
|
||||
return "<0x0A>" in self.tokenizer.vocab or "\n" in self.tokenizer.vocab
|
||||
|
||||
def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
yield from self.hf_tokens()
|
||||
yield from self.added_tokens()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<LlamaHfVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
@@ -0,0 +1,123 @@
import os
|
||||
import re
|
||||
import sys
|
||||
from typing import Any, IO, List, TextIO, Union
|
||||
|
||||
from PySide6.QtWidgets import (
|
||||
QMessageBox,
|
||||
)
|
||||
|
||||
from Localizations import (
|
||||
DOTENV_FILE_NOT_FOUND,
|
||||
COULD_NOT_PARSE_LINE,
|
||||
ERROR_LOADING_DOTENV,
|
||||
AUTOGGUF_VERSION,
|
||||
)
|
||||
|
||||
|
||||
def verify_gguf(file_path) -> bool:
|
||||
try:
|
||||
with open(file_path, "rb") as f:
|
||||
magic = f.read(4)
|
||||
return magic == b"GGUF"
|
||||
except (FileNotFoundError, IOError, OSError):
|
||||
return False
|
||||
|
||||
|
||||
def process_args(args: List[str]) -> bool:
|
||||
try:
|
||||
i = 1
|
||||
while i < len(args):
|
||||
key = (
|
||||
args[i][2:].replace("-", "_").upper()
|
||||
) # Strip the first two '--' and replace '-' with '_'
|
||||
if i + 1 < len(args) and not args[i + 1].startswith("--"):
|
||||
value = args[i + 1]
|
||||
i += 2
|
||||
else:
|
||||
value = "enabled"
|
||||
i += 1
|
||||
os.environ[key] = value
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def load_dotenv(self=Any) -> None:
|
||||
if not os.path.isfile(".env"):
|
||||
self.logger.warning(DOTENV_FILE_NOT_FOUND)
|
||||
return
|
||||
|
||||
try:
|
||||
with open(".env") as f:
|
||||
for line in f:
|
||||
# Strip leading/trailing whitespace
|
||||
line = line.strip()
|
||||
|
||||
# Ignore comments and empty lines
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
|
||||
# Match key-value pairs (unquoted and quoted values)
|
||||
match = re.match(r"^([^=]+)=(.*)$", line)
|
||||
if not match:
|
||||
self.logger.warning(COULD_NOT_PARSE_LINE.format(line))
|
||||
continue
|
||||
|
||||
key, value = match.groups()
|
||||
|
||||
# Remove any surrounding quotes from the value
|
||||
if value.startswith(("'", '"')) and value.endswith(("'", '"')):
|
||||
value = value[1:-1]
|
||||
|
||||
# Decode escape sequences
|
||||
value = bytes(value, "utf-8").decode("unicode_escape")
|
||||
|
||||
# Set the environment variable
|
||||
os.environ[key.strip()] = value.strip()
|
||||
except Exception as e:
|
||||
self.logger.error(ERROR_LOADING_DOTENV.format(e))
|
||||
|
||||
|
||||
def show_about(self) -> None:
|
||||
about_text = f"""AutoGGUF
|
||||
|
||||
Version: {AUTOGGUF_VERSION}
|
||||
|
||||
A tool for managing and converting GGUF models.
|
||||
This application is licensed under the Apache License 2.0.
|
||||
Copyright (c) 2024-2025 leafspark.
|
||||
It also utilizes llama.cpp, licensed under the MIT License.
|
||||
Copyright (c) 2023-2025 The ggml authors."""
|
||||
QMessageBox.about(self, "About AutoGGUF", about_text)
|
||||
|
||||
|
||||
def ensure_directory(path) -> None:
|
||||
if not os.path.exists(path):
|
||||
os.makedirs(path)
|
||||
|
||||
|
||||
def open_file_safe(file_path, mode="r") -> IO[Any]:
|
||||
encodings = ["utf-8", "latin-1", "ascii", "utf-16"]
|
||||
for encoding in encodings:
|
||||
try:
|
||||
return open(file_path, mode, encoding=encoding)
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
raise ValueError(
|
||||
f"Unable to open file {file_path} with any of the encodings: {encodings}"
|
||||
)
|
||||
|
||||
|
||||
def resource_path(relative_path) -> Union[str, bytes]:
|
||||
if hasattr(sys, "_MEIPASS"):
|
||||
# PyInstaller path
|
||||
base_path = sys._MEIPASS
|
||||
elif "__compiled__" in globals():
|
||||
# Nuitka path
|
||||
base_path = os.path.dirname(sys.executable)
|
||||
else:
|
||||
# Regular Python path
|
||||
base_path = os.path.abspath(".")
|
||||
|
||||
return os.path.join(base_path, relative_path)
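For reference, a short sketch (not part of the repository code) of how process_args above maps command-line flags onto environment variables; the flag names are illustrative:

import os

# "--autogguf-server enabled" becomes AUTOGGUF_SERVER=enabled;
# a flag given without a value is stored as "enabled".
process_args(["AutoGGUF", "--autogguf-server", "enabled", "--autogguf-server-port", "7001"])
assert os.environ["AUTOGGUF_SERVER"] == "enabled"
assert os.environ["AUTOGGUF_SERVER_PORT"] == "7001"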
@@ -1,21 +0,0 @@
import os
|
||||
import sys
|
||||
import psutil
|
||||
import subprocess
|
||||
import time
|
||||
import signal
|
||||
import json
|
||||
import platform
|
||||
import requests
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
from PyQt6.QtWidgets import (QApplication, QMainWindow, QVBoxLayout, QHBoxLayout, QWidget, QPushButton,
|
||||
QListWidget, QLineEdit, QLabel, QFileDialog, QProgressBar, QComboBox, QTextEdit,
|
||||
QCheckBox, QGroupBox, QFormLayout, QScrollArea, QSlider, QSpinBox, QListWidgetItem,
|
||||
QMessageBox, QDialog, QPlainTextEdit, QMenu)
|
||||
from PyQt6.QtCore import QTimer, QThread, pyqtSignal, Qt, QSize
|
||||
from PyQt6.QtGui import QCloseEvent, QAction
|
||||
|
||||
def ensure_directory(path):
|
||||
if not os.path.exists(path):
|
||||
os.makedirs(path)
|
@@ -0,0 +1,226 @@
from datetime import datetime
|
||||
|
||||
from PySide6.QtWidgets import (
|
||||
QFileDialog,
|
||||
QHBoxLayout,
|
||||
QLineEdit,
|
||||
QListWidgetItem,
|
||||
QPushButton,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from QuantizationThread import QuantizationThread
|
||||
from TaskListItem import TaskListItem
|
||||
from error_handling import handle_error, show_error
|
||||
from globals import ensure_directory
|
||||
from Localizations import *
|
||||
|
||||
|
||||
def export_lora(self) -> None:
|
||||
self.logger.info(STARTING_LORA_EXPORT)
|
||||
try:
|
||||
model_path = self.export_lora_model.text()
|
||||
output_path = self.export_lora_output.text()
|
||||
lora_adapters = []
|
||||
|
||||
for i in range(self.export_lora_adapters.count()):
|
||||
item = self.export_lora_adapters.item(i)
|
||||
adapter_widget = self.export_lora_adapters.itemWidget(item)
|
||||
path_input = adapter_widget.layout().itemAt(0).widget()
|
||||
scale_input = adapter_widget.layout().itemAt(1).widget()
|
||||
adapter_path = path_input.text()
|
||||
adapter_scale = scale_input.text()
|
||||
lora_adapters.append((adapter_path, adapter_scale))
|
||||
|
||||
if not model_path:
|
||||
raise ValueError(MODEL_PATH_REQUIRED)
|
||||
if not output_path:
|
||||
raise ValueError(OUTPUT_PATH_REQUIRED)
|
||||
if not lora_adapters:
|
||||
raise ValueError(AT_LEAST_ONE_LORA_ADAPTER_REQUIRED)
|
||||
|
||||
backend_path = self.backend_combo.currentData()
|
||||
if not backend_path:
|
||||
raise ValueError(NO_BACKEND_SELECTED)
|
||||
|
||||
command = [
|
||||
os.path.join(backend_path, "llama-export-lora"),
|
||||
"--model",
|
||||
model_path,
|
||||
"--output",
|
||||
output_path,
|
||||
]
|
||||
|
||||
for adapter_path, adapter_scale in lora_adapters:
|
||||
if adapter_path:
|
||||
if adapter_scale:
|
||||
try:
|
||||
scale_value = float(adapter_scale)
|
||||
command.extend(
|
||||
["--lora-scaled", adapter_path, str(scale_value)]
|
||||
)
|
||||
except ValueError:
|
||||
raise ValueError(INVALID_LORA_SCALE_VALUE)
|
||||
else:
|
||||
command.extend(["--lora", adapter_path])
|
||||
|
||||
threads = self.export_lora_threads.value()
|
||||
command.extend(["--threads", str(threads)])
|
||||
|
||||
logs_path = self.logs_input.text()
|
||||
ensure_directory(logs_path)
|
||||
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
log_file = os.path.join(logs_path, f"lora_export_{timestamp}.log")
|
||||
|
||||
command_str = " ".join(command)
|
||||
self.logger.info(f"{LORA_EXPORT_COMMAND}: {command_str}")
|
||||
|
||||
thread = QuantizationThread(command, backend_path, log_file)
|
||||
self.quant_threads.append(thread)
|
||||
|
||||
task_item = TaskListItem(EXPORTING_LORA, log_file, show_progress_bar=False)
|
||||
list_item = QListWidgetItem(self.task_list)
|
||||
list_item.setSizeHint(task_item.sizeHint())
|
||||
self.task_list.addItem(list_item)
|
||||
self.task_list.setItemWidget(list_item, task_item)
|
||||
|
||||
thread.status_signal.connect(task_item.update_status)
|
||||
thread.finished_signal.connect(lambda: self.task_finished(thread))
|
||||
thread.error_signal.connect(
|
||||
lambda err: handle_error(self.logger, err, task_item)
|
||||
)
|
||||
thread.start()
|
||||
self.logger.info(LORA_EXPORT_TASK_STARTED)
|
||||
except ValueError as e:
|
||||
show_error(self.logger, str(e))
|
||||
except Exception as e:
|
||||
show_error(self.logger, ERROR_STARTING_LORA_EXPORT.format(str(e)))
|
||||
|
||||
|
||||
def lora_conversion_finished(self, thread) -> None:
|
||||
self.logger.info(LORA_CONVERSION_FINISHED)
|
||||
if thread in self.quant_threads:
|
||||
self.quant_threads.remove(thread)
|
||||
|
||||
|
||||
def delete_lora_adapter_item(self, adapter_widget) -> None:
|
||||
self.logger.info(DELETING_LORA_ADAPTER)
|
||||
# Find the QListWidgetItem containing the adapter_widget
|
||||
for i in range(self.export_lora_adapters.count()):
|
||||
item = self.export_lora_adapters.item(i)
|
||||
if self.export_lora_adapters.itemWidget(item) == adapter_widget:
|
||||
self.export_lora_adapters.takeItem(i) # Remove the item
|
||||
break
|
||||
|
||||
|
||||
def browse_export_lora_model(self) -> None:
|
||||
self.logger.info(BROWSING_FOR_EXPORT_LORA_MODEL_FILE)
|
||||
model_file, _ = QFileDialog.getOpenFileName(self, SELECT_MODEL_FILE, "", GGUF_FILES)
|
||||
if model_file:
|
||||
self.export_lora_model.setText(os.path.abspath(model_file))
|
||||
|
||||
|
||||
def browse_export_lora_output(self) -> None:
|
||||
self.logger.info(BROWSING_FOR_EXPORT_LORA_OUTPUT_FILE)
|
||||
output_file, _ = QFileDialog.getSaveFileName(
|
||||
self, SELECT_OUTPUT_FILE, "", GGUF_FILES
|
||||
)
|
||||
if output_file:
|
||||
self.export_lora_output.setText(os.path.abspath(output_file))
|
||||
|
||||
|
||||
def add_lora_adapter(self) -> None:
|
||||
self.logger.info(ADDING_LORA_ADAPTER)
|
||||
adapter_path, _ = QFileDialog.getOpenFileName(
|
||||
self, SELECT_LORA_ADAPTER_FILE, "", LORA_FILES
|
||||
)
|
||||
if adapter_path:
|
||||
# Create a widget to hold the path and scale input
|
||||
adapter_widget = QWidget()
|
||||
adapter_layout = QHBoxLayout(adapter_widget)
|
||||
|
||||
path_input = QLineEdit(adapter_path)
|
||||
path_input.setReadOnly(True)
|
||||
adapter_layout.addWidget(path_input)
|
||||
|
||||
scale_input = QLineEdit("1.0") # Default scale value
|
||||
adapter_layout.addWidget(scale_input)
|
||||
|
||||
delete_button = QPushButton(DELETE_ADAPTER)
|
||||
delete_button.clicked.connect(
|
||||
lambda: self.delete_lora_adapter_item(adapter_widget)
|
||||
)
|
||||
adapter_layout.addWidget(delete_button)
|
||||
|
||||
# Add the widget to the list
|
||||
list_item = QListWidgetItem(self.export_lora_adapters)
|
||||
list_item.setSizeHint(adapter_widget.sizeHint())
|
||||
self.export_lora_adapters.addItem(list_item)
|
||||
self.export_lora_adapters.setItemWidget(list_item, adapter_widget)
|
||||
|
||||
|
||||
def convert_lora(self) -> None:
|
||||
self.logger.info(STARTING_LORA_CONVERSION)
|
||||
try:
|
||||
lora_input_path = self.lora_input.text()
|
||||
lora_output_path = self.lora_output.text()
|
||||
lora_output_type = self.lora_output_type_combo.currentText()
|
||||
|
||||
if not lora_input_path:
|
||||
raise ValueError(LORA_INPUT_PATH_REQUIRED)
|
||||
if not lora_output_path:
|
||||
raise ValueError(LORA_OUTPUT_PATH_REQUIRED)
|
||||
|
||||
if lora_output_type == "GGUF": # Use new file and parameters for GGUF
|
||||
command = [
|
||||
"python",
|
||||
"src/convert_lora_to_gguf.py",
|
||||
"--outfile",
|
||||
lora_output_path,
|
||||
lora_input_path,
|
||||
]
|
||||
base_model_path = self.base_model_path.text()
|
||||
if not base_model_path:
|
||||
raise ValueError(BASE_MODEL_PATH_REQUIRED)
|
||||
command.extend(["--base", base_model_path])
|
||||
else: # Use old GGML parameters for GGML
|
||||
command = [
|
||||
"python",
|
||||
"src/convert_lora_to_ggml.py",
|
||||
lora_input_path,
|
||||
lora_output_path,
|
||||
]
|
||||
|
||||
logs_path = self.logs_input.text()
|
||||
ensure_directory(logs_path)
|
||||
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
log_file = os.path.join(logs_path, f"lora_conversion_{timestamp}.log")
|
||||
|
||||
command_str = " ".join(command)
|
||||
self.logger.info(f"{LORA_CONVERSION_COMMAND}: {command_str}")
|
||||
|
||||
thread = QuantizationThread(command, os.getcwd(), log_file)
|
||||
self.quant_threads.append(thread)
|
||||
|
||||
task_name = LORA_CONVERSION_FROM_TO.format(
|
||||
os.path.basename(lora_input_path), os.path.basename(lora_output_path)
|
||||
)
|
||||
task_item = TaskListItem(task_name, log_file, show_progress_bar=False)
|
||||
list_item = QListWidgetItem(self.task_list)
|
||||
list_item.setSizeHint(task_item.sizeHint())
|
||||
self.task_list.addItem(list_item)
|
||||
self.task_list.setItemWidget(list_item, task_item)
|
||||
|
||||
thread.status_signal.connect(task_item.update_status)
|
||||
thread.finished_signal.connect(lambda: self.lora_conversion_finished(thread))
|
||||
thread.error_signal.connect(
|
||||
lambda err: handle_error(self.logger, err, task_item)
|
||||
)
|
||||
thread.start()
|
||||
self.logger.info(LORA_CONVERSION_TASK_STARTED)
|
||||
except ValueError as e:
|
||||
show_error(self.logger, str(e))
|
||||
except Exception as e:
|
||||
show_error(self.logger, ERROR_STARTING_LORA_CONVERSION.format(str(e)))
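An illustration (not part of the diff) of the command list that export_lora assembles for a single scaled adapter; all paths and the backend folder name are hypothetical:

import os

backend_path = "llama_bin/llama-cuda-backend"  # hypothetical backend folder
command = [
    os.path.join(backend_path, "llama-export-lora"),
    "--model", "base-model.gguf",
    "--output", "merged-model.gguf",
    "--lora-scaled", "adapter.gguf", "0.8",
    "--threads", "8",
]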
src/main.py
@@ -1,9 +1,196 @@
import os
|
||||
import sys
import threading
|
||||
from enum import Enum
|
||||
from typing import List, Optional
|
||||
|
||||
from PySide6.QtCore import QTimer
|
||||
from PySide6.QtWidgets import QApplication
|
||||
from fastapi import FastAPI, Query, Depends, HTTPException, Security
|
||||
from fastapi.security.api_key import APIKeyHeader
|
||||
from pydantic import BaseModel, Field
|
||||
from uvicorn import Config, Server
|
||||
|
||||
from AutoGGUF import AutoGGUF
|
||||
from Localizations import AUTOGGUF_VERSION
|
||||
|
||||
app = FastAPI(
|
||||
title="AutoGGUF",
|
||||
description="API for AutoGGUF - automatically quant GGUF models",
|
||||
version=AUTOGGUF_VERSION,
|
||||
license_info={
|
||||
"name": "Apache 2.0",
|
||||
"url": "https://raw.githubusercontent.com/leafspark/AutoGGUF/main/LICENSE",
|
||||
},
|
||||
)
|
||||
|
||||
# Global variable to hold the window reference
|
||||
window = None
|
||||
|
||||
|
||||
class ModelType(str, Enum):
|
||||
single = "single"
|
||||
sharded = "sharded"
|
||||
|
||||
|
||||
class Model(BaseModel):
|
||||
name: str = Field(..., description="Name of the model")
|
||||
type: str = Field(..., description="Type of the model")
|
||||
path: str = Field(..., description="Path to the model file")
|
||||
size: Optional[int] = Field(None, description="Size of the model in bytes")
|
||||
|
||||
class Config:
|
||||
json_schema_extra = {
|
||||
"example": {
|
||||
"name": "Llama-3.1-8B-Instruct.fp16.gguf",
|
||||
"type": "single",
|
||||
"path": "Llama-3.1-8B-Instruct.fp16.gguf",
|
||||
"size": 13000000000,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class Task(BaseModel):
|
||||
# id: str = Field(..., description="Unique identifier for the task")
|
||||
status: str = Field(..., description="Current status of the task")
|
||||
progress: float = Field(..., description="Progress of the task as a percentage")
|
||||
|
||||
class Config:
|
||||
json_schema_extra = {
|
||||
"example": {"id": "task_123", "status": "running", "progress": 75.5}
|
||||
}
|
||||
|
||||
|
||||
class Backend(BaseModel):
|
||||
name: str = Field(..., description="Name of the backend")
|
||||
path: str = Field(..., description="Path to the backend executable")
|
||||
|
||||
|
||||
class Plugin(BaseModel):
|
||||
name: str = Field(..., description="Name of the plugin")
|
||||
version: str = Field(..., description="Version of the plugin")
|
||||
description: str = Field(..., description="Description of the plugin")
|
||||
author: str = Field(..., description="Author of the plugin")
|
||||
|
||||
|
||||
# API Key configuration
|
||||
API_KEY_NAME = "Authorization"
|
||||
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
|
||||
|
||||
|
||||
def get_api_key(
|
||||
api_key_header: str = Security(api_key_header),
|
||||
) -> Optional[str]:
|
||||
api_key_env = os.getenv("AUTOGGUF_SERVER_API_KEY")
|
||||
if not api_key_env:
|
||||
return None # No API key restriction if not set
|
||||
|
||||
api_keys = [
|
||||
key.strip() for key in api_key_env.split(",") if key.strip()
|
||||
] # Split by comma and strip whitespace
|
||||
|
||||
if api_key_header and api_key_header.startswith("Bearer "):
|
||||
api_key = api_key_header[len("Bearer ") :]
|
||||
if api_key in api_keys:
|
||||
return api_key
|
||||
|
||||
raise HTTPException(status_code=403, detail="Could not validate API key")
|
||||
|
||||
|
||||
@app.get(
|
||||
"/v1/models",
|
||||
response_model=List[Model],
|
||||
tags=["Models"],
|
||||
dependencies=[Depends(get_api_key)],
|
||||
)
|
||||
async def get_models(
|
||||
type: Optional[ModelType] = Query(None, description="Filter models by type")
|
||||
) -> List[Model]:
|
||||
if window:
|
||||
models = window.get_models_data()
|
||||
if type:
|
||||
models = [m for m in models if m["type"] == type]
|
||||
|
||||
return [Model(**m) for m in models]
|
||||
return []
|
||||
|
||||
|
||||
@app.get(
|
||||
"/v1/tasks",
|
||||
response_model=List[Task],
|
||||
tags=["Tasks"],
|
||||
dependencies=[Depends(get_api_key)],
|
||||
)
|
||||
async def get_tasks() -> List[Task]:
|
||||
if window:
|
||||
return window.get_tasks_data()
|
||||
return []
|
||||
|
||||
|
||||
@app.get("/v1/health", tags=["System"], dependencies=[Depends(get_api_key)])
|
||||
async def health_check() -> dict:
|
||||
return {"status": "alive"}
|
||||
|
||||
|
||||
@app.get(
|
||||
"/v1/backends",
|
||||
response_model=List[Backend],
|
||||
tags=["System"],
|
||||
dependencies=[Depends(get_api_key)],
|
||||
)
|
||||
async def get_backends() -> List[Backend]:
|
||||
backends = []
|
||||
if window:
|
||||
for i in range(window.backend_combo.count()):
|
||||
backends.append(
|
||||
Backend(
|
||||
name=window.backend_combo.itemText(i),
|
||||
path=window.backend_combo.itemData(i),
|
||||
)
|
||||
)
|
||||
return backends
|
||||
|
||||
|
||||
@app.get(
|
||||
"/v1/plugins",
|
||||
response_model=List[Plugin],
|
||||
tags=["System"],
|
||||
dependencies=[Depends(get_api_key)],
|
||||
)
|
||||
async def get_plugins() -> List[Plugin]:
|
||||
if window:
|
||||
return [
|
||||
Plugin(**plugin_data["data"]) for plugin_data in window.plugins.values()
|
||||
]
|
||||
return []
|
||||
|
||||
|
||||
def run_uvicorn() -> None:
|
||||
if os.environ.get("AUTOGGUF_SERVER", "").lower() == "enabled":
|
||||
config = Config(
|
||||
app=app,
|
||||
host="127.0.0.1",
|
||||
port=int(os.environ.get("AUTOGGUF_SERVER_PORT", 7001)),
|
||||
log_level="info",
|
||||
)
|
||||
server = Server(config)
|
||||
server.run()
|
||||
|
||||
|
||||
def main() -> None:
|
||||
global window
|
||||
qt_app = QApplication(sys.argv)
|
||||
window = AutoGGUF(sys.argv)
|
||||
window.show()
|
||||
|
||||
# Start Uvicorn in a separate thread after a short delay
|
||||
timer = QTimer()
|
||||
timer.singleShot(
|
||||
100, lambda: threading.Thread(target=run_uvicorn, daemon=True).start()
|
||||
)
|
||||
|
||||
sys.exit(qt_app.exec())
|
||||
|
||||
|
||||
if __name__ == "__main__":
    main()
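A minimal sketch (not part of the source) of querying the local API started by run_uvicorn, assuming AUTOGGUF_SERVER=enabled and an API key configured via AUTOGGUF_SERVER_API_KEY; the port defaults to 7001 as above:

import requests  # assumed to be available; not imported by main.py itself

headers = {"Authorization": "Bearer my-secret-key"}  # omit if no API key is configured
resp = requests.get("http://127.0.0.1:7001/v1/models", headers=headers, timeout=5)
for model in resp.json():
    print(model["name"], model["type"], model.get("size"))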
@@ -0,0 +1,118 @@
import json
|
||||
|
||||
from PySide6.QtCore import Qt
|
||||
from PySide6.QtWidgets import QApplication, QFileDialog, QMessageBox
|
||||
from Localizations import (
|
||||
SAVING_PRESET,
|
||||
SAVE_PRESET,
|
||||
JSON_FILES,
|
||||
PRESET_SAVED,
|
||||
PRESET_SAVED_TO,
|
||||
LOADING_PRESET,
|
||||
LOAD_PRESET,
|
||||
PRESET_LOADED,
|
||||
PRESET_LOADED_FROM,
|
||||
)
|
||||
|
||||
|
||||
def save_preset(self) -> None:
|
||||
self.logger.info(SAVING_PRESET)
|
||||
preset = {
|
||||
"quant_types": [item.text() for item in self.quant_type.selectedItems()],
|
||||
"allow_requantize": self.allow_requantize.isChecked(),
|
||||
"leave_output_tensor": self.leave_output_tensor.isChecked(),
|
||||
"pure": self.pure.isChecked(),
|
||||
"imatrix": self.imatrix.text(),
|
||||
"include_weights": self.include_weights.text(),
|
||||
"exclude_weights": self.exclude_weights.text(),
|
||||
"use_output_tensor_type": self.use_output_tensor_type.isChecked(),
|
||||
"output_tensor_type": self.output_tensor_type.currentText(),
|
||||
"use_token_embedding_type": self.use_token_embedding_type.isChecked(),
|
||||
"token_embedding_type": self.token_embedding_type.currentText(),
|
||||
"keep_split": self.keep_split.isChecked(),
|
||||
"kv_overrides": [
|
||||
entry.get_raw_override_string() for entry in self.kv_override_entries
|
||||
],
|
||||
"extra_arguments": self.extra_arguments.text(),
|
||||
}
|
||||
|
||||
if not QApplication.keyboardModifiers() & Qt.ShiftModifier:
|
||||
file_name, _ = QFileDialog.getSaveFileName(self, SAVE_PRESET, "", JSON_FILES)
|
||||
if file_name:
|
||||
with open(file_name, "w") as f:
|
||||
json.dump(preset, f, indent=4)
|
||||
QMessageBox.information(
|
||||
self, PRESET_SAVED, PRESET_SAVED_TO.format(file_name)
|
||||
)
|
||||
self.logger.info(PRESET_SAVED_TO.format(file_name))
|
||||
else:
|
||||
clipboard = QApplication.clipboard()
|
||||
preset_str = json.dumps(preset, indent=1)
|
||||
clipboard.setText(preset_str)
|
||||
QMessageBox.information(self, PRESET_SAVED, "Preset copied to clipboard")
|
||||
self.logger.info("Preset copied to clipboard")
|
||||
|
||||
|
||||
def load_preset(self) -> None:
|
||||
self.logger.info(LOADING_PRESET)
|
||||
|
||||
try:
|
||||
if QApplication.keyboardModifiers() & Qt.ShiftModifier:
|
||||
clipboard = QApplication.clipboard()
|
||||
preset = json.loads(clipboard.text())
|
||||
source = "clipboard"
|
||||
else:
|
||||
file_name, _ = QFileDialog.getOpenFileName(
|
||||
self, LOAD_PRESET, "", JSON_FILES
|
||||
)
|
||||
if not file_name:
|
||||
return
|
||||
with open(file_name, "r") as f:
|
||||
preset = json.load(f)
|
||||
source = file_name
|
||||
|
||||
self.quant_type.clearSelection()
|
||||
for quant_type in preset.get("quant_types", []):
|
||||
items = self.quant_type.findItems(quant_type, Qt.MatchExactly)
|
||||
if items:
|
||||
items[0].setSelected(True)
|
||||
self.allow_requantize.setChecked(preset.get("allow_requantize", False))
|
||||
self.leave_output_tensor.setChecked(preset.get("leave_output_tensor", False))
|
||||
self.pure.setChecked(preset.get("pure", False))
|
||||
self.imatrix.setText(preset.get("imatrix", ""))
|
||||
self.include_weights.setText(preset.get("include_weights", ""))
|
||||
self.exclude_weights.setText(preset.get("exclude_weights", ""))
|
||||
self.use_output_tensor_type.setChecked(
|
||||
preset.get("use_output_tensor_type", False)
|
||||
)
|
||||
self.output_tensor_type.setCurrentText(preset.get("output_tensor_type", ""))
|
||||
self.use_token_embedding_type.setChecked(
|
||||
preset.get("use_token_embedding_type", False)
|
||||
)
|
||||
self.token_embedding_type.setCurrentText(preset.get("token_embedding_type", ""))
|
||||
self.keep_split.setChecked(preset.get("keep_split", False))
|
||||
self.extra_arguments.setText(preset.get("extra_arguments", ""))
|
||||
|
||||
# Clear existing KV overrides and add new ones
|
||||
for entry in self.kv_override_entries:
|
||||
self.remove_kv_override(entry)
|
||||
for override in preset.get("kv_overrides", []):
|
||||
self.add_kv_override(override)
|
||||
|
||||
QMessageBox.information(
|
||||
self,
|
||||
PRESET_LOADED,
|
||||
PRESET_LOADED_FROM.format(
|
||||
source
|
||||
if not QApplication.keyboardModifiers() & Qt.ShiftModifier
|
||||
else "clipboard"
|
||||
),
|
||||
)
|
||||
self.logger.info(PRESET_LOADED_FROM.format(source))
|
||||
|
||||
except json.JSONDecodeError:
|
||||
QMessageBox.critical(self, "Error", "Invalid JSON in clipboard")
|
||||
self.logger.error("Failed to parse JSON from clipboard")
|
||||
except Exception as e:
|
||||
QMessageBox.critical(self, "Error", f"Failed to load preset: {str(e)}")
|
||||
self.logger.error(f"Failed to load preset: {str(e)}")
@@ -0,0 +1,559 @@
import copy
|
||||
import gc
|
||||
import re
|
||||
import sys
|
||||
from typing import List
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import torch
|
||||
import tqdm
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
|
||||
# https://github.com/neuralmagic/AutoFP8
|
||||
|
||||
|
||||
class BaseQuantizeConfig:
|
||||
"""Configuration for model quantization.
|
||||
|
||||
Args:
|
||||
quant_method: Type/precision of quantization method to use.
|
||||
At the moment, this is just "fp8" which specifically means
|
||||
the fp8_e4m3 format in pytorch.
|
||||
activation_scheme: Choice of either "dynamic" or "static" quantization
|
||||
of activations. If "static", then calibration samples are required
|
||||
during quantization to produce accurate per-tensor scales for
|
||||
activations of Linear modules.
|
||||
ignore_patterns: List of patterns used to ignore layers. If a string
|
||||
starts with "re:", then everything afterward is used as python
|
||||
regex style matching i.e. re.search(), for each Linear layer.
|
||||
By default, "re:.*lm_head" is included to ignore the embedding
|
||||
Linear layer usually at the end of decoder LLMs
|
||||
kv_cache_quant_targets: Tuple of Linear module names to target for
|
||||
calibration of the output scales for KV cache quantization.
|
||||
Usually, these should be `("k_proj", "v_proj")`.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
quant_method: str = "fp8",
|
||||
activation_scheme: str = "static",
|
||||
ignore_patterns: List[str] = ["re:.*lm_head"],
|
||||
kv_cache_quant_targets: Optional[Tuple[str]] = None,
|
||||
):
|
||||
if quant_method != "fp8":
|
||||
raise ValueError("Only FP8 quantization is supported.")
|
||||
if activation_scheme not in ["static", "dynamic"]:
|
||||
raise ValueError(
|
||||
"Invalid activation_scheme. Choose either 'static' or 'dynamic'."
|
||||
)
|
||||
self.quant_method = quant_method
|
||||
self.activation_scheme = activation_scheme
|
||||
self.ignore_patterns = ignore_patterns
|
||||
self.kv_cache_quant_targets = kv_cache_quant_targets
|
||||
self.ignored_layers = []
|
||||
|
||||
|
||||
# Class responsible for quantizing weights
|
||||
class FP8DynamicLinear(torch.nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
weight: torch.Tensor,
|
||||
weight_scale: torch.Tensor,
|
||||
bias: torch.nn.Parameter,
|
||||
):
|
||||
super().__init__()
|
||||
self.weight = torch.nn.Parameter(weight, requires_grad=False)
|
||||
self.weight_scale = torch.nn.Parameter(weight_scale, requires_grad=False)
|
||||
self.bias = bias
|
||||
|
||||
def forward(self, x):
|
||||
qinput, x_scale = per_tensor_quantize(x)
|
||||
output = fp8_gemm(
|
||||
A=qinput,
|
||||
A_scale=x_scale,
|
||||
B=self.weight,
|
||||
B_scale=self.weight_scale,
|
||||
bias=self.bias,
|
||||
out_dtype=x.dtype,
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
# Module responsible for taking already quantized weights, and recording input scales (and possibly output scales)
|
||||
# using an activation observer
|
||||
class FP8StaticLinearQuantizer(torch.nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
weight: torch.Tensor,
|
||||
weight_scale: torch.Tensor,
|
||||
bias: torch.nn.Parameter,
|
||||
quantize_output: bool = False,
|
||||
):
|
||||
super().__init__()
|
||||
self.weight = torch.nn.Parameter(weight, requires_grad=False)
|
||||
self.weight_scale = torch.nn.Parameter(weight_scale, requires_grad=False)
|
||||
self.bias = bias
|
||||
self.input_scale = None
|
||||
self.output_scale = None
|
||||
self.quantize_output = quantize_output
|
||||
|
||||
def forward(self, x):
|
||||
qinput, x_input_scale = per_tensor_quantize(x)
|
||||
if self.input_scale is None:
|
||||
self.input_scale = torch.nn.Parameter(x_input_scale, requires_grad=False)
|
||||
elif x_input_scale > self.input_scale:
|
||||
self.input_scale = torch.nn.Parameter(x_input_scale, requires_grad=False)
|
||||
output = fp8_gemm(
|
||||
A=qinput,
|
||||
A_scale=self.input_scale,
|
||||
B=self.weight,
|
||||
B_scale=self.weight_scale,
|
||||
bias=self.bias,
|
||||
out_dtype=x.dtype,
|
||||
)
|
||||
|
||||
# Optionally, quantize output and record scale
|
||||
if self.quantize_output:
|
||||
qoutput, output_scale = per_tensor_quantize(output)
|
||||
if self.output_scale is None:
|
||||
self.output_scale = torch.nn.Parameter(
|
||||
output_scale, requires_grad=False
|
||||
)
|
||||
elif output_scale > self.output_scale:
|
||||
self.output_scale = torch.nn.Parameter(
|
||||
output_scale, requires_grad=False
|
||||
)
|
||||
output = qoutput.to(output.dtype) * output_scale
|
||||
|
||||
return output
|
||||
|
||||
|
||||
# Module responsible for representing the final checkpoint representation
|
||||
class FP8StaticLinear(torch.nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
weight: torch.nn.Parameter,
|
||||
weight_scale: torch.nn.Parameter,
|
||||
bias: torch.nn.Parameter,
|
||||
input_scale: torch.nn.Parameter,
|
||||
output_scale: Optional[torch.nn.Parameter] = None,
|
||||
):
|
||||
super().__init__()
|
||||
self.weight = weight
|
||||
self.weight_scale = weight_scale
|
||||
self.bias = bias
|
||||
self.input_scale = input_scale
|
||||
self.output_scale = output_scale
|
||||
|
||||
def forward(self, x):
|
||||
qinput = static_per_tensor_quantize(x, self.input_scale)
|
||||
output = fp8_gemm(
|
||||
A=qinput,
|
||||
A_scale=self.input_scale,
|
||||
B=self.weight,
|
||||
B_scale=self.weight_scale,
|
||||
bias=self.bias,
|
||||
out_dtype=x.dtype,
|
||||
)
|
||||
|
||||
if self.output_scale:
|
||||
qoutput = static_per_tensor_quantize(output, self.output_scale)
|
||||
output = qoutput.to(output.dtype) * self.output_scale
|
||||
|
||||
return output
|
||||
|
||||
|
||||
class AutoFP8ForCausalLM:
|
||||
def __init__(
|
||||
self,
|
||||
model: AutoModelForCausalLM,
|
||||
quantize_config: BaseQuantizeConfig,
|
||||
):
|
||||
self.model = model
|
||||
self.model_type = self.model.config.model_type
|
||||
self.config = self.model.config
|
||||
|
||||
# Gather the Linear module names that we want to ignore
|
||||
quantize_config.ignored_layers = get_layers_to_ignore(
|
||||
self.model, quantize_config.ignore_patterns
|
||||
)
|
||||
|
||||
if quantize_config.kv_cache_quant_targets:
|
||||
kv_cache_quant_layers = get_kv_cache_quant_layers(
|
||||
self.model, quantize_config.kv_cache_quant_targets
|
||||
)
|
||||
if len(kv_cache_quant_layers) == 0:
|
||||
raise ValueError(
|
||||
f"Could not find any kv cache layers using kv_cache_quant_targets={quantize_config.kv_cache_quant_targets}, please fix your argument."
|
||||
)
|
||||
quantize_config.kv_cache_quant_layers = kv_cache_quant_layers
|
||||
|
||||
self.quantize_config = quantize_config
|
||||
|
||||
@classmethod
|
||||
def from_pretrained(
|
||||
cls,
|
||||
pretrained_model_name_or_path: str,
|
||||
quantize_config: BaseQuantizeConfig,
|
||||
**model_init_kwargs,
|
||||
):
|
||||
"""Load the un-quantized pretrained model"""
|
||||
|
||||
def skip(*args, **kwargs):
|
||||
pass
|
||||
|
||||
torch.nn.init.kaiming_uniform_ = skip
|
||||
torch.nn.init.uniform_ = skip
|
||||
torch.nn.init.normal_ = skip
|
||||
|
||||
# Parameters related to loading from Hugging Face Hub
|
||||
cache_dir = model_init_kwargs.pop("cache_dir", None)
|
||||
force_download = model_init_kwargs.pop("force_download", False)
|
||||
resume_download = model_init_kwargs.pop("resume_download", False)
|
||||
proxies = model_init_kwargs.pop("proxies", None)
|
||||
local_files_only = model_init_kwargs.pop("local_files_only", False)
|
||||
use_auth_token = model_init_kwargs.pop("use_auth_token", None)
|
||||
revision = model_init_kwargs.pop("revision", None)
|
||||
subfolder = model_init_kwargs.pop("subfolder", "")
|
||||
commit_hash = model_init_kwargs.pop("_commit_hash", None)
|
||||
|
||||
cached_file_kwargs = {
|
||||
"cache_dir": cache_dir,
|
||||
"force_download": force_download,
|
||||
"proxies": proxies,
|
||||
"resume_download": resume_download,
|
||||
"local_files_only": local_files_only,
|
||||
"use_auth_token": use_auth_token,
|
||||
"revision": revision,
|
||||
"subfolder": subfolder,
|
||||
"_commit_hash": commit_hash,
|
||||
}
|
||||
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
# Important defaults
|
||||
if "torch_dtype" not in model_init_kwargs:
|
||||
model_init_kwargs["torch_dtype"] = "auto"
|
||||
|
||||
if "device_map" not in model_init_kwargs:
|
||||
model_init_kwargs["device_map"] = "auto"
|
||||
|
||||
merged_kwargs = {**model_init_kwargs, **cached_file_kwargs}
|
||||
print("Loading model with the following kwargs:", merged_kwargs)
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
pretrained_model_name_or_path, **merged_kwargs
|
||||
)
|
||||
|
||||
model_config = model.config.to_dict()
|
||||
seq_len_keys = ["max_position_embeddings", "seq_length", "n_positions"]
|
||||
if any(k in model_config for k in seq_len_keys):
|
||||
for key in seq_len_keys:
|
||||
if key in model_config:
|
||||
model.seqlen = model_config[key]
|
||||
break
|
||||
else:
|
||||
print("Can't get model's sequence length, setting to 2048.")
|
||||
model.seqlen = 2048
|
||||
model.eval()
|
||||
|
||||
return cls(model, quantize_config)
|
||||
|
||||
def quantize(self, calibration_tokens: Optional[torch.Tensor] = None):
|
||||
|
||||
# Always quantize the weights as they do not require calibration data
|
||||
quantize_weights(self.model, self.quantize_config)
|
||||
|
||||
if self.quantize_config.activation_scheme == "static":
|
||||
assert (
|
||||
calibration_tokens is not None
|
||||
), "Calibration tokens required for activation quantization"
|
||||
|
||||
def _prepare_calibration_data(calibration_tokens):
|
||||
if hasattr(calibration_tokens, "input_ids"):
|
||||
return calibration_tokens.input_ids
|
||||
return calibration_tokens
|
||||
|
||||
quantize_activations(
|
||||
self.model,
|
||||
self.quantize_config,
|
||||
_prepare_calibration_data(calibration_tokens),
|
||||
)
|
||||
|
||||
def save_quantized(self, save_dir):
|
||||
save_quantized_model(
|
||||
self.model,
|
||||
quant_config=self.quantize_config,
|
||||
save_dir=save_dir,
|
||||
)
|
||||
|
||||
|
||||
def cleanup_memory():
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
|
||||
def per_tensor_quantize(tensor: torch.Tensor) -> Tuple[torch.Tensor, float]:
|
||||
"""Quantize a tensor using per-tensor static scaling factor.
|
||||
Args:
|
||||
tensor: The input tensor.
|
||||
"""
|
||||
finfo = torch.finfo(torch.float8_e4m3fn)
|
||||
# Calculate the scale as dtype max divided by absmax.
|
||||
# Since .abs() creates a new tensor, we use aminmax to get
|
||||
# the min and max first and then calculate the absmax.
|
||||
if tensor.numel() == 0:
|
||||
# Deal with empty tensors (triggered by empty MoE experts)
|
||||
min_val, max_val = (
|
||||
torch.tensor(-16.0, dtype=tensor.dtype),
|
||||
torch.tensor(16.0, dtype=tensor.dtype),
|
||||
)
|
||||
else:
|
||||
min_val, max_val = tensor.aminmax()
|
||||
amax = torch.maximum(min_val.abs(), max_val.abs())
|
||||
scale = finfo.max / amax.clamp(min=1e-12)
|
||||
# Scale and clamp the tensor to bring it to
|
||||
# the representative range of float8 data type
|
||||
# (as default cast is unsaturated)
|
||||
qweight = (tensor * scale).clamp(min=finfo.min, max=finfo.max)
|
||||
# Return both float8 data and the inverse scale (as float),
|
||||
# as both required as inputs to torch._scaled_mm
|
||||
qweight = qweight.to(torch.float8_e4m3fn)
|
||||
scale = scale.float().reciprocal()
|
||||
return qweight, scale
|
||||
|
||||
|
||||
def static_per_tensor_quantize(tensor: torch.Tensor, inv_scale: float) -> torch.Tensor:
|
||||
finfo = torch.finfo(torch.float8_e4m3fn)
|
||||
qweight = (tensor / inv_scale).clamp(min=finfo.min, max=finfo.max)
|
||||
return qweight.to(torch.float8_e4m3fn)
|
||||
|
||||
|
||||
def fp8_gemm(A, A_scale, B, B_scale, bias, out_dtype):
|
||||
if A.numel() == 0:
|
||||
# Deal with empty tensors (triggered by empty MoE experts)
|
||||
return torch.empty(size=(0, B.shape[0]), dtype=out_dtype, device=A.device)
|
||||
|
||||
# TODO: Disable native fp8 gemm for now, always just dequantize
|
||||
# native_fp8_support = (
|
||||
# torch.cuda.is_available() and torch.cuda.get_device_capability() >= (8, 9)
|
||||
# )
|
||||
native_fp8_support = False
|
||||
if native_fp8_support:
|
||||
need_reshape = A.dim() == 3
|
||||
if need_reshape:
|
||||
batch_size = A.shape[0]
|
||||
A_input = A.reshape(-1, A.shape[-1])
|
||||
else:
|
||||
batch_size = None
|
||||
A_input = A
|
||||
output, _ = torch._scaled_mm(
|
||||
A_input,
|
||||
B.t(),
|
||||
out_dtype=out_dtype,
|
||||
scale_a=A_scale,
|
||||
scale_b=B_scale,
|
||||
bias=bias,
|
||||
)
|
||||
if need_reshape:
|
||||
output = output.reshape(
|
||||
batch_size, output.shape[0] // batch_size, output.shape[1]
|
||||
)
|
||||
else:
|
||||
output = torch.nn.functional.linear(
|
||||
A.to(out_dtype) * A_scale,
|
||||
B.to(out_dtype) * B_scale.to(out_dtype),
|
||||
bias=bias,
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def replace_module(model: AutoModelForCausalLM, name: str, new_module: torch.nn.Module):
|
||||
if "." in name:
|
||||
parent_name = name.rsplit(".", 1)[0]
|
||||
child_name = name[len(parent_name) + 1 :]
|
||||
parent = model.get_submodule(parent_name)
|
||||
else:
|
||||
parent_name = ""
|
||||
parent = model
|
||||
child_name = name
|
||||
setattr(parent, child_name, new_module)
|
||||
|
||||
|
||||
def quantize_weights(
|
||||
model: AutoModelForCausalLM,
|
||||
quantize_config: BaseQuantizeConfig,
|
||||
):
|
||||
named_modules = list(model.named_modules())
|
||||
for name, linear in tqdm.tqdm(named_modules, desc="Quantizing weights"):
|
||||
if (
|
||||
not isinstance(linear, torch.nn.Linear)
|
||||
or name in quantize_config.ignored_layers
|
||||
):
|
||||
continue
|
||||
quant_weight, weight_scale = per_tensor_quantize(linear.weight)
|
||||
bias = copy.deepcopy(linear.bias) if linear.bias is not None else None
|
||||
quant_linear = FP8DynamicLinear(
|
||||
weight=quant_weight, weight_scale=weight_scale, bias=bias
|
||||
)
|
||||
replace_module(model, name, quant_linear)
|
||||
del linear.weight
|
||||
del linear.bias
|
||||
del linear
|
||||
cleanup_memory()
|
||||
|
||||
|
||||
def quantize_activations(
|
||||
model: AutoModelForCausalLM,
|
||||
quantize_config: BaseQuantizeConfig,
|
||||
calibration_tokens,
|
||||
):
|
||||
# Replace weight quantizer with a dynamic activation quantizer observer
|
||||
for name, dynamic_quant_linear in model.named_modules():
|
||||
if (
|
||||
not isinstance(dynamic_quant_linear, FP8DynamicLinear)
|
||||
or name in quantize_config.ignored_layers
|
||||
):
|
||||
continue
|
||||
quantizer = FP8StaticLinearQuantizer(
|
||||
weight=dynamic_quant_linear.weight,
|
||||
weight_scale=dynamic_quant_linear.weight_scale,
|
||||
bias=dynamic_quant_linear.bias,
|
||||
quantize_output=(
|
||||
hasattr(quantize_config, "kv_cache_quant_layers")
|
||||
and name in quantize_config.kv_cache_quant_layers
|
||||
),
|
||||
)
|
||||
replace_module(model, name, quantizer)
|
||||
del dynamic_quant_linear
|
||||
cleanup_memory()
|
||||
|
||||
# Pass through calibration data to measure activation scales
|
||||
with torch.inference_mode():
|
||||
with tqdm.tqdm(
|
||||
total=calibration_tokens.shape[0], desc="Calibrating activation scales"
|
||||
) as pbar:
|
||||
for row_idx in range(calibration_tokens.shape[0]):
|
||||
model(calibration_tokens[row_idx].reshape(1, -1))
|
||||
cleanup_memory()
|
||||
pbar.update(1)
|
||||
|
||||
# Replace dynamic quantizer observer with StaticLinear for export
|
||||
for name, quantizer in model.named_modules():
|
||||
if (
|
||||
not isinstance(quantizer, FP8StaticLinearQuantizer)
|
||||
or name in quantize_config.ignored_layers
|
||||
):
|
||||
continue
|
||||
static_proj = FP8StaticLinear(
|
||||
weight=quantizer.weight,
|
||||
weight_scale=quantizer.weight_scale,
|
||||
bias=quantizer.bias,
|
||||
input_scale=quantizer.input_scale,
|
||||
output_scale=quantizer.output_scale,
|
||||
)
|
||||
replace_module(model, name, static_proj)
|
||||
del quantizer
|
||||
cleanup_memory()
|
||||
|
||||
# Post-process step for kv cache scales to take the k/v module
|
||||
# `output_scale` parameters, and store them in the parent attention
|
||||
# module as `k_scale` and `v_scale`
|
||||
if hasattr(quantize_config, "kv_cache_quant_layers"):
|
||||
# Assumes that list is ordered such that [layer0.k_proj, layer0.v_proj, layer1.k_proj, layer1.v_proj, ...]
|
||||
# so we make a list of tuples [(layer0.k_proj, layer0.v_proj), (layer1.k_proj, layer1.v_proj), ...]
|
||||
kv_proj_pairs = zip(*[iter(quantize_config.kv_cache_quant_layers)] * 2)
|
||||
for k_proj_name, v_proj_name in kv_proj_pairs:
|
||||
parent_module_name = ".".join(k_proj_name.split(".")[:-1])
|
||||
assert parent_module_name == ".".join(v_proj_name.split(".")[:-1])
|
||||
parent_module = dict(model.named_modules())[parent_module_name]
|
||||
|
||||
k_proj = dict(model.named_modules())[k_proj_name]
|
||||
v_proj = dict(model.named_modules())[v_proj_name]
|
||||
|
||||
parent_module.k_scale = torch.nn.Parameter(
|
||||
k_proj.output_scale, requires_grad=False
|
||||
)
|
||||
parent_module.v_scale = torch.nn.Parameter(
|
||||
v_proj.output_scale, requires_grad=False
|
||||
)
|
||||
|
||||
# Remove output_scale from k_proj and v_proj
|
||||
k_proj.output_scale = None
|
||||
v_proj.output_scale = None
|
||||
cleanup_memory()
|
||||
|
||||
|
||||
def save_quantized_model(
|
||||
model: AutoModelForCausalLM,
|
||||
quant_config: BaseQuantizeConfig,
|
||||
save_dir: str,
|
||||
):
|
||||
print(model)
|
||||
print(f"Saving the model to {save_dir}")
|
||||
static_q_dict = {
|
||||
"quantization_config": {
|
||||
"quant_method": "fp8",
|
||||
"activation_scheme": quant_config.activation_scheme,
|
||||
"ignored_layers": quant_config.ignored_layers,
|
||||
}
|
||||
}
|
||||
if hasattr(quant_config, "kv_cache_quant_layers"):
|
||||
static_q_dict["quantization_config"]["kv_cache_scheme"] = "static"
|
||||
model.config.update(static_q_dict)
|
||||
model.save_pretrained(save_dir)
|
||||
tokenizer = AutoTokenizer.from_pretrained(model.config._name_or_path)
|
||||
tokenizer.save_pretrained(save_dir)
|
||||
|
||||
|
||||
def get_layers_to_ignore(model, ignore_patterns) -> List[str]:
|
||||
ignored_layers = set()
|
||||
|
||||
for name, linear in model.named_modules():
|
||||
if not isinstance(linear, torch.nn.Linear):
|
||||
continue
|
||||
|
||||
for ignore_pattern in ignore_patterns:
|
||||
regex_prefix = "re:"
|
||||
if ignore_pattern.startswith(regex_prefix):
|
||||
# check if name matches regex and add to set if true
|
||||
regex_pattern = ignore_pattern[len(regex_prefix) :]
|
||||
if re.search(regex_pattern, name):
|
||||
ignored_layers.add(name)
|
||||
else:
|
||||
# else, exact match
|
||||
if ignore_pattern == name:
|
||||
ignored_layers.add(name)
|
||||
|
||||
return list(ignored_layers)
|
||||
|
||||
|
||||
def get_kv_cache_quant_layers(model, kv_cache_quant_targets: Tuple[str]) -> List[str]:
|
||||
kv_cache_quant_layers = []
|
||||
|
||||
for name, linear in model.named_modules():
|
||||
if not isinstance(linear, torch.nn.Linear):
|
||||
continue
|
||||
|
||||
for output_quant_target in kv_cache_quant_targets:
|
||||
if name.endswith(output_quant_target):
|
||||
kv_cache_quant_layers.append(name)
|
||||
|
||||
return kv_cache_quant_layers
|
||||
|
||||
|
||||
def quantize_to_fp8_dynamic(input_model_dir: str, output_model_dir: str) -> None:
|
||||
# Define quantization config with static activation scales
|
||||
quantize_config = BaseQuantizeConfig(
|
||||
quant_method="fp8", activation_scheme="dynamic"
|
||||
)
|
||||
|
||||
# Load the model, quantize, and save checkpoint
|
||||
model = AutoFP8ForCausalLM.from_pretrained(input_model_dir, quantize_config)
|
||||
# No examples for dynamic quantization
|
||||
model.quantize([])
|
||||
model.save_quantized(output_model_dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
quantize_to_fp8_dynamic(sys.argv[1], sys.argv[2])
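A sketch (not part of the file) of static-scheme quantization with calibration tokens, which the static path above requires; the model id is a placeholder and transformers is assumed to be installed:

from transformers import AutoTokenizer

model_dir = "my-org/my-model"  # hypothetical model id or local path
quant_config = BaseQuantizeConfig(quant_method="fp8", activation_scheme="static")

tokenizer = AutoTokenizer.from_pretrained(model_dir)
calibration_tokens = tokenizer(
    "The quick brown fox jumps over the lazy dog.", return_tensors="pt"
).input_ids  # shape (1, seq_len); quantize() also accepts the raw BatchEncoding

model = AutoFP8ForCausalLM.from_pretrained(model_dir, quant_config)
model.quantize(calibration_tokens)
model.save_quantized("model-fp8-static")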
@@ -0,0 +1,227 @@
from typing import Tuple
|
||||
|
||||
import psutil
|
||||
from PySide6.QtCore import QTimer
|
||||
from PySide6.QtGui import Qt
|
||||
from PySide6.QtWidgets import QFileDialog, QLabel
|
||||
|
||||
from Localizations import *
|
||||
from error_handling import show_error
|
||||
|
||||
|
||||
def resize_window(self, larger) -> None:
|
||||
factor = 1.1 if larger else 1 / 1.1
|
||||
current_width = self.width()
|
||||
current_height = self.height()
|
||||
new_width = int(current_width * factor)
|
||||
new_height = int(current_height * factor)
|
||||
self.resize(new_width, new_height)
|
||||
|
||||
|
||||
def reset_size(self) -> None:
|
||||
self.resize(self.default_width, self.default_height)
|
||||
|
||||
|
||||
def parse_resolution(self) -> Tuple[int, int]:
|
||||
res = os.environ.get("AUTOGGUF_RESOLUTION", "1650x1100")
|
||||
try:
|
||||
width, height = map(int, res.split("x"))
|
||||
if width <= 0 or height <= 0:
|
||||
raise ValueError
|
||||
return width, height
|
||||
except (ValueError, AttributeError):
|
||||
return 1650, 1100
|
||||
def browse_base_model(self) -> None:
    self.logger.info(BROWSING_FOR_BASE_MODEL_FOLDER)  # Updated log message
    base_model_folder = QFileDialog.getExistingDirectory(self, SELECT_BASE_MODEL_FOLDER)
    if base_model_folder:
        self.base_model_path.setText(os.path.abspath(base_model_folder))


def browse_hf_model_input(self) -> None:
    self.logger.info(BROWSE_FOR_HF_MODEL_DIRECTORY)
    model_dir = QFileDialog.getExistingDirectory(self, SELECT_HF_MODEL_DIRECTORY)
    if model_dir:
        self.hf_model_input.setText(os.path.abspath(model_dir))


def browse_hf_outfile(self) -> None:
    self.logger.info(BROWSE_FOR_HF_TO_GGUF_OUTPUT)
    outfile, _ = QFileDialog.getSaveFileName(self, SELECT_OUTPUT_FILE, "", GGUF_FILES)
    if outfile:
        self.hf_outfile.setText(os.path.abspath(outfile))


def browse_imatrix_datafile(self) -> None:
    self.logger.info(BROWSING_FOR_IMATRIX_DATA_FILE)
    datafile, _ = QFileDialog.getOpenFileName(self, SELECT_DATA_FILE, "", ALL_FILES)
    if datafile:
        self.imatrix_datafile.setText(os.path.abspath(datafile))


def browse_imatrix_model(self) -> None:
    self.logger.info(BROWSING_FOR_IMATRIX_MODEL_FILE)
    model_file, _ = QFileDialog.getOpenFileName(self, SELECT_MODEL_FILE, "", GGUF_FILES)
    if model_file:
        self.imatrix_model.setText(os.path.abspath(model_file))


def browse_imatrix_output(self) -> None:
    self.logger.info(BROWSING_FOR_IMATRIX_OUTPUT_FILE)
    output_file, _ = QFileDialog.getSaveFileName(
        self, SELECT_OUTPUT_FILE, "", DAT_FILES
    )
    if output_file:
        self.imatrix_output.setText(os.path.abspath(output_file))


def create_label(self, text, tooltip) -> QLabel:
    label = QLabel(text)
    label.setToolTip(tooltip)
    return label


def toggle_gpu_offload_auto(self, state) -> None:
    is_auto = state == Qt.CheckState.Checked
    self.gpu_offload_slider.setEnabled(not is_auto)
    self.gpu_offload_spinbox.setEnabled(not is_auto)


def update_model_info(logger, model_info) -> None:
    logger.debug(UPDATING_MODEL_INFO.format(model_info))
    pass


def update_system_info(self) -> None:
    ram = psutil.virtual_memory()
    cpu = psutil.cpu_percent()

    # Smooth transition for RAM bar
    animate_bar(self, self.ram_bar, ram.percent)

    # Smooth transition for CPU bar
    animate_bar(self, self.cpu_bar, cpu)

    self.ram_bar.setFormat(
        RAM_USAGE_FORMAT.format(
            ram.percent, ram.used // 1024 // 1024, ram.total // 1024 // 1024
        )
    )
    self.cpu_label.setText(CPU_USAGE_FORMAT.format(cpu))

    # Collect CPU and RAM usage data
    self.cpu_data.append(cpu)
    self.ram_data.append(ram.percent)

    if len(self.cpu_data) > 60:
        self.cpu_data.pop(0)
        self.ram_data.pop(0)
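# Illustrative wiring sketch (not part of the file): update_system_info is meant
# to be polled, but the scheduling lives elsewhere. One possible hookup, using
# the QTimer imported above; the attribute name and interval are assumptions.
#
#     self.system_info_timer = QTimer(self)
#     self.system_info_timer.timeout.connect(lambda: update_system_info(self))
#     self.system_info_timer.start(1000)  # refresh CPU/RAM readings every second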
def animate_bar(self, bar, target_value) -> None:
    current_value = bar.value()
    difference = target_value - current_value

    if abs(difference) <= 1:  # Avoid animation for small changes
        bar.setValue(target_value)
        return

    step = 1 if difference > 0 else -1  # Increment or decrement based on difference
    timer = QTimer(self)
    timer.timeout.connect(lambda: _animate_step(bar, target_value, step, timer))
    timer.start(10)  # Adjust the interval for animation speed


def _animate_step(bar, target_value, step, timer) -> None:
    current_value = bar.value()
    new_value = current_value + step

    if (step > 0 and new_value > target_value) or (
        step < 0 and new_value < target_value
    ):
        bar.setValue(target_value)
        timer.stop()
    else:
        bar.setValue(new_value)


def update_download_progress(self, progress) -> None:
    self.download_progress.setValue(progress)


def update_cuda_backends(self) -> None:
    self.logger.debug(UPDATING_CUDA_BACKENDS)
    self.backend_combo_cuda.clear()
    llama_bin = os.path.abspath("llama_bin")
    if os.path.exists(llama_bin):
        for item in os.listdir(llama_bin):
            item_path = os.path.join(llama_bin, item)
            if os.path.isdir(item_path) and "cudart-llama" not in item.lower():
                if (
                    "cu1" in item.lower() or "cuda-1" in item.lower()
                ):  # Only include CUDA-capable backends
                    self.backend_combo_cuda.addItem(item, userData=item_path)

    if self.backend_combo_cuda.count() == 0:
        self.backend_combo_cuda.addItem(NO_SUITABLE_CUDA_BACKENDS)
        self.backend_combo_cuda.setEnabled(False)
    else:
        self.backend_combo_cuda.setEnabled(True)


def update_threads_spinbox(self, value) -> None:
    self.threads_spinbox.setValue(value)


def update_threads_slider(self, value) -> None:
    self.threads_slider.setValue(value)


def update_gpu_offload_spinbox(self, value) -> None:
    self.gpu_offload_spinbox.setValue(value)


def update_gpu_offload_slider(self, value) -> None:
    self.gpu_offload_slider.setValue(value)


def update_cuda_option(self) -> None:
    self.logger.debug(UPDATING_CUDA_OPTIONS)
    asset = self.asset_combo.currentData()

    # Handle the case where asset is None
    if asset is None:
        self.logger.warning(NO_ASSET_SELECTED_FOR_CUDA_CHECK)
        self.cuda_extract_checkbox.setVisible(False)
        self.cuda_backend_label.setVisible(False)
        self.backend_combo_cuda.setVisible(False)
        return  # Exit the function early

    is_cuda = asset and "cudart" in asset["name"].lower()
    self.cuda_extract_checkbox.setVisible(is_cuda)
    self.cuda_backend_label.setVisible(is_cuda)
    self.backend_combo_cuda.setVisible(is_cuda)
    if is_cuda:
        self.update_cuda_backends()


def update_assets(self) -> None:
    self.logger.debug(UPDATING_ASSET_LIST)
    self.asset_combo.clear()
    release = self.release_combo.currentData()
    if release:
        if "assets" in release:
            for asset in release["assets"]:
                self.asset_combo.addItem(asset["name"], userData=asset)
        else:
            show_error(
                self.logger, NO_ASSETS_FOUND_FOR_RELEASE.format(release["tag_name"])
            )
    self.update_cuda_option()


def update_base_model_visibility(self, index) -> None:
    is_gguf = self.lora_output_type_combo.itemText(index) == "GGUF"
    self.base_model_wrapper.setVisible(is_gguf)
@ -0,0 +1,219 @@
from typing import Any, Union

import os
import urllib.request
import urllib.error
import json
import ssl
import certifi
from PySide6.QtCore import Qt
from PySide6.QtWidgets import QFileDialog, QInputDialog, QMenu

from DownloadThread import DownloadThread
from Localizations import *
from error_handling import show_error
from globals import ensure_directory
from KVOverrideEntry import KVOverrideEntry


def show_model_context_menu(self, position):
    item = self.model_tree.itemAt(position)
    if item:
        # Child of a sharded model or top-level item without children
        if item.parent() is not None or item.childCount() == 0:
            menu = QMenu()
            rename_action = menu.addAction(RENAME)
            delete_action = menu.addAction(DELETE)

            action = menu.exec(self.model_tree.viewport().mapToGlobal(position))
            if action == rename_action:
                self.rename_model(item)
            elif action == delete_action:
                self.delete_model(item)


def rename_model(self, item):
    old_name = item.text(0)
    new_name, ok = QInputDialog.getText(self, RENAME, f"New name for {old_name}:")
    if ok and new_name:
        old_path = os.path.join(self.models_input.text(), old_name)
        new_path = os.path.join(self.models_input.text(), new_name)
        try:
            os.rename(old_path, new_path)
            item.setText(0, new_name)
            self.logger.info(MODEL_RENAMED_SUCCESSFULLY.format(old_name, new_name))
        except Exception as e:
            show_error(self.logger, f"Error renaming model: {e}")


def add_kv_override(self, override_string=None) -> None:
    entry = KVOverrideEntry()
    entry.deleted.connect(self.remove_kv_override)
    if override_string:
        key, value = override_string.split("=")
        type_, val = value.split(":")
        entry.key_input.setText(key)
        entry.type_combo.setCurrentText(type_)
        entry.value_input.setText(val)
    self.kv_override_layout.addWidget(entry)
    self.kv_override_entries.append(entry)
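# Illustrative usage sketch (not part of the file): add_kv_override expects
# override strings in KEY=TYPE:VALUE form, split once on "=" and once on ":".
# The key, type, and value below are placeholder examples.
#
#     add_kv_override(window, "tokenizer.ggml.add_bos_token=bool:false")
#     # key_input   -> "tokenizer.ggml.add_bos_token"
#     # type_combo  -> "bool"
#     # value_input -> "false"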
def remove_kv_override(self, entry) -> None:
    self.kv_override_layout.removeWidget(entry)
    self.kv_override_entries.remove(entry)
    entry.deleteLater()


def get_models_data(self) -> list[dict[str, Union[str, Any]]]:
    models = []
    root = self.model_tree.invisibleRootItem()
    child_count = root.childCount()
    for i in range(child_count):
        item = root.child(i)
        model_name = item.text(0)
        model_type = "sharded" if "sharded" in model_name.lower() else "single"
        model_path = item.data(0, Qt.ItemDataRole.UserRole)
        models.append({"name": model_name, "type": model_type, "path": model_path})
    return models


def get_tasks_data(self) -> list[dict[str, Union[int, Any]]]:
    tasks = []
    for i in range(self.task_list.count()):
        item = self.task_list.item(i)
        task_widget = self.task_list.itemWidget(item)
        if task_widget:
            tasks.append(
                {
                    "name": task_widget.task_name,
                    "status": task_widget.status,
                    "progress": (
                        task_widget.progress_bar.value()
                        if hasattr(task_widget, "progress_bar")
                        else 0
                    ),
                    "log_file": task_widget.log_file,
                }
            )
    return tasks


def browse_models(self) -> None:
    self.logger.info(BROWSING_FOR_MODELS_DIRECTORY)
    models_path = QFileDialog.getExistingDirectory(self, SELECT_MODELS_DIRECTORY)
    if models_path:
        self.models_input.setText(os.path.abspath(models_path))
        ensure_directory(models_path)
        self.load_models()


def browse_output(self) -> None:
    self.logger.info(BROWSING_FOR_OUTPUT_DIRECTORY)
    output_path = QFileDialog.getExistingDirectory(self, SELECT_OUTPUT_DIRECTORY)
    if output_path:
        self.output_input.setText(os.path.abspath(output_path))
        ensure_directory(output_path)


def browse_logs(self) -> None:
    self.logger.info(BROWSING_FOR_LOGS_DIRECTORY)
    logs_path = QFileDialog.getExistingDirectory(self, SELECT_LOGS_DIRECTORY)
    if logs_path:
        self.logs_input.setText(os.path.abspath(logs_path))
        ensure_directory(logs_path)


def browse_imatrix(self) -> None:
    self.logger.info(BROWSING_FOR_IMATRIX_FILE)
    imatrix_file, _ = QFileDialog.getOpenFileName(
        self, SELECT_IMATRIX_FILE, "", DAT_FILES
    )
    if imatrix_file:
        self.imatrix.setText(os.path.abspath(imatrix_file))


def browse_lora_input(self) -> None:
    self.logger.info(BROWSING_FOR_LORA_INPUT_DIRECTORY)
    lora_input_path = QFileDialog.getExistingDirectory(
        self, SELECT_LORA_INPUT_DIRECTORY
    )
    if lora_input_path:
        self.lora_input.setText(os.path.abspath(lora_input_path))
        ensure_directory(lora_input_path)


def browse_lora_output(self) -> None:
    self.logger.info(BROWSING_FOR_LORA_OUTPUT_FILE)
    lora_output_file, _ = QFileDialog.getSaveFileName(
        self, SELECT_LORA_OUTPUT_FILE, "", GGUF_AND_BIN_FILES
    )
    if lora_output_file:
        self.lora_output.setText(os.path.abspath(lora_output_file))


def download_llama_cpp(self) -> None:
    self.logger.info(STARTING_LLAMACPP_DOWNLOAD)
    asset = self.asset_combo.currentData()
    if not asset:
        show_error(self.logger, NO_ASSET_SELECTED)
        return

    llama_bin = os.path.abspath("llama_bin")
    os.makedirs(llama_bin, exist_ok=True)

    save_path = os.path.join(llama_bin, asset["name"])

    self.download_thread = DownloadThread(asset["browser_download_url"], save_path)
    self.download_thread.progress_signal.connect(self.update_download_progress)
    self.download_thread.finished_signal.connect(self.download_finished)
    self.download_thread.error_signal.connect(self.download_error)
    self.download_thread.start()

    self.download_button.setEnabled(False)
    self.download_progress.setValue(0)


def get_repo_from_env() -> tuple[str, str]:
    repo = os.getenv("AUTOGGUF_BACKEND_REPO", "ggerganov/llama.cpp")

    if not repo or "/" not in repo:
        raise ValueError(INVALID_REPOSITORY_FORMAT)

    owner, repo_name = repo.split("/", 1)
    if not all(part.strip() for part in (owner, repo_name)):
        raise ValueError(REPO_CANNOT_BE_EMPTY)

    return owner, repo_name
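# Illustrative usage sketch (not part of the file): get_repo_from_env lets the
# release source be redirected away from the default ggerganov/llama.cpp
# repository. The fork name below is a placeholder, not a recommendation.
#
#     os.environ["AUTOGGUF_BACKEND_REPO"] = "someuser/llama.cpp"
#     owner, repo_name = get_repo_from_env()  # -> ("someuser", "llama.cpp")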
def refresh_releases(self) -> None:
    self.logger.info(REFRESHING_LLAMACPP_RELEASES)
    try:
        owner, repo = get_repo_from_env()
        url = f"https://api.github.com/repos/{owner}/{repo}/releases"

        # Create SSL context with certifi certificates
        ssl_context = ssl.create_default_context(cafile=certifi.where())

        # Create request
        req = urllib.request.Request(url)

        # Make the request
        with urllib.request.urlopen(req, context=ssl_context) as response:
            if response.status != 200:
                raise urllib.error.HTTPError(
                    url, response.status, "HTTP Error", response.headers, None
                )

            releases = json.loads(response.read().decode("utf-8"))

        self.release_combo.clear()
        for release in releases:
            self.release_combo.addItem(release["tag_name"], userData=release)
        self.release_combo.currentIndexChanged.connect(self.update_assets)
        self.update_assets()

    except ValueError as e:
        show_error(self.logger, f"Invalid repository configuration: {str(e)}")
    except (urllib.error.URLError, urllib.error.HTTPError) as e:
        show_error(self.logger, ERROR_FETCHING_RELEASES.format(str(e)))