mirror of https://github.com/leafspark/AutoGGUF
Compare commits
No commits in common. "main" and "v2.0.0" have entirely different histories.
@@ -19,8 +19,6 @@ jobs:
         os: [windows-latest, ubuntu-latest, macos-latest]
         arch: [x64]
     runs-on: ${{ matrix.os }}
-    outputs:
-      artifact-names: ${{ steps.set-outputs.outputs.artifact-names }}

     steps:
       - uses: actions/checkout@v4

@@ -66,7 +64,6 @@ jobs:
          Copy-Item -Path "src\convert_lora_to_gguf.py" -Destination "$distPath\src"
          Copy-Item -Path "src\convert_lora_to_ggml.py" -Destination "$distPath\src"
          Copy-Item -Path "src\quantize_to_fp8_dynamic.py" -Destination "$distPath\src"
-         Copy-Item -Path ".env.example" -Destination "$distPath\"

      - name: Copy additional files (Linux/macOS)
        if: matrix.os != 'windows-latest'
@@ -78,58 +75,46 @@ jobs:
          cp src/convert_lora_to_gguf.py $distPath/src/
          cp src/convert_lora_to_ggml.py $distPath/src/
          cp src/quantize_to_fp8_dynamic.py $distPath/src/
-         cp .env.example $distPath/

-     - name: Set outputs for artifact name
-       id: set-outputs
-       run: echo "artifact-name=AutoGGUF-${{ matrix.os }}-${{ matrix.arch }}-${{ github.event.inputs.build_type }}-${{ github.sha }}" >> $GITHUB_OUTPUT
+     - name: Generate SHA256 (Windows)
+       if: matrix.os == 'windows-latest'
+       run: |
+         $distPath = if ("${{ github.event.inputs.build_type }}" -eq "RELEASE") { "build\release\dist" } else { "build\dev\dist" }
+         $archSuffix = "-x64"
+         $exeName = "AutoGGUF$archSuffix.exe"
+         $versionHash = "${{ github.sha }}".Substring(0, 7)
+         $hashFile = "AutoGGUF-${{ matrix.os }}-${{ matrix.arch }}-$versionHash.sha256"
+         $hash = (Get-FileHash "$distPath\$exeName" -Algorithm SHA256).Hash.ToLower()
+         "$hash *$exeName" | Out-File -FilePath "$distPath\$hashFile" -Encoding utf8
+
+     - name: Generate SHA256 (Linux)
+       if: matrix.os == 'ubuntu-latest'
+       run: |
+         distPath=$(if [ "${{ github.event.inputs.build_type }}" = "RELEASE" ]; then echo "build/release/dist"; else echo "build/dev/dist"; fi)
+         exeName="AutoGGUF-x64"
+         versionHash=$(echo ${{ github.sha }} | cut -c1-7)
+         hashFile="AutoGGUF-${{ matrix.os }}-x64-$versionHash.sha256"
+         cd $distPath && sha256sum $exeName > $hashFile
+
+     - name: Generate SHA256 (macOS)
+       if: matrix.os == 'macos-latest'
+       run: |
+         distPath=$(if [ "${{ github.event.inputs.build_type }}" = "RELEASE" ]; then echo "build/release/dist"; else echo "build/dev/dist"; fi)
+         exeName="AutoGGUF-x64"
+         versionHash=$(echo ${{ github.sha }} | cut -c1-7)
+         hashFile="AutoGGUF-${{ matrix.os }}-x64-$versionHash.sha256"
+         cd $distPath && shasum -a 256 $exeName > $hashFile

      - name: Upload Artifact
-       uses: actions/upload-artifact@v4
+       uses: actions/upload-artifact@v3
        with:
          name: AutoGGUF-${{ matrix.os }}-${{ matrix.arch }}-${{ github.event.inputs.build_type }}-${{ github.sha }}
-         path: build/${{ github.event.inputs.build_type == 'RELEASE' && 'release' || 'dev' }}/dist
+         path: |
+           build/${{ github.event.inputs.build_type == 'RELEASE' && 'release' || 'dev' }}/dist
+           !build/${{ github.event.inputs.build_type == 'RELEASE' && 'release' || 'dev' }}/dist/AutoGGUF-*.sha256

- generate-checksums:
-   needs: build
-   runs-on: ubuntu-latest
-   steps:
-     - name: Download all artifacts
-       uses: actions/download-artifact@v4
-       with:
-         path: ./artifacts
-
-     - name: Generate SHA256 checksums for all artifacts
-       run: |
-         cd artifacts
-         versionHash=$(echo ${{ github.sha }} | cut -c1-7)
-         echo "# AutoGGUF Build Checksums" > ../checksums.txt
-         echo "Build: ${{ github.event.inputs.build_type }}" >> ../checksums.txt
-         echo "Commit: ${{ github.sha }}" >> ../checksums.txt
-         echo "Date: $(date -u)" >> ../checksums.txt
-         echo "" >> ../checksums.txt
-
-         # Find all artifact directories and generate checksums of their zip equivalents
-         for artifact_dir in AutoGGUF-*-${{ github.event.inputs.build_type }}-${{ github.sha }}; do
-           if [ -d "$artifact_dir" ]; then
-             echo "Processing $artifact_dir..."
-             cd "$artifact_dir"
-
-             # Create a temporary zip to calculate hash (simulating what GitHub creates)
-             zip -r "../temp_${artifact_dir}.zip" .
-             cd ..
-
-             # Generate SHA256 of the zip file
-             hash=$(sha256sum "temp_${artifact_dir}.zip" | cut -d' ' -f1)
-             echo "${hash} ${artifact_dir}.zip" >> ../checksums.txt
-
-             # Clean up the temporary zip
-             rm "temp_${artifact_dir}.zip"
-           fi
-         done
-
-     - name: Upload checksums
-       uses: actions/upload-artifact@v4
+     - name: Upload SHA256
+       uses: actions/upload-artifact@v3
        with:
          name: AutoGGUF-${{ github.sha }}-SHA256
-         path: checksums.txt
+         path: build/${{ github.event.inputs.build_type == 'RELEASE' && 'release' || 'dev' }}/dist/AutoGGUF-*.sha256
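All of the checksum steps above reduce to the same operation: hash the built binary with SHA-256 and write "<hash> *<filename>" to a .sha256 file. A minimal Python sketch of that operation; the path in the usage comment is hypothetical, not the workflow's actual layout:

import hashlib
from pathlib import Path

def write_sha256(binary: Path) -> Path:
    # Hash the file in 1 MiB chunks so large binaries never sit fully in memory.
    digest = hashlib.sha256()
    with binary.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    # "*" marks binary mode, matching the "<hash> *<name>" format written above.
    out = binary.parent / (binary.name + ".sha256")
    out.write_text(f"{digest.hexdigest()} *{binary.name}\n", encoding="utf-8")
    return out

# Hypothetical usage:
# write_sha256(Path("build/release/dist/AutoGGUF-x64.exe"))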
@@ -52,7 +52,7 @@ jobs:
          cat requirements.txt >> detailed_report.txt

      - name: Upload audit results
-       uses: actions/upload-artifact@v4
+       uses: actions/upload-artifact@v3
        with:
          name: pip-audit-report
          path: detailed_report.txt

CHANGELOG.md (49 changed lines)
@@ -1,28 +1,5 @@
 # Changelog
 
-## [v2.0.1] - 2025-05-24
-
-### Added
-- Human readable mappings from KV pairs into model properties
-- certifi library for backend download and update checking
-- Automated checksums in CI process
-
-### Changed
-- Updated llama.cpp backend
-- Improved backend UI, logging, and task handling
-- Enhanced display of model properties and cleaner formatting of KV pairs
-- Updated tensor data formatting and removed redundant KV pairs property
-- Updated CUDA backend check for latest llama.cpp release format
-- Global urllib usage implementation
-- Updated README with more information about patches and updates
-- Edited quick start instructions
-- Small file formatting improvements
-
-### Fixed
-- Type hints corrections
-- Build errors in CI
-- `@upload-artifact` updated to v4
-
 ## [v2.0.0] - 2025-01-27
 
 ### Added

@@ -260,7 +237,7 @@ ### Notes
 - Fast build: Higher unzipped size (97MB), smaller download (38MB)
 - Standard build: Created with PyInstaller, medium download and unzipped size (50MB), potentially slower
 
-## [v1.6.0] - 2024-08-08
+## [1.6.0] - 2024-08-08
 
 ### Changed
 - Resolve licensing issues by using PySide6

@@ -268,7 +245,7 @@ ### Changed
 ### Added
 - Add GPU monitoring support for NVIDIA GPUs
 
-## [v1.5.1] - 2024-08-08
+## [1.5.1] - 2024-08-08
 
 ### Changed
 - Refactor localizations to use them in HF conversion area

@@ -280,7 +257,7 @@ ### Removed
 ### Added
 - Support loading *.gguf file types
 
-## [v1.5.0] - 2024-08-06
+## [1.5.0] - 2024-08-06
 
 ### Changed
 - Refactor localizations to use them in HF conversion area

@@ -293,7 +270,7 @@ ### Added
 ### Fixed
 - Fix scaling on low resolution screens, interface now scrolls
 
-## [v1.4.3] - 2024-08-05
+## [1.4.3] - 2024-08-05
 
 ### Changed
 - Updated src file in release to be Black formatted

@@ -306,7 +283,7 @@ ### Added
 - Added model sharding management support
 - Allow multiple quantization types to be selected and started simultaneously
 
-## [v1.4.2] - 2024-08-04
+## [1.4.2] - 2024-08-04
 
 ### Fixed
 - Resolves bug where Base Model text was shown even when GGML type was selected

@@ -315,13 +292,13 @@ ### Fixed
 ### Changed
 - Minor repository changes
 
-## [v1.4.1] - 2024-08-04
+## [1.4.1] - 2024-08-04
 
 ### Added
 - Dynamic KV Overrides (see wiki: AutoGGUF/wiki/Dynamic-KV-Overrides)
 - Quantization commands are now printed and logged
 
-## [v1.4.0] - 2024-08-04
+## [1.4.0] - 2024-08-04
 
 ### Added
 - LoRA Conversion:

@@ -345,7 +322,7 @@ ### Added
 - Currently includes src folder with conversion tools
 - No console window popup
 
-## [v1.3.1] - 2024-08-04
+## [1.3.1] - 2024-08-04
 
 ### Added
 - AUTOGGUF_CHECK_BACKEND environment variable to disable backend check on start

@@ -353,7 +330,7 @@ ### Added
 ### Changed
 - --onefile build with PyInstaller, _internal directory is no longer required
 
-## [v1.3.0] - 2024-08-03
+## [1.3.0] - 2024-08-03
 
 ### Added
 - Support for new llama-imatrix parameters:

@@ -375,7 +352,7 @@ ### Fixed
 ### Removed
 - Duplicated functions
 
-## [v1.2.1] - 2024-08-03
+## [1.2.1] - 2024-08-03
 
 ### Added
 - Refresh Models button

@@ -384,13 +361,13 @@ ### Added
 ### Fixed
 - iostream llama.cpp issue, quantized_models directory created on launch
 
-## [v1.2.0] - 2024-08-03
+## [1.2.0] - 2024-08-03
 
 ### Added
 - More robust logging (find logs at latest_<timestamp>.log in logs folder)
 - Localizations with support for 28 languages (machine translated using Gemini Experimental 0801)
 
-## [v1.1.0] - 2024-08-03
+## [1.1.0] - 2024-08-03
 
 ### Added
 - Dynamic KV override functionality

@@ -413,7 +390,7 @@ ### Added
 ### Fixed
 - Issue where quantization errored with "AutoGGUF does not have x attribute"
 
-## [v1.0.0] - 2024-08-02
+## [1.0.0] - 2024-08-02
 
 ### Added
 - Initial release

README.md (42 changed lines)
@@ -18,7 +18,7 @@ # AutoGGUF - automated GGUF model quantizer
 
 
 
-<!--  -->
+
 
 <!-- Contribution -->
 [](https://github.com/leafspark/AutoGGUF/issues)

@@ -29,16 +29,16 @@ # AutoGGUF - automated GGUF model quantizer
 
 ## Features
 
-- 📩 Update and manage llama.cpp backends
-- 🗃️ Download and quantize GGUF/safetensors models
+- 📩 Download and manage llama.cpp backends
+- 🗃️ Select and quantize GGUF models
 - 📐 Configure quantization parameters
-- 💻 Monitor system resources in real time during quantization
+- 💻 Monitor system resources during quantization
 - ⏳ Parallel quantization + imatrix generation
 - 🎉 LoRA conversion and merging
 - 📁 Preset saving and loading
 - 8️⃣ AutoFP8 quantization
 - 🪓 GGUF splitting and merging
-- 🌐 HTTP API for automation and monitoring
+- 🌐 HTTP API for automated monitoring
 
 ## Why AutoGGUF?
 - Fast: Saves time on manual configuration

@@ -50,7 +50,7 @@ ## Why AutoGGUF?
 
 ## Quick Start
 
-### Cross-platform (recommended)
+### Cross-platform
 1. `git clone https://github.com/leafspark/AutoGGUF`
 2. `cd AutoGGUF`
 3. Install dependencies:

@@ -73,13 +73,13 @@ ### Windows (for the impatient)
 4. Any necessary folders will be automatically created
 
 Setup builds:
-1. Download the setup variant of latest release
+1. Download setup variant of latest release
 2. Extract all files to a folder
 3. Run the setup program
-4. The .gguf extension will be registered with the program automatically
+4. The .GGUF extension will be registered with the program automatically
 5. Run the program from the Start Menu or desktop shortcuts
 
-After launching the program, you may access its local server at port 7001 (set `AUTOGGUF_SERVER` to "enabled" first).
+After launching the program, you may access its local server at port 7001 (set `AUTOGGUF_SERVER` to "enabled" first)
 
 ### Verifying Releases
 
@@ -132,27 +132,23 @@ ## Localizations
 
 View the list of supported languages at [AutoGGUF/wiki/Installation#configuration](https://github.com/leafspark/AutoGGUF/wiki/Installation#configuration) (LLM translated, except for English).
 
-Languages will be updated as soon as possible after an update, or as a part of the update.
+More languages will be updated as soon as possible!
 
-To use a specific language, set the `AUTOGGUF_LANGUAGE` environment variable to one of the listed language codes (note: some languages may not be fully supported yet, in which case the UI elements will fall back to English).
+To use a specific language, set the `AUTOGGUF_LANGUAGE` environment variable to one of the listed language codes (note: some languages may not be fully supported yet, those will fall back to English).
 
 ## Issues
 
-- Some inconsistent logging and signal handling
-- Missing or duplicated translations (priority)
-- Buggy/incomplete API interfaces
-- Code review and formatting (priority)
+- Some inconsistent logging
+- Missing translations
 
 ## Planned Features
 
-- [ ] Time estimation for quantization
-- [ ] Quantization file size estimate
-- [ ] Perplexity testing
-- [ ] bitsandbytes support
+- Time estimation for quantization
+- Quantization file size estimate
+- Perplexity testing
+- bitsandbytes
 
-#### Project Status
-
-AutoGGUF has now entered maintenance mode. It's considered stable and feature-complete for most use cases, so I'm not actively developing new features, but I'll continue to publish occasional builds, update dependencies regularly, and fix critical bugs as needed. If you encounter issues or have suggestions, feel free to open an issue.
+Due to my limited availability and a lack of time, I won't be actively developing new features for this project as much. While I'll continue to publish builds from time to time, I strongly recommend running from source if you want to stay up to date with the latest changes. I'm still committed to keeping dependencies updated weekly and making small maintenance fixes to ensure everything runs smoothly. If you run into any problems or notice issues, please don't hesitate to let me know - I appreciate your feedback and will do my best to address them.
 
 ## Support
 

@@ -166,5 +162,3 @@ ## Contributing
 ## Stargazers
 
 [](https://star-history.com/#leafspark/AutoGGUF&Date)
-
-`Last Updated: May 24, 2025`
@@ -4,10 +4,10 @@ ## Supported Versions
 
 | Version | Supported |
 |-----------------|--------------------|
-| stable (v2.0.x) | :white_check_mark: |
+| stable (v1.9.x) | :white_check_mark: |
 
 Beta versions are not officially supported and may contain unknown security vulnerabilities. Use them at your own risk.
 
 ## Reporting a Vulnerability
 
-Use the Issues tab, or for severe vulnerabilities, please contact the maintainers via email.
+Use the Issues tab, or for severe vulnerabilities please contact the maintainers via email.

@@ -1,14 +1,13 @@
 PyYAML~=6.0.2
-psutil~=7.0.0
+psutil~=6.1.1
 pynvml~=12.0.0
-PySide6~=6.9.1
+PySide6~=6.8.1
-safetensors~=0.5.3
+safetensors~=0.5.2
 numpy<2.0.0
-torch~=2.7.0
+torch~=2.5.1
 sentencepiece~=0.2.0
-setuptools~=80.7.1
+setuptools~=75.6.0
-huggingface-hub~=0.33.1
+huggingface-hub~=0.27.0
-transformers~=4.51.3
+transformers~=4.48.0
-fastapi~=0.115.12
+fastapi~=0.115.6
-uvicorn~=0.34.2
+uvicorn~=0.34.0
-certifi~=2025.4.26
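Most pins above use the compatible-release operator `~=`, which allows patch-level upgrades but not the next minor release; `psutil~=6.1.1`, for example, accepts 6.1.x but rejects 6.2.0. A quick way to check a candidate version against such a pin, using the third-party packaging library:

from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=6.1.1")  # equivalent to >=6.1.1, ==6.1.*
print("6.1.4" in spec)  # True
print("6.2.0" in spec)  # False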
setup.py (4 changed lines)

@@ -5,12 +5,12 @@
 
 setup(
     name="AutoGGUF",
-    version="v2.0.1",
+    version="v1.9.0",
     packages=[""],
     url="https://github.com/leafspark/AutoGGUF",
     license="apache-2.0",
     author="leafspark",
-    author_email="leafspark@proton.me",
+    author_email="",
     description="automatically quant GGUF models",
     install_requires=required,
     entry_points={"console_scripts": ["autogguf-gui = main:main"]},

@@ -1,12 +1,11 @@
 import json
+import os
 import shutil
 import urllib.error
 import urllib.request
-import certifi
-import ssl
 from datetime import datetime
 from functools import partial, wraps
-from typing import List
+from typing import Any, List, Union
 
 from PySide6.QtCore import *
 from PySide6.QtGui import *

@@ -340,15 +339,15 @@ def __init__(self, args: List[str]) -> None:
         output_layout.addWidget(output_button)
         self.merge_gguf_layout.addLayout(output_layout)
 
-        # Merge button
-        merge_button = QPushButton(MERGE_GGUF)
-        merge_button.clicked.connect(
+        # Split button
+        split_button = QPushButton(MERGE_GGUF)
+        split_button.clicked.connect(
             lambda: self.merge_gguf(
                 self.merge_gguf_input.text(),
                 self.merge_gguf_output.text(),
             )
         )
-        self.merge_gguf_layout.addWidget(merge_button)
+        self.merge_gguf_layout.addWidget(split_button)
         self.merge_gguf_dialog.setLayout(self.merge_gguf_layout)
 
         # HF Upload Window

@@ -764,7 +763,7 @@ def __init__(self, args: List[str]) -> None:
 
         self.extra_arguments = QLineEdit()
         quant_options_layout.addRow(
-            self.create_label(EXTRA_ARGUMENTS, EXTRA_ARGUMENTS_LABEL),
+            self.create_label(EXTRA_ARGUMENTS, EXTRA_COMMAND_ARGUMENTS),
             self.extra_arguments,
         )
 
@@ -1150,10 +1149,7 @@ def check_for_updates(self) -> None:
             url = "https://api.github.com/repos/leafspark/AutoGGUF/releases/latest"
             req = urllib.request.Request(url)
-
-            # Create SSL context with certifi certificates
-            ssl_context = ssl.create_default_context(cafile=certifi.where())
-
-            with urllib.request.urlopen(req, context=ssl_context) as response:
+            with urllib.request.urlopen(req) as response:
                 if response.status != 200:
                     raise urllib.error.HTTPError(
                         url, response.status, "HTTP Error", response.headers, None
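The main side of the hunk above routes the update check through certifi's certificate bundle. A minimal standalone sketch of that pattern, with error handling omitted (the URL is the one from the code above):

import ssl
import urllib.request

import certifi

url = "https://api.github.com/repos/leafspark/AutoGGUF/releases/latest"
req = urllib.request.Request(url)

# Validate TLS against certifi's CA bundle rather than relying on whatever
# certificate store the host (or a frozen build) happens to provide.
ssl_context = ssl.create_default_context(cafile=certifi.where())

with urllib.request.urlopen(req, context=ssl_context) as response:
    print(response.status, response.headers.get("Content-Type"))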
@@ -1206,25 +1202,15 @@ def refresh_backends(self) -> None:
             and "cudart-llama" not in item.lower()
         ]
 
-        def extract_b_val(name: str) -> int:
-            match = re.search(r"b(\d+)", name)
-            return int(match.group(1)) if match else -1
-
         if valid_backends:
-            # Sort by newest version
-            valid_backends.sort(key=lambda x: extract_b_val(x[0]), reverse=True)
-
             for name, path in valid_backends:
                 self.backend_combo.addItem(name, userData=path)
-            self.backend_combo.setEnabled(True)
-            # Selects the newest version (now at index 0)
-            self.backend_combo.setCurrentIndex(0)
+            self.backend_combo.setEnabled(
+                True
+            )  # Enable the combo box if there are valid backends
         else:
             self.backend_combo.addItem(NO_BACKENDS_AVAILABLE)
             self.backend_combo.setEnabled(False)
 
         self.logger.info(FOUND_VALID_BACKENDS.format(len(valid_backends)))
 
     def save_task_preset(self, task_item) -> None:
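The helper removed by this hunk (present on main) sorts backends by the llama.cpp build number embedded in a release name such as b4404, so the newest build lands at index 0. A self-contained sketch of that sort; the backend names are made up for illustration:

import re

def extract_b_val(name: str) -> int:
    # Pull the numeric part out of a llama.cpp build tag like "b4404";
    # names without one sort last.
    match = re.search(r"b(\d+)", name)
    return int(match.group(1)) if match else -1

backends = ["llama-b4300-bin-win-avx2-x64", "llama-b4404-bin-win-avx2-x64"]
backends.sort(key=extract_b_val, reverse=True)
print(backends[0])  # newest build first: llama-b4404-bin-win-avx2-x64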
@@ -1266,13 +1252,13 @@ def download_finished(self, extract_dir) -> None:
                 )
             else:
                 QMessageBox.warning(
-                    self, CUDA_EXTRACTION_FAILED, NO_SUITABLE_CUDA_BACKEND_EXTRACTION
+                    self, CUDA_EXTRACTION_FAILED, NO_SUITABLE_CUDA_BACKEND_FOUND
                 )
         else:
             QMessageBox.information(
                 self,
                 DOWNLOAD_COMPLETE,
-                LLAMACPP_DOWNLOADED_AND_EXTRACTED.format(extract_dir),
+                LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED.format(extract_dir),
             )
 
         self.refresh_backends()  # Refresh the backends after successful download

@@ -1920,25 +1906,12 @@ def show_task_details(self, item) -> None:
         # Load existing content
         if os.path.exists(task_item.log_file):
             with open_file_safe(task_item.log_file, "r") as f:
-                content = f.read().rstrip("\n")  # Remove trailing newlines
-                log_text.setPlainText(content)
-
-        # Scroll to the end
-        log_text.moveCursor(QTextCursor.End)
+                log_text.setPlainText(f.read())
 
         # Connect to the thread if it's still running
         for thread in self.quant_threads:
             if thread.log_file == task_item.log_file:
-                # Create a local slot function that updates the text
-                def update_log(text):
-                    log_text.appendPlainText(text)
-                    log_text.moveCursor(QTextCursor.End)
-
-                thread.output_signal.connect(update_log)
-                # Disconnect the signal when the dialog is destroyed
-                log_dialog.destroyed.connect(
-                    lambda: thread.output_signal.disconnect(update_log)
-                )
+                thread.output_signal.connect(log_text.appendPlainText)
                 break
 
         log_dialog.exec()

@@ -98,7 +98,7 @@ def mouseMoveEvent(self, event) -> None:
     def mouseReleaseEvent(self, event) -> None:
         self.pressing = False
 
-    def toggle_maximize(self) -> None:
+    def toggle_maximize(self):
         if self.isMaximized:
             self.parent.showNormal()
             if self.normal_size:

@@ -2,8 +2,6 @@
 import urllib.request
 import urllib.error
 import zipfile
-import ssl
-import certifi
 from PySide6.QtCore import QThread, Signal
 
 

@@ -21,10 +19,7 @@ def run(self) -> None:
         try:
             req = urllib.request.Request(self.url)
-
-            # Create SSL context with certifi certificates
-            ssl_context = ssl.create_default_context(cafile=certifi.where())
-
-            with urllib.request.urlopen(req, context=ssl_context) as response:
+            with urllib.request.urlopen(req) as response:
                 if response.status != 200:
                     raise urllib.error.HTTPError(
                         self.url, response.status, "HTTP Error", response.headers, None

@@ -22,7 +22,6 @@ def __init__(self, parent=None) -> None:
 
         self.key_input = QLineEdit()
         self.key_input.setPlaceholderText("Key")
 
         # Set validator for key input (letters and dots only)
         key_validator = QRegularExpressionValidator(QRegularExpression(r"[A-Za-z.]+"))
         self.key_input.setValidator(key_validator)
@@ -1,7 +1,7 @@
 import os
 import re
 
-AUTOGGUF_VERSION = "v2.0.1"
+AUTOGGUF_VERSION = "v2.0.0"
 
 
 class _Localization:

@@ -53,11 +53,13 @@ def __init__(self):
         self.QUANTIZE_TO_FP8_DYNAMIC = "Quantize to FP8 Dynamic"
         self.OPEN_MODEL_FOLDER = "Open Model Folder"
         self.QUANTIZE = "Quantize"
+        self.OPEN_MODEL_FOLDER = "Open Model Folder"
         self.INPUT_MODEL = "Input Model:"
 
         # GGUF Verification
         self.INVALID_GGUF_FILE = "Invalid GGUF file: {}"
         self.SHARDED_MODEL_NAME = "{} (Sharded)"
+        self.IMPORTED_MODEL_TOOLTIP = "Imported model: {}"
         self.CONCATENATED_FILE_WARNING = "This is a concatenated file part. It will not work with llama-quantize; please concat the file first."
         self.CONCATENATED_FILES_FOUND = (
             "Found {} concatenated file parts. Please concat the files first."

@@ -248,6 +250,12 @@ def __init__(self):
         self.LLAMACPP_DOWNLOADED_AND_EXTRACTED = (
             "llama.cpp binary downloaded and extracted to {0}"
         )
+        self.NO_SUITABLE_CUDA_BACKEND_FOUND = (
+            "No suitable CUDA backend found for extraction"
+        )
+        self.LLAMACPP_BINARY_DOWNLOADED_AND_EXTRACTED = (
+            "llama.cpp binary downloaded and extracted to {0}"
+        )
         self.REFRESHING_LLAMACPP_RELEASES = "Refreshing llama.cpp releases"
         self.UPDATING_ASSET_LIST = "Updating asset list"
         self.UPDATING_CUDA_OPTIONS = "Updating CUDA options"

@@ -365,7 +373,7 @@ def __init__(self):
         self.ADDING_LORA_ADAPTER = "Adding LoRA Adapter..."
         self.DELETING_LORA_ADAPTER = "Deleting LoRA Adapter..."
         self.SELECT_LORA_ADAPTER_FILE = "Select LoRA Adapter File"
-        self.STARTING_LORA_EXPORT = "Starting LoRA export"
+        self.STARTING_LORA_EXPORT = "Starting LoRA export..."
         self.SELECT_OUTPUT_TYPE = "Select Output Type (GGUF or GGML)"
         self.BASE_MODEL = "Base Model"
         self.SELECT_BASE_MODEL_FILE = "Select Base Model File (GGUF)"

@@ -446,6 +454,7 @@ def __init__(self):
         self.UPLOAD = "Upload"
         self.INFO = "Info"
 
+        self.EXTRA_COMMAND_ARGUMENTS = "Additional command-line arguments"
         self.COPIED_COMMAND_TO_CLIPBOARD = "Copied command to clipboard:"
 
         # Repository
@@ -24,21 +24,8 @@ def __init__(self, model_info, parent=None) -> None:
     def format_model_info(self, model_info) -> str:
         html = "<h2>Model Information</h2>"
         html += f"<p><b>Architecture:</b> {model_info.get('architecture', 'N/A')}</p>"
-
-        # Format quantization types
-        quant_types = model_info.get("quantization_type", [])
-        if quant_types:
-            # Clean up the format: remove "- type " prefix and join with " | "
-            formatted_types = []
-            for qtype in quant_types:
-                # Remove "- type " prefix if present
-                clean_type = qtype.replace("- type ", "").strip()
-                formatted_types.append(clean_type)
-            quant_display = " | ".join(formatted_types)
-        else:
-            quant_display = "N/A"
-
-        html += f"<p><b>Quantization Type:</b> {quant_display}</p>"
+        html += f"<p><b>Quantization Type:</b> {model_info.get('quantization_type', 'N/A')}</p>"
+        html += f"<p><b>KV Pairs:</b> {model_info.get('kv_pairs', 'N/A')}</p>"
         html += f"<p><b>Tensors:</b> {model_info.get('tensors', 'N/A')}</p>"
 
         html += "<h3>Key-Value Pairs:</h3>"
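On the main side of the hunk above, llama.cpp's per-tensor type lines are collapsed into one display string. A standalone sketch of that clean-up; the sample strings are illustrative, not actual loader output:

quant_types = ["- type  f32:   65 tensors", "- type q4_K:  193 tensors"]

# Strip the "- type " prefix from each entry and join them for display,
# falling back to "N/A" when nothing was collected.
formatted_types = [q.replace("- type ", "").strip() for q in quant_types]
quant_display = " | ".join(formatted_types) if formatted_types else "N/A"

print(quant_display)  # f32:   65 tensors | q4_K:  193 tensors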
@@ -59,34 +59,6 @@ def run(self) -> None:
             self.error_signal.emit(str(e))
 
     def parse_model_info(self, line) -> None:
-        # Mapping of technical keys to human-readable names
-        key_mappings = {
-            "general.architecture": "Architecture",
-            "general.name": "Model Name",
-            "general.file_type": "File Type",
-            "general.quantization_version": "Quantization Version",
-            "llama.block_count": "Layers",
-            "llama.context_length": "Context Length",
-            "llama.embedding_length": "Embedding Size",
-            "llama.feed_forward_length": "Feed Forward Length",
-            "llama.attention.head_count": "Attention Heads",
-            "llama.attention.head_count_kv": "Key-Value Heads",
-            "llama.attention.layer_norm_rms_epsilon": "RMS Norm Epsilon",
-            "llama.rope.freq_base": "RoPE Frequency Base",
-            "llama.rope.dimension_count": "RoPE Dimensions",
-            "llama.vocab_size": "Vocabulary Size",
-            "tokenizer.ggml.model": "Tokenizer Model",
-            "tokenizer.ggml.pre": "Tokenizer Preprocessing",
-            "tokenizer.ggml.tokens": "Tokens",
-            "tokenizer.ggml.token_type": "Token Types",
-            "tokenizer.ggml.merges": "BPE Merges",
-            "tokenizer.ggml.bos_token_id": "Begin of Sequence Token ID",
-            "tokenizer.ggml.eos_token_id": "End of Sequence Token ID",
-            "tokenizer.chat_template": "Chat Template",
-            "tokenizer.ggml.padding_token_id": "Padding Token ID",
-            "tokenizer.ggml.unk_token_id": "Unknown Token ID",
-        }
-
         # Parse output for model information
         if "llama_model_loader: loaded meta data with" in line:
             parts = line.split()

@@ -94,25 +66,10 @@ def parse_model_info(self, line) -> None:
             self.model_info["tensors"] = parts[9]
         elif "general.architecture" in line:
             self.model_info["architecture"] = line.split("=")[-1].strip()
-        elif line.startswith("llama_model_loader: - kv") and "=" in line:
-            # Split on '=' and take the parts
-            parts = line.split("=", 1)  # Split only on first '='
-            left_part = parts[0].strip()
-            value = parts[1].strip()
-
-            # Extract key and type from left part
-            # Format: "llama_model_loader: - kv N: key type"
-            kv_parts = left_part.split(":")
-            if len(kv_parts) >= 3:
-                key_type_part = kv_parts[2].strip()  # This is "key type"
-                key = key_type_part.rsplit(" ", 1)[
-                    0
-                ]  # Everything except last word (type)
-
-                # Use human-readable name if available, otherwise use original key
-                display_key = key_mappings.get(key, key)
-
-                self.model_info.setdefault("kv_data", {})[display_key] = value
+        elif line.startswith("llama_model_loader: - kv"):
+            key = line.split(":")[2].strip()
+            value = line.split("=")[-1].strip()
+            self.model_info.setdefault("kv_data", {})[key] = value
         elif line.startswith("llama_model_loader: - type"):
             parts = line.split(":")
             if len(parts) > 1:
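The branch removed here (present on main) parses llama.cpp loader lines of the form `llama_model_loader: - kv N: <key> <type> = <value>` and substitutes a human-readable label for known keys. A standalone sketch of that parse; the sample line and the single-entry mapping are illustrative:

key_mappings = {"general.architecture": "Architecture"}

line = "llama_model_loader: - kv   0: general.architecture str = llama"

model_info: dict = {}
if line.startswith("llama_model_loader: - kv") and "=" in line:
    left_part, value = (part.strip() for part in line.split("=", 1))
    kv_parts = left_part.split(":")
    if len(kv_parts) >= 3:
        key_type_part = kv_parts[2].strip()            # "general.architecture str"
        key = key_type_part.rsplit(" ", 1)[0].strip()  # drop the trailing type token
        display_key = key_mappings.get(key, key)
        model_info.setdefault("kv_data", {})[display_key] = value

print(model_info)  # {'kv_data': {'Architecture': 'llama'}}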
@@ -95,41 +95,29 @@ def show_task_context_menu(self, position) -> None:
 
     def show_task_properties(self, item) -> None:
         self.logger.debug(SHOWING_PROPERTIES_FOR_TASK.format(item.text()))
+        task_item = self.task_list.itemWidget(item)
         for thread in self.quant_threads:
+            if thread.log_file == task_item.log_file:
                 model_info_dialog = ModelInfoDialog(thread.model_info, self)
 
                 model_info_dialog.exec()
                 break
 
     def cancel_task(self, item) -> None:
-        # TODO: fix possibly buggy signal behavior
+        self.logger.info(CANCELLING_TASK.format(item.text()))
         task_item = self.task_list.itemWidget(item)
-        if task_item:
-            task_name = task_item.task_name  # Store the name before any changes
-            self.logger.info(CANCELLING_TASK.format(task_name))
-
-            # Find the thread and disconnect signals before terminating
         for thread in self.quant_threads:
             if thread.log_file == task_item.log_file:
-                # Disconnect all signals from this thread first
-                try:
-                    thread.error_signal.disconnect()  # Disconnect all error signal connections
-                    thread.output_signal.disconnect()  # Disconnect all output signal connections
-                except TypeError:
-                    # No connections to disconnect
-                    pass
-
-                # Now terminate the thread
                 thread.terminate()
+                task_item.update_status(CANCELED)
                 self.quant_threads.remove(thread)
                 break
 
     def delete_task(self, item) -> None:
-        task_item = self.task_list.itemWidget(item)
-        if not task_item:
-            return
-
-        task_name = task_item.task_name  # Store task_name before deletion
-        self.logger.info(DELETING_TASK.format(task_name))
+        self.logger.info(DELETING_TASK.format(item.text()))
+        # Cancel the task first
+        self.cancel_task(item)
 
         reply = QMessageBox.question(
             self,

@@ -138,16 +126,12 @@ def delete_task(self, item) -> None:
             QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
             QMessageBox.StandardButton.No,
         )
 
         if reply == QMessageBox.StandardButton.Yes:
-            # Cancel the task first (which disconnects signals)
-            self.cancel_task(item)
-
-            # Now remove from list and delete
+            task_item = self.task_list.itemWidget(item)
             row = self.task_list.row(item)
             self.task_list.takeItem(row)
 
-            # Delete the widget after removing from list
+            if task_item:
                 task_item.deleteLater()
 
     def update_status(self, status) -> None:

File diff suppressed because it is too large
@@ -24,10 +24,11 @@
 
 if TYPE_CHECKING:
     from torch import Tensor
 
 import gguf
 
 # reuse model definitions from convert_hf_to_gguf.py
-from convert_hf_to_gguf import LazyTorchTensor, ModelBase
+from convert_hf_to_gguf import LazyTorchTensor, Model
 
 logger = logging.getLogger("lora-to-gguf")
 

@@ -372,11 +373,11 @@ def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
         sys.exit(1)
     else:
         logger.info(f"Loading base model: {dir_base_model.name}")
-        hparams = ModelBase.load_hparams(dir_base_model)
+        hparams = Model.load_hparams(dir_base_model)
 
     with torch.inference_mode():
         try:
-            model_class = ModelBase.from_model_architecture(hparams["architectures"][0])
+            model_class = Model.from_model_architecture(hparams["architectures"][0])
         except NotImplementedError:
             logger.error(f"Model {hparams['architectures'][0]} is not supported")
             sys.exit(1)

@@ -436,7 +437,7 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                 "Embeddings is present in the adapter. This can be due to new tokens added during fine tuning"
             )
             logger.error(
-                "Please refer to https://github.com/ggml-org/llama.cpp/pull/9948"
+                "Please refer to https://github.com/ggerganov/llama.cpp/pull/9948"
             )
             sys.exit(1)
 

@@ -466,7 +467,7 @@ def modify_tensors(
         # some archs may have the same tensor for lm_head and output (tie word embeddings)
         # in this case, adapters targeting lm_head will fail when using llama-export-lora
         # therefore, we ignore them for now
-        # see: https://github.com/ggml-org/llama.cpp/issues/9065
+        # see: https://github.com/ggerganov/llama.cpp/issues/9065
         if name == "lm_head.weight" and len(dest) == 0:
             raise ValueError(
                 "lm_head is present in adapter, but is ignored in base model"
@@ -108,7 +108,6 @@ class LLM:
         EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
         EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
         EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
-        MOE_EVERY_N_LAYERS = "{arch}.moe_every_n_layers"
         POOLING_TYPE = "{arch}.pooling_type"
         LOGIT_SCALE = "{arch}.logit_scale"
         DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"

@@ -120,8 +119,6 @@ class LLM:
         TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
         RESIDUAL_SCALE = "{arch}.residual_scale"
         EMBEDDING_SCALE = "{arch}.embedding_scale"
-        TOKEN_SHIFT_COUNT = "{arch}.token_shift_count"
-        INTERLEAVE_MOE_LAYER_STEP = "{arch}.interleave_moe_layer_step"
 
     class Attention:
         HEAD_COUNT = "{arch}.attention.head_count"

@@ -137,15 +134,9 @@ class Attention:
         CAUSAL = "{arch}.attention.causal"
         Q_LORA_RANK = "{arch}.attention.q_lora_rank"
         KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
-        DECAY_LORA_RANK = "{arch}.attention.decay_lora_rank"
-        ICLR_LORA_RANK = "{arch}.attention.iclr_lora_rank"
-        VALUE_RESIDUAL_MIX_LORA_RANK = "{arch}.attention.value_residual_mix_lora_rank"
-        GATE_LORA_RANK = "{arch}.attention.gate_lora_rank"
         REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
         SLIDING_WINDOW = "{arch}.attention.sliding_window"
         SCALE = "{arch}.attention.scale"
-        KEY_LENGTH_MLA = "{arch}.attention.key_length_mla"
-        VALUE_LENGTH_MLA = "{arch}.attention.value_length_mla"
 
     class Rope:
         DIMENSION_COUNT = "{arch}.rope.dimension_count"

@@ -198,6 +189,7 @@ class Tokenizer:
         UNK_ID = "tokenizer.ggml.unknown_token_id"
         SEP_ID = "tokenizer.ggml.seperator_token_id"
         PAD_ID = "tokenizer.ggml.padding_token_id"
+        CLS_ID = "tokenizer.ggml.cls_token_id"
         MASK_ID = "tokenizer.ggml.mask_token_id"
         ADD_BOS = "tokenizer.ggml.add_bos_token"
         ADD_EOS = "tokenizer.ggml.add_eos_token"

@@ -225,30 +217,6 @@ class Adapter:
         TYPE = "adapter.type"
         LORA_ALPHA = "adapter.lora.alpha"
 
-    class ClipVision:
-        PROJECTOR_TYPE = "clip.projector_type"
-        HAS_VISION_ENCODER = "clip.has_vision_encoder"
-        HAS_LLAVA_PROJECTOR = "clip.has_llava_projector"
-        IMAGE_SIZE = "clip.vision.image_size"
-        PATCH_SIZE = "clip.vision.patch_size"
-        EMBEDDING_LENGTH = "clip.vision.embedding_length"
-        FEED_FORWARD_LENGTH = "clip.vision.feed_forward_length"
-        PROJECTION_DIM = "clip.vision.projection_dim"
-        BLOCK_COUNT = "clip.vision.block_count"
-        IMAGE_MEAN = "clip.vision.image_mean"
-        IMAGE_STD = "clip.vision.image_std"
-        SPATIAL_MERGE_SIZE = "clip.vision.spatial_merge_size"
-        USE_GELU = "clip.use_gelu"
-        USE_SILU = "clip.use_silu"
-        N_WA_PATTERN = "clip.vision.n_wa_pattern"  # used by qwen2.5vl
-
-        class Attention:
-            HEAD_COUNT = "clip.vision.attention.head_count"
-            LAYERNORM_EPS = "clip.vision.attention.layer_norm_epsilon"
-
-        class Projector:
-            SCALE_FACTOR = "clip.vision.projector.scale_factor"
 
 #
 # recommended mapping of model tensor names for storage in gguf

@@ -258,13 +226,10 @@ class Projector:
 class GGUFType:
     MODEL = "model"
     ADAPTER = "adapter"
-    CLIP_VISION = "clip-vision"
 
 
 class MODEL_ARCH(IntEnum):
-    CLIP_VISION = auto()  # dummy arch for clip.cpp
     LLAMA = auto()
-    LLAMA4 = auto()
     DECI = auto()
     FALCON = auto()
     BAICHUAN = auto()

@@ -277,7 +242,6 @@ class MODEL_ARCH(IntEnum):
     REFACT = auto()
     BERT = auto()
     NOMIC_BERT = auto()
-    NOMIC_BERT_MOE = auto()
     JINA_BERT_V2 = auto()
     BLOOM = auto()
     STABLELM = auto()

@@ -285,11 +249,8 @@ class MODEL_ARCH(IntEnum):
     QWEN2 = auto()
     QWEN2MOE = auto()
     QWEN2VL = auto()
-    QWEN3 = auto()
-    QWEN3MOE = auto()
     PHI2 = auto()
     PHI3 = auto()
-    PHIMOE = auto()
     PLAMO = auto()
     CODESHELL = auto()
     ORION = auto()

@@ -298,12 +259,8 @@ class MODEL_ARCH(IntEnum):
     MINICPM3 = auto()
     GEMMA = auto()
     GEMMA2 = auto()
-    GEMMA3 = auto()
     STARCODER2 = auto()
     RWKV6 = auto()
-    RWKV6QWEN2 = auto()
-    RWKV7 = auto()
-    ARWKV7 = auto()
     MAMBA = auto()
     XVERSE = auto()
     COMMAND_R = auto()

@@ -317,7 +274,6 @@ class MODEL_ARCH(IntEnum):
     DEEPSEEK = auto()
     DEEPSEEK2 = auto()
     CHATGLM = auto()
-    GLM4 = auto()
     BITNET = auto()
     T5 = auto()
     T5ENCODER = auto()

@@ -328,18 +284,6 @@ class MODEL_ARCH(IntEnum):
     GRANITE_MOE = auto()
     CHAMELEON = auto()
     WAVTOKENIZER_DEC = auto()
-    PLM = auto()
-    BAILINGMOE = auto()
-
-
-class VISION_PROJECTOR_TYPE(IntEnum):
-    MLP = auto()
-    LDP = auto()
-    LDPV2 = auto()
-    RESAMPLER = auto()
-    GLM_EDGE = auto()
-    MERGER = auto()
-    GEMMA3 = auto()
 
 
 class MODEL_TENSOR(IntEnum):
@@ -389,26 +333,13 @@ class MODEL_TENSOR(IntEnum):
     SSM_A = auto()
     SSM_D = auto()
     SSM_OUT = auto()
-    TIME_MIX_W0 = auto()
     TIME_MIX_W1 = auto()
     TIME_MIX_W2 = auto()
-    TIME_MIX_A0 = auto()
-    TIME_MIX_A1 = auto()
-    TIME_MIX_A2 = auto()
-    TIME_MIX_V0 = auto()
-    TIME_MIX_V1 = auto()
-    TIME_MIX_V2 = auto()
-    TIME_MIX_G1 = auto()
-    TIME_MIX_G2 = auto()
-    TIME_MIX_K_K = auto()
-    TIME_MIX_K_A = auto()
-    TIME_MIX_R_K = auto()
     TIME_MIX_LERP_X = auto()
     TIME_MIX_LERP_K = auto()
     TIME_MIX_LERP_V = auto()
     TIME_MIX_LERP_R = auto()
     TIME_MIX_LERP_G = auto()
-    TIME_MIX_LERP_FUSED = auto()
     TIME_MIX_LERP_W = auto()
     TIME_MIX_FIRST = auto()
     TIME_MIX_DECAY = auto()

@@ -429,8 +360,6 @@ class MODEL_TENSOR(IntEnum):
     ATTN_Q_B = auto()
     ATTN_KV_A_MQA = auto()
     ATTN_KV_B = auto()
-    ATTN_K_B = auto()
-    ATTN_V_B = auto()
     ATTN_Q_A_NORM = auto()
     ATTN_KV_A_NORM = auto()
     FFN_SUB_NORM = auto()

@@ -481,51 +410,10 @@ class MODEL_TENSOR(IntEnum):
     POSNET_ATTN_K = auto()
     POSNET_ATTN_V = auto()
     POSNET_ATTN_OUT = auto()
-    # vision
-    V_MMPROJ = auto()
-    V_MMPROJ_FC = auto()
-    V_MMPROJ_MLP = auto()
-    V_MMPROJ_PEG = auto()
-    V_ENC_EMBD_CLS = auto()
-    V_ENC_EMBD_PATCH = auto()
-    V_ENC_EMBD_POS = auto()
-    V_ENC_ATTN_Q = auto()
-    V_ENC_ATTN_Q_NORM = auto()
-    V_ENC_ATTN_K = auto()
-    V_ENC_ATTN_K_NORM = auto()
-    V_ENC_ATTN_V = auto()
-    V_ENC_INPUT_NORM = auto()
-    V_ENC_OUTPUT = auto()
-    V_ENC_OUTPUT_NORM = auto()
-    V_ENC_FFN_UP = auto()
-    V_ENC_FFN_GATE = auto()
-    V_ENC_FFN_DOWN = auto()
-    V_LAYER_SCALE_1 = auto()
-    V_LAYER_SCALE_2 = auto()
-    V_PRE_NORM = auto()
-    V_POST_NORM = auto()
-    V_MM_INP_NORM = auto()
-    V_MM_INP_PROJ = auto()  # gemma3
-    V_MM_SOFT_EMB_NORM = auto()  # gemma3
-    V_RESMPL_POS_EMBD_K = auto()  # minicpmv
-    V_RESMPL_ATTN_Q = auto()  # minicpmv
-    V_RESMPL_ATTN_K = auto()  # minicpmv
-    V_RESMPL_ATTN_V = auto()  # minicpmv
-    V_RESMPL_ATTN_OUT = auto()  # minicpmv
-    V_RESMPL_KV = auto()  # minicpmv
-    V_RESMPL_KV_NORM = auto()  # minicpmv
-    V_RESMPL_POST_NORM = auto()  # minicpmv
-    V_RESMPL_Q_NORM = auto()  # minicpmv
-    V_RESMPL_PROJ = auto()  # minicpmv
-    V_RESMPL_QUERY = auto()  # minicpmv
-    V_TOK_EMBD_IMG_BREAK = auto()  # pixtral
-    V_MM_PATCH_MERGER = auto()  # mistral small 3.1
 
 
 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
-    MODEL_ARCH.CLIP_VISION: "clip",  # dummy arch for clip.cpp
     MODEL_ARCH.LLAMA: "llama",
-    MODEL_ARCH.LLAMA4: "llama4",
     MODEL_ARCH.DECI: "deci",
     MODEL_ARCH.FALCON: "falcon",
     MODEL_ARCH.BAICHUAN: "baichuan",

@@ -538,7 +426,6 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.REFACT: "refact",
     MODEL_ARCH.BERT: "bert",
     MODEL_ARCH.NOMIC_BERT: "nomic-bert",
-    MODEL_ARCH.NOMIC_BERT_MOE: "nomic-bert-moe",
     MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
     MODEL_ARCH.BLOOM: "bloom",
     MODEL_ARCH.STABLELM: "stablelm",

@@ -546,11 +433,8 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.QWEN2: "qwen2",
     MODEL_ARCH.QWEN2MOE: "qwen2moe",
     MODEL_ARCH.QWEN2VL: "qwen2vl",
-    MODEL_ARCH.QWEN3: "qwen3",
-    MODEL_ARCH.QWEN3MOE: "qwen3moe",
     MODEL_ARCH.PHI2: "phi2",
     MODEL_ARCH.PHI3: "phi3",
-    MODEL_ARCH.PHIMOE: "phimoe",
     MODEL_ARCH.PLAMO: "plamo",
     MODEL_ARCH.CODESHELL: "codeshell",
     MODEL_ARCH.ORION: "orion",

@@ -559,12 +443,8 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.MINICPM3: "minicpm3",
     MODEL_ARCH.GEMMA: "gemma",
     MODEL_ARCH.GEMMA2: "gemma2",
-    MODEL_ARCH.GEMMA3: "gemma3",
     MODEL_ARCH.STARCODER2: "starcoder2",
     MODEL_ARCH.RWKV6: "rwkv6",
-    MODEL_ARCH.RWKV6QWEN2: "rwkv6qwen2",
-    MODEL_ARCH.RWKV7: "rwkv7",
-    MODEL_ARCH.ARWKV7: "arwkv7",
     MODEL_ARCH.MAMBA: "mamba",
     MODEL_ARCH.XVERSE: "xverse",
     MODEL_ARCH.COMMAND_R: "command-r",

@@ -578,7 +458,6 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.DEEPSEEK: "deepseek",
     MODEL_ARCH.DEEPSEEK2: "deepseek2",
     MODEL_ARCH.CHATGLM: "chatglm",
-    MODEL_ARCH.GLM4: "glm4",
     MODEL_ARCH.BITNET: "bitnet",
     MODEL_ARCH.T5: "t5",
     MODEL_ARCH.T5ENCODER: "t5encoder",

@@ -589,18 +468,6 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.GRANITE_MOE: "granitemoe",
|
MODEL_ARCH.GRANITE_MOE: "granitemoe",
|
||||||
MODEL_ARCH.CHAMELEON: "chameleon",
|
MODEL_ARCH.CHAMELEON: "chameleon",
|
||||||
MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
|
MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
|
||||||
MODEL_ARCH.PLM: "plm",
|
|
||||||
MODEL_ARCH.BAILINGMOE: "bailingmoe",
|
|
||||||
}
|
|
||||||
|
|
||||||
VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
|
|
||||||
VISION_PROJECTOR_TYPE.MLP: "mlp",
|
|
||||||
VISION_PROJECTOR_TYPE.LDP: "ldp",
|
|
||||||
VISION_PROJECTOR_TYPE.LDPV2: "ldpv2",
|
|
||||||
VISION_PROJECTOR_TYPE.RESAMPLER: "resampler",
|
|
||||||
VISION_PROJECTOR_TYPE.GLM_EDGE: "adapter",
|
|
||||||
VISION_PROJECTOR_TYPE.MERGER: "qwen2vl_merger",
|
|
||||||
VISION_PROJECTOR_TYPE.GEMMA3: "gemma3",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||||
|
@ -650,26 +517,13 @@ class MODEL_TENSOR(IntEnum):
|
||||||
     MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
     MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
     MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
-    MODEL_TENSOR.TIME_MIX_W0: "blk.{bid}.time_mix_w0",
     MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
     MODEL_TENSOR.TIME_MIX_W2: "blk.{bid}.time_mix_w2",
-    MODEL_TENSOR.TIME_MIX_A0: "blk.{bid}.time_mix_a0",
-    MODEL_TENSOR.TIME_MIX_A1: "blk.{bid}.time_mix_a1",
-    MODEL_TENSOR.TIME_MIX_A2: "blk.{bid}.time_mix_a2",
-    MODEL_TENSOR.TIME_MIX_V0: "blk.{bid}.time_mix_v0",
-    MODEL_TENSOR.TIME_MIX_V1: "blk.{bid}.time_mix_v1",
-    MODEL_TENSOR.TIME_MIX_V2: "blk.{bid}.time_mix_v2",
-    MODEL_TENSOR.TIME_MIX_G1: "blk.{bid}.time_mix_g1",
-    MODEL_TENSOR.TIME_MIX_G2: "blk.{bid}.time_mix_g2",
-    MODEL_TENSOR.TIME_MIX_K_K: "blk.{bid}.time_mix_k_k",
-    MODEL_TENSOR.TIME_MIX_K_A: "blk.{bid}.time_mix_k_a",
-    MODEL_TENSOR.TIME_MIX_R_K: "blk.{bid}.time_mix_r_k",
     MODEL_TENSOR.TIME_MIX_LERP_X: "blk.{bid}.time_mix_lerp_x",
     MODEL_TENSOR.TIME_MIX_LERP_K: "blk.{bid}.time_mix_lerp_k",
     MODEL_TENSOR.TIME_MIX_LERP_V: "blk.{bid}.time_mix_lerp_v",
     MODEL_TENSOR.TIME_MIX_LERP_R: "blk.{bid}.time_mix_lerp_r",
     MODEL_TENSOR.TIME_MIX_LERP_G: "blk.{bid}.time_mix_lerp_g",
-    MODEL_TENSOR.TIME_MIX_LERP_FUSED: "blk.{bid}.time_mix_lerp_fused",
     MODEL_TENSOR.TIME_MIX_LERP_W: "blk.{bid}.time_mix_lerp_w",
     MODEL_TENSOR.TIME_MIX_FIRST: "blk.{bid}.time_mix_first",
     MODEL_TENSOR.TIME_MIX_DECAY: "blk.{bid}.time_mix_decay",
@@ -690,8 +544,6 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
     MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
     MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
-    MODEL_TENSOR.ATTN_K_B: "blk.{bid}.attn_k_b",
-    MODEL_TENSOR.ATTN_V_B: "blk.{bid}.attn_v_b",
     MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
     MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
     MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
@@ -742,88 +594,9 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
     MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
     MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
-    # vision
-    MODEL_TENSOR.V_MMPROJ: "mm.{bid}",
-    MODEL_TENSOR.V_MMPROJ_FC: "mm.model.fc",
-    MODEL_TENSOR.V_MMPROJ_MLP: "mm.model.mlp.{bid}",
-    MODEL_TENSOR.V_MMPROJ_PEG: "mm.model.peg.{bid}",
-    MODEL_TENSOR.V_ENC_EMBD_CLS: "v.class_embd",
-    MODEL_TENSOR.V_ENC_EMBD_PATCH: "v.patch_embd",
-    MODEL_TENSOR.V_ENC_EMBD_POS: "v.position_embd",
-    MODEL_TENSOR.V_ENC_ATTN_Q: "v.blk.{bid}.attn_q",
-    MODEL_TENSOR.V_ENC_ATTN_Q_NORM: "v.blk.{bid}.attn_q_norm",
-    MODEL_TENSOR.V_ENC_ATTN_K: "v.blk.{bid}.attn_k",
-    MODEL_TENSOR.V_ENC_ATTN_K_NORM: "v.blk.{bid}.attn_k_norm",
-    MODEL_TENSOR.V_ENC_ATTN_V: "v.blk.{bid}.attn_v",
-    MODEL_TENSOR.V_ENC_INPUT_NORM: "v.blk.{bid}.ln1",
-    MODEL_TENSOR.V_ENC_OUTPUT: "v.blk.{bid}.attn_out",
-    MODEL_TENSOR.V_ENC_OUTPUT_NORM: "v.blk.{bid}.ln2",
-    MODEL_TENSOR.V_ENC_FFN_UP: "v.blk.{bid}.ffn_up",
-    MODEL_TENSOR.V_ENC_FFN_GATE: "v.blk.{bid}.ffn_gate",
-    MODEL_TENSOR.V_ENC_FFN_DOWN: "v.blk.{bid}.ffn_down",
-    MODEL_TENSOR.V_LAYER_SCALE_1: "v.blk.{bid}.ls1",
-    MODEL_TENSOR.V_LAYER_SCALE_2: "v.blk.{bid}.ls2",
-    MODEL_TENSOR.V_PRE_NORM: "v.pre_ln",
-    MODEL_TENSOR.V_POST_NORM: "v.post_ln",
-    MODEL_TENSOR.V_MM_INP_PROJ: "mm.input_projection",
-    MODEL_TENSOR.V_MM_INP_NORM: "mm.input_norm",
-    MODEL_TENSOR.V_MM_SOFT_EMB_NORM: "mm.soft_emb_norm",
-    MODEL_TENSOR.V_RESMPL_POS_EMBD_K: "resampler.pos_embd_k",
-    MODEL_TENSOR.V_RESMPL_ATTN_Q: "resampler.attn.q",
-    MODEL_TENSOR.V_RESMPL_ATTN_K: "resampler.attn.k",
-    MODEL_TENSOR.V_RESMPL_ATTN_V: "resampler.attn.v",
-    MODEL_TENSOR.V_RESMPL_ATTN_OUT: "resampler.attn.out",
-    MODEL_TENSOR.V_RESMPL_KV: "resampler.kv",
-    MODEL_TENSOR.V_RESMPL_KV_NORM: "resampler.ln_kv",
-    MODEL_TENSOR.V_RESMPL_POST_NORM: "resampler.ln_post",
-    MODEL_TENSOR.V_RESMPL_Q_NORM: "resampler.ln_q",
-    MODEL_TENSOR.V_RESMPL_PROJ: "resampler.proj",
-    MODEL_TENSOR.V_RESMPL_QUERY: "resampler.query",
-    MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK: "v.token_embd.img_break",  # pixtral
-    MODEL_TENSOR.V_MM_PATCH_MERGER: "mm.patch_merger",  # mistral small 3.1
 }

 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
-    MODEL_ARCH.CLIP_VISION: [
-        MODEL_TENSOR.V_MMPROJ,
-        MODEL_TENSOR.V_MMPROJ_FC,
-        MODEL_TENSOR.V_MMPROJ_MLP,
-        MODEL_TENSOR.V_MMPROJ_PEG,
-        MODEL_TENSOR.V_ENC_EMBD_CLS,
-        MODEL_TENSOR.V_ENC_EMBD_PATCH,
-        MODEL_TENSOR.V_ENC_EMBD_POS,
-        MODEL_TENSOR.V_ENC_ATTN_Q,
-        MODEL_TENSOR.V_ENC_ATTN_Q_NORM,
-        MODEL_TENSOR.V_ENC_ATTN_K,
-        MODEL_TENSOR.V_ENC_ATTN_K_NORM,
-        MODEL_TENSOR.V_ENC_ATTN_V,
-        MODEL_TENSOR.V_ENC_INPUT_NORM,
-        MODEL_TENSOR.V_ENC_OUTPUT,
-        MODEL_TENSOR.V_ENC_OUTPUT_NORM,
-        MODEL_TENSOR.V_ENC_FFN_UP,
-        MODEL_TENSOR.V_ENC_FFN_GATE,
-        MODEL_TENSOR.V_ENC_FFN_DOWN,
-        MODEL_TENSOR.V_LAYER_SCALE_1,
-        MODEL_TENSOR.V_LAYER_SCALE_2,
-        MODEL_TENSOR.V_PRE_NORM,
-        MODEL_TENSOR.V_POST_NORM,
-        MODEL_TENSOR.V_MM_INP_PROJ,
-        MODEL_TENSOR.V_MM_INP_NORM,
-        MODEL_TENSOR.V_MM_SOFT_EMB_NORM,
-        MODEL_TENSOR.V_RESMPL_POS_EMBD_K,
-        MODEL_TENSOR.V_RESMPL_ATTN_Q,
-        MODEL_TENSOR.V_RESMPL_ATTN_K,
-        MODEL_TENSOR.V_RESMPL_ATTN_V,
-        MODEL_TENSOR.V_RESMPL_ATTN_OUT,
-        MODEL_TENSOR.V_RESMPL_KV,
-        MODEL_TENSOR.V_RESMPL_KV_NORM,
-        MODEL_TENSOR.V_RESMPL_POST_NORM,
-        MODEL_TENSOR.V_RESMPL_Q_NORM,
-        MODEL_TENSOR.V_RESMPL_PROJ,
-        MODEL_TENSOR.V_RESMPL_QUERY,
-        MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK,
-        MODEL_TENSOR.V_MM_PATCH_MERGER,
-    ],
     MODEL_ARCH.LLAMA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -844,29 +617,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
-    MODEL_ARCH.LLAMA4: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.ROPE_FREQS,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.ATTN_ROT_EMBD,
-        MODEL_TENSOR.FFN_GATE_INP,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_GATE,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-        MODEL_TENSOR.FFN_GATE_EXP,
-        MODEL_TENSOR.FFN_DOWN_EXP,
-        MODEL_TENSOR.FFN_UP_EXP,
-        MODEL_TENSOR.FFN_GATE_SHEXP,
-        MODEL_TENSOR.FFN_DOWN_SHEXP,
-        MODEL_TENSOR.FFN_UP_SHEXP,
-    ],
     MODEL_ARCH.DECI: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -990,22 +740,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.LAYER_OUT_NORM,
     ],
-    MODEL_ARCH.NOMIC_BERT_MOE: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.TOKEN_EMBD_NORM,
-        MODEL_TENSOR.TOKEN_TYPES,
-        MODEL_TENSOR.POS_EMBD,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.ATTN_OUT_NORM,
-        MODEL_TENSOR.ATTN_QKV,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-        MODEL_TENSOR.FFN_GATE_INP,
-        MODEL_TENSOR.FFN_DOWN_EXP,
-        MODEL_TENSOR.FFN_UP_EXP,
-        MODEL_TENSOR.LAYER_OUT_NORM,
-    ],
     MODEL_ARCH.JINA_BERT_V2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.TOKEN_EMBD_NORM,
@@ -1156,40 +890,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
     ],
-    MODEL_ARCH.QWEN3: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.ROPE_FREQS,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_Q_NORM,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_K_NORM,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_GATE,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-    ],
-    MODEL_ARCH.QWEN3MOE: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_Q_NORM,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_K_NORM,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_GATE_INP,
-        MODEL_TENSOR.FFN_GATE_EXP,
-        MODEL_TENSOR.FFN_DOWN_EXP,
-        MODEL_TENSOR.FFN_UP_EXP,
-    ],
     MODEL_ARCH.PLAMO: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1247,24 +947,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
-    MODEL_ARCH.PHIMOE: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.ROPE_FACTORS_LONG,
-        MODEL_TENSOR.ROPE_FACTORS_SHORT,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_QKV,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_GATE_INP,
-        MODEL_TENSOR.FFN_GATE_EXP,
-        MODEL_TENSOR.FFN_DOWN_EXP,
-        MODEL_TENSOR.FFN_UP_EXP,
-    ],
     MODEL_ARCH.CODESHELL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.POS_EMBD,
@@ -1378,24 +1060,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_PRE_NORM,
         MODEL_TENSOR.FFN_POST_NORM,
     ],
-    MODEL_ARCH.GEMMA3: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_Q_NORM,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_K_NORM,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_GATE,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_POST_NORM,
-        MODEL_TENSOR.FFN_PRE_NORM,
-        MODEL_TENSOR.FFN_POST_NORM,
-    ],
     MODEL_ARCH.STARCODER2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1426,7 +1090,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.TIME_MIX_LERP_R,
         MODEL_TENSOR.TIME_MIX_LERP_G,
         MODEL_TENSOR.TIME_MIX_LERP_W,
-        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
         MODEL_TENSOR.TIME_MIX_FIRST,
         MODEL_TENSOR.TIME_MIX_DECAY,
         MODEL_TENSOR.TIME_MIX_DECAY_W1,
@@ -1443,97 +1106,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE,
         MODEL_TENSOR.CHANNEL_MIX_VALUE,
     ],
-    MODEL_ARCH.RWKV6QWEN2: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.TIME_MIX_W1,
-        MODEL_TENSOR.TIME_MIX_W2,
-        MODEL_TENSOR.TIME_MIX_LERP_X,
-        MODEL_TENSOR.TIME_MIX_LERP_K,
-        MODEL_TENSOR.TIME_MIX_LERP_V,
-        MODEL_TENSOR.TIME_MIX_LERP_R,
-        MODEL_TENSOR.TIME_MIX_LERP_G,
-        MODEL_TENSOR.TIME_MIX_LERP_W,
-        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
-        MODEL_TENSOR.TIME_MIX_FIRST,
-        MODEL_TENSOR.TIME_MIX_DECAY,
-        MODEL_TENSOR.TIME_MIX_DECAY_W1,
-        MODEL_TENSOR.TIME_MIX_DECAY_W2,
-        MODEL_TENSOR.TIME_MIX_KEY,
-        MODEL_TENSOR.TIME_MIX_VALUE,
-        MODEL_TENSOR.TIME_MIX_RECEPTANCE,
-        MODEL_TENSOR.TIME_MIX_GATE,
-        MODEL_TENSOR.TIME_MIX_LN,
-        MODEL_TENSOR.TIME_MIX_OUTPUT,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_GATE,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-    ],
-    MODEL_ARCH.RWKV7: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.TOKEN_EMBD_NORM,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_NORM_2,
-        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
-        MODEL_TENSOR.TIME_MIX_W0,
-        MODEL_TENSOR.TIME_MIX_W1,
-        MODEL_TENSOR.TIME_MIX_W2,
-        MODEL_TENSOR.TIME_MIX_A0,
-        MODEL_TENSOR.TIME_MIX_A1,
-        MODEL_TENSOR.TIME_MIX_A2,
-        MODEL_TENSOR.TIME_MIX_V0,
-        MODEL_TENSOR.TIME_MIX_V1,
-        MODEL_TENSOR.TIME_MIX_V2,
-        MODEL_TENSOR.TIME_MIX_G1,
-        MODEL_TENSOR.TIME_MIX_G2,
-        MODEL_TENSOR.TIME_MIX_K_K,
-        MODEL_TENSOR.TIME_MIX_K_A,
-        MODEL_TENSOR.TIME_MIX_R_K,
-        MODEL_TENSOR.TIME_MIX_KEY,
-        MODEL_TENSOR.TIME_MIX_VALUE,
-        MODEL_TENSOR.TIME_MIX_RECEPTANCE,
-        MODEL_TENSOR.TIME_MIX_LN,
-        MODEL_TENSOR.TIME_MIX_OUTPUT,
-        MODEL_TENSOR.CHANNEL_MIX_LERP_K,
-        MODEL_TENSOR.CHANNEL_MIX_KEY,
-        MODEL_TENSOR.CHANNEL_MIX_VALUE,
-    ],
-    MODEL_ARCH.ARWKV7: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.TOKEN_EMBD_NORM,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
-        MODEL_TENSOR.TIME_MIX_W0,
-        MODEL_TENSOR.TIME_MIX_W1,
-        MODEL_TENSOR.TIME_MIX_W2,
-        MODEL_TENSOR.TIME_MIX_A0,
-        MODEL_TENSOR.TIME_MIX_A1,
-        MODEL_TENSOR.TIME_MIX_A2,
-        MODEL_TENSOR.TIME_MIX_V0,
-        MODEL_TENSOR.TIME_MIX_V1,
-        MODEL_TENSOR.TIME_MIX_V2,
-        MODEL_TENSOR.TIME_MIX_G1,
-        MODEL_TENSOR.TIME_MIX_G2,
-        MODEL_TENSOR.TIME_MIX_K_K,
-        MODEL_TENSOR.TIME_MIX_K_A,
-        MODEL_TENSOR.TIME_MIX_R_K,
-        MODEL_TENSOR.TIME_MIX_KEY,
-        MODEL_TENSOR.TIME_MIX_VALUE,
-        MODEL_TENSOR.TIME_MIX_RECEPTANCE,
-        MODEL_TENSOR.TIME_MIX_LN,
-        MODEL_TENSOR.TIME_MIX_OUTPUT,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_GATE,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-    ],
     MODEL_ARCH.MAMBA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1714,8 +1286,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.ATTN_Q_B,
         MODEL_TENSOR.ATTN_KV_A_MQA,
         MODEL_TENSOR.ATTN_KV_B,
-        MODEL_TENSOR.ATTN_K_B,
-        MODEL_TENSOR.ATTN_V_B,
         MODEL_TENSOR.ATTN_Q_A_NORM,
         MODEL_TENSOR.ATTN_KV_A_NORM,
         MODEL_TENSOR.ATTN_OUT,
@@ -1733,20 +1303,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_UP_SHEXP,
         MODEL_TENSOR.FFN_EXP_PROBS_B,
     ],
-    MODEL_ARCH.PLM: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_KV_A_MQA,
-        MODEL_TENSOR.ATTN_KV_A_NORM,
-        MODEL_TENSOR.ATTN_KV_B,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_UP,
-        MODEL_TENSOR.FFN_DOWN,
-    ],
     MODEL_ARCH.CHATGLM: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.ROPE_FREQS,
@@ -1754,31 +1310,11 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_QKV,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_V,
         MODEL_TENSOR.ATTN_OUT,
         MODEL_TENSOR.FFN_NORM,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
-    MODEL_ARCH.GLM4: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.ROPE_FREQS,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_QKV,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-        MODEL_TENSOR.ATTN_POST_NORM,
-        MODEL_TENSOR.FFN_POST_NORM,
-    ],
     MODEL_ARCH.BITNET: [
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
@@ -1912,9 +1448,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_GATE_EXP,
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
-        MODEL_TENSOR.FFN_GATE_SHEXP,
-        MODEL_TENSOR.FFN_UP_SHEXP,
-        MODEL_TENSOR.FFN_DOWN_SHEXP,
     ],
     MODEL_ARCH.CHAMELEON: [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -1954,25 +1487,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.POSNET_ATTN_V,
         MODEL_TENSOR.POSNET_ATTN_OUT,
     ],
-    MODEL_ARCH.BAILINGMOE: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.ROPE_FREQS,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_GATE_INP,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_GATE_EXP,
-        MODEL_TENSOR.FFN_DOWN_EXP,
-        MODEL_TENSOR.FFN_UP_EXP,
-        MODEL_TENSOR.FFN_GATE_SHEXP,
-        MODEL_TENSOR.FFN_DOWN_SHEXP,
-        MODEL_TENSOR.FFN_UP_SHEXP,
-    ],
     # TODO
 }

@@ -2025,9 +1539,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
-    MODEL_ARCH.BAILINGMOE: [
-        MODEL_TENSOR.ROPE_FREQS,
-    ],
 }

 #
@@ -2055,8 +1566,6 @@ class PoolingType(IntEnum):
     NONE = 0
     MEAN = 1
     CLS = 2
-    LAST = 3
-    RANK = 4


 class GGMLQuantizationType(IntEnum):
@@ -2183,15 +1692,6 @@ def get_type(val: Any) -> GGUFValueType:
     raise ValueError(f"Unknown type: {type(val)}")


-class VisionProjectorType:
-    GEMMA3 = "gemma3"
-    IDEFICS3 = "idefics3"
-    PIXTRAL = "pixtral"
-    QWEN2VL = "qwen2vl_merger"
-    QWEN25VL = "qwen2.5vl_merger"
-    INTERNVL = "internvl"
-
-
 # Items here are (block size, type size)
 QK_K = 256
 GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
@@ -2289,6 +1789,7 @@ class VisionProjectorType:
 KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
 KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
 KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
+KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
 KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
 KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
 KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
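Note: the TENSOR_NAMES entries above are str.format templates keyed by block index. A minimal sketch of how such a template is typically expanded (the top-level `gguf` import path and the chosen tensor are illustrative assumptions, not something this diff adds):

    import gguf  # assumes the vendored gguf package is importable

    template = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.SSM_A]  # "blk.{bid}.ssm_a" per the table above
    print(template.format(bid=3))  # -> "blk.3.ssm_a"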
@@ -1,11 +0,0 @@
-import importlib
-import sys
-from pathlib import Path
-
-sys.path.insert(0, str(Path(__file__).parent.parent))
-
-# Compatibility for people trying to import gguf/gguf.py directly instead of as a package.
-importlib.invalidate_caches()
-import gguf  # noqa: E402
-
-importlib.reload(gguf)
@@ -1,8 +1,11 @@
+#
+# GGUF file reading/modification support. For API usage information,
+# please see the files scripts/ for some fairly simple examples.
+#
 from __future__ import annotations

 import logging
 import os
-import sys
 from collections import OrderedDict
 from typing import Any, Literal, NamedTuple, TypeVar, Union

@@ -12,6 +15,7 @@
 from .quants import quant_shape_to_byte_shape

 if __name__ == "__main__":
+    import sys
     from pathlib import Path

     # Allow running file in package as a script.
@@ -24,7 +28,6 @@
     GGUF_VERSION,
     GGMLQuantizationType,
     GGUFValueType,
-    GGUFEndian,
 )

 logger = logging.getLogger(__name__)
@@ -50,52 +53,6 @@ class ReaderField(NamedTuple):

     types: list[GGUFValueType] = []
-
-    def contents(self, index_or_slice: int | slice = slice(None)) -> Any:
-        if self.types:
-            to_string = lambda x: str(x.tobytes(), encoding="utf-8")  # noqa: E731
-            main_type = self.types[0]
-
-            if main_type == GGUFValueType.ARRAY:
-                sub_type = self.types[-1]
-
-                if sub_type == GGUFValueType.STRING:
-                    indices = self.data[index_or_slice]
-
-                    if isinstance(index_or_slice, int):
-                        return to_string(self.parts[indices])  # type: ignore
-                    else:
-                        return [to_string(self.parts[idx]) for idx in indices]  # type: ignore
-                else:
-                    # FIXME: When/if _get_field_parts() support multi-dimensional arrays, this must do so too
-
-                    # Check if it's unsafe to perform slice optimization on data
-                    # if any(True for idx in self.data if len(self.parts[idx]) != 1):
-                    #     optim_slice = slice(None)
-                    # else:
-                    #     optim_slice = index_or_slice
-                    #     index_or_slice = slice(None)
-
-                    # if isinstance(optim_slice, int):
-                    #     return self.parts[self.data[optim_slice]].tolist()[0]
-                    # else:
-                    #     return [pv for idx in self.data[optim_slice] for pv in self.parts[idx].tolist()][index_or_slice]
-
-                    if isinstance(index_or_slice, int):
-                        return self.parts[self.data[index_or_slice]].tolist()[0]
-                    else:
-                        return [
-                            pv
-                            for idx in self.data[index_or_slice]
-                            for pv in self.parts[idx].tolist()
-                        ]
-
-            if main_type == GGUFValueType.STRING:
-                return to_string(self.parts[-1])
-            else:
-                return self.parts[-1].tolist()[0]
-
-        return None


 class ReaderTensor(NamedTuple):
     name: str
@@ -146,23 +103,12 @@ def __init__(
             # If we get 0 here that means it's (probably) a GGUF file created for
             # the opposite byte order of the machine this script is running on.
             self.byte_order = "S"
-            temp_version = temp_version.view(
-                temp_version.dtype.newbyteorder(self.byte_order)
-            )
+            temp_version = temp_version.newbyteorder(self.byte_order)
         version = temp_version[0]
         if version not in READER_SUPPORTED_VERSIONS:
             raise ValueError(
                 f"Sorry, file appears to be version {version} which we cannot handle"
             )
-        if sys.byteorder == "little":
-            # Host is little endian
-            host_endian = GGUFEndian.LITTLE
-            swapped_endian = GGUFEndian.BIG
-        else:
-            # Sorry PDP or other weird systems that don't use BE or LE.
-            host_endian = GGUFEndian.BIG
-            swapped_endian = GGUFEndian.LITTLE
-        self.endianess = swapped_endian if self.byte_order == "S" else host_endian
         self.fields: OrderedDict[str, ReaderField] = OrderedDict()
         self.tensors: list[ReaderTensor] = []
         offs += self._push_field(
@@ -224,11 +170,9 @@ def _get(
         itemsize = int(np.empty([], dtype=dtype).itemsize)
         end_offs = offset + itemsize * count
         arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
-        return arr.view(
-            arr.dtype.newbyteorder(
-                self.byte_order if override_order is None else override_order
-            )
-        )
+        if override_order is None:
+            return arr
+        return arr.view(arr.dtype.newbyteorder(override_order))

     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
@@ -274,7 +218,6 @@ def _get_field_parts(
         offs += int(alen.nbytes)
         aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
         data_idxs: list[int] = []
-        # FIXME: Handle multi-dimensional arrays properly instead of flattening
        for idx in range(alen[0]):
             curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(
                 offs, raw_itype[0]
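Note: the reader hunks above differ in how they byte-swap a NumPy view: one side calls ndarray.newbyteorder(), which NumPy 2.0 removed, while the other builds a view over a byte-swapped dtype. A standalone sketch of the dtype-level form, independent of this file (array contents are made up for illustration):

    import numpy as np

    arr = np.arange(4, dtype=np.uint32)
    swapped = arr.view(arr.dtype.newbyteorder("S"))  # same bytes, opposite-endian dtype
    print(swapped.dtype, int(swapped[0]))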
@@ -774,12 +774,6 @@ def add_key_length(self, length: int) -> None:
     def add_value_length(self, length: int) -> None:
         self.add_uint32(Keys.Attention.VALUE_LENGTH.format(arch=self.arch), length)

-    def add_key_length_mla(self, length: int) -> None:
-        self.add_uint32(Keys.Attention.KEY_LENGTH_MLA.format(arch=self.arch), length)
-
-    def add_value_length_mla(self, length: int) -> None:
-        self.add_uint32(Keys.Attention.VALUE_LENGTH_MLA.format(arch=self.arch), length)
-
     def add_max_alibi_bias(self, bias: float) -> None:
         self.add_float32(Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch), bias)

@@ -813,9 +807,6 @@ def add_expert_weights_norm(self, value: bool) -> None:
     def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
         self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
-
-    def add_moe_every_n_layers(self, value: int) -> None:
-        self.add_uint32(Keys.LLM.MOE_EVERY_N_LAYERS.format(arch=self.arch), value)

     def add_swin_norm(self, value: bool) -> None:
         self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)

@@ -837,14 +828,6 @@ def add_embedding_scale(self, value: float) -> None:
     def add_wkv_head_size(self, size: int) -> None:
         self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
-
-    def add_token_shift_count(self, count: int) -> None:
-        self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)
-
-    def add_interleave_moe_layer_step(self, value: int) -> None:
-        self.add_uint32(
-            Keys.LLM.INTERLEAVE_MOE_LAYER_STEP.format(arch=self.arch), value
-        )

     def add_layer_norm_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)

@@ -866,20 +849,6 @@ def add_q_lora_rank(self, length: int) -> None:
     def add_kv_lora_rank(self, length: int) -> None:
         self.add_uint32(Keys.Attention.KV_LORA_RANK.format(arch=self.arch), length)
-
-    def add_decay_lora_rank(self, length: int) -> None:
-        self.add_uint32(Keys.Attention.DECAY_LORA_RANK.format(arch=self.arch), length)
-
-    def add_iclr_lora_rank(self, length: int) -> None:
-        self.add_uint32(Keys.Attention.ICLR_LORA_RANK.format(arch=self.arch), length)
-
-    def add_value_residual_mix_lora_rank(self, length: int) -> None:
-        self.add_uint32(
-            Keys.Attention.VALUE_RESIDUAL_MIX_LORA_RANK.format(arch=self.arch), length
-        )
-
-    def add_gate_lora_rank(self, length: int) -> None:
-        self.add_uint32(Keys.Attention.GATE_LORA_RANK.format(arch=self.arch), length)

     def add_relative_attn_buckets_count(self, value: int) -> None:
         self.add_uint32(Keys.Attention.REL_BUCKETS_COUNT.format(arch=self.arch), value)

@@ -974,6 +943,9 @@ def add_sep_token_id(self, id: int) -> None:
     def add_pad_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.PAD_ID, id)

+    def add_cls_token_id(self, id: int) -> None:
+        self.add_uint32(Keys.Tokenizer.CLS_ID, id)
+
     def add_mask_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.MASK_ID, id)

@@ -1031,59 +1003,6 @@ def add_eot_token_id(self, id: int) -> None:
     def add_eom_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.EOM_ID, id)

-    # for vision models
-
-    def add_vision_projection_dim(self, value: int) -> None:
-        self.add_uint32(Keys.ClipVision.PROJECTION_DIM, value)
-
-    def add_vision_has_vision_encoder(self, value: bool) -> None:
-        self.add_bool(Keys.ClipVision.HAS_VISION_ENCODER, value)
-
-    def add_vision_patch_size(self, value: int) -> None:
-        self.add_uint32(Keys.ClipVision.PATCH_SIZE, value)
-
-    def add_vision_embedding_length(self, value: int) -> None:
-        self.add_uint32(Keys.ClipVision.EMBEDDING_LENGTH, value)
-
-    def add_vision_feed_forward_length(self, value: int) -> None:
-        self.add_uint32(Keys.ClipVision.FEED_FORWARD_LENGTH, value)
-
-    def add_vision_block_count(self, value: int) -> None:
-        self.add_uint32(Keys.ClipVision.BLOCK_COUNT, value)
-
-    def add_vision_head_count(self, value: int) -> None:
-        self.add_uint32(Keys.ClipVision.Attention.HEAD_COUNT, value)
-
-    def add_vision_projector_type(self, value: str) -> None:
-        self.add_string(Keys.ClipVision.PROJECTOR_TYPE, value)
-
-    def add_vision_attention_layernorm_eps(self, value: float) -> None:
-        self.add_float32(Keys.ClipVision.Attention.LAYERNORM_EPS, value)
-
-    def add_vision_image_size(self, value: int) -> None:
-        self.add_uint32(Keys.ClipVision.IMAGE_SIZE, value)
-
-    def add_vision_image_mean(self, values: Sequence[float]) -> None:
-        self.add_array(Keys.ClipVision.IMAGE_MEAN, values)
-
-    def add_vision_image_std(self, values: Sequence[float]) -> None:
-        self.add_array(Keys.ClipVision.IMAGE_STD, values)
-
-    def add_vision_spatial_merge_size(self, value: int) -> None:
-        self.add_uint32(Keys.ClipVision.SPATIAL_MERGE_SIZE, value)
-
-    def add_vision_use_gelu(self, value: bool) -> None:
-        self.add_bool(Keys.ClipVision.USE_GELU, value)
-
-    def add_vision_use_silu(self, value: bool) -> None:
-        self.add_bool(Keys.ClipVision.USE_SILU, value)
-
-    def add_vision_projector_scale_factor(self, value: int) -> None:
-        self.add_uint32(Keys.ClipVision.Projector.SCALE_FACTOR, value)
-
-    def add_vision_n_wa_pattern(self, value: int) -> None:
-        self.add_uint32(Keys.ClipVision.N_WA_PATTERN, value)
-
     def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
         pack_prefix = ""
         if not skip_pack_prefix:
@@ -201,27 +201,6 @@ def wrapped_fn(*args, **kwargs):
                 return cls(
                     meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn
                 )
-            elif isinstance(res, tuple) and all(
-                isinstance(t, cls._tensor_type) for t in res
-            ):
-                # share the evaluation between lazy tuple elements
-                shared_args: list = [args, None]
-
-                def eager_tuple_element(a: list[Any], i: int = 0, /, **kw) -> LazyBase:
-                    assert len(a) == 2
-                    if a[1] is None:
-                        a[1] = fn(*a[0], **kw)
-                    return a[1][i]
-
-                return tuple(
-                    cls(
-                        meta=cls.eager_to_meta(res[i]),
-                        args=(shared_args, i),
-                        kwargs=kwargs,
-                        func=eager_tuple_element,
-                    )
-                    for i in range(len(res))
-                )
             else:
                 del res  # not needed
                 # non-tensor return likely relies on the contents of the args
@@ -160,32 +160,12 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]:
         if not model_card_path.is_file():
             return {}

-        # The model card metadata is assumed to always be in YAML (frontmatter)
+        # The model card metadata is assumed to always be in YAML
         # ref: https://github.com/huggingface/transformers/blob/a5c642fe7a1f25d3bdcd76991443ba6ff7ee34b2/src/transformers/modelcard.py#L468-L473
-        yaml_content: str = ""
         with open(model_card_path, "r", encoding="utf-8") as f:
-            content = f.read()
-            lines = content.splitlines()
-            lines_yaml = []
-            if len(lines) == 0:
-                # Empty file
-                return {}
-            if len(lines) > 0 and lines[0] != "---":
-                # No frontmatter
-                return {}
-            for line in lines[1:]:
-                if line == "---":
-                    break  # End of frontmatter
-                else:
-                    lines_yaml.append(line)
-            yaml_content = "\n".join(lines_yaml) + "\n"
-
-        # Quick hack to fix the Norway problem
-        # https://hitchdev.com/strictyaml/why/implicit-typing-removed/
-        yaml_content = yaml_content.replace("- no\n", '- "no"\n')
-
-        if yaml_content:
-            data = yaml.safe_load(yaml_content)
+            if f.readline() == "---\n":
+                raw = f.read().partition("---\n")[0]
+                data = yaml.safe_load(raw)
             if isinstance(data, dict):
                 return data
             else:
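Note: both sides of the load_model_card hunk above read YAML frontmatter out of a model card; a minimal standalone sketch of that pattern (the file path and the fallback behaviour are assumptions for illustration, not part of this diff):

    from pathlib import Path

    import yaml  # PyYAML

    def read_frontmatter(path: Path) -> dict:
        # Frontmatter is the YAML block delimited by "---" lines at the top of the file.
        with open(path, "r", encoding="utf-8") as f:
            if f.readline() != "---\n":
                return {}  # no frontmatter block
            raw = f.read().partition("---\n")[0]
        data = yaml.safe_load(raw)
        return data if isinstance(data, dict) else {}

    print(read_frontmatter(Path("README.md")))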
@@ -13,7 +13,7 @@ class TensorNameMap:
         "transformer.wte",  # gpt2 gpt-j mpt refact qwen dbrx jais exaone
         "transformer.word_embeddings",  # falcon
         "word_embeddings",  # bloom
-        "model.embed_tokens",  # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
+        "model.embed_tokens",  # llama-hf nemotron olmoe olmo2
         "tok_embeddings",  # llama-pth
         "embeddings.word_embeddings",  # bert nomic-bert
         "language_model.embedding.word_embeddings",  # persimmon
@@ -27,10 +27,7 @@ class TensorNameMap:
         "embedding.word_embeddings",  # chatglm
         "transformer.token_embeddings",  # openelm
         "shared",  # t5
-        "rwkv.embeddings",  # rwkv6
-        "model.embeddings",  # rwkv7
-        "model.word_embeddings",  # bailingmoe
-        "language_model.model.embed_tokens",  # llama4
+        "rwkv.embeddings",  # rwkv
     ),
     # Token type embeddings
     MODEL_TENSOR.TOKEN_TYPES: (
@@ -43,9 +40,6 @@ class TensorNameMap:
         "emb_ln",  # nomic-bert
         "transformer.norm",  # openelm
         "rwkv.blocks.0.pre_ln",  # rwkv
-        "rwkv.blocks.0.pre_ln",  # rwkv6
-        "model.pre_ln",  # rwkv7
-        "model.layers.0.pre_norm",  # rwkv7
         "backbone.norm",  # wavtokenizer
     ),
     # Position embeddings
@@ -57,20 +51,19 @@ class TensorNameMap:
     # Output
     MODEL_TENSOR.OUTPUT: (
         "embed_out",  # gptneox
-        "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
+        "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2
         "output",  # llama-pth bloom internlm2
         "word_embeddings_for_head",  # persimmon
         "lm_head.linear",  # phi2
         "output_layer",  # chatglm
         "head",  # rwkv
         "head.out",  # wavtokenizer
-        "lm_head",  # llama4
     ),
     # Output norm
     MODEL_TENSOR.OUTPUT_NORM: (
         "gpt_neox.final_layer_norm",  # gptneox
         "transformer.ln_f",  # gpt2 gpt-j falcon jais exaone
-        "model.norm",  # llama-hf baichuan internlm2 olmoe olmo2 phimoe
+        "model.norm",  # llama-hf baichuan internlm2 olmoe olmo2
         "norm",  # llama-pth
         "transformer.norm_f",  # mpt dbrx
         "ln_f",  # refact bloom qwen gpt2
@@ -83,10 +76,8 @@ class TensorNameMap:
         "encoder.final_layernorm",  # chatglm
         "transformer.norm",  # openelm
         "model.norm",  # nemotron
-        "rwkv.ln_out",  # rwkv6
-        "model.ln_out",  # rwkv7
+        "rwkv.ln_out",  # rwkv
         "backbone.final_layer_norm",  # wavtokenizer
-        "model.norm",  # llama4
     ),
     # Rope frequencies
     MODEL_TENSOR.ROPE_FREQS: (
@@ -107,7 +98,7 @@ class TensorNameMap:
         "transformer.h.{bid}.input_layernorm",  # falcon7b
         "h.{bid}.input_layernorm",  # bloom
         "transformer.h.{bid}.ln_mlp",  # falcon40b
-        "model.layers.{bid}.input_layernorm",  # llama-hf nemotron olmoe phimoe
+        "model.layers.{bid}.input_layernorm",  # llama-hf nemotron olmoe
         "layers.{bid}.attention_norm",  # llama-pth
         "language_model.encoder.layers.{bid}.input_layernorm",  # persimmon
         "model.layers.{bid}.ln1",  # yi
@@ -121,16 +112,13 @@ class TensorNameMap:
         "transformer.blocks.{bid}.norm_attn_norm.norm_1",  # dbrx
         "encoder.layers.{bid}.input_layernorm",  # chatglm
         "transformer.layers.{bid}.attn_norm",  # openelm
-        "rwkv.blocks.{bid}.ln1",  # rwkv6
-        "model.layers.{bid}.ln1",  # rwkv7
-        "model.layers.{bid}.input_layernorm",  # llama4
+        "rwkv.blocks.{bid}.ln1",  # rwkv
     ),
     # Attention norm 2
     MODEL_TENSOR.ATTN_NORM_2: (
         "transformer.h.{bid}.ln_attn",  # falcon40b
         "encoder.layer.{bid}.layer_norm_1",  # jina-v2-code
-        "rwkv.blocks.{bid}.ln2",  # rwkv6
-        "model.layers.{bid}.ln2",  # rwkv7
+        "rwkv.blocks.{bid}.ln2",  # rwkv
     ),
     # Attention query-key-value
     MODEL_TENSOR.ATTN_QKV: (
@@ -151,7 +139,7 @@ class TensorNameMap:
     ),
     # Attention query
     MODEL_TENSOR.ATTN_Q: (
-        "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+        "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron olmoe olmo2
         "model.layers.{bid}.self_attn.q_proj_no_perm",  # llama-custom
         "layers.{bid}.attention.wq",  # llama-pth
         "encoder.layer.{bid}.attention.self.query",  # bert
@@ -160,11 +148,10 @@ class TensorNameMap:
         "model.layers.{bid}.attention.wq",  # internlm2
         "transformer.decoder_layer.{bid}.multi_head_attention.query",  # Grok
         "transformer.h.{bid}.attn.attention.q_proj",  # exaone
-        "model.layers.{bid}.self_attn.q_proj",  # llama4
     ),
     # Attention key
     MODEL_TENSOR.ATTN_K: (
-        "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+        "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron olmoe olmo2
         "model.layers.{bid}.self_attn.k_proj_no_perm",  # llama-custom
         "layers.{bid}.attention.wk",  # llama-pth
         "encoder.layer.{bid}.attention.self.key",  # bert
@@ -174,11 +161,10 @@ class TensorNameMap:
         "model.layers.{bid}.attention.wk",  # internlm2
         "transformer.decoder_layer.{bid}.multi_head_attention.key",  # Grok
         "transformer.h.{bid}.attn.attention.k_proj",  # exaone
-        "model.layers.{bid}.self_attn.k_proj",  # llama4
     ),
     # Attention value
     MODEL_TENSOR.ATTN_V: (
-        "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+        "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron olmoe olmo2
         "layers.{bid}.attention.wv",  # llama-pth
         "encoder.layer.{bid}.attention.self.value",  # bert
         "transformer.h.{bid}.attn.v_proj",  # gpt-j
@@ -187,7 +173,6 @@ class TensorNameMap:
         "model.layers.{bid}.attention.wv",  # internlm2
         "transformer.decoder_layer.{bid}.multi_head_attention.value",  # Grok
         "transformer.h.{bid}.attn.attention.v_proj",  # exaone
-        "model.layers.{bid}.self_attn.v_proj",  # llama4
     ),
     # Attention output
     MODEL_TENSOR.ATTN_OUT: (
@@ -196,7 +181,7 @@ class TensorNameMap:
         "transformer.blocks.{bid}.attn.out_proj",  # mpt
         "transformer.h.{bid}.self_attention.dense",  # falcon
         "h.{bid}.self_attention.dense",  # bloom
-        "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+        "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron olmoe olmo2
         "model.layers.{bid}.self_attn.linear_attn",  # deci
         "layers.{bid}.attention.wo",  # llama-pth
         "encoder.layer.{bid}.attention.output.dense",  # bert
@@ -213,7 +198,6 @@ class TensorNameMap:
         "encoder.layers.{bid}.self_attention.dense",  # chatglm
         "transformer.layers.{bid}.attn.out_proj",  # openelm
         "transformer.h.{bid}.attn.attention.out_proj",  # exaone
-        "model.layers.{bid}.self_attn.o_proj",  # llama4
     ),
     # Attention output norm
     MODEL_TENSOR.ATTN_OUT_NORM: (
@@ -223,8 +207,7 @@ class TensorNameMap:
         "transformer.blocks.{bid}.norm_attn_norm.norm_2",  # dbrx
     ),
     MODEL_TENSOR.ATTN_POST_NORM: (
-        "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2 # ge
-        "model.layers.{bid}.post_self_attn_layernorm",  # glm-4-0414
+        "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2
     ),
     # Rotary embeddings
     MODEL_TENSOR.ATTN_ROT_EMBD: (
@@ -239,7 +222,7 @@ class TensorNameMap:
         "transformer.h.{bid}.ln_2",  # gpt2 refact qwen jais exaone
         "h.{bid}.post_attention_layernorm",  # bloom
         "transformer.blocks.{bid}.norm_2",  # mpt
-        "model.layers.{bid}.post_attention_layernorm",  # llama-hf nemotron olmoe phimoe
+        "model.layers.{bid}.post_attention_layernorm",  # llama-hf nemotron olmoe
         "layers.{bid}.ffn_norm",  # llama-pth
         "language_model.encoder.layers.{bid}.post_attention_layernorm",  # persimmon
         "model.layers.{bid}.ln2",  # yi
@@ -248,7 +231,6 @@ class TensorNameMap:
         "transformer.decoder_layer.{bid}.rms_norm_2",  # Grok
         "encoder.layers.{bid}.post_attention_layernorm",  # chatglm
         "transformer.layers.{bid}.ffn_norm",  # openelm
-        "model.layers.{bid}.post_attention_layernorm",  # llama4
     ),
     # Post feed-forward norm
     MODEL_TENSOR.FFN_PRE_NORM: (
@@ -257,17 +239,14 @@ class TensorNameMap:
     ),
     # Post feed-forward norm
     MODEL_TENSOR.FFN_POST_NORM: (
         "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2
-        "model.layers.{bid}.post_mlp_layernorm",  # glm-4-0414
     ),
     MODEL_TENSOR.FFN_GATE_INP: (
         "layers.{bid}.feed_forward.gate",  # mixtral
-        "model.layers.{bid}.block_sparse_moe.gate",  # mixtral phimoe
+        "model.layers.{bid}.block_sparse_moe.gate",  # mixtral
         "model.layers.{bid}.mlp.gate",  # qwen2moe olmoe
         "transformer.decoder_layer.{bid}.router",  # Grok
         "transformer.blocks.{bid}.ffn.router.layer",  # dbrx
|
||||||
"model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
|
"model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
|
||||||
"model.layers.{bid}.feed_forward.router", # llama4
|
|
||||||
"encoder.layers.{bid}.mlp.router.layer", # nomic-bert-moe
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
|
MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
|
||||||
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
|
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
|
||||||
|
@ -293,31 +272,25 @@ class TensorNameMap:
|
||||||
"h.{bid}.mlp.c_fc", # gpt2
|
"h.{bid}.mlp.c_fc", # gpt2
|
||||||
"transformer.h.{bid}.mlp.fc1", # phi2
|
"transformer.h.{bid}.mlp.fc1", # phi2
|
||||||
"model.layers.{bid}.mlp.fc1", # phi2
|
"model.layers.{bid}.mlp.fc1", # phi2
|
||||||
"model.layers.{bid}.mlp.gate_up_proj", # phi3 glm-4-0414
|
"model.layers.{bid}.mlp.gate_up_proj", # phi3
|
||||||
"model.layers.layers.{bid}.mlp.up_proj", # plamo
|
"model.layers.layers.{bid}.mlp.up_proj", # plamo
|
||||||
"model.layers.{bid}.feed_forward.w3", # internlm2
|
"model.layers.{bid}.feed_forward.w3", # internlm2
|
||||||
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
|
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
|
||||||
"encoder.layers.{bid}.mlp.fc1", # nomic-bert-moe
|
|
||||||
"model.layers.{bid}.mlp.c_fc", # starcoder2
|
"model.layers.{bid}.mlp.c_fc", # starcoder2
|
||||||
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
|
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
|
||||||
"model.layers.{bid}.residual_mlp.w3", # arctic
|
"model.layers.{bid}.residual_mlp.w3", # arctic
|
||||||
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
||||||
"transformer.h.{bid}.mlp.c_fc_1", # exaone
|
"transformer.h.{bid}.mlp.c_fc_1", # exaone
|
||||||
"model.layers.{bid}.feed_forward.up_proj", # llama4
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.FFN_UP_EXP: (
|
MODEL_TENSOR.FFN_UP_EXP: (
|
||||||
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
|
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
|
||||||
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
|
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
|
||||||
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
|
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
|
||||||
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
|
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
|
||||||
"model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
|
|
||||||
"model.layers.{bid}.feed_forward.experts.up_proj", # llama4
|
|
||||||
"encoder.layers.{bid}.mlp.experts.mlp.w1", # nomic-bert-moe
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.FFN_UP_SHEXP: (
|
MODEL_TENSOR.FFN_UP_SHEXP: (
|
||||||
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
|
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
|
||||||
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
|
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
|
||||||
"model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
|
|
||||||
),
|
),
|
||||||
# AWQ-activation gate
|
# AWQ-activation gate
|
||||||
MODEL_TENSOR.FFN_ACT: ("transformer.blocks.{bid}.ffn.act",), # mpt
|
MODEL_TENSOR.FFN_ACT: ("transformer.blocks.{bid}.ffn.act",), # mpt
|
||||||
|
@ -334,20 +307,16 @@ class TensorNameMap:
|
||||||
"transformer.h.{bid}.mlp.linear_1", # refact
|
"transformer.h.{bid}.mlp.linear_1", # refact
|
||||||
"model.layers.{bid}.residual_mlp.w1", # arctic
|
"model.layers.{bid}.residual_mlp.w1", # arctic
|
||||||
"transformer.h.{bid}.mlp.c_fc_0", # exaone
|
"transformer.h.{bid}.mlp.c_fc_0", # exaone
|
||||||
"model.layers.{bid}.feed_forward.gate_proj", # llama4
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.FFN_GATE_EXP: (
|
MODEL_TENSOR.FFN_GATE_EXP: (
|
||||||
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
|
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
|
||||||
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
|
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
|
||||||
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
|
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
|
||||||
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
|
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
|
||||||
"model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
|
|
||||||
"model.layers.{bid}.feed_forward.experts.gate_proj", # llama4
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.FFN_GATE_SHEXP: (
|
MODEL_TENSOR.FFN_GATE_SHEXP: (
|
||||||
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
|
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
|
||||||
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
|
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
|
||||||
"model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4
|
|
||||||
),
|
),
|
||||||
# Feed-forward down
|
# Feed-forward down
|
||||||
MODEL_TENSOR.FFN_DOWN: (
|
MODEL_TENSOR.FFN_DOWN: (
|
||||||
|
@ -375,7 +344,6 @@ class TensorNameMap:
|
||||||
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
||||||
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
||||||
"model.layers.h.{bid}.mlp.c_proj", # exaone
|
"model.layers.h.{bid}.mlp.c_proj", # exaone
|
||||||
"model.layers.{bid}.feed_forward.down_proj", # llama4
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.FFN_DOWN_EXP: (
|
MODEL_TENSOR.FFN_DOWN_EXP: (
|
||||||
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
|
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
|
||||||
|
@ -383,15 +351,10 @@ class TensorNameMap:
|
||||||
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
|
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
|
||||||
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
|
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
|
||||||
"model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
|
"model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
|
||||||
"model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
|
|
||||||
"model.layers.{bid}.feed_forward.experts.down_proj", # llama4
|
|
||||||
"encoder.layers.{bid}.mlp.experts.mlp.w2", # nomic-bert-moe
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.FFN_DOWN_SHEXP: (
|
MODEL_TENSOR.FFN_DOWN_SHEXP: (
|
||||||
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
|
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
|
||||||
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
|
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
|
||||||
"model.layers.{bid}.feed_forward.shared_expert.down_proj", # llama4
|
|
||||||
"model.layers.{bid}.shared_mlp.output_linear", # granitemoe
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.ATTN_Q_NORM: (
|
MODEL_TENSOR.ATTN_Q_NORM: (
|
||||||
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
||||||
|
@ -447,116 +410,62 @@ class TensorNameMap:
|
||||||
"model.layers.{bid}.out_proj",
|
"model.layers.{bid}.out_proj",
|
||||||
"backbone.layers.{bid}.mixer.out_proj",
|
"backbone.layers.{bid}.mixer.out_proj",
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TIME_MIX_W0: ("model.layers.{bid}.attention.w0",), # rwkv7
|
|
||||||
MODEL_TENSOR.TIME_MIX_W1: (
|
MODEL_TENSOR.TIME_MIX_W1: (
|
||||||
"rwkv.blocks.{bid}.attention.time_maa_w1", # rwkv6
|
"rwkv.blocks.{bid}.attention.time_maa_w1", # rwkv v6
|
||||||
"model.layers.{bid}.self_attn.time_maa_w1", # rwkv6qwen2
|
|
||||||
"model.layers.{bid}.attention.w1", # rwkv7
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TIME_MIX_W2: (
|
MODEL_TENSOR.TIME_MIX_W2: (
|
||||||
"rwkv.blocks.{bid}.attention.time_maa_w2", # rwkv6
|
"rwkv.blocks.{bid}.attention.time_maa_w2", # rwkv v6
|
||||||
"model.layers.{bid}.self_attn.time_maa_w2", # rwkv6qwen2
|
|
||||||
"model.layers.{bid}.attention.w2", # rwkv7
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TIME_MIX_A0: ("model.layers.{bid}.attention.a0",), # rwkv7
|
|
||||||
MODEL_TENSOR.TIME_MIX_A1: ("model.layers.{bid}.attention.a1",), # rwkv7
|
|
||||||
MODEL_TENSOR.TIME_MIX_A2: ("model.layers.{bid}.attention.a2",), # rwkv7
|
|
||||||
MODEL_TENSOR.TIME_MIX_V0: ("model.layers.{bid}.attention.v0",), # rwkv7
|
|
||||||
MODEL_TENSOR.TIME_MIX_V1: ("model.layers.{bid}.attention.v1",), # rwkv7
|
|
||||||
MODEL_TENSOR.TIME_MIX_V2: ("model.layers.{bid}.attention.v2",), # rwkv7
|
|
||||||
MODEL_TENSOR.TIME_MIX_G1: ("model.layers.{bid}.attention.g1",), # rwkv7
|
|
||||||
MODEL_TENSOR.TIME_MIX_G2: ("model.layers.{bid}.attention.g2",), # rwkv7
|
|
||||||
MODEL_TENSOR.TIME_MIX_K_K: ("model.layers.{bid}.attention.k_k",), # rwkv7
|
|
||||||
MODEL_TENSOR.TIME_MIX_K_A: ("model.layers.{bid}.attention.k_a",), # rwkv7
|
|
||||||
MODEL_TENSOR.TIME_MIX_R_K: ("model.layers.{bid}.attention.r_k",), # rwkv7
|
|
||||||
MODEL_TENSOR.TIME_MIX_LERP_X: (
|
MODEL_TENSOR.TIME_MIX_LERP_X: (
|
||||||
"rwkv.blocks.{bid}.attention.time_maa_x", # rwkv6
|
"rwkv.blocks.{bid}.attention.time_maa_x", # rwkv v6
|
||||||
"model.layers.{bid}.self_attn.time_maa_x", # rwkv6qwen2
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TIME_MIX_LERP_K: (
|
MODEL_TENSOR.TIME_MIX_LERP_K: (
|
||||||
"rwkv.blocks.{bid}.attention.time_maa_k", # rwkv6
|
"rwkv.blocks.{bid}.attention.time_maa_k", # rwkv v6
|
||||||
"model.layers.{bid}.self_attn.time_maa_k", # rwkv6qwen2
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TIME_MIX_LERP_V: (
|
MODEL_TENSOR.TIME_MIX_LERP_V: (
|
||||||
"rwkv.blocks.{bid}.attention.time_maa_v", # rwkv6
|
"rwkv.blocks.{bid}.attention.time_maa_v", # rwkv v6
|
||||||
"model.layers.{bid}.self_attn.time_maa_v", # rwkv6qwen2
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TIME_MIX_LERP_R: (
|
MODEL_TENSOR.TIME_MIX_LERP_R: (
|
||||||
"rwkv.blocks.{bid}.attention.time_maa_r", # rwkv6
|
"rwkv.blocks.{bid}.attention.time_maa_r", # rwkv v6
|
||||||
"model.layers.{bid}.self_attn.time_maa_r", # rwkv6qwen2
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TIME_MIX_LERP_G: (
|
MODEL_TENSOR.TIME_MIX_LERP_G: (
|
||||||
"rwkv.blocks.{bid}.attention.time_maa_g", # rwkv6
|
"rwkv.blocks.{bid}.attention.time_maa_g", # rwkv v6
|
||||||
"model.layers.{bid}.self_attn.time_maa_g", # rwkv6qwen2
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TIME_MIX_LERP_W: (
|
MODEL_TENSOR.TIME_MIX_LERP_W: (
|
||||||
"rwkv.blocks.{bid}.attention.time_maa_w", # rwkv6
|
"rwkv.blocks.{bid}.attention.time_maa_w", # rwkv v6
|
||||||
"model.layers.{bid}.self_attn.time_maa_w", # rwkv6qwen2
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TIME_MIX_FIRST: (
|
MODEL_TENSOR.TIME_MIX_FIRST: (
|
||||||
"rwkv.blocks.{bid}.attention.time_faaaa", # rwkv6
|
"rwkv.blocks.{bid}.attention.time_faaaa", # rwkv v6
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TIME_MIX_DECAY: (
|
MODEL_TENSOR.TIME_MIX_DECAY: (
|
||||||
"rwkv.blocks.{bid}.attention.time_decay", # rwkv6
|
"rwkv.blocks.{bid}.attention.time_decay", # rwkv v6
|
||||||
"model.layers.{bid}.self_attn.time_decay", # rwkv6qwen2
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TIME_MIX_DECAY_W1: (
|
MODEL_TENSOR.TIME_MIX_DECAY_W1: (
|
||||||
"rwkv.blocks.{bid}.attention.time_decay_w1", # rwkv6
|
"rwkv.blocks.{bid}.attention.time_decay_w1", # rwkv v6
|
||||||
"model.layers.{bid}.self_attn.time_decay_w1", # rwkv6qwen2
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.TIME_MIX_DECAY_W2: (
|
MODEL_TENSOR.TIME_MIX_DECAY_W2: (
|
||||||
"rwkv.blocks.{bid}.attention.time_decay_w2", # rwkv6
|
"rwkv.blocks.{bid}.attention.time_decay_w2", # rwkv v6
|
||||||
"model.layers.{bid}.self_attn.time_decay_w2", # rwkv6qwen2
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.TIME_MIX_KEY: (
|
|
||||||
"rwkv.blocks.{bid}.attention.key", # rwkv6
|
|
||||||
"model.layers.{bid}.self_attn.k_proj", # rwkv6qwen2
|
|
||||||
"model.layers.{bid}.attention.key", # rwkv7
|
|
||||||
"model.layers.{bid}.attention.k_proj", # rwkv7
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.TIME_MIX_VALUE: (
|
|
||||||
"rwkv.blocks.{bid}.attention.value", # rwkv6
|
|
||||||
"model.layers.{bid}.self_attn.v_proj", # rwkv6qwen2
|
|
||||||
"model.layers.{bid}.attention.value", # rwkv7
|
|
||||||
"model.layers.{bid}.attention.v_proj", # rwkv7
|
|
||||||
),
|
),
|
||||||
|
MODEL_TENSOR.TIME_MIX_KEY: ("rwkv.blocks.{bid}.attention.key",), # rwkv
|
||||||
|
MODEL_TENSOR.TIME_MIX_VALUE: ("rwkv.blocks.{bid}.attention.value",), # rwkv
|
||||||
MODEL_TENSOR.TIME_MIX_RECEPTANCE: (
|
MODEL_TENSOR.TIME_MIX_RECEPTANCE: (
|
||||||
"rwkv.blocks.{bid}.attention.receptance", # rwkv6
|
"rwkv.blocks.{bid}.attention.receptance", # rwkv
|
||||||
"model.layers.{bid}.self_attn.q_proj", # rwkv6qwen2
|
|
||||||
"model.layers.{bid}.attention.receptance", # rwkv7
|
|
||||||
"model.layers.{bid}.attention.r_proj", # rwkv7
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.TIME_MIX_GATE: (
|
|
||||||
"rwkv.blocks.{bid}.attention.gate", # rwkv6
|
|
||||||
"model.layers.{bid}.self_attn.gate", # rwkv6qwen2
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.TIME_MIX_LN: (
|
|
||||||
"rwkv.blocks.{bid}.attention.ln_x", # rwkv6
|
|
||||||
"model.layers.{bid}.attention.ln_x", # rwkv7
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.TIME_MIX_OUTPUT: (
|
|
||||||
"rwkv.blocks.{bid}.attention.output", # rwkv6
|
|
||||||
"model.layers.{bid}.self_attn.o_proj", # rwkv6qwen2
|
|
||||||
"model.layers.{bid}.attention.output", # rwkv7
|
|
||||||
"model.layers.{bid}.attention.o_proj", # rwkv7
|
|
||||||
),
|
),
|
||||||
|
MODEL_TENSOR.TIME_MIX_GATE: ("rwkv.blocks.{bid}.attention.gate",), # rwkv
|
||||||
|
MODEL_TENSOR.TIME_MIX_LN: ("rwkv.blocks.{bid}.attention.ln_x",), # rwkv
|
||||||
|
MODEL_TENSOR.TIME_MIX_OUTPUT: ("rwkv.blocks.{bid}.attention.output",), # rwkv
|
||||||
MODEL_TENSOR.CHANNEL_MIX_LERP_K: (
|
MODEL_TENSOR.CHANNEL_MIX_LERP_K: (
|
||||||
"rwkv.blocks.{bid}.feed_forward.time_maa_k", # rwkv6
|
"rwkv.blocks.{bid}.feed_forward.time_maa_k", # rwkv v6
|
||||||
"model.layers.{bid}.feed_forward.x_k", # rwkv7
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.CHANNEL_MIX_LERP_R: (
|
MODEL_TENSOR.CHANNEL_MIX_LERP_R: (
|
||||||
"rwkv.blocks.{bid}.feed_forward.time_maa_r", # rwkv6
|
"rwkv.blocks.{bid}.feed_forward.time_maa_r", # rwkv v6
|
||||||
),
|
|
||||||
MODEL_TENSOR.CHANNEL_MIX_KEY: (
|
|
||||||
"rwkv.blocks.{bid}.feed_forward.key", # rwkv6
|
|
||||||
"model.layers.{bid}.feed_forward.key", # rwkv7
|
|
||||||
),
|
),
|
||||||
|
MODEL_TENSOR.CHANNEL_MIX_KEY: ("rwkv.blocks.{bid}.feed_forward.key",), # rwkv
|
||||||
MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: (
|
MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: (
|
||||||
"rwkv.blocks.{bid}.feed_forward.receptance", # rwkv6
|
"rwkv.blocks.{bid}.feed_forward.receptance", # rwkv
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.CHANNEL_MIX_VALUE: (
|
MODEL_TENSOR.CHANNEL_MIX_VALUE: (
|
||||||
"rwkv.blocks.{bid}.feed_forward.value", # rwkv6
|
"rwkv.blocks.{bid}.feed_forward.value", # rwkv
|
||||||
"model.layers.{bid}.feed_forward.value", # rwkv7
|
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.ATTN_Q_A: ("model.layers.{bid}.self_attn.q_a_proj",), # deepseek2
|
MODEL_TENSOR.ATTN_Q_A: ("model.layers.{bid}.self_attn.q_a_proj",), # deepseek2
|
||||||
MODEL_TENSOR.ATTN_Q_B: ("model.layers.{bid}.self_attn.q_b_proj",), # deepseek2
|
MODEL_TENSOR.ATTN_Q_B: ("model.layers.{bid}.self_attn.q_b_proj",), # deepseek2
|
||||||
|
@ -566,8 +475,6 @@ class TensorNameMap:
|
||||||
MODEL_TENSOR.ATTN_KV_B: (
|
MODEL_TENSOR.ATTN_KV_B: (
|
||||||
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
|
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
|
||||||
),
|
),
|
||||||
MODEL_TENSOR.ATTN_K_B: ("model.layers.{bid}.self_attn.k_b_proj",), # deepseek2
|
|
||||||
MODEL_TENSOR.ATTN_V_B: ("model.layers.{bid}.self_attn.v_b_proj",), # deepseek2
|
|
||||||
MODEL_TENSOR.ATTN_Q_A_NORM: (
|
MODEL_TENSOR.ATTN_Q_A_NORM: (
|
||||||
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
|
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
|
||||||
),
|
),
|
||||||
|
@ -665,147 +572,6 @@ class TensorNameMap:
|
||||||
MODEL_TENSOR.POSNET_ATTN_OUT: (
|
MODEL_TENSOR.POSNET_ATTN_OUT: (
|
||||||
"backbone.posnet.{bid}.proj_out", # wavtokenizer
|
"backbone.posnet.{bid}.proj_out", # wavtokenizer
|
||||||
),
|
),
|
||||||
#############################################################################
|
|
||||||
## Vision encoder
|
|
||||||
MODEL_TENSOR.V_MMPROJ: (
|
|
||||||
"multi_modal_projector.linear_{bid}",
|
|
||||||
"visual.merger.mlp.{bid}", # qwen2vl
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_MMPROJ_FC: (
|
|
||||||
"model.connector.modality_projection.proj", # SmolVLM
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_MMPROJ_MLP: (
|
|
||||||
"model.mm_projector.mlp.mlp.{bid}",
|
|
||||||
"mlp1.{bid}", # InternVL
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_MMPROJ_PEG: ("model.mm_projector.peg.peg.{bid}",),
|
|
||||||
MODEL_TENSOR.V_ENC_EMBD_CLS: (
|
|
||||||
"vision_tower.vision_model.embeddings.class_embedding",
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_EMBD_PATCH: (
|
|
||||||
"vision_tower.vision_model.embeddings.patch_embedding",
|
|
||||||
"vpm.embeddings.patch_embedding",
|
|
||||||
"model.vision_model.embeddings.patch_embedding", # SmolVLM
|
|
||||||
"vision_tower.patch_conv", # pixtral
|
|
||||||
"visual.patch_embed.proj", # qwen2vl
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_EMBD_POS: (
|
|
||||||
"vision_tower.vision_model.embeddings.position_embedding",
|
|
||||||
"vpm.embeddings.position_embedding",
|
|
||||||
"model.vision_model.embeddings.position_embedding", # SmolVLM
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_ATTN_Q: (
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
|
|
||||||
"vpm.encoder.layers.{bid}.self_attn.q_proj",
|
|
||||||
"model.vision_model.encoder.layers.{bid}.self_attn.q_proj", # SmolVLM
|
|
||||||
"vision_tower.transformer.layers.{bid}.attention.q_proj", # pixtral
|
|
||||||
"visual.blocks.{bid}.attn.q", # qwen2vl, generated
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_ATTN_Q_NORM: (
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.attn.q_norm", # InternVL
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_ATTN_K: (
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_proj",
|
|
||||||
"vpm.encoder.layers.{bid}.self_attn.k_proj",
|
|
||||||
"model.vision_model.encoder.layers.{bid}.self_attn.k_proj", # SmolVLM
|
|
||||||
"vision_tower.transformer.layers.{bid}.attention.k_proj", # pixtral
|
|
||||||
"visual.blocks.{bid}.attn.k", # qwen2vl, generated
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_ATTN_K_NORM: (
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.attn.k_norm", # InternVL
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_ATTN_V: (
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_proj",
|
|
||||||
"vpm.encoder.layers.{bid}.self_attn.v_proj",
|
|
||||||
"model.vision_model.encoder.layers.{bid}.self_attn.v_proj", # SmolVLM
|
|
||||||
"vision_tower.transformer.layers.{bid}.attention.v_proj", # pixtral
|
|
||||||
"visual.blocks.{bid}.attn.v", # qwen2vl, generated
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_INPUT_NORM: (
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.layer_norm1",
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.norm1", # InternVL
|
|
||||||
"vpm.encoder.layers.{bid}.layer_norm1",
|
|
||||||
"model.vision_model.encoder.layers.{bid}.layer_norm1", # SmolVLM
|
|
||||||
"vision_tower.transformer.layers.{bid}.attention_norm", # pixtral
|
|
||||||
"visual.blocks.{bid}.norm1", # qwen2vl
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_OUTPUT: (
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.attn.proj", # InternVL
|
|
||||||
"vpm.encoder.layers.{bid}.self_attn.out_proj",
|
|
||||||
"model.vision_model.encoder.layers.{bid}.self_attn.out_proj", # SmolVLM
|
|
||||||
"vision_tower.transformer.layers.{bid}.attention.o_proj", # pixtral
|
|
||||||
"visual.blocks.{bid}.attn.proj", # qwen2vl
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_OUTPUT_NORM: (
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.layer_norm2",
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.norm2", # InternVL
|
|
||||||
"vpm.encoder.layers.{bid}.layer_norm2",
|
|
||||||
"model.vision_model.encoder.layers.{bid}.layer_norm2", # SmolVLM
|
|
||||||
"vision_tower.transformer.layers.{bid}.ffn_norm", # pixtral
|
|
||||||
"visual.blocks.{bid}.norm2", # qwen2vl
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_FFN_UP: (
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1",
|
|
||||||
"vpm.encoder.layers.{bid}.mlp.fc1",
|
|
||||||
"model.vision_model.encoder.layers.{bid}.mlp.fc1", # SmolVLM, gemma3
|
|
||||||
"vision_tower.transformer.layers.{bid}.feed_forward.up_proj", # pixtral
|
|
||||||
"visual.blocks.{bid}.mlp.fc1", # qwen2vl
|
|
||||||
"visual.blocks.{bid}.mlp.up_proj", # qwen2.5vl
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_FFN_GATE: (
|
|
||||||
"vision_tower.transformer.layers.{bid}.feed_forward.gate_proj", # pixtral
|
|
||||||
"visual.blocks.{bid}.mlp.gate_proj", # qwen2.5vl
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_ENC_FFN_DOWN: (
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2",
|
|
||||||
"vpm.encoder.layers.{bid}.mlp.fc2",
|
|
||||||
"model.vision_model.encoder.layers.{bid}.mlp.fc2", # SmolVLM, gemma3
|
|
||||||
"vision_tower.transformer.layers.{bid}.feed_forward.down_proj", # pixtral
|
|
||||||
"visual.blocks.{bid}.mlp.fc2", # qwen2vl
|
|
||||||
"visual.blocks.{bid}.mlp.down_proj", # qwen2.5vl
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_LAYER_SCALE_1: (
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.ls1", # InternVL
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_LAYER_SCALE_2: (
|
|
||||||
"vision_tower.vision_model.encoder.layers.{bid}.ls2", # InternVL
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_PRE_NORM: (
|
|
||||||
"vision_tower.vision_model.pre_layrnorm",
|
|
||||||
"vision_tower.ln_pre", # pixtral
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_POST_NORM: (
|
|
||||||
"vision_tower.vision_model.post_layernorm",
|
|
||||||
"model.vision_model.post_layernorm", # SmolVLM
|
|
||||||
"visual.merger.ln_q", # qwen2vl
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_MM_INP_PROJ: ("multi_modal_projector.mm_input_projection",),
|
|
||||||
MODEL_TENSOR.V_MM_INP_NORM: ("multi_modal_projector.norm",),
|
|
||||||
MODEL_TENSOR.V_MM_SOFT_EMB_NORM: ("multi_modal_projector.mm_soft_emb_norm",),
|
|
||||||
MODEL_TENSOR.V_RESMPL_POS_EMBD_K: ("resampler.pos_embed_k",),
|
|
||||||
MODEL_TENSOR.V_RESMPL_ATTN_Q: (
|
|
||||||
"resampler.attn.in_proj_q", # tensor generated from resampler.attn.in_proj
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_RESMPL_ATTN_K: (
|
|
||||||
"resampler.attn.in_proj_k", # tensor generated from resampler.attn.in_proj
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_RESMPL_ATTN_V: (
|
|
||||||
"resampler.attn.in_proj_v", # tensor generated from resampler.attn.in_proj
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_RESMPL_ATTN_OUT: ("resampler.attn.out_proj",),
|
|
||||||
MODEL_TENSOR.V_RESMPL_KV: ("resampler.kv_proj",),
|
|
||||||
MODEL_TENSOR.V_RESMPL_POST_NORM: ("resampler.ln_post",),
|
|
||||||
MODEL_TENSOR.V_RESMPL_KV_NORM: ("resampler.ln_kv",),
|
|
||||||
MODEL_TENSOR.V_RESMPL_Q_NORM: ("resampler.ln_q",),
|
|
||||||
MODEL_TENSOR.V_RESMPL_PROJ: ("resampler.proj",),
|
|
||||||
MODEL_TENSOR.V_RESMPL_QUERY: ("resampler.query",),
|
|
||||||
MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK: (
|
|
||||||
"v.token_embd.img_break", # for pixtral, this is a generated vector
|
|
||||||
),
|
|
||||||
MODEL_TENSOR.V_MM_PATCH_MERGER: (
|
|
||||||
"multi_modal_projector.patch_merger.merging_layer", # mistral small 3.1
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# architecture-specific block mappings
|
# architecture-specific block mappings
|
||||||
|
|
|
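The block above is, in effect, a templated lookup table: each MODEL_TENSOR key owns a tuple of candidate checkpoint tensor names, with {bid} standing in for the block index. As a rough sketch of how such a table can be consumed (the tiny table, the blk.N.* target names, and the helper below are illustrative only, not the actual gguf-py API), the templates can be expanded per block id into a flat dictionary:

    # Minimal sketch, assuming a small invented mapping table.
    EXAMPLE_MAPPING = {
        "attn_q": ("model.layers.{bid}.self_attn.q_proj", "layers.{bid}.attention.wq"),
        "attn_k": ("model.layers.{bid}.self_attn.k_proj", "layers.{bid}.attention.wk"),
    }

    def build_lookup(mapping: dict, n_blocks: int) -> dict:
        lookup = {}
        for target, templates in mapping.items():
            for template in templates:
                for bid in range(n_blocks):
                    # Expand "{bid}" for every block and point it at a canonical name.
                    lookup[template.format(bid=bid)] = f"blk.{bid}.{target}"
        return lookup

    lookup = build_lookup(EXAMPLE_MAPPING, n_blocks=32)
    print(lookup["model.layers.3.self_attn.k_proj"])  # -> blk.3.attn_k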
@ -1,11 +1,7 @@
from __future__ import annotations

-from dataclasses import dataclass
from typing import Literal

-import os
-import json


def fill_templated_filename(filename: str, output_type: str | None) -> str:
    # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'

@ -71,7 +67,7 @@ def naming_convention(
    output_type: str | None,
    model_type: Literal["vocab", "LoRA"] | None = None,
) -> str:
-    # Reference: https://github.com/ggml-org/ggml/blob/master/docs/gguf.md#gguf-naming-convention
+    # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention

    if base_name is not None:
        name = base_name.strip().replace(" ", "-").replace("/", "-")

@ -103,214 +99,3 @@ def naming_convention(
    kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""

    return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
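The naming helper ends by concatenating whichever components were derived; empty components simply disappear. A hedged worked example of that final assembly step (the component values below are invented, and the real function computes them from model metadata):

    def assemble_gguf_name(name, parameters="", finetune="", version="", encoding="", kind=""):
        # Mirrors the final f-string in naming_convention(); empty parts vanish.
        return f"{name}{parameters}{finetune}{version}{encoding}{kind}"

    # Hypothetical components for a quantized instruct model:
    print(assemble_gguf_name("Llama-3.2", "-3B", "-Instruct", "-v1.0", "-Q4_K_M"))
    # -> Llama-3.2-3B-Instruct-v1.0-Q4_K_M
    # The real helper would also append a "-LoRA" or "-vocab" kind suffix when model_type is set.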

-@dataclass
-class RemoteTensor:
-    dtype: str
-    shape: tuple[int, ...]
-    offset_start: int
-    size: int
-    url: str
-
-    def data(self) -> bytearray:
-        # TODO: handle request errors (maybe with limited retries?)
-        # NOTE: using a bytearray, otherwise PyTorch complains the buffer is not writeable
-        data = bytearray(
-            SafetensorRemote.get_data_by_range(
-                url=self.url, start=self.offset_start, size=self.size
-            )
-        )
-        return data
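RemoteTensor.data() only returns raw bytes, so a caller still has to interpret them using the dtype and shape fields. A small sketch of that step, assuming numpy is available; the dtype table below is deliberately partial (bfloat16, for instance, has no plain numpy dtype):

    import numpy as np

    # Partial, illustrative mapping from safetensors dtype strings to numpy dtypes.
    SAFETENSORS_DTYPES = {"F32": np.float32, "F16": np.float16, "I32": np.int32, "I8": np.int8}

    def to_array(data: bytearray, dtype: str, shape: tuple) -> np.ndarray:
        np_dtype = SAFETENSORS_DTYPES[dtype]  # raises KeyError for unlisted dtypes
        return np.frombuffer(data, dtype=np_dtype).reshape(shape)

    # e.g. to_array(remote_tensor.data(), remote_tensor.dtype, remote_tensor.shape)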

-class SafetensorRemote:
-    """
-    Utility class to handle remote safetensor files.
-    This class is designed to work with Hugging Face model repositories.
-
-    Example (one model has a single safetensor file, the other has multiple):
-        for model_id in ["ngxson/TEST-Tiny-Llama4", "Qwen/Qwen2.5-7B-Instruct"]:
-            tensors = SafetensorRemote.get_list_tensors_hf_model(model_id)
-            print(tensors)
-
-    Example reading tensor data:
-        tensors = SafetensorRemote.get_list_tensors_hf_model(model_id)
-        for name, meta in tensors.items():
-            dtype, shape, offset_start, size, remote_safetensor_url = meta
-            # read the tensor data
-            data = SafetensorRemote.get_data_by_range(remote_safetensor_url, offset_start, size)
-            print(data)
-    """
-
-    BASE_DOMAIN = "https://huggingface.co"
-    ALIGNMENT = 8  # bytes
|
|
||||||
@classmethod
|
|
||||||
def get_list_tensors_hf_model(cls, model_id: str) -> dict[str, RemoteTensor]:
|
|
||||||
"""
|
|
||||||
Get list of tensors from a Hugging Face model repository.
|
|
||||||
|
|
||||||
Returns a dictionary of tensor names and their metadata.
|
|
||||||
Each tensor is represented as a tuple of (dtype, shape, offset_start, size, remote_safetensor_url)
|
|
||||||
"""
|
|
||||||
# case 1: model has only one single model.safetensor file
|
|
||||||
is_single_file = cls.check_file_exist(
|
|
||||||
f"{cls.BASE_DOMAIN}/{model_id}/resolve/main/model.safetensors"
|
|
||||||
)
|
|
||||||
if is_single_file:
|
|
||||||
url = f"{cls.BASE_DOMAIN}/{model_id}/resolve/main/model.safetensors"
|
|
||||||
return cls.get_list_tensors(url)
|
|
||||||
|
|
||||||
# case 2: model has multiple files
|
|
||||||
index_url = (
|
|
||||||
f"{cls.BASE_DOMAIN}/{model_id}/resolve/main/model.safetensors.index.json"
|
|
||||||
)
|
|
||||||
is_multiple_files = cls.check_file_exist(index_url)
|
|
||||||
if is_multiple_files:
|
|
||||||
# read the index file
|
|
||||||
index_data = cls.get_data_by_range(index_url, 0)
|
|
||||||
index_str = index_data.decode("utf-8")
|
|
||||||
index_json = json.loads(index_str)
|
|
||||||
assert (
|
|
||||||
index_json.get("weight_map") is not None
|
|
||||||
), "weight_map not found in index file"
|
|
||||||
weight_map = index_json["weight_map"]
|
|
||||||
# get the list of files
|
|
||||||
all_files = list(set(weight_map.values()))
|
|
||||||
all_files.sort() # make sure we load shard files in order
|
|
||||||
# get the list of tensors
|
|
||||||
tensors: dict[str, RemoteTensor] = {}
|
|
||||||
for file in all_files:
|
|
||||||
url = f"{cls.BASE_DOMAIN}/{model_id}/resolve/main/{file}"
|
|
||||||
for key, val in cls.get_list_tensors(url).items():
|
|
||||||
tensors[key] = val
|
|
||||||
return tensors
|
|
||||||
|
|
||||||
raise ValueError(f"Model {model_id} does not have any safetensor files")
|
|
||||||
|
|
||||||
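A short usage sketch for the lookup above; it assumes network access and that the repository id is public (the id below is the one that also appears in the class docstring):

    # Assumes the repo is reachable; works for both single-file and sharded repos.
    tensors = SafetensorRemote.get_list_tensors_hf_model("Qwen/Qwen2.5-7B-Instruct")
    print(f"{len(tensors)} tensors found")
    name, meta = next(iter(tensors.items()))
    print(name, meta.dtype, meta.shape, meta.size)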
@classmethod
|
|
||||||
def get_list_tensors(cls, url: str) -> dict[str, RemoteTensor]:
|
|
||||||
"""
|
|
||||||
Get list of tensors from a remote safetensor file.
|
|
||||||
|
|
||||||
Returns a dictionary of tensor names and their metadata.
|
|
||||||
Each tensor is represented as a tuple of (dtype, shape, offset_start, size)
|
|
||||||
"""
|
|
||||||
metadata, data_start_offset = cls.get_metadata(url)
|
|
||||||
res: dict[str, RemoteTensor] = {}
|
|
||||||
|
|
||||||
for name, meta in metadata.items():
|
|
||||||
if name == "__metadata__":
|
|
||||||
continue
|
|
||||||
if not isinstance(meta, dict):
|
|
||||||
raise ValueError(f"Invalid metadata for tensor '{name}': {meta}")
|
|
||||||
try:
|
|
||||||
dtype = meta["dtype"]
|
|
||||||
shape = meta["shape"]
|
|
||||||
offset_start_relative, offset_end_relative = meta["data_offsets"]
|
|
||||||
size = offset_end_relative - offset_start_relative
|
|
||||||
offset_start = data_start_offset + offset_start_relative
|
|
||||||
res[name] = RemoteTensor(
|
|
||||||
dtype=dtype,
|
|
||||||
shape=tuple(shape),
|
|
||||||
offset_start=offset_start,
|
|
||||||
size=size,
|
|
||||||
url=url,
|
|
||||||
)
|
|
||||||
except KeyError as e:
|
|
||||||
raise ValueError(
|
|
||||||
f"Missing key in metadata for tensor '{name}': {e}, meta = {meta}"
|
|
||||||
)
|
|
||||||
|
|
||||||
return res
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_metadata(cls, url: str) -> tuple[dict, int]:
|
|
||||||
"""
|
|
||||||
Get JSON metadata from a remote safetensor file.
|
|
||||||
|
|
||||||
Returns tuple of (metadata, data_start_offset)
|
|
||||||
"""
|
|
||||||
# Request first 5MB of the file (hopefully enough for metadata)
|
|
||||||
read_size = 5 * 1024 * 1024
|
|
||||||
raw_data = cls.get_data_by_range(url, 0, read_size)
|
|
||||||
|
|
||||||
# Parse header
|
|
||||||
# First 8 bytes contain the metadata length as u64 little-endian
|
|
||||||
if len(raw_data) < 8:
|
|
||||||
raise ValueError("Not enough data to read metadata size")
|
|
||||||
metadata_length = int.from_bytes(raw_data[:8], byteorder="little")
|
|
||||||
|
|
||||||
# Calculate the data start offset
|
|
||||||
data_start_offset = 8 + metadata_length
|
|
||||||
alignment = SafetensorRemote.ALIGNMENT
|
|
||||||
if data_start_offset % alignment != 0:
|
|
||||||
data_start_offset += alignment - (data_start_offset % alignment)
|
|
||||||
|
|
||||||
# Check if we have enough data to read the metadata
|
|
||||||
if len(raw_data) < 8 + metadata_length:
|
|
||||||
raise ValueError(
|
|
||||||
f"Could not read complete metadata. Need {8 + metadata_length} bytes, got {len(raw_data)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extract metadata bytes and parse as JSON
|
|
||||||
metadata_bytes = raw_data[8 : 8 + metadata_length]
|
|
||||||
metadata_str = metadata_bytes.decode("utf-8")
|
|
||||||
try:
|
|
||||||
metadata = json.loads(metadata_str)
|
|
||||||
return metadata, data_start_offset
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
raise ValueError(f"Failed to parse safetensor metadata as JSON: {e}")
|
|
||||||
|
|
||||||
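The header layout parsed here (an 8-byte little-endian length, then a JSON blob, then aligned tensor data) is the standard safetensors format, so the same parsing can be checked against a local file without any HTTP. A minimal offline sketch (the path is a placeholder):

    import json

    def read_safetensors_header(path: str) -> dict:
        with open(path, "rb") as f:
            header_len = int.from_bytes(f.read(8), byteorder="little")
            return json.loads(f.read(header_len).decode("utf-8"))

    # header = read_safetensors_header("model.safetensors")
    # print(list(header.keys()))  # tensor names, plus an optional "__metadata__" entry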
@classmethod
|
|
||||||
def get_data_by_range(cls, url: str, start: int, size: int = -1) -> bytes:
|
|
||||||
"""
|
|
||||||
Get raw byte data from a remote file by range.
|
|
||||||
If size is not specified, it will read the entire file.
|
|
||||||
"""
|
|
||||||
import requests
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
parsed_url = urlparse(url)
|
|
||||||
if not parsed_url.scheme or not parsed_url.netloc:
|
|
||||||
raise ValueError(f"Invalid URL: {url}")
|
|
||||||
|
|
||||||
headers = cls._get_request_headers()
|
|
||||||
if size > -1:
|
|
||||||
headers["Range"] = f"bytes={start}-{start + size}"
|
|
||||||
response = requests.get(url, allow_redirects=True, headers=headers)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
# Get raw byte data
|
|
||||||
return response.content[:size]
|
|
||||||
|
|
||||||
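One detail worth keeping from get_data_by_range is the final slice: the response is cut to size even though a Range header was sent, which protects against servers that ignore byte ranges and return the whole file. The same pattern in isolation (the URL is a placeholder):

    import requests

    def fetch_prefix(url: str, size: int) -> bytes:
        # Request the first `size` bytes, but slice locally as well in case
        # the server ignores the Range header.
        headers = {"Range": f"bytes=0-{size - 1}"}
        response = requests.get(url, headers=headers, allow_redirects=True)
        response.raise_for_status()
        return response.content[:size]

    # first_bytes = fetch_prefix("https://example.com/model.safetensors", 8)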
@classmethod
|
|
||||||
def check_file_exist(cls, url: str) -> bool:
|
|
||||||
"""
|
|
||||||
Check if a file exists at the given URL.
|
|
||||||
Returns True if the file exists, False otherwise.
|
|
||||||
"""
|
|
||||||
import requests
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
parsed_url = urlparse(url)
|
|
||||||
if not parsed_url.scheme or not parsed_url.netloc:
|
|
||||||
raise ValueError(f"Invalid URL: {url}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
headers = cls._get_request_headers()
|
|
||||||
headers["Range"] = "bytes=0-0"
|
|
||||||
response = requests.head(url, allow_redirects=True, headers=headers)
|
|
||||||
# Success (2xx) or redirect (3xx)
|
|
||||||
return 200 <= response.status_code < 400
|
|
||||||
except requests.RequestException:
|
|
||||||
return False
|
|
||||||

-    @classmethod
-    def _get_request_headers(cls) -> dict[str, str]:
-        """Prepare common headers for requests."""
-        headers = {"User-Agent": "convert_hf_to_gguf"}
-        if os.environ.get("HF_TOKEN"):
-            headers["Authorization"] = f"Bearer {os.environ['HF_TOKEN']}"
-        return headers
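Since the Authorization header is only added when HF_TOKEN is present in the environment, gated or private repositories need the token exported before any of the remote helpers run. One way to do that from Python (the token value is a placeholder):

    import os

    # Export a real token (for example in the shell or a .env file) before
    # converting gated models; the placeholder below only shows where it goes.
    os.environ.setdefault("HF_TOKEN", "hf_xxxxxxxxxxxxxxxxx")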
@ -166,7 +166,7 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
                and isinstance(merges[0][0], str)
            ):
                # New format since transformers 4.45 to support spaces in merges
-               # ref: https://github.com/ggml-org/llama.cpp/issues/9692
+               # ref: https://github.com/ggerganov/llama.cpp/issues/9692
                # TODO: internally store as the new format instead of converting to old
                if any(" " in s for pair in merges for s in pair):
                    logger.warning(

@ -195,12 +195,7 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
            return True
        with open(tokenizer_config_file, encoding="utf-8") as f:
            tokenizer_config = json.load(f)
-       chat_template_alt = None
-       chat_template_file = path / "chat_template.json"
-       if chat_template_file.is_file():
-           with open(chat_template_file, encoding="utf-8") as f:
-               chat_template_alt = json.load(f).get("chat_template")
-       chat_template = tokenizer_config.get("chat_template", chat_template_alt)
+       chat_template = tokenizer_config.get("chat_template")
        if chat_template is None or isinstance(chat_template, (str, list)):
            self.chat_template = chat_template
        else:
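The fuller variant shown above falls back to a sibling chat_template.json when tokenizer_config.json has no chat_template key; some newer Hugging Face exports ship the template in that separate file. A condensed stand-alone version of that lookup (paths are illustrative):

    import json
    from pathlib import Path

    def load_chat_template(model_dir: Path):
        config = json.loads((model_dir / "tokenizer_config.json").read_text(encoding="utf-8"))
        fallback = None
        template_file = model_dir / "chat_template.json"
        if template_file.is_file():
            fallback = json.loads(template_file.read_text(encoding="utf-8")).get("chat_template")
        # Prefer the key in tokenizer_config.json, otherwise use the separate file.
        return config.get("chat_template", fallback)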
@ -1,7 +1,7 @@
import os
import re
import sys
-from typing import Any, IO, List, TextIO, Union
+from typing import Any, List, TextIO, Union

from PySide6.QtWidgets import (
    QMessageBox,

@ -86,9 +86,9 @@ def show_about(self) -> None:

A tool for managing and converting GGUF models.
This application is licensed under the Apache License 2.0.
-Copyright (c) 2024-2025 leafspark.
+Copyright (c) 2025 leafspark.
It also utilizes llama.cpp, licensed under the MIT License.
-Copyright (c) 2023-2025 The ggml authors."""
+Copyright (c) 2023-2024 The ggml authors."""
        QMessageBox.about(self, "About AutoGGUF", about_text)


@ -97,7 +97,7 @@ def ensure_directory(path) -> None:
        os.makedirs(path)


-def open_file_safe(file_path, mode="r") -> IO[Any]:
+def open_file_safe(file_path, mode="r") -> TextIO:
    encodings = ["utf-8", "latin-1", "ascii", "utf-16"]
    for encoding in encodings:
        try:

@ -159,9 +159,7 @@ def update_cuda_backends(self) -> None:
        for item in os.listdir(llama_bin):
            item_path = os.path.join(llama_bin, item)
            if os.path.isdir(item_path) and "cudart-llama" not in item.lower():
-               if (
-                   "cu1" in item.lower() or "cuda-1" in item.lower()
-               ):  # Only include CUDA-capable backends
+               if "cu1" in item.lower():  # Only include CUDA-capable backends
                    self.backend_combo_cuda.addItem(item, userData=item_path)

        if self.backend_combo_cuda.count() == 0:
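open_file_safe simply walks a list of encodings until one can decode the file; the loop body is truncated in this view, so the error handling below is an assumption about how such a fallback is usually written rather than a copy of the original:

    def open_file_safe_sketch(file_path, mode="r"):
        encodings = ["utf-8", "latin-1", "ascii", "utf-16"]
        for encoding in encodings:
            try:
                with open(file_path, mode, encoding=encoding) as f:
                    f.read()  # force decoding now so a bad guess fails here
                return open(file_path, mode, encoding=encoding)
            except (UnicodeDecodeError, UnicodeError):
                continue
        raise ValueError(f"Could not decode {file_path} with any of {encodings}")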
src/utils.py (26 changed lines)
@ -1,10 +1,6 @@
from typing import Any, Union

-import urllib.request
-import urllib.error
-import json
-import ssl
-import certifi
+import requests
from PySide6.QtCore import Qt
from PySide6.QtWidgets import QFileDialog, QInputDialog, QMenu

@ -192,28 +188,16 @@ def refresh_releases(self) -> None:
        owner, repo = get_repo_from_env()
        url = f"https://api.github.com/repos/{owner}/{repo}/releases"

-       # Create SSL context with certifi certificates
-       ssl_context = ssl.create_default_context(cafile=certifi.where())
-
-       # Create request
-       req = urllib.request.Request(url)
-
-       # Make the request
-       with urllib.request.urlopen(req, context=ssl_context) as response:
-           if response.status != 200:
-               raise urllib.error.HTTPError(
-                   url, response.status, "HTTP Error", response.headers, None
-               )
-
-           releases = json.loads(response.read().decode("utf-8"))
+       response = requests.get(url)
+       response.raise_for_status()
+
+       releases = response.json()
        self.release_combo.clear()
        for release in releases:
            self.release_combo.addItem(release["tag_name"], userData=release)
        self.release_combo.currentIndexChanged.connect(self.update_assets)
        self.update_assets()

    except ValueError as e:
        show_error(self.logger, f"Invalid repository configuration: {str(e)}")
-   except (urllib.error.URLError, urllib.error.HTTPError) as e:
+   except requests.exceptions.RequestException as e:
        show_error(self.logger, ERROR_FETCHING_RELEASES.format(str(e)))
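On the requests side, the whole urllib/ssl/certifi setup collapses into a GET plus raise_for_status. For reference, the bare shape of that call against the GitHub releases API looks roughly like this (the owner/repo pair is only an example):

    import requests

    def list_release_tags(owner: str, repo: str) -> list:
        url = f"https://api.github.com/repos/{owner}/{repo}/releases"
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return [release["tag_name"] for release in response.json()]

    # print(list_release_tags("leafspark", "AutoGGUF"))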