diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..b59ff61 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,37 @@ +--- +name: Bug report +about: Create a report to help us improve vAnalytics +title: '[BUG] ' +labels: bug +assignees: '' +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Environment (please complete the following information):** + - OS: [e.g. Windows, macOS, Linux] + - vAnalytics Version: [e.g. v1.4.2] + - Python Version (if running from source): [e.g. 3.9] + - vLLM version(s) + +**Additional context** +Add any other context about the problem here. Include any relevant log outputs or error messages. + +**Checklist:** +- [ ] I have checked the existing issues to make sure this is not a duplicate +- [ ] I have included all relevant information to reproduce the issue +- [ ] I am running the latest version of vAnalytics \ No newline at end of file diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..500eb9a --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,8 @@ +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + day: "sunday" + open-pull-requests-limit: 10 diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml new file mode 100644 index 0000000..c2756b3 --- /dev/null +++ b/.github/workflows/black.yml @@ -0,0 +1,20 @@ +name: Black + +on: + push: + paths: + - '**.py' + pull_request: + paths: + - '**.py' + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - uses: psf/black@stable + with: + options: "--check --verbose" + src: "./src" diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..00058bf --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,99 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ "main" ] + paths-ignore: + - '**/*.md' + - '**/*.txt' + pull_request: + branches: [ "main" ] + paths-ignore: + - '**/*.md' + - '**/*.txt' + schedule: + - cron: '21 20 * * 6' + +jobs: + analyze: + name: Analyze (${{ matrix.language }}) + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners (GitHub.com only) + # Consider using larger runners or machines with greater resources for possible analysis time improvements. 
+    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
+    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
+    permissions:
+      # required for all workflows
+      security-events: write
+
+      # required to fetch internal or private CodeQL packs
+      packages: read
+
+      # only required for workflows in private repositories
+      actions: read
+      contents: read
+
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - language: python
+            build-mode: none
+        # CodeQL supports the following values for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift'
+        # Use 'c-cpp' to analyze code written in C, C++ or both
+        # Use 'java-kotlin' to analyze code written in Java, Kotlin or both
+        # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
+        # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
+        # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
+        # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
+        # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # Initializes the CodeQL tools for scanning.
+      - name: Initialize CodeQL
+        uses: github/codeql-action/init@v3
+        with:
+          languages: ${{ matrix.language }}
+          build-mode: ${{ matrix.build-mode }}
+          # If you wish to specify custom queries, you can do so here or in a config file.
+          # By default, queries listed here will override any specified in a config file.
+          # Prefix the list here with "+" to use these queries and those in the config file.
+
+          # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+          # queries: security-extended,security-and-quality
+
+      # If the analysis step fails for one of the languages you are analyzing with
+      # "We were unable to automatically build your code", modify the matrix above
+      # to set the build mode to "manual" for that language. Then modify this step
+      # to build your code.
+      # ℹ️ Command-line programs to run using the OS shell.
+ # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + - if: matrix.build-mode == 'manual' + shell: bash + run: | + echo 'If you are using a "manual" build mode for one or more of the' \ + 'languages you are analyzing, replace this with the commands to build' \ + 'your code, for example:' + echo ' make bootstrap' + echo ' make release' + exit 1 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/pip-audit.yml b/.github/workflows/pip-audit.yml new file mode 100644 index 0000000..9bc0cfe --- /dev/null +++ b/.github/workflows/pip-audit.yml @@ -0,0 +1,59 @@ +name: Dependency Audit + +on: + push: + paths: + - '**/requirements.txt' + pull_request: + paths: + - '**/requirements.txt' + schedule: + - cron: '0 0 * * *' # Run daily at midnight UTC + +jobs: + audit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pip-audit + + - name: Run pip-audit + run: | + pip-audit -r requirements.txt > audit_output.txt + continue-on-error: true + + - name: Display audit results + run: cat audit_output.txt + + - name: Create detailed report + run: | + echo "Pip Audit Report" > detailed_report.txt + echo "==================" >> detailed_report.txt + echo "" >> detailed_report.txt + echo "Date: $(date)" >> detailed_report.txt + echo "" >> detailed_report.txt + echo "Audit Results:" >> detailed_report.txt + cat audit_output.txt >> detailed_report.txt + echo "" >> detailed_report.txt + echo "Environment:" >> detailed_report.txt + python --version >> detailed_report.txt + pip --version >> detailed_report.txt + echo "" >> detailed_report.txt + echo "Requirements:" >> detailed_report.txt + cat requirements.txt >> detailed_report.txt + + - name: Upload audit results + uses: actions/upload-artifact@v3 + with: + name: pip-audit-report + path: detailed_report.txt + diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml new file mode 100644 index 0000000..fb22433 --- /dev/null +++ b/.github/workflows/pylint.yml @@ -0,0 +1,28 @@ +name: Pylint +on: + push: + paths: + - '**.py' + pull_request: + paths: + - '**.py' +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install $(grep -v "^torch" requirements.txt | tr '\n' ' ') + pip install pylint + - name: Analysing the code with pylint + run: | + pylint $(git ls-files '*.py') --disable=all --enable=E0001,E0100,E0101,E0102,E0103,E0104,E0105,E0107,E0108,E0110,E0111,E0112,E0113,E0114,E0115,E0116,E0117,E0118,E0202,E0203,E0211,E0213,E0236,E0237,E0238,E0239,E0240,E0241,E0301,E0302,E0303,E0401,E0402,E0701,E0702,E0703,E0704,E0710,E0711,E0712,E1003,E1101,E1102,E1111,E1120,E1121,E1123,E1124,E1125,E1126,E1127,E1128,E1129,E1130,E1131,E1132,E1133,E1134,E1135,E1136,E1137,E1138,E1139,E1200,E1201,E1205,E1206,E1300,E1301,E1302,E1303,E1304,E1305,E1306,E1310,E1700,E1701,W0311,W0312,W0611,W0612,W0613,W0702,W1401,W1402,C0123,C0200,C0325,C0411,C0412 --fail-under=5 diff --git a/.github/workflows/radon.yml 
b/.github/workflows/radon.yml new file mode 100644 index 0000000..bb82b80 --- /dev/null +++ b/.github/workflows/radon.yml @@ -0,0 +1,72 @@ +name: Radon Code Metrics + +on: + workflow_dispatch: + push: + paths: + - '**.py' + pull_request: + paths: + - '**.py' + +jobs: + radon: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Install radon + run: pip install radon + + - name: Run radon + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + CHANGED_FILES=$(git ls-files '*.py') + else + CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep '\.py$' || echo "") + fi + + echo "Files to be analyzed:" + echo "$CHANGED_FILES" + + if [ -n "$CHANGED_FILES" ]; then + echo "Running Cyclomatic Complexity check..." + radon cc $CHANGED_FILES -a -s -n F --exclude "AutoGGUF.quantize_model" + + echo "Running Maintainability Index check..." + radon mi $CHANGED_FILES -s -n F + else + echo "No Python files to analyze." + fi + continue-on-error: true + + - name: Check radon output + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + CHANGED_FILES=$(git ls-files '*.py') + else + CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep '\.py$' || echo "") + fi + + if [ -n "$CHANGED_FILES" ]; then + CC_OUTPUT=$(radon cc $CHANGED_FILES -a -s -n F --exclude "AutoGGUF.quantize_model") + MI_OUTPUT=$(radon mi $CHANGED_FILES -s -n F) + + if [ -n "$CC_OUTPUT" ] || [ -n "$MI_OUTPUT" ]; then + echo "Radon detected code complexity or maintainability issues:" + [ -n "$CC_OUTPUT" ] && echo "$CC_OUTPUT" + [ -n "$MI_OUTPUT" ] && echo "$MI_OUTPUT" + exit 1 + else + echo "No code complexity or maintainability issues detected." + fi + else + echo "No Python files to analyze." + fi diff --git a/.gitignore b/.gitignore index 82f9275..90772fa 100644 --- a/.gitignore +++ b/.gitignore @@ -129,6 +129,7 @@ venv/ ENV/ env.bak/ venv.bak/ +.idea/ # Spyder project settings .spyderproject diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..f1e6944 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: +- repo: https://github.com/psf/black + rev: 22.10.0 + hooks: + - id: black + language_version: python3 +- repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.1.9 + hooks: + - id: remove-crlf diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..1021240 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1 @@ +# Changelog diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..1ecf241 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,127 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement in the Discussions tab. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. 
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within
+the community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.0, available at
+https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
+
+Community Impact Guidelines were inspired by [Mozilla's code of conduct
+enforcement ladder](https://github.com/mozilla/diversity).
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see the FAQ at
+https://www.contributor-covenant.org/faq. Translations are available at
+https://www.contributor-covenant.org/translations.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..5a31230
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,62 @@
+# Contributing to vAnalytics
+
+First off, thanks for taking the time to contribute! 🎉👍
+
+## How Can I Contribute?
+
+### Reporting Bugs
+
+- Use the issue tracker to report bugs
+- Describe the bug in detail
+- Include screenshots if possible
+
+### Suggesting Enhancements
+
+- Use the issue tracker to suggest enhancements
+- Explain why this enhancement would be useful
+
+### Your First Code Contribution
+
+You can find issues labeled "good first issue" in the Issues tab as a starting point. Code refactors and optimizations are also appreciated; if you find a vulnerability, however, please report it privately in the Security tab. For feature PRs, please open a discussion first to make sure your feature can be added and continuously maintained.
+
+1. Fork the repo
+2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
+3. Install pre-commit: (`pip install pre-commit`)
+4. Set up the git hook scripts: (`pre-commit install`; see the sketch at the end of this guide)
+5. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
+6. Push to the branch (`git push origin feature/AmazingFeature`)
+7. Open a Pull Request
+
+## Styleguides
+
+### Git Commit Messages
+
+- Use the present tense ("Add feature" not "Added feature")
+- Use the imperative mood ("Move cursor to..." not "Moves cursor to...")
+- Limit the first line to 72 characters or fewer
+
+### Commit Types:
+
+```
+feat: Add a new feature
+fix: Fix a bug
+docs: Update documentation
+style: Code style changes (formatting, etc.)
+refactor: Refactor code without changing behavior
+perf: Improve performance
+test: Add or modify tests
+build: Changes to build system or external dependencies
+ci: Changes to CI configuration files and scripts
+chore: Other changes that don't modify src or test files
+```
+
+### Python Styleguide
+
+- Follow PEP 8
+- Format your code with Black before committing
+- Use meaningful variable names
+- Comment your code, but don't overdo it
+
+## Questions?
+
+Feel free to contact the project maintainers if you have any questions.
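For newcomers, the pre-commit steps above boil down to the following (a minimal sketch; the last command is optional and simply runs the hooks against every file once):

```bash
pip install pre-commit       # install the tool
pre-commit install           # register the git hooks from .pre-commit-config.yaml
pre-commit run --all-files   # optional: run Black and the CRLF check on the whole tree
```

After this, the hooks run automatically on each `git commit`.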
diff --git a/LICENSE b/LICENSE
index 261eeb9..0b5e765 100644
--- a/LICENSE
+++ b/LICENSE
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright [yyyy] [name of copyright owner]
+   Copyright 2024 leafspark
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
diff --git a/README.md b/README.md
index 050f0b3..adbecdd 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,37 @@
-# vAnalytics
-time series analytics for vLLM
+# vAnalytics - time series analytics for vLLM
+
+[![GitHub release](https://img.shields.io/github/release/leafspark/vAnalytics.svg)](https://github.com/leafspark/vAnalytics/releases)
+[![GitHub last commit](https://img.shields.io/github/last-commit/leafspark/vAnalytics.svg)](https://github.com/leafspark/vAnalytics/commits)
+[![CI/CD Status](https://img.shields.io/badge/CI%2FCD-passing-brightgreen)]()
+
+[![Powered by vLLM](https://img.shields.io/badge/Powered%20by-vLLM-green.svg)](https://github.com/vllm-project/vllm)
+![GitHub top language](https://img.shields.io/github/languages/top/leafspark/vAnalytics.svg)
+[![Platform Compatibility](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-blue)]()
+[![GitHub license](https://img.shields.io/github/license/leafspark/vAnalytics.svg)](https://github.com/leafspark/vAnalytics/blob/main/LICENSE)
+
+![GitHub stars](https://img.shields.io/github/stars/leafspark/vAnalytics.svg)
+![GitHub forks](https://img.shields.io/github/forks/leafspark/vAnalytics.svg)
+![GitHub release (latest by date)](https://img.shields.io/github/downloads/leafspark/vAnalytics/latest/total?color=green)
+![GitHub repo size](https://img.shields.io/github/repo-size/leafspark/vAnalytics.svg)
+![Lines of Code](https://tokei.rs/b1/github/leafspark/vAnalytics?category=code)
+
+[![Code Style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![Issues](https://img.shields.io/github/issues/leafspark/vAnalytics)](https://github.com/leafspark/vAnalytics/issues)
+[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/leafspark/vAnalytics/pulls)
+
+vAnalytics provides a web interface for monitoring vLLM instance metrics. It can track multiple vLLM instances at once and is easy to set up and configure.
+
+## Features
+- Specify vLLM backends easily using name and host configuration
+- Uses SQLite for easy database management
+- Intuitive web interface with built-in error handling
+- Flexible schemas and data plotting using Plotly
+
+## Usage
+Configure your instances in `src/monitor.py` (see the configuration sketch below), then run `python src/monitor.py`. This starts monitoring and stores one SQLite database per model name in the `./data` folder.
+
+To start the web interface, execute `python src/graph.py`. The web interface is available at `localhost:4421`.
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..1d9678e
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,13 @@
+# Security Policy
+
+## Supported Versions
+
+| Version         | Supported          |
+|-----------------|--------------------|
+| stable (v1.0.0) | :white_check_mark: |
+
+Beta versions are not supported, and may have unknown security issues.
+
+## Reporting a Vulnerability
+
+Use the Issues tab, or for severe vulnerabilities please contact the maintainers via email.
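To make the README's Usage section concrete, here is a minimal sketch of the instance configuration it refers to. The `models` dict lives in `src/monitor.py` (shown in full later in this diff); the model names and hosts below are placeholders for your own deployments:

```python
# src/monitor.py -- map a display name to each vLLM instance's base URL.
# These hosts are hypothetical; point them at your own vLLM servers.
models = {
    "Llama-3-8B-Instruct": "http://10.0.0.5:8000",
    "Mistral-7B-bf16": "http://10.0.0.6:8090",
}
```

With this in place, `python src/monitor.py` polls each instance's `/metrics` endpoint every 30 seconds and writes one SQLite file per model under `./data/`.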
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..54197ba
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+numpy~=1.26.4
+uvicorn~=0.30.6
+requests~=2.32.3
+pandas~=2.2.3
+plotly~=5.24.1
+flask~=3.0.3
+scipy~=1.13.1
+asgiref~=3.8.1
+zstd~=1.5.5.1
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..c285bf4
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,12 @@
+from setuptools import setup
+
+setup(
+    name='vAnalytics',
+    version='v1.0.0',
+    packages=[''],
+    url='https://github.com/leafspark/vAnalytics',
+    license='apache-2.0',
+    author='leafspark',
+    author_email='',
+    description='time series analytics for vLLM'
+)
diff --git a/src/get_data.py b/src/get_data.py
new file mode 100644
index 0000000..85fee03
--- /dev/null
+++ b/src/get_data.py
@@ -0,0 +1,138 @@
+import json
+import argparse
+import sqlite3
+from datetime import datetime, timedelta
+
+
+def get_latest_rows(db_file, hours=1):
+    cutoff = datetime.now() - timedelta(hours=hours)
+    cutoff_timestamp = int(cutoff.timestamp())
+
+    conn = sqlite3.connect(db_file)
+    cursor = conn.cursor()
+
+    # Parameterized query instead of string interpolation
+    cursor.execute(
+        "SELECT timestamp, data FROM json_data WHERE timestamp >= ? ORDER BY timestamp DESC",
+        (cutoff_timestamp,),
+    )
+    rows = cursor.fetchall()
+
+    conn.close()
+
+    if not rows:
+        print(
+            f"No rows found in the last {hours} hour(s). Showing info for last 5 rows:"
+        )
+        conn = sqlite3.connect(db_file)
+        cursor = conn.cursor()
+        cursor.execute(
+            "SELECT timestamp, data FROM json_data ORDER BY timestamp DESC LIMIT 5"
+        )
+        rows = cursor.fetchall()
+        conn.close()
+        for timestamp, _ in rows:
+            print(f"  {datetime.fromtimestamp(timestamp)}")
+
+    return rows
+
+
+def extract_stats(data_json):
+    try:
+        data = json.loads(data_json)
+        total_prompt_tokens = float(data["vllm:prompt_tokens_total"][0]["value"])
+        total_generation_tokens = float(
+            data["vllm:generation_tokens_total"][0]["value"]
+        )
+        total_requests = sum(
+            float(item["value"]) for item in data["vllm:request_success_total"]
+        )
+        avg_prompt_throughput = float(
+            data["vllm:avg_prompt_throughput_toks_per_s"][0]["value"]
+        )
+        avg_generation_throughput = float(
+            data["vllm:avg_generation_throughput_toks_per_s"][0]["value"]
+        )
+        gpu_cache_usage_perc = float(data["vllm:gpu_cache_usage_perc"][0]["value"])
+        num_requests_running = float(data["vllm:num_requests_running"][0]["value"])
+
+    except (KeyError, IndexError, json.JSONDecodeError) as e:
+        print(f"Error extracting stats from data: {str(e)}")
+        return None
+
+    return {
+        "total_prompt_tokens": total_prompt_tokens,
+        "total_generation_tokens": total_generation_tokens,
+        "total_requests": total_requests,
+        "avg_prompt_throughput": avg_prompt_throughput,
+        "avg_generation_throughput": avg_generation_throughput,
+        "gpu_cache_usage_perc": gpu_cache_usage_perc,
+        "num_requests_running": num_requests_running,
+    }
+
+
+def main(db_file, hours):
+    latest_rows = get_latest_rows(db_file, hours)
+
+    if not latest_rows:
+        print(f"No rows found for the last {hours} hour(s).")
+        return
+
+    print(f"Processing {len(latest_rows)} rows.")
+
+    # Call extract_stats once per row; it returns None for malformed data
+    valid_stats = [
+        stats
+        for stats in (extract_stats(data) for _, data in latest_rows)
+        if stats is not None
+    ]
+
+    if not valid_stats:
+        print("No valid statistics could be extracted from the rows.")
+        return
+
+    first_stats = valid_stats[-1]  # Oldest row
+    last_stats = valid_stats[0]  # Newest row
+
+    tokens_processed = (
+        last_stats["total_prompt_tokens"]
+        - first_stats["total_prompt_tokens"]
+        + last_stats["total_generation_tokens"]
+        - first_stats["total_generation_tokens"]
+    )
+    requests_processed = last_stats["total_requests"] - first_stats["total_requests"]
+
+    avg_prompt_throughput = sum(
+        stat["avg_prompt_throughput"] for stat in valid_stats
+    ) / len(valid_stats)
+    avg_generation_throughput = sum(
+        stat["avg_generation_throughput"] for stat in valid_stats
+    ) / len(valid_stats)
+    avg_num_requests_running = sum(
+        stat["num_requests_running"] for stat in valid_stats
+    ) / len(valid_stats)
+    avg_gpu_cache_usage_perc = sum(
+        stat["gpu_cache_usage_perc"] for stat in valid_stats
+    ) / len(valid_stats)
+
+    print(f"\nStats for the last {hours} hour(s):")
+    print(f"Tokens processed: {tokens_processed:,.0f}")
+    print(f"Requests processed: {requests_processed:,.0f}")
+    print(f"Average prompt throughput: {avg_prompt_throughput:.2f} tokens/s")
+    print(f"Average generation throughput: {avg_generation_throughput:.2f} tokens/s")
+    print(
+        f"Average number of requests running: {avg_num_requests_running:.2f} requests"
+    )
+    print(f"Average GPU cache usage percent: {avg_gpu_cache_usage_perc * 100:.2f}%")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Extract stats from a SQLite database for a specified time period"
+    )
+    parser.add_argument("db_file", help="Path to the SQLite database file")
+    parser.add_argument(
+        "--hours", type=int, default=1, help="Number of hours to look back (default: 1)"
+    )
+    args = parser.parse_args()
+
+    main(args.db_file, args.hours)
diff --git a/src/graph.py b/src/graph.py
new file mode 100644
index 0000000..b8bedb8
--- /dev/null
+++ b/src/graph.py
@@ -0,0 +1,323 @@
+import asyncio
+import json
+import logging
+import numpy as np
+import os
+import pandas as pd
+import plotly.graph_objects as go
+import plotly.offline as pyo
+import sqlite3
+import subprocess
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime, timedelta
+from flask import Flask, render_template, request, send_file
+from functools import lru_cache
+from plotly.subplots import make_subplots
+from scipy.interpolate import make_interp_spline
+
+# Set up logging with a higher level
+logging.basicConfig(
+    level=logging.WARNING,  # Changed from DEBUG to WARNING
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+# Global variables to store the cached data
+cached_data = {}
+last_modified_times = {}
+
+
+async def load_data_from_db(filepath):
+    global cached_data
+
+    conn = None  # ensure the name is bound for the finally block even if connect fails
+    try:
+        conn = sqlite3.connect(filepath)
+        cursor = conn.cursor()
+
+        twenty_four_hours_ago = datetime.now() - timedelta(hours=24)
+        timestamp_24h_ago = int(twenty_four_hours_ago.timestamp())
+
+        cursor.execute(
+            "SELECT data, timestamp FROM json_data WHERE timestamp >= ?",
+            (timestamp_24h_ago,),
+        )
+        rows = cursor.fetchall()
+
+        model_name = os.path.splitext(os.path.basename(filepath))[0]
+
+        # Optimize data structure creation
+        new_data = {}
+        for row in rows:
+            data = json.loads(row[0])
+            timestamp = datetime.fromtimestamp(row[1])
+
+            for metric_name, metric_data in data.items():
+                if metric_name not in new_data:
+                    new_data[metric_name] = {}
+                if model_name not in new_data[metric_name]:
+                    new_data[metric_name][model_name] = []
+                new_data[metric_name][model_name].append((timestamp, metric_data))
+
+        # Update cached_data efficiently
+        for metric_name, model_data in new_data.items():
+            if metric_name not in cached_data:
+                cached_data[metric_name] = model_data
+            else:
+                cached_data[metric_name].update(model_data)
+
+    except sqlite3.Error as e:
+        logging.error(f"SQLite error in {filepath}: {e}")
+    except json.JSONDecodeError as e:
+        logging.error(f"JSON decode error in {filepath}: {e}")
+    except Exception as e:
+        logging.error(f"Error processing file {filepath}: {e}")
+    finally:
+        if conn:
+            conn.close()
+
+
+async def load_data():
+    data_dir = "./data"
+
+    tasks = []
+    for filename in os.listdir(data_dir):
+        if filename.endswith(".sqlite"):
+            filepath = os.path.join(data_dir, filename)
+            tasks.append(load_data_from_db(filepath))
+
+    await asyncio.gather(*tasks)
+    logging.info(f"Loaded data for {len(cached_data)} metrics")
+    if len(cached_data) == 0:
+        logging.warning(
+            "No data was loaded. Check if SQLite files exist and contain recent data."
+        )
+
+
+async def background_data_loader():
+    while True:
+        await load_data()
+        await asyncio.sleep(30)  # Check for updates every 30 seconds
+
+
+# The background loader is started from the __main__ block below, where it is
+# scheduled on a dedicated event loop; a coroutine function cannot simply be
+# passed to threading.Thread as a target.
+
+
+def create_trace(model_name, metric_name, data_points, row, col):
+    return (
+        go.Scattergl(
+            x=[point[0] for point in data_points],
+            y=[point[1] for point in data_points],
+            mode="lines",
+            name=f"{model_name} - {metric_name}",
+        ),
+        row,
+        col,
+    )
+
+
+def create_plots(selected_model):
+    global cached_data
+    start_time = time.time()
+
+    all_data = {}
+    selected_models = selected_model.split(",")
+    for metric, data in cached_data.items():
+        all_data[metric] = {
+            model: data[model] for model in selected_models if model in data
+        }
+
+    data_prep_time = time.time() - start_time
+    print(f"Data preparation took {data_prep_time:.2f} seconds")
+
+    num_metrics = len(all_data)
+    if num_metrics == 0:
+        logging.warning("No valid data found.")
+        return None
+
+    num_cols = 2
+    num_rows = (num_metrics + num_cols - 1) // num_cols
+    fig = make_subplots(
+        rows=num_rows, cols=num_cols, subplot_titles=list(all_data.keys())
+    )
+
+    subplot_creation_time = time.time() - start_time - data_prep_time
+    print(f"Subplot creation took {subplot_creation_time:.2f} seconds")
+
+    now = datetime.now()
+    twenty_four_hours_ago = now - timedelta(hours=24)
+
+    trace_creation_start = time.time()
+
+    with ThreadPoolExecutor() as executor:
+        futures = []
+        for index, (metric_name, model_data) in enumerate(all_data.items()):
+            row = index // num_cols + 1
+            col = index % num_cols + 1
+
+            for model_name, metric_data_list in model_data.items():
+                if isinstance(metric_data_list[0][1], list):
+                    for label_set in metric_data_list[0][1]:
+                        data_points = []
+                        for timestamp, metric_data in metric_data_list:
+                            if timestamp >= twenty_four_hours_ago:
+                                for data_point in metric_data:
+                                    if data_point["labels"] == label_set["labels"]:
+                                        try:
+                                            value = float(data_point["value"])
+                                            data_points.append((timestamp, value))
+                                        except ValueError:
+                                            logging.warning(
+                                                f"Invalid numeric value for {model_name} - {metric_name}: {data_point['value']}"
+                                            )
+                        if not data_points:
+                            continue
+                        data_points.sort(key=lambda x: x[0])
+                        futures.append(
+                            executor.submit(
+                                create_trace,
+                                model_name,
+                                str(label_set["labels"]),
+                                data_points,
+                                row,
+                                col,
+                            )
+                        )
+                else:
+                    data_points = []
+                    for timestamp, metric_data in metric_data_list:
+                        if timestamp >= twenty_four_hours_ago:
+                            try:
+                                value = float(metric_data)
+                                data_points.append((timestamp, value))
+                            except ValueError:
+                                logging.warning(
+                                    f"Invalid numeric value for {model_name} - {metric_name}: {metric_data}"
+                                )
+                    if not data_points:
+                        continue
+                    data_points.sort(key=lambda x: x[0])
+                    futures.append(
+                        executor.submit(
+                            create_trace, model_name, metric_name, data_points, row, col
+                        )
+                    )
+
+        for future in as_completed(futures):
+            trace, row, col = future.result()
+            fig.add_trace(trace, row=row, col=col)
+
+    trace_creation_time = time.time() - trace_creation_start
+    print(f"Trace creation took {trace_creation_time:.2f} seconds")
+
+    layout_update_start = time.time()
+    fig.update_layout(
+        height=300 * num_rows,
+        showlegend=True,
+        template="plotly_dark",
+        font=dict(family="Arial", size=10, color="white"),
+        paper_bgcolor="rgb(30, 30, 30)",
+        plot_bgcolor="rgb(30, 30, 30)",
+    )
+    fig.update_xaxes(title_text="Time", tickformat="%Y-%m-%d %H:%M:%S")
+    fig.update_yaxes(title_text="Value")
+    fig.update_traces(hovertemplate="%{x|%Y-%m-%d %H:%M:%S}<br>%{y:.3f}")
+
+    layout_update_time = time.time() - layout_update_start
+    print(f"Layout update took {layout_update_time:.2f} seconds")
+
+    total_time = time.time() - start_time
+    print(f"Total plot creation took {total_time:.2f} seconds")
+
+    return fig
+
+
+app = Flask(__name__)
+
+
+@app.route("/", methods=["GET", "POST"])
+def index():
+    data_dir = "./data"
+    model_names = [
+        name[:-7]
+        for name in os.listdir(data_dir)
+        if name.endswith(".sqlite") and os.path.isfile(os.path.join(data_dir, name))
+    ]
+
+    if request.method == "POST":
+        selected_model = request.form.get("model_select")
+    else:
+        selected_model = model_names[0] if model_names else None
+
+    plot_div = None
+    error_message = None
+    if selected_model:
+        try:
+            fig = create_plots(selected_model)
+            if fig is not None:
+                fig.update_layout(showlegend=False)
+                plot_div = pyo.plot(fig, output_type="div", include_plotlyjs=True)
+            else:
+                error_message = "No data available for the selected model."
+        except Exception as e:
+            logging.error(f"Error creating plot: {str(e)}")
+            error_message = (
+                "An error occurred while creating the plot. Please try again later."
+            )
+
+        # Paths are relative to the repository root, where the README's
+        # `python src/graph.py` is expected to be run from
+        command = [
+            "python",
+            os.path.join("src", "get_data.py"),
+            "--hours",
+            "24",
+            os.path.join("data", f"{selected_model}.sqlite"),
+        ]
+
+        result = subprocess.run(command, capture_output=True, text=True)
+    else:
+        result = None
+
+    return render_template(
+        "index.html",
+        plot_div=plot_div,
+        model_name=selected_model,
+        model_names=model_names,
+        result=result.stdout if result else None,
+        error_message=error_message,
+    )
+
+
+@app.route("/favicon.ico")
+def favicon():
+    return send_file("favicon.ico", mimetype="image/vnd.microsoft.icon")
+
+
+if __name__ == "__main__":
+    import uvicorn
+    from asgiref.wsgi import WsgiToAsgi
+
+    # Initial data load
+    logging.info("Starting initial data load")
+    asyncio.run(load_data())
+    logging.info("Initial data load complete")
+
+    # Create a new event loop for the background task
+    loop = asyncio.new_event_loop()
+
+    def start_background_loop():
+        asyncio.set_event_loop(loop)
+        loop.create_task(background_data_loader())
+        loop.run_forever()
+
+    t = threading.Thread(target=start_background_loop, daemon=True)
+    t.start()
+
+    asgi_app = WsgiToAsgi(app)
+    uvicorn.run(asgi_app, host="0.0.0.0", port=4421)
diff --git a/src/monitor.py b/src/monitor.py
new file mode 100644
index 0000000..196b3f7
--- /dev/null
+++ b/src/monitor.py
@@ -0,0 +1,140 @@
+import requests
+import time
+import os
+import json
+from datetime import datetime
+import logging
+import zstd
+import sqlite3
+
+print("Starting monitor.")
+
+# Set up basic configuration for logging; only the first basicConfig() call
+# takes effect, so all log output goes to monitor.log
+logging.basicConfig(
+    level=logging.DEBUG,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    filename="monitor.log",  # Log to a file named monitor.log
+    filemode="a",
+)  # Append to the log file
+
+# Model information
+models = {
+    "Example-Model-22B-FP8-dynamic": "http://112.83.15.44:8883",
+    "Mistral-7B-bf16": "http://57.214.142.199:8090",
+}
+
+
+def call_metrics_endpoint(model_name, base_url):
+    url = f"{base_url}/metrics"
+    logging.debug(f"Calling metrics endpoint: {url}")
+    try:
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
+        }
+        # A timeout keeps one unreachable instance from stalling the poll loop
+        response = requests.get(url, headers=headers, timeout=30)
+        response.raise_for_status()
+        logging.debug(f"Received successful response from {url}")
+        return response.text
+    except requests.exceptions.RequestException as e:
+        logging.error(f"Error calling {url}: {e}")
+        return f"Error calling {url}: {e}"
+
+
+def normalize_metrics(metrics_data):
+    """Normalizes the metrics data from vLLM."""
+    normalized_data = {}
+    lines = metrics_data.strip().split("\n")
+    for line in lines:
+        if not line.strip() or line.startswith("#"):  # Ignore blank and comment lines
+            continue
+        parts = line.split(" ")
+        metric_name = parts[0]
+        metric_value_str = parts[1]
+
+        # Try to convert to decimal, otherwise keep as string
+        try:
+            metric_value = float(metric_value_str)
+            if metric_name.endswith("_total") or metric_name.endswith("_count"):
+                metric_value = int(metric_value)
+            elif "e+" in metric_value_str or "e-" in metric_value_str:
+                metric_value = "{:.10f}".format(metric_value)
+        except ValueError:
+            metric_value = metric_value_str
+
+        # Extract labels from the metric name
+        if "{" in metric_name:
+            metric_name, labels_str = metric_name[:-1].split("{")
+            labels = {}
+            for label_pair in labels_str.split(","):
+                key, value = label_pair.split("=")
+                labels[key.strip('"')] = value.strip('"')
+            if metric_name not in normalized_data:
+                normalized_data[metric_name] = []
+            normalized_data[metric_name].append(
+                {"labels": labels, "value": metric_value}
+            )
+        else:
+            normalized_data[metric_name] = metric_value
+
+    return normalized_data
+
+
+def log_response(model_name, response_data):
+    timestamp = int(datetime.now().timestamp())
+    normalized_data = normalize_metrics(response_data)
+
+    db_filename = f"./data/{model_name}.sqlite"
+    os.makedirs(os.path.dirname(db_filename), exist_ok=True)
+
+    max_retries = 3
+    for retry in range(max_retries):
+        conn = None
+        try:
+            conn = sqlite3.connect(db_filename)
+            cursor = conn.cursor()
+
+            # Create table if it doesn't exist
+            cursor.execute(
+                """
+                CREATE TABLE IF NOT EXISTS json_data
+                (id INTEGER PRIMARY KEY AUTOINCREMENT,
+                data TEXT NOT NULL,
+                timestamp INTEGER NOT NULL)
+                """
+            )
+
+            # Insert the data
+            cursor.execute(
+                "INSERT INTO json_data (data, timestamp) VALUES (?, ?)",
+                (json.dumps(normalized_data), timestamp),
+            )
+
+            conn.commit()
+            conn.close()
+
+            logging.debug(f"Saved metrics data to {db_filename}")
+            break  # Exit the retry loop if successful
+        except sqlite3.OperationalError as e:
+            if conn is not None:
+                conn.close()  # Don't leak the handle before retrying
+            if "database is locked" in str(e):
+                logging.warning(
+                    f"Database locked for {model_name}, retrying in 5 seconds... (Attempt {retry+1}/{max_retries})"
+                )
+                time.sleep(5)  # Wait before retrying
+            else:
+                logging.error(f"Error writing to database for {model_name}: {e}")
+                break  # Exit the retry loop for other errors
+
+
+while True:
+    for model_name, base_url in models.items():
+        response_data = call_metrics_endpoint(model_name, base_url)
+        if response_data and not response_data.startswith(
+            "Error"
+        ):  # Check for valid data
+            logging.info(f"Metrics for {model_name} valid")  # Recorded in monitor.log
+            log_response(model_name, response_data)
+
+    logging.debug("Waiting for 30 seconds...")
+    time.sleep(30)
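Beyond the `get_data.py` CLI, the per-model SQLite files written by `monitor.py` can be inspected directly. A minimal sketch (the database path is a placeholder; the `json_data` schema matches the CREATE TABLE in `log_response` above):

```python
import json
import sqlite3
from datetime import datetime

# Hypothetical path; monitor.py writes ./data/<model_name>.sqlite
conn = sqlite3.connect("./data/Mistral-7B-bf16.sqlite")
cursor = conn.cursor()

# Fetch the most recent snapshot stored by monitor.py
cursor.execute("SELECT timestamp, data FROM json_data ORDER BY timestamp DESC LIMIT 1")
row = cursor.fetchone()
conn.close()

if row is not None:
    timestamp, payload = row
    metrics = json.loads(payload)  # metric name -> scalar, or list of {labels, value}
    print(f"Snapshot at {datetime.fromtimestamp(timestamp)}")
    print(metrics.get("vllm:num_requests_running"))
```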