docs: add README.md and initial commit
- add code - add GitHub workflows - add GitHub repo files
This commit is contained in:
parent
ffc14cb984
commit
dff316c725
|
@ -0,0 +1,37 @@
|
||||||
|
---
|
||||||
|
name: Bug report
|
||||||
|
about: Create a report to help us improve vAnalytics
|
||||||
|
title: '[BUG] '
|
||||||
|
labels: bug
|
||||||
|
assignees: ''
|
||||||
|
---
|
||||||
|
|
||||||
|
**Describe the bug**
|
||||||
|
A clear and concise description of what the bug is.
|
||||||
|
|
||||||
|
**To Reproduce**
|
||||||
|
Steps to reproduce the behavior:
|
||||||
|
1. Go to '...'
|
||||||
|
2. Click on '....'
|
||||||
|
3. Scroll down to '....'
|
||||||
|
4. See error
|
||||||
|
|
||||||
|
**Expected behavior**
|
||||||
|
A clear and concise description of what you expected to happen.
|
||||||
|
|
||||||
|
**Screenshots**
|
||||||
|
If applicable, add screenshots to help explain your problem.
|
||||||
|
|
||||||
|
**Environment (please complete the following information):**
|
||||||
|
- OS: [e.g. Windows, macOS, Linux]
|
||||||
|
- vAnalytics Version: [e.g. v1.4.2]
|
||||||
|
- Python Version (if running from source): [e.g. 3.9]
|
||||||
|
- vLLM version(s)
|
||||||
|
|
||||||
|
**Additional context**
|
||||||
|
Add any other context about the problem here. Include any relevant log outputs or error messages.
|
||||||
|
|
||||||
|
**Checklist:**
|
||||||
|
- [ ] I have checked the existing issues to make sure this is not a duplicate
|
||||||
|
- [ ] I have included all relevant information to reproduce the issue
|
||||||
|
- [ ] I am running the latest version of vAnalytics
|
|
@ -0,0 +1,8 @@
|
||||||
|
version: 2
|
||||||
|
updates:
|
||||||
|
- package-ecosystem: "pip"
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
interval: "weekly"
|
||||||
|
day: "sunday"
|
||||||
|
open-pull-requests-limit: 10
|
|
@ -0,0 +1,20 @@
|
||||||
|
name: Black
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
paths:
|
||||||
|
- '**.py'
|
||||||
|
pull_request:
|
||||||
|
paths:
|
||||||
|
- '**.py'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
lint:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- uses: actions/setup-python@v5
|
||||||
|
- uses: psf/black@stable
|
||||||
|
with:
|
||||||
|
options: "--check --verbose"
|
||||||
|
src: "./src"
|
|
@ -0,0 +1,99 @@
|
||||||
|
# For most projects, this workflow file will not need changing; you simply need
|
||||||
|
# to commit it to your repository.
|
||||||
|
#
|
||||||
|
# You may wish to alter this file to override the set of languages analyzed,
|
||||||
|
# or to provide custom queries or build logic.
|
||||||
|
#
|
||||||
|
# ******** NOTE ********
|
||||||
|
# We have attempted to detect the languages in your repository. Please check
|
||||||
|
# the `language` matrix defined below to confirm you have the correct set of
|
||||||
|
# supported CodeQL languages.
|
||||||
|
#
|
||||||
|
name: "CodeQL"
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [ "main" ]
|
||||||
|
paths-ignore:
|
||||||
|
- '**/*.md'
|
||||||
|
- '**/*.txt'
|
||||||
|
pull_request:
|
||||||
|
branches: [ "main" ]
|
||||||
|
paths-ignore:
|
||||||
|
- '**/*.md'
|
||||||
|
- '**/*.txt'
|
||||||
|
schedule:
|
||||||
|
- cron: '21 20 * * 6'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
analyze:
|
||||||
|
name: Analyze (${{ matrix.language }})
|
||||||
|
# Runner size impacts CodeQL analysis time. To learn more, please see:
|
||||||
|
# - https://gh.io/recommended-hardware-resources-for-running-codeql
|
||||||
|
# - https://gh.io/supported-runners-and-hardware-resources
|
||||||
|
# - https://gh.io/using-larger-runners (GitHub.com only)
|
||||||
|
# Consider using larger runners or machines with greater resources for possible analysis time improvements.
|
||||||
|
runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
|
||||||
|
timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
|
||||||
|
permissions:
|
||||||
|
# required for all workflows
|
||||||
|
security-events: write
|
||||||
|
|
||||||
|
# required to fetch internal or private CodeQL packs
|
||||||
|
packages: read
|
||||||
|
|
||||||
|
# only required for workflows in private repositories
|
||||||
|
actions: read
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- language: python
|
||||||
|
build-mode: none
|
||||||
|
# CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift'
|
||||||
|
# Use `c-cpp` to analyze code written in C, C++ or both
|
||||||
|
# Use 'java-kotlin' to analyze code written in Java, Kotlin or both
|
||||||
|
# Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
|
||||||
|
# To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
|
||||||
|
# see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
|
||||||
|
# If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
|
||||||
|
# your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
# Initializes the CodeQL tools for scanning.
|
||||||
|
- name: Initialize CodeQL
|
||||||
|
uses: github/codeql-action/init@v3
|
||||||
|
with:
|
||||||
|
languages: ${{ matrix.language }}
|
||||||
|
build-mode: ${{ matrix.build-mode }}
|
||||||
|
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||||
|
# By default, queries listed here will override any specified in a config file.
|
||||||
|
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||||
|
|
||||||
|
# For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
|
||||||
|
# queries: security-extended,security-and-quality
|
||||||
|
|
||||||
|
# If the analysis step fails for one of the languages you are analyzing with
|
||||||
|
# "We were unable to automatically build your code", modify the matrix above
|
||||||
|
# to set the build mode to "manual" for that language. Then modify this step
|
||||||
|
# to build your code.
|
||||||
|
# ℹ️ Command-line programs to run using the OS shell.
|
||||||
|
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
|
||||||
|
- if: matrix.build-mode == 'manual'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo 'If you are using a "manual" build mode for one or more of the' \
|
||||||
|
'languages you are analyzing, replace this with the commands to build' \
|
||||||
|
'your code, for example:'
|
||||||
|
echo ' make bootstrap'
|
||||||
|
echo ' make release'
|
||||||
|
exit 1
|
||||||
|
|
||||||
|
- name: Perform CodeQL Analysis
|
||||||
|
uses: github/codeql-action/analyze@v3
|
||||||
|
with:
|
||||||
|
category: "/language:${{matrix.language}}"
|
|
@ -0,0 +1,59 @@
|
||||||
|
name: Dependency Audit
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
paths:
|
||||||
|
- '**/requirements.txt'
|
||||||
|
pull_request:
|
||||||
|
paths:
|
||||||
|
- '**/requirements.txt'
|
||||||
|
schedule:
|
||||||
|
- cron: '0 0 * * *' # Run daily at midnight UTC
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
audit:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: '3.x'
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install pip-audit
|
||||||
|
|
||||||
|
- name: Run pip-audit
|
||||||
|
run: |
|
||||||
|
pip-audit -r requirements.txt > audit_output.txt
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
- name: Display audit results
|
||||||
|
run: cat audit_output.txt
|
||||||
|
|
||||||
|
- name: Create detailed report
|
||||||
|
run: |
|
||||||
|
echo "Pip Audit Report" > detailed_report.txt
|
||||||
|
echo "==================" >> detailed_report.txt
|
||||||
|
echo "" >> detailed_report.txt
|
||||||
|
echo "Date: $(date)" >> detailed_report.txt
|
||||||
|
echo "" >> detailed_report.txt
|
||||||
|
echo "Audit Results:" >> detailed_report.txt
|
||||||
|
cat audit_output.txt >> detailed_report.txt
|
||||||
|
echo "" >> detailed_report.txt
|
||||||
|
echo "Environment:" >> detailed_report.txt
|
||||||
|
python --version >> detailed_report.txt
|
||||||
|
pip --version >> detailed_report.txt
|
||||||
|
echo "" >> detailed_report.txt
|
||||||
|
echo "Requirements:" >> detailed_report.txt
|
||||||
|
cat requirements.txt >> detailed_report.txt
|
||||||
|
|
||||||
|
- name: Upload audit results
|
||||||
|
uses: actions/upload-artifact@v3
|
||||||
|
with:
|
||||||
|
name: pip-audit-report
|
||||||
|
path: detailed_report.txt
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
name: Pylint
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
paths:
|
||||||
|
- '**.py'
|
||||||
|
pull_request:
|
||||||
|
paths:
|
||||||
|
- '**.py'
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
python-version: ["3.9", "3.10"]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install $(grep -v "^torch" requirements.txt | tr '\n' ' ')
|
||||||
|
pip install pylint
|
||||||
|
- name: Analysing the code with pylint
|
||||||
|
run: |
|
||||||
|
pylint $(git ls-files '*.py') --disable=all --enable=E0001,E0100,E0101,E0102,E0103,E0104,E0105,E0107,E0108,E0110,E0111,E0112,E0113,E0114,E0115,E0116,E0117,E0118,E0202,E0203,E0211,E0213,E0236,E0237,E0238,E0239,E0240,E0241,E0301,E0302,E0303,E0401,E0402,E0701,E0702,E0703,E0704,E0710,E0711,E0712,E1003,E1101,E1102,E1111,E1120,E1121,E1123,E1124,E1125,E1126,E1127,E1128,E1129,E1130,E1131,E1132,E1133,E1134,E1135,E1136,E1137,E1138,E1139,E1200,E1201,E1205,E1206,E1300,E1301,E1302,E1303,E1304,E1305,E1306,E1310,E1700,E1701,W0311,W0312,W0611,W0612,W0613,W0702,W1401,W1402,C0123,C0200,C0325,C0411,C0412 --fail-under=5
|
|
@ -0,0 +1,72 @@
|
||||||
|
name: Radon Code Metrics
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
push:
|
||||||
|
paths:
|
||||||
|
- '**.py'
|
||||||
|
pull_request:
|
||||||
|
paths:
|
||||||
|
- '**.py'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
radon:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: '3.x'
|
||||||
|
|
||||||
|
- name: Install radon
|
||||||
|
run: pip install radon
|
||||||
|
|
||||||
|
- name: Run radon
|
||||||
|
run: |
|
||||||
|
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
|
||||||
|
CHANGED_FILES=$(git ls-files '*.py')
|
||||||
|
else
|
||||||
|
CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep '\.py$' || echo "")
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Files to be analyzed:"
|
||||||
|
echo "$CHANGED_FILES"
|
||||||
|
|
||||||
|
if [ -n "$CHANGED_FILES" ]; then
|
||||||
|
echo "Running Cyclomatic Complexity check..."
|
||||||
|
radon cc $CHANGED_FILES -a -s -n F --exclude "AutoGGUF.quantize_model"
|
||||||
|
|
||||||
|
echo "Running Maintainability Index check..."
|
||||||
|
radon mi $CHANGED_FILES -s -n F
|
||||||
|
else
|
||||||
|
echo "No Python files to analyze."
|
||||||
|
fi
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
- name: Check radon output
|
||||||
|
run: |
|
||||||
|
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
|
||||||
|
CHANGED_FILES=$(git ls-files '*.py')
|
||||||
|
else
|
||||||
|
CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep '\.py$' || echo "")
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -n "$CHANGED_FILES" ]; then
|
||||||
|
CC_OUTPUT=$(radon cc $CHANGED_FILES -a -s -n F --exclude "AutoGGUF.quantize_model")
|
||||||
|
MI_OUTPUT=$(radon mi $CHANGED_FILES -s -n F)
|
||||||
|
|
||||||
|
if [ -n "$CC_OUTPUT" ] || [ -n "$MI_OUTPUT" ]; then
|
||||||
|
echo "Radon detected code complexity or maintainability issues:"
|
||||||
|
[ -n "$CC_OUTPUT" ] && echo "$CC_OUTPUT"
|
||||||
|
[ -n "$MI_OUTPUT" ] && echo "$MI_OUTPUT"
|
||||||
|
exit 1
|
||||||
|
else
|
||||||
|
echo "No code complexity or maintainability issues detected."
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "No Python files to analyze."
|
||||||
|
fi
|
|
@ -129,6 +129,7 @@ venv/
|
||||||
ENV/
|
ENV/
|
||||||
env.bak/
|
env.bak/
|
||||||
venv.bak/
|
venv.bak/
|
||||||
|
.idea/
|
||||||
|
|
||||||
# Spyder project settings
|
# Spyder project settings
|
||||||
.spyderproject
|
.spyderproject
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
repos:
|
||||||
|
- repo: https://github.com/psf/black
|
||||||
|
rev: 22.10.0
|
||||||
|
hooks:
|
||||||
|
- id: black
|
||||||
|
language_version: python3
|
||||||
|
- repo: https://github.com/Lucas-C/pre-commit-hooks
|
||||||
|
rev: v1.1.9
|
||||||
|
hooks:
|
||||||
|
- id: remove-crlf
|
|
@ -0,0 +1 @@
|
||||||
|
# Changelog
|
|
@ -0,0 +1,127 @@
|
||||||
|
# Contributor Covenant Code of Conduct
|
||||||
|
|
||||||
|
## Our Pledge
|
||||||
|
|
||||||
|
We as members, contributors, and leaders pledge to make participation in our
|
||||||
|
community a harassment-free experience for everyone, regardless of age, body
|
||||||
|
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
||||||
|
identity and expression, level of experience, education, socio-economic status,
|
||||||
|
nationality, personal appearance, race, religion, or sexual identity
|
||||||
|
and orientation.
|
||||||
|
|
||||||
|
We pledge to act and interact in ways that contribute to an open, welcoming,
|
||||||
|
diverse, inclusive, and healthy community.
|
||||||
|
|
||||||
|
## Our Standards
|
||||||
|
|
||||||
|
Examples of behavior that contributes to a positive environment for our
|
||||||
|
community include:
|
||||||
|
|
||||||
|
* Demonstrating empathy and kindness toward other people
|
||||||
|
* Being respectful of differing opinions, viewpoints, and experiences
|
||||||
|
* Giving and gracefully accepting constructive feedback
|
||||||
|
* Accepting responsibility and apologizing to those affected by our mistakes,
|
||||||
|
and learning from the experience
|
||||||
|
* Focusing on what is best not just for us as individuals, but for the
|
||||||
|
overall community
|
||||||
|
|
||||||
|
Examples of unacceptable behavior include:
|
||||||
|
|
||||||
|
* The use of sexualized language or imagery, and sexual attention or
|
||||||
|
advances of any kind
|
||||||
|
* Trolling, insulting or derogatory comments, and personal or political attacks
|
||||||
|
* Public or private harassment
|
||||||
|
* Publishing others' private information, such as a physical or email
|
||||||
|
address, without their explicit permission
|
||||||
|
* Other conduct which could reasonably be considered inappropriate in a
|
||||||
|
professional setting
|
||||||
|
|
||||||
|
## Enforcement Responsibilities
|
||||||
|
|
||||||
|
Community leaders are responsible for clarifying and enforcing our standards of
|
||||||
|
acceptable behavior and will take appropriate and fair corrective action in
|
||||||
|
response to any behavior that they deem inappropriate, threatening, offensive,
|
||||||
|
or harmful.
|
||||||
|
|
||||||
|
Community leaders have the right and responsibility to remove, edit, or reject
|
||||||
|
comments, commits, code, wiki edits, issues, and other contributions that are
|
||||||
|
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
||||||
|
decisions when appropriate.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
This Code of Conduct applies within all community spaces, and also applies when
|
||||||
|
an individual is officially representing the community in public spaces.
|
||||||
|
Examples of representing our community include using an official e-mail address,
|
||||||
|
posting via an official social media account, or acting as an appointed
|
||||||
|
representative at an online or offline event.
|
||||||
|
|
||||||
|
## Enforcement
|
||||||
|
|
||||||
|
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||||
|
reported to the community leaders responsible for enforcement in the Discussions tab.
|
||||||
|
All complaints will be reviewed and investigated promptly and fairly.
|
||||||
|
|
||||||
|
All community leaders are obligated to respect the privacy and security of the
|
||||||
|
reporter of any incident.
|
||||||
|
|
||||||
|
## Enforcement Guidelines
|
||||||
|
|
||||||
|
Community leaders will follow these Community Impact Guidelines in determining
|
||||||
|
the consequences for any action they deem in violation of this Code of Conduct:
|
||||||
|
|
||||||
|
### 1. Correction
|
||||||
|
|
||||||
|
**Community Impact**: Use of inappropriate language or other behavior deemed
|
||||||
|
unprofessional or unwelcome in the community.
|
||||||
|
|
||||||
|
**Consequence**: A private, written warning from community leaders, providing
|
||||||
|
clarity around the nature of the violation and an explanation of why the
|
||||||
|
behavior was inappropriate. A public apology may be requested.
|
||||||
|
|
||||||
|
### 2. Warning
|
||||||
|
|
||||||
|
**Community Impact**: A violation through a single incident or series
|
||||||
|
of actions.
|
||||||
|
|
||||||
|
**Consequence**: A warning with consequences for continued behavior. No
|
||||||
|
interaction with the people involved, including unsolicited interaction with
|
||||||
|
those enforcing the Code of Conduct, for a specified period of time. This
|
||||||
|
includes avoiding interactions in community spaces as well as external channels
|
||||||
|
like social media. Violating these terms may lead to a temporary or
|
||||||
|
permanent ban.
|
||||||
|
|
||||||
|
### 3. Temporary Ban
|
||||||
|
|
||||||
|
**Community Impact**: A serious violation of community standards, including
|
||||||
|
sustained inappropriate behavior.
|
||||||
|
|
||||||
|
**Consequence**: A temporary ban from any sort of interaction or public
|
||||||
|
communication with the community for a specified period of time. No public or
|
||||||
|
private interaction with the people involved, including unsolicited interaction
|
||||||
|
with those enforcing the Code of Conduct, is allowed during this period.
|
||||||
|
Violating these terms may lead to a permanent ban.
|
||||||
|
|
||||||
|
### 4. Permanent Ban
|
||||||
|
|
||||||
|
**Community Impact**: Demonstrating a pattern of violation of community
|
||||||
|
standards, including sustained inappropriate behavior, harassment of an
|
||||||
|
individual, or aggression toward or disparagement of classes of individuals.
|
||||||
|
|
||||||
|
**Consequence**: A permanent ban from any sort of public interaction within
|
||||||
|
the community.
|
||||||
|
|
||||||
|
## Attribution
|
||||||
|
|
||||||
|
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
||||||
|
version 2.0, available at
|
||||||
|
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
|
||||||
|
|
||||||
|
Community Impact Guidelines were inspired by [Mozilla's code of conduct
|
||||||
|
enforcement ladder](https://github.com/mozilla/diversity).
|
||||||
|
|
||||||
|
[homepage]: https://www.contributor-covenant.org
|
||||||
|
|
||||||
|
For answers to common questions about this code of conduct, see the FAQ at
|
||||||
|
https://www.contributor-covenant.org/faq. Translations are available at
|
||||||
|
https://www.contributor-covenant.org/translations.
|
|
@ -0,0 +1,62 @@
|
||||||
|
# Contributing to vAnalytics
|
||||||
|
|
||||||
|
First off, thanks for taking the time to contribute! 🎉👍
|
||||||
|
|
||||||
|
## How Can I Contribute?
|
||||||
|
|
||||||
|
### Reporting Bugs
|
||||||
|
|
||||||
|
- Use the issue tracker to report bugs
|
||||||
|
- Describe the bug in detail
|
||||||
|
- Include screenshots if possible
|
||||||
|
|
||||||
|
### Suggesting Enhancements
|
||||||
|
|
||||||
|
- Use the issue tracker to suggest enhancements
|
||||||
|
- Explain why this enhancement would be useful
|
||||||
|
|
||||||
|
### Your First Code Contribution
|
||||||
|
|
||||||
|
You can find issues labeled with "good first issue" in the Issues tab as a starting point. Code refactors and optimizations are also appreciated, although if there's a vulnerability please report it privately in the Security tab. For feature PRs, please open a discussion first to make sure your feature can be added and continuously maintained.
|
||||||
|
|
||||||
|
1. Fork the repo
|
||||||
|
2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
|
||||||
|
3. Install pre-commit: (`pip install pre-commit`)
|
||||||
|
4. Set up the git hook scripts: (`pre-commit install`)
|
||||||
|
5. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
|
||||||
|
6. Push to the branch (`git push origin feature/AmazingFeature`)
|
||||||
|
7. Open a Pull Request
|
||||||
|
|
||||||
|
## Styleguides
|
||||||
|
|
||||||
|
### Git Commit Messages
|
||||||
|
|
||||||
|
- Use the present tense ("Add feature" not "Added feature")
|
||||||
|
- Use the imperative mood ("Move cursor to..." not "Moves cursor to...")
|
||||||
|
- Limit the first line to 72 characters or fewer
|
||||||
|
|
||||||
|
### Commit Types:
|
||||||
|
|
||||||
|
```
|
||||||
|
feat: Added new feature
|
||||||
|
fix: Fixed a bug
|
||||||
|
docs: Updated documentation
|
||||||
|
style: Code style changes (formatting, etc.)
|
||||||
|
refactor: Code refactoring
|
||||||
|
perf: Performance improvements
|
||||||
|
test: Added or modified tests
|
||||||
|
build: Changes to build system or external dependencies
|
||||||
|
ci: Changes to CI configuration files and scripts
|
||||||
|
chore: Other changes that don't modify src or test files
|
||||||
|
```
|
||||||
|
|
||||||
|
### Python Styleguide
|
||||||
|
|
||||||
|
- Follow PEP 8
|
||||||
|
- Please use Black to format your code first
|
||||||
|
- Use meaningful variable names
|
||||||
|
- Comment your code, but don't overdo it
|
||||||
|
|
||||||
|
## Questions?
|
||||||
|
|
||||||
|
Feel free to contact the project maintainers if you have any questions.
|
2
LICENSE
2
LICENSE
|
@ -186,7 +186,7 @@
|
||||||
same "printed page" as the copyright notice for easier
|
same "printed page" as the copyright notice for easier
|
||||||
identification within third-party archives.
|
identification within third-party archives.
|
||||||
|
|
||||||
Copyright [yyyy] [name of copyright owner]
|
Copyright 2024 leafspark
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
|
|
39
README.md
39
README.md
|
@ -1,2 +1,37 @@
|
||||||
# vAnalytics
|
# vAnalytics - time series analytics for vLLM
|
||||||
time series analytics for vLLM
|
|
||||||
|
<!-- Project Status -->
|
||||||
|
[](https://github.com/leafspark/vAnalytics/releases)
|
||||||
|
[](https://github.com/leafspark/vAnalytics/commits)
|
||||||
|
[]()
|
||||||
|
|
||||||
|
<!-- Project Info -->
|
||||||
|
[](https://github.com/ggerganov/llama.cpp)
|
||||||
|

|
||||||
|
[]()
|
||||||
|
[](https://github.com/leafspark/vAnalytics/blob/main/LICENSE)
|
||||||
|
|
||||||
|
<!-- Repository Stats -->
|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|
|
||||||
|
<!-- Contribution -->
|
||||||
|
[](https://github.com/psf/black)
|
||||||
|
[](https://github.com/leafspark/vAnalytics/issues)
|
||||||
|
[](https://github.com/leafspark/vAnalytics/pulls)
|
||||||
|
|
||||||
|
vAnalytics provides a web interface to help easily monitor vLLM instance metrics. It allows users to easily monitor multiple vLLM instances, as well as being easy to set up and configure.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
- Specify vLLM backends easily using name and host configuration
|
||||||
|
- Uses SQLite for easy database management
|
||||||
|
- Intuitive and includes error handling
|
||||||
|
- Flexible schemas and data plotting using Plotly
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
Configure your instances in monitor.py, then use `python src/monitor.py`. This will start monitoring in a `/data` folder, where it will store SQLite databases with your model name.
|
||||||
|
|
||||||
|
To start the web interface, execute `python src/graph.py`. The web interface is available at `localhost:4412`.
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
# Security Policy
|
||||||
|
|
||||||
|
## Supported Versions
|
||||||
|
|
||||||
|
| Version | Supported |
|
||||||
|
|-----------------|--------------------|
|
||||||
|
| stable (v1.0.0) | :white_check_mark: |
|
||||||
|
|
||||||
|
Beta versions are not supported, and may have unknown security issues.
|
||||||
|
|
||||||
|
## Reporting a Vulnerability
|
||||||
|
|
||||||
|
Use the Issues tab, or for severe vulnerabilities please contact the maintainers via email.
|
|
@ -0,0 +1,7 @@
|
||||||
|
numpy~=1.26.4
|
||||||
|
uvicorn~=0.30.6
|
||||||
|
requests~=2.32.3
|
||||||
|
pandas~=2.2.3
|
||||||
|
plotly~=5.24.1
|
||||||
|
flask~=3.0.3
|
||||||
|
zstd~=1.5.5.1
|
|
@ -0,0 +1,12 @@
|
||||||
|
from setuptools import setup

# Minimal packaging metadata for vAnalytics (time series analytics for vLLM).
# NOTE(review): packages=[''] packages the repository root itself; since the
# sources appear to live under src/, consider find_packages()/package_dir —
# confirm against the actual layout before publishing.
# NOTE(review): version 'v1.0.0' carries a leading 'v', which is not a
# PEP 440 version string — confirm whether the tag prefix is intentional.
setup(
    name='vAnalytics',
    version='v1.0.0',
    packages=[''],
    url='https://github.com/leafspark/vAnalytics',
    license='apache-2.0',
    author='leafspark',
    author_email='',
    description='time series analytics for vLLM'
)
|
|
@ -0,0 +1,138 @@
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
import sqlite3
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
|
||||||
|
def get_latest_rows(db_file, hours=1):
    """Return (timestamp, data) rows from json_data for the last *hours* hours.

    Rows are ordered newest-first. If the window is empty, falls back to the
    5 most recent rows (printing their timestamps) so callers always have
    something to summarize.

    Args:
        db_file: Path to the SQLite database file.
        hours: Size of the look-back window in hours (default 1).

    Returns:
        List of (timestamp, data) tuples, newest first; may be the fallback
        rows when the window is empty.
    """
    cutoff = int((datetime.now() - timedelta(hours=hours)).timestamp())

    # One connection for both queries (the original opened a second
    # connection in the fallback path), closed even on error. The cutoff is
    # bound as a query parameter instead of f-string interpolation.
    conn = sqlite3.connect(db_file)
    try:
        cursor = conn.cursor()
        cursor.execute(
            "SELECT timestamp, data FROM json_data "
            "WHERE timestamp >= ? ORDER BY timestamp DESC",
            (cutoff,),
        )
        rows = cursor.fetchall()

        if not rows:
            print(
                f"No rows found in the last {hours} hour(s). Showing info for last 5 rows:"
            )
            cursor.execute(
                "SELECT timestamp, data FROM json_data ORDER BY timestamp DESC LIMIT 5"
            )
            rows = cursor.fetchall()
            for timestamp, _ in rows:
                print(f" {datetime.fromtimestamp(timestamp)}")
    finally:
        conn.close()

    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def extract_stats(data_json):
    """Parse one vLLM metrics JSON snapshot into a flat dict of floats.

    Args:
        data_json: JSON string as stored in the json_data table.

    Returns:
        Dict with token/request totals and throughput gauges, or None
        (after printing the error) when the payload is malformed or a
        metric is missing.
    """
    try:
        metrics = json.loads(data_json)

        def first_value(key):
            # Each metric is a list of samples; only the first is used.
            return float(metrics[key][0]["value"])

        return {
            "total_prompt_tokens": first_value("vllm:prompt_tokens_total"),
            "total_generation_tokens": first_value("vllm:generation_tokens_total"),
            # Success counts are split per finish reason; sum them all.
            "total_requests": sum(
                float(sample["value"])
                for sample in metrics["vllm:request_success_total"]
            ),
            "avg_prompt_throughput": first_value(
                "vllm:avg_prompt_throughput_toks_per_s"
            ),
            "avg_generation_throughput": first_value(
                "vllm:avg_generation_throughput_toks_per_s"
            ),
            "gpu_cache_usage_perc": first_value("vllm:gpu_cache_usage_perc"),
            "num_requests_running": first_value("vllm:num_requests_running"),
        }
    except (KeyError, IndexError, json.JSONDecodeError) as e:
        print(f"Error extracting stats from data: {str(e)}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def main(db_file, hours):
    """Summarize vLLM metrics stored in *db_file* over the last *hours* hours.

    Prints tokens/requests processed in the window plus average throughput,
    running-request, and GPU-cache-usage gauges.
    """
    latest_rows = get_latest_rows(db_file, hours)

    if not latest_rows:
        print(f"No rows found for the last {hours} hour(s).")
        return

    print(f"Processing {len(latest_rows)} rows.")

    # Parse each row exactly once. (The original comprehension called
    # extract_stats twice per row: once in the filter, once for the value.)
    parsed = (extract_stats(data) for _, data in latest_rows)
    valid_stats = [stats for stats in parsed if stats is not None]

    if not valid_stats:
        print("No valid statistics could be extracted from the rows.")
        return

    # Rows are newest-first, so the window spans valid_stats[-1]..[0].
    first_stats = valid_stats[-1]  # Oldest row
    last_stats = valid_stats[0]  # Newest row

    # Token/request counters are cumulative, so the window total is the
    # difference between the newest and oldest samples.
    tokens_processed = (
        last_stats["total_prompt_tokens"]
        - first_stats["total_prompt_tokens"]
        + last_stats["total_generation_tokens"]
        - first_stats["total_generation_tokens"]
    )
    requests_processed = last_stats["total_requests"] - first_stats["total_requests"]

    def average(key):
        # Mean of a gauge metric across every sample in the window.
        return sum(stat[key] for stat in valid_stats) / len(valid_stats)

    avg_prompt_throughput = average("avg_prompt_throughput")
    avg_generation_throughput = average("avg_generation_throughput")
    avg_num_requests_running = average("num_requests_running")
    avg_gpu_cache_usage_perc = average("gpu_cache_usage_perc")

    print(f"\nStats for the last {hours} hour(s):")
    print(f"Tokens processed: {tokens_processed:,.0f}")
    print(f"Requests processed: {requests_processed:,.0f}")
    print(f"Average prompt throughput: {avg_prompt_throughput:.2f} tokens/s")
    print(f"Average generation throughput: {avg_generation_throughput:.2f} tokens/s")
    print(
        f"Average number of requests running: {avg_num_requests_running:.2f} requests"
    )
    print(f"Average GPU cache usage percent: {avg_gpu_cache_usage_perc * 100:.2f}%")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # CLI entry point: <db_file> [--hours N]
    cli = argparse.ArgumentParser(
        description="Extract stats from a SQLite database for a specified time period"
    )
    cli.add_argument("db_file", help="Path to the SQLite database file")
    cli.add_argument(
        "--hours",
        type=int,
        default=1,
        help="Number of hours to look back (default: 1)",
    )
    options = cli.parse_args()
    main(options.db_file, options.hours)
|
|
@ -0,0 +1,323 @@
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
import plotly.offline as pyo
|
||||||
|
import sqlite3
|
||||||
|
import subprocess
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from flask import Flask, render_template, request, send_file
|
||||||
|
from functools import lru_cache
|
||||||
|
from plotly.subplots import make_subplots
|
||||||
|
from plotly.subplots import make_subplots
|
||||||
|
from scipy.interpolate import make_interp_spline
|
||||||
|
|
||||||
|
# Set up logging with a higher level
# WARNING keeps the per-request debug chatter out of the server log.
logging.basicConfig(
    level=logging.WARNING,  # Changed from DEBUG to WARNING
    format="%(asctime)s - %(levelname)s - %(message)s",
)
|
||||||
|
|
||||||
|
# Global variable to store the cached data
# Shape: {metric_name: {model_name: [(timestamp, value-or-label-list), ...]}}
# (populated by load_data_from_db below).
cached_data = {}
# NOTE(review): last_modified_times is never read or written in this chunk —
# kept as-is in case other parts of the project use it.
last_modified_times = {}
|
||||||
|
|
||||||
|
|
||||||
|
async def load_data_from_db(filepath):
    """Read the last 24 h of rows from one model's SQLite file into cached_data.

    Each row of json_data holds a JSON blob of {metric_name: value-or-label-list};
    the model name is derived from the file's base name. Errors are logged, not
    raised, so one broken file does not stop the other loaders.
    """
    global cached_data

    # BUG FIX: conn must exist before the try block — if sqlite3.connect()
    # itself raises, the finally clause used to hit a NameError on `conn`.
    conn = None
    try:
        conn = sqlite3.connect(filepath)
        cursor = conn.cursor()

        twenty_four_hours_ago = datetime.now() - timedelta(hours=24)
        timestamp_24h_ago = int(twenty_four_hours_ago.timestamp())

        cursor.execute(
            "SELECT data, timestamp FROM json_data WHERE timestamp >= ?",
            (timestamp_24h_ago,),
        )
        rows = cursor.fetchall()

        model_name = os.path.splitext(os.path.basename(filepath))[0]

        # Optimize data structure creation: build a fresh mapping first, then
        # merge into the shared cache in one pass.
        new_data = {}
        for row in rows:
            data = json.loads(row[0])
            timestamp = datetime.fromtimestamp(row[1])

            for metric_name, metric_data in data.items():
                if metric_name not in new_data:
                    new_data[metric_name] = {}
                if model_name not in new_data[metric_name]:
                    new_data[metric_name][model_name] = []
                new_data[metric_name][model_name].append((timestamp, metric_data))

        # Update cached_data efficiently (replace this model's series per metric).
        for metric_name, model_data in new_data.items():
            if metric_name not in cached_data:
                cached_data[metric_name] = model_data
            else:
                cached_data[metric_name].update(model_data)

    except sqlite3.Error as e:
        logging.error(f"SQLite error in {filepath}: {e}")
    except json.JSONDecodeError as e:
        logging.error(f"JSON decode error in {filepath}: {e}")
    except Exception as e:
        logging.error(f"Error processing file {filepath}: {e}")
    finally:
        if conn:
            conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def load_data():
    """Scan ./data for *.sqlite files and refresh the metric cache from each."""
    data_dir = "./data"

    # One loader coroutine per database file; they all feed the shared cache.
    tasks = [
        load_data_from_db(os.path.join(data_dir, fname))
        for fname in os.listdir(data_dir)
        if fname.endswith(".sqlite")
    ]

    await asyncio.gather(*tasks)
    logging.info(f"Loaded data for {len(cached_data)} metrics")
    if not cached_data:
        logging.warning(
            "No data was loaded. Check if SQLite files exist and contain recent data."
        )
|
||||||
|
|
||||||
|
|
||||||
|
async def background_data_loader():
    """Refresh the metric cache from disk forever; intended as a daemon task."""
    while True:
        await load_data()
        await asyncio.sleep(30)  # Check for updates every 30 seconds
|
||||||
|
|
||||||
|
|
||||||
|
def start_background_loop(loop):
    """Thread entry point: adopt *loop* as this thread's event loop and run it forever."""
    # NOTE(review): this module-level variant is never called in this chunk;
    # the __main__ block defines its own zero-argument version.
    asyncio.set_event_loop(loop)
    loop.run_forever()
|
||||||
|
|
||||||
|
|
||||||
|
# Start the background data loader.
# BUG FIX: the original passed the coroutine *function* straight to
# threading.Thread; calling it in the thread only created an un-awaited
# coroutine object, so nothing ever ran. Run it inside an event loop owned
# by the daemon thread instead.
def _run_background_loader():
    """Thread target: drive the async loader in this thread's own event loop."""
    asyncio.run(background_data_loader())


# The __main__ block below starts its own loader thread, so only start this
# one when the module is imported (e.g. by a WSGI/ASGI server).
if __name__ != "__main__":
    threading.Thread(target=_run_background_loader, daemon=True).start()
|
||||||
|
|
||||||
|
|
||||||
|
def create_trace(model_name, metric_name, data_points, row, col):
    """Build one line trace and return it with its (row, col) grid position."""
    xs = [point[0] for point in data_points]
    ys = [point[1] for point in data_points]
    trace = go.Scattergl(
        x=xs,
        y=ys,
        mode="lines",
        name=f"{model_name} - {metric_name}",
    )
    return (trace, row, col)
|
||||||
|
|
||||||
|
|
||||||
|
def create_plots(selected_model):
    """Build a subplot grid of the last 24 h of every cached metric.

    *selected_model* is a comma-separated list of model names; only those
    models' series are plotted. Returns a plotly Figure, or None when no
    metric has data for the selection. Timing of each phase is printed to
    stdout.
    """
    global cached_data
    start_time = time.time()

    # Restrict the cache to the requested models, keeping the metric layout.
    all_data = {}
    selected_models = selected_model.split(",")
    for metric, data in cached_data.items():
        all_data[metric] = {
            model: data[model] for model in selected_models if model in data
        }

    data_prep_time = time.time() - start_time
    print(f"Data preparation took {data_prep_time:.2f} seconds")

    num_metrics = len(all_data)
    if num_metrics == 0:
        logging.warning("No valid data found.")
        return None

    # Two columns; as many rows as needed (ceiling division).
    num_cols = 2
    num_rows = (num_metrics + num_cols - 1) // num_cols
    fig = make_subplots(
        rows=num_rows, cols=num_cols, subplot_titles=list(all_data.keys())
    )

    subplot_creation_time = time.time() - start_time - data_prep_time
    print(f"Subplot creation took {subplot_creation_time:.2f} seconds")

    now = datetime.now()
    twenty_four_hours_ago = now - timedelta(hours=24)

    trace_creation_start = time.time()

    # Traces are assembled in worker threads and added to the figure as they
    # complete; each metric occupies one subplot cell.
    with ThreadPoolExecutor() as executor:
        futures = []
        for index, (metric_name, model_data) in enumerate(all_data.items()):
            row = index // num_cols + 1
            col = index % num_cols + 1

            for model_name, metric_data_list in model_data.items():
                # Labelled metrics store a list of {"labels", "value"} dicts;
                # the first sample's label sets define the traces to draw.
                if isinstance(metric_data_list[0][1], list):
                    for label_set in metric_data_list[0][1]:
                        data_points = []
                        for timestamp, metric_data in metric_data_list:
                            if timestamp >= twenty_four_hours_ago:
                                for data_point in metric_data:
                                    if data_point["labels"] == label_set["labels"]:
                                        try:
                                            value = float(data_point["value"])
                                            data_points.append((timestamp, value))
                                        except ValueError:
                                            logging.warning(
                                                f"Invalid numeric value for {model_name} - {metric_name}: {data_point['value']}"
                                            )
                        if not data_points:
                            continue
                        data_points.sort(key=lambda x: x[0])
                        # Trace is named after the label set, not the metric.
                        futures.append(
                            executor.submit(
                                create_trace,
                                model_name,
                                str(label_set["labels"]),
                                data_points,
                                row,
                                col,
                            )
                        )
                else:
                    # Scalar metric: one trace per model.
                    data_points = []
                    for timestamp, metric_data in metric_data_list:
                        if timestamp >= twenty_four_hours_ago:
                            try:
                                value = float(metric_data)
                                data_points.append((timestamp, value))
                            except ValueError:
                                logging.warning(
                                    f"Invalid numeric value for {model_name} - {metric_name}: {metric_data}"
                                )
                    if not data_points:
                        continue
                    data_points.sort(key=lambda x: x[0])
                    futures.append(
                        executor.submit(
                            create_trace, model_name, metric_name, data_points, row, col
                        )
                    )

        for future in as_completed(futures):
            trace, row, col = future.result()
            fig.add_trace(trace, row=row, col=col)

    trace_creation_time = time.time() - trace_creation_start
    print(f"Trace creation took {trace_creation_time:.2f} seconds")

    layout_update_start = time.time()
    # Dark theme; height scales with the number of subplot rows.
    fig.update_layout(
        height=300 * num_rows,
        showlegend=True,
        template="plotly_dark",
        font=dict(family="Arial", size=10, color="white"),
        paper_bgcolor="rgb(30, 30, 30)",
        plot_bgcolor="rgb(30, 30, 30)",
    )
    fig.update_xaxes(title_text="Time", tickformat="%Y-%m-%d %H:%M:%S")
    fig.update_yaxes(title_text="Value")
    fig.update_traces(hovertemplate="%{x|%Y-%m-%d %H:%M:%S}<br>%{y:.3f}")

    layout_update_time = time.time() - layout_update_start
    print(f"Layout update took {layout_update_time:.2f} seconds")

    total_time = time.time() - start_time
    print(f"Total plot creation took {total_time:.2f} seconds")

    return fig
|
||||||
|
|
||||||
|
|
||||||
|
# WSGI application object; routes below are registered on it.
app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/", methods=["GET", "POST"])
def index():
    """Dashboard page: pick a model and render its last-24 h metric plots.

    GET defaults to the first available model; POST re-renders with the
    model chosen in the form. Also runs the get_data.py summary script for
    the selected model and shows its stdout.
    """
    data_dir = "./data"
    # Model names are the .sqlite file names with the 7-char ".sqlite" suffix removed.
    model_names = [
        name[:-7]
        for name in os.listdir(data_dir)
        if name.endswith(".sqlite") and os.path.isfile(os.path.join(data_dir, name))
    ]

    if request.method == "POST":
        selected_model = request.form.get("model_select")
    else:
        selected_model = model_names[0] if model_names else None

    plot_div = None
    error_message = None
    if selected_model:
        try:
            fig = create_plots(selected_model)
            if fig is not None:
                fig.update_layout(showlegend=False)
                plot_div = pyo.plot(fig, output_type="div", include_plotlyjs=True)
            else:
                error_message = "No data available for the selected model."
        except Exception as e:
            logging.error(f"Error creating plot: {str(e)}")
            error_message = (
                "An error occurred while creating the plot. Please try again later."
            )

        # BUG FIX: the database path was previously built with Windows
        # backslashes (".\\data\\<model>.sqlite"), which does not resolve on
        # Linux/macOS; build it portably instead. List form (shell=False)
        # also avoids shell interpretation of the model name.
        command = [
            "python",
            "get_data.py",
            "--hours",
            "24",
            os.path.join(data_dir, f"{selected_model}.sqlite"),
        ]

        # NOTE(review): this blocks the request for the script's full runtime.
        result = subprocess.run(command, capture_output=True, text=True)
    else:
        result = None

    return render_template(
        "index.html",
        plot_div=plot_div,
        model_name=selected_model,
        model_names=model_names,
        result=result.stdout if result else None,
        error_message=error_message,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/favicon.ico")
def favicon():
    """Serve the site icon with an explicit ICO mime type."""
    icon_mime = "image/vnd.microsoft.icon"
    return send_file("favicon.ico", mimetype=icon_mime)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Server-only dependencies; imported lazily so plain imports of this
    # module do not require them.
    import uvicorn
    from asgiref.wsgi import WsgiToAsgi

    # Initial data load (blocking, so the first page render has data).
    logging.info("Starting initial data load")
    asyncio.run(load_data())
    logging.info("Initial data load complete")

    # Create a new event loop for the background task
    loop = asyncio.new_event_loop()

    def start_background_loop():
        # Runs in the daemon thread: schedule the refresher and spin forever.
        asyncio.set_event_loop(loop)
        loop.create_task(background_data_loader())
        loop.run_forever()

    t = threading.Thread(target=start_background_loop, daemon=True)
    t.start()

    # Wrap the Flask WSGI app so uvicorn (an ASGI server) can serve it.
    asgi_app = WsgiToAsgi(app)
    uvicorn.run(asgi_app, host="0.0.0.0", port=4421)
|
|
@ -0,0 +1,140 @@
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
from datetime import datetime
|
||||||
|
import logging
|
||||||
|
import zstd
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
print("Starting monitor.")

# Set up basic configuration for logging
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filename="monitor.log",  # Log to a file named monitor.log
    filemode="a",
)  # Append to the log file
# BUG FIX: a second logging.basicConfig(level=INFO, ...) call followed here.
# basicConfig is a no-op once the root logger has handlers, so that call
# never had any effect; it has been removed to avoid suggesting otherwise.
|
||||||
|
|
||||||
|
# Model information
# Maps a model's display name to the base URL of its vLLM server; the
# /metrics path is appended when polling. The display name is also used as
# the SQLite file name under ./data.
models = {
    "Example-Model-22B-FP8-dynamic": "http://112.83.15.44:8883",
    "Mistral-7B-bf16": "http://57.214.142.199:8090",
}
|
||||||
|
|
||||||
|
|
||||||
|
def call_metrics_endpoint(model_name, base_url):
    """Fetch the Prometheus /metrics page from one model server.

    Returns the response body on success, or a string starting with "Error"
    on failure — callers check that prefix rather than catching exceptions.
    (*model_name* is currently unused; kept for interface stability.)
    """
    url = f"{base_url}/metrics"
    logging.debug(f"Calling metrics endpoint: {url}")
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
        }
        # BUG FIX: requests has no default timeout, so one dead server could
        # hang the whole polling loop forever. A timeout turns that into a
        # RequestException handled below.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        logging.debug(f"Received successful response from {url}")
        return response.text
    except requests.exceptions.RequestException as e:
        logging.error(f"Error calling {url}: {e}")
        return f"Error calling {url}: {e}"
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_metrics(metrics_data):
    """Normalizes the metrics data from vLLM.

    Parses Prometheus text-exposition lines into a dict:
      - plain metrics map name -> numeric value (int for *_total/*_count,
        a fixed-point string for scientific-notation values, else float);
      - labelled metrics map name -> list of {"labels": {...}, "value": v}.
    Comment lines and blank/malformed lines are skipped.
    """
    normalized_data = {}
    lines = metrics_data.strip().split("\n")
    for line in lines:
        if line.startswith("#"):  # Ignore comment lines
            continue
        parts = line.split(" ")
        # BUG FIX: blank or malformed lines used to raise IndexError on parts[1].
        if len(parts) < 2 or not parts[0]:
            continue
        metric_name = parts[0]
        metric_value_str = parts[1]

        # Try to convert to decimal, otherwise keep as string
        try:
            metric_value = float(metric_value_str)
            # NOTE(review): this suffix check runs before labels are stripped,
            # so labelled counters (name ends with "}") stay float — behavior
            # preserved from the original.
            if metric_name.endswith("_total") or metric_name.endswith("_count"):
                metric_value = int(metric_value)
            elif "e+" in metric_value_str or "e-" in metric_value_str:
                # Expand scientific notation into a plain decimal string.
                metric_value = "{:.10f}".format(metric_value)
        except ValueError:
            metric_value = metric_value_str

        # Extract labels from metric name
        if "{" in metric_name:
            # maxsplit=1 so a "{" inside a label value cannot corrupt the name.
            metric_name, labels_str = metric_name[:-1].split("{", 1)
            labels = {}
            for label_pair in labels_str.split(","):
                # BUG FIX: split on the first "=" only, so "=" inside a quoted
                # label value no longer raises ValueError.
                key, value = label_pair.split("=", 1)
                labels[key.strip('"')] = value.strip('"')
            if metric_name not in normalized_data:
                normalized_data[metric_name] = []
            normalized_data[metric_name].append(
                {"labels": labels, "value": metric_value}
            )
        else:
            normalized_data[metric_name] = metric_value

    return normalized_data
|
||||||
|
|
||||||
|
|
||||||
|
def log_response(model_name, response_data):
    """Normalize one metrics payload and append it to the model's SQLite DB.

    Retries up to three times (5 s apart) when the database is locked by a
    concurrent reader/writer; other operational errors are logged and the
    write is abandoned.
    """
    timestamp = int(datetime.now().timestamp())
    normalized_data = normalize_metrics(response_data)

    db_filename = f"./data/{model_name}.sqlite"
    os.makedirs(os.path.dirname(db_filename), exist_ok=True)

    max_retries = 3
    for retry in range(max_retries):
        conn = None
        try:
            conn = sqlite3.connect(db_filename)
            cursor = conn.cursor()

            # Create table if it doesn't exist
            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS json_data
                (id INTEGER PRIMARY KEY AUTOINCREMENT,
                data TEXT NOT NULL,
                timestamp INTEGER NOT NULL)
                """
            )

            # Insert the data
            cursor.execute(
                "INSERT INTO json_data (data, timestamp) VALUES (?, ?)",
                (json.dumps(normalized_data), timestamp),
            )

            conn.commit()

            logging.debug(f"Saved metrics data to {db_filename}")
            break  # Exit the retry loop if successful
        except sqlite3.OperationalError as e:
            if "database is locked" in str(e):
                logging.warning(
                    f"Database locked for {model_name}, retrying in 5 seconds... (Attempt {retry+1}/{max_retries})"
                )
                time.sleep(5)  # Wait before retrying
            else:
                logging.error(f"Error writing to database for {model_name}: {e}")
                break  # Exit the retry loop for other errors
        finally:
            # BUG FIX: always release the connection — the original leaked it
            # on the locked/retry path and on unexpected exceptions.
            if conn is not None:
                conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# Main polling loop: scrape every configured server, persist valid payloads,
# then sleep. Runs until the process is killed.
while True:
    for model_name, base_url in models.items():
        response_data = call_metrics_endpoint(model_name, base_url)
        if response_data and not response_data.startswith(
            "Error"
        ):  # Check for valid data
            logging.info(f"Metrics for {model_name} valid")  # Log metrics to console
            log_response(model_name, response_data)

    logging.debug("Waiting for 30 seconds...")
    time.sleep(30)
|
Loading…
Reference in New Issue