docs: add README.md and initial commit

- add code
- add GitHub workflows
- add GitHub repo files
BuildTools 2024-09-21 16:30:15 -07:00
parent ffc14cb984
commit dff316c725
No known key found for this signature in database
GPG Key ID: 3270C066C15D530B
20 changed files with 1195 additions and 3 deletions

37
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
@@ -0,0 +1,37 @@
---
name: Bug report
about: Create a report to help us improve vAnalytics
title: '[BUG] '
labels: bug
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error
**Expected behavior**
A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Environment (please complete the following information):**
- OS: [e.g. Windows, macOS, Linux]
- vAnalytics Version: [e.g. v1.4.2]
- Python Version (if running from source): [e.g. 3.9]
- vLLM Version(s): [e.g. 0.6.1]
**Additional context**
Add any other context about the problem here. Include any relevant log outputs or error messages.
**Checklist:**
- [ ] I have checked the existing issues to make sure this is not a duplicate
- [ ] I have included all relevant information to reproduce the issue
- [ ] I am running the latest version of vAnalytics

8
.github/dependabot.yml vendored Normal file
@@ -0,0 +1,8 @@
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "weekly"
day: "sunday"
open-pull-requests-limit: 10

20
.github/workflows/black.yml vendored Normal file
@@ -0,0 +1,20 @@
name: Black
on:
push:
paths:
- '**.py'
pull_request:
paths:
- '**.py'
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: psf/black@stable
with:
options: "--check --verbose"
src: "./src"

99
.github/workflows/codeql.yml vendored Normal file
@@ -0,0 +1,99 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"
on:
push:
branches: [ "main" ]
paths-ignore:
- '**/*.md'
- '**/*.txt'
pull_request:
branches: [ "main" ]
paths-ignore:
- '**/*.md'
- '**/*.txt'
schedule:
- cron: '21 20 * * 6'
jobs:
analyze:
name: Analyze (${{ matrix.language }})
# Runner size impacts CodeQL analysis time. To learn more, please see:
# - https://gh.io/recommended-hardware-resources-for-running-codeql
# - https://gh.io/supported-runners-and-hardware-resources
# - https://gh.io/using-larger-runners (GitHub.com only)
# Consider using larger runners or machines with greater resources for possible analysis time improvements.
runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
permissions:
# required for all workflows
security-events: write
# required to fetch internal or private CodeQL packs
packages: read
# only required for workflows in private repositories
actions: read
contents: read
strategy:
fail-fast: false
matrix:
include:
- language: python
build-mode: none
# CodeQL supports the following values for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift'
# Use 'c-cpp' to analyze code written in C, C++ or both
# Use 'java-kotlin' to analyze code written in Java, Kotlin or both
# Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
# To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
# see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
# If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
# your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
steps:
- name: Checkout repository
uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
build-mode: ${{ matrix.build-mode }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
# queries: security-extended,security-and-quality
# If the analysis step fails for one of the languages you are analyzing with
# "We were unable to automatically build your code", modify the matrix above
# to set the build mode to "manual" for that language. Then modify this step
# to build your code.
# Command-line programs to run using the OS shell.
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
- if: matrix.build-mode == 'manual'
shell: bash
run: |
echo 'If you are using a "manual" build mode for one or more of the' \
'languages you are analyzing, replace this with the commands to build' \
'your code, for example:'
echo ' make bootstrap'
echo ' make release'
exit 1
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{matrix.language}}"

59
.github/workflows/pip-audit.yml vendored Normal file
@@ -0,0 +1,59 @@
name: Dependency Audit
on:
push:
paths:
- '**/requirements.txt'
pull_request:
paths:
- '**/requirements.txt'
schedule:
- cron: '0 0 * * *' # Run daily at midnight UTC
jobs:
audit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pip-audit
- name: Run pip-audit
run: |
pip-audit -r requirements.txt > audit_output.txt
continue-on-error: true
- name: Display audit results
run: cat audit_output.txt
- name: Create detailed report
run: |
echo "Pip Audit Report" > detailed_report.txt
echo "==================" >> detailed_report.txt
echo "" >> detailed_report.txt
echo "Date: $(date)" >> detailed_report.txt
echo "" >> detailed_report.txt
echo "Audit Results:" >> detailed_report.txt
cat audit_output.txt >> detailed_report.txt
echo "" >> detailed_report.txt
echo "Environment:" >> detailed_report.txt
python --version >> detailed_report.txt
pip --version >> detailed_report.txt
echo "" >> detailed_report.txt
echo "Requirements:" >> detailed_report.txt
cat requirements.txt >> detailed_report.txt
- name: Upload audit results
uses: actions/upload-artifact@v3
with:
name: pip-audit-report
path: detailed_report.txt

28
.github/workflows/pylint.yml vendored Normal file
@@ -0,0 +1,28 @@
name: Pylint
on:
push:
paths:
- '**.py'
pull_request:
paths:
- '**.py'
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install $(grep -v "^torch" requirements.txt | tr '\n' ' ')
pip install pylint
- name: Analysing the code with pylint
run: |
pylint $(git ls-files '*.py') --disable=all --enable=E0001,E0100,E0101,E0102,E0103,E0104,E0105,E0107,E0108,E0110,E0111,E0112,E0113,E0114,E0115,E0116,E0117,E0118,E0202,E0203,E0211,E0213,E0236,E0237,E0238,E0239,E0240,E0241,E0301,E0302,E0303,E0401,E0402,E0701,E0702,E0703,E0704,E0710,E0711,E0712,E1003,E1101,E1102,E1111,E1120,E1121,E1123,E1124,E1125,E1126,E1127,E1128,E1129,E1130,E1131,E1132,E1133,E1134,E1135,E1136,E1137,E1138,E1139,E1200,E1201,E1205,E1206,E1300,E1301,E1302,E1303,E1304,E1305,E1306,E1310,E1700,E1701,W0311,W0312,W0611,W0612,W0613,W0702,W1401,W1402,C0123,C0200,C0325,C0411,C0412 --fail-under=5

72
.github/workflows/radon.yml vendored Normal file
@@ -0,0 +1,72 @@
name: Radon Code Metrics
on:
workflow_dispatch:
push:
paths:
- '**.py'
pull_request:
paths:
- '**.py'
jobs:
radon:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.x'
- name: Install radon
run: pip install radon
- name: Run radon
run: |
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
CHANGED_FILES=$(git ls-files '*.py')
else
CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep '\.py$' || echo "")
fi
echo "Files to be analyzed:"
echo "$CHANGED_FILES"
if [ -n "$CHANGED_FILES" ]; then
echo "Running Cyclomatic Complexity check..."
radon cc $CHANGED_FILES -a -s -n F --exclude "AutoGGUF.quantize_model"
echo "Running Maintainability Index check..."
radon mi $CHANGED_FILES -s -n F
else
echo "No Python files to analyze."
fi
continue-on-error: true
- name: Check radon output
run: |
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
CHANGED_FILES=$(git ls-files '*.py')
else
CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep '\.py$' || echo "")
fi
if [ -n "$CHANGED_FILES" ]; then
CC_OUTPUT=$(radon cc $CHANGED_FILES -a -s -n F --exclude "AutoGGUF.quantize_model")
MI_OUTPUT=$(radon mi $CHANGED_FILES -s -n F)
if [ -n "$CC_OUTPUT" ] || [ -n "$MI_OUTPUT" ]; then
echo "Radon detected code complexity or maintainability issues:"
[ -n "$CC_OUTPUT" ] && echo "$CC_OUTPUT"
[ -n "$MI_OUTPUT" ] && echo "$MI_OUTPUT"
exit 1
else
echo "No code complexity or maintainability issues detected."
fi
else
echo "No Python files to analyze."
fi

1
.gitignore vendored
@@ -129,6 +129,7 @@ venv/
ENV/
env.bak/
venv.bak/
.idea/
# Spyder project settings
.spyderproject

10
.pre-commit-config.yaml Normal file
@@ -0,0 +1,10 @@
repos:
- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black
language_version: python3
- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.1.9
hooks:
- id: remove-crlf

1
CHANGELOG.md Normal file
@@ -0,0 +1 @@
# Changelog

127
CODE_OF_CONDUCT.md Normal file
@@ -0,0 +1,127 @@
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
overall community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or
advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement in the Discussions tab.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series
of actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within
the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.

62
CONTRIBUTING.md Normal file
@@ -0,0 +1,62 @@
# Contributing to vAnalytics
First off, thanks for taking the time to contribute! 🎉👍
## How Can I Contribute?
### Reporting Bugs
- Use the issue tracker to report bugs
- Describe the bug in detail
- Include screenshots if possible
### Suggesting Enhancements
- Use the issue tracker to suggest enhancements
- Explain why this enhancement would be useful
### Your First Code Contribution
You can find issues labeled with "good first issue" in the Issues tab as a starting point. Code refactors and optimizations are also appreciated, although if there's a vulnerability please report it privately in the Security tab. For feature PRs, please open a discussion first to make sure your feature can be added and continuously maintained.
1. Fork the repo
2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
3. Install pre-commit: (`pip install pre-commit`)
4. Set up the git hook scripts: (`pre-commit install`)
5. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
6. Push to the branch (`git push origin feature/AmazingFeature`)
7. Open a Pull Request
## Styleguides
### Git Commit Messages
- Use the present tense ("Add feature" not "Added feature")
- Use the imperative mood ("Move cursor to..." not "Moves cursor to...")
- Limit the first line to 72 characters or fewer
### Commit Types:
```
feat: A new feature
fix: A bug fix
docs: Documentation changes
style: Code style changes (formatting, etc.)
refactor: Code refactoring
perf: Performance improvements
test: Adding or modifying tests
build: Changes to build system or external dependencies
ci: Changes to CI configuration files and scripts
chore: Other changes that don't modify src or test files
```
### Python Styleguide
- Follow PEP 8
- Please use Black to format your code first
- Use meaningful variable names
- Comment your code, but don't overdo it
## Questions?
Feel free to contact the project maintainers if you have any questions.

LICENSE

@@ -186,7 +186,7 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Copyright 2024 leafspark
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

README.md

@@ -1,2 +1,37 @@
# vAnalytics
time series analytics for vLLM
# vAnalytics - time series analytics for vLLM
<!-- Project Status -->
[![GitHub release](https://img.shields.io/github/release/leafspark/vAnalytics.svg)](https://github.com/leafspark/vAnalytics/releases)
[![GitHub last commit](https://img.shields.io/github/last-commit/leafspark/vAnalytics.svg)](https://github.com/leafspark/vAnalytics/commits)
[![CI/CD Status](https://img.shields.io/badge/CI%2FCD-passing-brightgreen)]()
<!-- Project Info -->
[![Powered by llama.cpp](https://img.shields.io/badge/Powered%20by-llama.cpp-green.svg)](https://github.com/ggerganov/llama.cpp)
![GitHub top language](https://img.shields.io/github/languages/top/leafspark/vAnalytics.svg)
[![Platform Compatibility](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-blue)]()
[![GitHub license](https://img.shields.io/github/license/leafspark/vAnalytics.svg)](https://github.com/leafspark/vAnalytics/blob/main/LICENSE)
<!-- Repository Stats -->
![GitHub stars](https://img.shields.io/github/stars/leafspark/vAnalytics.svg)
![GitHub forks](https://img.shields.io/github/forks/leafspark/vAnalytics.svg)
![GitHub release (latest by date)](https://img.shields.io/github/downloads/leafspark/vAnalytics/latest/total?color=green)
![GitHub repo size](https://img.shields.io/github/repo-size/leafspark/vAnalytics.svg)
![Lines of Code](https://tokei.rs/b1/github/leafspark/vAnalytics?category=code)
<!-- Contribution -->
[![Code Style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![Issues](https://img.shields.io/github/issues/leafspark/vAnalytics)](https://github.com/leafspark/vAnalytics/issues)
[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/leafspark/vAnalytics/pulls)
vAnalytics provides a web interface for monitoring vLLM instance metrics. It can track multiple vLLM instances at once and is easy to set up and configure.
## Features
- Specify vLLM backends easily using name and host configuration
- Uses SQLite for easy database management
- Intuitive and includes error handling
- Flexible schemas and data plotting using Plotly
## Usage
Configure your instances in `src/monitor.py`, then run `python src/monitor.py`. Monitoring data is stored in a `./data` folder as one SQLite database per model name.
To start the web interface, run `python src/graph.py`. The web interface is available at `localhost:4421`.
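The `models` dictionary at the top of `src/monitor.py` maps each model name (also used as the database filename) to the base URL of a vLLM server. A minimal sketch, with placeholder hosts:

```python
# src/monitor.py - example instance configuration (hosts are placeholders)
models = {
    "Example-Model-22B-FP8-dynamic": "http://10.0.0.1:8883",
    "Mistral-7B-bf16": "http://10.0.0.2:8090",
}
```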

13
SECURITY.md Normal file
@@ -0,0 +1,13 @@
# Security Policy
## Supported Versions
| Version | Supported |
|-----------------|--------------------|
| stable (v1.0.0) | :white_check_mark: |
Beta versions are not supported, and may have unknown security issues.
## Reporting a Vulnerability
Use the Issues tab, or for severe vulnerabilities please contact the maintainers via email.

7
requirements.txt Normal file
@@ -0,0 +1,7 @@
numpy~=1.26.4
uvicorn~=0.30.6
requests~=2.32.3
pandas~=2.2.3
plotly~=5.24.1
flask~=3.0.3
zstd~=1.5.5.1

12
setup.py Normal file
@@ -0,0 +1,12 @@
from setuptools import setup
setup(
name='vAnalytics',
version='v1.0.0',
packages=[''],
url='https://github.com/leafspark/vAnalytics',
license='apache-2.0',
author='leafspark',
author_email='',
description='time series analytics for vLLM'
)

138
src/get_data.py Normal file
@@ -0,0 +1,138 @@
import os
import json
import argparse
import sqlite3
from datetime import datetime, timedelta
def get_latest_rows(db_file, hours=1):
now = datetime.now()
    cutoff = now - timedelta(hours=hours)
    cutoff_timestamp = int(cutoff.timestamp())
    conn = sqlite3.connect(db_file)
    cursor = conn.cursor()
    cursor.execute(
        "SELECT timestamp, data FROM json_data WHERE timestamp >= ? ORDER BY timestamp DESC",
        (cutoff_timestamp,),
    )
rows = cursor.fetchall()
conn.close()
if not rows:
print(
f"No rows found in the last {hours} hour(s). Showing info for last 5 rows:"
)
conn = sqlite3.connect(db_file)
cursor = conn.cursor()
        cursor.execute(
            "SELECT timestamp, data FROM json_data ORDER BY timestamp DESC LIMIT 5"
        )
rows = cursor.fetchall()
conn.close()
for timestamp, _ in rows:
print(f" {datetime.fromtimestamp(timestamp)}")
return rows
def extract_stats(data_json):
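    """Extract headline counters and gauges from one JSON snapshot.

    Expects the structure written by monitor.py: metric name -> list of
    {"labels": ..., "value": ...} entries (scalars for unlabeled metrics).
    Returns None if the snapshot is malformed.
    """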
try:
data = json.loads(data_json)
total_prompt_tokens = float(data["vllm:prompt_tokens_total"][0]["value"])
total_generation_tokens = float(
data["vllm:generation_tokens_total"][0]["value"]
)
total_requests = sum(
float(item["value"]) for item in data["vllm:request_success_total"]
)
avg_prompt_throughput = float(
data["vllm:avg_prompt_throughput_toks_per_s"][0]["value"]
)
avg_generation_throughput = float(
data["vllm:avg_generation_throughput_toks_per_s"][0]["value"]
)
gpu_cache_usage_perc = float(data["vllm:gpu_cache_usage_perc"][0]["value"])
num_requests_running = float(data["vllm:num_requests_running"][0]["value"])
except (KeyError, IndexError, json.JSONDecodeError) as e:
print(f"Error extracting stats from data: {str(e)}")
return None
return {
"total_prompt_tokens": total_prompt_tokens,
"total_generation_tokens": total_generation_tokens,
"total_requests": total_requests,
"avg_prompt_throughput": avg_prompt_throughput,
"avg_generation_throughput": avg_generation_throughput,
"gpu_cache_usage_perc": gpu_cache_usage_perc,
"num_requests_running": num_requests_running,
}
def main(db_file, hours):
latest_rows = get_latest_rows(db_file, hours)
if not latest_rows:
print(f"No rows found for the last {hours} hour(s).")
return
print(f"Processing {len(latest_rows)} rows.")
    # Parse each row once; extract_stats returns None for malformed rows.
    all_stats = (extract_stats(data) for _, data in latest_rows)
    valid_stats = [stats for stats in all_stats if stats is not None]
if not valid_stats:
print("No valid statistics could be extracted from the rows.")
return
first_stats = valid_stats[-1] # Oldest row
last_stats = valid_stats[0] # Newest row
tokens_processed = (
last_stats["total_prompt_tokens"]
- first_stats["total_prompt_tokens"]
+ last_stats["total_generation_tokens"]
- first_stats["total_generation_tokens"]
)
requests_processed = last_stats["total_requests"] - first_stats["total_requests"]
avg_prompt_throughput = sum(
stat["avg_prompt_throughput"] for stat in valid_stats
) / len(valid_stats)
avg_generation_throughput = sum(
stat["avg_generation_throughput"] for stat in valid_stats
) / len(valid_stats)
avg_num_requests_running = sum(
stat["num_requests_running"] for stat in valid_stats
) / len(valid_stats)
avg_gpu_cache_usage_perc = sum(
stat["gpu_cache_usage_perc"] for stat in valid_stats
) / len(valid_stats)
print(f"\nStats for the last {hours} hour(s):")
print(f"Tokens processed: {tokens_processed:,.0f}")
print(f"Requests processed: {requests_processed:,.0f}")
print(f"Average prompt throughput: {avg_prompt_throughput:.2f} tokens/s")
print(f"Average generation throughput: {avg_generation_throughput:.2f} tokens/s")
print(
f"Average number of requests running: {avg_num_requests_running:.2f} requests"
)
print(f"Average GPU cache usage percent: {avg_gpu_cache_usage_perc * 100:.2f}%")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Extract stats from a SQLite database for a specified time period"
)
parser.add_argument("db_file", help="Path to the SQLite database file")
parser.add_argument(
"--hours", type=int, default=1, help="Number of hours to look back (default: 1)"
)
args = parser.parse_args()
main(args.db_file, args.hours)

323
src/graph.py Normal file
@@ -0,0 +1,323 @@
import asyncio
import json
import logging
import numpy as np
import os
import pandas as pd
import plotly.graph_objects as go
import plotly.offline as pyo
import sqlite3
import subprocess
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta
from flask import Flask, render_template, request, send_file
from functools import lru_cache
from plotly.subplots import make_subplots
from scipy.interpolate import make_interp_spline
# Set up logging with a higher level
logging.basicConfig(
level=logging.WARNING, # Changed from DEBUG to WARNING
format="%(asctime)s - %(levelname)s - %(message)s",
)
# Global variable to store the cached data
cached_data = {}
last_modified_times = {}
async def load_data_from_db(filepath):
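    """Load the last 24 hours of rows from one per-model SQLite file and
    merge them into cached_data as {metric: {model: [(timestamp, data)]}}."""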
global cached_data
    conn = None
    try:
        conn = sqlite3.connect(filepath)
cursor = conn.cursor()
twenty_four_hours_ago = datetime.now() - timedelta(hours=24)
timestamp_24h_ago = int(twenty_four_hours_ago.timestamp())
cursor.execute(
"SELECT data, timestamp FROM json_data WHERE timestamp >= ?",
(timestamp_24h_ago,),
)
rows = cursor.fetchall()
model_name = os.path.splitext(os.path.basename(filepath))[0]
# Optimize data structure creation
new_data = {}
for row in rows:
data = json.loads(row[0])
timestamp = datetime.fromtimestamp(row[1])
for metric_name, metric_data in data.items():
if metric_name not in new_data:
new_data[metric_name] = {}
if model_name not in new_data[metric_name]:
new_data[metric_name][model_name] = []
new_data[metric_name][model_name].append((timestamp, metric_data))
# Update cached_data efficiently
for metric_name, model_data in new_data.items():
if metric_name not in cached_data:
cached_data[metric_name] = model_data
else:
cached_data[metric_name].update(model_data)
except sqlite3.Error as e:
logging.error(f"SQLite error in {filepath}: {e}")
except json.JSONDecodeError as e:
logging.error(f"JSON decode error in {filepath}: {e}")
except Exception as e:
logging.error(f"Error processing file {filepath}: {e}")
finally:
if conn:
conn.close()
async def load_data():
data_dir = "./data"
tasks = []
for filename in os.listdir(data_dir):
if filename.endswith(".sqlite"):
filepath = os.path.join(data_dir, filename)
tasks.append(load_data_from_db(filepath))
await asyncio.gather(*tasks)
logging.info(f"Loaded data for {len(cached_data)} metrics")
if len(cached_data) == 0:
logging.warning(
"No data was loaded. Check if SQLite files exist and contain recent data."
)
async def background_data_loader():
while True:
await load_data()
await asyncio.sleep(30) # Check for updates every 30 seconds
# NOTE: background_data_loader is a coroutine function, so it cannot be passed
# directly to threading.Thread; it is started on a dedicated event loop in the
# __main__ block below.
def create_trace(model_name, metric_name, data_points, row, col):
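    """Build a Scattergl trace for one series; the (trace, row, col) tuple
    tells the caller which subplot cell to add it to once the executor
    finishes."""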
return (
go.Scattergl(
x=[point[0] for point in data_points],
y=[point[1] for point in data_points],
mode="lines",
name=f"{model_name} - {metric_name}",
),
row,
col,
)
def create_plots(selected_model):
global cached_data
start_time = time.time()
all_data = {}
selected_models = selected_model.split(",")
for metric, data in cached_data.items():
all_data[metric] = {
model: data[model] for model in selected_models if model in data
}
data_prep_time = time.time() - start_time
print(f"Data preparation took {data_prep_time:.2f} seconds")
num_metrics = len(all_data)
if num_metrics == 0:
logging.warning("No valid data found.")
return None
num_cols = 2
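    # Ceiling division: enough two-column rows to hold every metric subplot.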
num_rows = (num_metrics + num_cols - 1) // num_cols
fig = make_subplots(
rows=num_rows, cols=num_cols, subplot_titles=list(all_data.keys())
)
subplot_creation_time = time.time() - start_time - data_prep_time
print(f"Subplot creation took {subplot_creation_time:.2f} seconds")
now = datetime.now()
twenty_four_hours_ago = now - timedelta(hours=24)
trace_creation_start = time.time()
with ThreadPoolExecutor() as executor:
futures = []
for index, (metric_name, model_data) in enumerate(all_data.items()):
row = index // num_cols + 1
col = index % num_cols + 1
for model_name, metric_data_list in model_data.items():
if isinstance(metric_data_list[0][1], list):
for label_set in metric_data_list[0][1]:
data_points = []
for timestamp, metric_data in metric_data_list:
if timestamp >= twenty_four_hours_ago:
for data_point in metric_data:
if data_point["labels"] == label_set["labels"]:
try:
value = float(data_point["value"])
data_points.append((timestamp, value))
except ValueError:
logging.warning(
f"Invalid numeric value for {model_name} - {metric_name}: {data_point['value']}"
)
if not data_points:
continue
data_points.sort(key=lambda x: x[0])
futures.append(
executor.submit(
create_trace,
model_name,
str(label_set["labels"]),
data_points,
row,
col,
)
)
else:
data_points = []
for timestamp, metric_data in metric_data_list:
if timestamp >= twenty_four_hours_ago:
try:
value = float(metric_data)
data_points.append((timestamp, value))
except ValueError:
logging.warning(
f"Invalid numeric value for {model_name} - {metric_name}: {metric_data}"
)
if not data_points:
continue
data_points.sort(key=lambda x: x[0])
futures.append(
executor.submit(
create_trace, model_name, metric_name, data_points, row, col
)
)
for future in as_completed(futures):
trace, row, col = future.result()
fig.add_trace(trace, row=row, col=col)
trace_creation_time = time.time() - trace_creation_start
print(f"Trace creation took {trace_creation_time:.2f} seconds")
layout_update_start = time.time()
fig.update_layout(
height=300 * num_rows,
showlegend=True,
template="plotly_dark",
font=dict(family="Arial", size=10, color="white"),
paper_bgcolor="rgb(30, 30, 30)",
plot_bgcolor="rgb(30, 30, 30)",
)
fig.update_xaxes(title_text="Time", tickformat="%Y-%m-%d %H:%M:%S")
fig.update_yaxes(title_text="Value")
fig.update_traces(hovertemplate="%{x|%Y-%m-%d %H:%M:%S}<br>%{y:.3f}")
layout_update_time = time.time() - layout_update_start
print(f"Layout update took {layout_update_time:.2f} seconds")
total_time = time.time() - start_time
print(f"Total plot creation took {total_time:.2f} seconds")
return fig
app = Flask(__name__)
@app.route("/", methods=["GET", "POST"])
def index():
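    # Model names are the SQLite filenames in ./data minus the ".sqlite"
    # suffix (7 characters), matching how monitor.py names its databases.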
data_dir = "./data"
model_names = [
name[:-7]
for name in os.listdir(data_dir)
if name.endswith(".sqlite") and os.path.isfile(os.path.join(data_dir, name))
]
if request.method == "POST":
selected_model = request.form.get("model_select")
else:
selected_model = model_names[0] if model_names else None
plot_div = None
error_message = None
if selected_model:
try:
fig = create_plots(selected_model)
if fig is not None:
fig.update_layout(showlegend=False)
plot_div = pyo.plot(fig, output_type="div", include_plotlyjs=True)
else:
error_message = "No data available for the selected model."
except Exception as e:
logging.error(f"Error creating plot: {str(e)}")
error_message = (
"An error occurred while creating the plot. Please try again later."
)
        # Paths are relative to the repo root (see README usage).
        command = [
            "python",
            os.path.join("src", "get_data.py"),
            "--hours",
            "24",
            os.path.join("data", f"{selected_model}.sqlite"),
        ]
result = subprocess.run(command, capture_output=True, text=True)
else:
result = None
return render_template(
"index.html",
plot_div=plot_div,
model_name=selected_model,
model_names=model_names,
result=result.stdout if result else None,
error_message=error_message,
)
@app.route("/favicon.ico")
def favicon():
return send_file("favicon.ico", mimetype="image/vnd.microsoft.icon")
if __name__ == "__main__":
import uvicorn
from asgiref.wsgi import WsgiToAsgi
# Initial data load
logging.info("Starting initial data load")
asyncio.run(load_data())
logging.info("Initial data load complete")
# Create a new event loop for the background task
loop = asyncio.new_event_loop()
def start_background_loop():
asyncio.set_event_loop(loop)
loop.create_task(background_data_loader())
loop.run_forever()
t = threading.Thread(target=start_background_loop, daemon=True)
t.start()
asgi_app = WsgiToAsgi(app)
uvicorn.run(asgi_app, host="0.0.0.0", port=4421)

140
src/monitor.py Normal file
@@ -0,0 +1,140 @@
import requests
import time
import os
import json
from datetime import datetime
import logging
import zstd
import sqlite3
print("Starting monitor.")
# Set up basic configuration for logging
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s - %(levelname)s - %(message)s",
filename="monitor.log", # Log to a file named monitor.log
filemode="a",
) # Append to the log file
# Model information
models = {
"Example-Model-22B-FP8-dynamic": "http://112.83.15.44:8883",
"Mistral-7B-bf16": "http://57.214.142.199:8090",
}
def call_metrics_endpoint(model_name, base_url):
url = f"{base_url}/metrics"
logging.debug(f"Calling metrics endpoint: {url}")
try:
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
}
        # Time out rather than hang forever on an unresponsive backend.
        response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
logging.debug(f"Received successful response from {url}")
return response.text
except requests.exceptions.RequestException as e:
logging.error(f"Error calling {url}: {e}")
return f"Error calling {url}: {e}"
def normalize_metrics(metrics_data):
"""Normalizes the metrics data from vLLM."""
normalized_data = {}
lines = metrics_data.strip().split("\n")
for line in lines:
if line.startswith("#"): # Ignore comment lines
continue
parts = line.split(" ")
metric_name = parts[0]
metric_value_str = parts[1]
# Try to convert to decimal, otherwise keep as string
try:
metric_value = float(metric_value_str)
if metric_name.endswith("_total") or metric_name.endswith("_count"):
metric_value = int(metric_value)
elif "e+" in metric_value_str or "e-" in metric_value_str:
metric_value = "{:.10f}".format(metric_value)
except ValueError:
metric_value = metric_value_str
# Extract labels from metric name
if "{" in metric_name:
metric_name, labels_str = metric_name[:-1].split("{")
labels = {}
for label_pair in labels_str.split(","):
key, value = label_pair.split("=")
labels[key.strip('"')] = value.strip('"')
if metric_name not in normalized_data:
normalized_data[metric_name] = []
normalized_data[metric_name].append(
{"labels": labels, "value": metric_value}
)
else:
normalized_data[metric_name] = metric_value
return normalized_data
def log_response(model_name, response_data):
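    # Persist one normalized snapshot into ./data/<model_name>.sqlite,
    # retrying a few times if another writer holds the database lock.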
timestamp = int(datetime.now().timestamp())
normalized_data = normalize_metrics(response_data)
db_filename = f"./data/{model_name}.sqlite"
os.makedirs(os.path.dirname(db_filename), exist_ok=True)
max_retries = 3
for retry in range(max_retries):
try:
conn = sqlite3.connect(db_filename)
cursor = conn.cursor()
# Create table if it doesn't exist
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS json_data
(id INTEGER PRIMARY KEY AUTOINCREMENT,
data TEXT NOT NULL,
timestamp INTEGER NOT NULL)
"""
)
# Insert the data
cursor.execute(
"INSERT INTO json_data (data, timestamp) VALUES (?, ?)",
(json.dumps(normalized_data), timestamp),
)
conn.commit()
conn.close()
logging.debug(f"Saved metrics data to {db_filename}")
break # Exit the retry loop if successful
except sqlite3.OperationalError as e:
if "database is locked" in str(e):
logging.warning(
f"Database locked for {model_name}, retrying in 5 seconds... (Attempt {retry+1}/{max_retries})"
)
time.sleep(5) # Wait before retrying
else:
logging.error(f"Error writing to database for {model_name}: {e}")
break # Exit the retry loop for other errors
while True:
for model_name, base_url in models.items():
response_data = call_metrics_endpoint(model_name, base_url)
if response_data and not response_data.startswith(
"Error"
): # Check for valid data
logging.info(f"Metrics for {model_name} valid") # Log metrics to console
log_response(model_name, response_data)
logging.debug("Waiting for 30 seconds...")
time.sleep(30)