feat(config): add configuration options
- add configuration options via environment variables
- move get_data.py functionality to graph.py to avoid using subprocess
- update .gitignore
- update .env.example
- create globals.py to store helper functions
- add run.bat to start and configure program
parent 559c821bae
commit 9a94d5bbda
.env.example (new file)
@@ -0,0 +1,7 @@
+HOST=0.0.0.0
+PORT=4421
+DATA_DIR=./data
+MODEL_CONFIG=./models.json
+MODEL_DIRECTORY=
+UPDATE_INTERVAL=30
+LOG_LEVEL=WARNING
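Each entry above mirrors a fallback that is hard-coded in the sources changed below: HOST and PORT are handed to uvicorn, DATA_DIR is scanned for *.sqlite files, MODEL_CONFIG points the monitor at its models.json, UPDATE_INTERVAL is the background reload period in seconds, and LOG_LEVEL names a standard logging level. MODEL_DIRECTORY is declared here but not referenced in the hunks shown. A minimal sketch of the lookup pattern, using only the standard library:

    import os

    # os.getenv returns the fallback when the variable is unset, so a missing
    # .env (or a missing key) leaves the program on the same built-in defaults.
    PORT = int(os.getenv("PORT", 4421))
    UPDATE_INTERVAL = int(os.getenv("UPDATE_INTERVAL", 30))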
.gitignore
@@ -1,163 +1,29 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-.pybuilder/
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-# For a library or package, you might want to ignore these files since the code is
-# intended to run in multiple environments; otherwise, check them in:
-# .python-version
-
-# pipenv
-# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-# However, in case of collaboration, if having platform-specific dependencies or dependencies
-# having no cross-platform support, pipenv may install dependencies that don't work, or not
-# install all needed dependencies.
-#Pipfile.lock
-
-# poetry
-# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-# This is especially recommended for binary packages to ensure reproducibility, and is more
-# commonly ignored for libraries.
-# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-
-# pdm
-# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
-# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
-# in version control.
-# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
-.pdm.toml
-.pdm-python
-.pdm-build/
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-.idea/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# pytype static type analyzer
-.pytype/
-
-# Cython debug symbols
-cython_debug/
-
-# PyCharm
-# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-# and can be added to the global gitignore or merged into this file. For a more nuclear
-# option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+# Ignore everything
+*
+
+# Don't ignore directories, so we can recurse into them
+!*/
+
+# Don't ignore .gitignore
+!.gitignore
+
+# Don't ignore LICENSE
+!LICENSE
+
+# Don't ignore these files
+!*.example
+!setup.py
+!.pre-commit-config.yaml
+!run.bat
+
+# Don't ignore .github directory
+!.github/**
+
+# Don't ignore Python files in src directory
+!src/**/*.py
+
+# Don't ignore requirements.txt in root
+!requirements.txt
+
+# Don't ignore Markdown files in root
+!*.md
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
run.bat (new file)
@@ -0,0 +1,6 @@
+@echo off
+echo Starting monitor...
+start python ./src/monitor.py
+echo Starting web UI...
+start python ./src/graph.py
+pause
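Each `start` line spawns the script in its own console window, so the monitor and the web UI run as two independent Python processes, and `pause` keeps the launcher window open. A rough cross-platform stand-in (an illustration only, not part of this commit; the file name launch_all.py is hypothetical):

    # launch_all.py: hypothetical cross-platform equivalent of run.bat
    import subprocess

    # Spawn the monitor and the web UI as independent processes, mirroring
    # the two `start python ...` lines above.
    procs = [
        subprocess.Popen(["python", "./src/monitor.py"]),
        subprocess.Popen(["python", "./src/graph.py"]),
    ]
    for p in procs:
        p.wait()  # block like `pause`, keeping the parent alive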
src/get_data.py (deleted, 138 lines)
@@ -1,138 +0,0 @@
-import os
-import json
-import argparse
-import sqlite3
-from datetime import datetime, timedelta
-
-
-def get_latest_rows(db_file, hours=1):
-    now = datetime.now()
-    one_hour_ago = now - timedelta(hours=hours)
-    one_hour_ago_timestamp = int(one_hour_ago.timestamp())
-
-    conn = sqlite3.connect(db_file)
-    cursor = conn.cursor()
-
-    cursor.execute(
-        f"SELECT timestamp, data FROM json_data WHERE timestamp >= {one_hour_ago_timestamp} ORDER BY timestamp DESC"
-    )
-    rows = cursor.fetchall()
-
-    conn.close()
-
-    if not rows:
-        print(
-            f"No rows found in the last {hours} hour(s). Showing info for last 5 rows:"
-        )
-        conn = sqlite3.connect(db_file)
-        cursor = conn.cursor()
-        cursor.execute(
-            f"SELECT timestamp, data FROM json_data ORDER BY timestamp DESC LIMIT 5"
-        )
-        rows = cursor.fetchall()
-        conn.close()
-        for timestamp, _ in rows:
-            print(f" {datetime.fromtimestamp(timestamp)}")
-
-    return rows
-
-
-def extract_stats(data_json):
-    try:
-        data = json.loads(data_json)
-        total_prompt_tokens = float(data["vllm:prompt_tokens_total"][0]["value"])
-        total_generation_tokens = float(
-            data["vllm:generation_tokens_total"][0]["value"]
-        )
-        total_requests = sum(
-            float(item["value"]) for item in data["vllm:request_success_total"]
-        )
-        avg_prompt_throughput = float(
-            data["vllm:avg_prompt_throughput_toks_per_s"][0]["value"]
-        )
-        avg_generation_throughput = float(
-            data["vllm:avg_generation_throughput_toks_per_s"][0]["value"]
-        )
-        gpu_cache_usage_perc = float(data["vllm:gpu_cache_usage_perc"][0]["value"])
-        num_requests_running = float(data["vllm:num_requests_running"][0]["value"])
-
-    except (KeyError, IndexError, json.JSONDecodeError) as e:
-        print(f"Error extracting stats from data: {str(e)}")
-        return None
-
-    return {
-        "total_prompt_tokens": total_prompt_tokens,
-        "total_generation_tokens": total_generation_tokens,
-        "total_requests": total_requests,
-        "avg_prompt_throughput": avg_prompt_throughput,
-        "avg_generation_throughput": avg_generation_throughput,
-        "gpu_cache_usage_perc": gpu_cache_usage_perc,
-        "num_requests_running": num_requests_running,
-    }
-
-
-def main(db_file, hours):
-    latest_rows = get_latest_rows(db_file, hours)
-
-    if not latest_rows:
-        print(f"No rows found for the last {hours} hour(s).")
-        return
-
-    print(f"Processing {len(latest_rows)} rows.")
-
-    valid_stats = [
-        extract_stats(data)
-        for _, data in latest_rows
-        if extract_stats(data) is not None
-    ]
-
-    if not valid_stats:
-        print("No valid statistics could be extracted from the rows.")
-        return
-
-    first_stats = valid_stats[-1]  # Oldest row
-    last_stats = valid_stats[0]  # Newest row
-
-    tokens_processed = (
-        last_stats["total_prompt_tokens"]
-        - first_stats["total_prompt_tokens"]
-        + last_stats["total_generation_tokens"]
-        - first_stats["total_generation_tokens"]
-    )
-    requests_processed = last_stats["total_requests"] - first_stats["total_requests"]
-
-    avg_prompt_throughput = sum(
-        stat["avg_prompt_throughput"] for stat in valid_stats
-    ) / len(valid_stats)
-    avg_generation_throughput = sum(
-        stat["avg_generation_throughput"] for stat in valid_stats
-    ) / len(valid_stats)
-    avg_num_requests_running = sum(
-        stat["num_requests_running"] for stat in valid_stats
-    ) / len(valid_stats)
-    avg_gpu_cache_usage_perc = sum(
-        stat["gpu_cache_usage_perc"] for stat in valid_stats
-    ) / len(valid_stats)
-
-    print(f"\nStats for the last {hours} hour(s):")
-    print(f"Tokens processed: {tokens_processed:,.0f}")
-    print(f"Requests processed: {requests_processed:,.0f}")
-    print(f"Average prompt throughput: {avg_prompt_throughput:.2f} tokens/s")
-    print(f"Average generation throughput: {avg_generation_throughput:.2f} tokens/s")
-    print(
-        f"Average number of requests running: {avg_num_requests_running:.2f} requests"
-    )
-    print(f"Average GPU cache usage percent: {avg_gpu_cache_usage_perc * 100:.2f}%")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Extract stats from a SQLite database for a specified time period"
-    )
-    parser.add_argument("db_file", help="Path to the SQLite database file")
-    parser.add_argument(
-        "--hours", type=int, default=1, help="Number of hours to look back (default: 1)"
-    )
-    args = parser.parse_args()
-
-    main(args.db_file, args.hours)
src/globals.py (new file)
@@ -0,0 +1,14 @@
+import os
+
+
+# Load environment variables from .env file
+def load_env():
+    try:
+        with open(".env") as f:
+            for line in f:
+                line = line.strip()
+                if line and not line.startswith("#"):
+                    key, value = line.split("=", 1)
+                    os.environ[key.strip()] = value.strip()
+    except FileNotFoundError:
+        print("No .env file found. Using default environment variables.")
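load_env is a deliberately small stand-in for a dotenv library: it splits each non-comment, non-blank line on the first "=", exports the pair, and unconditionally overwrites any variable already present in the process environment. It also opens ".env" relative to the current working directory, which under run.bat is the repository root. A hedged usage sketch:

    import os
    from globals import load_env  # assumes src/ is on sys.path, as it is for the scripts in src/

    load_env()  # prints a notice and keeps the defaults when .env is absent
    port = int(os.getenv("PORT", 4421))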
src/graph.py (164 lines changed)
@@ -3,7 +3,6 @@ import json
 import logging
 import os
 import sqlite3
-import subprocess
 import threading
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -14,9 +13,21 @@ import plotly.offline as pyo
 from flask import Flask, render_template, request, send_file
 from plotly.subplots import make_subplots
 
-# Set up logging with a higher level
+from globals import load_env
+
+load_env()
+
+# Configuration
+HOST = os.getenv("HOST", "0.0.0.0")
+PORT = int(os.getenv("PORT", 4421))
+DATA_DIR = os.getenv("DATA_DIR", "./data")
+UPDATE_INTERVAL = int(os.getenv("UPDATE_INTERVAL", 30))
+LOG_LEVEL = os.getenv("LOG_LEVEL", "WARNING")
+
+# Set up logging
 logging.basicConfig(
-    level=logging.WARNING,  # Changed from DEBUG to WARNING
+    level=getattr(logging, LOG_LEVEL),
     format="%(asctime)s - %(levelname)s - %(message)s",
 )
 
@@ -75,12 +86,10 @@ async def load_data_from_db(filepath):
 
 
 async def load_data():
-    data_dir = "./data"
-
     tasks = []
-    for filename in os.listdir(data_dir):
+    for filename in os.listdir(DATA_DIR):
         if filename.endswith(".sqlite"):
-            filepath = os.path.join(data_dir, filename)
+            filepath = os.path.join(DATA_DIR, filename)
             tasks.append(load_data_from_db(filepath))
 
     await asyncio.gather(*tasks)
@@ -94,7 +103,7 @@ async def load_data():
 async def background_data_loader():
     while True:
         await load_data()
-        await asyncio.sleep(30)  # Check for updates every 30 seconds
+        await asyncio.sleep(UPDATE_INTERVAL)
 
 
 def start_background_loop(loop):
@@ -235,16 +244,136 @@ def create_plots(selected_model):
     return fig
 
 
+def get_latest_rows(db_file, hours=1):
+    now = datetime.now()
+    one_hour_ago = now - timedelta(hours=hours)
+    one_hour_ago_timestamp = int(one_hour_ago.timestamp())
+
+    conn = sqlite3.connect(db_file)
+    cursor = conn.cursor()
+
+    cursor.execute(
+        f"SELECT timestamp, data FROM json_data WHERE timestamp >= {one_hour_ago_timestamp} ORDER BY timestamp DESC"
+    )
+    rows = cursor.fetchall()
+
+    conn.close()
+
+    if not rows:
+        print(
+            f"No rows found in the last {hours} hour(s). Showing info for last 5 rows:"
+        )
+        conn = sqlite3.connect(db_file)
+        cursor = conn.cursor()
+        cursor.execute(
+            f"SELECT timestamp, data FROM json_data ORDER BY timestamp DESC LIMIT 5"
+        )
+        rows = cursor.fetchall()
+        conn.close()
+        for timestamp, _ in rows:
+            print(f" {datetime.fromtimestamp(timestamp)}")
+
+    return rows
+
+
+def extract_stats(data_json):
+    try:
+        data = json.loads(data_json)
+        total_prompt_tokens = float(data["vllm:prompt_tokens_total"][0]["value"])
+        total_generation_tokens = float(
+            data["vllm:generation_tokens_total"][0]["value"]
+        )
+        total_requests = sum(
+            float(item["value"]) for item in data["vllm:request_success_total"]
+        )
+        avg_prompt_throughput = float(
+            data["vllm:avg_prompt_throughput_toks_per_s"][0]["value"]
+        )
+        avg_generation_throughput = float(
+            data["vllm:avg_generation_throughput_toks_per_s"][0]["value"]
+        )
+        gpu_cache_usage_perc = float(data["vllm:gpu_cache_usage_perc"][0]["value"])
+        num_requests_running = float(data["vllm:num_requests_running"][0]["value"])
+
+    except (KeyError, IndexError, json.JSONDecodeError) as e:
+        print(f"Error extracting stats from data: {str(e)}")
+        return None
+
+    return {
+        "total_prompt_tokens": total_prompt_tokens,
+        "total_generation_tokens": total_generation_tokens,
+        "total_requests": total_requests,
+        "avg_prompt_throughput": avg_prompt_throughput,
+        "avg_generation_throughput": avg_generation_throughput,
+        "gpu_cache_usage_perc": gpu_cache_usage_perc,
+        "num_requests_running": num_requests_running,
+    }
+
+
+def get_data(db_file, hours):
+    latest_rows = get_latest_rows(db_file, hours)
+
+    if not latest_rows:
+        print(f"No rows found for the last {hours} hour(s).")
+        return
+
+    print(f"Processing {len(latest_rows)} rows.")
+
+    valid_stats = [
+        extract_stats(data)
+        for _, data in latest_rows
+        if extract_stats(data) is not None
+    ]
+
+    if not valid_stats:
+        print("No valid statistics could be extracted from the rows.")
+        return
+
+    first_stats = valid_stats[-1]  # Oldest row
+    last_stats = valid_stats[0]  # Newest row
+
+    tokens_processed = (
+        last_stats["total_prompt_tokens"]
+        - first_stats["total_prompt_tokens"]
+        + last_stats["total_generation_tokens"]
+        - first_stats["total_generation_tokens"]
+    )
+    requests_processed = last_stats["total_requests"] - first_stats["total_requests"]
+
+    avg_prompt_throughput = sum(
+        stat["avg_prompt_throughput"] for stat in valid_stats
+    ) / len(valid_stats)
+    avg_generation_throughput = sum(
+        stat["avg_generation_throughput"] for stat in valid_stats
+    ) / len(valid_stats)
+    avg_num_requests_running = sum(
+        stat["num_requests_running"] for stat in valid_stats
+    ) / len(valid_stats)
+    avg_gpu_cache_usage_perc = sum(
+        stat["gpu_cache_usage_perc"] for stat in valid_stats
+    ) / len(valid_stats)
+
+    return (
+        f"\nStats for the last {hours} hour(s):\n"
+        f"Tokens processed: {tokens_processed:,.0f}\n"
+        f"Requests processed: {requests_processed:,.0f}\n"
+        f"Average prompt throughput: {avg_prompt_throughput:.2f} tokens/s\n"
+        f"Average generation throughput: {avg_generation_throughput:.2f} tokens/s\n"
+        f"Average tokens per request: {tokens_processed/requests_processed:,.2f} tokens\n"
+        f"Average number of requests running: {avg_num_requests_running:.2f} requests\n"
+        f"Average GPU cache usage percent: {avg_gpu_cache_usage_perc * 100:.2f}%"
+    )
+
+
 app = Flask(__name__)
 
 
 @app.route("/", methods=["GET", "POST"])
 def index():
-    data_dir = "./data"
     model_names = [
         name[:-7]
-        for name in os.listdir(data_dir)
-        if name.endswith(".sqlite") and os.path.isfile(os.path.join(data_dir, name))
+        for name in os.listdir(DATA_DIR)
+        if name.endswith(".sqlite") and os.path.isfile(os.path.join(DATA_DIR, name))
     ]
     valid_model_names = set(model_names)
 
@@ -269,14 +398,7 @@ def index():
                 "An error occurred while creating the plot. Please try again later."
             )
 
-        command = [
-            "python",
-            "get_data.py",
-            "--hours",
-            "24",
-            f".\\data\\{selected_model}.sqlite",
-        ]
-        result = subprocess.run(command, capture_output=True, text=True)
+        result = get_data(f"{DATA_DIR}/{selected_model}.sqlite", 24)
     else:
         logging.error(f"Invalid model selected: {selected_model}")
         result = None
@@ -286,7 +408,7 @@ def index():
         plot_div=plot_div,
         model_name=selected_model,
        model_names=model_names,
-        result=result.stdout if result else None,
+        result=result,
         error_message=error_message,
     )
 
@@ -317,4 +439,4 @@ if __name__ == "__main__":
     t.start()
 
     asgi_app = WsgiToAsgi(app)
-    uvicorn.run(asgi_app, host="0.0.0.0", port=4421)
+    uvicorn.run(asgi_app, host=HOST, port=PORT)
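One behavioral note on the new logging setup: getattr(logging, LOG_LEVEL) resolves the level constant by attribute name, so LOG_LEVEL must be an exact upper-case level name; an unknown value such as "TRACE" raises AttributeError as soon as the module loads. A quick illustration:

    import logging

    getattr(logging, "WARNING")  # -> 30, the numeric level constant
    # getattr(logging, "TRACE") raises AttributeError, so LOG_LEVEL should be
    # one of DEBUG, INFO, WARNING, ERROR, CRITICAL.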
src/monitor.py
@@ -6,6 +6,8 @@ from datetime import datetime
 import logging
 import sqlite3
 
+from globals import load_env
+
 # Set up basic configuration for logging
 logging.basicConfig(
     level=logging.DEBUG,
@@ -16,8 +18,17 @@ logging.basicConfig(
 
 print("Starting monitor.")
 
+load_env()
+
+# Configuration
+USER_AGENT = os.getenv(
+    "USER_AGENT",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
+)
+MODEL_CONFIG = os.getenv("MODEL_CONFIG", "./models.json")
+
 # Load model info
-with open("models.json", "r") as file:
+with open(MODEL_CONFIG, "r") as file:
     models = json.load(file)
 
 
@@ -25,9 +36,7 @@ def call_metrics_endpoint(model_name, base_url):
     url = f"{base_url}/metrics"
     logging.debug(f"Calling metrics endpoint: {url}")
     try:
-        headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
-        }
+        headers = {"User-Agent": USER_AGENT}
         response = requests.get(url, headers=headers)
         response.raise_for_status()
         logging.debug(f"Received successful response from {url}")
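The vllm:* metric names above come from vLLM's Prometheus-style /metrics endpoint, which is what call_metrics_endpoint polls. A minimal sketch of that request, with a hypothetical base URL (the real ones presumably come from the models.json entries):

    import requests

    # http://localhost:8000 is a placeholder, not a URL from this repository.
    resp = requests.get(
        "http://localhost:8000/metrics",
        headers={"User-Agent": "example-agent"},  # placeholder UA string
    )
    resp.raise_for_status()
    print(resp.text[:200])  # raw Prometheus exposition text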