Merge pull request #1 from LukeHagar/master

This commit is contained in:
Luke Hagar
2025-08-12 10:37:09 -05:00
committed by GitHub
104 changed files with 6376 additions and 4825 deletions

View File

@@ -0,0 +1,33 @@
{
	// Attach VS Code to the `web` service defined by the compose files
	// (base + dev override) rather than building a standalone image.
	"name": "pypistats.dev",
	"dockerComposeFile": [
		"../docker-compose.yml",
		"../docker-compose.dev.yml"
	],
	"service": "web",
	// Matches the image's WORKDIR so paths line up with the bind mount.
	"workspaceFolder": "/app",
	"build": {
		"args": {
			// Skip the production `pnpm build` step; the dev override runs Vite instead.
			"SKIP_APP_BUILD": "1"
		}
	},
	// NOTE(review): "runArgs" only applies to image/Dockerfile-based dev
	// containers and is ignored when "dockerComposeFile" is set — confirm the
	// .env values are instead supplied via the compose files' env_file.
	"runArgs": ["--env-file", ".env"],
	// 5173 = Vite dev server, 3000 = Node adapter, 5555 = Prisma Studio.
	"forwardPorts": [5173, 3000, 5555],
	"portsAttributes": {
		"5173": { "label": "Vite Dev Server" },
		"3000": { "label": "Node Adapter Server" },
		"5555": { "label": "Prisma Studio" }
	},
	"customizations": {
		"vscode": {
			"extensions": [
				"esbenp.prettier-vscode",
				"Prisma.prisma",
				"svelte.svelte-vscode",
				"dbaeumer.vscode-eslint"
			]
		}
	}
}

View File

@@ -1,41 +1,15 @@
# project
.git .git
.gitignoreold node_modules
.dockerignore .pnpm-store
.tool-versions .svelte-kit
docker-compose.yml build
envs/ .vscode
.venv/ .idea
scripts/ .DS_Store
kubernetes/ .devcontainer
pypistats.egg-info/ .env
Dockerfile .env.*
Makefile npm-debug.log*
README.rst yarn-debug.log*
yarn-error.log*
# mac osx pnpm-debug.log*
**/.DS_Store
# python bytecode
*.py[cod]
**/__pycache__/
# celery
celerybeat-schedule
celerybeat.pid
# redis
dump.rdb
# Elastic Beanstalk Files
.elasticbeanstalk
.ebignore
# intellij
.idea/
# secrets
*.env
.env.sample
.gitignore

View File

@@ -1,23 +0,0 @@
ENV=development
CELERY_BROKER_URL=redis://redis
FLOWER_PORT=5555
FLASK_APP=pypistats/run.py
FLASK_DEBUG=1
GOOGLE_TYPE=
GOOGLE_PROJECT_ID=
GOOGLE_PRIVATE_KEY_ID=
GOOGLE_PRIVATE_KEY=
GOOGLE_CLIENT_EMAIL=
GOOGLE_CLIENT_ID=
GOOGLE_AUTH_URI=
GOOGLE_TOKEN_URI=
GOOGLE_AUTH_PROVIDER_X509_CERT_URL=
GOOGLE_CLIENT_X509_CERT_URL=
POSTGRESQL_HOST=postgresql
POSTGRESQL_PORT=5432
POSTGRESQL_USERNAME=admin
POSTGRESQL_PASSWORD=root
POSTGRESQL_DBNAME=pypistats
GITHUB_CLIENT_ID=
GITHUB_CLIENT_SECRET=
PYPISTATS_SECRET=secret

168
.gitignore vendored
View File

@@ -1,155 +1,25 @@
# Byte-compiled / optimized / DLL files node_modules
__pycache__/
*.py[cod]
*$py.class
# C extensions # Output
*.so .output
.vercel
.netlify
.wrangler
/.svelte-kit
/build
# Distribution / packaging # OS
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
envs/.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# MacOS
.DS_Store .DS_Store
Thumbs.db
# Intellij # Env
.idea/ .env
.env.*
!.env.example
# TODO remove !.env.test
# EB
.elasticbeanstalk/
# Creds
envs/
*.env
# Vite
vite.config.js.timestamp-*
vite.config.ts.timestamp-*
/../generated/prisma

1
.npmrc Normal file
View File

@@ -0,0 +1 @@
engine-strict=true

9
.prettierignore Normal file
View File

@@ -0,0 +1,9 @@
# Package Managers
package-lock.json
pnpm-lock.yaml
yarn.lock
bun.lock
bun.lockb
# Miscellaneous
/static/

16
.prettierrc Normal file
View File

@@ -0,0 +1,16 @@
{
"useTabs": true,
"singleQuote": true,
"trailingComma": "none",
"printWidth": 100,
"plugins": ["prettier-plugin-svelte", "prettier-plugin-tailwindcss"],
"overrides": [
{
"files": "*.svelte",
"options": {
"parser": "svelte"
}
}
],
"tailwindStylesheet": "./src/app.css"
}

View File

@@ -1,3 +0,0 @@
python 3.8.5
poetry 1.0.10
kubectl 1.17.4

View File

@@ -1,43 +1,37 @@
FROM python:3.8.5-slim FROM node:20-slim
# Add build deps for python packages # Install deps needed by Prisma and shell
# libpq-dev is required to install psycopg2-binary RUN apt-get update && apt-get install -y openssl bash && rm -rf /var/lib/apt/lists/*
# curl is used to install poetry
RUN apt-get update && \
apt-get install -y curl libpq-dev && \
apt-get clean
# Set the working directory to /app
WORKDIR /app WORKDIR /app
# Create python user to avoid having to run as root # Allow skipping app build in devcontainer
RUN useradd -m python && \ ARG SKIP_APP_BUILD=0
chown python:python -R /app
# Set the user
USER python
# Set the poetry version # Copy package manifests first for better cache
ARG POETRY_VERSION=1.0.10 COPY package.json pnpm-lock.yaml* ./
# Set to ensure logs are output promptly
ENV PYTHONUNBUFFERED=1
# Update the path
ENV PATH=/home/python/.poetry/bin:/home/python/.local/bin:$PATH
# Install vendored poetry # Enable and use pnpm via corepack
RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python RUN corepack enable && corepack prepare pnpm@9.12.3 --activate
# Add poetry stuff # Install dependencies
ADD pyproject.toml . RUN pnpm install --frozen-lockfile
ADD poetry.lock .
# Install all the dependencies and cleanup # Copy the rest of the source
RUN poetry config virtualenvs.create false && \ COPY . .
poetry run pip install --user -U pip && \
poetry install --no-dev && \ # Generate Prisma client and build SvelteKit (Node adapter)
"yes" | poetry cache clear --all pypi RUN pnpm prisma generate
RUN if [ "$SKIP_APP_BUILD" != "1" ]; then pnpm build; fi
ENV NODE_ENV=production
# Entrypoint handles migrations and start
COPY docker/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
EXPOSE 3000
ENTRYPOINT ["/entrypoint.sh"]
# Add everything
ADD . .
# Set the entrypoint script
ENTRYPOINT ["./docker-entrypoint.sh"]

View File

@@ -1,33 +0,0 @@
# format everything
fmt:
poetry run isort .
poetry run black .
# launch the application in docker-compose
.PHONY: pypistats
pypistats:
docker-compose down
docker-compose build
docker-compose up
# bring down the application and destroy the db volumes
cleanup:
docker-compose down -v
# setup a local environment
setup:
brew install asdf || true
asdf install
poetry install
# deploy to gke
deploy:
sh kubernetes/deploy.sh
# port forward flower
pfflower:
open http://localhost:7777 && kubectl get pods -n pypistats | grep flower | awk '{print $$1}' | xargs -I % kubectl port-forward -n pypistats % 7777:5555
# port forward web
pfweb:
open http://localhost:7000 && kubectl get pods -n pypistats | grep web | awk '{print $$1}' | xargs -I % kubectl port-forward -n pypistats % 7000:5000

38
README.md Normal file
View File

@@ -0,0 +1,38 @@
# sv
Everything you need to build a Svelte project, powered by [`sv`](https://github.com/sveltejs/cli).
## Creating a project
If you're seeing this, you've probably already done this step. Congrats!
```sh
# create a new project in the current directory
npx sv create
# create a new project in my-app
npx sv create my-app
```
## Developing
Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server:
```sh
npm run dev
# or start the server and open the app in a new browser tab
npm run dev -- --open
```
## Building
To create a production version of your app:
```sh
npm run build
```
You can preview the production build with `npm run preview`.
> To deploy your app, you may need to install an [adapter](https://svelte.dev/docs/kit/adapters) for your target environment.

View File

@@ -1,26 +0,0 @@
PyPI Stats
==========
A simple analytics dashboard for aggregate data on PyPI downloads. PyPI Stats is built using Flask with plotly.js.
`PyPI Stats <https://pypistats.org/>`_
GitHub OAuth
------------
PyPI Stats has an integration with GitHub so you can track install data on the packages you maintain.
`User page <https://pypistats.org/user>`_
JSON API
--------
PyPI Stats provides a simple JSON API to retrieve aggregate download stats and time histories of pypi packages.
`JSON API <https://pypistats.org/api>`_
Development
-----------
Run ``make pypistats`` to launch a complete development environment using docker-compose.

24
README_DOCKER.md Normal file
View File

@@ -0,0 +1,24 @@
### Running locally with Docker
Prerequisites: Docker and Docker Compose.
1. Build and start the full stack (Postgres, Redis, Web):
```
docker compose up --build
```
2. Configure BigQuery credentials via environment variables (e.g., export `GOOGLE_PROJECT_ID` and `GOOGLE_APPLICATION_CREDENTIALS_JSON`). For local compose, you can add them under the `web.environment` section in `docker-compose.yml`.
3. The app runs on `http://localhost:3000`.
Environment variables of interest:
- `DATABASE_URL`: Postgres connection string.
- `REDIS_URL`: Redis URL.
- `ENABLE_CRON`: Set to `true` to run the daily ETL.
- `CRON_SCHEDULE`: Cron string (default 2 AM UTC daily).
- `GOOGLE_PROJECT_ID`, `GOOGLE_APPLICATION_CREDENTIALS_JSON` or `GOOGLE_APPLICATION_CREDENTIALS` for BigQuery.
The container entrypoint waits for Postgres, applies Prisma migrations, then starts the app.

26
docker-compose.dev.yml Normal file
View File

@@ -0,0 +1,26 @@
# Development override: run the SvelteKit dev server with live-reload.
# Usage: docker compose -f docker-compose.yml -f docker-compose.dev.yml up
# NOTE: the top-level `version` key is obsolete in the Compose specification
# (Compose v2 warns and ignores it), so it has been dropped.
services:
  web:
    build:
      context: .
      args:
        # Skip the image's `pnpm build`; sources are bind-mounted below and
        # served by Vite instead.
        SKIP_APP_BUILD: "1"
    # Install deps, generate the Prisma client, apply migrations (best-effort:
    # `|| true` lets the dev server start even if the DB is not ready yet),
    # then start Vite bound to all interfaces so the host can reach it.
    command: sh -lc "\
      corepack enable && corepack prepare pnpm@9.12.3 --activate && \
      pnpm install && \
      pnpm prisma generate && \
      pnpm prisma migrate deploy || true && \
      pnpm dev --host 0.0.0.0 --port 5173"
    volumes:
      - ./:/app
      # Named volume shadows the bind mount so container-installed
      # node_modules never leak into (or get clobbered by) the host tree.
      - web_node_modules:/app/node_modules
    env_file:
      - .env
    ports:
      - "5173:5173"

volumes:
  web_node_modules:

View File

@@ -1,92 +1,53 @@
x-envs: &envs version: '3.9'
environment:
- FLASK_APP=pypistats/run.py
- FLASK_ENV=development
- FLASK_DEBUG=1
- POSTGRESQL_HOST=postgresql
- POSTGRESQL_PORT=5432
- POSTGRESQL_USERNAME=admin
- POSTGRESQL_PASSWORD=root
- POSTGRESQL_DBNAME=pypistats
- CELERY_BROKER_URL=redis://redis
- BASIC_AUTH_USER=user
- BASIC_AUTH_PASSWORD=password
version: "3.4"
volumes:
pgdata: {}
services: services:
web: db:
build: image: postgres:16
context: . environment:
image: web POSTGRES_DB: pypistats
command: webdev POSTGRES_USER: pypistats
depends_on: POSTGRES_PASSWORD: pypistats
- postgresql volumes:
<<: *envs - pgdata:/var/lib/postgresql/data
ports: ports:
- "5000:5000" - "5432:5432"
volumes: healthcheck:
- "./pypistats/:/app/pypistats/" test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER -d $$POSTGRES_DB"]
beat: interval: 5s
image: web timeout: 5s
command: beat retries: 20
depends_on:
- redis
<<: *envs
volumes:
- "./pypistats/:/app/pypistats/"
celery:
image: web
command: celery
depends_on:
- redis
- postgresql
<<: *envs
volumes:
- "./pypistats/:/app/pypistats/"
flower:
image: web
command: flower
depends_on:
- redis
<<: *envs
ports:
- "5555:5555"
volumes:
- "./pypistats/:/app/pypistats/"
migrate:
image: web
command: migrate
depends_on:
- postgresql
<<: *envs
volumes:
- "./pypistats/:/app/pypistats/"
- "./migrations/:/app/migrations/"
seeds:
image: web
command: seeds
depends_on:
- postgresql
- migrate
<<: *envs
volumes:
- "./pypistats/:/app/pypistats/"
- "./migrations/:/app/migrations/"
redis: redis:
image: "redis:5.0.7-alpine" image: redis:7
ports: ports:
- "6379:6379" - "6379:6379"
postgresql: healthcheck:
image: "postgres:12" test: ["CMD", "redis-cli", "ping"]
interval: 5s
timeout: 5s
retries: 20
web:
build: .
depends_on:
db:
condition: service_healthy
redis:
condition: service_healthy
environment: environment:
- POSTGRES_USER=admin NODE_ENV: production
- POSTGRES_PASSWORD=root PORT: 3000
- POSTGRES_DB=pypistats DATABASE_URL: postgresql://pypistats:pypistats@db:5432/pypistats?schema=public
REDIS_URL: redis://redis:6379
ENABLE_CRON: "true"
# Set your BigQuery project and credentials
# GOOGLE_PROJECT_ID: your-project
# GOOGLE_APPLICATION_CREDENTIALS_JSON: '{"type":"service_account",...}'
ports: ports:
- "5433:5432" - "3000:3000"
command: ["/entrypoint.sh"]
volumes: volumes:
- "pgdata:/var/lib/postgresql/data" pgdata:

View File

@@ -1,36 +0,0 @@
#!/usr/bin/env bash
if [[ "$1" = "webdev" ]]
then
exec poetry run flask run --host 0.0.0.0
fi
if [[ "$1" = "web" ]]
then
exec poetry run gunicorn -b 0.0.0.0:5000 -w 2 --access-logfile - --error-log - --access-logformat "%({x-forwarded-for}i)s %(l)s %(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\"" pypistats.run:app
fi
if [[ "$1" = "celery" ]]
then
exec poetry run celery -A pypistats.extensions.celery worker -l info --concurrency=1
fi
if [[ "$1" = "beat" ]]
then
exec poetry run celery -A pypistats.extensions.celery beat -l info
fi
if [[ "$1" = "flower" ]]
then
exec poetry run flower -A pypistats.extensions.celery -l info
fi
if [[ "$1" = "migrate" ]]
then
exec poetry run flask db upgrade
fi
if [[ "$1" = "seeds" ]]
then
exec poetry run python -m migrations.seeds
fi

71
docker/entrypoint.sh Normal file
View File

@@ -0,0 +1,71 @@
#!/usr/bin/env bash
# Container entrypoint: wait for Postgres, apply Prisma migrations, start the
# SvelteKit Node-adapter server.
# NOTE(review): the committed file contained the entire script twice
# (everything after the first `exec` was an unreachable duplicate); the
# duplicate has been removed.
set -euo pipefail

# Wait for Postgres if DATABASE_URL is provided.
# Uses Node's built-in `net` module for the TCP probe: `pg` is not a declared
# dependency in package.json, so the previous `require('pg')` probe always
# failed with MODULE_NOT_FOUND and the readiness loop could never succeed.
if [[ -n "${DATABASE_URL:-}" ]]; then
  echo "Waiting for database..."
  ATTEMPTS=0
  until node -e "
    const net = require('net');
    const u = new URL(process.env.DATABASE_URL);
    const s = net.connect({ host: u.hostname, port: Number(u.port || 5432) });
    s.setTimeout(2000, () => { s.destroy(); process.exit(1); });
    s.on('connect', () => { s.end(); process.exit(0); });
    s.on('error', () => process.exit(1));
  " >/dev/null 2>&1; do
    ATTEMPTS=$((ATTEMPTS+1))
    if [[ $ATTEMPTS -gt 60 ]]; then
      echo "Database did not become ready in time" >&2
      exit 1
    fi
    sleep 1
  done
fi

# Run Prisma migrations (safe for prod) with retry; opt out with
# RUN_DB_MIGRATIONS=0 (defaults to on).
if [[ "${RUN_DB_MIGRATIONS:-1}" == "1" ]]; then
  echo "Running prisma migrate deploy..."
  ATTEMPTS=0
  until pnpm prisma migrate deploy; do
    ATTEMPTS=$((ATTEMPTS+1))
    if [[ $ATTEMPTS -gt 10 ]]; then
      echo "Prisma migrate failed after retries" >&2
      exit 1
    fi
    echo "Retrying migrations in 3s..."
    sleep 3
  done
fi

# Start the app (SvelteKit Node adapter). `exec` replaces the shell so node
# runs as PID 1 and receives SIGTERM from `docker stop`.
exec node build/index.js

View File

@@ -1,30 +0,0 @@
#!/usr/bin/env bash
docker build -t us.gcr.io/pypistats-org/pypistats:$(poetry version | tail -c +14) .
docker push us.gcr.io/pypistats-org/pypistats:$(poetry version | tail -c +14)
kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0/aio/deploy/recommended.yaml
# create namespace ``pypistats``
kubectl apply -f kubernetes/namespace.yaml
# create secret from the env file
#kubectl delete secret pypistats-secrets --namespace=pypistats
# create
kubectl create secret generic pypistats-secrets --from-env-file=gke.env --namespace=pypistats
# update
kubectl create secret generic pypistats-secrets --from-env-file=gke.env --namespace=pypistats --dry-run -o yaml | kubectl apply -f -
# create redis and flower
kubectl apply -f kubernetes/redis.yaml --namespace=pypistats
kubectl apply -f kubernetes/flower.yaml --namespace=pypistats
# launch the web components
kubectl apply -f kubernetes/web.yaml --namespace=pypistats
# launch the tasks components
kubectl apply -f kubernetes/tasks.yaml --namespace=pypistats
# get info about connecting
kubectl cluster-info
kubectl get services --namespace=pypistats

View File

@@ -1,12 +0,0 @@
#!/usr/bin/env bash
poetry version major
export PYPISTATS_VERSION=$(poetry version | tail -c +14)
docker build -t us.gcr.io/pypistats-org/pypistats:${PYPISTATS_VERSION} .
docker push us.gcr.io/pypistats-org/pypistats:${PYPISTATS_VERSION}
kubectl create secret generic pypistats-secrets --from-env-file=gke.env --namespace=pypistats --dry-run -o yaml | kubectl apply -f -
sed -i '.bak' 's|us.gcr.io\/pypistats-org\/pypistats.*|us.gcr.io\/pypistats-org\/pypistats:'"$PYPISTATS_VERSION"'|g' kubernetes/*.yaml
rm kubernetes/*.bak
kubectl apply -f kubernetes/redis.yaml --namespace=pypistats
kubectl apply -f kubernetes/tasks.yaml --namespace=pypistats
kubectl apply -f kubernetes/flower.yaml --namespace=pypistats
kubectl apply -f kubernetes/web.yaml --namespace=pypistats

View File

@@ -1,45 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: flower
namespace: pypistats
labels:
app: pypistats
component: flower
spec:
replicas: 1
selector:
matchLabels:
app: pypistats
component: flower
template:
metadata:
labels:
app: pypistats
component: flower
spec:
containers:
- name: pypistats-flower
image: us.gcr.io/pypistats-org/pypistats:11
imagePullPolicy: Always
args: ["flower"]
envFrom:
- secretRef:
name: pypistats-secrets
---
apiVersion: v1
kind: Service
metadata:
name: flower
labels:
app: pypistats
component: flower
spec:
ports:
- port: 5555
targetPort: 5555
selector:
app: pypistats
component: flower

View File

@@ -1,4 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: pypistats

View File

@@ -1,36 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: redis
labels:
app: redis
spec:
selector:
matchLabels:
app: redis
replicas: 1
template:
metadata:
labels:
app: redis
spec:
containers:
- name: redis
image: redis:5.0.7-alpine
ports:
- containerPort: 6379
---
apiVersion: v1
kind: Service
metadata:
name: redis
labels:
app: redis
spec:
ports:
- port: 6379
targetPort: 6379
selector:
app: redis

View File

@@ -1,35 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: tasks
namespace: pypistats
labels:
app: pypistats
component: tasks
spec:
replicas: 1
selector:
matchLabels:
app: pypistats
component: tasks
template:
metadata:
labels:
app: pypistats
component: tasks
spec:
containers:
- name: beat
image: us.gcr.io/pypistats-org/pypistats:11
imagePullPolicy: Always
args: ["beat"]
envFrom:
- secretRef:
name: pypistats-secrets
- name: celery
image: us.gcr.io/pypistats-org/pypistats:11
imagePullPolicy: Always
args: ["celery"]
envFrom:
- secretRef:
name: pypistats-secrets

View File

@@ -1,81 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: web
namespace: pypistats
labels:
app: pypistats
component: web
spec:
replicas: 2
selector:
matchLabels:
app: pypistats
component: web
template:
metadata:
labels:
app: pypistats
component: web
spec:
initContainers:
- name: migrate
image: us.gcr.io/pypistats-org/pypistats:11
imagePullPolicy: Always
envFrom:
- secretRef:
name: pypistats-secrets
args: ["migrate"]
containers:
- name: web
image: us.gcr.io/pypistats-org/pypistats:11
imagePullPolicy: Always
envFrom:
- secretRef:
name: pypistats-secrets
args: ["web"]
ports:
- containerPort: 5000
readinessProbe:
httpGet:
path: /health
port: 5000
initialDelaySeconds: 5
periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
name: web
namespace: pypistats
spec:
type: NodePort
ports:
- name: http
protocol: TCP
port: 5000
targetPort: 5000
selector:
app: pypistats
component: web
---
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
name: web
namespace: pypistats
spec:
backend:
serviceName: web
servicePort: http
rules:
- http:
paths:
- backend:
serviceName: web
servicePort: http
path: /

View File

@@ -1 +0,0 @@
Generic single-database configuration.

View File

@@ -1,45 +0,0 @@
# A generic, single database configuration.
[alembic]
# template used to generate migration files
file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d%%(second).2d_%%(rev)s_%%(slug)s
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

View File

@@ -1,88 +0,0 @@
import logging
from logging.config import fileConfig
from alembic import context
from flask import current_app
from sqlalchemy import engine_from_config
from sqlalchemy import pool
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
fileConfig(config.config_file_name)
logger = logging.getLogger("alembic.env")
config.set_main_option("sqlalchemy.url", current_app.config.get("SQLALCHEMY_DATABASE_URI"))
target_metadata = current_app.extensions["migrate"].db.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline():
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = config.get_main_option("sqlalchemy.url")
context.configure(url=url)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online():
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
"""
# this callback is used to prevent an auto-migration from being generated
# when there are no changes to the schema
# reference: http://alembic.zzzcomputing.com/en/latest/cookbook.html
def process_revision_directives(context, revision, directives):
if getattr(config.cmd_opts, "autogenerate", False):
script = directives[0]
if script.upgrade_ops.is_empty():
directives[:] = []
logger.info("No changes in schema detected.")
engine = engine_from_config(
config.get_section(config.config_ini_section), prefix="sqlalchemy.", poolclass=pool.NullPool
)
connection = engine.connect()
context.configure(
connection=connection,
target_metadata=target_metadata,
compare_type=True,
process_revision_directives=process_revision_directives,
**current_app.extensions["migrate"].configure_args,
)
try:
with context.begin_transaction():
context.run_migrations()
finally:
connection.close()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()

View File

@@ -1,24 +0,0 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
def upgrade():
${upgrades if upgrades else "pass"}
def downgrade():
${downgrades if downgrades else "pass"}

View File

@@ -1,97 +0,0 @@
import datetime
import logging
import random
import subprocess
import sys
from pypistats.application import create_app
from pypistats.application import db
from pypistats.models.download import OverallDownloadCount
from pypistats.models.download import PythonMajorDownloadCount
from pypistats.models.download import PythonMinorDownloadCount
from pypistats.models.download import RecentDownloadCount
from pypistats.models.download import SystemDownloadCount
# required to use the db models outside of the context of the app
app = create_app()
app.app_context().push()
if db.session.query(RecentDownloadCount.package).count() > 0:
print("Seeds already exist.")
sys.exit(0)
# use the currently installed dependencies as seed packages
result = subprocess.run(["poetry", "show"], stdout=subprocess.PIPE)
output = result.stdout.decode()
# extract just the package names from the output
# skip the first line which is a poetry warning
# and the last line which is empty
packages = []
for line in output.split("\n")[1:-1]:
packages.append(line.split(" ")[0])
# add some packages that have optional dependencies
packages.append("apache-airflow")
packages.append("databricks-dbapi")
logging.info(packages)
# take the last 120 days
end_date = datetime.date.today()
date_list = [end_date - datetime.timedelta(days=x) for x in range(120)][::-1]
baseline = 1000
# build a bunch of seed records with random values
records = []
for package in packages + ["__all__"]:
print("Seeding: " + package)
for idx, category in enumerate(["day", "week", "month"]):
record = RecentDownloadCount(
package=package, category=category, downloads=baseline * (idx + 1) + random.randint(-100, 100)
)
records.append(record)
for date in date_list:
for idx, category in enumerate(["with_mirrors", "without_mirrors"]):
record = OverallDownloadCount(
date=date,
package=package,
category=category,
downloads=baseline * (idx + 1) + random.randint(-100, 100),
)
records.append(record)
for idx, category in enumerate(["2", "3"]):
record = PythonMajorDownloadCount(
date=date,
package=package,
category=category,
downloads=baseline * (idx + 1) + random.randint(-100, 100),
)
records.append(record)
for idx, category in enumerate(["2.7", "3.4", "3.5", "3.6", "3.7", "3.8"]):
record = PythonMinorDownloadCount(
date=date,
package=package,
category=category,
downloads=baseline * (idx + 1) + random.randint(-100, 100),
)
records.append(record)
for idx, category in enumerate(["windows", "linux", "darwin"]):
record = SystemDownloadCount(
date=date,
package=package,
category=category,
downloads=baseline * (idx + 1) + random.randint(-100, 100),
)
records.append(record)
# push to the local database
db.session.bulk_save_objects(records)
db.session.commit()

View File

@@ -1,95 +0,0 @@
"""setup_tables
Revision ID: 0cf9945079f1
Revises:
Create Date: 2020-03-03 22:17:51.438119
"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "0cf9945079f1"
down_revision = None
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"overall",
sa.Column("date", sa.Date(), nullable=False),
sa.Column("package", sa.String(length=128), nullable=False),
sa.Column("category", sa.String(length=16), nullable=False),
sa.Column("downloads", sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint("date", "package", "category"),
)
op.create_index(op.f("ix_overall_package"), "overall", ["package"], unique=False)
op.create_table(
"python_major",
sa.Column("date", sa.Date(), nullable=False),
sa.Column("package", sa.String(length=128), nullable=False),
sa.Column("category", sa.String(length=4), nullable=True),
sa.Column("downloads", sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint("date", "package", "category"),
)
op.create_index(op.f("ix_python_major_package"), "python_major", ["package"], unique=False)
op.create_table(
"python_minor",
sa.Column("date", sa.Date(), nullable=False),
sa.Column("package", sa.String(length=128), nullable=False),
sa.Column("category", sa.String(length=4), nullable=True),
sa.Column("downloads", sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint("date", "package", "category"),
)
op.create_index(op.f("ix_python_minor_package"), "python_minor", ["package"], unique=False)
op.create_table(
"recent",
sa.Column("package", sa.String(length=128), nullable=False),
sa.Column("category", sa.String(length=8), nullable=False),
sa.Column("downloads", sa.BigInteger(), nullable=False),
sa.PrimaryKeyConstraint("package", "category"),
)
op.create_index(op.f("ix_recent_package"), "recent", ["package"], unique=False)
op.create_table(
"system",
sa.Column("date", sa.Date(), nullable=False),
sa.Column("package", sa.String(length=128), nullable=False),
sa.Column("category", sa.String(length=8), nullable=True),
sa.Column("downloads", sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint("date", "package", "category"),
)
op.create_index(op.f("ix_system_package"), "system", ["package"], unique=False)
op.create_table(
"users",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("uid", sa.Integer(), nullable=True),
sa.Column("username", sa.String(length=39), nullable=False),
sa.Column("avatar_url", sa.String(length=256), nullable=True),
sa.Column("token", sa.String(length=256), nullable=True),
sa.Column("created_at", sa.DateTime(), nullable=False),
sa.Column("active", sa.Boolean(), nullable=True),
sa.Column("is_admin", sa.Boolean(), nullable=True),
sa.Column("favorites", postgresql.ARRAY(sa.String(length=128), dimensions=1), nullable=True),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("uid"),
)
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("users")
op.drop_index(op.f("ix_system_package"), table_name="system")
op.drop_table("system")
op.drop_index(op.f("ix_recent_package"), table_name="recent")
op.drop_table("recent")
op.drop_index(op.f("ix_python_minor_package"), table_name="python_minor")
op.drop_table("python_minor")
op.drop_index(op.f("ix_python_major_package"), table_name="python_major")
op.drop_table("python_major")
op.drop_index(op.f("ix_overall_package"), table_name="overall")
op.drop_table("overall")
# ### end Alembic commands ###

51
package.json Normal file
View File

@@ -0,0 +1,51 @@
{
"name": "pypistats",
"private": true,
"version": "0.0.1",
"type": "module",
"scripts": {
"dev": "vite dev",
"build": "vite build",
"start": "node build/index.js",
"prepare": "svelte-kit sync || echo ''",
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
"format": "prettier --write .",
"lint": "prettier --check .",
"db:studio": "prisma studio",
"db:generate": "prisma generate",
"db:migrate": "prisma migrate dev",
"db:deploy": "prisma migrate deploy"
},
"dependencies": {
"@google-cloud/bigquery": "^8.1.1",
"@prisma/client": "^6.13.0",
"@sveltejs/adapter-node": "^5.2.8",
"@types/node-cron": "^3.0.11",
"node-cron": "^4.2.1",
"redis": "^5.7.0"
},
"devDependencies": {
"@sveltejs/adapter-auto": "^6.0.1",
"@sveltejs/kit": "^2.27.0",
"@sveltejs/vite-plugin-svelte": "^6.1.0",
"@tailwindcss/forms": "^0.5.10",
"@tailwindcss/typography": "^0.5.16",
"@tailwindcss/vite": "^4.1.11",
"mdsvex": "^0.12.6",
"prettier": "^3.6.2",
"prettier-plugin-svelte": "^3.4.0",
"prettier-plugin-tailwindcss": "^0.6.14",
"prisma": "^6.13.0",
"svelte": "^5.37.3",
"svelte-check": "^4.3.1",
"tailwindcss": "^4.1.11",
"typescript": "^5.9.2",
"vite": "^7.0.6"
},
"pnpm": {
"onlyBuiltDependencies": [
"esbuild"
]
}
}

2797
pnpm-lock.yaml generated Normal file

File diff suppressed because it is too large Load Diff

1290
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,48 @@
-- CreateTable
CREATE TABLE "public"."recent" (
    "package" TEXT NOT NULL,
    "category" TEXT NOT NULL,
    "downloads" BIGINT NOT NULL,

    CONSTRAINT "recent_pkey" PRIMARY KEY ("package","category")
);

-- CreateTable
CREATE TABLE "public"."overall" (
    "date" DATE NOT NULL,
    "package" TEXT NOT NULL,
    "category" TEXT NOT NULL,
    "downloads" INTEGER NOT NULL,

    CONSTRAINT "overall_pkey" PRIMARY KEY ("date","package","category")
);

-- CreateTable
-- FIX: "category" must be NOT NULL and part of the primary key so the table
-- matches the Prisma model (@@id([date, package, category]) — Prisma forbids
-- optional fields in a compound id). A (date, package) key would also reject
-- legitimate rows that differ only by category.
CREATE TABLE "public"."python_major" (
    "date" DATE NOT NULL,
    "package" TEXT NOT NULL,
    "category" TEXT NOT NULL,
    "downloads" INTEGER NOT NULL,

    CONSTRAINT "python_major_pkey" PRIMARY KEY ("date","package","category")
);

-- CreateTable
-- FIX: same key/nullability correction as python_major (see above).
CREATE TABLE "public"."python_minor" (
    "date" DATE NOT NULL,
    "package" TEXT NOT NULL,
    "category" TEXT NOT NULL,
    "downloads" INTEGER NOT NULL,

    CONSTRAINT "python_minor_pkey" PRIMARY KEY ("date","package","category")
);

-- CreateTable
-- FIX: same key/nullability correction as python_major (see above).
CREATE TABLE "public"."system" (
    "date" DATE NOT NULL,
    "package" TEXT NOT NULL,
    "category" TEXT NOT NULL,
    "downloads" INTEGER NOT NULL,

    CONSTRAINT "system_pkey" PRIMARY KEY ("date","package","category")
);

View File

@@ -0,0 +1,3 @@
# Please do not edit this file manually
# It should be added in your version-control system (e.g., Git)
provider = "postgresql"

70
prisma/schema.prisma Normal file
View File

@@ -0,0 +1,70 @@
// This is your Prisma schema file,
// learn more about it in the docs: https://pris.ly/d/prisma-schema

generator client {
  provider = "prisma-client-js"
}

datasource db {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}

// Rolling day/week/month download totals per package (table: recent).
// category is the window name: "day", "week", or "month".
model RecentDownloadCount {
  package   String
  category  String
  downloads BigInt

  @@id([package, category])
  @@map("recent")
}

// Daily download totals per package; category is "with_mirrors" or
// "without_mirrors" (table: overall).
model OverallDownloadCount {
  date      DateTime @db.Date
  package   String
  category  String
  downloads Int

  @@id([date, package, category])
  @@map("overall")
}

// Daily downloads bucketed by python major version (table: python_major).
// NOTE(review): the SQL migration in this PR declares category as nullable
// with a (date, package) primary key for this and the two models below,
// which does not match these @@id definitions — confirm schema and
// migration agree before deploying.
model PythonMajorDownloadCount {
  date      DateTime @db.Date
  package   String
  category  String
  downloads Int

  @@id([date, package, category])
  @@map("python_major")
}

// Daily downloads bucketed by python minor version (table: python_minor).
model PythonMinorDownloadCount {
  date      DateTime @db.Date
  package   String
  category  String
  downloads Int

  @@id([date, package, category])
  @@map("python_minor")
}

// Daily downloads bucketed by operating system (table: system).
model SystemDownloadCount {
  date      DateTime @db.Date
  package   String
  category  String
  downloads Int

  @@id([date, package, category])
  @@map("system")
}

// Daily downloads bucketed by installer name (table: installer).
// NOTE(review): no corresponding CREATE TABLE appears in the migration
// shown in this PR — confirm a migration exists for it.
model InstallerDownloadCount {
  date      DateTime @db.Date
  package   String
  category  String
  downloads Int

  @@id([date, package, category])
  @@map("installer")
}

View File

@@ -1,50 +0,0 @@
"""PyPIStats application."""
from celery import Task
from flask import Flask
from pypistats import views
from pypistats.config import DevConfig
from pypistats.extensions import celery
from pypistats.extensions import db
from pypistats.extensions import github
from pypistats.extensions import migrate
def create_app(config_object=DevConfig):
    """Build and return a configured Flask application.

    Args:
        config_object: configuration class to load (defaults to DevConfig).
    """
    flask_app = Flask(__name__.split(".")[0])
    flask_app.config.from_object(config_object)
    # Extensions must be bound before blueprints/celery can use them.
    register_extensions(flask_app)
    register_blueprints(flask_app)
    init_celery(celery, flask_app)
    return flask_app
def init_celery(celery_, app):
    """Create a celery object.

    Copies the Flask config into celery and installs a Task subclass that
    runs every task body inside the Flask application context, so tasks can
    use app-bound extensions.
    """
    celery_.conf.update(app.config)

    class ContextTask(Task):
        # Marked abstract so celery does not register this base as a task.
        abstract = True

        def __call__(self, *args, **kwargs):
            # Wrap task execution in the app context.
            with app.app_context():
                return Task.__call__(self, *args, **kwargs)

    celery_.Task = ContextTask
def register_blueprints(app):
    """Attach every view blueprint to the application (in display order)."""
    view_modules = (views.admin, views.api, views.error, views.general, views.user)
    for module in view_modules:
        app.register_blueprint(module.blueprint)
def register_extensions(app):
    """Register Flask extensions.

    Binds the shared extension singletons to this app instance; migrate is
    initialized last because it is bound to the db instance.
    """
    db.init_app(app)
    github.init_app(app)
    migrate.init_app(app, db)

View File

@@ -1,71 +0,0 @@
"""Application configuration."""
import os
from celery.schedules import crontab
from flask import json
def get_db_uri():
    """Assemble the PostgreSQL connection URI from POSTGRESQL_* env vars.

    Missing variables render as the literal string "None", matching the
    behavior of the original string formatting.
    """
    username = os.environ.get("POSTGRESQL_USERNAME")
    password = os.environ.get("POSTGRESQL_PASSWORD")
    host = os.environ.get("POSTGRESQL_HOST")
    port = os.environ.get("POSTGRESQL_PORT")
    dbname = os.environ.get("POSTGRESQL_DBNAME")
    return f"postgresql://{username}:{password}@{host}:{port}/{dbname}"
class Config:
    """Base configuration shared by all environments."""

    APP_DIR = os.path.abspath(os.path.dirname(__file__))
    # BUG FIX: the original had a trailing comma here, which made this a
    # one-element tuple instead of the broker URL string.
    CELERY_BROKER_URL = os.environ.get("CELERY_BROKER_URL")
    BROKER_TRANSPORT_OPTIONS = {"visibility_timeout": 86400}  # seconds (24h)
    CELERY_IMPORTS = "pypistats.tasks.pypi"
    CELERYBEAT_SCHEDULE = {
        "update_db": {"task": "pypistats.tasks.pypi.etl", "schedule": crontab(minute=0, hour=1)}  # 1am UTC
    }
    GITHUB_CLIENT_ID = os.environ.get("GITHUB_CLIENT_ID")
    GITHUB_CLIENT_SECRET = os.environ.get("GITHUB_CLIENT_SECRET")
    PROJECT_ROOT = os.path.abspath(os.path.join(APP_DIR, os.pardir))
    SECRET_KEY = os.environ.get("PYPISTATS_SECRET", "secret-key")
    SQLALCHEMY_TRACK_MODIFICATIONS = False
    SQLALCHEMY_DATABASE_URI = get_db_uri()

    # Plotly chart definitions, loaded once at import time.
    # Use context managers so the file handles are closed promptly
    # (the original leaked the handles from bare open() calls).
    with open(os.path.join(APP_DIR, "plots", "plot_base.json")) as _plots_file:
        PLOT_BASE = json.load(_plots_file)
    with open(os.path.join(APP_DIR, "plots", "data_base.json")) as _plots_file:
        DATA_BASE = json.load(_plots_file)
    del _plots_file  # do not keep the file object as a class attribute
class LocalConfig(Config):
    """Local configuration."""

    DEBUG = True
    ENV = "local"


class ProdConfig(Config):
    """Production configuration."""

    DEBUG = False
    ENV = "prod"


class DevConfig(Config):
    """Development configuration."""

    DEBUG = True
    ENV = "dev"


class TestConfig(Config):
    """Test configuration."""

    DEBUG = True
    # NOTE(review): ENV is "dev" rather than "test" — confirm intentional.
    ENV = "dev"
    TESTING = True
    WTF_CSRF_ENABLED = False  # Allows form testing


# Maps the ENV environment variable value to a configuration class.
configs = {"development": DevConfig, "local": LocalConfig, "production": ProdConfig, "test": TestConfig}

View File

@@ -1,58 +0,0 @@
"""Database classes and models."""
from pypistats.extensions import db
Column = db.Column
basestring = (str, bytes)
class CRUDMixin(object):
    """Mixin that adds convenience methods for CRUD operations."""

    @classmethod
    def create(cls, **kwargs):
        """Instantiate a record from keyword arguments and persist it."""
        return cls(**kwargs).save()

    def update(self, commit=True, **kwargs):
        """Set the given attributes on the record, committing if requested."""
        for field, value in kwargs.items():
            setattr(self, field, value)
        if commit:
            return self.save()
        return self

    def save(self, commit=True):
        """Add the record to the session, committing if requested."""
        db.session.add(self)
        if commit:
            db.session.commit()
        return self

    def delete(self, commit=True):
        """Remove the record from the session, committing if requested."""
        db.session.delete(self)
        if commit:
            return db.session.commit()
        return commit
class Model(CRUDMixin, db.Model):
    """Base model class that includes CRUD convenience methods."""

    # Abstract base: SQLAlchemy creates no table for this class itself.
    __abstract__ = True
class SurrogatePK(object):
    """A mixin that adds a surrogate integer "primary key" column.

    Adds a surrogate integer "primary key" column named ``id`` to any
    declarative-mapped class.
    """

    __table_args__ = {"extend_existing": True}

    id = Column(db.Integer, primary_key=True)

    @classmethod
    def get_by_id(cls, record_id):
        """Fetch a record by id, accepting ints, floats, or digit strings."""
        looks_like_digits = isinstance(record_id, basestring) and record_id.isdigit()
        if looks_like_digits or isinstance(record_id, (int, float)):
            return cls.query.get(int(record_id))
        return None

View File

@@ -1,24 +0,0 @@
"""Flask extensions."""
import os

from celery import Celery
from flask_github import GitHub
from flask_httpauth import HTTPBasicAuth
from flask_migrate import Migrate
from flask_sqlalchemy import SQLAlchemy

from pypistats.config import Config
db = SQLAlchemy()
github = GitHub()
migrate = Migrate()
auth = HTTPBasicAuth()
def create_celery(name=__name__, config=Config):
    """Create a celery object.

    The broker URL is read from the CELERY_BROKER_URL environment variable
    (the original hard-coded localhost, which breaks containerized setups
    where the broker host is e.g. "redis"); a local redis instance remains
    the fallback for development.
    """
    broker_url = os.environ.get("CELERY_BROKER_URL") or "redis://localhost:6379"
    celery = Celery(name, broker=broker_url)
    celery.config_from_object(config)
    return celery
celery = create_celery()

View File

@@ -1,81 +0,0 @@
"""Package stats tables."""
from pypistats.database import Column
from pypistats.database import Model
from pypistats.extensions import db
class OverallDownloadCount(Model):
    """Overall download counts."""

    __tablename__ = "overall"

    date = Column(db.Date, primary_key=True, nullable=False)
    package = Column(db.String(128), primary_key=True, nullable=False, index=True)
    # with_mirrors or without_mirrors
    category = Column(db.String(16), primary_key=True, nullable=False)
    downloads = Column(db.Integer(), nullable=False)

    def __repr__(self):
        # BUG FIX: the original repr was missing the closing ">".
        return f"<OverallDownloadCount {self.date} - {self.package} - {self.category}>"
class PythonMajorDownloadCount(Model):
    """Download counts by python major version."""

    __tablename__ = "python_major"

    date = Column(db.Date, primary_key=True, nullable=False)
    package = Column(db.String(128), primary_key=True, nullable=False, index=True)
    # python_major version, 2 or 3 (or null)
    category = Column(db.String(4), primary_key=True, nullable=True)
    downloads = Column(db.Integer(), nullable=False)

    def __repr__(self):
        # BUG FIX: the original repr was missing the closing ">".
        return f"<PythonMajorDownloadCount {self.date} - {self.package} - {self.category}>"
class PythonMinorDownloadCount(Model):
    """Download counts by python minor version."""

    __tablename__ = "python_minor"

    # NOTE(review): unlike the sibling tables, date lacks nullable=False here
    # (primary keys are implicitly NOT NULL, so behavior is unchanged).
    date = Column(db.Date, primary_key=True)
    package = Column(db.String(128), primary_key=True, nullable=False, index=True)
    # python_minor version, e.g. 2.7 or 3.6 (or null)
    category = Column(db.String(4), primary_key=True, nullable=True)
    downloads = Column(db.Integer(), nullable=False)

    def __repr__(self):
        # BUG FIX: the original repr was missing the closing ">".
        return f"<PythonMinorDownloadCount {self.date} - {self.package} - {self.category}>"
RECENT_CATEGORIES = ["day", "week", "month"]
class RecentDownloadCount(Model):
    """Recent day/week/month download counts."""

    __tablename__ = "recent"

    package = Column(db.String(128), primary_key=True, nullable=False, index=True)
    # recency, e.g. day, week, month
    category = Column(db.String(8), primary_key=True, nullable=False)
    downloads = Column(db.BigInteger(), nullable=False)

    def __repr__(self):
        return f"<RecentDownloadCount {self.package} - {self.category}>"
class SystemDownloadCount(Model):
    """Download counts by system."""

    __tablename__ = "system"

    date = Column(db.Date, primary_key=True)
    package = Column(db.String(128), primary_key=True, nullable=False, index=True)
    # system, e.g. Windows or Linux or Darwin (or null)
    category = Column(db.String(8), primary_key=True, nullable=True)
    downloads = Column(db.Integer(), nullable=False)

    def __repr__(self):
        # BUG FIX: the original repr was missing the closing ">".
        return f"<SystemDownloadCount {self.date} - {self.package} - {self.category}>"

View File

@@ -1,35 +0,0 @@
"""User tables."""
import datetime
from flask_login import UserMixin
from sqlalchemy.dialects.postgresql import ARRAY
from pypistats.database import Column
from pypistats.database import Model
from pypistats.database import SurrogatePK
from pypistats.extensions import db
MAX_FAVORITES = 20
class User(UserMixin, SurrogatePK, Model):
    """A user of the app."""

    __tablename__ = "users"

    # External account id (distinct from the surrogate ``id`` primary key);
    # presumably the GitHub user id — confirm against the OAuth flow.
    uid = Column(db.Integer(), unique=True)
    username = Column(db.String(39), nullable=False)
    avatar_url = Column(db.String(256))
    # OAuth access token. NOTE(review): stored in plain text — confirm acceptable.
    token = Column(db.String(256))
    created_at = Column(db.DateTime, nullable=False, default=datetime.datetime.utcnow)
    active = Column(db.Boolean(), default=False)
    is_admin = Column(db.Boolean(), default=False)
    # Favorited package names; presumably capped by MAX_FAVORITES (defined
    # above) in the view layer — TODO confirm where the cap is enforced.
    favorites = Column(ARRAY(db.String(128), dimensions=1))

    def __init__(self, token, **kwargs):
        """Create instance."""
        # Calls db.Model.__init__ directly (not super()) so the SQLAlchemy
        # constructor receives the fields without UserMixin in the chain.
        db.Model.__init__(self, token=token, **kwargs)

    def __repr__(self):
        """Represent instance as a unique string."""
        return f"<User({self.username})>"

View File

@@ -1,72 +0,0 @@
{
"downloads": {
"data": [
{
"x": [
"2017-05-01",
"2017-05-02",
"2017-05-03"
],
"y": [
"2",
"5",
"4"
],
"name": "Downloads",
"type": "scatter",
"mode": "lines+markers",
"connectgaps": true,
"marker": {
"symbol": "circle",
"line": {
"color": "#444",
"width": 1
}
},
"line": {
"shape": "linear",
"smoothing": 1,
"width": 2
}
}
]
},
"percentages": {
"data": [
{
"x": [
"2017-05-01",
"2017-05-02",
"2017-05-03"
],
"y": [
"2",
"5",
"4"
],
"text": [
"2",
"5",
"4"
],
"name": "Proportional downloads",
"hoverinfo": "x+text+name",
"type": "scatter",
"mode": "lines+markers",
"connectgaps": true,
"marker": {
"symbol": "circle",
"line": {
"color": "#444",
"width": 1
}
},
"line": {
"shape": "linear",
"smoothing": 1,
"width": 2
}
}
]
}
}

View File

@@ -1,212 +0,0 @@
{
"downloads": {
"layout": {
"autosize": true,
"height": 400,
"margin": {
"r": 100,
"t": 80,
"autoexpand": true,
"b": 80,
"l": 100,
"pad": 0
},
"paper_bgcolor": "#fff",
"plot_bgcolor": "rgba(175, 175, 175, 0.2)",
"showlegend": true,
"legend": {
"orientation": "v",
"bgcolor": "#e7e7e7",
"xanchor": "left",
"yanchor": "middle",
"x": 0,
"y": 0.5
},
"title": "Downloads",
"xaxis": {
"tickformat": "%m-%d",
"dtick": 604800000,
"tick0": "2017-08-07",
"gridcolor": "#FFF",
"gridwidth": 2,
"anchor": "y",
"domain": [
0,
1
],
"title": "Date",
"titlefont": {
"family": "Geneva, Verdana, Geneva, sans-serif",
"size": 16,
"color": "#7f7f7f"
},
"showline": true,
"linecolor": "rgba(148, 148, 148, 1)",
"linewidth": 2,
"tickangle": -45,
"rangeselector": {
"buttons": [
{
"step": "day",
"stepmode": "backward",
"count": 31,
"label": "30d"
},
{
"step": "day",
"stepmode": "backward",
"count": 61,
"label": "60d"
},
{
"step": "day",
"stepmode": "backward",
"count": 91,
"label": "90d"
},
{
"step": "day",
"stepmode": "backward",
"count": 181,
"label": "all"
}
]
}
},
"yaxis": {
"hoverformat": ",.0",
"tickformat": ",.0",
"gridcolor": "#FFF",
"gridwidth": 2,
"autotick": true,
"rangemode": "tozero",
"showline": true,
"title": "Downloads",
"ticksuffix": "",
"tickmode": "auto",
"linecolor": "rgba(148, 148, 148, 1)",
"linewidth": 2,
"rangeselector": {
"buttons": [
{
"step": "day",
"stepmode": "backward",
"count": 31,
"label": "30d"
},
{
"step": "day",
"stepmode": "backward",
"count": 61,
"label": "60d"
},
{
"step": "day",
"stepmode": "backward",
"count": 91,
"label": "90d"
},
{
"step": "day",
"stepmode": "backward",
"count": 181,
"label": "all"
}
]
}
}
},
"config": {
"displaylogo": false,
"modeBarButtonsToRemove": [
"toImage",
"sendDataToCloud",
"zoom2d",
"pan2d",
"select2d",
"lasso2d",
"zoomIn2d",
"zoomOut2d",
"toggleSpikelines"
]
}
},
"percentages": {
"layout": {
"autosize": true,
"height": 400,
"margin": {
"r": 100,
"t": 80,
"autoexpand": true,
"b": 80,
"l": 100,
"pad": 0
},
"paper_bgcolor": "#fff",
"plot_bgcolor": "rgba(175, 175, 175, 0.2)",
"showlegend": true,
"legend": {
"orientation": "v",
"bgcolor": "#e7e7e7",
"xanchor": "left",
"yanchor": "middle",
"x": 0,
"y": 0.5
},
"title": "Proportional Downloads",
"xaxis": {
"tickformat": "%m-%d",
"dtick": 604800000,
"tick0": "2017-08-07",
"gridcolor": "#FFF",
"gridwidth": 2,
"anchor": "y",
"domain": [
0,
1
],
"title": "Date",
"titlefont": {
"family": "Geneva, Verdana, Geneva, sans-serif",
"size": 16,
"color": "#7f7f7f"
},
"showline": true,
"linecolor": "rgba(148, 148, 148, 1)",
"linewidth": 2,
"tickangle": -45
},
"yaxis": {
"range": [
0,
100
],
"dtick": 20,
"gridcolor": "#FFF",
"gridwidth": 2,
"autotick": false,
"showline": true,
"title": "Download Proportion",
"ticksuffix": "%",
"tickmode": "auto",
"linecolor": "rgba(148, 148, 148, 1)",
"linewidth": 2
}
},
"config": {
"displaylogo": false,
"modeBarButtonsToRemove": [
"toImage",
"sendDataToCloud",
"zoom2d",
"pan2d",
"select2d",
"lasso2d",
"zoomIn2d",
"zoomOut2d",
"toggleSpikelines"
]
}
}
}

View File

@@ -1,39 +0,0 @@
"""Run the application."""
import os
from flask import g
from flask import redirect
from flask import request
from flask import session
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
from werkzeug.middleware.proxy_fix import ProxyFix
from pypistats.application import create_app
from pypistats.config import configs
from pypistats.models.user import User
# change this for migrations
env = os.environ.get("ENV", "development")
app = create_app(configs[env])
# Rate limiting per IP/worker
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=2)
limiter = Limiter(app, key_func=get_remote_address, application_limits=["5 per second", "30 per minute"])
app.logger.info(f"Environment: {env}")
@app.before_request
def before_request():
    """Execute before requests: force https and resolve the session user."""
    # Redirect plain-http traffic (as reported by the proxy) to https.
    forwarded_proto = request.headers.get("X-Forwarded-Proto")
    if forwarded_proto == "http" and request.url.startswith("http://"):
        secure_url = request.url.replace("http://", "https://", 1)
        return redirect(secure_url, code=301)

    # Attach the logged-in user (or None) to the request globals.
    g.user = None
    if "user_id" in session:
        g.user = User.query.get(session["user_id"])

View File

@@ -1,408 +0,0 @@
"""Get the download stats for a specific day."""
import datetime
import os
import time
import psycopg2
from google.auth.crypt._python_rsa import RSASigner
from google.cloud import bigquery
from google.oauth2.service_account import Credentials
from psycopg2.extras import execute_values
from pypistats.extensions import celery
# Mirrors to disregard when considering downloads
MIRRORS = ("bandersnatch", "z3c.pypimirror", "Artifactory", "devpi")
# PyPI systems
SYSTEMS = ("Windows", "Linux", "Darwin")
# postgresql tables to update for __all__
PSQL_TABLES = ["overall", "python_major", "python_minor", "system"]
# Number of days to retain records
MAX_RECORD_AGE = 180
def get_google_credentials():
    """Obtain the Google credentials object explicitly.

    Builds service-account credentials from GOOGLE_* environment variables
    rather than a key file. Raises KeyError if a required variable is unset.
    """
    # The key arrives with surrounding quotes and literal "\n" sequences
    # (e.g. from a .env file); normalize it back to PEM format.
    private_key = os.environ["GOOGLE_PRIVATE_KEY"].replace('"', "").replace("\\n", "\n")
    private_key_id = os.environ["GOOGLE_PRIVATE_KEY_ID"]
    signer = RSASigner.from_string(key=private_key, key_id=private_key_id)
    project_id = os.environ["GOOGLE_PROJECT_ID"]
    service_account_email = os.environ["GOOGLE_CLIENT_EMAIL"]
    scopes = ("https://www.googleapis.com/auth/bigquery", "https://www.googleapis.com/auth/cloud-platform")
    token_uri = os.environ["GOOGLE_TOKEN_URI"]
    credentials = Credentials(
        signer=signer,
        service_account_email=service_account_email,
        token_uri=token_uri,
        scopes=scopes,
        project_id=project_id,
    )
    return credentials
def get_daily_download_stats(date):
    """Get daily download stats for pypi packages from BigQuery.

    Args:
        date: "YYYY-MM-DD" string, or None to use yesterday's date.

    Returns:
        Per-table success flags from update_db, plus "elapsed" seconds.
    """
    start = time.time()
    job_config = bigquery.QueryJobConfig()
    credentials = get_google_credentials()
    bq_client = bigquery.Client(project=os.environ["GOOGLE_PROJECT_ID"], credentials=credentials)
    if date is None:
        date = str(datetime.date.today() - datetime.timedelta(days=1))
    print(date)
    print("Sending query to BigQuery...")
    query = get_query(date)
    print(query)
    print("Sent.")
    query_job = bq_client.query(query, job_config=job_config)
    # result() blocks until the BigQuery job completes.
    iterator = query_job.result()
    print("Downloading results.")
    rows = list(iterator)
    print(len(rows), "rows from gbq")
    # Group rows by destination table; category_label is the table name.
    data = {}
    for row in rows:
        if row["category_label"] not in data:
            data[row["category_label"]] = []
        data[row["category_label"]].append([date, row["package"], row["category"], row["downloads"]])
    results = update_db(data, date)
    print("Elapsed: " + str(time.time() - start))
    results["elapsed"] = time.time() - start
    return results
def update_db(data, date=None):
    """Update the db with new data by table.

    ``data`` maps a table name to its list of rows; returns a dict of
    per-table success flags.
    """
    connection, cursor = get_connection_cursor()
    return {
        table: update_table(connection, cursor, table, rows, date)
        for table, rows in data.items()
    }
def update_table(connection, cursor, table, rows, date):
    """Replace one day's rows in ``table`` with freshly queried values.

    Deletes every row for ``date`` then bulk-inserts ``rows`` (each a
    [date, package, category, downloads] list). Returns True on commit,
    False if the transaction was rolled back.
    """
    print(table)
    delete_rows = []
    for row_idx, row in enumerate(rows):
        for idx, item in enumerate(row):
            if item is None:
                # NOTE(review): None becomes the *string* "null", so the db
                # stores the text 'null' rather than SQL NULL — confirm
                # downstream consumers expect this.
                row[idx] = "null"
            else:
                # Some hacky packages have long names; ignore them
                # NOTE(review): a row with several oversized items is appended
                # more than once, which can pop unintended indices below —
                # confirm whether this can occur in practice.
                if len(str(item)) > 128:
                    delete_rows.append(row_idx)
                    print(row)
    # Some packages have installs with empty (non-null) python version; ignore
    if table in ("python_major", "python_minor"):
        for idx, row in enumerate(rows):
            if row[2] in ("", "."):
                delete_rows.append(idx)
                print(row)
    print(delete_rows)
    # Delete ignored rows (highest index first so positions stay valid)
    for idx in sorted(delete_rows, reverse=True):
        rows.pop(idx)
    # table/date come from internal code paths, not user input.
    delete_query = f"""DELETE FROM {table}
        WHERE date = '{date}'"""
    insert_query = f"""INSERT INTO {table} (date, package, category, downloads)
        VALUES %s"""
    try:
        print(delete_query)
        cursor.execute(delete_query)
        print(insert_query)
        execute_values(cursor, insert_query, rows)
        connection.commit()
        return True
    except psycopg2.IntegrityError as e:
        connection.rollback()
        return False
def update_all_package_stats(date=None):
    """Update stats for __all__ packages.

    For each stats table, sums downloads across packages for ``date`` and
    re-inserts the totals under the synthetic package name ``__all__``.

    Args:
        date: "YYYY-MM-DD" string, or None to use yesterday's date.

    Returns:
        Dict mapping table name to success flag, plus "elapsed" seconds.
    """
    print("__all__")
    start = time.time()
    if date is None:
        date = str(datetime.date.today() - datetime.timedelta(days=1))
    connection, cursor = get_connection_cursor()
    success = {}
    for table in PSQL_TABLES:
        aggregate_query = f"""SELECT date, '__all__' AS package, category, sum(downloads) AS downloads
            FROM {table} where date = '{date}' GROUP BY date, category"""
        # BUG FIX: the original passed ``(table,)`` as query parameters even
        # though the query contains no placeholders, which makes psycopg2
        # raise "not all arguments converted during string formatting".
        cursor.execute(aggregate_query)
        values = cursor.fetchall()
        delete_query = f"""DELETE FROM {table}
            WHERE date = '{date}' and package = '__all__'"""
        insert_query = f"""INSERT INTO {table} (date, package, category, downloads)
            VALUES %s"""
        try:
            print(delete_query)
            cursor.execute(delete_query)
            print(insert_query)
            execute_values(cursor, insert_query, values)
            connection.commit()
            success[table] = True
        except psycopg2.IntegrityError:
            connection.rollback()
            success[table] = False
    print("Elapsed: " + str(time.time() - start))
    success["elapsed"] = time.time() - start
    return success
def update_recent_stats(date=None):
    """Update daily, weekly, monthly stats for all packages.

    Rebuilds the "recent" table by summing the overall table's
    without_mirrors rows over 1/7/30-day windows ending at ``date``.

    Args:
        date: "YYYY-MM-DD" string, or None to use yesterday's date.

    Returns:
        Dict mapping period name ("day"/"week"/"month") to success flag,
        plus "elapsed" seconds.
    """
    print("recent")
    start = time.time()
    if date is None:
        date = str(datetime.date.today() - datetime.timedelta(days=1))
    connection, cursor = get_connection_cursor()
    downloads_table = "overall"
    recent_table = "recent"
    date = datetime.datetime.strptime(date, "%Y-%m-%d").date()
    date_week = date - datetime.timedelta(days=7)
    date_month = date - datetime.timedelta(days=30)
    # WHERE clauses bounding each aggregation window.
    where = {
        "day": f"date = '{str(date)}'",
        "week": f"date > '{str(date_week)}'",
        "month": f"date > '{str(date_month)}'",
    }
    success = {}
    for period, clause in where.items():
        select_query = f"""SELECT package, '{period}' as category, sum(downloads) AS downloads
            FROM {downloads_table}
            WHERE category = 'without_mirrors' and {clause}
            GROUP BY package"""
        cursor.execute(select_query)
        values = cursor.fetchall()
        # Replace the whole period bucket atomically within one transaction.
        delete_query = f"""DELETE FROM {recent_table}
            WHERE category = '{period}'"""
        insert_query = f"""INSERT INTO {recent_table}
            (package, category, downloads) VALUES %s"""
        try:
            print(delete_query)
            cursor.execute(delete_query)
            print(insert_query)
            execute_values(cursor, insert_query, values)
            connection.commit()
            success[period] = True
        except psycopg2.IntegrityError as e:
            connection.rollback()
            success[period] = False
    print("Elapsed: " + str(time.time() - start))
    success["elapsed"] = time.time() - start
    return success
def get_connection_cursor():
    """Open a PostgreSQL connection from env vars; return (connection, cursor)."""
    connect_kwargs = {
        "dbname": os.environ["POSTGRESQL_DBNAME"],
        "user": os.environ["POSTGRESQL_USERNAME"],
        "password": os.environ["POSTGRESQL_PASSWORD"],
        "host": os.environ["POSTGRESQL_HOST"],
        "port": os.environ["POSTGRESQL_PORT"],
        # sslmode='require',
    }
    connection = psycopg2.connect(**connect_kwargs)
    return connection, connection.cursor()
def purge_old_data(date=None):
    """Purge old data records.

    Deletes rows older than MAX_RECORD_AGE days (relative to ``date``,
    defaulting to yesterday) from every stats table.
    """
    print("Purge")
    start = time.time()
    if date is None:
        date = str(datetime.date.today() - datetime.timedelta(days=1))
    connection, cursor = get_connection_cursor()
    reference = datetime.datetime.strptime(date, "%Y-%m-%d")
    cutoff = (reference - datetime.timedelta(days=MAX_RECORD_AGE)).strftime("%Y-%m-%d")
    success = {}
    for table in PSQL_TABLES:
        delete_query = f"""DELETE FROM {table} where date < '{cutoff}'"""
        try:
            print(delete_query)
            cursor.execute(delete_query)
            connection.commit()
            success[table] = True
        except psycopg2.IntegrityError:
            connection.rollback()
            success[table] = False
    print("Elapsed: " + str(time.time() - start))
    success["elapsed"] = time.time() - start
    return success
def vacuum_analyze():
    """Vacuum and analyze the db, returning per-operation elapsed seconds."""
    connection, cursor = get_connection_cursor()
    # Autocommit (isolation level 0): VACUUM cannot run inside a transaction.
    connection.set_isolation_level(0)
    results = {}
    for operation in ("VACUUM", "ANALYZE"):
        started = time.time()
        cursor.execute(operation)
        results[operation.lower()] = time.time() - started
    print(results)
    return results
def get_query(date):
    """Get the query to execute against pypistats on bigquery.

    Builds one UNION ALL query over the public pypi.file_downloads table
    producing (package, category_label, category, downloads) rows for a
    single day; category_label selects the destination postgres table.
    Mirror installers are excluded everywhere except the
    overall/with_mirrors bucket.
    """
    # NOTE: {{0,}} renders as the literal regex quantifier {0,} in the
    # f-string; MIRRORS/SYSTEMS tuples format as valid SQL IN-lists.
    return f"""
    WITH
    dls AS (
    SELECT
        file.project AS package,
        details.installer.name AS installer,
        details.python AS python_version,
        details.system.name AS system
    FROM
        `bigquery-public-data.pypi.file_downloads`
    WHERE
        DATE(timestamp) = '{date}'
        AND
        (REGEXP_CONTAINS(details.python,r'^[0-9]\.[0-9]+.{{0,}}$') OR
        details.python IS NULL)
    )
    SELECT
        package,
        'python_major' AS category_label,
        cast(SPLIT(python_version, '.')[
    OFFSET
        (0)] as string) AS category,
        COUNT(*) AS downloads
    FROM
        dls
    WHERE
        installer NOT IN {str(MIRRORS)}
    GROUP BY
        package,
        category
    UNION ALL
    SELECT
        package,
        'python_minor' AS category_label,
        REGEXP_EXTRACT(python_version, r'^[0-9]+\.[0-9]+') AS category,
        COUNT(*) AS downloads
    FROM
        dls
    WHERE
        installer NOT IN {str(MIRRORS)}
    GROUP BY
        package,
        category
    UNION ALL
    SELECT
        package,
        'overall' AS category_label,
        'with_mirrors' AS category,
        COUNT(*) AS downloads
    FROM
        dls
    GROUP BY
        package,
        category
    UNION ALL
    SELECT
        package,
        'overall' AS category_label,
        'without_mirrors' AS category,
        COUNT(*) AS downloads
    FROM
        dls
    WHERE
        installer NOT IN {str(MIRRORS)}
    GROUP BY
        package,
        category
    UNION ALL
    SELECT
        package,
        'system' AS category_label,
        CASE
            WHEN system NOT IN {str(SYSTEMS)} THEN 'other'
            ELSE system
        END AS category,
        COUNT(*) AS downloads
    FROM
        dls
    WHERE
        installer NOT IN {str(MIRRORS)}
    GROUP BY
        package,
        category
    """
@celery.task
def etl(date=None, purge=True):
    """Perform the stats download.

    Runs the full daily pipeline: fetch + load per-package stats, aggregate
    the __all__ package, refresh recent day/week/month counts, vacuum the
    db, and optionally purge records older than MAX_RECORD_AGE days.
    """
    if date is None:
        date = str(datetime.date.today() - datetime.timedelta(days=1))
    results = dict()
    results["downloads"] = get_daily_download_stats(date)
    results["__all__"] = update_all_package_stats(date)
    # NOTE(review): ``date`` is not forwarded here, so a backfill run
    # recomputes recent stats relative to yesterday instead of the backfill
    # date — confirm this is intentional.
    results["recent"] = update_recent_stats()
    results["cleanup"] = vacuum_analyze()
    if purge:
        results["purge"] = purge_old_data(date)
    return results
@celery.task
def example(thing):
    """Toy task for verifying that the celery worker is wired up."""
    print(thing)
    print("Sleeping")
    time.sleep(10)
    print("done")
if __name__ == "__main__":
    # Ad-hoc manual backfill entry point; uncomment the steps you need.
    run_date = "2020-01-09"
    print(run_date)
    # print(purge_old_data(run_date))
    # vacuum_analyze()
    print(get_daily_download_stats(run_date))
    print(update_all_package_stats(run_date))
    # print(update_recent_stats(run_date))
    # vacuum_analyze(env)

View File

@@ -1,44 +0,0 @@
{% extends "layout.html" %}
{% block title %}PyPI Download Stats{% endblock %}
{% block body %}
<h1>About PyPI Stats</h1>
<hr>
<h3>Goal</h3>
<p>PyPI Stats aims to provide aggregate download information on python packages available from the Python Package
Index in lieu of having to execute queries against raw download records in Google BigQuery.</p>
<h3>Data</h3>
<p>Download stats are sourced from the Python Software Foundation's publicly available
<a href="https://bigquery.cloud.google.com/table/bigquery-public-data:pypi.file_downloads">download stats</a>
on Google BigQuery. All aggregate download stats ignore known PyPI mirrors (such as
<a href="{{ url_for('general.package_page', package='bandersnatch') }}">bandersnatch</a>) unless noted
otherwise.</p>
<p>PyPI Stats retains data for 180 days.</p>
<h3>API</h3>
<p>A simple
<a href="{{ url_for('api.api') }}">JSON API</a>
is available for aggregate download stats and time series for packages.</p>
<h3>Downstream</h3>
<p>
<ul>
<li>
<a href="{{ url_for('general.package_page', package='pypistats') }}">pypistats</a> is a <a
href="https://github.com/hugovk/pypistats">python package</a> that provides a client and CLI tool for
the pypistats.org JSON API
</li>
<li>
<a href="https://shields.io/#/examples/downloads">shields.io</a> uses the pypistats.org JSON API to provide
download count badges, like this one for <a
href="{{ url_for('general.package_page', package='pypistats') }}">pypistats</a> <a
href="https://github.com/hugovk/pypistats"><img src="https://img.shields.io/pypi/dm/pypistats.svg"></a>
</li>
</ul>
<p>PyPIStats.org is also <a href="https://github.com/crflynn/pypistats.org">open source</a>.</p>
<h3>Who</h3>
<p>PyPI Stats was created by
<a href="https://flynn.gg">Christopher Flynn</a>.
</p>
<p>
Thanks to <a href="https://github.com/hugovk">Hugo (hugovk)</a> for providing a client interface to the API.
</p>
{% endblock %}

View File

@@ -1,20 +0,0 @@
{% extends "layout.html" %}
{% block title %}PyPI Download Stats{% endblock %}
{% block body %}
<h1>Analytics for PyPI packages</h1>
<hr>
<form method="POST" action="/admin">
{{ form.csrf_token }}
{{ form.date.label }}
{{ form.date(size=24) }}
<input type="submit" value="Submit">
</form>
<br>
{% if not date %}
<p>Submit date to run backfill.</p>
{% endif %}
{% if date %}
<br>
{{ date }} submitted.
{% endif %}
{% endblock %}

View File

@@ -1,269 +0,0 @@
{% extends "layout.html" %}
{% block title %}PyPI Download Stats{% endblock %}
{% block body %}
<h1>PyPI Stats API</h1>
<hr>
<p>
PyPI Stats provides a simple JSON API for retrieving aggregate download stats and time series for packages. The
following are the valid endpoints using host:
<code>https://pypistats.org/</code>
</p>
<h2>NOTES</h2>
<p>
<ul>
<li>All download stats exclude known mirrors (such as
<a href="{{ url_for('general.package_page', package='bandersnatch') }}">bandersnatch</a>) unless noted
otherwise.
</li>
<li>Time series data is retained only for 180 days.</li>
<li>All download data is updated once daily.</li>
</ul>
</p>
<h2>Etiquette</h2>
<p>
If you plan on using the API to download historical data for every python package in the database (e.g. for some
personal data exploration), <b>DON'T</b>. This website runs on limited resources and you will degrade
the site performance by doing this. It will also take a very long time.
</p>
<p>
You are much better off extracting the data directly from the Google
BigQuery <a href="https://bigquery.cloud.google.com/table/bigquery-public-data:pypi.downloads">pypi downloads tables</a>. You
can query up to 1TB of data FREE every month before having to pay. The volume of data queried for this website
falls well under that limit (each month of data is less than 100 GB queried) and you will have your data
in a relatively short amount of time. <a
href="https://packaging.python.org/guides/analyzing-pypi-package-downloads/">Here is a quick guide</a>.
</p>
<p>
If you want to regularly fetch download counts for a particular package or set of packages, cache your results.
The data provided here is updated <b>once</b> daily, so you should not need to fetch results from the same API
endpoint more than once per day.
</p>
<h2>Rate Limiting</h2>
<p>
IP-based rate limiting is imposed application-wide.
</p>
<h2>API Client</h2>
<p>
The <a href="{{ url_for('general.package_page', package='pypistats') }}">pypistats</a> <a
href="https://pypi.org/project/pypistats">package</a> is a python client and CLI tool for easily
accessing, aggregating, and formatting results from the API. To install, use pip:
<pre><code>pip install -U pypistats</code></pre>
Refer to the <a href="https://github.com/hugovk/pypistats">documentation</a> for usage.
</p>
<h2>Endpoints</h2>
<h3>/api/packages/&lt;package&gt;/recent</h3>
<p>Retrieve the aggregate download quantities for the last day/week/month.
</p>
<p>Query arguments:
<ul>
<li>
<b>period</b>
(optional):
<code>day</code>
or
<code>week</code>
or
<code>month</code>. If omitted returns all values.
</li>
</ul>
Example response:
<pre><code>{
"data": {
"last_day": 1,
"last_month": 2,
"last_week": 3
},
"package": "package_name",
"type": "recent_downloads"
}</code></pre>
</p>
<h3>/api/packages/&lt;package&gt;/overall</h3>
<p>Retrieve the aggregate daily download time series with or without mirror downloads.
</p>
<p>Query arguments:
<ul>
<li>
<b>mirrors</b>
(optional):
<code>true</code>
or
<code>false</code>. If omitted returns both series data.
</li>
<!-- <li> <b>start_date</b> (optional): starting date of time series in format <code>YYYY-MM-DD</code> </li> <li> <b>end_date</b> (optional): ending date of time series in format <code>YYYY-MM-DD</code> </li> -->
</ul>
Example response:
<pre><code>{
"data": [
{
"category": "with_mirrors",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "without_mirrors",
"date": "2018-02-08",
"downloads": 1
}
],
"package": "package_name",
"type": "overall_downloads"
}</code></pre>
</p>
<h3>/api/packages/&lt;package&gt;/python_major</h3>
<p>Retrieve the aggregate daily download time series by Python major version number.
</p>
<p>Query arguments:
<ul>
<li>
<b>version</b>
(optional): the Python major version number, e.g.
<code>2</code>
or
<code>3</code>. If omitted returns all series data (including
<code>null</code>).
</li>
<!-- <li> <b>start_date</b> (optional): starting date of time series in format <code>YYYY-MM-DD</code> </li> <li> <b>end_date</b> (optional): ending date of time series in format <code>YYYY-MM-DD</code> </li> -->
</ul>
Example response:
<pre><code>{
"data": [
{
"category": "2",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "3",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "null",
"date": "2018-02-08",
"downloads": 1
}
],
"package": "package_name",
"type": "python_major_downloads"
}</code></pre>
</p>
<h3>/api/packages/&lt;package&gt;/python_minor</h3>
<p>Retrieve the aggregate daily download time series by Python minor version number.
</p>
<p>Query arguments:
<ul>
<li>
<b>version</b>
(optional): the Python minor version number, e.g.
<code>2.7</code>
or
<code>3.6</code>. If omitted returns all series data (including
<code>null</code>).
</li>
<!-- <li> <b>start_date</b> (optional): starting date of time series in format <code>YYYY-MM-DD</code> </li> <li> <b>end_date</b> (optional): ending date of time series in format <code>YYYY-MM-DD</code> </li> -->
</ul>
Example response:
<pre><code>{
"data": [
{
"category": "2.6",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "2.7",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "3.2",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "3.3",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "3.4",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "3.5",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "3.6",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "3.7",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "null",
"date": "2018-02-08",
"downloads": 1
}
],
"package": "package_name",
"type": "python_minor_downloads"
}</code></pre>
</p>
<h3>/api/packages/&lt;package&gt;/system</h3>
<p>Retrieve the aggregate daily download time series by operating system.
</p>
<p>Query arguments:
<ul>
<li>
<b>os</b>
(optional): the operating system name, e.g.
<code>windows</code>,
<code>linux</code>,
<code>darwin</code>
or
<code>other</code>. If omitted returns all series data (including
<code>null</code>).
</li>
<!-- <li> <b>start_date</b> (optional): starting date of time series in format <code>YYYY-MM-DD</code> </li> <li> <b>end_date</b> (optional): ending date of time series in format <code>YYYY-MM-DD</code> </li> -->
</ul>
Example response:
<pre><code>{
"data": [
{
"category": "darwin",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "linux",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "null",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "other",
"date": "2018-02-08",
"downloads": 1
},
{
"category": "windows",
"date": "2018-02-08",
"downloads": 1
}
],
"package": "package_name",
"type": "system_downloads"
}</code></pre>
</p>
{% endblock %}

View File

@@ -1,89 +0,0 @@
{% extends "layout.html" %}
{% block title %}PyPI Download Stats{% endblock %}
{% block body %}
<h1>FAQs</h1>
<hr>
<h3>
What is the source of the download data?
</h3>
<p>
PyPI provides download records as a publicly available dataset on Google's BigQuery. You can access the data
with a Google Cloud account <a href="https://bigquery.cloud.google.com/table/bigquery-public-data:pypi.downloads">here</a>.
</p>
<h3>
When is the website data updated?
</h3>
<p>
The data update begins at 01:00:00 UTC and should take about 10 minutes.
</p>
<h3>
Why are there so many more downloads after July 26, 2018?
</h3>
<p>
PyPI download records are generated by a service known as <a
href="https://github.com/pypa/linehaul">linehaul</a>. The previous iteration of the service had an issue
which caused it to restart regularly due to running out of memory, resulting in a large quantity of dropped
download records. On July 26, a newer version of the service was deployed, which is much more robust and
reliable.
</p>
<h3>
Why are the cumulative download counts different from the sum of the downloads from the <i>overall</i> chart?
</h3>
<p>
The cumulative download counts consider only the download records which are not from a known set of PyPI mirror
applications, namely <code>bandersnatch</code>, <code>z3c.pypimirror</code>, <code>Artifactory</code>, and
<code>devpi</code>. In other words, the cumulative download counts take the sum of the downloads from the
<i>Without_Mirrors</i> dataset from the chart.
</p>
<h3>
What is the difference between <i>Without_Mirrors</i> and <i>With_Mirrors</i> downloads?
</h3>
<p>
The <b>With_Mirrors</b> and <b>Without_Mirrors</b> downloads are not mutually exclusive sets of download counts
like the other segmentations provided. In fact, the <b>Without_Mirrors</b> downloads are a subset of the
downloads in <b>With_Mirrors</b>.
</p>
<p>
Some entities will create a mirror, or clone, of the PyPI repository using a tool like <a
href="{{ url_for('general.package_page', package='bandersnatch') }}">bandersnatch</a>
for the sake of security or availability. This means that their mirror repository regularly syncs with PyPI by
downloading all of the Python packages available (and versions thereof) that it does not already have. Those
downloads are recorded by PyPI with <code>bandersnatch</code> as the user-agent. You will see also that on days
in which you release a new version of your package there will be many more downloads from mirrors, as active
mirrors will sync with PyPI by downloading those new releases.
</p>
<p>
pypistats.org filters downloads from known mirrors from the version and system segmentations on the website.
Downloads by mirrors are intentionally excluded from download breakdowns because they do not
represent end-users of the software. Instead, they serve as an alternative provider to <i>other</i> end-users on
a separate (sometimes private) network.
</p>
<p>
The existence of mirrors means that the downloads provided by PyPI and BigQuery come with some uncertainty with
respect to the actual aggregate usage of Python packages. One might expect that mirrors will mask end-user
downloads for more commonly used packages while simultaneously inflating the download counts of less common
ones. This uncertainty is difficult to quantify because the mirrors don't report subsequent downloads back to
PyPI.
</p>
<p>
One can, however, assume that PyPI serves a significant proportion of the Python community's packaging
downloads. Hopefully significant enough that the quantities provided here are representative of their users and
relevant to package maintainers. There are other distributors, like Conda, which also serve python packages,
but their download data is currently not publicly available at the event level like PyPI's, and thus are not
incorporated into the metrics on this website.
</p>
<h3>
Why disregard mirrors from aggregate data?
</h3>
<p>
The intent of disregarding mirrors is to provide metrics that reflect end-user download aggregation.
</p>
<h3>
What about downloads due to CI/CD tools?
</h3>
<p>
Downloads from CI/CD tools are included in all metrics. There is currently no easy way to attribute downloads to
build/deployment tools.
</p>
{% endblock %}

View File

@@ -1,24 +0,0 @@
{% extends "layout.html" %}
{% block title %}PyPI Download Stats{% endblock %}
{% block body %}
<h1>Analytics for PyPI packages</h1>
<hr>
<form method="POST" action="/">
{{ form.csrf_token }}
{{ form.name.label }}
{{ form.name(size=24) }}
<input type="submit" value="Search">
</form>
<br>
{% if not search %}
<p>Search among
<b>{{ "{:,.0f}".format(package_count) }}</b>
python packages from PyPI (updated daily).</p>
{% else %}
Search results:
{% endif %}
{% if search %}
<br>
{% include "results.html" %}
{% endif %}
{% endblock %}

View File

@@ -1,85 +0,0 @@
<!doctype html>
<html>
<head>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-101875606-5"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag() {
dataLayer.push(arguments);
}
gtag('js', new Date());
gtag('config', 'UA-101875606-5');
</script>
<link rel="stylesheet" href="/static/style.css">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="chrome=1">
<meta name="description" content="PyPI Download Stats">
<meta name="keywords" content="Python,PyPI,package,downloads,stats">
<title>
{% block title %}{% endblock %}
</title>
<meta name="viewport" content="width=device-width">
<!--[if lt IE 9]> <script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script> <![endif]-->
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
type="text/javascript"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/anchor-js/4.0.0/anchor.min.js" type="text/javascript"></script>
<!-- Place this tag in your head or just before your close body tag. -->
{% block plot %}{% endblock %}
{% block auth %}{% endblock %}
</head>
<body>
<div class="wrapper">
<header>
<h1>PyPI Stats</h1>
<p>
<a href="{{ url_for('general.index') }}">Search</a>
<br>
<br>
<a href="{{ url_for('general.package_page', package='__all__') }}">All packages</a>
<br>
<a href="{{ url_for('general.top') }}">Top packages</a>
<br>
<br>
{% if user %}
<a href="{{ url_for('user.user') }}">{{ user.username }}'s packages</a>
{% else %}
<a href="{{ url_for('user.user') }}">Track packages</a>
{% endif %}
<br>
<br>
{% if user %}
<a href="{{ url_for('user.logout') }}">Logout</a>
{% endif %}
</p>
</header>
<section>
{% block body %}{% endblock %}
</section>
<footer>
<p>
<a href="{{ url_for('api.api') }}">API</a>
<br>
<a href="{{ url_for('general.about') }}">About</a>
<br>
<a href="{{ url_for('general.faqs') }}">FAQs</a>
<br>
</p>
</footer>
</div>
<script>
anchors.add();
</script>
</body>
</html>

View File

@@ -1,105 +0,0 @@
{% extends "layout.html" %}
{% block title %}PyPI Download Stats{% endblock %}
{% block plot %}
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.1.0/jquery.min.js"></script>
{% endblock %}
{% block body %}
<h1>{{ package }}</h1>
<hr>
{% if user %}
{% if user.favorites and package in user.favorites %}
<p>
<img src="{{ user.avatar_url }}" height="20" width="20">
<a href="{{ url_for('user.user_package', package=package) }}">REMOVE from my packages</a><br></p>
{% else %}
<p>
<img src="{{ user.avatar_url }}" height="20" width="20">
<a href="{{ url_for('user.user_package', package=package) }}">ADD to my packages</a><br></p>
{% endif %}
{% endif %}
{% if package == "__all__" %}
<p>Download stats for __all__ indicate downloads across all packages on PyPI.</p>
{% else %}
<p>
{% if metadata %}
<a href="{{ metadata['info']['package_url'] }}">PyPI page</a>
<br>
<a href="{{ metadata['info']['home_page'] }}">Home page</a>
<br>
Author:
{{ metadata['info']['author'] }}
{% if metadata['info']['license'] is not none %}
<br>
License:
{% if metadata['info']['license'] | length > 200 %}
{{ metadata['info']['license'][:200] }}...
{% else %}
{{ metadata['info']['license'] }}
{% endif %}
{% endif %}
<br>
Summary:
{{ metadata['info']['summary'] }}
<br>
Latest version:
{{ metadata['info']['version'] }}
<br>
{% if metadata['requires'] %}
Required dependencies:
{% for required in metadata['requires'] %}
<a href="{{ url_for('general.package_page', package=required.lower()) }}">{{ required.lower() }}</a>
{% if not loop.last %}|{% endif %}
{% endfor %}
{% endif %}
{% if metadata['optional'] %}
<br>
Optional dependencies:
{% for optional in metadata['optional'] %}
<a href="{{ url_for('general.package_page', package=optional.lower()) }}">{{ optional.lower() }}</a>
{% if not loop.last %}|{% endif %}
{% endfor %}
{% endif %}
{% else %}
No metadata found.
{% endif %}
{% endif %}
<br>
<br>
Downloads last day:
{{ "{:,.0f}".format(recent['day']) }}
<br>
Downloads last week:
{{ "{:,.0f}".format(recent['week']) }}
<br>
Downloads last month:
{{ "{:,.0f}".format(recent['month']) }}
</p>
<script>
(function () {
var WIDTH_IN_PERCENT_OF_PARENT = 100
// var HEIGHT = '300px'
var divelems = []
var data =
{{ plots|tojson }}
for (plt in data) {
var gd3 = Plotly.d3.select('section').append('div').style({
width: WIDTH_IN_PERCENT_OF_PARENT + '%',
'margin-left': (100 - WIDTH_IN_PERCENT_OF_PARENT) / 2 + '%',
// height: HEIGHT,
})
var gd = gd3.node()
divelems.push(gd)
Plotly.newPlot(divelems[divelems.length - 1], data[plt].data, data[plt].layout, data[plt].config);
}
window.onresize = function () {
for (chart in divelems) {
Plotly.Plots.resize(divelems[chart]);
}
};
})();
</script>
{% endblock %}

View File

@@ -1,12 +0,0 @@
<br>
{% if packages %}
<ul>
{% for package in packages %}
<li>
<a href="{{ url_for('general.package_page', package=package) }}">{{ package }}</a>
</li>
{% endfor %}
</ul>
{% else %}
No results.
{% endif %}

View File

@@ -1 +0,0 @@
{% include "index.html" %}

View File

@@ -1,37 +0,0 @@
{% extends "layout.html" %}
{% block title %}PyPI Download Stats{% endblock %}
{% block body %}
<h1>Most downloaded PyPI packages</h1>
<hr>
<table>
<tr>
{% for best in top %}
<td>
Most downloaded past
<b>{{ best['category'].lower() }}</b>.
</td>
{% endfor %}
</tr>
<tr>
{% for best in top %}
<td>
<table>
{% for package in best['packages'] %}
<tr>
<td>
{{ loop.index }}
</td>
<td>
<a href="{{ url_for('general.package_page', package=package['package']) }}">{{ package['package'] }}</a>
</td>
<td>
{{ "{:,.0f}".format(package['downloads']) }}
</td>
</tr>
{% endfor %}
</table>
</td>
{% endfor %}
</tr>
</table>
{% endblock %}

View File

@@ -1,31 +0,0 @@
{% extends "layout.html" %}
{% block title %}PyPI Download Stats{% endblock %}
{% block body %}
{% if user %}
<h1><img src="{{ user.avatar_url }}" height="25" width="25">
{{ user.username }}'s Packages</h1>
<hr>
<p>Currently saved packages.</p>
{% if user.favorites %}
<p>
<ul>
{% for package in user.favorites %}
<li>
<a href="{{ url_for('general.package_page', package=package) }}">{{ package }}</a>
</li>
{% endfor %}
</ul>
</p>
{% else %}
<p>Not tracking any packages.</p>
{% endif %}
{% else %}
<h1>My Packages</h1>
<hr>
<p>Log in with GitHub OAuth to track your own set of packages.</p>
<p>
<a href="{{ url_for('user.login') }}">Log in</a>
</p>
{% endif %}
{% endblock %}

View File

@@ -1,6 +0,0 @@
"""The view blueprint modules."""
from pypistats.views import admin
from pypistats.views import api
from pypistats.views import error
from pypistats.views import general
from pypistats.views import user

View File

@@ -1,38 +0,0 @@
import os
from flask import Blueprint
from flask import render_template
from flask_wtf import FlaskForm
from werkzeug.security import check_password_hash
from werkzeug.security import generate_password_hash
from wtforms import DateField
from wtforms.validators import DataRequired
from pypistats.extensions import auth
from pypistats.tasks.pypi import etl
users = {os.environ["BASIC_AUTH_USER"]: generate_password_hash(os.environ["BASIC_AUTH_PASSWORD"])}
blueprint = Blueprint("admin", __name__, template_folder="templates")
@auth.verify_password
def verify_password(username, password):
    """Validate HTTP Basic Auth credentials against the configured admin users.

    Returns the username on success; implicitly returns None (authentication
    failure) for unknown usernames or wrong passwords.
    """
    if username in users and check_password_hash(users.get(username), password):
        return username
class BackfillDateForm(FlaskForm):
    """Admin form for selecting a single date to backfill download stats for."""
    # The date whose stats should be (re)ingested; required.
    date = DateField("Date: ", validators=[DataRequired()])
@blueprint.route("/admin", methods=("GET", "POST"))
@auth.login_required
def index():
form = BackfillDateForm()
if form.validate_on_submit():
date = form.date.data
etl.apply_async(args=(str(date),))
return render_template("admin.html", form=form, date=date)
return render_template("admin.html", form=form)

View File

@@ -1,148 +0,0 @@
"""JSON API routes."""
from flask import Blueprint
from flask import abort
from flask import g
from flask import jsonify
from flask import render_template
from flask import request
from pypistats.models.download import RECENT_CATEGORIES
from pypistats.models.download import OverallDownloadCount
from pypistats.models.download import PythonMajorDownloadCount
from pypistats.models.download import PythonMinorDownloadCount
from pypistats.models.download import RecentDownloadCount
from pypistats.models.download import SystemDownloadCount
blueprint = Blueprint("api", __name__, url_prefix="/api")
@blueprint.route("/")
def api():
"""Get API documentation."""
return render_template("api.html", user=g.user)
@blueprint.route("/packages/<package>/recent")
def api_downloads_recent(package):
"""Get the recent downloads of a package."""
# abort(503)
if package != "__all__":
package = package.replace(".", "-").replace("_", "-")
category = request.args.get("period")
if category is None:
downloads = RecentDownloadCount.query.filter_by(package=package).all()
elif category in RECENT_CATEGORIES:
downloads = RecentDownloadCount.query.filter_by(package=package, category=category).all()
else:
abort(404)
response = {"package": package, "type": "recent_downloads"}
if len(downloads) > 0:
if category is None:
response["data"] = {"last_" + rc: 0 for rc in RECENT_CATEGORIES}
else:
response["data"] = {"last_" + category: 0}
for r in downloads:
response["data"]["last_" + r.category] = r.downloads
else:
abort(404)
return jsonify(response)
@blueprint.route("/packages/<package>/overall")
def api_downloads_overall(package):
"""Get the overall download time series of a package."""
# abort(503)
if package != "__all__":
package = package.replace(".", "-").replace("_", "-")
mirrors = request.args.get("mirrors")
if mirrors == "true":
downloads = (
OverallDownloadCount.query.filter_by(package=package, category="with_mirrors")
.order_by(OverallDownloadCount.date)
.all()
)
elif mirrors == "false":
downloads = (
OverallDownloadCount.query.filter_by(package=package, category="without_mirrors")
.order_by(OverallDownloadCount.date)
.all()
)
else:
downloads = (
OverallDownloadCount.query.filter_by(package=package)
.order_by(OverallDownloadCount.category, OverallDownloadCount.date)
.all()
)
response = {"package": package, "type": "overall_downloads"}
if len(downloads) > 0:
response["data"] = [{"date": str(r.date), "category": r.category, "downloads": r.downloads} for r in downloads]
else:
abort(404)
return jsonify(response)
@blueprint.route("/packages/<package>/python_major")
def api_downloads_python_major(package):
"""Get the python major download time series of a package."""
return generic_downloads(PythonMajorDownloadCount, package, "version", "python_major")
@blueprint.route("/packages/<package>/python_minor")
def api_downloads_python_minor(package):
"""Get the python minor download time series of a package."""
return generic_downloads(PythonMinorDownloadCount, package, "version", "python_minor")
@blueprint.route("/packages/<package>/system")
def api_downloads_system(package):
"""Get the system download time series of a package."""
return generic_downloads(SystemDownloadCount, package, "os", "system")
def generic_downloads(model, package, arg, name):
    """Generate a generic time-series response for a download-count model.

    Args:
        model: SQLAlchemy download-count model to query.
        package: Package name from the URL (canonicalized below).
        arg: Query-string argument name used to filter by category.
        name: Response ``type`` prefix, e.g. ``"system"``.

    Returns:
        JSON with one record per (date, category).

    Raises:
        404 via ``abort`` when the package (or requested category) has no data.
    """
    # abort(503)
    if package != "__all__":
        # PyPI treats '.', '_' and '-' as equivalent in names; canonicalize to '-'.
        package = package.replace(".", "-").replace("_", "-")
    category = request.args.get(arg)
    if category is not None:
        downloads = model.query.filter_by(package=package, category=category.title()).order_by(model.date).all()
    else:
        downloads = model.query.filter_by(package=package).order_by(model.category, model.date).all()
    response = {"package": package, "type": f"{name}_downloads"}
    # Bug fix: Query.all() always returns a list (never None), so the old
    # `downloads is not None` check made the 404 branch unreachable and let
    # unknown packages return an empty payload. Check emptiness instead, in
    # line with the recent/overall endpoints.
    if downloads:
        response["data"] = [{"date": str(r.date), "category": r.category, "downloads": r.downloads} for r in downloads]
    else:
        abort(404)
    return jsonify(response)
# TODO
# @blueprint.route("/top/overall")
# def api_top_packages():
# """Get the most downloaded packages by recency."""
# return "top overall"
#
#
# @blueprint.route("/top/python_major")
# def api_top_python_major():
# """Get the most downloaded packages by python major version."""
# return "top python_major"
#
#
# @blueprint.route("/top/python_minor")
# def api_top_python_minor():
# """Get the most downloaded packages by python minor version."""
# return "top python_minor"
#
#
# @blueprint.route("/top/system")
# def api_top_system():
# """Get the most downloaded packages by system."""
# return "top python_minor"

View File

@@ -1,40 +0,0 @@
"""Error page handlers."""
from flask import Blueprint
from flask import url_for
blueprint = Blueprint("error", __name__, template_folder="templates")
@blueprint.app_errorhandler(400)
def handle_400(err):
    """Return 400."""
    return "400", 400
@blueprint.app_errorhandler(401)
def handle_401(err):
    """Return 401."""
    return "401", 401
@blueprint.app_errorhandler(404)
def handle_404(err):
    """Return 404."""
    return "404", 404
@blueprint.app_errorhandler(429)
def handle_429(err):
    """Return 429 with a link to the API etiquette section."""
    return f"""<a href="{url_for("api.api")}#etiquette">429 RATE LIMIT EXCEEDED</a>""", 429
@blueprint.app_errorhandler(500)
def handle_500(err):
    """Return 500."""
    return "500", 500
@blueprint.app_errorhandler(503)
def handle_503(err):
    """Return 503."""
    return "503 TEMPORARILY DISABLED", 503

View File

@@ -1,320 +0,0 @@
"""General pages."""
import datetime
import re
from collections import defaultdict
from copy import deepcopy
import requests
from flask import Blueprint
from flask import current_app
from flask import g
from flask import redirect
from flask import render_template
from flask import request
from flask_wtf import FlaskForm
from wtforms import StringField
from wtforms.validators import DataRequired
from pypistats.models.download import RECENT_CATEGORIES
from pypistats.models.download import OverallDownloadCount
from pypistats.models.download import PythonMajorDownloadCount
from pypistats.models.download import PythonMinorDownloadCount
from pypistats.models.download import RecentDownloadCount
from pypistats.models.download import SystemDownloadCount
blueprint = Blueprint("general", __name__, template_folder="templates")
MODELS = [OverallDownloadCount, PythonMajorDownloadCount, PythonMinorDownloadCount, SystemDownloadCount]
class PackageSearchForm(FlaskForm):
    """Search form for looking up a package by name."""
    # Free-text package name (prefix) to search for; required.
    name = StringField("Package: ", validators=[DataRequired()])
@blueprint.route("/", methods=("GET", "POST"))
def index():
"""Render the home page."""
form = PackageSearchForm()
if form.validate_on_submit():
package = form.name.data
return redirect(f"/search/{package.lower()}")
package_count = RecentDownloadCount.query.filter_by(category="month").count()
return render_template("index.html", form=form, user=g.user, package_count=package_count)
@blueprint.route("/health")
def health():
return "OK"
@blueprint.route("/search/<package>", methods=("GET", "POST"))
def search(package):
"""Render the home page."""
package = package.replace(".", "-")
form = PackageSearchForm()
if form.validate_on_submit():
package = form.name.data
return redirect(f"/search/{package}")
results = (
RecentDownloadCount.query.filter(
RecentDownloadCount.package.like(f"{package}%"), RecentDownloadCount.category == "month"
)
.order_by(RecentDownloadCount.package)
.limit(20)
.all()
)
packages = [r.package for r in results]
if len(packages) == 1:
package = packages[0]
return redirect(f"/packages/{package}")
return render_template("search.html", search=True, form=form, packages=packages, user=g.user)
@blueprint.route("/about")
def about():
"""Render the about page."""
return render_template("about.html", user=g.user)
@blueprint.route("/faqs")
def faqs():
"""Render the FAQs page."""
return render_template("faqs.html", user=g.user)
@blueprint.route("/packages/<package>")
def package_page(package):
"""Render the package page."""
package = package.replace(".", "-")
# Recent download stats
try:
# Take the min of the lookback and 180
lookback = min(abs(int(request.args.get("lookback", 180))), 180)
except ValueError:
lookback = 180
start_date = str(datetime.date.today() - datetime.timedelta(lookback))
recent_downloads = RecentDownloadCount.query.filter_by(package=package).all()
if len(recent_downloads) == 0:
return redirect(f"/search/{package}")
recent = {r: 0 for r in RECENT_CATEGORIES}
for r in recent_downloads:
recent[r.category] = r.downloads
# PyPI metadata
metadata = None
if package != "__all__":
try:
metadata = requests.get(f"https://pypi.python.org/pypi/{package}/json", timeout=5).json()
if metadata["info"].get("requires_dist", None):
requires, optional = set(), set()
for dependency in metadata["info"]["requires_dist"]:
package_name = re.split(r"[^0-9a-zA-Z_.-]+", dependency.lower())[0]
if "; extra ==" in dependency:
optional.add(package_name)
else:
requires.add(package_name)
metadata["requires"] = sorted(requires)
metadata["optional"] = sorted(optional)
except Exception:
pass
# Get data from db
model_data = []
for model in MODELS:
records = (
model.query.filter_by(package=package)
.filter(model.date >= start_date)
.order_by(model.date, model.category)
.all()
)
if model == OverallDownloadCount:
metrics = ["downloads"]
else:
metrics = ["downloads", "percentages"]
for metric in metrics:
model_data.append({"metric": metric, "name": model.__tablename__, "data": data_function[metric](records)})
# Build the plots
plots = []
for model in model_data:
plot = deepcopy(current_app.config["PLOT_BASE"])[model["metric"]]
# Set data
data = []
for category, values in model["data"].items():
base = deepcopy(current_app.config["DATA_BASE"][model["metric"]]["data"][0])
base["x"] = values["x"]
base["y"] = values["y"]
if model["metric"] == "percentages":
base["text"] = values["text"]
base["name"] = category.title()
data.append(base)
plot["data"] = data
# Add titles
if model["metric"] == "percentages":
plot["layout"][
"title"
] = f"Daily Download Proportions of {package} package - {model['name'].title().replace('_', ' ')}" # noqa
else:
plot["layout"][
"title"
] = f"Daily Download Quantity of {package} package - {model['name'].title().replace('_', ' ')}" # noqa
# Explicitly set range
plot["layout"]["xaxis"]["range"] = [str(records[0].date - datetime.timedelta(1)), str(datetime.date.today())]
# Add range buttons
plot["layout"]["xaxis"]["rangeselector"] = {"buttons": []}
drange = (datetime.date.today() - records[0].date).days
for k in [30, 60, 90, 120, 9999]:
if k <= drange:
plot["layout"]["xaxis"]["rangeselector"]["buttons"].append(
{"step": "day", "stepmode": "backward", "count": k + 1, "label": f"{k}d"}
)
else:
plot["layout"]["xaxis"]["rangeselector"]["buttons"].append(
{"step": "day", "stepmode": "backward", "count": drange + 1, "label": "all"}
)
break
plots.append(plot)
return render_template("package.html", package=package, plots=plots, metadata=metadata, recent=recent, user=g.user)
def get_download_data(records):
    """Organize the data for the absolute plots.

    Args:
        records: Rows ordered by (date, category), each exposing ``date``,
            ``category`` and ``downloads``. Assumed non-empty
            (``records[0]`` is read unconditionally).

    Returns:
        Mapping of category -> {"x": [date strings], "y": [counts]} with
        zeros filled in wherever a category has no row for a date, so every
        series spans the same contiguous date range.
    """
    data = defaultdict(lambda: {"x": [], "y": []})
    date_categories = []
    all_categories = []
    prev_date = records[0].date
    # First pass: collect the full, sorted set of categories present.
    for record in records:
        if record.category not in all_categories:
            all_categories.append(record.category)
    all_categories = sorted(all_categories)
    for category in all_categories:
        data[category]  # set the dict value (keeps it ordered)
    for record in records:
        # Fill missing intermediate dates with zeros
        if record.date != prev_date:
            # Zero-fill categories that had no row on the date just completed.
            for category in all_categories:
                if category not in date_categories:
                    data[category]["x"].append(str(prev_date))
                    data[category]["y"].append(0)
            # Fill missing intermediate dates with zeros
            days_between = (record.date - prev_date).days
            date_list = [prev_date + datetime.timedelta(days=x) for x in range(1, days_between)]
            for date in date_list:
                for category in all_categories:
                    data[category]["x"].append(str(date))
                    data[category]["y"].append(0)
            # Reset
            date_categories = []
            prev_date = record.date
        # Track categories for this date
        date_categories.append(record.category)
        data[record.category]["x"].append(str(record.date))
        data[record.category]["y"].append(record.downloads)
    else:
        # Fill in missing final date with zeros
        for category in all_categories:
            if category not in date_categories:
                data[category]["x"].append(str(records[-1].date))
                data[category]["y"].append(0)
    return data
def get_proportion_data(records):
    """Organize download records into per-category percentage series for the fill plots.

    Args:
        records: date-ordered rows, each exposing ``date`` (datetime.date),
            ``category`` (str) and ``downloads`` (int) attributes.

    Returns:
        Mapping of category -> {"x": [...], "y": [...], "text": [...]} where
        ``y`` is the category's share (percent) of that date's total downloads
        and ``text`` is a "NN.NN% = count" hover label.
    """
    data = defaultdict(lambda: {"x": [], "y": [], "text": []})
    # Guard: no records means no series (the original raised IndexError here).
    if not records:
        return data
    date_categories = defaultdict(lambda: 0)
    all_categories = []
    prev_date = records[0].date
    for record in records:
        if record.category not in all_categories:
            all_categories.append(record.category)
    all_categories = sorted(all_categories)
    for category in all_categories:
        data[category]  # touch the key so insertion order matches the sorted categories
    for record in records:
        if record.date != prev_date:
            # total is "downloads per percentage point"; `or 1` guards the
            # ZeroDivisionError the original hit on an all-zero-downloads date.
            total = sum(date_categories.values()) / 100 or 1
            for category in all_categories:
                data[category]["x"].append(str(prev_date))
                value = date_categories[category] / total
                data[category]["y"].append(value)
                data[category]["text"].append("{0:.2f}%".format(value) + " = {:,}".format(date_categories[category]))
            date_categories = defaultdict(lambda: 0)
            prev_date = record.date
        # Track this date's downloads per category.
        date_categories[record.category] = record.downloads
    # Emit the final date (zeros for categories without a record that day).
    total = sum(date_categories.values()) / 100 or 1
    for category in all_categories:
        if category not in date_categories:
            data[category]["x"].append(str(records[-1].date))
            data[category]["y"].append(0)
            data[category]["text"].append("{0:.2f}%".format(0) + " = {:,}".format(0))
        else:
            data[category]["x"].append(str(records[-1].date))
            value = date_categories[category] / total
            data[category]["y"].append(value)
            data[category]["text"].append("{0:.2f}%".format(value) + " = {:,}".format(date_categories[category]))
    return data
# Dispatch table: plot flavor ("downloads" = absolute, "percentages" = share) -> series builder.
data_function = {"downloads": get_download_data, "percentages": get_proportion_data}
@blueprint.route("/top")
def top():
    """Render the top packages page: top-20 downloads per day/week/month."""

    def _top_for(period):
        # Top 20 real packages by downloads; the "__all__" aggregate row is excluded.
        rows = (
            RecentDownloadCount.query.filter_by(category=period)
            .filter(RecentDownloadCount.package != "__all__")
            .order_by(RecentDownloadCount.downloads.desc())
            .limit(20)
            .all()
        )
        return {
            "category": period,
            "packages": [{"package": row.package, "downloads": row.downloads} for row in rows],
        }

    top_ = [_top_for(period) for period in ("day", "week", "month")]
    return render_template("top.html", top=top_, user=g.user)
@blueprint.route("/status")
def status():
    """Health-check endpoint; always responds with the plain-text body "OK"."""
    return "OK"

View File

@@ -1,133 +0,0 @@
"""User page for tracking packages."""
from flask import Blueprint
from flask import abort
from flask import flash
from flask import g
from flask import redirect
from flask import render_template
from flask import request
from flask import session
from flask import url_for
from pypistats.extensions import github
from pypistats.models.download import RecentDownloadCount
from pypistats.models.user import MAX_FAVORITES
from pypistats.models.user import User
# Blueprint for the user/auth pages; templates resolved from ./templates.
blueprint = Blueprint("user", __name__, template_folder="templates")
@github.access_token_getter
def token_getter():
    """Return the GitHub OAuth token for the current user, or None when logged out."""
    current = g.user
    return current.token if current is not None else None
@blueprint.route("/github-callback")
@github.authorized_handler
def authorized(oauth_token):
    """GitHub OAuth callback: create/refresh the User row and log the session in.

    ``oauth_token`` is None when the user denied authorization; otherwise it is
    the access token GitHub issued for this login.
    """
    next_url = request.args.get("next") or url_for("user.user")
    if oauth_token is None:
        flash("Authorization failed.")
        return redirect(next_url)
    # Ensure a user with token doesn't already exist
    this_user = User.query.filter_by(token=oauth_token).first()
    if this_user is None:
        this_user = User(token=oauth_token)
    # Set this to use API to get user data (token_getter reads g.user.token)
    g.user = this_user
    user_data = github.get("user")
    # extract data
    uid = user_data["id"]
    username = user_data["login"]
    avatar_url = user_data["avatar_url"]
    # Create/update the user, keyed by the GitHub uid
    this_user = User.query.filter_by(uid=uid).first()
    if this_user is None:
        # First login: create a fresh row with profile data and token.
        this_user = User(token=oauth_token, uid=uid, username=username, avatar_url=avatar_url)
    else:
        # Returning user: refresh profile data and rotate the stored token.
        this_user.username = username
        this_user.avatar_url = avatar_url
        this_user.token = oauth_token
    this_user.save()
    session["username"] = this_user.username
    session["user_id"] = this_user.id
    g.user = this_user
    return redirect(next_url)
@blueprint.route("/login")
def login():
    """Start the GitHub OAuth flow, or go straight to the user page when already logged in."""
    if session.get("user_id") is not None:
        return redirect(url_for("user.user"))
    return github.authorize()
@blueprint.route("/logout")
def logout():
    """Clear the login session and send the visitor back to the index page."""
    for key in ("user_id", "username"):
        session.pop(key, None)
    g.user = None
    return redirect(url_for("general.index"))
@blueprint.route("/user")
def user():
    """Render the logged-in user's personal page (g.user may be None)."""
    return render_template("user.html", user=g.user)
@blueprint.route("/user/packages/<package>")
def user_package(package):
    """Toggle ``package`` in the logged-in user's favorites list.

    Adds the package when absent (capped at MAX_FAVORITES, and only if it has
    download records), removes it when present, then redirects back to the
    user page. Responds 400 when not logged in or the package is unknown.
    """
    if g.user:
        # Ensure package is valid.
        downloads = RecentDownloadCount.query.filter_by(package=package).all()
        # Handle add/remove to favorites
        if g.user.favorites is None:
            # First favorite ever: seed the list.
            # Ensure package is valid before adding
            if len(downloads) == 0:
                return abort(400)
            g.user.favorites = [package]
            g.user.update()
            return redirect(url_for("user.user"))
        elif package in g.user.favorites:
            # Already favorited: remove it.
            favorites = g.user.favorites
            favorites.remove(package)
            # Workaround for sqlalchemy mutable ARRAY types:
            # reassign None, save, then the new list, save — forces persistence.
            g.user.favorites = None
            g.user.save()
            g.user.favorites = favorites
            g.user.save()
            return redirect(url_for("user.user"))
        else:
            if len(g.user.favorites) < MAX_FAVORITES:
                # Ensure package is valid before adding
                if len(downloads) == 0:
                    return abort(400)
                favorites = g.user.favorites
                favorites.append(package)
                favorites = sorted(favorites)
                # Workaround for sqlalchemy mutable ARRAY types
                g.user.favorites = None
                g.user.save()
                g.user.favorites = favorites
                g.user.save()
                return redirect(url_for("user.user"))
            else:
                return f"Maximum package number reached ({MAX_FAVORITES})."
    return abort(400)

View File

@@ -1,63 +0,0 @@
[tool.poetry]
name = "pypistatsorg"
version = "11"
description = "Download counts dashboard for python packages"
authors = ["Flynn <crf204@gmail.com>"]
[tool.poetry.dependencies]
python = "^3.7"
google-cloud-bigquery = "^1.17"
flask = "^1.1"
github-flask = "^3.2"
flask-sqlalchemy = "^2.4"
flask-migrate = "^2.5"
flask-login = "^0.4.1"
flask-wtf = "^0.14.2"
gunicorn = "^19.9"
requests = "^2.22"
celery = "^4.3"
psycopg2-binary = "^2.8"
redis = "^3.3"
flask-limiter = "^1.2.1"
flower = "^0.9.5"
flask-httpauth = "^4.1.0"
[tool.poetry.dev-dependencies]
black = "^19.10b0"
isort = "^5.3"
[tool.black]
line-length = 120
target-version = ['py37']
include = '\.pyi?$'
exclude = '''
(
/(
\.eggs
| \.circleci
| \.git
| \.github
| \.hg
| \.mypy_cache
| \.pytest_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
)/
)
'''
[tool.isort]
force_single_line = true
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
line_length = 120
[build-system]
requires = ["poetry>=1.0"]
build-backend = "poetry.masonry.api"

3
src/app.css Normal file
View File

@@ -0,0 +1,3 @@
@import 'tailwindcss';
@plugin '@tailwindcss/forms';
@plugin '@tailwindcss/typography';

13
src/app.d.ts vendored Normal file
View File

@@ -0,0 +1,13 @@
// See https://svelte.dev/docs/kit/types#app.d.ts
// for information about these interfaces
declare global {
	namespace App {
		// App-wide SvelteKit interfaces; all left at their framework defaults for now.
		// interface Error {}
		// interface Locals {}
		// interface PageData {}
		// interface PageState {}
		// interface Platform {}
	}
}
export {};

11
src/app.html Normal file
View File

@@ -0,0 +1,11 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
%sveltekit.head%
</head>
<body data-sveltekit-preload-data="hover">
<div style="display: contents">%sveltekit.body%</div>
</body>
</html>

35
src/hooks.server.ts Normal file
View File

@@ -0,0 +1,35 @@
import { closeRedisClient, forceDisconnectRedis } from '$lib/redis.js';
import type { Handle } from '@sveltejs/kit';
// Minimal server hooks without cron.
// Guarded so the module also loads in non-Node environments.
if (typeof process !== 'undefined') {
	// Graceful shutdown: close Redis cleanly on orchestrator stop signals.
	process.on('SIGTERM', async () => {
		console.log('🛑 Received SIGTERM, closing connections...');
		await closeRedisClient();
		process.exit(0);
	});
	process.on('SIGINT', async () => {
		console.log('🛑 Received SIGINT, closing connections...');
		await closeRedisClient();
		process.exit(0);
	});
	// Fatal errors: force-disconnect Redis (skip graceful close) and exit non-zero.
	process.on('uncaughtException', async (error) => {
		console.error('🛑 Uncaught Exception:', error);
		await forceDisconnectRedis();
		process.exit(1);
	});
	process.on('unhandledRejection', async (reason, promise) => {
		console.error('🛑 Unhandled Rejection at:', promise, 'reason:', reason);
		await forceDisconnectRedis();
		process.exit(1);
	});
}
// Pass-through request handler: no per-request customization is applied yet.
export const handle: Handle = async ({ event, resolve }) => resolve(event);

271
src/lib/api.ts Normal file
View File

@@ -0,0 +1,271 @@
import { prisma } from './prisma.js';
import { RECENT_CATEGORIES } from './database.js';
import { CacheManager } from './redis.js';
// Module-level cache shared by every query helper in this file.
const cache = new CacheManager();
// One time-series point: ISO date string, category label, and the download
// count (bigint when it comes straight from a Prisma aggregate).
export type Results = {
	date: string;
	category: string;
	downloads: number | bigint;
}
/**
 * Recent download totals for a package, mirror traffic excluded.
 * With a valid `category` ('day'/'week'/'month') only that window is computed
 * (and not cached); otherwise all three are combined and cached for an hour.
 */
export async function getRecentDownloads(packageName: string, category?: string): Promise<Results[]> {
	const cacheKey = CacheManager.getRecentStatsKey(packageName);
	// Cache holds only the combined day/week/month result, so it is used
	// solely when no specific category was requested.
	const cached = await cache.get<Results[]>(cacheKey);
	if (cached && !category) {
		return cached;
	}
	if (category && RECENT_CATEGORIES.includes(category)) {
		// Compute the recent window on the fly from the overall table,
		// excluding mirrors; the row is stamped with today's date.
		const bounds = getRecentBounds(category);
		const result = await prisma.overallDownloadCount.groupBy({
			by: ['package'],
			where: {
				package: packageName,
				category: 'without_mirrors',
				date: { gte: bounds.start }
			},
			_sum: { downloads: true }
		});
		return result.map(r => ({
			date: new Date().toISOString().split('T')[0],
			category,
			downloads: r._sum.downloads || 0
		}));
	}
	// Default: recurse once per period and combine day/week/month.
	const day: Results[] = await getRecentDownloads(packageName, 'day');
	const week: Results[] = await getRecentDownloads(packageName, 'week');
	const month: Results[] = await getRecentDownloads(packageName, 'month');
	const result: Results[] = [...day, ...week, ...month];
	// Cache the combined result for 1 hour
	await cache.set(cacheKey, result, 3600);
	return result;
}
/**
 * Lower date bound for a recent-downloads window: now for 'day' (and any
 * unknown category), 7 days back for 'week', 30 days back for 'month'.
 */
function getRecentBounds(category: string) {
	const now = new Date();
	const dayMs = 24 * 60 * 60 * 1000;
	const daysBack: Record<string, number> = { day: 0, week: 7, month: 30 };
	const back = daysBack[category] ?? 0;
	const start = back === 0 ? new Date(now) : new Date(now.getTime() - back * dayMs);
	return { start };
}
/**
 * Daily overall download series for a package, oldest first.
 * `mirrors` is a string flag: 'true' -> with_mirrors rows only, 'false' ->
 * without_mirrors only, anything else -> both categories.
 * Cached for one hour per (package, mirrors) combination.
 */
export async function getOverallDownloads(packageName: string, mirrors?: string) {
	const cacheKey = CacheManager.getPackageKey(packageName, `overall_${mirrors || 'all'}`);
	// Try to get from cache first
	const cached = await cache.get<Results[]>(cacheKey);
	if (cached) {
		return cached;
	}
	const whereClause: any = {
		package: packageName
	};
	if (mirrors === 'true') {
		whereClause.category = 'with_mirrors';
	} else if (mirrors === 'false') {
		whereClause.category = 'without_mirrors';
	}
	const result = await prisma.overallDownloadCount.findMany({
		where: whereClause,
		orderBy: {
			date: 'asc'
		}
	});
	// Cache the result for 1 hour
	await cache.set(cacheKey, result, 3600);
	return result;
}
export async function getPythonMajorDownloads(packageName: string, version?: string) {
const cacheKey = CacheManager.getPackageKey(packageName, `python_major_${version || 'all'}`);
// Try to get from cache first
const cached = await cache.get<Results[]>(cacheKey);
if (cached) {
return cached;
}
const whereClause: any = {
package: packageName
};
if (version) {
whereClause.category = version;
}
const result = await prisma.pythonMajorDownloadCount.findMany({
where: whereClause,
orderBy: {
date: 'asc'
}
});
// Cache the result for 1 hour
await cache.set(cacheKey, result, 3600);
return result;
}
export async function getPythonMinorDownloads(packageName: string, version?: string) {
const cacheKey = CacheManager.getPackageKey(packageName, `python_minor_${version || 'all'}`);
// Try to get from cache first
const cached = await cache.get<Results[]>(cacheKey);
if (cached) {
return cached;
}
const whereClause: any = {
package: packageName
};
if (version) {
whereClause.category = version;
}
const result = await prisma.pythonMinorDownloadCount.findMany({
where: whereClause,
orderBy: {
date: 'asc'
}
});
// Cache the result for 1 hour
await cache.set(cacheKey, result, 3600);
return result;
}
export async function getSystemDownloads(packageName: string, os?: string) {
const cacheKey = CacheManager.getPackageKey(packageName, `system_${os || 'all'}`);
// Try to get from cache first
const cached = await cache.get<Results[]>(cacheKey);
if (cached) {
return cached;
}
const whereClause: any = {
package: packageName
};
if (os) {
whereClause.category = os;
}
const result = await prisma.systemDownloadCount.findMany({
where: whereClause,
orderBy: {
date: 'asc'
}
});
// Cache the result for 1 hour
await cache.set(cacheKey, result, 3600);
return result;
}
/**
 * Prefix search over package names (month recent-downloads rows only),
 * alphabetical, capped at 20 results. Cached for 30 minutes per term.
 */
export async function searchPackages(searchTerm: string) {
	const cacheKey = CacheManager.getSearchKey(searchTerm);
	// Try to get from cache first
	const cached = await cache.get<string[]>(cacheKey);
	if (cached) {
		return cached;
	}
	const results = await prisma.recentDownloadCount.findMany({
		where: {
			package: {
				startsWith: searchTerm
			},
			category: 'month'
		},
		select: {
			package: true
		},
		distinct: ['package'],
		orderBy: {
			package: 'asc'
		},
		take: 20
	});
	const packages = results.map(result => result.package);
	// Cache the result for 30 minutes (search results change less frequently)
	await cache.set(cacheKey, packages, 1800);
	return packages;
}
/**
 * Count of distinct packages that have a 'month' recent-downloads row.
 * Cached for one hour; the `!== null` check lets a cached 0 short-circuit.
 */
export async function getPackageCount() {
	const cacheKey = CacheManager.getPackageCountKey();
	// Try to get from cache first
	const cached = await cache.get<number>(cacheKey);
	if (cached !== null) {
		return cached;
	}
	// groupBy is used purely to count distinct package names.
	const result = await prisma.recentDownloadCount.groupBy({
		by: ['package'],
		where: {
			category: 'month'
		}
	});
	const count = result.length;
	// Cache the result for 1 hour
	await cache.set(cacheKey, count, 3600);
	return count;
}
// Cache invalidation functions
/**
 * Drop every cached entry for one package: the recent-stats key plus each
 * per-view key this module writes. Deletes run sequentially, matching the
 * original one-at-a-time behavior.
 */
export async function invalidatePackageCache(packageName: string) {
	const suffixes = [
		'overall_all',
		'overall_true',
		'overall_false',
		'python_major_all',
		'python_minor_all',
		'system_all'
	];
	const keys = [
		CacheManager.getRecentStatsKey(packageName),
		...suffixes.map((suffix) => CacheManager.getPackageKey(packageName, suffix))
	];
	for (const key of keys) {
		await cache.del(key);
	}
}
/** Invalidate search-related caches. Currently only clears the package count. */
export async function invalidateSearchCache() {
	// This would need to be implemented with pattern matching
	// For now, we'll just clear the package count cache
	// TODO: also clear per-term search keys (requires a key-pattern scan).
	await cache.del(CacheManager.getPackageCountKey());
}
/** Drop every cached value (used after bulk data updates). */
export async function clearAllCache() {
	await cache.flush();
}

View File

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="107" height="128" viewBox="0 0 107 128"><title>svelte-logo</title><path d="M94.157 22.819c-10.4-14.885-30.94-19.297-45.792-9.835L22.282 29.608A29.92 29.92 0 0 0 8.764 49.65a31.5 31.5 0 0 0 3.108 20.231 30 30 0 0 0-4.477 11.183 31.9 31.9 0 0 0 5.448 24.116c10.402 14.887 30.942 19.297 45.791 9.835l26.083-16.624A29.92 29.92 0 0 0 98.235 78.35a31.53 31.53 0 0 0-3.105-20.232 30 30 0 0 0 4.474-11.182 31.88 31.88 0 0 0-5.447-24.116" style="fill:#ff3e00"/><path d="M45.817 106.582a20.72 20.72 0 0 1-22.237-8.243 19.17 19.17 0 0 1-3.277-14.503 18 18 0 0 1 .624-2.435l.49-1.498 1.337.981a33.6 33.6 0 0 0 10.203 5.098l.97.294-.09.968a5.85 5.85 0 0 0 1.052 3.878 6.24 6.24 0 0 0 6.695 2.485 5.8 5.8 0 0 0 1.603-.704L69.27 76.28a5.43 5.43 0 0 0 2.45-3.631 5.8 5.8 0 0 0-.987-4.371 6.24 6.24 0 0 0-6.698-2.487 5.7 5.7 0 0 0-1.6.704l-9.953 6.345a19 19 0 0 1-5.296 2.326 20.72 20.72 0 0 1-22.237-8.243 19.17 19.17 0 0 1-3.277-14.502 17.99 17.99 0 0 1 8.13-12.052l26.081-16.623a19 19 0 0 1 5.3-2.329 20.72 20.72 0 0 1 22.237 8.243 19.17 19.17 0 0 1 3.277 14.503 18 18 0 0 1-.624 2.435l-.49 1.498-1.337-.98a33.6 33.6 0 0 0-10.203-5.1l-.97-.294.09-.968a5.86 5.86 0 0 0-1.052-3.878 6.24 6.24 0 0 0-6.696-2.485 5.8 5.8 0 0 0-1.602.704L37.73 51.72a5.42 5.42 0 0 0-2.449 3.63 5.79 5.79 0 0 0 .986 4.372 6.24 6.24 0 0 0 6.698 2.486 5.8 5.8 0 0 0 1.602-.704l9.952-6.342a19 19 0 0 1 5.295-2.328 20.72 20.72 0 0 1 22.237 8.242 19.17 19.17 0 0 1 3.277 14.503 18 18 0 0 1-8.13 12.053l-26.081 16.622a19 19 0 0 1-5.3 2.328" style="fill:#fff"/></svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

742
src/lib/data-processor.ts Normal file
View File

@@ -0,0 +1,742 @@
import { prisma } from './prisma.js';
import type { Prisma } from '@prisma/client';
import { BigQuery } from '@google-cloud/bigquery';
import { CacheManager, LockManager } from './redis.js';
// Configuration constants
// Installers treated as mirror traffic and excluded from "without_mirrors" counts.
const MIRRORS = ['bandersnatch', 'z3c.pypimirror', 'Artifactory', 'devpi'];
// OS names kept verbatim; anything else is bucketed under 'other'.
const SYSTEMS = ['Windows', 'Linux', 'Darwin'];
// Days of history to keep before purgeOldData drops older rows.
const MAX_RECORD_AGE = 180;
// Shape of one row from the daily BigQuery aggregation query.
interface DownloadRecord {
	package: string;
	category_label: string; // destination table, e.g. 'overall' | 'python_major' | 'python_minor' | 'system'
	category: string; // value within that table, e.g. 'with_mirrors' or a version string
	downloads: number;
}
// BigQuery rows grouped by category_label (i.e. by destination table).
interface ProcessedData {
	[category: string]: Array<{
		date: string;
		package: string;
		category: string;
		downloads: number;
	}>;
}
export class DataProcessor {
	private bigquery: BigQuery; // client for the public PyPI dataset
	private cache: CacheManager; // Redis-backed cache
	private locks: LockManager; // Redis-backed distributed locks
	/**
	 * Builds the BigQuery client (credentials from env JSON, key file, or
	 * application-default) plus the Redis cache and lock managers.
	 */
	constructor() {
		// Initialize BigQuery client with flexible credential handling
		const bigQueryConfig: any = {
			projectId: process.env.GOOGLE_PROJECT_ID,
		};
		// Handle credentials from environment variable or file
		if (process.env.GOOGLE_APPLICATION_CREDENTIALS_JSON) {
			// Use JSON credentials from environment variable
			try {
				const credentials = JSON.parse(process.env.GOOGLE_APPLICATION_CREDENTIALS_JSON);
				bigQueryConfig.credentials = credentials;
				// Env vars often store the PEM with literal "\n"; restore real newlines.
				bigQueryConfig.credentials.private_key = credentials.private_key.replace(/\\n/g, '\n');
			} catch (error) {
				console.error('Failed to parse GOOGLE_APPLICATION_CREDENTIALS_JSON:', error);
				throw new Error('Invalid GOOGLE_APPLICATION_CREDENTIALS_JSON format');
			}
		} else if (process.env.GOOGLE_APPLICATION_CREDENTIALS) {
			// Use file path (existing behavior)
			bigQueryConfig.keyFilename = process.env.GOOGLE_APPLICATION_CREDENTIALS;
		} else {
			// Try to use default credentials (for local development with gcloud auth)
			console.log('No explicit credentials provided, using default credentials');
		}
		this.bigquery = new BigQuery(bigQueryConfig);
		// Initialize cache and locks
		this.cache = new CacheManager();
		this.locks = new LockManager();
	}
	/**
	 * Main ETL process - replicates the Python etl() function.
	 * Idempotent per date: a Redis "processed" marker skips repeat runs and a
	 * 30-minute lock prevents two concurrent ETLs for the same date.
	 * @param date target date (YYYY-MM-DD); defaults to yesterday (UTC)
	 * @param purge when true, drop records older than MAX_RECORD_AGE days
	 */
	async etl(date?: string, purge: boolean = true) {
		const targetDate = date || this.getYesterdayDate();
		console.log(`Starting ETL process for date: ${targetDate}`);
		const etlLockKey = `pypistats:lock:etl:${targetDate}`;
		const processedKey = `pypistats:processed:${targetDate}`;
		let lockToken: string | null = null;
		const results: any = {};
		try {
			// If we've already processed this date, skip idempotently
			const alreadyProcessed = await this.cache.get<boolean>(processedKey);
			if (alreadyProcessed) {
				console.log(`Date ${targetDate} already processed, skipping ETL.`);
				return { skipped: true };
			}
			// Acquire a short-lived lock to avoid concurrent ETL for the same date
			lockToken = await this.locks.acquireLock(etlLockKey, 60 * 30); // 30 minutes
			if (!lockToken) {
				console.log(`Another ETL is running for ${targetDate}, skipping.`);
				return { locked: true };
			}
			// Get daily download stats
			results.downloads = await this.getDailyDownloadStats(targetDate);
			// Update __all__ package stats
			results.__all__ = await this.updateAllPackageStats(targetDate);
			// Update recent stats
			results.recent = await this.updateRecentStats();
			// Database maintenance
			results.cleanup = await this.vacuumAnalyze();
			// Purge old data
			if (purge) {
				results.purge = await this.purgeOldData(targetDate);
			}
			// Mark processed and clear cache
			await this.cache.set(processedKey, true, 60 * 60 * 24 * 14); // remember for 14 days
			await this.clearCache();
			console.log('ETL process completed successfully');
			return results;
		} catch (error) {
			console.error('ETL process failed:', error);
			throw error;
		} finally {
			// Best-effort release; if no lock held, it is a no-op
			try {
				if (lockToken) {
					await this.locks.releaseLock(etlLockKey, lockToken);
				}
			} catch {}
		}
	}
	/**
	 * Get daily download stats from BigQuery for one date and load them into
	 * the database, grouped by destination table (category_label).
	 */
	async getDailyDownloadStats(date: string): Promise<any> {
		console.log(`Fetching download stats for ${date} from BigQuery...`);
		const query = this.getBigQueryQuery(date);
		const [rows] = await this.bigquery.query({ query });
		console.log(`Retrieved ${rows.length} rows from BigQuery`);
		// Process data by category (group rows per destination table)
		const data: ProcessedData = {};
		for (const row of rows as DownloadRecord[]) {
			if (!data[row.category_label]) {
				data[row.category_label] = [];
			}
			data[row.category_label].push({
				date,
				package: row.package,
				category: row.category,
				downloads: row.downloads,
			});
		}
		// Update database with new data (delete-then-insert per table, per date)
		return await this.updateDatabase(data, date);
	}
	/**
	 * Update database with processed data: per table, delete that date's rows
	 * and insert the fresh ones inside one transaction.
	 *
	 * NOTE(review): deleteExistingRecords filters by date ONLY, so this wipes
	 * EVERY package's rows for `date`, not just those present in `data`. That
	 * is correct for the full daily ETL, but ensurePackageFreshness also calls
	 * this with single-package, multi-date data — verify that path.
	 */
	async updateDatabase(data: ProcessedData, date: string): Promise<any> {
		const results: any = {};
		for (const [category, rows] of Object.entries(data)) {
			console.log(`Updating ${category} table with ${rows.length} records`);
			try {
				// Wrap as a transaction to ensure idempotency and avoid partial writes
				await prisma.$transaction(async (tx) => {
					await this.deleteExistingRecords(category, date, tx);
					await this.insertRecords(category, rows, tx);
				});
				results[category] = true;
			} catch (error) {
				console.error(`Error updating ${category} table:`, error);
				results[category] = false;
			}
		}
		return results;
	}
	/**
	 * Update stats for the synthetic '__all__' package: per table, sum that
	 * date's downloads across all packages and rewrite the '__all__' rows.
	 */
	async updateAllPackageStats(date: string): Promise<any> {
		console.log('Updating __all__ package stats');
		const tables = ['overall', 'python_major', 'python_minor', 'system'];
		const results: any = {};
		for (const table of tables) {
			try {
				// Get aggregated data for __all__
				const aggregatedData = await this.getAggregatedData(table, date);
				// Delete existing __all__ records
				await this.deleteAllPackageRecords(table, date);
				// Insert aggregated records
				await this.insertAllPackageRecords(table, aggregatedData);
				results[table] = true;
			} catch (error) {
				console.error(`Error updating __all__ for ${table}:`, error);
				results[table] = false;
			}
		}
		return results;
	}
	/**
	 * Update recent stats (day, week, month): recompute each window from the
	 * overall table and replace that period's rows.
	 * NOTE(review): delete + createMany are not wrapped in one transaction, so
	 * a reader between the two sees an empty period — confirm acceptable.
	 */
	async updateRecentStats(): Promise<any> {
		console.log('Updating recent stats');
		const periods = ['day', 'week', 'month'];
		const results: any = {};
		for (const period of periods) {
			try {
				const recentData = await this.getRecentData(period);
				// Delete existing records for this period
				await prisma.recentDownloadCount.deleteMany({
					where: { category: period }
				});
				// Insert new records
				await prisma.recentDownloadCount.createMany({
					data: recentData
				});
				results[period] = true;
			} catch (error) {
				console.error(`Error updating recent stats for ${period}:`, error);
				results[period] = false;
			}
		}
		return results;
	}
	/**
	 * Purge old data (keep only MAX_RECORD_AGE days).
	 * NOTE(review): the `date` argument is unused — the cutoff is always
	 * computed from "now"; confirm whether it should anchor on `date` instead.
	 */
	async purgeOldData(date: string): Promise<any> {
		console.log('Purging old data');
		const purgeDate = new Date();
		purgeDate.setDate(purgeDate.getDate() - MAX_RECORD_AGE);
		const tables = ['overall', 'python_major', 'python_minor', 'system'];
		const results: any = {};
		for (const table of tables) {
			try {
				const deletedCount = await this.deleteOldRecords(table, purgeDate);
				results[table] = deletedCount;
			} catch (error) {
				console.error(`Error purging ${table}:`, error);
				results[table] = false;
			}
		}
		return results;
	}
	/**
	 * Database maintenance (VACUUM and ANALYZE).
	 * Currently a no-op placeholder: Prisma has no VACUUM/ANALYZE API, so both
	 * steps report 'skipped' (raw SQL would be needed to actually run them).
	 */
	async vacuumAnalyze(): Promise<any> {
		console.log('Running database maintenance');
		const results: any = {};
		try {
			// Note: Prisma doesn't support VACUUM/ANALYZE directly
			// These would need to be run via raw SQL if needed
			results.vacuum = 'skipped'; // Would need raw SQL
			results.analyze = 'skipped'; // Would need raw SQL
		} catch (error) {
			console.error('Error during database maintenance:', error);
		}
		return results;
	}
	/**
	 * Clear all cache after data update. Errors are logged, not rethrown, so a
	 * cache failure never fails the ETL.
	 */
	private async clearCache(): Promise<void> {
		console.log('Clearing cache after data update');
		try {
			await this.cache.flush();
			console.log('Cache cleared successfully');
		} catch (error) {
			console.error('Error clearing cache:', error);
		}
	}
// Helper methods
private getYesterdayDate(): string {
const yesterday = new Date();
yesterday.setDate(yesterday.getDate() - 1);
return yesterday.toISOString().split('T')[0];
}
	/**
	 * Build the full-day BigQuery SQL: per package, counts for python_major,
	 * python_minor, overall (with/without mirrors) and system categories.
	 * `date` is interpolated into the SQL; callers pass internally generated
	 * YYYY-MM-DD strings — presumably never user input, but verify.
	 */
	private getBigQueryQuery(date: string): string {
		return `
      WITH dls AS (
        SELECT
          file.project AS package,
          details.installer.name AS installer,
          details.python AS python_version,
          details.system.name AS system
        FROM \`bigquery-public-data.pypi.file_downloads\`
        WHERE DATE(timestamp) = '${date}'
          AND (REGEXP_CONTAINS(details.python, r'^[0-9]\\.[0-9]+.{0,}$') OR details.python IS NULL)
      )
      SELECT
        package,
        'python_major' AS category_label,
        COALESCE(CAST(SPLIT(python_version, '.')[OFFSET(0)] AS STRING), 'unknown') AS category,
        COUNT(*) AS downloads
      FROM dls
      WHERE installer NOT IN (${MIRRORS.map(m => `'${m}'`).join(', ')})
      GROUP BY package, category
      UNION ALL
      SELECT
        package,
        'python_minor' AS category_label,
        COALESCE(REGEXP_EXTRACT(python_version, r'^[0-9]+\\.[0-9]+'), 'unknown') AS category,
        COUNT(*) AS downloads
      FROM dls
      WHERE installer NOT IN (${MIRRORS.map(m => `'${m}'`).join(', ')})
      GROUP BY package, category
      UNION ALL
      SELECT
        package,
        'overall' AS category_label,
        'with_mirrors' AS category,
        COUNT(*) AS downloads
      FROM dls
      GROUP BY package, category
      UNION ALL
      SELECT
        package,
        'overall' AS category_label,
        'without_mirrors' AS category,
        COUNT(*) AS downloads
      FROM dls
      WHERE installer NOT IN (${MIRRORS.map(m => `'${m}'`).join(', ')})
      GROUP BY package, category
      UNION ALL
      SELECT
        package,
        'system' AS category_label,
        COALESCE(CASE
          WHEN system NOT IN (${SYSTEMS.map(s => `'${s}'`).join(', ')}) THEN 'other'
          ELSE system
        END, 'other') AS category,
        COUNT(*) AS downloads
      FROM dls
      WHERE installer NOT IN (${MIRRORS.map(m => `'${m}'`).join(', ')})
      GROUP BY package, category
    `;
	}
	/**
	 * Ensure a package has up-to-date data. If missing or stale, fetch from
	 * BigQuery: from the day after the last stored row (or 30 days back when
	 * no data exists) through yesterday, under a per-package Redis lock.
	 * NOTE(review): the trailing comment below claims updateDatabase's deletes
	 * are safe here, but deleteExistingRecords deletes by date across ALL
	 * packages — review this path before relying on it.
	 */
	async ensurePackageFreshness(packageName: string): Promise<void> {
		const yesterday = this.getYesterdayDate();
		const last = await prisma.overallDownloadCount.findFirst({
			where: { package: packageName },
			orderBy: { date: 'desc' },
			select: { date: true }
		});
		const lastDate = last?.date ? last.date.toISOString().split('T')[0] : null;
		if (lastDate === yesterday) return; // up to date
		// Determine start date (inclusive)
		let startDate: string;
		if (lastDate) {
			const d = new Date(lastDate);
			d.setDate(d.getDate() + 1);
			startDate = d.toISOString().split('T')[0];
		} else {
			// If no data, pull last 30 days to seed
			const d = new Date();
			d.setDate(d.getDate() - 30);
			startDate = d.toISOString().split('T')[0];
		}
		const endDate = yesterday;
		if (new Date(startDate) > new Date(endDate)) return;
		// Lock per package to avoid duplicate ingestion
		const lockKey = `pypistats:lock:pkg:${packageName}:${startDate}:${endDate}`;
		const token = await this.locks.acquireLock(lockKey, 60 * 15);
		if (!token) return;
		try {
			const data = await this.getPackageDownloadStats(packageName, startDate, endDate);
			await this.updateDatabase(data, startDate); // date not used inside for deletes per date; safe
			// Recompute __all__ for these dates for this package is not needed; __all__ refers to special package '__all__'
		} finally {
			await this.locks.releaseLock(lockKey, token);
		}
	}
	/**
	 * Query BigQuery for a package between dates (inclusive), returning the
	 * per-day category rows grouped by destination table (category_label).
	 */
	private async getPackageDownloadStats(packageName: string, startDate: string, endDate: string): Promise<ProcessedData> {
		const query = this.getPackageBigQueryQuery(packageName, startDate, endDate);
		const [rows] = await this.bigquery.query({ query });
		const data: ProcessedData = {};
		for (const row of rows as any[]) {
			const label = row.category_label as string;
			if (!data[label]) data[label] = [];
			data[label].push({
				date: row.date,
				package: row.package,
				category: row.category,
				downloads: Number(row.downloads)
			});
		}
		return data;
	}
private getPackageBigQueryQuery(packageName: string, startDate: string, endDate: string): string {
return `
WITH dls AS (
SELECT
DATE(timestamp) AS date,
file.project AS package,
details.installer.name AS installer,
details.python AS python_version,
details.system.name AS system
FROM \`bigquery-public-data.pypi.file_downloads\`
WHERE DATE(timestamp) BETWEEN '${startDate}' AND '${endDate}'
AND file.project = '${packageName}'
AND (REGEXP_CONTAINS(details.python, r'^[0-9]\\.[0-9]+.{0,}$') OR details.python IS NULL)
)
SELECT
date,
package,
'python_major' AS category_label,
COALESCE(CAST(SPLIT(python_version, '.')[OFFSET(0)] AS STRING), 'unknown') AS category,
COUNT(*) AS downloads
FROM dls
GROUP BY date, package, category
UNION ALL
SELECT
date,
package,
'python_minor' AS category_label,
COALESCE(REGEXP_EXTRACT(python_version, r'^[0-9]+\\.[0-9]+'), 'unknown') AS category,
COUNT(*) AS downloads
FROM dls
GROUP BY date, package, category
UNION ALL
SELECT
date,
package,
'overall' AS category_label,
'with_mirrors' AS category,
COUNT(*) AS downloads
FROM dls
GROUP BY date, package, category
UNION ALL
SELECT
date,
package,
'overall' AS category_label,
'without_mirrors' AS category,
COUNT(*) AS downloads
FROM dls
WHERE installer NOT IN (${MIRRORS.map(m => `'${m}'`).join(', ')})
GROUP BY date, package, category
UNION ALL
SELECT
date,
package,
'system' AS category_label,
COALESCE(CASE WHEN system NOT IN (${SYSTEMS.map(s => `'${s}'`).join(', ')}) THEN 'other' ELSE system END, 'other') AS category,
COUNT(*) AS downloads
FROM dls
GROUP BY date, package, category
UNION ALL
SELECT
date,
package,
'installer' AS category_label,
COALESCE(installer, 'unknown') AS category,
COUNT(*) AS downloads
FROM dls
GROUP BY date, package, category
`;
}
	/**
	 * Delete one table's rows for a given date inside the supplied transaction.
	 * NOTE(review): filters by date ONLY — removes EVERY package's rows for
	 * that date. Correct for the full daily ETL; dangerous if reached with
	 * single-package data (see ensurePackageFreshness).
	 */
	private async deleteExistingRecords(table: string, date: string, tx: Prisma.TransactionClient): Promise<void> {
		const dateObj = new Date(date);
		switch (table) {
			case 'overall':
				await tx.overallDownloadCount.deleteMany({
					where: { date: dateObj }
				});
				break;
			case 'python_major':
				await tx.pythonMajorDownloadCount.deleteMany({
					where: { date: dateObj }
				});
				break;
			case 'python_minor':
				await tx.pythonMinorDownloadCount.deleteMany({
					where: { date: dateObj }
				});
				break;
			case 'system':
				await tx.systemDownloadCount.deleteMany({
					where: { date: dateObj }
				});
				break;
			case 'installer':
				// Cast: installerDownloadCount is presumably not yet in the generated client types — confirm.
				await (tx as any).installerDownloadCount.deleteMany({
					where: { date: dateObj }
				});
				break;
		}
	}
	/**
	 * Bulk-insert rows into the table named by `table` within the supplied
	 * transaction. Missing categories default to 'unknown' ('other' for the
	 * system table). Unknown table names are silently ignored.
	 */
	private async insertRecords(table: string, records: any[], tx: Prisma.TransactionClient): Promise<void> {
		switch (table) {
			case 'overall':
				await tx.overallDownloadCount.createMany({
					data: records.map(r => ({
						date: new Date(r.date),
						package: r.package,
						category: r.category ?? 'unknown',
						downloads: r.downloads
					}))
				});
				break;
			case 'python_major':
				await tx.pythonMajorDownloadCount.createMany({
					data: records.map(r => ({
						date: new Date(r.date),
						package: r.package,
						category: r.category ?? 'unknown',
						downloads: r.downloads
					}))
				});
				break;
			case 'python_minor':
				await tx.pythonMinorDownloadCount.createMany({
					data: records.map(r => ({
						date: new Date(r.date),
						package: r.package,
						category: r.category ?? 'unknown',
						downloads: r.downloads
					}))
				});
				break;
			case 'system':
				await tx.systemDownloadCount.createMany({
					data: records.map(r => ({
						date: new Date(r.date),
						package: r.package,
						category: r.category ?? 'other',
						downloads: r.downloads
					}))
				});
				break;
			case 'installer':
				// Cast: installerDownloadCount is presumably not yet in the generated client types — confirm.
				await (tx as any).installerDownloadCount.createMany({
					data: records.map(r => ({
						date: new Date(r.date),
						package: r.package,
						category: r.category ?? 'unknown',
						downloads: r.downloads
					}))
				});
				break;
		}
	}
	/**
	 * Sum a table's downloads per (date, category) for one date — the input to
	 * the synthetic '__all__' rows. Returns [] for unknown table names (note:
	 * 'installer' is not aggregated here).
	 */
	private async getAggregatedData(table: string, date: string) {
		const dateObj = new Date(date);
		switch (table) {
			case 'overall':
				return await prisma.overallDownloadCount.groupBy({
					by: ['date', 'category'],
					where: { date: dateObj },
					_sum: { downloads: true }
				});
			case 'python_major':
				return await prisma.pythonMajorDownloadCount.groupBy({
					by: ['date', 'category'],
					where: { date: dateObj },
					_sum: { downloads: true }
				});
			case 'python_minor':
				return await prisma.pythonMinorDownloadCount.groupBy({
					by: ['date', 'category'],
					where: { date: dateObj },
					_sum: { downloads: true }
				});
			case 'system':
				return await prisma.systemDownloadCount.groupBy({
					by: ['date', 'category'],
					where: { date: dateObj },
					_sum: { downloads: true }
				});
			default:
				return [];
		}
	}
private async deleteAllPackageRecords(table: string, date: string): Promise<void> {
const dateObj = new Date(date);
switch (table) {
case 'overall':
await prisma.overallDownloadCount.deleteMany({
where: { date: dateObj, package: '__all__' }
});
break;
case 'python_major':
await prisma.pythonMajorDownloadCount.deleteMany({
where: { date: dateObj, package: '__all__' }
});
break;
case 'python_minor':
await prisma.pythonMinorDownloadCount.deleteMany({
where: { date: dateObj, package: '__all__' }
});
break;
case 'system':
await prisma.systemDownloadCount.deleteMany({
where: { date: dateObj, package: '__all__' }
});
break;
}
}
private async insertAllPackageRecords(table: string, aggregatedData: any[]): Promise<void> {
  // Re-shape groupBy rows into plain records filed under the synthetic
  // '__all__' package, then reuse the normal insert path (outside any tx).
  const fallbackCategory = table === 'system' ? 'other' : 'unknown';
  const rows = aggregatedData.map((entry) => ({
    date: entry.date,
    package: '__all__',
    category: entry.category ?? fallbackCategory,
    downloads: entry._sum.downloads || 0
  }));
  await this.insertRecords(table, rows, prisma);
}
// Aggregate per-package totals of 'without_mirrors' downloads over a
// trailing window. period must be 'day', 'week', or 'month'; anything
// else throws. Returns rows shaped { package, category: period, downloads }.
private async getRecentData(period: string): Promise<any[]> {
const today = new Date();
let startDate: Date;
switch (period) {
case 'day':
// NOTE(review): startDate is "now" including time-of-day, so the gte
// filter below only matches rows dated at/after this exact instant.
// If stored `date` values are midnight timestamps, 'day' would match
// nothing until tomorrow — confirm intended window semantics.
startDate = new Date(today);
break;
case 'week':
// Trailing 7 x 24h window, anchored at the current instant.
startDate = new Date(today.getTime() - 7 * 24 * 60 * 60 * 1000);
break;
case 'month':
// Trailing 30 x 24h window (fixed 30 days, not a calendar month).
startDate = new Date(today.getTime() - 30 * 24 * 60 * 60 * 1000);
break;
default:
throw new Error(`Invalid period: ${period}`);
}
const results = await prisma.overallDownloadCount.groupBy({
by: ['package'],
where: {
date: { gte: startDate },
category: 'without_mirrors'
},
_sum: { downloads: true }
});
// Tag each row with the period so all three windows share one table shape.
return results.map(result => ({
package: result.package,
category: period,
downloads: result._sum.downloads || 0
}));
}
private async deleteOldRecords(table: string, purgeDate: Date): Promise<number> {
switch (table) {
case 'overall':
const overallResult = await prisma.overallDownloadCount.deleteMany({
where: { date: { lt: purgeDate } }
});
return overallResult.count;
case 'python_major':
const majorResult = await prisma.pythonMajorDownloadCount.deleteMany({
where: { date: { lt: purgeDate } }
});
return majorResult.count;
case 'python_minor':
const minorResult = await prisma.pythonMinorDownloadCount.deleteMany({
where: { date: { lt: purgeDate } }
});
return minorResult.count;
case 'system':
const systemResult = await prisma.systemDownloadCount.deleteMany({
where: { date: { lt: purgeDate } }
});
return systemResult.count;
default:
return 0;
}
}
}

View File

@@ -0,0 +1,144 @@
import { prisma } from './prisma.js';
// Result of a freshness probe against the download-count tables.
export interface DatabaseFreshness {
isFresh: boolean; // true when data is at most 1 day behind the expected date
lastUpdateDate: Date | null; // newest date found in any table; null when empty
expectedDate: Date; // the date we expect data for (yesterday)
daysBehind: number; // whole days between expectedDate and lastUpdateDate; 999 sentinel when unknown
needsUpdate: boolean; // inverse of isFresh
}
/**
 * Check if the database is up to date with the latest data.
 *
 * "Fresh" means the newest row in any table is within one day of the
 * expected date (yesterday). Any failure is reported as "not fresh" so
 * callers err on the side of refreshing.
 */
export async function checkDatabaseFreshness(): Promise<DatabaseFreshness> {
  try {
    // Get the most recent date from the database
    const lastUpdate = await getLastUpdateDate();
    // Calculate the expected date (yesterday)
    const expectedDate = getExpectedDate();
    if (lastUpdate === null) {
      // No data at all: report "very behind" with the 999-day sentinel.
      return {
        isFresh: false,
        lastUpdateDate: null,
        expectedDate,
        daysBehind: 999,
        needsUpdate: true
      };
    }
    // lastUpdate is guaranteed non-null here, so compute the gap directly.
    // (The previous `lastUpdate ? … : 999` ternary's fallback branch was
    // unreachable after the null check above.)
    const daysBehind = Math.floor(
      (expectedDate.getTime() - lastUpdate.getTime()) / (1000 * 60 * 60 * 24)
    );
    // We consider it fresh if it's within 1 day of the expected date.
    const isFresh = daysBehind <= 1;
    return {
      isFresh,
      lastUpdateDate: lastUpdate,
      expectedDate,
      daysBehind,
      needsUpdate: !isFresh
    };
  } catch (error) {
    console.error('Error checking database freshness:', error);
    // If we can't check, assume we need an update
    return {
      isFresh: false,
      lastUpdateDate: null,
      expectedDate: getExpectedDate(),
      daysBehind: 999,
      needsUpdate: true
    };
  }
}
/**
 * Get the most recent date from any of our data tables.
 * Returns null when every table is empty or the queries fail.
 */
async function getLastUpdateDate(): Promise<Date | null> {
  try {
    // Check multiple tables to find the most recent date
    const queries = [
      prisma.overallDownloadCount.findFirst({
        orderBy: { date: 'desc' },
        select: { date: true }
      }),
      prisma.pythonMajorDownloadCount.findFirst({
        orderBy: { date: 'desc' },
        select: { date: true }
      }),
      prisma.pythonMinorDownloadCount.findFirst({
        orderBy: { date: 'desc' },
        select: { date: true }
      }),
      prisma.systemDownloadCount.findFirst({
        orderBy: { date: 'desc' },
        select: { date: true }
      })
    ];
    const results = await Promise.all(queries);
    // findFirst yields null for an empty table, so `result?.date` is
    // undefined there. The previous `date !== null` filter let undefined
    // through, making `date.getTime()` below throw whenever any one table
    // was empty (and the catch then mis-reported the whole DB as empty).
    // A type guard keeps only real Date values.
    const dates = results
      .map((result) => result?.date)
      .filter((date): date is Date => date instanceof Date);
    if (dates.length === 0) {
      return null;
    }
    // Most recent date across all tables.
    return new Date(Math.max(...dates.map((date) => date.getTime())));
  } catch (error) {
    console.error('Error getting last update date:', error);
    return null;
  }
}
/**
 * The date we expect fresh data for: one calendar day before "now"
 * (today's stats are typically not published yet). Carries the current
 * time-of-day, not midnight.
 */
function getExpectedDate(): Date {
  const expected = new Date();
  expected.setDate(expected.getDate() - 1);
  return expected;
}
/**
 * Whether any overall-download rows exist for the given date.
 * Query errors are logged and reported as "no data".
 */
export async function hasDataForDate(date: Date): Promise<boolean> {
  try {
    const rows = await prisma.overallDownloadCount.count({ where: { date } });
    return rows > 0;
  } catch (error) {
    console.error('Error checking data for date:', error);
    return false;
  }
}
/**
 * Human-readable one-line description of database freshness, for logging.
 */
export async function getFreshnessSummary(): Promise<string> {
  const { isFresh, lastUpdateDate, daysBehind, expectedDate } = await checkDatabaseFreshness();
  if (isFresh) {
    return `Database is fresh (last update: ${lastUpdateDate?.toISOString().split('T')[0]})`;
  }
  if (lastUpdateDate) {
    return `Database is ${daysBehind} days behind (last update: ${lastUpdateDate.toISOString().split('T')[0]}, expected: ${expectedDate.toISOString().split('T')[0]})`;
  }
  return 'Database has no data and needs initial population';
}

11
src/lib/database.ts Normal file
View File

@@ -0,0 +1,11 @@
// Database types and constants
// Period labels stored in the recent-downloads table's `category` column.
export const RECENT_CATEGORIES = ['day', 'week', 'month'];
// Re-export Prisma types for convenience
export type {
RecentDownloadCount,
OverallDownloadCount,
PythonMajorDownloadCount,
PythonMinorDownloadCount,
SystemDownloadCount
} from '@prisma/client';

1
src/lib/index.ts Normal file
View File

@@ -0,0 +1 @@
// place files you want to import through the `$lib` alias in this folder.

9
src/lib/prisma.ts Normal file
View File

@@ -0,0 +1,9 @@
import { PrismaClient } from '@prisma/client';
// Cache the client on globalThis so dev-server hot reloads reuse a single
// PrismaClient (and its connection pool) instead of creating one per reload.
const globalForPrisma = globalThis as unknown as {
prisma: PrismaClient | undefined;
};
// Reuse the cached instance when present, otherwise create a fresh client.
export const prisma = globalForPrisma.prisma ?? new PrismaClient();
// Only persist the global outside production; production gets one client per process.
if (process.env.NODE_ENV !== 'production') globalForPrisma.prisma = prisma;

341
src/lib/redis.ts Normal file
View File

@@ -0,0 +1,341 @@
import { createClient } from 'redis';
// Redis client instance
// Module-level singleton plus flags guarding concurrent connect/disconnect.
let redisClient: ReturnType<typeof createClient> | null = null;
let isConnecting = false;
let isDisconnecting = false;
// Lazily create the shared Redis client and kick off its connection.
// Returns immediately — the client may still be connecting when returned.
export function getRedisClient() {
if (!redisClient && !isConnecting) {
isConnecting = true;
redisClient = createClient({
url: process.env.REDIS_URL || 'redis://localhost:6379',
});
redisClient.on('error', (err) => {
console.error('Redis Client Error:', err);
});
redisClient.on('connect', () => {
console.log('Redis Client Connected');
isConnecting = false;
});
redisClient.on('disconnect', () => {
console.log('Redis Client Disconnected');
});
// 'end' fully resets the singleton so a later call can reconnect.
redisClient.on('end', () => {
console.log('Redis Client Connection Ended');
redisClient = null;
isConnecting = false;
isDisconnecting = false;
});
redisClient.connect().catch((error) => {
console.error('Redis Client Connection Failed:', error);
// NOTE(review): on a failed connect only the flag is reset — the
// never-connected client object is kept and handed to callers until an
// 'end' event clears it. Confirm this is the intended recovery path.
isConnecting = false;
});
}
return redisClient;
}
/**
 * Gracefully close the shared Redis connection (QUIT) and reset the
 * singleton. Safe to call when no client exists or a disconnect is
 * already in flight.
 */
export async function closeRedisClient(): Promise<void> {
  if (!redisClient || isDisconnecting) return;
  isDisconnecting = true;
  try {
    console.log('Closing Redis client connection...');
    await redisClient.quit();
    console.log('Redis client connection closed successfully');
  } catch (error) {
    console.error('Error closing Redis client:', error);
  } finally {
    // Always reset so the next getRedisClient() call can reconnect.
    redisClient = null;
    isDisconnecting = false;
  }
}
/**
 * Immediately drop the Redis connection without waiting for pending
 * replies (cleanup path). Safe to call when no client exists or a
 * disconnect is already in flight.
 */
export async function forceDisconnectRedis(): Promise<void> {
  if (!redisClient || isDisconnecting) return;
  isDisconnecting = true;
  try {
    console.log('Force disconnecting Redis client...');
    await redisClient.disconnect();
    console.log('Redis client force disconnected');
  } catch (error) {
    console.error('Error force disconnecting Redis client:', error);
  } finally {
    // Always reset so the next getRedisClient() call can reconnect.
    redisClient = null;
    isDisconnecting = false;
  }
}
// Cache utilities
/**
 * Thin JSON cache on top of the shared Redis client.
 * Every operation degrades gracefully: a missing client or a Redis error
 * is logged and surfaces as a miss / no-op rather than a thrown error.
 */
export class CacheManager {
  private client = getRedisClient();
  private defaultTTL = 3600; // 1 hour

  /** Read and JSON-parse a cached value; null on miss, error, or no client. */
  async get<T>(key: string): Promise<T | null> {
    try {
      const redis = this.client;
      if (!redis) {
        console.warn('Redis client not available for get operation');
        return null;
      }
      const raw = await redis.get(key);
      if (!raw) return null;
      return JSON.parse(raw) as T;
    } catch (error) {
      console.error('Redis get error:', error);
      return null;
    }
  }

  /** JSON-serialize and store a value under `key` with a TTL in seconds. */
  async set(key: string, value: any, ttl: number = this.defaultTTL): Promise<void> {
    try {
      const redis = this.client;
      if (!redis) {
        console.warn('Redis client not available for set operation');
        return;
      }
      await redis.setEx(key, ttl, JSON.stringify(value));
    } catch (error) {
      console.error('Redis set error:', error);
    }
  }

  /** Delete a single key; errors are logged and swallowed. */
  async del(key: string): Promise<void> {
    try {
      const redis = this.client;
      if (!redis) {
        console.warn('Redis client not available for del operation');
        return;
      }
      await redis.del(key);
    } catch (error) {
      console.error('Redis del error:', error);
    }
  }

  /** True when `key` exists; false on miss, error, or no client. */
  async exists(key: string): Promise<boolean> {
    try {
      const redis = this.client;
      if (!redis) {
        console.warn('Redis client not available for exists operation');
        return false;
      }
      return (await redis.exists(key)) === 1;
    } catch (error) {
      console.error('Redis exists error:', error);
      return false;
    }
  }

  /** Flush the entire current Redis database (all keys, not just ours). */
  async flush(): Promise<void> {
    try {
      const redis = this.client;
      if (!redis) {
        console.warn('Redis client not available for flush operation');
        return;
      }
      await redis.flushDb();
    } catch (error) {
      console.error('Redis flush error:', error);
    }
  }

  // Cache key generators — keep every key shape defined in one place.
  static getPackageKey(packageName: string, type: string): string {
    return `pypistats:package:${packageName}:${type}`;
  }

  static getSearchKey(query: string): string {
    return `pypistats:search:${query}`;
  }

  static getPackageCountKey(): string {
    return 'pypistats:package_count';
  }

  static getRecentStatsKey(packageName: string): string {
    return `pypistats:recent:${packageName}`;
  }
}
/**
 * Distributed lock utilities backed by Redis
 */
export class LockManager {
private client = getRedisClient();
/**
 * Try to acquire a lock for a specific key. Returns a unique token if acquired, or null if not.
 */
async acquireLock(key: string, ttlSeconds: number): Promise<string | null> {
try {
const client = this.client;
if (!client) {
console.warn('Redis client not available for acquireLock');
return null;
}
// Token ties the lock to this acquirer so only it can release it.
const token = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
// SET with NX+EX is atomic: create-if-absent with a TTL in one round trip.
const result = await client.set(key, token, { NX: true, EX: ttlSeconds });
return result === 'OK' ? token : null;
} catch (error) {
console.error('Redis acquireLock error:', error);
return null;
}
}
/**
 * Release a lock only if the token matches
 */
async releaseLock(key: string, token: string): Promise<boolean> {
try {
const client = this.client;
if (!client) {
console.warn('Redis client not available for releaseLock');
return false;
}
// Lua script to atomically check token and delete
// (do not reformat: the template body is sent to Redis verbatim).
const lua = `
if redis.call('get', KEYS[1]) == ARGV[1] then
return redis.call('del', KEYS[1])
else
return 0
end
`;
const res = (await client.eval(lua, {
keys: [key],
arguments: [token]
})) as number;
// DEL returns 1 when the key was removed, i.e. we held the lock.
return res === 1;
} catch (error) {
console.error('Redis releaseLock error:', error);
return false;
}
}
/**
 * Convenience helper to run a function while holding a lock.
 * Returns null (without running fn) when the lock cannot be acquired.
 */
async withLock<T>(key: string, ttlSeconds: number, fn: () => Promise<T>): Promise<T | null> {
const token = await this.acquireLock(key, ttlSeconds);
if (!token) return null;
try {
const result = await fn();
return result;
} finally {
// Release even when fn throws; only succeeds if our token still holds.
await this.releaseLock(key, token);
}
}
}
// Rate limiting utilities
/**
 * Fixed-window rate limiter backed by Redis INCR/EXPIRE.
 * Fails open: any Redis problem is logged and treated as "not limited".
 */
export class RateLimiter {
  private client = getRedisClient();

  /** True when `key` has exceeded `limit` hits within the current `window` (seconds). */
  async isRateLimited(key: string, limit: number, window: number): Promise<boolean> {
    try {
      const redis = this.client;
      if (!redis) {
        console.warn('Redis client not available for rate limiting');
        return false;
      }
      const hits = await redis.incr(key);
      // The first hit of a window starts its expiry clock.
      if (hits === 1) {
        await redis.expire(key, window);
      }
      return hits > limit;
    } catch (error) {
      console.error('Rate limiter error:', error);
      return false;
    }
  }

  /** Remaining requests for `key`, assuming a fixed default limit of 100. */
  async getRemainingRequests(key: string): Promise<number> {
    try {
      const redis = this.client;
      if (!redis) {
        console.warn('Redis client not available for remaining requests check');
        return 100;
      }
      const used = await redis.get(key);
      if (!used) return 100; // Default limit of 100
      return Math.max(0, 100 - parseInt(used));
    } catch (error) {
      console.error('Get remaining requests error:', error);
      return 100;
    }
  }
}
// Session management utilities
/**
 * JSON session store on Redis with a fixed 24-hour TTL.
 * All operations log and swallow Redis errors.
 */
export class SessionManager {
  private client = getRedisClient();
  private defaultTTL = 86400; // 24 hours

  /** Serialize and store session data, (re)starting its 24h TTL. */
  async setSession(sessionId: string, data: any): Promise<void> {
    try {
      const redis = this.client;
      if (!redis) {
        console.warn('Redis client not available for set session');
        return;
      }
      await redis.setEx(sessionId, this.defaultTTL, JSON.stringify(data));
    } catch (error) {
      console.error('Set session error:', error);
    }
  }

  /** Fetch and parse session data; null on miss, error, or no client. */
  async getSession(sessionId: string): Promise<any | null> {
    try {
      const redis = this.client;
      if (!redis) {
        console.warn('Redis client not available for get session');
        return null;
      }
      const stored = await redis.get(sessionId);
      if (!stored) return null;
      return JSON.parse(stored);
    } catch (error) {
      console.error('Get session error:', error);
      return null;
    }
  }

  /** Remove a session; errors are logged and swallowed. */
  async deleteSession(sessionId: string): Promise<void> {
    try {
      const redis = this.client;
      if (!redis) {
        console.warn('Redis client not available for delete session');
        return;
      }
      await redis.del(sessionId);
    } catch (error) {
      console.error('Delete session error:', error);
    }
  }

  /** Re-write an existing session unchanged to restart its TTL (sliding expiry). */
  async refreshSession(sessionId: string): Promise<void> {
    try {
      const redis = this.client;
      if (!redis) {
        console.warn('Redis client not available for refresh session');
        return;
      }
      const stored = await redis.get(sessionId);
      if (stored) {
        await redis.setEx(sessionId, this.defaultTTL, stored);
      }
    } catch (error) {
      console.error('Refresh session error:', error);
    }
  }
}

51
src/routes/+layout.svelte Normal file
View File

@@ -0,0 +1,51 @@
<script lang="ts">
// Global stylesheet — this layout wraps every route.
import '../app.css';
</script>
<!-- App shell: top navigation, routed page content via <slot>, shared footer -->
<div class="min-h-screen bg-gray-50">
<!-- Navigation -->
<nav class="bg-white shadow-sm border-b">
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
<div class="flex justify-between h-16">
<div class="flex">
<div class="flex-shrink-0 flex items-center">
<a href="/" class="text-xl font-bold text-gray-900">
PyPI Stats
</a>
</div>
<!-- Links hidden on small screens (no mobile menu in this layout) -->
<div class="hidden sm:ml-6 sm:flex sm:space-x-8">
<a href="/" class="border-transparent text-gray-500 hover:border-gray-300 hover:text-gray-700 inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium">
Home
</a>
<a href="/about" class="border-transparent text-gray-500 hover:border-gray-300 hover:text-gray-700 inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium">
About
</a>
<a href="/faqs" class="border-transparent text-gray-500 hover:border-gray-300 hover:text-gray-700 inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium">
FAQs
</a>
<a href="/api" class="border-transparent text-gray-500 hover:border-gray-300 hover:text-gray-700 inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium">
API
</a>
<a href="/admin" class="border-transparent text-gray-500 hover:border-gray-300 hover:text-gray-700 inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium">
Admin
</a>
</div>
</div>
</div>
</div>
</nav>
<!-- Main content -->
<main>
<slot />
</main>
<!-- Footer -->
<footer class="bg-white border-t mt-16">
<div class="max-w-7xl mx-auto py-12 px-4 sm:px-6 lg:px-8">
<div class="text-center text-gray-500 text-sm">
<p>PyPI Stats - Download statistics for Python packages</p>
</div>
</div>
</footer>
</div>
View File

@@ -0,0 +1,15 @@
import { getPackageCount } from '$lib/api.js';
// Load the tracked-package count for the landing page. The promise is
// returned un-awaited so SvelteKit can stream it to the client
// ({#await data.packageCount} in +page.svelte).
export const load = async () => {
  // Attach the error handler to the promise itself: the previous
  // try/catch around an un-awaited call could never fire, so a rejected
  // streamed promise surfaced as an unhandled rejection instead of the
  // intended fallback of 0.
  const packageCount = getPackageCount().catch((error) => {
    console.error('Error loading page data:', error);
    return 0;
  });
  return {
    packageCount
  };
};

71
src/routes/+page.svelte Normal file
View File

@@ -0,0 +1,71 @@
<script lang="ts">
import { enhance } from '$app/forms';
import type { PageData } from './$types';
// data.packageCount is a streamed promise from +page.ts (consumed by {#await} below).
const { data } = $props<{ data: PageData }>();
// Bound to the search input; submitted as form field `q` to the /search action.
let searchTerm = $state('');
</script>
<svelte:head>
<title>PyPI Stats - Download statistics for Python packages</title>
<meta name="description" content="Get download statistics for Python packages from PyPI" />
</svelte:head>
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-12">
<div class="text-center">
<h1 class="text-4xl font-bold text-gray-900 mb-8">
PyPI Stats
</h1>
<p class="text-xl text-gray-600 mb-8">
Download statistics for Python packages
</p>
<!-- Search Form -->
<div class="max-w-md mx-auto">
<form method="POST" action="/search" use:enhance class="flex gap-2">
<input
type="text"
name="q"
bind:value={searchTerm}
placeholder="Enter package name..."
class="flex-1 px-4 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent"
required
/>
<button
type="submit"
class="px-6 py-2 bg-blue-600 text-white rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2"
>
Search
</button>
</form>
</div>
<!-- Streamed count: rendered once the promise from +page.ts resolves -->
{#await data.packageCount then packageCount}
<div class="mt-8 text-sm text-gray-500">
Tracking {packageCount?.toLocaleString()} packages
</div>
{/await}
</div>
<!-- Quick Links -->
<div class="mt-16 grid grid-cols-1 md:grid-cols-3 gap-8">
<div class="bg-white p-6 rounded-lg shadow-sm border">
<h3 class="text-lg font-semibold text-gray-900 mb-2">Popular Packages</h3>
<p class="text-gray-600 mb-4">Check download stats for popular Python packages</p>
<a href="/search/numpy" class="text-blue-600 hover:text-blue-800 font-medium">View Examples →</a>
</div>
<div class="bg-white p-6 rounded-lg shadow-sm border">
<h3 class="text-lg font-semibold text-gray-900 mb-2">API Access</h3>
<p class="text-gray-600 mb-4">Programmatic access to download statistics</p>
<a href="/api" class="text-blue-600 hover:text-blue-800 font-medium">API Documentation →</a>
</div>
<div class="bg-white p-6 rounded-lg shadow-sm border">
<h3 class="text-lg font-semibold text-gray-900 mb-2">About</h3>
<p class="text-gray-600 mb-4">Learn more about PyPI Stats and how it works</p>
<a href="/about" class="text-blue-600 hover:text-blue-800 font-medium">Learn More →</a>
</div>
</div>
</div>
View File

@@ -0,0 +1,42 @@
<!-- Static "About" page: markup only, no load function or client state -->
<svelte:head>
<title>About - PyPI Stats</title>
<meta name="description" content="Learn about PyPI Stats and how it works" />
</svelte:head>
<div class="max-w-4xl mx-auto px-4 sm:px-6 lg:px-8 py-12">
<h1 class="text-3xl font-bold text-gray-900 mb-8">About PyPI Stats</h1>
<div class="prose prose-lg max-w-none">
<p class="text-gray-600 mb-6">
PyPI Stats provides download statistics for Python packages from the Python Package Index (PyPI).
Our data is collected from PyPI's download logs and processed to provide insights into package usage.
</p>
<h2 class="text-2xl font-semibold text-gray-900 mt-8 mb-4">What We Track</h2>
<ul class="list-disc pl-6 text-gray-600 mb-6">
<li>Overall download counts (with and without mirrors)</li>
<li>Downloads by Python major version (2.x, 3.x)</li>
<li>Downloads by Python minor version (2.7, 3.6, 3.7, etc.)</li>
<li>Downloads by operating system (Windows, Linux, macOS)</li>
<li>Recent download statistics (day, week, month)</li>
</ul>
<h2 class="text-2xl font-semibold text-gray-900 mt-8 mb-4">Data Sources</h2>
<p class="text-gray-600 mb-6">
Our data comes from PyPI's BigQuery public dataset, which contains download logs from PyPI's CDN.
We process this data daily to provide up-to-date statistics.
</p>
<h2 class="text-2xl font-semibold text-gray-900 mt-8 mb-4">API Access</h2>
<p class="text-gray-600 mb-6">
We provide a RESTful API for programmatic access to download statistics.
All endpoints return JSON data and are free to use.
</p>
<h2 class="text-2xl font-semibold text-gray-900 mt-8 mb-4">Privacy</h2>
<p class="text-gray-600 mb-6">
We only collect aggregate statistics and do not track individual users or their download patterns.
All data is anonymized and used solely for providing download statistics.
</p>
</div>
</div>
View File

@@ -0,0 +1,323 @@
<script lang="ts">
// Admin dashboard state and actions: data-processing trigger plus cache management.
import { onMount } from 'svelte';
// UI state flags and payloads for the dashboard panels.
let processing = $state(false);
let cacheClearing = $state(false);
let results: any = $state(null);
let error: string | null = $state(null);
let cacheInfo: any = $state(null);
// NOTE(review): cronStatus is only ever reset to null (cron UI was removed —
// see the "Cron removed" comment in the markup); candidate for deletion.
let cronStatus: any = $state(null);
let date = $state('');
let purge = $state(true);
let packageName = $state('');
// NOTE(review): searchQuery is declared but never used in this component.
let searchQuery = $state('');
// Get yesterday's date as default
onMount(() => {
const d = new Date();
d.setDate(d.getDate() - 1);
date = d.toISOString().split('T')[0];
});
// POST the selected date (and purge flag) to the data-processing endpoint.
async function processData() {
processing = true;
error = null;
results = null;
try {
const response = await fetch('/api/admin/process-data', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({ date, purge })
});
const data = await response.json();
if (data.success) {
results = data.results;
} else {
error = data.message || 'Data processing failed';
}
} catch (err) {
error = err instanceof Error ? err.message : 'Network error';
} finally {
processing = false;
}
}
// Fetch cache endpoint metadata for display; failures are logged, not shown.
async function getCacheInfo() {
try {
const response = await fetch('/api/admin/cache');
cacheInfo = await response.json();
} catch (err) {
console.error('Failed to get cache info:', err);
}
}
// Clear the entire cache via the action-based POST API, then refresh info.
async function clearAllCache() {
cacheClearing = true;
error = null;
try {
const response = await fetch('/api/admin/cache', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({ action: 'clear' })
});
const data = await response.json();
if (data.success) {
await getCacheInfo();
} else {
error = data.message || 'Failed to clear cache';
}
} catch (err) {
error = err instanceof Error ? err.message : 'Network error';
} finally {
cacheClearing = false;
}
}
// Invalidate cached entries for one package (requires a non-empty name).
async function invalidatePackageCache() {
if (!packageName.trim()) {
error = 'Package name is required';
return;
}
try {
const response = await fetch('/api/admin/cache', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
action: 'invalidate-package',
packageName: packageName.trim()
})
});
const data = await response.json();
if (data.success) {
await getCacheInfo();
} else {
error = data.message || 'Failed to invalidate package cache';
}
} catch (err) {
error = err instanceof Error ? err.message : 'Network error';
}
}
// Invalidate all cached search results.
async function invalidateSearchCache() {
try {
const response = await fetch('/api/admin/cache', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({ action: 'invalidate-search' })
});
const data = await response.json();
if (data.success) {
await getCacheInfo();
} else {
error = data.message || 'Failed to invalidate search cache';
}
} catch (err) {
error = err instanceof Error ? err.message : 'Network error';
}
}
// Stubs left behind after the cron feature was removed; the "Run Cron Now"
// button is currently a no-op.
async function getCronStatus() { cronStatus = null; }
async function runCronNow() {}
// Load cache info and cron status on mount
onMount(() => {
getCacheInfo();
getCronStatus();
});
</script>
<svelte:head>
<title>Admin Dashboard - PyPI Stats</title>
</svelte:head>
<div class="min-h-screen bg-gray-50 py-8">
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
<div class="mb-8">
<h1 class="text-3xl font-bold text-gray-900">Admin Dashboard</h1>
<p class="mt-2 text-gray-600">Manage data processing and cache operations</p>
</div>
<!-- Shared error banner: set by any of the script's actions -->
{#if error}
<div class="mb-6 bg-red-50 border border-red-200 rounded-md p-4">
<div class="flex">
<div class="flex-shrink-0">
<svg class="h-5 w-5 text-red-400" viewBox="0 0 20 20" fill="currentColor">
<path fill-rule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zM8.707 7.293a1 1 0 00-1.414 1.414L8.586 10l-1.293 1.293a1 1 0 101.414 1.414L10 11.414l1.293 1.293a1 1 0 001.414-1.414L11.414 10l1.293-1.293a1 1 0 00-1.414-1.414L10 8.586 8.707 7.293z" clip-rule="evenodd" />
</svg>
</div>
<div class="ml-3">
<h3 class="text-sm font-medium text-red-800">Error</h3>
<div class="mt-2 text-sm text-red-700">{error}</div>
</div>
</div>
</div>
{/if}
<div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
<!-- Data Processing Section -->
<div class="bg-white shadow rounded-lg p-6">
<h2 class="text-xl font-semibold text-gray-900 mb-4">Data Processing</h2>
<div class="space-y-4">
<div>
<label for="date" class="block text-sm font-medium text-gray-700">Processing Date</label>
<input
type="date"
id="date"
bind:value={date}
class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-blue-500 focus:border-blue-500"
/>
</div>
<div class="flex items-center">
<input
type="checkbox"
id="purge"
bind:checked={purge}
class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded"
/>
<label for="purge" class="ml-2 block text-sm text-gray-900">
Purge old data (keep only 180 days)
</label>
</div>
<div class="grid grid-cols-1 gap-3">
<button
onclick={processData}
disabled={processing}
class="w-full bg-blue-600 text-white px-4 py-2 rounded-md hover:bg-blue-700 disabled:opacity-50 disabled:cursor-not-allowed"
>
{processing ? 'Processing...' : 'Process Data'}
</button>
<!-- NOTE(review): runCronNow is an empty stub in the script; this button does nothing -->
<button
onclick={runCronNow}
disabled={processing}
class="w-full bg-green-600 text-white px-4 py-2 rounded-md hover:bg-green-700 disabled:opacity-50 disabled:cursor-not-allowed"
>
{processing ? 'Processing...' : 'Run Cron Now'}
</button>
</div>
</div>
{#if results}
<div class="mt-6">
<h3 class="text-lg font-medium text-gray-900 mb-3">Results</h3>
<div class="bg-gray-50 rounded-md p-4">
<pre class="text-sm text-gray-700 overflow-auto">{JSON.stringify(results, null, 2)}</pre>
</div>
</div>
{/if}
</div>
<!-- Cron removed -->
<!-- Cache Management Section -->
<div class="bg-white shadow rounded-lg p-6">
<h2 class="text-xl font-semibold text-gray-900 mb-4">Cache Management</h2>
<div class="space-y-4">
<div>
<label for="packageName" class="block text-sm font-medium text-gray-700">Package Name</label>
<input
type="text"
id="packageName"
bind:value={packageName}
placeholder="e.g., numpy"
class="mt-1 block w-full border-gray-300 rounded-md shadow-sm focus:ring-blue-500 focus:border-blue-500"
/>
</div>
<div class="grid grid-cols-1 gap-3">
<button
onclick={invalidatePackageCache}
disabled={!packageName.trim()}
class="bg-yellow-600 text-white px-4 py-2 rounded-md hover:bg-yellow-700 disabled:opacity-50 disabled:cursor-not-allowed"
>
Invalidate Package Cache
</button>
<button
onclick={invalidateSearchCache}
class="bg-orange-600 text-white px-4 py-2 rounded-md hover:bg-orange-700"
>
Invalidate Search Cache
</button>
<button
onclick={clearAllCache}
disabled={cacheClearing}
class="bg-red-600 text-white px-4 py-2 rounded-md hover:bg-red-700 disabled:opacity-50 disabled:cursor-not-allowed"
>
{cacheClearing ? 'Clearing...' : 'Clear All Cache'}
</button>
</div>
</div>
{#if cacheInfo}
<div class="mt-6">
<h3 class="text-lg font-medium text-gray-900 mb-3">Cache Information</h3>
<div class="bg-gray-50 rounded-md p-4">
<pre class="text-sm text-gray-700 overflow-auto">{JSON.stringify(cacheInfo, null, 2)}</pre>
</div>
</div>
{/if}
</div>
</div>
<!-- Environment Information: presence checks only — no secret values rendered -->
<div class="mt-8 bg-white shadow rounded-lg p-6">
<h2 class="text-xl font-semibold text-gray-900 mb-4">Environment Information</h2>
<div class="grid grid-cols-1 md:grid-cols-2 gap-4">
<div>
<h3 class="text-sm font-medium text-gray-700">Database</h3>
<p class="text-sm text-gray-900">
{typeof process !== 'undefined' && process.env.DATABASE_URL ? 'Configured' : 'Not configured'}
</p>
</div>
<div>
<h3 class="text-sm font-medium text-gray-700">Google Cloud</h3>
<p class="text-sm text-gray-900">
{typeof process !== 'undefined' && process.env.GOOGLE_PROJECT_ID ? 'Configured' : 'Not configured'}
</p>
</div>
<div>
<h3 class="text-sm font-medium text-gray-700">Redis</h3>
<p class="text-sm text-gray-900">
{typeof process !== 'undefined' && process.env.REDIS_URL ? 'Configured' : 'Not configured'}
</p>
</div>
<div>
<h3 class="text-sm font-medium text-gray-700">Environment</h3>
<p class="text-sm text-gray-900">
{typeof process !== 'undefined' ? process.env.NODE_ENV || 'development' : 'development'}
</p>
</div>
</div>
</div>
</div>
</div>
167
src/routes/api/+page.svelte Normal file
View File

@@ -0,0 +1,167 @@
<!-- Static API documentation page: markup only, no load function or client state -->
<svelte:head>
<title>API Documentation - PyPI Stats</title>
<meta name="description" content="API documentation for PyPI Stats" />
</svelte:head>
<div class="max-w-6xl mx-auto px-4 sm:px-6 lg:px-8 py-12">
<h1 class="text-3xl font-bold text-gray-900 mb-8">API Documentation</h1>
<div class="prose prose-lg max-w-none">
<p class="text-gray-600 mb-8">
The PyPI Stats API provides programmatic access to download statistics for Python packages.
All endpoints return JSON data and are free to use.
</p>
<h2 class="text-2xl font-semibold text-gray-900 mt-8 mb-4">Base URL</h2>
<div class="bg-gray-100 p-4 rounded-md mb-6">
<code class="text-sm">https://pypistats.org/api</code>
</div>
<h2 class="text-2xl font-semibold text-gray-900 mt-8 mb-4">Endpoints</h2>
<div class="space-y-8">
<!-- Recent Downloads -->
<div class="bg-white border rounded-lg p-6">
<h3 class="text-xl font-semibold text-gray-900 mb-3">Recent Downloads</h3>
<div class="bg-gray-100 p-4 rounded-md mb-4">
<code class="text-sm">GET /api/packages/&#123;package&#125;/recent</code>
</div>
<p class="text-gray-600 mb-4">
Get recent download statistics for a package (day, week, month).
</p>
<div class="mb-4">
<strong class="text-gray-900">Parameters:</strong>
<ul class="list-disc pl-6 text-gray-600 mt-2">
<li><code>period</code> (optional): Filter by period (day, week, month)</li>
</ul>
</div>
<div class="mb-4">
<strong class="text-gray-900">Example:</strong>
<div class="bg-gray-100 p-4 rounded-md mt-2">
<code class="text-sm">GET /api/packages/numpy/recent?period=month</code>
</div>
</div>
</div>
<!-- Overall Downloads -->
<div class="bg-white border rounded-lg p-6">
<h3 class="text-xl font-semibold text-gray-900 mb-3">Overall Downloads</h3>
<div class="bg-gray-100 p-4 rounded-md mb-4">
<code class="text-sm">GET /api/packages/&#123;package&#125;/overall</code>
</div>
<p class="text-gray-600 mb-4">
Get overall download time series for a package.
</p>
<div class="mb-4">
<strong class="text-gray-900">Parameters:</strong>
<ul class="list-disc pl-6 text-gray-600 mt-2">
<li><code>mirrors</code> (optional): Include mirror downloads (true/false)</li>
</ul>
</div>
<div class="mb-4">
<strong class="text-gray-900">Example:</strong>
<div class="bg-gray-100 p-4 rounded-md mt-2">
<code class="text-sm">GET /api/packages/numpy/overall?mirrors=true</code>
</div>
</div>
</div>
<!-- Python Major -->
<div class="bg-white border rounded-lg p-6">
<h3 class="text-xl font-semibold text-gray-900 mb-3">Python Major Version Downloads</h3>
<div class="bg-gray-100 p-4 rounded-md mb-4">
<code class="text-sm">GET /api/packages/&#123;package&#125;/python_major</code>
</div>
<p class="text-gray-600 mb-4">
Get download statistics by Python major version (2.x, 3.x).
</p>
<div class="mb-4">
<strong class="text-gray-900">Parameters:</strong>
<ul class="list-disc pl-6 text-gray-600 mt-2">
<li><code>version</code> (optional): Filter by Python major version (2, 3)</li>
</ul>
</div>
<div class="mb-4">
<strong class="text-gray-900">Example:</strong>
<div class="bg-gray-100 p-4 rounded-md mt-2">
<code class="text-sm">GET /api/packages/numpy/python_major?version=3</code>
</div>
</div>
</div>
<!-- Python Minor -->
<div class="bg-white border rounded-lg p-6">
<h3 class="text-xl font-semibold text-gray-900 mb-3">Python Minor Version Downloads</h3>
<div class="bg-gray-100 p-4 rounded-md mb-4">
<code class="text-sm">GET /api/packages/&#123;package&#125;/python_minor</code>
</div>
<p class="text-gray-600 mb-4">
Get download statistics by Python minor version (2.7, 3.6, 3.7, etc.).
</p>
<div class="mb-4">
<strong class="text-gray-900">Parameters:</strong>
<ul class="list-disc pl-6 text-gray-600 mt-2">
<li><code>version</code> (optional): Filter by Python minor version (2.7, 3.6, etc.)</li>
</ul>
</div>
<div class="mb-4">
<strong class="text-gray-900">Example:</strong>
<div class="bg-gray-100 p-4 rounded-md mt-2">
<code class="text-sm">GET /api/packages/numpy/python_minor?version=3.8</code>
</div>
</div>
</div>
<!-- System -->
<div class="bg-white border rounded-lg p-6">
<h3 class="text-xl font-semibold text-gray-900 mb-3">System Downloads</h3>
<div class="bg-gray-100 p-4 rounded-md mb-4">
<code class="text-sm">GET /api/packages/&#123;package&#125;/system</code>
</div>
<p class="text-gray-600 mb-4">
Get download statistics by operating system (Windows, Linux, macOS).
</p>
<div class="mb-4">
<strong class="text-gray-900">Parameters:</strong>
<ul class="list-disc pl-6 text-gray-600 mt-2">
<li><code>os</code> (optional): Filter by operating system (Windows, Linux, Darwin)</li>
</ul>
</div>
<div class="mb-4">
<strong class="text-gray-900">Example:</strong>
<div class="bg-gray-100 p-4 rounded-md mt-2">
<code class="text-sm">GET /api/packages/numpy/system?os=Linux</code>
</div>
</div>
</div>
</div>
<h2 class="text-2xl font-semibold text-gray-900 mt-8 mb-4">Response Format</h2>
<p class="text-gray-600 mb-4">
All API endpoints return JSON responses with the following structure:
</p>
<div class="bg-gray-100 p-4 rounded-md mb-6">
<pre class="text-sm"><code>{`{
"package": "package-name",
"type": "endpoint_type",
"data": [...]
}`}</code></pre>
</div>
<h2 class="text-2xl font-semibold text-gray-900 mt-8 mb-4">Error Handling</h2>
<p class="text-gray-600 mb-4">
The API uses standard HTTP status codes:
</p>
<ul class="list-disc pl-6 text-gray-600 mb-6">
<li><strong>200:</strong> Success</li>
<li><strong>404:</strong> Package not found</li>
<li><strong>500:</strong> Internal server error</li>
</ul>
<h2 class="text-2xl font-semibold text-gray-900 mt-8 mb-4">Rate Limiting</h2>
<p class="text-gray-600 mb-6">
We may implement rate limiting to ensure fair usage. Please be respectful of our servers
and implement appropriate caching in your applications.
</p>
</div>
</div>
88
src/routes/api/admin/cache/+server.ts vendored Normal file
View File

@@ -0,0 +1,88 @@
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { CacheManager } from '$lib/redis.js';
import { clearAllCache, invalidatePackageCache, invalidateSearchCache } from '$lib/api.js';
const cache = new CacheManager();
// Describes the cache-management API surface (static metadata only).
export const GET: RequestHandler = async () => {
	try {
		const info = {
			message: 'Cache management endpoint',
			operations: ['GET', 'POST', 'DELETE'],
			endpoints: {
				'GET /api/admin/cache': 'Get cache information',
				'POST /api/admin/cache/clear': 'Clear all cache',
				'POST /api/admin/cache/invalidate-package': 'Invalidate package cache',
				'POST /api/admin/cache/invalidate-search': 'Invalidate search cache'
			}
		};
		return json(info);
	} catch {
		return json({ error: 'Failed to get cache information' }, { status: 500 });
	}
};
// Dispatches a cache-management action based on the JSON request body.
// Supported actions: clear, invalidate-package (requires packageName),
// invalidate-search. Unknown actions yield a 400; failures yield a 500.
export const POST: RequestHandler = async ({ request }) => {
	try {
		const { action, packageName } = await request.json();

		if (action === 'clear') {
			await clearAllCache();
			return json({ success: true, message: 'All cache cleared successfully' });
		}

		if (action === 'invalidate-package') {
			// packageName is mandatory for a targeted invalidation.
			if (!packageName) {
				return json({ error: 'Package name is required' }, { status: 400 });
			}
			await invalidatePackageCache(packageName);
			return json({ success: true, message: `Cache invalidated for package: ${packageName}` });
		}

		if (action === 'invalidate-search') {
			await invalidateSearchCache();
			return json({ success: true, message: 'Search cache invalidated successfully' });
		}

		return json(
			{ error: 'Invalid action. Use: clear, invalidate-package, or invalidate-search' },
			{ status: 400 }
		);
	} catch (error) {
		console.error('Cache management error:', error);
		return json(
			{
				error: 'Cache management failed',
				message: error instanceof Error ? error.message : 'Unknown error'
			},
			{ status: 500 }
		);
	}
};
// Clears the entire cache (equivalent to POST with action "clear").
export const DELETE: RequestHandler = async () => {
	try {
		await clearAllCache();
		return json({ success: true, message: 'All cache cleared successfully' });
	} catch (error) {
		console.error('Cache clear error:', error);
		return json(
			{
				error: 'Failed to clear cache',
				message: error instanceof Error ? error.message : 'Unknown error'
			},
			{ status: 500 }
		);
	}
};

View File

@@ -0,0 +1,11 @@
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
// Cron removed; keep endpoint for compatibility
// Legacy cron endpoint kept for compatibility; scheduled ingestion was removed.
export const GET: RequestHandler = async () =>
	json({ success: true, message: 'Cron removed; on-demand ingestion active.' });
// Legacy cron endpoint kept for compatibility; scheduled ingestion was removed.
export const POST: RequestHandler = async () =>
	json({ success: true, message: 'Cron removed; on-demand ingestion active.' });

View File

@@ -0,0 +1,28 @@
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { DataProcessor } from '$lib/data-processor.js';
/**
 * Triggers a full ETL run via the API.
 *
 * Body (JSON, all fields optional): { date?: string, purge?: boolean }.
 * `purge` defaults to true. Returns 200 with the processing results, or a
 * 500 with the error message when processing fails.
 */
export const POST: RequestHandler = async ({ request }) => {
	try {
		// Tolerate an empty or non-JSON body: fall back to defaults instead of
		// turning a body-parse error into a misleading 500.
		const body = await request.json().catch(() => ({}));
		const { date, purge = true } = body;

		console.log('Starting data processing via API...');
		const processor = new DataProcessor();
		const results = await processor.etl(date, purge);

		return json({
			success: true,
			message: 'Data processing completed successfully',
			results
		});
	} catch (error) {
		console.error('Data processing failed:', error);
		return json(
			{
				success: false,
				message: 'Data processing failed',
				error: error instanceof Error ? error.message : 'Unknown error'
			},
			{ status: 500 }
		);
	}
};

View File

@@ -0,0 +1,33 @@
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { prisma } from '$lib/prisma.js';
import { DataProcessor } from '$lib/data-processor.js';
// Returns per-installer download counts for a package, refreshing its data
// on demand before querying.
export const GET: RequestHandler = async ({ params }) => {
	// Map '.' and '_' to '-' so the lookup matches stored package names.
	// NOTE(review): PEP 503 normalisation also lowercases and collapses runs
	// of separators — confirm the ingestion pipeline stores names this way.
	const packageName = params.package?.replace(/\./g, '-').replace(/_/g, '-') || '';
	if (!packageName || packageName === '__all__') {
		return json({ error: 'Invalid package name' }, { status: 400 });
	}
	try {
		const processor = new DataProcessor();
		await processor.ensurePackageFreshness(packageName);
		const rows = await prisma.installerDownloadCount.findMany({
			where: { package: packageName },
			orderBy: { date: 'asc' }
		});
		return json({
			package: packageName,
			type: 'installer_downloads',
			data: rows.map((row) => ({ date: row.date, category: row.category, downloads: row.downloads }))
		});
	} catch (error) {
		console.error('Error fetching installer downloads:', error);
		return json({ error: 'Internal server error' }, { status: 500 });
	}
};

View File

@@ -0,0 +1,35 @@
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { getOverallDownloads } from '$lib/api.js';
// Returns overall download counts for a package; the optional `mirrors`
// query parameter is passed through to the data layer as a category filter.
export const GET: RequestHandler = async ({ params, url }) => {
	const packageName = params.package?.replace(/\./g, '-').replace(/_/g, '-') || '';
	const mirrors = url.searchParams.get('mirrors');
	if (packageName === '__all__') {
		return json({ error: 'Invalid package name' }, { status: 400 });
	}
	try {
		const rows = await getOverallDownloads(packageName, mirrors || undefined);
		if (!rows.length) {
			// No rows at all means the package is unknown to us.
			return json({ error: 'Package not found' }, { status: 404 });
		}
		return json({
			package: packageName,
			type: 'overall_downloads',
			data: rows.map((row) => ({
				date: row.date,
				category: row.category,
				downloads: row.downloads
			}))
		});
	} catch (error) {
		console.error('Error fetching overall downloads:', error);
		return json({ error: 'Internal server error' }, { status: 500 });
	}
};

View File

@@ -0,0 +1,35 @@
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { getPythonMajorDownloads } from '$lib/api.js';
// Returns downloads split by Python major version; the optional `version`
// query parameter restricts the result to one major version.
export const GET: RequestHandler = async ({ params, url }) => {
	const packageName = params.package?.replace(/\./g, '-').replace(/_/g, '-') || '';
	const version = url.searchParams.get('version');
	if (packageName === '__all__') {
		return json({ error: 'Invalid package name' }, { status: 400 });
	}
	try {
		const rows = await getPythonMajorDownloads(packageName, version || undefined);
		if (!rows.length) {
			return json({ error: 'Package not found' }, { status: 404 });
		}
		return json({
			package: packageName,
			type: 'python_major_downloads',
			data: rows.map((row) => ({
				date: row.date,
				category: row.category,
				downloads: row.downloads
			}))
		});
	} catch (error) {
		console.error('Error fetching Python major downloads:', error);
		return json({ error: 'Internal server error' }, { status: 500 });
	}
};

View File

@@ -0,0 +1,35 @@
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { getPythonMinorDownloads } from '$lib/api.js';
// Returns downloads split by Python minor version; the optional `version`
// query parameter restricts the result to one minor version.
export const GET: RequestHandler = async ({ params, url }) => {
	const packageName = params.package?.replace(/\./g, '-').replace(/_/g, '-') || '';
	const version = url.searchParams.get('version');
	if (packageName === '__all__') {
		return json({ error: 'Invalid package name' }, { status: 400 });
	}
	try {
		const rows = await getPythonMinorDownloads(packageName, version || undefined);
		if (!rows.length) {
			return json({ error: 'Package not found' }, { status: 404 });
		}
		return json({
			package: packageName,
			type: 'python_minor_downloads',
			data: rows.map((row) => ({
				date: row.date,
				category: row.category,
				downloads: row.downloads
			}))
		});
	} catch (error) {
		console.error('Error fetching Python minor downloads:', error);
		return json({ error: 'Internal server error' }, { status: 500 });
	}
};

View File

@@ -0,0 +1,70 @@
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { getRecentDownloads } from '$lib/api.js';
import { RECENT_CATEGORIES } from '$lib/database.js';
import { RateLimiter } from '$lib/redis.js';
import { DataProcessor } from '$lib/data-processor.js';
const rateLimiter = new RateLimiter();
/**
 * Returns recent (e.g. day/week/month) download totals for a package.
 *
 * Query params: `period` optionally restricts the result to one category.
 * Rate limited to 100 requests per hour per client IP; the data for the
 * package is refreshed on demand before querying.
 */
export const GET: RequestHandler = async ({ params, url, request }) => {
	const packageName = params.package?.replace(/\./g, '-').replace(/_/g, '-') || '';
	const category = url.searchParams.get('period');

	if (packageName === '__all__') {
		return json({ error: 'Invalid package name' }, { status: 400 });
	}

	// Rate limiting keyed by client IP. Best effort: trusts proxy headers and
	// falls back to a shared 'unknown' bucket when none are present.
	const clientIP = request.headers.get('x-forwarded-for') ||
		request.headers.get('x-real-ip') ||
		'unknown';
	const rateLimitKey = `rate_limit:recent:${clientIP}`;
	const isLimited = await rateLimiter.isRateLimited(rateLimitKey, 100, 3600); // 100 requests per hour

	if (isLimited) {
		return json({
			error: 'Rate limit exceeded',
			message: 'Too many requests. Please try again later.'
		}, { status: 429 });
	}

	try {
		// Ensure package data is present/fresh on demand
		const processor = new DataProcessor();
		await processor.ensurePackageFreshness(packageName);

		const downloads = await getRecentDownloads(packageName, category || undefined);
		if (downloads.length === 0) {
			return json({ error: 'Package not found' }, { status: 404 });
		}

		// Seed every expected key with 0 so missing categories are reported
		// explicitly, then overwrite with the fetched counts. Building the
		// defaults by iterating RECENT_CATEGORIES avoids hard-coding its length
		// (the original indexed elements [0]..[2] directly).
		const data: Record<string, number> = {};
		if (category) {
			data[`last_${category}`] = 0;
		} else {
			for (const c of RECENT_CATEGORIES) {
				data[`last_${c}`] = 0;
			}
		}
		for (const download of downloads) {
			data[`last_${download.category}`] = download.downloads;
		}

		const response = {
			package: packageName,
			type: 'recent_downloads',
			data
		};

		// Add rate limit headers
		const remaining = await rateLimiter.getRemainingRequests(rateLimitKey);
		const headers = {
			'X-RateLimit-Remaining': remaining.toString(),
			'X-RateLimit-Reset': (Math.floor(Date.now() / 1000) + 3600).toString()
		};

		return json(response, { headers });
	} catch (error) {
		console.error('Error fetching recent downloads:', error);
		return json({ error: 'Internal server error' }, { status: 500 });
	}
};

View File

@@ -0,0 +1,35 @@
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { getSystemDownloads } from '$lib/api.js';
// Returns downloads split by operating system; the optional `os` query
// parameter restricts the result to one system category.
export const GET: RequestHandler = async ({ params, url }) => {
	const packageName = params.package?.replace(/\./g, '-').replace(/_/g, '-') || '';
	const os = url.searchParams.get('os');
	if (packageName === '__all__') {
		return json({ error: 'Invalid package name' }, { status: 400 });
	}
	try {
		const rows = await getSystemDownloads(packageName, os || undefined);
		if (!rows.length) {
			return json({ error: 'Package not found' }, { status: 404 });
		}
		return json({
			package: packageName,
			type: 'system_downloads',
			data: rows.map((row) => ({
				date: row.date,
				category: row.category,
				downloads: row.downloads
			}))
		});
	} catch (error) {
		console.error('Error fetching system downloads:', error);
		return json({ error: 'Internal server error' }, { status: 500 });
	}
};

View File

@@ -0,0 +1,79 @@
<svelte:head>
<title>FAQs - PyPI Stats</title>
<meta name="description" content="Frequently asked questions about PyPI Stats" />
</svelte:head>
<div class="max-w-4xl mx-auto px-4 sm:px-6 lg:px-8 py-12">
<h1 class="text-3xl font-bold text-gray-900 mb-8">Frequently Asked Questions</h1>
<div class="space-y-8">
<div>
<h2 class="text-xl font-semibold text-gray-900 mb-3">What is PyPI Stats?</h2>
<p class="text-gray-600">
PyPI Stats is a service that provides download statistics for Python packages from the Python Package Index (PyPI).
We collect and process download data to help developers understand package usage patterns.
</p>
</div>
<div>
<h2 class="text-xl font-semibold text-gray-900 mb-3">How accurate is the data?</h2>
<p class="text-gray-600">
Our data comes directly from PyPI's download logs, so it represents actual downloads from PyPI's CDN.
However, this may not capture all downloads if users are using mirrors or other distribution methods.
</p>
</div>
<div>
<h2 class="text-xl font-semibold text-gray-900 mb-3">How often is the data updated?</h2>
<p class="text-gray-600">
We process new data daily from PyPI's BigQuery dataset. Recent statistics (day, week, month) are updated more frequently.
</p>
</div>
<div>
<h2 class="text-xl font-semibold text-gray-900 mb-3">What do the different download categories mean?</h2>
<ul class="list-disc pl-6 text-gray-600">
<li><strong>Overall:</strong> Total downloads, with options for including or excluding mirror downloads</li>
<li><strong>Python Major:</strong> Downloads by Python major version (2.x vs 3.x)</li>
<li><strong>Python Minor:</strong> Downloads by specific Python versions (2.7, 3.6, 3.7, etc.)</li>
<li><strong>System:</strong> Downloads by operating system (Windows, Linux, macOS)</li>
</ul>
</div>
<div>
<h2 class="text-xl font-semibold text-gray-900 mb-3">Is the API free to use?</h2>
<p class="text-gray-600">
Yes, all our API endpoints are free to use. We don't require authentication for basic usage,
though some endpoints are rate limited (currently 100 requests per hour per client) to ensure fair usage.
</p>
</div>
<div>
<h2 class="text-xl font-semibold text-gray-900 mb-3">How do I use the API?</h2>
<p class="text-gray-600">
Our API provides RESTful endpoints that return JSON data. You can find detailed documentation
on our <a href="/api" class="text-blue-600 hover:text-blue-800">API page</a>.
</p>
</div>
<div>
<h2 class="text-xl font-semibold text-gray-900 mb-3">Why don't I see data for my package?</h2>
<p class="text-gray-600">
If your package doesn't appear in our database, it might be because:
</p>
<ul class="list-disc pl-6 text-gray-600">
<li>The package has very few downloads</li>
<li>The package is relatively new and hasn't been processed yet</li>
<li>There might be an issue with the package name format</li>
</ul>
</div>
<div>
<h2 class="text-xl font-semibold text-gray-900 mb-3">Can I contribute to PyPI Stats?</h2>
<p class="text-gray-600">
Yes! PyPI Stats is open source. You can contribute by reporting bugs, suggesting features,
or submitting pull requests on our GitHub repository.
</p>
</div>
</div>
</div>

View File

@@ -0,0 +1,155 @@
<script lang="ts">
// Package detail page: renders the download statistics assembled by the
// sibling +page.ts load function (recent totals plus historical tables).
import type { PageData } from './$types';
const { data } = $props<{ data: PageData }>();
</script>
<svelte:head>
<title>{data.packageName} - PyPI Stats</title>
<meta name="description" content="Download statistics for {data.packageName} package" />
</svelte:head>
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-12">
<div class="mb-8">
<h1 class="text-3xl font-bold text-gray-900 mb-2">{data.packageName}</h1>
<p class="text-gray-600">Download statistics from PyPI</p>
</div>
<!-- Recent Stats -->
<!-- data.recentStats maps keys like "last_day" to counts; the prefix is stripped for display below. -->
{#if data.recentStats}
<div class="bg-white rounded-lg shadow-sm border mb-8">
<div class="px-6 py-4 border-b">
<h2 class="text-lg font-semibold text-gray-900">Recent Downloads</h2>
</div>
<div class="p-6">
<div class="grid grid-cols-1 md:grid-cols-3 gap-6">
{#each Object.entries(data.recentStats) as [period, count]}
<div class="text-center">
<div class="text-2xl font-bold text-blue-600">
{(count as number).toLocaleString()}
</div>
<div class="text-sm text-gray-500 capitalize">
{period.replace('last_', '')}
</div>
</div>
{/each}
</div>
</div>
</div>
{/if}
<!-- Overall Stats -->
<!-- Each table below is capped at the first 10 rows via slice(0, 10). -->
{#if data.overallStats && data.overallStats.length > 0}
<div class="bg-white rounded-lg shadow-sm border mb-8">
<div class="px-6 py-4 border-b">
<h2 class="text-lg font-semibold text-gray-900">Overall Downloads</h2>
</div>
<div class="p-6">
<div class="overflow-x-auto">
<table class="min-w-full divide-y divide-gray-200">
<thead>
<tr>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Date</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Category</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Downloads</th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200">
{#each data.overallStats.slice(0, 10) as stat}
<tr>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-900">{stat.date}</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">{stat.category}</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-900">{stat.downloads.toLocaleString()}</td>
</tr>
{/each}
</tbody>
</table>
</div>
</div>
</div>
{/if}
<!-- Python Version Stats -->
{#if data.pythonMajorStats && data.pythonMajorStats.length > 0}
<div class="bg-white rounded-lg shadow-sm border mb-8">
<div class="px-6 py-4 border-b">
<h2 class="text-lg font-semibold text-gray-900">Python Major Version Downloads</h2>
</div>
<div class="p-6">
<div class="overflow-x-auto">
<table class="min-w-full divide-y divide-gray-200">
<thead>
<tr>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Date</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Version</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Downloads</th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200">
{#each data.pythonMajorStats.slice(0, 10) as stat}
<tr>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-900">{stat.date}</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">{stat.category}</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-900">{stat.downloads.toLocaleString()}</td>
</tr>
{/each}
</tbody>
</table>
</div>
</div>
</div>
{/if}
<!-- System Stats -->
{#if data.systemStats && data.systemStats.length > 0}
<div class="bg-white rounded-lg shadow-sm border mb-8">
<div class="px-6 py-4 border-b">
<h2 class="text-lg font-semibold text-gray-900">System Downloads</h2>
</div>
<div class="p-6">
<div class="overflow-x-auto">
<table class="min-w-full divide-y divide-gray-200">
<thead>
<tr>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Date</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">System</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Downloads</th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200">
{#each data.systemStats.slice(0, 10) as stat}
<tr>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-900">{stat.date}</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">{stat.category}</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-900">{stat.downloads.toLocaleString()}</td>
</tr>
{/each}
</tbody>
</table>
</div>
</div>
</div>
{/if}
<!-- API Links -->
<!-- Direct links to the JSON API endpoints for this package. -->
<div class="bg-blue-50 rounded-lg p-6">
<h3 class="text-lg font-semibold text-gray-900 mb-4">API Access</h3>
<div class="grid grid-cols-1 md:grid-cols-2 gap-4 text-sm">
<div>
<strong>Recent downloads:</strong>
<a href="/api/packages/{data.packageName}/recent" class="text-blue-600 hover:text-blue-800 ml-2">JSON</a>
</div>
<div>
<strong>Overall downloads:</strong>
<a href="/api/packages/{data.packageName}/overall" class="text-blue-600 hover:text-blue-800 ml-2">JSON</a>
</div>
<div>
<strong>Python major versions:</strong>
<a href="/api/packages/{data.packageName}/python_major" class="text-blue-600 hover:text-blue-800 ml-2">JSON</a>
</div>
<div>
<strong>System downloads:</strong>
<a href="/api/packages/{data.packageName}/system" class="text-blue-600 hover:text-blue-800 ml-2">JSON</a>
</div>
</div>
</div>
</div>

View File

@@ -0,0 +1,59 @@
import {
getRecentDownloads,
getOverallDownloads,
getPythonMajorDownloads,
getPythonMinorDownloads,
getSystemDownloads
} from '$lib/api.js';
import type { PageLoad } from './$types';
/**
 * Loads all download statistics for the package detail page.
 *
 * Fetches recent, overall, Python major/minor, and system stats in parallel.
 * Invalid package names and load failures both resolve to an empty payload
 * so the page still renders.
 */
export const load: PageLoad = async ({ params }) => {
	const packageName = params.package?.replace(/\./g, '-').replace(/_/g, '-') || '';

	if (!packageName || packageName === '__all__') {
		return emptyResult(packageName);
	}

	try {
		// Fetch all statistics in parallel
		const [recentStats, overallStats, pythonMajorStats, pythonMinorStats, systemStats] = await Promise.all([
			getRecentDownloads(packageName),
			getOverallDownloads(packageName),
			getPythonMajorDownloads(packageName),
			getPythonMinorDownloads(packageName),
			getSystemDownloads(packageName)
		]);

		// Collapse the recent rows into { last_<category>: count }. Return null
		// when there are no rows so the page's `{#if data.recentStats}` guard
		// hides the section instead of rendering an empty grid (an empty object
		// is truthy and previously kept the section visible).
		const recentStatsFormatted: Record<string, number> = {};
		for (const stat of recentStats) {
			recentStatsFormatted[`last_${stat.category}`] = Number(stat.downloads);
		}

		return {
			packageName,
			recentStats: recentStats.length > 0 ? recentStatsFormatted : null,
			overallStats,
			pythonMajorStats,
			pythonMinorStats,
			systemStats
		};
	} catch (error) {
		console.error('Error loading package data:', error);
		return emptyResult(packageName);
	}
};

// Shared empty payload used for invalid package names and load failures.
function emptyResult(packageName: string) {
	return {
		packageName,
		recentStats: null,
		overallStats: [],
		pythonMajorStats: [],
		pythonMinorStats: [],
		systemStats: []
	};
}

View File

@@ -0,0 +1,54 @@
import { searchPackages } from '$lib/api.js';
import type { PageLoad } from './$types';
/**
 * Search page data loading.
 *
 * The universal load handles plain GET navigations (`/search?q=<term>`); the
 * form action handles POSTed submissions. Both delegate to `runSearch` so the
 * behaviour is defined in one place instead of being duplicated.
 */
export const load: PageLoad = async ({ url }) => {
	return runSearch(url.searchParams.get('q'));
};

// NOTE(review): SvelteKit only honours `actions` exported from
// +page.server.ts; if this file is the universal +page.ts this export is
// ignored at runtime — confirm the file placement.
export const actions = {
	default: async ({ request }) => {
		const formData = await request.formData();
		const value = formData.get('q');
		return runSearch(value === null ? null : value.toString());
	}
};

/**
 * Runs a package search and returns `{ packages, searchTerm }`.
 * Errors are logged and collapsed into an empty result so the page renders.
 */
async function runSearch(searchTerm: string | null) {
	if (!searchTerm) {
		return {
			packages: [],
			searchTerm: null
		};
	}
	try {
		const packages = await searchPackages(searchTerm);
		return {
			packages,
			searchTerm
		};
	} catch (error) {
		console.error('Error searching packages:', error);
		return {
			packages: [],
			searchTerm
		};
	}
}

View File

@@ -0,0 +1,67 @@
<script lang="ts">
// Package search page: a plain GET form whose results are produced by the
// load function reading `?q=` from the URL.
import type { PageData } from './$types';
const { data } = $props<{ data: PageData }>();
// Pre-fill the input with the current query so the term persists after a
// search instead of resetting to an empty field.
let searchTerm = $state(data.searchTerm ?? '');
</script>
<svelte:head>
<title>Search Packages - PyPI Stats</title>
</svelte:head>
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-12">
<div class="max-w-2xl mx-auto">
<h1 class="text-3xl font-bold text-gray-900 mb-8">Search Packages</h1>
<!-- Search Form: plain GET navigation. `use:enhance` was removed — it only
     supports method="POST" forms and errors at runtime on a GET form. -->
<form method="GET" action="/search" class="mb-8">
<div class="flex gap-2">
<input
type="text"
name="q"
bind:value={searchTerm}
placeholder="Enter package name..."
class="flex-1 px-4 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent"
required
/>
<button
type="submit"
class="px-6 py-2 bg-blue-600 text-white rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2"
>
Search
</button>
</div>
</form>
{#if data.packages && data.packages.length > 0}
<div class="bg-white rounded-lg shadow-sm border">
<div class="px-6 py-4 border-b">
<h2 class="text-lg font-semibold text-gray-900">
Found {data.packages.length} package{data.packages.length === 1 ? '' : 's'}
</h2>
</div>
<div class="divide-y divide-gray-200">
{#each data.packages as pkg}
<div class="px-6 py-4 hover:bg-gray-50">
<a href="/packages/{pkg}" class="block">
<div class="text-lg font-medium text-blue-600 hover:text-blue-800">
{pkg}
</div>
<div class="text-sm text-gray-500">
View download statistics
</div>
</a>
</div>
{/each}
</div>
</div>
{:else if data.searchTerm}
<div class="text-center py-12">
<div class="text-gray-500">
<p class="text-lg mb-2">No packages found</p>
<p class="text-sm">Try searching for a different package name</p>
</div>
</div>
{/if}
</div>
</div>

3
static/robots.txt Normal file
View File

@@ -0,0 +1,3 @@
# allow crawling everything by default
User-agent: *
Disallow:

Some files were not shown because too many files have changed in this diff Show More