* update ignore files

* setup poetry

* add db seeds

* black

* set up compose

* backfill script

* add makefile, update readme

* update poetry

* readme

* Fixes

* cleanup and rate limit changes

* poetry 1.0.5

* some more cleanup

* k8s

* k8s

* update yml

* cleanup and admin

* deploy
Flynn
2020-08-15 18:36:37 -04:00
committed by GitHub
parent 4a5c5175cc
commit a5bb518b88
56 changed files with 2574 additions and 1372 deletions

.dockerignore

@@ -1,22 +1,24 @@
# project
.git
-.gitignore
+.gitignoreold
.dockerignore
.tool-versions
docker-compose.yml
envs/
.venv/
scripts/
kubernetes/
pypistats.egg-info/
Dockerfile
Makefile
README.rst
docs/
migrations/
tests/
# mac osx
-.DS_Store
+**/.DS_Store
# python bytecode
*.py[cod]
-__pycache__
+**/__pycache__/
# ignore
ignore/
# celery
celerybeat-schedule
@@ -27,5 +29,13 @@ dump.rdb
# Elastic Beanstalk Files
.elasticbeanstalk
-.git
+.ebignore
# intellij
.idea/
# secrets
*.env
.env.sample
.gitignore

.ebignore (deleted file)

@@ -1,30 +0,0 @@
# project
.git
.gitignore
.dockerignore
README.rst
docs/
migrations/
tests/
# mac osx
.DS_Store
# python bytecode
*.py[cod]
__pycache__
# ignore
ignore/
# celery
celerybeat-schedule
celerybeat.pid
# redis
dump.rdb
# Elastic Beanstalk Files
.elasticbeanstalk
.git
.gitignore

.env.sample (new file, 23 lines)

@@ -0,0 +1,23 @@
ENV=development
CELERY_BROKER_URL=redis://redis
FLOWER_PORT=5555
FLASK_APP=pypistats/run.py
FLASK_DEBUG=1
GOOGLE_TYPE=
GOOGLE_PROJECT_ID=
GOOGLE_PRIVATE_KEY_ID=
GOOGLE_PRIVATE_KEY=
GOOGLE_CLIENT_EMAIL=
GOOGLE_CLIENT_ID=
GOOGLE_AUTH_URI=
GOOGLE_TOKEN_URI=
GOOGLE_AUTH_PROVIDER_X509_CERT_URL=
GOOGLE_CLIENT_X509_CERT_URL=
POSTGRESQL_HOST=postgresql
POSTGRESQL_PORT=5432
POSTGRESQL_USERNAME=admin
POSTGRESQL_PASSWORD=root
POSTGRESQL_DBNAME=pypistats
GITHUB_CLIENT_ID=
GITHUB_CLIENT_SECRET=
PYPISTATS_SECRET=secret
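
These are only the expected variable names with blank or throwaway values; nothing in the commit loads the file for you outside docker-compose or the k8s secret. One plausible local-shell workflow (an assumption, not something this commit wires up), leaning on the fact that ".gitignore" below covers "*.env":

    # hypothetical: copy the sample, fill in real values, then export it into the shell
    cp .env.sample local.env      # "local.env" matches the gitignored "*.env" pattern
    set -a                        # auto-export every variable assigned while this is active
    source local.env
    set +a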

.gitignore (vendored, 166 lines)

@@ -1,27 +1,155 @@
-# credentials
+# Byte-compiled / optimized / DLL files
-*.env
+__pycache__/
-# mac osx
-.DS_Store
-# python bytecode
*.py[cod]
-__pycache__
+*$py.class
-# ignore
+# C extensions
-ignore/
+*.so
-# celery
+# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
-# redis
+# SageMath parsed files
-dump.rdb
+*.sage.py
# Environments
envs/.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# MacOS
.DS_Store
# Intellij
.idea/
# TODO remove
# EB
.elasticbeanstalk/
# Creds
envs/
*.env
-# Elastic Beanstalk Files
-.elasticbeanstalk
-!.elasticbeanstalk/*.cfg.yml
-!.elasticbeanstalk/*.global.yml
-# intellij
-.idea/

.tool-versions (new file, 3 lines)

@@ -0,0 +1,3 @@
python 3.8.5
poetry 1.0.10
kubectl 1.17.4

Dockerfile

@@ -1,20 +1,43 @@
-FROM python:3.6-slim
+FROM python:3.8.5-slim
-RUN apt-get update && apt-get install -y supervisor redis-server
-RUN pip install pipenv==2018.10.13
-ENV WORKON_HOME=/venv
-ENV C_FORCE_ROOT="true"
+# Add build deps for python packages
+# libpq-dev is required to install psycopg2-binary
+# curl is used to install poetry
+RUN apt-get update && \
+    apt-get install -y curl libpq-dev && \
+    apt-get clean
+# Set the working directory to /app
WORKDIR /app
-ADD Pipfile /app
-ADD Pipfile.lock /app
+# Create python user to avoid having to run as root
+RUN useradd -m python && \
+    chown python:python -R /app
+# Set the user
+USER python
-RUN pipenv install
+# Set the poetry version
+ARG POETRY_VERSION=1.0.10
+# Set to ensure logs are output promptly
+ENV PYTHONUNBUFFERED=1
+# Update the path
+ENV PATH=/home/python/.poetry/bin:/home/python/.local/bin:$PATH
-ADD . /app
+# Install vendored poetry
+RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python
-EXPOSE 5000
+# Add poetry stuff
+ADD pyproject.toml .
+ADD poetry.lock .
+# Install all the dependencies and cleanup
+RUN poetry config virtualenvs.create false && \
+    poetry run pip install --user -U pip && \
+    poetry install --no-dev && \
+    "yes" | poetry cache clear --all pypi
-CMD /usr/bin/supervisord
+# Add everything
+ADD . .
+# Set the entrypoint script
+ENTRYPOINT ["./docker-entrypoint.sh"]
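
With the CMD-plus-supervisord pair gone, the entrypoint script (docker-entrypoint.sh below) decides what a container runs based on its first argument. A quick smoke test of the image, assuming a local tag of pypistats:

    # build the image, then exercise the "webdev" branch of the entrypoint
    docker build -t pypistats .
    docker run --rm -p 5000:5000 pypistats webdev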

Makefile (new file, 33 lines)

@@ -0,0 +1,33 @@
# format everything
fmt:
	poetry run isort .
	poetry run black .

# launch the application in docker-compose
.PHONY: pypistats
pypistats:
	docker-compose down
	docker-compose build
	docker-compose up

# bring down the application and destroy the db volumes
cleanup:
	docker-compose down -v

# setup a local environment
setup:
	brew install asdf || true
	asdf install
	poetry install

# deploy to gke
deploy:
	sh kubernetes/deploy.sh

# port forward flower
pfflower:
	open http://localhost:7777 && kubectl get pods -n pypistats | grep flower | awk '{print $$1}' | xargs -I % kubectl port-forward % 7777:5555

# port forward web
pfweb:
	open http://localhost:7000 && kubectl get pods -n pypistats | grep web | awk '{print $$1}' | xargs -I % kubectl port-forward % 7000:5000
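
Taken together, the intended flow is (grounded in the targets above, with Homebrew assumed present for setup):

    make setup       # brew-install asdf, install the pinned tools from .tool-versions, poetry install
    make pypistats   # rebuild and start the full docker-compose stack
    make cleanup     # stop everything and drop the postgres data volume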

Pipfile (deleted, 27 lines)

@@ -1,27 +0,0 @@
[[source]]
url = "https://pypi.python.org/simple"
verify_ssl = true
name = "pypi"
[packages]
google-cloud-bigquery = "*"
"psycopg2" = "*"
flask = "*"
github-flask = "*"
flask-sqlalchemy = "*"
flask-migrate = "*"
flask-login = "*"
flask-wtf = "*"
celery = "*"
redis = "*"
flask-sslify = "*"
gunicorn = "*"
requests = "*"
[dev-packages]
awsebcli = "*"

Pipfile.lock (generated, deleted, 595 lines)

@@ -1,595 +0,0 @@
{
"_meta": {
"hash": {
"sha256": "256017a270df60f33447a331ea8f632a2de1d41054b45baaff8fb33ad2a7983c"
},
"host-environment-markers": {
"implementation_name": "cpython",
"implementation_version": "3.6.5",
"os_name": "posix",
"platform_machine": "x86_64",
"platform_python_implementation": "CPython",
"platform_release": "17.3.0",
"platform_system": "Darwin",
"platform_version": "Darwin Kernel Version 17.3.0: Thu Nov 9 18:09:22 PST 2017; root:xnu-4570.31.3~1/RELEASE_X86_64",
"python_full_version": "3.6.5",
"python_version": "3.6",
"sys_platform": "darwin"
},
"pipfile-spec": 6,
"requires": {},
"sources": [
{
"name": "pypi",
"url": "https://pypi.python.org/simple",
"verify_ssl": true
}
]
},
"default": {
"alembic": {
"hashes": [
"sha256:04bcb970ca8659c3607ddd8ffd86cc9d6a99661c9bc590955e8813c66bfa582b"
],
"version": "==1.0.2"
},
"amqp": {
"hashes": [
"sha256:eed41946890cd43e8dee44a316b85cf6fee5a1a34bb4a562b660a358eb529e1b",
"sha256:073dd02fdd73041bffc913b767866015147b61f2a9bc104daef172fc1a0066eb"
],
"version": "==2.3.2"
},
"billiard": {
"hashes": [
"sha256:ed65448da5877b5558f19d2f7f11f8355ea76b3e63e1c0a6059f47cfae5f1c84"
],
"version": "==3.5.0.4"
},
"cachetools": {
"hashes": [
"sha256:d1c398969c478d336f767ba02040fa22617333293fb0b8968e79b16028dfee35",
"sha256:90f1d559512fc073483fe573ef5ceb39bf6ad3d39edc98dc55178a2b2b176fa3"
],
"version": "==2.1.0"
},
"celery": {
"hashes": [
"sha256:ad7a7411772b80a4d6c64f2f7f723200e39fb66cf614a7fdfab76d345acc7b13",
"sha256:77dab4677e24dc654d42dfbdfed65fa760455b6bb563a0877ecc35f4cfcfc678"
],
"version": "==4.2.1"
},
"certifi": {
"hashes": [
"sha256:339dc09518b07e2fa7eda5450740925974815557727d6bd35d319c1524a04a4c",
"sha256:6d58c986d22b038c8c0df30d639f23a3e6d172a05c3583e766f4c0b785c0986a"
],
"version": "==2018.10.15"
},
"chardet": {
"hashes": [
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691",
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"
],
"version": "==3.0.4"
},
"click": {
"hashes": [
"sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13",
"sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7"
],
"version": "==7.0"
},
"flask": {
"hashes": [
"sha256:a080b744b7e345ccfcbc77954861cb05b3c63786e93f2b3875e0913d44b43f05",
"sha256:2271c0070dbcb5275fad4a82e29f23ab92682dc45f9dfbc22c02ba9b9322ce48"
],
"version": "==1.0.2"
},
"flask-login": {
"hashes": [
"sha256:c815c1ac7b3e35e2081685e389a665f2c74d7e077cb93cecabaea352da4752ec"
],
"version": "==0.4.1"
},
"flask-migrate": {
"hashes": [
"sha256:cb7d7b37feb68e3a8769aaf7a3954ecbdcd9bdeef8f21cede9eaa07c813f8af9",
"sha256:a25b3d2d2bb0f0724f104afbadae888a4b942e7221b451f720c69698d4863da7"
],
"version": "==2.3.0"
},
"flask-sqlalchemy": {
"hashes": [
"sha256:3bc0fac969dd8c0ace01b32060f0c729565293302f0c4269beed154b46bec50b",
"sha256:5971b9852b5888655f11db634e87725a9031e170f37c0ce7851cf83497f56e53"
],
"version": "==2.3.2"
},
"flask-sslify": {
"hashes": [
"sha256:d33e1d3c09cd95154176aa8a7319418e52129fc482dd56d8a8ad7c24500d543e"
],
"version": "==0.1.5"
},
"flask-wtf": {
"hashes": [
"sha256:d9a9e366b32dcbb98ef17228e76be15702cd2600675668bca23f63a7947fd5ac",
"sha256:5d14d55cfd35f613d99ee7cba0fc3fbbe63ba02f544d349158c14ca15561cc36"
],
"version": "==0.14.2"
},
"github-flask": {
"hashes": [
"sha256:24600b720f698bac10667b76b136995ba7821d884e58b27e2a18ca0e4760c786"
],
"version": "==3.2.0"
},
"google-api-core": {
"hashes": [
"sha256:35c0fdb7d0ea8e699d46611c31cdbbaef55c4b3905b394f16a8c41800be5de51",
"sha256:7cf597628cb9c5ceb24834b30a325dc271d3ba15d868d81c20aa80a77e13be65"
],
"version": "==1.5.1"
},
"google-auth": {
"hashes": [
"sha256:a4cf9e803f2176b5de442763bd339b313d3f1ed3002e3e1eb6eec1d7c9bbc9b4",
"sha256:9ca363facbf2622d9ba828017536ccca2e0f58bd15e659b52f312172f8815530"
],
"version": "==1.5.1"
},
"google-cloud-bigquery": {
"hashes": [
"sha256:a9c7bf529e207664b7a1e69968e99290bb098e53130884526c8d49e6b61f0af9",
"sha256:d559ba1e05cf6a960e09bb5aab3aeb4d50ad9e08c77a20a17c01c9b2bd8d6cb7"
],
"version": "==1.6.0"
},
"google-cloud-core": {
"hashes": [
"sha256:0090df83dbc5cb2405fa90844366d13176d1c0b48181c1807ab15f53be403f73",
"sha256:89e8140a288acec20c5e56159461d3afa4073570c9758c05d4e6cb7f2f8cc440"
],
"version": "==0.28.1"
},
"google-resumable-media": {
"hashes": [
"sha256:116de90b9cd483b17c53618ee6a5a20f33e741c648140c8cc9c2141e07616ff1",
"sha256:97de518f8166d442cc0b61fab308bcd319dbb970981e667ec8ded44f5ce49836"
],
"version": "==0.3.1"
},
"googleapis-common-protos": {
"hashes": [
"sha256:0946967c4c29b1339bb211949e1e17dbe0ae9ff8265fafa7bf4cf2164ef5a3b1"
],
"version": "==1.5.5"
},
"gunicorn": {
"hashes": [
"sha256:aa8e0b40b4157b36a5df5e599f45c9c76d6af43845ba3b3b0efe2c70473c2471",
"sha256:fa2662097c66f920f53f70621c6c58ca4a3c4d3434205e608e121b5b3b71f4f3"
],
"version": "==19.9.0"
},
"idna": {
"hashes": [
"sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e",
"sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16"
],
"version": "==2.7"
},
"itsdangerous": {
"hashes": [
"sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749",
"sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19"
],
"version": "==1.1.0"
},
"jinja2": {
"hashes": [
"sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd",
"sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4"
],
"version": "==2.10"
},
"kombu": {
"hashes": [
"sha256:b274db3a4eacc4789aeb24e1de3e460586db7c4fc8610f7adcc7a3a1709a60af",
"sha256:86adec6c60f63124e2082ea8481bbe4ebe04fde8ebed32c177c7f0cd2c1c9082"
],
"version": "==4.2.1"
},
"mako": {
"hashes": [
"sha256:4e02fde57bd4abb5ec400181e4c314f56ac3e49ba4fb8b0d50bba18cb27d25ae"
],
"version": "==1.0.7"
},
"markupsafe": {
"hashes": [
"sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
],
"version": "==1.0"
},
"protobuf": {
"hashes": [
"sha256:10394a4d03af7060fa8a6e1cbf38cea44be1467053b0aea5bbfcb4b13c4b88c4",
"sha256:59cd75ded98094d3cf2d79e84cdb38a46e33e7441b2826f3838dcc7c07f82995",
"sha256:1931d8efce896981fe410c802fd66df14f9f429c32a72dd9cfeeac9815ec6444",
"sha256:92e8418976e52201364a3174e40dc31f5fd8c147186d72380cbda54e0464ee19",
"sha256:a7ee3bb6de78185e5411487bef8bc1c59ebd97e47713cba3c460ef44e99b3db9",
"sha256:5ee0522eed6680bb5bac5b6d738f7b0923b3cafce8c4b1a039a6107f0841d7ed",
"sha256:fcfc907746ec22716f05ea96b7f41597dfe1a1c088f861efb8a0d4f4196a6f10",
"sha256:ceec283da2323e2431c49de58f80e1718986b79be59c266bb0509cbf90ca5b9e",
"sha256:65917cfd5da9dfc993d5684643063318a2e875f798047911a9dd71ca066641c9",
"sha256:46e34fdcc2b1f2620172d3a4885128705a4e658b9b62355ae5e98f9ea19f42c2",
"sha256:9335f79d1940dfb9bcaf8ec881fb8ab47d7a2c721fb8b02949aab8bbf8b68625",
"sha256:685bc4ec61a50f7360c9fd18e277b65db90105adbf9c79938bd315435e526b90",
"sha256:574085a33ca0d2c67433e5f3e9a0965c487410d6cb3406c83bdaf549bfc2992e",
"sha256:4b92e235a3afd42e7493b281c8b80c0c65cbef45de30f43d571d1ee40a1f77ef",
"sha256:e7a5ccf56444211d79e3204b05087c1460c212a2c7d62f948b996660d0165d68",
"sha256:196d3a80f93c537f27d2a19a4fafb826fb4c331b0b99110f985119391d170f96",
"sha256:1489b376b0f364bcc6f89519718c057eb191d7ad6f1b395ffd93d1aa45587811"
],
"version": "==3.6.1"
},
"psycopg2": {
"hashes": [
"sha256:36f8f9c216fcca048006f6dd60e4d3e6f406afde26cfb99e063f137070139eaf",
"sha256:e9ca911f8e2d3117e5241d5fa9aaa991cb22fb0792627eeada47425d706b5ec8",
"sha256:3232ec1a3bf4dba97fbf9b03ce12e4b6c1d01ea3c85773903a67ced725728232",
"sha256:f93d508ca64d924d478fb11e272e09524698f0c581d9032e68958cfbdd41faef",
"sha256:e168aa795ffbb11379c942cf95bf813c7db9aa55538eb61de8c6815e092416f5",
"sha256:86b52e146da13c896e50c5a3341a9448151f1092b1a4153e425d1e8b62fec508",
"sha256:b2abbff9e4141484bb89b96eb8eae186d77bc6d5ffbec6b01783ee5c3c467351",
"sha256:efa19deae6b9e504a74347fe5e25c2cb9343766c489c2ae921b05f37338b18d1",
"sha256:b1964ed645ef8317806d615d9ff006c0dadc09dfc54b99ae67f9ba7a1ec9d5d2",
"sha256:6e737915de826650d1a5f7ff4ac6cf888a26f021a647390ca7bafdba0e85462b",
"sha256:0f4c784e1b5a320efb434c66a50b8dd7e30a7dc047e8f45c0a8d2694bfe72781",
"sha256:6bc3e68ee16f571681b8c0b6d5c0a77bef3c589012352b3f0cf5520e674e9d01",
"sha256:db5e3c52576cc5b93a959a03ccc3b02cb8f0af1fbbdc80645f7a215f0b864f3a",
"sha256:6dbbd7aabbc861eec6b910522534894d9dbb507d5819bc982032c3ea2e974f51",
"sha256:985c06c2a0f227131733ae58d6a541a5bc8b665e7305494782bebdb74202b793",
"sha256:19eaac4eb25ab078bd0f28304a0cb08702d120caadfe76bb1e6846ed1f68635e",
"sha256:aa3cd07f7f7e3183b63d48300666f920828a9dbd7d7ec53d450df2c4953687a9",
"sha256:59c1a0e4f9abe970062ed35d0720935197800a7ef7a62b3a9e3a70588d9ca40b",
"sha256:11fbf688d5c953c0a5ba625cc42dea9aeb2321942c7c5ed9341a68f865dc8cb1",
"sha256:d7de3bf0986d777807611c36e809b77a13bf1888f5c8db0ebf24b47a52d10726",
"sha256:0fdbaa32c9eb09ef09d425dc154628fca6fa69d2f7c1a33f889abb7e0efb3909",
"sha256:6506c5ff88750948c28d41852c09c5d2a49f51f28c6d90cbf1b6808e18c64e88",
"sha256:711ec617ba453fdfc66616db2520db3a6d9a891e3bf62ef9aba4c95bb4e61230",
"sha256:6ed9b2cfe85abc720e8943c1808eeffd41daa73e18b7c1e1a228b0b91f768ccc",
"sha256:844dacdf7530c5c612718cf12bc001f59b2d9329d35b495f1ff25045161aa6af",
"sha256:cc33c3a90492e21713260095f02b12bee02b8d1f2c03a221d763ce04fa90e2e9",
"sha256:f4b0460a21f784abe17b496f66e74157a6c36116fa86da8bf6aa028b9e8ad5fe",
"sha256:0b9e48a1c1505699a64ac58815ca99104aacace8321e455072cee4f7fe7b2698",
"sha256:a86dfe45f4f9c55b1a2312ff20a59b30da8d39c0e8821d00018372a2a177098f",
"sha256:eccf962d41ca46e6326b97c8fe0a6687b58dfc1a5f6540ed071ff1474cea749e"
],
"version": "==2.7.5"
},
"pyasn1": {
"hashes": [
"sha256:dea873d6c907c1cf1341fd88742a61efce33227d7743cb37564ab7d7e77dd9fd",
"sha256:13794d835643ee970b2c059dbfe4eb5d751e16c693c8baee61c526abd209e5c7",
"sha256:edad117649643230493aeb4955456ce19ab4b12e94489dde6f7094cdb5a3c87e",
"sha256:e8b69ea2200d42201cbedd486eedb8980f320d4534f83ce2fb468e96aa5545d0",
"sha256:b9d3abc5031e61927c82d4d96c1cec1e55676c1a991623cfed28faea73cdd7ca",
"sha256:d3bbd726c1a760d4ca596a4d450c380b81737612fe0182f5bb3caebc17461fd9",
"sha256:74ac8521a0480f228549be20bea555ae35678f0e754c2fbc6f1576b0959bec43",
"sha256:ded5eea5cb88bc1ce9aa074b5a3092f95ce4741887e317e9b49c7ece75d7ea0e",
"sha256:0ad0fe0593dde1e599cac0bf65bb1a4ec663032f0bc68ee44850db4251e8c501",
"sha256:49a8ed515f26913049113820b462f698e6ed26df62c389dafb6fa3685ddca8de",
"sha256:89399ca8ecd4524f974e926d4ef9e7a787903e01f0a9cdff3131ad1361792fe5",
"sha256:8f291e0338d519a1a0d07f0b9d03c9265f6be26eb32fdd21af6d3259d14ea49c",
"sha256:f58f2a3d12fd754aa123e9fa74fb7345333000a035f3921dbdaa08597aa53137"
],
"version": "==0.4.4"
},
"pyasn1-modules": {
"hashes": [
"sha256:e64679de1940f41ead5170fce364d54e7b9e2e862f064727b6bcb5cee753b7a2",
"sha256:0da2f947e8ad2697e86fe5fd0e55a4093a2fd79d839c9e19c34e28097db7002c",
"sha256:c355a45b32c5bc1d9893eceb704b0cfcd1126f91b5a7b9ee64c1c05383283381",
"sha256:a728bb9502d1fdc104c66f24a176b6a70a32e89d1d8a5b55c959233ed51c67be",
"sha256:a38a8811ea784c0136abfdba73963876328f66172db21a05a82f9515909bfb4e",
"sha256:f183f0940b9f5ed2ad9d04c80cab2451440fa9af4fc959d85113fadd2e777962",
"sha256:35ff894a0b5df8e28b700126b2869c7dcfb2b2db5bc82e5d5e82547069241553",
"sha256:44688b94841349648b1e1a5a7a3d96e6596d5d4f21d0b59a82307e153c4dc74b",
"sha256:077250b34432520430bc1c80dcbda4e354090785567c33ded35faa6df8d24753",
"sha256:833716dde880a7f2f2ccdeea9a096842626981ff2a477d8b318c0906367ac11b",
"sha256:ed71d20225c356881c29f0b1d7a0d6521563a389d9478e8f95d798cc5ba07b88",
"sha256:c30a098435ea0989c37005a971843e9d3966c7f6d056ddbf052e5061c06e3291",
"sha256:a0cf3e1842e7c60fde97cb22d275eb6f9524f5c5250489e292529de841417547"
],
"version": "==0.2.2"
},
"python-dateutil": {
"hashes": [
"sha256:063df5763652e21de43de7d9e00ccf239f953a832941e37be541614732cdfc93",
"sha256:88f9287c0174266bb0d8cedd395cfba9c58e87e5ad86b2ce58859bc11be3cf02"
],
"version": "==2.7.5"
},
"python-editor": {
"hashes": [
"sha256:a3c066acee22a1c94f63938341d4fb374e3fdd69366ed6603d7b24bed1efc565"
],
"version": "==1.0.3"
},
"pytz": {
"hashes": [
"sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6",
"sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca"
],
"version": "==2018.7"
},
"redis": {
"hashes": [
"sha256:8a1900a9f2a0a44ecf6e8b5eb3e967a9909dfed219ad66df094f27f7d6f330fb",
"sha256:a22ca993cea2962dbb588f9f30d0015ac4afcc45bee27d3978c0dbe9e97c6c0f"
],
"version": "==2.10.6"
},
"requests": {
"hashes": [
"sha256:a84b8c9ab6239b578f22d1c21d51b696dcfe004032bb80ea832398d6909d7279",
"sha256:99dcfdaaeb17caf6e526f32b6a7b780461512ab3f1d992187801694cba42770c"
],
"version": "==2.20.0"
},
"rsa": {
"hashes": [
"sha256:14ba45700ff1ec9eeb206a2ce76b32814958a98e372006c8fb76ba820211be66",
"sha256:1a836406405730121ae9823e19c6e806c62bbad73f890574fff50efa4122c487"
],
"version": "==4.0"
},
"six": {
"hashes": [
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb",
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9"
],
"version": "==1.11.0"
},
"sqlalchemy": {
"hashes": [
"sha256:84412de3794acee05630e7788f25e80e81f78eb4837e7b71d0499129f660486a"
],
"version": "==1.2.13"
},
"urllib3": {
"hashes": [
"sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39",
"sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22"
],
"version": "==1.24.1"
},
"vine": {
"hashes": [
"sha256:6849544be74ec3638e84d90bc1cf2e1e9224cc10d96cd4383ec3f69e9bce077b",
"sha256:52116d59bc45392af9fdd3b75ed98ae48a93e822cee21e5fda249105c59a7a72"
],
"version": "==1.1.4"
},
"werkzeug": {
"hashes": [
"sha256:d5da73735293558eb1651ee2fddc4d0dedcfa06538b8813a2e20011583c9e49b",
"sha256:c3fd7a7d41976d9f44db327260e263132466836cef6f91512889ed60ad26557c"
],
"version": "==0.14.1"
},
"wtforms": {
"hashes": [
"sha256:e3ee092c827582c50877cdbd49e9ce6d2c5c1f6561f849b3b068c1b8029626f1",
"sha256:0cdbac3e7f6878086c334aa25dc5a33869a3954e9d1e015130d65a69309b3b61"
],
"version": "==2.2.1"
}
},
"develop": {
"awsebcli": {
"hashes": [
"sha256:b2dff9efbd83979b8050a95803eb88b0902660d926eae31e3a0bfe0836c2d318"
],
"version": "==3.14.6"
},
"blessed": {
"hashes": [
"sha256:9a0a98c7070b016341ae0300415deeda930a340ef7961d9b920a5cb200a601e2",
"sha256:777b0b6b5ce51f3832e498c22bc6a093b6b5f99148c7cbf866d26e2dec51ef21"
],
"version": "==1.15.0"
},
"botocore": {
"hashes": [
"sha256:a256dbe50b05111a53640ac5defd71aa589d1fab27bd7df7310d7f0da72447a7",
"sha256:eed1b39027ee882ebd0df10dcb7307db20fc4b468debae513dc183743e850d17"
],
"version": "==1.12.37"
},
"cached-property": {
"hashes": [
"sha256:3a026f1a54135677e7da5ce819b0c690f156f37976f3e30c5430740725203d7f",
"sha256:9217a59f14a5682da7c4b8829deadbfc194ac22e9908ccf7c8820234e80a1504"
],
"version": "==1.5.1"
},
"cement": {
"hashes": [
"sha256:8765ed052c061d74e4d0189addc33d268de544ca219b259d797741f725e422d2"
],
"version": "==2.8.2"
},
"certifi": {
"hashes": [
"sha256:339dc09518b07e2fa7eda5450740925974815557727d6bd35d319c1524a04a4c",
"sha256:6d58c986d22b038c8c0df30d639f23a3e6d172a05c3583e766f4c0b785c0986a"
],
"version": "==2018.10.15"
},
"chardet": {
"hashes": [
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691",
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"
],
"version": "==3.0.4"
},
"colorama": {
"hashes": [
"sha256:463f8483208e921368c9f306094eb6f725c6ca42b0f97e313cb5d5512459feda",
"sha256:48eb22f4f8461b1df5734a074b57042430fb06e1d61bd1e11b078c0fe6d7a1f1"
],
"version": "==0.3.9"
},
"docker": {
"hashes": [
"sha256:31421f16c01ffbd1ea7353c7e7cd7540bf2e5906d6173eb51c8fea4e0ea38b19",
"sha256:fbe82af9b94ccced752527c8de07fa20267f9634b48674ba478a0bb4000a0b1e"
],
"version": "==3.5.1"
},
"docker-compose": {
"hashes": [
"sha256:27b8dab8d12b8aaedf16fcf829d3ae7cd107d819082d35e6fe248e74e2294093",
"sha256:68b07193755440d5f8d4f47e6f3484212afc255d5b785a81353ea1e9298c1c2c"
],
"version": "==1.21.2"
},
"docker-pycreds": {
"hashes": [
"sha256:0a941b290764ea7286bd77f54c0ace43b86a8acd6eb9ead3de9840af52384079",
"sha256:8b0e956c8d206f832b06aa93a710ba2c3bcbacb5a314449c040b0b814355bbff"
],
"version": "==0.3.0"
},
"dockerpty": {
"hashes": [
"sha256:69a9d69d573a0daa31bcd1c0774eeed5c15c295fe719c61aca550ed1393156ce"
],
"version": "==0.4.1"
},
"docopt": {
"hashes": [
"sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"
],
"version": "==0.6.2"
},
"docutils": {
"hashes": [
"sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6",
"sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
"sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274"
],
"version": "==0.14"
},
"idna": {
"hashes": [
"sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4",
"sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f"
],
"version": "==2.6"
},
"jmespath": {
"hashes": [
"sha256:f11b4461f425740a1d908e9a3f7365c3d2e569f6ca68a2ff8bc5bcd9676edd63",
"sha256:6a81d4c9aa62caf061cb517b4d9ad1dd300374cd4706997aff9cd6aedd61fc64"
],
"version": "==0.9.3"
},
"jsonschema": {
"hashes": [
"sha256:000e68abd33c972a5248544925a0cae7d1125f9bf6c58280d37546b946769a08",
"sha256:6ff5f3180870836cae40f06fa10419f557208175f13ad7bc26caa77beb1f6e02"
],
"version": "==2.6.0"
},
"pathspec": {
"hashes": [
"sha256:72c495d1bbe76674219e307f6d1c6062f2e1b0b483a5e4886435127d0df3d0d3"
],
"version": "==0.5.5"
},
"python-dateutil": {
"hashes": [
"sha256:063df5763652e21de43de7d9e00ccf239f953a832941e37be541614732cdfc93",
"sha256:88f9287c0174266bb0d8cedd395cfba9c58e87e5ad86b2ce58859bc11be3cf02"
],
"version": "==2.7.5"
},
"pyyaml": {
"hashes": [
"sha256:d5eef459e30b09f5a098b9cea68bebfeb268697f78d647bd255a085371ac7f3f",
"sha256:e01d3203230e1786cd91ccfdc8f8454c8069c91bee3962ad93b87a4b2860f537",
"sha256:558dd60b890ba8fd982e05941927a3911dc409a63dcb8b634feaa0cda69330d3",
"sha256:d46d7982b62e0729ad0175a9bc7e10a566fc07b224d2c79fafb5e032727eaa04",
"sha256:a7c28b45d9f99102fa092bb213aa12e0aaf9a6a1f5e395d36166639c1f96c3a1",
"sha256:bc558586e6045763782014934bfaf39d48b8ae85a2713117d16c39864085c613",
"sha256:40c71b8e076d0550b2e6380bada1f1cd1017b882f7e16f09a65be98e017f211a",
"sha256:3d7da3009c0f3e783b2c873687652d83b1bbfd5c88e9813fb7e5b03c0dd3108b",
"sha256:e170a9e6fcfd19021dd29845af83bb79236068bf5fd4df3327c1be18182b2531",
"sha256:aa7dd4a6a427aed7df6fb7f08a580d68d9b118d90310374716ae90b710280af1",
"sha256:3ef3092145e9b70e3ddd2c7ad59bdd0252a94dfe3949721633e41344de00a6bf"
],
"version": "==3.13"
},
"requests": {
"hashes": [
"sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
"sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e"
],
"version": "==2.18.4"
},
"semantic-version": {
"hashes": [
"sha256:75aa594dc3c8000102a35b92e0e21db7b88e083bcb95d0499dea8494b549c3f2",
"sha256:3baad35dcb074a49419539cea6a33b484706b6c2dd03f05b67763eba4c1bb65c"
],
"version": "==2.5.0"
},
"six": {
"hashes": [
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb",
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9"
],
"version": "==1.11.0"
},
"termcolor": {
"hashes": [
"sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
],
"version": "==1.1.0"
},
"texttable": {
"hashes": [
"sha256:119041773ff03596b56392532f9315cb3a3116e404fd6f36e76a7dc088d95c79"
],
"version": "==0.9.1"
},
"urllib3": {
"hashes": [
"sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b",
"sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f"
],
"version": "==1.22"
},
"wcwidth": {
"hashes": [
"sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c",
"sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e"
],
"version": "==0.1.7"
},
"websocket-client": {
"hashes": [
"sha256:8c8bf2d4f800c3ed952df206b18c28f7070d9e3dcbd6ca6291127574f57ee786",
"sha256:e51562c91ddb8148e791f0155fdb01325d99bb52c4cdbb291aee7a3563fd0849"
],
"version": "==0.54.0"
}
}
}

README.rst

@@ -1,23 +1,26 @@
PyPI Stats
==========
-A simple analytics dashboard for aggregate data on PyPI downloads. PyPI Stats
-is built using Flask with plotly.js and deployed to AWS elasticbeanstalk.
+A simple analytics dashboard for aggregate data on PyPI downloads. PyPI Stats is built using Flask with plotly.js.
`PyPI Stats <https://pypistats.org/>`_
GitHub OAuth
------------
-PyPI Stats has an integration with GitHub so you can track install data on the
-packages you maintain.
+PyPI Stats has an integration with GitHub so you can track install data on the packages you maintain.
`User page <https://pypistats.org/user>`_
JSON API
--------
-PyPI Stats provides a simple JSON API to retrieve aggregate download stats
-and time histories of pypi packages.
+PyPI Stats provides a simple JSON API to retrieve aggregate download stats and time histories of pypi packages.
`JSON API <https://pypistats.org/api>`_
+Development
+-----------
+Run ``make pypistats`` to launch a complete development environment using docker-compose.
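
A concrete call to go with the JSON API pointer; the exact routes are documented on the /api page, so treat the path below as an illustrative assumption:

    # recent download counts for a package
    curl -s https://pypistats.org/api/packages/requests/recent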

docker-compose.yml (new file, 96 lines)

@@ -0,0 +1,96 @@
x-envs: &envs
  environment:
    - FLASK_APP=pypistats/run.py
    - FLASK_ENV=development
    - FLASK_DEBUG=1
    - POSTGRESQL_HOST=postgresql
    - POSTGRESQL_PORT=5432
    - POSTGRESQL_USERNAME=admin
    - POSTGRESQL_PASSWORD=root
    - POSTGRESQL_DBNAME=pypistats
    - CELERY_BROKER_URL=redis://redis
    - BASIC_AUTH_USER=user
    - BASIC_AUTH_PASSWORD=password

version: "3.4"

volumes:
  pgdata: {}

services:
  web:
    build:
      context: .
    command: webdev
    depends_on:
      - postgresql
    <<: *envs
    ports:
      - "5000:5000"
    volumes:
      - "./pypistats/:/app/pypistats/"
  beat:
    build:
      context: .
    command: beat
    depends_on:
      - redis
    <<: *envs
    volumes:
      - "./pypistats/:/app/pypistats/"
  celery:
    build:
      context: .
    command: celery
    depends_on:
      - redis
      - postgresql
    <<: *envs
    volumes:
      - "./pypistats/:/app/pypistats/"
  flower:
    build:
      context: .
    command: flower
    depends_on:
      - redis
    <<: *envs
    ports:
      - "5555:5555"
    volumes:
      - "./pypistats/:/app/pypistats/"
  migrate:
    build:
      context: .
    command: migrate
    depends_on:
      - postgresql
    <<: *envs
    volumes:
      - "./pypistats/:/app/pypistats/"
      - "./migrations/:/app/migrations/"
  seeds:
    build:
      context: .
    command: seeds
    depends_on:
      - postgresql
      - migrate
    <<: *envs
    volumes:
      - "./pypistats/:/app/pypistats/"
      - "./migrations/:/app/migrations/"
  redis:
    image: "redis:5.0.7-alpine"
    ports:
      - "6379:6379"
  postgresql:
    image: "postgres:12"
    environment:
      - POSTGRES_USER=admin
      - POSTGRES_PASSWORD=root
      - POSTGRES_DB=pypistats
    ports:
      - "5433:5432"
    volumes:
      - "pgdata:/var/lib/postgresql/data"

docker-entrypoint.sh (new executable file, 36 lines)

@@ -0,0 +1,36 @@
#!/usr/bin/env bash
if [[ "$1" = "webdev" ]]
then
exec poetry run flask run --host 0.0.0.0
fi
if [[ "$1" = "web" ]]
then
exec poetry run gunicorn -b 0.0.0.0:5000 -w 2 --access-logfile - --error-log - --access-logformat "%({x-forwarded-for}i)s %(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\"" pypistats.run:app
fi
if [[ "$1" = "celery" ]]
then
exec poetry run celery -A pypistats.extensions.celery worker -l info --concurrency=1
fi
if [[ "$1" = "beat" ]]
then
exec poetry run celery -A pypistats.extensions.celery beat -l info
fi
if [[ "$1" = "flower" ]]
then
exec poetry run flower -A pypistats.extensions.celery -l info
fi
if [[ "$1" = "migrate" ]]
then
exec poetry run flask db upgrade
fi
if [[ "$1" = "seeds" ]]
then
exec poetry run python -m migrations.seeds
fi
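
Each branch uses exec, so the selected process becomes PID 1 and receives container signals directly. One gap worth noting: an unrecognized first argument falls through every branch and the container exits 0 silently. A small hedged addition (not part of this commit) that would surface that:

    # hypothetical final lines for the script: fail loudly on unknown commands
    echo "unknown command: $1" >&2
    exit 1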

kubernetes/commands.sh (new file, 30 lines)

@@ -0,0 +1,30 @@
#!/usr/bin/env bash
docker build -t us.gcr.io/pypistats-org/pypistats:$(poetry version | tail -c +14) .
docker push us.gcr.io/pypistats-org/pypistats:$(poetry version | tail -c +14)
kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0/aio/deploy/recommended.yaml
# create namespace ``pypistats``
kubectl apply -f kubernetes/namespace.yaml
# create secret from the env file
#kubectl delete secret pypistats-secrets --namespace=pypistats
# create
kubectl create secret generic pypistats-secrets --from-env-file=gke.env --namespace=pypistats
# update
kubectl create secret generic pypistats-secrets --from-env-file=gke.env --namespace=pypistats --dry-run -o yaml | kubectl apply -f -
# create redis and flower
kubectl apply -f kubernetes/redis.yaml --namespace=pypistats
kubectl apply -f kubernetes/flower.yaml --namespace=pypistats
# launch the web components
kubectl apply -f kubernetes/web.yaml --namespace=pypistats
# launch the tasks components
kubectl apply -f kubernetes/tasks.yaml --namespace=pypistats
# get info about connecting
kubectl cluster-info
kubectl get services --namespace=pypistats

kubernetes/deploy.sh (new file, 12 lines)

@@ -0,0 +1,12 @@
#!/usr/bin/env bash
poetry version major
export PYPISTATS_VERSION=$(poetry version | tail -c +14)
docker build -t us.gcr.io/pypistats-org/pypistats:${PYPISTATS_VERSION} .
docker push us.gcr.io/pypistats-org/pypistats:${PYPISTATS_VERSION}
kubectl create secret generic pypistats-secrets --from-env-file=gke.env --namespace=pypistats --dry-run -o yaml | kubectl apply -f -
sed -i '.bak' 's|us.gcr.io\/pypistats-org\/pypistats.*|us.gcr.io\/pypistats-org\/pypistats:'"$PYPISTATS_VERSION"'|g' kubernetes/*.yaml
rm kubernetes/*.bak
kubectl apply -f kubernetes/redis.yaml --namespace=pypistats
kubectl apply -f kubernetes/tasks.yaml --namespace=pypistats
kubectl apply -f kubernetes/flower.yaml --namespace=pypistats
kubectl apply -f kubernetes/web.yaml --namespace=pypistats
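
A portability note on the sed call above: "sed -i '.bak'" is the BSD/macOS spelling, where the backup suffix is a separate argument; GNU sed expects the suffix attached to the flag. If this script ever runs on Linux (an assumption; the brew-based Makefile suggests a macOS workstation), the equivalent is:

    # GNU sed spelling of the in-place image-tag bump
    sed -i.bak 's|us.gcr.io/pypistats-org/pypistats.*|us.gcr.io/pypistats-org/pypistats:'"$PYPISTATS_VERSION"'|g' kubernetes/*.yaml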

kubernetes/flower.yaml (new file, 45 lines)

@@ -0,0 +1,45 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: flower
  namespace: pypistats
  labels:
    app: pypistats
    component: flower
spec:
  replicas: 1
  selector:
    matchLabels:
      app: pypistats
      component: flower
  template:
    metadata:
      labels:
        app: pypistats
        component: flower
    spec:
      containers:
        - name: pypistats-flower
          image: us.gcr.io/pypistats-org/pypistats:6
          imagePullPolicy: Always
          args: ["flower"]
          envFrom:
            - secretRef:
                name: pypistats-secrets
---
apiVersion: v1
kind: Service
metadata:
  name: flower
  labels:
    app: pypistats
    component: flower
spec:
  ports:
    - port: 5555
      targetPort: 5555
  selector:
    app: pypistats
    component: flower

kubernetes/namespace.yaml (new file)

@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
  name: pypistats

kubernetes/redis.yaml (new file, 36 lines)

@@ -0,0 +1,36 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis
  labels:
    app: redis
spec:
  selector:
    matchLabels:
      app: redis
  replicas: 1
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:5.0.7-alpine
          ports:
            - containerPort: 6379
---
apiVersion: v1
kind: Service
metadata:
  name: redis
  labels:
    app: redis
spec:
  ports:
    - port: 6379
      targetPort: 6379
  selector:
    app: redis

kubernetes/tasks.yaml (new file, 35 lines)

@@ -0,0 +1,35 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: tasks
  namespace: pypistats
  labels:
    app: pypistats
    component: tasks
spec:
  replicas: 1
  selector:
    matchLabels:
      app: pypistats
      component: tasks
  template:
    metadata:
      labels:
        app: pypistats
        component: tasks
    spec:
      containers:
        - name: beat
          image: us.gcr.io/pypistats-org/pypistats:6
          imagePullPolicy: Always
          args: ["beat"]
          envFrom:
            - secretRef:
                name: pypistats-secrets
        - name: celery
          image: us.gcr.io/pypistats-org/pypistats:6
          imagePullPolicy: Always
          args: ["celery"]
          envFrom:
            - secretRef:
                name: pypistats-secrets

kubernetes/web.yaml (new file, 81 lines)

@@ -0,0 +1,81 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: web
  namespace: pypistats
  labels:
    app: pypistats
    component: web
spec:
  replicas: 2
  selector:
    matchLabels:
      app: pypistats
      component: web
  template:
    metadata:
      labels:
        app: pypistats
        component: web
    spec:
      initContainers:
        - name: migrate
          image: us.gcr.io/pypistats-org/pypistats:6
          imagePullPolicy: Always
          envFrom:
            - secretRef:
                name: pypistats-secrets
          args: ["migrate"]
      containers:
        - name: web
          image: us.gcr.io/pypistats-org/pypistats:6
          imagePullPolicy: Always
          envFrom:
            - secretRef:
                name: pypistats-secrets
          args: ["web"]
          ports:
            - containerPort: 5000
          readinessProbe:
            httpGet:
              path: /health
              port: 5000
            initialDelaySeconds: 5
            periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
  name: web
  namespace: pypistats
spec:
  type: NodePort
  ports:
    - name: http
      protocol: TCP
      port: 5000
      targetPort: 5000
  selector:
    app: pypistats
    component: web
---
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
  name: web
  namespace: pypistats
spec:
  backend:
    serviceName: web
    servicePort: http
  rules:
    - http:
        paths:
          - backend:
              serviceName: web
              servicePort: http
            path: /

migrations/alembic.ini

@@ -2,7 +2,7 @@
[alembic]
# template used to generate migration files
-# file_template = %%(rev)s_%%(slug)s
+file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d%%(second).2d_%%(rev)s_%%(slug)s
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
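
With this template, revision filenames sort chronologically. Taking the new migration below as a worked example (revision 0cf9945079f1, created 2020-03-03 22:17:51, slug setup_tables), the generated name would be 20200303_221751_0cf9945079f1_setup_tables.py. New revisions come from the usual flask-migrate command:

    # generate a timestamped revision (ids and dates will differ)
    poetry run flask db migrate -m "setup tables"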

migrations/env.py

@@ -1,8 +1,10 @@
-from __future__ import with_statement
-from alembic import context
-from sqlalchemy import engine_from_config, pool
-from logging.config import fileConfig
import logging
+from logging.config import fileConfig
+from alembic import context
+from flask import current_app
+from sqlalchemy import engine_from_config
+from sqlalchemy import pool
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
@@ -11,16 +13,11 @@ config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
fileConfig(config.config_file_name)
-logger = logging.getLogger('alembic.env')
+logger = logging.getLogger("alembic.env")
-# add your model's MetaData object here
-# for 'autogenerate' support
-# from myapp import mymodel
-# target_metadata = mymodel.Base.metadata
-from flask import current_app
-config.set_main_option('sqlalchemy.url',
-                       current_app.config.get('SQLALCHEMY_DATABASE_URI'))
-target_metadata = current_app.extensions['migrate'].db.metadata
+config.set_main_option("sqlalchemy.url", current_app.config.get("SQLALCHEMY_DATABASE_URI"))
+target_metadata = current_app.extensions["migrate"].db.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
@@ -59,22 +56,24 @@ def run_migrations_online():
    # when there are no changes to the schema
    # reference: http://alembic.zzzcomputing.com/en/latest/cookbook.html
    def process_revision_directives(context, revision, directives):
-        if getattr(config.cmd_opts, 'autogenerate', False):
+        if getattr(config.cmd_opts, "autogenerate", False):
            script = directives[0]
            if script.upgrade_ops.is_empty():
                directives[:] = []
-                logger.info('No changes in schema detected.')
+                logger.info("No changes in schema detected.")
-    engine = engine_from_config(config.get_section(config.config_ini_section),
-                                prefix='sqlalchemy.',
-                                poolclass=pool.NullPool)
+    engine = engine_from_config(
+        config.get_section(config.config_ini_section), prefix="sqlalchemy.", poolclass=pool.NullPool
+    )
    connection = engine.connect()
-    context.configure(connection=connection,
-                      target_metadata=target_metadata,
-                      compare_type=True,
-                      process_revision_directives=process_revision_directives,
-                      **current_app.extensions['migrate'].configure_args)
+    context.configure(
+        connection=connection,
+        target_metadata=target_metadata,
+        compare_type=True,
+        process_revision_directives=process_revision_directives,
+        **current_app.extensions["migrate"].configure_args,
+    )
    try:
        with context.begin_transaction():
@@ -82,6 +81,7 @@ def run_migrations_online():
    finally:
        connection.close()
if context.is_offline_mode():
    run_migrations_offline()
else:
migrations/script.py.mako

@@ -5,7 +5,6 @@ Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
-# flake8: noqa
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

migrations/seeds.py (new file, 93 lines)

@@ -0,0 +1,93 @@
import datetime
import logging
import random
import subprocess
import sys

from pypistats.application import create_app
from pypistats.application import db
from pypistats.models.download import OverallDownloadCount
from pypistats.models.download import PythonMajorDownloadCount
from pypistats.models.download import PythonMinorDownloadCount
from pypistats.models.download import RecentDownloadCount
from pypistats.models.download import SystemDownloadCount

# required to use the db models outside of the context of the app
app = create_app()
app.app_context().push()

if db.session.query(RecentDownloadCount.package).count() > 0:
    print("Seeds already exist.")
    sys.exit(0)

# use the currently installed dependencies as seed packages
result = subprocess.run(["poetry", "show"], stdout=subprocess.PIPE)
output = result.stdout.decode()

# extract just the package names from the output
# skip the first line which is a poetry warning
# and the last line which is empty
packages = []
for line in output.split("\n")[1:-1]:
    packages.append(line.split(" ")[0])
logging.info(packages)

# take the last 120 days
end_date = datetime.date.today()
date_list = [end_date - datetime.timedelta(days=x) for x in range(120)][::-1]

baseline = 1000

# build a bunch of seed records with random values
records = []
for package in packages + ["__all__"]:
    print("Seeding: " + package)
    for idx, category in enumerate(["day", "week", "month"]):
        record = RecentDownloadCount(
            package=package, category=category, downloads=baseline * (idx + 1) + random.randint(-100, 100)
        )
        records.append(record)
    for date in date_list:
        for idx, category in enumerate(["with_mirrors", "without_mirrors"]):
            record = OverallDownloadCount(
                date=date,
                package=package,
                category=category,
                downloads=baseline * (idx + 1) + random.randint(-100, 100),
            )
            records.append(record)
        for idx, category in enumerate(["2", "3"]):
            record = PythonMajorDownloadCount(
                date=date,
                package=package,
                category=category,
                downloads=baseline * (idx + 1) + random.randint(-100, 100),
            )
            records.append(record)
        for idx, category in enumerate(["2.7", "3.4", "3.5", "3.6", "3.7", "3.8"]):
            record = PythonMinorDownloadCount(
                date=date,
                package=package,
                category=category,
                downloads=baseline * (idx + 1) + random.randint(-100, 100),
            )
            records.append(record)
        for idx, category in enumerate(["windows", "linux", "darwin"]):
            record = SystemDownloadCount(
                date=date,
                package=package,
                category=category,
                downloads=baseline * (idx + 1) + random.randint(-100, 100),
            )
            records.append(record)

# push to the local database
db.session.bulk_save_objects(records)
db.session.commit()
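
The script exits early once the recent table has rows, so it is safe to re-run, and it shells out to "poetry show" for its package list, so it needs poetry on the PATH. That is why it normally runs inside the image through the compose seeds service rather than on the host:

    # one-off seed run against the compose database
    docker-compose run --rm seeds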

migrations/versions/: setup_tables, revision 0cf9945079f1 (new file, 95 lines)

@@ -0,0 +1,95 @@
"""setup_tables
Revision ID: 0cf9945079f1
Revises:
Create Date: 2020-03-03 22:17:51.438119
"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "0cf9945079f1"
down_revision = None
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"overall",
sa.Column("date", sa.Date(), nullable=False),
sa.Column("package", sa.String(length=128), nullable=False),
sa.Column("category", sa.String(length=16), nullable=False),
sa.Column("downloads", sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint("date", "package", "category"),
)
op.create_index(op.f("ix_overall_package"), "overall", ["package"], unique=False)
op.create_table(
"python_major",
sa.Column("date", sa.Date(), nullable=False),
sa.Column("package", sa.String(length=128), nullable=False),
sa.Column("category", sa.String(length=4), nullable=True),
sa.Column("downloads", sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint("date", "package", "category"),
)
op.create_index(op.f("ix_python_major_package"), "python_major", ["package"], unique=False)
op.create_table(
"python_minor",
sa.Column("date", sa.Date(), nullable=False),
sa.Column("package", sa.String(length=128), nullable=False),
sa.Column("category", sa.String(length=4), nullable=True),
sa.Column("downloads", sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint("date", "package", "category"),
)
op.create_index(op.f("ix_python_minor_package"), "python_minor", ["package"], unique=False)
op.create_table(
"recent",
sa.Column("package", sa.String(length=128), nullable=False),
sa.Column("category", sa.String(length=8), nullable=False),
sa.Column("downloads", sa.BigInteger(), nullable=False),
sa.PrimaryKeyConstraint("package", "category"),
)
op.create_index(op.f("ix_recent_package"), "recent", ["package"], unique=False)
op.create_table(
"system",
sa.Column("date", sa.Date(), nullable=False),
sa.Column("package", sa.String(length=128), nullable=False),
sa.Column("category", sa.String(length=8), nullable=True),
sa.Column("downloads", sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint("date", "package", "category"),
)
op.create_index(op.f("ix_system_package"), "system", ["package"], unique=False)
op.create_table(
"users",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("uid", sa.Integer(), nullable=True),
sa.Column("username", sa.String(length=39), nullable=False),
sa.Column("avatar_url", sa.String(length=256), nullable=True),
sa.Column("token", sa.String(length=256), nullable=True),
sa.Column("created_at", sa.DateTime(), nullable=False),
sa.Column("active", sa.Boolean(), nullable=True),
sa.Column("is_admin", sa.Boolean(), nullable=True),
sa.Column("favorites", postgresql.ARRAY(sa.String(length=128), dimensions=1), nullable=True),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("uid"),
)
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("users")
op.drop_index(op.f("ix_system_package"), table_name="system")
op.drop_table("system")
op.drop_index(op.f("ix_recent_package"), table_name="recent")
op.drop_table("recent")
op.drop_index(op.f("ix_python_minor_package"), table_name="python_minor")
op.drop_table("python_minor")
op.drop_index(op.f("ix_python_major_package"), table_name="python_major")
op.drop_table("python_major")
op.drop_index(op.f("ix_overall_package"), table_name="overall")
op.drop_table("overall")
# ### end Alembic commands ###

migrations/versions/: initial models, revision 9116cea0e0d7 (deleted)

@@ -1,78 +0,0 @@
"""initial models
Revision ID: 9116cea0e0d7
Revises:
Create Date: 2018-04-04 23:48:49.351410
"""
# flake8: noqa
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '9116cea0e0d7'
down_revision = None
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('overall',
sa.Column('date', sa.Date(), nullable=False),
sa.Column('package', sa.String(length=128), nullable=False),
sa.Column('category', sa.String(length=16), nullable=False),
sa.Column('downloads', sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint('date', 'package', 'category')
)
op.create_table('python_major',
sa.Column('date', sa.Date(), nullable=False),
sa.Column('package', sa.String(length=128), nullable=False),
sa.Column('category', sa.String(length=4), nullable=True),
sa.Column('downloads', sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint('date', 'package', 'category')
)
op.create_table('python_minor',
sa.Column('date', sa.Date(), nullable=False),
sa.Column('package', sa.String(length=128), nullable=False),
sa.Column('category', sa.String(length=4), nullable=True),
sa.Column('downloads', sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint('date', 'package', 'category')
)
op.create_table('recent',
sa.Column('package', sa.String(length=128), nullable=False),
sa.Column('category', sa.String(length=8), nullable=False),
sa.Column('downloads', sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint('package', 'category')
)
op.create_table('system',
sa.Column('date', sa.Date(), nullable=False),
sa.Column('package', sa.String(length=128), nullable=False),
sa.Column('category', sa.String(length=8), nullable=True),
sa.Column('downloads', sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint('date', 'package', 'category')
)
op.create_table('users',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('username', sa.String(length=39), nullable=False),
sa.Column('token', sa.String(length=256), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=False),
sa.Column('active', sa.Boolean(), nullable=True),
sa.Column('is_admin', sa.Boolean(), nullable=True),
sa.Column('favorites', postgresql.ARRAY(sa.String(length=128), dimensions=1), nullable=True),
sa.PrimaryKeyConstraint('id'),
sa.UniqueConstraint('username')
)
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('users')
op.drop_table('system')
op.drop_table('recent')
op.drop_table('python_minor')
op.drop_table('python_major')
op.drop_table('overall')
# ### end Alembic commands ###

migrations/versions/: add indexes, revision a91799876ec2 (deleted)

@@ -1,37 +0,0 @@
"""add indexes
Revision ID: a91799876ec2
Revises: e65ba8f3cdcf
Create Date: 2018-05-14 22:27:11.123192
"""
# flake8: noqa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'a91799876ec2'
down_revision = 'e65ba8f3cdcf'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_index(op.f('ix_overall_package'), 'overall', ['package'], unique=False)
op.create_index(op.f('ix_python_major_package'), 'python_major', ['package'], unique=False)
op.create_index(op.f('ix_python_minor_package'), 'python_minor', ['package'], unique=False)
op.create_index(op.f('ix_recent_package'), 'recent', ['package'], unique=False)
op.create_index(op.f('ix_system_package'), 'system', ['package'], unique=False)
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index(op.f('ix_system_package'), table_name='system')
op.drop_index(op.f('ix_recent_package'), table_name='recent')
op.drop_index(op.f('ix_python_minor_package'), table_name='python_minor')
op.drop_index(op.f('ix_python_major_package'), table_name='python_major')
op.drop_index(op.f('ix_overall_package'), table_name='overall')
# ### end Alembic commands ###

migrations/versions/: change int to bigint for downloads in recent table, revision c81b3715b9e5 (deleted)

@@ -1,35 +0,0 @@
"""change int to bigint for downloads in recent table
Revision ID: c81b3715b9e5
Revises: 9116cea0e0d7
Create Date: 2018-04-05 00:56:02.276823
"""
# flake8: noqa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'c81b3715b9e5'
down_revision = '9116cea0e0d7'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.alter_column('recent', 'downloads',
existing_type=sa.INTEGER(),
type_=sa.BigInteger(),
existing_nullable=False)
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.alter_column('recent', 'downloads',
existing_type=sa.BigInteger(),
type_=sa.INTEGER(),
existing_nullable=False)
# ### end Alembic commands ###

migrations/versions/: revision e65ba8f3cdcf, "empty message" (deleted)

@@ -1,35 +0,0 @@
"""empty message
Revision ID: e65ba8f3cdcf
Revises: c81b3715b9e5
Create Date: 2018-04-06 17:58:19.643259
"""
# flake8: noqa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'e65ba8f3cdcf'
down_revision = 'c81b3715b9e5'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('users', sa.Column('avatar_url', sa.String(length=256), nullable=True))
op.add_column('users', sa.Column('uid', sa.Integer(), nullable=True))
op.drop_constraint('users_username_key', 'users', type_='unique')
op.create_unique_constraint(None, 'users', ['uid'])
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_constraint(None, 'users', type_='unique')
op.create_unique_constraint('users_username_key', 'users', ['username'])
op.drop_column('users', 'uid')
op.drop_column('users', 'avatar_url')
# ### end Alembic commands ###

poetry.lock (generated, new file, 1290 lines)

File diff suppressed because it is too large.

pypistats/application.py

@@ -1,13 +1,13 @@
"""PyPIStats application.""" """PyPIStats application."""
from celery import Celery
from celery import Task from celery import Task
from flask import Flask from flask import Flask
from pypistats import views from pypistats import views
from pypistats.config import DevConfig
from pypistats.extensions import celery
from pypistats.extensions import db from pypistats.extensions import db
from pypistats.extensions import github from pypistats.extensions import github
from pypistats.extensions import migrate from pypistats.extensions import migrate
from pypistats.settings import DevConfig
def create_app(config_object=DevConfig): def create_app(config_object=DevConfig):
@@ -16,13 +16,13 @@ def create_app(config_object=DevConfig):
app.config.from_object(config_object) app.config.from_object(config_object)
register_extensions(app) register_extensions(app)
register_blueprints(app) register_blueprints(app)
init_celery(celery, app)
return app return app
def create_celery(app): def init_celery(celery_, app):
"""Create a celery object.""" """Create a celery object."""
celery = Celery(app.import_name, broker=app.config["CELERY_BROKER_URL"]) celery_.conf.update(app.config)
celery.config_from_object(app.config)
class ContextTask(Task): class ContextTask(Task):
abstract = True abstract = True
@@ -31,12 +31,12 @@ def create_celery(app):
with app.app_context(): with app.app_context():
return Task.__call__(self, *args, **kwargs) return Task.__call__(self, *args, **kwargs)
celery.Task = ContextTask celery_.Task = ContextTask
return celery
def register_blueprints(app): def register_blueprints(app):
"""Register Flask blueprints.""" """Register Flask blueprints."""
app.register_blueprint(views.admin.blueprint)
app.register_blueprint(views.api.blueprint) app.register_blueprint(views.api.blueprint)
app.register_blueprint(views.error.blueprint) app.register_blueprint(views.error.blueprint)
app.register_blueprint(views.general.blueprint) app.register_blueprint(views.general.blueprint)

pypistats/config.py (new file, 70 lines)

@@ -0,0 +1,70 @@
"""Application configuration."""
import os
from celery.schedules import crontab
from flask import json
def get_db_uri():
"""Get the database URI."""
return "postgresql://{username}:{password}@{host}:{port}/{dbname}".format(
username=os.environ.get("POSTGRESQL_USERNAME"),
password=os.environ.get("POSTGRESQL_PASSWORD"),
host=os.environ.get("POSTGRESQL_HOST"),
port=os.environ.get("POSTGRESQL_PORT"),
dbname=os.environ.get("POSTGRESQL_DBNAME"),
)
class Config:
"""Base configuration."""
APP_DIR = os.path.abspath(os.path.dirname(__file__))
CELERY_BROKER_URL = (os.environ.get("CELERY_BROKER_URL"),)
CELERY_IMPORTS = "pypistats.tasks.pypi"
CELERYBEAT_SCHEDULE = {
"update_db": {"task": "pypistats.tasks.pypi.etl", "schedule": crontab(minute=0, hour=1)} # 1am UTC
}
GITHUB_CLIENT_ID = os.environ.get("GITHUB_CLIENT_ID")
GITHUB_CLIENT_SECRET = os.environ.get("GITHUB_CLIENT_SECRET")
PROJECT_ROOT = os.path.abspath(os.path.join(APP_DIR, os.pardir))
SECRET_KEY = os.environ.get("PYPISTATS_SECRET", "secret-key")
SQLALCHEMY_TRACK_MODIFICATIONS = False
SQLALCHEMY_DATABASE_URI = get_db_uri()
# Plotly chart definitions
PLOT_BASE = json.load(open(os.path.join(os.path.dirname(__file__), "plots", "plot_base.json")))
DATA_BASE = json.load(open(os.path.join(os.path.dirname(__file__), "plots", "data_base.json")))
class LocalConfig(Config):
"""Local configuration."""
DEBUG = True
ENV = "local"
class ProdConfig(Config):
"""Production configuration."""
DEBUG = False
ENV = "prod"
class DevConfig(Config):
"""Development configuration."""
DEBUG = True
ENV = "dev"
class TestConfig(Config):
"""Test configuration."""
DEBUG = True
ENV = "dev"
TESTING = True
WTF_CSRF_ENABLED = False # Allows form testing
configs = {"development": DevConfig, "local": LocalConfig, "production": ProdConfig, "test": TestConfig}
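
Two things worth flagging here. First, the CELERY_BROKER_URL assignment carries a trailing comma, so the setting is a one-element tuple rather than a string, which looks unintended. Second, the configs mapping suggests the active class is chosen from the ENV variable (matching ENV=development in .env.sample); under that assumption, switching environments is just:

    # hypothetical: pick the production config for a local gunicorn run
    export ENV=production
    poetry run gunicorn -b 0.0.0.0:5000 pypistats.run:app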

pypistats/database.py

@@ -1,7 +1,6 @@
"""Database classes and models.""" """Database classes and models."""
from pypistats.extensions import db from pypistats.extensions import db
Column = db.Column Column = db.Column
basestring = (str, bytes) basestring = (str, bytes)
@@ -54,9 +53,6 @@ class SurrogatePK(object):
@classmethod @classmethod
def get_by_id(cls, record_id): def get_by_id(cls, record_id):
"""Get record by ID.""" """Get record by ID."""
if any( if any((isinstance(record_id, basestring) and record_id.isdigit(), isinstance(record_id, (int, float)))):
(isinstance(record_id, basestring) and record_id.isdigit(),
isinstance(record_id, (int, float))),
):
return cls.query.get(int(record_id)) return cls.query.get(int(record_id))
return None return None

pypistats/extensions.py

@@ -1,9 +1,24 @@
"""Flask extensions.""" """Flask extensions."""
from celery import Celery
from flask_github import GitHub from flask_github import GitHub
from flask_httpauth import HTTPBasicAuth
from flask_migrate import Migrate from flask_migrate import Migrate
from flask_sqlalchemy import SQLAlchemy from flask_sqlalchemy import SQLAlchemy
from pypistats.config import Config
db = SQLAlchemy() db = SQLAlchemy()
github = GitHub() github = GitHub()
migrate = Migrate() migrate = Migrate()
auth = HTTPBasicAuth()
def create_celery(name=__name__, config=Config):
"""Create a celery object."""
redis_uri = "redis://localhost:6379"
celery = Celery(name, broker=redis_uri)
celery.config_from_object(config)
return celery
celery = create_celery()

pypistats/models/download.py

@@ -10,17 +10,13 @@ class OverallDownloadCount(Model):
__tablename__ = "overall" __tablename__ = "overall"
date = Column(db.Date, primary_key=True, nullable=False) date = Column(db.Date, primary_key=True, nullable=False)
package = Column( package = Column(db.String(128), primary_key=True, nullable=False, index=True)
db.String(128), primary_key=True, nullable=False, index=True
)
# with_mirrors or without_mirrors # with_mirrors or without_mirrors
category = Column(db.String(16), primary_key=True, nullable=False) category = Column(db.String(16), primary_key=True, nullable=False)
downloads = Column(db.Integer(), nullable=False) downloads = Column(db.Integer(), nullable=False)
def __repr__(self): def __repr__(self):
return "<OverallDownloadCount {}".format( return "<OverallDownloadCount {}".format(f"{str(self.date)} - {str(self.package)} - {str(self.category)}")
f"{str(self.date)} - {str(self.package)} - {str(self.category)}"
)
class PythonMajorDownloadCount(Model): class PythonMajorDownloadCount(Model):
@@ -29,17 +25,13 @@ class PythonMajorDownloadCount(Model):
__tablename__ = "python_major" __tablename__ = "python_major"
date = Column(db.Date, primary_key=True, nullable=False) date = Column(db.Date, primary_key=True, nullable=False)
package = Column( package = Column(db.String(128), primary_key=True, nullable=False, index=True)
db.String(128), primary_key=True, nullable=False, index=True
)
# python_major version, 2 or 3 (or null) # python_major version, 2 or 3 (or null)
category = Column(db.String(4), primary_key=True, nullable=True) category = Column(db.String(4), primary_key=True, nullable=True)
downloads = Column(db.Integer(), nullable=False) downloads = Column(db.Integer(), nullable=False)
def __repr__(self): def __repr__(self):
return "<PythonMajorDownloadCount {}".format( return "<PythonMajorDownloadCount {}".format(f"{str(self.date)} - {str(self.package)} - {str(self.category)}")
f"{str(self.date)} - {str(self.package)} - {str(self.category)}"
)
class PythonMinorDownloadCount(Model): class PythonMinorDownloadCount(Model):
@@ -48,17 +40,13 @@ class PythonMinorDownloadCount(Model):
__tablename__ = "python_minor" __tablename__ = "python_minor"
date = Column(db.Date, primary_key=True) date = Column(db.Date, primary_key=True)
package = Column( package = Column(db.String(128), primary_key=True, nullable=False, index=True)
db.String(128), primary_key=True, nullable=False, index=True
)
# python_minor version, e.g. 2.7 or 3.6 (or null) # python_minor version, e.g. 2.7 or 3.6 (or null)
category = Column(db.String(4), primary_key=True, nullable=True) category = Column(db.String(4), primary_key=True, nullable=True)
downloads = Column(db.Integer(), nullable=False) downloads = Column(db.Integer(), nullable=False)
def __repr__(self): def __repr__(self):
return "<PythonMinorDownloadCount {}".format( return "<PythonMinorDownloadCount {}".format(f"{str(self.date)} - {str(self.package)} - {str(self.category)}")
f"{str(self.date)} - {str(self.package)} - {str(self.category)}"
)
RECENT_CATEGORIES = ["day", "week", "month"] RECENT_CATEGORIES = ["day", "week", "month"]
@@ -69,17 +57,13 @@ class RecentDownloadCount(Model):
__tablename__ = "recent" __tablename__ = "recent"
package = Column( package = Column(db.String(128), primary_key=True, nullable=False, index=True)
db.String(128), primary_key=True, nullable=False, index=True
)
# recency, e.g. day, week, month # recency, e.g. day, week, month
category = Column(db.String(8), primary_key=True, nullable=False) category = Column(db.String(8), primary_key=True, nullable=False)
downloads = Column(db.BigInteger(), nullable=False) downloads = Column(db.BigInteger(), nullable=False)
def __repr__(self): def __repr__(self):
return "<RecentDownloadCount {}>".format( return "<RecentDownloadCount {}>".format(f"{str(self.package)} - {str(self.category)}")
f"{str(self.package)} - {str(self.category)}"
)
class SystemDownloadCount(Model): class SystemDownloadCount(Model):
@@ -88,14 +72,10 @@ class SystemDownloadCount(Model):
__tablename__ = "system" __tablename__ = "system"
date = Column(db.Date, primary_key=True) date = Column(db.Date, primary_key=True)
package = Column( package = Column(db.String(128), primary_key=True, nullable=False, index=True)
db.String(128), primary_key=True, nullable=False, index=True
)
# system, e.g. Windows or Linux or Darwin (or null) # system, e.g. Windows or Linux or Darwin (or null)
category = Column(db.String(8), primary_key=True, nullable=True) category = Column(db.String(8), primary_key=True, nullable=True)
downloads = Column(db.Integer(), nullable=False) downloads = Column(db.Integer(), nullable=False)
def __repr__(self): def __repr__(self):
return "<SystemDownloadCount {}".format( return "<SystemDownloadCount {}".format(f"{str(self.date)} - {str(self.package)} - {str(self.category)}")
f"{str(self.date)} - {str(self.package)} - {str(self.category)}"
)

pypistats/models/user.py

@@ -9,21 +9,19 @@ from pypistats.database import Model
from pypistats.database import SurrogatePK
from pypistats.extensions import db

MAX_FAVORITES = 20


class User(UserMixin, SurrogatePK, Model):
    """A user of the app."""

-    __tablename__ = 'users'
+    __tablename__ = "users"

    uid = Column(db.Integer(), unique=True)
    username = Column(db.String(39), nullable=False)
    avatar_url = Column(db.String(256))
    token = Column(db.String(256))
-    created_at = \
-        Column(db.DateTime, nullable=False, default=datetime.datetime.utcnow)
+    created_at = Column(db.DateTime, nullable=False, default=datetime.datetime.utcnow)
    active = Column(db.Boolean(), default=False)
    is_admin = Column(db.Boolean(), default=False)
    favorites = Column(ARRAY(db.String(128), dimensions=1))
pypistats/run.py

@@ -2,21 +2,25 @@
import os

from flask import g
+from flask import redirect
+from flask import request
from flask import session
-from flask_sslify import SSLify
+from flask_limiter import Limiter
+from flask_limiter.util import get_remote_address
+from werkzeug.middleware.proxy_fix import ProxyFix

from pypistats.application import create_app
-from pypistats.application import create_celery
+from pypistats.config import configs
from pypistats.models.user import User
-from pypistats.settings import configs

# change this for migrations
-env = os.environ.get("ENV", "dev")
+env = os.environ.get("ENV", "development")
app = create_app(configs[env])
-sslify = SSLify(app)
-celery = create_celery(app)
+
+# Rate limiting per IP/worker
+app.wsgi_app = ProxyFix(app.wsgi_app)
+limiter = Limiter(app, key_func=get_remote_address, application_limits=["5 per second", "30 per minute"])

app.logger.info(f"Environment: {env}")
@@ -24,6 +28,12 @@ app.logger.info(f"Environment: {env}")
@app.before_request
def before_request():
    """Execute before requests."""
+    # http -> https
+    scheme = request.headers.get("X-Forwarded-Proto")
+    if scheme and scheme == "http" and request.url.startswith("http://"):
+        url = request.url.replace("http://", "https://", 1)
+        return redirect(url, code=301)
+    # set user
    g.user = None
    if "user_id" in session:
        g.user = User.query.get(session["user_id"])
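A note on the ordering above: flask-limiter's get_remote_address reads request.remote_addr, so without the ProxyFix wrapper every request arriving through a proxy or ingress would be rate-limited as one shared IP. A minimal sketch of the effect (standalone app and header value are illustrative, not part of this commit):

from flask import Flask
from flask import request
from werkzeug.middleware.proxy_fix import ProxyFix

app = Flask(__name__)
# Trust one proxy hop: rewrite remote_addr from X-Forwarded-For.
app.wsgi_app = ProxyFix(app.wsgi_app)


@app.route("/ip")
def ip():
    return request.remote_addr or "unknown"


if __name__ == "__main__":
    client = app.test_client()
    # Prints 203.0.113.7 rather than the test client's 127.0.0.1.
    print(client.get("/ip", headers={"X-Forwarded-For": "203.0.113.7"}).get_data(as_text=True))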

@@ -1 +0,0 @@
from .secret import *

pypistats/settings.py

@@ -1,78 +0,0 @@
"""Application configuration."""
import os
from celery.schedules import crontab
from flask import json
def get_db_uri(env):
"""Get the database URI."""
return \
"postgresql://{username}:{password}@{host}:{port}/{dbname}".format(
username=os.environ.get("POSTGRESQL_USERNAME"),
password=os.environ.get("POSTGRESQL_PASSWORD"),
host=os.environ.get("POSTGRESQL_HOST"),
port=os.environ.get("POSTGRESQL_PORT"),
dbname=os.environ.get("POSTGRESQL_DBNAME"),
)
class Config(object):
"""Base configuration."""
APP_DIR = os.path.abspath(os.path.dirname(__file__))
CELERY_BROKER_URL = os.environ.get("CELERY_BROKER_URL"),
CELERY_IMPORTS = ("pypistats.tasks.pypi")
CELERYBEAT_SCHEDULE = {
"update_db": {
"task": "pypistats.tasks.pypi.etl",
"schedule": crontab(minute=0, hour=1), # 1am UTC
},
}
GITHUB_CLIENT_ID = os.environ.get("GITHUB_CLIENT_ID")
GITHUB_CLIENT_SECRET = os.environ.get("GITHUB_CLIENT_SECRET")
PROJECT_ROOT = os.path.abspath(os.path.join(APP_DIR, os.pardir))
SECRET_KEY = os.environ.get("PYPISTATS_SECRET", "secret-key")
SQLALCHEMY_TRACK_MODIFICATIONS = False
# Plotly chart definitions
PLOT_BASE = json.load(
open(os.path.join(os.path.dirname(__file__), "plots", "plot_base.json"))
)
DATA_BASE = json.load(
open(os.path.join(os.path.dirname(__file__), "plots", "data_base.json"))
)
class ProdConfig(Config):
"""Production configuration."""
DEBUG = False
ENV = "prod"
SQLALCHEMY_DATABASE_URI = get_db_uri(ENV)
class DevConfig(Config):
"""Development configuration."""
DEBUG = True
ENV = "dev"
SQLALCHEMY_DATABASE_URI = get_db_uri(ENV)
class TestConfig(Config):
"""Test configuration."""
DEBUG = True
ENV = "dev"
SQLALCHEMY_DATABASE_URI = get_db_uri(ENV)
TESTING = True
WTF_CSRF_ENABLED = False # Allows form testing
configs = {
"dev": DevConfig,
"prod": ProdConfig,
"test": TestConfig,
}

pypistats/tasks/pypi.py

@@ -1,16 +1,15 @@
"""Get the download stats for a specific day.""" """Get the download stats for a specific day."""
import datetime import datetime
import time
import os import os
import time
import psycopg2
from google.auth.crypt._python_rsa import RSASigner from google.auth.crypt._python_rsa import RSASigner
from google.cloud import bigquery from google.cloud import bigquery
from google.oauth2.service_account import Credentials from google.oauth2.service_account import Credentials
import psycopg2
from psycopg2.extras import execute_values from psycopg2.extras import execute_values
from pypistats.run import celery from pypistats.extensions import celery
# Mirrors to disregard when considering downloads # Mirrors to disregard when considering downloads
MIRRORS = ("bandersnatch", "z3c.pypimirror", "Artifactory", "devpi") MIRRORS = ("bandersnatch", "z3c.pypimirror", "Artifactory", "devpi")
@@ -27,16 +26,13 @@ MAX_RECORD_AGE = 180
def get_google_credentials():
    """Obtain the Google credentials object explicitly."""
-    private_key = os.environ["GOOGLE_PRIVATE_KEY"]
+    private_key = os.environ["GOOGLE_PRIVATE_KEY"].replace('"', "").replace("\\n", "\n")
    private_key_id = os.environ["GOOGLE_PRIVATE_KEY_ID"]
    signer = RSASigner.from_string(key=private_key, key_id=private_key_id)
    project_id = os.environ["GOOGLE_PROJECT_ID"]
    service_account_email = os.environ["GOOGLE_CLIENT_EMAIL"]
-    scopes = (
-        'https://www.googleapis.com/auth/bigquery',
-        'https://www.googleapis.com/auth/cloud-platform'
-    )
+    scopes = ("https://www.googleapis.com/auth/bigquery", "https://www.googleapis.com/auth/cloud-platform")
    token_uri = os.environ["GOOGLE_TOKEN_URI"]
    credentials = Credentials(
        signer=signer,
@@ -48,16 +44,13 @@ def get_google_credentials():
    return credentials


-def get_daily_download_stats(env="dev", date=None):
+def get_daily_download_stats(date):
    """Get daily download stats for pypi packages from BigQuery."""
    start = time.time()
    job_config = bigquery.QueryJobConfig()
    credentials = get_google_credentials()
-    bq_client = bigquery.Client(
-        project=os.environ["GOOGLE_PROJECT_ID"],
-        credentials=credentials
-    )
+    bq_client = bigquery.Client(project=os.environ["GOOGLE_PROJECT_ID"], credentials=credentials)

    if date is None:
        date = str(datetime.date.today() - datetime.timedelta(days=1))
@@ -76,29 +69,22 @@ def get_daily_download_stats(env="dev", date=None):
    for row in rows:
        if row["category_label"] not in data:
            data[row["category_label"]] = []
-        data[row["category_label"]].append([
-            date,
-            row["package"],
-            row["category"],
-            row["downloads"],
-        ])
+        data[row["category_label"]].append([date, row["package"], row["category"], row["downloads"]])

-    results = update_db(data, env, date)
+    results = update_db(data, date)
    print("Elapsed: " + str(time.time() - start))
    results["elapsed"] = time.time() - start
    return results


-def update_db(data, env="dev", date=None):
+def update_db(data, date=None):
    """Update the db with new data by table."""
-    connection, cursor = get_connection_cursor(env)
+    connection, cursor = get_connection_cursor()

    success = {}
    for category_label, rows in data.items():
        table = category_label
-        success[table] = update_table(
-            connection, cursor, table, rows, date
-        )
+        success[table] = update_table(connection, cursor, table, rows, date)

    return success
@@ -130,11 +116,9 @@ def update_table(connection, cursor, table, rows, date):
    for idx in sorted(delete_rows, reverse=True):
        rows.pop(idx)

-    delete_query = \
-        f"""DELETE FROM {table}
+    delete_query = f"""DELETE FROM {table}
        WHERE date = '{date}'"""
-    insert_query = \
-        f"""INSERT INTO {table} (date, package, category, downloads)
+    insert_query = f"""INSERT INTO {table} (date, package, category, downloads)
        VALUES %s"""

    try:
@@ -149,7 +133,7 @@ def update_table(connection, cursor, table, rows, date):
        return False


-def update_all_package_stats(env="dev", date=None):
+def update_all_package_stats(date=None):
    """Update stats for __all__ packages."""
    print("__all__")
    start = time.time()
@@ -157,21 +141,18 @@ def update_all_package_stats(env="dev", date=None):
    if date is None:
        date = str(datetime.date.today() - datetime.timedelta(days=1))

-    connection, cursor = get_connection_cursor(env)
+    connection, cursor = get_connection_cursor()

    success = {}
    for table in PSQL_TABLES:
-        aggregate_query = \
-            f"""SELECT date, '__all__' AS package, category, sum(downloads) AS downloads
+        aggregate_query = f"""SELECT date, '__all__' AS package, category, sum(downloads) AS downloads
            FROM {table} where date = '{date}' GROUP BY date, category"""
        cursor.execute(aggregate_query, (table,))
        values = cursor.fetchall()
-        delete_query = \
-            f"""DELETE FROM {table}
+        delete_query = f"""DELETE FROM {table}
            WHERE date = '{date}' and package = '__all__'"""
-        insert_query = \
-            f"""INSERT INTO {table} (date, package, category, downloads)
+        insert_query = f"""INSERT INTO {table} (date, package, category, downloads)
            VALUES %s"""
        try:
            print(delete_query)
@@ -189,7 +170,7 @@ def update_all_package_stats(env="dev", date=None):
    return success


-def update_recent_stats(env="dev", date=None):
+def update_recent_stats(date=None):
    """Update daily, weekly, monthly stats for all packages."""
    print("recent")
    start = time.time()
@@ -197,7 +178,7 @@ def update_recent_stats(env="dev", date=None):
    if date is None:
        date = str(datetime.date.today() - datetime.timedelta(days=1))

-    connection, cursor = get_connection_cursor(env)
+    connection, cursor = get_connection_cursor()

    downloads_table = "overall"
    recent_table = "recent"
@@ -214,19 +195,16 @@ def update_recent_stats(env="dev", date=None):
    success = {}
    for period, clause in where.items():
-        select_query = \
-            f"""SELECT package, '{period}' as category, sum(downloads) AS downloads
+        select_query = f"""SELECT package, '{period}' as category, sum(downloads) AS downloads
            FROM {downloads_table}
            WHERE category = 'without_mirrors' and {clause}
            GROUP BY package"""
        cursor.execute(select_query)
        values = cursor.fetchall()
-        delete_query = \
-            f"""DELETE FROM {recent_table}
+        delete_query = f"""DELETE FROM {recent_table}
            WHERE category = '{period}'"""
-        insert_query = \
-            f"""INSERT INTO {recent_table}
+        insert_query = f"""INSERT INTO {recent_table}
            (package, category, downloads) VALUES %s"""
        try:
            print(delete_query)
return success return success
def get_connection_cursor(env): def get_connection_cursor():
"""Get a db connection cursor.""" """Get a db connection cursor."""
connection = psycopg2.connect( connection = psycopg2.connect(
dbname=os.environ["POSTGRESQL_DBNAME"], dbname=os.environ["POSTGRESQL_DBNAME"],
@@ -258,7 +236,7 @@ def get_connection_cursor(env):
return connection, cursor return connection, cursor
def purge_old_data(env="dev", date=None): def purge_old_data(date=None):
"""Purge old data records.""" """Purge old data records."""
print("Purge") print("Purge")
age = MAX_RECORD_AGE age = MAX_RECORD_AGE
@@ -267,11 +245,11 @@ def purge_old_data(env="dev", date=None):
    if date is None:
        date = str(datetime.date.today() - datetime.timedelta(days=1))

-    connection, cursor = get_connection_cursor(env)
+    connection, cursor = get_connection_cursor()

-    date = datetime.datetime.strptime(date, '%Y-%m-%d')
+    date = datetime.datetime.strptime(date, "%Y-%m-%d")
    purge_date = date - datetime.timedelta(days=age)
-    purge_date = purge_date.strftime('%Y-%m-%d')
+    purge_date = purge_date.strftime("%Y-%m-%d")

    success = {}
    for table in PSQL_TABLES:
@@ -290,9 +268,9 @@ def purge_old_data(env="dev", date=None):
    return success


-def vacuum_analyze(env="dev"):
+def vacuum_analyze():
    """Vacuum and analyze the db."""
-    connection, cursor = get_connection_cursor(env)
+    connection, cursor = get_connection_cursor()
    connection.set_isolation_level(0)

    results = {}
@@ -321,7 +299,7 @@ def get_query(date):
        FROM
            `the-psf.pypi.downloads{date.replace("-", "")}`
        WHERE
-            REGEXP_CONTAINS(details.python,r'^[0-9]+\.[0-9]+.{{0,}}$') OR
+            REGEXP_CONTAINS(details.python,r'^[0-9]\.[0-9]+.{{0,}}$') OR
            details.python IS NULL )
    SELECT
        package,
@@ -341,11 +319,7 @@ def get_query(date):
    SELECT
        package,
        'python_minor' AS category_label,
-        cast(CONCAT(SPLIT(python_version, '.')[
-        OFFSET
-        (0)],'.',SPLIT(python_version, '.')[
-        OFFSET
-        (1)]) as string) AS category,
+        REGEXP_EXTRACT(python_version, r'^[0-9]+\.[0-9]+') AS category,
        COUNT(*) AS downloads
    FROM
        dls
@@ -398,25 +372,34 @@ def get_query(date):
@celery.task
-def etl():
+def etl(date=None, purge=True):
    """Perform the stats download."""
-    env = os.environ.get("ENV")
-    date = str(datetime.date.today() - datetime.timedelta(days=1))
+    if date is None:
+        date = str(datetime.date.today() - datetime.timedelta(days=1))
    results = dict()
-    results["purge"] = purge_old_data(env, date)
-    results["downloads"] = get_daily_download_stats(env, date)
-    results["__all__"] = update_all_package_stats(env, date)
-    results["recent"] = update_recent_stats(env, date)
-    results["cleanup"] = vacuum_analyze(env)
+    results["downloads"] = get_daily_download_stats(date)
+    results["__all__"] = update_all_package_stats(date)
+    results["recent"] = update_recent_stats()
+    results["cleanup"] = vacuum_analyze()
+    if purge:
+        results["purge"] = purge_old_data(date)
    return results


+@celery.task
+def example(thing):
+    print(thing)
+    print("Sleeping")
+    time.sleep(10)
+    print("done")
+
+
if __name__ == "__main__":
-    date = "2018-12-23"
-    env = "prod"
-    print(date, env)
-    # print(purge_old_data(env, date))
-    print(get_daily_download_stats(env, date))
-    print(update_all_package_stats(env, date))
-    print(update_recent_stats(env, date))
+    run_date = "2020-01-09"
+    print(run_date)
+    # print(purge_old_data(run_date))
+    # vacuum_analyze()
+    print(get_daily_download_stats(run_date))
+    print(update_all_package_stats(run_date))
+    # print(update_recent_stats(run_date))
    # vacuum_analyze(env)
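With the env parameter dropped, the functions above also make manual backfills straightforward; a sketch of a multi-day run under the same environment variables (date range illustrative):

import datetime

from pypistats.tasks.pypi import get_daily_download_stats
from pypistats.tasks.pypi import update_all_package_stats
from pypistats.tasks.pypi import update_recent_stats
from pypistats.tasks.pypi import vacuum_analyze

start = datetime.date(2020, 1, 1)
end = datetime.date(2020, 1, 9)
for offset in range((end - start).days + 1):
    day = str(start + datetime.timedelta(days=offset))
    get_daily_download_stats(day)  # BigQuery -> postgres for one day
    update_all_package_stats(day)  # aggregate the '__all__' rows for that day
update_recent_stats()  # day/week/month rollups from the freshest data
vacuum_analyze()  # reclaim space after the bulk writes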

pypistats/templates/about.html

@@ -12,8 +12,7 @@
on Google BigQuery. All aggregate download stats ignore known PyPI mirrors (such as
<a href="{{ url_for('general.package_page', package='bandersnatch') }}">bandersnatch</a>) unless noted
otherwise.</p>
-<p>PyPI Stats attempts to operate within the free tier of its hosted services. For this reason, aggregate data is
-    only retained for 180 days.</p>
+<p>PyPI Stats retains data for 180 days.</p>
<h3>API</h3>
<p>A simple
    <a href="{{ url_for('api.api') }}">JSON API</a>
@@ -33,49 +32,6 @@
        href="https://github.com/hugovk/pypistats"><img src="https://img.shields.io/pypi/dm/pypistats.svg"></a>
    </li>
</ul>
-</p>
-<h3>Tech</h3>
-<p>PyPI Stats is a project developed using Python 3.6. Here are some of the tools used to create it:
-<ul>
-    <li>Framework:
-        <a href="{{ url_for('general.package_page', package='flask') }}">Flask</a>
-    </li>
-    <li>
-        Host:
-        <a href="{{ url_for('general.package_page', package='awscli') }}">AWS</a>
-    </li>
-    <li>Authentication:
-        <a href="{{ url_for('general.package_page', package='github-flask') }}">GitHub OAuth</a>
-    </li>
-    <li>
-        ORM:
-        <a href="{{ url_for('general.package_page', package='sqlalchemy') }}">SQLAlchemy</a>
-    </li>
-    <li>
-        DBAPI:
-        <a href="{{ url_for('general.package_page', package='psycopg2') }}">psycopg2</a>
-    </li>
-    <li>
-        RDBMS:
-        <a href="{{ url_for('general.package_page', package='alembic') }}">alembic</a>
-    </li>
-    <li>
-        Templating:
-        <a href="{{ url_for('general.package_page', package='jinja2') }}">jinja2</a>
-    </li>
-    <li>Charts:
-        <a href="{{ url_for('general.package_page', package='plotly') }}">plotly.js</a>
-    </li>
-    <li>Data:
-        <a href="{{ url_for('general.package_page', package='google-cloud-bigquery') }}">Google Cloud's BigQuery</a>
-    </li>
-    <li>
-        <a href="{{ url_for('general.package_page', package='__all__') }}">And many more open source software
-            packages</a>
-    </li>
-</ul>
-</p>
<p>PyPIStats.org is also <a href="https://github.com/crflynn/pypistats.org">open source</a>.</p>
<h3>Who</h3>
<p>PyPI Stats was created by
pypistats/templates/admin.html Normal file

@@ -0,0 +1,20 @@
{% extends "layout.html" %}
{% block title %}PyPI Download Stats{% endblock %}
{% block body %}
<h1>Analytics for PyPI packages</h1>
<hr>
<form method="POST" action="/admin">
    {{ form.csrf_token }}
    {{ form.date.label }}
    {{ form.date(size=24) }}
    <input type="submit" value="Submit">
</form>
<br>
{% if not date %}
<p>Submit date to run backfill.</p>
{% endif %}
{% if date %}
<br>
{{ date }} submitted.
{% endif %}
{% endblock %}
pypistats/templates/api.html

@@ -33,6 +33,15 @@
    in a relatively short amount of time. <a
        href="https://packaging.python.org/guides/analyzing-pypi-package-downloads/">Here is a quick guide</a>.
</p>
+<p>
+    If you want to regularly fetch download counts for a particular package or set of packages, cache your results.
+    The data provided here is updated <b>once</b> daily, so you should not need to fetch results from the same API
+    endpoint more than once per day.
+</p>
+<h2>Rate Limiting</h2>
+<p>
+    IP-based rate limiting is imposed application-wide.
+</p>
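The caching guidance added above maps to a small date-keyed cache on the client side; a minimal sketch (the cache path and package name are illustrative; the endpoint path is the api blueprint's /packages/<package>/recent route from this commit):

import datetime
import json
import pathlib

import requests

CACHE = pathlib.Path("/tmp/pypistats_cache.json")


def recent_downloads(package):
    """Fetch recent download counts at most once per calendar day."""
    key = f"{package}:{datetime.date.today()}"
    cached = json.loads(CACHE.read_text()) if CACHE.exists() else {}
    if key not in cached:
        resp = requests.get(f"https://pypistats.org/api/packages/{package}/recent", timeout=10)
        resp.raise_for_status()
        cached[key] = resp.json()
        CACHE.write_text(json.dumps(cached))
    return cached[key]


print(recent_downloads("requests"))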
<h2>API Client</h2>
<p>
    The <a href="{{ url_for('general.package_page', package='pypistats') }}">pypistats</a> <a
pypistats/templates/layout.html

@@ -60,9 +60,11 @@
    </p>
</header>
<section>
    {% block body %}{% endblock %}
</section>
<footer>
    <p>
        <a href="{{ url_for('api.api') }}">API</a>
@@ -72,7 +74,6 @@
        <a href="{{ url_for('general.faqs') }}">FAQs</a>
        <br>
    </p>
-    </p>
</footer>
</div>
<script>
pypistats/views/__init__.py

@@ -1,5 +1,5 @@
"""The view blueprint modules.""" """The view blueprint modules."""
# flake8: noqa from pypistats.views import admin
from pypistats.views import api from pypistats.views import api
from pypistats.views import error from pypistats.views import error
from pypistats.views import general from pypistats.views import general

pypistats/views/admin.py Normal file

@@ -0,0 +1,38 @@
import os

from flask import Blueprint
from flask import render_template
from flask_wtf import FlaskForm
from werkzeug.security import check_password_hash
from werkzeug.security import generate_password_hash
from wtforms import DateField
from wtforms.validators import DataRequired

from pypistats.extensions import auth
from pypistats.tasks.pypi import etl

users = {os.environ["BASIC_AUTH_USER"]: generate_password_hash(os.environ["BASIC_AUTH_PASSWORD"])}

blueprint = Blueprint("admin", __name__, template_folder="templates")


@auth.verify_password
def verify_password(username, password):
    if username in users and check_password_hash(users.get(username), password):
        return username


class BackfillDateForm(FlaskForm):
    date = DateField("Date: ", validators=[DataRequired()])


@blueprint.route("/admin", methods=("GET", "POST"))
@auth.login_required
def index():
    form = BackfillDateForm()
    if form.validate_on_submit():
        date = form.date.data
        etl.apply_async(args=(str(date),))
        return render_template("admin.html", form=form, date=date)
    return render_template("admin.html", form=form)

pypistats/views/api.py

@@ -1,19 +1,18 @@
"""JSON API routes.""" """JSON API routes."""
from flask import abort
from flask import Blueprint from flask import Blueprint
from flask import abort
from flask import g from flask import g
from flask import jsonify from flask import jsonify
from flask import render_template from flask import render_template
from flask import request from flask import request
from pypistats.models.download import RECENT_CATEGORIES
from pypistats.models.download import OverallDownloadCount from pypistats.models.download import OverallDownloadCount
from pypistats.models.download import PythonMajorDownloadCount from pypistats.models.download import PythonMajorDownloadCount
from pypistats.models.download import PythonMinorDownloadCount from pypistats.models.download import PythonMinorDownloadCount
from pypistats.models.download import RECENT_CATEGORIES
from pypistats.models.download import RecentDownloadCount from pypistats.models.download import RecentDownloadCount
from pypistats.models.download import SystemDownloadCount from pypistats.models.download import SystemDownloadCount
blueprint = Blueprint("api", __name__, url_prefix="/api") blueprint = Blueprint("api", __name__, url_prefix="/api")
@@ -26,15 +25,14 @@ def api():
@blueprint.route("/packages/<package>/recent")
def api_downloads_recent(package):
    """Get the recent downloads of a package."""
+    # abort(503)
    if package != "__all__":
        package = package.replace(".", "-").replace("_", "-")
    category = request.args.get("period")
    if category is None:
-        downloads = RecentDownloadCount.query.\
-            filter_by(package=package).all()
+        downloads = RecentDownloadCount.query.filter_by(package=package).all()
    elif category in RECENT_CATEGORIES:
-        downloads = RecentDownloadCount.query.\
-            filter_by(package=package, category=category).all()
+        downloads = RecentDownloadCount.query.filter_by(package=package, category=category).all()
    else:
        abort(404)
@@ -60,26 +58,27 @@ def api_downloads_overall(package):
        package = package.replace(".", "-").replace("_", "-")
    mirrors = request.args.get("mirrors")
    if mirrors == "true":
-        downloads = OverallDownloadCount.query.\
-            filter_by(package=package, category="with_mirrors").\
-            order_by(OverallDownloadCount.date).all()
+        downloads = (
+            OverallDownloadCount.query.filter_by(package=package, category="with_mirrors")
+            .order_by(OverallDownloadCount.date)
+            .all()
+        )
    elif mirrors == "false":
-        downloads = OverallDownloadCount.query.\
-            filter_by(package=package, category="without_mirrors").\
-            order_by(OverallDownloadCount.date).all()
+        downloads = (
+            OverallDownloadCount.query.filter_by(package=package, category="without_mirrors")
+            .order_by(OverallDownloadCount.date)
+            .all()
+        )
    else:
-        downloads = OverallDownloadCount.query.\
-            filter_by(package=package).\
-            order_by(OverallDownloadCount.category,
-                     OverallDownloadCount.date).all()
+        downloads = (
+            OverallDownloadCount.query.filter_by(package=package)
+            .order_by(OverallDownloadCount.category, OverallDownloadCount.date)
+            .all()
+        )

    response = {"package": package, "type": "overall_downloads"}
    if len(downloads) > 0:
-        response["data"] = [{
-            "date": str(r.date),
-            "category": r.category,
-            "downloads": r.downloads,
-        } for r in downloads]
+        response["data"] = [{"date": str(r.date), "category": r.category, "downloads": r.downloads} for r in downloads]
    else:
        abort(404)
@@ -89,22 +88,19 @@ def api_downloads_overall(package):
@blueprint.route("/packages/<package>/python_major")
def api_downloads_python_major(package):
    """Get the python major download time series of a package."""
-    return generic_downloads(
-        PythonMajorDownloadCount, package, "version", "python_major")
+    return generic_downloads(PythonMajorDownloadCount, package, "version", "python_major")


@blueprint.route("/packages/<package>/python_minor")
def api_downloads_python_minor(package):
    """Get the python minor download time series of a package."""
-    return generic_downloads(
-        PythonMinorDownloadCount, package, "version", "python_minor")
+    return generic_downloads(PythonMinorDownloadCount, package, "version", "python_minor")


@blueprint.route("/packages/<package>/system")
def api_downloads_system(package):
    """Get the system download time series of a package."""
-    return generic_downloads(
-        SystemDownloadCount, package, "os", "system")
+    return generic_downloads(SystemDownloadCount, package, "os", "system")


def generic_downloads(model, package, arg, name):
@@ -114,26 +110,19 @@ def generic_downloads(model, package, arg, name):
        package = package.replace(".", "-").replace("_", "-")
    category = request.args.get(arg)
    if category is not None:
-        downloads = model.query.\
-            filter_by(package=package, category=category.title()).\
-            order_by(model.date).all()
+        downloads = model.query.filter_by(package=package, category=category.title()).order_by(model.date).all()
    else:
-        downloads = model.query.\
-            filter_by(package=package).\
-            order_by(model.category, model.date).all()
+        downloads = model.query.filter_by(package=package).order_by(model.category, model.date).all()

    response = {"package": package, "type": f"{name}_downloads"}
    if downloads is not None:
-        response["data"] = [{
-            "date": str(r.date),
-            "category": r.category,
-            "downloads": r.downloads,
-        } for r in downloads]
+        response["data"] = [{"date": str(r.date), "category": r.category, "downloads": r.downloads} for r in downloads]
    else:
        abort(404)

    return jsonify(response)


# TODO
# @blueprint.route("/top/overall")
# def api_top_packages():
pypistats/views/error.py

@@ -1,7 +1,8 @@
"""Error page handlers.""" """Error page handlers."""
from flask import Blueprint from flask import Blueprint
from flask import url_for
blueprint = Blueprint('error', __name__, template_folder='templates') blueprint = Blueprint("error", __name__, template_folder="templates")
@blueprint.app_errorhandler(400) @blueprint.app_errorhandler(400)
@@ -22,6 +23,11 @@ def handle_404(err):
    return "404", 404


+@blueprint.app_errorhandler(429)
+def handle_429(err):
+    return f"""<a href="{url_for("api.api")}#etiquette">429 RATE LIMIT EXCEEDED</a>""", 429
+
+
@blueprint.app_errorhandler(500)
def handle_500(err):
    """Return 500."""
pypistats/views/general.py

@@ -1,42 +1,34 @@
"""General pages.""" """General pages."""
import datetime
import re
from collections import defaultdict from collections import defaultdict
from copy import deepcopy from copy import deepcopy
import datetime
import os
import re
from flask import abort import requests
from flask import Blueprint from flask import Blueprint
from flask import current_app from flask import current_app
from flask import g from flask import g
from flask import json
from flask import redirect from flask import redirect
from flask import render_template from flask import render_template
from flask import request from flask import request
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
import requests
from wtforms import StringField from wtforms import StringField
from wtforms.validators import DataRequired from wtforms.validators import DataRequired
from pypistats.models.download import RECENT_CATEGORIES
from pypistats.models.download import OverallDownloadCount from pypistats.models.download import OverallDownloadCount
from pypistats.models.download import PythonMajorDownloadCount from pypistats.models.download import PythonMajorDownloadCount
from pypistats.models.download import PythonMinorDownloadCount from pypistats.models.download import PythonMinorDownloadCount
from pypistats.models.download import RECENT_CATEGORIES
from pypistats.models.download import RecentDownloadCount from pypistats.models.download import RecentDownloadCount
from pypistats.models.download import SystemDownloadCount from pypistats.models.download import SystemDownloadCount
blueprint = Blueprint("general", __name__, template_folder="templates") blueprint = Blueprint("general", __name__, template_folder="templates")
MODELS = [ MODELS = [OverallDownloadCount, PythonMajorDownloadCount, PythonMinorDownloadCount, SystemDownloadCount]
OverallDownloadCount,
PythonMajorDownloadCount,
PythonMinorDownloadCount,
SystemDownloadCount,
]
class MyForm(FlaskForm): class PackageSearchForm(FlaskForm):
"""Search form.""" """Search form."""
name = StringField("Package: ", validators=[DataRequired()]) name = StringField("Package: ", validators=[DataRequired()])
@@ -45,40 +37,40 @@ class MyForm(FlaskForm):
@blueprint.route("/", methods=("GET", "POST"))
def index():
    """Render the home page."""
-    form = MyForm()
+    form = PackageSearchForm()
    if form.validate_on_submit():
        package = form.name.data
        return redirect(f"/search/{package.lower()}")
-    package_count = \
-        RecentDownloadCount.query.filter_by(category="month").count()
-    return render_template(
-        "index.html",
-        form=form,
-        user=g.user,
-        package_count=package_count
-    )
+    package_count = RecentDownloadCount.query.filter_by(category="month").count()
+    return render_template("index.html", form=form, user=g.user, package_count=package_count)
+
+
+@blueprint.route("/health")
+def health():
+    return "OK"


@blueprint.route("/search/<package>", methods=("GET", "POST"))
def search(package):
    """Render the home page."""
    package = package.replace(".", "-")
-    form = MyForm()
+    form = PackageSearchForm()
    if form.validate_on_submit():
        package = form.name.data
        return redirect(f"/search/{package}")
-    results = RecentDownloadCount.query.filter(
-        RecentDownloadCount.package.like(f"{package}%"),
-        RecentDownloadCount.category == "month").\
-        order_by(RecentDownloadCount.package).\
-        limit(20).all()
+    results = (
+        RecentDownloadCount.query.filter(
+            RecentDownloadCount.package.like(f"{package}%"), RecentDownloadCount.category == "month"
+        )
+        .order_by(RecentDownloadCount.package)
+        .limit(20)
+        .all()
+    )
    packages = [r.package for r in results]
    if len(packages) == 1:
        package = packages[0]
        return redirect(f"/packages/{package}")
-    return render_template(
-        "search.html", search=True, form=form, packages=packages, user=g.user
-    )
+    return render_template("search.html", search=True, form=form, packages=packages, user=g.user)


@blueprint.route("/about")
@@ -106,8 +98,7 @@ def package_page(package):
    start_date = str(datetime.date.today() - datetime.timedelta(lookback))

-    recent_downloads = RecentDownloadCount.query.\
-        filter_by(package=package).all()
+    recent_downloads = RecentDownloadCount.query.filter_by(package=package).all()
    if len(recent_downloads) == 0:
        return redirect(f"/search/{package}")
@@ -119,24 +110,24 @@ def package_page(package):
    metadata = None
    if package != "__all__":
        try:
-            metadata = requests.get(
-                f"https://pypi.python.org/pypi/{package}/json",
-                timeout=5).json()
+            metadata = requests.get(f"https://pypi.python.org/pypi/{package}/json", timeout=5).json()
            if metadata["info"].get("requires_dist", None):
-                metadata["requires"] = []
+                requires = set()
                for required in metadata["info"]["requires_dist"]:
-                    metadata["requires"].append(
-                        re.split(r"[^0-9a-zA-Z_.-]+", required)[0]
-                    )
+                    requires.add(re.split(r"[^0-9a-zA-Z_.-]+", required)[0])
+                metadata["requires"] = sorted(list(requires))
        except Exception:
            pass

    # Get data from db
    model_data = []
    for model in MODELS:
-        records = model.query.filter_by(package=package).\
-            filter(model.date >= start_date).\
-            order_by(model.date, model.category).all()
+        records = (
+            model.query.filter_by(package=package)
+            .filter(model.date >= start_date)
+            .order_by(model.date, model.category)
+            .all()
+        )

        if model == OverallDownloadCount:
            metrics = ["downloads"]
@@ -144,11 +135,7 @@ def package_page(package):
            metrics = ["downloads", "percentages"]

        for metric in metrics:
-            model_data.append({
-                "metric": metric,
-                "name": model.__tablename__,
-                "data": data_function[metric](records),
-            })
+            model_data.append({"metric": metric, "name": model.__tablename__, "data": data_function[metric](records)})

    # Build the plots
    plots = []
@@ -169,11 +156,13 @@ def package_page(package):
        # Add titles
        if model["metric"] == "percentages":
-            plot["layout"]["title"] = \
-                f"Daily Download Proportions of {package} package - {model['name'].title().replace('_', ' ')}"  # noqa
+            plot["layout"][
+                "title"
+            ] = f"Daily Download Proportions of {package} package - {model['name'].title().replace('_', ' ')}"  # noqa
        else:
-            plot["layout"]["title"] = \
-                f"Daily Download Quantity of {package} package - {model['name'].title().replace('_', ' ')}"  # noqa
+            plot["layout"][
+                "title"
+            ] = f"Daily Download Quantity of {package} package - {model['name'].title().replace('_', ' ')}"  # noqa

        # Explicitly set range
        plot["layout"]["xaxis"]["range"] = [str(records[0].date - datetime.timedelta(1)), str(datetime.date.today())]
@@ -183,31 +172,18 @@ def package_page(package):
        drange = (datetime.date.today() - records[0].date).days
        for k in [30, 60, 90, 120, 9999]:
            if k <= drange:
-                plot["layout"]["xaxis"]["rangeselector"]["buttons"].append({
-                    "step": "day",
-                    "stepmode": "backward",
-                    "count": k+1,
-                    "label": f"{k}d"
-                })
+                plot["layout"]["xaxis"]["rangeselector"]["buttons"].append(
+                    {"step": "day", "stepmode": "backward", "count": k + 1, "label": f"{k}d"}
+                )
            else:
-                plot["layout"]["xaxis"]["rangeselector"]["buttons"].append({
-                    "step": "day",
-                    "stepmode": "backward",
-                    "count": drange + 1,
-                    "label": "all"
-                })
+                plot["layout"]["xaxis"]["rangeselector"]["buttons"].append(
+                    {"step": "day", "stepmode": "backward", "count": drange + 1, "label": "all"}
+                )
                break

        plots.append(plot)

-    return render_template(
-        "package.html",
-        package=package,
-        plots=plots,
-        metadata=metadata,
-        recent=recent,
-        user=g.user
-    )
+    return render_template("package.html", package=package, plots=plots, metadata=metadata, recent=recent, user=g.user)
def get_download_data(records):
@@ -312,28 +288,25 @@ def get_proportion_data(records):
    return data


-data_function = {
-    "downloads": get_download_data,
-    "percentages": get_proportion_data,
-}
+data_function = {"downloads": get_download_data, "percentages": get_proportion_data}


@blueprint.route("/top")
def top():
    """Render the top packages page."""
-    top = []
+    top_ = []
    for category in ("day", "week", "month"):
-        downloads = RecentDownloadCount.query.filter_by(category=category).\
-            filter(RecentDownloadCount.package != "__all__").\
-            order_by(RecentDownloadCount.downloads.desc()).limit(20).all()
-        top.append({
-            "category": category,
-            "packages": [{
-                "package": d.package,
-                "downloads": d.downloads,
-            } for d in downloads]
-        })
+        downloads = (
+            RecentDownloadCount.query.filter_by(category=category)
+            .filter(RecentDownloadCount.package != "__all__")
+            .order_by(RecentDownloadCount.downloads.desc())
+            .limit(20)
+            .all()
+        )
+        top_.append(
+            {"category": category, "packages": [{"package": d.package, "downloads": d.downloads} for d in downloads]}
+        )

-    return render_template("top.html", top=top, user=g.user)
+    return render_template("top.html", top=top_, user=g.user)


@blueprint.route("/status")
pypistats/views/user.py

@@ -1,6 +1,6 @@
"""User page for tracking packages.""" """User page for tracking packages."""
from flask import abort
from flask import Blueprint from flask import Blueprint
from flask import abort
from flask import flash from flask import flash
from flask import g from flask import g
from flask import redirect from flask import redirect
@@ -14,16 +14,15 @@ from pypistats.models.download import RecentDownloadCount
from pypistats.models.user import MAX_FAVORITES from pypistats.models.user import MAX_FAVORITES
from pypistats.models.user import User from pypistats.models.user import User
blueprint = Blueprint("user", __name__, template_folder="templates") blueprint = Blueprint("user", __name__, template_folder="templates")
@github.access_token_getter @github.access_token_getter
def token_getter(): def token_getter():
"""Get the token for a user.""" """Get the token for a user."""
user = g.user this_user = g.user
if user is not None: if this_user is not None:
return user.token return this_user.token
@blueprint.route("/github-callback") @blueprint.route("/github-callback")
@@ -36,12 +35,12 @@ def authorized(oauth_token):
        return redirect(next_url)

    # Ensure a user with token doesn't already exist
-    user = User.query.filter_by(token=oauth_token).first()
-    if user is None:
-        user = User(token=oauth_token)
+    this_user = User.query.filter_by(token=oauth_token).first()
+    if this_user is None:
+        this_user = User(token=oauth_token)

    # Set this to use API to get user data
-    g.user = user
+    g.user = this_user
    user_data = github.get("user")

    # extract data
@@ -50,24 +49,19 @@ def authorized(oauth_token):
    avatar_url = user_data["avatar_url"]

    # Create/update the user
-    user = User.query.filter_by(uid=uid).first()
-    if user is None:
-        user = User(
-            token=oauth_token,
-            uid=uid,
-            username=username,
-            avatar_url=avatar_url,
-        )
+    this_user = User.query.filter_by(uid=uid).first()
+    if this_user is None:
+        this_user = User(token=oauth_token, uid=uid, username=username, avatar_url=avatar_url)
    else:
-        user.username = username
-        user.avatar_url = avatar_url
-        user.token = oauth_token
-    user.save()
+        this_user.username = username
+        this_user.avatar_url = avatar_url
+        this_user.token = oauth_token
+    this_user.save()

-    session["username"] = user.username
-    session["user_id"] = user.id
-    g.user = user
+    session["username"] = this_user.username
+    session["user_id"] = this_user.id
+    g.user = this_user

    return redirect(next_url)
pyproject.toml Normal file

@@ -0,0 +1,63 @@
[tool.poetry]
name = "pypistatsorg"
version = "6"
description = "Download counts dashboard for python packages"
authors = ["Flynn <crf204@gmail.com>"]

[tool.poetry.dependencies]
python = "^3.7"
google-cloud-bigquery = "^1.17"
flask = "^1.1"
github-flask = "^3.2"
flask-sqlalchemy = "^2.4"
flask-migrate = "^2.5"
flask-login = "^0.4.1"
flask-wtf = "^0.14.2"
gunicorn = "^19.9"
requests = "^2.22"
celery = "^4.3"
psycopg2-binary = "^2.8"
redis = "^3.3"
flask-limiter = "^1.2.1"
flower = "^0.9.5"
flask-httpauth = "^4.1.0"

[tool.poetry.dev-dependencies]
black = "^19.10b0"
isort = "^5.3"

[tool.black]
line-length = 120
target-version = ['py37']
include = '\.pyi?$'
exclude = '''
(
  /(
      \.eggs
    | \.circleci
    | \.git
    | \.github
    | \.hg
    | \.mypy_cache
    | \.pytest_cache
    | \.tox
    | \.venv
    | _build
    | buck-out
    | build
    | dist
  )/
)
'''

[tool.isort]
force_single_line = true
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
line_length = 120

[build-system]
requires = ["poetry>=1.0"]
build-backend = "poetry.masonry.api"
scripts/run_beat.sh

@@ -1,5 +0,0 @@
export ENV=prod
set -o allexport
source pypistats/secret/$ENV.env
set +o allexport
pipenv run celery beat -A pypistats.run.celery -l info
scripts/run_celery.sh

@@ -1,5 +0,0 @@
export ENV=prod
set -o allexport
source pypistats/secret/$ENV.env
set +o allexport
pipenv run celery -A pypistats.run.celery worker -l info
@@ -1,5 +0,0 @@
export ENV=prod
set -o allexport
source pypistats/secret/$ENV.env
set +o allexport
pipenv run python -m pypistats.tasks.pypi
scripts/run_flask.sh

@@ -1,6 +0,0 @@
export ENV=prod
set -o allexport
source pypistats/secret/$ENV.env
set +o allexport
# pipenv run flask run --host=0.0.0.0
pipenv run gunicorn -b 0.0.0.0:5000 -w 4 --access-logfile - --error-log - pypistats.run:app
@@ -1,6 +0,0 @@
export ENV=prod
set -o allexport
source pypistats/secret/$ENV.env
set +o allexport
# flask db revision --message "message" --autogenerate
# flask db upgrade
@@ -1,31 +0,0 @@
[supervisord]
nodaemon=true

[program:redis]
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stdout
stderr_logfile_maxbytes=0
command=redis-server

[program:pypistats]
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stdout
stderr_logfile_maxbytes=0
command=bash -c "scripts/run_flask.sh"

[program:celery-worker]
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stdout
stderr_logfile_maxbytes=0
user=root
command=bash -c "scripts/run_celery.sh"

[program:celery-beat]
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stdout
stderr_logfile_maxbytes=0
command=bash -c "scripts/run_beat.sh"