cleanup tasks; add required packages to package view; update docs and other views

This commit is contained in:
crflynn
2018-04-08 13:01:13 -04:00
parent 505e9ceecf
commit a42a34dc38
10 changed files with 189 additions and 141 deletions

View File

@@ -6,8 +6,6 @@ from pypistats.extensions import db
from pypistats.extensions import github
from pypistats.extensions import migrate
from pypistats.settings import DevConfig
from pypistats.settings import ProdConfig
from pypistats.settings import TestConfig
def create_app(config_object=DevConfig):

View File

@@ -1,4 +1,6 @@
"""Run the application."""
import os
from flask import g
from flask import session

View File

@@ -35,6 +35,9 @@ SCHEMA = [
bigquery.SchemaField("downloads", "INTEGER", mode="NULLABLE"),
]
# postgresql tables to update for __all__
PSQL_TABLES = ["overall", "python_major", "python_minor", "system"]
def get_daily_download_stats(date, env="dev"):
"""Get daily download stats for pypi packages from BigQuery."""
@@ -108,16 +111,8 @@ def get_daily_download_stats(date, env="dev"):
def update_db(df, env="dev"):
"""Update the db for the table."""
connection = psycopg2.connect(
dbname=postgresql[env]['dbname'],
user=postgresql[env]['username'],
password=postgresql[env]['password'],
host=postgresql[env]['host'],
port=postgresql[env]['port'],
# sslmode='require',
)
cursor = connection.cursor()
"""Update the db with new data by table."""
connection, cursor = get_connection_cursor(env)
df_groups = df.groupby("category_label")
@@ -130,15 +125,17 @@ def update_db(df, env="dev"):
"category",
"downloads",
]]
# success[table] = update_table(cursor, table, df_category, date)
update_all_package_stats(cursor, table, date)
success[table] = update_table(
connection, cursor, table, df_category, date
)
# update_all_package_stats(cursor, table, date)
update_recent_stats(cursor, date)
# update_recent_stats(cursor, date)
return success
def update_table(cursor, table, df, date):
def update_table(connection, cursor, table, df, date):
"""Update a table."""
print(table)
df = df.fillna("null")
@@ -153,16 +150,19 @@ def update_table(cursor, table, df, date):
try:
cursor.execute(delete_query)
execute_values(cursor, insert_query, values)
cursor.execute("commit")
connection.commit()
return True
except psycopg2.IntegrityError as e:
cursor.execute("rollback")
connection.rollback()
return False
def update_all_package_stats(cursor, table, date):
def update_all_package_stats(date, env="dev"):
"""Update stats for __all__ packages."""
print("__all__")
connection, cursor = get_connection_cursor(env)
for table in PSQL_TABLES:
aggregate_query = \
f"""SELECT date, '__all__' AS package, category, sum(downloads) AS downloads
FROM {table} GROUP BY date, category"""
@@ -178,16 +178,18 @@ def update_all_package_stats(cursor, table, date):
try:
cursor.execute(delete_query)
execute_values(cursor, insert_query, values)
cursor.execute("commit")
connection.commit()
return True
except psycopg2.IntegrityError as e:
cursor.execute("rollback")
connection.rollback()
return False
def update_recent_stats(cursor, date):
def update_recent_stats(date, env="dev"):
"""Update daily, weekly, monthly stats for all packages."""
print("recent")
connection, cursor = get_connection_cursor(env)
downloads_table = "overall"
recent_table = "recent"
@@ -220,13 +222,27 @@ def update_recent_stats(cursor, date):
try:
cursor.execute(delete_query)
execute_values(cursor, insert_query, values)
cursor.execute("commit")
connection.commit()
success[time] = True
except psycopg2.IntegrityError as e:
cursor.execute("rollback")
connection.rollback()
success[time] = False
def get_connection_cursor(env):
    """Open a PostgreSQL connection for an environment and create a cursor.

    Args:
        env: Key into the ``postgresql`` settings mapping (callers in this
            file default to ``"dev"``).

    Returns:
        A ``(connection, cursor)`` tuple. The connection is returned along
        with the cursor so callers can commit or roll back on it directly.
    """
    # NOTE(review): `postgresql` is presumably the module-level credentials
    # mapping keyed by environment name -- confirm against the file's imports.
    settings = postgresql[env]
    connection = psycopg2.connect(
        dbname=settings["dbname"],
        user=settings["username"],
        password=settings["password"],
        host=settings["host"],
        port=settings["port"],
        # sslmode='require',
    )
    return connection, connection.cursor()
def get_query(date):
"""Get the query to execute against pypistats on bigquery."""
return f"""

View File

@@ -7,7 +7,7 @@
PyPI Stats provides a simple JSON API for retrieving aggregate download stats and time series for packages. The following are the valid endpoints using host:
<code>https://pypistats.org/</code>
</p>
<h3>NOTES</h3>
<h2>NOTES</h2>
<p>
<ul>
<li>All download stats exclude known mirrors (such as
@@ -18,7 +18,9 @@
</p>
<h2>Endpoints</h2>
<h3>/api/&lt;package&gt;/recent</h3>
<p>Retrieve the aggregate download quantities for the last day/week/month. Query arguments:
<p>Retrieve the aggregate download quantities for the last day/week/month.
</p>
<p>Query arguments:
<ul>
<li>
<b>period</b>
@@ -27,7 +29,7 @@
or
<code>week</code>
or
<code>month</code>
<code>month</code>. If omitted returns all values.
</li>
</ul>
Example response:
@@ -42,25 +44,18 @@
}</code></pre>
</p>
<h3>/api/&lt;package&gt;/overall</h3>
<p>Retrieve the aggregate daily download time series with or without mirror downloads. Query arguments:
<p>Retrieve the aggregate daily download time series with or without mirror downloads.
</p>
<p>Query arguments:
<ul>
<li>
<b>mirrors</b>
(optional):
<code>true</code>
or
<code>false</code>
</li>
<li>
<b>start_date</b>
(optional): starting date of time series in format
<code>YYYY-MM-DD</code>
</li>
<li>
<b>end_date</b>
(optional): ending date of time series in format
<code>YYYY-MM-DD</code>
<code>false</code>. If omitted returns both series data.
</li>
<!-- <li> <b>start_date</b> (optional): starting date of time series in format <code>YYYY-MM-DD</code> </li> <li> <b>end_date</b> (optional): ending date of time series in format <code>YYYY-MM-DD</code> </li> -->
</ul>
Example response:
<pre><code>{
@@ -81,25 +76,19 @@
}</code></pre>
</p>
<h3>/api/&lt;package&gt;/python_major</h3>
<p>Retrieve the aggregate daily download time series by Python major version number. Query arguments:
<p>Retrieve the aggregate daily download time series by Python major version number.
</p>
<p>Query arguments:
<ul>
<li>
<b>version</b>
(optional): the Python major version number, e.g.
<code>2</code>
or
<code>3</code>
</li>
<li>
<b>start_date</b>
(optional): starting date of time series in format
<code>YYYY-MM-DD</code>
</li>
<li>
<b>end_date</b>
(optional): ending date of time series in format
<code>YYYY-MM-DD</code>
<code>3</code>. If omitted returns all series data (including
<code>null</code>).
</li>
<!-- <li> <b>start_date</b> (optional): starting date of time series in format <code>YYYY-MM-DD</code> </li> <li> <b>end_date</b> (optional): ending date of time series in format <code>YYYY-MM-DD</code> </li> -->
</ul>
Example response:
<pre><code>{
@@ -125,25 +114,19 @@
}</code></pre>
</p>
<h3>/api/&lt;package&gt;/python_minor</h3>
<p>Retrieve the aggregate daily download time series by Python minor version number. Query arguments:
<p>Retrieve the aggregate daily download time series by Python minor version number.
</p>
<p>Query arguments:
<ul>
<li>
<b>version</b>
(optional): the Python minor version number, e.g.
<code>2.7</code>
or
<code>3.6</code>
</li>
<li>
<b>start_date</b>
(optional): starting date of time series in format
<code>YYYY-MM-DD</code>
</li>
<li>
<b>end_date</b>
(optional): ending date of time series in format
<code>YYYY-MM-DD</code>
<code>3.6</code>. If omitted returns all series data (including
<code>null</code>).
</li>
<!-- <li> <b>start_date</b> (optional): starting date of time series in format <code>YYYY-MM-DD</code> </li> <li> <b>end_date</b> (optional): ending date of time series in format <code>YYYY-MM-DD</code> </li> -->
</ul>
Example response:
<pre><code>{
@@ -199,26 +182,21 @@
}</code></pre>
</p>
<h3>/api/&lt;package&gt;/system</h3>
<p>Retrieve the aggregate daily download time series by operating system. Query arguments:
<p>Retrieve the aggregate daily download time series by operating system.
</p>
<p>Query arguments:
<ul>
<li>
<b>os</b>
(optional): the operating system name, e.g.
<code>windows</code>,
<code>linux</code>, or
<code>linux</code>,
<code>darwin</code>
(Mac OSX).
</li>
<li>
<b>start_date</b>
(optional): starting date of time series in format
<code>YYYY-MM-DD</code>
</li>
<li>
<b>end_date</b>
(optional): ending date of time series in format
<code>YYYY-MM-DD</code>
or
<code>other</code>. If omitted returns all series data (including
<code>null</code>).
</li>
<!-- <li> <b>start_date</b> (optional): starting date of time series in format <code>YYYY-MM-DD</code> </li> <li> <b>end_date</b> (optional): ending date of time series in format <code>YYYY-MM-DD</code> </li> -->
</ul>
Example response:
<pre><code>{

View File

@@ -3,14 +3,22 @@
{% block body %}
<h1>Analytics for PyPI packages</h1>
<hr>
<p>Search for a python package on PyPI.</p>
<form method="POST" action="/">
{{ form.csrf_token }}
{{ form.name.label }}
{{ form.name(size=24) }}
<input type="submit" value="Search">
</form>
<br>
{% if not search %}
<p>Search among
<b>{{ "{:,.0f}".format(package_count) }}</b>
python packages from PyPI (updated daily).</p>
{% else %}
Search results:
{% endif %}
{% if search %}
<br>
{% include "results.html" %}
{% endif %}
{% endblock %}

View File

@@ -48,22 +48,24 @@
<h1>PyPI Stats</h1>
<p>
<a href="{{ url_for('general.index') }}">Home</a>
<a href="{{ url_for('general.index') }}">Search</a>
<br>
<br>
<a href="{{ url_for('general.package', package='__all__') }}">__all__</a>
<a href="{{ url_for('general.package', package='__all__') }}">All packages</a>
<br>
<a href="{{ url_for('general.top') }}">__top__</a>
<a href="{{ url_for('general.top') }}">Top packages</a>
<br>
<br>
{% if user %}
<a href="{{ url_for('user.user')}}">{{ user.username }}'s Packages</a>
<a href="{{ url_for('user.user')}}">{{ user.username }}'s packages</a>
{% else %}
<a href="{{ url_for('user.user')}}">My Packages</a>
<a href="{{ url_for('user.user')}}">Track packages</a>
{% endif %}
<br>
<br>
{% if user %}
<a href="{{ url_for('user.logout') }}">Logout</a>
{% endif %}
</p>
</header>

View File

@@ -23,6 +23,7 @@
<br>
{% else %}
<p>
{% if metadata %}
<a href="{{ metadata['info']['package_url']}}">PyPI page</a>
<br>
<a href="{{ metadata['info']['home_page']}}">Home page</a>
@@ -31,13 +32,26 @@
{{metadata['info']['author']}}
<br>
License:
{% if metadata['info']['license'] | length > 200 %}
{{metadata['info']['license'][:200]}}...
{% else %}
{{metadata['info']['license']}}
{% endif %}
<br>
Summary:
Summary :
{{metadata['info']['summary']}}
<br>
Latest version:
Latest version :
{{metadata['info']['version']}}
<br>
{% if metadata['requires'] %}
Requires:
{% for required in metadata['requires'] %}
<a href="{{ url_for('general.package', package=required.lower())}}">{{required.lower()}}</a>
{% if not loop.last %}|{% endif %}
{% endfor %}
{% endif %}
<br><br>
Downloads last day:
{{"{:,.0f}".format(recent['day'])}}
@@ -47,6 +61,10 @@
<br>
Downloads last month:
{{"{:,.0f}".format(recent['month'])}}
{% else %}
No metadata found.
{%endif%}
</p>
{% endif %}
<script>

View File

@@ -3,7 +3,7 @@
{% block body %}
{% if user %}
<h1><img src="{{ user.avatar_url }}" height="30" width="30">
<h1><img src="{{ user.avatar_url }}" height="25" width="25">
{{ user.username }}'s Packages</h1>
<hr>
<p>Currently saved packages.</p>

View File

@@ -1,7 +1,9 @@
"""General pages."""
from copy import deepcopy
import os
import re
from flask import abort
from flask import Blueprint
from flask import current_app
from flask import g
@@ -19,7 +21,7 @@ from pypistats.models.download import PythonMinorDownloadCount
from pypistats.models.download import RECENT_CATEGORIES
from pypistats.models.download import RecentDownloadCount
from pypistats.models.download import SystemDownloadCount
from sqlalchemy import distinct
blueprint = Blueprint("general", __name__, template_folder="templates")
@@ -45,7 +47,15 @@ def index():
if form.validate_on_submit():
package = form.name.data
return redirect(f"/search/{package}")
return render_template("index.html", form=form, user=g.user)
package_count = \
RecentDownloadCount.query.filter_by(category="month").count()
print(package_count)
return render_template(
"index.html",
form=form,
user=g.user,
package_count=package_count
)
@blueprint.route("/search/<package>", methods=("GET", "POST"))
@@ -75,12 +85,31 @@ def about():
@blueprint.route("/package/<package>")
def package(package):
"""Render the package page."""
# Recent download stats
recent_downloads = RecentDownloadCount.query.\
filter_by(package=package).all()
if len(recent_downloads) == 0:
abort(404)
recent = {r: 0 for r in RECENT_CATEGORIES}
for r in recent_downloads:
recent[r.category] = r.downloads
# PyPI metadata
metadata = None
if package != "__all__":
try:
metadata = requests.get(
f"https://pypi.python.org/pypi/{package}/json").json()
f"https://pypi.python.org/pypi/{package}/json",
timeout=5).json()
if metadata["info"].get("requires_dist", None):
metadata["requires"] = []
for required in metadata["info"]["requires_dist"]:
print(package, re.split(r"[^0-9a-zA-Z_.-]+", required))
metadata["requires"].append(
re.split(r"[^0-9a-zA-Z_.-]+", required)[0]
)
except Exception:
metadata = None
pass
# Get data from db
model_data = []
@@ -114,13 +143,6 @@ def package(package):
f"Downloads of {package} package - {model['name'].title().replace('_', ' ')}" # noqa
plots.append(plot)
# Recent download stats
recent_downloads = RecentDownloadCount.query.\
filter_by(package=package).all()
recent = {r: 0 for r in RECENT_CATEGORIES}
for r in recent_downloads:
recent[r.category] = r.downloads
return render_template(
"package.html",
package=package,

View File

@@ -102,11 +102,12 @@ def user_package(package):
if g.user:
# Ensure package is valid.
downloads = RecentDownloadCount.query.filter_by(package=package).all()
if downloads is None:
return abort(400)
# Handle add/remove to favorites
if g.user.favorites is None:
# Ensure package is valid before adding
if len(downloads) == 0:
return abort(400)
g.user.favorites = [package]
g.user.update()
return redirect(url_for("user.user"))
@@ -121,6 +122,9 @@ def user_package(package):
return redirect(url_for("user.user"))
else:
if len(g.user.favorites) < MAX_FAVORITES:
# Ensure package is valid before adding
if len(downloads) == 0:
return abort(400)
favorites = g.user.favorites
favorites.append(package)
favorites = sorted(favorites)