cleanup tasks; add required packages to package view; update docs and other views

This commit is contained in:
crflynn
2018-04-08 13:01:13 -04:00
parent 505e9ceecf
commit a42a34dc38
10 changed files with 189 additions and 141 deletions

View File

@@ -6,8 +6,6 @@ from pypistats.extensions import db
from pypistats.extensions import github from pypistats.extensions import github
from pypistats.extensions import migrate from pypistats.extensions import migrate
from pypistats.settings import DevConfig from pypistats.settings import DevConfig
from pypistats.settings import ProdConfig
from pypistats.settings import TestConfig
def create_app(config_object=DevConfig): def create_app(config_object=DevConfig):

View File

@@ -1,4 +1,6 @@
"""Run the application.""" """Run the application."""
import os
from flask import g from flask import g
from flask import session from flask import session

View File

@@ -35,6 +35,9 @@ SCHEMA = [
bigquery.SchemaField("downloads", "INTEGER", mode="NULLABLE"), bigquery.SchemaField("downloads", "INTEGER", mode="NULLABLE"),
] ]
# postgresql tables to update for __all__
PSQL_TABLES = ["overall", "python_major", "python_minor", "system"]
def get_daily_download_stats(date, env="dev"): def get_daily_download_stats(date, env="dev"):
"""Get daily download stats for pypi packages from BigQuery.""" """Get daily download stats for pypi packages from BigQuery."""
@@ -108,16 +111,8 @@ def get_daily_download_stats(date, env="dev"):
def update_db(df, env="dev"): def update_db(df, env="dev"):
"""Update the db for the table.""" """Update the db with new data by table."""
connection = psycopg2.connect( connection, cursor = get_connection_cursor(env)
dbname=postgresql[env]['dbname'],
user=postgresql[env]['username'],
password=postgresql[env]['password'],
host=postgresql[env]['host'],
port=postgresql[env]['port'],
# sslmode='require',
)
cursor = connection.cursor()
df_groups = df.groupby("category_label") df_groups = df.groupby("category_label")
@@ -130,15 +125,17 @@ def update_db(df, env="dev"):
"category", "category",
"downloads", "downloads",
]] ]]
# success[table] = update_table(cursor, table, df_category, date) success[table] = update_table(
update_all_package_stats(cursor, table, date) connection, cursor, table, df_category, date
)
# update_all_package_stats(cursor, table, date)
update_recent_stats(cursor, date) # update_recent_stats(cursor, date)
return success return success
def update_table(cursor, table, df, date): def update_table(connection, cursor, table, df, date):
"""Update a table.""" """Update a table."""
print(table) print(table)
df = df.fillna("null") df = df.fillna("null")
@@ -153,41 +150,46 @@ def update_table(cursor, table, df, date):
try: try:
cursor.execute(delete_query) cursor.execute(delete_query)
execute_values(cursor, insert_query, values) execute_values(cursor, insert_query, values)
cursor.execute("commit") connection.commit()
return True return True
except psycopg2.IntegrityError as e: except psycopg2.IntegrityError as e:
cursor.execute("rollback") connection.rollback()
return False return False
def update_all_package_stats(date, env="dev"):
    """Rebuild the ``__all__`` aggregate rows for ``date`` in every stats table.

    For each table named in ``PSQL_TABLES`` the downloads recorded for
    ``date`` are summed per category, any existing ``__all__`` rows for that
    date are deleted, and the fresh totals are inserted. Each table is
    committed (or rolled back) independently, so a failure on one table does
    not prevent the remaining tables from being updated.

    Args:
        date: The day to aggregate; bound as a query parameter.
        env: Environment key handed to ``get_connection_cursor``.

    Returns:
        True if every table was updated, False if at least one table raised
        ``psycopg2.IntegrityError`` and was rolled back.
    """
    print("__all__")
    connection, cursor = get_connection_cursor(env)

    all_succeeded = True
    try:
        for table in PSQL_TABLES:
            # Table names cannot be bound as parameters; they only ever come
            # from the module-level PSQL_TABLES constant.
            #
            # The aggregate is restricted to ``date`` because only that
            # day's ``__all__`` rows are deleted below; aggregating every
            # date would re-insert rows that already exist. Existing
            # ``__all__`` rows are excluded so a re-run does not add the
            # previous total into the new one.
            aggregate_query = f"""SELECT date, '__all__' AS package, category, sum(downloads) AS downloads
                FROM {table}
                WHERE date = %s AND package IS DISTINCT FROM '__all__'
                GROUP BY date, category"""
            cursor.execute(aggregate_query, (date,))
            values = cursor.fetchall()

            delete_query = f"""DELETE FROM {table}
                WHERE date = %s and package = '__all__'"""
            insert_query = f"""INSERT INTO {table} (date, package, category, downloads)
                VALUES %s"""
            try:
                cursor.execute(delete_query, (date,))
                execute_values(cursor, insert_query, values)
                connection.commit()
            except psycopg2.IntegrityError:
                connection.rollback()
                all_succeeded = False
    finally:
        # The connection is opened solely for this call, so release it here.
        cursor.close()
        connection.close()

    return all_succeeded
def update_recent_stats(cursor, date): def update_recent_stats(date, env="dev"):
"""Update daily, weekly, monthly stats for all packages.""" """Update daily, weekly, monthly stats for all packages."""
print("recent") print("recent")
connection, cursor = get_connection_cursor(env)
downloads_table = "overall" downloads_table = "overall"
recent_table = "recent" recent_table = "recent"
@@ -220,13 +222,27 @@ def update_recent_stats(cursor, date):
try: try:
cursor.execute(delete_query) cursor.execute(delete_query)
execute_values(cursor, insert_query, values) execute_values(cursor, insert_query, values)
cursor.execute("commit") connection.commit()
success[time] = True success[time] = True
except psycopg2.IntegrityError as e: except psycopg2.IntegrityError as e:
cursor.execute("rollback") connection.rollback()
success[time] = False success[time] = False
def get_connection_cursor(env):
    """Open a database connection for ``env`` and return it with a cursor.

    The connection parameters are read from ``postgresql[env]`` (keys
    ``dbname``, ``username``, ``password``, ``host`` and ``port``).

    Args:
        env: Key into the ``postgresql`` settings mapping, e.g. ``"dev"``.

    Returns:
        A ``(connection, cursor)`` tuple; the caller owns both objects.
    """
    settings = postgresql[env]
    connection = psycopg2.connect(
        dbname=settings['dbname'],
        user=settings['username'],
        password=settings['password'],
        host=settings['host'],
        port=settings['port'],
        # sslmode='require',
    )
    return connection, connection.cursor()
def get_query(date): def get_query(date):
"""Get the query to execute against pypistats on bigquery.""" """Get the query to execute against pypistats on bigquery."""
return f""" return f"""

View File

@@ -7,7 +7,7 @@
PyPI Stats provides a simple JSON API for retrieving aggregate download stats and time series for packages. The following are the valid endpoints using host: PyPI Stats provides a simple JSON API for retrieving aggregate download stats and time series for packages. The following are the valid endpoints using host:
<code>https://pypistats.org/</code> <code>https://pypistats.org/</code>
</p> </p>
<h3>NOTES</h3> <h2>NOTES</h2>
<p> <p>
<ul> <ul>
<li>All download stats exclude known mirrors (such as <li>All download stats exclude known mirrors (such as
@@ -18,7 +18,9 @@
</p> </p>
<h2>Endpoints</h2> <h2>Endpoints</h2>
<h3>/api/&lt;package&gt;/recent</h3> <h3>/api/&lt;package&gt;/recent</h3>
<p>Retrieve the aggregate download quantities for the last day/week/month. Query arguments: <p>Retrieve the aggregate download quantities for the last day/week/month.
</p>
<p>Query arguments:
<ul> <ul>
<li> <li>
<b>period</b> <b>period</b>
@@ -27,7 +29,7 @@
or or
<code>week</code> <code>week</code>
or or
<code>month</code> <code>month</code>. If omitted returns all values.
</li> </li>
</ul> </ul>
Example response: Example response:
@@ -42,25 +44,18 @@
}</code></pre> }</code></pre>
</p> </p>
<h3>/api/&lt;package&gt;/overall</h3> <h3>/api/&lt;package&gt;/overall</h3>
<p>Retrieve the aggregate daily download time series with or without mirror downloads. Query arguments: <p>Retrieve the aggregate daily download time series with or without mirror downloads.
</p>
<p>Query arguments:
<ul> <ul>
<li> <li>
<b>mirrors</b> <b>mirrors</b>
(optional): (optional):
<code>true</code> <code>true</code>
or or
<code>false</code> <code>false</code>. If omitted returns both series data.
</li>
<li>
<b>start_date</b>
(optional): starting date of time series in format
<code>YYYY-MM-DD</code>
</li>
<li>
<b>end_date</b>
(optional): ending date of time series in format
<code>YYYY-MM-DD</code>
</li> </li>
<!-- <li> <b>start_date</b> (optional): starting date of time series in format <code>YYYY-MM-DD</code> </li> <li> <b>end_date</b> (optional): ending date of time series in format <code>YYYY-MM-DD</code> </li> -->
</ul> </ul>
Example response: Example response:
<pre><code>{ <pre><code>{
@@ -81,25 +76,19 @@
}</code></pre> }</code></pre>
</p> </p>
<h3>/api/&lt;package&gt;/python_major</h3> <h3>/api/&lt;package&gt;/python_major</h3>
<p>Retrieve the aggregate daily download time series by Python major version number. Query arguments: <p>Retrieve the aggregate daily download time series by Python major version number.
</p>
<p>Query arguments:
<ul> <ul>
<li> <li>
<b>version</b> <b>version</b>
(optional): the Python major version number, e.g. (optional): the Python major version number, e.g.
<code>2</code> <code>2</code>
or or
<code>3</code> <code>3</code>. If omitted returns all series data (including
</li> <code>null</code>).
<li>
<b>start_date</b>
(optional): starting date of time series in format
<code>YYYY-MM-DD</code>
</li>
<li>
<b>end_date</b>
(optional): ending date of time series in format
<code>YYYY-MM-DD</code>
</li> </li>
<!-- <li> <b>start_date</b> (optional): starting date of time series in format <code>YYYY-MM-DD</code> </li> <li> <b>end_date</b> (optional): ending date of time series in format <code>YYYY-MM-DD</code> </li> -->
</ul> </ul>
Example response: Example response:
<pre><code>{ <pre><code>{
@@ -125,25 +114,19 @@
}</code></pre> }</code></pre>
</p> </p>
<h3>/api/&lt;package&gt;/python_minor</h3> <h3>/api/&lt;package&gt;/python_minor</h3>
<p>Retrieve the aggregate daily download time series by Python minor version number. Query arguments: <p>Retrieve the aggregate daily download time series by Python minor version number.
</p>
<p>Query arguments:
<ul> <ul>
<li> <li>
<b>version</b> <b>version</b>
(optional): the Python minor version number, e.g. (optional): the Python minor version number, e.g.
<code>2.7</code> <code>2.7</code>
or or
<code>3.6</code> <code>3.6</code>. If omitted returns all series data (including
</li> <code>null</code>).
<li>
<b>start_date</b>
(optional): starting date of time series in format
<code>YYYY-MM-DD</code>
</li>
<li>
<b>end_date</b>
(optional): ending date of time series in format
<code>YYYY-MM-DD</code>
</li> </li>
<!-- <li> <b>start_date</b> (optional): starting date of time series in format <code>YYYY-MM-DD</code> </li> <li> <b>end_date</b> (optional): ending date of time series in format <code>YYYY-MM-DD</code> </li> -->
</ul> </ul>
Example response: Example response:
<pre><code>{ <pre><code>{
@@ -199,26 +182,21 @@
}</code></pre> }</code></pre>
</p> </p>
<h3>/api/&lt;package&gt;/system</h3> <h3>/api/&lt;package&gt;/system</h3>
<p>Retrieve the aggregate daily download time series by operating system. Query arguments: <p>Retrieve the aggregate daily download time series by operating system.
</p>
<p>Query arguments:
<ul> <ul>
<li> <li>
<b>os</b> <b>os</b>
(optional): the operating system name, e.g. (optional): the operating system name, e.g.
<code>windows</code>, <code>windows</code>,
<code>linux</code>, or <code>linux</code>,
<code>darwin</code> <code>darwin</code>
(Mac OSX). or
</li> <code>other</code>. If omitted returns all series data (including
<li> <code>null</code>).
<b>start_date</b>
(optional): starting date of time series in format
<code>YYYY-MM-DD</code>
</li>
<li>
<b>end_date</b>
(optional): ending date of time series in format
<code>YYYY-MM-DD</code>
</li> </li>
<!-- <li> <b>start_date</b> (optional): starting date of time series in format <code>YYYY-MM-DD</code> </li> <li> <b>end_date</b> (optional): ending date of time series in format <code>YYYY-MM-DD</code> </li> -->
</ul> </ul>
Example response: Example response:
<pre><code>{ <pre><code>{

View File

@@ -3,14 +3,22 @@
{% block body %} {% block body %}
<h1>Analytics for PyPI packages</h1> <h1>Analytics for PyPI packages</h1>
<hr> <hr>
<p>Search for a python package on PyPI.</p>
<form method="POST" action="/"> <form method="POST" action="/">
{{ form.csrf_token }} {{ form.csrf_token }}
{{ form.name.label }} {{ form.name.label }}
{{ form.name(size=24) }} {{ form.name(size=24) }}
<input type="submit" value="Search"> <input type="submit" value="Search">
</form> </form>
<br>
{% if not search %}
<p>Search among
<b>{{ "{:,.0f}".format(package_count) }}</b>
python packages from PyPI (updated daily).</p>
{% else %}
Search results:
{% endif %}
{% if search %} {% if search %}
<br>
{% include "results.html" %} {% include "results.html" %}
{% endif %} {% endif %}
{% endblock %} {% endblock %}

View File

@@ -48,22 +48,24 @@
<h1>PyPI Stats</h1> <h1>PyPI Stats</h1>
<p> <p>
<a href="{{ url_for('general.index') }}">Home</a> <a href="{{ url_for('general.index') }}">Search</a>
<br> <br>
<br> <br>
<a href="{{ url_for('general.package', package='__all__') }}">__all__</a> <a href="{{ url_for('general.package', package='__all__') }}">All packages</a>
<br> <br>
<a href="{{ url_for('general.top') }}">__top__</a> <a href="{{ url_for('general.top') }}">Top packages</a>
<br> <br>
<br> <br>
{% if user %} {% if user %}
<a href="{{ url_for('user.user')}}">{{ user.username }}'s Packages</a> <a href="{{ url_for('user.user')}}">{{ user.username }}'s packages</a>
{% else %} {% else %}
<a href="{{ url_for('user.user')}}">My Packages</a> <a href="{{ url_for('user.user')}}">Track packages</a>
{% endif %} {% endif %}
<br> <br>
<br> <br>
<a href="{{ url_for('user.logout') }}">Logout</a> {% if user %}
<a href="{{ url_for('user.logout') }}">Logout</a>
{% endif %}
</p> </p>
</header> </header>

View File

@@ -23,30 +23,48 @@
<br> <br>
{% else %} {% else %}
<p> <p>
<a href="{{ metadata['info']['package_url']}}">PyPI page</a> {% if metadata %}
<br> <a href="{{ metadata['info']['package_url']}}">PyPI page</a>
<a href="{{ metadata['info']['home_page']}}">Home page</a> <br>
<br> <a href="{{ metadata['info']['home_page']}}">Home page</a>
Author: <br>
{{metadata['info']['author']}} Author:
<br> {{metadata['info']['author']}}
License: <br>
{{metadata['info']['license']}} License:
<br> {% if metadata['info']['license'] | length > 200 %}
Summary:
{{metadata['info']['summary']}} {{metadata['info']['license'][:200]}}...
<br> {% else %}
Latest version: {{metadata['info']['license']}}
{{metadata['info']['version']}} {% endif %}
<br><br> <br>
Downloads last day: Summary :
{{"{:,.0f}".format(recent['day'])}} {{metadata['info']['summary']}}
<br> <br>
Downloads last week: Latest version :
{{"{:,.0f}".format(recent['week'])}} {{metadata['info']['version']}}
<br> <br>
Downloads last month: {% if metadata['requires'] %}
{{"{:,.0f}".format(recent['month'])}} Requires:
{% for required in metadata['requires'] %}
<a href="{{ url_for('general.package', package=required.lower())}}">{{required.lower()}}</a>
{% if not loop.last %}|{% endif %}
{% endfor %}
{% endif %}
<br><br>
Downloads last day:
{{"{:,.0f}".format(recent['day'])}}
<br>
Downloads last week:
{{"{:,.0f}".format(recent['week'])}}
<br>
Downloads last month:
{{"{:,.0f}".format(recent['month'])}}
{% else %}
No metadata found.
{%endif%}
</p>
{% endif %} {% endif %}
<script> <script>

View File

@@ -3,7 +3,7 @@
{% block body %} {% block body %}
{% if user %} {% if user %}
<h1><img src="{{ user.avatar_url }}" height="30" width="30"> <h1><img src="{{ user.avatar_url }}" height="25" width="25">
{{ user.username }}'s Packages</h1> {{ user.username }}'s Packages</h1>
<hr> <hr>
<p>Currently saved packages.</p> <p>Currently saved packages.</p>

View File

@@ -1,7 +1,9 @@
"""General pages.""" """General pages."""
from copy import deepcopy from copy import deepcopy
import os import os
import re
from flask import abort
from flask import Blueprint from flask import Blueprint
from flask import current_app from flask import current_app
from flask import g from flask import g
@@ -19,7 +21,7 @@ from pypistats.models.download import PythonMinorDownloadCount
from pypistats.models.download import RECENT_CATEGORIES from pypistats.models.download import RECENT_CATEGORIES
from pypistats.models.download import RecentDownloadCount from pypistats.models.download import RecentDownloadCount
from pypistats.models.download import SystemDownloadCount from pypistats.models.download import SystemDownloadCount
from sqlalchemy import distinct
blueprint = Blueprint("general", __name__, template_folder="templates") blueprint = Blueprint("general", __name__, template_folder="templates")
@@ -45,7 +47,15 @@ def index():
if form.validate_on_submit(): if form.validate_on_submit():
package = form.name.data package = form.name.data
return redirect(f"/search/{package}") return redirect(f"/search/{package}")
return render_template("index.html", form=form, user=g.user) package_count = \
RecentDownloadCount.query.filter_by(category="month").count()
print(package_count)
return render_template(
"index.html",
form=form,
user=g.user,
package_count=package_count
)
@blueprint.route("/search/<package>", methods=("GET", "POST")) @blueprint.route("/search/<package>", methods=("GET", "POST"))
@@ -75,12 +85,31 @@ def about():
@blueprint.route("/package/<package>") @blueprint.route("/package/<package>")
def package(package): def package(package):
"""Render the package page.""" """Render the package page."""
# Recent download stats
recent_downloads = RecentDownloadCount.query.\
filter_by(package=package).all()
if len(recent_downloads) == 0:
abort(404)
recent = {r: 0 for r in RECENT_CATEGORIES}
for r in recent_downloads:
recent[r.category] = r.downloads
# PyPI metadata # PyPI metadata
try: metadata = None
metadata = requests.get( if package != "__all__":
f"https://pypi.python.org/pypi/{package}/json").json() try:
except Exception: metadata = requests.get(
metadata = None f"https://pypi.python.org/pypi/{package}/json",
timeout=5).json()
if metadata["info"].get("requires_dist", None):
metadata["requires"] = []
for required in metadata["info"]["requires_dist"]:
print(package, re.split(r"[^0-9a-zA-Z_.-]+", required))
metadata["requires"].append(
re.split(r"[^0-9a-zA-Z_.-]+", required)[0]
)
except Exception:
pass
# Get data from db # Get data from db
model_data = [] model_data = []
@@ -114,13 +143,6 @@ def package(package):
f"Downloads of {package} package - {model['name'].title().replace('_', ' ')}" # noqa f"Downloads of {package} package - {model['name'].title().replace('_', ' ')}" # noqa
plots.append(plot) plots.append(plot)
# Recent download stats
recent_downloads = RecentDownloadCount.query.\
filter_by(package=package).all()
recent = {r: 0 for r in RECENT_CATEGORIES}
for r in recent_downloads:
recent[r.category] = r.downloads
return render_template( return render_template(
"package.html", "package.html",
package=package, package=package,

View File

@@ -102,11 +102,12 @@ def user_package(package):
if g.user: if g.user:
# Ensure package is valid. # Ensure package is valid.
downloads = RecentDownloadCount.query.filter_by(package=package).all() downloads = RecentDownloadCount.query.filter_by(package=package).all()
if downloads is None:
return abort(400)
# Handle add/remove to favorites # Handle add/remove to favorites
if g.user.favorites is None: if g.user.favorites is None:
# Ensure package is valid before adding
if len(downloads) == 0:
return abort(400)
g.user.favorites = [package] g.user.favorites = [package]
g.user.update() g.user.update()
return redirect(url_for("user.user")) return redirect(url_for("user.user"))
@@ -121,6 +122,9 @@ def user_package(package):
return redirect(url_for("user.user")) return redirect(url_for("user.user"))
else: else:
if len(g.user.favorites) < MAX_FAVORITES: if len(g.user.favorites) < MAX_FAVORITES:
# Ensure package is valid before adding
if len(downloads) == 0:
return abort(400)
favorites = g.user.favorites favorites = g.user.favorites
favorites.append(package) favorites.append(package)
favorites = sorted(favorites) favorites = sorted(favorites)