diff --git a/pypistats/run.py b/pypistats/run.py
index 09513cd..bf71088 100644
--- a/pypistats/run.py
+++ b/pypistats/run.py
@@ -9,9 +9,10 @@ from pypistats.models.user import User
from pypistats.settings import configs
+# Change the "dev" default below when running migrations against another environment.
env = os.environ.get("ENV", "dev")
-# change this for migrations
+
app = create_app(configs[env])
diff --git a/pypistats/settings.py b/pypistats/settings.py
index 4fef6dc..814c88b 100644
--- a/pypistats/settings.py
+++ b/pypistats/settings.py
@@ -3,15 +3,13 @@ import json
import os
-# Load env vars
-ENV = os.environ.get("ENV", None)
-
-# If none then load dev locally.
-if ENV is None:
+# For local development: loads env vars from a JSON secrets file when ENV is unset.
+def load_env_vars(env="dev"):
+ """Load environment variables."""
local_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"secret",
- "env_vars_dev.json")
+ f"env_vars_{env}.json")
for key, value in json.load(open(local_path, 'r')).items():
os.environ[key] = value
@@ -36,7 +34,6 @@ class Config(object):
GITHUB_CLIENT_SECRET = os.environ.get("GITHUB_CLIENT_SECRET")
PROJECT_ROOT = os.path.abspath(os.path.join(APP_DIR, os.pardir))
SECRET_KEY = os.environ.get("PYPISTATS_SECRET", "secret-key")
- SQLALCHEMY_DATABASE_URI = get_db_uri(ENV)
SQLALCHEMY_TRACK_MODIFICATIONS = False
@@ -45,6 +42,9 @@ class ProdConfig(Config):
DEBUG = False
ENV = "prod"
+ if os.environ.get("ENV", None) is None:
+ load_env_vars(ENV)
+ SQLALCHEMY_DATABASE_URI = get_db_uri(ENV)
class DevConfig(Config):
@@ -52,6 +52,9 @@ class DevConfig(Config):
DEBUG = True
ENV = "dev"
+ if os.environ.get("ENV", None) is None:
+ load_env_vars(ENV)
+ SQLALCHEMY_DATABASE_URI = get_db_uri(ENV)
class TestConfig(Config):
@@ -59,6 +62,9 @@ class TestConfig(Config):
DEBUG = True
ENV = "dev"
+ if os.environ.get("ENV", None) is None:
+ load_env_vars(ENV)
+ SQLALCHEMY_DATABASE_URI = get_db_uri(ENV)
TESTING = True
WTF_CSRF_ENABLED = False # Allows form testing
diff --git a/pypistats/tasks/db.py b/pypistats/tasks/db.py
deleted file mode 100644
index 4859d5a..0000000
--- a/pypistats/tasks/db.py
+++ /dev/null
@@ -1,53 +0,0 @@
-"""Database tasks."""
-import psycopg2
-from sqlalchemy import create_engine
-from sqlalchemy.exc import ProgrammingError
-
-# from pypistats.extensions import db
-from pypistats.secret import postgresql
-
-
-DBNAME = "pypistats"
-
-
-def create_databases():
- """Create the databases for each environment."""
- env = "prod"
- url = \
- "postgresql://{username}:{password}@{host}:{port}/{dbname}".format(
- username=postgresql[env]["username"],
- password=postgresql[env]["password"],
- host=postgresql[env]["host"],
- port=postgresql[env]["port"],
- dbname=DBNAME,
- )
- engine = create_engine(url)
- connection = engine.connect()
-
- for env, config in postgresql.items():
- query = f"""CREATE DATABASE {config["dbname"]}"""
- try:
- connection.execute("commit")
- connection.execute(query)
- connection.execute("commit")
- print(f"Created db: {config['dbname']}.")
- except ProgrammingError:
- print(f"Database {config['dbname']} already exists.")
-
-
-def get_db_connection(env="dev"):
- """Get a db connection cursor."""
- connection = psycopg2.connect(
- dbname=postgresql[env]['dbname'],
- user=postgresql[env]['username'],
- password=postgresql[env]['password'],
- host=postgresql[env]['host'],
- port=postgresql[env]['port'],
- # sslmode='require',
- )
- cursor = connection.cursor()
- return cursor
-
-
-if __name__ == "__main__":
- create_databases()
diff --git a/pypistats/tasks/pypi.py b/pypistats/tasks/pypi.py
index fe9d272..55d317f 100644
--- a/pypistats/tasks/pypi.py
+++ b/pypistats/tasks/pypi.py
@@ -12,25 +12,16 @@ import psycopg2
from psycopg2.extras import execute_values
-# Load env vars
-ENV = os.environ.get("ENV", None)
-
-# If none then load dev locally.
-if ENV is None:
+# For local development: loads env vars from a JSON secrets file when ENV is unset.
+def load_env_vars(env="dev"):
+ """Load environment variables."""
local_path = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"secret",
- "env_vars_dev.json")
+ f"env_vars_{env}.json")
for key, value in json.load(open(local_path, 'r')).items():
os.environ[key] = value
-# # OLD: FOR LOCAL EXECUTION
-# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = \
-# os.path.join(
-# os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
-# "secret",
-# "secret.json",
-# )
# Mirrors to disregard when considering downloads
MIRRORS = ("bandersnatch", "z3c.pypimirror", "Artifactory", "devpi")
@@ -78,15 +69,19 @@ def get_google_credentials():
return credentials
-def get_daily_download_stats(date, env="dev"):
+def get_daily_download_stats(env="dev", date="None"):
"""Get daily download stats for pypi packages from BigQuery."""
start = time.time()
+ if os.environ.get("ENV", None) is None:
+ load_env_vars(env)
job_config = bigquery.QueryJobConfig()
credentials = get_google_credentials()
bq_client = bigquery.Client(
project=os.environ["GOOGLE_PROJECT_ID"],
credentials=credentials
)
+ if date == "None":
+ date = str(datetime.date.today() - datetime.timedelta(days=1))
# # Prepare a reference to the new dataset
# dataset_ref = bq_client.dataset(DATASET_ID)
@@ -130,6 +125,7 @@ def get_daily_download_stats(date, env="dev"):
query_job = bq_client.query(query, job_config=job_config)
iterator = query_job.result()
rows = list(iterator)
+ print(len(rows), "rows from gbq")
data = []
for row in rows:
@@ -149,7 +145,8 @@ def get_daily_download_stats(date, env="dev"):
"downloads",
])
- df.to_csv("ignore/sample_data.csv")
+ # # For local testing
+ # df.to_csv("ignore/sample_data.csv")
results = update_db(df, env)
print("Elapsed: " + str(time.time() - start))
@@ -158,6 +155,8 @@ def get_daily_download_stats(date, env="dev"):
def update_db(df, env="dev"):
"""Update the db with new data by table."""
+ if os.environ.get("ENV", None) is None:
+ load_env_vars(env)
connection, cursor = get_connection_cursor(env)
df_groups = df.groupby("category_label")
@@ -194,7 +193,9 @@ def update_table(connection, cursor, table, df, date):
VALUES %s"""
values = list(df.itertuples(index=False, name=None))
try:
+ print(delete_query)
cursor.execute(delete_query)
+ print(insert_query)
execute_values(cursor, insert_query, values)
connection.commit()
return True
@@ -203,10 +204,16 @@ def update_table(connection, cursor, table, df, date):
return False
-def update_all_package_stats(date, env="dev"):
+def update_all_package_stats(env="dev", date="None"):
"""Update stats for __all__ packages."""
print("__all__")
start = time.time()
+
+ if date == "None":
+ date = str(datetime.date.today() - datetime.timedelta(days=1))
+
+ if os.environ.get("ENV", None) is None:
+ load_env_vars(env)
connection, cursor = get_connection_cursor(env)
success = {}
@@ -224,7 +231,9 @@ def update_all_package_stats(date, env="dev"):
f"""INSERT INTO {table} (date, package, category, downloads)
VALUES %s"""
try:
+ print(delete_query)
cursor.execute(delete_query)
+ print(insert_query)
execute_values(cursor, insert_query, values)
connection.commit()
success[table] = True
@@ -236,10 +245,16 @@ def update_all_package_stats(date, env="dev"):
return success
-def update_recent_stats(date, env="dev"):
+def update_recent_stats(env="dev", date="None"):
"""Update daily, weekly, monthly stats for all packages."""
print("recent")
start = time.time()
+
+ if date == "None":
+ date = str(datetime.date.today() - datetime.timedelta(days=1))
+
+ if os.environ.get("ENV", None) is None:
+ load_env_vars(env)
connection, cursor = get_connection_cursor(env)
downloads_table = "overall"
@@ -272,7 +287,9 @@ def update_recent_stats(date, env="dev"):
f"""INSERT INTO {recent_table}
(package, category, downloads) VALUES %s"""
try:
+ print(delete_query)
cursor.execute(delete_query)
+ print(insert_query)
execute_values(cursor, insert_query, values)
connection.commit()
success[period] = True
@@ -298,10 +315,16 @@ def get_connection_cursor(env):
return connection, cursor
-def purge_old_data(date, env="dev", age=MAX_RECORD_AGE):
+def purge_old_data(env="dev", age=MAX_RECORD_AGE, date="None"):
"""Purge old data records."""
print("Purge")
start = time.time()
+
+ if date == "None":
+ date = str(datetime.date.today() - datetime.timedelta(days=1))
+
+ if os.environ.get("ENV", None) is None:
+ load_env_vars(env)
connection, cursor = get_connection_cursor(env)
date = datetime.datetime.strptime(date, '%Y-%m-%d')
@@ -312,6 +335,7 @@ def purge_old_data(date, env="dev", age=MAX_RECORD_AGE):
for table in PSQL_TABLES:
delete_query = f"""DELETE FROM {table} where date < '{purge_date}'"""
try:
+ print(delete_query)
cursor.execute(delete_query)
connection.commit()
success[table] = True
@@ -415,8 +439,9 @@ def get_query(date):
if __name__ == "__main__":
- date = "2018-02-09"
- env = "dev"
- # print(get_daily_download_stats(date, env))
- print(update_all_package_stats(date, env))
- # print(update_recent_stats(date, env))
+ date = "2018-04-16"
+ env = "prod"
+ print(date, env)
+ print(get_daily_download_stats(env, date))
+ print(update_all_package_stats(env, date))
+ print(update_recent_stats(env, date))
diff --git a/pypistats/templates/layout.html b/pypistats/templates/layout.html
index 148d22c..7c1c779 100644
--- a/pypistats/templates/layout.html
+++ b/pypistats/templates/layout.html
@@ -12,30 +12,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-