diff --git a/dev/config.yml b/dev/config.yml index 6a1fa59eb6c7..93603504eb13 100644 --- a/dev/config.yml +++ b/dev/config.yml @@ -5,7 +5,6 @@ site: database: url: "postgresql://localhost/warehouse" - download_statistics_url: "postgresql://localhost/warehouse" redis: url: "redis://localhost:6379/0" diff --git a/docs/configuration.rst b/docs/configuration.rst index 63a19ddb94d8..10722768292b 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -84,17 +84,6 @@ database.url The URL for the primary database. This must be a PostgreSQL 9.3+ database and must be in the form of ``postgresql://hostname[:port]/databasename``. -database.download_statistics_url -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:Type: URL -:Default: ``None`` -:Required: Yes -:Description: - The URL for the download statistics database. This must be a PostgreSQL - 9.3+ database and must be in the form of - ``postgresql://hostname[:port]/databasename``. - redis.url ~~~~~~~~~ @@ -160,7 +149,6 @@ Example Configuration database: url: "postgresql://localhost/warehouse" - download_statistics_url: "postgresql://localhost/warehouse" redis: url: "redis://localhost:6379/0" diff --git a/setup.py b/setup.py index 7110ddc87607..ef172d4f19d7 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,6 @@ def recursive_glob(path, pattern, cutdirs=0): }, install_requires=[ - "alchimia", "alembic", "arrow", "babel", diff --git a/tests/conftest.py b/tests/conftest.py index fba9ed831dc2..af7cae310634 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -79,7 +79,6 @@ def _check_name(engine, name): override={ "database": { "url": database_url, - "download_statistics_url": database_url, }, "search": {"hosts": []}, }, diff --git a/tests/test_download_statistics.py b/tests/test_download_statistics.py deleted file mode 100644 index 55fbbecfaf24..000000000000 --- a/tests/test_download_statistics.py +++ /dev/null @@ -1,587 +0,0 @@ -# Copyright 2013 Donald Stufft -# -# Licensed under the Apache License, Version 2.0 (the 
"License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -import datetime -from collections import namedtuple - -import alchimia - -import pretend - -import pytest - -from sqlalchemy import create_engine -from sqlalchemy.sql import func - -from twisted.internet.defer import succeed -from twisted.python.failure import Failure - -from warehouse.download_statistics import tables -from warehouse.download_statistics.cli import ( - TwistedCommand, FastlySyslogProtocol, FastlySyslogProtocolFactory, - process_logs_main -) -from warehouse.download_statistics.helpers import ( - ParsedUserAgent, ParsedLogLine, parse_useragent, parse_log_line, - compute_version, compute_distribution_type -) -from warehouse.download_statistics.models import DownloadStatisticsModels - - -FakeDownload = namedtuple("FakeDownload", [ - "package_name", - "package_version", - "distribution_type", - "python_type", - "python_release", - "python_version", - "installer_type", - "installer_version", - "operating_system", - "operating_system_version", - "download_time", - "raw_user_agent", -]) - - -class FakeDownloadStatisticsModels(object): - def __init__(self): - self.downloads = [] - - def create_download(self, package_name, package_version, distribution_type, - python_type, python_release, python_version, - installer_type, installer_version, operating_system, - operating_system_version, download_time, - raw_user_agent): - self.downloads.append(FakeDownload( - 
package_name=package_name, - package_version=package_version, - distribution_type=distribution_type, - python_type=python_type, - python_release=python_release, - python_version=python_version, - installer_type=installer_type, - installer_version=installer_version, - operating_system=operating_system, - operating_system_version=operating_system_version, - download_time=download_time, - raw_user_agent=raw_user_agent, - )) - - -class FakeThreaderedReactor(object): - def getThreadPool(self): - return FakeThreadPool() - - def callFromThread(self, f, *args, **kwargs): - return f(*args, **kwargs) - - -class FakeThreadPool(object): - def callInThreadWithCallback(self, cb, f, *args, **kwargs): - try: - result = f(*args, **kwargs) - except Exception as e: - cb(False, Failure(e)) - else: - cb(True, result) - - -class TestParsing(object): - @pytest.mark.parametrize(("ua", "expected"), [ - ( - "Python-urllib/2.7 setuptools/2.0", - ParsedUserAgent( - python_version="2.7", - python_release=None, - python_type=None, - - installer_type="setuptools", - installer_version="2.0", - - operating_system=None, - operating_system_version=None, - raw_user_agent="Python-urllib/2.7 setuptools/2.0", - ) - ), - ( - "Python-urllib/2.6 distribute/0.6.10", - ParsedUserAgent( - python_version="2.6", - python_release=None, - python_type=None, - - installer_type="distribute", - installer_version="0.6.10", - - operating_system=None, - operating_system_version=None, - raw_user_agent="Python-urllib/2.6 distribute/0.6.10", - ) - ), - ( - "Python-urllib/2.7", - ParsedUserAgent( - python_version="2.7", - python_release=None, - python_type=None, - - installer_type="pip", - installer_version=None, - - operating_system=None, - operating_system_version=None, - raw_user_agent="Python-urllib/2.7", - ) - ), - ( - "pip/1.4.1 CPython/2.7.6 Darwin/12.5.0", - ParsedUserAgent( - python_version="2.7.6", - python_release=None, - python_type="cpython", - - installer_type="pip", - installer_version="1.4.1", - - 
operating_system="Darwin", - operating_system_version="12.5.0", - raw_user_agent="pip/1.4.1 CPython/2.7.6 Darwin/12.5.0", - ) - ), - ( - "pip/1.5rc1 PyPy/2.2.1 Linux/2.6.32-042stab061.2", - ParsedUserAgent( - python_version="2.7.3", - python_release="2.2.1", - python_type="pypy", - - installer_type="pip", - installer_version="1.5rc1", - - operating_system="Linux", - operating_system_version="2.6.32-042stab061.2", - raw_user_agent=( - "pip/1.5rc1 PyPy/2.2.1 Linux/2.6.32-042stab061.2" - ), - ) - ), - ( - "pip/1.4.1 CPython/2.7.3 CYGWIN_NT-6.1-WOW64/1.7.25(0.270/5/3)", - ParsedUserAgent( - python_version="2.7.3", - python_release=None, - python_type="cpython", - - installer_type="pip", - installer_version="1.4.1", - - operating_system="CYGWIN_NT-6.1-WOW64", - operating_system_version="1.7.25(0.270/5/3)", - raw_user_agent=( - "pip/1.4.1 CPython/2.7.3 " - "CYGWIN_NT-6.1-WOW64/1.7.25(0.270/5/3)" - ), - ) - ), - ( - ("bandersnatch/1.1 (CPython 2.7.3-final0, " - "Linux 3.8.0-31-generic x86_64)"), - ParsedUserAgent( - python_version="2.7.3-final0", - python_release=None, - python_type="cpython", - - installer_type="bandersnatch", - installer_version="1.1", - - operating_system="Linux", - operating_system_version="3.8.0-31-generic x86_64", - raw_user_agent=( - "bandersnatch/1.1 (CPython 2.7.3-final0, " - "Linux 3.8.0-31-generic x86_64)" - ), - ) - ), - ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)", - ParsedUserAgent( - python_version=None, - python_release=None, - python_type=None, - - installer_type="browser", - installer_version=None, - - operating_system=None, - operating_system_version=None, - raw_user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)" - ) - ), - ( - "BlackBerry9700/5.0.0.743", - ParsedUserAgent( - python_version=None, - python_release=None, - python_type=None, - - installer_type="browser", - installer_version=None, - - operating_system=None, - operating_system_version=None, - raw_user_agent="BlackBerry9700/5.0.0.743", - ) - ), - ( - 
"z3c.pypimirror/1.0.16", - ParsedUserAgent( - python_version=None, - python_release=None, - python_type=None, - - installer_type="z3c.pypimirror", - installer_version="1.0.16", - - operating_system=None, - operating_system_version=None, - raw_user_agent="z3c.pypimirror/1.0.16", - ) - ), - ( - "pep381client/1.5", - ParsedUserAgent( - python_version=None, - python_release=None, - python_type=None, - - installer_type="pep381client", - installer_version="1.5", - - operating_system=None, - operating_system_version=None, - raw_user_agent="pep381client/1.5", - ) - ), - ( - "devpi-server/1.2.1 (py2.7.3rc2; linux2)", - ParsedUserAgent( - python_version="2.7.3rc2", - python_release=None, - python_type=None, - - installer_type="devpi", - installer_version="1.2.1", - - operating_system=None, - operating_system_version="linux2", - raw_user_agent="devpi-server/1.2.1 (py2.7.3rc2; linux2)", - ) - ), - ( - "Go 1.1 package http", - ParsedUserAgent( - python_version=None, - python_release=None, - python_type=None, - - installer_type=None, - installer_version=None, - - operating_system=None, - operating_system_version=None, - raw_user_agent="Go 1.1 package http", - ) - ), - ( - "errant nonsense here", - ParsedUserAgent( - python_version=None, - python_release=None, - python_type=None, - - installer_type=None, - installer_version=None, - - operating_system=None, - operating_system_version=None, - raw_user_agent="errant nonsense here", - ) - ) - ]) - def test_parse_useragent(self, ua, expected): - assert parse_useragent(ua) == expected - - def test_parse_log_line(self): - line = ( - '2013-12-08T23:24:40Z cache-c31 pypi-cdn[18322]: 199.182.120.6 ' - '"Sun, 08 Dec 2013 23:24:40 GMT" "-" "GET ' - '/packages/source/I/INITools/INITools-0.2.tar.gz" HTTP/1.1 200 ' - '16930 156751 HIT 326 "(null)" "(null)" "pip/1.5rc1 PyPy/2.2.1 ' - 'Linux/2.6.32-042stab061.2"\n' - ) - assert parse_log_line(line) == ParsedLogLine( - package_name="INITools", - package_version="0.2", - distribution_type="sdist", 
- download_time=datetime.datetime(2013, 12, 8, 23, 24, 40), - user_agent=ParsedUserAgent( - python_version="2.7.3", - python_release="2.2.1", - python_type="pypy", - installer_type="pip", - installer_version="1.5rc1", - operating_system="Linux", - operating_system_version="2.6.32-042stab061.2", - raw_user_agent=( - "pip/1.5rc1 PyPy/2.2.1 Linux/2.6.32-042stab061.2" - ), - ) - ) - - line = ( - '2013-12-08T23:27:24Z cache-c31 pypi-cdn[11386]: 199.182.120.6 ' - '"Sun, 08 Dec 2013 23:27:24 GMT" "-" ' - '"GET /packages/2.7/w/wheel/wheel-0.22.0-py2.py3-none-any.whl" ' - 'HTTP/1.1 200 54823 329778 HIT 42 "(null)" "(null)" ' - '"pip/1.5rc1 PyPy/2.2.1 Linux/2.6.32-042stab061.2"' - ) - assert parse_log_line(line) == ParsedLogLine( - package_name="wheel", - package_version="0.22.0", - distribution_type="bdist_wheel", - download_time=datetime.datetime(2013, 12, 8, 23, 27, 24), - user_agent=ParsedUserAgent( - python_version="2.7.3", - python_release="2.2.1", - python_type="pypy", - installer_type="pip", - installer_version="1.5rc1", - operating_system="Linux", - operating_system_version="2.6.32-042stab061.2", - raw_user_agent=( - "pip/1.5rc1 PyPy/2.2.1 Linux/2.6.32-042stab061.2" - ), - ) - ) - - def test_parse_log_line_not_download(self): - # The URL path doesn't point at a package download - line = ( - '2013-12-08T23:24:34Z cache-v43 pypi-cdn[18322]: 162.243.117.93 ' - '"Sun, 08 Dec 2013 23:24:33 GMT" "-" "GET /simple/icalendar/3.5" ' - 'HTTP/1.1 200 0 0 MISS 0 "(null)" "(null)" "Python-urllib/2.7"' - ) - assert parse_log_line(line) is None - - line = ( - '2013-12-08T23:24:46Z cache-fra1232 pypi-cdn[7902]: 193.183.99.5 ' - '"Sun, 08 Dec 2013 23:20:28 GMT" "-" "GET ' - '/packages/source/p/pymongo/" HTTP/1.1 200 9944 33573 HIT 1 ' - '"(null)" "en" "Lynx/2.8.8dev.9 libwww-FM/2.14 SSL-MM/1.4.1 ' - 'GNUTLS/2.12.14"' - ) - assert parse_log_line(line) is None - - line = ( - '2013-12-08T23:25:04Z cache-ty68 pypi-cdn[18322]: 1.72.6.148 ' - '"Sun, 08 Dec 2013 23:25:03 GMT" "-" ' - '"GET 
/packages/source/P/PyMySQL/PyMySQL-0.6.1.tar.gzwget" ' - 'HTTP/1.0 301 0 0 MISS 0 "(null)" "(null)" ' - '"Wget/1.12 (solaris2.11)"' - ) - assert parse_log_line(line) is None - - line = ( - '2013-12-08T23:24:35.150361+00:00 cache-c32 pypi-cdn[11386]: last ' - 'message repeated 2 times' - ) - assert parse_log_line(line) is None - - def test_parse_log_line_non_ascii(self): - line = ( - b'2013-12-08T23:24:34Z cache-v43 pypi-cdn[18322]: 162.243.117.93 ' - b'"Sun, 08 Dec 2013 23:24:33 GMT" "-" "GET /simple/icalendar/3.5" ' - b'HTTP/1.1 200 0 0 MISS 0 "(null)" "(\xff)" "Python-urllib/2.7"' - ) - assert parse_log_line(line) is None - - def test_parse_log_line_bad_quoting(self): - line = ( - '2013-12-08T23:27:24Z cache-c31 pypi-cdn[11386]: 199.182.120.6 ' - '"Sun, 08 Dec 2013 23:27:24 GMT" "-" ' - '"GET /packages/2.7/" /wheel/wheel-0.22.0-py2.py3-none-any.whl" ' - 'HTTP/1.1 200 54823 329778 HIT 42 "(null)" "(null)" ' - '"pip/1.5rc1 PyPy/2.2.1 Linux/2.6.32-042stab061.2"' - ) - assert parse_log_line(line) is None - - @pytest.mark.parametrize(("filename", "expected"), [ - ("INITools-0.2.tar.gz", "0.2"), - ("wheel-0.22.0-py2.py3-none-any.whl", "0.22.0"), - ("Twisted-12.0.0.win32-py2.7.msi", None), - ]) - def test_compute_version(self, filename, expected): - assert compute_version(filename) == expected - - @pytest.mark.parametrize(("filename", "expected"), [ - ("foo.tar.gz", "sdist"), - ("foo.tar.bz2", "sdist"), - ("foo.tgz", "sdist"), - ("foo.zip", "sdist"), - ("foo.whl", "bdist_wheel"), - ("foo.egg", "bdist_egg"), - ("foo.exe", "bdist_wininst"), - ("foo.msi", "bdist_msi"), - ("foo.dmg", "bdist_dmg"), - ("foo.rpm", "bdist_rpm"), - ("foo", None) - ]) - def test_compute_distribution_type(self, filename, expected): - assert compute_distribution_type(filename) == expected - - -class TestModels(object): - def test_create_download(self, _database_url): - tw_engine = create_engine( - _database_url, - strategy=alchimia.TWISTED_STRATEGY, - reactor=FakeThreaderedReactor() - ) - models = 
DownloadStatisticsModels(tw_engine) - models.create_download( - package_name="foo", - package_version="1.0", - distribution_type="sdist", - python_type="cpython", - python_release=None, - python_version="2.7", - installer_type="pip", - installer_version="1.4", - operating_system=None, - operating_system_version=None, - download_time=datetime.datetime.utcnow(), - raw_user_agent="foo", - ) - - engine = create_engine(_database_url) - res = engine.execute(func.count(tables.downloads.c.id)) - assert res.scalar() == 1 - - -class TestFastlySyslog(object): - - def test_connection_lost(self): - deferred = pretend.stub(callback=pretend.call_recorder(lambda x: None)) - protocol = FastlySyslogProtocol(pretend.stub(), deferred) - protocol.connectionLost(None) - - assert deferred.callback.calls == [pretend.call(None)] - - def test_handle_line(self): - line = ( - '2013-12-08T23:24:40Z cache-c31 pypi-cdn[18322]: 199.182.120.6 ' - '"Sun, 08 Dec 2013 23:24:40 GMT" "-" "GET ' - '/packages/source/I/INITools/INITools-0.2.tar.gz" HTTP/1.1 200 ' - '16930 156751 HIT 326 "(null)" "(null)" "pip/1.5rc1 PyPy/2.2.1 ' - 'Linux/2.6.32-042stab061.2"\n' - ) - - models = FakeDownloadStatisticsModels() - protocol = FastlySyslogProtocol(models, None) - protocol.handle_line(line) - - assert models.downloads == [ - FakeDownload( - package_name="INITools", - package_version="0.2", - distribution_type="sdist", - download_time=datetime.datetime(2013, 12, 8, 23, 24, 40), - python_version="2.7.3", - python_release="2.2.1", - python_type="pypy", - installer_type="pip", - installer_version="1.5rc1", - operating_system="Linux", - operating_system_version="2.6.32-042stab061.2", - raw_user_agent=( - "pip/1.5rc1 PyPy/2.2.1 Linux/2.6.32-042stab061.2" - ), - ) - ] - - def test_handle_line_not_download(self): - # The URL path doesn't point at a package download - line = ( - '2013-12-08T23:24:34Z cache-v43 pypi-cdn[18322]: 162.243.117.93 ' - '"Sun, 08 Dec 2013 23:24:33 GMT" "-" "GET /simple/icalendar/3.5" ' - 
'HTTP/1.1 301 0 0 MISS 0 "(null)" "(null)" "Python-urllib/2.7"' - ) - models = FakeDownloadStatisticsModels() - protocol = FastlySyslogProtocol(models, None) - protocol.handle_line(line) - - assert models.downloads == [] - - def test_lineReceived_error(self): - line = ( - '2013-12-08T23:24:40Z cache-c31 pypi-cdn[18322]: 199.182.120.6 ' - '"Sun, 08 Dec 2013 23:24:40 GMT" "-" "GET ' - '/packages/source/I/INITools/INITools-0.2.tar.gz" HTTP/1.1 200 ' - '16930 156751 HIT 326 "(null)" "(null)" "pip/1.5rc1 PyPy/2.2.1 ' - 'Linux/2.6.32-042stab061.2"\n' - ) - - models = pretend.stub(create_download=pretend.raiser(ValueError)) - protocol = FastlySyslogProtocol(models, None) - protocol.lineReceived(line) - # No exception was raised - - def test_factory_buildProtocol(self): - engine = pretend.stub() - factory = FastlySyslogProtocolFactory(engine, None) - protocol = factory.buildProtocol(None) - assert protocol._models._engine is engine - - def test_main(self, _database_url): - app = pretend.stub( - config=pretend.stub( - database=pretend.stub( - download_statistics_url=_database_url - ) - ) - ) - fake_reactor = pretend.stub() - process_logs_main(fake_reactor, app) - - def test_twisted_command(self): - @pretend.call_recorder - def main(reactor, app): - return succeed(None) - - app = pretend.stub() - reactor = pretend.stub( - addSystemEventTrigger=lambda when, event, f, *args, **kwargs: None, - run=lambda: None, - ) - command = TwistedCommand(main, reactor=reactor) - with pytest.raises(SystemExit) as exc_info: - command(app) - assert exc_info.value.code == 0 - - assert main.calls == [pretend.call(reactor, app)] diff --git a/warehouse/application.py b/warehouse/application.py index d715efae8c04..a46bd635831a 100644 --- a/warehouse/application.py +++ b/warehouse/application.py @@ -52,7 +52,6 @@ # Register the SQLAlchemy tables by importing them import warehouse.accounts.tables -import warehouse.download_statistics.tables import warehouse.packaging.tables # Get the various models 
diff --git a/warehouse/cli.py b/warehouse/cli.py index 253b77b64616..fe85648139b5 100644 --- a/warehouse/cli.py +++ b/warehouse/cli.py @@ -17,7 +17,6 @@ import werkzeug.serving import warehouse -import warehouse.download_statistics.cli import warehouse.migrations.cli import warehouse.search.cli @@ -66,5 +65,4 @@ def create_parser(self, parser): "migrate": warehouse.migrations.cli.__commands__, "search": warehouse.search.cli.__commands__, "serve": ServeCommand(), - "download-statistics": warehouse.download_statistics.cli.__commands__, } diff --git a/warehouse/download_statistics/__init__.py b/warehouse/download_statistics/__init__.py deleted file mode 100644 index 8be02df20cfb..000000000000 --- a/warehouse/download_statistics/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright 2013 Donald Stufft -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) diff --git a/warehouse/download_statistics/cli.py b/warehouse/download_statistics/cli.py deleted file mode 100644 index e9e14b9c9f9d..000000000000 --- a/warehouse/download_statistics/cli.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright 2013 Donald Stufft -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -import json -import logging - -import alchimia - -import sqlalchemy - -from twisted.internet.defer import Deferred -from twisted.internet.endpoints import StandardIOEndpoint -from twisted.internet.protocol import Factory -from twisted.internet.task import react -from twisted.protocols.basic import LineOnlyReceiver - -from warehouse.download_statistics.helpers import parse_log_line -from warehouse.download_statistics.models import DownloadStatisticsModels - - -logger = logging.getLogger(__name__) - - -class FastlySyslogProtocol(LineOnlyReceiver): - delimiter = b"\n" - - def __init__(self, models, finished): - self._models = models - self._finished = finished - - def connectionLost(self, reason): - self._finished.callback(None) - - def lineReceived(self, line): - try: - self.handle_line(line) - except Exception: - logger.exception(json.dumps({ - "event": "download_statistics.lineReceived.exception", - "line": repr(line) - })) - - def handle_line(self, line): - parsed = parse_log_line(line) - if parsed is None: - return - - ua = parsed.user_agent - self._models.create_download( - package_name=parsed.package_name, - package_version=parsed.package_version, - distribution_type=parsed.distribution_type, - python_type=ua.python_type, - python_release=ua.python_release, - python_version=ua.python_version, - installer_type=ua.installer_type, - installer_version=ua.installer_version, - operating_system=ua.operating_system, - 
operating_system_version=ua.operating_system_version, - download_time=parsed.download_time, - raw_user_agent=ua.raw_user_agent, - ) - - -class FastlySyslogProtocolFactory(Factory): - def __init__(self, engine, finished): - self._engine = engine - self._finished = finished - - def buildProtocol(self, addr): - return FastlySyslogProtocol( - DownloadStatisticsModels(self._engine), - self._finished, - ) - - -class TwistedCommand(object): - def __init__(self, main_func, reactor=None): - self._main_func = main_func - self._reactor = reactor - - def __call__(self, app): - react(self._main_func, [app], _reactor=self._reactor) - - -def process_logs_main(reactor, app): - finished = Deferred() - - download_statistic_engine = sqlalchemy.create_engine( - app.config.database.download_statistics_url, - strategy=alchimia.TWISTED_STRATEGY, - reactor=reactor - ) - endpoint = StandardIOEndpoint(reactor) - endpoint.listen( - FastlySyslogProtocolFactory(download_statistic_engine, finished), - ) - return finished - - -__commands__ = { - "process-logs": TwistedCommand(process_logs_main), -} diff --git a/warehouse/download_statistics/helpers.py b/warehouse/download_statistics/helpers.py deleted file mode 100644 index 25e8850fd80e..000000000000 --- a/warehouse/download_statistics/helpers.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright 2013 Donald Stufft -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -import csv -import datetime -import json -import logging -import posixpath -import re -import urlparse - -from collections import namedtuple -from email.utils import parsedate - -from setuptools.package_index import distros_for_url - - -logger = logging.getLogger(__name__) - - -ParsedUserAgent = namedtuple("ParsedUserAgent", [ - "raw_user_agent", - - "python_type", - "python_release", - "python_version", - - "installer_type", - "installer_version", - - "operating_system", - "operating_system_version", -]) -ParsedLogLine = namedtuple("ParsedLogLine", [ - "package_name", - "package_version", - "distribution_type", - "download_time", - "user_agent", -]) - -PYTHON_IMPL_RELEASE_TO_VERSION = { - ("pypy", "2.2.1"): "2.7.3", - ("pypy", "2.2.0"): "2.7.3", - ("pypy", "2.1.0"): "2.7.3", -} - -BANDERSNATCH_RE = re.compile(r""" -\((?P<python_type>.*?)\ (?P<python_version>.*?), -\ (?P<operating_system>.*?)\ (?P<operating_system_version>.*?)\) -""", re.VERBOSE) - -DEVPI_RE = re.compile(r""" -\(py(?P<python_version>.*?);\ (?P<operating_system_version>.*?)\) -""", re.VERBOSE) - -IGNORED_UAS = re.compile(r""" -(^Go\ .*?\ package\ http) | -(^Wget/) | -(^curl/) | -(^python-requests/) | -(^Homebrew) | -(^Chef\ Client/) | -(^fetch\ libfetch/) | -(^MacPorts) | -(^\(null\)$) -""", re.VERBOSE) - -WHEEL_RE = re.compile(r""" -^(?P<namever>(?P<name>.+?)(-(?P<ver>\d.+?))?) -((-(?P<build>\d.*?))?-(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?) 
-\.whl|\.dist-info)$ -""", re.VERBOSE) - - -def parse_useragent(ua): - python_type = None - python_version = None - python_release = None - installer_type = None - installer_version = None - operating_system = None - operating_system_version = None - - if ua.startswith("pip/"): - pip_part, python_part, system_part = ua.split(" ") - installer_type, installer_version = pip_part.split("/") - python_type, python_release = python_part.split("/") - operating_system, operating_system_version = system_part.split("/", 1) - elif "setuptools/" in ua or "distribute/" in ua: - urllib_part, installer_part = ua.split(" ") - _, python_version = urllib_part.split("/") - installer_type, installer_version = installer_part.split("/") - elif ua.startswith("Python-urllib"): - _, python_version = ua.split("/") - # Probably, technically it could just be a random urllib user - installer_type = "pip" - elif ua.startswith("bandersnatch"): - bander_part, rest = ua.split(" ", 1) - installer_type, installer_version = bander_part.split("/") - match = BANDERSNATCH_RE.match(rest) - python_type = match.group("python_type") - python_version = match.group("python_version") - operating_system = match.group("operating_system") - operating_system_version = match.group("operating_system_version") - elif ua.startswith("devpi-server"): - devpi_part, rest = ua.split(" ", 1) - _, installer_version = devpi_part.split("/") - installer_type = "devpi" - match = DEVPI_RE.match(rest) - python_version = match.group("python_version") - operating_system_version = match.group("operating_system_version") - elif ua.startswith(("z3c.pypimirror", "pep381client")): - installer_type, installer_version = ua.split("/") - elif "Mozilla" in ua or ua.startswith(("BlackBerry", "Opera")): - installer_type = "browser" - else: - if not IGNORED_UAS.search(ua): - logger.info(json.dumps({ - "event": "download_statitics.parse_useragent.ignore", - "user_agent": ua, - })) - - if python_type is not None: - python_type = python_type.lower() 
- if python_type == "cpython" and python_release is not None: - python_version = python_release - python_release = None - if python_version is None: - python_version = PYTHON_IMPL_RELEASE_TO_VERSION.get( - (python_type, python_release) - ) - - return ParsedUserAgent( - python_type=python_type, - python_release=python_release, - python_version=python_version, - - installer_type=installer_type, - installer_version=installer_version, - - operating_system=operating_system, - operating_system_version=operating_system_version, - - raw_user_agent=ua, - ) - - -def parse_log_line(line): - # Some weird syslog/fastly thing, just ignore it - if b"last message repeated" in line: - return - - row = list(csv.reader([line], delimiter=str(" ")))[0] - timestamp = row[4] - req = row[6] - response_status = row[8] - ua = row[15] - - try: - if int(response_status) != 200: - return - except ValueError: - # Broken log lines cause this - return - - path = urlparse.urlparse(req.split(" ", 1)[1]).path - - if not path.startswith("/packages/"): - return - - download_time = datetime.datetime(*parsedate(timestamp)[:6]) - directory, filename = posixpath.split(path) - - if not filename or filename.endswith(".asc"): - return - - project = posixpath.basename(directory) - return ParsedLogLine( - package_name=project, - package_version=compute_version(filename), - distribution_type=compute_distribution_type(filename), - download_time=download_time, - user_agent=parse_useragent(ua) - ) - - -def compute_version(filename): - match = WHEEL_RE.match(filename) - if match: - return match.group("ver") - try: - distro = next(distros_for_url(filename)) - except StopIteration: - logger.info({ - "event": "download_statitics.compute_version.ignore", - "filename": filename - }) - return None - else: - return distro.version - - -def compute_distribution_type(filename): - if filename.endswith((".tar.gz", ".tar.bz2", ".tgz", ".zip")): - return "sdist" - elif filename.endswith(".egg"): - return "bdist_egg" - elif 
filename.endswith(".exe"): - return "bdist_wininst" - elif filename.endswith(".whl"): - return "bdist_wheel" - elif filename.endswith(".msi"): - return "bdist_msi" - elif filename.endswith(".dmg"): - return "bdist_dmg" - elif filename.endswith(".rpm"): - return "bdist_rpm" - else: - logger.info(json.dumps({ - "event": "download_statitics.compute_distribution_type.ignore", - "filename": filename - })) - return None diff --git a/warehouse/download_statistics/models.py b/warehouse/download_statistics/models.py deleted file mode 100644 index 620f4421ab3f..000000000000 --- a/warehouse/download_statistics/models.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2013 Donald Stufft -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -class DownloadStatisticsModels(object): - def __init__(self, engine): - self._engine = engine - - def create_download(self, package_name, package_version, distribution_type, - python_type, python_release, python_version, - installer_type, installer_version, operating_system, - operating_system_version, download_time, - raw_user_agent): - - from warehouse.download_statistics.tables import downloads - - return self._engine.execute(downloads.insert().values( - package_name=package_name, - package_version=package_version, - distribution_type=distribution_type, - python_type=python_type, - python_release=python_release, - python_version=python_version, - installer_type=installer_type, - installer_version=installer_version, - operating_system=operating_system, - operating_system_version=operating_system_version, - download_time=download_time, - raw_user_agent=raw_user_agent, - )) diff --git a/warehouse/download_statistics/tables.py b/warehouse/download_statistics/tables.py deleted file mode 100644 index 3f1847e48462..000000000000 --- a/warehouse/download_statistics/tables.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2013 Donald Stufft -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from sqlalchemy import ( - Table, Column, UnicodeText, Text, Enum, DateTime -) -from sqlalchemy.dialects.postgresql import UUID -from sqlalchemy.sql import func - -from warehouse import db - - -downloads = Table( - "downloads", db.metadata, - Column( - "id", - UUID(), - primary_key=True, - nullable=False, - server_default=func.uuid_generate_v4() - ), - - Column("package_name", UnicodeText(), nullable=False), - Column("package_version", UnicodeText()), - Column( - "distribution_type", - Enum("sdist", "wheel", "exe", "egg", "msi", name="distribution_type") - ), - - Column( - "python_type", - Enum("cpython", "pypy", "jython", "ironpython", name="python_type") - ), - Column("python_release", Text()), - Column("python_version", Text()), - - Column( - "installer_type", - Enum( - "browser", - "pip", - "setuptools", - "distribute", - "bandersnatch", - "z3c.pypimirror", - "pep381client", - "devpi", - name="installer_type" - ) - ), - Column("installer_version", Text()), - - Column("operating_system", Text()), - Column("operating_system_version", Text()), - - Column("download_time", DateTime(), nullable=False), - Column("raw_user_agent", Text(), nullable=False), -) diff --git a/warehouse/migrations/versions/8f38eea7678_remove_the_download_statistics.py b/warehouse/migrations/versions/8f38eea7678_remove_the_download_statistics.py new file mode 100644 index 000000000000..327c85805e21 --- /dev/null +++ b/warehouse/migrations/versions/8f38eea7678_remove_the_download_statistics.py @@ -0,0 +1,148 @@ +# Copyright 2013 Donald Stufft +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Remove the download statistics + +Revision ID: 8f38eea7678 +Revises: 4c8b2dd27587 +Create Date: 2014-03-02 21:19:24.642402 +""" +from __future__ import absolute_import, division, print_function + +# revision identifiers, used by Alembic. +revision = '8f38eea7678' +down_revision = '4c8b2dd27587' + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + + +def upgrade(): + op.drop_table('downloads') + op.execute("DROP TYPE distribution_type") + op.execute("DROP TYPE python_type") + op.execute("DROP TYPE installer_type") + + +def downgrade(): + op.create_table( + 'downloads', + sa.Column( + 'id', + postgresql.UUID(), + server_default=sa.text('uuid_generate_v4()'), + autoincrement=False, + nullable=False, + ), + sa.Column( + 'package_name', + sa.TEXT(), + autoincrement=False, + nullable=False, + ), + sa.Column( + 'package_version', + sa.TEXT(), + autoincrement=False, + nullable=True, + ), + sa.Column( + 'distribution_type', + postgresql.ENUM( + u'bdist_dmg', + u'bdist_dumb', + u'bdist_egg', + u'bdist_msi', + u'bdist_rpm', + u'bdist_wheel', + u'bdist_wininst', + u'sdist', + name='distribution_type', + ), + autoincrement=False, + nullable=True, + ), + sa.Column( + 'python_type', + postgresql.ENUM( + u'cpython', + u'pypy', + u'jython', + u'ironpython', + name='python_type', + ), + autoincrement=False, + nullable=True, + ), + sa.Column( + 'python_release', + sa.TEXT(), + autoincrement=False, + nullable=True, + ), + sa.Column( + 'python_version', + sa.TEXT(), + autoincrement=False, + nullable=True, + ), + sa.Column( + 
'installer_type', + postgresql.ENUM( + u'browser', + u'pip', + u'setuptools', + u'distribute', + u'bandersnatch', + u'z3c.pypimirror', + u'pep381client', + u'devpi', + name='installer_type', + ), + autoincrement=False, + nullable=True, + ), + sa.Column( + 'installer_version', + sa.TEXT(), + autoincrement=False, + nullable=True, + ), + sa.Column( + 'operating_system', + sa.TEXT(), + autoincrement=False, + nullable=True, + ), + sa.Column( + 'operating_system_version', + sa.TEXT(), + autoincrement=False, + nullable=True, + ), + sa.Column( + 'download_time', + postgresql.TIMESTAMP(), + autoincrement=False, + nullable=False, + ), + sa.Column( + 'raw_user_agent', + sa.TEXT(), + autoincrement=False, + nullable=False, + ), + sa.PrimaryKeyConstraint('id', name=u'downloads_pkey'), + )