diff --git a/.coveragerc b/.coveragerc index 6e1334bda..c2d1c7c9a 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,6 +1,21 @@ [run] branch = 1 cover_pylib = 0 -omit = celery.utils.debug,celery.tests.*,celery.bin.graph +include=*celery/* +omit = celery.tests.* + [report] -omit = */python?.?/*,*/site-packages/*,*/pypy/* +omit = + */python?.?/* + */site-packages/* + */pypy/* + */celery/bin/graph.py + *celery/bin/logtool.py + *celery/task/base.py + *celery/five.py + *celery/contrib/sphinx.py + *celery/backends/couchdb.py + *celery/backends/couchbase.py + *celery/backends/riak.py + *celery/concurrency/asynpool.py + *celery/utils/debug.py diff --git a/.gitignore b/.gitignore index 0f856d445..70d602b25 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,4 @@ Documentation/ celery/tests/cover/ .ve* cover/ - +.vagrant/ diff --git a/.travis.yml b/.travis.yml index a30e3602f..26d593ad5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,42 +1,25 @@ language: python -python: 2.7 +sudo: false +cache: false +python: + - '3.5' env: global: PYTHONUNBUFFERED=yes matrix: - - TOXENV=2.6 - - TOXENV=2.7 - - TOXENV=3.3 - - TOXENV=3.4 + - TOXENV=2.7 + - TOXENV=3.4 - TOXENV=pypy -before_install: - - | - if [[ $TOXENV = pypy ]]; then - deactivate - sudo apt-add-repository --yes ppa:pypy/ppa - sudo apt-get update - sudo apt-get install pypy - source ~/virtualenv/pypy/bin/activate - fi - if [[ $TOXENV = 3.4 ]]; then - sudo apt-get update - sudo apt-get install python3.4-dev - source ~/virtualenv/python3.4 - virtualenv ~/virtualenv/python3.4 --python=$(which python3.4) - source ~/virtualenv/python3.4/bin/activate - fi - python --version - uname -a - lsb_release -a -install: - - pip install tox -script: - - tox -v -- -v + - TOXENV=3.5 + - TOXENV=pypy3 +install: travis_retry pip install -U tox +script: tox -v -- -v after_success: - - .tox/$TRAVIS_PYTHON_VERSION/bin/coveralls + - .tox/$TRAVIS_PYTHON_VERSION/bin/coverage xml + - .tox/$TRAVIS_PYTHON_VERSION/bin/codecov -e TOXENV notifications: irc: 
channels: - "chat.freenode.net#celery" - on_success: always - on_failure: always \ No newline at end of file + on_success: change + on_failure: change diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 6a248b428..cd1948254 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -187,7 +187,7 @@ the developers fix the bug. A bug could be fixed by some other improvements and fixes - it might not have an existing report in the bug tracker. Make sure you're using the latest releases of -celery, billiard and kombu. +celery, billiard, kombu, amqp and vine. 5) **Collect information about the bug.** @@ -246,6 +246,7 @@ issue tracker. * Celery: http://github.com/celery/celery/issues/ * Kombu: http://github.com/celery/kombu/issues * pyamqp: http://github.com/celery/pyamqp/issues +* vine: http://github.com/celery/vine/issues * librabbitmq: http://github.com/celery/librabbitmq/issues * Django-Celery: http://github.com/celery/django-celery/issues @@ -271,7 +272,7 @@ semver: http://semver.org. Stable releases are published at PyPI while development releases are only available in the GitHub git repository as tags. -All version tags starts with "v", so version 0.8.0 is the tag v0.8.0. +All version tags starts with “v”, so version 0.8.0 is the tag v0.8.0. .. _git-branches: @@ -442,12 +443,20 @@ To run the Celery test suite you need to install a few dependencies. A complete list of the dependencies needed are located in ``requirements/test.txt``. 
-Installing the test requirements: +If you're working on the development version, then you need to +install the development requirements first: +:: + + $ pip install -U -r requirements/dev.txt + +Both the stable and the development version have testing related +dependencies, so install these next: :: $ pip install -U -r requirements/test.txt + $ pip install -U -r requirements/default.txt -When installation of dependencies is complete you can execute +After installing the dependencies required, you can now execute the test suite by calling ``nosetests``: :: @@ -538,7 +547,7 @@ If you only want to test specific Python versions use the ``-e`` option: :: - $ tox -e py26 + $ tox -e 2.7 Building the documentation -------------------------- @@ -580,13 +589,13 @@ To ensure that your changes conform to PEP8 and to run pyflakes execute: :: - $ paver flake8 + $ make flakecheck -To not return a negative exit code when this command fails use the -``-E`` option, this can be convenient while developing: +To not return a negative exit code when this command fails use +the ``flakes`` target instead: :: - $ paver flake8 -E + $ make flakes API reference ~~~~~~~~~~~~~ @@ -595,8 +604,8 @@ To make sure that all modules have a corresponding section in the API reference please execute: :: - $ paver autodoc - $ paver verifyindex + $ make apicheck + $ make indexcheck If files are missing you can add them by copying an existing reference file. @@ -619,7 +628,7 @@ Edit the file using your favorite editor: $ vim celery.worker.awesome.rst - # change every occurance of ``celery.schedules`` to + # change every occurrence of ``celery.schedules`` to # ``celery.worker.awesome`` @@ -812,7 +821,7 @@ that require 3rd party libraries must be added. 
:: $ pip install -U requirements/pkgutils.txt - $ paver readme + $ make readme That's all that needs to be done, but remember that if your feature @@ -887,6 +896,7 @@ celery :git: https://github.com/celery/celery :CI: http://travis-ci.org/#!/celery/celery +:Windows-CI: https://ci.appveyor.com/project/ask/celery :PyPI: http://pypi.python.org/pypi/celery :docs: http://docs.celeryproject.org @@ -897,6 +907,7 @@ Messaging library. :git: https://github.com/celery/kombu :CI: http://travis-ci.org/#!/celery/kombu +:Windows-CI: https://ci.appveyor.com/project/ask/kombu :PyPI: http://pypi.python.org/pypi/kombu :docs: http://kombu.readthedocs.org @@ -907,9 +918,21 @@ Python AMQP 0.9.1 client. :git: https://github.com/celery/py-amqp :CI: http://travis-ci.org/#!/celery/py-amqp +:Windows-CI: https://ci.appveyor.com/project/ask/py-amqp :PyPI: http://pypi.python.org/pypi/amqp :docs: http://amqp.readthedocs.org +vine +---- + +Promise/deferred implementation. + +:git: https://github.com/celery/vine/ +:CI: http://travis-ci.org/#!/celery/vine/ +:Windows-CI: https://ci.appveyor.com/project/ask/vine +:PyPI: http://pypi.python.org/pypi/vine +:docs: http://vine.readthedocs.org + billiard -------- @@ -917,6 +940,8 @@ Fork of multiprocessing containing improvements that will eventually be merged into the Python stdlib. :git: https://github.com/celery/billiard +:CI: http://travis-ci.org/#!/celery/billiard/ +:Windows-CI: https://ci.appveyor.com/project/ask/billiard :PyPI: http://pypi.python.org/pypi/billiard librabbitmq @@ -1013,11 +1038,11 @@ The version number must be updated two places: After you have changed these files you must render the ``README`` files. 
There is a script to convert sphinx syntax -to generic reStructured Text syntax, and the paver task `readme` +to generic reStructured Text syntax, and the make target `readme` does this for you: :: - $ paver readme + $ make readme Now commit the changes: :: @@ -1035,10 +1060,9 @@ Releasing Commands to make a new public stable release:: - $ paver releaseok # checks pep8, autodoc index, runs tests and more - $ paver removepyc # Remove .pyc files - $ git clean -xdn # Check that there's no left-over files in the repo - $ python setup.py sdist upload # Upload package to PyPI + $ make distcheck # checks pep8, autodoc index, runs tests and more + $ make dist # NOTE: Runs git clean -xdf and removes files not in the repo. + $ python setup.py sdist bdist_wheel upload # Upload package to PyPI If this is a new release series then you also need to do the following: diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index b11226613..63fc7b60a 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -11,7 +11,7 @@ that everyone must add themselves here, and not be added by others, so it's currently incomplete waiting for everyone to add their names. -The full list of authors can be found in docs/AUTHORS.txt. +The list of authors added before the policy change can be found in docs/AUTHORS.txt. 
-- @@ -156,5 +156,54 @@ Antoine Legrand, 2014/01/09 Pepijn de Vos, 2014/01/15 Dan McGee, 2014/01/27 Paul Kilgo, 2014/01/28 +Môshe van der Sterre, 2014/01/31 Martin Davidsson, 2014/02/08 Chris Clark, 2014/02/20 +Matthew Duggan, 2014/04/10 +Brian Bouterse, 2014/04/10 +Dmitry Malinovsky, 2014/04/28 +Luke Pomfrey, 2014/05/06 +Alexey Kotlyarov, 2014/05/16 +Ross Deane, 2014/07/11 +Tadej Janež, 2014/08/08 +Akexander Koshelev, 2014/08/19 +Davide Quarta, 2014/08/19 +John Whitlock, 2014/08/19 +Konstantinos Koukopoulos, 2014/08/24 +Albert Yee Wang, 2014/08/29 +Andrea Rabbaglietti, 2014/10/02 +Joe Jevnik, 2014/10/22 +Nathan Van Gheem, 2014/10/28 +Gino Ledesma, 2014/10/28 +Thomas French, 2014/11/10 +Michael Permana, 2014/11/6 +William King, 2014/11/21 +Bert Vanderbauwhede, 2014/12/18 +John Anderson, 2014/12/27 +Luke Burden, 2015/01/24 +Mickaël Penhard, 2015/02/15 +Mark Parncutt, 2015/02/16 +Samuel Jaillet, 2015/03/24 +Ilya Georgievsky, 2015/03/31 +Fatih Sucu, 2015/04/17 +James Pulec, 2015/04/19 +Alexander Lebedev, 2015/04/25 +Frantisek Holop, 2015/05/21 +Feanil Patel, 2015/05/21 +Jocelyn Delalande, 2015/06/03 +Justin Patrin, 2015/08/06 +Juan Rossi, 2015/08/10 +Piotr Maślanka, 2015/08/24 +Gerald Manipon, 2015/10/19 +Krzysztof Bujniewicz, 2015/10/21 +Sukrit Khera, 2015/10/26 +Dave Smith, 2015/10/27 +Dennis Brakhane, 2015/10/30 +Chris Harris, 2015/11/27 +Valentyn Klindukh, 2016/01/15 +Wayne Chang, 2016/01/15 +Mike Attwood, 2016/01/22 +David Harrigan, 2016/02/01 +Ahmet Demir, 2016/02/27 +Maxime Verger, 2016/02/29 +Alexander Oblovatniy, 2016/03/10 diff --git a/Changelog b/Changelog index 9ecc78129..4da5b706f 100644 --- a/Changelog +++ b/Changelog @@ -4,818 +4,17 @@ Change history ================ -This document contains change notes for bugfix releases in the 3.1.x series -(Cipater), please see :ref:`whatsnew-3.1` for an overview of what's -new in Celery 3.1. 
+This document contains change notes for bugfix releases in +the 4.0.x series (0today8), please see :ref:`whatsnew-4.0` for +an overview of what's new in Celery 4.0. -.. _version-3.1.10: +.. _version-4.0.0: -3.1.10 -====== -:release-date: 2014-XX-XX XX:XX X.X UTC -:release-by: XX - -- **Requirements**: - - - Now depends on :ref:`Kombu 3.0.14 `. - -- **Redis:** Important note about events (Issue #1882). - - There is a new transport option for Redis that enables monitors - to filter out unwanted events. Enabling this option in the workers - will increase performance considerably: - - .. code-block:: python - - BROKER_TRANSPORT_OPTIONS = {'fanout_patterns': True} - - Enabling this option means that your workers will not be able to see - workers with the option disabled (or is running an older version of - Celery), so if you do enable it then make sure you do so on all - nodes. - - See :ref:`redis-caveats-fanout-patterns`. - - This will be the default in Celery 3.2. - -- **Results**: The :class:`@AsyncResult` object now keeps a local cache - of the final state of the task. - - This means that the global result cache can finally be disabled, - and you can do so by setting :setting:`CELERY_MAX_CACHED_RESULTS` to - :const:`-1`. The lifetime of the cache will then be bound to the - lifetime of the result object, and this will be the default behavior - in Celery 3.2. - -- **Events**: The "Substantial drift" warning message is now logged once - per node name only (Issue #1802). - -- **Worker**: Ability to use one log file per child process when using the - prefork pool. - - This can be enabled by using the new ``%i`` and ``%I`` format specifiers - for the log file name. See :ref:`worker-files-process-index`. - -- **Redis**: New experimental chord join implementation. - - This is an optimization for chords when using the Redis result backend, - where the join operation is now considerably faster and using less - resources than the previous strategy. 
- - The new option can be set in the result backend URL: - - CELERY_RESULT_BACKEND = 'redis://localhost?new_join=1' - - This must be enabled manually as it's incompatible - with workers and clients not using it, so be sure to enable - the option in all clients and workers if you decide to use it. - -- **Multi**: With ``-opt:index`` (e.g. ``-c:1``) the index now always refers - to the position of a node in the argument list. - - This means that referring to a number will work when specifying a list - of node names and not just for a number range: - - .. code-block:: bash - - celery multi start A B C D -c:1 4 -c:2-4 8 - - In this example ``1`` refers to node A (as it's the first node in the - list). - -- **Signals**: The sender argument to ``Signal.connect`` can now be a proxy - object, which means that it can be used with the task decorator - (Issue #1873). - -- **Task**: A regression caused the ``queue`` argument to ``Task.retry`` to be - ignored (Issue #1892). - -- **App**: Fixed error message for :meth:`~@Celery.config_from_envvar`. - - Fix contributed by Dmitry Malinovsky. - -- **Canvas**: Chords can now contain a group of other chords (Issue #1921). - -- **Canvas**: Chords can now be combined when using the amqp result backend - (a chord where the callback is also a chord). - -- **Canvas**: Calling ``result.get()`` for a chain task will now complete - even if one of the tasks in the chain is ``ignore_result=True`` - (Issue #1905). - -- **Canvas**: Worker now also logs chord errors. - -- **Canvas**: A chord task raising an exception will now result in - any errbacks (``link_error``) to the chord callback to also be called. - -- **Task**: Task callbacks and errbacks are now called using the group - primitive. - -- **Task**: ``Task.apply`` now properly sets ``request.headers`` - (Issue #1874). - -- **Worker**: Fixed ``UnicodeEncodeError`` occuring when worker is started - by `supervisord`. - - Fix contributed by Codeb Fan. 
- -- **Beat**: No longer attempts to upgrade a newly created database file - (Issue #1923). - -- **Beat**: New setting :setting:``CELERYBEAT_SYNC_EVERY`` can be be used - to control file sync by specifying the number of tasks to send between - each sync. - - Contributed by Chris Clark. - -- **Commands**: :program:`celery inspect memdump` no longer crashes - if the :mod:`psutil` module is not installed (Issue #1914). - -- **Worker**: Remote control commands now always accepts json serialized - messages (Issue #1870). - -- **Worker**: Gossip will now drop any task related events it receives - by mistake (Issue #1882). - - -.. _version-3.1.9: - -3.1.9 -===== -:release-date: 2014-02-10 06:43 P.M UTC -:release-by: Ask Solem - -- **Requirements**: - - - Now depends on :ref:`Kombu 3.0.12 `. - -- **Prefork pool**: Better handling of exiting child processes. - - Fix contributed by Ionel Cristian Mărieș. - -- **Prefork pool**: Now makes sure all file descriptors are removed - from the hub when a process is cleaned up. - - Fix contributed by Ionel Cristian Mărieș. - -- **New Sphinx extension**: for autodoc documentation of tasks: - :mod:`celery.contrib.spinx` (Issue #1833). - -- **Django**: Now works with Django 1.7a1. - -- **Task**: Task.backend is now a property that forwards to ``app.backend`` - if no custom backend has been specified for the task (Issue #1821). - -- **Generic init scripts**: Fixed bug in stop command. - - Fix contributed by Rinat Shigapov. - -- **Generic init scripts**: Fixed compatibility with GNU :manpage:`stat`. - - Fix contributed by Paul Kilgo. - -- **Generic init scripts**: Fixed compatibility with the minimal - :program:`dash` shell (Issue #1815). - -- **Commands**: The :program:`celery amqp basic.publish` command was not - working properly. - - Fix contributed by Andrey Voronov. - -- **Commands**: Did no longer emit an error message if the pidfile exists - and the process is still alive (Issue #1855). 
- -- **Commands**: Better error message for missing arguments to preload - options (Issue #1860). - -- **Commands**: :program:`celery -h` did not work because of a bug in the - argument parser (Issue #1849). - -- **Worker**: Improved error message for message decoding errors. - -- **Time**: Now properly parses the `Z` timezone specifier in ISO 8601 date - strings. - - Fix contributed by Martin Davidsson. - -- **Worker**: Now uses the *negotiated* heartbeat value to calculate - how often to run the heartbeat checks. - -- **Beat**: Fixed problem with beat hanging after the first schedule - iteration (Issue #1822). - - Fix contributed by Roger Hu. - -- **Signals**: The header argument to :signal:`before_task_publish` is now - always a dictionary instance so that signal handlers can add headers. - -- **Worker**: A list of message headers is now included in message related - errors. - -.. _version-3.1.8: - -3.1.8 +4.0.0 ===== -:release-date: 2014-01-17 10:45 P.M UTC -:release-by: Ask Solem - -- **Requirements**: - - - Now depends on :ref:`Kombu 3.0.10 `. - - - Now depends on `billiard 3.3.0.14`_. - -.. _`billiard 3.3.0.14`: - https://github.com/celery/billiard/blob/master/CHANGES.txt - -- **Worker**: The event loop was not properly reinitialized at consumer restart - which would force the worker to continue with a closed ``epoll`` instance on - Linux, resulting in a crash. - -- **Events:** Fixed issue with both heartbeats and task events that could - result in the data not being kept in sorted order. - - As a result this would force the worker to log "heartbeat missed" - events even though the remote node was sending heartbeats in a timely manner. - -- **Results:** The pickle serializer no longer converts group results to tuples, - and will keep the original type (*Issue #1750*). - -- **Results:** ``ResultSet.iterate`` is now pending deprecation. - - The method will be deprecated in version 3.2 and removed in version 3.3. 
- - Use ``result.get(callback=)`` (or ``result.iter_native()`` where available) - instead. - -- **Worker**\|eventlet/gevent: A regression caused ``Ctrl+C`` to be ineffective - for shutdown. - -- **Redis result backend:** Now using a pipeline to store state changes - for improved performance. - - Contributed by Pepijn de Vos. - -- **Redis result backend:** Will now retry storing the result if disconnected. - -- **Worker**\|gossip: Fixed attribute error occurring when another node leaves. - - Fix contributed by Brodie Rao. - -- **Generic init scripts:** Now runs a check at startup to verify - that any configuration scripts are owned by root and that they - are not world/group writeable. - - The init script configuration is a shell script executed by root, - so this is a preventive measure to ensure that users do not - leave this file vulnerable to changes by unprivileged users. - - .. note:: - - Note that upgrading celery will not update the init scripts, - instead you need to manually copy the improved versions from the - source distribution: - https://github.com/celery/celery/tree/3.1/extra/generic-init.d - -- **Commands**: The :program:`celery purge` command now warns that the operation - will delete all tasks and prompts the user for confirmation. - - A new :option:`-f` was added that can be used to disable - interactive mode. - -- **Task**: ``.retry()`` did not raise the value provided in the ``exc`` argument - when called outside of an error context (*Issue #1755*). - -- **Commands:** The :program:`celery multi` command did not forward command - line configuration to the target workers. - - The change means that multi will forward the special ``--`` argument and - configuration content at the end of the arguments line to the specified - workers. - - Example using command-line configuration to set a broker heartbeat - from :program:`celery multi`: - - .. 
code-block:: bash - - $ celery multi start 1 -c3 -- broker.heartbeat=30 - - Fix contributed by Antoine Legrand. - -- **Canvas:** ``chain.apply_async()`` now properly forwards execution options. - - Fix contributed by Konstantin Podshumok. - -- **Redis result backend:** Now takes ``connection_pool`` argument that can be - used to change the connection pool class/constructor. - -- **Worker:** Now truncates very long arguments and keyword arguments logged by - the pool at debug severity. - -- **Worker:** The worker now closes all open files on :sig:`SIGHUP` (regression) - (*Issue #1768*). - - Fix contributed by Brodie Rao - -- **Worker:** Will no longer accept remote control commands while the - worker startup phase is incomplete (*Issue #1741*). - -- **Commands:** The output of the event dump utility - (:program:`celery events -d`) can now be piped into other commands. - -- **Documentation:** The RabbitMQ installation instructions for OS X was - updated to use modern homebrew practices. - - Contributed by Jon Chen. - -- **Commands:** The :program:`celery inspect conf` utility now works. - -- **Commands:** The :option:`-no-color` argument was not respected by - all commands (*Issue #1799*). - -- **App:** Fixed rare bug with ``autodiscover_tasks()`` (*Issue #1797*). - -- **Distribution:** The sphinx docs will now always add the parent directory - to path so that the current celery source code is used as a basis for - API documentation (*Issue #1782*). - -- **Documentation:** Supervisord examples contained an extraneous '-' in a - `--logfile` argument example. - - Fix contributed by Mohammad Almeer. - -.. _version-3.1.7: - -3.1.7 -===== -:release-date: 2013-12-17 06:00 P.M UTC -:release-by: Ask Solem - -.. 
_v317-important: - -Important Notes ---------------- - -Init script security improvements ---------------------------------- - -Where the generic init scripts (for ``celeryd``, and ``celerybeat``) before -delegated the responsibility of dropping privileges to the target application, -it will now use ``su`` instead, so that the Python program is not trusted -with superuser privileges. - -This is not in reaction to any known exploit, but it will -limit the possibility of a privilege escalation bug being abused in the -future. - -You have to upgrade the init scripts manually from this directory: -https://github.com/celery/celery/tree/3.1/extra/generic-init.d - -AMQP result backend -~~~~~~~~~~~~~~~~~~~ - -The 3.1 release accidentally left the amqp backend configured to be -non-persistent by default. - -Upgrading from 3.0 would give a "not equivalent" error when attempting to -set or retrieve results for a task. That is unless you manually set the -persistence setting:: - - CELERY_RESULT_PERSISTENT = True - -This version restores the previous value so if you already forced -the upgrade by removing the existing exchange you must either -keep the configuration by setting ``CELERY_RESULT_PERSISTENT = False`` -or delete the ``celeryresults`` exchange again. - -Synchronous subtasks -~~~~~~~~~~~~~~~~~~~~ - -Tasks waiting for the result of a subtask will now emit -a :exc:`RuntimeWarning` warning when using the prefork pool, -and in 3.2 this will result in an exception being raised. - -It's not legal for tasks to block by waiting for subtasks -as this is likely to lead to resource starvation and eventually -deadlock when using the prefork pool (see also :ref:`task-synchronous-subtasks`). - -If you really know what you are doing you can avoid the warning (and -the future exception being raised) by moving the operation in a whitelist -block: - -.. 
code-block:: python - - from celery.result import allow_join_result - - @app.task - def misbehaving(): - result = other_task.delay() - with allow_join_result(): - result.get() - -Note also that if you wait for the result of a subtask in any form -when using the prefork pool you must also disable the pool prefetching -behavior with the worker :ref:`-Ofair option `. - -.. _v317-fixes: - -Fixes ------ - -- Now depends on :ref:`Kombu 3.0.8 `. - -- Now depends on :mod:`billiard` 3.3.0.13 - -- Events: Fixed compatibility with non-standard json libraries - that sends float as :class:`decimal.Decimal` (Issue #1731) - -- Events: State worker objects now always defines attributes: - ``active``, ``processed``, ``loadavg``, ``sw_ident``, ``sw_ver`` - and ``sw_sys``. - -- Worker: Now keeps count of the total number of tasks processed, - not just by type (``all_active_count``). - -- Init scripts: Fixed problem with reading configuration file - when the init script is symlinked to a runlevel (e.g. ``S02celeryd``). - (Issue #1740). - - This also removed a rarely used feature where you can symlink the script - to provide alternative configurations. You instead copy the script - and give it a new name, but perhaps a better solution is to provide - arguments to ``CELERYD_OPTS`` to separate them: - - .. code-block:: bash - - CELERYD_NODES="X1 X2 Y1 Y2" - CELERYD_OPTS="-A:X1 x -A:X2 x -A:Y1 y -A:Y2 y" - -- Fallback chord unlock task is now always called after the chord header - (Issue #1700). - - This means that the unlock task will not be started if there's - an error sending the header. - -- Celery command: Fixed problem with arguments for some control commands. - - Fix contributed by Konstantin Podshumok. - -- Fixed bug in ``utcoffset`` where the offset when in DST would be - completely wrong (Issue #1743). 
- -- Worker: Errors occurring while attempting to serialize the result of a - task will now cause the task to be marked with failure and a - :class:`kombu.exceptions.EncodingError` error. - - Fix contributed by Ionel Cristian Mărieș. - -- Worker with ``-B`` argument did not properly shut down the beat instance. - -- Worker: The ``%n`` and ``%h`` formats are now also supported by the - :option:`--logfile`, :option:`--pidfile` and :option:`--statedb` arguments. - - Example: - - .. code-block:: bash - - $ celery -A proj worker -n foo@%h --logfile=%n.log --statedb=%n.db - -- Redis/Cache result backends: Will now timeout if keys evicted while trying - to join a chord. - -- The fallbock unlock chord task now raises :exc:`Retry` so that the - retry even is properly logged by the worker. - -- Multi: Will no longer apply Eventlet/gevent monkey patches (Issue #1717). - -- Redis result backend: Now supports UNIX sockets. - - Like the Redis broker transport the result backend now also supports - using ``redis+socket:///tmp/redis.sock`` URLs. - - Contributed by Alcides Viamontes Esquivel. - -- Events: Events sent by clients was mistaken for worker related events - (Issue #1714). - - For ``events.State`` the tasks now have a ``Task.client`` attribute - that is set when a ``task-sent`` event is being received. - - Also, a clients logical clock is not in sync with the cluster so - they live in a "time bubble". So for this reason monitors will no - longer attempt to merge with the clock of an event sent by a client, - instead it will fake the value by using the current clock with - a skew of -1. - -- Prefork pool: The method used to find terminated processes was flawed - in that it did not also take into account missing popen objects. - -- Canvas: ``group`` and ``chord`` now works with anon signatures as long - as the group/chord object is associated with an app instance (Issue #1744). - - You can pass the app by using ``group(..., app=app)``. - -.. 
_version-3.1.6: - -3.1.6 -===== -:release-date: 2013-12-02 06:00 P.M UTC -:release-by: Ask Solem - -- Now depends on :mod:`billiard` 3.3.0.10. - -- Now depends on :ref:`Kombu 3.0.7 `. - -- Fixed problem where Mingle caused the worker to hang at startup - (Issue #1686). - -- Beat: Would attempt to drop privileges twice (Issue #1708). - -- Windows: Fixed error with ``geteuid`` not being available (Issue #1676). - -- Tasks can now provide a list of expected error classes (Issue #1682). - - The list should only include errors that the task is expected to raise - during normal operation:: - - @task(throws=(KeyError, HttpNotFound)) - - What happens when an exceptions is raised depends on the type of error: - - - Expected errors (included in ``Task.throws``) - - Will be logged using severity ``INFO``, and traceback is excluded. - - - Unexpected errors - - Will be logged using severity ``ERROR``, with traceback included. - -- Cache result backend now compatible with Python 3 (Issue #1697). - -- CentOS init script: Now compatible with sys-v style init symlinks. - - Fix contributed by Jonathan Jordan. - -- Events: Fixed problem when task name is not defined (Issue #1710). - - Fix contributed by Mher Movsisyan. - -- Task: Fixed unbound local errors (Issue #1684). - - Fix contributed by Markus Ullmann. - -- Canvas: Now unrolls groups with only one task (optimization) (Issue #1656). - -- Task: Fixed problem with eta and timezones. - - Fix contributed by Alexander Koval. - -- Django: Worker now performs model validation (Issue #1681). - -- Task decorator now emits less confusing errors when used with - incorrect arguments (Issue #1692). - -- Task: New method ``Task.send_event`` can be used to send custom events - to Flower and other monitors. - -- Fixed a compatibility issue with non-abstract task classes - -- Events from clients now uses new node name format (``gen@``). - -- Fixed rare bug with Callable not being defined at interpreter shutdown - (Issue #1678). 
- - Fix contributed by Nick Johnson. - -- Fixed Python 2.6 compatibility (Issue #1679). - -.. _version-3.1.5: - -3.1.5 -===== -:release-date: 2013-11-21 06:20 P.M UTC -:release-by: Ask Solem - -- Now depends on :ref:`Kombu 3.0.6 `. - -- Now depends on :mod:`billiard` 3.3.0.8 - -- App: ``config_from_object`` is now lazy (Issue #1665). - -- App: ``autodiscover_tasks`` is now lazy. - - Django users should now wrap access to the settings object - in a lambda:: - - app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) - - this ensures that the settings object is not prepared - prematurely. - -- Fixed regression for ``--app`` argument experienced by - some users (Issue #1653). - -- Worker: Now respects the ``--uid`` and ``--gid`` arguments - even if ``--detach`` is not enabled. - -- Beat: Now respects the ``--uid`` and ``--gid`` arguments - even if ``--detach`` is not enabled. - -- Python 3: Fixed unorderable error occuring with the worker ``-B`` - argument enabled. - -- ``celery.VERSION`` is now a named tuple. - -- ``maybe_signature(list)`` is now applied recursively (Issue #1645). - -- ``celery shell`` command: Fixed ``IPython.frontend`` deprecation warning. - -- The default app no longer includes the builtin fixups. - - This fixes a bug where ``celery multi`` would attempt - to load the Django settings module before entering - the target working directory. - -- The Django daemonization tutorial was changed. - - Users no longer have to explicitly export ``DJANGO_SETTINGS_MODULE`` - in :file:`/etc/default/celeryd` when the new project layout is used. - -- Redis result backend: expiry value can now be 0 (Issue #1661). - -- Censoring settings now accounts for non-string keys (Issue #1663). - -- App: New ``autofinalize`` option. - - Apps are automatically finalized when the task registry is accessed. - You can now disable this behavior so that an exception is raised - instead. - - Example: - - .. 
code-block:: python - - app = Celery(autofinalize=False) - - # raises RuntimeError - tasks = app.tasks - - @app.task - def add(x, y): - return x + y - - # raises RuntimeError - add.delay(2, 2) - - app.finalize() - # no longer raises: - tasks = app.tasks - add.delay(2, 2) - -- The worker did not send monitoring events during shutdown. - -- Worker: Mingle and gossip is now automatically disabled when - used with an unsupported transport (Issue #1664). - -- ``celery`` command: Preload options now supports - the rare ``--opt value`` format (Issue #1668). - -- ``celery`` command: Accidentally removed options - appearing before the subcommand, these are now moved to the end - instead. - -- Worker now properly responds to ``inspect stats`` commands - even if received before startup is complete (Issue #1659). - -- :signal:`task_postrun` is now sent within a finally block, to make - sure the signal is always sent. - -- Beat: Fixed syntax error in string formatting. - - Contributed by nadad. - -- Fixed typos in the documentation. - - Fixes contributed by Loic Bistuer, sunfinite. - -- Nested chains now works properly when constructed using the - ``chain`` type instead of the ``|`` operator (Issue #1656). - -.. _version-3.1.4: - -3.1.4 -===== -:release-date: 2013-11-15 11:40 P.M UTC -:release-by: Ask Solem - -- Now depends on :ref:`Kombu 3.0.5 `. - -- Now depends on :mod:`billiard` 3.3.0.7 - -- Worker accidentally set a default socket timeout of 5 seconds. - -- Django: Fixup now sets the default app so that threads will use - the same app instance (e.g. for manage.py runserver). - -- Worker: Fixed Unicode error crash at startup experienced by some users. - -- Calling ``.apply_async`` on an empty chain now works again (Issue #1650). - -- The ``celery multi show`` command now generates the same arguments - as the start command does. - -- The ``--app`` argument could end up using a module object instead - of an app instance (with a resulting crash). 
- -- Fixed a syntax error problem in the celerybeat init script. - - Fix contributed by Vsevolod. - -- Tests now passing on PyPy 2.1 and 2.2. - -.. _version-3.1.3: - -3.1.3 -===== -:release-date: 2013-11-13 00:55 A.M UTC -:release-by: Ask Solem - -- Fixed compatibility problem with Python 2.7.0 - 2.7.5 (Issue #1637) - - ``unpack_from`` started supporting ``memoryview`` arguments - in Python 2.7.6. - -- Worker: :option:`-B` argument accidentally closed files used - for logging. - -- Task decorated tasks now keep their docstring (Issue #1636) - -.. _version-3.1.2: - -3.1.2 -===== -:release-date: 2013-11-12 08:00 P.M UTC -:release-by: Ask Solem - -- Now depends on :mod:`billiard` 3.3.0.6 - -- No longer needs the billiard C extension to be installed. - -- The worker silently ignored task errors. - -- Django: Fixed ``ImproperlyConfigured`` error raised - when no database backend specified. - - Fix contributed by j0hnsmith - -- Prefork pool: Now using ``_multiprocessing.read`` with ``memoryview`` - if available. - -- ``close_open_fds`` now uses ``os.closerange`` if available. - -- ``get_fdmax`` now takes value from ``sysconfig`` if possible. - -.. _version-3.1.1: - -3.1.1 -===== -:release-date: 2013-11-11 06:30 P.M UTC -:release-by: Ask Solem - -- Now depends on :mod:`billiard` 3.3.0.4. - -- Python 3: Fixed compatibility issues. - -- Windows: Accidentally showed warning that the billiard C extension - was not installed (Issue #1630). - -- Django: Tutorial updated with a solution that sets a default - :envvar:`DJANGO_SETTINGS_MODULE` so that it doesn't have to be typed - in with the :program:`celery` command. - - Also fixed typos in the tutorial, and added the settings - required to use the Django database backend. - - Thanks to Chris Ward, orarbel. - -- Django: Fixed a problem when using the Django settings in Django 1.6. - -- Django: Fixup should not be applied if the django loader is active. 
- -- Worker: Fixed attribute error for ``human_write_stats`` when using the - compatibility prefork pool implementation. - -- Worker: Fixed compatibility with billiard without C extension. - -- Inspect.conf: Now supports a ``with_defaults`` argument. - -- Group.restore: The backend argument was not respected. - -.. _version-3.1.0: - -3.1.0 -======= -:release-date: 2013-11-09 11:00 P.M UTC -:release-by: Ask Solem +:release-date: TBA +:status: *FROZEN* +:branch: master +:release-by: -See :ref:`whatsnew-3.1`. +See :ref:`whatsnew-4.0`. diff --git a/LICENSE b/LICENSE index fc1793197..06221a278 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,6 @@ -Copyright (c) 2009, 2010, 2011, 2012 Ask Solem, and individual contributors. All Rights Reserved. -Copyright (c) 2012-2013 GoPivotal, Inc. All rights reserved. +Copyright (c) 2015-2016 Ask Solem & contributors. All rights reserved. +Copyright (c) 2012-2014 GoPivotal, Inc. All rights reserved. +Copyright (c) 2009, 2010, 2011, 2012 Ask Solem, and individual contributors. All rights reserved. Celery is licensed under The BSD License (3 Clause, also known as the new BSD license). 
The license is an OSI approved Open Source @@ -39,9 +40,9 @@ Documentation License The documentation portion of Celery (the rendered contents of the "docs" directory of a software distribution or checkout) is supplied -under the Creative Commons Attribution-Noncommercial-Share Alike 3.0 -United States License as described by -http://creativecommons.org/licenses/by-nc-sa/3.0/us/ +under the "Creative Commons Attribution-ShareAlike 4.0 +International" (CC BY-SA 4.0) License as described by +http://creativecommons.org/licenses/by-sa/4.0/ Footnotes ========= diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..98557216e --- /dev/null +++ b/Makefile @@ -0,0 +1,95 @@ +PYTHON=python +SPHINX_DIR="docs/" +SPHINX_BUILDDIR="${SPHINX_DIR}/.build" +README="README.rst" +CONTRIBUTING="CONTRIBUTING.rst" +CONFIGREF_SRC="docs/configuration.rst" +README_SRC="docs/templates/readme.txt" +CONTRIBUTING_SRC="docs/contributing.rst" +SPHINX2RST="extra/release/sphinx-to-rst.py" +WORKER_GRAPH_FULL="docs/images/worker_graph_full.png" + +SPHINX_HTMLDIR = "${SPHINX_BUILDDIR}/html" + +html: + (cd "$(SPHINX_DIR)"; make html) + mv "$(SPHINX_HTMLDIR)" Documentation + +docsclean: + -rm -rf "$(SPHINX_BUILDDIR)" + +htmlclean: + -rm -rf "$(SPHINX)" + +apicheck: + extra/release/doc4allmods celery + +indexcheck: + extra/release/verify-reference-index.sh + +configcheck: + PYTHONPATH=. 
$(PYTHON) extra/release/verify_config_reference.py $(CONFIGREF_SRC) + +flakecheck: + flake8 celery + +flakediag: + -$(MAKE) flakecheck + +flakepluscheck: + flakeplus celery --2.6 + +flakeplusdiag: + -$(MAKE) flakepluscheck + +flakes: flakediag flakeplusdiag + +readmeclean: + -rm -f $(README) + +readmecheck: + iconv -f ascii -t ascii $(README) >/dev/null + +$(README): + $(PYTHON) $(SPHINX2RST) $(README_SRC) --ascii > $@ + +readme: readmeclean $(README) readmecheck + +contributingclean: + -rm -f CONTRIBUTING.rst + +$(CONTRIBUTING): + $(PYTHON) $(SPHINX2RST) $(CONTRIBUTING_SRC) > $@ + +contributing: contributingclean $(CONTRIBUTING) + +test: + nosetests -xv celery.tests + +cov: + nosetests -xv celery.tests --with-coverage --cover-html --cover-branch + +removepyc: + -find . -type f -a \( -name "*.pyc" -o -name "*$$py.class" \) | xargs rm + -find . -type d -name "__pycache__" | xargs rm -r + +$(WORKER_GRAPH_FULL): + $(PYTHON) -m celery graph bootsteps | dot -Tpng -o $@ + +graphclean: + -rm -f $(WORKER_GRAPH_FULL) + +graph: graphclean $(WORKER_GRAPH_FULL) + +gitclean: + git clean -xdn + +gitcleanforce: + git clean -xdf + +distcheck: flakecheck apicheck indexcheck configcheck readmecheck test gitclean + +authorcheck: + git shortlog -se | cut -f2 | extra/release/attribution.py + +dist: readme contributing docsclean gitcleanforce removepyc diff --git a/README.rst b/README.rst index 2ec057fce..0a82f53ca 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ .. image:: http://cloud.github.com/downloads/celery/celery/celery_128.png -:Version: 3.1.10 (Cipater) +:Version: 4.0.0rc1 (0today8) :Web: http://celeryproject.org/ :Download: http://pypi.python.org/pypi/celery/ :Source: http://github.com/celery/celery/ @@ -44,7 +44,7 @@ What do I need? Celery version 3.0 runs on, -- Python (2.5, 2.6, 2.7, 3.2, 3.3) +- Python (2.7, 3.4, 3.5) - PyPy (1.8, 1.9) - Jython (2.5, 2.7). 
@@ -82,7 +82,7 @@ getting started tutorials: http://docs.celeryproject.org/en/latest/getting-started/next-steps.html Celery is... -============ +========== - **Simple** @@ -120,7 +120,7 @@ Celery is... schedulers, consumers, producers, autoscalers, broker transports and much more. It supports... -============== +============ - **Message Transports** @@ -139,7 +139,7 @@ It supports... - AMQP, Redis - memcached, MongoDB - SQLAlchemy, Django ORM - - Apache Cassandra, IronCache + - Apache Cassandra, IronCache, Elasticsearch - **Serialization** @@ -183,7 +183,7 @@ development easier, and sometimes they add important hooks like closing database connections at ``fork``. .. _`Django`: http://djangoproject.com/ -.. _`Pylons`: http://pylonshq.com/ +.. _`Pylons`: http://pylonsproject.org/ .. _`Flask`: http://flask.pocoo.org/ .. _`web2py`: http://web2py.com/ .. _`Bottle`: http://bottlepy.org/ @@ -234,9 +234,9 @@ by using brackets. Multiple bundles can be specified by separating them by commas. :: - $ pip install celery[librabbitmq] + $ pip install "celery[librabbitmq]" - $ pip install celery[librabbitmq,redis,auth,msgpack] + $ pip install "celery[librabbitmq,redis,auth,msgpack]" The following bundles are available: @@ -284,7 +284,7 @@ Transports and Backends for using memcached as a result backend. :celery[cassandra]: - for using Apache Cassandra as a result backend. + for using Apache Cassandra as a result backend with DataStax driver. :celery[couchdb]: for using CouchDB as a message transport (*experimental*). @@ -292,6 +292,9 @@ Transports and Backends :celery[couchbase]: for using CouchBase as a result backend. +:celery[riak]: + for using Riak as a result backend. + :celery[beanstalk]: for using Beanstalk as a message transport (*experimental*). 
diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 000000000..07c259a04 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,54 @@ +environment: + + global: + # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the + # /E:ON and /V:ON options are not enabled in the batch script intepreter + # See: http://stackoverflow.com/a/13751649/163740 + WITH_COMPILER: "cmd /E:ON /V:ON /C .\\extra\\appveyor\\run_with_compiler.cmd" + + matrix: + + # Pre-installed Python versions, which Appveyor may upgrade to + # a later point release. + # See: http://www.appveyor.com/docs/installed-software#python + + - PYTHON: "C:\\Python27" + PYTHON_VERSION: "2.7.x" + PYTHON_ARCH: "32" + + - PYTHON: "C:\\Python34" + PYTHON_VERSION: "3.4.x" + PYTHON_ARCH: "32" + + - PYTHON: "C:\\Python27-x64" + PYTHON_VERSION: "2.7.x" + PYTHON_ARCH: "64" + WINDOWS_SDK_VERSION: "v7.0" + + - PYTHON: "C:\\Python34-x64" + PYTHON_VERSION: "3.4.x" + PYTHON_ARCH: "64" + WINDOWS_SDK_VERSION: "v7.1" + + +init: + - "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%" + +install: + - "powershell extra\\appveyor\\install.ps1" + - "%PYTHON%/Scripts/pip.exe install -U setuptools" + - "%PYTHON%/Scripts/pip.exe install -r requirements/dev.txt" + +build: off + +test_script: + - "%WITH_COMPILER% %PYTHON%/python setup.py test" + +after_test: + - "%WITH_COMPILER% %PYTHON%/python setup.py bdist_wheel" + +artifacts: + - path: dist\* + +#on_success: +# - TODO: upload the content of dist/*.whl to a public wheelhouse diff --git a/celery/__init__.py b/celery/__init__.py index 489b6b4e1..5f3911fce 100644 --- a/celery/__init__.py +++ b/celery/__init__.py @@ -1,11 +1,15 @@ # -*- coding: utf-8 -*- """Distributed Task Queue""" +# :copyright: (c) 2015-2016 Ask Solem. All rights reserved. +# :copyright: (c) 2012-2014 GoPivotal, Inc., All rights reserved. # :copyright: (c) 2009 - 2012 Ask Solem and individual contributors, # All rights reserved. -# :copyright: (c) 2012-2013 GoPivotal, Inc., All rights reserved. 
# :license: BSD (3 Clause), see LICENSE for more details. -from __future__ import absolute_import +from __future__ import absolute_import, print_function, unicode_literals + +import os +import sys from collections import namedtuple @@ -13,31 +17,32 @@ 'version_info_t', ('major', 'minor', 'micro', 'releaselevel', 'serial'), ) -SERIES = 'Cipater' -VERSION = version_info_t(3, 1, 10, '', '') +SERIES = '0today8' +VERSION = version_info_t(4, 0, 0, 'rc1', '') + __version__ = '{0.major}.{0.minor}.{0.micro}{0.releaselevel}'.format(VERSION) __author__ = 'Ask Solem' __contact__ = 'ask@celeryproject.org' __homepage__ = 'http://celeryproject.org' __docformat__ = 'restructuredtext' + +# -eof meta- + __all__ = [ 'Celery', 'bugreport', 'shared_task', 'task', 'current_app', 'current_task', 'maybe_signature', 'chain', 'chord', 'chunks', 'group', 'signature', 'xmap', 'xstarmap', 'uuid', 'version', '__version__', ] + VERSION_BANNER = '{0} ({1})'.format(__version__, SERIES) -# -eof meta- -import os -import sys if os.environ.get('C_IMPDEBUG'): # pragma: no cover from .five import builtins - real_import = builtins.__import__ def debug_import(name, locals=None, globals=None, - fromlist=None, level=-1): + fromlist=None, level=-1, real_import=builtins.__import__): glob = globals or getattr(sys, 'emarfteg_'[::-1])(1).f_globals importer_name = glob and glob.get('__name__') or 'unknown' print('-- {0} imports {1}'.format(importer_name, name)) @@ -66,18 +71,18 @@ def debug_import(name, locals=None, globals=None, def _find_option_with_arg(argv, short_opts=None, long_opts=None): - """Search argv for option specifying its short and longopt - alternatives. + """Search argv for options specifying short and longopt alternatives. - Return the value of the option if found. + :returns: value for option found + :raises KeyError: if option not found. 
""" for i, arg in enumerate(argv): if arg.startswith('-'): if long_opts and arg.startswith('--'): - name, _, val = arg.partition('=') + name, sep, val = arg.partition('=') if name in long_opts: - return val + return val if sep else argv[i + 1] if short_opts and arg in short_opts: return argv[i + 1] raise KeyError('|'.join(short_opts or [] + long_opts or [])) @@ -86,21 +91,22 @@ def _find_option_with_arg(argv, short_opts=None, long_opts=None): def _patch_eventlet(): import eventlet import eventlet.debug + eventlet.monkey_patch() - EVENTLET_DBLOCK = int(os.environ.get('EVENTLET_NOBLOCK', 0)) - if EVENTLET_DBLOCK: - eventlet.debug.hub_blocking_detection(EVENTLET_DBLOCK) + blockdetect = float(os.environ.get('EVENTLET_NOBLOCK', 0)) + if blockdetect: + eventlet.debug.hub_blocking_detection(blockdetect, blockdetect) def _patch_gevent(): - from gevent import monkey, version_info + from gevent import monkey, signal as gsignal, version_info + monkey.patch_all() if version_info[0] == 0: # pragma: no cover # Signals aren't working in gevent versions <1.0, # and are not monkey patched by patch_all() - from gevent import signal as _gevent_signal _signal = __import__('signal') - _signal.signal = _gevent_signal + _signal.signal = gsignal def maybe_patch_concurrency(argv=sys.argv, @@ -122,30 +128,34 @@ def maybe_patch_concurrency(argv=sys.argv, pass else: patcher() - # set up eventlet/gevent environments ASAP. 
+ + # set up eventlet/gevent environments ASAP from celery import concurrency concurrency.get_implementation(pool) # Lazy loading -from .five import recreate_module +from celery import five # noqa -old_module, new_module = recreate_module( # pragma: no cover +old_module, new_module = five.recreate_module( # pragma: no cover __name__, by_module={ 'celery.app': ['Celery', 'bugreport', 'shared_task'], 'celery.app.task': ['Task'], 'celery._state': ['current_app', 'current_task'], - 'celery.canvas': ['chain', 'chord', 'chunks', 'group', - 'signature', 'maybe_signature', 'subtask', - 'xmap', 'xstarmap'], + 'celery.canvas': [ + 'chain', 'chord', 'chunks', 'group', + 'signature', 'maybe_signature', 'subtask', + 'xmap', 'xstarmap', + ], 'celery.utils': ['uuid'], }, direct={'task': 'celery.task'}, __package__='celery', __file__=__file__, __path__=__path__, __doc__=__doc__, __version__=__version__, __author__=__author__, __contact__=__contact__, - __homepage__=__homepage__, __docformat__=__docformat__, + __homepage__=__homepage__, __docformat__=__docformat__, five=five, VERSION=VERSION, SERIES=SERIES, VERSION_BANNER=VERSION_BANNER, + version_info_t=version_info_t, maybe_patch_concurrency=maybe_patch_concurrency, _find_option_with_arg=_find_option_with_arg, ) diff --git a/celery/__main__.py b/celery/__main__.py index 04448e239..590c94766 100644 --- a/celery/__main__.py +++ b/celery/__main__.py @@ -1,27 +1,11 @@ -from __future__ import absolute_import +from __future__ import absolute_import, print_function, unicode_literals import sys -from os.path import basename - from . 
import maybe_patch_concurrency __all__ = ['main'] -DEPRECATED_FMT = """ -The {old!r} command is deprecated, please use {new!r} instead: - -$ {new_argv} - -""" - - -def _warn_deprecated(new): - print(DEPRECATED_FMT.format( - old=basename(sys.argv[0]), new=new, - new_argv=' '.join([new] + sys.argv[1:])), - ) - def main(): if 'multi' not in sys.argv: @@ -30,25 +14,5 @@ def main(): main() -def _compat_worker(): - maybe_patch_concurrency() - _warn_deprecated('celery worker') - from celery.bin.worker import main - main() - - -def _compat_multi(): - _warn_deprecated('celery multi') - from celery.bin.multi import main - main() - - -def _compat_beat(): - maybe_patch_concurrency() - _warn_deprecated('celery beat') - from celery.bin.beat import main - main() - - if __name__ == '__main__': # pragma: no cover main() diff --git a/celery/_state.py b/celery/_state.py index 3af39bf91..5047182e3 100644 --- a/celery/_state.py +++ b/celery/_state.py @@ -9,7 +9,7 @@ This module shouldn't be used directly. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import os import sys @@ -19,41 +19,35 @@ from celery.local import Proxy from celery.utils.threads import LocalStack -try: - from weakref import WeakSet as AppSet -except ImportError: # XXX Py2.6 +__all__ = [ + 'set_default_app', 'get_current_app', 'get_current_task', + 'get_current_worker_task', 'current_app', 'current_task', + 'connect_on_app_finalize', +] - class AppSet(object): # noqa +#: Global default app used when no current app. +default_app = None - def __init__(self): - self._refs = set() +#: List of all app instances (weakrefs), must not be used directly. +_apps = weakref.WeakSet() - def add(self, app): - self._refs.add(weakref.ref(app)) +#: global set of functions to call whenever a new app is finalized +#: E.g. Shared tasks, and builtin tasks are created +#: by adding callbacks here. 
+_on_app_finalizers = set() - def __iter__(self): - dirty = [] - try: - for appref in self._refs: - app = appref() - if app is None: - dirty.append(appref) - else: - yield app - finally: - while dirty: - self._refs.discard(dirty.pop()) +_task_join_will_block = False -__all__ = ['set_default_app', 'get_current_app', 'get_current_task', - 'get_current_worker_task', 'current_app', 'current_task'] -#: Global default app used when no current app. -default_app = None +def connect_on_app_finalize(callback): + _on_app_finalizers.add(callback) + return callback -#: List of all app instances (weakrefs), must not be used directly. -_apps = AppSet() -_task_join_will_block = False +def _announce_app_finalized(app): + callbacks = set(_on_app_finalizers) + for callback in callbacks: + callback(app) def _set_task_join_will_block(blocks): @@ -85,13 +79,16 @@ def _get_current_app(): #: creates the global fallback app instance. from celery.app import Celery set_default_app(Celery( - 'default', + 'default', fixups=[], set_as_current=False, loader=os.environ.get('CELERY_LOADER') or 'default', - fixups=[], - set_as_current=False, accept_magic_kwargs=True, )) return _tls.current_app or default_app + +def _set_current_app(app): + _tls.current_app = app + + C_STRICT_APP = os.environ.get('C_STRICT_APP') if os.environ.get('C_STRICT_APP'): # pragma: no cover def get_current_app(): @@ -133,5 +130,9 @@ def _register_app(app): _apps.add(app) +def _deregister_app(app): + _apps.discard(app) + + def _get_active_apps(): return _apps diff --git a/celery/app/__init__.py b/celery/app/__init__.py index 426fed6ce..8e8d9a79c 100644 --- a/celery/app/__init__.py +++ b/celery/app/__init__.py @@ -6,22 +6,18 @@ Celery Application. 
""" -from __future__ import absolute_import +from __future__ import absolute_import, print_function, unicode_literals import os from celery.local import Proxy from celery import _state from celery._state import ( - set_default_app, get_current_app as current_app, get_current_task as current_task, - _get_active_apps, - _task_stack, + connect_on_app_finalize, set_default_app, _get_active_apps, _task_stack, ) -from celery.utils import gen_task_name -from .builtins import shared_task as _shared_task from .base import Celery, AppPickler __all__ = ['Celery', 'AppPickler', 'default_app', 'app_or_default', @@ -67,13 +63,16 @@ def _app_or_default(app=None): def _app_or_default_trace(app=None): # pragma: no cover from traceback import print_stack - from billiard import current_process + try: + from billiard.process import current_process + except ImportError: + current_process = None if app is None: if getattr(_state._tls, 'current_app', None): print('-- RETURNING TO CURRENT APP --') # noqa+ print_stack() return _state._tls.current_app - if current_process()._name == 'MainProcess': + if not current_process or current_process()._name == 'MainProcess': raise Exception('DEFAULT APP') print('-- RETURNING TO DEFAULT APP --') # noqa+ print_stack() @@ -128,7 +127,9 @@ def __inner(fun): name = options.get('name') # Set as shared task so that unfinalized apps, # and future apps will load the task. - _shared_task(lambda app: app._task_from_fun(fun, **options)) + connect_on_app_finalize( + lambda app: app._task_from_fun(fun, **options) + ) # Force all finalized apps to take this task as well. 
for app in _get_active_apps(): @@ -141,7 +142,7 @@ def __inner(fun): def task_by_cons(): app = current_app() return app.tasks[ - name or gen_task_name(app, fun.__name__, fun.__module__) + name or app.gen_task_name(fun.__name__, fun.__module__) ] return Proxy(task_by_cons) return __inner diff --git a/celery/app/amqp.py b/celery/app/amqp.py index b0dae95e0..455cb5597 100644 --- a/celery/app/amqp.py +++ b/celery/app/amqp.py @@ -6,29 +6,40 @@ Sending and receiving messages using Kombu. """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import numbers +import sys +from collections import Mapping, namedtuple from datetime import timedelta from weakref import WeakValueDictionary +from kombu import pools from kombu import Connection, Consumer, Exchange, Producer, Queue from kombu.common import Broadcast -from kombu.pools import ProducerPool -from kombu.utils import cached_property, uuid -from kombu.utils.encoding import safe_repr +from kombu.utils import cached_property from kombu.utils.functional import maybe_list from celery import signals from celery.five import items, string_t +from celery.local import try_import +from celery.utils import anon_nodename +from celery.utils.saferepr import saferepr from celery.utils.text import indent as textindent -from celery.utils.timeutils import to_utc +from celery.utils.timeutils import maybe_make_aware, to_utc -from . import app_or_default from . import routes as _routes -__all__ = ['AMQP', 'Queues', 'TaskProducer', 'TaskConsumer'] +__all__ = ['AMQP', 'Queues', 'task_message'] + +PY3 = sys.version_info[0] == 3 + +#: earliest date supported by time.mktime. +INT_MIN = -2147483648 + +# json in Python 2.7 borks if dict contains byte keys. +JSON_NEEDS_UNICODE_KEYS = not PY3 and not try_import('simplejson') #: Human readable queue declaration. 
QUEUE_FORMAT = """ @@ -36,6 +47,14 @@ key={0.routing_key} """ +task_message = namedtuple('task_message', + ('headers', 'properties', 'body', 'sent_event')) + + +def utf8dict(d, encoding='utf-8'): + return {k.decode(encoding) if isinstance(k, bytes) else k: v + for k, v in items(d)} + class Queues(dict): """Queue name⇒ declaration mapping. @@ -46,6 +65,7 @@ class Queues(dict): the occurrence of unknown queues in `wanted` will raise :exc:`KeyError`. :keyword ha_policy: Default HA policy for queues with none set. + :keyword max_priority: Default x-max-priority for queues with none set. """ @@ -54,15 +74,17 @@ class Queues(dict): _consume_from = None def __init__(self, queues=None, default_exchange=None, - create_missing=True, ha_policy=None, autoexchange=None): + create_missing=True, ha_policy=None, autoexchange=None, + max_priority=None): dict.__init__(self) self.aliases = WeakValueDictionary() self.default_exchange = default_exchange self.create_missing = create_missing self.ha_policy = ha_policy self.autoexchange = Exchange if autoexchange is None else autoexchange + self.max_priority = max_priority if isinstance(queues, (tuple, list)): - queues = dict((q.name, q) for q in queues) + queues = {q.name: q for q in queues} for name, q in items(queues or {}): self.add(q) if isinstance(q, Queue) else self.add_compat(name, **q) @@ -73,8 +95,7 @@ def __getitem__(self, name): return dict.__getitem__(self, name) def __setitem__(self, name, queue): - if self.default_exchange and (not queue.exchange or - not queue.exchange.name): + if self.default_exchange and not queue.exchange: queue.exchange = self.default_exchange dict.__setitem__(self, name, queue) if queue.alias: @@ -106,6 +127,10 @@ def add(self, queue, **kwargs): if queue.queue_arguments is None: queue.queue_arguments = {} self._set_ha_policy(queue.queue_arguments) + if self.max_priority is not None: + if queue.queue_arguments is None: + queue.queue_arguments = {} + self._set_max_priority(queue.queue_arguments) 
self[queue.name] = queue return queue @@ -116,6 +141,8 @@ def add_compat(self, name, **options): options['routing_key'] = name if self.ha_policy is not None: self._set_ha_policy(options.setdefault('queue_arguments', {})) + if self.max_priority is not None: + self._set_max_priority(options.setdefault('queue_arguments', {})) q = self[name] = Queue.from_dict(name, **options) return q @@ -126,6 +153,10 @@ def _set_ha_policy(self, args): 'x-ha-policy-params': list(policy)}) args['x-ha-policy'] = policy + def _set_max_priority(self, args): + if 'x-max-priority' not in args and self.max_priority is not None: + return args.update({'x-max-priority': self.max_priority}) + def format(self, indent=0, indent_first=True): """Format routing table into string for log dumps.""" active = self.consume_from @@ -153,8 +184,9 @@ def select(self, include): Can be iterable or string. """ if include: - self._consume_from = dict((name, self[name]) - for name in maybe_list(include)) + self._consume_from = { + name: self[name] for name in maybe_list(include) + } select_subset = select # XXX compat def deselect(self, exclude): @@ -184,204 +216,14 @@ def consume_from(self): return self -class TaskProducer(Producer): - app = None - auto_declare = False - retry = False - retry_policy = None - utc = True - event_dispatcher = None - send_sent_event = False - - def __init__(self, channel=None, exchange=None, *args, **kwargs): - self.retry = kwargs.pop('retry', self.retry) - self.retry_policy = kwargs.pop('retry_policy', - self.retry_policy or {}) - self.send_sent_event = kwargs.pop('send_sent_event', - self.send_sent_event) - exchange = exchange or self.exchange - self.queues = self.app.amqp.queues # shortcut - self.default_queue = self.app.amqp.default_queue - super(TaskProducer, self).__init__(channel, exchange, *args, **kwargs) - - def publish_task(self, task_name, task_args=None, task_kwargs=None, - countdown=None, eta=None, task_id=None, group_id=None, - taskset_id=None, # compat alias to 
group_id - expires=None, exchange=None, exchange_type=None, - event_dispatcher=None, retry=None, retry_policy=None, - queue=None, now=None, retries=0, chord=None, - callbacks=None, errbacks=None, routing_key=None, - serializer=None, delivery_mode=None, compression=None, - reply_to=None, time_limit=None, soft_time_limit=None, - declare=None, headers=None, - send_before_publish=signals.before_task_publish.send, - before_receivers=signals.before_task_publish.receivers, - send_after_publish=signals.after_task_publish.send, - after_receivers=signals.after_task_publish.receivers, - send_task_sent=signals.task_sent.send, # XXX deprecated - sent_receivers=signals.task_sent.receivers, - **kwargs): - """Send task message.""" - retry = self.retry if retry is None else retry - headers = {} if headers is None else headers - - qname = queue - if queue is None and exchange is None: - queue = self.default_queue - if queue is not None: - if isinstance(queue, string_t): - qname, queue = queue, self.queues[queue] - else: - qname = queue.name - exchange = exchange or queue.exchange.name - routing_key = routing_key or queue.routing_key - if declare is None and queue and not isinstance(queue, Broadcast): - declare = [queue] - - # merge default and custom policy - retry = self.retry if retry is None else retry - _rp = (dict(self.retry_policy, **retry_policy) if retry_policy - else self.retry_policy) - task_id = task_id or uuid() - task_args = task_args or [] - task_kwargs = task_kwargs or {} - if not isinstance(task_args, (list, tuple)): - raise ValueError('task args must be a list or tuple') - if not isinstance(task_kwargs, dict): - raise ValueError('task kwargs must be a dictionary') - if countdown: # Convert countdown to ETA. 
- now = now or self.app.now() - eta = now + timedelta(seconds=countdown) - if self.utc: - eta = to_utc(eta).astimezone(self.app.timezone) - if isinstance(expires, numbers.Real): - now = now or self.app.now() - expires = now + timedelta(seconds=expires) - if self.utc: - expires = to_utc(expires).astimezone(self.app.timezone) - eta = eta and eta.isoformat() - expires = expires and expires.isoformat() - - body = { - 'task': task_name, - 'id': task_id, - 'args': task_args, - 'kwargs': task_kwargs, - 'retries': retries or 0, - 'eta': eta, - 'expires': expires, - 'utc': self.utc, - 'callbacks': callbacks, - 'errbacks': errbacks, - 'timelimit': (time_limit, soft_time_limit), - 'taskset': group_id or taskset_id, - 'chord': chord, - } - - if before_receivers: - send_before_publish( - sender=task_name, body=body, - exchange=exchange, - routing_key=routing_key, - declare=declare, - headers=headers, - properties=kwargs, - retry_policy=retry_policy, - ) - - self.publish( - body, - exchange=exchange, routing_key=routing_key, - serializer=serializer or self.serializer, - compression=compression or self.compression, - headers=headers, - retry=retry, retry_policy=_rp, - reply_to=reply_to, - correlation_id=task_id, - delivery_mode=delivery_mode, declare=declare, - **kwargs - ) - - if after_receivers: - send_after_publish(sender=task_name, body=body, - exchange=exchange, routing_key=routing_key) - - if sent_receivers: # XXX deprecated - send_task_sent(sender=task_name, task_id=task_id, - task=task_name, args=task_args, - kwargs=task_kwargs, eta=eta, - taskset=group_id or taskset_id) - if self.send_sent_event: - evd = event_dispatcher or self.event_dispatcher - exname = exchange or self.exchange - if isinstance(exname, Exchange): - exname = exname.name - evd.publish( - 'task-sent', - { - 'uuid': task_id, - 'name': task_name, - 'args': safe_repr(task_args), - 'kwargs': safe_repr(task_kwargs), - 'retries': retries, - 'eta': eta, - 'expires': expires, - 'queue': qname, - 'exchange': 
exname, - 'routing_key': routing_key, - }, - self, retry=retry, retry_policy=retry_policy, - ) - return task_id - delay_task = publish_task # XXX Compat - - @cached_property - def event_dispatcher(self): - # We call Dispatcher.publish with a custom producer - # so don't need the dispatcher to be "enabled". - return self.app.events.Dispatcher(enabled=False) - - -class TaskPublisher(TaskProducer): - """Deprecated version of :class:`TaskProducer`.""" - - def __init__(self, channel=None, exchange=None, *args, **kwargs): - self.app = app_or_default(kwargs.pop('app', self.app)) - self.retry = kwargs.pop('retry', self.retry) - self.retry_policy = kwargs.pop('retry_policy', - self.retry_policy or {}) - exchange = exchange or self.exchange - if not isinstance(exchange, Exchange): - exchange = Exchange(exchange, - kwargs.pop('exchange_type', 'direct')) - self.queues = self.app.amqp.queues # shortcut - super(TaskPublisher, self).__init__(channel, exchange, *args, **kwargs) - - -class TaskConsumer(Consumer): - app = None - - def __init__(self, channel, queues=None, app=None, accept=None, **kw): - self.app = app or self.app - if accept is None: - accept = self.app.conf.CELERY_ACCEPT_CONTENT - super(TaskConsumer, self).__init__( - channel, - queues or list(self.app.amqp.queues.consume_from.values()), - accept=accept, - **kw - ) - - class AMQP(object): Connection = Connection Consumer = Consumer + Producer = Producer #: compat alias to Connection BrokerConnection = Connection - producer_cls = TaskProducer - consumer_cls = TaskConsumer queues_cls = Queues #: Cached and prepared routing table. @@ -397,76 +239,346 @@ class AMQP(object): # and instead send directly to the queue named in the routing key. autoexchange = None + #: Max size of positional argument representation used for + #: logging purposes. + argsrepr_maxsize = 1024 + + #: Max size of keyword argument representation used for logging purposes. 
+ kwargsrepr_maxsize = 1024 + def __init__(self, app): self.app = app + self.task_protocols = { + 1: self.as_task_v1, + 2: self.as_task_v2, + } - def flush_routes(self): - self._rtable = _routes.prepare(self.app.conf.CELERY_ROUTES) + @cached_property + def create_task_message(self): + return self.task_protocols[self.app.conf.task_protocol] + + @cached_property + def send_task_message(self): + return self._create_task_sender() def Queues(self, queues, create_missing=None, ha_policy=None, - autoexchange=None): + autoexchange=None, max_priority=None): """Create new :class:`Queues` instance, using queue defaults from the current configuration.""" conf = self.app.conf if create_missing is None: - create_missing = conf.CELERY_CREATE_MISSING_QUEUES + create_missing = conf.task_create_missing_queues if ha_policy is None: - ha_policy = conf.CELERY_QUEUE_HA_POLICY - if not queues and conf.CELERY_DEFAULT_QUEUE: - queues = (Queue(conf.CELERY_DEFAULT_QUEUE, + ha_policy = conf.task_queue_ha_policy + if max_priority is None: + max_priority = conf.task_queue_max_priority + if not queues and conf.task_default_queue: + queues = (Queue(conf.task_default_queue, exchange=self.default_exchange, - routing_key=conf.CELERY_DEFAULT_ROUTING_KEY), ) + routing_key=conf.task_default_routing_key),) autoexchange = (self.autoexchange if autoexchange is None else autoexchange) return self.queues_cls( queues, self.default_exchange, create_missing, - ha_policy, autoexchange, + ha_policy, autoexchange, max_priority, ) def Router(self, queues=None, create_missing=None): """Return the current task router.""" return _routes.Router(self.routes, queues or self.queues, - self.app.either('CELERY_CREATE_MISSING_QUEUES', + self.app.either('task_create_missing_queues', create_missing), app=self.app) - @cached_property - def TaskConsumer(self): - """Return consumer configured to consume from the queues - we are configured for (``app.amqp.queues.consume_from``).""" - return 
self.app.subclass_with_self(self.consumer_cls, - reverse='amqp.TaskConsumer') - get_task_consumer = TaskConsumer # XXX compat + def flush_routes(self): + self._rtable = _routes.prepare(self.app.conf.task_routes) - @cached_property - def TaskProducer(self): - """Return publisher used to send tasks. + def TaskConsumer(self, channel, queues=None, accept=None, **kw): + if accept is None: + accept = self.app.conf.accept_content + return self.Consumer( + channel, accept=accept, + queues=queues or list(self.queues.consume_from.values()), + **kw + ) - You should use `app.send_task` instead. + def as_task_v2(self, task_id, name, args=None, kwargs=None, + countdown=None, eta=None, group_id=None, + expires=None, retries=0, chord=None, + callbacks=None, errbacks=None, reply_to=None, + time_limit=None, soft_time_limit=None, + create_sent_event=False, root_id=None, parent_id=None, + shadow=None, chain=None, now=None, timezone=None, + origin=None): + args = args or () + kwargs = kwargs or {} + if not isinstance(args, (list, tuple)): + raise TypeError('task args must be a list or tuple') + if not isinstance(kwargs, Mapping): + raise TypeError('task keyword arguments must be a mapping') + if countdown: # convert countdown to ETA + self._verify_seconds(countdown, 'countdown') + now = now or self.app.now() + timezone = timezone or self.app.timezone + eta = maybe_make_aware( + now + timedelta(seconds=countdown), tz=timezone, + ) + if isinstance(expires, numbers.Real): + self._verify_seconds(expires, 'expires') + now = now or self.app.now() + timezone = timezone or self.app.timezone + expires = maybe_make_aware( + now + timedelta(seconds=expires), tz=timezone, + ) + eta = eta and eta.isoformat() + expires = expires and expires.isoformat() - """ - conf = self.app.conf - return self.app.subclass_with_self( - self.producer_cls, - reverse='amqp.TaskProducer', - exchange=self.default_exchange, - routing_key=conf.CELERY_DEFAULT_ROUTING_KEY, - serializer=conf.CELERY_TASK_SERIALIZER, - 
compression=conf.CELERY_MESSAGE_COMPRESSION, - retry=conf.CELERY_TASK_PUBLISH_RETRY, - retry_policy=conf.CELERY_TASK_PUBLISH_RETRY_POLICY, - send_sent_event=conf.CELERY_SEND_TASK_SENT_EVENT, - utc=conf.CELERY_ENABLE_UTC, + argsrepr = saferepr(args, self.argsrepr_maxsize) + kwargsrepr = saferepr(kwargs, self.kwargsrepr_maxsize) + + if JSON_NEEDS_UNICODE_KEYS: # pragma: no cover + if callbacks: + callbacks = [utf8dict(callback) for callback in callbacks] + if errbacks: + errbacks = [utf8dict(errback) for errback in errbacks] + if chord: + chord = utf8dict(chord) + + return task_message( + headers={ + 'lang': 'py', + 'task': name, + 'id': task_id, + 'eta': eta, + 'expires': expires, + 'group': group_id, + 'retries': retries, + 'timelimit': [time_limit, soft_time_limit], + 'root_id': root_id, + 'parent_id': parent_id, + 'argsrepr': argsrepr, + 'kwargsrepr': kwargsrepr, + 'origin': origin or anon_nodename() + }, + properties={ + 'correlation_id': task_id, + 'reply_to': reply_to or '', + }, + body=( + args, kwargs, { + 'callbacks': callbacks, + 'errbacks': errbacks, + 'chain': chain, + 'chord': chord, + }, + ), + sent_event={ + 'uuid': task_id, + 'root_id': root_id, + 'parent_id': parent_id, + 'name': name, + 'args': argsrepr, + 'kwargs': kwargsrepr, + 'retries': retries, + 'eta': eta, + 'expires': expires, + } if create_sent_event else None, + ) + + def as_task_v1(self, task_id, name, args=None, kwargs=None, + countdown=None, eta=None, group_id=None, + expires=None, retries=0, + chord=None, callbacks=None, errbacks=None, reply_to=None, + time_limit=None, soft_time_limit=None, + create_sent_event=False, root_id=None, parent_id=None, + shadow=None, now=None, timezone=None): + args = args or () + kwargs = kwargs or {} + utc = self.utc + if not isinstance(args, (list, tuple)): + raise ValueError('task args must be a list or tuple') + if not isinstance(kwargs, Mapping): + raise ValueError('task keyword arguments must be a mapping') + if countdown: # convert countdown to ETA 
+ self._verify_seconds(countdown, 'countdown') + now = now or self.app.now() + timezone = timezone or self.app.timezone + eta = now + timedelta(seconds=countdown) + if utc: + eta = to_utc(eta).astimezone(timezone) + if isinstance(expires, numbers.Real): + self._verify_seconds(expires, 'expires') + now = now or self.app.now() + timezone = timezone or self.app.timezone + expires = now + timedelta(seconds=expires) + if utc: + expires = to_utc(expires).astimezone(timezone) + eta = eta and eta.isoformat() + expires = expires and expires.isoformat() + + if JSON_NEEDS_UNICODE_KEYS: # pragma: no cover + if callbacks: + callbacks = [utf8dict(callback) for callback in callbacks] + if errbacks: + errbacks = [utf8dict(errback) for errback in errbacks] + if chord: + chord = utf8dict(chord) + + return task_message( + headers={}, + properties={ + 'correlation_id': task_id, + 'reply_to': reply_to or '', + }, + body={ + 'task': name, + 'id': task_id, + 'args': args, + 'kwargs': kwargs, + 'group': group_id, + 'retries': retries, + 'eta': eta, + 'expires': expires, + 'utc': utc, + 'callbacks': callbacks, + 'errbacks': errbacks, + 'timelimit': (time_limit, soft_time_limit), + 'taskset': group_id, + 'chord': chord, + }, + sent_event={ + 'uuid': task_id, + 'name': name, + 'args': saferepr(args), + 'kwargs': saferepr(kwargs), + 'retries': retries, + 'eta': eta, + 'expires': expires, + } if create_sent_event else None, ) - TaskPublisher = TaskProducer # compat + + def _verify_seconds(self, s, what): + if s < INT_MIN: + raise ValueError('%s is out of range: %r' % (what, s)) + return s + + def _create_task_sender(self): + default_retry = self.app.conf.task_publish_retry + default_policy = self.app.conf.task_publish_retry_policy + default_delivery_mode = self.app.conf.task_default_delivery_mode + default_queue = self.default_queue + queues = self.queues + send_before_publish = signals.before_task_publish.send + before_receivers = signals.before_task_publish.receivers + send_after_publish = 
signals.after_task_publish.send + after_receivers = signals.after_task_publish.receivers + + send_task_sent = signals.task_sent.send # XXX compat + sent_receivers = signals.task_sent.receivers + + default_evd = self._event_dispatcher + default_exchange = self.default_exchange + + default_rkey = self.app.conf.task_default_routing_key + default_serializer = self.app.conf.task_serializer + default_compressor = self.app.conf.result_compression + + def send_task_message(producer, name, message, + exchange=None, routing_key=None, queue=None, + event_dispatcher=None, + retry=None, retry_policy=None, + serializer=None, delivery_mode=None, + compression=None, declare=None, + headers=None, exchange_type=None, **kwargs): + retry = default_retry if retry is None else retry + headers2, properties, body, sent_event = message + if headers: + headers2.update(headers) + if kwargs: + properties.update(kwargs) + + qname = queue + if queue is None and exchange is None: + queue = default_queue + if queue is not None: + if isinstance(queue, string_t): + qname, queue = queue, queues[queue] + else: + qname = queue.name + + if delivery_mode is None: + try: + delivery_mode = queue.exchange.delivery_mode + except AttributeError: + pass + delivery_mode = delivery_mode or default_delivery_mode + + if exchange_type is None: + try: + exchange_type = queue.exchange.type + except AttributeError: + exchange_type = 'direct' + + if not exchange and not routing_key and exchange_type == 'direct': + exchange, routing_key = '', qname + else: + exchange = exchange or queue.exchange.name or default_exchange + routing_key = routing_key or queue.routing_key or default_rkey + if declare is None and queue and not isinstance(queue, Broadcast): + declare = [queue] + + # merge default and custom policy + retry = default_retry if retry is None else retry + _rp = (dict(default_policy, **retry_policy) if retry_policy + else default_policy) + + if before_receivers: + send_before_publish( + sender=name, body=body, + 
exchange=exchange, routing_key=routing_key, + declare=declare, headers=headers2, + properties=kwargs, retry_policy=retry_policy, + ) + ret = producer.publish( + body, + exchange=exchange, + routing_key=routing_key, + serializer=serializer or default_serializer, + compression=compression or default_compressor, + retry=retry, retry_policy=_rp, + delivery_mode=delivery_mode, declare=declare, + headers=headers2, + **properties + ) + if after_receivers: + send_after_publish(sender=name, body=body, headers=headers2, + exchange=exchange, routing_key=routing_key) + if sent_receivers: # XXX deprecated + send_task_sent(sender=name, task_id=body['id'], task=name, + args=body['args'], kwargs=body['kwargs'], + eta=body['eta'], taskset=body['taskset']) + if sent_event: + evd = event_dispatcher or default_evd + exname = exchange + if isinstance(exname, Exchange): + exname = exname.name + sent_event.update({ + 'queue': qname, + 'exchange': exname, + 'routing_key': routing_key, + }) + evd.publish('task-sent', sent_event, + self, retry=retry, retry_policy=retry_policy) + return ret + return send_task_message @cached_property def default_queue(self): - return self.queues[self.app.conf.CELERY_DEFAULT_QUEUE] + return self.queues[self.app.conf.task_default_queue] @cached_property def queues(self): """Queue name⇒ declaration mapping.""" - return self.Queues(self.app.conf.CELERY_QUEUES) + return self.Queues(self.app.conf.task_queues) @queues.setter # noqa def queues(self, queues): @@ -485,15 +597,23 @@ def router(self): @property def producer_pool(self): if self._producer_pool is None: - self._producer_pool = ProducerPool( - self.app.pool, - limit=self.app.pool.limit, - Producer=self.TaskProducer, - ) + self._producer_pool = pools.producers[ + self.app.connection_for_write()] + self._producer_pool.limit = self.app.pool.limit return self._producer_pool publisher_pool = producer_pool # compat alias @cached_property def default_exchange(self): - return 
Exchange(self.app.conf.CELERY_DEFAULT_EXCHANGE, - self.app.conf.CELERY_DEFAULT_EXCHANGE_TYPE) + return Exchange(self.app.conf.task_default_exchange, + self.app.conf.task_default_exchange_type) + + @cached_property + def utc(self): + return self.app.conf.enable_utc + + @cached_property + def _event_dispatcher(self): + # We call Dispatcher.publish with a custom producer + # so don't need the diuspatcher to be enabled. + return self.app.events.Dispatcher(enabled=False) diff --git a/celery/app/annotations.py b/celery/app/annotations.py index 62ee2e72e..9ae1aea70 100644 --- a/celery/app/annotations.py +++ b/celery/app/annotations.py @@ -7,7 +7,7 @@ task classes in the configuration. This prepares and performs the annotations in the - :setting:`CELERY_ANNOTATIONS` setting. + :setting:`task_annotations` setting. """ from __future__ import absolute_import @@ -38,7 +38,7 @@ def annotate(self, task): def prepare(annotations): - """Expands the :setting:`CELERY_ANNOTATIONS` setting.""" + """Expands the :setting:`task_annotations` setting.""" def expand_annotation(annotation): if isinstance(annotation, dict): @@ -50,7 +50,7 @@ def expand_annotation(annotation): if annotations is None: return () elif not isinstance(annotations, (list, tuple)): - annotations = (annotations, ) + annotations = (annotations,) return [expand_annotation(anno) for anno in annotations] diff --git a/celery/app/base.py b/celery/app/base.py index 153a5575a..4127ea465 100644 --- a/celery/app/base.py +++ b/celery/app/base.py @@ -13,44 +13,56 @@ import warnings from collections import defaultdict, deque -from contextlib import contextmanager -from copy import deepcopy from operator import attrgetter +from functools import wraps -from amqp import promise -from billiard.util import register_after_fork +from kombu import pools from kombu.clocks import LamportClock from kombu.common import oid_from -from kombu.utils import cached_property, uuid +from kombu.utils import cached_property, register_after_fork, uuid 
+from vine import starpromise from celery import platforms from celery import signals from celery._state import ( - _task_stack, _tls, get_current_app, set_default_app, - _register_app, get_current_worker_task, + _task_stack, get_current_app, _set_current_app, set_default_app, + _register_app, _deregister_app, + get_current_worker_task, connect_on_app_finalize, + _announce_app_finalized, ) +from celery.datastructures import AttributeDictMixin from celery.exceptions import AlwaysEagerIgnored, ImproperlyConfigured -from celery.five import items, values +from celery.five import UserDict, values from celery.loaders import get_loader_cls from celery.local import PromiseProxy, maybe_evaluate -from celery.utils.functional import first, maybe_list +from celery.utils import abstract +from celery.utils import gen_task_name +from celery.utils.dispatch import Signal +from celery.utils.functional import first, maybe_list, head_from_fun +from celery.utils.timeutils import timezone from celery.utils.imports import instantiate, symbol_by_name -from celery.utils.objects import mro_lookup +from celery.utils.log import get_logger +from celery.utils.objects import FallbackContext, mro_lookup from .annotations import prepare as prepare_annotations -from .builtins import shared_task, load_shared_tasks -from .defaults import DEFAULTS, find_deprecated_settings +from .defaults import find_deprecated_settings from .registry import TaskRegistry from .utils import ( - AppPickler, Settings, bugreport, _unpickle_app, _unpickle_app_v2, appstr, + AppPickler, Settings, + bugreport, _unpickle_app, _unpickle_app_v2, appstr, detect_settings, ) +# Load all builtin tasks +from . 
import builtins # noqa + __all__ = ['Celery'] -_EXECV = os.environ.get('FORKED_BY_MULTIPROCESSING') -BUILTIN_FIXUPS = frozenset([ +logger = get_logger(__name__) + +USING_EXECV = os.environ.get('FORKED_BY_MULTIPROCESSING') +BUILTIN_FIXUPS = { 'celery.fixups.django:fixup', -]) +} ERR_ENVVAR_NOT_SET = """\ The environment variable {0!r} is not set, @@ -60,7 +72,7 @@ def app_has_custom(app, attr): - return mro_lookup(app.__class__, attr, stop=(Celery, object), + return mro_lookup(app.__class__, attr, stop={Celery, object}, monkey_patched=[__name__]) @@ -70,13 +82,74 @@ def _unpickle_appattr(reverse_name, args): return get_current_app()._rgetattr(reverse_name)(*args) +def _after_fork_cleanup_app(app): + try: + app._after_fork() + except Exception as exc: + logger.info('after forker raised exception: %r', exc, exc_info=1) + + +class PendingConfiguration(UserDict, AttributeDictMixin): + + callback = None + data = None + + def __init__(self, conf, callback): + object.__setattr__(self, 'data', conf) + object.__setattr__(self, 'callback', callback) + + def __getitem__(self, key): + return self.callback(key) + + class Celery(object): + """Celery application. + + :param main: Name of the main module if running as `__main__`. + This is used as the prefix for autogenerated task names. + + :keyword broker: URL of the default broker used. + :keyword loader: The loader class, or the name of the loader class to use. + Default is :class:`celery.loaders.app.AppLoader`. + :keyword backend: The result store backend class, or the name of the + backend class to use. Default is the value of the + :setting:`result_backend` setting. + :keyword amqp: AMQP object or class name. + :keyword events: Events object or class name. + :keyword log: Log object or class name. + :keyword control: Control object or class name. + :keyword set_as_current: Make this the global current app. + :keyword tasks: A task registry or the name of a registry class. 
+ :keyword include: List of modules every worker should import. + :keyword fixups: List of fixup plug-ins (see e.g. + :mod:`celery.fixups.django`). + :keyword autofinalize: If set to False a :exc:`RuntimeError` + will be raised if the task registry or tasks are used before + the app is finalized. + + """ #: This is deprecated, use :meth:`reduce_keys` instead Pickler = AppPickler SYSTEM = platforms.SYSTEM IS_OSX, IS_WINDOWS = platforms.IS_OSX, platforms.IS_WINDOWS + #: Name of the `__main__` module. Required for standalone scripts. + #: + #: If set this will be used instead of `__main__` when automatically + #: generating task names. + main = None + + #: Custom options for command-line programs. + #: See :ref:`extending-commandoptions` + user_options = None + + #: Custom bootsteps to extend and modify the worker. + #: See :ref:`extending-bootsteps`. + steps = None + + builtin_fixups = BUILTIN_FIXUPS + amqp_cls = 'celery.app.amqp:AMQP' backend_cls = None events_cls = 'celery.events:Events' @@ -85,20 +158,32 @@ class Celery(object): control_cls = 'celery.app.control:Control' task_cls = 'celery.app.task:Task' registry_cls = TaskRegistry + _fixups = None _pool = None - builtin_fixups = BUILTIN_FIXUPS + _conf = None + _after_fork_registered = False + + #: Signal sent when app is loading configuration. + on_configure = None + + #: Signal sent after app has prepared the configuration. + on_after_configure = None + + #: Signal sent after app has been finalized. + on_after_finalize = None + + #: Signal sent by every new process after fork. 
+ on_after_fork = None def __init__(self, main=None, loader=None, backend=None, amqp=None, events=None, log=None, control=None, - set_as_current=True, accept_magic_kwargs=False, - tasks=None, broker=None, include=None, changes=None, - config_source=None, fixups=None, task_cls=None, - autofinalize=True, **kwargs): + set_as_current=True, tasks=None, broker=None, include=None, + changes=None, config_source=None, fixups=None, task_cls=None, + autofinalize=True, namespace=None, **kwargs): self.clock = LamportClock() self.main = main self.amqp_cls = amqp or self.amqp_cls - self.backend_cls = backend or self.backend_cls self.events_cls = events or self.events_cls self.loader_cls = loader or self.loader_cls self.log_cls = log or self.log_cls @@ -106,14 +191,15 @@ def __init__(self, main=None, loader=None, backend=None, self.task_cls = task_cls or self.task_cls self.set_as_current = set_as_current self.registry_cls = symbol_by_name(self.registry_cls) - self.accept_magic_kwargs = accept_magic_kwargs self.user_options = defaultdict(set) self.steps = defaultdict(set) self.autofinalize = autofinalize + self.namespace = namespace self.configured = False self._config_source = config_source self._pending_defaults = deque() + self._pending_periodic_tasks = deque() self.finalized = False self._finalize_mutex = threading.Lock() @@ -122,7 +208,7 @@ def __init__(self, main=None, loader=None, backend=None, if not isinstance(self._tasks, TaskRegistry): self._tasks = TaskRegistry(self._tasks or {}) - # If the class defins a custom __reduce_args__ we need to use + # If the class defines a custom __reduce_args__ we need to use # the old way of pickling apps, which is pickling a list of # args instead of the new way that pickles a dict of keywords. self._using_v1_reduce = app_has_custom(self, '__reduce_args__') @@ -130,10 +216,15 @@ def __init__(self, main=None, loader=None, backend=None, # these options are moved to the config to # simplify pickling of the app object. 
self._preconf = changes or {} - if broker: - self._preconf['BROKER_URL'] = broker - if include: - self._preconf['CELERY_IMPORTS'] = include + self._preconf_set_by_auto = set() + self.__autoset('broker_url', broker) + self.__autoset('result_backend', backend) + self.__autoset('include', include) + self._conf = Settings( + PendingConfiguration( + self._preconf, self._get_from_conf_and_finalize), + prefix=self.namespace, + ) # - Apply fixups. self.fixups = set(self.builtin_fixups) if fixups is None else fixups @@ -143,15 +234,40 @@ def __init__(self, main=None, loader=None, backend=None, if self.set_as_current: self.set_current() + # Signals + if self.on_configure is None: + # used to be a method pre 4.0 + self.on_configure = Signal() + self.on_after_configure = Signal() + self.on_after_finalize = Signal() + self.on_after_fork = Signal() + self.on_init() _register_app(self) + def on_init(self): + """Optional callback called at init.""" + pass + + def __autoset(self, key, value): + if value: + self._preconf[key] = value + self._preconf_set_by_auto.add(key) + def set_current(self): - _tls.current_app = self + """Makes this the current app for this thread.""" + _set_current_app(self) def set_default(self): + """Makes this the default app for all threads.""" set_default_app(self) + def _ensure_after_fork(self): + if not self._after_fork_registered: + self._after_fork_registered = True + if register_after_fork is not None: + register_after_fork(self, _after_fork_cleanup_app) + def __enter__(self): return self @@ -159,52 +275,91 @@ def __exit__(self, *exc_info): self.close() def close(self): - self._maybe_close_pool() + """Clean up after the application. 
- def on_init(self): - """Optional callback called at init.""" - pass + Only necessary for dynamically created apps for which you can + use the with statement instead:: + + with Celery(set_as_current=False) as app: + with app.connection_for_write() as conn: + pass + """ + self._pool = None + _deregister_app(self) def start(self, argv=None): + """Run :program:`celery` using `argv`. + + Uses :data:`sys.argv` if `argv` is not specified. + + """ return instantiate( - 'celery.bin.celery:CeleryCommand', - app=self).execute_from_commandline(argv) + 'celery.bin.celery:CeleryCommand', app=self + ).execute_from_commandline(argv) def worker_main(self, argv=None): + """Run :program:`celery worker` using `argv`. + + Uses :data:`sys.argv` if `argv` is not specified. + + """ return instantiate( - 'celery.bin.worker:worker', - app=self).execute_from_commandline(argv) + 'celery.bin.worker:worker', app=self + ).execute_from_commandline(argv) def task(self, *args, **opts): - """Creates new task class from any callable.""" - if _EXECV and not opts.get('_force_evaluate'): + """Decorator to create a task class out of any callable. + + Examples: + + .. code-block:: python + + @app.task + def refresh_feed(url): + return … + + with setting extra options: + + .. code-block:: python + + @app.task(exchange="feeds") + def refresh_feed(url): + return … + + .. admonition:: App Binding + + For custom apps the task decorator will return a proxy + object, so that the act of creating the task is not performed + until the task is used or the task registry is accessed. + + If you are depending on binding to be deferred, then you must + not access any attributes on the returned object until the + application is fully set up (finalized). + + """ + if USING_EXECV and opts.get('lazy', True): # When using execv the task in the original module will point to a # different app, so doing things like 'add.request' will point to - # a differnt task instance. 
This makes sure it will always use + # a different task instance. This makes sure it will always use # the task instance from the current app. # Really need a better solution for this :( - from . import shared_task as proxies_to_curapp - return proxies_to_curapp(*args, _force_evaluate=True, **opts) + from . import shared_task + return shared_task(*args, lazy=False, **opts) - def inner_create_task_cls(shared=True, filter=None, **opts): + def inner_create_task_cls(shared=True, filter=None, lazy=True, **opts): _filt = filter # stupid 2to3 def _create_task_cls(fun): if shared: - cons = lambda app: app._task_from_fun(fun, **opts) + def cons(app): + return app._task_from_fun(fun, **opts) cons.__name__ = fun.__name__ - shared_task(cons) - if self.accept_magic_kwargs: # compat mode - task = self._task_from_fun(fun, **opts) - if filter: - task = filter(task) - return task - - if self.finalized or opts.get('_force_evaluate'): + connect_on_app_finalize(cons) + if not lazy or self.finalized: ret = self._task_from_fun(fun, **opts) else: # return a proxy object that evaluates on first use - ret = PromiseProxy(self._task_from_fun, (fun, ), opts, + ret = PromiseProxy(self._task_from_fun, (fun,), opts, __doc__=fun.__doc__) self._pending.append(ret) if _filt: @@ -223,30 +378,55 @@ def _create_task_cls(fun): sum([len(args), len(opts)]))) return inner_create_task_cls(**opts) - def _task_from_fun(self, fun, **options): + def _task_from_fun(self, fun, name=None, base=None, bind=False, **options): if not self.finalized and not self.autofinalize: raise RuntimeError('Contract breach: app not finalized') - base = options.pop('base', None) or self.Task - bind = options.pop('bind', False) - - T = type(fun.__name__, (base, ), dict({ - 'app': self, - 'accept_magic_kwargs': False, - 'run': fun if bind else staticmethod(fun), - '_decorated': True, - '__doc__': fun.__doc__, - '__module__': fun.__module__, - '__wrapped__': fun}, **options))() - task = self._tasks[T.name] # return global instance. 
+ name = name or self.gen_task_name(fun.__name__, fun.__module__) + base = base or self.Task + + if name not in self._tasks: + run = fun if bind else staticmethod(fun) + task = type(fun.__name__, (base,), dict({ + 'app': self, + 'name': name, + 'run': run, + '_decorated': True, + '__doc__': fun.__doc__, + '__module__': fun.__module__, + '__header__': staticmethod(head_from_fun(fun, bound=bind)), + '__wrapped__': run}, **options))() + self._tasks[task.name] = task + task.bind(self) # connects task to this app + + autoretry_for = tuple(options.get('autoretry_for', ())) + retry_kwargs = options.get('retry_kwargs', {}) + + if autoretry_for and not hasattr(task, '_orig_run'): + + @wraps(task.run) + def run(*args, **kwargs): + try: + return task._orig_run(*args, **kwargs) + except autoretry_for as exc: + raise task.retry(exc=exc, **retry_kwargs) + + task._orig_run, task.run = task.run, run + else: + task = self._tasks[name] return task + def gen_task_name(self, name, module): + return gen_task_name(self, name, module) + def finalize(self, auto=False): + """Finalizes the app by loading built-in tasks, + and evaluating pending task decorators.""" with self._finalize_mutex: if not self.finalized: if auto and not self.autofinalize: raise RuntimeError('Contract breach: app not finalized') self.finalized = True - load_shared_tasks(self) + _announce_app_finalized(self) pending = self._pending while pending: @@ -255,20 +435,68 @@ def finalize(self, auto=False): for task in values(self._tasks): task.bind(self) + self.on_after_finalize.send(sender=self) + def add_defaults(self, fun): + """Add default configuration from dict ``d``. + + If the argument is a callable function then it will be regarded + as a promise, and it won't be loaded until the configuration is + actually needed. 
+ + This method can be compared to:: + + >>> celery.conf.update(d) + + with a difference that 1) no copy will be made and 2) the dict will + not be transferred when the worker spawns child processes, so + it's important that the same configuration happens at import time + when pickle restores the object on the other side. + + """ if not callable(fun): d, fun = fun, lambda: d if self.configured: - return self.conf.add_defaults(fun()) + return self._conf.add_defaults(fun()) self._pending_defaults.append(fun) - def config_from_object(self, obj, silent=False, force=False): + def config_from_object(self, obj, + silent=False, force=False, namespace=None): + """Reads configuration from object, where object is either + an object or the name of a module to import. + + :keyword silent: If true then import errors will be ignored. + + :keyword force: Force reading configuration immediately. + By default the configuration will be read only when required. + + .. code-block:: pycon + + >>> celery.config_from_object("myapp.celeryconfig") + + >>> from myapp import celeryconfig + >>> celery.config_from_object(celeryconfig) + + """ self._config_source = obj + self.namespace = namespace or self.namespace if force or self.configured: - del(self.conf) - return self.loader.config_from_object(obj, silent=silent) + self._conf = None + if self.loader.config_from_object(obj, silent=silent): + return self.conf def config_from_envvar(self, variable_name, silent=False, force=False): + """Read configuration from environment variable. + + The value of the environment variable must be the name + of a module to import. + + .. 
code-block:: pycon + + >>> os.environ["CELERY_CONFIG_MODULE"] = "myapp.celeryconfig" + >>> celery.config_from_envvar("CELERY_CONFIG_MODULE") + + """ module_name = os.environ.get(variable_name) if not module_name: if silent: @@ -278,106 +506,273 @@ def config_from_envvar(self, variable_name, silent=False, force=False): return self.config_from_object(module_name, silent=silent, force=force) def config_from_cmdline(self, argv, namespace='celery'): - self.conf.update(self.loader.cmdline_config_parser(argv, namespace)) + self._conf.update( + self.loader.cmdline_config_parser(argv, namespace) + ) def setup_security(self, allowed_serializers=None, key=None, cert=None, store=None, digest='sha1', serializer='json'): + """Setup the message-signing serializer. + + This will affect all application instances (a global operation). + + Disables untrusted serializers and if configured to use the ``auth`` + serializer will register the auth serializer with the provided settings + into the Kombu serializer registry. + + :keyword allowed_serializers: List of serializer names, or + content_types that should be exempt from being disabled. + :keyword key: Name of private key file to use. + Defaults to the :setting:`security_key` setting. + :keyword cert: Name of certificate file to use. + Defaults to the :setting:`security_certificate` setting. + :keyword store: Directory containing certificates. + Defaults to the :setting:`security_cert_store` setting. + :keyword digest: Digest algorithm used when signing messages. + Default is ``sha1``. + :keyword serializer: Serializer used to encode messages after + they have been signed. See :setting:`task_serializer` for + the serializers supported. + Default is ``json``. 
+ + """ from celery.security import setup_security return setup_security(allowed_serializers, key, cert, store, digest, serializer, app=self) - def autodiscover_tasks(self, packages, related_name='tasks', force=False): + def autodiscover_tasks(self, packages=None, + related_name='tasks', force=False): + """Try to autodiscover and import modules with a specific name (by + default 'tasks'). + + If the name is empty, this will be delegated to fixups (e.g. Django). + + For example if you have an (imagined) directory tree like this:: + + foo/__init__.py + tasks.py + models.py + + bar/__init__.py + tasks.py + models.py + + baz/__init__.py + models.py + + Then calling ``app.autodiscover_tasks(['foo', bar', 'baz'])`` will + result in the modules ``foo.tasks`` and ``bar.tasks`` being imported. + + :param packages: List of packages to search. + This argument may also be a callable, in which case the + value returned is used (for lazy evaluation). + :keyword related_name: The name of the module to find. Defaults + to "tasks", which means it look for "module.tasks" for every + module in ``packages``. + :keyword force: By default this call is lazy so that the actual + autodiscovery will not happen until an application imports the + default modules. Forcing will cause the autodiscovery to happen + immediately. 
+ + """ if force: return self._autodiscover_tasks(packages, related_name) - signals.import_modules.connect(promise( - self._autodiscover_tasks, (packages, related_name), + signals.import_modules.connect(starpromise( + self._autodiscover_tasks, packages, related_name, ), weak=False, sender=self) - def _autodiscover_tasks(self, packages, related_name='tasks', **kwargs): - # argument may be lazy - packages = packages() if callable(packages) else packages - self.loader.autodiscover_tasks(packages, related_name) + def _autodiscover_tasks(self, packages, related_name, **kwargs): + if packages: + return self._autodiscover_tasks_from_names(packages, related_name) + return self._autodiscover_tasks_from_fixups(related_name) + + def _autodiscover_tasks_from_names(self, packages, related_name): + # packages argument can be lazy + return self.loader.autodiscover_tasks( + packages() if callable(packages) else packages, related_name, + ) + + def _autodiscover_tasks_from_fixups(self, related_name): + return self._autodiscover_tasks_from_names([ + pkg for fixup in self._fixups + for pkg in fixup.autodiscover_tasks() + if hasattr(fixup, 'autodiscover_tasks') + ], related_name=related_name) def send_task(self, name, args=None, kwargs=None, countdown=None, eta=None, task_id=None, producer=None, connection=None, router=None, result_cls=None, expires=None, publisher=None, link=None, link_error=None, - add_to_parent=True, reply_to=None, **options): + add_to_parent=True, group_id=None, retries=0, chord=None, + reply_to=None, time_limit=None, soft_time_limit=None, + root_id=None, parent_id=None, route_name=None, + shadow=None, chain=None, **options): + """Send task by name. + + :param name: Name of task to call (e.g. `"tasks.add"`). + :keyword result_cls: Specify custom result class. Default is + using :meth:`AsyncResult`. + + Otherwise supports the same arguments as :meth:`@-Task.apply_async`. 
+ + """ + parent = have_parent = None + amqp = self.amqp task_id = task_id or uuid() producer = producer or publisher # XXX compat - router = router or self.amqp.router + router = router or amqp.router conf = self.conf - if conf.CELERY_ALWAYS_EAGER: # pragma: no cover + if conf.task_always_eager: # pragma: no cover warnings.warn(AlwaysEagerIgnored( - 'CELERY_ALWAYS_EAGER has no effect on send_task', + 'task_always_eager has no effect on send_task', ), stacklevel=2) - options = router.route(options, name, args, kwargs) + options = router.route(options, route_name or name, args, kwargs) + + if root_id is None: + parent, have_parent = self.current_worker_task, True + if parent: + root_id = parent.request.root_id or parent.request.id + if parent_id is None: + if not have_parent: + parent, have_parent = self.current_worker_task, True + if parent: + parent_id = parent.request.id + + message = amqp.create_task_message( + task_id, name, args, kwargs, countdown, eta, group_id, + expires, retries, chord, + maybe_list(link), maybe_list(link_error), + reply_to or self.oid, time_limit, soft_time_limit, + self.conf.task_send_sent_event, + root_id, parent_id, shadow, chain, + ) + if connection: - producer = self.amqp.TaskProducer(connection) + producer = amqp.Producer(connection) with self.producer_or_acquire(producer) as P: self.backend.on_task_call(P, task_id) - task_id = P.publish_task( - name, args, kwargs, countdown=countdown, eta=eta, - task_id=task_id, expires=expires, - callbacks=maybe_list(link), errbacks=maybe_list(link_error), - reply_to=reply_to or self.oid, **options - ) + amqp.send_task_message(P, name, message, **options) result = (result_cls or self.AsyncResult)(task_id) if add_to_parent: - parent = get_current_worker_task() + if not have_parent: + parent, have_parent = self.current_worker_task, True if parent: parent.add_trail(result) return result + def connection_for_read(self, url=None, **kwargs): + """Establish connection used for consuming. 
+ + See :meth:`connection` for supported arguments. + + """ + return self._connection(url or self.conf.broker_read_url, **kwargs) + + def connection_for_write(self, url=None, **kwargs): + """Establish connection used for producing. + + See :meth:`connection` for supported arguments. + + """ + return self._connection(url or self.conf.broker_write_url, **kwargs) + def connection(self, hostname=None, userid=None, password=None, virtual_host=None, port=None, ssl=None, connect_timeout=None, transport=None, transport_options=None, heartbeat=None, login_method=None, failover_strategy=None, **kwargs): + """Establish a connection to the message broker. + + Please use :meth:`connection_for_read` and + :meth:`connection_for_write` instead, to convey the intent + of use for this connection. + + :param url: Either the URL or the hostname of the broker to use. + + :keyword hostname: URL, Hostname/IP-address of the broker. + If an URL is used, then the other argument below will + be taken from the URL instead. + :keyword userid: Username to authenticate as. + :keyword password: Password to authenticate with + :keyword virtual_host: Virtual host to use (domain). + :keyword port: Port to connect to. + :keyword ssl: Defaults to the :setting:`broker_use_ssl` setting. + :keyword transport: defaults to the :setting:`broker_transport` + setting. + :keyword transport_options: Dictionary of transport specific options. + :keyword heartbeat: AMQP Heartbeat in seconds (pyamqp only). + :keyword login_method: Custom login method to use (amqp only). + :keyword failover_strategy: Custom failover strategy. + :keyword \*\*kwargs: Additional arguments to :class:`kombu.Connection`. 
+ + :returns :class:`kombu.Connection`: + + """ + return self.connection_for_write( + hostname or self.conf.broker_write_url, + userid=userid, password=password, + virtual_host=virtual_host, port=port, ssl=ssl, + connect_timeout=connect_timeout, transport=transport, + transport_options=transport_options, heartbeat=heartbeat, + login_method=login_method, failover_strategy=failover_strategy, + **kwargs + ) + + def _connection(self, url, userid=None, password=None, + virtual_host=None, port=None, ssl=None, + connect_timeout=None, transport=None, + transport_options=None, heartbeat=None, + login_method=None, failover_strategy=None, **kwargs): conf = self.conf return self.amqp.Connection( - hostname or conf.BROKER_URL, - userid or conf.BROKER_USER, - password or conf.BROKER_PASSWORD, - virtual_host or conf.BROKER_VHOST, - port or conf.BROKER_PORT, - transport=transport or conf.BROKER_TRANSPORT, - ssl=self.either('BROKER_USE_SSL', ssl), + url, + userid or conf.broker_user, + password or conf.broker_password, + virtual_host or conf.broker_vhost, + port or conf.broker_port, + transport=transport or conf.broker_transport, + ssl=self.either('broker_use_ssl', ssl), heartbeat=heartbeat, - login_method=login_method or conf.BROKER_LOGIN_METHOD, + login_method=login_method or conf.broker_login_method, failover_strategy=( - failover_strategy or conf.BROKER_FAILOVER_STRATEGY + failover_strategy or conf.broker_failover_strategy ), transport_options=dict( - conf.BROKER_TRANSPORT_OPTIONS, **transport_options or {} + conf.broker_transport_options, **transport_options or {} ), connect_timeout=self.either( - 'BROKER_CONNECTION_TIMEOUT', connect_timeout + 'broker_connection_timeout', connect_timeout ), ) broker_connection = connection - @contextmanager - def connection_or_acquire(self, connection=None, pool=True, - *args, **kwargs): - if connection: - yield connection - else: - if pool: - with self.pool.acquire(block=True) as connection: - yield connection - else: - with self.connection() 
as connection: - yield connection + def _acquire_connection(self, pool=True): + """Helper for :meth:`connection_or_acquire`.""" + if pool: + return self.pool.acquire(block=True) + return self.connection_for_write() + + def connection_or_acquire(self, connection=None, pool=True, *_, **__): + """For use within a with-statement to get a connection from the pool + if one is not already provided. + + :keyword connection: If not provided, then a connection will be + acquired from the connection pool. + """ + return FallbackContext(connection, self._acquire_connection, pool=pool) default_connection = connection_or_acquire # XXX compat - @contextmanager def producer_or_acquire(self, producer=None): - if producer: - yield producer - else: - with self.amqp.producer_pool.acquire(block=True) as producer: - yield producer + """For use within a with-statement to get a producer from the pool + if one is not already provided + + :keyword producer: If not provided, then a producer will be + acquired from the producer pool. 
+ + """ + return FallbackContext( + producer, self.amqp.producer_pool.acquire, block=True, + ) default_producer = producer_or_acquire # XXX compat def prepare_config(self, c): @@ -385,78 +780,130 @@ def prepare_config(self, c): return find_deprecated_settings(c) def now(self): - return self.loader.now(utc=self.conf.CELERY_ENABLE_UTC) + """Return the current time and date as a + :class:`~datetime.datetime` object.""" + return self.loader.now(utc=self.conf.enable_utc) def mail_admins(self, subject, body, fail_silently=False): - if self.conf.ADMINS: - to = [admin_email for _, admin_email in self.conf.ADMINS] + """Sends an email to the admins in the :setting:`admins` setting.""" + conf = self.conf + if conf.admins: + to = [admin_email for _, admin_email in conf.admins] return self.loader.mail_admins( subject, body, fail_silently, to=to, - sender=self.conf.SERVER_EMAIL, - host=self.conf.EMAIL_HOST, - port=self.conf.EMAIL_PORT, - user=self.conf.EMAIL_HOST_USER, - password=self.conf.EMAIL_HOST_PASSWORD, - timeout=self.conf.EMAIL_TIMEOUT, - use_ssl=self.conf.EMAIL_USE_SSL, - use_tls=self.conf.EMAIL_USE_TLS, + sender=conf.server_email, + host=conf.email_host, + port=conf.email_port, + user=conf.email_host_user, + password=conf.email_host_password, + timeout=conf.email_timeout, + use_ssl=conf.email_use_ssl, + use_tls=conf.email_use_tls, + charset=conf.email_charset, ) def select_queues(self, queues=None): + """Select a subset of queues, where queues must be a list of queue + names to keep.""" return self.amqp.queues.select(queues) def either(self, default_key, *values): """Fallback to the value of a configuration key if none of the `*values` are true.""" - return first(None, values) or self.conf.get(default_key) + return first(None, [ + first(None, values), starpromise(self.conf.get, default_key), + ]) def bugreport(self): + """Return a string with information useful for the Celery core + developers when reporting a bug.""" return bugreport(self) def _get_backend(self): from 
celery.backends import get_backend_by_url backend, url = get_backend_by_url( - self.backend_cls or self.conf.CELERY_RESULT_BACKEND, + self.backend_cls or self.conf.result_backend, self.loader) return backend(app=self, url=url) - def on_configure(self): - """Callback calld when the app loads configuration""" - pass - - def _get_config(self): - self.on_configure() + def _load_config(self): + if isinstance(self.on_configure, Signal): + self.on_configure.send(sender=self) + else: + # used to be a method pre 4.0 + self.on_configure() if self._config_source: self.loader.config_from_object(self._config_source) + self.configured = True - s = Settings({}, [self.prepare_config(self.loader.conf), - deepcopy(DEFAULTS)]) + settings = detect_settings( + self.prepare_config(self.loader.conf), self._preconf, + ignore_keys=self._preconf_set_by_auto, prefix=self.namespace, + ) + if self._conf is not None: + # replace in place, as someone may have referenced app.conf, + # done some changes, accessed a key, and then try to make more + # changes to the reference and not the finalized value. + self._conf.swap_with(settings) + else: + self._conf = settings # load lazy config dict initializers. 
- pending = self._pending_defaults - while pending: - s.add_defaults(maybe_evaluate(pending.popleft()())) - if self._preconf: - for key, value in items(self._preconf): - setattr(s, key, value) - return s - - def _after_fork(self, obj_): - self._maybe_close_pool() - - def _maybe_close_pool(self): - if self._pool: - self._pool.force_close_all() - self._pool = None - amqp = self.__dict__.get('amqp') - if amqp is not None and amqp._producer_pool is not None: - amqp._producer_pool.force_close_all() - amqp._producer_pool = None + pending_def = self._pending_defaults + while pending_def: + self._conf.add_defaults(maybe_evaluate(pending_def.popleft()())) + + # load lazy periodic tasks + pending_beat = self._pending_periodic_tasks + while pending_beat: + self._add_periodic_task(*pending_beat.popleft()) + + self.on_after_configure.send(sender=self, source=self._conf) + return self._conf + + def _after_fork(self): + self._pool = None + try: + self.__dict__['amqp']._producer_pool = None + except (AttributeError, KeyError): + pass + self.on_after_fork.send(sender=self) def signature(self, *args, **kwargs): + """Return a new :class:`~celery.canvas.Signature` bound to this app. 
+ + See :meth:`~celery.signature` + + """ kwargs['app'] = self return self.canvas.signature(*args, **kwargs) + def add_periodic_task(self, schedule, sig, + args=(), kwargs=(), name=None, **opts): + key, entry = self._sig_to_periodic_task_entry( + schedule, sig, args, kwargs, name, **opts) + if self.configured: + self._add_periodic_task(key, entry) + else: + self._pending_periodic_tasks.append((key, entry)) + return key + + def _sig_to_periodic_task_entry(self, schedule, sig, + args=(), kwargs={}, name=None, **opts): + sig = (sig.clone(args, kwargs) + if isinstance(sig, abstract.CallableSignature) + else self.signature(sig.name, args, kwargs)) + return name or repr(sig), { + 'schedule': schedule, + 'task': sig.name, + 'args': sig.args, + 'kwargs': sig.kwargs, + 'options': dict(sig.options, **opts), + } + + def _add_periodic_task(self, key, entry): + self._conf.beat_schedule[key] = entry + def create_task_cls(self): """Creates a base task class using default configuration taken from this app.""" @@ -491,7 +938,7 @@ def __reduce__(self): if not keep_reduce: attrs['__reduce__'] = __reduce__ - return type(name or Class.__name__, (Class, ), attrs) + return type(name or Class.__name__, (Class,), attrs) def _rgetattr(self, path): return attrgetter(path)(self) @@ -518,48 +965,62 @@ def __reduce_keys__(self): when unpickling.""" return { 'main': self.main, - 'changes': self.conf.changes, + 'changes': + self._conf.changes if self.configured else self._preconf, 'loader': self.loader_cls, 'backend': self.backend_cls, 'amqp': self.amqp_cls, 'events': self.events_cls, 'log': self.log_cls, 'control': self.control_cls, - 'accept_magic_kwargs': self.accept_magic_kwargs, 'fixups': self.fixups, 'config_source': self._config_source, 'task_cls': self.task_cls, + 'namespace': self.namespace, } def __reduce_args__(self): """Deprecated method, please use :meth:`__reduce_keys__` instead.""" - return (self.main, self.conf.changes, + return (self.main, self._conf.changes if self.configured 
else {}, self.loader_cls, self.backend_cls, self.amqp_cls, self.events_cls, self.log_cls, self.control_cls, - self.accept_magic_kwargs, self._config_source) + False, self._config_source) @cached_property def Worker(self): + """Worker application. See :class:`~@Worker`.""" return self.subclass_with_self('celery.apps.worker:Worker') @cached_property def WorkController(self, **kwargs): + """Embeddable worker. See :class:`~@WorkController`.""" return self.subclass_with_self('celery.worker:WorkController') @cached_property def Beat(self, **kwargs): + """Celerybeat scheduler application. + + See :class:`~@Beat`. + + """ return self.subclass_with_self('celery.apps.beat:Beat') @cached_property def Task(self): + """Base task class for this app.""" return self.create_task_cls() @cached_property def annotations(self): - return prepare_annotations(self.conf.CELERY_ANNOTATIONS) + return prepare_annotations(self.conf.task_annotations) @cached_property def AsyncResult(self): + """Create new result instance. + + See :class:`celery.result.AsyncResult`. + + """ return self.subclass_with_self('celery.result:AsyncResult') @cached_property @@ -568,60 +1029,84 @@ def ResultSet(self): @cached_property def GroupResult(self): - return self.subclass_with_self('celery.result:GroupResult') + """Create new group result instance. - @cached_property - def TaskSet(self): # XXX compat - """Deprecated! Please use :class:`celery.group` instead.""" - return self.subclass_with_self('celery.task.sets:TaskSet') + See :class:`celery.result.GroupResult`. - @cached_property - def TaskSetResult(self): # XXX compat - """Deprecated! Please use :attr:`GroupResult` instead.""" - return self.subclass_with_self('celery.result:TaskSetResult') + """ + return self.subclass_with_self('celery.result:GroupResult') @property def pool(self): + """Broker connection pool: :class:`~@pool`. + + This attribute is not related to the workers concurrency pool. 
+ + """ if self._pool is None: - register_after_fork(self, self._after_fork) - limit = self.conf.BROKER_POOL_LIMIT - self._pool = self.connection().Pool(limit=limit) + self._ensure_after_fork() + limit = self.conf.broker_pool_limit + pools.set_limit(limit) + self._pool = pools.connections[self.connection_for_write()] return self._pool @property def current_task(self): + """The instance of the task that is being executed, or + :const:`None`.""" return _task_stack.top + @property + def current_worker_task(self): + return get_current_worker_task() + @cached_property def oid(self): return oid_from(self) @cached_property def amqp(self): + """AMQP related functionality: :class:`~@amqp`.""" return instantiate(self.amqp_cls, app=self) @cached_property def backend(self): + """Current backend instance.""" return self._get_backend() - @cached_property + @property def conf(self): - return self._get_config() + """Current configuration.""" + if self._conf is None: + self._conf = self._load_config() + return self._conf + + def _get_from_conf_and_finalize(self, key): + conf = self._conf = self._load_config() + return conf[key] + + @conf.setter + def conf(self, d): # noqa + self._conf = d @cached_property def control(self): + """Remote control: :class:`~@control`.""" return instantiate(self.control_cls, app=self) @cached_property def events(self): + """Consuming and sending events: :class:`~@events`.""" return instantiate(self.events_cls, app=self) @cached_property def loader(self): + """Current loader instance.""" return get_loader_cls(self.loader_cls)(app=self) @cached_property def log(self): + """Logging: :class:`~@log`.""" return instantiate(self.log_cls, app=self) @cached_property @@ -631,16 +1116,26 @@ def canvas(self): @cached_property def tasks(self): + """Task registry. + + Accessing this attribute will also finalize the app. 
+ + """ self.finalize(auto=True) return self._tasks @cached_property def timezone(self): - from celery.utils.timeutils import timezone + """Current timezone for this app. + + This is a cached property taking the time zone from the + :setting:`timezone` setting. + + """ conf = self.conf - tz = conf.CELERY_TIMEZONE + tz = conf.timezone if not tz: - return (timezone.get_timezone('UTC') if conf.CELERY_ENABLE_UTC + return (timezone.get_timezone('UTC') if conf.enable_utc else timezone.local) - return timezone.get_timezone(self.conf.CELERY_TIMEZONE) + return timezone.get_timezone(conf.timezone) App = Celery # compat diff --git a/celery/app/builtins.py b/celery/app/builtins.py index a60920840..5d3993474 100644 --- a/celery/app/builtins.py +++ b/celery/app/builtins.py @@ -9,386 +9,189 @@ """ from __future__ import absolute_import -from collections import deque - -from celery._state import get_current_worker_task -from celery.utils import uuid +from celery._state import connect_on_app_finalize from celery.utils.log import get_logger -__all__ = ['shared_task', 'load_shared_tasks'] +__all__ = [] logger = get_logger(__name__) -#: global list of functions defining tasks that should be -#: added to all apps. -_shared_tasks = set() - - -def shared_task(constructor): - """Decorator that specifies a function that generates a built-in task. - - The function will then be called for every new app instance created - (lazily, so more exactly when the task registry for that app is needed). - - The function must take a single ``app`` argument. - """ - _shared_tasks.add(constructor) - return constructor - - -def load_shared_tasks(app): - """Create built-in tasks for an app instance.""" - constructors = set(_shared_tasks) - for constructor in constructors: - constructor(app) - -@shared_task +@connect_on_app_finalize def add_backend_cleanup_task(app): """The backend cleanup task can be used to clean up the default result backend. 
If the configured backend requires periodic cleanup this task is also - automatically configured to run every day at midnight (requires + automatically configured to run every day at 4am (requires :program:`celery beat` to be running). """ - @app.task(name='celery.backend_cleanup', - shared=False, _force_evaluate=True) + @app.task(name='celery.backend_cleanup', shared=False, lazy=False) def backend_cleanup(): app.backend.cleanup() return backend_cleanup -@shared_task +@connect_on_app_finalize +def add_accumulate_task(app): + """This task is used by Task.replace when replacing a task with + a group, to "collect" results.""" + @app.task(bind=True, name='celery.accumulate', shared=False, lazy=False) + def accumulate(self, *args, **kwargs): + index = kwargs.get('index') + return args[index] if index is not None else args + + +@connect_on_app_finalize def add_unlock_chord_task(app): """This task is used by result backends without native chord support. It joins chords by creating a task chain polling the header for completion. """ - from celery.canvas import signature + from celery.canvas import maybe_signature from celery.exceptions import ChordError from celery.result import allow_join_result, result_from_tuple - default_propagate = app.conf.CELERY_CHORD_PROPAGATES - @app.task(name='celery.chord_unlock', max_retries=None, shared=False, - default_retry_delay=1, ignore_result=True, _force_evaluate=True) - def unlock_chord(group_id, callback, interval=None, propagate=None, + default_retry_delay=1, ignore_result=True, lazy=False, bind=True) + def unlock_chord(self, group_id, callback, interval=None, max_retries=None, result=None, Result=app.AsyncResult, GroupResult=app.GroupResult, - result_from_tuple=result_from_tuple): - # if propagate is disabled exceptions raised by chord tasks - # will be sent as part of the result list to the chord callback. 
- # Since 3.1 propagate will be enabled by default, and instead - # the chord callback changes state to FAILURE with the - # exception set to ChordError. - propagate = default_propagate if propagate is None else propagate + result_from_tuple=result_from_tuple, **kwargs): if interval is None: - interval = unlock_chord.default_retry_delay + interval = self.default_retry_delay # check if the task group is ready, and if so apply the callback. + callback = maybe_signature(callback, app) deps = GroupResult( group_id, [result_from_tuple(r, app=app) for r in result], + app=app, ) j = deps.join_native if deps.supports_native_join else deps.join - if deps.ready(): - callback = signature(callback, app=app) + try: + ready = deps.ready() + except Exception as exc: + raise self.retry( + exc=exc, countdown=interval, max_retries=max_retries, + ) + else: + if not ready: + raise self.retry(countdown=interval, max_retries=max_retries) + + callback = maybe_signature(callback, app=app) + try: + with allow_join_result(): + ret = j(timeout=3.0, propagate=True) + except Exception as exc: try: - with allow_join_result(): - ret = j(timeout=3.0, propagate=propagate) + culprit = next(deps._failed_join_report()) + reason = 'Dependency {0.id} raised {1!r}'.format( + culprit, exc, + ) + except StopIteration: + reason = repr(exc) + logger.error('Chord %r raised: %r', group_id, exc, exc_info=1) + app.backend.chord_error_from_stack(callback, + ChordError(reason)) + else: + try: + callback.delay(ret) except Exception as exc: - try: - culprit = next(deps._failed_join_report()) - reason = 'Dependency {0.id} raised {1!r}'.format( - culprit, exc, - ) - except StopIteration: - reason = repr(exc) logger.error('Chord %r raised: %r', group_id, exc, exc_info=1) - app.backend.chord_error_from_stack(callback, - ChordError(reason)) - else: - try: - callback.delay(ret) - except Exception as exc: - logger.error('Chord %r raised: %r', group_id, exc, - exc_info=1) - app.backend.chord_error_from_stack( - callback, - 
exc=ChordError('Callback error: {0!r}'.format(exc)), - ) - else: - raise unlock_chord.retry(countdown=interval, - max_retries=max_retries) + app.backend.chord_error_from_stack( + callback, + exc=ChordError('Callback error: {0!r}'.format(exc)), + ) return unlock_chord -@shared_task +@connect_on_app_finalize def add_map_task(app): from celery.canvas import signature - @app.task(name='celery.map', shared=False, _force_evaluate=True) + @app.task(name='celery.map', shared=False, lazy=False) def xmap(task, it): task = signature(task, app=app).type return [task(item) for item in it] return xmap -@shared_task +@connect_on_app_finalize def add_starmap_task(app): from celery.canvas import signature - @app.task(name='celery.starmap', shared=False, _force_evaluate=True) + @app.task(name='celery.starmap', shared=False, lazy=False) def xstarmap(task, it): task = signature(task, app=app).type return [task(*item) for item in it] return xstarmap -@shared_task +@connect_on_app_finalize def add_chunk_task(app): from celery.canvas import chunks as _chunks - @app.task(name='celery.chunks', shared=False, _force_evaluate=True) + @app.task(name='celery.chunks', shared=False, lazy=False) def chunks(task, it, n): return _chunks.apply_chunks(task, it, n) return chunks -@shared_task +@connect_on_app_finalize def add_group_task(app): - _app = app - from celery.canvas import maybe_signature, signature + """No longer used, but here for backwards compatibility.""" + from celery.canvas import maybe_signature from celery.result import result_from_tuple - class Group(app.Task): - app = _app - name = 'celery.group' - accept_magic_kwargs = False - _decorated = True - - def run(self, tasks, result, group_id, partial_args): - app = self.app - result = result_from_tuple(result, app) - # any partial args are added to all tasks in the group - taskit = (signature(task, app=app).clone(partial_args) - for i, task in enumerate(tasks)) - if self.request.is_eager or app.conf.CELERY_ALWAYS_EAGER: - return 
app.GroupResult( - result.id, - [stask.apply(group_id=group_id) for stask in taskit], - ) - with app.producer_or_acquire() as pub: - [stask.apply_async(group_id=group_id, producer=pub, - add_to_parent=False) for stask in taskit] - parent = get_current_worker_task() - if parent: - parent.add_trail(result) - return result - - def prepare(self, options, tasks, args, **kwargs): - options['group_id'] = group_id = ( - options.setdefault('task_id', uuid())) - - def prepare_member(task): - task = maybe_signature(task, app=self.app) - task.options['group_id'] = group_id - return task, task.freeze() - - try: - tasks, res = list(zip( - *[prepare_member(task) for task in tasks] - )) - except ValueError: # tasks empty - tasks, res = [], [] - return (tasks, self.app.GroupResult(group_id, res), group_id, args) - - def apply_async(self, partial_args=(), kwargs={}, **options): - if self.app.conf.CELERY_ALWAYS_EAGER: - return self.apply(partial_args, kwargs, **options) - tasks, result, gid, args = self.prepare( - options, args=partial_args, **kwargs - ) - super(Group, self).apply_async(( - list(tasks), result.as_tuple(), gid, args), **options - ) - return result - - def apply(self, args=(), kwargs={}, **options): - return super(Group, self).apply( - self.prepare(options, args=args, **kwargs), - **options).get() - return Group - - -@shared_task + @app.task(name='celery.group', bind=True, shared=False, lazy=False) + def group(self, tasks, result, group_id, partial_args, add_to_parent=True): + app = self.app + result = result_from_tuple(result, app) + # any partial args are added to all tasks in the group + taskit = (maybe_signature(task, app=app).clone(partial_args) + for i, task in enumerate(tasks)) + with app.producer_or_acquire() as producer: + [stask.apply_async(group_id=group_id, producer=producer, + add_to_parent=False) for stask in taskit] + parent = app.current_worker_task + if add_to_parent and parent: + parent.add_trail(result) + return result + return group + + 
+@connect_on_app_finalize def add_chain_task(app): - from celery.canvas import ( - Signature, chain, chord, group, maybe_signature, maybe_unroll_group, - ) - - _app = app - - class Chain(app.Task): - app = _app - name = 'celery.chain' - accept_magic_kwargs = False - _decorated = True - - def prepare_steps(self, args, tasks): - app = self.app - steps = deque(tasks) - next_step = prev_task = prev_res = None - tasks, results = [], [] - i = 0 - while steps: - # First task get partial args from chain. - task = maybe_signature(steps.popleft(), app=app) - task = task.clone() if i else task.clone(args) - res = task.freeze() - i += 1 - - if isinstance(task, group): - task = maybe_unroll_group(task) - if isinstance(task, chain): - # splice the chain - steps.extendleft(reversed(task.tasks)) - continue - - elif isinstance(task, group) and steps and \ - not isinstance(steps[0], group): - # automatically upgrade group(..) | s to chord(group, s) - try: - next_step = steps.popleft() - # for chords we freeze by pretending it's a normal - # task instead of a group. - res = Signature.freeze(next_step) - task = chord(task, body=next_step, task_id=res.task_id) - except IndexError: - pass # no callback, so keep as group - if prev_task: - # link previous task to this task. - prev_task.link(task) - # set the results parent attribute. 
- if not res.parent: - res.parent = prev_res - - if not isinstance(prev_task, chord): - results.append(res) - tasks.append(task) - prev_task, prev_res = task, res - - return tasks, results - - def apply_async(self, args=(), kwargs={}, group_id=None, chord=None, - task_id=None, link=None, link_error=None, **options): - if self.app.conf.CELERY_ALWAYS_EAGER: - return self.apply(args, kwargs, **options) - options.pop('publisher', None) - tasks, results = self.prepare_steps(args, kwargs['tasks']) - result = results[-1] - if group_id: - tasks[-1].set(group_id=group_id) - if chord: - tasks[-1].set(chord=chord) - if task_id: - tasks[-1].set(task_id=task_id) - result = tasks[-1].type.AsyncResult(task_id) - # make sure we can do a link() and link_error() on a chain object. - if link: - tasks[-1].set(link=link) - # and if any task in the chain fails, call the errbacks - if link_error: - for task in tasks: - task.set(link_error=link_error) - tasks[0].apply_async(**options) - return result - - def apply(self, args=(), kwargs={}, signature=maybe_signature, - **options): - app = self.app - last, fargs = None, args # fargs passed to first task only - for task in kwargs['tasks']: - res = signature(task, app=app).clone(fargs).apply( - last and (last.get(), ), - ) - res.parent, last, fargs = last, res, None - return last - return Chain + """No longer used, but here for backwards compatibility.""" + @app.task(name='celery.chain', shared=False, lazy=False) + def chain(*args, **kwargs): + raise NotImplementedError('chain is not a real task') + return chain -@shared_task + +@connect_on_app_finalize def add_chord_task(app): - """Every chord is executed in a dedicated task, so that the chord - can be used as a signature, and this generates the task - responsible for that.""" - from celery import group + """No longer used, but here for backwards compatibility.""" + from celery import group, chord as _chord from celery.canvas import maybe_signature - _app = app - default_propagate = 
app.conf.CELERY_CHORD_PROPAGATES - - class Chord(app.Task): - app = _app - name = 'celery.chord' - accept_magic_kwargs = False - ignore_result = False - _decorated = True - - def run(self, header, body, partial_args=(), interval=None, - countdown=1, max_retries=None, propagate=None, - eager=False, **kwargs): - app = self.app - propagate = default_propagate if propagate is None else propagate - group_id = uuid() - - # - convert back to group if serialized - tasks = header.tasks if isinstance(header, group) else header - header = group([ - maybe_signature(s, app=app).clone() for s in tasks - ], app=self.app) - # - eager applies the group inline - if eager: - return header.apply(args=partial_args, task_id=group_id) - - body.setdefault('chord_size', len(header.tasks)) - results = header.freeze(group_id=group_id, chord=body).results - - return self.backend.apply_chord( - header, partial_args, group_id, - body, interval=interval, countdown=countdown, - max_retries=max_retries, propagate=propagate, result=results, - ) - def apply_async(self, args=(), kwargs={}, task_id=None, - group_id=None, chord=None, **options): - app = self.app - if app.conf.CELERY_ALWAYS_EAGER: - return self.apply(args, kwargs, **options) - header = kwargs.pop('header') - body = kwargs.pop('body') - header, body = (maybe_signature(header, app=app), - maybe_signature(body, app=app)) - # forward certain options to body - if chord is not None: - body.options['chord'] = chord - if group_id is not None: - body.options['group_id'] = group_id - [body.link(s) for s in options.pop('link', [])] - [body.link_error(s) for s in options.pop('link_error', [])] - body_result = body.freeze(task_id) - parent = super(Chord, self).apply_async((header, body, args), - kwargs, **options) - body_result.parent = parent - return body_result - - def apply(self, args=(), kwargs={}, propagate=True, **options): - body = kwargs['body'] - res = super(Chord, self).apply(args, dict(kwargs, eager=True), - **options) - return 
maybe_signature(body, app=self.app).apply( - args=(res.get(propagate=propagate).get(), )) - return Chord + @app.task(name='celery.chord', bind=True, ignore_result=False, + shared=False, lazy=False) + def chord(self, header, body, partial_args=(), interval=None, + countdown=1, max_retries=None, eager=False, **kwargs): + app = self.app + # - convert back to group if serialized + tasks = header.tasks if isinstance(header, group) else header + header = group([ + maybe_signature(s, app=app) for s in tasks + ], app=self.app) + body = maybe_signature(body, app=app) + ch = _chord(header, body) + return ch.run(header, body, partial_args, app, interval, + countdown, max_retries, **kwargs) + return chord diff --git a/celery/app/control.py b/celery/app/control.py index 34076df0e..0c4446906 100644 --- a/celery/app/control.py +++ b/celery/app/control.py @@ -11,30 +11,34 @@ import warnings +from billiard.common import TERM_SIGNAME + from kombu.pidbox import Mailbox from kombu.utils import cached_property +from kombu.utils.functional import lazy from celery.exceptions import DuplicateNodenameWarning +from celery.utils.text import pluralize __all__ = ['Inspect', 'Control', 'flatten_reply'] W_DUPNODE = """\ -Received multiple replies from node name {0!r}. +Received multiple replies from node {0}: {1}. 
Please make sure you give each node a unique nodename using the `-n` option.\ """ def flatten_reply(reply): - nodes = {} - seen = set() + nodes, dupes = {}, set() for item in reply: - dup = next((nodename in seen for nodename in item), None) - if dup: - warnings.warn(DuplicateNodenameWarning( - W_DUPNODE.format(dup), - )) - seen.update(item) + [dupes.add(name) for name in item if name in nodes] nodes.update(item) + if dupes: + warnings.warn(DuplicateNodenameWarning( + W_DUPNODE.format( + pluralize(len(dupes), 'name'), ', '.join(sorted(dupes)), + ), + )) return nodes @@ -51,13 +55,12 @@ def __init__(self, destination=None, timeout=1, callback=None, self.limit = limit def _prepare(self, reply): - if not reply: - return - by_node = flatten_reply(reply) - if self.destination and \ - not isinstance(self.destination, (list, tuple)): - return by_node.get(self.destination) - return by_node + if reply: + by_node = flatten_reply(reply) + if (self.destination and + not isinstance(self.destination, (list, tuple))): + return by_node.get(self.destination) + return by_node def _request(self, command, **kwargs): return self._prepare(self.app.control.broadcast( @@ -125,7 +128,12 @@ class Control(object): def __init__(self, app=None): self.app = app - self.mailbox = self.Mailbox('celery', type='fanout', accept=['json']) + self.mailbox = self.Mailbox( + 'celery', + type='fanout', + accept=['json'], + producer_pool=lazy(lambda: self.app.amqp.producer_pool), + ) @cached_property def inspect(self): @@ -150,7 +158,7 @@ def election(self, id, topic, action=None, connection=None): }) def revoke(self, task_id, destination=None, terminate=False, - signal='SIGTERM', **kwargs): + signal=TERM_SIGNAME, **kwargs): """Tell all (or specific) workers to revoke a task by id. 
If a task is revoked, the workers will ignore the task and @@ -262,7 +270,7 @@ def enable_events(self, destination=None, **kwargs): return self.broadcast('enable_events', {}, destination, **kwargs) def disable_events(self, destination=None, **kwargs): - """Tell all (or specific) workers to enable events.""" + """Tell all (or specific) workers to disable events.""" return self.broadcast('disable_events', {}, destination, **kwargs) def pool_grow(self, n=1, destination=None, **kwargs): @@ -281,6 +289,15 @@ def pool_shrink(self, n=1, destination=None, **kwargs): """ return self.broadcast('pool_shrink', {'n': n}, destination, **kwargs) + def autoscale(self, max, min, destination=None, **kwargs): + """Change worker(s) autoscale setting. + + Supports the same arguments as :meth:`broadcast`. + + """ + return self.broadcast( + 'autoscale', {'max': max, 'min': min}, destination, **kwargs) + def broadcast(self, command, arguments=None, destination=None, connection=None, reply=False, timeout=1, limit=None, callback=None, channel=None, **extra_kwargs): diff --git a/celery/app/defaults.py b/celery/app/defaults.py index 15f7fcfb6..3690ae751 100644 --- a/celery/app/defaults.py +++ b/celery/app/defaults.py @@ -13,7 +13,7 @@ from collections import deque, namedtuple from datetime import timedelta -from celery.five import items +from celery.five import items, keys, values from celery.utils import strtobool from celery.utils.functional import memoize @@ -39,18 +39,30 @@ DEFAULT_TASK_LOG_FMT = """[%(asctime)s: %(levelname)s/%(processName)s] \ %(task_name)s[%(task_id)s]: %(message)s""" -_BROKER_OLD = {'deprecate_by': '2.5', 'remove_by': '4.0', - 'alt': 'BROKER_URL setting'} -_REDIS_OLD = {'deprecate_by': '2.5', 'remove_by': '4.0', - 'alt': 'URL form of CELERY_RESULT_BACKEND'} +OLD_NS = {'celery_{0}'} +OLD_NS_BEAT = {'celerybeat_{0}'} +OLD_NS_WORKER = {'celeryd_{0}'} searchresult = namedtuple('searchresult', ('namespace', 'key', 'type')) +def Namespace(__old__=None, **options): + if 
__old__ is not None: + for opt in values(options): + if not opt.old: + opt.old = __old__ + return options + + +def old_ns(ns): + return {'{0}_{{0}}'.format(ns)} + + class Option(object): alt = None deprecate_by = None remove_by = None + old = set() typemap = dict(string=str, int=int, float=float, any=lambda v: v, bool=strtobool, dict=dict, tuple=tuple) @@ -67,176 +79,267 @@ def __repr__(self): return '{0} default->{1!r}>'.format(self.type, self.default) -NAMESPACES = { - 'BROKER': { - 'URL': Option(None, type='string'), - 'CONNECTION_TIMEOUT': Option(4, type='float'), - 'CONNECTION_RETRY': Option(True, type='bool'), - 'CONNECTION_MAX_RETRIES': Option(100, type='int'), - 'FAILOVER_STRATEGY': Option(None, type='string'), - 'HEARTBEAT': Option(None, type='int'), - 'HEARTBEAT_CHECKRATE': Option(3.0, type='int'), - 'LOGIN_METHOD': Option(None, type='string'), - 'POOL_LIMIT': Option(10, type='int'), - 'USE_SSL': Option(False, type='bool'), - 'TRANSPORT': Option(type='string'), - 'TRANSPORT_OPTIONS': Option({}, type='dict'), - 'HOST': Option(type='string', **_BROKER_OLD), - 'PORT': Option(type='int', **_BROKER_OLD), - 'USER': Option(type='string', **_BROKER_OLD), - 'PASSWORD': Option(type='string', **_BROKER_OLD), - 'VHOST': Option(type='string', **_BROKER_OLD), - }, - 'CASSANDRA': { - 'COLUMN_FAMILY': Option(type='string'), - 'DETAILED_MODE': Option(False, type='bool'), - 'KEYSPACE': Option(type='string'), - 'READ_CONSISTENCY': Option(type='string'), - 'SERVERS': Option(type='list'), - 'WRITE_CONSISTENCY': Option(type='string'), - }, - 'CELERY': { - 'ACCEPT_CONTENT': Option(DEFAULT_ACCEPT_CONTENT, type='list'), - 'ACKS_LATE': Option(False, type='bool'), - 'ALWAYS_EAGER': Option(False, type='bool'), - 'ANNOTATIONS': Option(type='any'), - 'BROADCAST_QUEUE': Option('celeryctl'), - 'BROADCAST_EXCHANGE': Option('celeryctl'), - 'BROADCAST_EXCHANGE_TYPE': Option('fanout'), - 'CACHE_BACKEND': Option(), - 'CACHE_BACKEND_OPTIONS': Option({}, type='dict'), - 'CHORD_PROPAGATES': 
Option(True, type='bool'), - 'COUCHBASE_BACKEND_SETTINGS': Option(None, type='dict'), - 'CREATE_MISSING_QUEUES': Option(True, type='bool'), - 'DEFAULT_RATE_LIMIT': Option(type='string'), - 'DISABLE_RATE_LIMITS': Option(False, type='bool'), - 'DEFAULT_ROUTING_KEY': Option('celery'), - 'DEFAULT_QUEUE': Option('celery'), - 'DEFAULT_EXCHANGE': Option('celery'), - 'DEFAULT_EXCHANGE_TYPE': Option('direct'), - 'DEFAULT_DELIVERY_MODE': Option(2, type='string'), - 'EAGER_PROPAGATES_EXCEPTIONS': Option(False, type='bool'), - 'ENABLE_UTC': Option(True, type='bool'), - 'ENABLE_REMOTE_CONTROL': Option(True, type='bool'), - 'EVENT_SERIALIZER': Option('json'), - 'EVENT_QUEUE_EXPIRES': Option(None, type='float'), - 'EVENT_QUEUE_TTL': Option(None, type='float'), - 'IMPORTS': Option((), type='tuple'), - 'INCLUDE': Option((), type='tuple'), - 'IGNORE_RESULT': Option(False, type='bool'), - 'MAX_CACHED_RESULTS': Option(100, type='int'), - 'MESSAGE_COMPRESSION': Option(type='string'), - 'MONGODB_BACKEND_SETTINGS': Option(type='dict'), - 'REDIS_HOST': Option(type='string', **_REDIS_OLD), - 'REDIS_PORT': Option(type='int', **_REDIS_OLD), - 'REDIS_DB': Option(type='int', **_REDIS_OLD), - 'REDIS_PASSWORD': Option(type='string', **_REDIS_OLD), - 'REDIS_MAX_CONNECTIONS': Option(type='int'), - 'RESULT_BACKEND': Option(type='string'), - 'RESULT_DB_SHORT_LIVED_SESSIONS': Option(False, type='bool'), - 'RESULT_DB_TABLENAMES': Option(type='dict'), - 'RESULT_DBURI': Option(), - 'RESULT_ENGINE_OPTIONS': Option(type='dict'), - 'RESULT_EXCHANGE': Option('celeryresults'), - 'RESULT_EXCHANGE_TYPE': Option('direct'), - 'RESULT_SERIALIZER': Option('pickle'), - 'RESULT_PERSISTENT': Option(None, type='bool'), - 'ROUTES': Option(type='any'), - 'SEND_EVENTS': Option(False, type='bool'), - 'SEND_TASK_ERROR_EMAILS': Option(False, type='bool'), - 'SEND_TASK_SENT_EVENT': Option(False, type='bool'), - 'STORE_ERRORS_EVEN_IF_IGNORED': Option(False, type='bool'), - 'TASK_PUBLISH_RETRY': Option(True, type='bool'), - 
'TASK_PUBLISH_RETRY_POLICY': Option({ - 'max_retries': 3, - 'interval_start': 0, - 'interval_max': 1, - 'interval_step': 0.2}, type='dict'), - 'TASK_RESULT_EXPIRES': Option(timedelta(days=1), type='float'), - 'TASK_SERIALIZER': Option('pickle'), - 'TIMEZONE': Option(type='string'), - 'TRACK_STARTED': Option(False, type='bool'), - 'REDIRECT_STDOUTS': Option(True, type='bool'), - 'REDIRECT_STDOUTS_LEVEL': Option('WARNING'), - 'QUEUES': Option(type='dict'), - 'QUEUE_HA_POLICY': Option(None, type='string'), - 'SECURITY_KEY': Option(type='string'), - 'SECURITY_CERTIFICATE': Option(type='string'), - 'SECURITY_CERT_STORE': Option(type='string'), - 'WORKER_DIRECT': Option(False, type='bool'), - }, - 'CELERYD': { - 'AGENT': Option(None, type='string'), - 'AUTOSCALER': Option('celery.worker.autoscale:Autoscaler'), - 'AUTORELOADER': Option('celery.worker.autoreload:Autoreloader'), - 'CONCURRENCY': Option(0, type='int'), - 'TIMER': Option(type='string'), - 'TIMER_PRECISION': Option(1.0, type='float'), - 'FORCE_EXECV': Option(False, type='bool'), - 'HIJACK_ROOT_LOGGER': Option(True, type='bool'), - 'CONSUMER': Option('celery.worker.consumer:Consumer', type='string'), - 'LOG_FORMAT': Option(DEFAULT_PROCESS_LOG_FMT), - 'LOG_COLOR': Option(type='bool'), - 'LOG_LEVEL': Option('WARN', deprecate_by='2.4', remove_by='4.0', - alt='--loglevel argument'), - 'LOG_FILE': Option(deprecate_by='2.4', remove_by='4.0', - alt='--logfile argument'), - 'MAX_TASKS_PER_CHILD': Option(type='int'), - 'POOL': Option(DEFAULT_POOL), - 'POOL_PUTLOCKS': Option(True, type='bool'), - 'POOL_RESTARTS': Option(False, type='bool'), - 'PREFETCH_MULTIPLIER': Option(4, type='int'), - 'STATE_DB': Option(), - 'TASK_LOG_FORMAT': Option(DEFAULT_TASK_LOG_FMT), - 'TASK_SOFT_TIME_LIMIT': Option(type='float'), - 'TASK_TIME_LIMIT': Option(type='float'), - 'WORKER_LOST_WAIT': Option(10.0, type='float') - }, - 'CELERYBEAT': { - 'SCHEDULE': Option({}, type='dict'), - 'SCHEDULER': Option('celery.beat:PersistentScheduler'), - 
'SCHEDULE_FILENAME': Option('celerybeat-schedule'), - 'SYNC_EVERY': Option(0, type='int'), - 'MAX_LOOP_INTERVAL': Option(0, type='float'), - 'LOG_LEVEL': Option('INFO', deprecate_by='2.4', remove_by='4.0', - alt='--loglevel argument'), - 'LOG_FILE': Option(deprecate_by='2.4', remove_by='4.0', - alt='--logfile argument'), - }, - 'CELERYMON': { - 'LOG_LEVEL': Option('INFO', deprecate_by='2.4', remove_by='4.0', - alt='--loglevel argument'), - 'LOG_FILE': Option(deprecate_by='2.4', remove_by='4.0', - alt='--logfile argument'), - 'LOG_FORMAT': Option(DEFAULT_LOG_FMT), - }, - 'EMAIL': { - 'HOST': Option('localhost'), - 'PORT': Option(25, type='int'), - 'HOST_USER': Option(), - 'HOST_PASSWORD': Option(), - 'TIMEOUT': Option(2, type='float'), - 'USE_SSL': Option(False, type='bool'), - 'USE_TLS': Option(False, type='bool'), - }, - 'SERVER_EMAIL': Option('celery@localhost'), - 'ADMINS': Option((), type='tuple'), -} +NAMESPACES = Namespace( + accept_content=Option(DEFAULT_ACCEPT_CONTENT, type='list', old=OLD_NS), + admins=Option((), type='tuple'), + enable_utc=Option(True, type='bool'), + imports=Option((), type='tuple', old=OLD_NS), + include=Option((), type='tuple', old=OLD_NS), + server_email=Option('celery@localhost'), + timezone=Option(type='string', old=OLD_NS), + beat=Namespace( + __old__=OLD_NS_BEAT, + + max_loop_interval=Option(0, type='float'), + schedule=Option({}, type='dict'), + scheduler=Option('celery.beat:PersistentScheduler'), + schedule_filename=Option('celerybeat-schedule'), + sync_every=Option(0, type='int'), + ), + broker=Namespace( + url=Option(None, type='string'), + read_url=Option(None, type='string'), + write_url=Option(None, type='string'), + transport=Option(type='string'), + transport_options=Option({}, type='dict'), + connection_timeout=Option(4, type='float'), + connection_retry=Option(True, type='bool'), + connection_max_retries=Option(100, type='int'), + failover_strategy=Option(None, type='string'), + heartbeat=Option(None, type='int'), + 
heartbeat_checkrate=Option(3.0, type='int'), + login_method=Option(None, type='string'), + pool_limit=Option(10, type='int'), + use_ssl=Option(False, type='bool'), + + host=Option(type='string'), + port=Option(type='int'), + user=Option(type='string'), + password=Option(type='string'), + vhost=Option(type='string'), + ), + cache=Namespace( + __old__=old_ns('celery_cache'), + + backend=Option(), + backend_options=Option({}, type='dict'), + ), + cassandra=Namespace( + entry_ttl=Option(type="float"), + keyspace=Option(type='string'), + port=Option(type="string"), + read_consistency=Option(type='string'), + servers=Option(type='list'), + table=Option(type='string'), + write_consistency=Option(type='string'), + ), + couchbase=Namespace( + __old__=old_ns('celery_couchbase'), + + backend_settings=Option(None, type='dict'), + ), + email=Namespace( + charset=Option('utf-8'), + host=Option('localhost'), + host_user=Option(), + host_password=Option(), + port=Option(25, type='int'), + timeout=Option(2, type='float'), + use_ssl=Option(False, type='bool'), + use_tls=Option(False, type='bool'), + ), + mongodb=Namespace( + __old__=old_ns('celery_mongodb'), + + backend_settings=Option(type='dict'), + ), + event=Namespace( + __old__=old_ns('celery_event'), + + queue_expires=Option(60.0, type='float'), + queue_ttl=Option(5.0, type='float'), + serializer=Option('json'), + ), + redis=Namespace( + __old__=old_ns('celery_redis'), + db=Option(type='int'), + host=Option(type='string'), + max_connections=Option(type='int'), + password=Option(type='string'), + port=Option(type='int'), + socket_timeout=Option(5.0, type='float'), + ), + result=Namespace( + __old__=old_ns('celery_result'), -def flatten(d, ns=''): - stack = deque([(ns, d)]) + backend=Option(type='string'), + cache_max=Option( + -1, + type='int', old={'celery_max_cached_results'}, + ), + compression=Option(type='str'), + exchange=Option('celeryresults'), + exchange_type=Option('direct'), + expires=Option( + timedelta(days=1), + 
type='float', old={'celery_task_result_expires'}, + ), + persistent=Option(None, type='bool'), + serializer=Option('json'), + + fspath=Option(None), + ), + riak=Namespace( + __old__=old_ns('celery_riak'), + + backend_settings=Option(type='dict'), + ), + security=Namespace( + __old__=old_ns('celery_security'), + + certificate=Option(type='string'), + cert_store=Option(type='string'), + key=Option(type='string'), + ), + sqlalchemy=Namespace( + dburi=Option(old={'celery_result_dburi'}), + engine_options=Option( + type='dict', old={'celery_result_engine_options'}, + ), + short_lived_sessions=Option( + False, type='bool', old={'celery_result_db_short_lived_sessions'}, + ), + table_names=Option(type='dict', old={'celery_result_db_tablenames'}), + ), + task=Namespace( + __old__=OLD_NS, + acks_late=Option(False, type='bool'), + always_eager=Option(False, type='bool'), + annotations=Option(type='any'), + compression=Option(type='string', old={'celery_message_compression'}), + create_missing_queues=Option(True, type='bool'), + default_delivery_mode=Option(2, type='string'), + default_exchange=Option('celery'), + default_exchange_type=Option('direct'), + default_queue=Option('celery'), + default_rate_limit=Option(type='string'), + default_routing_key=Option('celery'), + eager_propagates=Option( + False, type='bool', old={'celery_eager_propagates_exceptions'}, + ), + ignore_result=Option(False, type='bool'), + protocol=Option(1, type='int', old={'celery_task_protocol'}), + publish_retry=Option( + True, type='bool', old={'celery_task_publish_retry'}, + ), + publish_retry_policy=Option( + {'max_retries': 3, + 'interval_start': 0, + 'interval_max': 1, + 'interval_step': 0.2}, + type='dict', old={'celery_task_publish_retry_policy'}, + ), + queues=Option(type='dict'), + queue_ha_policy=Option(None, type='string'), + queue_max_priority=Option(None, type='int'), + reject_on_worker_lost=Option(type='bool'), + routes=Option(type='any'), + send_error_emails=Option( + False, type='bool', 
old={'celery_send_task_error_emails'}, + ), + send_sent_event=Option( + False, type='bool', old={'celery_send_task_sent_event'}, + ), + serializer=Option('json', old={'celery_task_serializer'}), + soft_time_limit=Option( + type='float', old={'celeryd_task_soft_time_limit'}, + ), + time_limit=Option( + type='float', old={'celeryd_task_time_limit'}, + ), + store_errors_even_if_ignored=Option(False, type='bool'), + track_started=Option(False, type='bool'), + ), + worker=Namespace( + __old__=OLD_NS_WORKER, + agent=Option(None, type='string'), + autoscaler=Option('celery.worker.autoscale:Autoscaler'), + autoreloader=Option('celery.worker.autoreload:Autoreloader'), + concurrency=Option(0, type='int'), + consumer=Option('celery.worker.consumer:Consumer', type='string'), + direct=Option(False, type='bool', old={'celery_worker_direct'}), + disable_rate_limits=Option( + False, type='bool', old={'celery_disable_rate_limits'}, + ), + enable_remote_control=Option( + True, type='bool', old={'celery_enable_remote_control'}, + ), + force_execv=Option(False, type='bool'), + hijack_root_logger=Option(True, type='bool'), + log_color=Option(type='bool'), + log_format=Option(DEFAULT_PROCESS_LOG_FMT), + lost_wait=Option(10.0, type='float', old={'celeryd_worker_lost_wait'}), + max_memory_per_child=Option(type='int'), + max_tasks_per_child=Option(type='int'), + pool=Option(DEFAULT_POOL), + pool_putlocks=Option(True, type='bool'), + pool_restarts=Option(False, type='bool'), + prefetch_multiplier=Option(4, type='int'), + redirect_stdouts=Option( + True, type='bool', old={'celery_redirect_stdouts'}, + ), + redirect_stdouts_level=Option( + 'WARNING', old={'celery_redirect_stdouts_level'}, + ), + send_task_events=Option( + False, type='bool', old={'celeryd_send_events'}, + ), + state_db=Option(), + task_log_format=Option(DEFAULT_TASK_LOG_FMT), + timer=Option(type='string'), + timer_precision=Option(1.0, type='float'), + ), +) + + +def _flatten_keys(ns, key, opt): + return [(ns + key, opt)] + + 
+def _to_compat(ns, key, opt): + if opt.old: + return [ + (oldkey.format(key).upper(), ns + key, opt) + for oldkey in opt.old + ] + return [((ns + key).upper(), ns + key, opt)] + + +def flatten(d, root='', keyfilter=_flatten_keys): + stack = deque([(root, d)]) while stack: - name, space = stack.popleft() - for key, value in items(space): - if isinstance(value, dict): - stack.append((name + key + '_', value)) + ns, options = stack.popleft() + for key, opt in items(options): + if isinstance(opt, dict): + stack.append((ns + key + '_', opt)) else: - yield name + key, value -DEFAULTS = dict((key, value.default) for key, value in flatten(NAMESPACES)) + for ret in keyfilter(ns, key, opt): + yield ret +DEFAULTS = { + key: opt.default for key, opt in flatten(NAMESPACES) +} +__compat = list(flatten(NAMESPACES, keyfilter=_to_compat)) +_OLD_DEFAULTS = {old_key: opt.default for old_key, _, opt in __compat} +_TO_OLD_KEY = {new_key: old_key for old_key, new_key, _ in __compat} +_TO_NEW_KEY = {old_key: new_key for old_key, new_key, _ in __compat} +__compat = None + +SETTING_KEYS = set(keys(DEFAULTS)) +_OLD_SETTING_KEYS = set(keys(_TO_NEW_KEY)) -def find_deprecated_settings(source): +def find_deprecated_settings(source): # pragma: no cover from celery.utils import warn_deprecated for name, opt in flatten(NAMESPACES): if (opt.deprecate_by or opt.remove_by) and getattr(source, name, None): @@ -250,20 +353,20 @@ def find_deprecated_settings(source): @memoize(maxsize=None) def find(name, namespace='celery'): # - Try specified namespace first. - namespace = namespace.upper() + namespace = namespace.lower() try: return searchresult( - namespace, name.upper(), NAMESPACES[namespace][name.upper()], + namespace, name.lower(), NAMESPACES[namespace][name.lower()], ) except KeyError: # - Try all the other namespaces. 
- for ns, keys in items(NAMESPACES): - if ns.upper() == name.upper(): - return searchresult(None, ns, keys) - elif isinstance(keys, dict): + for ns, opts in items(NAMESPACES): + if ns.lower() == name.lower(): + return searchresult(None, ns, opts) + elif isinstance(opts, dict): try: - return searchresult(ns, name.upper(), keys[name.upper()]) + return searchresult(ns, name.lower(), opts[name.lower()]) except KeyError: pass # - See if name is a qualname last. - return searchresult(None, name.upper(), DEFAULTS[name.upper()]) + return searchresult(None, name.lower(), DEFAULTS[name.lower()]) diff --git a/celery/app/log.py b/celery/app/log.py index 9dde09b16..9b643217f 100644 --- a/celery/app/log.py +++ b/celery/app/log.py @@ -18,7 +18,6 @@ from logging.handlers import WatchedFileHandler -from kombu.log import NullHandler from kombu.utils.encoding import set_default_encoding_file from celery import signals @@ -27,8 +26,7 @@ from celery.utils import isatty, node_format from celery.utils.log import ( get_logger, mlevel, - ColorFormatter, ensure_process_aware_logger, - LoggingProxy, get_multiprocessing_logger, + ColorFormatter, LoggingProxy, get_multiprocessing_logger, reset_multiprocessing_logger, ) from celery.utils.term import colored @@ -59,10 +57,10 @@ class Logging(object): def __init__(self, app): self.app = app - self.loglevel = mlevel(self.app.conf.CELERYD_LOG_LEVEL) - self.format = self.app.conf.CELERYD_LOG_FORMAT - self.task_format = self.app.conf.CELERYD_TASK_LOG_FORMAT - self.colorize = self.app.conf.CELERYD_LOG_COLOR + self.loglevel = mlevel(logging.WARN) + self.format = self.app.conf.worker_log_format + self.task_format = self.app.conf.worker_task_log_format + self.colorize = self.app.conf.worker_log_color def setup(self, loglevel=None, logfile=None, redirect_stdouts=False, redirect_level='WARNING', colorize=None, hostname=None): @@ -98,7 +96,6 @@ def setup_logging_subsystem(self, loglevel=None, logfile=None, format=None, format = format or self.format 
colorize = self.supports_color(colorize, logfile) reset_multiprocessing_logger() - ensure_process_aware_logger() receivers = signals.setup_logging.send( sender=None, loglevel=loglevel, logfile=logfile, format=format, colorize=colorize, @@ -107,7 +104,7 @@ def setup_logging_subsystem(self, loglevel=None, logfile=None, format=None, if not receivers: root = logging.getLogger() - if self.app.conf.CELERYD_HIJACK_ROOT_LOGGER: + if self.app.conf.worker_hijack_root_logger: root.handlers = [] get_logger('celery').handlers = [] get_logger('celery.task').handlers = [] @@ -176,8 +173,8 @@ def setup_task_loggers(self, loglevel=None, logfile=None, format=None, formatter=TaskFormatter, **kwargs ) logger.setLevel(loglevel) - logger.propagate = int(propagate) # this is an int for some reason. - # better to not question why. + # this is an int for some reason, better to not question why. + logger.propagate = int(propagate) signals.after_setup_task_logger.send( sender=None, logger=logger, loglevel=loglevel, logfile=logfile, @@ -233,8 +230,10 @@ def _detect_handler(self, logfile=None): return WatchedFileHandler(logfile) def _has_handler(self, logger): - if logger.handlers: - return any(not isinstance(h, NullHandler) for h in logger.handlers) + return any( + not isinstance(h, logging.NullHandler) + for h in logger.handlers or [] + ) def _is_configured(self, logger): return self._has_handler(logger) and not getattr( diff --git a/celery/app/registry.py b/celery/app/registry.py index 7046554d9..ce7b398e3 100644 --- a/celery/app/registry.py +++ b/celery/app/registry.py @@ -57,8 +57,8 @@ def periodic(self): return self.filter_types('periodic') def filter_types(self, type): - return dict((name, task) for name, task in items(self) - if getattr(task, 'type', 'regular') == type) + return {name: task for name, task in items(self) + if getattr(task, 'type', 'regular') == type} def _unpickle_task(name): diff --git a/celery/app/routes.py b/celery/app/routes.py index d654f9d70..5a367d651 100644 --- 
a/celery/app/routes.py +++ b/celery/app/routes.py @@ -4,33 +4,68 @@ ~~~~~~~~~~~~~ Contains utilities for working with task routers, - (:setting:`CELERY_ROUTES`). + (:setting:`task_routes`). """ from __future__ import absolute_import +import re +import string + +from collections import Mapping, OrderedDict + +from kombu import Queue + from celery.exceptions import QueueNotFound -from celery.five import string_t +from celery.five import items, string_t from celery.utils import lpmerge -from celery.utils.functional import firstmethod, mlazy +from celery.utils.functional import firstmethod, fun_takes_argument, mlazy from celery.utils.imports import instantiate __all__ = ['MapRoute', 'Router', 'prepare'] + +def _try_route(meth, task, args, kwargs, options=None): + if fun_takes_argument('options', meth, position=4): + return meth(task, args, kwargs, options) + return meth(task, args, kwargs) + _first_route = firstmethod('route_for_task') +def glob_to_re(glob, quote=string.punctuation.replace('*', '')): + glob = ''.join('\\' + c if c in quote else c for c in glob) + return glob.replace('*', '.+?') + + class MapRoute(object): """Creates a router out of a :class:`dict`.""" def __init__(self, map): - self.map = map + map = items(map) if isinstance(map, Mapping) else map + self.map = {} + self.patterns = OrderedDict() + for k, v in map: + if isinstance(k, re._pattern_type): + self.patterns[k] = v + elif '*' in k: + self.patterns[re.compile(glob_to_re(k))] = v + else: + self.map[k] = v def route_for_task(self, task, *args, **kwargs): try: return dict(self.map[task]) except KeyError: pass + except ValueError: + return {'queue': self.map[task]} + for regex, route in items(self.patterns): + if regex.match(task): + try: + return dict(route) + except ValueError: + return {'queue': route} class Router(object): @@ -45,12 +80,12 @@ def __init__(self, routes=None, queues=None, def route(self, options, task, args=(), kwargs={}): options = self.expand_destination(options) # expands 
'queue' if self.routes: - route = self.lookup_route(task, args, kwargs) + route = self.lookup_route(task, args, kwargs, options) if route: # expands 'queue' in route. return lpmerge(self.expand_destination(route), options) if 'queue' not in options: options = lpmerge(self.expand_destination( - self.app.conf.CELERY_DEFAULT_QUEUE), options) + self.app.conf.task_default_queue), options) return options def expand_destination(self, route): @@ -63,24 +98,25 @@ def expand_destination(self, route): queue = route.pop('queue', None) if queue: - try: - Q = self.queues[queue] # noqa - except KeyError: - raise QueueNotFound( - 'Queue {0!r} missing from CELERY_QUEUES'.format(queue)) - # needs to be declared by publisher - route['queue'] = Q + if isinstance(queue, Queue): + route['queue'] = queue + else: + try: + route['queue'] = self.queues[queue] + except KeyError: + raise QueueNotFound( + 'Queue {0!r} missing from task_queues'.format(queue)) return route - def lookup_route(self, task, args=None, kwargs=None): - return _first_route(self.routes, task, args, kwargs) + def lookup_route(self, task, args=None, kwargs=None, options=None): + return _first_route(self.routes, task, args, kwargs, options) def prepare(routes): - """Expands the :setting:`CELERY_ROUTES` setting.""" + """Expands the :setting:`task_routes` setting.""" def expand_route(route): - if isinstance(route, dict): + if isinstance(route, (Mapping, list, tuple)): return MapRoute(route) if isinstance(route, string_t): return mlazy(instantiate, route) @@ -89,5 +125,5 @@ def expand_route(route): if routes is None: return () if not isinstance(routes, (list, tuple)): - routes = (routes, ) + routes = (routes,) return [expand_route(route) for route in routes] diff --git a/celery/app/task.py b/celery/app/task.py index 79f6d3fca..12271aa4d 100644 --- a/celery/app/task.py +++ b/celery/app/task.py @@ -12,15 +12,15 @@ from billiard.einfo import ExceptionInfo -from celery import current_app +from celery import current_app, group 
from celery import states from celery._state import _task_stack from celery.canvas import signature -from celery.exceptions import MaxRetriesExceededError, Reject, Retry -from celery.five import class_property, items, with_metaclass -from celery.local import Proxy +from celery.exceptions import Ignore, MaxRetriesExceededError, Reject, Retry +from celery.five import class_property, items from celery.result import EagerResult -from celery.utils import gen_task_name, fun_takes_kwargs, uuid, maybe_reraise +from celery.utils import abstract +from celery.utils import uuid, maybe_reraise from celery.utils.functional import mattrgetter, maybe_list from celery.utils.imports import instantiate from celery.utils.mail import ErrorMail @@ -44,21 +44,8 @@ R_SELF_TASK = '<@task {0.name} bound to other {0.__self__}>' R_INSTANCE = '<@task: {0.name} of {app}{flags}>' - -class _CompatShared(object): - - def __init__(self, name, cons): - self.name = name - self.cons = cons - - def __hash__(self): - return hash(self.name) - - def __repr__(self): - return '' % (self.name, ) - - def __call__(self, app): - return self.cons(app) +#: Here for backwards compatibility as tasks no longer use a custom metaclass. +TaskType = type def _strflags(flags, default=''): @@ -93,15 +80,19 @@ class Context(object): headers = None delivery_info = None reply_to = None + root_id = None + parent_id = None correlation_id = None taskset = None # compat alias to group group = None chord = None + chain = None utc = None called_directly = True callbacks = None errbacks = None timelimit = None + origin = None _children = None # see property _protected = 0 @@ -128,84 +119,6 @@ def children(self): return self._children -class TaskType(type): - """Meta class for tasks. - - Automatically registers the task in the task registry (except - if the :attr:`Task.abstract`` attribute is set). - - If no :attr:`Task.name` attribute is provided, then the name is generated - from the module and class name. 
- - """ - _creation_count = {} # used by old non-abstract task classes - - def __new__(cls, name, bases, attrs): - new = super(TaskType, cls).__new__ - task_module = attrs.get('__module__') or '__main__' - - # - Abstract class: abstract attribute should not be inherited. - abstract = attrs.pop('abstract', None) - if abstract or not attrs.get('autoregister', True): - return new(cls, name, bases, attrs) - - # The 'app' attribute is now a property, with the real app located - # in the '_app' attribute. Previously this was a regular attribute, - # so we should support classes defining it. - app = attrs.pop('_app', None) or attrs.pop('app', None) - - # Attempt to inherit app from one the bases - if not isinstance(app, Proxy) and app is None: - for base in bases: - if getattr(base, '_app', None): - app = base._app - break - else: - app = current_app._get_current_object() - attrs['_app'] = app - - # - Automatically generate missing/empty name. - task_name = attrs.get('name') - if not task_name: - attrs['name'] = task_name = gen_task_name(app, name, task_module) - - if not attrs.get('_decorated'): - # non decorated tasks must also be shared in case - # an app is created multiple times due to modules - # imported under multiple names. - # Hairy stuff, here to be compatible with 2.x. - # People should not use non-abstract task classes anymore, - # use the task decorator. - from celery.app.builtins import shared_task - unique_name = '.'.join([task_module, name]) - if unique_name not in cls._creation_count: - # the creation count is used as a safety - # so that the same task is not added recursively - # to the set of constructors. - cls._creation_count[unique_name] = 1 - shared_task(_CompatShared( - unique_name, - lambda app: TaskType.__new__(cls, name, bases, - dict(attrs, _app=app)), - )) - - # - Create and register class. - # Because of the way import happens (recursively) - # we may or may not be the first time the task tries to register - # with the framework. 
There should only be one class for each task - # name, so we always return the registered version. - tasks = app._tasks - if task_name not in tasks: - tasks.register(new(cls, name, bases, attrs)) - instance = tasks[task_name] - instance.bind(app) - return instance.__class__ - - def __repr__(cls): - return _reprtask(cls) - - -@with_metaclass(TaskType) class Task(object): """Task base class. @@ -235,10 +148,6 @@ class Task(object): #: If :const:`True` the task is an abstract base class. abstract = True - #: If disabled the worker will not forward magic keyword arguments. - #: Deprecated and scheduled for removal in v4.0. - accept_magic_kwargs = False - #: Maximum number of retries before giving up. If set to :const:`None`, #: it will **never** stop retrying. max_retries = 3 @@ -253,7 +162,7 @@ class Task(object): rate_limit = None #: If enabled the worker will not store task state and return values - #: for this task. Defaults to the :setting:`CELERY_IGNORE_RESULT` + #: for this task. Defaults to the :setting:`task_ignore_result` #: setting. ignore_result = None @@ -266,7 +175,7 @@ class Task(object): #: configured to ignore results. store_errors_even_if_ignored = None - #: If enabled an email will be sent to :setting:`ADMINS` whenever a task + #: If enabled an email will be sent to :setting:`admins` whenever a task #: of this type fails. send_error_emails = None @@ -275,11 +184,11 @@ class Task(object): serializer = None #: Hard time limit. - #: Defaults to the :setting:`CELERYD_TASK_TIME_LIMIT` setting. + #: Defaults to the :setting:`task_time_limit` setting. time_limit = None #: Soft time limit. - #: Defaults to the :setting:`CELERYD_TASK_SOFT_TIME_LIMIT` setting. + #: Defaults to the :setting:`task_soft_time_limit` setting. soft_time_limit = None #: The result store backend used for this task. @@ -298,7 +207,7 @@ class Task(object): #: running. #: #: The application default can be overridden using the - #: :setting:`CELERY_TRACK_STARTED` setting. 
+ #: :setting:`task_track_started` setting. track_started = None #: When enabled messages for this task will be acknowledged **after** @@ -310,10 +219,22 @@ #: applications). #: #: The application default can be overridden with the - #: :setting:`CELERY_ACKS_LATE` setting. + #: :setting:`task_acks_late` setting. acks_late = None - #: List/tuple of expected exceptions. + #: Even if :attr:`acks_late` is enabled, the worker will + #: acknowledge tasks when the worker process executing them abruptly + #: exits or is signaled (e.g. :sig:`KILL`/:sig:`INT`, etc). + #: + #: Setting this to true allows the message to be requeued instead, + #: so that the task will execute again by the same worker, or another + #: worker. + #: + #: Warning: Enabling this can cause message loops; make sure you know + #: what you're doing. + reject_on_worker_lost = None + + #: Tuple of expected exceptions. #: #: These are errors that are expected in normal operation #: and that should not be regarded as a real error by the worker. @@ -324,6 +245,12 @@ #: Default task expiry time. expires = None + #: Max length of result representation used in logs and events. + resultrepr_maxsize = 1024 + + #: Task request stack, the current request will be the topmost. + request_stack = None + + #: Some may expect a request to exist even if the task has not been + #: called. This should probably be deprecated.
_default_request = None @@ -333,20 +260,18 @@ class Task(object): __bound__ = False from_config = ( - ('send_error_emails', 'CELERY_SEND_TASK_ERROR_EMAILS'), - ('serializer', 'CELERY_TASK_SERIALIZER'), - ('rate_limit', 'CELERY_DEFAULT_RATE_LIMIT'), - ('track_started', 'CELERY_TRACK_STARTED'), - ('acks_late', 'CELERY_ACKS_LATE'), - ('ignore_result', 'CELERY_IGNORE_RESULT'), - ('store_errors_even_if_ignored', - 'CELERY_STORE_ERRORS_EVEN_IF_IGNORED'), + ('send_error_emails', 'task_send_error_emails'), + ('serializer', 'task_serializer'), + ('rate_limit', 'task_default_rate_limit'), + ('track_started', 'task_track_started'), + ('acks_late', 'task_acks_late'), + ('reject_on_worker_lost', 'task_reject_on_worker_lost'), + ('ignore_result', 'task_ignore_result'), + ('store_errors_even_if_ignored', 'task_store_errors_even_if_ignored'), ) _backend = None # set by backend property. - __bound__ = False - # - Tasks are lazily bound, so that configuration is not set # - until the task is actually used @@ -360,8 +285,6 @@ def bind(self, app): for attr_name, config_name in self.from_config: if getattr(self, attr_name, None) is None: setattr(self, attr_name, conf[config_name]) - if self.accept_magic_kwargs is None: - self.accept_magic_kwargs = app.accept_magic_kwargs # decorate with annotations from config. if not was_bound: @@ -412,7 +335,7 @@ def add_around(self, attr, around): def __call__(self, *args, **kwargs): _task_stack.push(self) - self.push_request() + self.push_request(args=args, kwargs=kwargs) try: # add self if this is a bound task if self.__self__ is not None: @@ -453,7 +376,7 @@ def delay(self, *args, **kwargs): return self.apply_async(args, kwargs) def apply_async(self, args=None, kwargs=None, task_id=None, producer=None, - link=None, link_error=None, **options): + link=None, link_error=None, shadow=None, **options): """Apply tasks asynchronously by sending a message. 
:keyword args: The positional arguments to pass on to the @@ -477,17 +400,21 @@ def apply_async(self, args=None, kwargs=None, task_id=None, producer=None, the task should expire. The task will not be executed after the expiration time. + :keyword shadow: Override task name used in logs/monitoring + (default from :meth:`shadow_name`). + :keyword connection: Re-use existing broker connection instead of establishing a new one. :keyword retry: If enabled sending of the task message will be retried in the event of connection loss or failure. Default - is taken from the :setting:`CELERY_TASK_PUBLISH_RETRY` - setting. Note you need to handle the + is taken from the :setting:`task_publish_retry` + setting. Note that you need to handle the producer/connection manually for this to work. :keyword retry_policy: Override the retry policy used. See the - :setting:`CELERY_TASK_PUBLISH_RETRY` setting. + :setting:`task_publish_retry_policy` + setting. :keyword routing_key: Custom routing key used to route the task to a worker server. If in combination with a @@ -495,8 +422,8 @@ def apply_async(self, args=None, kwargs=None, task_id=None, producer=None, routing keys to topic exchanges. :keyword queue: The queue to route the task to. This must be a key - present in :setting:`CELERY_QUEUES`, or - :setting:`CELERY_CREATE_MISSING_QUEUES` must be + present in :setting:`task_queues`, or + :setting:`task_create_missing_queues` must be enabled. See :ref:`guide-routing` for more information. @@ -518,45 +445,89 @@ def apply_async(self, args=None, kwargs=None, task_id=None, producer=None, to use. Can be one of ``zlib``, ``bzip2``, or any custom compression methods registered with :func:`kombu.compression.register`. Defaults to - the :setting:`CELERY_MESSAGE_COMPRESSION` + the :setting:`task_compression` setting. :keyword link: A single, or a list of tasks to apply if the task exits successfully. 
:keyword link_error: A single, or a list of tasks to apply if an error occurs while executing the task. - :keyword producer: :class:~@amqp.TaskProducer` instance to use. + :keyword producer: :class:`kombu.Producer` instance to use. + :keyword add_to_parent: If set to True (default) and the task is applied while executing another task, then the result will be appended to the parent tasks ``request.children`` attribute. Trailing can also be disabled by default using the :attr:`trail` attribute + :keyword publisher: Deprecated alias to ``producer``. + :keyword headers: Message headers to be sent in the + task (a :class:`dict`) + + :rtype :class:`celery.result.AsyncResult`: if + :setting:`task_always_eager` is not set, otherwise + :class:`celery.result.EagerResult`: + Also supports all keyword arguments supported by :meth:`kombu.Producer.publish`. .. note:: - If the :setting:`CELERY_ALWAYS_EAGER` setting is set, it will + If the :setting:`task_always_eager` setting is set, it will be replaced by a local :func:`apply` call instead. """ + try: + check_arguments = self.__header__ + except AttributeError: # pragma: no cover + pass + else: + check_arguments(*(args or ()), **(kwargs or {})) + app = self._get_app() - if app.conf.CELERY_ALWAYS_EAGER: + if app.conf.task_always_eager: return self.apply(args, kwargs, task_id=task_id or uuid(), link=link, link_error=link_error, **options) # add 'self' if this is a "task_method". 
if self.__self__ is not None: args = args if isinstance(args, tuple) else tuple(args or ()) - args = (self.__self__, ) + args + args = (self.__self__,) + args + shadow = shadow or self.shadow_name(args, kwargs, options) + + preopts = self._get_exec_options() + options = dict(preopts, **options) if options else preopts return app.send_task( self.name, args, kwargs, task_id=task_id, producer=producer, link=link, link_error=link_error, result_cls=self.AsyncResult, - **dict(self._get_exec_options(), **options) + shadow=shadow, + **options ) - def subtask_from_request(self, request=None, args=None, kwargs=None, - queue=None, **extra_options): + def shadow_name(self, args, kwargs, options): + """Override for custom task name in worker logs/monitoring. + + :param args: Task positional arguments. + :param kwargs: Task keyword arguments. + :param options: Task execution options. + + **Example**: + + .. code-block:: python + + from celery.utils.imports import qualname + + def shadow_name(task, args, kwargs, options): + return qualname(args[0]) + + @app.task(shadow_name=shadow_name, serializer='pickle') + def apply_function_async(fun, *args, **kwargs): + return fun(*args, **kwargs) + + """ + pass + + def signature_from_request(self, request=None, args=None, kwargs=None, + queue=None, **extra_options): request = self.request if request is None else request args = request.args if args is None else args kwargs = request.kwargs if kwargs is None else kwargs @@ -569,11 +540,16 @@ def subtask_from_request(self, request=None, args=None, kwargs=None, 'chord': request.chord, 'soft_time_limit': limit_soft, 'time_limit': limit_hard, + 'reply_to': request.reply_to, + 'headers': request.headers, } options.update( - {'queue': queue} if queue else (request.delivery_info or {}) + {'queue': queue} if queue else (request.delivery_info or {}), ) - return self.subtask(args, kwargs, options, type=self, **extra_options) + return self.signature( + args, kwargs, options, type=self, **extra_options + 
) + subtask_from_request = signature_from_request def retry(self, args=None, kwargs=None, exc=None, throw=True, eta=None, countdown=None, max_retries=None, **options): @@ -594,7 +570,12 @@ def retry(self, args=None, kwargs=None, exc=None, throw=True, :keyword countdown: Time in seconds to delay the retry for. :keyword eta: Explicit time and date to run the retry at (must be a :class:`~datetime.datetime` instance). - :keyword max_retries: If set, overrides the default retry limit. + :keyword max_retries: If set, overrides the default retry limit for + this execution. Changes to this parameter do not propagate to + subsequent task retry attempts. A value of :const:`None`, means + "use the default", so if you want infinite retries you would + have to set the :attr:`max_retries` attribute of the task to + :const:`None` first. :keyword time_limit: If set, overrides the default time limit. :keyword soft_time_limit: If set, overrides the default soft time limit. @@ -614,19 +595,19 @@ def retry(self, args=None, kwargs=None, exc=None, throw=True, **Example** - .. code-block:: python + .. code-block:: pycon >>> from imaginary_twitter_lib import Twitter >>> from proj.celery import app - >>> @app.task() - ... def tweet(auth, message): + >>> @app.task(bind=True) + ... def tweet(self, auth, message): ... twitter = Twitter(oauth=auth) ... try: ... twitter.post_status_update(message) ... except twitter.FailWhale as exc: ... # Retry in 5 minutes. - ... raise tweet.retry(countdown=60 * 5, exc=exc) + ... 
raise self.retry(countdown=60 * 5, exc=exc) Although the task will never return above as `retry` raises an exception to notify the worker, we use `raise` in front of the retry @@ -647,7 +628,7 @@ def retry(self, args=None, kwargs=None, exc=None, throw=True, countdown = self.default_retry_delay is_eager = request.is_eager - S = self.subtask_from_request( + S = self.signature_from_request( request, args, kwargs, countdown=countdown, eta=eta, retries=retries, **options @@ -658,20 +639,25 @@ def retry(self, args=None, kwargs=None, exc=None, throw=True, # first try to reraise the original exception maybe_reraise() # or if not in an except block then raise the custom exc. - raise exc() + raise exc raise self.MaxRetriesExceededError( "Can't retry {0}[{1}] args:{2} kwargs:{3}".format( self.name, request.id, S.args, S.kwargs)) - # If task was executed eagerly using apply(), - # then the retry must also be executed eagerly. + ret = Retry(exc=exc, when=eta or countdown) + + if is_eager: + # if task was executed eagerly using apply(), + # then the retry must also be executed eagerly. + S.apply().get() + if throw: + raise ret + return ret + try: - S.apply().get() if is_eager else S.apply_async() + S.apply_async() except Exception as exc: - if is_eager: - raise - raise Reject(exc, requeue=True) - ret = Retry(exc=exc, when=eta or countdown) + raise Reject(exc, requeue=False) if throw: raise ret return ret @@ -683,25 +669,24 @@ def apply(self, args=None, kwargs=None, :param args: positional arguments passed on to the task. :param kwargs: keyword arguments passed on to the task. :keyword throw: Re-raise task exceptions. Defaults to - the :setting:`CELERY_EAGER_PROPAGATES_EXCEPTIONS` + the :setting:`task_eager_propagates` setting. :rtype :class:`celery.result.EagerResult`: """ # trace imports Task, so need to import inline. 
- from celery.app.trace import eager_trace_task + from celery.app.trace import build_tracer app = self._get_app() args = args or () # add 'self' if this is a bound method. if self.__self__ is not None: - args = (self.__self__, ) + tuple(args) + args = (self.__self__,) + tuple(args) kwargs = kwargs or {} task_id = options.get('task_id') or uuid() retries = options.get('retries', 0) - throw = app.either('CELERY_EAGER_PROPAGATES_EXCEPTIONS', - options.pop('throw', None)) + throw = app.either('task_eager_propagates', options.pop('throw', None)) # Make sure we get the task instance, not class. task = app._tasks[self.name] @@ -715,27 +700,16 @@ def apply(self, args=None, kwargs=None, 'errbacks': maybe_list(link_error), 'headers': options.get('headers'), 'delivery_info': {'is_eager': True}} - if self.accept_magic_kwargs: - default_kwargs = {'task_name': task.name, - 'task_id': task_id, - 'task_retries': retries, - 'task_is_eager': True, - 'logfile': options.get('logfile'), - 'loglevel': options.get('loglevel', 0), - 'delivery_info': {'is_eager': True}} - supported_keys = fun_takes_kwargs(task.run, default_kwargs) - extend_with = dict((key, val) - for key, val in items(default_kwargs) - if key in supported_keys) - kwargs.update(extend_with) - tb = None - retval, info = eager_trace_task(task, task_id, args, kwargs, - app=self._get_app(), - request=request, propagate=throw) + tracer = build_tracer( + task.name, task, eager=True, + propagate=throw, app=self._get_app(), + ) + ret = tracer(task_id, args, kwargs, request) + retval = ret.retval if isinstance(retval, ExceptionInfo): retval, tb = retval.exception, retval.traceback - state = states.SUCCESS if info is None else info.state + state = states.SUCCESS if ret.info is None else ret.info.state return EagerResult(task_id, retval, state, traceback=tb) def AsyncResult(self, task_id, **kwargs): @@ -747,20 +721,21 @@ def AsyncResult(self, task_id, **kwargs): return self._get_app().AsyncResult(task_id, backend=self.backend, 
task_name=self.name, **kwargs) - def subtask(self, args=None, *starargs, **starkwargs): + def signature(self, args=None, *starargs, **starkwargs): """Return :class:`~celery.signature` object for this task, wrapping arguments and execution options for a single task invocation.""" starkwargs.setdefault('app', self.app) return signature(self, args, *starargs, **starkwargs) + subtask = signature def s(self, *args, **kwargs): - """``.s(*a, **k) -> .subtask(a, k)``""" - return self.subtask(args, kwargs) + """``.s(*a, **k) -> .signature(a, k)``""" + return self.signature(args, kwargs) def si(self, *args, **kwargs): - """``.si(*a, **k) -> .subtask(a, k, immutable=True)``""" - return self.subtask(args, kwargs, immutable=True) + """``.si(*a, **k) -> .signature(a, k, immutable=True)``""" + return self.signature(args, kwargs, immutable=True) def chunks(self, it, n): """Creates a :class:`~celery.canvas.chunks` task for this task.""" @@ -782,6 +757,52 @@ def send_event(self, type_, **fields): with self.app.events.default_dispatcher(hostname=req.hostname) as d: return d.send(type_, uuid=req.id, **fields) + def replace(self, sig): + """Replace the current task, with a new task inheriting the + same task id. + + :param sig: :class:`@signature` + + Note: This will raise :exc:`~@Ignore`, so the best practice + is to always use ``raise self.replace(...)`` to convey + to the reader that the task will not continue after being replaced. + + :param: Signature of new task. + + """ + chord = self.request.chord + if isinstance(sig, group): + sig |= self.app.tasks['celery.accumulate'].s(index=0).set( + chord=chord, + ) + chord = None + sig.freeze(self.request.id, + group_id=self.request.group, + chord=chord, + root_id=self.request.root_id) + sig.delay() + raise Ignore('Chord member replaced by new task') + + def add_to_chord(self, sig, lazy=False): + """Add signature to the chord the current task is a member of. + + :param sig: Signature to extend chord with. 
+ :param lazy: If enabled the new task will not actually be called, + and ``sig.delay()`` must be called manually. + + .. versionadded:: 4.0 + + Currently only supported by the Redis result backend. + + """ + if not self.request.chord: + raise ValueError('Current task is not member of any chord') + result = sig.freeze(group_id=self.request.group, + chord=self.request.chord, + root_id=self.request.root_id) + self.backend.add_to_chord(self.request.group, result) + return sig.delay() if not lazy else sig + def update_state(self, task_id=None, state=None, meta=None): """Update task state. @@ -855,9 +876,8 @@ def after_return(self, status, retval, task_id, args, kwargs, einfo): :param status: Current task state. :param retval: Task return value/exception. :param task_id: Unique id of the task. - :param args: Original arguments for the task that failed. - :param kwargs: Original keyword arguments for the task - that failed. + :param args: Original arguments for the task. + :param kwargs: Original keyword arguments for the task. :keyword einfo: :class:`~billiard.einfo.ExceptionInfo` instance, containing the traceback (if any). @@ -918,4 +938,5 @@ def backend(self, value): # noqa @property def __name__(self): return self.__class__.__name__ +abstract.CallableTask.register(Task) BaseTask = Task # compat alias diff --git a/celery/app/trace.py b/celery/app/trace.py index b4c271631..e29d9d990 100644 --- a/celery/app/trace.py +++ b/celery/app/trace.py @@ -15,33 +15,85 @@ # but in the end it only resulted in bad performance and horrible tracebacks, # so instead we now use one closure per task class. 
+import logging import os -import socket import sys +from collections import namedtuple from warnings import warn from billiard.einfo import ExceptionInfo from kombu.exceptions import EncodeError -from kombu.utils import kwdict +from kombu.serialization import loads as loads_message, prepare_accept_content +from kombu.utils.encoding import safe_repr, safe_str from celery import current_app, group from celery import states, signals from celery._state import _task_stack from celery.app import set_default_app from celery.app.task import Task as BaseTask, Context -from celery.exceptions import Ignore, Reject, Retry +from celery.exceptions import Ignore, Reject, Retry, InvalidTaskError +from celery.five import monotonic +from celery.utils import gethostname from celery.utils.log import get_logger from celery.utils.objects import mro_lookup +from celery.utils.saferepr import saferepr from celery.utils.serialization import ( - get_pickleable_exception, - get_pickleable_etype, + get_pickleable_exception, get_pickled_exception, get_pickleable_etype, ) +from celery.utils.text import truncate -__all__ = ['TraceInfo', 'build_tracer', 'trace_task', 'eager_trace_task', +__all__ = ['TraceInfo', 'build_tracer', 'trace_task', 'setup_worker_optimizations', 'reset_worker_optimizations'] -_logger = get_logger(__name__) +logger = get_logger(__name__) +info = logger.info + +#: Format string used to log task success. +LOG_SUCCESS = """\ +Task %(name)s[%(id)s] succeeded in %(runtime)ss: %(return_value)s\ +""" + +#: Format string used to log task failure. +LOG_FAILURE = """\ +Task %(name)s[%(id)s] %(description)s: %(exc)s\ +""" + +#: Format string used to log task internal error. +LOG_INTERNAL_ERROR = """\ +Task %(name)s[%(id)s] %(description)s: %(exc)s\ +""" + +#: Format string used to log task ignored. +LOG_IGNORED = """\ +Task %(name)s[%(id)s] %(description)s\ +""" + +#: Format string used to log task rejected. 
+LOG_REJECTED = """\ +Task %(name)s[%(id)s] %(exc)s\ +""" + +#: Format string used to log task retry. +LOG_RETRY = """\ +Task %(name)s[%(id)s] retry: %(exc)s\ +""" + +log_policy_t = namedtuple( + 'log_policy_t', ('format', 'description', 'severity', 'traceback', 'mail'), +) + +log_policy_reject = log_policy_t(LOG_REJECTED, 'rejected', logging.WARN, 1, 1) +log_policy_ignore = log_policy_t(LOG_IGNORED, 'ignored', logging.INFO, 0, 0) +log_policy_internal = log_policy_t( + LOG_INTERNAL_ERROR, 'INTERNAL ERROR', logging.CRITICAL, 1, 1, +) +log_policy_expected = log_policy_t( + LOG_FAILURE, 'raised expected', logging.INFO, 0, 0, +) +log_policy_unexpected = log_policy_t( + LOG_FAILURE, 'raised unexpected', logging.ERROR, 1, 1, +) send_prerun = signals.task_prerun.send send_postrun = signals.task_postrun.send @@ -53,20 +105,35 @@ RETRY = states.RETRY FAILURE = states.FAILURE EXCEPTION_STATES = states.EXCEPTION_STATES -IGNORE_STATES = frozenset([IGNORED, RETRY, REJECTED]) +IGNORE_STATES = frozenset({IGNORED, RETRY, REJECTED}) #: set by :func:`setup_worker_optimizations` -_tasks = None +_localized = [] _patched = {} +trace_ok_t = namedtuple('trace_ok_t', ('retval', 'info', 'runtime', 'retstr')) + def task_has_custom(task, attr): """Return true if the task or one of its bases defines ``attr`` (excluding the one in BaseTask).""" - return mro_lookup(task.__class__, attr, stop=(BaseTask, object), + return mro_lookup(task.__class__, attr, stop={BaseTask, object}, monkey_patched=['celery.app.task']) +def get_log_policy(task, einfo, exc): + if isinstance(exc, Reject): + return log_policy_reject + elif isinstance(exc, Ignore): + return log_policy_ignore + elif einfo.internal: + return log_policy_internal + else: + if task.throws and isinstance(exc, task.throws): + return log_policy_expected + return log_policy_unexpected + + class TraceInfo(object): __slots__ = ('state', 'retval') @@ -74,21 +141,28 @@ def __init__(self, state, retval=None): self.state = state self.retval = retval - 
def handle_error_state(self, task, eager=False): + def handle_error_state(self, task, req, + eager=False, call_errbacks=True): store_errors = not eager if task.ignore_result: store_errors = task.store_errors_even_if_ignored - return { RETRY: self.handle_retry, FAILURE: self.handle_failure, - }[self.state](task, store_errors=store_errors) + }[self.state](task, req, + store_errors=store_errors, + call_errbacks=call_errbacks) + + def handle_reject(self, task, req, **kwargs): + self._log_error(task, req, ExceptionInfo()) - def handle_retry(self, task, store_errors=True): + def handle_ignore(self, task, req, **kwargs): + self._log_error(task, req, ExceptionInfo()) + + def handle_retry(self, task, req, store_errors=True, **kwargs): """Handle retry exception.""" # the exception raised is the Retry semi-predicate, # and it's exc' attribute is the original exception raised (if any). - req = task.request type_, _, tb = sys.exc_info() try: reason = self.retval @@ -100,37 +174,73 @@ def handle_retry(self, task, store_errors=True): task.on_retry(reason.exc, req.id, req.args, req.kwargs, einfo) signals.task_retry.send(sender=task, request=req, reason=reason, einfo=einfo) + info(LOG_RETRY, { + 'id': req.id, 'name': task.name, + 'exc': safe_repr(reason.exc), + }) return einfo finally: del(tb) - def handle_failure(self, task, store_errors=True): + def handle_failure(self, task, req, store_errors=True, call_errbacks=True): """Handle exception.""" - req = task.request type_, _, tb = sys.exc_info() try: exc = self.retval einfo = ExceptionInfo() einfo.exception = get_pickleable_exception(einfo.exception) einfo.type = get_pickleable_etype(einfo.type) - if store_errors: - task.backend.mark_as_failure( - req.id, exc, einfo.traceback, request=req, - ) + task.backend.mark_as_failure( + req.id, exc, einfo.traceback, + request=req, store_result=store_errors, + call_errbacks=call_errbacks, + ) task.on_failure(exc, req.id, req.args, req.kwargs, einfo) signals.task_failure.send(sender=task, 
task_id=req.id, exception=exc, args=req.args, kwargs=req.kwargs, traceback=tb, einfo=einfo) + self._log_error(task, req, einfo) return einfo finally: del(tb) + def _log_error(self, task, req, einfo): + eobj = einfo.exception = get_pickled_exception(einfo.exception) + exception, traceback, exc_info, sargs, skwargs = ( + safe_repr(eobj), + safe_str(einfo.traceback), + einfo.exc_info, + safe_repr(req.args), + safe_repr(req.kwargs), + ) + policy = get_log_policy(task, einfo, eobj) + + context = { + 'hostname': req.hostname, + 'id': req.id, + 'name': task.name, + 'exc': exception, + 'traceback': traceback, + 'args': sargs, + 'kwargs': skwargs, + 'description': policy.description, + 'internal': einfo.internal, + } + + logger.log(policy.severity, policy.format.strip(), context, + exc_info=exc_info if policy.traceback else None, + extra={'data': context}) + + if policy.mail: + task.send_error_email(context, einfo.exception) + def build_tracer(name, task, loader=None, hostname=None, store_errors=True, Info=TraceInfo, eager=False, propagate=False, app=None, - IGNORE_STATES=IGNORE_STATES): + monotonic=monotonic, truncate=truncate, + trace_ok_t=trace_ok_t, IGNORE_STATES=IGNORE_STATES): """Return a function that traces task execution; catches all exceptions and updates result backend with the state and result @@ -163,7 +273,7 @@ def build_tracer(name, task, loader=None, hostname=None, store_errors=True, track_started = task.track_started track_started = not eager and (task.track_started and not ignore_result) publish_result = not eager and not ignore_result - hostname = hostname or socket.gethostname() + hostname = hostname or gethostname() loader_task_init = loader.on_task_init loader_cleanup = loader.on_process_cleanup @@ -176,6 +286,7 @@ def build_tracer(name, task, loader=None, hostname=None, store_errors=True, task_after_return = task.after_return store_result = backend.store_result + mark_as_done = backend.mark_as_done backend_cleanup = backend.process_cleanup pid = 
os.getpid() @@ -185,7 +296,8 @@ def build_tracer(name, task, loader=None, hostname=None, store_errors=True, pop_request = request_stack.pop push_task = _task_stack.push pop_task = _task_stack.pop - on_chord_part_return = backend.on_chord_part_return + _does_info = logger.isEnabledFor(logging.INFO) + resultrepr_maxsize = task.resultrepr_maxsize prerun_receivers = signals.task_prerun.receivers postrun_receivers = signals.task_postrun.receivers @@ -198,17 +310,16 @@ def on_error(request, exc, uuid, state=FAILURE, call_errbacks=True): if propagate: raise I = Info(state, exc) - R = I.handle_error_state(task, eager=eager) - if call_errbacks: - group( - [signature(errback, app=app) - for errback in request.errbacks or []], app=app, - ).apply_async((uuid, )) + R = I.handle_error_state( + task, request, eager=eager, call_errbacks=call_errbacks, + ) return I, R, I.state, I.retval def trace_task(uuid, args, kwargs, request=None): # R - is the possibly prepared return value. # I - is the Info object. + # T - runtime + # Rstr - textual representation of return value # retval - is the always unmodified return value. # state - is the resulting task state. 
@@ -216,12 +327,19 @@ def trace_task(uuid, args, kwargs, request=None): # for performance reasons, and because the function is so long # we want the main variables (I, and R) to stand out visually from the # the rest of the variables, so breaking PEP8 is worth it ;) - R = I = retval = state = None - kwargs = kwdict(kwargs) + R = I = T = Rstr = retval = state = None + task_request = None + time_start = monotonic() try: + try: + kwargs.items + except AttributeError: + raise InvalidTaskError( + 'Task keyword arguments is not a mapping') push_task(task) task_request = Context(request or {}, args=args, called_directly=False, kwargs=kwargs) + root_id = task_request.root_id or uuid push_request(task_request) try: # -*- PRE -*- @@ -242,13 +360,14 @@ def trace_task(uuid, args, kwargs, request=None): except Reject as exc: I, R = Info(REJECTED, exc), ExceptionInfo(internal=True) state, retval = I.state, I.retval + I.handle_reject(task, task_request) except Ignore as exc: I, R = Info(IGNORED, exc), ExceptionInfo(internal=True) state, retval = I.state, I.retval + I.handle_ignore(task, task_request) except Retry as exc: I, R, state, retval = on_error( - task_request, exc, uuid, RETRY, call_errbacks=False, - ) + task_request, exc, uuid, RETRY, call_errbacks=False) except Exception as exc: I, R, state, retval = on_error(task_request, exc, uuid) except BaseException as exc: @@ -257,15 +376,46 @@ def trace_task(uuid, args, kwargs, request=None): try: # callback tasks must be applied before the result is # stored, so that result.children is populated. 
- group( - [signature(callback, app=app) - for callback in task.request.callbacks or []], - app=app, - ).apply_async((retval, )) - if publish_result: - store_result( - uuid, retval, SUCCESS, request=task_request, + + # groups are called inline and will store trail + # separately, so need to call them separately + # so that the trail's not added multiple times :( + # (Issue #1936) + callbacks = task.request.callbacks + if callbacks: + if len(task.request.callbacks) > 1: + sigs, groups = [], [] + for sig in callbacks: + sig = signature(sig, app=app) + if isinstance(sig, group): + groups.append(sig) + else: + sigs.append(sig) + for group_ in groups: + group_.apply_async( + (retval,), + parent_id=uuid, root_id=root_id, + ) + if sigs: + group(sigs, app=app).apply_async( + (retval,), + parent_id=uuid, root_id=root_id, + ) + else: + signature(callbacks[0], app=app).apply_async( + (retval,), parent_id=uuid, root_id=root_id, + ) + + # execute first task in chain + chain = task_request.chain + if chain: + signature(chain.pop(), app=app).apply_async( + (retval,), chain=chain, + parent_id=uuid, root_id=root_id, ) + mark_as_done( + uuid, retval, task_request, publish_result, + ) except EncodeError as exc: I, R, state, retval = on_error(task_request, exc, uuid) else: @@ -273,11 +423,16 @@ def trace_task(uuid, args, kwargs, request=None): task_on_success(retval, uuid, args, kwargs) if success_receivers: send_success(sender=task, result=retval) + if _does_info: + T = monotonic() - time_start + Rstr = saferepr(R, resultrepr_maxsize) + info(LOG_SUCCESS, { + 'id': uuid, 'name': name, + 'return_value': Rstr, 'runtime': T, + }) # -* POST *- if state not in IGNORE_STATES: - if task_request.chord: - on_chord_part_return(task, state, R) if task_after_return: task_after_return( state, retval, uuid, args, kwargs, None, @@ -298,15 +453,17 @@ def trace_task(uuid, args, kwargs, request=None): except (KeyboardInterrupt, SystemExit, MemoryError): raise except Exception as exc: - 
_logger.error('Process cleanup failed: %r', exc, - exc_info=True) + logger.error('Process cleanup failed: %r', exc, + exc_info=True) except MemoryError: raise except Exception as exc: if eager: raise R = report_internal_error(task, exc) - return R, I + if task_request is not None: + I, _, _, _ = on_error(task_request, exc, uuid) + return trace_ok_t(R, I, T, Rstr) return trace_task @@ -315,27 +472,53 @@ def trace_task(task, uuid, args, kwargs, request={}, **opts): try: if task.__trace__ is None: task.__trace__ = build_tracer(task.name, task, **opts) - return task.__trace__(uuid, args, kwargs, request)[0] + return task.__trace__(uuid, args, kwargs, request) except Exception as exc: - return report_internal_error(task, exc) - - -def _trace_task_ret(name, uuid, args, kwargs, request={}, app=None, **opts): - return trace_task((app or current_app).tasks[name], - uuid, args, kwargs, request, app=app, **opts) + return trace_ok_t(report_internal_error(task, exc), None, 0.0, None) + + +def _trace_task_ret(name, uuid, request, body, content_type, + content_encoding, loads=loads_message, app=None, + **extra_request): + app = app or current_app._get_current_object() + embed = None + if content_type: + accept = prepare_accept_content(app.conf.accept_content) + args, kwargs, embed = loads( + body, content_type, content_encoding, accept=accept, + ) + else: + args, kwargs, embed = body + hostname = gethostname() + request.update({ + 'args': args, 'kwargs': kwargs, + 'hostname': hostname, 'is_eager': False, + }, **embed or {}) + R, I, T, Rstr = trace_task(app.tasks[name], + uuid, args, kwargs, request, app=app) + return (1, R, T) if I else (0, Rstr, T) trace_task_ret = _trace_task_ret -def _fast_trace_task(task, uuid, args, kwargs, request={}): - # setup_worker_optimizations will point trace_task_ret to here, - # so this is the function used in the worker. 
- return _tasks[task].__trace__(uuid, args, kwargs, request)[0] - - -def eager_trace_task(task, uuid, args, kwargs, request=None, **opts): - opts.setdefault('eager', True) - return build_tracer(task.name, task, **opts)( - uuid, args, kwargs, request) +def _fast_trace_task(task, uuid, request, body, content_type, + content_encoding, loads=loads_message, _loc=_localized, + hostname=None, **_): + embed = None + tasks, accept, hostname = _loc + if content_type: + args, kwargs, embed = loads( + body, content_type, content_encoding, accept=accept, + ) + else: + args, kwargs, embed = body + request.update({ + 'args': args, 'kwargs': kwargs, + 'hostname': hostname, 'is_eager': False, + }, **embed or {}) + R, I, T, Rstr = tasks[task].__trace__( + uuid, args, kwargs, request, + ) + return (1, R, T) if I else (0, Rstr, T) def report_internal_error(task, exc): @@ -351,10 +534,11 @@ def report_internal_error(task, exc): del(_tb) -def setup_worker_optimizations(app): - global _tasks +def setup_worker_optimizations(app, hostname=None): global trace_task_ret + hostname = hostname or gethostname() + # make sure custom Task.__call__ methods that calls super # will not mess up the request/task stack. 
_install_stack_protection() @@ -372,12 +556,16 @@ def setup_worker_optimizations(app): app.finalize() # set fast shortcut to task registry - _tasks = app._tasks + _localized[:] = [ + app._tasks, + prepare_accept_content(app.conf.accept_content), + hostname, + ] trace_task_ret = _fast_trace_task - from celery.worker import job as job_module - job_module.trace_task_ret = _fast_trace_task - job_module.__optimize__() + from celery.worker import request as request_module + request_module.trace_task_ret = _fast_trace_task + request_module.__optimize__() def reset_worker_optimizations(): @@ -391,8 +579,8 @@ def reset_worker_optimizations(): BaseTask.__call__ = _patched.pop('BaseTask.__call__') except KeyError: pass - from celery.worker import job as job_module - job_module.trace_task_ret = _trace_task_ret + from celery.worker import request as request_module + request_module.trace_task_ret = _trace_task_ret def _install_stack_protection(): diff --git a/celery/app/utils.py b/celery/app/utils.py index defdca7b8..47254888e 100644 --- a/celery/app/utils.py +++ b/celery/app/utils.py @@ -12,16 +12,23 @@ import platform as _platform import re -from collections import Mapping +from collections import Mapping, namedtuple +from copy import deepcopy from types import ModuleType +from kombu.utils.url import maybe_sanitize_url + from celery.datastructures import ConfigurationView -from celery.five import items, string_t, values +from celery.exceptions import ImproperlyConfigured +from celery.five import items, keys, string_t, values from celery.platforms import pyimplementation from celery.utils.text import pretty from celery.utils.imports import import_from_cwd, symbol_by_name, qualname -from .defaults import find +from .defaults import ( + _TO_NEW_KEY, _TO_OLD_KEY, _OLD_DEFAULTS, _OLD_SETTING_KEYS, + DEFAULTS, SETTING_KEYS, find, +) __all__ = ['Settings', 'appstr', 'bugreport', 'filter_hidden_settings', 'find_app'] @@ -42,38 +49,70 @@ re.IGNORECASE, ) +E_MIX_OLD_INTO_NEW = """ + 
+Cannot mix new and old setting keys, please rename the +following settings to the new format: + +{renames} + +""" + +E_MIX_NEW_INTO_OLD = """ + +Cannot mix new setting names with old setting names, please +rename the following settings to use the old format: + +{renames} + +Or change all of the settings to use the new format :) + +""" + +FMT_REPLACE_SETTING = '{replace:<36} -> {with_}' + def appstr(app): """String used in __repr__ etc, to id app instances.""" - return '{0}:0x{1:x}'.format(app.main or '__main__', id(app)) + return '{0}:{1:#x}'.format(app.main or '__main__', id(app)) class Settings(ConfigurationView): - """Celery settings object.""" + """Celery settings object. - @property - def CELERY_RESULT_BACKEND(self): - return self.first('CELERY_RESULT_BACKEND', 'CELERY_BACKEND') + .. seealso: + + :ref:`configuration` for a full list of configuration keys. + + """ @property - def BROKER_TRANSPORT(self): - return self.first('BROKER_TRANSPORT', - 'BROKER_BACKEND', 'CARROT_BACKEND') + def broker_read_url(self): + return ( + os.environ.get('CELERY_BROKER_READ_URL') or + self.get('broker_read_url') or + self.broker_url + ) @property - def BROKER_BACKEND(self): - """Deprecated compat alias to :attr:`BROKER_TRANSPORT`.""" - return self.BROKER_TRANSPORT + def broker_write_url(self): + return ( + os.environ.get('CELERY_BROKER_WRITE_URL') or + self.get('broker_write_url') or + self.broker_url + ) @property - def BROKER_URL(self): - return (os.environ.get('CELERY_BROKER_URL') or - self.first('BROKER_URL', 'BROKER_HOST')) + def broker_url(self): + return ( + os.environ.get('CELERY_BROKER_URL') or + self.first('broker_url', 'broker_host') + ) @property - def CELERY_TIMEZONE(self): + def timezone(self): # this way we also support django's time zone. 
- return self.first('CELERY_TIMEZONE', 'TIME_ZONE') + return self.first('timezone', 'time_zone') def without_defaults(self): """Return the current configuration, but without defaults.""" @@ -83,18 +122,18 @@ def without_defaults(self): def value_set_for(self, key): return key in self.without_defaults() - def find_option(self, name, namespace='celery'): + def find_option(self, name, namespace=''): """Search for option by name. Will return ``(namespace, key, type)`` tuple, e.g.:: >>> from proj.celery import app >>> app.conf.find_option('disable_rate_limits') - ('CELERY', 'DISABLE_RATE_LIMITS', + ('worker', 'prefetch_multiplier', bool default->False>)) :param name: Name of option, cannot be partial. - :keyword namespace: Preferred namespace (``CELERY`` by default). + :keyword namespace: Preferred namespace (``None`` by default). """ return find(name, namespace) @@ -109,7 +148,7 @@ def get_by_parts(self, *parts): Example:: >>> from proj.celery import app - >>> app.conf.get_by_parts('CELERY', 'DISABLE_RATE_LIMITS') + >>> app.conf.get_by_parts('worker', 'disable_rate_limits') False """ @@ -117,11 +156,11 @@ def get_by_parts(self, *parts): def table(self, with_defaults=False, censored=True): filt = filter_hidden_settings if censored else lambda v: v - return filt(dict( - (k, v) for k, v in items( + return filt({ + k: v for k, v in items( self if with_defaults else self.without_defaults()) - if k.isupper() and not k.startswith('_') - )) + if not k.startswith('_') + }) def humanize(self, with_defaults=False, censored=True): """Return a human readable string showing changes to the @@ -131,6 +170,73 @@ def humanize(self, with_defaults=False, censored=True): for key, value in items(self.table(with_defaults, censored))) +def _new_key_to_old(key, convert=_TO_OLD_KEY.get): + return convert(key, key) + + +def _old_key_to_new(key, convert=_TO_NEW_KEY.get): + return convert(key, key) + + +_settings_info_t = namedtuple('settings_info_t', ( + 'defaults', 'convert', 'key_t', 
'mix_error', +)) + +_settings_info = _settings_info_t( + DEFAULTS, _TO_NEW_KEY, _old_key_to_new, E_MIX_OLD_INTO_NEW, +) +_old_settings_info = _settings_info_t( + _OLD_DEFAULTS, _TO_OLD_KEY, _new_key_to_old, E_MIX_NEW_INTO_OLD, +) + + +def detect_settings(conf, preconf={}, ignore_keys=set(), prefix=None, + all_keys=SETTING_KEYS, old_keys=_OLD_SETTING_KEYS): + source = conf + if conf is None: + source, conf = preconf, {} + have = set(keys(source)) - ignore_keys + is_in_new = have.intersection(all_keys) + is_in_old = have.intersection(old_keys) + + info = None + if is_in_new: + # have new setting names + info, left = _settings_info, is_in_old + if is_in_old and len(is_in_old) > len(is_in_new): + # Majority of the settings are old. + info, left = _old_settings_info, is_in_new + if is_in_old: + # have old setting names, or a majority of the names are old. + if not info: + info, left = _old_settings_info, is_in_new + if is_in_new and len(is_in_new) > len(is_in_old): + # Majority of the settings are new + info, left = _settings_info, is_in_old + else: + # no settings, just use new format. + info, left = _settings_info, is_in_old + + if prefix: + # always use new format if prefix is used. + info, left = _settings_info, set() + + # only raise error for keys that the user did not provide two keys + # for (e.g. both ``result_expires`` and ``CELERY_TASK_RESULT_EXPIRES``). + really_left = {key for key in left if info.convert[key] not in have} + if really_left: + # user is mixing old/new, or new/old settings, give renaming + # suggestions. 
+ raise ImproperlyConfigured(info.mix_error.format(renames='\n'.join( + FMT_REPLACE_SETTING.format(replace=key, with_=info.convert[key]) + for key in sorted(really_left) + ))) + + preconf = {info.convert.get(k, k): v for k, v in items(preconf)} + defaults = dict(deepcopy(info.defaults), **preconf) + return Settings(preconf, [conf, defaults], info.key_t, prefix=prefix) + + class AppPickler(object): """Old application pickler/unpickler (< 3.1).""" @@ -152,7 +258,6 @@ def build_standard_kwargs(self, main, changes, loader, backend, amqp, return dict(main=main, loader=loader, backend=backend, amqp=amqp, changes=changes, events=events, log=log, control=control, set_as_current=False, - accept_magic_kwargs=accept_magic_kwargs, config_source=config_source) def construct(self, cls, **kwargs): @@ -175,14 +280,18 @@ def filter_hidden_settings(conf): def maybe_censor(key, value, mask='*' * 8): if isinstance(value, Mapping): return filter_hidden_settings(value) - if isinstance(value, string_t) and HIDDEN_SETTINGS.search(key): - return mask - if isinstance(key, string_t) and 'BROKER_URL' in key.upper(): - from kombu import Connection - return Connection(value).as_uri(mask=mask) + if isinstance(key, string_t): + if HIDDEN_SETTINGS.search(key): + return mask + elif 'broker_url' in key.lower(): + from kombu import Connection + return Connection(value).as_uri(mask=mask) + elif 'backend' in key.lower(): + return maybe_sanitize_url(value, mask=mask) + return value - return dict((k, maybe_censor(k, v)) for k, v in items(conf)) + return {k: maybe_censor(k, v) for k, v in items(conf)} def bugreport(app): @@ -209,7 +318,7 @@ def bugreport(app): py_v=_platform.python_version(), driver_v=driver_v, transport=transport, - results=app.conf.CELERY_RESULT_BACKEND or 'disabled', + results=maybe_sanitize_url(app.conf.result_backend or 'disabled'), human_settings=app.conf.humanize(), loader=qualname(app.loader.__class__), ) diff --git a/celery/apps/beat.py b/celery/apps/beat.py index 
46cef9b8b..b66756adb 100644 --- a/celery/apps/beat.py +++ b/celery/apps/beat.py @@ -10,13 +10,16 @@ and so on. """ -from __future__ import absolute_import, unicode_literals +from __future__ import absolute_import, print_function, unicode_literals import numbers import socket import sys +from datetime import datetime + from celery import VERSION_BANNER, platforms, beat +from celery.five import text_t from celery.utils.imports import qualname from celery.utils.log import LOG_LEVELS, get_logger from celery.utils.timeutils import humanize_seconds @@ -24,6 +27,7 @@ __all__ = ['Beat'] STARTUP_INFO_FMT = """ +LocalTime -> {timestamp} Configuration -> . broker -> {conninfo} . loader -> {loader} @@ -42,20 +46,21 @@ class Beat(object): def __init__(self, max_interval=None, app=None, socket_timeout=30, pidfile=None, no_color=None, - loglevel=None, logfile=None, schedule=None, + loglevel='WARN', logfile=None, schedule=None, scheduler_cls=None, redirect_stdouts=None, redirect_stdouts_level=None, **kwargs): """Starts the beat task scheduler.""" self.app = app = app or self.app - self.loglevel = self._getopt('log_level', loglevel) - self.logfile = self._getopt('log_file', logfile) - self.schedule = self._getopt('schedule_filename', schedule) - self.scheduler_cls = self._getopt('scheduler', scheduler_cls) - self.redirect_stdouts = self._getopt( - 'redirect_stdouts', redirect_stdouts, + either = self.app.either + self.loglevel = loglevel + self.logfile = logfile + self.schedule = either('beat_schedule_filename', schedule) + self.scheduler_cls = either('beat_scheduler', scheduler_cls) + self.redirect_stdouts = either( + 'worker_redirect_stdouts', redirect_stdouts, ) - self.redirect_stdouts_level = self._getopt( - 'redirect_stdouts_level', redirect_stdouts_level, + self.redirect_stdouts_level = either( + 'worker_redirect_stdouts_level', redirect_stdouts_level, ) self.max_interval = max_interval @@ -70,11 +75,6 @@ def __init__(self, max_interval=None, app=None, if not 
isinstance(self.loglevel, numbers.Integral): self.loglevel = LOG_LEVELS[self.loglevel.upper()] - def _getopt(self, key, value): - if value is not None: - return value - return self.app.conf.find_value_for_key(key, namespace='celerybeat') - def run(self): print(str(self.colored.cyan( 'celery beat v{0} is starting.'.format(VERSION_BANNER)))) @@ -98,10 +98,12 @@ def start_scheduler(self): scheduler_cls=self.scheduler_cls, schedule_filename=self.schedule) - print(str(c.blue('__ ', c.magenta('-'), - c.blue(' ... __ '), c.magenta('-'), - c.blue(' _\n'), - c.reset(self.startup_info(beat))))) + print(text_t( # noqa (pyflakes chokes on print) + c.blue('__ ', c.magenta('-'), + c.blue(' ... __ '), c.magenta('-'), + c.blue(' _\n'), + c.reset(self.startup_info(beat))), + )) self.setup_logging() if self.socket_timeout: logger.debug('Setting default socket timeout to %r', @@ -125,6 +127,7 @@ def startup_info(self, beat): scheduler = beat.get_scheduler(lazy=True) return STARTUP_INFO_FMT.format( conninfo=self.app.connection().as_uri(), + timestamp=datetime.now().replace(microsecond=0), logfile=self.logfile or '[stderr]', loglevel=LOG_LEVELS[self.loglevel], loader=qualname(self.app.loader), diff --git a/celery/apps/worker.py b/celery/apps/worker.py index d19071108..873ac0b8a 100644 --- a/celery/apps/worker.py +++ b/celery/apps/worker.py @@ -16,21 +16,19 @@ import os import platform as _platform import sys -import warnings +from datetime import datetime from functools import partial -from billiard import current_process +from billiard.process import current_process from kombu.utils.encoding import safe_str from celery import VERSION_BANNER, platforms, signals from celery.app import trace -from celery.exceptions import ( - CDeprecationWarning, WorkerShutdown, WorkerTerminate, -) +from celery.exceptions import WorkerShutdown, WorkerTerminate from celery.five import string, string_t from celery.loaders.app import AppLoader -from celery.platforms import check_privileges +from 
celery.platforms import EX_FAILURE, EX_OK, check_privileges from celery.utils import cry, isatty from celery.utils.imports import qualname from celery.utils.log import get_logger, in_sighandler, set_in_sighandler @@ -43,25 +41,6 @@ is_jython = sys.platform.startswith('java') is_pypy = hasattr(sys, 'pypy_version_info') -W_PICKLE_DEPRECATED = """ -Starting from version 3.2 Celery will refuse to accept pickle by default. - -The pickle serializer is a security concern as it may give attackers -the ability to execute any command. It's important to secure -your broker from unauthorized access when using pickle, so we think -that enabling pickle should require a deliberate action and not be -the default choice. - -If you depend on pickle then you should set a setting to disable this -warning and to be sure that everything will continue working -when you upgrade to Celery 3.2:: - - CELERY_ACCEPT_CONTENT = ['pickle', 'json', 'msgpack', 'yaml'] - -You must only enable the serializers that you will actually use. - -""" - def active_thread_count(): from threading import enumerate @@ -90,7 +69,7 @@ def safe_say(msg): BANNER = """\ {hostname} v{version} -{platform} +{platform} {timestamp} [config] .> app: {app} @@ -111,7 +90,7 @@ def safe_say(msg): class Worker(WorkController): def on_before_init(self, **kwargs): - trace.setup_worker_optimizations(self.app) + trace.setup_worker_optimizations(self.app, self.hostname) # this signal can be used to set up configuration for # workers by name. 
@@ -119,16 +98,16 @@ def on_before_init(self, **kwargs): sender=self.hostname, instance=self, conf=self.app.conf, options=kwargs, ) - check_privileges(self.app.conf.CELERY_ACCEPT_CONTENT) + check_privileges(self.app.conf.accept_content) def on_after_init(self, purge=False, no_color=None, redirect_stdouts=None, redirect_stdouts_level=None, **kwargs): - self.redirect_stdouts = self._getopt( - 'redirect_stdouts', redirect_stdouts, + self.redirect_stdouts = self.app.either( + 'worker_redirect_stdouts', redirect_stdouts, ) - self.redirect_stdouts_level = self._getopt( - 'redirect_stdouts_level', redirect_stdouts_level, + self.redirect_stdouts_level = self.app.either( + 'worker_redirect_stdouts_level', redirect_stdouts_level, ) super(Worker, self).setup_defaults(**kwargs) self.purge = purge @@ -143,23 +122,21 @@ def on_init_blueprint(self): self._custom_logging = self.setup_logging() # apply task execution optimizations # -- This will finalize the app! - trace.setup_worker_optimizations(self.app) + trace.setup_worker_optimizations(self.app, self.hostname) def on_start(self): + app = self.app if not self._custom_logging and self.redirect_stdouts: - self.app.log.redirect_stdouts(self.redirect_stdouts_level) + app.log.redirect_stdouts(self.redirect_stdouts_level) WorkController.on_start(self) # this signal can be used to e.g. change queues after # the -Q option has been applied. 
signals.celeryd_after_setup.send( - sender=self.hostname, instance=self, conf=self.app.conf, + sender=self.hostname, instance=self, conf=app.conf, ) - if not self.app.conf.value_set_for('CELERY_ACCEPT_CONTENT'): - warnings.warn(CDeprecationWarning(W_PICKLE_DEPRECATED)) - if self.purge: self.purge_messages() @@ -174,7 +151,7 @@ def on_start(self): def on_consumer_ready(self, consumer): signals.worker_ready.send(sender=consumer) - print('{0} ready.'.format(safe_str(self.hostname), )) + print('{0} ready.'.format(safe_str(self.hostname),)) def setup_logging(self, colorize=None): if colorize is None and self.no_color is not None: @@ -186,7 +163,7 @@ def setup_logging(self, colorize=None): def purge_messages(self): count = self.app.control.purge() - if count: + if count: # pragma: no cover print('purge: Erased {0} {1} from the queue.\n'.format( count, pluralize(count, 'message'))) @@ -205,10 +182,10 @@ def extra_info(self): def startup_info(self): app = self.app concurrency = string(self.concurrency) - appr = '{0}:0x{1:x}'.format(app.main or '__main__', id(app)) + appr = '{0}:{1:#x}'.format(app.main or '__main__', id(app)) if not isinstance(app.loader, AppLoader): loader = qualname(app.loader) - if loader.startswith('celery.loaders'): + if loader.startswith('celery.loaders'): # pragma: no cover loader = loader[14:] appr += ' ({0})'.format(loader) if self.autoscale: @@ -225,9 +202,10 @@ def startup_info(self): banner = BANNER.format( app=appr, hostname=safe_str(self.hostname), + timestamp=datetime.now().replace(microsecond=0), version=VERSION_BANNER, conninfo=self.app.connection().as_uri(), - results=self.app.conf.CELERY_RESULT_BACKEND or 'disabled', + results=self.app.backend.as_uri(), concurrency=concurrency, platform=safe_str(_platform.platform()), events=events, @@ -277,8 +255,7 @@ def set_process_status(self, info): def _shutdown_handler(worker, sig='TERM', how='Warm', - exc=WorkerShutdown, callback=None): - + exc=WorkerShutdown, callback=None, exitcode=EX_OK): def 
_handle_request(*args): with in_sighandler(): from celery.worker import state @@ -288,9 +265,9 @@ def _handle_request(*args): safe_say('worker: {0} shutdown (MainProcess)'.format(how)) if active_thread_count() > 1: setattr(state, {'Warm': 'should_stop', - 'Cold': 'should_terminate'}[how], True) + 'Cold': 'should_terminate'}[how], exitcode) else: - raise exc() + raise exc(exitcode) _handle_request.__name__ = str('worker_{0}'.format(how)) platforms.signals[sig] = _handle_request install_worker_term_handler = partial( @@ -299,6 +276,7 @@ def _handle_request(*args): if not is_jython: # pragma: no cover install_worker_term_hard_handler = partial( _shutdown_handler, sig='SIGQUIT', how='Cold', exc=WorkerTerminate, + exitcode=EX_FAILURE, ) else: # pragma: no cover install_worker_term_handler = \ @@ -310,10 +288,12 @@ def on_SIGINT(worker): install_worker_term_hard_handler(worker, sig='SIGINT') if not is_jython: # pragma: no cover install_worker_int_handler = partial( - _shutdown_handler, sig='SIGINT', callback=on_SIGINT + _shutdown_handler, sig='SIGINT', callback=on_SIGINT, + exitcode=EX_FAILURE, ) else: # pragma: no cover - install_worker_int_handler = lambda *a, **kw: None + def install_worker_int_handler(*args, **kwargs): + pass def _reload_current_worker(): @@ -332,7 +312,7 @@ def restart_worker_sig_handler(*args): import atexit atexit.register(_reload_current_worker) from celery.worker import state - state.should_stop = True + state.should_stop = EX_OK platforms.signals[sig] = restart_worker_sig_handler diff --git a/celery/backends/__init__.py b/celery/backends/__init__.py index 421f7f480..77c6480e7 100644 --- a/celery/backends/__init__.py +++ b/celery/backends/__init__.py @@ -9,14 +9,13 @@ from __future__ import absolute_import import sys +import types -from kombu.utils.url import _parse_url - +from celery.exceptions import ImproperlyConfigured from celery.local import Proxy from celery._state import current_app from celery.five import reraise from 
celery.utils.imports import symbol_by_name -from celery.utils.functional import memoize __all__ = ['get_backend_cls', 'get_backend_by_url'] @@ -32,8 +31,12 @@ 'mongodb': 'celery.backends.mongodb:MongoBackend', 'db': 'celery.backends.database:DatabaseBackend', 'database': 'celery.backends.database:DatabaseBackend', + 'elasticsearch': 'celery.backends.elasticsearch:ElasticsearchBackend', 'cassandra': 'celery.backends.cassandra:CassandraBackend', 'couchbase': 'celery.backends.couchbase:CouchBaseBackend', + 'couchdb': 'celery.backends.couchdb:CouchDBBackend', + 'riak': 'celery.backends.riak:RiakBackend', + 'file': 'celery.backends.filesystem:FilesystemBackend', 'disabled': 'celery.backends.base:DisabledBackend', } @@ -41,25 +44,29 @@ default_backend = Proxy(lambda: current_app.backend) -@memoize(100) def get_backend_cls(backend=None, loader=None): """Get backend class by name/alias""" backend = backend or 'disabled' loader = loader or current_app.loader aliases = dict(BACKEND_ALIASES, **loader.override_backends) try: - return symbol_by_name(backend, aliases) + cls = symbol_by_name(backend, aliases) except ValueError as exc: - reraise(ValueError, ValueError(UNKNOWN_BACKEND.format( - backend, exc)), sys.exc_info()[2]) + reraise(ImproperlyConfigured, ImproperlyConfigured( + UNKNOWN_BACKEND.format(backend, exc)), sys.exc_info()[2]) + if isinstance(cls, types.ModuleType): + raise ImproperlyConfigured(UNKNOWN_BACKEND.format( + backend, 'is a Python module, not a backend class.')) + return cls def get_backend_by_url(backend=None, loader=None): url = None if backend and '://' in backend: url = backend - if '+' in url[:url.index('://')]: + scheme, _, _ = url.partition('://') + if '+' in scheme: backend, url = url.split('+', 1) else: - backend, _, _, _, _, _, _ = _parse_url(url) + backend = scheme return get_backend_cls(backend, loader), url diff --git a/celery/backends/amqp.py b/celery/backends/amqp.py index 62cf2034e..6af14a192 100644 --- a/celery/backends/amqp.py +++ 
b/celery/backends/amqp.py @@ -10,21 +10,17 @@ """ from __future__ import absolute_import -import socket - -from collections import deque -from operator import itemgetter - from kombu import Exchange, Queue, Producer, Consumer +from kombu.utils import register_after_fork from celery import states -from celery.exceptions import TimeoutError -from celery.five import range, monotonic +from celery.five import range from celery.utils.functional import dictfilter from celery.utils.log import get_logger from celery.utils.timeutils import maybe_s_to_ms -from .base import BaseBackend +from . import base +from .async import AsyncBackendMixin, BaseResultConsumer __all__ = ['BacklogLimitExceeded', 'AMQPBackend'] @@ -42,16 +38,69 @@ def repair_uuid(s): return '%s-%s-%s-%s-%s' % (s[:8], s[8:12], s[12:16], s[16:20], s[20:]) +def _on_after_fork_cleanup_backend(backend): + backend._after_fork() + + class NoCacheQueue(Queue): can_cache_declaration = False -class AMQPBackend(BaseBackend): +class ResultConsumer(BaseResultConsumer): + Consumer = Consumer + + _connection = None + _consumer = None + + def __init__(self, *args, **kwargs): + super(ResultConsumer, self).__init__(*args, **kwargs) + self._create_binding = self.backend._create_binding + + def start(self, initial_task_id, no_ack=True): + self._connection = self.app.connection() + initial_queue = self._create_binding(initial_task_id) + self._consumer = self.Consumer( + self._connection.default_channel, [initial_queue], + callbacks=[self.on_state_change], no_ack=no_ack, + accept=self.accept) + self._consumer.consume() + + def drain_events(self, timeout=None): + return self._connection.drain_events(timeout=timeout) + + def stop(self): + try: + self._consumer.cancel() + finally: + self._connection.close() + + def on_after_fork(self): + self._consumer = None + if self._connection is not None: + self._connection.collect() + self._connection = None + + def consume_from(self, task_id): + if self._consumer is None: + return 
self.start(task_id) + queue = self._create_binding(task_id) + if not self._consumer.consuming_from(queue): + self._consumer.add_queue(queue) + self._consumer.consume() + + def cancel_for(self, task_id): + if self._consumer: + self._consumer.cancel_by_queue(self._create_binding(task_id).name) + + +class AMQPBackend(base.Backend, AsyncBackendMixin): """Publishes results by sending messages.""" + Exchange = Exchange Queue = NoCacheQueue Consumer = Consumer Producer = Producer + ResultConsumer = ResultConsumer BacklogLimitExceeded = BacklogLimitExceeded @@ -71,22 +120,27 @@ def __init__(self, app, connection=None, exchange=None, exchange_type=None, super(AMQPBackend, self).__init__(app, **kwargs) conf = self.app.conf self._connection = connection + self._out_of_band = {} self.persistent = self.prepare_persistent(persistent) self.delivery_mode = 2 if self.persistent else 1 - exchange = exchange or conf.CELERY_RESULT_EXCHANGE - exchange_type = exchange_type or conf.CELERY_RESULT_EXCHANGE_TYPE + exchange = exchange or conf.result_exchange + exchange_type = exchange_type or conf.result_exchange_type self.exchange = self._create_exchange( exchange, exchange_type, self.delivery_mode, ) - self.serializer = serializer or conf.CELERY_RESULT_SERIALIZER + self.serializer = serializer or conf.result_serializer self.auto_delete = auto_delete - - self.expires = None - if 'expires' not in kwargs or kwargs['expires'] is not None: - self.expires = self.prepare_expires(kwargs.get('expires')) self.queue_arguments = dictfilter({ 'x-expires': maybe_s_to_ms(self.expires), }) + self.result_consumer = self.ResultConsumer( + self, self.app, self.accept, self._pending_results) + if register_after_fork is not None: + register_after_fork(self, _on_after_fork_cleanup_backend) + + def _after_fork(self): + self._pending_results.clear() + self.result_consumer._after_fork() def _create_exchange(self, name, type='direct', delivery_mode=2): return self.Exchange(name=name, @@ -115,16 +169,16 @@ def 
destination_for(self, task_id, request): return self.rkey(task_id), request.correlation_id or task_id return self.rkey(task_id), task_id - def store_result(self, task_id, result, status, + def store_result(self, task_id, result, state, traceback=None, request=None, **kwargs): - """Send task return value and status.""" + """Send task return value and state.""" routing_key, correlation_id = self.destination_for(task_id, request) if not routing_key: return with self.app.amqp.producer_pool.acquire(block=True) as producer: producer.publish( - {'task_id': task_id, 'status': status, - 'result': self.encode_result(result, status), + {'task_id': task_id, 'status': state, + 'result': self.encode_result(result, state), 'traceback': traceback, 'children': self.current_task_children(request)}, exchange=self.exchange, @@ -140,28 +194,20 @@ def store_result(self, task_id, result, status, def on_reply_declare(self, task_id): return [self._create_binding(task_id)] - def wait_for(self, task_id, timeout=None, cache=True, propagate=True, - no_ack=True, on_interval=None, - READY_STATES=states.READY_STATES, - PROPAGATE_STATES=states.PROPAGATE_STATES, - **kwargs): - cached_meta = self._cache.get(task_id) - if cache and cached_meta and \ - cached_meta['status'] in READY_STATES: - meta = cached_meta - else: - try: - meta = self.consume(task_id, timeout=timeout, no_ack=no_ack, - on_interval=on_interval) - except socket.timeout: - raise TimeoutError('The operation timed out.') - - if meta['status'] in PROPAGATE_STATES and propagate: - raise self.exception_to_python(meta['result']) - # consume() always returns READY_STATE. 
- return meta['result'] + def on_out_of_band_result(self, task_id, message): + if self.result_consumer: + self.result_consumer.on_out_of_band_result(message) + self._out_of_band[task_id] = message def get_task_meta(self, task_id, backlog_limit=1000): + try: + buffered = self._out_of_band.pop(task_id) + except KeyError: + pass + else: + payload = self._cache[task_id] = self.meta_from_decoded( + buffered.payload) + return payload # Polling and using basic_get with self.app.pool.acquire_channel(block=True) as (_, channel): binding = self._create_binding(task_id)(channel) @@ -174,18 +220,25 @@ def get_task_meta(self, task_id, backlog_limit=1000): ) if not acc: # no more messages break - if acc.payload['task_id'] == task_id: + try: + message_task_id = acc.properties['correlation_id'] + except (AttributeError, KeyError): + message_task_id = acc.payload['task_id'] + if message_task_id == task_id: prev, latest = latest, acc - if prev: - # backends are not expected to keep history, - # so we delete everything except the most recent state. - prev.ack() - prev = None + if prev: + # backends are not expected to keep history, + # so we delete everything except the most recent state. 
+ prev.ack() + prev = None + else: + self.on_out_of_band_result(message_task_id, acc) else: raise self.BacklogLimitExceeded(task_id) if latest: - payload = self._cache[task_id] = latest.payload + payload = self._cache[task_id] = self.meta_from_decoded( + latest.payload) latest.requeue() return payload else: @@ -197,92 +250,6 @@ def get_task_meta(self, task_id, backlog_limit=1000): return {'status': states.PENDING, 'result': None} poll = get_task_meta # XXX compat - def drain_events(self, connection, consumer, - timeout=None, on_interval=None, now=monotonic, wait=None): - wait = wait or connection.drain_events - results = {} - - def callback(meta, message): - if meta['status'] in states.READY_STATES: - results[meta['task_id']] = meta - - consumer.callbacks[:] = [callback] - time_start = now() - - while 1: - # Total time spent may exceed a single call to wait() - if timeout and now() - time_start >= timeout: - raise socket.timeout() - wait(timeout=timeout) - if on_interval: - on_interval() - if results: # got event on the wanted channel. 
- break - self._cache.update(results) - return results - - def consume(self, task_id, timeout=None, no_ack=True, on_interval=None): - wait = self.drain_events - with self.app.pool.acquire_channel(block=True) as (conn, channel): - binding = self._create_binding(task_id) - with self.Consumer(channel, binding, - no_ack=no_ack, accept=self.accept) as consumer: - while 1: - try: - return wait( - conn, consumer, timeout, on_interval)[task_id] - except KeyError: - continue - - def _many_bindings(self, ids): - return [self._create_binding(task_id) for task_id in ids] - - def get_many(self, task_ids, timeout=None, no_ack=True, - now=monotonic, getfields=itemgetter('status', 'task_id'), - READY_STATES=states.READY_STATES, - PROPAGATE_STATES=states.PROPAGATE_STATES, **kwargs): - with self.app.pool.acquire_channel(block=True) as (conn, channel): - ids = set(task_ids) - cached_ids = set() - mark_cached = cached_ids.add - for task_id in ids: - try: - cached = self._cache[task_id] - except KeyError: - pass - else: - if cached['status'] in READY_STATES: - yield task_id, cached - mark_cached(task_id) - ids.difference_update(cached_ids) - results = deque() - push_result = results.append - push_cache = self._cache.__setitem__ - to_exception = self.exception_to_python - - def on_message(message): - body = message.decode() - state, uid = getfields(body) - if state in READY_STATES: - if state in PROPAGATE_STATES: - body['result'] = to_exception(body['result']) - push_result(body) \ - if uid in task_ids else push_cache(uid, body) - - bindings = self._many_bindings(task_ids) - with self.Consumer(channel, bindings, on_message=on_message, - accept=self.accept, no_ack=no_ack): - wait = conn.drain_events - popleft = results.popleft - while ids: - wait(timeout=timeout) - while results: - state = popleft() - task_id = state['task_id'] - ids.discard(task_id) - push_cache(task_id, state) - yield task_id, state - def reload_task_result(self, task_id): raise NotImplementedError( 'reload_task_result 
is not supported by this backend.') @@ -304,6 +271,9 @@ def delete_group(self, group_id): raise NotImplementedError( 'delete_group is not supported by this backend.') + def as_uri(self, include_password=True): + return 'amqp://' + def __reduce__(self, args=(), kwargs={}): kwargs.update( connection=self._connection, diff --git a/celery/backends/async.py b/celery/backends/async.py new file mode 100644 index 000000000..aac64bb5d --- /dev/null +++ b/celery/backends/async.py @@ -0,0 +1,244 @@ +""" + celery.backends.async + ~~~~~~~~~~~~~~~~~~~~~ + + Async backend support utilitites. + +""" +from __future__ import absolute_import, unicode_literals + +import socket + +from collections import deque +from time import sleep +from weakref import WeakKeyDictionary + +from kombu.syn import detect_environment +from kombu.utils import cached_property + +from celery import states +from celery.exceptions import TimeoutError +from celery.five import monotonic + +drainers = {} + + +def register_drainer(name): + + def _inner(cls): + drainers[name] = cls + return cls + return _inner + + +@register_drainer('default') +class Drainer(object): + + def __init__(self, result_consumer): + self.result_consumer = result_consumer + + def drain_events_until(self, p, timeout=None, on_interval=None, + monotonic=monotonic, wait=None): + wait = wait or self.result_consumer.drain_events + time_start = monotonic() + + while 1: + # Total time spent may exceed a single call to wait() + if timeout and monotonic() - time_start >= timeout: + raise socket.timeout() + try: + yield self.wait_for(p, wait, timeout=1) + except socket.timeout: + pass + if on_interval: + on_interval() + if p.ready: # got event on the wanted channel. 
+ break + + def wait_for(self, p, wait, timeout=None): + wait(timeout=timeout) + + +class greenletDrainer(Drainer): + spawn = None + _g = None + _stopped = False + + def run(self): + while not self._stopped: + try: + self.result_consumer.drain_events(timeout=1) + except socket.timeout: + pass + + def start(self): + if self._g is None: + self._g = self.spawn(self.run) + + def stop(self): + self._stopped = True + + def wait_for(self, p, wait, timeout=None): + if self._g is None: + self.start() + if not p.ready: + sleep(0) + + +@register_drainer('eventlet') +class eventletDrainer(greenletDrainer): + + @cached_property + def spawn(self): + from eventlet import spawn + return spawn + + +@register_drainer('gevent') +class geventDrainer(greenletDrainer): + + @cached_property + def spawn(self): + from gevent import spawn + return spawn + + +class AsyncBackendMixin(object): + + def _collect_into(self, result, bucket): + self.result_consumer.buckets[result] = bucket + + def iter_native(self, result, timeout=None, interval=0.5, no_ack=True, + on_message=None, on_interval=None): + results = result.results + if not results: + raise StopIteration() + + bucket = deque() + for node in results: + if node._cache: + bucket.append(node) + else: + self._collect_into(node, bucket) + + for _ in self._wait_for_pending( + result, + timeout=timeout, interval=interval, no_ack=no_ack, + on_message=on_message, on_interval=on_interval): + while bucket: + node = bucket.popleft() + yield result.id, node._cache + while bucket: + node = bucket.popleft() + yield result.id, node._cache + + def add_pending_result(self, result): + if result.id not in self._pending_results: + self._pending_results[result.id] = result + self.result_consumer.consume_from(result.id) + return result + + def remove_pending_result(self, result): + self._pending_results.pop(result.id, None) + self.on_result_fulfilled(result) + return result + + def on_result_fulfilled(self, result): + self.result_consumer.cancel_for(result.id) 
+ + def wait_for_pending(self, result, + callback=None, propagate=True, **kwargs): + for _ in self._wait_for_pending(result, **kwargs): + pass + return result.maybe_throw(callback=callback, propagate=propagate) + + def _wait_for_pending(self, result, + timeout=None, on_interval=None, on_message=None, + **kwargs): + return self.result_consumer._wait_for_pending( + result, timeout=timeout, + on_interval=on_interval, on_message=on_message, + ) + + @property + def is_async(self): + return True + + +class BaseResultConsumer(object): + + def __init__(self, backend, app, accept, pending_results): + self.backend = backend + self.app = app + self.accept = accept + self._pending_results = pending_results + self.on_message = None + self.buckets = WeakKeyDictionary() + self.drainer = drainers[detect_environment()](self) + + def start(self): + raise NotImplementedError() + + def stop(self): + pass + + def drain_events(self, timeout=None): + raise NotImplementedError() + + def consume_from(self, task_id): + raise NotImplementedError() + + def cancel_for(self, task_id): + raise NotImplementedError() + + def _after_fork(self): + self.bucket.clear() + self.buckets = WeakKeyDictionary() + self.on_message = None + self.on_after_fork() + + def on_after_fork(self): + pass + + def drain_events_until(self, p, timeout=None, on_interval=None): + return self.drainer.drain_events_until( + p, timeout=timeout, on_interval=on_interval) + + def _wait_for_pending(self, result, + timeout=None, on_interval=None, on_message=None, + **kwargs): + self.on_wait_for_pending(result, timeout=timeout, **kwargs) + prev_on_m, self.on_message = self.on_message, on_message + try: + for _ in self.drain_events_until( + result.on_ready, timeout=timeout, + on_interval=on_interval): + yield + sleep(0) + except socket.timeout: + raise TimeoutError('The operation timed out.') + finally: + self.on_message = prev_on_m + + def on_wait_for_pending(self, result, timeout=None, **kwargs): + pass + + def 
on_out_of_band_result(self, message): + self.on_state_change(message.payload, message) + + def on_state_change(self, meta, message): + if self.on_message: + self.on_message(meta) + if meta['status'] in states.READY_STATES: + try: + result = self._pending_results[meta['task_id']] + except KeyError: + return + result._maybe_set_cache(meta) + buckets = self.buckets + try: + buckets[result].append(result) + buckets.pop(result) + except KeyError: + pass + sleep(0) diff --git a/celery/backends/base.py b/celery/backends/base.py index 2ca4cc001..6fe734cec 100644 --- a/celery/backends/base.py +++ b/celery/backends/base.py @@ -13,8 +13,8 @@ """ from __future__ import absolute_import -import time import sys +import time from datetime import timedelta @@ -24,16 +24,16 @@ registry as serializer_registry, ) from kombu.utils.encoding import bytes_to_str, ensure_bytes, from_utf8 +from kombu.utils.url import maybe_sanitize_url from celery import states -from celery import current_app, maybe_signature +from celery import current_app, group, maybe_signature from celery.app import current_task from celery.exceptions import ChordError, TimeoutError, TaskRevokedError from celery.five import items from celery.result import ( GroupResult, ResultBase, allow_join_result, result_from_tuple, ) -from celery.utils import timeutils from celery.utils.functional import LRUCache from celery.utils.log import get_logger from celery.utils.serialization import ( @@ -44,7 +44,7 @@ __all__ = ['BaseBackend', 'KeyValueStoreBackend', 'DisabledBackend'] -EXCEPTION_ABLE_CODECS = frozenset(['pickle', 'yaml']) +EXCEPTION_ABLE_CODECS = frozenset({'pickle'}) PY3 = sys.version_info >= (3, 0) logger = get_logger(__name__) @@ -57,11 +57,12 @@ def unpickle_backend(cls, args, kwargs): class _nulldict(dict): - def __setitem__(self, k, v): + def ignore(self, *a, **kw): pass + __setitem__ = update = setdefault = ignore -class BaseBackend(object): +class Backend(object): READY_STATES = states.READY_STATES UNREADY_STATES = 
states.UNREADY_STATES EXCEPTION_STATES = states.EXCEPTION_STATES @@ -91,32 +92,78 @@ class BaseBackend(object): 'interval_max': 1, } - def __init__(self, app, serializer=None, - max_cached_results=None, accept=None, **kwargs): + def __init__(self, app, + serializer=None, max_cached_results=None, accept=None, + expires=None, expires_type=None, url=None, **kwargs): self.app = app conf = self.app.conf - self.serializer = serializer or conf.CELERY_RESULT_SERIALIZER + self.serializer = serializer or conf.result_serializer (self.content_type, self.content_encoding, self.encoder) = serializer_registry._encoders[self.serializer] - cmax = max_cached_results or conf.CELERY_MAX_CACHED_RESULTS + cmax = max_cached_results or conf.result_cache_max self._cache = _nulldict() if cmax == -1 else LRUCache(limit=cmax) + + self.expires = self.prepare_expires(expires, expires_type) self.accept = prepare_accept_content( - conf.CELERY_ACCEPT_CONTENT if accept is None else accept, + conf.accept_content if accept is None else accept, ) + self._pending_results = {} + self.url = url + + def as_uri(self, include_password=False): + """Return the backend as an URI, sanitizing the password or not""" + # when using maybe_sanitize_url(), "/" is added + # we're stripping it for consistency + if include_password: + return self.url + url = maybe_sanitize_url(self.url or '') + return url[:-1] if url.endswith(':///') else url def mark_as_started(self, task_id, **meta): """Mark a task as started""" - return self.store_result(task_id, meta, status=states.STARTED) + return self.store_result(task_id, meta, states.STARTED) - def mark_as_done(self, task_id, result, request=None): + def mark_as_done(self, task_id, result, + request=None, store_result=True, state=states.SUCCESS): """Mark task as successfully executed.""" - return self.store_result(task_id, result, - status=states.SUCCESS, request=request) - - def mark_as_failure(self, task_id, exc, traceback=None, request=None): - """Mark task as executed with 
failure. Stores the execption.""" - return self.store_result(task_id, exc, status=states.FAILURE, + if store_result: + self.store_result(task_id, result, state, request=request) + if request and request.chord: + self.on_chord_part_return(request, state, result) + + def mark_as_failure(self, task_id, exc, + traceback=None, request=None, + store_result=True, call_errbacks=True, + state=states.FAILURE): + """Mark task as executed with failure. Stores the exception.""" + if store_result: + self.store_result(task_id, exc, state, + traceback=traceback, request=request) + if request: + if request.chord: + self.on_chord_part_return(request, state, exc) + if call_errbacks: + root_id = request.root_id or task_id + group( + [self.app.signature(errback) + for errback in request.errbacks or []], app=self.app, + ).apply_async((task_id,), parent_id=task_id, root_id=root_id) + + def mark_as_revoked(self, task_id, reason='', + request=None, store_result=True, state=states.REVOKED): + exc = TaskRevokedError(reason) + if store_result: + self.store_result(task_id, exc, state, + traceback=None, request=request) + if request and request.chord: + self.on_chord_part_return(request, state, exc) + + def mark_as_retry(self, task_id, exc, traceback=None, + request=None, store_result=True, state=states.RETRY): + """Mark task as being retries. 
Stores the current + exception (if any).""" + return self.store_result(task_id, exc, state, traceback=traceback, request=request) def chord_error_from_stack(self, callback, exc=None): @@ -128,7 +175,7 @@ def chord_error_from_stack(self, callback, exc=None): [app.signature(errback) for errback in callback.options.get('link_error') or []], app=app, - ).apply_async((callback.id, )) + ).apply_async((callback.id,)) except Exception as eb_exc: return backend.fail_from_current_stack(callback.id, exc=eb_exc) else: @@ -144,17 +191,6 @@ def fail_from_current_stack(self, task_id, exc=None): finally: del(tb) - def mark_as_retry(self, task_id, exc, traceback=None, request=None): - """Mark task as being retries. Stores the current - exception (if any).""" - return self.store_result(task_id, exc, status=states.RETRY, - traceback=traceback, request=request) - - def mark_as_revoked(self, task_id, reason='', request=None): - return self.store_result(task_id, TaskRevokedError(reason), - status=states.REVOKED, traceback=None, - request=request) - def prepare_exception(self, exc, serializer=None): """Prepare exception for serialization.""" serializer = self.serializer if serializer is None else serializer @@ -164,10 +200,13 @@ def prepare_exception(self, exc, serializer=None): def exception_to_python(self, exc): """Convert serialized exception to Python exception.""" - if self.serializer in EXCEPTION_ABLE_CODECS: - return get_pickled_exception(exc) - return create_exception_cls( - from_utf8(exc['exc_type']), __name__)(exc['exc_message']) + if exc: + if not isinstance(exc, BaseException): + exc = create_exception_cls( + from_utf8(exc['exc_type']), __name__)(exc['exc_message']) + if self.serializer in EXCEPTION_ABLE_CODECS: + exc = get_pickled_exception(exc) + return exc def prepare_value(self, result): """Prepare value for storage.""" @@ -179,6 +218,14 @@ def encode(self, data): _, _, payload = dumps(data, serializer=self.serializer) return payload + def meta_from_decoded(self, meta): + 
if meta['status'] in self.EXCEPTION_STATES: + meta['result'] = self.exception_to_python(meta['result']) + return meta + + def decode_result(self, payload): + return self.meta_from_decoded(self.decode(payload)) + def decode(self, payload): payload = PY3 and payload or str(payload) return loads(payload, @@ -186,44 +233,11 @@ def decode(self, payload): content_encoding=self.content_encoding, accept=self.accept) - def wait_for(self, task_id, - timeout=None, propagate=True, interval=0.5, no_ack=True, - on_interval=None): - """Wait for task and return its result. - - If the task raises an exception, this exception - will be re-raised by :func:`wait_for`. - - If `timeout` is not :const:`None`, this raises the - :class:`celery.exceptions.TimeoutError` exception if the operation - takes longer than `timeout` seconds. - - """ - - time_elapsed = 0.0 - - while 1: - status = self.get_status(task_id) - if status == states.SUCCESS: - return self.get_result(task_id) - elif status in states.PROPAGATE_STATES: - result = self.get_result(task_id) - if propagate: - raise result - return result - if on_interval: - on_interval() - # avoid hammering the CPU checking status. 
- time.sleep(interval) - time_elapsed += interval - if timeout and time_elapsed >= timeout: - raise TimeoutError('The operation timed out.') - def prepare_expires(self, value, type=None): if value is None: - value = self.app.conf.CELERY_TASK_RESULT_EXPIRES + value = self.app.conf.result_expires if isinstance(value, timedelta): - value = timeutils.timedelta_seconds(value) + value = value.total_seconds() if value is not None and type: return type(value) return value @@ -231,11 +245,11 @@ def prepare_expires(self, value, type=None): def prepare_persistent(self, enabled=None): if enabled is not None: return enabled - p = self.app.conf.CELERY_RESULT_PERSISTENT + p = self.app.conf.result_persistent return self.persistent if p is None else p - def encode_result(self, result, status): - if status in self.EXCEPTION_STATES and isinstance(result, Exception): + def encode_result(self, result, state): + if state in self.EXCEPTION_STATES and isinstance(result, Exception): return self.prepare_exception(result) else: return self.prepare_value(result) @@ -243,11 +257,11 @@ def encode_result(self, result, status): def is_cached(self, task_id): return task_id in self._cache - def store_result(self, task_id, result, status, + def store_result(self, task_id, result, state, traceback=None, request=None, **kwargs): """Update task state and result.""" - result = self.encode_result(result, status) - self._store_result(task_id, result, status, traceback, + result = self.encode_result(result, state) + self._store_result(task_id, result, state, traceback, request=request, **kwargs) return result @@ -258,9 +272,10 @@ def forget(self, task_id): def _forget(self, task_id): raise NotImplementedError('backend does not implement forget.') - def get_status(self, task_id): - """Get the status of a task.""" + def get_state(self, task_id): + """Get the state of a task.""" return self.get_task_meta(task_id)['status'] + get_status = get_state # XXX compat def get_traceback(self, task_id): """Get the 
traceback for a failed task.""" @@ -268,11 +283,7 @@ def get_traceback(self, task_id): def get_result(self, task_id): """Get the result of a task.""" - meta = self.get_task_meta(task_id) - if meta['status'] in self.EXCEPTION_STATES: - return self.exception_to_python(meta['result']) - else: - return meta['result'] + return self.get_task_meta(task_id).get('result') def get_children(self, task_id): """Get the list of subtasks sent by a task.""" @@ -339,19 +350,24 @@ def process_cleanup(self): def on_task_call(self, producer, task_id): return {} - def on_chord_part_return(self, task, state, result, propagate=False): + def add_to_chord(self, chord_id, result): + raise NotImplementedError('Backend does not support add_to_chord') + + def on_chord_part_return(self, request, state, result, **kwargs): pass def fallback_chord_unlock(self, group_id, body, result=None, countdown=1, **kwargs): kwargs['result'] = [r.as_tuple() for r in result] self.app.tasks['celery.chord_unlock'].apply_async( - (group_id, body, ), kwargs, countdown=countdown, + (group_id, body,), kwargs, countdown=countdown, ) - def apply_chord(self, header, partial_args, group_id, body, **options): - result = header(*partial_args, task_id=group_id) - self.fallback_chord_unlock(group_id, body, **options) + def apply_chord(self, header, partial_args, group_id, body, + options={}, **kwargs): + fixed_options = {k: v for k, v in items(options) if k != 'task_id'} + result = header(*partial_args, task_id=group_id, **fixed_options or {}) + self.fallback_chord_unlock(group_id, body, **kwargs) return result def current_task_children(self, request=None): @@ -361,10 +377,78 @@ def current_task_children(self, request=None): def __reduce__(self, args=(), kwargs={}): return (unpickle_backend, (self.__class__, args, kwargs)) + + +class SyncBackendMixin(object): + + def iter_native(self, result, timeout=None, interval=0.5, no_ack=True, + on_message=None, on_interval=None): + results = result.results + if not results: + return 
iter([]) + return self.get_many( + {r.id for r in results}, + timeout=timeout, interval=interval, no_ack=no_ack, + on_message=on_message, on_interval=on_interval, + ) + + def wait_for_pending(self, result, timeout=None, interval=0.5, + no_ack=True, on_interval=None, callback=None, + propagate=True): + meta = self.wait_for( + result.id, timeout=timeout, + interval=interval, + on_interval=on_interval, + no_ack=no_ack, + ) + if meta: + result._maybe_set_cache(meta) + return result.maybe_throw(propagate=propagate, callback=callback) + + def wait_for(self, task_id, + timeout=None, interval=0.5, no_ack=True, on_interval=None): + """Wait for task and return its result. + + If the task raises an exception, this exception + will be re-raised by :func:`wait_for`. + + If `timeout` is not :const:`None`, this raises the + :class:`celery.exceptions.TimeoutError` exception if the operation + takes longer than `timeout` seconds. + + """ + + time_elapsed = 0.0 + + while 1: + meta = self.get_task_meta(task_id) + if meta['status'] in states.READY_STATES: + return meta + if on_interval: + on_interval() + # avoid hammering the CPU checking status. 
+ time.sleep(interval) + time_elapsed += interval + if timeout and time_elapsed >= timeout: + raise TimeoutError('The operation timed out.') + + def add_pending_result(self, result): + return result + + def remove_pending_result(self, result): + return result + + @property + def is_async(self): + return False + + +class BaseBackend(Backend, SyncBackendMixin): + pass BaseDictBackend = BaseBackend # XXX compat -class KeyValueStoreBackend(BaseBackend): +class BaseKeyValueStoreBackend(Backend): key_t = ensure_bytes task_keyprefix = 'celery-task-meta-' group_keyprefix = 'celery-taskset-meta-' @@ -372,10 +456,10 @@ class KeyValueStoreBackend(BaseBackend): implements_incr = False def __init__(self, *args, **kwargs): - if hasattr(self.key_t, '__func__'): + if hasattr(self.key_t, '__func__'): # pragma: no cover self.key_t = self.key_t.__func__ # remove binding self._encode_prefixes() - super(KeyValueStoreBackend, self).__init__(*args, **kwargs) + super(BaseKeyValueStoreBackend, self).__init__(*args, **kwargs) if self.implements_incr: self.apply_chord = self._apply_chord_incr @@ -431,19 +515,29 @@ def _strip_prefix(self, key): return bytes_to_str(key[len(prefix):]) return bytes_to_str(key) + def _filter_ready(self, values, READY_STATES=states.READY_STATES): + for k, v in values: + if v is not None: + v = self.decode_result(v) + if v['status'] in READY_STATES: + yield k, v + def _mget_to_results(self, values, keys): if hasattr(values, 'items'): # client returns dict so mapping preserved. - return dict((self._strip_prefix(k), self.decode(v)) - for k, v in items(values) - if v is not None) + return { + self._strip_prefix(k): v + for k, v in self._filter_ready(items(values)) + } else: # client returns list so need to recreate mapping. 
- return dict((bytes_to_str(keys[i]), self.decode(value)) - for i, value in enumerate(values) - if value is not None) + return { + bytes_to_str(keys[i]): v + for i, v in self._filter_ready(enumerate(values)) + } def get_many(self, task_ids, timeout=None, interval=0.5, no_ack=True, + on_message=None, on_interval=None, max_iterations=None, READY_STATES=states.READY_STATES): interval = 0.5 if interval is None else interval ids = task_ids if isinstance(task_ids, set) else set(task_ids) @@ -466,21 +560,28 @@ def get_many(self, task_ids, timeout=None, interval=0.5, no_ack=True, r = self._mget_to_results(self.mget([self.get_key_for_task(k) for k in keys]), keys) cache.update(r) - ids.difference_update(set(bytes_to_str(v) for v in r)) + ids.difference_update({bytes_to_str(v) for v in r}) for key, value in items(r): + if on_message is not None: + on_message(value) yield bytes_to_str(key), value if timeout and iterations * interval >= timeout: raise TimeoutError('Operation timed out ({0})'.format(timeout)) + if on_interval: + on_interval() time.sleep(interval) # don't busy loop. 
iterations += 1 + if max_iterations and iterations >= max_iterations: + break def _forget(self, task_id): self.delete(self.get_key_for_task(task_id)) - def _store_result(self, task_id, result, status, + def _store_result(self, task_id, result, state, traceback=None, request=None, **kwargs): - meta = {'status': status, 'result': result, 'traceback': traceback, - 'children': self.current_task_children(request)} + meta = {'status': state, 'result': result, 'traceback': traceback, + 'children': self.current_task_children(request), + 'task_id': task_id} self.set(self.get_key_for_task(task_id), self.encode(meta)) return result @@ -497,7 +598,7 @@ def _get_task_meta_for(self, task_id): meta = self.get(self.get_key_for_task(task_id)) if not meta: return {'status': states.PENDING, 'result': None} - return self.decode(meta) + return self.decode_result(meta) def _restore_group(self, group_id): """Get task metadata for a task by id.""" @@ -512,24 +613,25 @@ def _restore_group(self, group_id): return meta def _apply_chord_incr(self, header, partial_args, group_id, body, - result=None, **options): + result=None, options={}, **kwargs): self.save_group(group_id, self.app.GroupResult(group_id, result)) - return header(*partial_args, task_id=group_id) - def on_chord_part_return(self, task, state, result, propagate=None): + fixed_options = {k: v for k, v in items(options) if k != 'task_id'} + + return header(*partial_args, task_id=group_id, **fixed_options or {}) + + def on_chord_part_return(self, request, state, result, **kwargs): if not self.implements_incr: return app = self.app - if propagate is None: - propagate = app.conf.CELERY_CHORD_PROPAGATES - gid = task.request.group + gid = request.group if not gid: return key = self.get_key_for_chord(gid) try: - deps = GroupResult.restore(gid, backend=task.backend) + deps = GroupResult.restore(gid, backend=self) except Exception as exc: - callback = maybe_signature(task.request.chord, app=app) + callback = maybe_signature(request.chord, 
app=app) logger.error('Chord %r raised: %r', gid, exc, exc_info=1) return self.chord_error_from_stack( callback, @@ -539,7 +641,7 @@ def on_chord_part_return(self, task, state, result, propagate=None): try: raise ValueError(gid) except ValueError as exc: - callback = maybe_signature(task.request.chord, app=app) + callback = maybe_signature(request.chord, app=app) logger.error('Chord callback %r raised: %r', gid, exc, exc_info=1) return self.chord_error_from_stack( @@ -547,12 +649,16 @@ def on_chord_part_return(self, task, state, result, propagate=None): ChordError('GroupResult {0} no longer exists'.format(gid)), ) val = self.incr(key) - if val >= len(deps): - callback = maybe_signature(task.request.chord, app=app) + size = len(deps) + if val > size: # pragma: no cover + logger.warning('Chord counter incremented too many times for %r', + gid) + elif val == size: + callback = maybe_signature(request.chord, app=app) j = deps.join_native if deps.supports_native_join else deps.join try: with allow_join_result(): - ret = j(timeout=3.0, propagate=propagate) + ret = j(timeout=3.0, propagate=True) except Exception as exc: try: culprit = next(deps._failed_join_report()) @@ -580,6 +686,10 @@ def on_chord_part_return(self, task, state, result, propagate=None): self.expire(key, 86400) +class KeyValueStoreBackend(BaseKeyValueStoreBackend, SyncBackendMixin): + pass + + class DisabledBackend(BaseBackend): _cache = {} # need this attribute to reset cache in tests. @@ -590,4 +700,9 @@ def _is_disabled(self, *args, **kwargs): raise NotImplementedError( 'No result backend configured. 
' 'Please see the documentation for more information.') - wait_for = get_status = get_result = get_traceback = _is_disabled + + def as_uri(self, *args, **kwargs): + return 'disabled://' + + get_state = get_status = get_result = get_traceback = _is_disabled + wait_for = get_many = _is_disabled diff --git a/celery/backends/cache.py b/celery/backends/cache.py index ac8710099..122e70f6b 100644 --- a/celery/backends/cache.py +++ b/celery/backends/cache.py @@ -45,7 +45,7 @@ def import_best_memcache(): import memcache # noqa except ImportError: raise ImproperlyConfigured(REQUIRES_BACKEND) - if PY3: + if PY3: # pragma: no cover memcache_key_t = bytes_to_str _imp[0] = (is_pylibmc, memcache, memcache_key_t) return _imp[0] @@ -73,7 +73,7 @@ def get(self, key, *args, **kwargs): def get_multi(self, keys): cache = self.cache - return dict((k, cache[k]) for k in keys if k in cache) + return {k: cache[k] for k in keys if k in cache} def set(self, key, value, *args, **kwargs): self.cache[key] = value @@ -100,11 +100,12 @@ class CacheBackend(KeyValueStoreBackend): def __init__(self, app, expires=None, backend=None, options={}, url=None, **kwargs): super(CacheBackend, self).__init__(app, **kwargs) + self.url = url - self.options = dict(self.app.conf.CELERY_CACHE_BACKEND_OPTIONS, + self.options = dict(self.app.conf.cache_backend_options, **options) - self.backend = url or backend or self.app.conf.CELERY_CACHE_BACKEND + self.backend = url or backend or self.app.conf.cache_backend if self.backend: self.backend, _, servers = self.backend.partition('://') self.servers = servers.rstrip('/').split(';') @@ -129,7 +130,7 @@ def delete(self, key): return self.client.delete(key) def _apply_chord_incr(self, header, partial_args, group_id, body, **opts): - self.client.set(self.get_key_for_chord(group_id), '0', time=86400) + self.client.set(self.get_key_for_chord(group_id), 0, time=86400) return super(CacheBackend, self)._apply_chord_incr( header, partial_args, group_id, body, **opts ) @@ -149,3 
+150,12 @@ def __reduce__(self, args=(), kwargs={}): expires=self.expires, options=self.options)) return super(CacheBackend, self).__reduce__(args, kwargs) + + def as_uri(self, *args, **kwargs): + """Return the backend as an URI. + + This properly handles the case of multiple servers. + + """ + servers = ';'.join(self.servers) + return '{0}://{1}/'.format(self.backend, servers) diff --git a/celery/backends/cassandra.py b/celery/backends/cassandra.py index 774e6b792..2bd2a78e4 100644 --- a/celery/backends/cassandra.py +++ b/celery/backends/cassandra.py @@ -3,192 +3,246 @@ celery.backends.cassandra ~~~~~~~~~~~~~~~~~~~~~~~~~ - Apache Cassandra result store backend. + Apache Cassandra result store backend using DataStax driver """ from __future__ import absolute_import +import sys try: # pragma: no cover - import pycassa - from thrift import Thrift - C = pycassa.cassandra.ttypes + import cassandra + import cassandra.auth + import cassandra.cluster except ImportError: # pragma: no cover - pycassa = None # noqa - -import socket -import time + cassandra = None # noqa from celery import states from celery.exceptions import ImproperlyConfigured -from celery.five import monotonic from celery.utils.log import get_logger -from celery.utils.timeutils import maybe_timedelta, timedelta_seconds - from .base import BaseBackend __all__ = ['CassandraBackend'] logger = get_logger(__name__) +E_NO_CASSANDRA = """ +You need to install the cassandra-driver library to +use the Cassandra backend. See https://github.com/datastax/python-driver +""" -class CassandraBackend(BaseBackend): - """Highly fault tolerant Cassandra backend. +E_NO_SUCH_CASSANDRA_AUTH_PROVIDER = """ +CASSANDRA_AUTH_PROVIDER you provided is not a valid auth_provider class. +See https://datastax.github.io/python-driver/api/cassandra/auth.html. +""" + +Q_INSERT_RESULT = """ +INSERT INTO {table} ( + task_id, status, result, date_done, traceback, children) VALUES ( + %s, %s, %s, %s, %s, %s) {expires}; +""" - .. 
attribute:: servers +Q_SELECT_RESULT = """ +SELECT status, result, date_done, traceback, children +FROM {table} +WHERE task_id=%s +LIMIT 1 +""" - List of Cassandra servers with format: ``hostname:port``. +Q_CREATE_RESULT_TABLE = """ +CREATE TABLE {table} ( + task_id text, + status text, + result blob, + date_done timestamp, + traceback blob, + children blob, + PRIMARY KEY ((task_id), date_done) +) WITH CLUSTERING ORDER BY (date_done DESC); +""" + +Q_EXPIRES = """ + USING TTL {0} +""" + +if sys.version_info[0] == 3: + def buf_t(x): + return bytes(x, 'utf8') +else: + buf_t = buffer # noqa + + +class CassandraBackend(BaseBackend): + """Cassandra backend utilizing DataStax driver :raises celery.exceptions.ImproperlyConfigured: if - module :mod:`pycassa` is not available. + module :mod:`cassandra` is not available. """ - servers = [] - keyspace = None - column_family = None - detailed_mode = False - _retry_timeout = 300 - _retry_wait = 3 - supports_autoexpire = True - - def __init__(self, servers=None, keyspace=None, column_family=None, - cassandra_options=None, detailed_mode=False, **kwargs): + + #: List of Cassandra servers with format: ``hostname``. + servers = None + + supports_autoexpire = True # autoexpire supported via entry_ttl + + def __init__(self, servers=None, keyspace=None, table=None, entry_ttl=None, + port=9042, **kwargs): """Initialize Cassandra backend. Raises :class:`celery.exceptions.ImproperlyConfigured` if - the :setting:`CASSANDRA_SERVERS` setting is not set. + the :setting:`cassandra_servers` setting is not set. """ super(CassandraBackend, self).__init__(**kwargs) - self.expires = kwargs.get('expires') or maybe_timedelta( - self.app.conf.CELERY_TASK_RESULT_EXPIRES) - - if not pycassa: - raise ImproperlyConfigured( - 'You need to install the pycassa library to use the ' - 'Cassandra backend. 
See https://github.com/pycassa/pycassa') + if not cassandra: + raise ImproperlyConfigured(E_NO_CASSANDRA) conf = self.app.conf self.servers = (servers or - conf.get('CASSANDRA_SERVERS') or - self.servers) + conf.get('cassandra_servers', None)) + self.port = (port or + conf.get('cassandra_port', None)) self.keyspace = (keyspace or - conf.get('CASSANDRA_KEYSPACE') or - self.keyspace) - self.column_family = (column_family or - conf.get('CASSANDRA_COLUMN_FAMILY') or - self.column_family) - self.cassandra_options = dict(conf.get('CASSANDRA_OPTIONS') or {}, - **cassandra_options or {}) - self.detailed_mode = (detailed_mode or - conf.get('CASSANDRA_DETAILED_MODE') or - self.detailed_mode) - read_cons = conf.get('CASSANDRA_READ_CONSISTENCY') or 'LOCAL_QUORUM' - write_cons = conf.get('CASSANDRA_WRITE_CONSISTENCY') or 'LOCAL_QUORUM' - try: - self.read_consistency = getattr(pycassa.ConsistencyLevel, - read_cons) - except AttributeError: - self.read_consistency = pycassa.ConsistencyLevel.LOCAL_QUORUM - try: - self.write_consistency = getattr(pycassa.ConsistencyLevel, - write_cons) - except AttributeError: - self.write_consistency = pycassa.ConsistencyLevel.LOCAL_QUORUM - - if not self.servers or not self.keyspace or not self.column_family: - raise ImproperlyConfigured( - 'Cassandra backend not configured.') - - self._column_family = None - - def _retry_on_error(self, fun, *args, **kwargs): - ts = monotonic() + self._retry_timeout - while 1: - try: - return fun(*args, **kwargs) - except (pycassa.InvalidRequestException, - pycassa.TimedOutException, - pycassa.UnavailableException, - pycassa.AllServersUnavailable, - socket.error, - socket.timeout, - Thrift.TException) as exc: - if monotonic() > ts: - raise - logger.warning('Cassandra error: %r. 
Retrying...', exc) - time.sleep(self._retry_wait) - - def _get_column_family(self): - if self._column_family is None: - conn = pycassa.ConnectionPool(self.keyspace, - server_list=self.servers, - **self.cassandra_options) - self._column_family = pycassa.ColumnFamily( - conn, self.column_family, - read_consistency_level=self.read_consistency, - write_consistency_level=self.write_consistency, - ) - return self._column_family + conf.get('cassandra_keyspace', None)) + self.table = (table or + conf.get('cassandra_table', None)) + + if not self.servers or not self.keyspace or not self.table: + raise ImproperlyConfigured('Cassandra backend not configured.') + + expires = (entry_ttl or conf.get('cassandra_entry_ttl', None)) + + self.cqlexpires = (Q_EXPIRES.format(expires) + if expires is not None else '') + + read_cons = conf.get('cassandra_read_consistency') or 'LOCAL_QUORUM' + write_cons = conf.get('cassandra_write_consistency') or 'LOCAL_QUORUM' + + self.read_consistency = getattr( + cassandra.ConsistencyLevel, read_cons, + cassandra.ConsistencyLevel.LOCAL_QUORUM, + ) + self.write_consistency = getattr( + cassandra.ConsistencyLevel, write_cons, + cassandra.ConsistencyLevel.LOCAL_QUORUM, + ) + + self.auth_provider = None + auth_provider = conf.get('cassandra_auth_provider', None) + auth_kwargs = conf.get('cassandra_auth_kwargs', None) + if auth_provider and auth_kwargs: + auth_provider_class = getattr(cassandra.auth, auth_provider, None) + if not auth_provider_class: + raise ImproperlyConfigured(E_NO_SUCH_CASSANDRA_AUTH_PROVIDER) + self.auth_provider = auth_provider_class(**auth_kwargs) + + self._connection = None + self._session = None + self._write_stmt = None + self._read_stmt = None + self._make_stmt = None def process_cleanup(self): - if self._column_family is not None: - self._column_family = None + if self._connection is not None: + self._connection.shutdown() # also shuts down _session + + self._connection = None + self._session = None + + def 
_get_connection(self, write=False): + """Prepare the connection for action + + :param write: bool - are we a writer? - def _store_result(self, task_id, result, status, + """ + if self._connection is None: + try: + self._connection = cassandra.cluster.Cluster( + self.servers, port=self.port, + auth_provider=self.auth_provider) + self._session = self._connection.connect(self.keyspace) + + # We are forced to do concatenation below, as formatting would + # blow up on superficial %s that will be processed by Cassandra + self._write_stmt = cassandra.query.SimpleStatement( + Q_INSERT_RESULT.format( + table=self.table, expires=self.cqlexpires), + ) + self._write_stmt.consistency_level = self.write_consistency + + self._read_stmt = cassandra.query.SimpleStatement( + Q_SELECT_RESULT.format(table=self.table), + ) + self._read_stmt.consistency_level = self.read_consistency + + if write: + # Only possible writers "workers" are allowed to issue + # CREATE TABLE. This is to prevent conflicting situations + # where both task-creator and task-executor would issue it + # at the same time. + + # Anyway; if you're doing anything critical, you should + # have created this table in advance, in which case + # this query will be a no-op (AlreadyExists) + self._make_stmt = cassandra.query.SimpleStatement( + Q_CREATE_RESULT_TABLE.format(table=self.table), + ) + self._make_stmt.consistency_level = self.write_consistency + + try: + self._session.execute(self._make_stmt) + except cassandra.AlreadyExists: + pass + + except cassandra.OperationTimedOut: + # a heavily loaded or gone Cassandra cluster failed to respond. 
+ # leave this class in a consistent state + if self._connection is not None: + self._connection.shutdown() # also shuts down _session + + self._connection = None + self._session = None + raise # we did fail after all - reraise + + def _store_result(self, task_id, result, state, traceback=None, request=None, **kwargs): - """Store return value and status of an executed task.""" - - def _do_store(): - cf = self._get_column_family() - date_done = self.app.now() - meta = {'status': status, - 'date_done': date_done.strftime('%Y-%m-%dT%H:%M:%SZ'), - 'traceback': self.encode(traceback), - 'children': self.encode( - self.current_task_children(request), - )} - if self.detailed_mode: - meta['result'] = result - cf.insert(task_id, {date_done: self.encode(meta)}, - ttl=self.expires and timedelta_seconds(self.expires)) - else: - meta['result'] = self.encode(result) - cf.insert(task_id, meta, - ttl=self.expires and timedelta_seconds(self.expires)) - - return self._retry_on_error(_do_store) + """Store return value and state of an executed task.""" + self._get_connection(write=True) + + self._session.execute(self._write_stmt, ( + task_id, + state, + buf_t(self.encode(result)), + self.app.now(), + buf_t(self.encode(traceback)), + buf_t(self.encode(self.current_task_children(request))) + )) + + def as_uri(self, include_password=True): + return 'cassandra://' def _get_task_meta_for(self, task_id): """Get task metadata for a task by id.""" + self._get_connection() - def _do_get(): - cf = self._get_column_family() - try: - if self.detailed_mode: - row = cf.get(task_id, column_reversed=True, column_count=1) - meta = self.decode(list(row.values())[0]) - meta['task_id'] = task_id - else: - obj = cf.get(task_id) - meta = { - 'task_id': task_id, - 'status': obj['status'], - 'result': self.decode(obj['result']), - 'date_done': obj['date_done'], - 'traceback': self.decode(obj['traceback']), - 'children': self.decode(obj['children']), - } - except (KeyError, pycassa.NotFoundException): - meta 
= {'status': states.PENDING, 'result': None} - return meta - - return self._retry_on_error(_do_get) + res = self._session.execute(self._read_stmt, (task_id, )) + if not res: + return {'status': states.PENDING, 'result': None} + + status, result, date_done, traceback, children = res[0] + + return self.meta_from_decoded({ + 'task_id': task_id, + 'status': status, + 'result': self.decode(result), + 'date_done': date_done.strftime('%Y-%m-%dT%H:%M:%SZ'), + 'traceback': self.decode(traceback), + 'children': self.decode(children), + }) def __reduce__(self, args=(), kwargs={}): kwargs.update( dict(servers=self.servers, keyspace=self.keyspace, - column_family=self.column_family, - cassandra_options=self.cassandra_options)) + table=self.table)) return super(CassandraBackend, self).__reduce__(args, kwargs) diff --git a/celery/backends/couchbase.py b/celery/backends/couchbase.py index 2d51b8001..0f3483072 100644 --- a/celery/backends/couchbase.py +++ b/celery/backends/couchbase.py @@ -17,10 +17,10 @@ except ImportError: Couchbase = Connection = NotFoundError = None # noqa +from kombu.utils.encoding import str_t from kombu.utils.url import _parse_url from celery.exceptions import ImproperlyConfigured -from celery.utils.timeutils import maybe_timedelta from .base import KeyValueStoreBackend @@ -28,6 +28,12 @@ class CouchBaseBackend(KeyValueStoreBackend): + """CouchBase backend. + + :raises celery.exceptions.ImproperlyConfigured: if + module :mod:`couchbase` is not available. + + """ bucket = 'default' host = 'localhost' port = 8091 @@ -38,19 +44,13 @@ class CouchBaseBackend(KeyValueStoreBackend): unlock_gil = True timeout = 2.5 transcoder = None - # supports_autoexpire = False - - def __init__(self, url=None, *args, **kwargs): - """Initialize CouchBase backend instance. - :raises celery.exceptions.ImproperlyConfigured: if - module :mod:`couchbase` is not available. 
+ # Use str as couchbase key not bytes + key_t = str_t - """ + def __init__(self, url=None, *args, **kwargs): super(CouchBaseBackend, self).__init__(*args, **kwargs) - - self.expires = kwargs.get('expires') or maybe_timedelta( - self.app.conf.CELERY_TASK_RESULT_EXPIRES) + self.url = url if Couchbase is None: raise ImproperlyConfigured( @@ -63,7 +63,7 @@ def __init__(self, url=None, *args, **kwargs): _, uhost, uport, uname, upass, ubucket, _ = _parse_url(url) ubucket = ubucket.strip('/') if ubucket else None - config = self.app.conf.get('CELERY_COUCHBASE_BACKEND_SETTINGS', None) + config = self.app.conf.get('couchbase_backend_settings', None) if config is not None: if not isinstance(config, dict): raise ImproperlyConfigured( diff --git a/celery/backends/couchdb.py b/celery/backends/couchdb.py new file mode 100644 index 000000000..32ae7826f --- /dev/null +++ b/celery/backends/couchdb.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.couchdb + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + CouchDB result store backend. + +""" +from __future__ import absolute_import + +try: + import pycouchdb +except ImportError: + pycouchdb = None # noqa + +from kombu.utils.url import _parse_url + +from celery.exceptions import ImproperlyConfigured + +from .base import KeyValueStoreBackend + +__all__ = ['CouchBackend'] + +ERR_LIB_MISSING = """\ +You need to install the pycouchdb library to use the CouchDB result backend\ +""" + + +class CouchBackend(KeyValueStoreBackend): + """CouchDB backend. + + :raises celery.exceptions.ImproperlyConfigured: if + module :mod:`pycouchdb` is not available. 
+ + """ + container = 'default' + scheme = 'http' + host = 'localhost' + port = 5984 + username = None + password = None + + def __init__(self, url=None, *args, **kwargs): + super(CouchBackend, self).__init__(*args, **kwargs) + self.url = url + + if pycouchdb is None: + raise ImproperlyConfigured(ERR_LIB_MISSING) + + uscheme = uhost = uport = uname = upass = ucontainer = None + if url: + _, uhost, uport, uname, upass, ucontainer, _ = _parse_url(url) # noqa + ucontainer = ucontainer.strip('/') if ucontainer else None + + self.scheme = uscheme or self.scheme + self.host = uhost or self.host + self.port = int(uport or self.port) + self.container = ucontainer or self.container + self.username = uname or self.username + self.password = upass or self.password + + self._connection = None + + def _get_connection(self): + """Connect to the CouchDB server.""" + if self.username and self.password: + conn_string = '%s://%s:%s@%s:%s' % ( + self.scheme, self.username, self.password, + self.host, str(self.port)) + server = pycouchdb.Server(conn_string, authmethod='basic') + else: + conn_string = '%s://%s:%s' % ( + self.scheme, self.host, str(self.port)) + server = pycouchdb.Server(conn_string) + + try: + return server.database(self.container) + except pycouchdb.exceptions.NotFound: + return server.create(self.container) + + @property + def connection(self): + if self._connection is None: + self._connection = self._get_connection() + return self._connection + + def get(self, key): + try: + return self.connection.get(key)['value'] + except pycouchdb.exceptions.NotFound: + return None + + def set(self, key, value): + data = {'_id': key, 'value': value} + try: + self.connection.save(data) + except pycouchdb.exceptions.Conflict: + # document already exists, update it + data = self.connection.get(key) + data['value'] = value + self.connection.save(data) + + def mget(self, keys): + return [self.get(key) for key in keys] + + def delete(self, key): + self.connection.delete(key) diff --git 
a/celery/backends/database/__init__.py b/celery/backends/database/__init__.py index 58109e782..2a88687a0 100644 --- a/celery/backends/database/__init__.py +++ b/celery/backends/database/__init__.py @@ -8,32 +8,42 @@ """ from __future__ import absolute_import +import logging +from contextlib import contextmanager from functools import wraps from celery import states +from celery.backends.base import BaseBackend from celery.exceptions import ImproperlyConfigured from celery.five import range from celery.utils.timeutils import maybe_timedelta -from celery.backends.base import BaseBackend +from .models import Task +from .models import TaskSet +from .session import SessionManager + +try: + from sqlalchemy.exc import DatabaseError, InvalidRequestError + from sqlalchemy.orm.exc import StaleDataError +except ImportError: # pragma: no cover + raise ImproperlyConfigured( + 'The database result backend requires SQLAlchemy to be installed.' + 'See http://pypi.python.org/pypi/SQLAlchemy') -from .models import Task, TaskSet -from .session import ResultSession +logger = logging.getLogger(__name__) __all__ = ['DatabaseBackend'] -def _sqlalchemy_installed(): +@contextmanager +def session_cleanup(session): try: - import sqlalchemy - except ImportError: - raise ImproperlyConfigured( - 'The database result backend requires SQLAlchemy to be installed.' - 'See http://pypi.python.org/pypi/SQLAlchemy') - return sqlalchemy -_sqlalchemy_installed() - -from sqlalchemy.exc import DatabaseError, OperationalError + yield + except Exception: + session.rollback() + raise + finally: + session.close() def retry(fun): @@ -45,7 +55,12 @@ def _inner(*args, **kwargs): for retries in range(max_retries): try: return fun(*args, **kwargs) - except (DatabaseError, OperationalError): + except (DatabaseError, InvalidRequestError, StaleDataError): + logger.warning( + "Failed operation %s. 
Retrying %s more times.", + fun.__name__, max_retries - retries - 1, + exc_info=True, + ) if retries + 1 >= max_retries: raise @@ -58,135 +73,125 @@ class DatabaseBackend(BaseBackend): # to not bombard the database with queries. subpolling_interval = 0.5 - def __init__(self, dburi=None, expires=None, - engine_options=None, url=None, **kwargs): + def __init__(self, dburi=None, engine_options=None, url=None, **kwargs): # The `url` argument was added later and is used by # the app to set backend by url (celery.backends.get_backend_by_url) - super(DatabaseBackend, self).__init__(**kwargs) + super(DatabaseBackend, self).__init__( + expires_type=maybe_timedelta, + url=url, + **kwargs + ) conf = self.app.conf - self.expires = maybe_timedelta(self.prepare_expires(expires)) - self.dburi = url or dburi or conf.CELERY_RESULT_DBURI + self.url = url or dburi or conf.sqlalchemy_dburi self.engine_options = dict( engine_options or {}, - **conf.CELERY_RESULT_ENGINE_OPTIONS or {}) + **conf.sqlalchemy_engine_options or {}) self.short_lived_sessions = kwargs.get( 'short_lived_sessions', - conf.CELERY_RESULT_DB_SHORT_LIVED_SESSIONS, + conf.sqlalchemy_short_lived_sessions, ) - tablenames = conf.CELERY_RESULT_DB_TABLENAMES or {} + tablenames = conf.sqlalchemy_table_names or {} Task.__table__.name = tablenames.get('task', 'celery_taskmeta') TaskSet.__table__.name = tablenames.get('group', 'celery_tasksetmeta') - if not self.dburi: + if not self.url: raise ImproperlyConfigured( - 'Missing connection string! Do you have ' - 'CELERY_RESULT_DBURI set to a real value?') + 'Missing connection string! 
Do you have the' + ' sqlalchemy_dburi setting set to a real value?') - def ResultSession(self): - return ResultSession( - dburi=self.dburi, + def ResultSession(self, session_manager=SessionManager()): + return session_manager.session_factory( + dburi=self.url, short_lived_sessions=self.short_lived_sessions, **self.engine_options ) @retry - def _store_result(self, task_id, result, status, + def _store_result(self, task_id, result, state, traceback=None, max_retries=3, **kwargs): - """Store return value and status of an executed task.""" + """Store return value and state of an executed task.""" session = self.ResultSession() - try: - task = session.query(Task).filter(Task.task_id == task_id).first() + with session_cleanup(session): + task = list(session.query(Task).filter(Task.task_id == task_id)) + task = task and task[0] if not task: task = Task(task_id) session.add(task) session.flush() task.result = result - task.status = status + task.status = state task.traceback = traceback session.commit() return result - finally: - session.close() @retry def _get_task_meta_for(self, task_id): """Get task metadata for a task by id.""" session = self.ResultSession() - try: - task = session.query(Task).filter(Task.task_id == task_id).first() - if task is None: + with session_cleanup(session): + task = list(session.query(Task).filter(Task.task_id == task_id)) + task = task and task[0] + if not task: task = Task(task_id) task.status = states.PENDING task.result = None - return task.to_dict() - finally: - session.close() + return self.meta_from_decoded(task.to_dict()) @retry def _save_group(self, group_id, result): """Store the result of an executed group.""" session = self.ResultSession() - try: + with session_cleanup(session): group = TaskSet(group_id, result) session.add(group) session.flush() session.commit() return result - finally: - session.close() @retry def _restore_group(self, group_id): """Get metadata for group by id.""" session = self.ResultSession() - try: + with 
session_cleanup(session): group = session.query(TaskSet).filter( TaskSet.taskset_id == group_id).first() if group: return group.to_dict() - finally: - session.close() @retry def _delete_group(self, group_id): """Delete metadata for group by id.""" session = self.ResultSession() - try: + with session_cleanup(session): session.query(TaskSet).filter( TaskSet.taskset_id == group_id).delete() session.flush() session.commit() - finally: - session.close() @retry def _forget(self, task_id): """Forget about result.""" session = self.ResultSession() - try: + with session_cleanup(session): session.query(Task).filter(Task.task_id == task_id).delete() session.commit() - finally: - session.close() def cleanup(self): """Delete expired metadata.""" session = self.ResultSession() expires = self.expires now = self.app.now() - try: + with session_cleanup(session): session.query(Task).filter( Task.date_done < (now - expires)).delete() session.query(TaskSet).filter( TaskSet.date_done < (now - expires)).delete() session.commit() - finally: - session.close() def __reduce__(self, args=(), kwargs={}): kwargs.update( - dict(dburi=self.dburi, + dict(dburi=self.url, expires=self.expires, engine_options=self.engine_options)) return super(DatabaseBackend, self).__reduce__(args, kwargs) diff --git a/celery/backends/database/models.py b/celery/backends/database/models.py index 2802a007c..82bc20d8f 100644 --- a/celery/backends/database/models.py +++ b/celery/backends/database/models.py @@ -28,7 +28,7 @@ class Task(ResultModelBase): id = sa.Column(sa.Integer, sa.Sequence('task_id_sequence'), primary_key=True, autoincrement=True) - task_id = sa.Column(sa.String(255), unique=True) + task_id = sa.Column(sa.String(155), unique=True) status = sa.Column(sa.String(50), default=states.PENDING) result = sa.Column(PickleType, nullable=True) date_done = sa.Column(sa.DateTime, default=datetime.utcnow, @@ -56,7 +56,7 @@ class TaskSet(ResultModelBase): id = sa.Column(sa.Integer, 
sa.Sequence('taskset_id_sequence'), autoincrement=True, primary_key=True) - taskset_id = sa.Column(sa.String(255), unique=True) + taskset_id = sa.Column(sa.String(155), unique=True) result = sa.Column(PickleType, nullable=True) date_done = sa.Column(sa.DateTime, default=datetime.utcnow, nullable=True) diff --git a/celery/backends/database/session.py b/celery/backends/database/session.py index fef3843e4..451c735c6 100644 --- a/celery/backends/database/session.py +++ b/celery/backends/database/session.py @@ -8,58 +8,60 @@ """ from __future__ import absolute_import -from collections import defaultdict -from multiprocessing.util import register_after_fork - from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import NullPool -ResultModelBase = declarative_base() - -_SETUP = defaultdict(lambda: False) -_ENGINES = {} -_SESSIONS = {} - -__all__ = ['ResultSession', 'get_engine', 'create_session'] - - -class _after_fork(object): - registered = False - - def __call__(self): - self.registered = False # child must reregister - for engine in list(_ENGINES.values()): - engine.dispose() - _ENGINES.clear() - _SESSIONS.clear() -after_fork = _after_fork() - - -def get_engine(dburi, **kwargs): - try: - return _ENGINES[dburi] - except KeyError: - engine = _ENGINES[dburi] = create_engine(dburi, **kwargs) - after_fork.registered = True - register_after_fork(after_fork, after_fork) - return engine - - -def create_session(dburi, short_lived_sessions=False, **kwargs): - engine = get_engine(dburi, **kwargs) - if short_lived_sessions or dburi not in _SESSIONS: - _SESSIONS[dburi] = sessionmaker(bind=engine) - return engine, _SESSIONS[dburi] - - -def setup_results(engine): - if not _SETUP['results']: - ResultModelBase.metadata.create_all(engine) - _SETUP['results'] = True +from kombu.utils import register_after_fork +ResultModelBase = 
declarative_base() -def ResultSession(dburi, **kwargs): - engine, session = create_session(dburi, **kwargs) - setup_results(engine) - return session() +__all__ = ['SessionManager'] + + +def _after_fork_cleanup_session(session): + session._after_fork() + + +class SessionManager(object): + + def __init__(self): + self._engines = {} + self._sessions = {} + self.forked = False + self.prepared = False + if register_after_fork is not None: + register_after_fork(self, _after_fork_cleanup_session) + + def _after_fork(self): + self.forked = True + + def get_engine(self, dburi, **kwargs): + if self.forked: + try: + return self._engines[dburi] + except KeyError: + engine = self._engines[dburi] = create_engine(dburi, **kwargs) + return engine + else: + return create_engine(dburi, poolclass=NullPool) + + def create_session(self, dburi, short_lived_sessions=False, **kwargs): + engine = self.get_engine(dburi, **kwargs) + if self.forked: + if short_lived_sessions or dburi not in self._sessions: + self._sessions[dburi] = sessionmaker(bind=engine) + return engine, self._sessions[dburi] + else: + return engine, sessionmaker(bind=engine) + + def prepare_models(self, engine): + if not self.prepared: + ResultModelBase.metadata.create_all(engine) + self.prepared = True + + def session_factory(self, dburi, **kwargs): + engine, session = self.create_session(dburi, **kwargs) + self.prepare_models(engine) + return session() diff --git a/celery/backends/elasticsearch.py b/celery/backends/elasticsearch.py new file mode 100644 index 000000000..78d1aa3e2 --- /dev/null +++ b/celery/backends/elasticsearch.py @@ -0,0 +1,122 @@ +# -* coding: utf-8 -*- +""" + celery.backends.elasticsearch + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Elasticsearch result store backend. 
+ +""" +from __future__ import absolute_import, unicode_literals + +from datetime import datetime + +from kombu.utils.url import _parse_url + +from celery.exceptions import ImproperlyConfigured + +from .base import KeyValueStoreBackend + +try: + import elasticsearch +except ImportError: + elasticsearch = None # noqa + +__all__ = ['ElasticsearchBackend'] + +E_LIB_MISSING = """\ +You need to install the elasticsearch library to use the Elasticsearch \ +result backend.\ +""" + + +class ElasticsearchBackend(KeyValueStoreBackend): + """Elasticsearch Backend. + + :raises celery.exceptions.ImproperlyConfigured: if + module :mod:`elasticsearch` is not available. + + """ + + index = 'celery' + doc_type = 'backend' + scheme = 'http' + host = 'localhost' + port = 9200 + + def __init__(self, url=None, *args, **kwargs): + super(ElasticsearchBackend, self).__init__(*args, **kwargs) + self.url = url + + if elasticsearch is None: + raise ImproperlyConfigured(E_LIB_MISSING) + + index = doc_type = scheme = host = port = None + + if url: + scheme, host, port, _, _, path, _ = _parse_url(url) # noqa + if path: + path = path.strip('/') + index, _, doc_type = path.partition('/') + + self.index = index or self.index + self.doc_type = doc_type or self.doc_type + self.scheme = scheme or self.scheme + self.host = host or self.host + self.port = port or self.port + + self._server = None + + def get(self, key): + try: + res = self.server.get( + index=self.index, + doc_type=self.doc_type, + id=key, + ) + try: + if res['found']: + return res['_source'][key] + except (TypeError, KeyError): + pass + except elasticsearch.exceptions.NotFoundError: + pass + + def set(self, key, value): + try: + self._index( + id=key, + body={ + key: value, + '@timestamp': '{0}Z'.format( + datetime.utcnow().isoformat()[:-3] + ), + }, + ) + except elasticsearch.exceptions.ConflictError: + # document already exists, update it + data = self.get(key) + data[key] = value + self._index(key, data, refresh=True) + + def 
_index(self, id, body, **kwargs): + return self.server.index( + index=self.index, + doc_type=self.doc_type, + **kwargs + ) + + def mget(self, keys): + return [self.get(key) for key in keys] + + def delete(self, key): + self.server.delete(index=self.index, doc_type=self.doc_type, id=key) + + def _get_server(self): + """Connect to the Elasticsearch server.""" + return elasticsearch.Elasticsearch(self.host) + + @property + def server(self): + if self._server is None: + self._server = self._get_server() + return self._server diff --git a/celery/backends/filesystem.py b/celery/backends/filesystem.py new file mode 100644 index 000000000..e42a5eeaf --- /dev/null +++ b/celery/backends/filesystem.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.filesystem + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Filesystem result store backend. +""" +from __future__ import absolute_import + +from kombu.utils.encoding import ensure_bytes + +from celery.exceptions import ImproperlyConfigured +from celery.backends.base import KeyValueStoreBackend +from celery.utils import uuid + +import os +import locale +default_encoding = locale.getpreferredencoding(False) + +# Python 2 does not have FileNotFoundError and IsADirectoryError +try: + FileNotFoundError +except NameError: + FileNotFoundError = IOError + IsADirectoryError = IOError + +E_PATH_INVALID = """\ +The configured path for the Filesystem backend does not +work correctly, please make sure that it exists and has +the correct permissions.\ +""" + + +class FilesystemBackend(KeyValueStoreBackend): + """Filesystem result backend. 
+ + Keyword arguments (in addition to those of KeyValueStoreBackend): + + :param url: URL to the directory we should use + :param open: open function to use when opening files + :param unlink: unlink function to use when deleting files + :param sep: directory seperator (to join the directory with the key) + :param encoding: encoding used on the filesystem + + """ + + def __init__(self, url=None, open=open, unlink=os.unlink, sep=os.sep, + encoding=default_encoding, *args, **kwargs): + super(FilesystemBackend, self).__init__(*args, **kwargs) + self.url = url + path = self._find_path(url) + + # We need the path and seperator as bytes objects + self.path = path.encode(encoding) + self.sep = sep.encode(encoding) + + self.open = open + self.unlink = unlink + + # Lets verify that we have everything setup right + self._do_directory_test(b'.fs-backend-' + uuid().encode(encoding)) + + def _find_path(self, url): + if url is not None and url.startswith('file:///'): + return url[7:] + path = self.app.conf.result_fspath + if not path: + raise ImproperlyConfigured( + 'You need to configure a path for the Filesystem backend') + return path + + def _do_directory_test(self, key): + try: + self.set(key, b'test value') + assert self.get(key) == b'test value' + self.delete(key) + except IOError: + raise ImproperlyConfigured(E_PATH_INVALID) + + def _filename(self, key): + return self.sep.join((self.path, key)) + + def get(self, key): + try: + with self.open(self._filename(key), 'rb') as infile: + return infile.read() + except FileNotFoundError: + pass + + def set(self, key, value): + with self.open(self._filename(key), 'wb') as outfile: + outfile.write(ensure_bytes(value)) + + def mget(self, keys): + for key in keys: + yield self.get(key) + + def delete(self, key): + self.unlink(self._filename(key)) diff --git a/celery/backends/mongodb.py b/celery/backends/mongodb.py index c3229d51c..938b7e193 100644 --- a/celery/backends/mongodb.py +++ b/celery/backends/mongodb.py @@ -8,7 +8,16 @@ """ 
from __future__ import absolute_import -from datetime import datetime +from datetime import datetime, timedelta + +from kombu.utils import cached_property +from kombu.utils.url import maybe_sanitize_url +from kombu.exceptions import EncodeError +from celery import states +from celery.exceptions import ImproperlyConfigured +from celery.five import string_t, items + +from .base import BaseBackend try: import pymongo @@ -20,35 +29,32 @@ from bson.binary import Binary except ImportError: # pragma: no cover from pymongo.binary import Binary # noqa + from pymongo.errors import InvalidDocument # noqa else: # pragma: no cover Binary = None # noqa -from kombu.syn import detect_environment -from kombu.utils import cached_property - -from celery import states -from celery.exceptions import ImproperlyConfigured -from celery.five import string_t -from celery.utils.timeutils import maybe_timedelta - -from .base import BaseBackend + class InvalidDocument(Exception): # noqa + pass __all__ = ['MongoBackend'] -class Bunch(object): +class MongoBackend(BaseBackend): + """MongoDB result backend. - def __init__(self, **kw): - self.__dict__.update(kw) + :raises celery.exceptions.ImproperlyConfigured: if + module :mod:`pymongo` is not available. + """ -class MongoBackend(BaseBackend): + mongo_host = None host = 'localhost' port = 27017 user = None password = None database_name = 'celery' taskmeta_collection = 'celery_taskmeta' + groupmeta_collection = 'celery_groupmeta' max_pool_size = 10 options = None @@ -56,148 +62,179 @@ class MongoBackend(BaseBackend): _connection = None - def __init__(self, *args, **kwargs): - """Initialize MongoDB backend instance. - - :raises celery.exceptions.ImproperlyConfigured: if - module :mod:`pymongo` is not available. 
- - """ + def __init__(self, app=None, **kwargs): self.options = {} - super(MongoBackend, self).__init__(*args, **kwargs) - self.expires = kwargs.get('expires') or maybe_timedelta( - self.app.conf.CELERY_TASK_RESULT_EXPIRES) + + super(MongoBackend, self).__init__(app, **kwargs) if not pymongo: raise ImproperlyConfigured( 'You need to install the pymongo library to use the ' 'MongoDB backend.') - config = self.app.conf.get('CELERY_MONGODB_BACKEND_SETTINGS') + # Set option defaults + for key, value in items(self._prepare_client_options()): + self.options.setdefault(key, value) + + # update conf with mongo uri data, only if uri was given + if self.url: + if self.url == 'mongodb://': + self.url += 'localhost' + + uri_data = pymongo.uri_parser.parse_uri(self.url) + # build the hosts list to create a mongo connection + hostslist = [ + '{0}:{1}'.format(x[0], x[1]) for x in uri_data['nodelist'] + ] + self.user = uri_data['username'] + self.password = uri_data['password'] + self.mongo_host = hostslist + if uri_data['database']: + # if no database is provided in the uri, use default + self.database_name = uri_data['database'] + + self.options.update(uri_data['options']) + + # update conf with specific settings + config = self.app.conf.get('mongodb_backend_settings') if config is not None: if not isinstance(config, dict): raise ImproperlyConfigured( 'MongoDB backend settings should be grouped in a dict') config = dict(config) # do not modify original + if 'host' in config or 'port' in config: + # these should take over uri conf + self.mongo_host = None + self.host = config.pop('host', self.host) - self.port = int(config.pop('port', self.port)) + self.port = config.pop('port', self.port) + self.mongo_host = config.pop('mongo_host', self.mongo_host) self.user = config.pop('user', self.user) self.password = config.pop('password', self.password) self.database_name = config.pop('database', self.database_name) self.taskmeta_collection = config.pop( 'taskmeta_collection', 
self.taskmeta_collection, ) + self.groupmeta_collection = config.pop( + 'groupmeta_collection', self.groupmeta_collection, + ) - self.options = dict(config, **config.pop('options', None) or {}) - - # Set option defaults - self.options.setdefault('ssl', self.app.conf.BROKER_USE_SSL) - self.options.setdefault('max_pool_size', self.max_pool_size) - self.options.setdefault('auto_start_request', False) + self.options.update(config.pop('options', {})) + self.options.update(config) - url = kwargs.get('url') - if url: - # Specifying backend as an URL - self.host = url + def _prepare_client_options(self): + if pymongo.version_tuple >= (3,): + return {'maxPoolSize': self.max_pool_size} + else: # pragma: no cover + return {'max_pool_size': self.max_pool_size, + 'auto_start_request': False} def _get_connection(self): """Connect to the MongoDB server.""" if self._connection is None: from pymongo import MongoClient - # The first pymongo.Connection() argument (host) can be - # a list of ['host:port'] elements or a mongodb connection - # URI. If this is the case, don't use self.port - # but let pymongo get the port(s) from the URI instead. - # This enables the use of replica sets and sharding. - # See pymongo.Connection() for more info. - url = self.host - if isinstance(url, string_t) \ - and not url.startswith('mongodb://'): - url = 'mongodb://{0}:{1}'.format(url, self.port) - if url == 'mongodb://': - url = url + 'localhost' - if detect_environment() != 'default': - self.options['use_greenlets'] = True - self._connection = MongoClient(host=url, **self.options) + host = self.mongo_host + if not host: + # The first pymongo.Connection() argument (host) can be + # a list of ['host:port'] elements or a mongodb connection + # URI. If this is the case, don't use self.port + # but let pymongo get the port(s) from the URI instead. + # This enables the use of replica sets and sharding. + # See pymongo.Connection() for more info. 
+ host = self.host + if isinstance(host, string_t) \ + and not host.startswith('mongodb://'): + host = 'mongodb://{0}:{1}'.format(host, self.port) + # don't change self.options + conf = dict(self.options) + conf['host'] = host + + self._connection = MongoClient(**conf) return self._connection - def process_cleanup(self): - if self._connection is not None: - # MongoDB connection will be closed automatically when object - # goes out of scope - del(self.collection) - del(self.database) - self._connection = None + def encode(self, data): + if self.serializer == 'bson': + # mongodb handles serialization + return data + return super(MongoBackend, self).encode(data) + + def decode(self, data): + if self.serializer == 'bson': + return data + return super(MongoBackend, self).decode(data) - def _store_result(self, task_id, result, status, + def _store_result(self, task_id, result, state, traceback=None, request=None, **kwargs): - """Store return value and status of an executed task.""" + """Store return value and state of an executed task.""" + meta = {'_id': task_id, - 'status': status, - 'result': Binary(self.encode(result)), + 'status': state, + 'result': self.encode(result), 'date_done': datetime.utcnow(), - 'traceback': Binary(self.encode(traceback)), - 'children': Binary(self.encode( + 'traceback': self.encode(traceback), + 'children': self.encode( self.current_task_children(request), - ))} - self.collection.save(meta) + )} + + try: + self.collection.save(meta) + except InvalidDocument as exc: + raise EncodeError(exc) return result def _get_task_meta_for(self, task_id): """Get task metadata for a task by id.""" - obj = self.collection.find_one({'_id': task_id}) - if not obj: - return {'status': states.PENDING, 'result': None} - - meta = { - 'task_id': obj['_id'], - 'status': obj['status'], - 'result': self.decode(obj['result']), - 'date_done': obj['date_done'], - 'traceback': self.decode(obj['traceback']), - 'children': self.decode(obj['children']), - } - - return meta 
+ if obj: + return self.meta_from_decoded({ + 'task_id': obj['_id'], + 'status': obj['status'], + 'result': self.decode(obj['result']), + 'date_done': obj['date_done'], + 'traceback': self.decode(obj['traceback']), + 'children': self.decode(obj['children']), + }) + return {'status': states.PENDING, 'result': None} def _save_group(self, group_id, result): """Save the group result.""" + + task_ids = [i.id for i in result] + meta = {'_id': group_id, - 'result': Binary(self.encode(result)), + 'result': self.encode(task_ids), 'date_done': datetime.utcnow()} - self.collection.save(meta) + self.group_collection.save(meta) return result def _restore_group(self, group_id): """Get the result for a group by id.""" - obj = self.collection.find_one({'_id': group_id}) - if not obj: - return + obj = self.group_collection.find_one({'_id': group_id}) + if obj: + tasks = [self.app.AsyncResult(task) + for task in self.decode(obj['result'])] - meta = { - 'task_id': obj['_id'], - 'result': self.decode(obj['result']), - 'date_done': obj['date_done'], - } - - return meta + return { + 'task_id': obj['_id'], + 'result': tasks, + 'date_done': obj['date_done'], + } def _delete_group(self, group_id): """Delete a group by id.""" - self.collection.remove({'_id': group_id}) + self.group_collection.remove({'_id': group_id}) def _forget(self, task_id): - """ - Remove result from MongoDB. + """Remove result from MongoDB. + + :raises celery.exceptions.OperationsError: + if the task_id could not be removed. - :raises celery.exceptions.OperationsError: if the task_id could not be - removed. """ # By using safe=True, this will wait until it receives a response from # the server. 
Likewise, it will raise an OperationsError if the @@ -207,13 +244,16 @@ def _forget(self, task_id): def cleanup(self): """Delete expired metadata.""" self.collection.remove( - {'date_done': {'$lt': self.app.now() - self.expires}}, + {'date_done': {'$lt': self.app.now() - self.expires_delta}}, + ) + self.group_collection.remove( + {'date_done': {'$lt': self.app.now() - self.expires_delta}}, ) def __reduce__(self, args=(), kwargs={}): - kwargs.update( - dict(expires=self.expires)) - return super(MongoBackend, self).__reduce__(args, kwargs) + return super(MongoBackend, self).__reduce__( + args, dict(kwargs, expires=self.expires, url=self.url), + ) def _get_database(self): conn = self._get_connection() @@ -240,3 +280,34 @@ def collection(self): # in the background. Once completed cleanup will be much faster collection.ensure_index('date_done', background='true') return collection + + @cached_property + def group_collection(self): + """Get the group metadata collection.""" + collection = self.database[self.groupmeta_collection] + + # Ensure an index on date_done is there, if not process the index + # in the background. Once completed cleanup will be much faster + collection.ensure_index('date_done', background='true') + return collection + + @cached_property + def expires_delta(self): + return timedelta(seconds=self.expires) + + def as_uri(self, include_password=False): + """Return the backend as a URI. + + :keyword include_password: Include the password in the URI (default is to censor it). 
+ + """ + if not self.url: + return 'mongodb://' + if include_password: + return self.url + + if ',' not in self.url: + return maybe_sanitize_url(self.url) + + uri1, remainder = self.url.split(',', 1) + return ','.join([maybe_sanitize_url(uri1), remainder]) diff --git a/celery/backends/redis.py b/celery/backends/redis.py index 314b1d2b8..5daecd381 100644 --- a/celery/backends/redis.py +++ b/celery/backends/redis.py @@ -14,15 +14,17 @@ from kombu.utils.url import _parse_url from celery import states +from celery._state import task_join_will_block from celery.canvas import maybe_signature from celery.exceptions import ChordError, ImproperlyConfigured from celery.five import string_t -from celery.utils import deprecated_property, strtobool +from celery.utils import deprecated_property from celery.utils.functional import dictfilter from celery.utils.log import get_logger from celery.utils.timeutils import humanize_seconds -from .base import KeyValueStoreBackend +from . import async +from . import base try: import redis @@ -39,13 +41,67 @@ You need to install the redis library in order to use \ the Redis result store backend.""" +E_LOST = """\ +Connection to Redis lost: Retry (%s/%s) %s.\ +""" + logger = get_logger(__name__) error = logger.error -class RedisBackend(KeyValueStoreBackend): +class ResultConsumer(async.BaseResultConsumer): + + _pubsub = None + + def __init__(self, *args, **kwargs): + super(ResultConsumer, self).__init__(*args, **kwargs) + self._get_key_for_task = self.backend.get_key_for_task + self._decode_result = self.backend.decode_result + self.subscribed_to = set() + + def start(self, initial_task_id): + self._pubsub = self.backend.client.pubsub( + ignore_subscribe_messages=True, + ) + self._consume_from(initial_task_id) + + def on_wait_for_pending(self, result, **kwargs): + for meta in result._iter_meta(): + if meta is not None: + self.on_state_change(meta, None) + + def stop(self): + if self._pubsub is not None: + self._pubsub.close() + + def 
drain_events(self, timeout=None): + m = self._pubsub.get_message(timeout=timeout) + if m and m['type'] == 'message': + self.on_state_change(self._decode_result(m['data']), m) + + def consume_from(self, task_id): + if self._pubsub is None: + return self.start(task_id) + self._consume_from(task_id) + + def _consume_from(self, task_id): + key = self._get_key_for_task(task_id) + if key not in self.subscribed_to: + self.subscribed_to.add(key) + self._pubsub.subscribe(key) + + def cancel_for(self, task_id): + if self._pubsub: + key = self._get_key_for_task(task_id) + self.subscribed_to.discard(key) + self._pubsub.unsubscribe(key) + + +class RedisBackend(base.BaseKeyValueStoreBackend, async.AsyncBackendMixin): """Redis task result store.""" + ResultConsumer = ResultConsumer + #: redis-py client module. redis = redis @@ -54,55 +110,43 @@ class RedisBackend(KeyValueStoreBackend): supports_autoexpire = True supports_native_join = True - implements_incr = True def __init__(self, host=None, port=None, db=None, password=None, - expires=None, max_connections=None, url=None, - connection_pool=None, new_join=False, **kwargs): - super(RedisBackend, self).__init__(**kwargs) - conf = self.app.conf + max_connections=None, url=None, + connection_pool=None, **kwargs): + super(RedisBackend, self).__init__(expires_type=int, **kwargs) + _get = self.app.conf.get if self.redis is None: raise ImproperlyConfigured(REDIS_MISSING) - # For compatibility with the old REDIS_* configuration keys. 
- def _get(key): - for prefix in 'CELERY_REDIS_{0}', 'REDIS_{0}': - try: - return conf[prefix.format(key)] - except KeyError: - pass if host and '://' in host: url = host host = None self.max_connections = ( - max_connections or _get('MAX_CONNECTIONS') or self.max_connections + max_connections or + _get('redis_max_connections') or + self.max_connections ) self._ConnectionPool = connection_pool self.connparams = { - 'host': _get('HOST') or 'localhost', - 'port': _get('PORT') or 6379, - 'db': _get('DB') or 0, - 'password': _get('PASSWORD'), - 'max_connections': max_connections, + 'host': _get('redis_host') or 'localhost', + 'port': _get('redis_port') or 6379, + 'db': _get('redis_db') or 0, + 'password': _get('redis_password'), + 'socket_timeout': _get('redis_socket_timeout'), + 'max_connections': self.max_connections, } if url: self.connparams = self._params_from_url(url, self.connparams) self.url = url - self.expires = self.prepare_expires(expires, type=int) - - try: - new_join = strtobool(self.connparams.pop('new_join')) - except KeyError: - pass - if new_join: - self.apply_chord = self._new_chord_apply - self.on_chord_part_return = self._new_chord_return self.connection_errors, self.channel_errors = ( get_redis_error_classes() if get_redis_error_classes else ((), ())) + self.result_consumer = self.ResultConsumer( + self, self.app, self.accept, self._pending_results) def _params_from_url(self, url, defaults): scheme, host, port, user, password, path, query = _parse_url(url) @@ -134,6 +178,10 @@ def _params_from_url(self, url, defaults): connparams.update(query) return connparams + def on_task_call(self, producer, task_id): + if not task_join_will_block(): + self.result_consumer.consume_from(task_id) + def get(self, key): return self.client.get(key) @@ -151,8 +199,7 @@ def ensure(self, fun, args, **policy): def on_connection_error(self, max_retries, exc, intervals, retries): tts = next(intervals) - error('Connection to Redis lost: Retry (%s/%s) %s.', - retries, 
max_retries or 'Inf', + error(E_LOST, retries, max_retries or 'Inf', humanize_seconds(tts, 'in ')) return tts @@ -160,13 +207,13 @@ def set(self, key, value, **retry_policy): return self.ensure(self._set, (key, value), **retry_policy) def _set(self, key, value): - pipe = self.client.pipeline() - if self.expires: - pipe.setex(key, value, self.expires) - else: - pipe.set(key, value) - pipe.publish(key, value) - pipe.execute() + with self.client.pipeline() as pipe: + if self.expires: + pipe.setex(key, self.expires, value) + else: + pipe.set(key, value) + pipe.publish(key, value) + pipe.execute() def delete(self, key): self.client.delete(key) @@ -177,66 +224,86 @@ def incr(self, key): def expire(self, key, value): return self.client.expire(key, value) + def add_to_chord(self, group_id, result): + self.client.incr(self.get_key_for_group(group_id, '.t'), 1) + def _unpack_chord_result(self, tup, decode, + EXCEPTION_STATES=states.EXCEPTION_STATES, PROPAGATE_STATES=states.PROPAGATE_STATES): _, tid, state, retval = decode(tup) + if state in EXCEPTION_STATES: + retval = self.exception_to_python(retval) if state in PROPAGATE_STATES: raise ChordError('Dependency {0} raised {1!r}'.format(tid, retval)) return retval - def _new_chord_apply(self, header, partial_args, group_id, body, - result=None, **options): + def apply_chord(self, header, partial_args, group_id, body, + result=None, options={}, **kwargs): # avoids saving the group in the redis db. 
- return header(*partial_args, task_id=group_id) + options['task_id'] = group_id + return header(*partial_args, **options or {}) - def _new_chord_return(self, task, state, result, propagate=None, - PROPAGATE_STATES=states.PROPAGATE_STATES): + def on_chord_part_return(self, request, state, result, propagate=None): app = self.app - if propagate is None: - propagate = self.app.conf.CELERY_CHORD_PROPAGATES - request = task.request tid, gid = request.id, request.group if not gid or not tid: return client = self.client jkey = self.get_key_for_group(gid, '.j') + tkey = self.get_key_for_group(gid, '.t') result = self.encode_result(result, state) - _, readycount, _ = client.pipeline() \ - .rpush(jkey, self.encode([1, tid, state, result])) \ - .llen(jkey) \ - .expire(jkey, 86400) \ - .execute() + with client.pipeline() as pipe: + _, readycount, totaldiff, _, _ = pipe \ + .rpush(jkey, self.encode([1, tid, state, result])) \ + .llen(jkey) \ + .get(tkey) \ + .expire(jkey, 86400) \ + .expire(tkey, 86400) \ + .execute() + + totaldiff = int(totaldiff or 0) try: callback = maybe_signature(request.chord, app=app) - total = callback['chord_size'] - if readycount >= total: + total = callback['chord_size'] + totaldiff + if readycount == total: decode, unpack = self.decode, self._unpack_chord_result - resl, _ = client.pipeline() \ - .lrange(jkey, 0, total) \ - .delete(jkey) \ - .execute() + with client.pipeline() as pipe: + resl, _, _ = pipe \ + .lrange(jkey, 0, total) \ + .delete(jkey) \ + .delete(tkey) \ + .execute() try: callback.delay([unpack(tup, decode) for tup in resl]) except Exception as exc: error('Chord callback for %r raised: %r', request.group, exc, exc_info=1) - app._tasks[callback.task].backend.fail_from_current_stack( - callback.id, - exc=ChordError('Callback error: {0!r}'.format(exc)), + return self.chord_error_from_stack( + callback, + ChordError('Callback error: {0!r}'.format(exc)), ) except ChordError as exc: error('Chord %r raised: %r', request.group, exc, 
exc_info=1) - app._tasks[callback.task].backend.fail_from_current_stack( - callback.id, exc=exc, - ) + return self.chord_error_from_stack(callback, exc) except Exception as exc: error('Chord %r raised: %r', request.group, exc, exc_info=1) - app._tasks[callback.task].backend.fail_from_current_stack( - callback.id, exc=ChordError('Join error: {0!r}'.format(exc)), + return self.chord_error_from_stack( + callback, + ChordError('Join error: {0!r}'.format(exc)), ) + def _create_client(self, socket_timeout=None, socket_connect_timeout=None, + **params): + return self.redis.StrictRedis( + connection_pool=self.ConnectionPool( + socket_timeout=socket_timeout and float(socket_timeout), + socket_connect_timeout=socket_connect_timeout and float( + socket_connect_timeout), + **params), + ) + @property def ConnectionPool(self): if self._ConnectionPool is None: @@ -245,27 +312,25 @@ def ConnectionPool(self): @cached_property def client(self): - return self.redis.Redis( - connection_pool=self.ConnectionPool(**self.connparams), - ) + return self._create_client(**self.connparams) def __reduce__(self, args=(), kwargs={}): return super(RedisBackend, self).__reduce__( - (self.url, ), {'expires': self.expires}, + (self.url,), {'expires': self.expires}, ) - @deprecated_property(3.2, 3.3) + @deprecated_property(4.0, 5.0) def host(self): return self.connparams['host'] - @deprecated_property(3.2, 3.3) + @deprecated_property(4.0, 5.0) def port(self): return self.connparams['port'] - @deprecated_property(3.2, 3.3) + @deprecated_property(4.0, 5.0) def db(self): return self.connparams['db'] - @deprecated_property(3.2, 3.3) + @deprecated_property(4.0, 5.0) def password(self): return self.connparams['password'] diff --git a/celery/backends/riak.py b/celery/backends/riak.py new file mode 100644 index 000000000..de2138e3d --- /dev/null +++ b/celery/backends/riak.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.riak + ~~~~~~~~~~~~~~~~~~~~ + + Riak result store backend. 
+ +""" +from __future__ import absolute_import + +import sys + +try: + import riak + from riak import RiakClient + from riak.resolver import last_written_resolver +except ImportError: # pragma: no cover + riak = RiakClient = last_written_resolver = None # noqa + +from kombu.utils.url import _parse_url + +from celery.exceptions import ImproperlyConfigured + +from .base import KeyValueStoreBackend + +E_BUCKET_NAME = """\ +Riak bucket names must be composed of ASCII characters only, not: {0!r}\ +""" + +if sys.version_info[0] == 3: + + def to_bytes(s): + return s.encode() if isinstance(s, str) else s + + def str_decode(s, encoding): + return to_bytes(s).decode(encoding) + +else: + + def str_decode(s, encoding): + return s.decode("ascii") + + +def is_ascii(s): + try: + str_decode(s, 'ascii') + except UnicodeDecodeError: + return False + return True + + +class RiakBackend(KeyValueStoreBackend): + """Riak result backend. + + :raises celery.exceptions.ImproperlyConfigured: if + module :mod:`riak` is not available. + + """ + # TODO: allow using other protocols than protobuf ? 
+ #: default protocol used to connect to Riak, might be `http` or `pbc` + protocol = 'pbc' + + #: default Riak bucket name (`default`) + bucket_name = 'celery' + + #: default Riak server hostname (`localhost`) + host = 'localhost' + + #: default Riak server port (8087) + port = 8087 + + # supports_autoexpire = False + + def __init__(self, host=None, port=None, bucket_name=None, protocol=None, + url=None, *args, **kwargs): + super(RiakBackend, self).__init__(*args, **kwargs) + self.url = url + + if not riak: + raise ImproperlyConfigured( + 'You need to install the riak library to use the ' + 'Riak backend.') + + uhost = uport = uname = upass = ubucket = None + if url: + uprot, uhost, uport, uname, upass, ubucket, _ = _parse_url(url) + if ubucket: + ubucket = ubucket.strip('/') + + config = self.app.conf.get('riak_backend_settings', None) + if config is not None: + if not isinstance(config, dict): + raise ImproperlyConfigured( + 'Riak backend settings should be grouped in a dict') + else: + config = {} + + self.host = uhost or config.get('host', self.host) + self.port = int(uport or config.get('port', self.port)) + self.bucket_name = ubucket or config.get('bucket', self.bucket_name) + self.protocol = protocol or config.get('protocol', self.protocol) + + # riak bucket must be ascii letters or numbers only + if not is_ascii(self.bucket_name): + raise ValueError(E_BUCKET_NAME.format(self.bucket_name)) + + self._client = None + + def _get_client(self): + """Get client connection.""" + if self._client is None or not self._client.is_alive(): + self._client = RiakClient(protocol=self.protocol, + host=self.host, + pb_port=self.port) + self._client.resolver = last_written_resolver + return self._client + + def _get_bucket(self): + """Connect to our bucket.""" + if ( + self._client is None or not self._client.is_alive() or + not self._bucket + ): + self._bucket = self.client.bucket(self.bucket_name) + return self._bucket + + @property + def client(self): + return 
self._get_client() + + @property + def bucket(self): + return self._get_bucket() + + def get(self, key): + return self.bucket.get(key).data + + def set(self, key, value): + _key = self.bucket.new(key, data=value) + _key.store() + + def mget(self, keys): + return [self.get(key).data for key in keys] + + def delete(self, key): + self.bucket.delete(key) diff --git a/celery/backends/rpc.py b/celery/backends/rpc.py index 28d54263f..620055583 100644 --- a/celery/backends/rpc.py +++ b/celery/backends/rpc.py @@ -13,6 +13,7 @@ from kombu.utils import cached_property from celery import current_task +from celery._state import task_join_will_block from celery.backends import amqp __all__ = ['RPCBackend'] @@ -29,7 +30,8 @@ def _create_exchange(self, name, type='direct', delivery_mode=2): return Exchange(None) def on_task_call(self, producer, task_id): - maybe_declare(self.binding(producer.channel), retry=True) + if not task_join_will_block(): + maybe_declare(self.binding(producer.channel), retry=True) def _create_binding(self, task_id): return self.binding @@ -54,10 +56,16 @@ def destination_for(self, task_id, request): def on_reply_declare(self, task_id): pass + def on_result_fulfilled(self, result): + pass + + def as_uri(self, include_password=True): + return 'rpc://' + @property def binding(self): return self.Queue(self.oid, self.exchange, self.oid, - durable=False, auto_delete=False) + durable=False, auto_delete=True) @cached_property def oid(self): diff --git a/celery/beat.py b/celery/beat.py index 8205c2781..6fc500ed9 100644 --- a/celery/beat.py +++ b/celery/beat.py @@ -9,15 +9,19 @@ from __future__ import absolute_import import errno +import heapq import os import time import shelve import sys import traceback +from collections import namedtuple +from functools import total_ordering from threading import Event, Thread -from billiard import Process, ensure_multiprocessing +from billiard import ensure_multiprocessing +from billiard.context import Process from 
billiard.common import reset_signals from kombu.utils import cached_property, reprcall from kombu.utils.functional import maybe_evaluate @@ -34,6 +38,8 @@ __all__ = ['SchedulingError', 'ScheduleEntry', 'Scheduler', 'PersistentScheduler', 'Service', 'EmbeddedService'] +event_t = namedtuple('event_t', ('time', 'priority', 'entry')) + logger = get_logger(__name__) debug, info, error, warning = (logger.debug, logger.info, logger.error, logger.warning) @@ -45,6 +51,7 @@ class SchedulingError(Exception): """An error occured while scheduling a task.""" +@total_ordering class ScheduleEntry(object): """An entry in the scheduler. @@ -136,6 +143,17 @@ def __repr__(self): call=reprcall(self.task, self.args or (), self.kwargs or {}), ) + def __lt__(self, other): + if isinstance(other, ScheduleEntry): + # How the object is ordered doesn't really matter, as + # in the scheduler heap, the order is decided by the + # preceding members of the tuple ``(time, priority, entry)``. + # + # If all that is left to order on is the entry then it can + # just as well be random. + return id(self) < id(other) + return NotImplemented + class Scheduler(object): """Scheduler for periodic tasks. 
@@ -170,22 +188,23 @@ class Scheduler(object): logger = logger # compat def __init__(self, app, schedule=None, max_interval=None, - Publisher=None, lazy=False, sync_every_tasks=None, **kwargs): + Producer=None, lazy=False, sync_every_tasks=None, **kwargs): self.app = app self.data = maybe_evaluate({} if schedule is None else schedule) - self.max_interval = (max_interval - or app.conf.CELERYBEAT_MAX_LOOP_INTERVAL - or self.max_interval) + self.max_interval = (max_interval or + app.conf.beat_max_loop_interval or + self.max_interval) + self.Producer = Producer or app.amqp.Producer + self._heap = None self.sync_every_tasks = ( - app.conf.CELERYBEAT_SYNC_EVERY if sync_every_tasks is None + app.conf.beat_sync_every if sync_every_tasks is None else sync_every_tasks) - self.Publisher = Publisher or app.amqp.TaskProducer if not lazy: self.setup_schedule() def install_default_entries(self, data): entries = {} - if self.app.conf.CELERY_TASK_RESULT_EXPIRES and \ + if self.app.conf.result_expires and \ not self.app.backend.supports_autoexpire: if 'celery.backend_cleanup' not in data: entries['celery.backend_cleanup'] = { @@ -194,36 +213,63 @@ def install_default_entries(self, data): 'options': {'expires': 12 * 3600}} self.update_from_dict(entries) - def maybe_due(self, entry, publisher=None): - is_due, next_time_to_run = entry.is_due() + def apply_entry(self, entry, producer=None): + info('Scheduler: Sending due task %s (%s)', entry.name, entry.task) + try: + result = self.apply_async(entry, producer=producer, advance=False) + except Exception as exc: + error('Message Error: %s\n%s', + exc, traceback.format_stack(), exc_info=True) + else: + debug('%s sent. 
id->%s', entry.task, result.id) + + def adjust(self, n, drift=-0.010): + if n and n > 0: + return n + drift + return n - if is_due: - info('Scheduler: Sending due task %s (%s)', entry.name, entry.task) - try: - result = self.apply_async(entry, publisher=publisher) - except Exception as exc: - error('Message Error: %s\n%s', - exc, traceback.format_stack(), exc_info=True) - else: - debug('%s sent. id->%s', entry.task, result.id) - return next_time_to_run + def is_due(self, entry): + return entry.is_due() - def tick(self): + def tick(self, event_t=event_t, min=min, + heappop=heapq.heappop, heappush=heapq.heappush, + heapify=heapq.heapify, mktime=time.mktime): """Run a tick, that is one iteration of the scheduler. - Executes all due tasks. + Executes one due task per call. + Returns preferred delay in seconds for next call. """ - remaining_times = [] - try: - for entry in values(self.schedule): - next_time_to_run = self.maybe_due(entry, self.publisher) - if next_time_to_run: - remaining_times.append(next_time_to_run) - except RuntimeError: - pass - return min(remaining_times + [self.max_interval]) + def _when(entry, next_time_to_run): + return (mktime(entry.schedule.now().timetuple()) + + (adjust(next_time_to_run) or 0)) + + adjust = self.adjust + max_interval = self.max_interval + H = self._heap + if H is None: + H = self._heap = [event_t(_when(e, e.is_due()[1]) or 0, 5, e) + for e in values(self.schedule)] + heapify(H) + if not H: + return max_interval + + event = H[0] + entry = event[2] + is_due, next_time_to_run = self.is_due(entry) + if is_due: + verify = heappop(H) + if verify is event: + next_entry = self.reserve(entry) + self.apply_entry(entry, producer=self.producer) + heappush(H, event_t(_when(next_entry, next_time_to_run), + event[1], next_entry)) + return 0 + else: + heappush(H, verify) + return min(verify[0], max_interval) + return min(adjust(next_time_to_run) or max_interval, max_interval) def should_sync(self): return ( @@ -237,22 +283,22 @@ def 
reserve(self, entry): new_entry = self.schedule[entry.name] = next(entry) return new_entry - def apply_async(self, entry, publisher=None, **kwargs): + def apply_async(self, entry, producer=None, advance=True, **kwargs): # Update timestamps and run counts before we actually execute, # so we have that done if an exception is raised (doesn't schedule # forever.) - entry = self.reserve(entry) + entry = self.reserve(entry) if advance else entry task = self.app.tasks.get(entry.task) try: if task: - result = task.apply_async(entry.args, entry.kwargs, - publisher=publisher, - **entry.options) - else: - result = self.send_task(entry.task, entry.args, entry.kwargs, - publisher=publisher, + return task.apply_async(entry.args, entry.kwargs, + producer=producer, **entry.options) + else: + return self.send_task(entry.task, entry.args, entry.kwargs, + producer=producer, + **entry.options) except Exception as exc: reraise(SchedulingError, SchedulingError( "Couldn't apply scheduled task {0.name}: {exc}".format( @@ -261,7 +307,6 @@ def apply_async(self, entry, publisher=None, **kwargs): self._tasks_since_sync += 1 if self.should_sync(): self._do_sync() - return result def send_task(self, *args, **kwargs): return self.app.send_task(*args, **kwargs) @@ -295,9 +340,10 @@ def _maybe_entry(self, name, entry): return self.Entry(**dict(entry, name=name, app=self.app)) def update_from_dict(self, dict_): - self.schedule.update(dict( - (name, self._maybe_entry(name, entry)) - for name, entry in items(dict_))) + self.schedule.update({ + name: self._maybe_entry(name, entry) + for name, entry in items(dict_) + }) def merge_inplace(self, b): schedule = self.schedule @@ -323,7 +369,7 @@ def _error_handler(exc, interval): 'Trying again in %s seconds...', exc, interval) return self.connection.ensure_connection( - _error_handler, self.app.conf.BROKER_CONNECTION_MAX_RETRIES + _error_handler, self.app.conf.broker_connection_max_retries ) def get_schedule(self): @@ -335,11 +381,11 @@ def 
set_schedule(self, schedule): @cached_property def connection(self): - return self.app.connection() + return self.app.connection_for_write() @cached_property - def publisher(self): - return self.Publisher(self._ensure_connected()) + def producer(self): + return self.Producer(self._ensure_connected()) @property def info(self): @@ -361,22 +407,37 @@ def _remove_db(self): with platforms.ignore_errno(errno.ENOENT): os.remove(self.schedule_filename + suffix) + def _open_schedule(self): + return self.persistence.open(self.schedule_filename, writeback=True) + + def _destroy_open_corrupted_schedule(self, exc): + error('Removing corrupted schedule file %r: %r', + self.schedule_filename, exc, exc_info=True) + self._remove_db() + return self._open_schedule() + def setup_schedule(self): try: - self._store = self.persistence.open(self.schedule_filename, - writeback=True) + self._store = self._open_schedule() + # In some cases there may be different errors from a storage + # backend for corrupted files. Example - DBPageNotFoundError + # exception from bsddb. In such case the file will be + # successfully opened but the error will be raised on first key + # retrieving. 
+ self._store.keys() except Exception as exc: - error('Removing corrupted schedule file %r: %r', - self.schedule_filename, exc, exc_info=True) - self._remove_db() - self._store = self.persistence.open(self.schedule_filename, - writeback=True) - else: + self._store = self._destroy_open_corrupted_schedule(exc) + + for _ in (1, 2): try: self._store['entries'] except KeyError: # new schedule db - self._store['entries'] = {} + try: + self._store['entries'] = {} + except KeyError as exc: + self._store = self._destroy_open_corrupted_schedule(exc) + continue else: if '__version__' not in self._store: warning('DB Reset: Account for new __version__ field') @@ -387,13 +448,14 @@ def setup_schedule(self): elif 'utc_enabled' not in self._store: warning('DB Reset: Account for new utc_enabled field') self._store.clear() # remove schedule at 3.0.9 upgrade + break - tz = self.app.conf.CELERY_TIMEZONE + tz = self.app.conf.timezone stored_tz = self._store.get('tz') if stored_tz is not None and stored_tz != tz: warning('Reset: Timezone changed from %r to %r', stored_tz, tz) self._store.clear() # Timezone changed, reset db! - utc = self.app.conf.CELERY_ENABLE_UTC + utc = self.app.conf.enable_utc stored_utc = self._store.get('utc_enabled') if stored_utc is not None and stored_utc != utc: choices = {True: 'enabled', False: 'disabled'} @@ -401,7 +463,7 @@ def setup_schedule(self): choices[stored_utc], choices[utc]) self._store.clear() # UTC setting changed, reset db! 
entries = self._store.setdefault('entries', {}) - self.merge_inplace(self.app.conf.CELERYBEAT_SCHEDULE) + self.merge_inplace(self.app.conf.beat_schedule) self.install_default_entries(self.schedule) self._store.update(__version__=__version__, tz=tz, utc_enabled=utc) self.sync() @@ -434,11 +496,11 @@ class Service(object): def __init__(self, app, max_interval=None, schedule_filename=None, scheduler_cls=None): self.app = app - self.max_interval = (max_interval - or app.conf.CELERYBEAT_MAX_LOOP_INTERVAL) + self.max_interval = (max_interval or + app.conf.beat_max_loop_interval) self.scheduler_cls = scheduler_cls or self.scheduler_cls self.schedule_filename = ( - schedule_filename or app.conf.CELERYBEAT_SCHEDULE_FILENAME) + schedule_filename or app.conf.beat_schedule_filename) self._is_shutdown = Event() self._is_stopped = Event() @@ -460,9 +522,12 @@ def start(self, embedded_process=False): try: while not self._is_shutdown.is_set(): interval = self.scheduler.tick() - debug('beat: Waking up %s.', - humanize_seconds(interval, prefix='in ')) - time.sleep(interval) + if interval and interval > 0.0: + debug('beat: Waking up %s.', + humanize_seconds(interval, prefix='in ')) + time.sleep(interval) + if self.scheduler.should_sync(): + self.scheduler._do_sync() except (KeyboardInterrupt, SystemExit): self._is_shutdown.set() finally: @@ -494,13 +559,15 @@ def scheduler(self): class _Threaded(Thread): """Embedded task scheduler using threading.""" - def __init__(self, *args, **kwargs): + def __init__(self, app, **kwargs): super(_Threaded, self).__init__() - self.service = Service(*args, **kwargs) + self.app = app + self.service = Service(app, **kwargs) self.daemon = True self.name = 'Beat' def run(self): + self.app.set_current() self.service.start() def stop(self): @@ -514,9 +581,10 @@ def stop(self): else: class _Process(Process): # noqa - def __init__(self, *args, **kwargs): + def __init__(self, app, **kwargs): super(_Process, self).__init__() - self.service = Service(*args, 
**kwargs) + self.app = app + self.service = Service(app, **kwargs) self.name = 'Beat' def run(self): @@ -524,6 +592,8 @@ def run(self): platforms.close_open_fds([ sys.__stdin__, sys.__stdout__, sys.__stderr__, ] + list(iter_open_logger_fds())) + self.app.set_default() + self.app.set_current() self.service.start(embedded_process=True) def stop(self): @@ -531,7 +601,7 @@ def stop(self): self.terminate() -def EmbeddedService(*args, **kwargs): +def EmbeddedService(app, max_interval=None, **kwargs): """Return embedded clock service. :keyword thread: Run threaded instead of as a separate process. @@ -541,6 +611,5 @@ def EmbeddedService(*args, **kwargs): if kwargs.pop('thread', False) or _Process is None: # Need short max interval to be able to stop thread # in reasonable time. - kwargs.setdefault('max_interval', 1) - return _Threaded(*args, **kwargs) - return _Process(*args, **kwargs) + return _Threaded(app, max_interval=1, **kwargs) + return _Process(app, max_interval=max_interval, **kwargs) diff --git a/celery/bin/amqp.py b/celery/bin/amqp.py index 4dab1527a..40e858e25 100644 --- a/celery/bin/amqp.py +++ b/celery/bin/amqp.py @@ -182,6 +182,16 @@ class AMQShell(cmd.Cmd): 'basic.ack': Spec(('delivery_tag', int)), } + def _prepare_spec(self, conn): + # XXX Hack to fix Issue #2013 + from amqp import Connection, Message + if isinstance(conn.connection, Connection): + self.amqp['basic.publish'] = Spec(('msg', Message), + ('exchange', str), + ('routing_key', str), + ('mandatory', bool, 'no'), + ('immediate', bool, 'no')) + def __init__(self, *args, **kwargs): self.connect = kwargs.pop('connect') self.silent = kwargs.pop('silent', False) @@ -246,32 +256,34 @@ def completenames(self, text, *ignored): return [cmd for cmd in names if cmd.partition('.')[2].startswith(text)] - def dispatch(self, cmd, argline): + def dispatch(self, cmd, arglist): """Dispatch and execute the command. Lookup order is: :attr:`builtins` -> :attr:`amqp`. 
""" - arglist = shlex.split(safe_str(argline)) + if isinstance(arglist, string_t): + arglist = shlex.split(safe_str(arglist)) if cmd in self.builtins: return getattr(self, self.builtins[cmd])(*arglist) fun, args, formatter = self.get_amqp_api_command(cmd, arglist) return formatter(fun(*args)) - def parseline(self, line): + def parseline(self, parts): """Parse input line. :returns: tuple of three items: `(command_name, arglist, original_line)` """ - parts = line.split() if parts: - return parts[0], ' '.join(parts[1:]), line - return '', '', line + return parts[0], parts[1:], ' '.join(parts) + return '', '', '' def onecmd(self, line): """Parse line and execute command.""" + if isinstance(line, string_t): + line = shlex.split(safe_str(line)) cmd, arg, line = self.parseline(line) if not line: return self.emptyline() @@ -296,6 +308,7 @@ def respond(self, retval): def _reconnect(self): """Re-establish connection to the AMQP server.""" self.conn = self.connect(self.conn) + self._prepare_spec(self.conn) self.chan = self.conn.default_channel self.needs_reconnect = False @@ -326,7 +339,7 @@ def connect(self, conn=None): def run(self): shell = self.Shell(connect=self.connect, out=self.out) if self.args: - return shell.onecmd(' '.join(self.args)) + return shell.onecmd(self.args) try: return shell.cmdloop() except KeyboardInterrupt: diff --git a/celery/bin/base.py b/celery/bin/base.py index 9ad794665..3b729d2fb 100644 --- a/celery/bin/base.py +++ b/celery/bin/base.py @@ -56,12 +56,17 @@ .. cmdoption:: --umask - Effective umask of the process after detaching. Default is 0. + Effective umask (in octal) of the process after detaching. Inherits + the umask of the parent process by default. .. cmdoption:: --workdir Optional directory to change to after detaching. +.. cmdoption:: --executable + + Executable to use for the detached process. 
+ """ from __future__ import absolute_import, print_function, unicode_literals @@ -74,14 +79,15 @@ from collections import defaultdict from heapq import heappush -from inspect import getargspec -from optparse import OptionParser, IndentedHelpFormatter, make_option as Option +from optparse import ( + OptionParser, OptionGroup, IndentedHelpFormatter, make_option as Option, +) from pprint import pformat from celery import VERSION_BANNER, Celery, maybe_patch_concurrency from celery import signals from celery.exceptions import CDeprecationWarning, CPendingDeprecationWarning -from celery.five import items, string, string_t +from celery.five import getfullargspec, items, string, string_t from celery.platforms import EX_FAILURE, EX_OK, EX_USAGE from celery.utils import term from celery.utils import text @@ -90,7 +96,7 @@ try: input = raw_input -except NameError: +except NameError: # pragma: no cover pass # always enable DeprecationWarnings, so our users can see them. @@ -214,7 +220,7 @@ class Command(object): enable_config_from_cmdline = False #: Default configuration namespace. 
- namespace = 'celery' + namespace = None #: Text to print at end of --help epilog = None @@ -276,7 +282,7 @@ def __call__(self, *args, **kwargs): return exc.status def verify_args(self, given, _index=0): - S = getargspec(self.run) + S = getfullargspec(self.run) _index = 1 if S.args and S.args[0] == 'self' else _index required = S.args[_index:-len(S.defaults) if S.defaults else None] missing = required[len(given):] @@ -323,6 +329,9 @@ def get_options(self): """Get supported command-line options.""" return self.option_list + def prepare_arguments(self, parser): + pass + def expanduser(self, value): if isinstance(value, string_t): return os.path.expanduser(value) @@ -373,9 +382,10 @@ def handle_argv(self, prog_name, argv, command=None): def prepare_args(self, options, args): if options: - options = dict((k, self.expanduser(v)) - for k, v in items(vars(options)) - if not k.startswith('_')) + options = { + k: self.expanduser(v) + for k, v in items(vars(options)) if not k.startswith('_') + } args = [self.expanduser(arg) for arg in args] self.check_args(args) return options, args @@ -407,20 +417,24 @@ def parse_options(self, prog_name, arguments, command=None): return self.parser.parse_args(arguments) def create_parser(self, prog_name, command=None): - option_list = ( - self.preload_options + - self.get_options() + - tuple(self.app.user_options['preload']) - ) - return self.prepare_parser(self.Parser( + parser = self.Parser( prog=prog_name, usage=self.usage(command), version=self.version, epilog=self.epilog, formatter=HelpFormatter(), description=self.description, - option_list=option_list, - )) + ) + parser.add_options(self.preload_options) + for typ_ in reversed(type(self).mro()): + try: + prepare_arguments = typ_.prepare_arguments + except AttributeError: + continue + prepare_arguments(self, parser) + parser.add_options(self.get_options() or ()) + parser.add_options(self.app.user_options['preload']) + return self.prepare_parser(parser) def prepare_parser(self, 
parser): docs = [self.parse_doc(doc) for doc in (self.doc, __doc__) if doc] @@ -499,6 +513,14 @@ def process_cmdline_config(self, argv): def parse_preload_options(self, args): return self.preparse_options(args, self.preload_options) + def add_append_opt(self, acc, opt, value): + default = opt.default or [] + + if opt.dest not in acc: + acc[opt.dest] = default + + acc[opt.dest].append(value) + def preparse_options(self, args, options): acc = {} opts = {} @@ -514,13 +536,19 @@ def preparse_options(self, args, options): key, value = arg.split('=', 1) opt = opts.get(key) if opt: - acc[opt.dest] = value + if opt.action == 'append': + self.add_append_opt(acc, opt, value) + else: + acc[opt.dest] = value else: opt = opts.get(arg) if opt and opt.takes_value(): # optparse also supports ['--opt', 'value'] # (Issue #1668) - acc[opt.dest] = args[index + 1] + if opt.action == 'append': + self.add_append_opt(acc, opt, args[index + 1]) + else: + acc[opt.dest] = args[index + 1] index += 1 elif opt and opt.action == 'store_true': acc[opt.dest] = True @@ -642,11 +670,12 @@ def no_color(self, value): self._colored.enabled = not self._no_color -def daemon_options(default_pidfile=None, default_logfile=None): - return ( - Option('-f', '--logfile', default=default_logfile), - Option('--pidfile', default=default_pidfile), - Option('--uid', default=None), - Option('--gid', default=None), - Option('--umask', default=0, type='int'), - ) +def daemon_options(parser, default_pidfile=None, default_logfile=None): + group = OptionGroup(parser, "Daemonization Options") + group.add_option('-f', '--logfile', default=default_logfile), + group.add_option('--pidfile', default=default_pidfile), + group.add_option('--uid', default=None), + group.add_option('--gid', default=None), + group.add_option('--umask', default=None), + group.add_option('--executable', default=None), + parser.add_option_group(group) diff --git a/celery/bin/beat.py b/celery/bin/beat.py index 6b5b73468..ebc1cbedc 100644 --- 
a/celery/bin/beat.py +++ b/celery/bin/beat.py @@ -44,7 +44,7 @@ from celery.platforms import detached, maybe_drop_privileges -from celery.bin.base import Command, Option, daemon_options +from celery.bin.base import Command, daemon_options __all__ = ['beat'] @@ -78,19 +78,15 @@ def run(self, detach=False, logfile=None, pidfile=None, uid=None, else: return beat().run() - def get_options(self): + def prepare_arguments(self, parser): c = self.app.conf - - return ( - (Option('--detach', action='store_true'), - Option('-s', '--schedule', - default=c.CELERYBEAT_SCHEDULE_FILENAME), - Option('--max-interval', type='float'), - Option('-S', '--scheduler', dest='scheduler_cls'), - Option('-l', '--loglevel', default=c.CELERYBEAT_LOG_LEVEL)) - + daemon_options(default_pidfile='celerybeat.pid') - + tuple(self.app.user_options['beat']) - ) + parser.add_option('--detach', action='store_true') + parser.add_option('-s', '--schedule', default=c.beat_schedule_filename) + parser.add_option('--max-interval', type='float') + parser.add_option('-S', '--scheduler', dest='scheduler_cls') + parser.add_option('-l', '--loglevel', default='WARN') + daemon_options(parser, default_pidfile='celerybeat.pid') + parser.add_options(self.app.user_options['beat']) def main(app=None): diff --git a/celery/bin/celery.py b/celery/bin/celery.py index 10d7c0324..2b0c74c81 100644 --- a/celery/bin/celery.py +++ b/celery/bin/celery.py @@ -8,7 +8,6 @@ """ from __future__ import absolute_import, unicode_literals -import anyjson import numbers import os import sys @@ -16,6 +15,8 @@ from functools import partial from importlib import import_module +from kombu.utils import json + from celery.five import string_t, values from celery.platforms import EX_OK, EX_FAILURE, EX_UNAVAILABLE, EX_USAGE from celery.utils import term @@ -30,6 +31,7 @@ from celery.bin.beat import beat from celery.bin.events import events from celery.bin.graph import graph +from celery.bin.logtool import logtool from celery.bin.worker import worker 
__all__ = ['CeleryCommand', 'main'] @@ -57,7 +59,7 @@ ] if DEBUG: # pragma: no cover command_classes.append( - ('Debug', ['graph'], 'red'), + ('Debug', ['graph', 'logtool'], 'red'), ) @@ -88,7 +90,7 @@ class multi(Command): respects_app_option = False def get_options(self): - return () + pass def run_from_argv(self, prog_name, argv, command=None): from celery.bin.multi import MultiTool @@ -115,7 +117,8 @@ def list_bindings(self, management): except NotImplementedError: raise self.Error('Your transport cannot list bindings.') - fmt = lambda q, e, r: self.out('{0:<28} {1:<28} {2}'.format(q, e, r)) + def fmt(q, e, r): + return self.out('{0:<28} {1:<28} {2}'.format(q, e, r)) fmt('Queue', 'Exchange', 'Routing Key') fmt('-' * 16, '-' * 16, '-' * 16) for b in bindings: @@ -162,12 +165,12 @@ def run(self, name, *_, **kw): # Positional args. args = kw.get('args') or () if isinstance(args, string_t): - args = anyjson.loads(args) + args = json.loads(args) # Keyword args. kwargs = kw.get('kwargs') or {} if isinstance(kwargs, string_t): - kwargs = anyjson.loads(kwargs) + kwargs = json.loads(kwargs) # Expires can be int/float. 
expires = kw.get('expires') or None @@ -268,7 +271,10 @@ class _RemoteControl(Command): Option('--timeout', '-t', type='float', help='Timeout in seconds (float) waiting for reply'), Option('--destination', '-d', - help='Comma separated list of destination node names.')) + help='Comma separated list of destination node names.'), + Option('--json', '-j', action='store_true', + help='Use json as output format.'), + ) def __init__(self, *args, **kwargs): self.show_body = kwargs.pop('show_body', True) @@ -331,9 +337,10 @@ def do_call_method(self, args, **kwargs): raise self.UsageError( 'Unknown {0.name} method {1}'.format(self, method)) - if self.app.connection().transport.driver_type == 'sql': + if self.app.connection_for_write().transport.driver_type == 'sql': raise self.Error('Broadcast not supported by SQL broker transport') + output_json = kwargs.get('json') destination = kwargs.get('destination') timeout = kwargs.get('timeout') or self.choices[method][0] if destination and isinstance(destination, string_t): @@ -341,12 +348,16 @@ def do_call_method(self, args, **kwargs): handler = getattr(self, method, self.call) + callback = None if output_json else self.say_remote_command_reply + replies = handler(method, *args[1:], timeout=timeout, destination=destination, - callback=self.say_remote_command_reply) + callback=callback) if not replies: raise self.Error('No nodes replied within time constraint.', status=EX_UNAVAILABLE) + if output_json: + self.out(json.dumps(replies)) return replies @@ -572,10 +583,10 @@ def run(self, force_ipython=False, force_bpython=False, 'signature': celery.signature} if not without_tasks: - self.locals.update(dict( - (task.__name__, task) for task in values(self.app.tasks) - if not task.name.startswith('celery.')), - ) + self.locals.update({ + task.__name__: task for task in values(self.app.tasks) + if not task.name.startswith('celery.') + }) if force_python: return self.invoke_fallback_shell() @@ -612,12 +623,35 @@ def 
invoke_fallback_shell(self): code.interact(local=self.locals) def invoke_ipython_shell(self): - try: - from IPython.terminal import embed - embed.TerminalInteractiveShell(user_ns=self.locals).mainloop() - except ImportError: # ipython < 0.11 - from IPython.Shell import IPShell - IPShell(argv=[], user_ns=self.locals).mainloop() + for ip in (self._ipython, self._ipython_pre_10, + self._ipython_terminal, self._ipython_010, + self._no_ipython): + try: + return ip() + except ImportError: + pass + + def _ipython(self): + from IPython import start_ipython + start_ipython(argv=[], user_ns=self.locals) + + def _ipython_pre_10(self): # pragma: no cover + from IPython.frontend.terminal.ipapp import TerminalIPythonApp + app = TerminalIPythonApp.instance() + app.initialize(argv=[]) + app.shell.user_ns.update(self.locals) + app.start() + + def _ipython_terminal(self): # pragma: no cover + from IPython.terminal import embed + embed.TerminalInteractiveShell(user_ns=self.locals).mainloop() + + def _ipython_010(self): # pragma: no cover + from IPython.Shell import IPShell + IPShell(argv=[], user_ns=self.locals).mainloop() + + def _no_ipython(self): # pragma: no cover + raise ImportError("no suitable ipython found") def invoke_bpython_shell(self): import bpython @@ -649,7 +683,6 @@ def run(self, *args, **kwargs): class CeleryCommand(Command): - namespace = 'celery' ext_fmt = '{self.namespace}.commands' commands = { 'amqp': amqp, @@ -661,6 +694,7 @@ class CeleryCommand(Command): 'help': help, 'inspect': inspect, 'list': list_, + 'logtool': logtool, 'migrate': migrate, 'multi': multi, 'purge': purge, @@ -729,13 +763,13 @@ def _relocate_args_from_start(self, argv, index=0): # is (maybe) a value for this option rest.extend([value, nxt]) index += 1 - except IndexError: + except IndexError: # pragma: no cover rest.append(value) break else: break index += 1 - if argv[index:]: + if argv[index:]: # pragma: no cover # if there are more arguments left then divide and swap # we assume the first 
argument in argv[i:] is the command # name. diff --git a/celery/bin/celeryd_detach.py b/celery/bin/celeryd_detach.py index 1db2ff041..ed3f0bf9a 100644 --- a/celery/bin/celeryd_detach.py +++ b/celery/bin/celeryd_detach.py @@ -19,9 +19,10 @@ from optparse import OptionParser, BadOptionError from celery.platforms import EX_FAILURE, detached +from celery.utils import default_nodename, node_format from celery.utils.log import get_logger -from celery.bin.base import daemon_options, Option +from celery.bin.base import daemon_options __all__ = ['detached_celeryd', 'detach'] @@ -29,24 +30,26 @@ C_FAKEFORK = os.environ.get('C_FAKEFORK') -OPTION_LIST = daemon_options(default_pidfile='celeryd.pid') + ( - Option('--fake', - default=False, action='store_true', dest='fake', - help="Don't fork (for debugging purposes)"), -) - def detach(path, argv, logfile=None, pidfile=None, uid=None, - gid=None, umask=0, working_directory=None, fake=False, app=None): + gid=None, umask=None, working_directory=None, fake=False, app=None, + executable=None, hostname=None): + hostname = default_nodename(hostname) + logfile = node_format(logfile, hostname) + pidfile = node_format(pidfile, hostname) fake = 1 if C_FAKEFORK else fake - with detached(logfile, pidfile, uid, gid, umask, working_directory, fake): + with detached(logfile, pidfile, uid, gid, umask, working_directory, fake, + after_forkers=False): try: + if executable is not None: + path = executable os.execv(path, [path] + argv) except Exception: if app is None: from celery import current_app app = current_app - app.log.setup_logging_subsystem('ERROR', logfile) + app.log.setup_logging_subsystem( + 'ERROR', logfile, hostname=hostname) logger.critical("Can't exec %r", ' '.join([path] + argv), exc_info=True) return EX_FAILURE @@ -109,40 +112,40 @@ def _process_short_opts(self, rargs, values): class detached_celeryd(object): - option_list = OPTION_LIST usage = '%prog [options] [celeryd options]' version = celery.VERSION_BANNER description = 
('Detaches Celery worker nodes. See `celery worker --help` ' 'for the list of supported worker arguments.') command = sys.executable execv_path = sys.executable - if sys.version_info < (2, 7): # does not support pkg/__main__.py - execv_argv = ['-m', 'celery.__main__', 'worker'] - else: - execv_argv = ['-m', 'celery', 'worker'] + execv_argv = ['-m', 'celery', 'worker'] def __init__(self, app=None): self.app = app - def Parser(self, prog_name): - return PartialOptionParser(prog=prog_name, - option_list=self.option_list, - usage=self.usage, - description=self.description, - version=self.version) + def create_parser(self, prog_name): + p = PartialOptionParser( + prog=prog_name, + usage=self.usage, + description=self.description, + version=self.version, + ) + self.prepare_arguments(p) + return p def parse_options(self, prog_name, argv): - parser = self.Parser(prog_name) + parser = self.create_parser(prog_name) options, values = parser.parse_args(argv) if options.logfile: parser.leftovers.append('--logfile={0}'.format(options.logfile)) if options.pidfile: parser.leftovers.append('--pidfile={0}'.format(options.pidfile)) + if options.hostname: + parser.leftovers.append('--hostname={0}'.format(options.hostname)) return options, values, parser.leftovers def execute_from_commandline(self, argv=None): - if argv is None: - argv = sys.argv + argv = sys.argv if argv is None else argv config = [] seen_cargs = 0 for arg in argv: @@ -160,6 +163,16 @@ def execute_from_commandline(self, argv=None): **vars(options) )) + def prepare_arguments(self, parser): + daemon_options(parser, default_pidfile='celeryd.pid') + parser.add_option('--workdir', default=None, dest='working_directory') + parser.add_option('-n', '--hostname') + parser.add_option( + '--fake', + default=False, action='store_true', dest='fake', + help="Don't fork (for debugging purposes)", + ) + def main(app=None): detached_celeryd(app).execute_from_commandline() diff --git a/celery/bin/events.py b/celery/bin/events.py index 
d98750504..4fa7eeb01 100644 --- a/celery/bin/events.py +++ b/celery/bin/events.py @@ -42,7 +42,7 @@ from functools import partial from celery.platforms import detached, set_process_title, strargv -from celery.bin.base import Command, Option, daemon_options +from celery.bin.base import Command, daemon_options __all__ = ['events'] @@ -117,18 +117,16 @@ def set_process_status(self, prog, info=''): info = '{0} {1}'.format(info, strargv(sys.argv)) return set_process_title(prog, info=info) - def get_options(self): - return ( - (Option('-d', '--dump', action='store_true'), - Option('-c', '--camera'), - Option('--detach', action='store_true'), - Option('-F', '--frequency', '--freq', - type='float', default=1.0), - Option('-r', '--maxrate'), - Option('-l', '--loglevel', default='INFO')) - + daemon_options(default_pidfile='celeryev.pid') - + tuple(self.app.user_options['events']) - ) + def prepare_arguments(self, parser): + parser.add_option('-d', '--dump', action='store_true') + parser.add_option('-c', '--camera') + parser.add_option('--detach', action='store_true') + parser.add_option('-F', '--frequency', '--freq', + type='float', default=1.0) + parser.add_option('-r', '--maxrate') + parser.add_option('-l', '--loglevel', default='INFO') + daemon_options(parser, default_pidfile='celeryev.pid') + parser.add_options(self.app.user_options['events']) def main(): diff --git a/celery/bin/graph.py b/celery/bin/graph.py index 5d5847672..d441a54ca 100644 --- a/celery/bin/graph.py +++ b/celery/bin/graph.py @@ -34,7 +34,7 @@ def run(self, what=None, *args, **kwargs): def bootsteps(self, *args, **kwargs): worker = self.app.WorkController() - include = set(arg.lower() for arg in args or ['worker', 'consumer']) + include = {arg.lower() for arg in args or ['worker', 'consumer']} if 'worker' in include: graph = worker.blueprint.graph if 'consumer' in include: @@ -156,7 +156,7 @@ def maybe_abbr(l, name, max=Wmax): threads.append(reply['pool']['max-concurrency']) wlen = len(workers) - 
backend = args.get('backend', self.app.conf.CELERY_RESULT_BACKEND) + backend = args.get('backend', self.app.conf.result_backend) threads_for = {} workers = maybe_abbr(workers, 'Worker') if Wmax and wlen > Wmax: @@ -166,7 +166,8 @@ def maybe_abbr(l, name, max=Wmax): list(range(int(threads))), 'P', Tmax, ) - broker = Broker(args.get('broker', self.app.connection().as_uri())) + broker = Broker(args.get( + 'broker', self.app.connection_for_read().as_uri())) backend = Backend(backend) if backend else None graph = DependencyGraph(formatter=Formatter()) graph.add_arc(broker) diff --git a/celery/bin/logtool.py b/celery/bin/logtool.py new file mode 100644 index 000000000..7e1fffa94 --- /dev/null +++ b/celery/bin/logtool.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- +""" + +The :program:`celery logtool` command. + +.. program:: celery logtool + +""" + +from __future__ import absolute_import, unicode_literals + +import re + +from collections import Counter +from fileinput import FileInput + +from .base import Command + +__all__ = ['logtool'] + +RE_LOG_START = re.compile('^\[\d\d\d\d\-\d\d-\d\d ') +RE_TASK_RECEIVED = re.compile('.+?\] Received') +RE_TASK_READY = re.compile('.+?\] Task') +RE_TASK_INFO = re.compile('.+?([\w\.]+)\[(.+?)\].+') +RE_TASK_RESULT = re.compile('.+?[\w\.]+\[.+?\] (.+)') + +REPORT_FORMAT = """ +Report +====== + +Task total: {task[total]} +Task errors: {task[errors]} +Task success: {task[succeeded]} +Task completed: {task[completed]} + +Tasks +===== +{task[types].format} +""" + + +class _task_counts(list): + + @property + def format(self): + return '\n'.join('{0}: {1}'.format(*i) for i in self) + + +def task_info(line): + m = RE_TASK_INFO.match(line) + return m.groups() + + +class Audit(object): + + def __init__(self, on_task_error=None, on_trace=None, on_debug=None): + self.ids = set() + self.names = {} + self.results = {} + self.ready = set() + self.task_types = Counter() + self.task_errors = 0 + self.on_task_error = on_task_error + self.on_trace = 
on_trace + self.on_debug = on_debug + self.prev_line = None + + def run(self, files): + for line in FileInput(files): + self.feed(line) + return self + + def task_received(self, line, task_name, task_id): + self.names[task_id] = task_name + self.ids.add(task_id) + self.task_types[task_name] += 1 + + def task_ready(self, line, task_name, task_id, result): + self.ready.add(task_id) + self.results[task_id] = result + if 'succeeded' not in result: + self.task_error(line, task_name, task_id, result) + + def task_error(self, line, task_name, task_id, result): + self.task_errors += 1 + if self.on_task_error: + self.on_task_error(line, task_name, task_id, result) + + def feed(self, line): + if RE_LOG_START.match(line): + if RE_TASK_RECEIVED.match(line): + task_name, task_id = task_info(line) + self.task_received(line, task_name, task_id) + elif RE_TASK_READY.match(line): + task_name, task_id = task_info(line) + result = RE_TASK_RESULT.match(line) + if result: + result, = result.groups() + self.task_ready(line, task_name, task_id, result) + else: + if self.on_debug: + self.on_debug(line) + self.prev_line = line + else: + if self.on_trace: + self.on_trace('\n'.join(filter(None, [self.prev_line, line]))) + self.prev_line = None + + def incomplete_tasks(self): + return self.ids ^ self.ready + + def report(self): + return { + 'task': { + 'types': _task_counts(self.task_types.most_common()), + 'total': len(self.ids), + 'errors': self.task_errors, + 'completed': len(self.ready), + 'succeeded': len(self.ready) - self.task_errors, + } + } + + +class logtool(Command): + args = """ [arguments] + ..... stats [file1|- [file2 [...]]] + ..... traces [file1|- [file2 [...]]] + ..... errors [file1|- [file2 [...]]] + ..... incomplete [file1|- [file2 [...]]] + ..... 
debug [file1|- [file2 [...]]] + """ + + def run(self, what=None, *files, **kwargs): + map = { + 'stats': self.stats, + 'traces': self.traces, + 'errors': self.errors, + 'incomplete': self.incomplete, + 'debug': self.debug, + } + if not what: + raise self.UsageError('missing action') + elif what not in map: + raise self.Error( + 'action {0} not in {1}'.format(what, '|'.join(map)), + ) + + return map[what](files) + + def stats(self, files): + self.out(REPORT_FORMAT.format( + **Audit().run(files).report() + )) + + def traces(self, files): + Audit(on_trace=self.out).run(files) + + def errors(self, files): + Audit(on_task_error=self.say1).run(files) + + def incomplete(self, files): + audit = Audit() + audit.run(files) + for task_id in audit.incomplete_tasks(): + self.error('Did not complete: %r' % (task_id,)) + + def debug(self, files): + Audit(on_debug=self.out).run(files) + + def say1(self, line, *_): + self.out(line) diff --git a/celery/bin/multi.py b/celery/bin/multi.py index ca14c0bfe..39919c42a 100644 --- a/celery/bin/multi.py +++ b/celery/bin/multi.py @@ -6,76 +6,79 @@ Examples ======== -.. code-block:: bash +.. code-block:: console - # Single worker with explicit name and events enabled. + $ # Single worker with explicit name and events enabled. $ celery multi start Leslie -E - # Pidfiles and logfiles are stored in the current directory - # by default. Use --pidfile and --logfile argument to change - # this. The abbreviation %N will be expanded to the current - # node name. - $ celery multi start Leslie -E --pidfile=/var/run/celery/%N.pid - --logfile=/var/log/celery/%N.log + $ # Pidfiles and logfiles are stored in the current directory + $ # by default. Use --pidfile and --logfile argument to change + $ # this. The abbreviation %n will be expanded to the current + $ # node name. 
+ $ celery multi start Leslie -E --pidfile=/var/run/celery/%n.pid + --logfile=/var/log/celery/%n%I.log - # You need to add the same arguments when you restart, - # as these are not persisted anywhere. - $ celery multi restart Leslie -E --pidfile=/var/run/celery/%N.pid - --logfile=/var/run/celery/%N.log + $ # You need to add the same arguments when you restart, + $ # as these are not persisted anywhere. + $ celery multi restart Leslie -E --pidfile=/var/run/celery/%n.pid + --logfile=/var/run/celery/%n%I.log - # To stop the node, you need to specify the same pidfile. - $ celery multi stop Leslie --pidfile=/var/run/celery/%N.pid + $ # To stop the node, you need to specify the same pidfile. + $ celery multi stop Leslie --pidfile=/var/run/celery/%n.pid - # 3 workers, with 3 processes each + $ # 3 workers, with 3 processes each $ celery multi start 3 -c 3 celery worker -n celery1@myhost -c 3 celery worker -n celery2@myhost -c 3 celery worker -n celery3@myhost -c 3 - # start 3 named workers + $ # start 3 named workers $ celery multi start image video data -c 3 celery worker -n image@myhost -c 3 celery worker -n video@myhost -c 3 celery worker -n data@myhost -c 3 - # specify custom hostname + $ # specify custom hostname $ celery multi start 2 --hostname=worker.example.com -c 3 celery worker -n celery1@worker.example.com -c 3 celery worker -n celery2@worker.example.com -c 3 - # specify fully qualified nodenames + $ # specify fully qualified nodenames $ celery multi start foo@worker.example.com bar@worker.example.com -c 3 - # Advanced example starting 10 workers in the background: - # * Three of the workers processes the images and video queue - # * Two of the workers processes the data queue with loglevel DEBUG - # * the rest processes the default' queue. 
+ $ # fully qualified nodenames but using the current hostname + $ celery multi start foo@%h bar@%h + + $ # Advanced example starting 10 workers in the background: + $ # * Three of the workers processes the images and video queue + $ # * Two of the workers processes the data queue with loglevel DEBUG + $ # * the rest processes the default' queue. $ celery multi start 10 -l INFO -Q:1-3 images,video -Q:4,5 data -Q default -L:4,5 DEBUG - # You can show the commands necessary to start the workers with - # the 'show' command: + $ # You can show the commands necessary to start the workers with + $ # the 'show' command: $ celery multi show 10 -l INFO -Q:1-3 images,video -Q:4,5 data -Q default -L:4,5 DEBUG - # Additional options are added to each celery worker' comamnd, - # but you can also modify the options for ranges of, or specific workers + $ # Additional options are added to each celery worker' comamnd, + $ # but you can also modify the options for ranges of, or specific workers - # 3 workers: Two with 3 processes, and one with 10 processes. + $ # 3 workers: Two with 3 processes, and one with 10 processes. 
$ celery multi start 3 -c 3 -c:1 10 celery worker -n celery1@myhost -c 10 celery worker -n celery2@myhost -c 3 celery worker -n celery3@myhost -c 3 - # can also specify options for named workers + $ # can also specify options for named workers $ celery multi start image video data -c 3 -c:image 10 celery worker -n image@myhost -c 10 celery worker -n video@myhost -c 3 celery worker -n data@myhost -c 3 - # ranges and lists of workers in options is also allowed: - # (-c:1-3 can also be written as -c:1,2,3) + $ # ranges and lists of workers in options is also allowed: + $ # (-c:1-3 can also be written as -c:1,2,3) $ celery multi start 5 -c 3 -c:1-3 10 celery worker -n celery1@myhost -c 10 celery worker -n celery2@myhost -c 10 @@ -83,7 +86,7 @@ celery worker -n celery4@myhost -c 3 celery worker -n celery5@myhost -c 3 - # lists also works with named workers + $ # lists also works with named workers $ celery multi start foo bar baz xuzzy -c 3 -c:foo,bar,baz 10 celery worker -n foo@myhost -c 10 celery worker -n bar@myhost -c 10 @@ -97,28 +100,28 @@ import os import shlex import signal -import socket import sys -from collections import defaultdict, namedtuple +from collections import OrderedDict, defaultdict, namedtuple +from functools import partial from subprocess import Popen from time import sleep from kombu.utils import cached_property -from kombu.utils.compat import OrderedDict from kombu.utils.encoding import from_utf8 from celery import VERSION_BANNER from celery.five import items from celery.platforms import Pidfile, IS_WINDOWS -from celery.utils import term, nodesplit +from celery.utils import term +from celery.utils import gethostname, host_format, node_format, nodesplit from celery.utils.text import pluralize __all__ = ['MultiTool'] -SIGNAMES = set(sig for sig in dir(signal) - if sig.startswith('SIG') and '_' not in sig) -SIGMAP = dict((getattr(signal, name), name) for name in SIGNAMES) +SIGNAMES = {sig for sig in dir(signal) + if sig.startswith('SIG') and '_' 
not in sig} +SIGMAP = {getattr(signal, name): name for name in SIGNAMES} USAGE = """\ usage: {prog_name} start [worker options] @@ -139,6 +142,7 @@ * --verbose: Show more output. * --no-color: Don't display colors. """ +CELERY_EXE = 'celery' multi_args_t = namedtuple( 'multi_args_t', ('name', 'argv', 'expander', 'namespace'), @@ -149,22 +153,18 @@ def main(): sys.exit(MultiTool().execute_from_commandline(sys.argv)) -CELERY_EXE = 'celery' -if sys.version_info < (2, 7): - # pkg.__main__ first supported in Py2.7 - CELERY_EXE = 'celery.__main__' - - def celery_exe(*args): - return ' '.join((CELERY_EXE, ) + args) + return ' '.join((CELERY_EXE,) + args) class MultiTool(object): retcode = 0 # Final exit code. def __init__(self, env=None, fh=None, quiet=False, verbose=False, - no_color=False, nosplash=False): - self.fh = fh or sys.stderr + no_color=False, nosplash=False, stdout=None, stderr=None): + """fh is an old alias to stdout.""" + self.stdout = self.fh = stdout or fh or sys.stdout + self.stderr = stderr or sys.stderr self.env = env self.nosplash = nosplash self.quiet = quiet @@ -209,8 +209,11 @@ def execute_from_commandline(self, argv, cmd='celery worker'): return self.retcode - def say(self, m, newline=True): - print(m, file=self.fh, end='\n' if newline else '') + def say(self, m, newline=True, file=None): + print(m, file=file or self.stdout, end='\n' if newline else '') + + def carp(self, m, newline=True, file=None): + return self.say(m, newline, file or self.stderr) def names(self, argv, cmd): p = NamespacedOptionParser(argv) @@ -241,18 +244,19 @@ def start(self, argv, cmd): self.note('> Starting nodes...') for node in multi_args(p, cmd): self.note('\t> {0}: '.format(node.name), newline=False) - retcode = self.waitexec(node.argv) + retcode = self.waitexec(node.argv, path=p.options['--executable']) self.note(retcode and self.FAILED or self.OK) retcodes.append(retcode) self.retcode = int(any(retcodes)) def with_detacher_default_options(self, p): - 
_setdefaultopt(p.options, ['--pidfile', '-p'], '%N.pid') - _setdefaultopt(p.options, ['--logfile', '-f'], '%N.log') + _setdefaultopt(p.options, ['--pidfile', '-p'], '%n.pid') + _setdefaultopt(p.options, ['--logfile', '-f'], '%n%I.log') p.options.setdefault( '--cmd', '-m {0}'.format(celery_exe('worker', '--detach')), ) + _setdefaultopt(p.options, ['--executable'], sys.executable) def signal_node(self, nodename, pid, sig): try: @@ -320,7 +324,7 @@ def note_waiting(): self.note('') def getpids(self, p, cmd, callback=None): - _setdefaultopt(p.options, ['--pidfile', '-p'], '%N.pid') + _setdefaultopt(p.options, ['--pidfile', '-p'], '%n.pid') nodes = [] for node in multi_args(p, cmd): @@ -373,7 +377,7 @@ def restart(self, argv, cmd): def on_node_shutdown(nodename, argv, pid): self.note(self.colored.blue( '> Restarting node {0}: '.format(nodename)), newline=False) - retval = self.waitexec(argv) + retval = self.waitexec(argv, path=p.options['--executable']) self.note(retval and self.FAILED or self.OK) retvals.append(retval) @@ -420,7 +424,7 @@ def waitexec(self, argv, path=sys.executable): def error(self, msg=None): if msg: - self.say(msg) + self.carp(msg) self.usage() self.retcode = 1 return 1 @@ -450,66 +454,87 @@ def DOWN(self): return str(self.colored.magenta('DOWN')) +def _args_for_node(p, name, prefix, suffix, cmd, append, options): + name, nodename, expand = _get_nodename( + name, prefix, suffix, options) + + argv = ([expand(cmd)] + + [format_opt(opt, expand(value)) + for opt, value in items(p.optmerge(name, options))] + + [p.passthrough]) + if append: + argv.append(expand(append)) + return multi_args_t(nodename, argv, expand, name) + + def multi_args(p, cmd='celery worker', append='', prefix='', suffix=''): names = p.values options = dict(p.options) - passthrough = p.passthrough ranges = len(names) == 1 if ranges: try: - noderange = int(names[0]) + names, prefix = _get_ranges(names) except ValueError: pass - else: - names = [str(n) for n in range(1, noderange + 1)] 
- prefix = 'celery' cmd = options.pop('--cmd', cmd) append = options.pop('--append', append) hostname = options.pop('--hostname', - options.pop('-n', socket.gethostname())) + options.pop('-n', gethostname())) prefix = options.pop('--prefix', prefix) or '' suffix = options.pop('--suffix', suffix) or hostname - if suffix in ('""', "''"): - suffix = '' + suffix = '' if suffix in ('""', "''") else suffix + + _update_ns_opts(p, names) + _update_ns_ranges(p, ranges) + return (_args_for_node(p, name, prefix, suffix, cmd, append, options) + for name in names) - for ns_name, ns_opts in list(items(p.namespaces)): - if ',' in ns_name or (ranges and '-' in ns_name): - for subns in parse_ns_range(ns_name, ranges): - p.namespaces[subns].update(ns_opts) - p.namespaces.pop(ns_name) +def _get_ranges(names): + noderange = int(names[0]) + names = [str(n) for n in range(1, noderange + 1)] + prefix = 'celery' + return names, prefix + + +def _update_ns_opts(p, names): # Numbers in args always refers to the index in the list of names. # (e.g. `start foo bar baz -c:1` where 1 is foo, 2 is bar, and so on). 
for ns_name, ns_opts in list(items(p.namespaces)): if ns_name.isdigit(): ns_index = int(ns_name) - 1 if ns_index < 0: - raise KeyError('Indexes start at 1 got: %r' % (ns_name, )) + raise KeyError('Indexes start at 1 got: %r' % (ns_name,)) try: p.namespaces[names[ns_index]].update(ns_opts) except IndexError: - raise KeyError('No node at index %r' % (ns_name, )) + raise KeyError('No node at index %r' % (ns_name,)) + + +def _update_ns_ranges(p, ranges): + for ns_name, ns_opts in list(items(p.namespaces)): + if ',' in ns_name or (ranges and '-' in ns_name): + for subns in parse_ns_range(ns_name, ranges): + p.namespaces[subns].update(ns_opts) + p.namespaces.pop(ns_name) + - for name in names: - this_suffix = suffix +def _get_nodename(name, prefix, suffix, options): + hostname = suffix if '@' in name: - this_name = options['-n'] = name - nodename, this_suffix = nodesplit(name) - name = nodename + nodename = options['-n'] = host_format(name) + shortname, hostname = nodesplit(nodename) + name = shortname else: - nodename = '%s%s' % (prefix, name) - this_name = options['-n'] = '%s@%s' % (nodename, this_suffix) - expand = abbreviations({'%h': this_name, - '%n': name, - '%N': nodename, - '%d': this_suffix}) - argv = ([expand(cmd)] + - [format_opt(opt, expand(value)) - for opt, value in items(p.optmerge(name, options))] + - [passthrough]) - if append: - argv.append(expand(append)) - yield multi_args_t(this_name, argv, expand, name) + shortname = '%s%s' % (prefix, name) + nodename = options['-n'] = host_format( + '{0}@{1}'.format(shortname, hostname), + ) + expand = partial( + node_format, nodename=nodename, N=shortname, d=hostname, + h=nodename, i='%i', I='%I', + ) + return name, nodename, expand class NamespacedOptionParser(object): @@ -591,18 +616,6 @@ def parse_ns_range(ns, ranges=False): return ret -def abbreviations(mapping): - - def expand(S): - ret = S - if S is not None: - for short_opt, long_opt in items(mapping): - ret = ret.replace(short_opt, long_opt) - return ret 
- - return expand - - def findsig(args, default=signal.SIGTERM): for arg in reversed(args): if len(arg) == 2 and arg[0] == '-': diff --git a/celery/bin/worker.py b/celery/bin/worker.py index bdc564d4f..2d91f4a47 100644 --- a/celery/bin/worker.py +++ b/celery/bin/worker.py @@ -71,8 +71,8 @@ .. cmdoption:: -E, --events - Send events that can be captured by monitors like :program:`celery events`, - `celerymon`, and others. + Send task-related events that can be captured by monitors like + :program:`celery events`, `celerymon`, and others. .. cmdoption:: --without-gossip @@ -86,6 +86,10 @@ Do not send event heartbeats. +.. cmdoption:: --heartbeat-interval + + Interval in seconds at which to send worker heartbeat + .. cmdoption:: --purge Purges all waiting tasks before the daemon is started. @@ -105,6 +109,14 @@ Maximum number of tasks a pool worker can execute before it's terminated and replaced by a new worker. +.. cmdoption:: --maxmemperchild + + Maximum amount of resident memory, in KiB, that may be consumed by a + child process before it will be replaced by a new one. If a single + task causes a child process to exceed this limit, the task will be + completed and the child process will be replaced afterwards. + Default: no limit. + .. cmdoption:: --pidfile Optional file used to store the workers pid. 
@@ -134,8 +146,10 @@ import sys +from optparse import OptionGroup + from celery import concurrency -from celery.bin.base import Command, Option, daemon_options +from celery.bin.base import Command, daemon_options from celery.bin.celeryd_detach import detached_celeryd from celery.five import string_t from celery.platforms import maybe_drop_privileges @@ -161,7 +175,7 @@ class worker(Command): celery worker --autoscale=10,0 """ doc = __MODULE_DOC__ # parse help from this too - namespace = 'celeryd' + namespace = 'worker' enable_config_from_cmdline = True supports_args = False @@ -171,7 +185,7 @@ def run_from_argv(self, prog_name, argv=None, command=None): # parse options before detaching so errors can be handled. options, args = self.prepare_args( *self.parse_options(prog_name, argv, command)) - self.maybe_detach([command] + sys.argv[1:]) + self.maybe_detach([command] + argv) return self(*args, **options) def maybe_detach(self, argv, dopts=['-D', '--detach']): @@ -188,7 +202,7 @@ def run(self, hostname=None, pool_cls=None, app=None, uid=None, gid=None, # Pools like eventlet/gevent needs to patch libs as early # as possible. pool_cls = (concurrency.get_implementation(pool_cls) or - self.app.conf.CELERYD_POOL) + self.app.conf.worker_pool) if self.app.IS_WINDOWS and kwargs.get('beat'): self.die('-B option does not work on Windows. 
' 'Please run celery beat as a separate service.') @@ -201,53 +215,116 @@ def run(self, hostname=None, pool_cls=None, app=None, uid=None, gid=None, loglevel, '|'.join( l for l in LOG_LEVELS if isinstance(l, string_t)))) - return self.app.Worker( + worker = self.app.Worker( hostname=hostname, pool_cls=pool_cls, loglevel=loglevel, logfile=logfile, # node format handled by celery.app.log.setup pidfile=self.node_format(pidfile, hostname), state_db=self.node_format(state_db, hostname), **kwargs - ).start() + ) + worker.start() + return worker.exitcode def with_pool_option(self, argv): # this command support custom pools # that may have to be loaded as early as possible. return (['-P'], ['--pool']) - def get_options(self): + def prepare_arguments(self, parser): conf = self.app.conf - return ( - Option('-c', '--concurrency', - default=conf.CELERYD_CONCURRENCY, type='int'), - Option('-P', '--pool', default=conf.CELERYD_POOL, dest='pool_cls'), - Option('--purge', '--discard', default=False, action='store_true'), - Option('-l', '--loglevel', default=conf.CELERYD_LOG_LEVEL), - Option('-n', '--hostname'), - Option('-B', '--beat', action='store_true'), - Option('-s', '--schedule', dest='schedule_filename', - default=conf.CELERYBEAT_SCHEDULE_FILENAME), - Option('--scheduler', dest='scheduler_cls'), - Option('-S', '--statedb', - default=conf.CELERYD_STATE_DB, dest='state_db'), - Option('-E', '--events', default=conf.CELERY_SEND_EVENTS, - action='store_true', dest='send_events'), - Option('--time-limit', type='float', dest='task_time_limit', - default=conf.CELERYD_TASK_TIME_LIMIT), - Option('--soft-time-limit', dest='task_soft_time_limit', - default=conf.CELERYD_TASK_SOFT_TIME_LIMIT, type='float'), - Option('--maxtasksperchild', dest='max_tasks_per_child', - default=conf.CELERYD_MAX_TASKS_PER_CHILD, type='int'), - Option('--queues', '-Q', default=[]), - Option('--exclude-queues', '-X', default=[]), - Option('--include', '-I', default=[]), - Option('--autoscale'), - 
Option('--autoreload', action='store_true'), - Option('--no-execv', action='store_true', default=False), - Option('--without-gossip', action='store_true', default=False), - Option('--without-mingle', action='store_true', default=False), - Option('--without-heartbeat', action='store_true', default=False), - Option('-O', dest='optimization'), - Option('-D', '--detach', action='store_true'), - ) + daemon_options() + tuple(self.app.user_options['worker']) + + wopts = OptionGroup(parser, 'Worker Options') + wopts.add_option('-n', '--hostname') + wopts.add_option('-D', '--detach', action='store_true') + wopts.add_option( + '-S', '--statedb', + default=conf.worker_state_db, dest='state_db', + ) + wopts.add_option('-l', '--loglevel', default='WARN') + wopts.add_option('-O', dest='optimization') + wopts.add_option( + '--prefetch-multiplier', + dest='prefetch_multiplier', type='int', + default=conf.worker_prefetch_multiplier, + ) + parser.add_option_group(wopts) + + topts = OptionGroup(parser, 'Pool Options') + topts.add_option( + '-c', '--concurrency', + default=conf.worker_concurrency, type='int', + ) + topts.add_option( + '-P', '--pool', + default=conf.worker_pool, dest='pool_cls', + ) + topts.add_option( + '-E', '--events', + default=conf.worker_send_task_events, + action='store_true', dest='send_events', + ) + topts.add_option( + '--time-limit', + type='float', dest='task_time_limit', + default=conf.task_time_limit, + ) + topts.add_option( + '--soft-time-limit', + dest='task_soft_time_limit', type='float', + default=conf.task_soft_time_limit, + ) + topts.add_option( + '--maxtasksperchild', + dest='max_tasks_per_child', type='int', + default=conf.worker_max_tasks_per_child, + ) + topts.add_option( + '--maxmemperchild', + dest='max_memory_per_child', type='int', + default=conf.worker_max_memory_per_child, + ) + parser.add_option_group(topts) + + qopts = OptionGroup(parser, 'Queue Options') + qopts.add_option( + '--purge', '--discard', + default=False, action='store_true', 
+ ) + qopts.add_option('--queues', '-Q', default=[]) + qopts.add_option('--exclude-queues', '-X', default=[]) + qopts.add_option('--include', '-I', default=[]) + parser.add_option_group(qopts) + + fopts = OptionGroup(parser, 'Features') + fopts.add_option('--autoscale') + fopts.add_option('--autoreload', action='store_true') + fopts.add_option( + '--without-gossip', action='store_true', default=False, + ) + fopts.add_option( + '--without-mingle', action='store_true', default=False, + ) + fopts.add_option( + '--without-heartbeat', action='store_true', default=False, + ) + fopts.add_option('--heartbeat-interval', type='int') + parser.add_option_group(fopts) + + daemon_options(parser) + + bopts = OptionGroup(parser, 'Embedded Beat Options') + bopts.add_option('-B', '--beat', action='store_true') + bopts.add_option( + '-s', '--schedule', dest='schedule_filename', + default=conf.beat_schedule_filename, + ) + bopts.add_option('--scheduler', dest='scheduler_cls') + parser.add_option_group(bopts) + + user_options = self.app.user_options['worker'] + if user_options: + uopts = OptionGroup(parser, 'User Options') + uopts.options_list.extend(user_options) + parser.add_option_group(uopts) def main(app=None): diff --git a/celery/bootsteps.py b/celery/bootsteps.py index 9c0427fe6..85a351cf3 100644 --- a/celery/bootsteps.py +++ b/celery/bootsteps.py @@ -13,6 +13,7 @@ from kombu.common import ignore_errors from kombu.utils import symbol_by_name +from kombu.utils.encoding import bytes_to_str from .datastructures import DependencyGraph, GraphFormatter from .five import values, with_metaclass @@ -21,9 +22,10 @@ try: from greenlet import GreenletExit - IGNORE_ERRORS = (GreenletExit, ) except ImportError: # pragma: no cover IGNORE_ERRORS = () +else: + IGNORE_ERRORS = (GreenletExit,) __all__ = ['Blueprint', 'Step', 'StartStopStep', 'ConsumerStep'] @@ -33,7 +35,6 @@ TERMINATE = 0x3 logger = get_logger(__name__) -debug = logger.debug def _pre(ns, fmt): @@ -58,7 +59,8 @@ class 
StepFormatter(GraphFormatter): def label(self, step): return step and '{0}{1}'.format( self._get_prefix(step), - (step.label or _label(step)).encode('utf-8', 'ignore'), + bytes_to_str( + (step.label or _label(step)).encode('utf-8', 'ignore')), ) def _get_prefix(self, step): @@ -121,7 +123,7 @@ def start(self, parent): self._debug('Starting %s', step.alias) self.started = i + 1 step.start(parent) - debug('^-- substep ok') + logger.debug('^-- substep ok') def human_state(self): return self.state_to_name[self.state or 0] @@ -232,6 +234,8 @@ def _find_last(self): return next((C for C in values(self.steps) if C.last), None) def _firstpass(self, steps): + for step in values(steps): + step.requires = [symbol_by_name(dep) for dep in step.requires] stream = deque(step.requires for step in values(steps)) while stream: for node in stream.popleft(): @@ -267,7 +271,7 @@ def load_step(self, step): return step.name, step def _debug(self, msg, *args): - return debug(_pre(self, msg), *args) + return logger.debug(_pre(self, msg), *args) @property def alias(self): @@ -283,7 +287,6 @@ def __new__(cls, name, bases, attrs): attrs.update( __qualname__=qname, name=attrs.get('name') or qname, - requires=attrs.get('requires', ()), ) return super(StepType, cls).__new__(cls, name, bases, attrs) @@ -392,7 +395,7 @@ def include(self, parent): class ConsumerStep(StartStopStep): - requires = ('celery.worker.consumer:Connection', ) + requires = ('celery.worker.consumer:Connection',) consumers = None def get_consumers(self, channel): diff --git a/celery/canvas.py b/celery/canvas.py index cabc5070c..f01c12b4f 100644 --- a/celery/canvas.py +++ b/celery/canvas.py @@ -10,25 +10,36 @@ """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals +import sys + +from collections import MutableSequence, deque from copy import deepcopy from functools import partial as _partial, reduce from operator import itemgetter from itertools import chain as _chain -from 
kombu.utils import cached_property, fxrange, kwdict, reprcall, uuid +from kombu.utils import cached_property, fxrange, reprcall, uuid +from vine import barrier from celery._state import current_app +from celery.local import try_import +from celery.result import GroupResult +from celery.utils import abstract from celery.utils.functional import ( - maybe_list, is_list, regen, - chunks as _chunks, + maybe_list, is_list, _regen, regen, chunks as _chunks, ) from celery.utils.text import truncate __all__ = ['Signature', 'chain', 'xmap', 'xstarmap', 'chunks', 'group', 'chord', 'signature', 'maybe_signature'] +PY3 = sys.version_info[0] == 3 + +# json in Python 2.7 borks if dict contains byte keys. +JSON_NEEDS_UNICODE_KEYS = PY3 and not try_import('simplejson') + class _getitem_property(object): """Attribute -> dict key descriptor. @@ -91,13 +102,23 @@ def maybe_unroll_group(g): try: size = g.tasks.__length_hint__() except (AttributeError, TypeError): - pass + return g else: return list(g.tasks)[0] if size == 1 else g else: return g.tasks[0] if size == 1 else g +def task_name_from(task): + return getattr(task, 'name', task) + + +def _upgrade(fields, sig): + """Used by custom signatures in .from_dict, to keep common fields.""" + sig.update(chord_size=fields.get('chord_size')) + return sig + + class Signature(dict): """Class that wraps the arguments and execution options for a single task invocation. 
@@ -132,7 +153,7 @@ def register_type(cls, subclass, name=None): def from_dict(self, d, app=None): typ = d.get('subtask_type') if typ: - return self.TYPES[typ].from_dict(kwdict(d), app=app) + return self.TYPES[typ].from_dict(d, app=app) return Signature(d, app=app) def __init__(self, task=None, args=None, kwargs=None, options=None, @@ -157,7 +178,8 @@ def __init__(self, task=None, args=None, kwargs=None, options=None, kwargs=kwargs or {}, options=dict(options or {}, **ex), subtask_type=subtask_type, - immutable=immutable) + immutable=immutable, + chord_size=None) def __call__(self, *partial_args, **partial_kwargs): args, kwargs, _ = self._merge(partial_args, partial_kwargs, None) @@ -172,8 +194,8 @@ def apply(self, args=(), kwargs={}, **options): args, kwargs, options = self._merge(args, kwargs, options) return self.type.apply(args, kwargs, **options) - def _merge(self, args=(), kwargs={}, options={}): - if self.immutable: + def _merge(self, args=(), kwargs={}, options={}, force=False): + if self.immutable and not force: return (self.args, self.kwargs, dict(self.options, **options) if options else self.options) return (tuple(args) + tuple(self.args) if args else self.args, @@ -189,17 +211,23 @@ def clone(self, args=(), kwargs={}, **opts): s = Signature.from_dict({'task': self.task, 'args': tuple(args), 'kwargs': kwargs, 'options': deepcopy(opts), 'subtask_type': self.subtask_type, + 'chord_size': self.chord_size, 'immutable': self.immutable}, app=self._app) s._type = self._type return s partial = clone - def freeze(self, _id=None, group_id=None, chord=None): + def freeze(self, _id=None, group_id=None, chord=None, + root_id=None, parent_id=None): opts = self.options try: tid = opts['task_id'] except KeyError: tid = opts['task_id'] = _id or uuid() + if root_id: + opts['root_id'] = root_id + if parent_id: + opts['parent_id'] = parent_id if 'reply_to' not in opts: opts['reply_to'] = self.app.oid if group_id: @@ -228,10 +256,14 @@ def set(self, immutable=None, 
**options): def set_immutable(self, immutable): self.immutable = immutable - def apply_async(self, args=(), kwargs={}, **options): + def set_parent_id(self, parent_id): + self.parent_id = parent_id + + def apply_async(self, args=(), kwargs={}, route_name=None, **options): try: _apply = self._apply_async - except IndexError: # no tasks for chain, etc to find type + except IndexError: # pragma: no cover + # no tasks for chain, etc to find type return # For callbacks: extra args are prepended to the stored args. if args or kwargs or options: @@ -240,12 +272,22 @@ def apply_async(self, args=(), kwargs={}, **options): args, kwargs, options = self.args, self.kwargs, self.options return _apply(args, kwargs, **options) - def append_to_list_option(self, key, value): + def _with_list_option(self, key): items = self.options.setdefault(key, []) + if not isinstance(items, MutableSequence): + items = self.options[key] = [items] + return items + + def append_to_list_option(self, key, value): + items = self._with_list_option(key) if value not in items: items.append(value) return value + def extend_list_option(self, key, value): + items = self._with_list_option(key) + items.extend(maybe_list(value)) + def link(self, callback): return self.append_to_list_option('link', callback) @@ -260,15 +302,20 @@ def flatten_links(self): ))) def __or__(self, other): - if isinstance(other, group): + if isinstance(self, group): + if isinstance(other, group): + return group(_chain(self.tasks, other.tasks), app=self.app) + return chord(self, body=other, app=self._app) + elif isinstance(other, group): other = maybe_unroll_group(other) + if not isinstance(self, chain) and isinstance(other, chain): - return chain((self, ) + other.tasks, app=self._app) + return chain((self,) + other.tasks, app=self._app) elif isinstance(other, chain): return chain(*self.tasks + other.tasks, app=self._app) elif isinstance(other, Signature): if isinstance(self, chain): - return chain(*self.tasks + (other, ), 
app=self._app) + return chain(*self.tasks + (other,), app=self._app) return chain(self, other, app=self._app) return NotImplemented @@ -282,10 +329,13 @@ def __invert__(self): def __reduce__(self): # for serialization, the task type is lazily loaded, # and not stored in the dict itself. - return subtask, (dict(self), ) + return signature, (dict(self),) + + def __json__(self): + return dict(self) def reprcall(self, *args, **kwargs): - args, kwargs, _ = self._merge(args, kwargs, {}) + args, kwargs, _ = self._merge(args, kwargs, {}, force=True) return reprcall(self['task'], args, kwargs) def election(self): @@ -302,6 +352,16 @@ def election(self): def __repr__(self): return self.reprcall() + if JSON_NEEDS_UNICODE_KEYS: # pragma: no cover + def items(self): + for k, v in dict.items(self): + yield k.decode() if isinstance(k, bytes) else k, v + + @property + def name(self): + # for duck typing compatibility with Task.name + return self.task + @cached_property def type(self): return self._type or self.app.tasks[self['task']] @@ -324,16 +384,21 @@ def _apply_async(self): except KeyError: return _partial(self.app.send_task, self['task']) id = _getitem_property('options.task_id') + parent_id = _getitem_property('options.parent_id') + root_id = _getitem_property('options.root_id') task = _getitem_property('task') args = _getitem_property('args') kwargs = _getitem_property('kwargs') options = _getitem_property('options') subtask_type = _getitem_property('subtask_type') + chord_size = _getitem_property('chord_size') immutable = _getitem_property('immutable') +abstract.CallableSignature.register(Signature) @Signature.register_type class chain(Signature): + tasks = _getitem_property('kwargs.tasks') def __init__(self, *tasks, **options): tasks = (regen(tasks[0]) if len(tasks) == 1 and is_list(tasks[0]) @@ -341,27 +406,186 @@ def __init__(self, *tasks, **options): Signature.__init__( self, 'celery.chain', (), {'tasks': tasks}, **options ) - self.tasks = tasks + self._use_link = 
options.pop('use_link', None) self.subtask_type = 'chain' + self._frozen = None def __call__(self, *args, **kwargs): if self.tasks: return self.apply_async(args, kwargs) + def clone(self, *args, **kwargs): + s = Signature.clone(self, *args, **kwargs) + s.kwargs['tasks'] = [sig.clone() for sig in s.kwargs['tasks']] + return s + + def apply_async(self, args=(), kwargs={}, **options): + # python is best at unpacking kwargs, so .run is here to do that. + app = self.app + if app.conf.task_always_eager: + return self.apply(args, kwargs, **options) + return self.run(args, kwargs, app=app, **( + dict(self.options, **options) if options else self.options)) + + def run(self, args=(), kwargs={}, group_id=None, chord=None, + task_id=None, link=None, link_error=None, publisher=None, + producer=None, root_id=None, parent_id=None, app=None, **options): + app = app or self.app + use_link = self._use_link + if use_link is None and app.conf.task_protocol == 1: + use_link = True + args = (tuple(args) + tuple(self.args) + if args and not self.immutable else self.args) + + if self._frozen: + tasks, results = self._frozen + else: + tasks, results = self.prepare_steps( + args, self.tasks, root_id, parent_id, link_error, app, + task_id, group_id, chord, + ) + + if results: + if link: + tasks[0].extend_list_option('link', link) + first_task = tasks.pop() + first_task.apply_async( + chain=tasks if not use_link else None, **options) + return results[0] + + def freeze(self, _id=None, group_id=None, chord=None, + root_id=None, parent_id=None): + _, results = self._frozen = self.prepare_steps( + self.args, self.tasks, root_id, parent_id, None, + self.app, _id, group_id, chord, clone=False, + ) + return results[0] + + def prepare_steps(self, args, tasks, + root_id=None, parent_id=None, link_error=None, app=None, + last_task_id=None, group_id=None, chord_body=None, + clone=True, from_dict=Signature.from_dict): + app = app or self.app + # use chain message field for protocol 2 and later. 
+ # this avoids pickle blowing the stack on the recursion + # required by linking task together in a tree structure. + # (why is pickle using recursion? or better yet why cannot python + # do tail call optimization making recursion actually useful?) + use_link = self._use_link + if use_link is None and app.conf.task_protocol == 1: + use_link = True + steps = deque(tasks) + + steps_pop = steps.pop + steps_extend = steps.extend + + prev_task = None + prev_res = prev_prev_res = None + tasks, results = [], [] + i = 0 + while steps: + task = steps_pop() + is_first_task, is_last_task = not steps, not i + + if not isinstance(task, abstract.CallableSignature): + task = from_dict(task, app=app) + if isinstance(task, group): + task = maybe_unroll_group(task) + + # first task gets partial args from chain + if clone: + task = task.clone(args) if is_first_task else task.clone() + elif is_first_task: + task.args = tuple(args) + tuple(task.args) + + if isinstance(task, chain): + # splice the chain + steps_extend(task.tasks) + continue + + if isinstance(task, group) and prev_task: + # automatically upgrade group(...) | s to chord(group, s) + # for chords we freeze by pretending it's a normal + # signature instead of a group. + tasks.pop() + results.pop() + task = chord( + task, body=prev_task, + task_id=prev_res.task_id, root_id=root_id, app=app, + ) + prev_res = prev_prev_res + if is_last_task: + # chain(task_id=id) means task id is set for the last task + # in the chain. If the chord is part of a chord/group + # then that chord/group must synchronize based on the + # last task in the chain, so we only set the group_id and + # chord callback for the last task. + res = task.freeze( + last_task_id, + root_id=root_id, group_id=group_id, chord=chord_body, + ) + else: + res = task.freeze(root_id=root_id) + + i += 1 + + if prev_task: + prev_task.set_parent_id(task.id) + + if use_link: + # link previous task to this task. 
+ task.link(prev_task) + + if prev_res: + prev_res.parent = res + + if is_first_task and parent_id is not None: + task.set_parent_id(parent_id) + + if link_error: + for errback in maybe_list(link_error): + task.link_error(errback) + + tasks.append(task) + results.append(res) + + prev_task, prev_prev_res, prev_res = ( + task, prev_res, res, + ) + + if root_id is None and tasks: + root_id = tasks[-1].id + for task in reversed(tasks): + task.options['root_id'] = root_id + return tasks, results + + def apply(self, args=(), kwargs={}, **options): + last, fargs = None, args + for task in self.tasks: + res = task.clone(fargs).apply( + last and (last.get(),), **dict(self.options, **options)) + res.parent, last, fargs = last, res, None + return last + @classmethod def from_dict(self, d, app=None): tasks = d['kwargs']['tasks'] - if d['args'] and tasks: - # partial args passed on to first task in chain (Issue #1057). - tasks[0]['args'] = tasks[0]._merge(d['args'])[0] - return chain(*d['kwargs']['tasks'], app=app, **kwdict(d['options'])) + if tasks: + if isinstance(tasks, tuple): # aaaargh + tasks = d['kwargs']['tasks'] = list(tasks) + # First task must be signature object to get app + tasks[0] = maybe_signature(tasks[0], app=app) + return _upgrade(d, chain(*tasks, app=app, **d['options'])) @property - def type(self): - try: - return self._type or self.tasks[0].type.app.tasks['celery.chain'] - except KeyError: - return self.app.tasks['celery.chain'] + def app(self): + app = self._app + if app is None: + try: + app = self.tasks[0]._app + except (KeyError, IndexError): + pass + return app or current_app def __repr__(self): return ' | '.join(repr(t) for t in self.tasks) @@ -381,12 +605,15 @@ def apply_async(self, args=(), kwargs={}, **opts): # need to evaluate generators task, it = self._unpack_args(self.kwargs) return self.type.apply_async( - (), {'task': task, 'it': list(it)}, **opts + (), {'task': task, 'it': list(it)}, + route_name=task_name_from(self.kwargs.get('task')), 
**opts ) @classmethod def from_dict(cls, d, app=None): - return cls(*cls._unpack_args(d['kwargs']), app=app, **d['options']) + return _upgrade( + d, cls(*cls._unpack_args(d['kwargs']), app=app, **d['options']), + ) @Signature.register_type @@ -422,13 +649,19 @@ def __init__(self, task, it, n, **options): @classmethod def from_dict(self, d, app=None): - return chunks(*self._unpack_args(d['kwargs']), app=app, **d['options']) + return _upgrade( + d, chunks(*self._unpack_args( + d['kwargs']), app=app, **d['options']), + ) def apply_async(self, args=(), kwargs={}, **opts): - return self.group().apply_async(args, kwargs, **opts) + return self.group().apply_async( + args, kwargs, + route_name=task_name_from(self.kwargs.get('task')), **opts + ) def __call__(self, **options): - return self.group()(**options) + return self.apply_async(**options) def group(self): # need to evaluate generators @@ -442,48 +675,131 @@ def apply_chunks(cls, task, it, n, app=None): return cls(task, it, n, app=app)() -def _maybe_group(tasks): +def _maybe_group(tasks, app): + if isinstance(tasks, dict): + tasks = signature(tasks, app=app) + if isinstance(tasks, group): - tasks = list(tasks.tasks) - elif isinstance(tasks, Signature): + tasks = tasks.tasks + elif isinstance(tasks, abstract.CallableSignature): tasks = [tasks] else: - tasks = regen(tasks) + tasks = [signature(t, app=app) for t in tasks] return tasks -def _maybe_clone(tasks, app): - return [s.clone() if isinstance(s, Signature) else signature(s, app=app) - for s in tasks] - - @Signature.register_type class group(Signature): + tasks = _getitem_property('kwargs.tasks') def __init__(self, *tasks, **options): if len(tasks) == 1: - tasks = _maybe_group(tasks[0]) + tasks = tasks[0] + if isinstance(tasks, group): + tasks = tasks.tasks + if not isinstance(tasks, _regen): + tasks = regen(tasks) Signature.__init__( self, 'celery.group', (), {'tasks': tasks}, **options ) - self.tasks, self.subtask_type = tasks, 'group' + self.subtask_type = 'group' 
@classmethod def from_dict(self, d, app=None): - tasks = d['kwargs']['tasks'] - if d['args'] and tasks: - # partial args passed on to all tasks in the group (Issue #1057). - for task in tasks: - task['args'] = task._merge(d['args'])[0] - return group(tasks, app=app, **kwdict(d['options'])) - - def apply_async(self, args=(), kwargs=None, **options): - tasks = _maybe_clone(self.tasks, app=self._app) - if not tasks: + return _upgrade( + d, group(d['kwargs']['tasks'], app=app, **d['options']), + ) + + def __len__(self): + return len(self.tasks) + + def _prepared(self, tasks, partial_args, group_id, root_id, app, + CallableSignature=abstract.CallableSignature, + from_dict=Signature.from_dict, + isinstance=isinstance, tuple=tuple): + for task in tasks: + if isinstance(task, CallableSignature): + # local sigs are always of type Signature, and we + # clone them to make sure we do not modify the originals. + task = task.clone() + else: + # serialized sigs must be converted to Signature. + task = from_dict(task, app=app) + if isinstance(task, group): + # needs yield_from :( + unroll = task._prepared( + task.tasks, partial_args, group_id, root_id, app, + ) + for taskN, resN in unroll: + yield taskN, resN + else: + if partial_args and not task.immutable: + task.args = tuple(partial_args) + tuple(task.args) + yield task, task.freeze(group_id=group_id, root_id=root_id) + + def _apply_tasks(self, tasks, producer=None, app=None, p=None, + add_to_parent=None, chord=None, **options): + app = app or self.app + with app.producer_or_acquire(producer) as producer: + for sig, res in tasks: + sig.apply_async(producer=producer, add_to_parent=False, + chord=sig.options.get('chord') or chord, + **options) + if p: + p.add_noincr(res) + res.backend.add_pending_result(res) + yield res # <-- r.parent, etc set in the frozen result. + + def _freeze_gid(self, options): + # remove task_id and use that as the group_id, + # if we don't remove it then every task will have the same id... 
+ options = dict(self.options, **options) + options['group_id'] = group_id = ( + options.pop('task_id', uuid())) + return options, group_id, options.get('root_id') + + def set_parent_id(self, parent_id): + for task in self.tasks: + task.set_parent_id(parent_id) + + def apply_async(self, args=(), kwargs=None, add_to_parent=True, + producer=None, **options): + app = self.app + if app.conf.task_always_eager: + return self.apply(args, kwargs, **options) + if not self.tasks: return self.freeze() - type = self.type - return type(*type.prepare(dict(self.options, **options), - tasks, args)) + + options, group_id, root_id = self._freeze_gid(options) + tasks = self._prepared(self.tasks, args, group_id, root_id, app) + p = barrier() + results = list(self._apply_tasks(tasks, producer, app, p, **options)) + result = self.app.GroupResult(group_id, results, ready_barrier=p) + p.finalize() + + # - Special case of group(A.s() | group(B.s(), C.s())) + # That is, group with single item that is a chain but the + # last task in that chain is a group. + # + # We cannot actually support arbitrary GroupResults in chains, + # but this special case we can. 
+ if len(result) == 1 and isinstance(result[0], GroupResult): + result = result[0] + + parent_task = app.current_worker_task + if add_to_parent and parent_task: + parent_task.add_trail(result) + return result + + def apply(self, args=(), kwargs={}, **options): + app = self.app + if not self.tasks: + return self.freeze() # empty group returns GroupResult + options, group_id, root_id = self._freeze_gid(options) + tasks = self._prepared(self.tasks, args, group_id, root_id, app) + return app.GroupResult(group_id, [ + sig.apply(**options) for sig, _ in tasks + ]) def set_immutable(self, immutable): for task in self.tasks: @@ -498,15 +814,23 @@ def link_error(self, sig): sig = sig.clone().set(immutable=True) return self.tasks[0].link_error(sig) - def apply(self, *args, **kwargs): - if not self.tasks: - return self.freeze() # empty group returns GroupResult - return Signature.apply(self, *args, **kwargs) - def __call__(self, *partial_args, **options): return self.apply_async(partial_args, **options) - def freeze(self, _id=None, group_id=None, chord=None): + def _freeze_unroll(self, new_tasks, group_id, chord, root_id, parent_id): + stack = deque(self.tasks) + while stack: + task = maybe_signature(stack.popleft(), app=self._app).clone() + if isinstance(task, group): + stack.extendleft(task.tasks) + else: + new_tasks.append(task) + yield task.freeze(group_id=group_id, + chord=chord, root_id=root_id, + parent_id=parent_id) + + def freeze(self, _id=None, group_id=None, chord=None, + root_id=None, parent_id=None): opts = self.options try: gid = opts['task_id'] @@ -515,13 +839,19 @@ def freeze(self, _id=None, group_id=None, chord=None): if group_id: opts['group_id'] = group_id if chord: - opts['chord'] = group_id - new_tasks, results = [], [] - for task in self.tasks: - task = maybe_signature(task, app=self._app).clone() - results.append(task.freeze(group_id=group_id, chord=chord)) - new_tasks.append(task) - self.tasks = self.kwargs['tasks'] = new_tasks + opts['chord'] = chord 
+ root_id = opts.setdefault('root_id', root_id) + parent_id = opts.setdefault('parent_id', parent_id) + new_tasks = [] + # Need to unroll subgroups early so that chord gets the + # right result instance for chord_unlock etc. + results = list(self._freeze_unroll( + new_tasks, group_id, chord, root_id, parent_id, + )) + if isinstance(self.tasks, MutableSequence): + self.tasks[:] = new_tasks + else: + self.tasks = new_tasks return self.app.GroupResult(gid, results) _freeze = freeze @@ -535,38 +865,54 @@ def __iter__(self): return iter(self.tasks) def __repr__(self): - return repr(self.tasks) + return 'group({0.tasks!r})'.format(self) @property - def type(self): - if self._type: - return self._type - # taking the app from the first task in the list, there may be a - # better solution for this, e.g. to consolidate tasks with the same - # app and apply them in batches. - app = self._app if self._app else self.tasks[0].type.app - return app.tasks[self['task']] + def app(self): + app = self._app + if app is None: + try: + app = self.tasks[0].app + except (KeyError, IndexError): + pass + return app if app is not None else current_app @Signature.register_type class chord(Signature): def __init__(self, header, body=None, task='celery.chord', - args=(), kwargs={}, **options): + args=(), kwargs={}, app=None, **options): Signature.__init__( self, task, args, - dict(kwargs, header=_maybe_group(header), - body=maybe_signature(body, app=self._app)), **options + dict(kwargs, header=_maybe_group(header, app), + body=maybe_signature(body, app=app)), app=app, **options ) self.subtask_type = 'chord' - def freeze(self, _id=None, group_id=None, chord=None): - return self.body.freeze(_id, group_id=group_id, chord=chord) + def freeze(self, _id=None, group_id=None, chord=None, + root_id=None, parent_id=None): + if not isinstance(self.tasks, group): + self.tasks = group(self.tasks, app=self.app) + bodyres = self.body.freeze(_id, parent_id=self.id, root_id=root_id) + self.tasks.freeze( + 
parent_id=parent_id, root_id=root_id, chord=self.body) + self.id = self.tasks.id + self.body.set_parent_id(self.id) + return bodyres + + def set_parent_id(self, parent_id): + tasks = self.tasks + if isinstance(tasks, group): + tasks = tasks.tasks + for task in tasks: + task.set_parent_id(parent_id) + self.parent_id = parent_id @classmethod def from_dict(self, d, app=None): - args, d['kwargs'] = self._unpack_args(**kwdict(d['kwargs'])) - return self(*args, app=app, **kwdict(d)) + args, d['kwargs'] = self._unpack_args(**d['kwargs']) + return _upgrade(d, self(*args, app=app, **d)) @staticmethod def _unpack_args(header=None, body=None, **kwargs): @@ -574,35 +920,81 @@ def _unpack_args(header=None, body=None, **kwargs): # than manually popping things off. return (header, body), kwargs - @property - def type(self): - if self._type: - return self._type - # we will be able to fix this mess in 3.2 when we no longer - # require an actual task implementation for chord/group - if self._app: - app = self._app - else: + @cached_property + def app(self): + return self._get_app(self.body) + + def _get_app(self, body=None): + app = self._app + if app is None: try: - app = self.tasks[0].type.app - except IndexError: - app = self.body.type.app - return app.tasks['celery.chord'] + tasks = self.tasks.tasks # is a group + except AttributeError: + tasks = self.tasks + app = tasks[0]._app + if app is None and body is not None: + app = body._app + return app if app is not None else current_app def apply_async(self, args=(), kwargs={}, task_id=None, producer=None, publisher=None, connection=None, router=None, result_cls=None, **options): + args = (tuple(args) + tuple(self.args) + if args and not self.immutable else self.args) body = kwargs.get('body') or self.kwargs['body'] kwargs = dict(self.kwargs, **kwargs) body = body.clone(**options) + app = self._get_app(body) + tasks = (self.tasks.clone() if isinstance(self.tasks, group) + else group(self.tasks, app=app)) + if 
app.conf.task_always_eager: + return self.apply((), kwargs, + body=body, task_id=task_id, **options) + return self.run(tasks, body, args, task_id=task_id, **options) + + def apply(self, args=(), kwargs={}, propagate=True, body=None, **options): + body = self.body if body is None else body + tasks = (self.tasks.clone() if isinstance(self.tasks, group) + else group(self.tasks, app=self.app)) + return body.apply( + args=(tasks.apply().get(propagate=propagate),), + ) - _chord = self.type - if _chord.app.conf.CELERY_ALWAYS_EAGER: - return self.apply((), kwargs, task_id=task_id, **options) - res = body.freeze(task_id) - parent = _chord(self.tasks, body, args, **options) - res.parent = parent - return res + def _traverse_tasks(self, tasks, value=None): + stack = deque(list(tasks)) + while stack: + task = stack.popleft() + if isinstance(task, group): + stack.extend(task.tasks) + else: + yield task if value is None else value + + def __length_hint__(self): + return sum(self._traverse_tasks(self.tasks, 1)) + + def run(self, header, body, partial_args, app=None, interval=None, + countdown=1, max_retries=None, eager=False, + task_id=None, **options): + app = app or self._get_app(body) + group_id = uuid() + root_id = body.options.get('root_id') + body.chord_size = self.__length_hint__() + options = dict(self.options, **options) if options else self.options + if options: + options.pop('task_id', None) + body.options.update(options) + + results = header.freeze( + group_id=group_id, chord=body, root_id=root_id).results + bodyres = body.freeze(task_id, root_id=root_id) + + parent = app.backend.apply_chord( + header, partial_args, group_id, body, + interval=interval, countdown=countdown, + options=options, max_retries=max_retries, + result=results) + bodyres.parent = parent + return bodyres def __call__(self, body=None, **options): return self.apply_async((), {'body': body} if body else {}, **options) @@ -639,21 +1031,20 @@ def __repr__(self): def signature(varies, *args, **kwargs): 
- if not (args or kwargs) and isinstance(varies, dict): - if isinstance(varies, Signature): + app = kwargs.get('app') + if isinstance(varies, dict): + if isinstance(varies, abstract.CallableSignature): return varies.clone() - return Signature.from_dict(varies) + return Signature.from_dict(varies, app=app) return Signature(varies, *args, **kwargs) subtask = signature # XXX compat def maybe_signature(d, app=None): if d is not None: - if isinstance(d, dict): - if not isinstance(d, Signature): - return signature(d, app=app) - elif isinstance(d, list): - return [maybe_signature(s, app=app) for s in d] + if (isinstance(d, dict) and + not isinstance(d, abstract.CallableSignature)): + d = signature(d) if app is not None: d._app = app return d diff --git a/celery/concurrency/asynpool.py b/celery/concurrency/asynpool.py index 5c4d5855c..d8e64acb3 100644 --- a/celery/concurrency/asynpool.py +++ b/celery/concurrency/asynpool.py @@ -19,8 +19,8 @@ from __future__ import absolute_import import errno +import gc import os -import random import select import socket import struct @@ -29,24 +29,24 @@ from collections import deque, namedtuple from io import BytesIO +from numbers import Integral from pickle import HIGHEST_PROTOCOL from time import sleep from weakref import WeakValueDictionary, ref -from amqp.utils import promise from billiard.pool import RUN, TERMINATE, ACK, NACK, WorkersJoined from billiard import pool as _pool from billiard.compat import buf_t, setblocking, isblocking -from billiard.einfo import ExceptionInfo from billiard.queues import _SimpleQueue from kombu.async import READ, WRITE, ERR from kombu.serialization import pickle as _pickle from kombu.utils import fxrange -from kombu.utils.compat import get_errno from kombu.utils.eventio import SELECT_BAD_FD +from vine import promise + from celery.five import Counter, items, values +from celery.utils.functional import noop from celery.utils.log import get_logger -from celery.utils.text import truncate from celery.worker 
import state as worker_state try: @@ -80,7 +80,7 @@ def unpack_from(fmt, iobuf, unpack=struct.unpack): # noqa logger = get_logger(__name__) error, debug = logger.error, logger.debug -UNAVAIL = frozenset([errno.EAGAIN, errno.EINTR]) +UNAVAIL = frozenset({errno.EAGAIN, errno.EINTR}) #: Constant sent by child process when started (ready to accept work) WORKER_UP = 15 @@ -96,8 +96,6 @@ def unpack_from(fmt, iobuf, unpack=struct.unpack): # noqa 'fair': SCHED_STRATEGY_FAIR, } -RESULT_MAXLEN = 128 - Ack = namedtuple('Ack', ('id', 'fd', 'payload')) @@ -115,8 +113,45 @@ def _get_job_writer(job): return writer() # is a weakref -def _select(readers=None, writers=None, err=None, timeout=0): - """Simple wrapper to :class:`~select.select`. +if hasattr(select, 'poll'): + def _select_imp(readers=None, writers=None, err=None, timeout=0, + poll=select.poll, POLLIN=select.POLLIN, + POLLOUT=select.POLLOUT, POLLERR=select.POLLERR): + poller = poll() + register = poller.register + + if readers: + [register(fd, POLLIN) for fd in readers] + if writers: + [register(fd, POLLOUT) for fd in writers] + if err: + [register(fd, POLLERR) for fd in err] + + R, W = set(), set() + timeout = 0 if timeout and timeout < 0 else round(timeout * 1e3) + events = poller.poll(timeout) + for fd, event in events: + if not isinstance(fd, Integral): + fd = fd.fileno() + if event & POLLIN: + R.add(fd) + if event & POLLOUT: + W.add(fd) + if event & POLLERR: + R.add(fd) + return R, W, 0 +else: + def _select_imp(readers=None, writers=None, err=None, timeout=0): + r, w, e = select.select(readers, writers, err, timeout) + if e: + r = list(set(r) | set(e)) + return r, w, 0 + + +def _select(readers=None, writers=None, err=None, timeout=0, + poll=_select_imp): + """Simple wrapper to :class:`~select.select`, using :`~select.poll` + as the implementation. :param readers: Set of reader fds to test if readable. :param writers: Set of writer fds to test if writable. 
@@ -138,42 +173,33 @@ def _select(readers=None, writers=None, err=None, timeout=0): writers = set() if writers is None else writers err = set() if err is None else err try: - r, w, e = select.select(readers, writers, err, timeout) - if e: - r = list(set(r) | set(e)) - return r, w, 0 + return poll(readers, writers, err, timeout) except (select.error, socket.error) as exc: - if get_errno(exc) == errno.EINTR: - return [], [], 1 - elif get_errno(exc) in SELECT_BAD_FD: + if exc.errno == errno.EINTR: + return set(), set(), 1 + elif exc.errno in SELECT_BAD_FD: for fd in readers | writers | err: try: select.select([fd], [], [], 0) except (select.error, socket.error) as exc: - if get_errno(exc) not in SELECT_BAD_FD: + if getattr(exc, 'errno', None) not in SELECT_BAD_FD: raise readers.discard(fd) writers.discard(fd) err.discard(fd) - return [], [], 1 + return set(), set(), 1 else: raise class Worker(_pool.Worker): """Pool worker process.""" - dead = False def on_loop_start(self, pid): # our version sends a WORKER_UP message when the process is ready # to accept work, this will tell the parent that the inqueue fd # is writable. 
- self.outq.put((WORKER_UP, (pid, ))) - - def prepare_result(self, result, RESULT_MAXLEN=RESULT_MAXLEN): - if not isinstance(result, ExceptionInfo): - return truncate(repr(result), RESULT_MAXLEN) - return result + self.outq.put((WORKER_UP, (pid,))) class ResultHandler(_pool.ResultHandler): @@ -197,7 +223,6 @@ def _recv_message(self, add_reader, fd, callback, else: buf = bufv = BytesIO() # header - assert not isblocking(fd) while Hr < 4: try: @@ -205,7 +230,7 @@ def _recv_message(self, add_reader, fd, callback, fd, bufv[Hr:] if readcanbuf else bufv, 4 - Hr, ) except OSError as exc: - if get_errno(exc) not in UNAVAIL: + if exc.errno not in UNAVAIL: raise yield else: @@ -227,7 +252,7 @@ def _recv_message(self, add_reader, fd, callback, fd, bufv[Br:] if readcanbuf else bufv, body_size - Br, ) except OSError as exc: - if get_errno(exc) not in UNAVAIL: + if exc.errno not in UNAVAIL: raise yield else: @@ -250,21 +275,21 @@ def _make_process_result(self, hub): fileno_to_outq = self.fileno_to_outq on_state_change = self.on_state_change add_reader = hub.add_reader - hub_remove = hub.remove + remove_reader = hub.remove_reader recv_message = self._recv_message def on_result_readable(fileno): try: fileno_to_outq[fileno] except KeyError: # process gone - return hub_remove(fileno) + return remove_reader(fileno) it = recv_message(add_reader, fileno, on_state_change) try: next(it) except StopIteration: pass except (IOError, OSError, EOFError): - hub_remove(fileno) + remove_reader(fileno) else: add_reader(fileno, it) return on_result_readable @@ -340,6 +365,11 @@ class AsynPool(_pool.Pool): ResultHandler = ResultHandler Worker = Worker + def WorkerProcess(self, worker): + worker = super(AsynPool, self).WorkerProcess(worker) + worker.dead = False + return worker + def __init__(self, processes=None, synack=False, sched_strategy=None, *args, **kwargs): self.sched_strategy = SCHED_STRATEGIES.get(sched_strategy, @@ -347,8 +377,9 @@ def __init__(self, processes=None, synack=False, 
processes = self.cpu_count() if processes is None else processes self.synack = synack # create queue-pairs for all our processes in advance. - self._queues = dict((self.create_process_queues(), None) - for _ in range(processes)) + self._queues = { + self.create_process_queues(): None for _ in range(processes) + } # inqueue fileno -> process mapping self._fileno_to_inq = {} @@ -389,14 +420,40 @@ def __init__(self, processes=None, synack=False, # as processes are recycled, or found lost elsewhere. self._fileno_to_outq[proc.outqR_fd] = proc self._fileno_to_synq[proc.synqW_fd] = proc - self.on_soft_timeout = self._timeout_handler.on_soft_timeout - self.on_hard_timeout = self._timeout_handler.on_hard_timeout - def _event_process_exit(self, hub, fd): + self.on_soft_timeout = getattr( + self._timeout_handler, 'on_soft_timeout', noop, + ) + self.on_hard_timeout = getattr( + self._timeout_handler, 'on_hard_timeout', noop, + ) + + def _create_worker_process(self, i): + gc.collect() # Issue #2927 + return super(AsynPool, self)._create_worker_process(i) + + def _event_process_exit(self, hub, proc): # This method is called whenever the process sentinel is readable. - hub.remove(fd) + self._untrack_child_process(proc, hub) self.maintain_pool() + def _track_child_process(self, proc, hub): + try: + fd = proc._sentinel_poll + except AttributeError: + # we need to duplicate the fd here to carefully + # control when the fd is removed from the process table, + # as once the original fd is closed we cannot unregister + # the fd from epoll(7) anymore, causing a 100% CPU poll loop. 
+ fd = proc._sentinel_poll = os.dup(proc._popen.sentinel) + hub.add_reader(fd, self._event_process_exit, hub, proc) + + def _untrack_child_process(self, proc, hub): + if proc._sentinel_poll is not None: + fd, proc._sentinel_poll = proc._sentinel_poll, None + hub.remove(fd) + os.close(fd) + def register_with_event_loop(self, hub): """Registers the async pool with the current event loop.""" self._result_handler.register_with_event_loop(hub) @@ -406,8 +463,7 @@ def register_with_event_loop(self, hub): self._create_write_handlers(hub) # Add handler for when a process exits (calls maintain_pool) - [hub.add_reader(fd, self._event_process_exit, hub, fd) - for fd in self.process_sentinels] + [self._track_child_process(w, hub) for w in self._pool] # Handle_result_event is called whenever one of the # result queues are readable. [hub.add_reader(fd, self.handle_result_event, fd) @@ -485,20 +541,23 @@ def on_job_ready(self, job, i, obj, inqW_fd): def _create_process_handlers(self, hub, READ=READ, ERR=ERR): """For async pool this will create the handlers called when a process is up/down and etc.""" - add_reader, hub_remove = hub.add_reader, hub.remove + add_reader, remove_reader, remove_writer = ( + hub.add_reader, hub.remove_reader, hub.remove_writer, + ) cache = self._cache all_inqueues = self._all_inqueues fileno_to_inq = self._fileno_to_inq fileno_to_outq = self._fileno_to_outq fileno_to_synq = self._fileno_to_synq busy_workers = self._busy_workers - event_process_exit = self._event_process_exit handle_result_event = self.handle_result_event process_flush_queues = self.process_flush_queues waiting_to_start = self._waiting_to_start def verify_process_alive(proc): - if proc._is_alive() and proc in waiting_to_start: + proc = proc() # is a weakref + if (proc is not None and proc._is_alive() and + proc in waiting_to_start): assert proc.outqR_fd in fileno_to_outq assert fileno_to_outq[proc.outqR_fd] is proc assert proc.outqR_fd in hub.readers @@ -518,10 +577,9 @@ def 
on_process_up(proc): if job._scheduled_for and job._scheduled_for.inqW_fd == infd: job._scheduled_for = proc fileno_to_outq[proc.outqR_fd] = proc + # maintain_pool is called whenever a process exits. - add_reader( - proc.sentinel, event_process_exit, hub, proc.sentinel, - ) + self._track_child_process(proc, hub) assert not isblocking(proc.outq._reader) @@ -531,12 +589,12 @@ def on_process_up(proc): waiting_to_start.add(proc) hub.call_later( - self._proc_alive_timeout, verify_process_alive, proc, + self._proc_alive_timeout, verify_process_alive, ref(proc), ) self.on_process_up = on_process_up - def _remove_from_index(obj, proc, index, callback=None): + def _remove_from_index(obj, proc, index, remove_fun, callback=None): # this remove the file descriptors for a process from # the indices. we have to make sure we don't overwrite # another processes fds, as the fds may be reused. @@ -552,33 +610,39 @@ def _remove_from_index(obj, proc, index, callback=None): except KeyError: pass else: - hub_remove(fd) + remove_fun(fd) if callback is not None: callback(fd) return fd def on_process_down(proc): """Called when a worker process exits.""" - if proc.dead: + if getattr(proc, 'dead', None): return process_flush_queues(proc) - _remove_from_index(proc.outq._reader, proc, fileno_to_outq) + _remove_from_index( + proc.outq._reader, proc, fileno_to_outq, remove_reader, + ) if proc.synq: - _remove_from_index(proc.synq._writer, proc, fileno_to_synq) - inq = _remove_from_index(proc.inq._writer, proc, fileno_to_inq, - callback=all_inqueues.discard) + _remove_from_index( + proc.synq._writer, proc, fileno_to_synq, remove_writer, + ) + inq = _remove_from_index( + proc.inq._writer, proc, fileno_to_inq, remove_writer, + callback=all_inqueues.discard, + ) if inq: busy_workers.discard(inq) - hub_remove(proc.sentinel) + self._untrack_child_process(proc, hub) waiting_to_start.discard(proc) self._active_writes.discard(proc.inqW_fd) - hub_remove(proc.inqW_fd) - hub_remove(proc.outqR_fd) + 
remove_writer(proc.inq._writer) + remove_reader(proc.outq._reader) if proc.synqR_fd: - hub_remove(proc.synqR_fd) + remove_reader(proc.synq._reader) if proc.synqW_fd: self._active_writes.discard(proc.synqW_fd) - hub_remove(proc.synqW_fd) + remove_reader(proc.synq._writer) self.on_process_down = on_process_down def _create_write_handlers(self, hub, @@ -596,7 +660,7 @@ def _create_write_handlers(self, hub, active_writers = self._active_writers busy_workers = self._busy_workers diff = all_inqueues.difference - add_reader, add_writer = hub.add_reader, hub.add_writer + add_writer = hub.add_writer hub_add, hub_remove = hub.add, hub.remove mark_write_fd_as_active = active_writes.add mark_write_gen_as_active = active_writers.add @@ -608,8 +672,8 @@ def _create_write_handlers(self, hub, revoked_tasks = worker_state.revoked getpid = os.getpid - precalc = {ACK: self._create_payload(ACK, (0, )), - NACK: self._create_payload(NACK, (0, ))} + precalc = {ACK: self._create_payload(ACK, (0,)), + NACK: self._create_payload(NACK, (0,))} def _put_back(job, _time=time.time): # puts back at the end of the queue @@ -639,8 +703,8 @@ def _put_back(job, _time=time.time): def on_poll_start(): if outbound and len(busy_workers) < len(all_inqueues): - #print('ALL: %r ACTIVE: %r' % (len(all_inqueues), - # len(active_writes))) + # print('ALL: %r ACTIVE: %r' % (len(all_inqueues), + # len(active_writes))) inactive = diff(active_writes) [hub_add(fd, None, WRITE | ERR, consolidate=True) for fd in inactive] @@ -669,14 +733,26 @@ def on_inqueue_close(fd, proc): pass self.on_inqueue_close = on_inqueue_close - def schedule_writes(ready_fds, shuffle=random.shuffle): + def schedule_writes(ready_fds, total_write_count=[0]): # Schedule write operation to ready file descriptor. # The file descriptor is writeable, but that does not # mean the process is currently reading from the socket. # The socket is buffered so writeable simply means that # the buffer can accept at least 1 byte of data. 
- shuffle(ready_fds) - for ready_fd in ready_fds: + + # This means we have to cycle between the ready fds. + # the first version used shuffle, but this version + # using `total_writes % ready_fds` is about 30% faster + # with many processes, and also leans more towards fairness + # in write stats when used with many processes + # [XXX On OS X, this may vary depending + # on event loop implementation (i.e select vs epoll), so + # have to test further] + num_ready = len(ready_fds) + + for i in range(num_ready): + ready_fd = ready_fds[total_write_count[0] % num_ready] + total_write_count[0] += 1 if ready_fd in active_writes: # already writing to this fd continue @@ -722,7 +798,7 @@ def schedule_writes(ready_fds, shuffle=random.shuffle): except StopIteration: pass except OSError as exc: - if get_errno(exc) != errno.EBADF: + if exc.errno != errno.EBADF: raise else: add_writer(ready_fd, cor) @@ -740,8 +816,9 @@ def send_job(tup): put_message(job) self._quick_put = send_job - def on_not_recovering(proc, fd, job): - error('Process inqueue damaged: %r %r' % (proc, proc.exitcode)) + def on_not_recovering(proc, fd, job, exc): + error('Process inqueue damaged: %r %r: %r', + proc, proc.exitcode, exc, exc_info=1) if proc._is_alive(): proc.terminate() hub.remove(fd) @@ -751,7 +828,7 @@ def _write_job(proc, fd, job): # writes job to the worker process. # Operation must complete if more than one byte of data # was written. If the broker connection is lost - # and no data was written the operation shall be cancelled. + # and no data was written the operation shall be canceled. 
header, body, body_size = job._payload errors = 0 try: @@ -765,12 +842,12 @@ def _write_job(proc, fd, job): try: Hw += send(header, Hw) except Exception as exc: - if get_errno(exc) not in UNAVAIL: + if getattr(exc, 'errno', None) not in UNAVAIL: raise # suspend until more data errors += 1 if errors > 100: - on_not_recovering(proc, fd, job) + on_not_recovering(proc, fd, job, exc) raise StopIteration() yield else: @@ -781,12 +858,12 @@ def _write_job(proc, fd, job): try: Bw += send(body, Bw) except Exception as exc: - if get_errno(exc) not in UNAVAIL: + if getattr(exc, 'errno', None) not in UNAVAIL: raise # suspend until more data errors += 1 if errors > 100: - on_not_recovering(proc, fd, job) + on_not_recovering(proc, fd, job, exc) raise StopIteration() yield else: @@ -806,7 +883,7 @@ def send_ack(response, pid, job, fd, WRITE=WRITE, ERR=ERR): cor = _write_ack(fd, msg, callback=callback) mark_write_gen_as_active(cor) mark_write_fd_as_active(fd) - callback.args = (cor, ) + callback.args = (cor,) add_writer(fd, cor) self.send_ack = send_ack @@ -830,7 +907,7 @@ def _write_ack(fd, ack, callback=None): try: Hw += send(header, Hw) except Exception as exc: - if get_errno(exc) not in UNAVAIL: + if getattr(exc, 'errno', None) not in UNAVAIL: raise yield @@ -839,7 +916,7 @@ def _write_ack(fd, ack, callback=None): try: Bw += send(body, Bw) except Exception as exc: - if get_errno(exc) not in UNAVAIL: + if getattr(exc, 'errno', None) not in UNAVAIL: raise # suspend until more data yield @@ -912,7 +989,7 @@ def flush(self): self._busy_workers.clear() def _flush_writer(self, proc, writer): - fds = set([proc.inq._writer]) + fds = {proc.inq._writer} try: while fds: if not proc._is_alive(): @@ -941,9 +1018,9 @@ def on_grow(self, n): """Grow the pool by ``n`` proceses.""" diff = max(self._processes - len(self._queues), 0) if diff: - self._queues.update( - dict((self.create_process_queues(), None) for _ in range(diff)) - ) + self._queues.update({ + self.create_process_queues(): None 
for _ in range(diff) + }) def on_shrink(self, n): """Shrink the pool by ``n`` processes.""" @@ -1041,7 +1118,7 @@ def _stop_task_handler(task_handler): try: proc.inq.put(None) except OSError as exc: - if get_errno(exc) != errno.EBADF: + if exc.errno != errno.EBADF: raise def create_result_handler(self): @@ -1080,26 +1157,24 @@ def process_flush_queues(self, proc): all tasks that have not been started will be discarded. In Celery this is called whenever the transport connection is lost - (consumer restart). + (consumer restart), and when a process is terminated. """ resq = proc.outq._reader on_state_change = self._result_handler.on_state_change - fds = set([resq]) + fds = {resq} while fds and not resq.closed and self._state != TERMINATE: readable, _, again = _select(fds, None, fds, timeout=0.01) if readable: try: task = resq.recv() except (OSError, IOError, EOFError) as exc: - if get_errno(exc) == errno.EINTR: + _errno = getattr(exc, 'errno', None) + if _errno == errno.EINTR: continue - elif get_errno(exc) == errno.EAGAIN: + elif _errno == errno.EAGAIN: break - else: - debug('got %r while flushing process %r', - exc, proc, exc_info=1) - if get_errno(exc) not in UNAVAIL: + elif _errno not in UNAVAIL: debug('got %r while flushing process %r', exc, proc, exc_info=1) break @@ -1137,8 +1212,6 @@ def on_partial_read(self, job, proc): self._queues[self.create_process_queues()] = None except ValueError: pass - # Not in queue map, make sure sockets are closed. - #self.destroy_queues((proc.inq, proc.outq, proc.synq)) assert len(self._queues) == before def destroy_queues(self, queues, proc): @@ -1181,7 +1254,7 @@ def _set_result_sentinel(cls, _outqueue, _pool): def _help_stuff_finish_args(self): # Pool._help_stuff_finished is a classmethod so we have to use this # trick to modify the arguments passed to it. 
- return (self._pool, ) + return (self._pool,) @classmethod def _help_stuff_finish(cls, pool): diff --git a/celery/concurrency/base.py b/celery/concurrency/base.py index 6b3594a96..e40d1d1a6 100644 --- a/celery/concurrency/base.py +++ b/celery/concurrency/base.py @@ -66,20 +66,22 @@ class BasePool(object): _state = None _pool = None + _does_debug = True #: only used by multiprocessing pool uses_semaphore = False task_join_will_block = True + body_can_be_buffer = False - def __init__(self, limit=None, putlocks=True, - forking_enable=True, callbacks_propagate=(), **options): + def __init__(self, limit=None, putlocks=True, forking_enable=True, + callbacks_propagate=(), app=None, **options): self.limit = limit self.putlocks = putlocks self.options = options self.forking_enable = forking_enable self.callbacks_propagate = callbacks_propagate - self._does_debug = logger.isEnabledFor(logging.DEBUG) + self.app = app def on_start(self): pass @@ -111,7 +113,7 @@ def on_hard_timeout(self, job): def maintain_pool(self, *args, **kwargs): pass - def terminate_job(self, pid): + def terminate_job(self, pid, signal=None): raise NotImplementedError( '{0} does not implement kill_job'.format(type(self))) @@ -128,6 +130,7 @@ def terminate(self): self.on_terminate() def start(self): + self._does_debug = logger.isEnabledFor(logging.DEBUG) self.on_start() self._state = self.RUN @@ -156,7 +159,9 @@ def apply_async(self, target, args=[], kwargs={}, **options): **options) def _get_info(self): - return {} + return { + 'max-concurrency': self.limit, + } @property def info(self): diff --git a/celery/concurrency/eventlet.py b/celery/concurrency/eventlet.py index e5319a9b8..c867fd01b 100644 --- a/celery/concurrency/eventlet.py +++ b/celery/concurrency/eventlet.py @@ -28,11 +28,13 @@ import warnings warnings.warn(RuntimeWarning(W_RACE % side)) +# idiotic pep8.py does not allow expressions before imports +# so have to silence errors here +from kombu.async import timer as _timer # noqa -from celery 
import signals -from celery.utils import timer2 +from celery import signals # noqa -from . import base +from . import base # noqa def apply_target(target, args=(), kwargs={}, callback=None, @@ -41,12 +43,12 @@ def apply_target(target, args=(), kwargs={}, callback=None, pid=getpid()) -class Schedule(timer2.Schedule): +class Timer(_timer.Timer): def __init__(self, *args, **kwargs): from eventlet.greenthread import spawn_after from greenlet import GreenletExit - super(Schedule, self).__init__(*args, **kwargs) + super(Timer, self).__init__(*args, **kwargs) self.GreenletExit = GreenletExit self._spawn_after = spawn_after @@ -60,7 +62,7 @@ def _enter(self, eta, priority, entry): g.entry = entry g.eta = eta g.priority = priority - g.cancelled = False + g.canceled = False return g def _entry_exit(self, g, entry): @@ -69,7 +71,7 @@ def _entry_exit(self, g, entry): g.wait() except self.GreenletExit: entry.cancel() - g.cancelled = True + g.canceled = True finally: self._queue.discard(g) @@ -81,28 +83,15 @@ def clear(self): except (KeyError, self.GreenletExit): pass - @property - def queue(self): - return self._queue - - -class Timer(timer2.Timer): - Schedule = Schedule - - def ensure_started(self): - pass - - def stop(self): - self.schedule.clear() - def cancel(self, tref): try: tref.cancel() - except self.schedule.GreenletExit: + except self.GreenletExit: pass - def start(self): - pass + @property + def queue(self): + return self._queue class TaskPool(base.BasePool): @@ -142,3 +131,22 @@ def on_apply(self, target, args=None, kwargs=None, callback=None, self._quick_put(apply_target, target, args, kwargs, callback, accept_callback, self.getpid) + + def grow(self, n=1): + limit = self.limit + n + self._pool.resize(limit) + self.limit = limit + + def shrink(self, n=1): + limit = self.limit - n + self._pool.resize(limit) + self.limit = limit + + def _get_info(self): + info = super(TaskPool, self)._get_info() + info.update({ + 'max-concurrency': self.limit, + 'free-threads': 
self._pool.free(), + 'running-threads': self._pool.running(), + }) + return info diff --git a/celery/concurrency/gevent.py b/celery/concurrency/gevent.py index f89de92b2..dc0f13203 100644 --- a/celery/concurrency/gevent.py +++ b/celery/concurrency/gevent.py @@ -15,7 +15,7 @@ except ImportError: # pragma: no cover Timeout = None # noqa -from celery.utils import timer2 +from kombu.async import timer as _timer from .base import apply_target, BasePool @@ -30,12 +30,12 @@ def apply_timeout(target, args=(), kwargs={}, callback=None, with Timeout(timeout): return apply_target(target, args, kwargs, callback, accept_callback, pid, - propagate=(Timeout, ), **rest) + propagate=(Timeout,), **rest) except Timeout: return timeout_callback(False, timeout) -class Schedule(timer2.Schedule): +class Timer(_timer.Timer): def __init__(self, *args, **kwargs): from gevent.greenlet import Greenlet, GreenletExit @@ -45,7 +45,7 @@ class _Greenlet(Greenlet): self._Greenlet = _Greenlet self._GreenletExit = GreenletExit - super(Schedule, self).__init__(*args, **kwargs) + super(Timer, self).__init__(*args, **kwargs) self._queue = set() def _enter(self, eta, priority, entry): @@ -56,7 +56,7 @@ def _enter(self, eta, priority, entry): g.entry = entry g.eta = eta g.priority = priority - g.cancelled = False + g.canceled = False return g def _entry_exit(self, g): @@ -78,19 +78,6 @@ def queue(self): return self._queue -class Timer(timer2.Timer): - Schedule = Schedule - - def ensure_started(self): - pass - - def stop(self): - self.schedule.clear() - - def start(self): - pass - - class TaskPool(BasePool): Timer = Timer diff --git a/celery/concurrency/prefork.py b/celery/concurrency/prefork.py index b579d0e10..b4054d4c8 100644 --- a/celery/concurrency/prefork.py +++ b/celery/concurrency/prefork.py @@ -10,6 +10,7 @@ import os +from billiard.common import REMAP_SIGTERM, TERM_SIGNAME from billiard import forking_enable from billiard.pool import RUN, CLOSE, Pool as BlockingPool @@ -27,14 +28,15 @@ __all__ = 
['TaskPool', 'process_initializer', 'process_destructor'] #: List of signals to reset when a child process starts. -WORKER_SIGRESET = frozenset(['SIGTERM', - 'SIGHUP', - 'SIGTTIN', - 'SIGTTOU', - 'SIGUSR1']) +WORKER_SIGRESET = { + 'SIGTERM', 'SIGHUP', 'SIGTTIN', 'SIGTTOU', 'SIGUSR1', +} #: List of signals to ignore when a child process starts. -WORKER_SIGIGNORE = frozenset(['SIGINT']) +if REMAP_SIGTERM: + WORKER_SIGIGNORE = {'SIGINT', TERM_SIGNAME} +else: + WORKER_SIGIGNORE = {'SIGINT'} logger = get_logger(__name__) warning, debug = logger.warning, logger.debug @@ -68,7 +70,7 @@ def process_initializer(app, hostname): hostname=hostname) if os.environ.get('FORKED_BY_MULTIPROCESSING'): # pool did execv after fork - trace.setup_worker_optimizations(app) + trace.setup_worker_optimizations(app, hostname) else: app.set_current() set_default_app(app) @@ -79,6 +81,8 @@ def process_initializer(app, hostname): for name, task in items(app.tasks): task.__trace__ = build_tracer(name, task, app.loader, hostname, app=app) + from celery.worker import state as worker_state + worker_state.reset_state() signals.worker_process_init.send(sender=None) @@ -156,10 +160,7 @@ def on_close(self): self._pool.close() def _get_info(self): - try: - write_stats = self._pool.human_write_stats - except AttributeError: - write_stats = lambda: 'N/A' # only supported by asynpool + write_stats = getattr(self._pool, 'human_write_stats', None) return { 'max-concurrency': self.limit, 'processes': [p.pid for p in self._pool._pool], @@ -167,7 +168,7 @@ def _get_info(self): 'put-guarded-by-semaphore': self.putlocks, 'timeouts': (self._pool.soft_timeout or 0, self._pool.timeout or 0), - 'writes': write_stats() + 'writes': write_stats() if write_stats is not None else 'N/A', } @property diff --git a/celery/concurrency/solo.py b/celery/concurrency/solo.py index a2dc19970..434071908 100644 --- a/celery/concurrency/solo.py +++ b/celery/concurrency/solo.py @@ -17,10 +17,12 @@ class TaskPool(BasePool): """Solo task 
pool (blocking, inline, fast).""" + body_can_be_buffer = True def __init__(self, *args, **kwargs): super(TaskPool, self).__init__(*args, **kwargs) self.on_apply = apply_target + self.limit = 1 def _get_info(self): return {'max-concurrency': 1, diff --git a/celery/concurrency/threads.py b/celery/concurrency/threads.py index fee901ecf..cb1d4b8d7 100644 --- a/celery/concurrency/threads.py +++ b/celery/concurrency/threads.py @@ -34,6 +34,9 @@ def __init__(self, *args, **kwargs): super(TaskPool, self).__init__(*args, **kwargs) def on_start(self): + # make sure all threads have the same current_app. + self.app.set_default() + self._pool = self.ThreadPool(self.limit) # threadpool stores all work requests until they are processed # we don't need this dict, and it occupies way too much memory. diff --git a/celery/contrib/abortable.py b/celery/contrib/abortable.py index 37dc30d92..eaacebde7 100644 --- a/celery/contrib/abortable.py +++ b/celery/contrib/abortable.py @@ -28,49 +28,52 @@ .. code-block:: python - from celery.contrib.abortable import AbortableTask - from celery.utils.log import get_task_logger - - logger = get_logger(__name__) - - class MyLongRunningTask(AbortableTask): - - def run(self, **kwargs): - results = [] - for x in range(100): - # Check after every 5 loops.. - if x % 5 == 0: # alternatively, check when some timer is due - if self.is_aborted(**kwargs): - # Respect the aborted status and terminate - # gracefully - logger.warning('Task aborted.') - return - y = do_something_expensive(x) - results.append(y) - logger.info('Task finished.') - return results - + from __future__ import absolute_import + + from celery.contrib.abortable import AbortableTask + from celery.utils.log import get_task_logger + + from proj.celery import app + + logger = get_logger(__name__) + + @app.task(bind=True, base=AbortableTask) + def long_running_task(self): + results = [] + for i in range(100): + # check after every 5 iterations... 
+ # (or alternatively, check when some timer is due) + if not i % 5: + if self.is_aborted(): + # respect aborted state, and terminate gracefully. + logger.warning('Task aborted') + return + value = do_something_expensive(i) + results.append(y) + logger.info('Task complete') + return results In the producer: .. code-block:: python - from myproject.tasks import MyLongRunningTask + from __future__ import absolute_import - def myview(request): + import time - async_result = MyLongRunningTask.delay() - # async_result is of type AbortableAsyncResult + from proj.tasks import MyLongRunningTask - # After 10 seconds, abort the task - time.sleep(10) - async_result.abort() + def myview(request): + # result is of type AbortableAsyncResult + result = long_running_task.delay() - ... + # abort the task after 10 seconds + time.sleep(10) + result.abort() -After the `async_result.abort()` call, the task execution is not +After the `result.abort()` call, the task execution is not aborted immediately. In fact, it is not guaranteed to abort at all. Keep -checking the `async_result` status, or call `async_result.wait()` to +checking `result.state` status, or call `result.get(timeout=)` to have it block until the task is finished. .. note:: @@ -129,9 +132,9 @@ def abort(self): """ # TODO: store_result requires all four arguments to be set, - # but only status should be updated here + # but only state should be updated here return self.backend.store_result(self.id, result=None, - status=ABORTED, traceback=None) + state=ABORTED, traceback=None) class AbortableTask(Task): diff --git a/celery/contrib/batches.py b/celery/contrib/batches.py index 0248ebf8d..c2ca0c41b 100644 --- a/celery/contrib/batches.py +++ b/celery/contrib/batches.py @@ -8,7 +8,7 @@ .. warning:: For this to work you have to set - :setting:`CELERYD_PREFETCH_MULTIPLIER` to zero, or some value where + :setting:`worker_prefetch_multiplier` to zero, or some value where the final multiplied value is higher than ``flush_every``. 
In the future we hope to add the ability to direct batching tasks @@ -17,7 +17,7 @@ **Simple Example** A click counter that flushes the buffer every 100 messages, and every -seconds. Does not do anything with the data, but can easily be modified +10 seconds. Does not do anything with the data, but can easily be modified to store it in a database. .. code-block:: python @@ -57,7 +57,7 @@ def wot_api(requests): ) # use mark_as_done to manually return response data for response, request in zip(reponses, requests): - app.backend.mark_as_done(request.id, response) + app.backend.mark_as_done(request.id, response, request) def wot_api_real(urls): @@ -85,11 +85,14 @@ def wot_api_real(urls): from itertools import count +from kombu.five import buffer_t + from celery.task import Task from celery.five import Empty, Queue from celery.utils.log import get_logger -from celery.worker.job import Request +from celery.worker.request import Request from celery.utils import noop +from celery.worker.strategy import proto1_to_proto2 __all__ = ['Batches'] @@ -163,8 +166,8 @@ def __init__(self, id, name, args, kwargs, delivery_info, hostname): @classmethod def from_request(cls, request): - return cls(request.id, request.name, request.args, - request.kwargs, request.delivery_info, request.hostname) + return cls(request.id, request.name, request.body[0], + request.body[1], request.delivery_info, request.hostname) class Batches(Task): @@ -194,12 +197,25 @@ def Strategy(self, task, app, consumer): timer = consumer.timer put_buffer = self._buffer.put flush_buffer = self._do_flush + body_can_be_buffer = consumer.pool.body_can_be_buffer def task_message_handler(message, body, ack, reject, callbacks, **kw): - request = Req(body, on_ack=ack, app=app, hostname=hostname, - events=eventer, task=task, - connection_errors=connection_errors, - delivery_info=message.delivery_info) + if body is None: + body, headers, decoded, utc = ( + message.body, message.headers, False, True, + ) + if not 
body_can_be_buffer: + body = bytes(body) if isinstance(body, buffer_t) else body + else: + body, headers, decoded, utc = proto1_to_proto2(message, body) + + request = Req( + message, + on_ack=ack, on_reject=reject, app=app, hostname=hostname, + eventer=eventer, task=task, + body=body, headers=headers, decoded=decoded, utc=utc, + connection_errors=connection_errors, + ) put_buffer(request) if self._tref is None: # first request starts flush timer. @@ -214,7 +230,7 @@ def task_message_handler(message, body, ack, reject, callbacks, **kw): def flush(self, requests): return self.apply_buffer(requests, ([SimpleRequest.from_request(r) - for r in requests], )) + for r in requests],)) def _do_flush(self): logger.debug('Batches: Wake-up to flush buffer...') @@ -225,8 +241,9 @@ def _do_flush(self): logger.debug('Batches: Buffer complete: %s', len(requests)) self.flush(requests) if not requests: - logger.debug('Batches: Cancelling timer: Nothing in buffer.') - self._tref.cancel() # cancel timer. + logger.debug('Batches: Canceling timer: Nothing in buffer.') + if self._tref: + self._tref.cancel() # cancel timer. self._tref = None def apply_buffer(self, requests, args=(), kwargs={}): diff --git a/celery/contrib/methods.py b/celery/contrib/methods.py deleted file mode 100644 index 56aa7f479..000000000 --- a/celery/contrib/methods.py +++ /dev/null @@ -1,126 +0,0 @@ -# -*- coding: utf-8 -*- -""" -celery.contrib.methods -====================== - -Task decorator that supports creating tasks out of methods. - -Examples --------- - -.. code-block:: python - - from celery.contrib.methods import task - - class X(object): - - @task() - def add(self, x, y): - return x + y - -or with any task decorator: - -.. code-block:: python - - from celery.contrib.methods import task_method - - class X(object): - - @app.task(filter=task_method) - def add(self, x, y): - return x + y - -.. 
note:: - - The task must use the new Task base class (:class:`celery.Task`), - and the old base class using classmethods (``celery.task.Task``, - ``celery.task.base.Task``). - - This means that you have to use the task decorator from a Celery app - instance, and not the old-API: - - .. code-block:: python - - - from celery import task # BAD - from celery.task import task # ALSO BAD - - # GOOD: - app = Celery(...) - - @app.task(filter=task_method) - def foo(self): pass - - # ALSO GOOD: - from celery import current_app - - @current_app.task(filter=task_method) - def foo(self): pass - - # ALSO GOOD: - from celery import shared_task - - @shared_task(filter=task_method) - def foo(self): pass - -Caveats -------- - -- Automatic naming won't be able to know what the class name is. - - The name will still be module_name + task_name, - so two methods with the same name in the same module will collide - so that only one task can run: - - .. code-block:: python - - class A(object): - - @task() - def add(self, x, y): - return x + y - - class B(object): - - @task() - def add(self, x, y): - return x + y - - would have to be written as: - - .. 
code-block:: python - - class A(object): - @task(name='A.add') - def add(self, x, y): - return x + y - - class B(object): - @task(name='B.add') - def add(self, x, y): - return x + y - -""" - -from __future__ import absolute_import - -from celery import current_app - -__all__ = ['task_method', 'task'] - - -class task_method(object): - - def __init__(self, task, *args, **kwargs): - self.task = task - - def __get__(self, obj, type=None): - if obj is None: - return self.task - task = self.task.__class__() - task.__self__ = obj - return task - - -def task(*args, **kwargs): - return current_app.task(*args, **dict(kwargs, filter=task_method)) diff --git a/celery/contrib/migrate.py b/celery/contrib/migrate.py index e4a10e9b9..8919d9b9f 100644 --- a/celery/contrib/migrate.py +++ b/celery/contrib/migrate.py @@ -99,7 +99,7 @@ def migrate_tasks(source, dest, migrate=migrate_task, app=None, queues=None, **kwargs): app = app_or_default(app) queues = prepare_queues(queues) - producer = app.amqp.TaskProducer(dest) + producer = app.amqp.Producer(dest) migrate = partial(migrate, producer, queues=queues) def on_declare_queue(queue): @@ -141,7 +141,7 @@ def move(predicate, connection=None, exchange=None, routing_key=None, :keyword connection: Custom connection to use. :keyword source: Optional list of source queues to use instead of the - default (which is the queues in :setting:`CELERY_QUEUES`). + default (which is the queues in :setting:`task_queues`). This list can also contain new :class:`~kombu.entity.Queue` instances. :keyword exchange: Default destination exchange. :keyword routing_key: Default destination routing key. 
@@ -186,7 +186,7 @@ def transform(value): app = app_or_default(app) queues = [_maybe_queue(app, queue) for queue in source or []] or None with app.connection_or_acquire(connection, pool=False) as conn: - producer = app.amqp.TaskProducer(conn) + producer = app.amqp.Producer(conn) state = State() def on_task(body, message): @@ -250,7 +250,7 @@ def start_filter(app, conn, filter, limit=None, timeout=1.0, if isinstance(tasks, string_t): tasks = set(tasks.split(',')) if tasks is None: - tasks = set([]) + tasks = set() def update_state(body, message): state.count += 1 diff --git a/celery/contrib/rdb.py b/celery/contrib/rdb.py index 3e9f55bba..9b0f16c85 100644 --- a/celery/contrib/rdb.py +++ b/celery/contrib/rdb.py @@ -34,7 +34,7 @@ def add(x, y): base port. The selected port will be logged by the worker. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import errno import os @@ -43,10 +43,9 @@ def add(x, y): from pdb import Pdb -from billiard import current_process +from billiard.process import current_process from celery.five import range -from celery.platforms import ignore_errno __all__ = ['CELERY_RDB_HOST', 'CELERY_RDB_PORT', 'default_port', 'Rdb', 'debugger', 'set_trace'] @@ -133,13 +132,23 @@ def get_avail_port(self, host, port, search_limit=100, skew=+0): def say(self, m): print(m, file=self.out) + def __enter__(self): + return self + + def __exit__(self, *exc_info): + self._close_session() + def _close_session(self): self.stdin, self.stdout = sys.stdin, sys.stdout = self._prev_handles - self._handle.close() - self._client.close() - self._sock.close() - self.active = False - self.say(SESSION_ENDED.format(self=self)) + if self.active: + if self._handle is not None: + self._handle.close() + if self._client is not None: + self._client.close() + if self._sock is not None: + self._sock.close() + self.active = False + self.say(SESSION_ENDED.format(self=self)) def do_continue(self, arg): 
self._close_session() @@ -153,12 +162,6 @@ def do_quit(self, arg): return 1 do_q = do_exit = do_quit - def set_trace(self, frame=None): - if frame is None: - frame = _frame().f_back - with ignore_errno(errno.ECONNRESET): - Pdb.set_trace(self, frame) - def set_quit(self): # this raises a BdbQuit exception that we are unable to catch. sys.settrace(None) diff --git a/celery/contrib/sphinx.py b/celery/contrib/sphinx.py index d22d82f5f..c72513545 100644 --- a/celery/contrib/sphinx.py +++ b/celery/contrib/sphinx.py @@ -32,12 +32,11 @@ """ from __future__ import absolute_import -from inspect import formatargspec, getargspec - from sphinx.domains.python import PyModulelevel from sphinx.ext.autodoc import FunctionDocumenter from celery.app.task import BaseTask +from celery.five import formatargspec, getfullargspec class TaskDocumenter(FunctionDocumenter): @@ -51,7 +50,7 @@ def can_document_member(cls, member, membername, isattr, parent): def format_args(self): wrapped = getattr(self.object, '__wrapped__') if wrapped is not None: - argspec = getargspec(wrapped) + argspec = getfullargspec(wrapped) fmt = formatargspec(*argspec) fmt = fmt.replace('\\', '\\\\') return fmt diff --git a/celery/datastructures.py b/celery/datastructures.py index 9c36a3957..19a1b6398 100644 --- a/celery/datastructures.py +++ b/celery/datastructures.py @@ -11,17 +11,19 @@ import sys import time -from collections import defaultdict, Mapping, MutableMapping, MutableSet -from heapq import heappush, heappop -from functools import partial +from collections import ( + Callable, Mapping, MutableMapping, MutableSet, defaultdict, +) +from heapq import heapify, heappush, heappop from itertools import chain from billiard.einfo import ExceptionInfo # noqa -from kombu.utils.encoding import safe_str +from kombu.utils.encoding import safe_str, bytes_to_str from kombu.utils.limits import TokenBucket # noqa -from celery.five import items +from celery.five import items, values from celery.utils.functional import 
LRUCache, first, uniq # noqa +from celery.utils.text import match_case try: from django.utils.functional import LazyObject, LazySettings @@ -30,6 +32,10 @@ class LazyObject(object): # noqa pass LazySettings = LazyObject # noqa +__all__ = ['GraphFormatter', 'CycleError', 'DependencyGraph', + 'AttributeDictMixin', 'AttributeDict', 'DictAttribute', + 'ConfigurationView', 'LimitedSet'] + DOT_HEAD = """ {IN}{type} {id} {{ {INp}graph [{attrs}] @@ -41,9 +47,9 @@ class LazyObject(object): # noqa DOT_DIRS = {'graph': '--', 'digraph': '->'} DOT_TAIL = '{IN}}}' -__all__ = ['GraphFormatter', 'CycleError', 'DependencyGraph', - 'AttributeDictMixin', 'AttributeDict', 'DictAttribute', - 'ConfigurationView', 'LimitedSet'] +REPR_LIMITED_SET = """\ +<{name}({size}): maxlen={0.maxlen}, expires={0.expires}, minlen={0.minlen}>\ +""" def force_mapping(m): @@ -186,9 +192,9 @@ def topsort(self): graph = DependencyGraph() components = self._tarjan72() - NC = dict((node, component) - for component in components - for node in component) + NC = { + node: component for component in components for node in component + } for component in components: graph.add_arc(component) for node in self: @@ -288,7 +294,9 @@ def to_dot(self, fh, formatter=None): """ seen = set() draw = formatter or self.formatter - P = partial(print, file=fh) + + def P(s): + print(bytes_to_str(s), file=fh) def if_not_seen(fun, obj): if draw.label(obj) not in seen: @@ -387,11 +395,8 @@ def get(self, key, default=None): return default def setdefault(self, key, default): - try: - return self[key] - except KeyError: + if key not in self: self[key] = default - return default def __getitem__(self, key): try: @@ -451,13 +456,30 @@ class ConfigurationView(AttributeDictMixin): :param defaults: List of dicts containing the default configuration. 
""" + key_t = None changes = None defaults = None _order = None - def __init__(self, changes, defaults): - self.__dict__.update(changes=changes, defaults=defaults, - _order=[changes] + defaults) + def __init__(self, changes, defaults=None, key_t=None, prefix=None): + defaults = [] if defaults is None else defaults + self.__dict__.update( + changes=changes, + defaults=defaults, + key_t=key_t, + _order=[changes] + defaults, + prefix=prefix.rstrip('_') + '_' if prefix else prefix, + ) + + def _to_keys(self, key): + prefix = self.prefix + if prefix: + pkey = prefix + key if not key.startswith(prefix) else key + return match_case(pkey, prefix), self._key(key) + return self._key(key), + + def _key(self, key): + return self.key_t(key) if self.key_t is not None else key def add_defaults(self, d): d = force_mapping(d) @@ -465,15 +487,20 @@ def add_defaults(self, d): self._order.insert(1, d) def __getitem__(self, key): - for d in self._order: - try: - return d[key] - except KeyError: - pass + keys = self._to_keys(key) + for k in keys: + for d in self._order: + try: + return d[k] + except KeyError: + pass + if len(keys) > 1: + raise KeyError( + 'Key not found: {0!r} (with prefix: {0!r})'.format(*keys)) raise KeyError(key) def __setitem__(self, key, value): - self.changes[key] = value + self.changes[self._key(key)] = value def first(self, *keys): return first(None, (self.get(key) for key in keys)) @@ -489,17 +516,16 @@ def clear(self): self.changes.clear() def setdefault(self, key, default): - try: - return self[key] - except KeyError: + key = self._key(key) + if key not in self: self[key] = default - return default def update(self, *args, **kwargs): return self.changes.update(*args, **kwargs) def __contains__(self, key): - return any(key in m for m in self._order) + keys = self._to_keys(key) + return any(any(k in m for k in keys) for m in self._order) def __bool__(self): return any(self._order) @@ -521,8 +547,19 @@ def _iter(self, op): # changes takes precedence. 
return chain(*[op(d) for d in reversed(self._order)]) + def swap_with(self, other): + changes = other.__dict__['changes'] + defaults = other.__dict__['defaults'] + self.__dict__.update( + changes=changes, + defaults=defaults, + key_t=other.__dict__['key_t'], + prefix=other.__dict__['prefix'], + _order=[changes] + defaults + ) + def _iterate_keys(self): - return uniq(self._iter(lambda d: d)) + return uniq(self._iter(lambda d: d.keys())) iterkeys = _iterate_keys def _iterate_items(self): @@ -547,121 +584,228 @@ def items(self): def values(self): return list(self._iterate_values()) - MutableMapping.register(ConfigurationView) class LimitedSet(object): - """Kind-of Set with limitations. + """Kind-of Set (or priority queue) with limitations. Good for when you need to test for membership (`a in set`), - but the list might become too big. + but the set should not grow unbounded. + + Maxlen is enforced at all times, so if the limit is reached + we will also remove non-expired items. + + You can also configure minlen, which is the minimal residual size + of the set. + + All arguments are optional, and no limits are enabled by default. + + :keyword maxlen: Optional max number of items. + + Adding more items than maxlen will result in immediate + removal of items sorted by oldest insertion time. + + :keyword expires: TTL for all items. + + Expired items are purged as keys are inserted. + + :keyword minlen: Minimal residual size of this set. + .. versionadded:: 4.0 + + Value must be less than ``maxlen`` if both are configured. + + Older expired items will be deleted, only after the set + exceeds minlen number of items. - :keyword maxlen: Maximum number of members before we start - evicting expired members. - :keyword expires: Time in seconds, before a membership expires. + :keyword data: Initial data to initialize set with. + Can be an iterable of ``(key, value)`` pairs, + a dict (``{key: insertion_time}``), or another instance + of :class:`LimitedSet`. 
+ + Example:: + + >>> s = LimitedSet(maxlen=50000, expires=3600, minlen=4000) + >>> for i in range(60000): + ... s.add(i) + ... s.add(str(i)) + ... + >>> 57000 in s # last 50k inserted values are kept + True + >>> '10' in s # '10' did expire and was purged from set. + False + >>> len(s) # maxlen is reached + 50000 + >>> s.purge(now=time.time() + 7200) # clock + 2 hours + >>> len(s) # now only minlen items are cached + 4000 + >>>> 57000 in s # even this item is gone now + False """ - def __init__(self, maxlen=None, expires=None, data=None, heap=None): - self.maxlen = maxlen - self.expires = expires - self._data = {} if data is None else data - self._heap = [] if heap is None else heap + max_heap_percent_overload = 15 + + def __init__(self, maxlen=0, expires=0, data=None, minlen=0): + self.maxlen = 0 if maxlen is None else maxlen + self.minlen = 0 if minlen is None else minlen + self.expires = 0 if expires is None else expires + self._data = {} + self._heap = [] + # make shortcuts - self.__len__ = self._heap.__len__ - self.__iter__ = self._heap.__iter__ + self.__len__ = self._data.__len__ self.__contains__ = self._data.__contains__ - def add(self, value, now=time.time): - """Add a new member.""" - # offset is there to modify the length of the list, - # this way we can expire an item before inserting the value, - # and it will end up in correct order. - self.purge(1, offset=1) - inserted = now() - self._data[value] = inserted - heappush(self._heap, (inserted, value)) + if data: + # import items from data + self.update(data) + + if not self.maxlen >= self.minlen >= 0: + raise ValueError( + 'minlen must be a positive number, less or equal to maxlen.') + if self.expires < 0: + raise ValueError('expires cannot be negative!') + + def _refresh_heap(self): + """Time consuming recreating of heap. 
Do not run this too often.""" + self._heap[:] = [entry for entry in values(self._data)] + heapify(self._heap) def clear(self): - """Remove all members""" + """Clear all data, start from scratch again.""" self._data.clear() self._heap[:] = [] - def discard(self, value): - """Remove membership by finding value.""" - try: - itime = self._data[value] - except KeyError: + def add(self, item, now=None): + """Add a new item, or reset the expiry time of an existing item.""" + now = now or time.time() + if item in self._data: + self.discard(item) + entry = (now, item) + self._data[item] = entry + heappush(self._heap, entry) + if self.maxlen and len(self._data) >= self.maxlen: + self.purge() + + def update(self, other): + """Update this set from other LimitedSet, dict or iterable.""" + if not other: return + if isinstance(other, LimitedSet): + self._data.update(other._data) + self._refresh_heap() + self.purge() + elif isinstance(other, dict): + # revokes are sent as a dict + for key, inserted in items(other): + if isinstance(inserted, (tuple, list)): + # in case someone uses ._data directly for sending update + inserted = inserted[0] + if not isinstance(inserted, float): + raise ValueError( + 'Expecting float timestamp, got type ' + '{0!r} with value: {1}'.format( + type(inserted), inserted)) + self.add(key, inserted) + else: + # XXX AVOID THIS, it could keep old data if more parties + # exchange them all over and over again + for obj in other: + self.add(obj) + + def discard(self, item): + # mark an existing item as removed. If KeyError is not found, pass. 
try: - self._heap.remove((value, itime)) - except ValueError: + entry = self._data.pop(item) + except KeyError: pass - self._data.pop(value, None) - pop_value = discard # XXX compat + else: + if self._heap_overload > self.max_heap_percent_overload: + self._refresh_heap() + pop_value = discard - def purge(self, limit=None, offset=0, now=time.time): - """Purge expired items.""" - H, maxlen = self._heap, self.maxlen - if not maxlen: - return + def purge(self, now=None): + """Check oldest items and remove them if needed. - # If the data/heap gets corrupted and limit is None - # this will go into an infinite loop, so limit must - # have a value to guard the loop. - limit = len(self) + offset if limit is None else limit + :keyword now: Time of purging -- by default right now. + This can be useful for unit testing. - i = 0 - while len(self) + offset > maxlen: - if i >= limit: - break - try: - item = heappop(H) - except IndexError: - break - if self.expires: - if now() < item[0] + self.expires: - heappush(H, item) - break + """ + now = now or time.time() + now = now() if isinstance(now, Callable) else now + if self.maxlen: + while len(self._data) > self.maxlen: + self.pop() + # time based expiring: + if self.expires: + while len(self._data) > self.minlen >= 0: + inserted_time, _ = self._heap[0] + if inserted_time + self.expires > now: + break # oldest item has not expired yet + self.pop() + + def pop(self, default=None): + """Remove and return the oldest item, or :const:`None` when empty.""" + while self._heap: + _, item = heappop(self._heap) try: - self._data.pop(item[1]) - except KeyError: # out of sync with heap + self._data.pop(item) + except KeyError: pass - i += 1 - - def update(self, other, heappush=heappush): - if isinstance(other, LimitedSet): - self._data.update(other._data) - self._heap.extend(other._heap) - self._heap.sort() - else: - for obj in other: - self.add(obj) + else: + return item + return default def as_dict(self): - return self._data + """Whole set 
as serializable dictionary. + + Example:: + + >>> s = LimitedSet(maxlen=200) + >>> r = LimitedSet(maxlen=200) + >>> for i in range(500): + ... s.add(i) + ... + >>> r.update(s.as_dict()) + >>> r == s + True + + """ + return {key: inserted for inserted, key in values(self._data)} def __eq__(self, other): - return self._heap == other._heap + return self._data == other._data def __ne__(self, other): return not self.__eq__(other) def __repr__(self): - return 'LimitedSet({0})'.format(len(self)) + return REPR_LIMITED_SET.format( + self, name=type(self).__name__, size=len(self), + ) def __iter__(self): - return (item[1] for item in self._heap) + return (i for _, i in sorted(values(self._data))) def __len__(self): - return len(self._heap) + return len(self._data) def __contains__(self, key): return key in self._data def __reduce__(self): return self.__class__, ( - self.maxlen, self.expires, self._data, self._heap, - ) + self.maxlen, self.expires, self.as_dict(), self.minlen) + + def __bool__(self): + return bool(self._data) + __nonzero__ = __bool__ # Py2 + + @property + def _heap_overload(self): + """Compute how much is heap bigger than data [percents].""" + if not self._data: + return len(self._heap) + return len(self._heap) * 100 / len(self._data) - 100 MutableSet.register(LimitedSet) diff --git a/celery/events/__init__.py b/celery/events/__init__.py index 931f3953e..8c77a9751 100644 --- a/celery/events/__init__.py +++ b/celery/events/__init__.py @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~ Events is a stream of messages sent for certain actions occurring - in the worker (and clients if :setting:`CELERY_SEND_TASK_SENT_EVENT` + in the worker (and clients if :setting:`task_send_sent_event` is enabled), used for monitoring purposes. 
""" @@ -13,9 +13,8 @@ import os import time import threading -import warnings -from collections import deque +from collections import defaultdict, deque from contextlib import contextmanager from copy import copy from operator import itemgetter @@ -26,6 +25,7 @@ from kombu.utils import cached_property from celery.app import app_or_default +from celery.five import items from celery.utils import anon_nodename, uuid from celery.utils.functional import dictfilter from celery.utils.timeutils import adjust_timestamp, utcoffset, maybe_s_to_ms @@ -36,14 +36,6 @@ _TZGETTER = itemgetter('utcoffset', 'timestamp') -W_YAJL = """ -anyjson is currently using the yajl library. -This json implementation is broken, it severely truncates floats -so timestamps will not work. - -Please uninstall yajl or force anyjson to use a different library. -""" - CLIENT_CLOCK_SKEW = -1 @@ -112,7 +104,7 @@ class EventDispatcher(object): You need to :meth:`close` this after use. """ - DISABLED_TRANSPORTS = set(['sql']) + DISABLED_TRANSPORTS = {'sql'} app = None @@ -124,25 +116,31 @@ class EventDispatcher(object): def __init__(self, connection=None, hostname=None, enabled=True, channel=None, buffer_while_offline=True, app=None, - serializer=None, groups=None): + serializer=None, groups=None, delivery_mode=1, + buffer_group=None, buffer_limit=24, on_send_buffered=None): self.app = app_or_default(app or self.app) self.connection = connection self.channel = channel self.hostname = hostname or anon_nodename() self.buffer_while_offline = buffer_while_offline + self.buffer_group = buffer_group or frozenset() + self.buffer_limit = buffer_limit + self.on_send_buffered = on_send_buffered + self._group_buffer = defaultdict(list) self.mutex = threading.Lock() self.producer = None self._outbound_buffer = deque() - self.serializer = serializer or self.app.conf.CELERY_EVENT_SERIALIZER + self.serializer = serializer or self.app.conf.event_serializer self.on_enabled = set() self.on_disabled = set() self.groups = 
set(groups or []) self.tzoffset = [-time.timezone, -time.altzone] self.clock = self.app.clock + self.delivery_mode = delivery_mode if not connection and channel: self.connection = channel.connection.client self.enabled = enabled - conninfo = self.connection or self.app.connection() + conninfo = self.connection or self.app.connection_for_write() self.exchange = get_exchange(conninfo) if conninfo.transport.driver_type in self.DISABLED_TRANSPORTS: self.enabled = False @@ -150,12 +148,6 @@ def __init__(self, connection=None, hostname=None, enabled=True, self.enable() self.headers = {'hostname': self.hostname} self.pid = os.getpid() - self.warn_if_yajl() - - def warn_if_yajl(self): - import anyjson - if anyjson.implementation.name == 'yajl': - warnings.warn(UserWarning(W_YAJL)) def __enter__(self): return self @@ -178,9 +170,8 @@ def disable(self): for callback in self.on_disabled: callback() - def publish(self, type, fields, producer, retry=False, - retry_policy=None, blind=False, utcoffset=utcoffset, - Event=Event): + def publish(self, type, fields, producer, + blind=False, Event=Event, **kwargs): """Publish event using a custom :class:`~kombu.Producer` instance. @@ -198,24 +189,35 @@ def publish(self, type, fields, producer, retry=False, :keyword utcoffset: Function returning the current utcoffset in hours. 
""" - + clock = None if blind else self.clock.forward() + event = Event(type, hostname=self.hostname, utcoffset=utcoffset(), + pid=self.pid, clock=clock, **fields) with self.mutex: - clock = None if blind else self.clock.forward() - event = Event(type, hostname=self.hostname, utcoffset=utcoffset(), - pid=self.pid, clock=clock, **fields) - exchange = self.exchange + return self._publish(event, producer, + routing_key=type.replace('-', '.'), **kwargs) + + def _publish(self, event, producer, routing_key, retry=False, + retry_policy=None, utcoffset=utcoffset): + exchange = self.exchange + try: producer.publish( event, - routing_key=type.replace('-', '.'), + routing_key=routing_key, exchange=exchange.name, retry=retry, retry_policy=retry_policy, declare=[exchange], serializer=self.serializer, headers=self.headers, + delivery_mode=self.delivery_mode, ) + except Exception as exc: + if not self.buffer_while_offline: + raise + self._outbound_buffer.append((event, routing_key, exc)) - def send(self, type, blind=False, **fields): + def send(self, type, blind=False, utcoffset=utcoffset, retry=False, + retry_policy=None, Event=Event, **fields): """Send event. :param type: Event type name, with group separated by dash (`-`). 
@@ -231,24 +233,40 @@ def send(self, type, blind=False, **fields): """ if self.enabled: - groups = self.groups - if groups and group_from(type) not in groups: + groups, group = self.groups, group_from(type) + if groups and group not in groups: return - try: - self.publish(type, fields, self.producer, blind) - except Exception as exc: - if not self.buffer_while_offline: - raise - self._outbound_buffer.append((type, fields, exc)) + if group in self.buffer_group: + clock = self.clock.forward() + event = Event(type, hostname=self.hostname, + utcoffset=utcoffset(), + pid=self.pid, clock=clock, **fields) + buf = self._group_buffer[group] + buf.append(event) + if len(buf) >= self.buffer_limit: + self.flush() + elif self.on_send_buffered: + self.on_send_buffered() + else: + return self.publish(type, fields, self.producer, blind=blind, + Event=Event, retry=retry, + retry_policy=retry_policy) - def flush(self): + def flush(self, errors=True, groups=True): """Flushes the outbound buffer.""" - while self._outbound_buffer: + if errors: + buf = list(self._outbound_buffer) try: - type, fields, _ = self._outbound_buffer.popleft() - except IndexError: - return - self.send(type, **fields) + with self.mutex: + for event, routing_key, _ in buf: + self._publish(event, self.producer, routing_key) + finally: + self._outbound_buffer.clear() + if groups: + with self.mutex: + for group, events in items(self._group_buffer): + self._publish(events, self.producer, '%s.multi' % group) + events[:] = [] # list.clear def extend_buffer(self, other): """Copies the outbound buffer of another instance.""" @@ -282,32 +300,40 @@ class EventReceiver(ConsumerMixin): def __init__(self, channel, handlers=None, routing_key='#', node_id=None, app=None, queue_prefix='celeryev', - accept=None): + accept=None, queue_ttl=None, queue_expires=None): self.app = app_or_default(app or self.app) self.channel = maybe_channel(channel) self.handlers = {} if handlers is None else handlers self.routing_key = routing_key 
self.node_id = node_id or uuid() self.queue_prefix = queue_prefix - self.exchange = get_exchange(self.connection or self.app.connection()) - self.queue = Queue('.'.join([self.queue_prefix, self.node_id]), - exchange=self.exchange, - routing_key=self.routing_key, - auto_delete=True, - durable=False, - queue_arguments=self._get_queue_arguments()) + self.exchange = get_exchange( + self.connection or self.app.connection_for_write()) + self.queue = Queue( + '.'.join([self.queue_prefix, self.node_id]), + exchange=self.exchange, + routing_key=self.routing_key, + auto_delete=True, durable=False, + queue_arguments=self._get_queue_arguments( + ttl=queue_ttl, expires=queue_expires, + ), + ) self.clock = self.app.clock self.adjust_clock = self.clock.adjust self.forward_clock = self.clock.forward if accept is None: - accept = set([self.app.conf.CELERY_EVENT_SERIALIZER, 'json']) + accept = {self.app.conf.event_serializer, 'json'} self.accept = accept - def _get_queue_arguments(self): + def _get_queue_arguments(self, ttl=None, expires=None): conf = self.app.conf return dictfilter({ - 'x-message-ttl': maybe_s_to_ms(conf.CELERY_EVENT_QUEUE_TTL), - 'x-expires': maybe_s_to_ms(conf.CELERY_EVENT_QUEUE_EXPIRES), + 'x-message-ttl': maybe_s_to_ms( + ttl if ttl is not None else conf.event_queue_ttl, + ), + 'x-expires': maybe_s_to_ms( + expires if expires is not None else conf.event_queue_expires, + ), }) def process(self, type, event): @@ -332,8 +358,9 @@ def itercapture(self, limit=None, timeout=None, wakeup=True): def capture(self, limit=None, timeout=None, wakeup=True): """Open up a consumer capturing events. - This has to run in the main process, and it will never - stop unless forced via :exc:`KeyboardInterrupt` or :exc:`SystemExit`. + This has to run in the main process, and it will never stop + unless :attr:`EventDispatcher.should_stop` is set to True, or + forced via :exc:`KeyboardInterrupt` or :exc:`SystemExit`. 
""" return list(self.consume(limit=limit, timeout=timeout, wakeup=wakeup)) @@ -370,8 +397,12 @@ def event_from_message(self, body, localize=True, body['local_received'] = now() return type, body - def _receive(self, body, message): - self.process(*self.event_from_message(body)) + def _receive(self, body, message, list=list, isinstance=isinstance): + if isinstance(body, list): # celery 4.0: List of events + process, from_message = self.process, self.event_from_message + [process(*from_message(event)) for event in body] + else: + self.process(*self.event_from_message(body)) @property def connection(self): diff --git a/celery/events/cursesmon.py b/celery/events/cursesmon.py index 796565fc4..8f49f466e 100644 --- a/celery/events/cursesmon.py +++ b/celery/events/cursesmon.py @@ -236,7 +236,7 @@ def readline(self, x, y): if ch != -1: if ch in (10, curses.KEY_ENTER): # enter break - if ch in (27, ): + if ch in (27,): buffer = str() break buffer += chr(ch) @@ -318,8 +318,8 @@ def selection_result(self): def alert_callback(my, mx, xs): y = count(xs) task = self.state.tasks[self.selected_task] - result = (getattr(task, 'result', None) - or getattr(task, 'exception', None)) + result = (getattr(task, 'result', None) or + getattr(task, 'exception', None)) for line in wrap(result, mx - 2): self.win.addstr(next(y), 3, line) @@ -508,10 +508,10 @@ def on_connection_error(exc, interval): while 1: print('-> evtop: starting capture...', file=sys.stderr) - with app.connection() as conn: + with app.connection_for_read() as conn: try: conn.ensure_connection(on_connection_error, - app.conf.BROKER_CONNECTION_MAX_RETRIES) + app.conf.broker_connection_max_retries) recv = app.events.Receiver(conn, handlers={'*': state.event}) display.resetscreen() display.init_screen() diff --git a/celery/events/dumper.py b/celery/events/dumper.py index 323afc4e1..c793b37e1 100644 --- a/celery/events/dumper.py +++ b/celery/events/dumper.py @@ -7,7 +7,7 @@ as they happen. 
Think of it like a `tcpdump` for Celery events. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import sys @@ -48,7 +48,7 @@ def say(self, msg): # need to flush so that output can be piped. try: self.out.flush() - except AttributeError: + except AttributeError: # pragma: no cover pass def on_event(self, ev): @@ -88,7 +88,7 @@ def evdump(app=None, out=sys.stdout): app = app_or_default(app) dumper = Dumper(out=out) dumper.say('-> evdump: starting capture...') - conn = app.connection().clone() + conn = app.connection_for_read().clone() def _error_handler(exc, interval): dumper.say(CONNECTION_ERROR % ( diff --git a/celery/events/snapshot.py b/celery/events/snapshot.py index 0dd41554c..6ca3a31ad 100644 --- a/celery/events/snapshot.py +++ b/celery/events/snapshot.py @@ -10,7 +10,7 @@ in :mod:`djcelery.snapshots` in the `django-celery` distribution. """ -from __future__ import absolute_import +from __future__ import absolute_import, print_function from kombu.utils.limits import TokenBucket @@ -29,7 +29,7 @@ class Polaroid(object): timer = None - shutter_signal = Signal(providing_args=('state', )) + shutter_signal = Signal(providing_args=('state',)) cleanup_signal = Signal() clear_after = False @@ -102,7 +102,7 @@ def evcam(camera, freq=1.0, maxrate=None, loglevel=0, cam = instantiate(camera, state, app=app, freq=freq, maxrate=maxrate, timer=timer) cam.install() - conn = app.connection() + conn = app.connection_for_read() recv = app.events.Receiver(conn, handlers={'*': state.event}) try: try: diff --git a/celery/events/state.py b/celery/events/state.py index c78f2d08a..19800f79a 100644 --- a/celery/events/state.py +++ b/celery/events/state.py @@ -30,11 +30,10 @@ from weakref import ref from kombu.clocks import timetuple -from kombu.utils import cached_property, kwdict +from kombu.utils import cached_property from celery import states -from celery.five import class_property, items, values 
-from celery.utils import deprecated +from celery.five import items, values from celery.utils.functional import LRUCache, memoize from celery.utils.log import get_logger @@ -54,8 +53,6 @@ %s seconds. [orig: %s recv: %s] """ -CAN_KWDICT = sys.version_info >= (2, 6, 5) - logger = get_logger(__name__) warn = logger.warning @@ -86,7 +83,7 @@ def heartbeat_expires(timestamp, freq=60, def _depickle_task(cls, fields): - return cls(**(fields if CAN_KWDICT else kwdict(fields))) + return cls(**fields) def with_unique_field(attr): @@ -100,7 +97,8 @@ def __eq__(this, other): cls.__eq__ = __eq__ def __ne__(this, other): - return not this.__eq__(other) + res = this.__eq__(other) + return True if res is NotImplemented else not res cls.__ne__ = __ne__ def __hash__(this): @@ -120,7 +118,7 @@ class Worker(object): _fields = ('hostname', 'pid', 'freq', 'heartbeats', 'clock', 'active', 'processed', 'loadavg', 'sw_ident', 'sw_ver', 'sw_sys') - if not PYPY: + if not PYPY: # pragma: no cover __slots__ = _fields + ('event', '__dict__', '__weakref__') def __init__(self, hostname=None, pid=None, freq=60, @@ -165,10 +163,10 @@ def event(type_, timestamp=None, if not local_received or not timestamp: return drift = abs(int(local_received) - int(timestamp)) - if drift > HEARTBEAT_DRIFT_MAX: + if drift > max_drift: _warn_drift(self.hostname, drift, local_received, timestamp) - if local_received: + if local_received: # pragma: no cover hearts = len(heartbeats) if hearts > hbmax - 1: hb_pop(0) @@ -202,45 +200,25 @@ def alive(self, nowfun=time): def id(self): return '{0.hostname}.{0.pid}'.format(self) - @deprecated(3.2, 3.3) - def update_heartbeat(self, received, timestamp): - self.event(None, timestamp, received) - - @deprecated(3.2, 3.3) - def on_online(self, timestamp=None, local_received=None, **fields): - self.event('online', timestamp, local_received, fields) - - @deprecated(3.2, 3.3) - def on_offline(self, timestamp=None, local_received=None, **fields): - self.event('offline', timestamp, 
local_received, fields) - - @deprecated(3.2, 3.3) - def on_heartbeat(self, timestamp=None, local_received=None, **fields): - self.event('heartbeat', timestamp, local_received, fields) - - @class_property - def _defaults(cls): - """Deprecated, to be removed in 3.3""" - source = cls() - return dict((k, getattr(source, k)) for k in cls._fields) - @with_unique_field('uuid') class Task(object): """Task State.""" name = received = sent = started = succeeded = failed = retried = \ - revoked = args = kwargs = eta = expires = retries = worker = result = \ - exception = timestamp = runtime = traceback = exchange = \ - routing_key = client = None + revoked = rejected = args = kwargs = eta = expires = retries = \ + worker = result = exception = timestamp = runtime = traceback = \ + exchange = routing_key = root_id = parent_id = client = None state = states.PENDING clock = 0 - _fields = ('uuid', 'name', 'state', 'received', 'sent', 'started', - 'succeeded', 'failed', 'retried', 'revoked', 'args', 'kwargs', - 'eta', 'expires', 'retries', 'worker', 'result', 'exception', - 'timestamp', 'runtime', 'traceback', 'exchange', 'routing_key', - 'clock', 'client') - if not PYPY: + _fields = ( + 'uuid', 'name', 'state', 'received', 'sent', 'started', 'rejected', + 'succeeded', 'failed', 'retried', 'revoked', 'args', 'kwargs', + 'eta', 'expires', 'retries', 'worker', 'result', 'exception', + 'timestamp', 'runtime', 'traceback', 'exchange', 'routing_key', + 'clock', 'client', 'root_id', 'parent_id', + ) + if not PYPY: # pragma: no cover __slots__ = ('__dict__', '__weakref__') #: How to merge out of order events. @@ -251,12 +229,19 @@ class Task(object): #: that state. ``(RECEIVED, ('name', 'args')``, means the name and args #: fields are always taken from the RECEIVED state, and any values for #: these fields received before or after is simply ignored. 
- merge_rules = {states.RECEIVED: ('name', 'args', 'kwargs', - 'retries', 'eta', 'expires')} + merge_rules = { + states.RECEIVED: ( + 'name', 'args', 'kwargs', 'parent_id', + 'root_id', 'retries', 'eta', 'expires', + ), + } #: meth:`info` displays these fields by default. - _info_fields = ('args', 'kwargs', 'retries', 'result', 'eta', 'runtime', - 'expires', 'exception', 'exchange', 'routing_key') + _info_fields = ( + 'args', 'kwargs', 'retries', 'result', 'eta', 'runtime', + 'expires', 'exception', 'exchange', 'routing_key', + 'root_id', 'parent_id', + ) def __init__(self, uuid=None, **kwargs): self.uuid = uuid @@ -269,7 +254,7 @@ def event(self, type_, timestamp=None, local_received=None, fields=None, PENDING=states.PENDING, RECEIVED=states.RECEIVED, STARTED=states.STARTED, FAILURE=states.FAILURE, RETRY=states.RETRY, SUCCESS=states.SUCCESS, - REVOKED=states.REVOKED): + REVOKED=states.REVOKED, REJECTED=states.REJECTED): fields = fields or {} if type_ == 'sent': state, self.sent = PENDING, timestamp @@ -285,6 +270,8 @@ def event(self, type_, timestamp=None, local_received=None, fields=None, state, self.succeeded = SUCCESS, timestamp elif type_ == 'revoked': state, self.revoked = REVOKED, timestamp + elif type_ == 'rejected': + state, self.rejected = REJECTED, timestamp else: state = type_.upper() @@ -295,9 +282,9 @@ def event(self, type_, timestamp=None, local_received=None, fields=None, # this state logically happens-before the current state, so merge.
keep = self.merge_rules.get(state) if keep is not None: - fields = dict( - (k, v) for k, v in items(fields) if k in keep - ) + fields = { + k: v for k, v in items(fields) if k in keep + } for key, value in items(fields): setattr(self, key, value) else: @@ -323,9 +310,9 @@ def __repr__(self): def as_dict(self): get = object.__getattribute__ - return dict( - (k, get(self, k)) for k in self._fields - ) + return { + k: get(self, k) for k in self._fields + } def __reduce__(self): return _depickle_task, (self.__class__, self.as_dict()) @@ -338,57 +325,6 @@ def origin(self): def ready(self): return self.state in states.READY_STATES - @deprecated(3.2, 3.3) - def on_sent(self, timestamp=None, **fields): - self.event('sent', timestamp, fields) - - @deprecated(3.2, 3.3) - def on_received(self, timestamp=None, **fields): - self.event('received', timestamp, fields) - - @deprecated(3.2, 3.3) - def on_started(self, timestamp=None, **fields): - self.event('started', timestamp, fields) - - @deprecated(3.2, 3.3) - def on_failed(self, timestamp=None, **fields): - self.event('failed', timestamp, fields) - - @deprecated(3.2, 3.3) - def on_retried(self, timestamp=None, **fields): - self.event('retried', timestamp, fields) - - @deprecated(3.2, 3.3) - def on_succeeded(self, timestamp=None, **fields): - self.event('succeeded', timestamp, fields) - - @deprecated(3.2, 3.3) - def on_revoked(self, timestamp=None, **fields): - self.event('revoked', timestamp, fields) - - @deprecated(3.2, 3.3) - def on_unknown_event(self, shortype, timestamp=None, **fields): - self.event(shortype, timestamp, fields) - - @deprecated(3.2, 3.3) - def update(self, state, timestamp, fields, - _state=states.state, RETRY=states.RETRY): - return self.event(state, timestamp, None, fields) - - @deprecated(3.2, 3.3) - def merge(self, state, timestamp, fields): - keep = self.merge_rules.get(state) - if keep is not None: - fields = dict((k, v) for k, v in items(fields) if k in keep) - for key, value in items(fields): - 
setattr(self, key, value) - - @class_property - def _defaults(cls): - """Deprecated, to be removed in 3.3.""" - source = cls() - return dict((k, getattr(source, k)) for k in source._fields) - class State(object): """Records clusters state.""" @@ -436,9 +372,10 @@ def clear_tasks(self, ready=True): def _clear_tasks(self, ready=True): if ready: - in_progress = dict( - (uuid, task) for uuid, task in self.itertasks() - if task.state not in states.READY_STATES) + in_progress = { + uuid: task for uuid, task in self.itertasks() + if task.state not in states.READY_STATES + } self.tasks.clear() self.tasks.update(in_progress) else: @@ -602,11 +539,15 @@ def itertasks(self, limit=None): if limit and index + 1 >= limit: break - def tasks_by_time(self, limit=None): + def tasks_by_time(self, limit=None, reverse=True): """Generator giving tasks ordered by time, in ``(uuid, Task)`` tuples.""" + _heap = self._taskheap + if reverse: + _heap = reversed(_heap) + seen = set() - for evtup in islice(reversed(self._taskheap), 0, limit): + for evtup in islice(_heap, 0, limit): task = evtup[3]() if task is not None: uuid = task.uuid @@ -615,24 +556,24 @@ def tasks_by_time(self, limit=None): seen.add(uuid) tasks_by_timestamp = tasks_by_time - def tasks_by_type(self, name, limit=None): + def tasks_by_type(self, name, limit=None, reverse=True): """Get all tasks by type. Return a list of ``(uuid, Task)`` tuples. """ return islice( - ((uuid, task) for uuid, task in self.tasks_by_time() + ((uuid, task) for uuid, task in self.tasks_by_time(reverse=reverse) if task.name == name), 0, limit, ) - def tasks_by_worker(self, hostname, limit=None): + def tasks_by_worker(self, hostname, limit=None, reverse=True): """Get all tasks by worker. 
""" return islice( - ((uuid, task) for uuid, task in self.tasks_by_time() + ((uuid, task) for uuid, task in self.tasks_by_time(reverse=reverse) if task.worker.hostname == hostname), 0, limit, ) diff --git a/celery/exceptions.py b/celery/exceptions.py index ab6501941..fcd40d1be 100644 --- a/celery/exceptions.py +++ b/celery/exceptions.py @@ -16,22 +16,33 @@ SoftTimeLimitExceeded, TimeLimitExceeded, WorkerLostError, Terminated, ) -__all__ = ['SecurityError', 'Ignore', 'QueueNotFound', - 'WorkerShutdown', 'WorkerTerminate', - 'ImproperlyConfigured', 'NotRegistered', 'AlreadyRegistered', - 'TimeoutError', 'MaxRetriesExceededError', 'Retry', - 'TaskRevokedError', 'NotConfigured', 'AlwaysEagerIgnored', - 'InvalidTaskError', 'ChordError', 'CPendingDeprecationWarning', - 'CDeprecationWarning', 'FixupWarning', 'DuplicateNodenameWarning', - 'SoftTimeLimitExceeded', 'TimeLimitExceeded', 'WorkerLostError', - 'Terminated'] +__all__ = [ + 'CeleryError', 'CeleryWarning', 'TaskPredicate', + 'SecurityError', 'Ignore', 'QueueNotFound', + 'WorkerShutdown', 'WorkerTerminate', + 'ImproperlyConfigured', 'NotRegistered', 'AlreadyRegistered', + 'TimeoutError', 'MaxRetriesExceededError', 'Retry', 'Reject', + 'TaskRevokedError', 'NotConfigured', 'AlwaysEagerIgnored', + 'InvalidTaskError', 'ChordError', 'CPendingDeprecationWarning', + 'CDeprecationWarning', 'FixupWarning', 'DuplicateNodenameWarning', + 'SoftTimeLimitExceeded', 'TimeLimitExceeded', 'WorkerLostError', + 'Terminated', 'IncompleteStream' +] UNREGISTERED_FMT = """\ Task of kind {0} is not registered, please make sure it's imported.\ """ -class SecurityError(Exception): +class CeleryError(Exception): + pass + + +class CeleryWarning(UserWarning): + pass + + +class SecurityError(CeleryError): """Security related exceptions. Handle with care. 
@@ -39,11 +50,55 @@ class SecurityError(Exception): """ -class Ignore(Exception): +class TaskPredicate(CeleryError): + pass + + +class Retry(TaskPredicate): + """The task is to be retried later.""" + + #: Optional message describing context of retry. + message = None + + #: Exception (if any) that caused the retry to happen. + exc = None + + #: Time of retry (ETA), either :class:`numbers.Real` or + #: :class:`~datetime.datetime`. + when = None + + def __init__(self, message=None, exc=None, when=None, **kwargs): + from kombu.utils.encoding import safe_repr + self.message = message + if isinstance(exc, string_t): + self.exc, self.excs = None, exc + else: + self.exc, self.excs = exc, safe_repr(exc) if exc else None + self.when = when + Exception.__init__(self, exc, when, **kwargs) + + def humanize(self): + if isinstance(self.when, numbers.Real): + return 'in {0.when}s'.format(self) + return 'at {0.when}'.format(self) + + def __str__(self): + if self.message: + return self.message + if self.excs: + return 'Retry {0}: {1}'.format(self.humanize(), self.excs) + return 'Retry {0}'.format(self.humanize()) + + def __reduce__(self): + return self.__class__, (self.message, self.excs, self.when) +RetryTaskError = Retry # XXX compat + + +class Ignore(TaskPredicate): """A task can raise this to ignore doing state updates.""" -class Reject(Exception): +class Reject(TaskPredicate): """A task can raise this if it wants to reject/requeue the message.""" def __init__(self, reason=None, requeue=False): @@ -65,93 +120,53 @@ class WorkerShutdown(SystemExit): class QueueNotFound(KeyError): - """Task routed to a queue not in CELERY_QUEUES.""" + """Task routed to a queue not in ``conf.queues``.""" class ImproperlyConfigured(ImportError): """Celery is somehow improperly configured.""" -class NotRegistered(KeyError): +class NotRegistered(KeyError, CeleryError): """The task is not registered.""" def __repr__(self): return UNREGISTERED_FMT.format(self) -class AlreadyRegistered(Exception): 
+class AlreadyRegistered(CeleryError): """The task is already registered.""" -class TimeoutError(Exception): +class TimeoutError(CeleryError): """The operation timed out.""" -class MaxRetriesExceededError(Exception): +class MaxRetriesExceededError(CeleryError): """The tasks max restart limit has been exceeded.""" -class Retry(Exception): - """The task is to be retried later.""" - - #: Optional message describing context of retry. - message = None - - #: Exception (if any) that caused the retry to happen. - exc = None - - #: Time of retry (ETA), either :class:`numbers.Real` or - #: :class:`~datetime.datetime`. - when = None - - def __init__(self, message=None, exc=None, when=None, **kwargs): - from kombu.utils.encoding import safe_repr - self.message = message - if isinstance(exc, string_t): - self.exc, self.excs = None, exc - else: - self.exc, self.excs = exc, safe_repr(exc) if exc else None - self.when = when - Exception.__init__(self, exc, when, **kwargs) - - def humanize(self): - if isinstance(self.when, numbers.Real): - return 'in {0.when}s'.format(self) - return 'at {0.when}'.format(self) - - def __str__(self): - if self.message: - return self.message - if self.excs: - return 'Retry {0}: {1}'.format(self.humanize(), self.excs) - return 'Retry {0}'.format(self.humanize()) - - def __reduce__(self): - return self.__class__, (self.message, self.excs, self.when) -RetryTaskError = Retry # XXX compat - - -class TaskRevokedError(Exception): +class TaskRevokedError(CeleryError): """The task has been revoked, so no result available.""" -class NotConfigured(UserWarning): +class NotConfigured(CeleryWarning): """Celery has not been configured, as no config module has been found.""" -class AlwaysEagerIgnored(UserWarning): - """send_task ignores CELERY_ALWAYS_EAGER option""" +class AlwaysEagerIgnored(CeleryWarning): + """send_task ignores :setting:`task_always_eager` option""" -class InvalidTaskError(Exception): +class InvalidTaskError(CeleryError): """The task has invalid 
data or is not properly constructed.""" -class IncompleteStream(Exception): +class IncompleteStream(CeleryError): """Found the end of a stream of data, but the data is not yet complete.""" -class ChordError(Exception): +class ChordError(CeleryError): """A task part of the chord raised an exception.""" @@ -163,9 +178,9 @@ class CDeprecationWarning(DeprecationWarning): pass -class FixupWarning(UserWarning): +class FixupWarning(CeleryWarning): pass -class DuplicateNodenameWarning(UserWarning): +class DuplicateNodenameWarning(CeleryWarning): """Multiple workers are using the same nodename.""" diff --git a/celery/five.py b/celery/five.py index dfee614e4..1379f1dc3 100644 --- a/celery/five.py +++ b/celery/five.py @@ -10,188 +10,55 @@ """ from __future__ import absolute_import -__all__ = ['Counter', 'reload', 'UserList', 'UserDict', 'Queue', 'Empty', - 'zip_longest', 'map', 'string', 'string_t', - 'long_t', 'text_t', 'range', 'int_types', 'items', 'keys', 'values', - 'nextfun', 'reraise', 'WhateverIO', 'with_metaclass', - 'OrderedDict', 'THREAD_TIMEOUT_MAX', 'format_d', - 'class_property', 'reclassmethod', 'create_module', - 'recreate_module', 'monotonic'] - -import io - -try: - from collections import Counter -except ImportError: # pragma: no cover - from collections import defaultdict - - def Counter(): # noqa - return defaultdict(int) - -############## py3k ######################################################### +import operator import sys -PY3 = sys.version_info[0] == 3 - -try: - reload = reload # noqa -except NameError: # pragma: no cover - from imp import reload # noqa - -try: - from UserList import UserList # noqa -except ImportError: # pragma: no cover - from collections import UserList # noqa - -try: - from UserDict import UserDict # noqa -except ImportError: # pragma: no cover - from collections import UserDict # noqa - - -from kombu.five import monotonic - -if PY3: # pragma: no cover - import builtins - - from queue import Queue, Empty - from itertools import 
zip_longest - - map = map - string = str - string_t = str - long_t = int - text_t = str - range = range - int_types = (int, ) - _byte_t = bytes - - open_fqdn = 'builtins.open' - - def items(d): - return d.items() - - def keys(d): - return d.keys() - - def values(d): - return d.values() - - def nextfun(it): - return it.__next__ - - exec_ = getattr(builtins, 'exec') - - def reraise(tp, value, tb=None): - if value.__traceback__ is not tb: - raise value.with_traceback(tb) - raise value - -else: - import __builtin__ as builtins # noqa - from Queue import Queue, Empty # noqa - from itertools import imap as map, izip_longest as zip_longest # noqa - string = unicode # noqa - string_t = basestring # noqa - text_t = unicode # noqa - long_t = long # noqa - range = xrange # noqa - int_types = (int, long) # noqa - _byte_t = (str, bytes) # noqa - - open_fqdn = '__builtin__.open' - - def items(d): # noqa - return d.iteritems() - - def keys(d): # noqa - return d.iterkeys() - - def values(d): # noqa - return d.itervalues() - - def nextfun(it): # noqa - return it.next - - def exec_(code, globs=None, locs=None): # pragma: no cover - """Execute code in a namespace.""" - if globs is None: - frame = sys._getframe(1) - globs = frame.f_globals - if locs is None: - locs = frame.f_locals - del frame - elif locs is None: - locs = globs - exec("""exec code in globs, locs""") - - exec_("""def reraise(tp, value, tb=None): raise tp, value, tb""") - - -def with_metaclass(Type, skip_attrs=set(['__dict__', '__weakref__'])): - """Class decorator to set metaclass. - - Works with both Python 2 and Python 3 and it does not add - an extra class in the lookup order like ``six.with_metaclass`` does - (that is -- it copies the original class instead of using inheritance). 
- - """ - - def _clone_with_metaclass(Class): - attrs = dict((key, value) for key, value in items(vars(Class)) - if key not in skip_attrs) - return Type(Class.__name__, Class.__bases__, attrs) - - return _clone_with_metaclass +from importlib import import_module +from types import ModuleType -############## collections.OrderedDict ###################################### -# was moved to kombu -from kombu.utils.compat import OrderedDict # noqa +# extends amqp.five +from amqp.five import * # noqa +from amqp.five import __all__ as _all_five -############## threading.TIMEOUT_MAX ####################################### try: - from threading import TIMEOUT_MAX as THREAD_TIMEOUT_MAX + from functools import reduce except ImportError: - THREAD_TIMEOUT_MAX = 1e10 # noqa + pass + +try: # pragma: no cover + from inspect import formatargspec, getfullargspec +except ImportError: # Py2 + from collections import namedtuple + from inspect import formatargspec, getargspec as _getargspec # noqa -############## format(int, ',d') ########################## + FullArgSpec = namedtuple('FullArgSpec', ( + 'args', 'varargs', 'varkw', 'defaults', + 'kwonlyargs', 'kwonlydefaults', 'annotations', + )) -if sys.version_info >= (2, 7): # pragma: no cover - def format_d(i): - return format(i, ',d') -else: # pragma: no cover - def format_d(i): # noqa - s = '%d' % i - groups = [] - while s and s[-1].isdigit(): - groups.append(s[-3:]) - s = s[:-3] - return s + ','.join(reversed(groups)) + def getfullargspec(fun, _fill=(None, ) * 3): # noqa + s = _getargspec(fun) + return FullArgSpec(*s + _fill) +__all__ = [ + 'class_property', 'reclassmethod', 'create_module', 'recreate_module', +] +__all__ += _all_five -############## Module Generation ########################## +# ############# Module Generation ########################## # Utilities to dynamically # recreate modules, either for lazy loading or # to create old modules at runtime instead of # having them litter the source tree. 
-import operator -import sys # import fails in python 2.5. fallback to reduce in stdlib -try: - from functools import reduce -except ImportError: - pass - -from importlib import import_module -from types import ModuleType MODULE_DEPRECATED = """ The module %s is deprecated and will be removed in a future version. """ -DEFAULT_ATTRS = set(['__file__', '__path__', '__doc__', '__all__']) +DEFAULT_ATTRS = {'__file__', '__path__', '__doc__', '__all__'} # im_func is no longer available in Py3. # instead the unbound method itself can be used. @@ -210,35 +77,26 @@ def getappattr(path): return current_app._rgetattr(path) -def _compat_task_decorator(*args, **kwargs): - from celery import current_app - kwargs.setdefault('accept_magic_kwargs', True) - return current_app.task(*args, **kwargs) - - def _compat_periodic_task_decorator(*args, **kwargs): from celery.task import periodic_task - kwargs.setdefault('accept_magic_kwargs', True) return periodic_task(*args, **kwargs) - COMPAT_MODULES = { 'celery': { 'execute': { 'send_task': 'send_task', }, 'decorators': { - 'task': _compat_task_decorator, + 'task': 'task', 'periodic_task': _compat_periodic_task_decorator, }, 'log': { 'get_default_logger': 'log.get_default_logger', 'setup_logger': 'log.setup_logger', - 'setup_loggig_subsystem': 'log.setup_logging_subsystem', + 'setup_logging_subsystem': 'log.setup_logging_subsystem', 'redirect_stdouts_to_logger': 'log.redirect_stdouts_to_logger', }, 'messaging': { - 'TaskPublisher': 'amqp.TaskPublisher', 'TaskConsumer': 'amqp.TaskConsumer', 'establish_connection': 'connection', 'get_consumer_set': 'amqp.TaskConsumer', @@ -296,7 +154,7 @@ def reclassmethod(method): return classmethod(fun_of_method(method)) -class MagicModule(ModuleType): +class LazyModule(ModuleType): _compat_modules = () _all_by_module = {} _direct = {} @@ -318,25 +176,27 @@ def __dir__(self): return list(set(self.__all__) | DEFAULT_ATTRS) def __reduce__(self): - return import_module, (self.__name__, ) + return 
import_module, (self.__name__,) def create_module(name, attrs, cls_attrs=None, pkg=None, - base=MagicModule, prepare_attr=None): + base=LazyModule, prepare_attr=None): fqdn = '.'.join([pkg.__name__, name]) if pkg else name cls_attrs = {} if cls_attrs is None else cls_attrs pkg, _, modname = name.rpartition('.') cls_attrs['__module__'] = pkg - attrs = dict((attr_name, prepare_attr(attr) if prepare_attr else attr) - for attr_name, attr in items(attrs)) - module = sys.modules[fqdn] = type(modname, (base, ), cls_attrs)(fqdn) + attrs = { + attr_name: (prepare_attr(attr) if prepare_attr else attr) + for attr_name, attr in items(attrs) + } + module = sys.modules[fqdn] = type(modname, (base,), cls_attrs)(name) module.__dict__.update(attrs) return module def recreate_module(name, compat_modules=(), by_module={}, direct={}, - base=MagicModule, **attrs): + base=LazyModule, **attrs): old_module = sys.modules[name] origins = get_origins(by_module) compat_modules = COMPAT_MODULES.get(name, ()) @@ -351,8 +211,9 @@ def recreate_module(name, compat_modules=(), by_module={}, direct={}, ))), ) new_module = create_module(name, attrs, cls_attrs=cattrs, base=base) - new_module.__dict__.update(dict((mod, get_compat_module(new_module, mod)) - for mod in compat_modules)) + new_module.__dict__.update({ + mod: get_compat_module(new_module, mod) for mod in compat_modules + }) return old_module, new_module @@ -361,7 +222,7 @@ def get_compat_module(pkg, name): def prepare(attr): if isinstance(attr, string_t): - return Proxy(getappattr, (attr, )) + return Proxy(getappattr, (attr,)) return attr attrs = COMPAT_MODULES[pkg.__name__][name] @@ -376,18 +237,5 @@ def prepare(attr): def get_origins(defs): origins = {} for module, attrs in items(defs): - origins.update(dict((attr, module) for attr in attrs)) + origins.update({attr: module for attr in attrs}) return origins - - -_SIO_write = io.StringIO.write -_SIO_init = io.StringIO.__init__ - - -class WhateverIO(io.StringIO): - - def __init__(self, 
v=None, *a, **kw): - _SIO_init(self, v.decode() if isinstance(v, _byte_t) else v, *a, **kw) - - def write(self, data): - _SIO_write(self, data.decode() if isinstance(data, _byte_t) else data) diff --git a/celery/fixups/django.py b/celery/fixups/django.py index ab20325f5..5151ff082 100644 --- a/celery/fixups/django.py +++ b/celery/fixups/django.py @@ -1,6 +1,5 @@ from __future__ import absolute_import -import io import os import sys import warnings @@ -11,8 +10,15 @@ from importlib import import_module from celery import signals +from celery.app import default_app from celery.exceptions import FixupWarning +if sys.version_info[0] < 3 and not hasattr(sys, 'pypy_version_info'): + from StringIO import StringIO +else: # pragma: no cover + from io import StringIO + + __all__ = ['DjangoFixup', 'fixup'] ERR_NOT_INSTALLED = """\ @@ -44,21 +50,38 @@ class DjangoFixup(object): def __init__(self, app): self.app = app - self.app.set_default() + if default_app is None: + self.app.set_default() + self._worker_fixup = None def install(self): # Need to add project directory to path sys.path.append(os.getcwd()) + self._settings = symbol_by_name('django.conf:settings') self.app.loader.now = self.now self.app.loader.mail_admins = self.mail_admins + signals.import_modules.connect(self.on_import_modules) signals.worker_init.connect(self.on_worker_init) return self + @property + def worker_fixup(self): + if self._worker_fixup is None: + self._worker_fixup = DjangoWorkerFixup(self.app) + return self._worker_fixup + + @worker_fixup.setter + def worker_fixup(self, value): + self._worker_fixup = value + + def on_import_modules(self, **kwargs): + # call django.setup() before task modules are imported + self.worker_fixup.validate_models() + def on_worker_init(self, **kwargs): - # keep reference - self._worker_fixup = DjangoWorkerFixup(self.app).install() + self.worker_fixup.install() def now(self, utc=False): return datetime.utcnow() if utc else self._now() @@ -66,6 +89,14 @@ def now(self, 
utc=False): def mail_admins(self, subject, body, fail_silently=False, **kwargs): return self._mail_admins(subject, body, fail_silently=fail_silently) + def autodiscover_tasks(self): + try: + from django.apps import apps + except ImportError: + return self._settings.INSTALLED_APPS + else: + return [config.name for config in apps.get_app_configs()] + @cached_property def _mail_admins(self): return symbol_by_name('django.core.mail:mail_admins') @@ -88,6 +119,13 @@ def __init__(self, app): self._cache = import_module('django.core.cache') self._settings = symbol_by_name('django.conf:settings') + try: + self.interface_errors = ( + symbol_by_name('django.db.utils.InterfaceError'), + ) + except (ImportError, AttributeError): + self.interface_errors = () + # Database-related exceptions. DatabaseError = symbol_by_name('django.db:DatabaseError') try: @@ -126,43 +164,63 @@ def __init__(self, app): except (ImportError, AttributeError): self._close_old_connections = None self.database_errors = ( - (DatabaseError, ) + + (DatabaseError,) + _my_database_errors + _pg_database_errors + _lite_database_errors + _oracle_database_errors ) + def django_setup(self): + import django + try: + django_setup = django.setup + except AttributeError: # pragma: no cover + pass + else: + django_setup() + def validate_models(self): - s = io.StringIO() + self.django_setup() try: from django.core.management.validation import get_validation_errors except ImportError: - import django - from django.core.management.base import BaseCommand - django.setup() - cmd = BaseCommand() - cmd.stdout, cmd.stderr = sys.stdout, sys.stderr - cmd.check() + self._validate_models_django17() else: + s = StringIO() num_errors = get_validation_errors(s, None) if num_errors: raise RuntimeError( 'One or more Django models did not validate:\n{0}'.format( s.getvalue())) + def _validate_models_django17(self): + from django.core.management import base + cmd = base.BaseCommand() + try: + cmd.stdout = 
base.OutputWrapper(sys.stdout) + cmd.stderr = base.OutputWrapper(sys.stderr) + except ImportError: # before django 1.5 + cmd.stdout, cmd.stderr = sys.stdout, sys.stderr + cmd.check() + def install(self): signals.beat_embedded_init.connect(self.close_database) signals.worker_ready.connect(self.on_worker_ready) signals.task_prerun.connect(self.on_task_prerun) signals.task_postrun.connect(self.on_task_postrun) signals.worker_process_init.connect(self.on_worker_process_init) - self.validate_models() self.close_database() self.close_cache() return self def on_worker_process_init(self, **kwargs): + # Child process must validate models again if on Windows, + # or if they were started using execv. + if os.environ.get('FORKED_BY_MULTIPROCESSING'): + self.validate_models() + + # close connections: # the parent process may have established these, # so need to close them. @@ -173,15 +232,21 @@ def on_worker_process_init(self, **kwargs): try: for c in self._db.connections.all(): if c and c.connection: - _maybe_close_fd(c.connection) + self._maybe_close_db_fd(c.connection) except AttributeError: if self._db.connection and self._db.connection.connection: - _maybe_close_fd(self._db.connection.connection) + self._maybe_close_db_fd(self._db.connection.connection) # use the _ version to avoid DB_REUSE preventing the conn.close() call self._close_database() self.close_cache() + def _maybe_close_db_fd(self, fd): + try: + _maybe_close_fd(fd) + except self.interface_errors: + pass + def on_task_prerun(self, sender, **kwargs): """Called before every task.""" if not getattr(sender.request, 'is_eager', False): @@ -206,7 +271,7 @@ def close_database(self, **kwargs): def _close_database(self): try: - funs = [conn.close for conn in self._db.connections] + funs = [conn.close for conn in self._db.connections.all()] except AttributeError: if hasattr(self._db, 'close_old_connections'): # django 1.6 funs = [self._db.close_old_connections] @@ -217,6 +282,8 @@ def _close_database(self): for close in 
funs: try: close() + except self.interface_errors: + pass except self.database_errors as exc: str_exc = str(exc) if 'closed' not in str_exc and 'not connected' not in str_exc: diff --git a/celery/loaders/__init__.py b/celery/loaders/__init__.py index 2a39ba2ab..ad6d766c9 100644 --- a/celery/loaders/__init__.py +++ b/celery/loaders/__init__.py @@ -9,8 +9,6 @@ """ from __future__ import absolute_import -from celery._state import current_app -from celery.utils import deprecated from celery.utils.imports import symbol_by_name, import_from_cwd __all__ = ['get_loader_cls'] @@ -23,15 +21,3 @@ def get_loader_cls(loader): """Get loader class by name/alias""" return symbol_by_name(loader, LOADER_ALIASES, imp=import_from_cwd) - - -@deprecated(deprecation=2.5, removal=4.0, - alternative='celery.current_app.loader') -def current_loader(): - return current_app.loader - - -@deprecated(deprecation=2.5, removal=4.0, - alternative='celery.current_app.conf') -def load_settings(): - return current_app.conf diff --git a/celery/loaders/base.py b/celery/loaders/base.py index d73547aad..0223297eb 100644 --- a/celery/loaders/base.py +++ b/celery/loaders/base.py @@ -8,7 +8,6 @@ """ from __future__ import absolute_import -import anyjson import imp as _imp import importlib import os @@ -17,6 +16,7 @@ from datetime import datetime +from kombu.utils import json from kombu.utils import cached_property from kombu.utils.encoding import safe_str @@ -40,6 +40,8 @@ Did you mean '{suggest}'? """ +unconfigured = object() + class BaseLoader(object): """The base class for loaders. 
@@ -65,7 +67,7 @@ class BaseLoader(object): override_backends = {} worker_initialized = False - _conf = None + _conf = unconfigured def __init__(self, app, **kwargs): self.app = app @@ -117,8 +119,8 @@ def import_default_modules(self): return [ self.import_task_module(m) for m in ( tuple(self.builtin_modules) + - tuple(maybe_list(self.app.conf.CELERY_IMPORTS)) + - tuple(maybe_list(self.app.conf.CELERY_INCLUDE)) + tuple(maybe_list(self.app.conf.imports)) + + tuple(maybe_list(self.app.conf.include)) ) ] @@ -153,7 +155,7 @@ def _smart_import(self, path, imp=None): return symbol_by_name(path, imp=imp) # Not sure if path is just a module name or if it includes an - # attribute name (e.g. ``os.path``, vs, ``os.path.abspath`` + # attribute name (e.g. ``os.path``, vs, ``os.path.abspath``). try: return imp(path) except ImportError: @@ -178,24 +180,24 @@ def find_module(self, module): def cmdline_config_parser( self, args, namespace='celery', re_type=re.compile(r'\((\w+)\)'), - extra_types={'json': anyjson.loads}, + extra_types={'json': json.loads}, override_types={'tuple': 'json', 'list': 'json', 'dict': 'json'}): from celery.app.defaults import Option, NAMESPACES - namespace = namespace.upper() + namespace = namespace and namespace.lower() typemap = dict(Option.typemap, **extra_types) def getarg(arg): """Parse a single configuration definition from the command-line.""" - ## find key/value + # ## find key/value # ns.key=value|ns_key=value (case insensitive) key, value = arg.split('=', 1) - key = key.upper().replace('.', '_') + key = key.lower().replace('.', '_') - ## find namespace. + # ## find namespace. # .key=value|_key=value expands to default namespace. if key[0] == '_': ns, key = namespace, key[1:] @@ -214,7 +216,7 @@ def getarg(arg): value = typemap[type_](value) else: try: - value = NAMESPACES[ns][key].to_python(value) + value = NAMESPACES[ns.lower()][key].to_python(value) except ValueError as exc: # display key name in error message. 
raise ValueError('{0!r}: {1}'.format(ns_key, exc)) @@ -224,10 +226,11 @@ def getarg(arg): def mail_admins(self, subject, body, fail_silently=False, sender=None, to=None, host=None, port=None, user=None, password=None, timeout=None, - use_ssl=False, use_tls=False): + use_ssl=False, use_tls=False, charset='us-ascii'): message = self.mail.Message(sender=sender, to=to, subject=safe_str(subject), - body=safe_str(body)) + body=safe_str(body), + charset=charset) mailer = self.mail.Mailer(host=host, port=port, user=user, password=password, timeout=timeout, use_ssl=use_ssl, @@ -243,7 +246,6 @@ def read_configuration(self, env='CELERY_CONFIG_MODULE'): if custom_config: usercfg = self._import_config_module(custom_config) return DictAttribute(usercfg) - return {} def autodiscover_tasks(self, packages, related_name='tasks'): self.task_modules.update( @@ -253,7 +255,7 @@ def autodiscover_tasks(self, packages, related_name='tasks'): @property def conf(self): """Loader configuration.""" - if self._conf is None: + if self._conf is unconfigured: self._conf = self.read_configuration() return self._conf @@ -278,6 +280,15 @@ def find_related_module(package, related_name): """Given a package name and a module name, tries to find that module.""" + # Django 1.7 allows for specifying a class name in INSTALLED_APPS. + # (Issue #2248). 
+ try: + importlib.import_module(package) + except ImportError: + package, _, _ = package.rpartition('.') + if not package: + raise + try: pkg_path = importlib.import_module(package).__path__ except AttributeError: diff --git a/celery/local.py b/celery/local.py index ada6e9381..032e81b30 100644 --- a/celery/local.py +++ b/celery/local.py @@ -39,7 +39,7 @@ def __new__(cls, getter): def __get__(self, obj, cls=None): return self.__getter(obj) if obj is not None else self - return type(name, (type_, ), { + return type(name, (type_,), { '__new__': __new__, '__get__': __get__, }) @@ -99,9 +99,10 @@ def _get_current_object(self): loc = object.__getattribute__(self, '_Proxy__local') if not hasattr(loc, '__release_local__'): return loc(*self.__args, **self.__kwargs) - try: + try: # pragma: no cover + # not sure what this is about return getattr(loc, self.__name__) - except AttributeError: + except AttributeError: # pragma: no cover raise RuntimeError('no object bound to {0.__name__}'.format(self)) @property @@ -154,54 +155,144 @@ def __setslice__(self, i, j, seq): def __delslice__(self, i, j): del self._get_current_object()[i:j] - __setattr__ = lambda x, n, v: setattr(x._get_current_object(), n, v) - __delattr__ = lambda x, n: delattr(x._get_current_object(), n) - __str__ = lambda x: str(x._get_current_object()) - __lt__ = lambda x, o: x._get_current_object() < o - __le__ = lambda x, o: x._get_current_object() <= o - __eq__ = lambda x, o: x._get_current_object() == o - __ne__ = lambda x, o: x._get_current_object() != o - __gt__ = lambda x, o: x._get_current_object() > o - __ge__ = lambda x, o: x._get_current_object() >= o - __hash__ = lambda x: hash(x._get_current_object()) - __call__ = lambda x, *a, **kw: x._get_current_object()(*a, **kw) - __len__ = lambda x: len(x._get_current_object()) - __getitem__ = lambda x, i: x._get_current_object()[i] - __iter__ = lambda x: iter(x._get_current_object()) - __contains__ = lambda x, i: i in x._get_current_object() - __getslice__ = 
lambda x, i, j: x._get_current_object()[i:j] - __add__ = lambda x, o: x._get_current_object() + o - __sub__ = lambda x, o: x._get_current_object() - o - __mul__ = lambda x, o: x._get_current_object() * o - __floordiv__ = lambda x, o: x._get_current_object() // o - __mod__ = lambda x, o: x._get_current_object() % o - __divmod__ = lambda x, o: x._get_current_object().__divmod__(o) - __pow__ = lambda x, o: x._get_current_object() ** o - __lshift__ = lambda x, o: x._get_current_object() << o - __rshift__ = lambda x, o: x._get_current_object() >> o - __and__ = lambda x, o: x._get_current_object() & o - __xor__ = lambda x, o: x._get_current_object() ^ o - __or__ = lambda x, o: x._get_current_object() | o - __div__ = lambda x, o: x._get_current_object().__div__(o) - __truediv__ = lambda x, o: x._get_current_object().__truediv__(o) - __neg__ = lambda x: -(x._get_current_object()) - __pos__ = lambda x: +(x._get_current_object()) - __abs__ = lambda x: abs(x._get_current_object()) - __invert__ = lambda x: ~(x._get_current_object()) - __complex__ = lambda x: complex(x._get_current_object()) - __int__ = lambda x: int(x._get_current_object()) - __float__ = lambda x: float(x._get_current_object()) - __oct__ = lambda x: oct(x._get_current_object()) - __hex__ = lambda x: hex(x._get_current_object()) - __index__ = lambda x: x._get_current_object().__index__() - __coerce__ = lambda x, o: x._get_current_object().__coerce__(o) - __enter__ = lambda x: x._get_current_object().__enter__() - __exit__ = lambda x, *a, **kw: x._get_current_object().__exit__(*a, **kw) - __reduce__ = lambda x: x._get_current_object().__reduce__() - - if not PY3: - __cmp__ = lambda x, o: cmp(x._get_current_object(), o) # noqa - __long__ = lambda x: long(x._get_current_object()) # noqa + def __setattr__(self, name, value): + setattr(self._get_current_object(), name, value) + + def __delattr__(self, name): + delattr(self._get_current_object(), name) + + def __str__(self): + return str(self._get_current_object()) + 
+ def __lt__(self, other): + return self._get_current_object() < other + + def __le__(self, other): + return self._get_current_object() <= other + + def __eq__(self, other): + return self._get_current_object() == other + + def __ne__(self, other): + return self._get_current_object() != other + + def __gt__(self, other): + return self._get_current_object() > other + + def __ge__(self, other): + return self._get_current_object() >= other + + def __hash__(self): + return hash(self._get_current_object()) + + def __call__(self, *a, **kw): + return self._get_current_object()(*a, **kw) + + def __len__(self): + return len(self._get_current_object()) + + def __getitem__(self, i): + return self._get_current_object()[i] + + def __iter__(self): + return iter(self._get_current_object()) + + def __contains__(self, i): + return i in self._get_current_object() + + def __getslice__(self, i, j): + return self._get_current_object()[i:j] + + def __add__(self, other): + return self._get_current_object() + other + + def __sub__(self, other): + return self._get_current_object() - other + + def __mul__(self, other): + return self._get_current_object() * other + + def __floordiv__(self, other): + return self._get_current_object() // other + + def __mod__(self, other): + return self._get_current_object() % other + + def __divmod__(self, other): + return self._get_current_object().__divmod__(other) + + def __pow__(self, other): + return self._get_current_object() ** other + + def __lshift__(self, other): + return self._get_current_object() << other + + def __rshift__(self, other): + return self._get_current_object() >> other + + def __and__(self, other): + return self._get_current_object() & other + + def __xor__(self, other): + return self._get_current_object() ^ other + + def __or__(self, other): + return self._get_current_object() | other + + def __div__(self, other): + return self._get_current_object().__div__(other) + + def __truediv__(self, other): + return 
self._get_current_object().__truediv__(other) + + def __neg__(self): + return -(self._get_current_object()) + + def __pos__(self): + return +(self._get_current_object()) + + def __abs__(self): + return abs(self._get_current_object()) + + def __invert__(self): + return ~(self._get_current_object()) + + def __complex__(self): + return complex(self._get_current_object()) + + def __int__(self): + return int(self._get_current_object()) + + def __float__(self): + return float(self._get_current_object()) + + def __oct__(self): + return oct(self._get_current_object()) + + def __hex__(self): + return hex(self._get_current_object()) + + def __index__(self): + return self._get_current_object().__index__() + + def __coerce__(self, other): + return self._get_current_object().__coerce__(other) + + def __enter__(self): + return self._get_current_object().__enter__() + + def __exit__(self, *a, **kw): + return self._get_current_object().__exit__(*a, **kw) + + def __reduce__(self): + return self._get_current_object().__reduce__() + + if not PY3: # pragma: no cover + def __cmp__(self, other): + return cmp(self._get_current_object(), other) # noqa + + def __long__(self): + return long(self._get_current_object()) # noqa class PromiseProxy(Proxy): @@ -212,7 +303,7 @@ class PromiseProxy(Proxy): """ - __slots__ = ('__pending__', ) + __slots__ = ('__pending__',) def _get_current_object(self): try: @@ -249,9 +340,10 @@ def __evaluate__(self, '_Proxy__kwargs')): try: thing = Proxy._get_current_object(self) + except: + raise + else: object.__setattr__(self, '__thing', thing) - return thing - finally: for attr in _clean: try: object.__delattr__(self, attr) @@ -270,8 +362,9 @@ def __evaluate__(self, finally: try: object.__delattr__(self, '__pending__') - except AttributeError: + except AttributeError: # pragma: no cover pass + return thing def maybe_evaluate(obj): diff --git a/celery/platforms.py b/celery/platforms.py index c4013b578..b86173554 100644 --- a/celery/platforms.py +++ 
b/celery/platforms.py @@ -7,7 +7,7 @@ users, groups, and so on. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import atexit import errno @@ -21,21 +21,25 @@ from collections import namedtuple -from billiard import current_process +from billiard.compat import get_fdmax, close_open_fds # fileno used to be in this module from kombu.utils import maybe_fileno -from kombu.utils.compat import get_errno from kombu.utils.encoding import safe_str from contextlib import contextmanager from .local import try_import -from .five import items, range, reraise, string_t, zip_longest -from .utils.functional import uniq +from .five import items, reraise, string_t + +try: + from billiard.process import current_process +except ImportError: # pragma: no cover + current_process = None _setproctitle = try_import('setproctitle') resource = try_import('resource') pwd = try_import('pwd') grp = try_import('grp') +mputil = try_import('multiprocessing.util') __all__ = ['EX_OK', 'EX_FAILURE', 'EX_UNAVAILABLE', 'EX_USAGE', 'SYSTEM', 'IS_OSX', 'IS_WINDOWS', 'pyimplementation', 'LockFailed', @@ -43,7 +47,8 @@ 'close_open_fds', 'DaemonContext', 'detached', 'parse_uid', 'parse_gid', 'setgroups', 'initgroups', 'setgid', 'setuid', 'maybe_drop_privileges', 'signals', 'set_process_title', - 'set_mp_process_title', 'get_errno_name', 'ignore_errno'] + 'set_mp_process_title', 'get_errno_name', 'ignore_errno', + 'fd_by_path'] # exitcodes EX_OK = getattr(os, 'EX_OK', 0) @@ -56,7 +61,6 @@ IS_OSX = SYSTEM == 'Darwin' IS_WINDOWS = SYSTEM == 'Windows' -DAEMON_UMASK = 0 DAEMON_WORKDIR = '/' PIDFILE_FLAGS = os.O_CREAT | os.O_EXCL | os.O_WRONLY @@ -108,26 +112,6 @@ class LockFailed(Exception): """Raised if a pidlock can't be acquired.""" -def get_fdmax(default=None): - """Return the maximum number of open file descriptors - on this system. - - :keyword default: Value returned if there's no file - descriptor limit. 
- - """ - try: - return os.sysconf('SC_OPEN_MAX') - except: - pass - if resource is None: # Windows - return default - fdmax = resource.getrlimit(resource.RLIMIT_NOFILE)[1] - if fdmax == resource.RLIM_INFINITY: - return default - return fdmax - - class Pidfile(object): """Pidfile @@ -266,39 +250,57 @@ def _create_pidlock(pidfile): return pidlock -if hasattr(os, 'closerange'): +def fd_by_path(paths): + """Return a list of fds. - def close_open_fds(keep=None): - # must make sure this is 0-inclusive (Issue #1882) - keep = list(uniq(sorted( - f for f in map(maybe_fileno, keep or []) if f is not None - ))) - maxfd = get_fdmax(default=2048) - kL, kH = iter([-1] + keep), iter(keep + [maxfd]) - for low, high in zip_longest(kL, kH): - if low + 1 != high: - os.closerange(low + 1, high) + This method returns list of fds corresponding to + file paths passed in paths variable. -else: + + :keyword paths: List of file paths to get fd for. + + :returns: :list:. + + **Example**: + + .. code-block:: python + + keep = fd_by_path(['/dev/urandom', + '/my/precious/']) + """ + stats = set() + for path in paths: + try: + fd = os.open(path, os.O_RDONLY) + except OSError: + continue + try: + stats.add(os.fstat(fd)[1:3]) + finally: + os.close(fd) + + def fd_in_stats(fd): + try: + return os.fstat(fd)[1:3] in stats + except OSError: + return False - def close_open_fds(keep=None): # noqa - keep = [maybe_fileno(f) - for f in (keep or []) if maybe_fileno(f) is not None] - for fd in reversed(range(get_fdmax(default=2048))): - if fd not in keep: - with ignore_errno(errno.EBADF): - os.close(fd) + return [_fd for _fd in range(get_fdmax(2048)) if fd_in_stats(_fd)] class DaemonContext(object): _is_open = False def __init__(self, pidfile=None, workdir=None, umask=None, - fake=False, after_chdir=None, **kwargs): + fake=False, after_chdir=None, after_forkers=True, + **kwargs): + if isinstance(umask, string_t): + # octal or decimal, depending on initial zero. 
+ umask = int(umask, 8 if umask.startswith('0') else 10) self.workdir = workdir or DAEMON_WORKDIR - self.umask = DAEMON_UMASK if umask is None else umask + self.umask = umask self.fake = fake self.after_chdir = after_chdir + self.after_forkers = after_forkers self.stdfds = (sys.stdin, sys.stdout, sys.stderr) def redirect_to_null(self, fd): @@ -312,14 +314,21 @@ def open(self): self._detach() os.chdir(self.workdir) - os.umask(self.umask) + if self.umask is not None: + os.umask(self.umask) if self.after_chdir: self.after_chdir() - close_open_fds(self.stdfds) - for fd in self.stdfds: - self.redirect_to_null(maybe_fileno(fd)) + if not self.fake: + # We need to keep /dev/urandom from closing because + # shelve needs it, and Beat needs shelve to start. + keep = list(self.stdfds) + fd_by_path(['/dev/urandom']) + close_open_fds(keep) + for fd in self.stdfds: + self.redirect_to_null(maybe_fileno(fd)) + if self.after_forkers and mputil is not None: + mputil._run_after_forkers() self._is_open = True __enter__ = open @@ -332,7 +341,8 @@ def close(self, *args): def _detach(self): if os.fork() == 0: # first child os.setsid() # create new session - if os.fork() > 0: # second child + if os.fork() > 0: # pragma: no cover + # second child os._exit(0) else: os._exit(0) @@ -525,7 +535,7 @@ def maybe_drop_privileges(uid=None, gid=None): try: setuid(0) except OSError as exc: - if get_errno(exc) != errno.EPERM: + if exc.errno != errno.EPERM: raise pass # Good: cannot restore privileges. else: @@ -548,7 +558,7 @@ class Signals(object): **Examples**: - .. code-block:: python + .. 
code-block:: pycon >>> from celery.platforms import signals @@ -698,8 +708,8 @@ def set_mp_process_title(progname, info=None, hostname=None): # noqa """ if hostname: progname = '{0}: {1}'.format(progname, hostname) - return set_process_title( - '{0}:{1}'.format(progname, current_process().name), info=info) + name = current_process().name if current_process else 'MainProcess' + return set_process_title('{0}:{1}'.format(progname, name), info=info) def get_errno_name(n): @@ -726,7 +736,7 @@ def ignore_errno(*errnos, **kwargs): :keyword types: A tuple of exceptions to ignore (when the errno matches), defaults to :exc:`Exception`. """ - types = kwargs.get('types') or (Exception, ) + types = kwargs.get('types') or (Exception,) errnos = [get_errno_name(errno) for errno in errnos] try: yield diff --git a/celery/result.py b/celery/result.py index 069d8fde8..ff5f89ce0 100644 --- a/celery/result.py +++ b/celery/result.py @@ -6,24 +6,23 @@ Task results/state and groups of results. """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import time -import warnings -from collections import deque +from collections import OrderedDict, deque from contextlib import contextmanager from copy import copy from kombu.utils import cached_property -from kombu.utils.compat import OrderedDict +from vine import Thenable, barrier, promise from . import current_app from . import states from ._state import _set_task_join_will_block, task_join_will_block from .app import app_or_default from .datastructures import DependencyGraph, GraphFormatter -from .exceptions import IncompleteStream, TimeoutError +from .exceptions import ImproperlyConfigured, IncompleteStream, TimeoutError from .five import items, range, string_t, monotonic from .utils import deprecated @@ -34,15 +33,12 @@ Never call result.get() within a task! 
See http://docs.celeryq.org/en/latest/userguide/tasks.html\ #task-synchronous-subtasks - -In Celery 3.2 this will result in an exception being -raised instead of just being a warning. """ def assert_will_not_block(): if task_join_will_block(): - warnings.warn(RuntimeWarning(E_WOULDBLOCK)) + raise RuntimeError(E_WOULDBLOCK) @contextmanager @@ -80,22 +76,34 @@ class AsyncResult(ResultBase): #: The task result backend to use. backend = None - def __init__(self, id, backend=None, task_name=None, + def __init__(self, id, backend=None, + task_name=None, # deprecated app=None, parent=None): + if id is None: + raise ValueError( + 'AsyncResult requires valid id, not {0}'.format(type(id))) self.app = app_or_default(app or self.app) self.id = id self.backend = backend or self.app.backend - self.task_name = task_name self.parent = parent + self.on_ready = promise(self._on_fulfilled) self._cache = None + def then(self, callback, on_error=None): + self.backend.add_pending_result(self) + return self.on_ready.then(callback, on_error) + + def _on_fulfilled(self, result): + self.backend.remove_pending_result(self) + return result + def as_tuple(self): parent = self.parent return (self.id, parent and parent.as_tuple()), None - serializable = as_tuple # XXX compat def forget(self): """Forget about (and possibly remove the result of) this task.""" + self._cache = None self.backend.forget(self.id) def revoke(self, connection=None, terminate=False, signal=None, @@ -119,8 +127,10 @@ def revoke(self, connection=None, terminate=False, signal=None, terminate=terminate, signal=signal, reply=wait, timeout=timeout) - def get(self, timeout=None, propagate=True, interval=0.5, no_ack=True, - follow_parents=True): + def get(self, timeout=None, propagate=True, interval=0.5, + no_ack=True, follow_parents=True, callback=None, on_interval=None, + EXCEPTION_STATES=states.EXCEPTION_STATES, + PROPAGATE_STATES=states.PROPAGATE_STATES): """Wait until task is ready, and return its result. .. 
warning:: @@ -149,31 +159,32 @@ def get(self, timeout=None, propagate=True, interval=0.5, no_ack=True, """ assert_will_not_block() - on_interval = None + _on_interval = promise() if follow_parents and propagate and self.parent: - on_interval = self._maybe_reraise_parent_error - on_interval() + on_interval = promise(self._maybe_reraise_parent_error) + self._maybe_reraise_parent_error() + if on_interval: + _on_interval.then(on_interval) if self._cache: if propagate: - self.maybe_reraise() + self.maybe_throw(callback=callback) return self.result - try: - return self.backend.wait_for( - self.id, timeout=timeout, - propagate=propagate, - interval=interval, - on_interval=on_interval, - no_ack=no_ack, - ) - finally: - self._get_task_meta() # update self._cache + self.backend.add_pending_result(self) + return self.backend.wait_for_pending( + self, timeout=timeout, + interval=interval, + on_interval=_on_interval, + no_ack=no_ack, + propagate=propagate, + callback=callback, + ) wait = get # deprecated alias to :meth:`get`. def _maybe_reraise_parent_error(self): for node in reversed(list(self._parents())): - node.maybe_reraise() + node.maybe_throw() def _parents(self): node = self.parent @@ -212,7 +223,7 @@ def pow2(i): Calling :meth:`collect` would return: - .. code-block:: python + .. 
code-block:: pycon >>> from celery.result import ResultBase >>> from proj.tasks import A @@ -261,9 +272,17 @@ def failed(self): """Returns :const:`True` if the task failed.""" return self.state == states.FAILURE - def maybe_reraise(self): - if self.state in states.PROPAGATE_STATES: - raise self.result + def throw(self, *args, **kwargs): + self.on_ready.throw(*args, **kwargs) + + def maybe_throw(self, propagate=True, callback=None): + cache = self._get_task_meta() if self._cache is None else self._cache + state, value = cache['status'], cache['result'] + if state in states.PROPAGATE_STATES and propagate: + self.throw(value) + if callback is not None: + callback(self.id, value) + return value def build_graph(self, intermediate=False, formatter=None): graph = DependencyGraph( @@ -294,18 +313,19 @@ def __eq__(self, other): return NotImplemented def __ne__(self, other): - return not self.__eq__(other) + res = self.__eq__(other) + return True if res is NotImplemented else not res def __copy__(self): return self.__class__( - self.id, self.backend, self.task_name, self.app, self.parent, + self.id, self.backend, None, self.app, self.parent, ) def __reduce__(self): return self.__class__, self.__reduce_args__() def __reduce_args__(self): - return self.id, self.backend, self.task_name, None, self.parent + return self.id, self.backend, None, None, self.parent def __del__(self): self._cache = None @@ -322,21 +342,25 @@ def supports_native_join(self): def children(self): return self._get_task_meta().get('children') + def _maybe_set_cache(self, meta): + if meta: + state = meta['status'] + if state in states.READY_STATES: + d = self._set_cache(self.backend.meta_from_decoded(meta)) + self.on_ready(self) + return d + return meta + def _get_task_meta(self): if self._cache is None: - meta = self.backend.get_task_meta(self.id) - if meta: - state = meta['status'] - if state == states.SUCCESS or state in states.PROPAGATE_STATES: - self._set_cache(meta) - return self._set_cache(meta) - 
return meta + return self._maybe_set_cache(self.backend.get_task_meta(self.id)) return self._cache + def _iter_meta(self): + return iter([self._get_task_meta()]) + def _set_cache(self, d): - state, children = d['status'], d.get('children') - if state in states.EXCEPTION_STATES: - d['result'] = self.backend.exception_to_python(d['result']) + children = d.get('children') if children: d['children'] = [ result_from_tuple(child, self.app) for child in children @@ -388,7 +412,7 @@ def state(self): """ return self._get_task_meta()['status'] - status = state + status = state # XXX compat @property def task_id(self): @@ -398,7 +422,7 @@ def task_id(self): @task_id.setter # noqa def task_id(self, id): self.id = id -BaseAsyncResult = AsyncResult # for backwards compatibility. +Thenable.register(AsyncResult) class ResultSet(ResultBase): @@ -407,14 +431,19 @@ class ResultSet(ResultBase): :param results: List of result instances. """ - app = None + _app = None #: List of results in in the set. results = None - def __init__(self, results, app=None, **kwargs): - self.app = app_or_default(app or self.app) + def __init__(self, results, app=None, ready_barrier=None, **kwargs): + self._app = app + self._cache = None self.results = results + self.on_ready = promise(args=(self,)) + self._on_full = ready_barrier or barrier(results) + if self._on_full: + self._on_full.then(promise(self.on_ready)) def add(self, result): """Add :class:`AsyncResult` as a new member of the set. @@ -424,6 +453,14 @@ def add(self, result): """ if result not in self.results: self.results.append(result) + if self._on_full: + self._on_full.add(result) + + def _on_ready(self): + self.backend.remove_pending_result(self) + if self.backend.is_async: + self._cache = [r.get() for r in self.results] + self.on_ready() def remove(self, result): """Remove result from the set; it must be a member. 
@@ -476,9 +513,9 @@ def failed(self): """ return any(result.failed() for result in self.results) - def maybe_reraise(self): + def maybe_throw(self, callback=None, propagate=True): for result in self.results: - result.maybe_reraise() + result.maybe_throw(callback=callback, propagate=propagate) def waiting(self): """Are any of the tasks incomplete? @@ -536,7 +573,7 @@ def __getitem__(self, index): """`res[i] -> res.results[i]`""" return self.results[index] - @deprecated('3.2', '3.3') + @deprecated('4.0', '5.0') def iterate(self, timeout=None, propagate=True, interval=0.5): """Deprecated method, use :meth:`get` with a callback argument.""" elapsed = 0.0 @@ -561,7 +598,7 @@ def iterate(self, timeout=None, propagate=True, interval=0.5): raise TimeoutError('The operation timed out') def get(self, timeout=None, propagate=True, interval=0.5, - callback=None, no_ack=True): + callback=None, no_ack=True, on_message=None): """See :meth:`join` This is here for API compatibility with :class:`AsyncResult`, @@ -569,12 +606,16 @@ def get(self, timeout=None, propagate=True, interval=0.5, current result backend. """ + if self._cache is not None: + return self._cache return (self.join_native if self.supports_native_join else self.join)( timeout=timeout, propagate=propagate, - interval=interval, callback=callback, no_ack=no_ack) + interval=interval, callback=callback, no_ack=no_ack, + on_message=on_message, + ) def join(self, timeout=None, propagate=True, interval=0.5, - callback=None, no_ack=True): + callback=None, no_ack=True, on_message=None, on_interval=None): """Gathers the results of all tasks as a list in order. .. 
note:: @@ -626,6 +667,10 @@ def join(self, timeout=None, propagate=True, interval=0.5, time_start = monotonic() remaining = None + if on_message is not None: + raise ImproperlyConfigured( + 'Backend does not support on_message callback') + results = [] for result in self.results: remaining = None @@ -635,7 +680,7 @@ def join(self, timeout=None, propagate=True, interval=0.5, raise TimeoutError('join operation timed out') value = result.get( timeout=remaining, propagate=propagate, - interval=interval, no_ack=no_ack, + interval=interval, no_ack=no_ack, on_interval=on_interval, ) if callback: callback(result.id, value) @@ -643,7 +688,11 @@ def join(self, timeout=None, propagate=True, interval=0.5, results.append(value) return results - def iter_native(self, timeout=None, interval=0.5, no_ack=True): + def then(self, callback, on_error=None): + return self.on_ready.then(callback, on_error) + + def iter_native(self, timeout=None, interval=0.5, no_ack=True, + on_message=None, on_interval=None): """Backend optimized version of :meth:`iterate`. .. versionadded:: 2.2 @@ -655,16 +704,15 @@ def iter_native(self, timeout=None, interval=0.5, no_ack=True): result backends. """ - results = self.results - if not results: - return iter([]) - return results[0].backend.get_many( - set(r.id for r in results), + return self.backend.iter_native( + self, timeout=timeout, interval=interval, no_ack=no_ack, + on_message=on_message, on_interval=on_interval, ) def join_native(self, timeout=None, propagate=True, - interval=0.5, callback=None, no_ack=True): + interval=0.5, callback=None, no_ack=True, + on_message=None, on_interval=None): """Backend optimized version of :meth:`join`. .. 
versionadded:: 2.2 @@ -677,11 +725,12 @@ def join_native(self, timeout=None, propagate=True, """ assert_will_not_block() - order_index = None if callback else dict( - (result.id, i) for i, result in enumerate(self.results) - ) + order_index = None if callback else { + result.id: i for i, result in enumerate(self.results) + } acc = None if callback else [None for _ in range(len(self))] - for task_id, meta in self.iter_native(timeout, interval, no_ack): + for task_id, meta in self.iter_native(timeout, interval, no_ack, + on_message, on_interval): value = meta['result'] if propagate and meta['status'] in states.PROPAGATE_STATES: raise value @@ -691,6 +740,11 @@ def join_native(self, timeout=None, propagate=True, acc[order_index[task_id]] = value return acc + def _iter_meta(self): + return (meta for _, meta in self.backend.get_many( + {r.id for r in self.results}, max_iterations=1, + )) + def _failed_join_report(self): return (res for res in self.results if res.backend.is_cached(res.id) and @@ -705,27 +759,41 @@ def __eq__(self, other): return NotImplemented def __ne__(self, other): - return not self.__eq__(other) + res = self.__eq__(other) + return True if res is NotImplemented else not res def __repr__(self): return '<{0}: [{1}]>'.format(type(self).__name__, ', '.join(r.id for r in self.results)) @property - def subtasks(self): - """Deprecated alias to :attr:`results`.""" - return self.results + def supports_native_join(self): + try: + return self.results[0].supports_native_join + except IndexError: + pass @property - def supports_native_join(self): - return self.results[0].supports_native_join + def app(self): + if self._app is None: + self._app = (self.results[0].app if self.results else + current_app._get_current_object()) + return self._app + + @app.setter + def app(self, app): # noqa + self._app = app + + @property + def backend(self): + return self.app.backend if self.app else self.results[0].backend +Thenable.register(ResultSet) class GroupResult(ResultSet): 
"""Like :class:`ResultSet`, but with an associated id. - This type is returned by :class:`~celery.group`, and the - deprecated TaskSet, meth:`~celery.task.TaskSet.apply_async` method. + This type is returned by :class:`~celery.group`. It enables inspection of the tasks state and return values as a single entity. @@ -773,7 +841,8 @@ def __eq__(self, other): return NotImplemented def __ne__(self, other): - return not self.__eq__(other) + res = self.__eq__(other) + return True if res is NotImplemented else not res def __repr__(self): return '<{0}: {1} [{2}]>'.format(type(self).__name__, self.id, @@ -781,7 +850,6 @@ def __repr__(self): def as_tuple(self): return self.id, [r.as_tuple() for r in self.results] - serializable = as_tuple # XXX compat @property def children(self): @@ -793,50 +861,28 @@ def restore(self, id, backend=None): return ( backend or (self.app.backend if self.app else current_app.backend) ).restore_group(id) - - -class TaskSetResult(GroupResult): - """Deprecated version of :class:`GroupResult`""" - - def __init__(self, taskset_id, results=None, **kwargs): - # XXX supports the taskset_id kwarg. - # XXX previously the "results" arg was named "subtasks". - if 'subtasks' in kwargs: - results = kwargs['subtasks'] - GroupResult.__init__(self, taskset_id, results, **kwargs) - - def itersubtasks(self): - """Deprecated. 
Use ``iter(self.results)`` instead.""" - return iter(self.results) - - @property - def total(self): - """Deprecated: Use ``len(r)``.""" - return len(self) - - @property - def taskset_id(self): - """compat alias to :attr:`self.id`""" - return self.id - - @taskset_id.setter # noqa - def taskset_id(self, id): - self.id = id +Thenable.register(ResultSet) class EagerResult(AsyncResult): """Result that we know has already been executed.""" - task_name = None def __init__(self, id, ret_value, state, traceback=None): self.id = id self._result = ret_value self._state = state self._traceback = traceback + self.on_ready = promise(args=(self,)) + self.on_ready() + + def then(self, callback, on_error=None): + return self.on_ready.then(callback, on_error) def _get_task_meta(self): - return {'task_id': self.id, 'result': self._result, 'status': - self._state, 'traceback': self._traceback} + return self._cache + + def __del__(self): + pass def __reduce__(self): return self.__class__, self.__reduce_args__() @@ -858,7 +904,7 @@ def get(self, timeout=None, propagate=True, **kwargs): if propagate: raise self.result return self.result - wait = get + wait = get # XXX Compat (remove 5.0) def forget(self): pass @@ -869,6 +915,11 @@ def revoke(self, *args, **kwargs): def __repr__(self): return ''.format(self) + @property + def _cache(self): + return {'task_id': self.id, 'result': self._result, 'status': + self._state, 'traceback': self._traceback} + @property def result(self): """The tasks return value""" @@ -888,6 +939,7 @@ def traceback(self): @property def supports_native_join(self): return False +Thenable.register(EagerResult) def result_from_tuple(r, app=None): @@ -907,4 +959,3 @@ def result_from_tuple(r, app=None): parent = result_from_tuple(parent, app) return Result(id, parent=parent) return r -from_serializable = result_from_tuple # XXX compat diff --git a/celery/schedules.py b/celery/schedules.py index 6424dfa04..657d6f787 100644 --- a/celery/schedules.py +++ 
b/celery/schedules.py @@ -7,11 +7,12 @@ should run. """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import numbers import re +from bisect import bisect, bisect_left from collections import namedtuple from datetime import datetime, timedelta @@ -21,13 +22,13 @@ from .five import range, string_t from .utils import is_iterable from .utils.timeutils import ( - timedelta_seconds, weekday, maybe_timedelta, remaining, - humanize_seconds, timezone, maybe_make_aware, ffwd + weekday, maybe_timedelta, remaining, humanize_seconds, + timezone, maybe_make_aware, ffwd, localize ) from .datastructures import AttributeDict __all__ = ['ParseException', 'schedule', 'crontab', 'crontab_parser', - 'maybe_schedule'] + 'maybe_schedule', 'solar'] schedstate = namedtuple('schedstate', ('is_due', 'next')) @@ -47,6 +48,18 @@ {0._orig_day_of_month} {0._orig_month_of_year} (m/h/d/dM/MY)>\ """ +SOLAR_INVALID_LATITUDE = """\ +Argument latitude {lat} is invalid, must be between -90 and 90.\ +""" + +SOLAR_INVALID_LONGITUDE = """\ +Argument longitude {lon} is invalid, must be between -180 and 180.\ +""" + +SOLAR_INVALID_EVENT = """\ +Argument event "{event}" is invalid, must be one of {all_events}.\ +""" + def cronfield(s): return '*' if s is None else s @@ -60,11 +73,11 @@ class schedule(object): """Schedule for periodic task. :param run_every: Interval in seconds (or a :class:`~datetime.timedelta`). - :param relative: If set to True the run time will be rounded to the + :keyword relative: If set to True the run time will be rounded to the resolution of the interval. - :param nowfun: Function returning the current date and time + :keyword nowfun: Function returning the current date and time (class:`~datetime.datetime`). - :param app: Celery app instance. + :keyword app: Celery app instance. 
""" relative = False @@ -99,7 +112,7 @@ def is_due(self, last_run_at): The next time to check is used to save energy/cpu cycles, it does not need to be accurate but will influence the precision of your schedule. You must also keep in mind - the value of :setting:`CELERYBEAT_MAX_LOOP_INTERVAL`, + the value of :setting:`beat_max_loop_interval`, which decides the maximum number of seconds the scheduler can sleep between re-checking the periodic task intervals. So if you have a task that changes schedule at runtime then your next_run_at @@ -116,15 +129,13 @@ def is_due(self, last_run_at): """ last_run_at = self.maybe_make_aware(last_run_at) rem_delta = self.remaining_estimate(last_run_at) - remaining_s = timedelta_seconds(rem_delta) + remaining_s = max(rem_delta.total_seconds(), 0) if remaining_s == 0: return schedstate(is_due=True, next=self.seconds) return schedstate(is_due=False, next=remaining_s) def maybe_make_aware(self, dt): - if self.utc_enabled: - return maybe_make_aware(dt, self.tz) - return dt + return maybe_make_aware(dt, self.tz) def __repr__(self): return ''.format(self) @@ -142,7 +153,7 @@ def __reduce__(self): @property def seconds(self): - return timedelta_seconds(self.run_every) + return max(self.run_every.total_seconds(), 0) @property def human_seconds(self): @@ -162,7 +173,7 @@ def tz(self): @cached_property def utc_enabled(self): - return self.app.conf.CELERY_ENABLE_UTC + return self.app.conf.enable_utc def to_local(self, dt): if not self.utc_enabled: @@ -239,7 +250,7 @@ def _parse_part(self, part): m = regex.match(part) if m: return handler(m.groups()) - return self._expand_range((part, )) + return self._expand_range((part,)) def _expand_range(self, toks): fr = self._expand_number(toks[0]) @@ -383,7 +394,7 @@ def _expand_cronspec(cronspec, max_, min_=0): int (like 7) str (like '3-5,*/15', '*', or 'monday') - set (like set([0,15,30,45])) + set (like {0,15,30,45} list (like [8-17]) And convert it to an (expanded) set representing all time unit @@ 
-403,7 +414,7 @@ def _expand_cronspec(cronspec, max_, min_=0): """ if isinstance(cronspec, numbers.Integral): - result = set([cronspec]) + result = {cronspec} elif isinstance(cronspec, string_t): result = crontab_parser(max_, min_).parse(cronspec) elif isinstance(cronspec, set): @@ -421,14 +432,13 @@ def _expand_cronspec(cronspec, max_, min_=0): return result def _delta_to_next(self, last_run_at, next_hour, next_minute): - """ - Takes a datetime of last run, next minute and hour, and + """Takes a datetime of last run, next minute and hour, and returns a relativedelta for the next scheduled day and time. + Only called when day_of_month and/or month_of_year cronspec is specified to further limit scheduled task execution. - """ - from bisect import bisect, bisect_left + """ datedata = AttributeDict(year=last_run_at.year) days_of_month = sorted(self.day_of_month) months_of_year = sorted(self.month_of_year) @@ -505,16 +515,20 @@ def remaining_delta(self, last_run_at, tz=None, ffwd=ffwd): now = self.maybe_make_aware(self.now()) dow_num = last_run_at.isoweekday() % 7 # Sunday is day 0, not day 7 - execute_this_date = (last_run_at.month in self.month_of_year and - last_run_at.day in self.day_of_month and - dow_num in self.day_of_week) + execute_this_date = ( + last_run_at.month in self.month_of_year and + last_run_at.day in self.day_of_month and + dow_num in self.day_of_week + ) - execute_this_hour = (execute_this_date and - last_run_at.day == now.day and - last_run_at.month == now.month and - last_run_at.year == now.year and - last_run_at.hour in self.hour and - last_run_at.minute < max(self.minute)) + execute_this_hour = ( + execute_this_date and + last_run_at.day == now.day and + last_run_at.month == now.month and + last_run_at.year == now.year and + last_run_at.hour in self.hour and + last_run_at.minute < max(self.minute) + ) if execute_this_hour: next_minute = min(minute for minute in self.minute @@ -539,12 +553,14 @@ def remaining_delta(self, last_run_at, tz=None, 
ffwd=ffwd): if day > dow_num] or self.day_of_week) add_week = next_day == dow_num - delta = ffwd(weeks=add_week and 1 or 0, - weekday=(next_day - 1) % 7, - hour=next_hour, - minute=next_minute, - second=0, - microsecond=0) + delta = ffwd( + weeks=add_week and 1 or 0, + weekday=(next_day - 1) % 7, + hour=next_hour, + minute=next_minute, + second=0, + microsecond=0, + ) else: delta = self._delta_to_next(last_run_at, next_hour, next_minute) @@ -562,24 +578,29 @@ def is_due(self, last_run_at): """ rem_delta = self.remaining_estimate(last_run_at) - rem = timedelta_seconds(rem_delta) + rem = max(rem_delta.total_seconds(), 0) due = rem == 0 if due: rem_delta = self.remaining_estimate(self.now()) - rem = timedelta_seconds(rem_delta) + rem = max(rem_delta.total_seconds(), 0) return schedstate(due, rem) def __eq__(self, other): if isinstance(other, crontab): - return (other.month_of_year == self.month_of_year and - other.day_of_month == self.day_of_month and - other.day_of_week == self.day_of_week and - other.hour == self.hour and - other.minute == self.minute) + return ( + other.month_of_year == self.month_of_year and + other.day_of_month == self.day_of_month and + other.day_of_week == self.day_of_week and + other.hour == self.hour and + other.minute == self.minute + ) return NotImplemented def __ne__(self, other): - return not self.__eq__(other) + res = self.__eq__(other) + if res is NotImplemented: + return True + return not res def maybe_schedule(s, relative=False, app=None): @@ -591,3 +612,154 @@ def maybe_schedule(s, relative=False, app=None): else: s.app = app return s + + +class solar(schedule): + """A solar event can be used as the `run_every` value of a + :class:`PeriodicTask` to schedule based on certain solar events. + + :param event: Solar event that triggers this task. Available + values are: dawn_astronomical, dawn_nautical, dawn_civil, + sunrise, solar_noon, sunset, dusk_civil, dusk_nautical, + dusk_astronomical + :param lat: The latitude of the observer. 
+ :param lon: The longitude of the observer. + :param nowfun: Function returning the current date and time + (class:`~datetime.datetime`). + :param app: Celery app instance. + """ + + _all_events = [ + 'dawn_astronomical', + 'dawn_nautical', + 'dawn_civil', + 'sunrise', + 'solar_noon', + 'sunset', + 'dusk_civil', + 'dusk_nautical', + 'dusk_astronomical', + ] + _horizons = { + 'dawn_astronomical': '-18', + 'dawn_nautical': '-12', + 'dawn_civil': '-6', + 'sunrise': '-0:34', + 'solar_noon': '0', + 'sunset': '-0:34', + 'dusk_civil': '-6', + 'dusk_nautical': '-12', + 'dusk_astronomical': '18', + } + _methods = { + 'dawn_astronomical': 'next_rising', + 'dawn_nautical': 'next_rising', + 'dawn_civil': 'next_rising', + 'sunrise': 'next_rising', + 'solar_noon': 'next_transit', + 'sunset': 'next_setting', + 'dusk_civil': 'next_setting', + 'dusk_nautical': 'next_setting', + 'dusk_astronomical': 'next_setting', + } + _use_center_l = { + 'dawn_astronomical': True, + 'dawn_nautical': True, + 'dawn_civil': True, + 'sunrise': False, + 'solar_noon': True, + 'sunset': False, + 'dusk_civil': True, + 'dusk_nautical': True, + 'dusk_astronomical': True, + } + + def __init__(self, event, lat, lon, nowfun=None, app=None): + self.ephem = __import__('ephem') + self.event = event + self.lat = lat + self.lon = lon + self.nowfun = nowfun + self._app = app + + if event not in self._all_events: + raise ValueError(SOLAR_INVALID_EVENT.format( + event=event, all_events=', '.join(self._all_events), + )) + if lat < -90 or lat > 90: + raise ValueError(SOLAR_INVALID_LATITUDE.format(lat=lat)) + if lon < -180 or lon > 180: + raise ValueError(SOLAR_INVALID_LONGITUDE.format(lon=lon)) + + cal = self.ephem.Observer() + cal.lat = str(lat) + cal.lon = str(lon) + cal.elev = 0 + cal.horizon = self._horizons[event] + cal.pressure = 0 + self.cal = cal + + self.method = self._methods[event] + self.use_center = self._use_center_l[event] + + def __reduce__(self): + return self.__class__, (self.event, self.lat, 
self.lon) + + def __repr__(self): + return ''.format( + self.event, self.lat, self.lon, + ) + + def remaining_estimate(self, last_run_at): + """Returns when the periodic task should run next as a timedelta, + or if it shouldn't run today (e.g. the sun does not rise today), + returns the time when the next check should take place.""" + last_run_at = self.maybe_make_aware(last_run_at) + last_run_at_utc = localize(last_run_at, timezone.utc) + self.cal.date = last_run_at_utc + try: + next_utc = getattr(self.cal, self.method)( + self.ephem.Sun(), + start=last_run_at_utc, use_center=self.use_center, + ) + except self.ephem.CircumpolarError: # pragma: no cover + # Sun will not rise/set today. Check again tomorrow + # (specifically, after the next anti-transit). + next_utc = ( + self.cal.next_antitransit(self.ephem.Sun()) + + timedelta(minutes=1) + ) + next = self.maybe_make_aware(next_utc.datetime()) + now = self.maybe_make_aware(self.now()) + delta = next - now + return delta + + def is_due(self, last_run_at): + """Returns tuple of two items `(is_due, next_time_to_run)`, + where next time to run is in seconds. + + See :meth:`celery.schedules.schedule.is_due` for more information. 
+ + """ + rem_delta = self.remaining_estimate(last_run_at) + rem = max(rem_delta.total_seconds(), 0) + due = rem == 0 + if due: + rem_delta = self.remaining_estimate(self.now()) + rem = max(rem_delta.total_seconds(), 0) + return schedstate(due, rem) + + def __eq__(self, other): + if isinstance(other, solar): + return ( + other.event == self.event and + other.lat == self.lat and + other.lon == self.lon + ) + return NotImplemented + + def __ne__(self, other): + res = self.__eq__(other) + if res is NotImplemented: + return True + return not res diff --git a/celery/security/__init__.py b/celery/security/__init__.py index 352d400cf..8366ad7f3 100644 --- a/celery/security/__init__.py +++ b/celery/security/__init__.py @@ -25,9 +25,9 @@ SETTING_MISSING = """\ Sorry, but you have to configure the - * CELERY_SECURITY_KEY - * CELERY_SECURITY_CERTIFICATE, and the - * CELERY_SECURITY_CERT_STORE + * security_key + * security_certificate, and the + * security_cert_storE configuration settings to use the auth serializer. Please see the configuration reference for more information. 
@@ -46,7 +46,7 @@ def setup_security(allowed_serializers=None, key=None, cert=None, store=None, _disable_insecure_serializers(allowed_serializers) conf = app.conf - if conf.CELERY_TASK_SERIALIZER != 'auth': + if conf.task_serializer != 'auth': return try: @@ -54,9 +54,9 @@ def setup_security(allowed_serializers=None, key=None, cert=None, store=None, except ImportError: raise ImproperlyConfigured(SSL_NOT_INSTALLED) - key = key or conf.CELERY_SECURITY_KEY - cert = cert or conf.CELERY_SECURITY_CERTIFICATE - store = store or conf.CELERY_SECURITY_CERT_STORE + key = key or conf.security_key + cert = cert or conf.security_certificate + store = store or conf.security_cert_store if not (key and cert and store): raise ImproperlyConfigured(SETTING_MISSING) diff --git a/celery/security/serialization.py b/celery/security/serialization.py index f1cab2914..3b0458974 100644 --- a/celery/security/serialization.py +++ b/celery/security/serialization.py @@ -8,26 +8,17 @@ """ from __future__ import absolute_import -import base64 - from kombu.serialization import registry, dumps, loads from kombu.utils.encoding import bytes_to_str, str_to_bytes, ensure_bytes from .certificate import Certificate, FSCertStore from .key import PrivateKey from .utils import reraise_errors +from celery.utils.serialization import b64encode, b64decode __all__ = ['SecureSerializer', 'register_auth'] -def b64encode(s): - return bytes_to_str(base64.b64encode(str_to_bytes(s))) - - -def b64decode(s): - return base64.b64decode(str_to_bytes(s)) - - class SecureSerializer(object): def __init__(self, key=None, cert=None, cert_store=None, @@ -42,7 +33,7 @@ def serialize(self, data): """serialize data structure into string""" assert self._key is not None assert self._cert is not None - with reraise_errors('Unable to serialize: {0!r}', (Exception, )): + with reraise_errors('Unable to serialize: {0!r}', (Exception,)): content_type, content_encoding, body = dumps( bytes_to_str(data), serializer=self._serializer) # What we 
sign is the serialized body, not the body itself. @@ -57,7 +48,7 @@ def serialize(self, data): def deserialize(self, data): """deserialize data structure from string""" assert self._cert_store is not None - with reraise_errors('Unable to deserialize: {0!r}', (Exception, )): + with reraise_errors('Unable to deserialize: {0!r}', (Exception,)): payload = self._unpack(data) signature, signer, body = (payload['signature'], payload['signer'], diff --git a/celery/security/utils.py b/celery/security/utils.py index d184d0b4c..7683afc59 100644 --- a/celery/security/utils.py +++ b/celery/security/utils.py @@ -26,7 +26,7 @@ @contextmanager def reraise_errors(msg='{0!r}', errors=None): assert crypto is not None - errors = (crypto.Error, ) if errors is None else errors + errors = (crypto.Error,) if errors is None else errors try: yield except errors as exc: diff --git a/celery/signals.py b/celery/signals.py index 2091830cb..ba2c1a213 100644 --- a/celery/signals.py +++ b/celery/signals.py @@ -12,7 +12,8 @@ See :ref:`signals` for more information. 
""" -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals + from .utils.dispatch import Signal __all__ = ['before_task_publish', 'after_task_publish', @@ -50,6 +51,12 @@ task_revoked = Signal(providing_args=[ 'request', 'terminated', 'signum', 'expired', ]) +task_rejected = Signal(providing_args=[ + 'message', 'exc', +]) +task_unknown = Signal(providing_args=[ + 'message', 'exc', 'name', 'id', +]) celeryd_init = Signal(providing_args=['instance', 'conf', 'options']) celeryd_after_setup = Signal(providing_args=['instance', 'conf']) import_modules = Signal(providing_args=[]) diff --git a/celery/states.py b/celery/states.py index 665a57baf..697bc1868 100644 --- a/celery/states.py +++ b/celery/states.py @@ -57,7 +57,7 @@ ----- """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals __all__ = ['PENDING', 'RECEIVED', 'STARTED', 'SUCCESS', 'FAILURE', 'REVOKED', 'RETRY', 'IGNORED', 'READY_STATES', 'UNREADY_STATES', @@ -72,6 +72,7 @@ 'REVOKED', 'STARTED', 'RECEIVED', + 'REJECTED', 'RETRY', 'PENDING'] @@ -112,9 +113,6 @@ class state(str): """ - def compare(self, other, fun): - return fun(precedence(self), precedence(other)) - def __gt__(self, other): return precedence(self) < precedence(other) @@ -129,9 +127,9 @@ def __le__(self, other): #: Task state is unknown (assumed pending since you know the id). PENDING = 'PENDING' -#: Task was received by a worker. +#: Task was received by a worker (only used in events). RECEIVED = 'RECEIVED' -#: Task was started by a worker (:setting:`CELERY_TRACK_STARTED`). +#: Task was started by a worker (:setting:`task_track_started`). STARTED = 'STARTED' #: Task succeeded SUCCESS = 'SUCCESS' @@ -139,15 +137,17 @@ def __le__(self, other): FAILURE = 'FAILURE' #: Task was revoked. REVOKED = 'REVOKED' +#: Task was rejected (only used in events). +REJECTED = 'REJECTED' #: Task is waiting for retry. 
RETRY = 'RETRY' IGNORED = 'IGNORED' REJECTED = 'REJECTED' -READY_STATES = frozenset([SUCCESS, FAILURE, REVOKED]) -UNREADY_STATES = frozenset([PENDING, RECEIVED, STARTED, RETRY]) -EXCEPTION_STATES = frozenset([RETRY, FAILURE, REVOKED]) -PROPAGATE_STATES = frozenset([FAILURE, REVOKED]) +READY_STATES = frozenset({SUCCESS, FAILURE, REVOKED}) +UNREADY_STATES = frozenset({PENDING, RECEIVED, STARTED, REJECTED, RETRY}) +EXCEPTION_STATES = frozenset({RETRY, FAILURE, REVOKED}) +PROPAGATE_STATES = frozenset({FAILURE, REVOKED}) -ALL_STATES = frozenset([PENDING, RECEIVED, STARTED, - SUCCESS, FAILURE, RETRY, REVOKED]) +ALL_STATES = frozenset({PENDING, RECEIVED, STARTED, + SUCCESS, FAILURE, RETRY, REVOKED}) diff --git a/celery/task/__init__.py b/celery/task/__init__.py index f8326e887..3d820166f 100644 --- a/celery/task/__init__.py +++ b/celery/task/__init__.py @@ -12,12 +12,12 @@ from __future__ import absolute_import from celery._state import current_app, current_task as current -from celery.five import MagicModule, recreate_module +from celery.five import LazyModule, recreate_module from celery.local import Proxy __all__ = [ 'BaseTask', 'Task', 'PeriodicTask', 'task', 'periodic_task', - 'group', 'chord', 'subtask', 'TaskSet', + 'group', 'chord', 'subtask', ] @@ -29,10 +29,9 @@ # they contain. 
from celery.canvas import group, chord, subtask from .base import BaseTask, Task, PeriodicTask, task, periodic_task - from .sets import TaskSet -class module(MagicModule): +class module(LazyModule): def __call__(self, *args, **kwargs): return self.task(*args, **kwargs) @@ -44,7 +43,6 @@ def __call__(self, *args, **kwargs): 'celery.task.base': ['BaseTask', 'Task', 'PeriodicTask', 'task', 'periodic_task'], 'celery.canvas': ['group', 'chord', 'subtask'], - 'celery.task.sets': ['TaskSet'], }, base=module, __package__='celery.task', diff --git a/celery/task/base.py b/celery/task/base.py index 9d466b57c..b7d3b24eb 100644 --- a/celery/task/base.py +++ b/celery/task/base.py @@ -14,20 +14,116 @@ from kombu import Exchange from celery import current_app -from celery.app.task import Context, TaskType, Task as BaseTask # noqa -from celery.five import class_property, reclassmethod +from celery.app.task import Context, Task as BaseTask, _reprtask +from celery.five import class_property, reclassmethod, with_metaclass +from celery.local import Proxy from celery.schedules import maybe_schedule from celery.utils.log import get_task_logger -__all__ = ['Task', 'PeriodicTask', 'task'] +__all__ = ['Context', 'Task', 'TaskType', 'PeriodicTask', 'task'] #: list of methods that must be classmethods in the old API. _COMPAT_CLASSMETHODS = ( 'delay', 'apply_async', 'retry', 'apply', 'subtask_from_request', + 'signature_from_request', 'signature', 'AsyncResult', 'subtask', '_get_request', '_get_exec_options', ) +class _CompatShared(object): + + def __init__(self, name, cons): + self.name = name + self.cons = cons + + def __hash__(self): + return hash(self.name) + + def __repr__(self): + return '' % (self.name,) + + def __call__(self, app): + return self.cons(app) + + +class TaskType(type): + """Meta class for tasks. + + Automatically registers the task in the task registry (except + if the :attr:`Task.abstract`` attribute is set). 
+ + If no :attr:`Task.name` attribute is provided, then the name is generated + from the module and class name. + + """ + _creation_count = {} # used by old non-abstract task classes + + def __new__(cls, name, bases, attrs): + new = super(TaskType, cls).__new__ + task_module = attrs.get('__module__') or '__main__' + + # - Abstract class: abstract attribute should not be inherited. + abstract = attrs.pop('abstract', None) + if abstract or not attrs.get('autoregister', True): + return new(cls, name, bases, attrs) + + # The 'app' attribute is now a property, with the real app located + # in the '_app' attribute. Previously this was a regular attribute, + # so we should support classes defining it. + app = attrs.pop('_app', None) or attrs.pop('app', None) + + # Attempt to inherit app from one the bases + if not isinstance(app, Proxy) and app is None: + for base in bases: + if getattr(base, '_app', None): + app = base._app + break + else: + app = current_app._get_current_object() + attrs['_app'] = app + + # - Automatically generate missing/empty name. + task_name = attrs.get('name') + if not task_name: + attrs['name'] = task_name = app.gen_task_name(name, task_module) + + if not attrs.get('_decorated'): + # non decorated tasks must also be shared in case + # an app is created multiple times due to modules + # imported under multiple names. + # Hairy stuff, here to be compatible with 2.x. + # People should not use non-abstract task classes anymore, + # use the task decorator. + from celery._state import connect_on_app_finalize + unique_name = '.'.join([task_module, name]) + if unique_name not in cls._creation_count: + # the creation count is used as a safety + # so that the same task is not added recursively + # to the set of constructors. + cls._creation_count[unique_name] = 1 + connect_on_app_finalize(_CompatShared( + unique_name, + lambda app: TaskType.__new__(cls, name, bases, + dict(attrs, _app=app)), + )) + + # - Create and register class. 
+ # Because of the way import happens (recursively) + # we may or may not be the first time the task tries to register + # with the framework. There should only be one class for each task + # name, so we always return the registered version. + tasks = app._tasks + if task_name not in tasks: + tasks.register(new(cls, name, bases, attrs)) + instance = tasks[task_name] + instance.bind(app) + return instance.__class__ + + def __repr__(cls): + return _reprtask(cls) + + +@with_metaclass(TaskType) class Task(BaseTask): """Deprecated Task base class. @@ -38,7 +134,7 @@ class Task(BaseTask): __bound__ = False __v2_compat__ = True - #- Deprecated compat. attributes -: + # - Deprecated compat. attributes -: queue = None routing_key = None @@ -50,11 +146,10 @@ class Task(BaseTask): priority = None type = 'regular' disable_error_emails = False - accept_magic_kwargs = False from_config = BaseTask.from_config + ( - ('exchange_type', 'CELERY_DEFAULT_EXCHANGE_TYPE'), - ('delivery_mode', 'CELERY_DEFAULT_DELIVERY_MODE'), + ('exchange_type', 'task_default_exchange_type'), + ('delivery_mode', 'task_default_delivery_mode'), ) # In old Celery the @task decorator didn't exist, so one would create @@ -97,21 +192,28 @@ def establish_connection(self): ... # establish fresh connection - with celery.connection() as conn: + with celery.connection_for_write() as conn: ... """ - return self._get_app().connection() + return self._get_app().connection_for_write() def get_publisher(self, connection=None, exchange=None, exchange_type=None, **options): """Deprecated method to get the task publisher (now called producer). - Should be replaced with :class:`@amqp.TaskProducer`: + Should be replaced with :class:`kombu.Producer`: .. 
code-block:: python - with celery.connection() as conn: - with celery.amqp.TaskProducer(conn) as prod: + with app.connection_for_write() as conn: + with app.amqp.Producer(conn) as prod: + my_task.apply_async(producer=prod) + + or event better is to use the :class:`@amqp.producer_pool`: + + .. code-block:: python + + with app.producer_or_acquire() as prod: my_task.apply_async(producer=prod) """ @@ -119,7 +221,7 @@ def get_publisher(self, connection=None, exchange=None, if exchange_type is None: exchange_type = self.exchange_type connection = connection or self.establish_connection() - return self._get_app().amqp.TaskProducer( + return self._get_app().amqp.Producer( connection, exchange=exchange and Exchange(exchange, exchange_type), routing_key=self.routing_key, **options @@ -142,7 +244,7 @@ def get_consumer(self, connection=None, queues=None, **kwargs): class PeriodicTask(Task): """A periodic task is a task that adds itself to the - :setting:`CELERYBEAT_SCHEDULE` setting.""" + :setting:`beat_schedule` setting.""" abstract = True ignore_result = True relative = False @@ -158,7 +260,7 @@ def __init__(self): @classmethod def on_bound(cls, app): - app.conf.CELERYBEAT_SCHEDULE[cls.name] = { + app.conf.beat_schedule[cls.name] = { 'task': cls.name, 'schedule': cls.run_every, 'args': (), @@ -170,10 +272,9 @@ def on_bound(cls, app): def task(*args, **kwargs): """Deprecated decorator, please use :func:`celery.task`.""" - return current_app.task(*args, **dict({'accept_magic_kwargs': False, - 'base': Task}, **kwargs)) + return current_app.task(*args, **dict({'base': Task}, **kwargs)) def periodic_task(*args, **options): - """Deprecated decorator, please use :setting:`CELERYBEAT_SCHEDULE`.""" + """Deprecated decorator, please use :setting:`beat_schedule`.""" return task(**dict({'base': PeriodicTask}, **options)) diff --git a/celery/task/http.py b/celery/task/http.py index e170ec3a5..609026a14 100644 --- a/celery/task/http.py +++ b/celery/task/http.py @@ -8,7 +8,6 @@ """ from 
__future__ import absolute_import -import anyjson import sys try: @@ -17,6 +16,9 @@ from urllib import urlencode # noqa from urlparse import urlparse, parse_qsl # noqa +from kombu.utils import json +from kombu.utils.encoding import bytes_to_str, str_to_bytes + from celery import shared_task, __version__ as celery_version from celery.five import items, reraise from celery.utils.log import get_task_logger @@ -24,7 +26,7 @@ __all__ = ['InvalidResponseError', 'RemoteExecuteError', 'UnknownStatusError', 'HttpDispatch', 'dispatch', 'URL'] -GET_METHODS = frozenset(['GET', 'HEAD']) +GET_METHODS = {'GET', 'HEAD'} logger = get_task_logger(__name__) @@ -41,13 +43,13 @@ def utf8dict(tup): from urllib2 import Request, urlopen # noqa - def utf8dict(tup): # noqa + def utf8dict(tup, enc='utf-8'): # noqa """With a dict's items() tuple return a new dict with any utf-8 keys/values encoded.""" - return dict( - (k.encode('utf-8'), - v.encode('utf-8') if isinstance(v, unicode) else v) # noqa - for k, v in tup) + return { + k.encode(enc): (v.encode(enc) if isinstance(v, unicode) else v) + for k, v in tup + } class InvalidResponseError(Exception): @@ -62,7 +64,7 @@ class UnknownStatusError(InvalidResponseError): """The remote server gave an unknown status.""" -def extract_response(raw_response, loads=anyjson.loads): +def extract_response(raw_response, loads=json.loads): """Extract the response text from a raw JSON response.""" if not raw_response: raise InvalidResponseError('Empty response') @@ -139,7 +141,7 @@ def __init__(self, url, method, task_kwargs, **kwargs): def make_request(self, url, method, params): """Perform HTTP request and return the response.""" - request = Request(url, params) + request = Request(url, str_to_bytes(params)) for key, val in items(self.http_headers): request.add_header(key, val) response = urlopen(request) # user catches errors. 
@@ -154,7 +156,7 @@ def dispatch(self): else: params = urlencode(utf8dict(items(self.task_kwargs))) raw_response = self.make_request(str(url), self.method, params) - return extract_response(raw_response) + return extract_response(bytes_to_str(raw_response)) @property def http_headers(self): @@ -162,8 +164,7 @@ def http_headers(self): return headers -@shared_task(name='celery.http_dispatch', bind=True, - url=None, method=None, accept_magic_kwargs=False) +@shared_task(name='celery.http_dispatch', bind=True, url=None, method=None) def dispatch(self, url=None, method='GET', **kwargs): """Task dispatching to an URL. diff --git a/celery/task/sets.py b/celery/task/sets.py deleted file mode 100644 index e277b796d..000000000 --- a/celery/task/sets.py +++ /dev/null @@ -1,88 +0,0 @@ -# -*- coding: utf-8 -*- -""" - celery.task.sets - ~~~~~~~~~~~~~~~~ - - Old ``group`` implementation, this module should - not be used anymore use :func:`celery.group` instead. - -""" -from __future__ import absolute_import - -from celery._state import get_current_worker_task -from celery.app import app_or_default -from celery.canvas import maybe_signature # noqa -from celery.utils import uuid, warn_deprecated - -from celery.canvas import subtask # noqa - -warn_deprecated( - 'celery.task.sets and TaskSet', removal='4.0', - alternative="""\ -Please use "group" instead (see the Canvas section in the userguide)\ -""") - - -class TaskSet(list): - """A task containing several subtasks, making it possible - to track how many, or when all of the tasks have been completed. - - :param tasks: A list of :class:`subtask` instances. 
- - Example:: - - >>> from myproj.tasks import refresh_feed - - >>> urls = ('http://cnn.com/rss', 'http://bbc.co.uk/rss') - >>> s = TaskSet(refresh_feed.s(url) for url in urls) - >>> taskset_result = s.apply_async() - >>> list_of_return_values = taskset_result.join() # *expensive* - - """ - app = None - - def __init__(self, tasks=None, app=None, Publisher=None): - self.app = app_or_default(app or self.app) - super(TaskSet, self).__init__( - maybe_signature(t, app=self.app) for t in tasks or [] - ) - self.Publisher = Publisher or self.app.amqp.TaskProducer - self.total = len(self) # XXX compat - - def apply_async(self, connection=None, publisher=None, taskset_id=None): - """Apply TaskSet.""" - app = self.app - - if app.conf.CELERY_ALWAYS_EAGER: - return self.apply(taskset_id=taskset_id) - - with app.connection_or_acquire(connection) as conn: - setid = taskset_id or uuid() - pub = publisher or self.Publisher(conn) - results = self._async_results(setid, pub) - - result = app.TaskSetResult(setid, results) - parent = get_current_worker_task() - if parent: - parent.add_trail(result) - return result - - def _async_results(self, taskset_id, publisher): - return [task.apply_async(taskset_id=taskset_id, publisher=publisher) - for task in self] - - def apply(self, taskset_id=None): - """Applies the TaskSet locally by blocking until all tasks return.""" - setid = taskset_id or uuid() - return self.app.TaskSetResult(setid, self._sync_results(setid)) - - def _sync_results(self, taskset_id): - return [task.apply(taskset_id=taskset_id) for task in self] - - @property - def tasks(self): - return self - - @tasks.setter # noqa - def tasks(self, tasks): - self[:] = tasks diff --git a/celery/task/trace.py b/celery/task/trace.py deleted file mode 100644 index 5e5f5a8e9..000000000 --- a/celery/task/trace.py +++ /dev/null @@ -1,12 +0,0 @@ -"""This module has moved to celery.app.trace.""" -from __future__ import absolute_import - -import sys - -from celery.utils import warn_deprecated - 
-warn_deprecated('celery.task.trace', removal='3.2', - alternative='Please use celery.app.trace instead.') - -from celery.app import trace -sys.modules[__name__] = trace diff --git a/celery/tests/__init__.py b/celery/tests/__init__.py index 966787270..629e9279e 100644 --- a/celery/tests/__init__.py +++ b/celery/tests/__init__.py @@ -7,6 +7,8 @@ from importlib import import_module +PYPY3 = getattr(sys, 'pypy_version_info', None) and sys.version_info[0] > 3 + try: WindowsError = WindowsError # noqa except NameError: @@ -16,13 +18,16 @@ class WindowsError(Exception): def setup(): + using_coverage = ( + os.environ.get('COVER_ALL_MODULES') or '--with-coverage' in sys.argv + ) os.environ.update( # warn if config module not found C_WNOCONF='yes', KOMBU_DISABLE_LIMIT_PROTECTION='yes', ) - if os.environ.get('COVER_ALL_MODULES') or '--with-coverage' in sys.argv: + if using_coverage and not PYPY3: from warnings import catch_warnings with catch_warnings(record=True): import_all_modules() @@ -85,3 +90,8 @@ def import_all_modules(name=__name__, file=__file__, import_module(module) except ImportError: pass + except OSError as exc: + warnings.warn(UserWarning( + 'Ignored error importing module {0}: {1!r}'.format( + module, exc, + ))) diff --git a/celery/tests/app/test_amqp.py b/celery/tests/app/test_amqp.py index efb398ac6..79fda1e97 100644 --- a/celery/tests/app/test_amqp.py +++ b/celery/tests/app/test_amqp.py @@ -1,123 +1,36 @@ from __future__ import absolute_import -import datetime - -import pytz +from datetime import datetime, timedelta from kombu import Exchange, Queue -from celery.app.amqp import Queues, TaskPublisher +from celery import uuid +from celery.app.amqp import Queues, utf8dict from celery.five import keys -from celery.tests.case import AppCase, Mock - - -class test_TaskProducer(AppCase): - - def test__exit__(self): - publisher = self.app.amqp.TaskProducer(self.app.connection()) - publisher.release = Mock() - with publisher: - pass - 
publisher.release.assert_called_with() - - def test_declare(self): - publisher = self.app.amqp.TaskProducer(self.app.connection()) - publisher.exchange.name = 'foo' - publisher.declare() - publisher.exchange.name = None - publisher.declare() - - def test_retry_policy(self): - prod = self.app.amqp.TaskProducer(Mock()) - prod.channel.connection.client.declared_entities = set() - prod.publish_task('tasks.add', (2, 2), {}, - retry_policy={'frobulate': 32.4}) - - def test_publish_no_retry(self): - prod = self.app.amqp.TaskProducer(Mock()) - prod.channel.connection.client.declared_entities = set() - prod.publish_task('tasks.add', (2, 2), {}, retry=False, chord=123) - self.assertFalse(prod.connection.ensure.call_count) - - def test_publish_custom_queue(self): - prod = self.app.amqp.TaskProducer(Mock()) - self.app.amqp.queues['some_queue'] = Queue( - 'xxx', Exchange('yyy'), 'zzz', - ) - prod.channel.connection.client.declared_entities = set() - prod.publish = Mock() - prod.publish_task('tasks.add', (8, 8), {}, retry=False, - queue='some_queue') - self.assertEqual(prod.publish.call_args[1]['exchange'], 'yyy') - self.assertEqual(prod.publish.call_args[1]['routing_key'], 'zzz') - - def test_publish_with_countdown(self): - prod = self.app.amqp.TaskProducer(Mock()) - prod.channel.connection.client.declared_entities = set() - prod.publish = Mock() - now = datetime.datetime(2013, 11, 26, 16, 48, 46) - prod.publish_task('tasks.add', (1, 1), {}, retry=False, - countdown=10, now=now) - self.assertEqual( - prod.publish.call_args[0][0]['eta'], - '2013-11-26T16:48:56+00:00', - ) +from celery.utils.timeutils import to_utc - def test_publish_with_countdown_and_timezone(self): - # use timezone with fixed offset to be sure it won't be changed - self.app.conf.CELERY_TIMEZONE = pytz.FixedOffset(120) - prod = self.app.amqp.TaskProducer(Mock()) - prod.channel.connection.client.declared_entities = set() - prod.publish = Mock() - now = datetime.datetime(2013, 11, 26, 16, 48, 46) - 
prod.publish_task('tasks.add', (2, 2), {}, retry=False, - countdown=20, now=now) - self.assertEqual( - prod.publish.call_args[0][0]['eta'], - '2013-11-26T18:49:06+02:00', - ) - - def test_event_dispatcher(self): - prod = self.app.amqp.TaskProducer(Mock()) - self.assertTrue(prod.event_dispatcher) - self.assertFalse(prod.event_dispatcher.enabled) +from celery.tests.case import AppCase, Mock class test_TaskConsumer(AppCase): def test_accept_content(self): with self.app.pool.acquire(block=True) as conn: - self.app.conf.CELERY_ACCEPT_CONTENT = ['application/json'] + self.app.conf.accept_content = ['application/json'] self.assertEqual( self.app.amqp.TaskConsumer(conn).accept, - set(['application/json']) + {'application/json'}, ) self.assertEqual( self.app.amqp.TaskConsumer(conn, accept=['json']).accept, - set(['application/json']), + {'application/json'}, ) -class test_compat_TaskPublisher(AppCase): - - def test_compat_exchange_is_string(self): - producer = TaskPublisher(exchange='foo', app=self.app) - self.assertIsInstance(producer.exchange, Exchange) - self.assertEqual(producer.exchange.name, 'foo') - self.assertEqual(producer.exchange.type, 'direct') - producer = TaskPublisher(exchange='foo', exchange_type='topic', - app=self.app) - self.assertEqual(producer.exchange.type, 'topic') - - def test_compat_exchange_is_Exchange(self): - producer = TaskPublisher(exchange=Exchange('foo'), app=self.app) - self.assertEqual(producer.exchange.name, 'foo') - - -class test_PublisherPool(AppCase): +class test_ProducerPool(AppCase): def test_setup_nolimit(self): - self.app.conf.BROKER_POOL_LIMIT = None + self.app.conf.broker_pool_limit = None try: delattr(self.app, '_pool') except AttributeError: @@ -135,7 +48,7 @@ def test_setup_nolimit(self): r2 = pool.acquire() def test_setup(self): - self.app.conf.BROKER_POOL_LIMIT = 2 + self.app.conf.broker_pool_limit = 2 try: delattr(self.app, '_pool') except AttributeError: @@ -220,9 +133,169 @@ def test_add_default_exchange(self): ex = 
Exchange('fff', 'fanout') q = Queues(default_exchange=ex) q.add(Queue('foo')) - self.assertEqual(q['foo'].exchange, ex) + self.assertEqual(q['foo'].exchange.name, '') def test_alias(self): q = Queues() q.add(Queue('foo', alias='barfoo')) self.assertIs(q['barfoo'], q['foo']) + + def test_with_max_priority(self): + qs1 = Queues(max_priority=10) + qs1.add('foo') + self.assertEqual(qs1['foo'].queue_arguments, {'x-max-priority': 10}) + + q1 = Queue('xyx', queue_arguments={'x-max-priority': 3}) + qs1.add(q1) + self.assertEqual(qs1['xyx'].queue_arguments, { + 'x-max-priority': 3, + }) + + q1 = Queue('moo', queue_arguments=None) + qs1.add(q1) + self.assertEqual(qs1['moo'].queue_arguments, { + 'x-max-priority': 10, + }) + + qs2 = Queues(ha_policy='all', max_priority=5) + qs2.add('bar') + self.assertEqual(qs2['bar'].queue_arguments, { + 'x-ha-policy': 'all', + 'x-max-priority': 5 + }) + + q2 = Queue('xyx2', queue_arguments={'x-max-priority': 2}) + qs2.add(q2) + self.assertEqual(qs2['xyx2'].queue_arguments, { + 'x-ha-policy': 'all', + 'x-max-priority': 2, + }) + + qs3 = Queues(max_priority=None) + qs3.add('foo2') + self.assertEqual(qs3['foo2'].queue_arguments, None) + + q3 = Queue('xyx3', queue_arguments={'x-max-priority': 7}) + qs3.add(q3) + self.assertEqual(qs3['xyx3'].queue_arguments, { + 'x-max-priority': 7, + }) + + +class test_AMQP(AppCase): + + def setup(self): + self.simple_message = self.app.amqp.as_task_v2( + uuid(), 'foo', create_sent_event=True, + ) + + def test_Queues__with_ha_policy(self): + x = self.app.amqp.Queues({}, ha_policy='all') + self.assertEqual(x.ha_policy, 'all') + + def test_Queues__with_max_priority(self): + x = self.app.amqp.Queues({}, max_priority=23) + self.assertEqual(x.max_priority, 23) + + def test_send_task_message__no_kwargs(self): + self.app.amqp.send_task_message(Mock(), 'foo', self.simple_message) + + def test_send_task_message__properties(self): + prod = Mock(name='producer') + self.app.amqp.send_task_message( + prod, 'foo', 
self.simple_message, foo=1, retry=False, + ) + self.assertEqual(prod.publish.call_args[1]['foo'], 1) + + def test_send_task_message__headers(self): + prod = Mock(name='producer') + self.app.amqp.send_task_message( + prod, 'foo', self.simple_message, headers={'x1x': 'y2x'}, + retry=False, + ) + self.assertEqual(prod.publish.call_args[1]['headers']['x1x'], 'y2x') + + def test_send_task_message__queue_string(self): + prod = Mock(name='producer') + self.app.amqp.send_task_message( + prod, 'foo', self.simple_message, queue='foo', retry=False, + ) + kwargs = prod.publish.call_args[1] + self.assertEqual(kwargs['routing_key'], 'foo') + self.assertEqual(kwargs['exchange'], '') + + def test_send_event_exchange_string(self): + evd = Mock(name="evd") + self.app.amqp.send_task_message( + Mock(), 'foo', self.simple_message, retry=False, + exchange='xyz', routing_key='xyb', + event_dispatcher=evd, + ) + self.assertTrue(evd.publish.called) + event = evd.publish.call_args[0][1] + self.assertEqual(event['routing_key'], 'xyb') + self.assertEqual(event['exchange'], 'xyz') + + def test_send_task_message__with_delivery_mode(self): + prod = Mock(name='producer') + self.app.amqp.send_task_message( + prod, 'foo', self.simple_message, delivery_mode=33, retry=False, + ) + self.assertEqual(prod.publish.call_args[1]['delivery_mode'], 33) + + def test_routes(self): + r1 = self.app.amqp.routes + r2 = self.app.amqp.routes + self.assertIs(r1, r2) + + +class test_as_task_v2(AppCase): + + def test_raises_if_args_is_not_tuple(self): + with self.assertRaises(TypeError): + self.app.amqp.as_task_v2(uuid(), 'foo', args='123') + + def test_raises_if_kwargs_is_not_mapping(self): + with self.assertRaises(TypeError): + self.app.amqp.as_task_v2(uuid(), 'foo', kwargs=(1, 2, 3)) + + def test_countdown_to_eta(self): + now = to_utc(datetime.utcnow()).astimezone(self.app.timezone) + m = self.app.amqp.as_task_v2( + uuid(), 'foo', countdown=10, now=now, + ) + self.assertEqual( + m.headers['eta'], + (now + 
timedelta(seconds=10)).isoformat(), + ) + + def test_expires_to_datetime(self): + now = to_utc(datetime.utcnow()).astimezone(self.app.timezone) + m = self.app.amqp.as_task_v2( + uuid(), 'foo', expires=30, now=now, + ) + self.assertEqual( + m.headers['expires'], + (now + timedelta(seconds=30)).isoformat(), + ) + + def test_callbacks_errbacks_chord(self): + + @self.app.task + def t(i): + pass + + m = self.app.amqp.as_task_v2( + uuid(), 'foo', + callbacks=[t.s(1), t.s(2)], + errbacks=[t.s(3), t.s(4)], + chord=t.s(5), + ) + _, _, embed = m.body + self.assertListEqual( + embed['callbacks'], [utf8dict(t.s(1)), utf8dict(t.s(2))], + ) + self.assertListEqual( + embed['errbacks'], [utf8dict(t.s(3)), utf8dict(t.s(4))], + ) + self.assertEqual(embed['chord'], utf8dict(t.s(5))) diff --git a/celery/tests/app/test_annotations.py b/celery/tests/app/test_annotations.py index 559f5cb01..1b4f6afd8 100644 --- a/celery/tests/app/test_annotations.py +++ b/celery/tests/app/test_annotations.py @@ -48,7 +48,7 @@ def test_dict_to_MapAnnotation(self): def test_returns_list(self): self.assertListEqual(prepare(1), [1]) self.assertListEqual(prepare([1]), [1]) - self.assertListEqual(prepare((1, )), [1]) + self.assertListEqual(prepare((1,)), [1]) self.assertEqual(prepare(None), ()) def test_evalutes_qualnames(self): diff --git a/celery/tests/app/test_app.py b/celery/tests/app/test_app.py index 113dedae1..546ef6a80 100644 --- a/celery/tests/app/test_app.py +++ b/celery/tests/app/test_app.py @@ -7,24 +7,27 @@ from copy import deepcopy from pickle import loads, dumps -from amqp import promise -from kombu import Exchange +from vine import promise +from celery import Celery from celery import shared_task, current_app from celery import app as _app from celery import _state from celery.app import base as _appbase from celery.app import defaults from celery.exceptions import ImproperlyConfigured -from celery.five import items -from celery.loaders.base import BaseLoader +from celery.five import keys +from 
celery.loaders.base import BaseLoader, unconfigured from celery.platforms import pyimplementation from celery.utils.serialization import pickle +from celery.utils.timeutils import timezone from celery.tests.case import ( CELERY_TEST_CONFIG, AppCase, Mock, + Case, + ContextMock, depends_on_current_app, mask_modules, patch, @@ -35,6 +38,7 @@ ) from celery.utils import uuid from celery.utils.mail import ErrorMail +from celery.utils.objects import Bunch THIS_IS_A_KEY = 'this is a value' @@ -55,13 +59,6 @@ class ObjectConfig2(object): UNDERSTAND_ME = True -class Object(object): - - def __init__(self, **kwargs): - for key, value in items(kwargs): - setattr(self, key, value) - - def _get_test_config(): return deepcopy(CELERY_TEST_CONFIG) test_config = _get_test_config() @@ -76,6 +73,19 @@ def test_bugreport(self): self.assertTrue(_app.bugreport(app=self.app)) +class test_task_join_will_block(Case): + + def test_task_join_will_block(self): + prev, _state._task_join_will_block = _state._task_join_will_block, 0 + try: + self.assertEqual(_state._task_join_will_block, 0) + _state._set_task_join_will_block(True) + print(_state.task_join_will_block) + self.assertTrue(_state.task_join_will_block()) + finally: + _state._task_join_will_block = prev + + class test_App(AppCase): def setup(self): @@ -115,6 +125,12 @@ def fun(): task = app.task(fun) self.assertEqual(task.name, app.main + '.fun') + def test_task_too_many_args(self): + with self.assertRaises(TypeError): + self.app.task(Mock(name='fun'), True) + with self.assertRaises(TypeError): + self.app.task(Mock(name='fun'), True, 1, 2) + def test_with_config_source(self): with self.Celery(config_source=ObjectConfig) as app: self.assertEqual(app.conf.FOO, 1) @@ -122,9 +138,8 @@ def test_with_config_source(self): @depends_on_current_app def test_task_windows_execv(self): - prev, _appbase._EXECV = _appbase._EXECV, True + prev, _appbase.USING_EXECV = _appbase.USING_EXECV, True try: - @self.app.task(shared=False) def foo(): pass @@ 
-132,8 +147,8 @@ def foo(): self.assertTrue(foo._get_current_object()) # is proxy finally: - _appbase._EXECV = prev - assert not _appbase._EXECV + _appbase.USING_EXECV = prev + assert not _appbase.USING_EXECV def test_task_takes_no_args(self): with self.assertRaises(TypeError): @@ -144,7 +159,10 @@ def foo(): def test_add_defaults(self): self.assertFalse(self.app.configured) _conf = {'FOO': 300} - conf = lambda: _conf + + def conf(): + return _conf + self.app.add_defaults(conf) self.assertIn(conf, self.app._pending_defaults) self.assertFalse(self.app.configured) @@ -172,20 +190,6 @@ def test_connection_or_acquire(self): with self.app.connection_or_acquire(pool=False): self.assertFalse(self.app.pool._dirty) - def test_maybe_close_pool(self): - cpool = self.app._pool = Mock() - amqp = self.app.__dict__['amqp'] = Mock() - ppool = amqp._producer_pool - self.app._maybe_close_pool() - cpool.force_close_all.assert_called_with() - ppool.force_close_all.assert_called_with() - self.assertIsNone(self.app._pool) - self.assertIsNone(self.app.__dict__['amqp']._producer_pool) - - self.app._pool = Mock() - self.app._maybe_close_pool() - self.app._maybe_close_pool() - def test_using_v1_reduce(self): self.app._using_v1_reduce = True self.assertTrue(loads(dumps(self.app))) @@ -197,8 +201,11 @@ def test_autodiscover_tasks_force(self): ['proj.A', 'proj.B'], 'tasks', ) self.app.loader.autodiscover_tasks = Mock() + + def lazy_list(): + return ['proj.A', 'proj.B'] self.app.autodiscover_tasks( - lambda: ['proj.A', 'proj.B'], + lazy_list, related_name='george', force=True, ) @@ -208,18 +215,137 @@ def test_autodiscover_tasks_force(self): def test_autodiscover_tasks_lazy(self): with patch('celery.signals.import_modules') as import_modules: - packages = lambda: [1, 2, 3] - self.app.autodiscover_tasks(packages) + def lazy_list(): + return [1, 2, 3] + self.app.autodiscover_tasks(lazy_list) self.assertTrue(import_modules.connect.called) prom = import_modules.connect.call_args[0][0] 
self.assertIsInstance(prom, promise) self.assertEqual(prom.fun, self.app._autodiscover_tasks) self.assertEqual(prom.args[0](), [1, 2, 3]) + def test_autodiscover_tasks__no_packages(self): + fixup1 = Mock(name='fixup') + fixup2 = Mock(name='fixup') + self.app._autodiscover_tasks_from_names = Mock(name='auto') + self.app._fixups = [fixup1, fixup2] + fixup1.autodiscover_tasks.return_value = ['A', 'B', 'C'] + fixup2.autodiscover_tasks.return_value = ['D', 'E', 'F'] + self.app.autodiscover_tasks(force=True) + self.app._autodiscover_tasks_from_names.assert_called_with( + ['A', 'B', 'C', 'D', 'E', 'F'], related_name='tasks', + ) + @with_environ('CELERY_BROKER_URL', '') def test_with_broker(self): with self.Celery(broker='foo://baribaz') as app: - self.assertEqual(app.conf.BROKER_URL, 'foo://baribaz') + self.assertEqual(app.conf.broker_url, 'foo://baribaz') + + def test_pending_configuration__setattr(self): + with self.Celery(broker='foo://bar') as app: + app.conf.task_default_delivery_mode = 44 + app.conf.worker_agent = 'foo:Bar' + self.assertFalse(app.configured) + self.assertEqual(app.conf.worker_agent, 'foo:Bar') + self.assertEqual(app.conf.broker_url, 'foo://bar') + self.assertEqual(app._preconf['worker_agent'], 'foo:Bar') + + self.assertTrue(app.configured) + reapp = pickle.loads(pickle.dumps(app)) + self.assertEqual(reapp._preconf['worker_agent'], 'foo:Bar') + self.assertFalse(reapp.configured) + self.assertEqual(reapp.conf.worker_agent, 'foo:Bar') + self.assertTrue(reapp.configured) + self.assertEqual(reapp.conf.broker_url, 'foo://bar') + self.assertEqual(reapp._preconf['worker_agent'], 'foo:Bar') + + def test_pending_configuration__update(self): + with self.Celery(broker='foo://bar') as app: + app.conf.update( + task_default_delivery_mode=44, + worker_agent='foo:Bar', + ) + self.assertFalse(app.configured) + self.assertEqual(app.conf.worker_agent, 'foo:Bar') + self.assertEqual(app.conf.broker_url, 'foo://bar') + self.assertEqual(app._preconf['worker_agent'], 
'foo:Bar') + + def test_pending_configuration__compat_settings(self): + with self.Celery(broker='foo://bar', backend='foo') as app: + app.conf.update( + CELERY_ALWAYS_EAGER=4, + CELERY_DEFAULT_DELIVERY_MODE=63, + CELERYD_AGENT='foo:Barz', + ) + self.assertEqual(app.conf.task_always_eager, 4) + self.assertEqual(app.conf.task_default_delivery_mode, 63) + self.assertEqual(app.conf.worker_agent, 'foo:Barz') + self.assertEqual(app.conf.broker_url, 'foo://bar') + self.assertEqual(app.conf.result_backend, 'foo') + + def test_pending_configuration__compat_settings_mixing(self): + with self.Celery(broker='foo://bar', backend='foo') as app: + app.conf.update( + CELERY_ALWAYS_EAGER=4, + CELERY_DEFAULT_DELIVERY_MODE=63, + CELERYD_AGENT='foo:Barz', + worker_consumer='foo:Fooz', + ) + with self.assertRaises(ImproperlyConfigured): + self.assertEqual(app.conf.task_always_eager, 4) + + def test_pending_configuration__compat_settings_mixing_new(self): + with self.Celery(broker='foo://bar', backend='foo') as app: + app.conf.update( + task_always_eager=4, + task_default_delivery_mode=63, + worker_agent='foo:Barz', + CELERYD_CONSUMER='foo:Fooz', + CELERYD_AUTOSCALER='foo:Xuzzy', + ) + with self.assertRaises(ImproperlyConfigured): + self.assertEqual(app.conf.worker_consumer, 'foo:Fooz') + + def test_pending_configuration__compat_settings_mixing_alt(self): + with self.Celery(broker='foo://bar', backend='foo') as app: + app.conf.update( + task_always_eager=4, + task_default_delivery_mode=63, + worker_agent='foo:Barz', + CELERYD_CONSUMER='foo:Fooz', + worker_consumer='foo:Fooz', + CELERYD_AUTOSCALER='foo:Xuzzy', + worker_autoscaler='foo:Xuzzy' + ) + self.assertEqual(app.conf.task_always_eager, 4) + self.assertEqual(app.conf.worker_autoscaler, 'foo:Xuzzy') + + def test_pending_configuration__setdefault(self): + with self.Celery(broker='foo://bar') as app: + app.conf.setdefault('worker_agent', 'foo:Bar') + self.assertFalse(app.configured) + + def test_pending_configuration__iter(self): + 
with self.Celery(broker='foo://bar') as app: + app.conf.worker_agent = 'foo:Bar' + self.assertFalse(app.configured) + self.assertTrue(list(keys(app.conf))) + self.assertFalse(app.configured) + self.assertIn('worker_agent', app.conf) + self.assertFalse(app.configured) + self.assertTrue(dict(app.conf)) + self.assertTrue(app.configured) + + def test_pending_configuration__raises_ImproperlyConfigured(self): + with self.Celery(set_as_current=False) as app: + app.conf.worker_agent = 'foo://bar' + app.conf.task_default_delivery_mode = 44 + app.conf.CELERY_ALWAYS_EAGER = 5 + with self.assertRaises(ImproperlyConfigured): + app.finalize() + + with self.Celery() as app: + self.assertFalse(self.app.conf.task_always_eager) def test_repr(self): self.assertTrue(repr(self.app)) @@ -230,7 +356,7 @@ def test_custom_task_registry(self): def test_include_argument(self): with self.Celery(include=('foo', 'bar.foo')) as app: - self.assertEqual(app.conf.CELERY_IMPORTS, ('foo', 'bar.foo')) + self.assertEqual(app.conf.include, ('foo', 'bar.foo')) def test_set_as_current(self): current = _state._tls.current_app @@ -252,14 +378,14 @@ def foo(shared=False): _state._task_stack.pop() def test_task_not_shared(self): - with patch('celery.app.base.shared_task') as sh: + with patch('celery.app.base.connect_on_app_finalize') as sh: @self.app.task(shared=False) def foo(): pass self.assertFalse(sh.called) def test_task_compat_with_filter(self): - with self.Celery(accept_magic_kwargs=True) as app: + with self.Celery() as app: check = Mock() def filter(task): @@ -272,14 +398,14 @@ def foo(): check.assert_called_with(foo) def test_task_with_filter(self): - with self.Celery(accept_magic_kwargs=False) as app: + with self.Celery() as app: check = Mock() def filter(task): check(task) return task - assert not _appbase._EXECV + assert not _appbase.USING_EXECV @app.task(filter=filter, shared=False) def foo(): @@ -318,7 +444,7 @@ def _inner(*args, **kwargs): return fun(*args, **kwargs) return _inner - 
self.app.conf.CELERY_ANNOTATIONS = { + self.app.conf.task_annotations = { adX.name: {'@__call__': deco} } adX.bind(self.app) @@ -333,22 +459,30 @@ def _inner(*args, **kwargs): def test_apply_async_has__self__(self): @self.app.task(__self__='hello', shared=False) - def aawsX(): + def aawsX(x, y): pass - with patch('celery.app.amqp.TaskProducer.publish_task') as dt: - aawsX.apply_async((4, 5)) - args = dt.call_args[0][1] - self.assertEqual(args, ('hello', 4, 5)) + with self.assertRaises(TypeError): + aawsX.apply_async(()) + with self.assertRaises(TypeError): + aawsX.apply_async((2,)) + + with patch('celery.app.amqp.AMQP.create_task_message') as create: + with patch('celery.app.amqp.AMQP.send_task_message') as send: + create.return_value = Mock(), Mock(), Mock(), Mock() + aawsX.apply_async((4, 5)) + args = create.call_args[0][2] + self.assertEqual(args, ('hello', 4, 5)) + self.assertTrue(send.called) def test_apply_async_adds_children(self): from celery._state import _task_stack - @self.app.task(shared=False) + @self.app.task(bind=True, shared=False) def a3cX1(self): pass - @self.app.task(shared=False) + @self.app.task(bind=True, shared=False) def a3cX2(self): pass @@ -402,7 +536,7 @@ def assert_config2(self): def test_config_from_object__lazy(self): conf = ObjectConfig2() self.app.config_from_object(conf) - self.assertFalse(self.app.loader._conf) + self.assertIs(self.app.loader._conf, unconfigured) self.assertIs(self.app._config_source, conf) self.assert_config2() @@ -413,45 +547,110 @@ def test_config_from_object__force(self): self.assert_config2() + def test_config_from_object__compat(self): + + class Config(object): + CELERY_ALWAYS_EAGER = 44 + CELERY_DEFAULT_DELIVERY_MODE = 30 + CELERY_TASK_PUBLISH_RETRY = False + + self.app.config_from_object(Config) + self.assertEqual(self.app.conf.task_always_eager, 44) + self.assertEqual(self.app.conf.CELERY_ALWAYS_EAGER, 44) + self.assertFalse(self.app.conf.task_publish_retry) + 
self.assertEqual(self.app.conf.task_default_routing_key, 'celery') + + def test_config_from_object__supports_old_names(self): + + class Config(object): + task_always_eager = 45 + task_default_delivery_mode = 301 + + self.app.config_from_object(Config()) + self.assertEqual(self.app.conf.CELERY_ALWAYS_EAGER, 45) + self.assertEqual(self.app.conf.task_always_eager, 45) + self.assertEqual(self.app.conf.CELERY_DEFAULT_DELIVERY_MODE, 301) + self.assertEqual(self.app.conf.task_default_delivery_mode, 301) + self.assertEqual(self.app.conf.task_default_routing_key, 'testcelery') + + def test_config_from_object__namespace_uppercase(self): + + class Config(object): + CELERY_TASK_ALWAYS_EAGER = 44 + CELERY_TASK_DEFAULT_DELIVERY_MODE = 301 + + self.app.config_from_object(Config(), namespace='CELERY') + self.assertEqual(self.app.conf.task_always_eager, 44) + + def test_config_from_object__namespace_lowercase(self): + + class Config(object): + celery_task_always_eager = 44 + celery_task_default_delivery_mode = 301 + + self.app.config_from_object(Config(), namespace='celery') + self.assertEqual(self.app.conf.task_always_eager, 44) + + def test_config_from_object__mixing_new_and_old(self): + + class Config(object): + task_always_eager = 44 + worker_agent = 'foo:Agent' + worker_consumer = 'foo:Consumer' + beat_schedule = '/foo/schedule' + CELERY_DEFAULT_DELIVERY_MODE = 301 + + with self.assertRaises(ImproperlyConfigured) as exc: + self.app.config_from_object(Config(), force=True) + self.assertTrue( + exc.args[0].startswith('CELERY_DEFAULT_DELIVERY_MODE')) + self.assertIn('task_default_delivery_mode', exc.args[0]) + + def test_config_from_object__mixing_old_and_new(self): + + class Config(object): + CELERY_ALWAYS_EAGER = 46 + CELERYD_AGENT = 'foo:Agent' + CELERYD_CONSUMER = 'foo:Consumer' + CELERYBEAT_SCHEDULE = '/foo/schedule' + task_default_delivery_mode = 301 + + with self.assertRaises(ImproperlyConfigured) as exc: + self.app.config_from_object(Config(), force=True) + 
self.assertTrue( + exc.args[0].startswith('task_default_delivery_mode')) + self.assertIn('CELERY_DEFAULT_DELIVERY_MODE', exc.args[0]) + def test_config_from_cmdline(self): - cmdline = ['.always_eager=no', - '.result_backend=/dev/null', - 'celeryd.prefetch_multiplier=368', + cmdline = ['task_always_eager=no', + 'result_backend=/dev/null', + 'worker_prefetch_multiplier=368', '.foobarstring=(string)300', '.foobarint=(int)300', - '.result_engine_options=(dict){"foo": "bar"}'] - self.app.config_from_cmdline(cmdline, namespace='celery') - self.assertFalse(self.app.conf.CELERY_ALWAYS_EAGER) - self.assertEqual(self.app.conf.CELERY_RESULT_BACKEND, '/dev/null') - self.assertEqual(self.app.conf.CELERYD_PREFETCH_MULTIPLIER, 368) - self.assertEqual(self.app.conf.CELERY_FOOBARSTRING, '300') - self.assertEqual(self.app.conf.CELERY_FOOBARINT, 300) - self.assertDictEqual(self.app.conf.CELERY_RESULT_ENGINE_OPTIONS, + 'sqlalchemy_engine_options=(dict){"foo": "bar"}'] + self.app.config_from_cmdline(cmdline, namespace='worker') + self.assertFalse(self.app.conf.task_always_eager) + self.assertEqual(self.app.conf.result_backend, '/dev/null') + self.assertEqual(self.app.conf.worker_prefetch_multiplier, 368) + self.assertEqual(self.app.conf.worker_foobarstring, '300') + self.assertEqual(self.app.conf.worker_foobarint, 300) + self.assertDictEqual(self.app.conf.sqlalchemy_engine_options, {'foo': 'bar'}) - def test_compat_setting_CELERY_BACKEND(self): - - self.app.config_from_object(Object(CELERY_BACKEND='set_by_us')) - self.assertEqual(self.app.conf.CELERY_RESULT_BACKEND, 'set_by_us') - - def test_setting_BROKER_TRANSPORT_OPTIONS(self): + def test_setting__broker_transport_options(self): _args = {'foo': 'bar', 'spam': 'baz'} - self.app.config_from_object(Object()) - self.assertEqual(self.app.conf.BROKER_TRANSPORT_OPTIONS, {}) + self.app.config_from_object(Bunch()) + self.assertEqual(self.app.conf.broker_transport_options, {}) - 
self.app.config_from_object(Object(BROKER_TRANSPORT_OPTIONS=_args)) - self.assertEqual(self.app.conf.BROKER_TRANSPORT_OPTIONS, _args) + self.app.config_from_object(Bunch(broker_transport_options=_args)) + self.assertEqual(self.app.conf.broker_transport_options, _args) def test_Windows_log_color_disabled(self): self.app.IS_WINDOWS = True self.assertFalse(self.app.log.supports_color(True)) - def test_compat_setting_CARROT_BACKEND(self): - self.app.config_from_object(Object(CARROT_BACKEND='set_by_us')) - self.assertEqual(self.app.conf.BROKER_TRANSPORT, 'set_by_us') - def test_WorkController(self): x = self.app.WorkController self.assertIs(x.app, self.app) @@ -522,9 +721,9 @@ def mail_admins(*args, **kwargs): return args, kwargs self.app.loader = Loader(app=self.app) - self.app.conf.ADMINS = None + self.app.conf.admins = None self.assertFalse(self.app.mail_admins('Subject', 'Body')) - self.app.conf.ADMINS = [('George Costanza', 'george@vandelay.com')] + self.app.conf.admins = [('George Costanza', 'george@vandelay.com')] self.assertTrue(self.app.mail_admins('Subject', 'Body')) def test_amqp_get_broker_info(self): @@ -535,8 +734,8 @@ def test_amqp_get_broker_info(self): 'virtual_host': '/'}, self.app.connection('pyamqp://').info(), ) - self.app.conf.BROKER_PORT = 1978 - self.app.conf.BROKER_VHOST = 'foo' + self.app.conf.broker_port = 1978 + self.app.conf.broker_vhost = 'foo' self.assertDictContainsSubset( {'port': 1978, 'virtual_host': 'foo'}, self.app.connection('pyamqp://:1978/foo').info(), @@ -548,14 +747,14 @@ def test_amqp_get_broker_info(self): def test_amqp_failover_strategy_selection(self): # Test passing in a string and make sure the string # gets there untouched - self.app.conf.BROKER_FAILOVER_STRATEGY = 'foo-bar' + self.app.conf.broker_failover_strategy = 'foo-bar' self.assertEqual( self.app.connection('amqp:////value').failover_strategy, 'foo-bar', ) # Try passing in None - self.app.conf.BROKER_FAILOVER_STRATEGY = None + self.app.conf.broker_failover_strategy 
= None self.assertEqual( self.app.connection('amqp:////value').failover_strategy, itertools.cycle, @@ -565,22 +764,90 @@ def test_amqp_failover_strategy_selection(self): def my_failover_strategy(it): yield True - self.app.conf.BROKER_FAILOVER_STRATEGY = my_failover_strategy + self.app.conf.broker_failover_strategy = my_failover_strategy self.assertEqual( self.app.connection('amqp:////value').failover_strategy, my_failover_strategy, ) - def test_BROKER_BACKEND_alias(self): - self.assertEqual(self.app.conf.BROKER_BACKEND, - self.app.conf.BROKER_TRANSPORT) - def test_after_fork(self): - p = self.app._pool = Mock() - self.app._after_fork(self.app) - p.force_close_all.assert_called_with() + self.app._pool = Mock() + self.app.on_after_fork = Mock(name='on_after_fork') + self.app._after_fork() self.assertIsNone(self.app._pool) - self.app._after_fork(self.app) + self.app.on_after_fork.send.assert_called_with(sender=self.app) + self.app._after_fork() + + def test_global_after_fork(self): + self.app._after_fork = Mock(name='_after_fork') + _appbase._after_fork_cleanup_app(self.app) + self.app._after_fork.assert_called_with() + + @patch('celery.app.base.logger') + def test_after_fork_cleanup_app__raises(self, logger): + self.app._after_fork = Mock(name='_after_fork') + exc = self.app._after_fork.side_effect = KeyError() + _appbase._after_fork_cleanup_app(self.app) + logger.info.assert_called_with( + 'after forker raised exception: %r', exc, exc_info=1) + + def test_ensure_after_fork__no_multiprocessing(self): + prev, _appbase.register_after_fork = ( + _appbase.register_after_fork, None) + try: + self.app._after_fork_registered = False + self.app._ensure_after_fork() + self.assertTrue(self.app._after_fork_registered) + finally: + _appbase.register_after_fork = prev + + def test_canvas(self): + self.assertTrue(self.app.canvas.Signature) + + def test_signature(self): + sig = self.app.signature('foo', (1, 2)) + self.assertIs(sig.app, self.app) + + def 
test_timezone__none_set(self): + self.app.conf.timezone = None + tz = self.app.timezone + self.assertEqual(tz, timezone.get_timezone('UTC')) + + def test_compat_on_configure(self): + _on_configure = Mock(name='on_configure') + + class CompatApp(Celery): + + def on_configure(self, *args, **kwargs): + # on pypy3 if named on_configure the class function + # will be called, instead of the mock defined above, + # so we add the underscore. + _on_configure(*args, **kwargs) + + with CompatApp(set_as_current=False) as app: + app.loader = Mock() + app.loader.conf = {} + app._load_config() + _on_configure.assert_called_with() + + def test_add_periodic_task(self): + + @self.app.task + def add(x, y): + pass + assert not self.app.configured + self.app.add_periodic_task( + 10, self.app.signature('add', (2, 2)), + name='add1', expires=3, + ) + self.assertTrue(self.app._pending_periodic_tasks) + assert not self.app.configured + + sig2 = add.s(4, 4) + self.assertTrue(self.app.configured) + self.app.add_periodic_task(20, sig2, name='add2', expires=4) + self.assertIn('add1', self.app.conf.beat_schedule) + self.assertIn('add2', self.app.conf.beat_schedule) def test_pool_no_multiprocessing(self): with mask_modules('multiprocessing.util'): @@ -590,6 +857,18 @@ def test_pool_no_multiprocessing(self): def test_bugreport(self): self.assertTrue(self.app.bugreport()) + def test_send_task__connection_provided(self): + connection = Mock(name='connection') + router = Mock(name='router') + router.route.return_value = {} + self.app.amqp = Mock(name='amqp') + self.app.amqp.Producer.attach_mock(ContextMock(), 'return_value') + self.app.send_task('foo', (1, 2), connection=connection, router=router) + self.app.amqp.Producer.assert_called_with(connection) + self.app.amqp.send_task_message.assert_called_with( + self.app.amqp.Producer(), 'foo', + self.app.amqp.create_task_message()) + def test_send_task_sent_event(self): class Dispatcher(object): @@ -609,22 +888,23 @@ def publish(self, type, fields, 
*args, **kwargs): chan.close() assert conn.transport_cls == 'memory' - prod = self.app.amqp.TaskProducer( - conn, exchange=Exchange('foo_exchange'), - send_sent_event=True, + message = self.app.amqp.create_task_message( + 'id', 'footask', (), {}, create_sent_event=True, ) + prod = self.app.amqp.Producer(conn) dispatcher = Dispatcher() - self.assertTrue(prod.publish_task('footask', (), {}, - exchange='moo_exchange', - routing_key='moo_exchange', - event_dispatcher=dispatcher)) + self.app.amqp.send_task_message( + prod, 'footask', message, + exchange='moo_exchange', routing_key='moo_exchange', + event_dispatcher=dispatcher, + ) self.assertTrue(dispatcher.sent) self.assertEqual(dispatcher.sent[0][0], 'task-sent') - self.assertTrue(prod.publish_task('footask', (), {}, - event_dispatcher=dispatcher, - exchange='bar_exchange', - routing_key='bar_exchange')) + self.app.amqp.send_task_message( + prod, 'footask', message, event_dispatcher=dispatcher, + exchange='bar_exchange', routing_key='bar_exchange', + ) def test_error_mail_sender(self): x = ErrorMail.subject % {'name': 'task_name', @@ -641,6 +921,11 @@ def test_error_mail_disabled(self): x.send(Mock(), Mock()) self.assertFalse(task.app.mail_admins.called) + def test_select_queues(self): + self.app.amqp = Mock(name='amqp') + self.app.select_queues({'foo', 'bar'}) + self.app.amqp.queues.select.assert_called_with({'foo', 'bar'}) + class test_defaults(AppCase): diff --git a/celery/tests/app/test_beat.py b/celery/tests/app/test_beat.py index 62310805a..05edae42f 100644 --- a/celery/tests/app/test_beat.py +++ b/celery/tests/app/test_beat.py @@ -9,11 +9,9 @@ from celery.five import keys, string_t from celery.schedules import schedule from celery.utils import uuid -from celery.tests.case import AppCase, Mock, SkipTest, call, patch - +from celery.utils.objects import Bunch -class Object(object): - pass +from celery.tests.case import AppCase, Mock, SkipTest, call, patch class MockShelve(dict): @@ -83,6 +81,22 @@ def 
test_repr(self): entry = self.create_entry() self.assertIn(' end[0]: - return META - raise socket.error() - finally: - calls[0] += 1 - get.side_effect = work_eventually - x._retry_timeout = 10 - x._retry_wait = 0.01 - meta = x._get_task_meta_for('task') - self.assertEqual(meta['status'], states.SUCCESS) - - x._retry_timeout = 0.1 - calls[0], end[0] = 0, 100 - with self.assertRaises(socket.error): - x._get_task_meta_for('task') - def test_store_result(self): - with mock_module('pycassa'): + with mock_module(*CASSANDRA_MODULES): from celery.backends import cassandra as mod - mod.pycassa = Mock() - install_exceptions(mod.pycassa) - mod.Thrift = Mock() - install_exceptions(mod.Thrift) - x = mod.CassandraBackend(app=self.app) - Get_Column = x._get_column_family = Mock() - cf = Get_Column.return_value = Mock() - x.detailed_mode = False - x._store_result('task_id', 'result', states.SUCCESS) - self.assertTrue(cf.insert.called) + mod.cassandra = Mock() - cf.insert.reset() - x.detailed_mode = True + x = mod.CassandraBackend(app=self.app) + x._connection = True + session = x._session = Mock() + session.execute = Mock() x._store_result('task_id', 'result', states.SUCCESS) - self.assertTrue(cf.insert.called) def test_process_cleanup(self): - with mock_module('pycassa'): + with mock_module(*CASSANDRA_MODULES): from celery.backends import cassandra as mod x = mod.CassandraBackend(app=self.app) - x._column_family = None x.process_cleanup() - x._column_family = True - x.process_cleanup() - self.assertIsNone(x._column_family) + self.assertIsNone(x._connection) + self.assertIsNone(x._session) - def test_get_column_family(self): - with mock_module('pycassa'): + def test_timeouting_cluster(self): + """Tests behaviour when Cluster.connect raises + cassandra.OperationTimedOut.""" + with mock_module(*CASSANDRA_MODULES): from celery.backends import cassandra as mod - mod.pycassa = Mock() - install_exceptions(mod.pycassa) + + class OTOExc(Exception): + pass + + class 
VeryFaultyCluster(object): + def __init__(self, *args, **kwargs): + pass + + def connect(self, *args, **kwargs): + raise OTOExc() + + def shutdown(self): + pass + + mod.cassandra = Mock() + mod.cassandra.OperationTimedOut = OTOExc + mod.cassandra.cluster = Mock() + mod.cassandra.cluster.Cluster = VeryFaultyCluster + x = mod.CassandraBackend(app=self.app) - self.assertTrue(x._get_column_family()) - self.assertIsNotNone(x._column_family) - self.assertIs(x._get_column_family(), x._column_family) + + with self.assertRaises(OTOExc): + x._store_result('task_id', 'result', states.SUCCESS) + self.assertIsNone(x._connection) + self.assertIsNone(x._session) + + x.process_cleanup() # should not raise + + def test_please_free_memory(self): + """Ensure that Cluster object IS shut down.""" + with mock_module(*CASSANDRA_MODULES): + from celery.backends import cassandra as mod + + class RAMHoggingCluster(object): + + objects_alive = 0 + + def __init__(self, *args, **kwargs): + pass + + def connect(self, *args, **kwargs): + RAMHoggingCluster.objects_alive += 1 + return Mock() + + def shutdown(self): + RAMHoggingCluster.objects_alive -= 1 + + mod.cassandra = Mock() + + mod.cassandra.cluster = Mock() + mod.cassandra.cluster.Cluster = RAMHoggingCluster + + for x in range(0, 10): + x = mod.CassandraBackend(app=self.app) + x._store_result('task_id', 'result', states.SUCCESS) + x.process_cleanup() + + self.assertEquals(RAMHoggingCluster.objects_alive, 0) + + def test_auth_provider(self): + """Ensure valid auth_provider works properly, and invalid one raises + ImproperlyConfigured exception.""" + class DummyAuth(object): + ValidAuthProvider = Mock() + + with mock_module(*CASSANDRA_MODULES): + from celery.backends import cassandra as mod + + mod.cassandra = Mock() + mod.cassandra.auth = DummyAuth + + # Valid auth_provider + self.app.conf.cassandra_auth_provider = 'ValidAuthProvider' + self.app.conf.cassandra_auth_kwargs = { + 'username': 'stuff' + } + mod.CassandraBackend(app=self.app) + + 
# Invalid auth_provider + self.app.conf.cassandra_auth_provider = 'SpiderManAuth' + self.app.conf.cassandra_auth_kwargs = { + 'username': 'Jack' + } + with self.assertRaises(ImproperlyConfigured): + mod.CassandraBackend(app=self.app) diff --git a/celery/tests/backends/test_couchbase.py b/celery/tests/backends/test_couchbase.py index 3dc6aadd0..8879ff430 100644 --- a/celery/tests/backends/test_couchbase.py +++ b/celery/tests/backends/test_couchbase.py @@ -1,5 +1,9 @@ +"""Tests for the CouchBaseBackend.""" + from __future__ import absolute_import +from kombu.utils.encoding import str_t + from celery.backends import couchbase as module from celery.backends.couchbase import CouchBaseBackend from celery.exceptions import ImproperlyConfigured @@ -18,32 +22,42 @@ class test_CouchBaseBackend(AppCase): + """CouchBaseBackend TestCase.""" + def setup(self): + """Skip the test if couchbase cannot be imported.""" if couchbase is None: raise SkipTest('couchbase is not installed.') self.backend = CouchBaseBackend(app=self.app) def test_init_no_couchbase(self): - """test init no couchbase raises""" - prev, module.couchbase = module.couchbase, None + """ + Test init no couchbase raises. + + If celery.backends.couchbase cannot import the couchbase client, it + sets the couchbase.Couchbase to None and then handles this in the + CouchBaseBackend __init__ method. 
+ """ + prev, module.Couchbase = module.Couchbase, None try: with self.assertRaises(ImproperlyConfigured): CouchBaseBackend(app=self.app) finally: - module.couchbase = prev + module.Couchbase = prev def test_init_no_settings(self): - """test init no settings""" - self.app.conf.CELERY_COUCHBASE_BACKEND_SETTINGS = [] + """Test init no settings.""" + self.app.conf.couchbase_backend_settings = [] with self.assertRaises(ImproperlyConfigured): CouchBaseBackend(app=self.app) def test_init_settings_is_None(self): - """Test init settings is None""" - self.app.conf.CELERY_COUCHBASE_BACKEND_SETTINGS = None + """Test init settings is None.""" + self.app.conf.couchbase_backend_settings = None CouchBaseBackend(app=self.app) def test_get_connection_connection_exists(self): + """Test _get_connection works.""" with patch('couchbase.connection.Connection') as mock_Connection: self.backend._connection = sentinel._connection @@ -53,14 +67,15 @@ def test_get_connection_connection_exists(self): self.assertFalse(mock_Connection.called) def test_get(self): - """test_get + """ + Test get method. CouchBaseBackend.get should return and take two params db conn to couchbase is mocked. - TODO Should test on key not exists + TODO Should test on key not exists """ - self.app.conf.CELERY_COUCHBASE_BACKEND_SETTINGS = {} + self.app.conf.couchbase_backend_settings = {} x = CouchBaseBackend(app=self.app) x._connection = Mock() mocked_get = x._connection.get = Mock() @@ -70,13 +85,13 @@ def test_get(self): x._connection.get.assert_called_once_with('1f3fab') def test_set(self): - """test_set + """ + Test set method. CouchBaseBackend.set should return None and take two params db conn to couchbase is mocked. 
- """ - self.app.conf.CELERY_COUCHBASE_BACKEND_SETTINGS = None + self.app.conf.couchbase_backend_settings = None x = CouchBaseBackend(app=self.app) x._connection = MagicMock() x._connection.set = MagicMock() @@ -84,14 +99,15 @@ def test_set(self): self.assertIsNone(x.set(sentinel.key, sentinel.value)) def test_delete(self): - """test_delete + """ + Test delete method. CouchBaseBackend.delete should return and take two params db conn to couchbase is mocked. - TODO Should test on key not exists + TODO Should test on key not exists. """ - self.app.conf.CELERY_COUCHBASE_BACKEND_SETTINGS = {} + self.app.conf.couchbase_backend_settings = {} x = CouchBaseBackend(app=self.app) x._connection = Mock() mocked_delete = x._connection.delete = Mock() @@ -101,11 +117,12 @@ def test_delete(self): x._connection.delete.assert_called_once_with('1f3fab') def test_config_params(self): - """test_config_params + """ + Test config params are correct. - celery.conf.CELERY_COUCHBASE_BACKEND_SETTINGS is properly set + app.conf.couchbase_backend_settings is properly set. 
""" - self.app.conf.CELERY_COUCHBASE_BACKEND_SETTINGS = { + self.app.conf.couchbase_backend_settings = { 'bucket': 'mycoolbucket', 'host': ['here.host.com', 'there.host.com'], 'username': 'johndoe', @@ -120,12 +137,14 @@ def test_config_params(self): self.assertEqual(x.port, 1234) def test_backend_by_url(self, url='couchbase://myhost/mycoolbucket'): + """Test that a CouchBaseBackend is loaded from the couchbase url.""" from celery.backends.couchbase import CouchBaseBackend backend, url_ = backends.get_backend_by_url(url, self.app.loader) self.assertIs(backend, CouchBaseBackend) self.assertEqual(url_, url) def test_backend_params_by_url(self): + """Test config params are correct from config url.""" url = 'couchbase://johndoe:mysecret@myhost:123/mycoolbucket' with self.Celery(backend=url) as app: x = app.backend @@ -134,3 +153,22 @@ def test_backend_params_by_url(self): self.assertEqual(x.username, 'johndoe') self.assertEqual(x.password, 'mysecret') self.assertEqual(x.port, 123) + + def test_correct_key_types(self): + """ + Test that the key is the correct type for the couchbase python API. + + We check that get_key_for_task, get_key_for_chord, and + get_key_for_group always returns a python string. Need to use str_t + for cross Python reasons. 
+ """ + keys = [ + self.backend.get_key_for_task('task_id', bytes('key')), + self.backend.get_key_for_chord('group_id', bytes('key')), + self.backend.get_key_for_group('group_id', bytes('key')), + self.backend.get_key_for_task('task_id', 'key'), + self.backend.get_key_for_chord('group_id', 'key'), + self.backend.get_key_for_group('group_id', 'key'), + ] + for key in keys: + self.assertIsInstance(key, str_t) diff --git a/celery/tests/backends/test_couchdb.py b/celery/tests/backends/test_couchdb.py new file mode 100644 index 000000000..2a81f54d6 --- /dev/null +++ b/celery/tests/backends/test_couchdb.py @@ -0,0 +1,90 @@ +from __future__ import absolute_import + +from celery.backends import couchdb as module +from celery.backends.couchdb import CouchBackend +from celery.exceptions import ImproperlyConfigured +from celery import backends +from celery.tests.case import ( + AppCase, Mock, SkipTest, patch, sentinel, +) + +try: + import pycouchdb +except ImportError: + pycouchdb = None # noqa + +COUCHDB_CONTAINER = 'celery_container' + + +class test_CouchBackend(AppCase): + + def setup(self): + if pycouchdb is None: + raise SkipTest('pycouchdb is not installed.') + self.backend = CouchBackend(app=self.app) + + def test_init_no_pycouchdb(self): + """test init no pycouchdb raises""" + prev, module.pycouchdb = module.pycouchdb, None + try: + with self.assertRaises(ImproperlyConfigured): + CouchBackend(app=self.app) + finally: + module.pycouchdb = prev + + def test_get_container_exists(self): + with patch('pycouchdb.client.Database') as mock_Connection: + self.backend._connection = sentinel._connection + + connection = self.backend._get_connection() + + self.assertEqual(sentinel._connection, connection) + self.assertFalse(mock_Connection.called) + + def test_get(self): + """test_get + + CouchBackend.get should return and take two params + db conn to couchdb is mocked. 
+ TODO Should test on key not exists + + """ + x = CouchBackend(app=self.app) + x._connection = Mock() + mocked_get = x._connection.get = Mock() + mocked_get.return_value = sentinel.retval + # should return None + self.assertEqual(x.get('1f3fab'), sentinel.retval) + x._connection.get.assert_called_once_with('1f3fab') + + def test_delete(self): + """test_delete + + CouchBackend.delete should return and take two params + db conn to pycouchdb is mocked. + TODO Should test on key not exists + + """ + x = CouchBackend(app=self.app) + x._connection = Mock() + mocked_delete = x._connection.delete = Mock() + mocked_delete.return_value = None + # should return None + self.assertIsNone(x.delete('1f3fab')) + x._connection.delete.assert_called_once_with('1f3fab') + + def test_backend_by_url(self, url='couchdb://myhost/mycoolcontainer'): + from celery.backends.couchdb import CouchBackend + backend, url_ = backends.get_backend_by_url(url, self.app.loader) + self.assertIs(backend, CouchBackend) + self.assertEqual(url_, url) + + def test_backend_params_by_url(self): + url = 'couchdb://johndoe:mysecret@myhost:123/mycoolcontainer' + with self.Celery(backend=url) as app: + x = app.backend + self.assertEqual(x.container, 'mycoolcontainer') + self.assertEqual(x.host, 'myhost') + self.assertEqual(x.username, 'johndoe') + self.assertEqual(x.password, 'mysecret') + self.assertEqual(x.port, 123) diff --git a/celery/tests/backends/test_database.py b/celery/tests/backends/test_database.py index fac02215e..0dbbacd11 100644 --- a/celery/tests/backends/test_database.py +++ b/celery/tests/backends/test_database.py @@ -10,9 +10,10 @@ from celery.tests.case import ( AppCase, + Mock, SkipTest, depends_on_current_app, - mask_modules, + patch, skip_if_pypy, skip_if_jython, ) @@ -21,8 +22,13 @@ import sqlalchemy # noqa except ImportError: DatabaseBackend = Task = TaskSet = retry = None # noqa + SessionManager = session_cleanup = None # noqa else: - from celery.backends.database import DatabaseBackend, 
retry + from celery.backends.database import ( + DatabaseBackend, retry, session_cleanup, + ) + from celery.backends.database import session + from celery.backends.database.session import SessionManager from celery.backends.database.models import Task, TaskSet @@ -32,6 +38,27 @@ def __init__(self, data): self.data = data +class test_session_cleanup(AppCase): + + def setup(self): + if session_cleanup is None: + raise SkipTest('slqlalchemy not installed') + + def test_context(self): + session = Mock(name='session') + with session_cleanup(session): + pass + session.close.assert_called_with() + + def test_context_raises(self): + session = Mock(name='session') + with self.assertRaises(KeyError): + with session_cleanup(session): + raise KeyError() + session.rollback.assert_called_with() + session.close.assert_called_with() + + class test_DatabaseBackend(AppCase): @skip_if_pypy @@ -40,35 +67,30 @@ def setup(self): if DatabaseBackend is None: raise SkipTest('sqlalchemy not installed') self.uri = 'sqlite:///test.db' + self.app.conf.result_serializer = 'pickle' def test_retry_helper(self): - from celery.backends.database import OperationalError + from celery.backends.database import DatabaseError calls = [0] @retry def raises(): calls[0] += 1 - raise OperationalError(1, 2, 3) + raise DatabaseError(1, 2, 3) - with self.assertRaises(OperationalError): + with self.assertRaises(DatabaseError): raises(max_retries=5) self.assertEqual(calls[0], 5) - def test_missing_SQLAlchemy_raises_ImproperlyConfigured(self): - with mask_modules('sqlalchemy'): - from celery.backends.database import _sqlalchemy_installed - with self.assertRaises(ImproperlyConfigured): - _sqlalchemy_installed() - def test_missing_dburi_raises_ImproperlyConfigured(self): - self.app.conf.CELERY_RESULT_DBURI = None + self.app.conf.sqlalchemy_dburi = None with self.assertRaises(ImproperlyConfigured): DatabaseBackend(app=self.app) def test_missing_task_id_is_PENDING(self): tb = DatabaseBackend(self.uri, app=self.app) - 
self.assertEqual(tb.get_status('xxx-does-not-exist'), states.PENDING) + self.assertEqual(tb.get_state('xxx-does-not-exist'), states.PENDING) def test_missing_task_meta_is_dict_with_pending(self): tb = DatabaseBackend(self.uri, app=self.app) @@ -84,11 +106,11 @@ def test_mark_as_done(self): tid = uuid() - self.assertEqual(tb.get_status(tid), states.PENDING) + self.assertEqual(tb.get_state(tid), states.PENDING) self.assertIsNone(tb.get_result(tid)) tb.mark_as_done(tid, 42) - self.assertEqual(tb.get_status(tid), states.SUCCESS) + self.assertEqual(tb.get_state(tid), states.SUCCESS) self.assertEqual(tb.get_result(tid), 42) def test_is_pickled(self): @@ -106,13 +128,13 @@ def test_mark_as_started(self): tb = DatabaseBackend(self.uri, app=self.app) tid = uuid() tb.mark_as_started(tid) - self.assertEqual(tb.get_status(tid), states.STARTED) + self.assertEqual(tb.get_state(tid), states.STARTED) def test_mark_as_revoked(self): tb = DatabaseBackend(self.uri, app=self.app) tid = uuid() tb.mark_as_revoked(tid) - self.assertEqual(tb.get_status(tid), states.REVOKED) + self.assertEqual(tb.get_state(tid), states.REVOKED) def test_mark_as_retry(self): tb = DatabaseBackend(self.uri, app=self.app) @@ -123,7 +145,7 @@ def test_mark_as_retry(self): import traceback trace = '\n'.join(traceback.format_stack()) tb.mark_as_retry(tid, exception, traceback=trace) - self.assertEqual(tb.get_status(tid), states.RETRY) + self.assertEqual(tb.get_state(tid), states.RETRY) self.assertIsInstance(tb.get_result(tid), KeyError) self.assertEqual(tb.get_traceback(tid), trace) @@ -137,7 +159,7 @@ def test_mark_as_failure(self): import traceback trace = '\n'.join(traceback.format_stack()) tb.mark_as_failure(tid3, exception, traceback=trace) - self.assertEqual(tb.get_status(tid3), states.FAILURE) + self.assertEqual(tb.get_state(tid3), states.FAILURE) self.assertIsInstance(tb.get_result(tid3), KeyError) self.assertEqual(tb.get_traceback(tid3), trace) @@ -194,3 +216,53 @@ def test_Task__repr__(self): def 
test_TaskSet__repr__(self): self.assertIn('foo', repr(TaskSet('foo', None))) + + +class test_SessionManager(AppCase): + + def setup(self): + if SessionManager is None: + raise SkipTest('sqlalchemy not installed') + + def test_after_fork(self): + s = SessionManager() + self.assertFalse(s.forked) + s._after_fork() + self.assertTrue(s.forked) + + @patch('celery.backends.database.session.create_engine') + def test_get_engine_forked(self, create_engine): + s = SessionManager() + s._after_fork() + engine = s.get_engine('dburi', foo=1) + create_engine.assert_called_with('dburi', foo=1) + self.assertIs(engine, create_engine()) + engine2 = s.get_engine('dburi', foo=1) + self.assertIs(engine2, engine) + + @patch('celery.backends.database.session.sessionmaker') + def test_create_session_forked(self, sessionmaker): + s = SessionManager() + s.get_engine = Mock(name='get_engine') + s._after_fork() + engine, session = s.create_session('dburi', short_lived_sessions=True) + sessionmaker.assert_called_with(bind=s.get_engine()) + self.assertIs(session, sessionmaker()) + sessionmaker.return_value = Mock(name='new') + engine, session2 = s.create_session('dburi', short_lived_sessions=True) + sessionmaker.assert_called_with(bind=s.get_engine()) + self.assertIsNot(session2, session) + sessionmaker.return_value = Mock(name='new2') + engine, session3 = s.create_session( + 'dburi', short_lived_sessions=False) + sessionmaker.assert_called_with(bind=s.get_engine()) + self.assertIs(session3, session2) + + def test_coverage_madness(self): + prev, session.register_after_fork = ( + session.register_after_fork, None, + ) + try: + SessionManager() + finally: + session.register_after_fork = prev diff --git a/celery/tests/backends/test_elasticsearch.py b/celery/tests/backends/test_elasticsearch.py new file mode 100644 index 000000000..cc5d96fdd --- /dev/null +++ b/celery/tests/backends/test_elasticsearch.py @@ -0,0 +1,89 @@ +from __future__ import absolute_import, unicode_literals + +from celery 
import backends +from celery.backends import elasticsearch as module +from celery.backends.elasticsearch import ElasticsearchBackend +from celery.exceptions import ImproperlyConfigured + +from celery.tests.case import AppCase, Mock, SkipTest, sentinel + +try: + import elasticsearch +except ImportError: + elasticsearch = None + + +class test_ElasticsearchBackend(AppCase): + + def setup(self): + if elasticsearch is None: + raise SkipTest('elasticsearch is not installed.') + self.backend = ElasticsearchBackend(app=self.app) + + def test_init_no_elasticsearch(self): + prev, module.elasticsearch = module.elasticsearch, None + try: + with self.assertRaises(ImproperlyConfigured): + ElasticsearchBackend(app=self.app) + finally: + module.elasticsearch = prev + + def test_get(self): + x = ElasticsearchBackend(app=self.app) + x._server = Mock() + x._server.get = Mock() + # expected result + r = dict(found=True, _source={sentinel.task_id: sentinel.result}) + x._server.get.return_value = r + dict_result = x.get(sentinel.task_id) + + self.assertEqual(dict_result, sentinel.result) + x._server.get.assert_called_once_with( + doc_type=x.doc_type, + id=sentinel.task_id, + index=x.index, + ) + + def test_get_none(self): + x = ElasticsearchBackend(app=self.app) + x._server = Mock() + x._server.get = Mock() + x._server.get.return_value = sentinel.result + none_result = x.get(sentinel.task_id) + + self.assertEqual(none_result, None) + x._server.get.assert_called_once_with( + doc_type=x.doc_type, + id=sentinel.task_id, + index=x.index, + ) + + def test_delete(self): + x = ElasticsearchBackend(app=self.app) + x._server = Mock() + x._server.delete = Mock() + x._server.delete.return_value = sentinel.result + + self.assertIsNone(x.delete(sentinel.task_id), sentinel.result) + x._server.delete.assert_called_once_with( + doc_type=x.doc_type, + id=sentinel.task_id, + index=x.index, + ) + + def test_backend_by_url(self, url='elasticsearch://localhost:9200/index'): + backend, url_ = 
backends.get_backend_by_url(url, self.app.loader) + + self.assertIs(backend, ElasticsearchBackend) + self.assertEqual(url_, url) + + def test_backend_params_by_url(self): + url = 'elasticsearch://localhost:9200/index/doc_type' + with self.Celery(backend=url) as app: + x = app.backend + + self.assertEqual(x.index, 'index') + self.assertEqual(x.doc_type, 'doc_type') + self.assertEqual(x.scheme, 'elasticsearch') + self.assertEqual(x.host, 'localhost') + self.assertEqual(x.port, 9200) diff --git a/celery/tests/backends/test_filesystem.py b/celery/tests/backends/test_filesystem.py new file mode 100644 index 000000000..55a3d05dd --- /dev/null +++ b/celery/tests/backends/test_filesystem.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +import os +import shutil +import sys +import tempfile + +from celery import states +from celery.backends.filesystem import FilesystemBackend +from celery.exceptions import ImproperlyConfigured +from celery.utils import uuid + +from celery.tests.case import AppCase, SkipTest + + +class test_FilesystemBackend(AppCase): + + def setup(self): + if sys.platform == 'win32': + raise SkipTest('win32: skip') + self.directory = tempfile.mkdtemp() + self.url = 'file://' + self.directory + self.path = self.directory.encode('ascii') + + def teardown(self): + shutil.rmtree(self.directory) + + def test_a_path_is_required(self): + with self.assertRaises(ImproperlyConfigured): + FilesystemBackend(app=self.app) + + def test_a_path_in_app_conf(self): + self.app.conf.result_fspath = self.url[7:] + tb = FilesystemBackend(app=self.app) + self.assertEqual(tb.path, self.path) + + def test_a_path_in_url(self): + tb = FilesystemBackend(app=self.app, url=self.url) + self.assertEqual(tb.path, self.path) + + def test_path_is_incorrect(self): + with self.assertRaises(ImproperlyConfigured): + FilesystemBackend(app=self.app, url=self.url + '-incorrect') + + def test_missing_task_is_PENDING(self): + tb = FilesystemBackend(app=self.app, 
url=self.url) + self.assertEqual(tb.get_state('xxx-does-not-exist'), states.PENDING) + + def test_mark_as_done_writes_file(self): + tb = FilesystemBackend(app=self.app, url=self.url) + tb.mark_as_done(uuid(), 42) + self.assertEqual(len(os.listdir(self.directory)), 1) + + def test_done_task_is_SUCCESS(self): + tb = FilesystemBackend(app=self.app, url=self.url) + tid = uuid() + tb.mark_as_done(tid, 42) + self.assertEqual(tb.get_state(tid), states.SUCCESS) + + def test_correct_result(self): + data = {'foo': 'bar'} + + tb = FilesystemBackend(app=self.app, url=self.url) + tid = uuid() + tb.mark_as_done(tid, data) + self.assertEqual(tb.get_result(tid), data) + + def test_get_many(self): + data = {uuid(): 'foo', uuid(): 'bar', uuid(): 'baz'} + + tb = FilesystemBackend(app=self.app, url=self.url) + for key, value in data.items(): + tb.mark_as_done(key, value) + + for key, result in tb.get_many(data.keys()): + self.assertEqual(result['result'], data[key]) + + def test_forget_deletes_file(self): + tb = FilesystemBackend(app=self.app, url=self.url) + tid = uuid() + tb.mark_as_done(tid, 42) + tb.forget(tid) + self.assertEqual(len(os.listdir(self.directory)), 0) diff --git a/celery/tests/backends/test_mongodb.py b/celery/tests/backends/test_mongodb.py index e260d87f0..96a8db4b3 100644 --- a/celery/tests/backends/test_mongodb.py +++ b/celery/tests/backends/test_mongodb.py @@ -1,31 +1,47 @@ from __future__ import absolute_import import datetime -import uuid from pickle import loads, dumps +from kombu.exceptions import EncodeError + +from celery import uuid from celery import states from celery.backends import mongodb as module -from celery.backends.mongodb import MongoBackend, Bunch, pymongo +from celery.backends.mongodb import ( + InvalidDocument, MongoBackend, pymongo, +) from celery.exceptions import ImproperlyConfigured from celery.tests.case import ( AppCase, MagicMock, Mock, SkipTest, ANY, - depends_on_current_app, patch, sentinel, + depends_on_current_app, disable_stdouts, 
patch, sentinel, ) COLLECTION = 'taskmeta_celery' -TASK_ID = str(uuid.uuid1()) +TASK_ID = uuid() MONGODB_HOST = 'localhost' MONGODB_PORT = 27017 MONGODB_USER = 'mongo' MONGODB_PASSWORD = '1234' MONGODB_DATABASE = 'testing' MONGODB_COLLECTION = 'collection1' +MONGODB_GROUP_COLLECTION = 'group_collection1' class test_MongoBackend(AppCase): + default_url = 'mongodb://uuuu:pwpw@hostname.dom/database' + replica_set_url = ( + 'mongodb://uuuu:pwpw@hostname.dom,' + 'hostname.dom/database?replicaSet=rs' + ) + sanitized_default_url = 'mongodb://uuuu:**@hostname.dom/database' + sanitized_replica_set_url = ( + 'mongodb://uuuu:**@hostname.dom/,' + 'hostname.dom/database?replicaSet=rs' + ) + def setup(self): if pymongo is None: raise SkipTest('pymongo is not installed.') @@ -36,7 +52,7 @@ def setup(self): R['Binary'], module.Binary = module.Binary, Mock() R['datetime'], datetime.datetime = datetime.datetime, Mock() - self.backend = MongoBackend(app=self.app) + self.backend = MongoBackend(app=self.app, url=self.default_url) def teardown(self): MongoBackend.encode = self._reset['encode'] @@ -44,11 +60,6 @@ def teardown(self): module.Binary = self._reset['Binary'] datetime.datetime = self._reset['datetime'] - def test_Bunch(self): - x = Bunch(foo='foo', bar=2) - self.assertEqual(x.foo, 'foo') - self.assertEqual(x.bar, 2) - def test_init_no_mongodb(self): prev, module.pymongo = module.pymongo, None try: @@ -58,20 +69,71 @@ def test_init_no_mongodb(self): module.pymongo = prev def test_init_no_settings(self): - self.app.conf.CELERY_MONGODB_BACKEND_SETTINGS = [] + self.app.conf.mongodb_backend_settings = [] with self.assertRaises(ImproperlyConfigured): MongoBackend(app=self.app) def test_init_settings_is_None(self): - self.app.conf.CELERY_MONGODB_BACKEND_SETTINGS = None + self.app.conf.mongodb_backend_settings = None MongoBackend(app=self.app) - def test_restore_group_no_entry(self): - x = MongoBackend(app=self.app) - x.collection = Mock() - fo = x.collection.find_one = Mock() - 
fo.return_value = None - self.assertIsNone(x._restore_group('1f3fab')) + def test_init_with_settings(self): + self.app.conf.mongodb_backend_settings = None + # empty settings + mb = MongoBackend(app=self.app) + + # uri + uri = 'mongodb://localhost:27017' + mb = MongoBackend(app=self.app, url=uri) + self.assertEqual(mb.mongo_host, ['localhost:27017']) + self.assertEqual(mb.options, mb._prepare_client_options()) + self.assertEqual(mb.database_name, 'celery') + + # uri with database name + uri = 'mongodb://localhost:27017/celerydb' + mb = MongoBackend(app=self.app, url=uri) + self.assertEqual(mb.database_name, 'celerydb') + + # uri with user, password, database name, replica set + uri = ('mongodb://' + 'celeryuser:celerypassword@' + 'mongo1.example.com:27017,' + 'mongo2.example.com:27017,' + 'mongo3.example.com:27017/' + 'celerydatabase?replicaSet=rs0') + mb = MongoBackend(app=self.app, url=uri) + self.assertEqual(mb.mongo_host, ['mongo1.example.com:27017', + 'mongo2.example.com:27017', + 'mongo3.example.com:27017']) + self.assertEqual( + mb.options, dict(mb._prepare_client_options(), replicaset='rs0'), + ) + self.assertEqual(mb.user, 'celeryuser') + self.assertEqual(mb.password, 'celerypassword') + self.assertEqual(mb.database_name, 'celerydatabase') + + # same uri, change some parameters in backend settings + self.app.conf.mongodb_backend_settings = { + 'replicaset': 'rs1', + 'user': 'backenduser', + 'database': 'another_db', + 'options': { + 'socketKeepAlive': True, + }, + } + mb = MongoBackend(app=self.app, url=uri) + self.assertEqual(mb.mongo_host, ['mongo1.example.com:27017', + 'mongo2.example.com:27017', + 'mongo3.example.com:27017']) + self.assertEqual( + mb.options, dict(mb._prepare_client_options(), + replicaset='rs1', socketKeepAlive=True), + ) + self.assertEqual(mb.user, 'backenduser') + self.assertEqual(mb.password, 'celerypassword') + self.assertEqual(mb.database_name, 'another_db') + + mb = MongoBackend(app=self.app, url='mongodb://') 
@depends_on_current_app def test_reduce(self): @@ -79,7 +141,6 @@ def test_reduce(self): self.assertTrue(loads(dumps(x))) def test_get_connection_connection_exists(self): - with patch('pymongo.MongoClient') as mock_Connection: self.backend._connection = sentinel._connection @@ -89,7 +150,6 @@ def test_get_connection_connection_exists(self): self.assertFalse(mock_Connection.called) def test_get_connection_no_connection_host(self): - with patch('pymongo.MongoClient') as mock_Connection: self.backend._connection = None self.backend.host = MONGODB_HOST @@ -98,12 +158,12 @@ def test_get_connection_no_connection_host(self): connection = self.backend._get_connection() mock_Connection.assert_called_once_with( - host='mongodb://localhost:27017', ssl=False, max_pool_size=10, - auto_start_request=False) + host='mongodb://localhost:27017', + **self.backend._prepare_client_options() + ) self.assertEqual(sentinel.connection, connection) def test_get_connection_no_connection_mongodb_uri(self): - with patch('pymongo.MongoClient') as mock_Connection: mongodb_uri = 'mongodb://%s:%d' % (MONGODB_HOST, MONGODB_PORT) self.backend._connection = None @@ -113,8 +173,8 @@ def test_get_connection_no_connection_mongodb_uri(self): connection = self.backend._get_connection() mock_Connection.assert_called_once_with( - host=mongodb_uri, ssl=False, max_pool_size=10, - auto_start_request=False) + host=mongodb_uri, **self.backend._prepare_client_options() + ) self.assertEqual(sentinel.connection, connection) @patch('celery.backends.mongodb.MongoBackend._get_connection') @@ -152,15 +212,6 @@ def test_get_database_no_existing_no_auth(self, mock_get_connection): self.assertFalse(mock_database.authenticate.called) self.assertTrue(self.backend.__dict__['database'] is mock_database) - def test_process_cleanup(self): - self.backend._connection = None - self.backend.process_cleanup() - self.assertEqual(self.backend._connection, None) - - self.backend._connection = 'not none' - self.backend.process_cleanup() 
- self.assertEqual(self.backend._connection, None) - @patch('celery.backends.mongodb.MongoBackend._get_database') def test_store_result(self, mock_get_database): self.backend.taskmeta_collection = MONGODB_COLLECTION @@ -179,6 +230,11 @@ def test_store_result(self, mock_get_database): mock_collection.save.assert_called_once_with(ANY) self.assertEqual(sentinel.result, ret_val) + mock_collection.save.side_effect = InvalidDocument() + with self.assertRaises(EncodeError): + self.backend._store_result( + sentinel.task_id, sentinel.result, sentinel.status) + @patch('celery.backends.mongodb.MongoBackend._get_database') def test_get_task_meta_for(self, mock_get_database): datetime.datetime = self._reset['datetime'] @@ -196,9 +252,10 @@ def test_get_task_meta_for(self, mock_get_database): mock_get_database.assert_called_once_with() mock_database.__getitem__.assert_called_once_with(MONGODB_COLLECTION) self.assertEqual( - ['status', 'task_id', 'date_done', 'traceback', 'result', - 'children'], - list(ret_val.keys())) + list(sorted(['status', 'task_id', 'date_done', 'traceback', + 'result', 'children'])), + list(sorted(ret_val.keys())), + ) @patch('celery.backends.mongodb.MongoBackend._get_database') def test_get_task_meta_for_no_result(self, mock_get_database): @@ -219,29 +276,36 @@ def test_get_task_meta_for_no_result(self, mock_get_database): @patch('celery.backends.mongodb.MongoBackend._get_database') def test_save_group(self, mock_get_database): - self.backend.taskmeta_collection = MONGODB_COLLECTION + self.backend.groupmeta_collection = MONGODB_GROUP_COLLECTION mock_database = MagicMock(spec=['__getitem__', '__setitem__']) mock_collection = Mock() mock_get_database.return_value = mock_database mock_database.__getitem__.return_value = mock_collection - + res = [self.app.AsyncResult(i) for i in range(3)] ret_val = self.backend._save_group( - sentinel.taskset_id, sentinel.result) - + sentinel.taskset_id, res, + ) mock_get_database.assert_called_once_with() - 
mock_database.__getitem__.assert_called_once_with(MONGODB_COLLECTION) + mock_database.__getitem__.assert_called_once_with( + MONGODB_GROUP_COLLECTION, + ) mock_collection.save.assert_called_once_with(ANY) - self.assertEqual(sentinel.result, ret_val) + self.assertEqual(res, ret_val) @patch('celery.backends.mongodb.MongoBackend._get_database') def test_restore_group(self, mock_get_database): - self.backend.taskmeta_collection = MONGODB_COLLECTION + self.backend.groupmeta_collection = MONGODB_GROUP_COLLECTION mock_database = MagicMock(spec=['__getitem__', '__setitem__']) mock_collection = Mock() - mock_collection.find_one.return_value = MagicMock() + mock_collection.find_one.return_value = { + '_id': sentinel.taskset_id, + 'result': [uuid(), uuid()], + 'date_done': 1, + } + self.backend.decode.side_effect = lambda r: r mock_get_database.return_value = mock_database mock_database.__getitem__.return_value = mock_collection @@ -249,14 +313,16 @@ def test_restore_group(self, mock_get_database): ret_val = self.backend._restore_group(sentinel.taskset_id) mock_get_database.assert_called_once_with() - mock_database.__getitem__.assert_called_once_with(MONGODB_COLLECTION) mock_collection.find_one.assert_called_once_with( {'_id': sentinel.taskset_id}) - self.assertEqual( + self.assertItemsEqual( ['date_done', 'result', 'task_id'], list(ret_val.keys()), ) + mock_collection.find_one.return_value = None + self.backend._restore_group(sentinel.taskset_id) + @patch('celery.backends.mongodb.MongoBackend._get_database') def test_delete_group(self, mock_get_database): self.backend.taskmeta_collection = MONGODB_COLLECTION @@ -270,7 +336,6 @@ def test_delete_group(self, mock_get_database): self.backend._delete_group(sentinel.taskset_id) mock_get_database.assert_called_once_with() - mock_database.__getitem__.assert_called_once_with(MONGODB_COLLECTION) mock_collection.remove.assert_called_once_with( {'_id': sentinel.taskset_id}) @@ -296,20 +361,21 @@ def test_forget(self, mock_get_database): 
def test_cleanup(self, mock_get_database): datetime.datetime = self._reset['datetime'] self.backend.taskmeta_collection = MONGODB_COLLECTION + self.backend.groupmeta_collection = MONGODB_GROUP_COLLECTION - mock_database = MagicMock(spec=['__getitem__', '__setitem__']) - mock_collection = Mock() + mock_database = Mock(spec=['__getitem__', '__setitem__'], + name='MD') + self.backend.collections = mock_collection = Mock() mock_get_database.return_value = mock_database + mock_database.__getitem__ = Mock(name='MD.__getitem__') mock_database.__getitem__.return_value = mock_collection self.backend.app.now = datetime.datetime.utcnow self.backend.cleanup() mock_get_database.assert_called_once_with() - mock_database.__getitem__.assert_called_once_with( - MONGODB_COLLECTION) - mock_collection.assert_called_once_with() + self.assertTrue(mock_collection.remove.called) def test_get_database_authfailure(self): x = MongoBackend(app=self.app) @@ -322,3 +388,56 @@ def test_get_database_authfailure(self): with self.assertRaises(ImproperlyConfigured): x._get_database() db.authenticate.assert_called_with('jerry', 'cere4l') + + def test_prepare_client_options(self): + with patch('pymongo.version_tuple', new=(3, 0, 3)): + options = self.backend._prepare_client_options() + self.assertDictEqual(options, { + 'maxPoolSize': self.backend.max_pool_size + }) + + def test_as_uri_include_password(self): + self.assertEqual(self.backend.as_uri(True), self.default_url) + + def test_as_uri_exclude_password(self): + self.assertEqual(self.backend.as_uri(), self.sanitized_default_url) + + def test_as_uri_include_password_replica_set(self): + backend = MongoBackend(app=self.app, url=self.replica_set_url) + self.assertEqual(backend.as_uri(True), self.replica_set_url) + + def test_as_uri_exclude_password_replica_set(self): + backend = MongoBackend(app=self.app, url=self.replica_set_url) + self.assertEqual(backend.as_uri(), self.sanitized_replica_set_url) + + @disable_stdouts + def 
test_regression_worker_startup_info(self): + self.app.conf.result_backend = ( + 'mongodb://user:password@host0.com:43437,host1.com:43437' + '/work4us?replicaSet=rs&ssl=true' + ) + worker = self.app.Worker() + worker.on_start() + self.assertTrue(worker.startup_info()) + + +class test_MongoBackend_no_mock(AppCase): + + def setup(self): + if pymongo is None: + raise SkipTest('pymongo is not installed.') + + def test_encode_decode(self): + backend = MongoBackend(app=self.app) + data = {'foo': 1} + self.assertTrue(backend.decode(backend.encode(data))) + backend.serializer = 'bson' + self.assertEquals(backend.encode(data), data) + self.assertEquals(backend.decode(data), data) + + def test_de(self): + backend = MongoBackend(app=self.app) + data = {'foo': 1} + self.assertTrue(backend.encode(data)) + backend.serializer = 'bson' + self.assertEquals(backend.encode(data), data) diff --git a/celery/tests/backends/test_redis.py b/celery/tests/backends/test_redis.py index 0ecc5258b..a486969c7 100644 --- a/celery/tests/backends/test_redis.py +++ b/celery/tests/backends/test_redis.py @@ -2,21 +2,32 @@ from datetime import timedelta +from contextlib import contextmanager from pickle import loads, dumps from celery import signature from celery import states -from celery import group from celery import uuid +from celery.canvas import Signature from celery.datastructures import AttributeDict -from celery.exceptions import ImproperlyConfigured -from celery.utils.timeutils import timedelta_seconds +from celery.exceptions import ChordError, ImproperlyConfigured from celery.tests.case import ( - AppCase, Mock, MockCallbacks, SkipTest, depends_on_current_app, patch, + ANY, AppCase, ContextMock, Mock, MockCallbacks, SkipTest, + call, depends_on_current_app, patch, ) +def raise_on_second_call(mock, exc, *retval): + + def on_first_call(*args, **kwargs): + mock.side_effect = exc + return mock.return_value + mock.side_effect = on_first_call + if retval: + mock.return_value, = retval + + class 
Connection(object): connected = True @@ -37,6 +48,12 @@ def add_step(*args, **kwargs): return self return add_step + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + pass + def execute(self): return [step(*a, **kw) for step, a, kw in self.steps] @@ -57,7 +74,7 @@ def __init__(self, host=None, port=None, db=None, password=None, **kw): def get(self, key): return self.keyspace.get(key) - def setex(self, key, value, expires): + def setex(self, key, expires, value): self.set(key, value) self.expire(key, expires) @@ -92,7 +109,7 @@ def llen(self, key): class redis(object): - Redis = Redis + StrictRedis = Redis class ConnectionPool(object): @@ -115,14 +132,20 @@ class _RedisBackend(RedisBackend): return _RedisBackend + def get_E_LOST(self): + from celery.backends.redis import E_LOST + return E_LOST + def setup(self): self.Backend = self.get_backend() + self.E_LOST = self.get_E_LOST() + self.b = self.Backend(app=self.app) @depends_on_current_app def test_reduce(self): try: from celery.backends.redis import RedisBackend - x = RedisBackend(app=self.app, new_join=True) + x = RedisBackend(app=self.app) self.assertTrue(loads(dumps(x))) except ImportError: raise SkipTest('redis not installed') @@ -130,12 +153,11 @@ def test_reduce(self): def test_no_redis(self): self.Backend.redis = None with self.assertRaises(ImproperlyConfigured): - self.Backend(app=self.app, new_join=True) + self.Backend(app=self.app) def test_url(self): x = self.Backend( 'redis://:bosco@vandelay.com:123//1', app=self.app, - new_join=True, ) self.assertTrue(x.connparams) self.assertEqual(x.connparams['host'], 'vandelay.com') @@ -146,7 +168,6 @@ def test_url(self): def test_socket_url(self): x = self.Backend( 'socket:///tmp/redis.sock?virtual_host=/3', app=self.app, - new_join=True, ) self.assertTrue(x.connparams) self.assertEqual(x.connparams['path'], '/tmp/redis.sock') @@ -161,7 +182,6 @@ def test_socket_url(self): def test_compat_propertie(self): x = self.Backend( 
'redis://:bosco@vandelay.com:123//1', app=self.app, - new_join=True, ) with self.assertPendingDeprecation(): self.assertEqual(x.host, 'vandelay.com') @@ -174,102 +194,207 @@ def test_compat_propertie(self): def test_conf_raises_KeyError(self): self.app.conf = AttributeDict({ - 'CELERY_RESULT_SERIALIZER': 'json', - 'CELERY_MAX_CACHED_RESULTS': 1, - 'CELERY_ACCEPT_CONTENT': ['json'], - 'CELERY_TASK_RESULT_EXPIRES': None, + 'result_serializer': 'json', + 'result_cache_max': 1, + 'result_expires': None, + 'accept_content': ['json'], }) - self.Backend(app=self.app, new_join=True) + self.Backend(app=self.app) + + @patch('celery.backends.redis.error') + def test_on_connection_error(self, error): + intervals = iter([10, 20, 30]) + exc = KeyError() + self.assertEqual( + self.b.on_connection_error(None, exc, intervals, 1), 10, + ) + error.assert_called_with(self.E_LOST, 1, 'Inf', 'in 10.00 seconds') + self.assertEqual( + self.b.on_connection_error(10, exc, intervals, 2), 20, + ) + error.assert_called_with(self.E_LOST, 2, 10, 'in 20.00 seconds') + self.assertEqual( + self.b.on_connection_error(10, exc, intervals, 3), 30, + ) + error.assert_called_with(self.E_LOST, 3, 10, 'in 30.00 seconds') + + def test_incr(self): + self.b.client = Mock(name='client') + self.b.incr('foo') + self.b.client.incr.assert_called_with('foo') + + def test_expire(self): + self.b.client = Mock(name='client') + self.b.expire('foo', 300) + self.b.client.expire.assert_called_with('foo', 300) + + def test_apply_chord(self): + header = Mock(name='header') + header.results = [Mock(name='t1'), Mock(name='t2')] + print(self.b.apply_chord,) + self.b.apply_chord( + header, (1, 2), 'gid', None, + options={'max_retries': 10}, + ) + header.assert_called_with(1, 2, max_retries=10, task_id='gid') + + def test_unpack_chord_result(self): + self.b.exception_to_python = Mock(name='etp') + decode = Mock(name='decode') + exc = KeyError() + tup = decode.return_value = (1, 'id1', states.FAILURE, exc) + with 
self.assertRaises(ChordError): + self.b._unpack_chord_result(tup, decode) + decode.assert_called_with(tup) + self.b.exception_to_python.assert_called_with(exc) + + exc = ValueError() + tup = decode.return_value = (2, 'id2', states.RETRY, exc) + ret = self.b._unpack_chord_result(tup, decode) + self.b.exception_to_python.assert_called_with(exc) + self.assertIs(ret, self.b.exception_to_python()) + + def test_on_chord_part_return_no_gid_or_tid(self): + request = Mock(name='request') + request.id = request.group = None + self.assertIsNone(self.b.on_chord_part_return(request, 'SUCCESS', 10)) + + def test_ConnectionPool(self): + self.b.redis = Mock(name='redis') + self.assertIsNone(self.b._ConnectionPool) + self.assertIs(self.b.ConnectionPool, self.b.redis.ConnectionPool) + self.assertIs(self.b.ConnectionPool, self.b.redis.ConnectionPool) def test_expires_defaults_to_config(self): - self.app.conf.CELERY_TASK_RESULT_EXPIRES = 10 - b = self.Backend(expires=None, app=self.app, new_join=True) + self.app.conf.result_expires = 10 + b = self.Backend(expires=None, app=self.app) self.assertEqual(b.expires, 10) def test_expires_is_int(self): - b = self.Backend(expires=48, app=self.app, new_join=True) + b = self.Backend(expires=48, app=self.app) self.assertEqual(b.expires, 48) - def test_set_new_join_from_url_query(self): - b = self.Backend('redis://?new_join=True;foobar=1', app=self.app) - self.assertEqual(b.on_chord_part_return, b._new_chord_return) - self.assertEqual(b.apply_chord, b._new_chord_apply) - - def test_default_is_old_join(self): - b = self.Backend(app=self.app) - self.assertNotEqual(b.on_chord_part_return, b._new_chord_return) - self.assertNotEqual(b.apply_chord, b._new_chord_apply) + def test_add_to_chord(self): + b = self.Backend('redis://', app=self.app) + gid = uuid() + b.add_to_chord(gid, 'sig') + b.client.incr.assert_called_with(b.get_key_for_group(gid, '.t'), 1) def test_expires_is_None(self): - b = self.Backend(expires=None, app=self.app, new_join=True) - 
self.assertEqual(b.expires, timedelta_seconds( - self.app.conf.CELERY_TASK_RESULT_EXPIRES)) + b = self.Backend(expires=None, app=self.app) + self.assertEqual( + b.expires, + self.app.conf.result_expires.total_seconds(), + ) def test_expires_is_timedelta(self): - b = self.Backend( - expires=timedelta(minutes=1), app=self.app, new_join=1, - ) + b = self.Backend(expires=timedelta(minutes=1), app=self.app) self.assertEqual(b.expires, 60) - def test_apply_chord(self): - self.Backend(app=self.app, new_join=True).apply_chord( - group(app=self.app), (), 'group_id', {}, - result=[self.app.AsyncResult(x) for x in [1, 2, 3]], - ) - def test_mget(self): - b = self.Backend(app=self.app, new_join=True) - self.assertTrue(b.mget(['a', 'b', 'c'])) - b.client.mget.assert_called_with(['a', 'b', 'c']) + self.assertTrue(self.b.mget(['a', 'b', 'c'])) + self.b.client.mget.assert_called_with(['a', 'b', 'c']) def test_set_no_expire(self): - b = self.Backend(app=self.app, new_join=True) - b.expires = None - b.set('foo', 'bar') + self.b.expires = None + self.b.set('foo', 'bar') + + def create_task(self): + tid = uuid() + task = Mock(name='task-{0}'.format(tid)) + task.name = 'foobarbaz' + self.app.tasks['foobarbaz'] = task + task.request.chord = signature(task) + task.request.id = tid + task.request.chord['chord_size'] = 10 + task.request.group = 'group_id' + return task @patch('celery.result.GroupResult.restore') def test_on_chord_part_return(self, restore): - b = self.Backend(app=self.app, new_join=True) - - def create_task(): - tid = uuid() - task = Mock(name='task-{0}'.format(tid)) - task.name = 'foobarbaz' - self.app.tasks['foobarbaz'] = task - task.request.chord = signature(task) - task.request.id = tid - task.request.chord['chord_size'] = 10 - task.request.group = 'group_id' - return task - - tasks = [create_task() for i in range(10)] + tasks = [self.create_task() for i in range(10)] for i in range(10): - b.on_chord_part_return(tasks[i], states.SUCCESS, i) - 
self.assertTrue(b.client.rpush.call_count) - b.client.rpush.reset_mock() - self.assertTrue(b.client.lrange.call_count) - gkey = b.get_key_for_group('group_id', '.j') - b.client.delete.assert_called_with(gkey) - b.client.expire.assert_called_witeh(gkey, 86400) + self.b.on_chord_part_return(tasks[i].request, states.SUCCESS, i) + self.assertTrue(self.b.client.rpush.call_count) + self.b.client.rpush.reset_mock() + self.assertTrue(self.b.client.lrange.call_count) + jkey = self.b.get_key_for_group('group_id', '.j') + tkey = self.b.get_key_for_group('group_id', '.t') + self.b.client.delete.assert_has_calls([call(jkey), call(tkey)]) + self.b.client.expire.assert_has_calls([ + call(jkey, 86400), call(tkey, 86400), + ]) + + def test_on_chord_part_return__success(self): + with self.chord_context(2) as (_, request, callback): + self.b.on_chord_part_return(request, states.SUCCESS, 10) + self.assertFalse(callback.delay.called) + self.b.on_chord_part_return(request, states.SUCCESS, 20) + callback.delay.assert_called_with([10, 20]) + + def test_on_chord_part_return__callback_raises(self): + with self.chord_context(1) as (_, request, callback): + callback.delay.side_effect = KeyError(10) + task = self.app._tasks['add'] = Mock(name='add_task') + self.b.on_chord_part_return(request, states.SUCCESS, 10) + task.backend.fail_from_current_stack.assert_called_with( + callback.id, exc=ANY, + ) + + def test_on_chord_part_return__ChordError(self): + with self.chord_context(1) as (_, request, callback): + self.b.client.pipeline = ContextMock() + raise_on_second_call(self.b.client.pipeline, ChordError()) + self.b.client.pipeline.return_value.rpush().llen().get().expire( + ).expire().execute.return_value = (1, 1, 0, 4, 5) + task = self.app._tasks['add'] = Mock(name='add_task') + self.b.on_chord_part_return(request, states.SUCCESS, 10) + task.backend.fail_from_current_stack.assert_called_with( + callback.id, exc=ANY, + ) + + def test_on_chord_part_return__other_error(self): + with 
self.chord_context(1) as (_, request, callback): + self.b.client.pipeline = ContextMock() + raise_on_second_call(self.b.client.pipeline, RuntimeError()) + self.b.client.pipeline.return_value.rpush().llen().get().expire( + ).expire().execute.return_value = (1, 1, 0, 4, 5) + task = self.app._tasks['add'] = Mock(name='add_task') + self.b.on_chord_part_return(request, states.SUCCESS, 10) + task.backend.fail_from_current_stack.assert_called_with( + callback.id, exc=ANY, + ) + + @contextmanager + def chord_context(self, size=1): + with patch('celery.backends.redis.maybe_signature') as ms: + tasks = [self.create_task() for i in range(size)] + request = Mock(name='request') + request.id = 'id1' + request.group = 'gid1' + callback = ms.return_value = Signature('add') + callback.id = 'id1' + callback['chord_size'] = size + callback.delay = Mock(name='callback.delay') + yield tasks, request, callback def test_process_cleanup(self): - self.Backend(app=self.app, new_join=True).process_cleanup() + self.b.process_cleanup() def test_get_set_forget(self): - b = self.Backend(app=self.app, new_join=True) tid = uuid() - b.store_result(tid, 42, states.SUCCESS) - self.assertEqual(b.get_status(tid), states.SUCCESS) - self.assertEqual(b.get_result(tid), 42) - b.forget(tid) - self.assertEqual(b.get_status(tid), states.PENDING) + self.b.store_result(tid, 42, states.SUCCESS) + self.assertEqual(self.b.get_state(tid), states.SUCCESS) + self.assertEqual(self.b.get_result(tid), 42) + self.b.forget(tid) + self.assertEqual(self.b.get_state(tid), states.PENDING) def test_set_expires(self): - b = self.Backend(expires=512, app=self.app, new_join=True) + self.b = self.Backend(expires=512, app=self.app) tid = uuid() - key = b.get_key_for_task(tid) - b.store_result(tid, 42, states.SUCCESS) - b.client.expire.assert_called_with( + key = self.b.get_key_for_task(tid) + self.b.store_result(tid, 42, states.SUCCESS) + self.b.client.expire.assert_called_with( key, 512, ) diff --git 
a/celery/tests/backends/test_riak.py b/celery/tests/backends/test_riak.py new file mode 100644 index 000000000..e5781a910 --- /dev/null +++ b/celery/tests/backends/test_riak.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import, with_statement + +from celery.backends import riak as module +from celery.backends.riak import RiakBackend, riak +from celery.exceptions import ImproperlyConfigured +from celery.tests.case import ( + AppCase, MagicMock, Mock, SkipTest, patch, sentinel, +) + + +RIAK_BUCKET = 'riak_bucket' + + +class test_RiakBackend(AppCase): + + def setup(self): + if riak is None: + raise SkipTest('riak is not installed.') + self.app.conf.result_backend = 'riak://' + + @property + def backend(self): + return self.app.backend + + def test_init_no_riak(self): + """ + test init no riak raises + """ + prev, module.riak = module.riak, None + try: + with self.assertRaises(ImproperlyConfigured): + RiakBackend(app=self.app) + finally: + module.riak = prev + + def test_init_no_settings(self): + """Test init no settings.""" + self.app.conf.riak_backend_settings = [] + with self.assertRaises(ImproperlyConfigured): + RiakBackend(app=self.app) + + def test_init_settings_is_None(self): + """ + Test init settings is None + """ + self.app.conf.riak_backend_settings = None + self.assertTrue(self.app.backend) + + def test_get_client_client_exists(self): + """Test get existing client.""" + with patch('riak.client.RiakClient') as mock_connection: + self.backend._client = sentinel._client + + mocked_is_alive = self.backend._client.is_alive = Mock() + mocked_is_alive.return_value.value = True + client = self.backend._get_client() + self.assertEquals(sentinel._client, client) + self.assertFalse(mock_connection.called) + + def test_get(self): + """Test get + + RiakBackend.get + should return and take two params + db conn to riak is mocked + TODO Should test on key not exists + """ + self.app.conf.couchbase_backend_settings = {} + 
self.backend._client = Mock(name='_client') + self.backend._bucket = Mock(name='_bucket') + mocked_get = self.backend._bucket.get = Mock(name='bucket.get') + mocked_get.return_value.data = sentinel.retval + # should return None + self.assertEqual(self.backend.get('1f3fab'), sentinel.retval) + self.backend._bucket.get.assert_called_once_with('1f3fab') + + def test_set(self): + """Test set + + RiakBackend.set + should return None and take two params + db conn to couchbase is mocked. + + """ + self.app.conf.couchbase_backend_settings = None + self.backend._client = MagicMock() + self.backend._bucket = MagicMock() + self.backend._bucket.set = MagicMock() + # should return None + self.assertIsNone(self.backend.set(sentinel.key, sentinel.value)) + + def test_delete(self): + """Test get + + RiakBackend.get + should return and take two params + db conn to couchbase is mocked + TODO Should test on key not exists + + """ + self.app.conf.couchbase_backend_settings = {} + + self.backend._client = Mock(name='_client') + self.backend._bucket = Mock(name='_bucket') + mocked_delete = self.backend._client.delete = Mock('client.delete') + mocked_delete.return_value = None + # should return None + self.assertIsNone(self.backend.delete('1f3fab')) + self.backend._bucket.delete.assert_called_once_with('1f3fab') + + def test_config_params(self): + """ + test celery.conf.riak_backend_settingS + celery.conf.riak_backend_settingS + is properly set + """ + self.app.conf.riak_backend_settings = { + 'bucket': 'mycoolbucket', + 'host': 'there.host.com', + 'port': '1234', + } + self.assertEqual(self.backend.bucket_name, 'mycoolbucket') + self.assertEqual(self.backend.host, 'there.host.com') + self.assertEqual(self.backend.port, 1234) + + def test_backend_by_url(self, url='riak://myhost/mycoolbucket'): + """ + test get backend by url + """ + from celery import backends + from celery.backends.riak import RiakBackend + backend, url_ = backends.get_backend_by_url(url, self.app.loader) + 
self.assertIs(backend, RiakBackend) + self.assertEqual(url_, url) + + def test_backend_params_by_url(self): + """ + test get backend params by url + """ + self.app.conf.result_backend = 'riak://myhost:123/mycoolbucket' + self.assertEqual(self.backend.bucket_name, 'mycoolbucket') + self.assertEqual(self.backend.host, 'myhost') + self.assertEqual(self.backend.port, 123) + + def test_non_ASCII_bucket_raises(self): + """test app.conf.riak_backend_settings and + app.conf.riak_backend_settings + is properly set + """ + self.app.conf.riak_backend_settings = { + 'bucket': 'héhé', + 'host': 'there.host.com', + 'port': '1234', + } + with self.assertRaises(ValueError): + RiakBackend(app=self.app) diff --git a/celery/tests/backends/test_rpc.py b/celery/tests/backends/test_rpc.py index 6fe594c19..2b0ccb86b 100644 --- a/celery/tests/backends/test_rpc.py +++ b/celery/tests/backends/test_rpc.py @@ -43,13 +43,16 @@ def test_destination_for(self): with self.assertRaises(RuntimeError): self.b.destination_for('task_id', None) + def test_rkey(self): + self.assertEqual(self.b.rkey('id1'), 'id1') + def test_binding(self): queue = self.b.binding self.assertEqual(queue.name, self.b.oid) self.assertEqual(queue.exchange, self.b.exchange) self.assertEqual(queue.routing_key, self.b.oid) self.assertFalse(queue.durable) - self.assertFalse(queue.auto_delete) + self.assertTrue(queue.auto_delete) def test_many_bindings(self): self.assertListEqual( diff --git a/celery/tests/bin/celery.py b/celery/tests/bin/celery.py new file mode 100644 index 000000000..d52053836 --- /dev/null +++ b/celery/tests/bin/celery.py @@ -0,0 +1,2 @@ +from __future__ import absolute_import, unicode_literals +# here for a test diff --git a/celery/tests/bin/test_amqp.py b/celery/tests/bin/test_amqp.py index 8840a9f10..20ab44168 100644 --- a/celery/tests/bin/test_amqp.py +++ b/celery/tests/bin/test_amqp.py @@ -124,7 +124,7 @@ def test_note(self): self.assertNotIn('FOO', self.fh.getvalue()) def test_run(self): - a = 
self.create_adm('queue.declare foo') + a = self.create_adm('queue.declare', 'foo') a.run() self.assertIn('ok', self.fh.getvalue()) diff --git a/celery/tests/bin/test_base.py b/celery/tests/bin/test_base.py index 8d1d0d55d..3c02ca8ef 100644 --- a/celery/tests/bin/test_base.py +++ b/celery/tests/bin/test_base.py @@ -8,15 +8,13 @@ Extensions, HelpFormatter, ) +from celery.utils.objects import Bunch + from celery.tests.case import ( AppCase, Mock, depends_on_current_app, override_stdouts, patch, ) -class Object(object): - pass - - class MyApp(object): user_options = {'preload': None} @@ -27,9 +25,7 @@ class MockCommand(Command): mock_args = ('arg1', 'arg2', 'arg3') def parse_options(self, prog_name, arguments, command=None): - options = Object() - options.foo = 'bar' - options.prog_name = prog_name + options = Bunch(foo='bar', prog_name=prog_name) return options, self.mock_args def run(self, *args, **kwargs): @@ -123,7 +119,7 @@ def run(a, b, c): c.run = run with self.assertRaises(c.UsageError): - c.verify_args((1, )) + c.verify_args((1,)) c.verify_args((1, 2, 3)) def test_run_interface(self): @@ -186,7 +182,7 @@ def test_with_custom_broker(self): def test_with_custom_app(self): cmd = MockCommand(app=self.app) app = '.'.join([__name__, 'APP']) - cmd.setup_app_from_commandline(['--app=%s' % (app, ), + cmd.setup_app_from_commandline(['--app=%s' % (app,), '--loglevel=INFO']) self.assertIs(cmd.app, APP) cmd.setup_app_from_commandline(['-A', app, @@ -236,14 +232,29 @@ def test_find_app_suspects(self): self.assertTrue(cmd.find_app('celery.tests.bin.proj.app')) self.assertTrue(cmd.find_app('celery.tests.bin.proj')) self.assertTrue(cmd.find_app('celery.tests.bin.proj:hello')) + self.assertTrue(cmd.find_app('celery.tests.bin.proj.hello')) self.assertTrue(cmd.find_app('celery.tests.bin.proj.app:app')) + self.assertTrue(cmd.find_app('celery.tests.bin.proj.app.app')) + with self.assertRaises(AttributeError): + cmd.find_app('celery.tests.bin') with 
self.assertRaises(AttributeError): cmd.find_app(__name__) + def test_ask(self): + try: + input = self.patch('celery.bin.base.input') + except AttributeError: + input = self.patch('builtins.input') + cmd = MockCommand(app=self.app) + input.return_value = 'yes' + self.assertEqual(cmd.ask('q', ('yes', 'no'), 'no'), 'yes') + input.return_value = 'nop' + self.assertEqual(cmd.ask('q', ('yes', 'no'), 'no'), 'no') + def test_host_format(self): cmd = MockCommand(app=self.app) - with patch('socket.gethostname') as hn: + with patch('celery.utils.gethostname') as hn: hn.return_value = 'blacktron.example.com' self.assertEqual(cmd.host_format(''), '') self.assertEqual( @@ -281,16 +292,62 @@ def test_say_chat_no_body(self): def test_with_cmdline_config(self): cmd = MockCommand(app=self.app) cmd.enable_config_from_cmdline = True - cmd.namespace = 'celeryd' + cmd.namespace = 'worker' rest = cmd.setup_app_from_commandline(argv=[ '--loglevel=INFO', '--', 'broker.url=amqp://broker.example.com', '.prefetch_multiplier=100']) - self.assertEqual(cmd.app.conf.BROKER_URL, + self.assertEqual(cmd.app.conf.broker_url, 'amqp://broker.example.com') - self.assertEqual(cmd.app.conf.CELERYD_PREFETCH_MULTIPLIER, 100) + self.assertEqual(cmd.app.conf.worker_prefetch_multiplier, 100) self.assertListEqual(rest, ['--loglevel=INFO']) + cmd.app = None + cmd.get_app = Mock(name='get_app') + cmd.get_app.return_value = self.app + self.app.user_options['preload'] = [ + Option('--foo', action='store_true'), + ] + cmd.setup_app_from_commandline(argv=[ + '--foo', '--loglevel=INFO', '--', + 'broker.url=amqp://broker.example.com', + '.prefetch_multiplier=100']) + self.assertIs(cmd.app, cmd.get_app()) + + def test_preparse_options__required_short(self): + cmd = MockCommand(app=self.app) + with self.assertRaises(ValueError): + cmd.preparse_options( + ['a', '-f'], [Option('-f', action='store')]) + + def test_preparse_options__longopt_whitespace(self): + cmd = MockCommand(app=self.app) + cmd.preparse_options( + ['a', 
'--foo', 'val'], [Option('--foo', action='store')]) + + def test_preparse_options__shortopt_store_true(self): + cmd = MockCommand(app=self.app) + cmd.preparse_options( + ['a', '--foo'], [Option('--foo', action='store_true')]) + + def test_get_default_app(self): + self.patch('celery._state.get_current_app') + cmd = MockCommand(app=self.app) + from celery._state import get_current_app + self.assertIs(cmd._get_default_app(), get_current_app()) + + def test_set_colored(self): + cmd = MockCommand(app=self.app) + cmd.colored = 'foo' + self.assertEqual(cmd.colored, 'foo') + + def test_set_no_color(self): + cmd = MockCommand(app=self.app) + cmd.no_color = False + _ = cmd.colored # noqa + cmd.no_color = True + self.assertFalse(cmd.colored.enabled) + def test_find_app(self): cmd = MockCommand(app=self.app) with patch('celery.bin.base.symbol_by_name') as sbn: @@ -311,6 +368,22 @@ def after(*args, **kwargs): def test_parse_preload_options_shortopt(self): cmd = Command() - cmd.preload_options = (Option('-s', action='store', dest='silent'), ) + cmd.preload_options = (Option('-s', action='store', dest='silent'),) acc = cmd.parse_preload_options(['-s', 'yes']) self.assertEqual(acc.get('silent'), 'yes') + + def test_parse_preload_options_with_equals_and_append(self): + cmd = Command() + opt = Option('--zoom', action='append', default=[]) + cmd.preload_options = (opt,) + acc = cmd.parse_preload_options(['--zoom=1', '--zoom=2']) + + self.assertEqual(acc, {'zoom': ['1', '2']}) + + def test_parse_preload_options_without_equals_and_append(self): + cmd = Command() + opt = Option('--zoom', action='append', default=[]) + cmd.preload_options = (opt,) + acc = cmd.parse_preload_options(['--zoom', '1', '--zoom', '2']) + + self.assertEqual(acc, {'zoom': ['1', '2']}) diff --git a/celery/tests/bin/test_celery.py b/celery/tests/bin/test_celery.py index fbfdb62f6..750f3f51a 100644 --- a/celery/tests/bin/test_celery.py +++ b/celery/tests/bin/test_celery.py @@ -2,12 +2,14 @@ import sys -from anyjson 
import dumps from datetime import datetime +from kombu.utils.json import dumps + from celery import __main__ from celery.platforms import EX_FAILURE, EX_USAGE, EX_OK from celery.bin.base import Error +from celery.bin import celery as mod from celery.bin.celery import ( Command, list_, @@ -28,19 +30,11 @@ command, ) -from celery.tests.case import ( - AppCase, Mock, WhateverIO, override_stdouts, patch, -) +from celery.tests.case import AppCase, Mock, WhateverIO, patch class test__main__(AppCase): - def test_warn_deprecated(self): - with override_stdouts() as (stdout, _): - __main__._warn_deprecated('YADDA YADDA') - self.assertIn('command is deprecated', stdout.getvalue()) - self.assertIn('YADDA YADDA', stdout.getvalue()) - def test_main(self): with patch('celery.__main__.maybe_patch_concurrency') as mpc: with patch('celery.bin.celery.main') as main: @@ -48,32 +42,16 @@ def test_main(self): mpc.assert_called_with() main.assert_called_with() - def test_compat_worker(self): - with patch('celery.__main__.maybe_patch_concurrency') as mpc: - with patch('celery.__main__._warn_deprecated') as depr: - with patch('celery.bin.worker.main') as main: - __main__._compat_worker() - mpc.assert_called_with() - depr.assert_called_with('celery worker') - main.assert_called_with() - - def test_compat_multi(self): + def test_main__multi(self): with patch('celery.__main__.maybe_patch_concurrency') as mpc: - with patch('celery.__main__._warn_deprecated') as depr: - with patch('celery.bin.multi.main') as main: - __main__._compat_multi() + with patch('celery.bin.celery.main') as main: + prev, sys.argv = sys.argv, ['foo', 'multi'] + try: + __main__.main() self.assertFalse(mpc.called) - depr.assert_called_with('celery multi') - main.assert_called_with() - - def test_compat_beat(self): - with patch('celery.__main__.maybe_patch_concurrency') as mpc: - with patch('celery.__main__._warn_deprecated') as depr: - with patch('celery.bin.beat.main') as main: - __main__._compat_beat() - 
mpc.assert_called_with() - depr.assert_called_with('celery beat') main.assert_called_with() + finally: + sys.argv = prev class test_Command(AppCase): @@ -202,6 +180,13 @@ def test_run(self, purge_): a.run(force=True) self.assertIn('100 messages', out.getvalue()) + a.out = Mock(name='out') + a.ask = Mock(name='ask') + a.run(force=False) + a.ask.assert_called_with(a.warn_prompt, ('yes', 'no'), 'no') + a.ask.return_value = 'yes' + a.run(force=False) + class test_result(AppCase): @@ -326,6 +311,20 @@ def test_load_extensions_no_commands(self): x = CeleryCommand(app=self.app) x.load_extension_commands() + def test_load_extensions_commands(self): + with patch('celery.bin.celery.Extensions') as Ext: + prev, mod.command_classes = list(mod.command_classes), Mock() + try: + ext = Ext.return_value = Mock(name='Extension') + ext.load.return_value = ['foo', 'bar'] + x = CeleryCommand(app=self.app) + x.load_extension_commands() + mod.command_classes.append.assert_called_with( + ('Extensions', ['foo', 'bar'], 'magenta'), + ) + finally: + mod.command_classes = prev + def test_determine_exit_status(self): self.assertEqual(determine_exit_status('true'), EX_OK) self.assertEqual(determine_exit_status(''), EX_FAILURE) @@ -350,6 +349,15 @@ def test_relocate_args_from_start(self): ['foo', '--foo=1'], ) + def test_register_command(self): + prev, CeleryCommand.commands = dict(CeleryCommand.commands), {} + try: + fun = Mock(name='fun') + CeleryCommand.register_command(fun, name='foo') + self.assertIs(CeleryCommand.commands['foo'], fun) + finally: + CeleryCommand.commands = prev + def test_handle_argv(self): x = CeleryCommand(app=self.app) x.execute = Mock() @@ -480,6 +488,10 @@ def test_run(self, real): callback({'foo': {'ok': 'pong'}}) self.assertIn('OK', out.getvalue()) + with patch('celery.bin.celery.json.dumps') as dumps: + i.run('ping', json=True) + self.assertTrue(dumps.called) + instance = real.return_value = Mock() instance.ping.return_value = None with self.assertRaises(Error): @@ 
-491,6 +503,18 @@ def test_run(self, real): i.say_chat('<-', 'hello') self.assertFalse(out.getvalue()) + def test_objgraph(self): + i = inspect(app=self.app) + i.call = Mock(name='call') + i.objgraph('Message', foo=1) + i.call.assert_called_with('objgraph', 'Message', foo=1) + + def test_conf(self): + i = inspect(app=self.app) + i.call = Mock(name='call') + i.conf(with_defaults=True, foo=1) + i.call.assert_called_with('conf', True, foo=1) + class test_control(AppCase): @@ -551,7 +575,7 @@ def test_cancel_consumer(self): class test_multi(AppCase): def test_get_options(self): - self.assertTupleEqual(multi(app=self.app).get_options(), ()) + self.assertIsNone(multi(app=self.app).get_options()) def test_run_from_argv(self): with patch('celery.bin.multi.MultiTool') as MultiTool: diff --git a/celery/tests/bin/test_celeryd_detach.py b/celery/tests/bin/test_celeryd_detach.py index 2b6e5ae8d..a2bbe5b2d 100644 --- a/celery/tests/bin/test_celeryd_detach.py +++ b/celery/tests/bin/test_celeryd_detach.py @@ -23,30 +23,58 @@ def test_execs(self, setup_logs, logger, execv, detached): context.__exit__ = Mock() detach('/bin/boo', ['a', 'b', 'c'], logfile='/var/log', - pidfile='/var/pid') - detached.assert_called_with('/var/log', '/var/pid', None, None, 0, - None, False) + pidfile='/var/pid', hostname='foo@example.com') + detached.assert_called_with( + '/var/log', '/var/pid', None, None, None, None, False, + after_forkers=False, + ) execv.assert_called_with('/bin/boo', ['/bin/boo', 'a', 'b', 'c']) - execv.side_effect = Exception('foo') r = detach('/bin/boo', ['a', 'b', 'c'], - logfile='/var/log', pidfile='/var/pid', app=self.app) + logfile='/var/log', pidfile='/var/pid', + executable='/bin/foo', app=self.app) + execv.assert_called_with('/bin/foo', ['/bin/foo', 'a', 'b', 'c']) + + execv.side_effect = Exception('foo') + r = detach( + '/bin/boo', ['a', 'b', 'c'], + logfile='/var/log', pidfile='/var/pid', + hostname='foo@example.com', app=self.app) context.__enter__.assert_called_with() 
self.assertTrue(logger.critical.called) - setup_logs.assert_called_with('ERROR', '/var/log') + setup_logs.assert_called_with( + 'ERROR', '/var/log', hostname='foo@example.com') self.assertEqual(r, 1) + self.patch('celery.current_app') + from celery import current_app + r = detach( + '/bin/boo', ['a', 'b', 'c'], + logfile='/var/log', pidfile='/var/pid', + hostname='foo@example.com', app=None) + current_app.log.setup_logging_subsystem.assert_called_with( + 'ERROR', '/var/log', hostname='foo@example.com', + ) + class test_PartialOptionParser(AppCase): def test_parser(self): x = detached_celeryd(self.app) - p = x.Parser('celeryd_detach') - options, values = p.parse_args(['--logfile=foo', '--fake', '--enable', - 'a', 'b', '-c1', '-d', '2']) + p = x.create_parser('celeryd_detach') + options, values = p.parse_args([ + '--logfile=foo', '--fake', '--enable', + 'a', 'b', '-c1', '-d', '2', + ]) self.assertEqual(options.logfile, 'foo') self.assertEqual(values, ['a', 'b']) self.assertEqual(p.leftovers, ['--enable', '-c1', '-d', '2']) + options, values = p.parse_args([ + '--fake', '--enable', + '--pidfile=/var/pid/foo.pid', + 'a', 'b', '-c1', '-d', '2', + ]) + self.assertEqual(options.pidfile, '/var/pid/foo.pid') with override_stdouts(): with self.assertRaises(SystemExit): @@ -84,7 +112,8 @@ def test_execute_from_commandline(self, detach, exit): self.assertTrue(exit.called) detach.assert_called_with( path=x.execv_path, uid=None, gid=None, - umask=0, fake=False, logfile='/var/log', pidfile='celeryd.pid', + umask=None, fake=False, logfile='/var/log', pidfile='celeryd.pid', + working_directory=None, executable=None, hostname=None, argv=x.execv_argv + [ '-c', '1', '-lDEBUG', '--logfile=/var/log', '--pidfile=celeryd.pid', diff --git a/celery/tests/bin/test_celeryevdump.py b/celery/tests/bin/test_celeryevdump.py index 09cdc4d1f..9fc54b67d 100644 --- a/celery/tests/bin/test_celeryevdump.py +++ b/celery/tests/bin/test_celeryevdump.py @@ -56,9 +56,9 @@ def se(*_a, **_k): raise KeyError() 
recv.capture.side_effect = se - Conn = app.connection.return_value = Mock(name='conn') + Conn = app.connection_for_read.return_value = Mock(name='conn') conn = Conn.clone.return_value = Mock(name='cloned_conn') - conn.connection_errors = (KeyError, ) + conn.connection_errors = (KeyError,) conn.channel_errors = () evdump(app) diff --git a/celery/tests/bin/test_events.py b/celery/tests/bin/test_events.py index a6e79f75a..f49f6f7c3 100644 --- a/celery/tests/bin/test_events.py +++ b/celery/tests/bin/test_events.py @@ -32,7 +32,7 @@ def test_run_dump(self): def test_run_top(self): try: import curses # noqa - except ImportError: + except (ImportError, OSError): raise SkipTest('curses monitor requires curses') @_old_patch('celery.events.cursesmon', 'evtop', @@ -64,7 +64,7 @@ def test_run_cam_detached(self, detached, evcam): self.assertTrue(evcam.called) def test_get_options(self): - self.assertTrue(self.ev.get_options()) + self.assertFalse(self.ev.get_options()) @_old_patch('celery.bin.events', 'events', MockCommand) def test_main(self): diff --git a/celery/tests/bin/test_multi.py b/celery/tests/bin/test_multi.py index 0b2ecd981..5e18a9b90 100644 --- a/celery/tests/bin/test_multi.py +++ b/celery/tests/bin/test_multi.py @@ -8,7 +8,6 @@ main, MultiTool, findsig, - abbreviations, parse_ns_range, format_opt, quote, @@ -30,14 +29,6 @@ def test_findsig(self): self.assertEqual(findsig(['-s']), signal.SIGTERM) self.assertEqual(findsig(['-log']), signal.SIGTERM) - def test_abbreviations(self): - expander = abbreviations({'%s': 'START', - '%x': 'STOP'}) - self.assertEqual(expander('foo%s'), 'fooSTART') - self.assertEqual(expander('foo%x'), 'fooSTOP') - self.assertEqual(expander('foo%y'), 'foo%y') - self.assertIsNone(expander(None)) - def test_parse_ns_range(self): self.assertEqual(parse_ns_range('1-3', True), ['1', '2', '3']) self.assertEqual(parse_ns_range('1-3', False), ['1-3']) @@ -76,8 +67,9 @@ def test_parse(self): class test_multi_args(AppCase): - @patch('socket.gethostname') 
+ @patch('celery.bin.multi.gethostname') def test_parse(self, gethostname): + gethostname.return_value = 'example.com' p = NamespacedOptionParser([ '-c:jerry,elaine', '5', '--loglevel:kramer=DEBUG', @@ -120,12 +112,11 @@ def assert_line_in(name, args): ) expand = names[0][2] self.assertEqual(expand('%h'), '*P*jerry@*S*') - self.assertEqual(expand('%n'), 'jerry') + self.assertEqual(expand('%n'), '*P*jerry') names2 = list(multi_args(p, cmd='COMMAND', append='', prefix='*P*', suffix='*S*')) self.assertEqual(names2[0][1][-1], '-- .disable_rate_limits=1') - gethostname.return_value = 'example.com' p2 = NamespacedOptionParser(['10', '-c:1', '5']) names3 = list(multi_args(p2, cmd='COMMAND')) self.assertEqual(len(names3), 10) @@ -174,6 +165,11 @@ def test_note_quiet(self): self.t.note('hello world') self.assertFalse(self.fh.getvalue()) + def test_carp(self): + self.t.say = Mock() + self.t.carp('foo') + self.t.say.assert_called_with('foo', True, self.t.stderr) + def test_info(self): self.t.verbose = True self.t.info('hello info') @@ -185,15 +181,15 @@ def test_info_not_verbose(self): self.assertFalse(self.fh.getvalue()) def test_error(self): - self.t.say = Mock() + self.t.carp = Mock() self.t.usage = Mock() self.assertEqual(self.t.error('foo'), 1) - self.t.say.assert_called_with('foo') + self.t.carp.assert_called_with('foo') self.t.usage.assert_called_with() - self.t.say = Mock() + self.t.carp = Mock() self.assertEqual(self.t.error(), 1) - self.assertFalse(self.t.say.called) + self.assertFalse(self.t.carp.called) self.assertEqual(self.t.retcode, 1) @@ -249,7 +245,7 @@ def test_restart(self): waitexec.return_value = 0 callback('jerry', ['arg'], 13) - waitexec.assert_called_with(['arg']) + waitexec.assert_called_with(['arg'], path=sys.executable) self.assertIn('OK', self.fh.getvalue()) self.fh.seek(0) self.fh.truncate() @@ -302,7 +298,7 @@ def read_pid(self): Pidfile.side_effect = pids @patch('celery.bin.multi.Pidfile') - @patch('socket.gethostname') + 
@patch('celery.bin.multi.gethostname') def test_getpids(self, gethostname, Pidfile): gethostname.return_value = 'e.com' self.prepare_pidfile_for_getpids(Pidfile) @@ -340,7 +336,7 @@ def test_getpids(self, gethostname, Pidfile): nodes = self.t.getpids(p, 'celery worker', callback=None) @patch('celery.bin.multi.Pidfile') - @patch('socket.gethostname') + @patch('celery.bin.multi.gethostname') @patch('celery.bin.multi.sleep') def test_shutdown_nodes(self, slepp, gethostname, Pidfile): gethostname.return_value = 'e.com' @@ -419,7 +415,7 @@ def test_show(self): self.t.show(['foo', 'bar', 'baz'], 'celery worker') self.assertTrue(self.fh.getvalue()) - @patch('socket.gethostname') + @patch('celery.bin.multi.gethostname') def test_get(self, gethostname): gethostname.return_value = 'e.com' self.t.get(['xuzzy@e.com', 'foo', 'bar', 'baz'], 'celery worker') @@ -427,7 +423,7 @@ def test_get(self, gethostname): self.t.get(['foo@e.com', 'foo', 'bar', 'baz'], 'celery worker') self.assertTrue(self.fh.getvalue()) - @patch('socket.gethostname') + @patch('celery.bin.multi.gethostname') def test_names(self, gethostname): gethostname.return_value = 'e.com' self.t.names(['foo', 'bar', 'baz'], 'celery worker') diff --git a/celery/tests/bin/test_worker.py b/celery/tests/bin/test_worker.py index fbb7c52c2..c69c9502b 100644 --- a/celery/tests/bin/test_worker.py +++ b/celery/tests/bin/test_worker.py @@ -4,9 +4,7 @@ import os import sys -from functools import wraps - -from billiard import current_process +from billiard.process import current_process from kombu import Exchange, Queue from celery import platforms @@ -17,21 +15,19 @@ from celery.exceptions import ( ImproperlyConfigured, WorkerShutdown, WorkerTerminate, ) -from celery.utils.log import ensure_process_aware_logger +from celery.platforms import EX_FAILURE, EX_OK from celery.worker import state from celery.tests.case import ( AppCase, Mock, SkipTest, - WhateverIO, + disable_stdouts, patch, skip_if_pypy, skip_if_jython, ) 
-ensure_process_aware_logger() - class WorkerAppCase(AppCase): @@ -40,25 +36,6 @@ def tearDown(self): trace.reset_worker_optimizations() -def disable_stdouts(fun): - - @wraps(fun) - def disable(*args, **kwargs): - prev_out, prev_err = sys.stdout, sys.stderr - prev_rout, prev_rerr = sys.__stdout__, sys.__stderr__ - sys.stdout = sys.__stdout__ = WhateverIO() - sys.stderr = sys.__stderr__ = WhateverIO() - try: - return fun(*args, **kwargs) - finally: - sys.stdout = prev_out - sys.stderr = prev_err - sys.__stdout__ = prev_rout - sys.__stderr__ = prev_rerr - - return disable - - class Worker(cd.Worker): redirect_stdouts = False @@ -187,13 +164,13 @@ def test_startup_info(self): prev_loader = self.app.loader worker = self.Worker(app=self.app, queues='foo,bar,baz,xuzzy,do,re,mi') - self.app.loader = Mock() - self.app.loader.__module__ = 'acme.baked_beans' - self.assertTrue(worker.startup_info()) + with patch('celery.apps.worker.qualname') as qualname: + qualname.return_value = 'acme.backed_beans.Loader' + self.assertTrue(worker.startup_info()) - self.app.loader = Mock() - self.app.loader.__module__ = 'celery.loaders.foo' - self.assertTrue(worker.startup_info()) + with patch('celery.apps.worker.qualname') as qualname: + qualname.return_value = 'celery.loaders.Loader' + self.assertTrue(worker.startup_info()) from celery.loaders.app import AppLoader self.app.loader = AppLoader(app=self.app) @@ -206,7 +183,10 @@ def test_startup_info(self): # test when there are too few output lines # to draft the ascii art onto prev, cd.ARTLINES = cd.ARTLINES, ['the quick brown fox'] - self.assertTrue(worker.startup_info()) + try: + self.assertTrue(worker.startup_info()) + finally: + cd.ARTLINES = prev @disable_stdouts def test_run(self): @@ -236,12 +216,12 @@ def test_init_queues(self): self.assertIn('celery', app.amqp.queues) self.assertNotIn('celery', app.amqp.queues.consume_from) - c.CELERY_CREATE_MISSING_QUEUES = False + c.task_create_missing_queues = False del(app.amqp.queues) with 
self.assertRaises(ImproperlyConfigured): self.Worker(app=self.app).setup_queues(['image']) del(app.amqp.queues) - c.CELERY_CREATE_MISSING_QUEUES = True + c.task_create_missing_queues = True worker = self.Worker(app=self.app) worker.setup_queues(['image']) self.assertIn('image', app.amqp.queues.consume_from) @@ -282,7 +262,7 @@ def test_warns_if_running_as_privileged_user(self, _exit): with patch('os.getuid') as getuid: getuid.return_value = 0 - self.app.conf.CELERY_ACCEPT_CONTENT = ['pickle'] + self.app.conf.accept_content = ['pickle'] worker = self.Worker(app=self.app) worker.on_start() _exit.assert_called_with(1) @@ -296,7 +276,7 @@ def test_warns_if_running_as_privileged_user(self, _exit): worker.on_start() finally: platforms.C_FORCE_ROOT = False - self.app.conf.CELERY_ACCEPT_CONTENT = ['json'] + self.app.conf.accept_content = ['json'] with self.assertWarnsRegex( RuntimeWarning, r'absolutely not recommended'): @@ -322,8 +302,11 @@ def test_setup_logging_no_color(self): app=self.app, redirect_stdouts=False, no_color=True, ) prev, self.app.log.setup = self.app.log.setup, Mock() - worker.setup_logging() - self.assertFalse(self.app.log.setup.call_args[1]['colorize']) + try: + worker.setup_logging() + self.assertFalse(self.app.log.setup.call_args[1]['colorize']) + finally: + self.app.log.setup = prev @disable_stdouts def test_startup_info_pool_is_str(self): @@ -443,8 +426,10 @@ def test_set_process_status(self): def test_parse_options(self): cmd = worker() cmd.app = self.app - opts, args = cmd.parse_options('worker', ['--concurrency=512']) + opts, args = cmd.parse_options('worker', ['--concurrency=512', + '--heartbeat-interval=10']) self.assertEqual(opts.concurrency, 512) + self.assertEqual(opts.heartbeat_interval, 10) @disable_stdouts def test_main(self): @@ -488,8 +473,8 @@ def test_worker_int_handler(self): worker = self._Worker() handlers = self.psig(cd.install_worker_int_handler, worker) next_handlers = {} - state.should_stop = False - state.should_terminate = 
False + state.should_stop = None + state.should_terminate = None class Signals(platforms.Signals): @@ -502,15 +487,17 @@ def __setitem__(self, sig, handler): try: handlers['SIGINT']('SIGINT', object()) self.assertTrue(state.should_stop) + self.assertEqual(state.should_stop, EX_FAILURE) finally: platforms.signals = p - state.should_stop = False + state.should_stop = None try: next_handlers['SIGINT']('SIGINT', object()) self.assertTrue(state.should_terminate) + self.assertEqual(state.should_terminate, EX_FAILURE) finally: - state.should_terminate = False + state.should_terminate = None with patch('celery.apps.worker.active_thread_count') as c: c.return_value = 1 @@ -541,7 +528,7 @@ def test_worker_int_handler_only_stop_MainProcess(self): self.assertTrue(state.should_stop) finally: process.name = name - state.should_stop = False + state.should_stop = None with patch('celery.apps.worker.active_thread_count') as c: c.return_value = 1 @@ -552,7 +539,7 @@ def test_worker_int_handler_only_stop_MainProcess(self): handlers['SIGINT']('SIGINT', object()) finally: process.name = name - state.should_stop = False + state.should_stop = None @disable_stdouts def test_install_HUP_not_supported_handler(self): @@ -578,14 +565,17 @@ def test_worker_term_hard_handler_only_stop_MainProcess(self): handlers['SIGQUIT']('SIGQUIT', object()) self.assertTrue(state.should_terminate) finally: - state.should_terminate = False + state.should_terminate = None with patch('celery.apps.worker.active_thread_count') as c: c.return_value = 1 worker = self._Worker() handlers = self.psig( cd.install_worker_term_hard_handler, worker) - with self.assertRaises(WorkerTerminate): - handlers['SIGQUIT']('SIGQUIT', object()) + try: + with self.assertRaises(WorkerTerminate): + handlers['SIGQUIT']('SIGQUIT', object()) + finally: + state.should_terminate = None finally: process.name = name @@ -597,9 +587,9 @@ def test_worker_term_handler_when_threads(self): handlers = self.psig(cd.install_worker_term_handler, worker) 
try: handlers['SIGTERM']('SIGTERM', object()) - self.assertTrue(state.should_stop) + self.assertEqual(state.should_stop, EX_OK) finally: - state.should_stop = False + state.should_stop = None @disable_stdouts def test_worker_term_handler_when_single_thread(self): @@ -611,7 +601,7 @@ def test_worker_term_handler_when_single_thread(self): with self.assertRaises(WorkerShutdown): handlers['SIGTERM']('SIGTERM', object()) finally: - state.should_stop = False + state.should_stop = None @patch('sys.__stderr__') @skip_if_pypy @@ -635,7 +625,7 @@ def test_worker_term_handler_only_stop_MainProcess(self): worker = self._Worker() handlers = self.psig(cd.install_worker_term_handler, worker) handlers['SIGTERM']('SIGTERM', object()) - self.assertTrue(state.should_stop) + self.assertEqual(state.should_stop, EX_OK) with patch('celery.apps.worker.active_thread_count') as c: c.return_value = 1 worker = self._Worker() @@ -644,7 +634,7 @@ def test_worker_term_handler_only_stop_MainProcess(self): handlers['SIGTERM']('SIGTERM', object()) finally: process.name = name - state.should_stop = False + state.should_stop = None @disable_stdouts @patch('celery.platforms.close_open_fds') @@ -663,14 +653,14 @@ def _execv(*args): worker = self._Worker() handlers = self.psig(cd.install_worker_restart_handler, worker) handlers['SIGHUP']('SIGHUP', object()) - self.assertTrue(state.should_stop) + self.assertEqual(state.should_stop, EX_OK) self.assertTrue(register.called) callback = register.call_args[0][0] callback() self.assertTrue(argv) finally: os.execv = execv - state.should_stop = False + state.should_stop = None @disable_stdouts def test_worker_term_hard_handler_when_threaded(self): @@ -682,7 +672,7 @@ def test_worker_term_hard_handler_when_threaded(self): handlers['SIGQUIT']('SIGQUIT', object()) self.assertTrue(state.should_terminate) finally: - state.should_terminate = False + state.should_terminate = None @disable_stdouts def test_worker_term_hard_handler_when_single_threaded(self): diff --git 
a/celery/tests/case.py b/celery/tests/case.py index c96fd8ec0..da19a4ff5 100644 --- a/celery/tests/case.py +++ b/celery/tests/case.py @@ -33,8 +33,7 @@ import mock # noqa from nose import SkipTest from kombu import Queue -from kombu.log import NullHandler -from kombu.utils import nested, symbol_by_name +from kombu.utils import symbol_by_name from celery import Celery from celery.app import current_app @@ -48,15 +47,15 @@ from celery.utils.imports import qualname __all__ = [ - 'Case', 'AppCase', 'Mock', 'MagicMock', 'ANY', + 'Case', 'AppCase', 'Mock', 'MagicMock', 'ANY', 'TaskMessage', 'patch', 'call', 'sentinel', 'skip_unless_module', 'wrap_logger', 'with_environ', 'sleepdeprived', 'skip_if_environ', 'todo', 'skip', 'skip_if', 'skip_unless', 'mask_modules', 'override_stdouts', 'mock_module', 'replace_module_value', 'sys_platform', 'reset_modules', - 'patch_modules', 'mock_context', 'mock_open', 'patch_many', + 'patch_modules', 'mock_context', 'mock_open', 'assert_signal_called', 'skip_if_pypy', - 'skip_if_jython', 'body_from_sig', 'restore_logging', + 'skip_if_jython', 'task_message_from_sig', 'restore_logging', ] patch = mock.patch call = mock.call @@ -85,25 +84,26 @@ CELERY_TEST_CONFIG = { #: Don't want log output when running suite. 
- 'CELERYD_HIJACK_ROOT_LOGGER': False, - 'CELERY_SEND_TASK_ERROR_EMAILS': False, - 'CELERY_DEFAULT_QUEUE': 'testcelery', - 'CELERY_DEFAULT_EXCHANGE': 'testcelery', - 'CELERY_DEFAULT_ROUTING_KEY': 'testcelery', - 'CELERY_QUEUES': ( + 'worker_hijack_root_logger': False, + 'worker_log_color': False, + 'task_send_error_emails': False, + 'task_default_queue': 'testcelery', + 'task_default_exchange': 'testcelery', + 'task_default_routing_key': 'testcelery', + 'task_queues': ( Queue('testcelery', routing_key='testcelery'), ), - 'CELERY_ENABLE_UTC': True, - 'CELERY_TIMEZONE': 'UTC', - 'CELERYD_LOG_COLOR': False, + 'accept_content': ('json', 'pickle'), + 'enable_utc': True, + 'timezone': 'UTC', # Mongo results tests (only executed if installed and running) - 'CELERY_MONGODB_BACKEND_SETTINGS': { + 'mongodb_backend_settings': { 'host': os.environ.get('MONGO_HOST') or 'localhost', 'port': os.environ.get('MONGO_PORT') or 27017, 'database': os.environ.get('MONGO_DB') or 'celery_unittests', - 'taskmeta_collection': (os.environ.get('MONGO_TASKMETA_COLLECTION') - or 'taskmeta_collection'), + 'taskmeta_collection': (os.environ.get('MONGO_TASKMETA_COLLECTION') or + 'taskmeta_collection'), 'user': os.environ.get('MONGO_USER'), 'password': os.environ.get('MONGO_PASSWORD'), } @@ -123,14 +123,11 @@ def __init__(self, *args, **kwargs): self.already_setup = True -def UnitApp(name=None, broker=None, backend=None, - set_as_current=False, log=UnitLogging, **kwargs): - +def UnitApp(name=None, set_as_current=False, log=UnitLogging, + broker='memory://', backend='cache+memory://', **kwargs): app = Celery(name or 'celery.tests', - broker=broker or 'memory://', - backend=backend or 'cache+memory://', set_as_current=set_as_current, - log=log, + log=log, broker=broker, backend=backend, **kwargs) app.add_defaults(deepcopy(CELERY_TEST_CONFIG)) return app @@ -151,7 +148,7 @@ class _ContextMock(Mock): in the class, not just the instance.""" def __enter__(self): - pass + return self def __exit__(self, 
*exc_info): pass @@ -206,7 +203,7 @@ def __inner(*args, **kwargs): try: importlib.import_module(module) except ImportError: - raise SkipTest('Does not have %s' % (module, )) + raise SkipTest('Does not have %s' % (module,)) return fun(*args, **kwargs) @@ -234,10 +231,15 @@ def _is_magic_module(m): # will load _tkinter and other shit when touched. # pyflakes refuses to accept 'noqa' for this isinstance. - cls, modtype = m.__class__, types.ModuleType - return (not cls is modtype and ( - '__getattr__' in vars(m.__class__) or - '__getattribute__' in vars(m.__class__))) + cls, modtype = type(m), types.ModuleType + try: + variables = vars(cls) + except TypeError: + return True + else: + return (cls is not modtype and ( + '__getattr__' in variables or + '__getattribute__' in variables)) class _AssertWarnsContext(_AssertRaisesBaseContext): @@ -300,8 +302,57 @@ def __exit__(self, exc_type, exc_value, tb): raise self.failureException('%s not triggered' % exc_name) +def alive_threads(): + return [thread for thread in threading.enumerate() if thread.is_alive()] + + class Case(unittest.TestCase): + def patch(self, *path, **options): + manager = patch(".".join(path), **options) + patched = manager.start() + self.addCleanup(manager.stop) + return patched + + def mock_modules(self, *mods): + modules = [] + for mod in mods: + mod = mod.split('.') + modules.extend(reversed([ + '.'.join(mod[:-i] if i else mod) for i in range(len(mod)) + ])) + modules = sorted(set(modules)) + return self.wrap_context(mock_module(*modules)) + + def on_nth_call_do(self, mock, side_effect, n=1): + + def on_call(*args, **kwargs): + if mock.call_count >= n: + mock.side_effect = side_effect + return mock.return_value + mock.side_effect = on_call + return mock + + def on_nth_call_return(self, mock, retval, n=1): + + def on_call(*args, **kwargs): + if mock.call_count >= n: + mock.return_value = retval + return mock.return_value + mock.side_effect = on_call + return mock + + def mask_modules(self, *modules): + 
self.wrap_context(mask_modules(*modules)) + + def wrap_context(self, context): + ret = context.__enter__() + self.addCleanup(partial(context.__exit__, None, None, None)) + return ret + + def mock_environ(self, env_name, env_value): + return self.wrap_context(mock_environ(env_name, env_value)) + def assertWarns(self, expected_warning): return _AssertWarnsContext(expected_warning, self, None) @@ -364,11 +415,11 @@ def assertItemsEqual(self, expected_seq, actual_seq, msg=None): errors = [] if missing: errors.append( - 'Expected, but missing:\n %s' % (safe_repr(missing), ) + 'Expected, but missing:\n %s' % (safe_repr(missing),) ) if unexpected: errors.append( - 'Unexpected, but present:\n %s' % (safe_repr(unexpected), ) + 'Unexpected, but present:\n %s' % (safe_repr(unexpected),) ) if errors: standardMsg = '\n'.join(errors) @@ -388,6 +439,7 @@ def __inner(self, *args, **kwargs): class AppCase(Case): contained = True + _threads_at_startup = [None] def __init__(self, *args, **kwargs): super(AppCase, self).__init__(*args, **kwargs) @@ -403,17 +455,28 @@ def __init__(self, *args, **kwargs): def Celery(self, *args, **kwargs): return UnitApp(*args, **kwargs) + def threads_at_startup(self): + if self._threads_at_startup[0] is None: + self._threads_at_startup[0] = alive_threads() + return self._threads_at_startup[0] + def setUp(self): - self._threads_at_setup = list(threading.enumerate()) + self._threads_at_setup = self.threads_at_startup() from celery import _state from celery import result + self._prev_res_join_block = result.task_join_will_block + self._prev_state_join_block = _state.task_join_will_block result.task_join_will_block = \ _state.task_join_will_block = lambda: False self._current_app = current_app() self._default_app = _state.default_app trap = Trap() + self._prev_tls = _state._tls _state.set_default_app(trap) - _state._tls.current_app = trap + + class NonTLS(object): + current_app = trap + _state._tls = NonTLS() self.app = self.Celery(set_as_current=False) if 
not self.contained: @@ -429,17 +492,21 @@ def setUp(self): raise def _teardown_app(self): + from celery import _state + from celery import result from celery.utils.log import LoggingProxy assert sys.stdout assert sys.stderr assert sys.__stdout__ assert sys.__stderr__ this = self._get_test_name() - if isinstance(sys.stdout, LoggingProxy) or \ - isinstance(sys.__stdout__, LoggingProxy): + result.task_join_will_block = self._prev_res_join_block + _state.task_join_will_block = self._prev_state_join_block + if isinstance(sys.stdout, (LoggingProxy, Mock)) or \ + isinstance(sys.__stdout__, (LoggingProxy, Mock)): raise RuntimeError(CASE_LOG_REDIRECT_EFFECT.format(this, 'stdout')) - if isinstance(sys.stderr, LoggingProxy) or \ - isinstance(sys.__stderr__, LoggingProxy): + if isinstance(sys.stderr, (LoggingProxy, Mock)) or \ + isinstance(sys.__stderr__, (LoggingProxy, Mock)): raise RuntimeError(CASE_LOG_REDIRECT_EFFECT.format(this, 'stderr')) backend = self.app.__dict__.get('backend') if backend is not None: @@ -447,19 +514,25 @@ def _teardown_app(self): if isinstance(backend.client, DummyClient): backend.client.cache.clear() backend._cache.clear() - from celery._state import ( - _tls, set_default_app, _set_task_join_will_block, - ) - _set_task_join_will_block(False) + from celery import _state + _state._set_task_join_will_block(False) - set_default_app(self._default_app) - _tls.current_app = self._current_app + _state.set_default_app(self._default_app) + _state._tls = self._prev_tls + _state._tls.current_app = self._current_app if self.app is not self._current_app: self.app.close() self.app = None - self.assertEqual( - self._threads_at_setup, list(threading.enumerate()), - ) + self.assertEqual(self._threads_at_setup, alive_threads()) + + # Make sure no test left the shutdown flags enabled. 
+ from celery.worker import state as worker_state + # check for EX_OK + self.assertIsNot(worker_state.should_stop, False) + self.assertIsNot(worker_state.should_terminate, False) + # check for other true values + self.assertFalse(worker_state.should_stop) + self.assertFalse(worker_state.should_terminate) def _get_test_name(self): return '.'.join([self.__class__.__name__, self._testMethodName]) @@ -487,7 +560,10 @@ def teardown(self): def get_handlers(logger): - return [h for h in logger.handlers if not isinstance(h, NullHandler)] + return [ + h for h in logger.handlers + if not isinstance(h, logging.NullHandler) + ] @contextmanager @@ -503,19 +579,28 @@ def wrap_logger(logger, loglevel=logging.ERROR): logger.handlers = old_handlers +@contextmanager +def mock_environ(env_name, env_value): + sentinel = object() + prev_val = os.environ.get(env_name, sentinel) + os.environ[env_name] = env_value + try: + yield env_value + finally: + if prev_val is sentinel: + os.environ.pop(env_name, None) + else: + os.environ[env_name] = prev_val + + def with_environ(env_name, env_value): def _envpatched(fun): @wraps(fun) def _patch_environ(*args, **kwargs): - prev_val = os.environ.get(env_name) - os.environ[env_name] = env_value - try: + with mock_environ(env_name, env_value): return fun(*args, **kwargs) - finally: - os.environ[env_name] = prev_val or '' - return _patch_environ return _envpatched @@ -630,6 +715,7 @@ def myimp(name, *args, **kwargs): def override_stdouts(): """Override `sys.stdout` and `sys.stderr` with `WhateverIO`.""" prev_out, prev_err = sys.stdout, sys.stderr + prev_rout, prev_rerr = sys.__stdout__, sys.__stderr__ mystdout, mystderr = WhateverIO(), WhateverIO() sys.stdout = sys.__stdout__ = mystdout sys.stderr = sys.__stderr__ = mystderr @@ -637,8 +723,19 @@ def override_stdouts(): try: yield mystdout, mystderr finally: - sys.stdout = sys.__stdout__ = prev_out - sys.stderr = sys.__stderr__ = prev_err + sys.stdout = prev_out + sys.stderr = prev_err + sys.__stdout__ 
= prev_rout + sys.__stderr__ = prev_rerr + + +def disable_stdouts(fun): + + @wraps(fun) + def disable(*args, **kwargs): + with override_stdouts(): + return fun(*args, **kwargs) + return disable def _old_patch(module, name, mocked): @@ -673,7 +770,7 @@ def replace_module_value(module, name, value=None): yield finally: if prev is not None: - setattr(sys, name, prev) + setattr(module, name, prev) if not has_prev: try: delattr(module, name) @@ -698,7 +795,7 @@ def sys_platform(value): @contextmanager def reset_modules(*modules): - prev = dict((k, sys.modules.pop(k)) for k in modules if k in sys.modules) + prev = {k: sys.modules.pop(k) for k in modules if k in sys.modules} try: yield finally: @@ -780,10 +877,6 @@ def mock_open(typ=WhateverIO, side_effect=None): yield val -def patch_many(*targets): - return nested(*[patch(target) for target in targets]) - - @contextmanager def assert_signal_called(signal, **expected): handler = Mock() @@ -816,7 +909,49 @@ def _inner(*args, **kwargs): return _inner -def body_from_sig(app, sig, utc=True): +def TaskMessage(name, id=None, args=(), kwargs={}, callbacks=None, + errbacks=None, chain=None, shadow=None, utc=None, **options): + from celery import uuid + from kombu.serialization import dumps + id = id or uuid() + message = Mock(name='TaskMessage-{0}'.format(id)) + message.headers = { + 'id': id, + 'task': name, + 'shadow': shadow, + } + embed = {'callbacks': callbacks, 'errbacks': errbacks, 'chain': chain} + message.headers.update(options) + message.content_type, message.content_encoding, message.body = dumps( + (args, kwargs, embed), serializer='json', + ) + message.payload = (args, kwargs, embed) + return message + + +def TaskMessage1(name, id=None, args=(), kwargs={}, callbacks=None, + errbacks=None, chain=None, **options): + from celery import uuid + from kombu.serialization import dumps + id = id or uuid() + message = Mock(name='TaskMessage-{0}'.format(id)) + message.headers = {} + message.payload = { + 'task': name, + 'id': 
id, + 'args': args, + 'kwargs': kwargs, + 'callbacks': callbacks, + 'errbacks': errbacks, + } + message.payload.update(options) + message.content_type, message.content_encoding, message.body = dumps( + message.payload, + ) + return message + + +def task_message_from_sig(app, sig, utc=True, TaskMessage=TaskMessage): sig.freeze() callbacks = sig.options.pop('link', None) errbacks = sig.options.pop('link_error', None) @@ -832,17 +967,16 @@ def body_from_sig(app, sig, utc=True): expires = app.now() + timedelta(seconds=expires) if expires and isinstance(expires, datetime): expires = expires.isoformat() - return { - 'task': sig.task, - 'id': sig.id, - 'args': sig.args, - 'kwargs': sig.kwargs, - 'callbacks': [dict(s) for s in callbacks] if callbacks else None, - 'errbacks': [dict(s) for s in errbacks] if errbacks else None, - 'eta': eta, - 'utc': utc, - 'expires': expires, - } + return TaskMessage( + sig.task, id=sig.id, args=sig.args, + kwargs=sig.kwargs, + callbacks=[dict(s) for s in callbacks] if callbacks else None, + errbacks=[dict(s) for s in errbacks] if errbacks else None, + eta=eta, + expires=expires, + utc=utc, + **sig.options + ) @contextmanager diff --git a/celery/tests/compat_modules/test_compat.py b/celery/tests/compat_modules/test_compat.py index d285188e0..433186950 100644 --- a/celery/tests/compat_modules/test_compat.py +++ b/celery/tests/compat_modules/test_compat.py @@ -2,40 +2,13 @@ from datetime import timedelta -import sys -sys.modules.pop('celery.task', None) - from celery.schedules import schedule from celery.task import ( periodic_task, PeriodicTask ) -from celery.utils.timeutils import timedelta_seconds - -from celery.tests.case import AppCase, depends_on_current_app - - -class test_Task(AppCase): - - def test_base_task_inherits_magic_kwargs_from_app(self): - from celery.task import Task as OldTask - class timkX(OldTask): - abstract = True - - with self.Celery(set_as_current=False, - accept_magic_kwargs=True) as app: - timkX.bind(app) - # see 
#918 - self.assertFalse(timkX.accept_magic_kwargs) - - from celery import Task as NewTask - - class timkY(NewTask): - abstract = True - - timkY.bind(app) - self.assertFalse(timkY.accept_magic_kwargs) +from celery.tests.case import AppCase, depends_on_current_app # noqa @depends_on_current_app @@ -53,7 +26,7 @@ def now(self): def test_must_have_run_every(self): with self.assertRaises(NotImplementedError): - type('Foo', (PeriodicTask, ), {'__module__': __name__}) + type('Foo', (PeriodicTask,), {'__module__': __name__}) def test_remaining_estimate(self): s = self.my_periodic.run_every @@ -74,8 +47,9 @@ def test_is_due(self): self.now() - p.run_every.run_every, ) self.assertTrue(due) - self.assertEqual(remaining, - timedelta_seconds(p.run_every.run_every)) + self.assertEqual( + remaining, p.run_every.run_every.total_seconds(), + ) def test_schedule_repr(self): p = self.my_periodic diff --git a/celery/tests/compat_modules/test_compat_utils.py b/celery/tests/compat_modules/test_compat_utils.py index b041a0b3e..d1ef81a98 100644 --- a/celery/tests/compat_modules/test_compat_utils.py +++ b/celery/tests/compat_modules/test_compat_utils.py @@ -40,11 +40,7 @@ def test_decorators_task(self): def _test_decorators_task(): pass - self.assertTrue(_test_decorators_task.accept_magic_kwargs) - def test_decorators_periodic_task(self): @celery.decorators.periodic_task(run_every=3600) def _test_decorators_ptask(): pass - - self.assertTrue(_test_decorators_ptask.accept_magic_kwargs) diff --git a/celery/tests/compat_modules/test_decorators.py b/celery/tests/compat_modules/test_decorators.py index 9f5dff947..df95916ae 100644 --- a/celery/tests/compat_modules/test_decorators.py +++ b/celery/tests/compat_modules/test_decorators.py @@ -27,7 +27,6 @@ def setup(self): def assertCompatDecorator(self, decorator, type, **opts): task = decorator(**opts)(add) self.assertEqual(task(8, 8), 16) - self.assertTrue(task.accept_magic_kwargs) self.assertIsInstance(task, type) def test_task(self): diff --git 
a/celery/tests/compat_modules/test_http.py b/celery/tests/compat_modules/test_http.py index 08505f87e..1c4edf0e4 100644 --- a/celery/tests/compat_modules/test_http.py +++ b/celery/tests/compat_modules/test_http.py @@ -8,8 +8,8 @@ except ImportError: # py3k from urllib.request import addinfourl # noqa -from anyjson import dumps from kombu.utils.encoding import from_utf8 +from kombu.utils.json import dumps from celery.five import WhateverIO, items from celery.task import http @@ -142,7 +142,7 @@ def test_dispatch_POST(self): class test_URL(AppCase): def test_URL_get_async(self): - self.app.conf.CELERY_ALWAYS_EAGER = True + self.app.conf.task_always_eager = True with mock_urlopen(success_response(100)): d = http.URL( 'http://example.com/mul', app=self.app, @@ -150,7 +150,7 @@ def test_URL_get_async(self): self.assertEqual(d.get(), 100) def test_URL_post_async(self): - self.app.conf.CELERY_ALWAYS_EAGER = True + self.app.conf.task_always_eager = True with mock_urlopen(success_response(100)): d = http.URL( 'http://example.com/mul', app=self.app, diff --git a/celery/tests/compat_modules/test_sets.py b/celery/tests/compat_modules/test_sets.py deleted file mode 100644 index c1d2c16fa..000000000 --- a/celery/tests/compat_modules/test_sets.py +++ /dev/null @@ -1,244 +0,0 @@ -from __future__ import absolute_import - -import anyjson -import warnings - -from celery import uuid -from celery.result import TaskSetResult -from celery.task import Task -from celery.canvas import Signature - -from celery.tests.tasks.test_result import make_mock_group -from celery.tests.case import AppCase, Mock, patch - - -class SetsCase(AppCase): - - def setup(self): - with warnings.catch_warnings(record=True): - from celery.task import sets - self.sets = sets - self.subtask = sets.subtask - self.TaskSet = sets.TaskSet - - class MockTask(Task): - app = self.app - name = 'tasks.add' - - def run(self, x, y, **kwargs): - return x + y - - @classmethod - def apply_async(cls, args, kwargs, **options): - 
return (args, kwargs, options) - - @classmethod - def apply(cls, args, kwargs, **options): - return (args, kwargs, options) - self.MockTask = MockTask - - -class test_TaskSetResult(AppCase): - - def setup(self): - self.size = 10 - self.ts = TaskSetResult(uuid(), make_mock_group(self.app, self.size)) - - def test_total(self): - self.assertEqual(self.ts.total, self.size) - - def test_compat_properties(self): - self.assertEqual(self.ts.taskset_id, self.ts.id) - self.ts.taskset_id = 'foo' - self.assertEqual(self.ts.taskset_id, 'foo') - - def test_compat_subtasks_kwarg(self): - x = TaskSetResult(uuid(), subtasks=[1, 2, 3]) - self.assertEqual(x.results, [1, 2, 3]) - - def test_itersubtasks(self): - it = self.ts.itersubtasks() - - for i, t in enumerate(it): - self.assertEqual(t.get(), i) - - -class test_App(AppCase): - - def test_TaskSet(self): - with warnings.catch_warnings(record=True): - ts = self.app.TaskSet() - self.assertListEqual(ts.tasks, []) - self.assertIs(ts.app, self.app) - - -class test_subtask(SetsCase): - - def test_behaves_like_type(self): - s = self.subtask('tasks.add', (2, 2), {'cache': True}, - {'routing_key': 'CPU-bound'}) - self.assertDictEqual(self.subtask(s), s) - - def test_task_argument_can_be_task_cls(self): - s = self.subtask(self.MockTask, (2, 2)) - self.assertEqual(s.task, self.MockTask.name) - - def test_apply_async(self): - s = self.MockTask.subtask( - (2, 2), {'cache': True}, {'routing_key': 'CPU-bound'}, - ) - args, kwargs, options = s.apply_async() - self.assertTupleEqual(args, (2, 2)) - self.assertDictEqual(kwargs, {'cache': True}) - self.assertDictEqual(options, {'routing_key': 'CPU-bound'}) - - def test_delay_argmerge(self): - s = self.MockTask.subtask( - (2, ), {'cache': True}, {'routing_key': 'CPU-bound'}, - ) - args, kwargs, options = s.delay(10, cache=False, other='foo') - self.assertTupleEqual(args, (10, 2)) - self.assertDictEqual(kwargs, {'cache': False, 'other': 'foo'}) - self.assertDictEqual(options, {'routing_key': 
'CPU-bound'}) - - def test_apply_async_argmerge(self): - s = self.MockTask.subtask( - (2, ), {'cache': True}, {'routing_key': 'CPU-bound'}, - ) - args, kwargs, options = s.apply_async((10, ), - {'cache': False, 'other': 'foo'}, - routing_key='IO-bound', - exchange='fast') - - self.assertTupleEqual(args, (10, 2)) - self.assertDictEqual(kwargs, {'cache': False, 'other': 'foo'}) - self.assertDictEqual(options, {'routing_key': 'IO-bound', - 'exchange': 'fast'}) - - def test_apply_argmerge(self): - s = self.MockTask.subtask( - (2, ), {'cache': True}, {'routing_key': 'CPU-bound'}, - ) - args, kwargs, options = s.apply((10, ), - {'cache': False, 'other': 'foo'}, - routing_key='IO-bound', - exchange='fast') - - self.assertTupleEqual(args, (10, 2)) - self.assertDictEqual(kwargs, {'cache': False, 'other': 'foo'}) - self.assertDictEqual( - options, {'routing_key': 'IO-bound', 'exchange': 'fast'}, - ) - - def test_is_JSON_serializable(self): - s = self.MockTask.subtask( - (2, ), {'cache': True}, {'routing_key': 'CPU-bound'}, - ) - s.args = list(s.args) # tuples are not preserved - # but this doesn't matter. 
- self.assertEqual(s, self.subtask(anyjson.loads(anyjson.dumps(s)))) - - def test_repr(self): - s = self.MockTask.subtask((2, ), {'cache': True}) - self.assertIn('2', repr(s)) - self.assertIn('cache=True', repr(s)) - - def test_reduce(self): - s = self.MockTask.subtask((2, ), {'cache': True}) - cls, args = s.__reduce__() - self.assertDictEqual(dict(cls(*args)), dict(s)) - - -class test_TaskSet(SetsCase): - - def test_task_arg_can_be_iterable__compat(self): - ts = self.TaskSet([self.MockTask.subtask((i, i)) - for i in (2, 4, 8)], app=self.app) - self.assertEqual(len(ts), 3) - - def test_respects_ALWAYS_EAGER(self): - app = self.app - - class MockTaskSet(self.TaskSet): - applied = 0 - - def apply(self, *args, **kwargs): - self.applied += 1 - - ts = MockTaskSet( - [self.MockTask.subtask((i, i)) for i in (2, 4, 8)], - app=self.app, - ) - app.conf.CELERY_ALWAYS_EAGER = True - ts.apply_async() - self.assertEqual(ts.applied, 1) - app.conf.CELERY_ALWAYS_EAGER = False - - with patch('celery.task.sets.get_current_worker_task') as gwt: - parent = gwt.return_value = Mock() - ts.apply_async() - self.assertTrue(parent.add_trail.called) - - def test_apply_async(self): - applied = [0] - - class mocksubtask(Signature): - - def apply_async(self, *args, **kwargs): - applied[0] += 1 - - ts = self.TaskSet([mocksubtask(self.MockTask, (i, i)) - for i in (2, 4, 8)], app=self.app) - ts.apply_async() - self.assertEqual(applied[0], 3) - - class Publisher(object): - - def send(self, *args, **kwargs): - pass - - ts.apply_async(publisher=Publisher()) - - # setting current_task - - @self.app.task(shared=False) - def xyz(): - pass - - from celery._state import _task_stack - xyz.push_request() - _task_stack.push(xyz) - try: - ts.apply_async(publisher=Publisher()) - finally: - _task_stack.pop() - xyz.pop_request() - - def test_apply(self): - - applied = [0] - - class mocksubtask(Signature): - - def apply(self, *args, **kwargs): - applied[0] += 1 - - ts = self.TaskSet([mocksubtask(self.MockTask, (i, 
i)) - for i in (2, 4, 8)], app=self.app) - ts.apply() - self.assertEqual(applied[0], 3) - - def test_set_app(self): - ts = self.TaskSet([], app=self.app) - ts.app = 42 - self.assertEqual(ts.app, 42) - - def test_set_tasks(self): - ts = self.TaskSet([], app=self.app) - ts.tasks = [1, 2, 3] - self.assertEqual(ts, [1, 2, 3]) - - def test_set_Publisher(self): - ts = self.TaskSet([], app=self.app) - ts.Publisher = 42 - self.assertEqual(ts.Publisher, 42) diff --git a/celery/tests/concurrency/test_concurrency.py b/celery/tests/concurrency/test_concurrency.py index 293887741..7bc021c0c 100644 --- a/celery/tests/concurrency/test_concurrency.py +++ b/celery/tests/concurrency/test_concurrency.py @@ -5,7 +5,8 @@ from itertools import count from celery.concurrency.base import apply_target, BasePool -from celery.tests.case import AppCase, Mock +from celery.exceptions import WorkerShutdown, WorkerTerminate +from celery.tests.case import AppCase, Mock, patch class test_BasePool(AppCase): @@ -29,7 +30,7 @@ def callback(*args): accept_callback=gen_callback('accept_callback')) self.assertDictContainsSubset( - {'target': (1, (8, 16)), 'callback': (2, (42, ))}, + {'target': (1, (8, 16)), 'callback': (2, (42,))}, scratch, ) pa1 = scratch['accept_callback'] @@ -45,7 +46,48 @@ def callback(*args): accept_callback=None) self.assertDictEqual(scratch, {'target': (3, (8, 16)), - 'callback': (4, (42, ))}) + 'callback': (4, (42,))}) + + def test_apply_target__propagate(self): + target = Mock(name='target') + target.side_effect = KeyError() + with self.assertRaises(KeyError): + apply_target(target, propagate=(KeyError,)) + + def test_apply_target__raises(self): + target = Mock(name='target') + target.side_effect = KeyError() + with self.assertRaises(KeyError): + apply_target(target) + + def test_apply_target__raises_WorkerShutdown(self): + target = Mock(name='target') + target.side_effect = WorkerShutdown() + with self.assertRaises(WorkerShutdown): + apply_target(target) + + def 
test_apply_target__raises_WorkerTerminate(self): + target = Mock(name='target') + target.side_effect = WorkerTerminate() + with self.assertRaises(WorkerTerminate): + apply_target(target) + + def test_apply_target__raises_BaseException(self): + target = Mock(name='target') + callback = Mock(name='callback') + target.side_effect = BaseException() + apply_target(target, callback=callback) + self.assertTrue(callback.called) + + @patch('celery.concurrency.base.reraise') + def test_apply_target__raises_BaseException_raises_else(self, reraise): + target = Mock(name='target') + callback = Mock(name='callback') + reraise.side_effect = KeyError() + target.side_effect = BaseException() + with self.assertRaises(KeyError): + apply_target(target, callback=callback) + self.assertFalse(callback.called) def test_does_not_debug(self): x = BasePool(10) @@ -65,7 +107,12 @@ def test_interface_on_apply(self): BasePool(10).on_apply() def test_interface_info(self): - self.assertDictEqual(BasePool(10).info, {}) + self.assertDictEqual(BasePool(10).info, { + 'max-concurrency': 10, + }) + + def test_interface_flush(self): + self.assertIsNone(BasePool(10).flush()) def test_active(self): p = BasePool(10) diff --git a/celery/tests/concurrency/test_eventlet.py b/celery/tests/concurrency/test_eventlet.py index 162e4f2cf..46828f0b9 100644 --- a/celery/tests/concurrency/test_eventlet.py +++ b/celery/tests/concurrency/test_eventlet.py @@ -1,31 +1,22 @@ from __future__ import absolute_import +import os import sys -from celery.app.defaults import is_pypy from celery.concurrency.eventlet import ( apply_target, - Schedule, Timer, TaskPool, ) -from celery.tests.case import ( - AppCase, Mock, SkipTest, mock_module, patch, patch_many, skip_if_pypy, -) +from celery.tests.case import AppCase, Mock, patch, skip_if_pypy class EventletCase(AppCase): @skip_if_pypy def setup(self): - if is_pypy: - raise SkipTest('mock_modules not working on PyPy1.9') - try: - self.eventlet = __import__('eventlet') - except 
ImportError: - raise SkipTest( - 'eventlet not installed, skipping related tests.') + self.mock_modules(*eventlet_modules) @skip_if_pypy def teardown(self): @@ -44,6 +35,18 @@ def test_aaa_is_patched(self): maybe_patch_concurrency(['x', '-P', 'eventlet']) monkey_patch.assert_called_with() + @patch('eventlet.debug.hub_blocking_detection', create=True) + @patch('eventlet.monkey_patch', create=True) + def test_aaa_blockdetecet(self, monkey_patch, hub_blocking_detection): + os.environ['EVENTLET_NOBLOCK'] = "10.3" + try: + from celery import maybe_patch_concurrency + maybe_patch_concurrency(['x', '-P', 'eventlet']) + monkey_patch.assert_called_with() + hub_blocking_detection.assert_called_with(10.3, 10.3) + finally: + os.environ.pop('EVENTLET_NOBLOCK', None) + eventlet_modules = ( 'eventlet', @@ -54,65 +57,82 @@ def test_aaa_is_patched(self): ) -class test_Schedule(EventletCase): +class test_Timer(EventletCase): + + def setup(self): + EventletCase.setup(self) + self.spawn_after = self.patch('eventlet.greenthread.spawn_after') + self.GreenletExit = self.patch('greenlet.GreenletExit') def test_sched(self): - with mock_module(*eventlet_modules): - with patch_many('eventlet.greenthread.spawn_after', - 'greenlet.GreenletExit') as (spawn_after, - GreenletExit): - x = Schedule() - x.GreenletExit = KeyError - entry = Mock() - g = x._enter(1, 0, entry) - self.assertTrue(x.queue) - - x._entry_exit(g, entry) - g.wait.side_effect = KeyError() - x._entry_exit(g, entry) - entry.cancel.assert_called_with() - self.assertFalse(x._queue) - - x._queue.add(g) - x.clear() - x._queue.add(g) - g.cancel.side_effect = KeyError() - x.clear() + x = Timer() + x.GreenletExit = KeyError + entry = Mock() + g = x._enter(1, 0, entry) + self.assertTrue(x.queue) + + x._entry_exit(g, entry) + g.wait.side_effect = KeyError() + x._entry_exit(g, entry) + entry.cancel.assert_called_with() + self.assertFalse(x._queue) + + x._queue.add(g) + x.clear() + x._queue.add(g) + g.cancel.side_effect = KeyError() + 
x.clear() + + def test_cancel(self): + x = Timer() + tref = Mock(name='tref') + x.cancel(tref) + tref.cancel.assert_called_with() + x.GreenletExit = KeyError + tref.cancel.side_effect = KeyError() + x.cancel(tref) class test_TaskPool(EventletCase): + def setup(self): + EventletCase.setup(self) + self.GreenPool = self.patch('eventlet.greenpool.GreenPool') + self.greenthread = self.patch('eventlet.greenthread') + def test_pool(self): - with mock_module(*eventlet_modules): - with patch_many('eventlet.greenpool.GreenPool', - 'eventlet.greenthread') as (GreenPool, - greenthread): - x = TaskPool() - x.on_start() - x.on_stop() - x.on_apply(Mock()) - x._pool = None - x.on_stop() - self.assertTrue(x.getpid()) + x = TaskPool() + x.on_start() + x.on_stop() + x.on_apply(Mock()) + x._pool = None + x.on_stop() + self.assertTrue(x.getpid()) @patch('celery.concurrency.eventlet.base') def test_apply_target(self, base): apply_target(Mock(), getpid=Mock()) self.assertTrue(base.apply_target.called) - -class test_Timer(EventletCase): - - def test_timer(self): - x = Timer() - x.ensure_started() - x.schedule = Mock() - x.start() - x.stop() - x.schedule.clear.assert_called_with() - - tref = Mock() - x.cancel(tref) - x.schedule.GreenletExit = KeyError - tref.cancel.side_effect = KeyError() - x.cancel(tref) + def test_grow(self): + x = TaskPool(10) + x._pool = Mock(name='_pool') + x.grow(2) + self.assertEqual(x.limit, 12) + x._pool.resize.assert_called_with(12) + + def test_shrink(self): + x = TaskPool(10) + x._pool = Mock(name='_pool') + x.shrink(2) + self.assertEqual(x.limit, 8) + x._pool.resize.assert_called_with(8) + + def test_get_info(self): + x = TaskPool(10) + x._pool = Mock(name='_pool') + self.assertDictEqual(x._get_info(), { + 'max-concurrency': 10, + 'free-threads': x._pool.free(), + 'running-threads': x._pool.running(), + }) diff --git a/celery/tests/concurrency/test_gevent.py b/celery/tests/concurrency/test_gevent.py index baa105ba4..d99bffca4 100644 --- 
a/celery/tests/concurrency/test_gevent.py +++ b/celery/tests/concurrency/test_gevent.py @@ -1,15 +1,12 @@ from __future__ import absolute_import from celery.concurrency.gevent import ( - Schedule, Timer, TaskPool, apply_timeout, ) -from celery.tests.case import ( - AppCase, Mock, SkipTest, mock_module, patch, patch_many, skip_if_pypy, -) +from celery.tests.case import AppCase, Mock, patch, skip_if_pypy gevent_modules = ( 'gevent', @@ -24,92 +21,78 @@ class GeventCase(AppCase): @skip_if_pypy def setup(self): - try: - self.gevent = __import__('gevent') - except ImportError: - raise SkipTest( - 'gevent not installed, skipping related tests.') + self.mock_modules(*gevent_modules) class test_gevent_patch(GeventCase): def test_is_patched(self): - with mock_module(*gevent_modules): - with patch('gevent.monkey.patch_all', create=True) as patch_all: - import gevent - gevent.version_info = (1, 0, 0) - from celery import maybe_patch_concurrency - maybe_patch_concurrency(['x', '-P', 'gevent']) - self.assertTrue(patch_all.called) + with patch('gevent.monkey.patch_all', create=True) as patch_all: + import gevent + gevent.version_info = (1, 0, 0) + from celery import maybe_patch_concurrency + maybe_patch_concurrency(['x', '-P', 'gevent']) + self.assertTrue(patch_all.called) + +class test_Timer(GeventCase): -class test_Schedule(AppCase): + def setup(self): + GeventCase.setup(self) + self.greenlet = self.patch('gevent.greenlet') + self.GreenletExit = self.patch('gevent.greenlet.GreenletExit') def test_sched(self): - with mock_module(*gevent_modules): - with patch_many('gevent.greenlet', - 'gevent.greenlet.GreenletExit') as (greenlet, - GreenletExit): - greenlet.Greenlet = object - x = Schedule() - greenlet.Greenlet = Mock() - x._Greenlet.spawn_later = Mock() - x._GreenletExit = KeyError - entry = Mock() - g = x._enter(1, 0, entry) - self.assertTrue(x.queue) - - x._entry_exit(g) - g.kill.assert_called_with() - self.assertFalse(x._queue) - - x._queue.add(g) - x.clear() - 
x._queue.add(g) - g.kill.side_effect = KeyError() - x.clear() - - g = x._Greenlet() - g.cancel() - - -class test_TaskPool(AppCase): + self.greenlet.Greenlet = object + x = Timer() + self.greenlet.Greenlet = Mock() + x._Greenlet.spawn_later = Mock() + x._GreenletExit = KeyError + entry = Mock() + g = x._enter(1, 0, entry) + self.assertTrue(x.queue) + + x._entry_exit(g) + g.kill.assert_called_with() + self.assertFalse(x._queue) + + x._queue.add(g) + x.clear() + x._queue.add(g) + g.kill.side_effect = KeyError() + x.clear() + + g = x._Greenlet() + g.cancel() + + +class test_TaskPool(GeventCase): + + def setup(self): + GeventCase.setup(self) + self.spawn_raw = self.patch('gevent.spawn_raw') + self.Pool = self.patch('gevent.pool.Pool') def test_pool(self): - with mock_module(*gevent_modules): - with patch_many('gevent.spawn_raw', 'gevent.pool.Pool') as ( - spawn_raw, Pool): - x = TaskPool() - x.on_start() - x.on_stop() - x.on_apply(Mock()) - x._pool = None - x.on_stop() - - x._pool = Mock() - x._pool._semaphore.counter = 1 - x._pool.size = 1 - x.grow() - self.assertEqual(x._pool.size, 2) - self.assertEqual(x._pool._semaphore.counter, 2) - x.shrink() - self.assertEqual(x._pool.size, 1) - self.assertEqual(x._pool._semaphore.counter, 1) - - x._pool = [4, 5, 6] - self.assertEqual(x.num_processes, 3) - - -class test_Timer(AppCase): - - def test_timer(self): - with mock_module(*gevent_modules): - x = Timer() - x.ensure_started() - x.schedule = Mock() - x.start() - x.stop() - x.schedule.clear.assert_called_with() + x = TaskPool() + x.on_start() + x.on_stop() + x.on_apply(Mock()) + x._pool = None + x.on_stop() + + x._pool = Mock() + x._pool._semaphore.counter = 1 + x._pool.size = 1 + x.grow() + self.assertEqual(x._pool.size, 2) + self.assertEqual(x._pool._semaphore.counter, 2) + x.shrink() + self.assertEqual(x._pool.size, 1) + self.assertEqual(x._pool._semaphore.counter, 1) + + x._pool = [4, 5, 6] + self.assertEqual(x.num_processes, 3) class test_apply_timeout(AppCase): diff 
--git a/celery/tests/concurrency/test_pool.py b/celery/tests/concurrency/test_pool.py index d1b314b52..4930dc89f 100644 --- a/celery/tests/concurrency/test_pool.py +++ b/celery/tests/concurrency/test_pool.py @@ -66,7 +66,7 @@ def mycallback(ret_value): self.assertIsInstance(scratchpad[1]['ret_value'], ExceptionInfo) self.assertEqual(scratchpad[1]['ret_value'].exception.args, - ('FOO EXCEPTION', )) + ('FOO EXCEPTION',)) self.assertEqual(res3.get(), 400) time.sleep(0.5) diff --git a/celery/tests/concurrency/test_prefork.py b/celery/tests/concurrency/test_prefork.py index 7ad247436..c829cd596 100644 --- a/celery/tests/concurrency/test_prefork.py +++ b/celery/tests/concurrency/test_prefork.py @@ -1,14 +1,20 @@ from __future__ import absolute_import import errno +import os import socket -import time +import sys from itertools import cycle -from celery.five import items, range +from celery.app.defaults import DEFAULTS +from celery.datastructures import AttributeDict +from celery.five import range from celery.utils.functional import noop -from celery.tests.case import AppCase, Mock, SkipTest, call, patch +from celery.utils.objects import Bunch + +from celery.tests.case import AppCase, Mock, SkipTest, patch, restore_logging + try: from celery.concurrency import prefork as mp from celery.concurrency import asynpool @@ -35,12 +41,6 @@ def apply_async(self, *args, **kwargs): asynpool = None # noqa -class Object(object): # for writeable attributes. 
- - def __init__(self, **kwargs): - [setattr(self, k, v) for k, v in items(kwargs)] - - class MockResult(object): def __init__(self, value, pid): @@ -54,6 +54,67 @@ def get(self): return self.value +class test_process_initializer(AppCase): + + @patch('celery.platforms.signals') + @patch('celery.platforms.set_mp_process_title') + def test_process_initializer(self, set_mp_process_title, _signals): + with restore_logging(): + from celery import signals + from celery._state import _tls + from celery.concurrency.prefork import ( + process_initializer, WORKER_SIGRESET, WORKER_SIGIGNORE, + ) + + def on_worker_process_init(**kwargs): + on_worker_process_init.called = True + on_worker_process_init.called = False + signals.worker_process_init.connect(on_worker_process_init) + + def Loader(*args, **kwargs): + loader = Mock(*args, **kwargs) + loader.conf = {} + loader.override_backends = {} + return loader + + with self.Celery(loader=Loader) as app: + app.conf = AttributeDict(DEFAULTS) + process_initializer(app, 'awesome.worker.com') + _signals.ignore.assert_any_call(*WORKER_SIGIGNORE) + _signals.reset.assert_any_call(*WORKER_SIGRESET) + self.assertTrue(app.loader.init_worker.call_count) + self.assertTrue(on_worker_process_init.called) + self.assertIs(_tls.current_app, app) + set_mp_process_title.assert_called_with( + 'celeryd', hostname='awesome.worker.com', + ) + + with patch('celery.app.trace.setup_worker_optimizations') as S: + os.environ['FORKED_BY_MULTIPROCESSING'] = "1" + try: + process_initializer(app, 'luke.worker.com') + S.assert_called_with(app, 'luke.worker.com') + finally: + os.environ.pop('FORKED_BY_MULTIPROCESSING', None) + + os.environ['CELERY_LOG_FILE'] = 'worker%I.log' + app.log.setup = Mock(name='log_setup') + try: + process_initializer(app, 'luke.worker.com') + finally: + os.environ.pop('CELERY_LOG_FILE', None) + + +class test_process_destructor(AppCase): + + @patch('celery.concurrency.prefork.signals') + def test_process_destructor(self, signals): + 
mp.process_destructor(13, -3) + signals.worker_process_shutdown.send.assert_called_with( + sender=None, pid=13, exitcode=-3, + ) + + class MockPool(object): started = False closed = False @@ -68,7 +129,7 @@ def __init__(self, *args, **kwargs): self.maintain_pool = Mock() self._state = mp.RUN self._processes = kwargs.get('processes') - self._pool = [Object(pid=i, inqW_fd=1, outqR_fd=2) + self._pool = [Bunch(pid=i, inqW_fd=1, outqR_fd=2) for i in range(self._processes)] self._current_proc = cycle(range(self._processes)) @@ -112,7 +173,7 @@ class ExeMockPool(MockPool): def apply_async(self, target, args=(), kwargs={}, callback=noop): from threading import Timer res = target(*args, **kwargs) - Timer(0.1, callback, (res, )).start() + Timer(0.1, callback, (res,)).start() return MockResult(res, next(self._current_proc)) @@ -135,6 +196,10 @@ def setup(self): class test_AsynPool(PoolCase): + def setup(self): + if sys.platform == 'win32': + raise SkipTest('win32: skip') + def test_gen_not_started(self): def gen(): @@ -147,71 +212,87 @@ def gen(): list(g) self.assertFalse(asynpool.gen_not_started(g)) - def test_select(self): + @patch('select.select', create=True) + def test_select(self, __select): ebadf = socket.error() ebadf.errno = errno.EBADF - with patch('select.select') as select: - select.return_value = ([3], [], []) + with patch('select.poll', create=True) as poller: + poll = poller.return_value = Mock(name='poll.poll') + poll.return_value = {3}, set(), 0 self.assertEqual( - asynpool._select(set([3])), - ([3], [], 0), + asynpool._select({3}, poll=poll), + ({3}, set(), 0), ) - select.return_value = ([], [], [3]) + poll.return_value = {3}, set(), 0 self.assertEqual( - asynpool._select(set([3]), None, set([3])), - ([3], [], 0), + asynpool._select({3}, None, {3}, poll=poll), + ({3}, set(), 0), ) eintr = socket.error() eintr.errno = errno.EINTR - select.side_effect = eintr + poll.side_effect = eintr - readers = set([3]) - self.assertEqual(asynpool._select(readers), ([], [], 
1)) + readers = {3} + self.assertEqual( + asynpool._select(readers, poll=poll), + (set(), set(), 1), + ) self.assertIn(3, readers) - with patch('select.select') as select: - select.side_effect = ebadf - readers = set([3]) - self.assertEqual(asynpool._select(readers), ([], [], 1)) - select.assert_has_calls([call([3], [], [], 0)]) - self.assertNotIn(3, readers) - - with patch('select.select') as select: - select.side_effect = MemoryError() + with patch('select.poll') as poller: + poll = poller.return_value = Mock(name='poll.poll') + poll.side_effect = ebadf + with patch('select.select') as selcheck: + selcheck.side_effect = ebadf + readers = {3} + self.assertEqual( + asynpool._select(readers, poll=poll), + (set(), set(), 1), + ) + self.assertNotIn(3, readers) + + with patch('select.poll') as poller: + poll = poller.return_value = Mock(name='poll.poll') + poll.side_effect = MemoryError() with self.assertRaises(MemoryError): - asynpool._select(set([1])) - - with patch('select.select') as select: - - def se(*args): - select.side_effect = MemoryError() - raise ebadf - select.side_effect = se - with self.assertRaises(MemoryError): - asynpool._select(set([3])) - - with patch('select.select') as select: - - def se2(*args): - select.side_effect = socket.error() - select.side_effect.errno = 1321 - raise ebadf - select.side_effect = se2 + asynpool._select({1}, poll=poll) + + with patch('select.poll') as poller: + poll = poller.return_value = Mock(name='poll.poll') + with patch('select.select') as selcheck: + + def se(*args): + selcheck.side_effect = MemoryError() + raise ebadf + poll.side_effect = se + with self.assertRaises(MemoryError): + asynpool._select({3}, poll=poll) + + with patch('select.poll') as poller: + poll = poller.return_value = Mock(name='poll.poll') + with patch('select.select') as selcheck: + + def se2(*args): + selcheck.side_effect = socket.error() + selcheck.side_effect.errno = 1321 + raise ebadf + poll.side_effect = se2 + with 
self.assertRaises(socket.error): + asynpool._select({3}, poll=poll) + + with patch('select.poll') as poller: + poll = poller.return_value = Mock(name='poll.poll') + + poll.side_effect = socket.error() + poll.side_effect.errno = 34134 with self.assertRaises(socket.error): - asynpool._select(set([3])) - - with patch('select.select') as select: - - select.side_effect = socket.error() - select.side_effect.errno = 34134 - with self.assertRaises(socket.error): - asynpool._select(set([3])) + asynpool._select({3}, poll=poll) def test_promise(self): fun = Mock() - x = asynpool.promise(fun, (1, ), {'foo': 1}) + x = asynpool.promise(fun, (1,), {'foo': 1}) x() self.assertTrue(x.ready) fun.assert_called_with(1, foo=1) @@ -219,11 +300,15 @@ def test_promise(self): def test_Worker(self): w = asynpool.Worker(Mock(), Mock()) w.on_loop_start(1234) - w.outq.put.assert_called_with((asynpool.WORKER_UP, (1234, ))) + w.outq.put.assert_called_with((asynpool.WORKER_UP, (1234,))) class test_ResultHandler(PoolCase): + def setup(self): + if sys.platform == 'win32': + raise SkipTest('win32: skip') + def test_process_result(self): x = asynpool.ResultHandler( Mock(), Mock(), {}, Mock(), @@ -268,10 +353,43 @@ def test_start(self): pool.terminate() self.assertTrue(_pool.terminated) + def test_restart(self): + pool = TaskPool(10) + pool._pool = Mock(name='pool') + pool.restart() + pool._pool.restart.assert_called_with() + pool._pool.apply_async.assert_called_with(mp.noop) + + def test_did_start_ok(self): + pool = TaskPool(10) + pool._pool = Mock(name='pool') + self.assertIs(pool.did_start_ok(), pool._pool.did_start_ok()) + + def test_register_with_event_loop(self): + pool = TaskPool(10) + pool._pool = Mock(name='pool') + loop = Mock(name='loop') + pool.register_with_event_loop(loop) + pool._pool.register_with_event_loop.assert_called_with(loop) + + def test_on_close(self): + pool = TaskPool(10) + pool._pool = Mock(name='pool') + pool._pool._state = mp.RUN + pool.on_close() + 
pool._pool.close.assert_called_with() + + def test_on_close__pool_not_running(self): + pool = TaskPool(10) + pool._pool = Mock(name='pool') + pool._pool._state = mp.CLOSE + pool.on_close() + self.assertFalse(pool._pool.close.called) + def test_apply_async(self): pool = TaskPool(10) pool.start() - pool.apply_async(lambda x: x, (2, ), {}) + pool.apply_async(lambda x: x, (2,), {}) def test_grow_shrink(self): pool = TaskPool(10) @@ -284,7 +402,7 @@ def test_grow_shrink(self): def test_info(self): pool = TaskPool(10) - procs = [Object(pid=i) for i in range(pool.limit)] + procs = [Bunch(pid=i) for i in range(pool.limit)] class _Pool(object): _pool = procs @@ -304,17 +422,3 @@ def test_num_processes(self): pool = TaskPool(7) pool.start() self.assertEqual(pool.num_processes, 7) - - def test_restart(self): - raise SkipTest('functional test') - - def get_pids(pool): - return set([p.pid for p in pool._pool._pool]) - - tp = self.TaskPool(5) - time.sleep(0.5) - tp.start() - pids = get_pids(tp) - tp.restart() - time.sleep(0.5) - self.assertEqual(pids, get_pids(tp)) diff --git a/celery/tests/concurrency/test_threads.py b/celery/tests/concurrency/test_threads.py index 2eb5e3882..1edeb5664 100644 --- a/celery/tests/concurrency/test_threads.py +++ b/celery/tests/concurrency/test_threads.py @@ -20,31 +20,31 @@ def test_without_threadpool(self): with mask_modules('threadpool'): with self.assertRaises(ImportError): - TaskPool() + TaskPool(app=self.app) def test_with_threadpool(self): with mock_module('threadpool'): - x = TaskPool() + x = TaskPool(app=self.app) self.assertTrue(x.ThreadPool) self.assertTrue(x.WorkRequest) def test_on_start(self): with mock_module('threadpool'): - x = TaskPool() + x = TaskPool(app=self.app) x.on_start() self.assertTrue(x._pool) self.assertIsInstance(x._pool.workRequests, NullDict) def test_on_stop(self): with mock_module('threadpool'): - x = TaskPool() + x = TaskPool(app=self.app) x.on_start() x.on_stop() x._pool.dismissWorkers.assert_called_with(x.limit, 
do_join=True) def test_on_apply(self): with mock_module('threadpool'): - x = TaskPool() + x = TaskPool(app=self.app) x.on_start() callback = Mock() accept_callback = Mock() diff --git a/celery/tests/contrib/test_methods.py b/celery/tests/contrib/test_methods.py deleted file mode 100644 index da74cc98b..000000000 --- a/celery/tests/contrib/test_methods.py +++ /dev/null @@ -1,34 +0,0 @@ -from __future__ import absolute_import - -from celery.contrib.methods import task_method, task - -from celery.tests.case import AppCase, patch - - -class test_task_method(AppCase): - - def test_task_method(self): - - class X(object): - - def __init__(self): - self.state = 0 - - @self.app.task(shared=False, filter=task_method) - def add(self, x): - self.state += x - - x = X() - x.add(2) - self.assertEqual(x.state, 2) - x.add(4) - self.assertEqual(x.state, 6) - - self.assertTrue(X.add) - self.assertIs(x.add.__self__, x) - - def test_task(self): - with patch('celery.contrib.methods.current_app') as curapp: - fun = object() - task(fun, x=1) - curapp.task.assert_called_with(fun, x=1, filter=task_method) diff --git a/celery/tests/contrib/test_rdb.py b/celery/tests/contrib/test_rdb.py index a933c6010..23e5699dd 100644 --- a/celery/tests/contrib/test_rdb.py +++ b/celery/tests/contrib/test_rdb.py @@ -8,14 +8,14 @@ debugger, set_trace, ) -from celery.tests.case import Case, Mock, WhateverIO, patch, skip_if_pypy +from celery.tests.case import AppCase, Mock, WhateverIO, patch, skip_if_pypy class SockErr(socket.error): errno = None -class test_Rdb(Case): +class test_Rdb(AppCase): @patch('celery.contrib.rdb.Rdb') def test_debugger(self, Rdb): @@ -37,58 +37,65 @@ def test_rdb(self, get_avail_port): get_avail_port.return_value = (sock, 8000) sock.accept.return_value = (Mock(), ['helu']) out = WhateverIO() - rdb = Rdb(out=out) - self.assertTrue(get_avail_port.called) - self.assertIn('helu', out.getvalue()) - - # set_quit - with patch('sys.settrace') as settrace: - rdb.set_quit() - 
settrace.assert_called_with(None) - - # set_trace - with patch('celery.contrib.rdb.Pdb.set_trace') as pset: - with patch('celery.contrib.rdb._frame'): - rdb.set_trace() - rdb.set_trace(Mock()) - pset.side_effect = SockErr - pset.side_effect.errno = errno.ECONNRESET - rdb.set_trace() - pset.side_effect.errno = errno.ENOENT - with self.assertRaises(SockErr): + with Rdb(out=out) as rdb: + self.assertTrue(get_avail_port.called) + self.assertIn('helu', out.getvalue()) + + # set_quit + with patch('sys.settrace') as settrace: + rdb.set_quit() + settrace.assert_called_with(None) + + # set_trace + with patch('celery.contrib.rdb.Pdb.set_trace') as pset: + with patch('celery.contrib.rdb._frame'): rdb.set_trace() - - # _close_session - rdb._close_session() - - # do_continue - rdb.set_continue = Mock() - rdb.do_continue(Mock()) - rdb.set_continue.assert_called_with() - - # do_quit - rdb.set_quit = Mock() - rdb.do_quit(Mock()) - rdb.set_quit.assert_called_with() + rdb.set_trace(Mock()) + pset.side_effect = SockErr + pset.side_effect.errno = errno.ENOENT + with self.assertRaises(SockErr): + rdb.set_trace() + + # _close_session + rdb._close_session() + rdb.active = True + rdb._handle = None + rdb._client = None + rdb._sock = None + rdb._close_session() + + # do_continue + rdb.set_continue = Mock() + rdb.do_continue(Mock()) + rdb.set_continue.assert_called_with() + + # do_quit + rdb.set_quit = Mock() + rdb.do_quit(Mock()) + rdb.set_quit.assert_called_with() @patch('socket.socket') @skip_if_pypy def test_get_avail_port(self, sock): out = WhateverIO() sock.return_value.accept.return_value = (Mock(), ['helu']) - Rdb(out=out) + with Rdb(out=out): + pass with patch('celery.contrib.rdb.current_process') as curproc: curproc.return_value.name = 'PoolWorker-10' - Rdb(out=out) + with Rdb(out=out): + pass err = sock.return_value.bind.side_effect = SockErr() err.errno = errno.ENOENT with self.assertRaises(SockErr): - Rdb(out=out) + with Rdb(out=out): + pass err.errno = errno.EADDRINUSE with 
self.assertRaises(Exception): - Rdb(out=out) + with Rdb(out=out): + pass called = [0] def effect(*a, **kw): @@ -99,4 +106,5 @@ def effect(*a, **kw): finally: called[0] += 1 sock.return_value.bind.side_effect = effect - Rdb(out=out) + with Rdb(out=out): + pass diff --git a/celery/tests/events/test_cursesmon.py b/celery/tests/events/test_cursesmon.py index c8e615167..d5c10953a 100644 --- a/celery/tests/events/test_cursesmon.py +++ b/celery/tests/events/test_cursesmon.py @@ -14,7 +14,7 @@ class test_CursesDisplay(AppCase): def setup(self): try: import curses # noqa - except ImportError: + except (ImportError, OSError): raise SkipTest('curses monitor requires curses') from celery.events import cursesmon diff --git a/celery/tests/events/test_events.py b/celery/tests/events/test_events.py index 791f4167e..e1810a03d 100644 --- a/celery/tests/events/test_events.py +++ b/celery/tests/events/test_events.py @@ -2,11 +2,13 @@ import socket -from celery.events import Event -from celery.tests.case import AppCase, Mock +from celery.events import CLIENT_CLOCK_SKEW, Event + +from celery.tests.case import AppCase, Mock, call class MockProducer(object): + raise_on_publish = False def __init__(self, *args, **kwargs): @@ -64,7 +66,7 @@ def test_sql_transports_disabled(self): def test_send(self): producer = MockProducer() - producer.connection = self.app.connection() + producer.connection = self.app.connection_for_write() connection = Mock() connection.transport.driver_type = 'amqp' eventer = self.app.events.Dispatcher(connection, enabled=False, @@ -91,12 +93,48 @@ def test_send(self): for ev in evs: self.assertTrue(producer.has_event(ev)) - buf = eventer._outbound_buffer = Mock() - buf.popleft.side_effect = IndexError() eventer.flush() + def test_send_buffer_group(self): + buf_received = [None] + producer = MockProducer() + producer.connection = self.app.connection_for_write() + connection = Mock() + connection.transport.driver_type = 'amqp' + eventer = self.app.events.Dispatcher( + 
connection, enabled=False, + buffer_group={'task'}, buffer_limit=2, + ) + eventer.producer = producer + eventer.enabled = True + eventer._publish = Mock(name='_publish') + + def on_eventer_publish(events, *args, **kwargs): + buf_received[0] = list(events) + eventer._publish.side_effect = on_eventer_publish + self.assertFalse(eventer._group_buffer['task']) + eventer.on_send_buffered = Mock(name='on_send_buffered') + eventer.send('task-received', uuid=1) + prev_buffer = eventer._group_buffer['task'] + self.assertTrue(eventer._group_buffer['task']) + eventer.on_send_buffered.assert_called_with() + eventer.send('task-received', uuid=1) + self.assertFalse(eventer._group_buffer['task']) + eventer._publish.assert_has_calls([ + call([], eventer.producer, 'task.multi'), + ]) + # clear in place + self.assertIs(eventer._group_buffer['task'], prev_buffer) + self.assertEqual(len(buf_received[0]), 2) + eventer.on_send_buffered = None + eventer.send('task-received', uuid=1) + + def test_flush_no_groups_no_errors(self): + eventer = self.app.events.Dispatcher(Mock()) + eventer.flush(errors=False, groups=False) + def test_enter_exit(self): - with self.app.connection() as conn: + with self.app.connection_for_write() as conn: d = self.app.events.Dispatcher(conn) d.close = Mock() with d as _d: @@ -106,7 +144,7 @@ def test_enter_exit(self): def test_enable_disable_callbacks(self): on_enable = Mock() on_disable = Mock() - with self.app.connection() as conn: + with self.app.connection_for_write() as conn: with self.app.events.Dispatcher(conn, enabled=False) as d: d.on_enabled.add(on_enable) d.on_disabled.add(on_disable) @@ -116,7 +154,7 @@ def test_enable_disable_callbacks(self): on_disable.assert_called_with() def test_enabled_disable(self): - connection = self.app.connection() + connection = self.app.connection_for_write() channel = connection.channel() try: dispatcher = self.app.events.Dispatcher(connection, @@ -127,7 +165,7 @@ def test_enabled_disable(self): 
self.assertTrue(dispatcher.enabled) self.assertTrue(dispatcher.producer.channel) self.assertEqual(dispatcher.producer.serializer, - self.app.conf.CELERY_EVENT_SERIALIZER) + self.app.conf.event_serializer) created_channel = dispatcher.producer.channel dispatcher.disable() @@ -176,6 +214,10 @@ def my_handler(event): r._receive(message, object()) self.assertTrue(got_event[0]) + def test_accept_argument(self): + r = self.app.events.Receiver(Mock(), accept={'app/foo'}) + self.assertEqual(r.accept, {'app/foo'}) + def test_catch_all_event(self): message = {'type': 'world-war'} @@ -193,7 +235,7 @@ def my_handler(event): self.assertTrue(got_event[0]) def test_itercapture(self): - connection = self.app.connection() + connection = self.app.connection_for_write() try: r = self.app.events.Receiver(connection, node_id='celery.tests') it = r.itercapture(timeout=0.0001, wakeup=False) @@ -219,8 +261,30 @@ def test_event_from_message_localize_disabled(self): self.assertFalse(ts_adjust.called) r.adjust_clock.assert_called_with(313) + def test_event_from_message_clock_from_client(self): + r = self.app.events.Receiver(Mock(), node_id='celery.tests') + r.clock.value = 302 + r.adjust_clock = Mock() + + body = {'type': 'task-sent'} + r.event_from_message( + body, localize=False, adjust_timestamp=Mock(), + ) + self.assertEqual(body['clock'], r.clock.value + CLIENT_CLOCK_SKEW) + + def test_receive_multi(self): + r = self.app.events.Receiver(Mock(name='connection')) + r.process = Mock(name='process') + efm = r.event_from_message = Mock(name='event_from_message') + + def on_efm(*args): + return args + efm.side_effect = on_efm + r._receive([1, 2, 3], Mock()) + r.process.assert_has_calls([call(1), call(2), call(3)]) + def test_itercapture_limit(self): - connection = self.app.connection() + connection = self.app.connection_for_write() channel = connection.channel() try: events_received = [0] diff --git a/celery/tests/events/test_state.py b/celery/tests/events/test_state.py index 
b7e35d7cf..841a8a989 100644 --- a/celery/tests/events/test_state.py +++ b/celery/tests/events/test_state.py @@ -10,15 +10,16 @@ from celery import states from celery.events import Event from celery.events.state import ( + HEARTBEAT_EXPIRE_WINDOW, + HEARTBEAT_DRIFT_MAX, State, Worker, Task, - HEARTBEAT_EXPIRE_WINDOW, - HEARTBEAT_DRIFT_MAX, + heartbeat_expires, ) from celery.five import range from celery.utils import uuid -from celery.tests.case import AppCase, Mock, patch +from celery.tests.case import AppCase, Mock, SkipTest, patch try: Decimal(2.6) @@ -26,7 +27,8 @@ # Py2.6: Must first convert float to str _float_to_decimal = str else: - _float_to_decimal = lambda f: f # noqa + def _float_to_decimal(f): # noqa + return f class replay(object): @@ -103,6 +105,7 @@ def setup(self): traceback='line 1 at main', hostname='utest1'), Event('task-succeeded', uuid=tid, result='4', runtime=0.1234, hostname='utest1'), + Event('foo-bar'), ] @@ -180,6 +183,12 @@ def test_equality(self): hash(Worker(hostname='foo')), hash(Worker(hostname='bar')), ) + def test_heartbeat_expires__Decimal(self): + self.assertEqual( + heartbeat_expires(Decimal(344313.37), freq=60, expire_window=200), + 344433.37, + ) + def test_compatible_with_Decimal(self): w = Worker('george@vandelay.com') timestamp, local_received = Decimal(_float_to_decimal(time())), time() @@ -191,6 +200,39 @@ def test_compatible_with_Decimal(self): }) self.assertTrue(w.alive) + def test_eq_ne_other(self): + self.assertEqual(Worker('a@b.com'), Worker('a@b.com')) + self.assertNotEqual(Worker('a@b.com'), Worker('b@b.com')) + self.assertNotEqual(Worker('a@b.com'), object()) + + def test_reduce_direct(self): + w = Worker('george@vandelay.com') + w.event('worker-online', 10.0, 13.0, fields={ + 'hostname': 'george@vandelay.com', + 'timestamp': 10.0, + 'local_received': 13.0, + 'freq': 60, + }) + fun, args = w.__reduce__() + w2 = fun(*args) + self.assertEqual(w2.hostname, w.hostname) + self.assertEqual(w2.pid, w.pid) + 
self.assertEqual(w2.freq, w.freq) + self.assertEqual(w2.heartbeats, w.heartbeats) + self.assertEqual(w2.clock, w.clock) + self.assertEqual(w2.active, w.active) + self.assertEqual(w2.processed, w.processed) + self.assertEqual(w2.loadavg, w.loadavg) + self.assertEqual(w2.sw_ident, w.sw_ident) + + def test_update(self): + w = Worker('george@vandelay.com') + w.update({'idx': '301'}, foo=1, clock=30, bah='foo') + self.assertEqual(w.idx, '301') + self.assertEqual(w.foo, 1) + self.assertEqual(w.clock, 30) + self.assertEqual(w.bah, 'foo') + def test_survives_missing_timestamp(self): worker = Worker(hostname='foo') worker.event('heartbeat') @@ -243,6 +285,8 @@ def test_info(self): eta=1, runtime=0.0001, expires=1, + parent_id='bdefc', + root_id='dedfef', foo=None, exception=1, received=time() - 10, @@ -253,13 +297,19 @@ def test_info(self): self.assertEqual(sorted(list(task._info_fields)), sorted(task.info().keys())) - self.assertEqual(sorted(list(task._info_fields + ('received', ))), - sorted(task.info(extra=('received', )))) + self.assertEqual(sorted(list(task._info_fields + ('received',))), + sorted(task.info(extra=('received',)))) self.assertEqual(sorted(['args', 'kwargs']), sorted(task.info(['args', 'kwargs']).keys())) self.assertFalse(list(task.info('foo'))) + def test_reduce_direct(self): + task = Task(uuid='uuid', name='tasks.add', args='(2, 2)') + fun, args = task.__reduce__() + task2 = fun(*args) + self.assertEqual(task, task2) + def test_ready(self): task = Task(uuid='abcdefg', name='tasks.add') @@ -318,6 +368,59 @@ def test_task_logical_clock_ordering(self): self.assertEqual(now[1][0], tC) self.assertEqual(now[2][0], tB) + def test_task_descending_clock_ordering(self): + raise SkipTest('not working') + state = State() + r = ev_logical_clock_ordering(state) + tA, tB, tC = r.uids + r.play() + now = list(state.tasks_by_time(reverse=False)) + self.assertEqual(now[0][0], tA) + self.assertEqual(now[1][0], tB) + self.assertEqual(now[2][0], tC) + for _ in range(1000): + 
shuffle(r.uids) + tA, tB, tC = r.uids + r.rewind_with_offset(r.current_clock + 1, r.uids) + r.play() + now = list(state.tasks_by_time(reverse=False)) + self.assertEqual(now[0][0], tB) + self.assertEqual(now[1][0], tC) + self.assertEqual(now[2][0], tA) + + def test_get_or_create_task(self): + state = State() + task, created = state.get_or_create_task('id1') + self.assertEqual(task.uuid, 'id1') + self.assertTrue(created) + task2, created2 = state.get_or_create_task('id1') + self.assertIs(task2, task) + self.assertFalse(created2) + + def test_get_or_create_worker(self): + state = State() + worker, created = state.get_or_create_worker('george@vandelay.com') + self.assertEqual(worker.hostname, 'george@vandelay.com') + self.assertTrue(created) + worker2, created2 = state.get_or_create_worker('george@vandelay.com') + self.assertIs(worker2, worker) + self.assertFalse(created2) + + def test_get_or_create_worker__with_defaults(self): + state = State() + worker, created = state.get_or_create_worker( + 'george@vandelay.com', pid=30, + ) + self.assertEqual(worker.hostname, 'george@vandelay.com') + self.assertEqual(worker.pid, 30) + self.assertTrue(created) + worker2, created2 = state.get_or_create_worker( + 'george@vandelay.com', pid=40, + ) + self.assertIs(worker2, worker) + self.assertEqual(worker2.pid, 40) + self.assertFalse(created2) + def test_worker_online_offline(self): r = ev_worker_online_offline(State()) next(r) @@ -455,10 +558,11 @@ def test_task_types(self): r.play() self.assertEqual(sorted(r.state.task_types()), ['task1', 'task2']) - def test_tasks_by_timestamp(self): + def test_tasks_by_time(self): r = ev_snapshot(State()) r.play() - self.assertEqual(len(list(r.state.tasks_by_timestamp())), 20) + self.assertEqual(len(list(r.state.tasks_by_time())), 20) + self.assertEqual(len(list(r.state.tasks_by_time(reverse=False))), 20) def test_tasks_by_type(self): r = ev_snapshot(State()) diff --git a/celery/tests/fixups/test_django.py b/celery/tests/fixups/test_django.py 
index 1d4ec5cea..f99d73f0c 100644 --- a/celery/tests/fixups/test_django.py +++ b/celery/tests/fixups/test_django.py @@ -10,9 +10,10 @@ DjangoFixup, DjangoWorkerFixup, ) +from celery.utils.objects import Bunch from celery.tests.case import ( - AppCase, Mock, patch, patch_many, patch_modules, mask_modules, + AppCase, Mock, patch, patch_modules, mask_modules, ) @@ -31,6 +32,46 @@ def fixup_context(self, app): class test_DjangoFixup(FixupCase): Fixup = DjangoFixup + def test_setting_default_app(self): + from celery.fixups import django + prev, django.default_app = django.default_app, None + try: + app = Mock(name='app') + DjangoFixup(app) + app.set_default.assert_called_with() + finally: + django.default_app = prev + + @patch('celery.fixups.django.DjangoWorkerFixup') + def test_worker_fixup_property(self, DjangoWorkerFixup): + f = DjangoFixup(self.app) + f._worker_fixup = None + self.assertIs(f.worker_fixup, DjangoWorkerFixup()) + self.assertIs(f.worker_fixup, DjangoWorkerFixup()) + + def test_on_import_modules(self): + f = DjangoFixup(self.app) + f.worker_fixup = Mock(name='worker_fixup') + f.on_import_modules() + f.worker_fixup.validate_models.assert_called_with() + + def test_autodiscover_tasks_pre17(self): + self.mask_modules('django.apps') + f = DjangoFixup(self.app) + f._settings = Mock(name='_settings') + self.assertIs(f.autodiscover_tasks(), f._settings.INSTALLED_APPS) + + def test_autodiscover_tasks(self): + self.mock_modules('django.apps') + from django.apps import apps + f = DjangoFixup(self.app) + configs = [Mock(name='c1'), Mock(name='c2')] + apps.get_app_configs.return_value = configs + self.assertEqual( + f.autodiscover_tasks(), + [c.name for c in configs], + ) + def test_fixup(self): with patch('celery.fixups.django.DjangoFixup') as Fixup: with patch.dict(os.environ, DJANGO_SETTINGS_MODULE=''): @@ -63,15 +104,16 @@ def se(name): def test_install(self): self.app.loader = Mock() + self.cw = self.patch('os.getcwd') + self.p = self.patch('sys.path') + 
self.sigs = self.patch('celery.fixups.django.signals') with self.fixup_context(self.app) as (f, _, _): - with patch_many('os.getcwd', 'sys.path', - 'celery.fixups.django.signals') as (cw, p, sigs): - cw.return_value = '/opt/vandelay' - f.install() - sigs.worker_init.connect.assert_called_with(f.on_worker_init) - self.assertEqual(self.app.loader.now, f.now) - self.assertEqual(self.app.loader.mail_admins, f.mail_admins) - p.append.assert_called_with('/opt/vandelay') + self.cw.return_value = '/opt/vandelay' + f.install() + self.sigs.worker_init.connect.assert_called_with(f.on_worker_init) + self.assertEqual(self.app.loader.now, f.now) + self.assertEqual(self.app.loader.mail_admins, f.mail_admins) + self.p.append.assert_called_with('/opt/vandelay') def test_now(self): with self.fixup_context(self.app) as (f, _, _): @@ -93,9 +135,7 @@ def test_on_worker_init(self): f.on_worker_init() DWF.assert_called_with(f.app) DWF.return_value.install.assert_called_with() - self.assertIs( - f._worker_fixup, DWF.return_value.install.return_value, - ) + self.assertIs(f._worker_fixup, DWF.return_value) class test_DjangoWorkerFixup(FixupCase): @@ -116,7 +156,7 @@ def test_install(self): self.app.conf = {'CELERY_DB_REUSE_MAX': None} self.app.loader = Mock() with self.fixup_context(self.app) as (f, _, _): - with patch_many('celery.fixups.django.signals') as (sigs, ): + with patch('celery.fixups.django.signals') as sigs: f.install() sigs.beat_embedded_init.connect.assert_called_with( f.close_database, @@ -150,6 +190,11 @@ def test_on_worker_process_init(self): f._db.connection = None f.on_worker_process_init() + f.validate_models = Mock(name='validate_models') + self.mock_environ('FORKED_BY_MULTIPROCESSING', '1') + f.on_worker_process_init() + f.validate_models.assert_called_with() + def test_on_task_prerun(self): task = Mock() with self.fixup_context(self.app) as (f, _, _): @@ -205,11 +250,22 @@ def test_close_database(self): _close.assert_called_with() self.assertEqual(f._db_recycles, 1) 
+ def test_close_database__django16(self): + with self.fixup_context(self.app) as (f, _, _): + f._db.connections = Mock(name='db.connections') + f._db.connections.all.side_effect = AttributeError() + f._close_database() + f._db.close_old_connections.assert_called_with() + def test__close_database(self): with self.fixup_context(self.app) as (f, _, _): - conns = f._db.connections = [Mock(), Mock(), Mock()] + conns = [Mock(), Mock(), Mock()] conns[1].close.side_effect = KeyError('already closed') - f.database_errors = (KeyError, ) + f.database_errors = (KeyError,) + f.interface_errors = () + + f._db.connections = Mock() # ConnectionHandler + f._db.connections.all.side_effect = lambda: conns f._close_database() conns[0].close.assert_called_with() @@ -220,10 +276,7 @@ def test__close_database(self): with self.assertRaises(KeyError): f._close_database() - class Object(object): - pass - o = Object() - o.close_connection = Mock() + o = Bunch(close_connection=Mock()) f._db = o f._close_database() o.close_connection.assert_called_with() @@ -243,6 +296,43 @@ def test_on_worker_ready(self): f._settings.DEBUG = True f.on_worker_ready() + def test_validate_models(self): + self.patch('celery.fixups.django.symbol_by_name') + self.patch('celery.fixups.django.import_module') + f = self.Fixup(self.app) + self.mock_modules('django.core.management.validation') + f.django_setup = Mock(name='django.setup') + from django.core.management.validation import get_validation_errors + get_validation_errors.return_value = 0 + f.validate_models() + f.django_setup.assert_called_with() + get_validation_errors.return_value = 3 + with self.assertRaises(RuntimeError): + f.validate_models() + + self.mask_modules('django.core.management.validation') + f._validate_models_django17 = Mock('validate17') + f.validate_models() + f._validate_models_django17.assert_called_with() + + def test_validate_models_django17(self): + self.patch('celery.fixups.django.symbol_by_name') + 
self.patch('celery.fixups.django.import_module') + self.mock_modules('django.core.management.base') + from django.core.management import base + f = self.Fixup(self.app) + f._validate_models_django17() + base.BaseCommand.assert_called_with() + base.BaseCommand().check.assert_called_with() + + def test_django_setup(self): + self.patch('celery.fixups.django.symbol_by_name') + self.patch('celery.fixups.django.import_module') + django, = self.mock_modules('django') + f = self.Fixup(self.app) + f.django_setup() + django.setup.assert_called_with() + def test_mysql_errors(self): with patch_modules('MySQLdb'): import MySQLdb as mod diff --git a/celery/tests/security/case.py b/celery/tests/security/case.py index ba421a9d5..4440f4963 100644 --- a/celery/tests/security/case.py +++ b/celery/tests/security/case.py @@ -2,14 +2,10 @@ from celery.tests.case import AppCase, SkipTest -import sys - class SecurityCase(AppCase): def setup(self): - if sys.version_info[0] == 3: - raise SkipTest('PyOpenSSL does not work on Python 3') try: from OpenSSL import crypto # noqa except ImportError: diff --git a/celery/tests/security/test_certificate.py b/celery/tests/security/test_certificate.py index 4b07b5a98..3cdc596c8 100644 --- a/celery/tests/security/test_certificate.py +++ b/celery/tests/security/test_certificate.py @@ -6,7 +6,7 @@ from . 
import CERT1, CERT2, KEY1 from .case import SecurityCase -from celery.tests.case import Mock, mock_open, patch +from celery.tests.case import Mock, SkipTest, mock_open, patch class test_Certificate(SecurityCase): @@ -23,8 +23,14 @@ def test_invalid_certificate(self): self.assertRaises(SecurityError, Certificate, KEY1) def test_has_expired(self): + raise SkipTest('cert expired') self.assertFalse(Certificate(CERT1).has_expired()) + def test_has_expired_mock(self): + x = Certificate(CERT1) + x._cert = Mock(name='cert') + self.assertIs(x.has_expired(), x._cert.has_expired()) + class test_CertStore(SecurityCase): diff --git a/celery/tests/security/test_security.py b/celery/tests/security/test_security.py index 227c65a5d..ca560c73f 100644 --- a/celery/tests/security/test_security.py +++ b/celery/tests/security/test_security.py @@ -3,7 +3,7 @@ Generated with: -.. code-block:: bash +.. code-block:: console $ openssl genrsa -des3 -passout pass:test -out key1.key 1024 $ openssl req -new -key key1.key -out key1.csr -passin pass:test @@ -20,6 +20,7 @@ from celery.exceptions import ImproperlyConfigured, SecurityError from celery.five import builtins +from celery.security import disable_untrusted_serializers, setup_security from celery.security.utils import reraise_errors from kombu.serialization import registry @@ -53,15 +54,24 @@ def test_disable_insecure_serializers(self): finally: disable_insecure_serializers(allowed=['json']) + @patch('celery.security._disable_insecure_serializers') + def test_disable_untrusted_serializers(self, disable): + disable_untrusted_serializers(['foo']) + disable.assert_called_with(allowed=['foo']) + def test_setup_security(self): disabled = registry._disabled_content_types self.assertEqual(0, len(disabled)) - self.app.conf.CELERY_TASK_SERIALIZER = 'json' + self.app.conf.task_serializer = 'json' self.app.setup_security() self.assertIn('application/x-python-serialize', disabled) disabled.clear() + @patch('celery.current_app') + def 
test_setup_security__default_app(self, current_app): + setup_security() + @patch('celery.security.register_auth') @patch('celery.security._disable_insecure_serializers') def test_setup_registry_complete(self, dis, reg, key='KEY', cert='CERT'): @@ -75,7 +85,7 @@ def effect(*args): finally: calls[0] += 1 - self.app.conf.CELERY_TASK_SERIALIZER = 'auth' + self.app.conf.task_serializer = 'auth' with mock_open(side_effect=effect): with patch('celery.security.registry') as registry: store = Mock() @@ -85,7 +95,7 @@ def effect(*args): registry._set_default_serializer.assert_called_with('auth') def test_security_conf(self): - self.app.conf.CELERY_TASK_SERIALIZER = 'auth' + self.app.conf.task_serializer = 'auth' with self.assertRaises(ImproperlyConfigured): self.app.setup_security() @@ -103,8 +113,8 @@ def import_hook(name, *args, **kwargs): def test_reraise_errors(self): with self.assertRaises(SecurityError): - with reraise_errors(errors=(KeyError, )): + with reraise_errors(errors=(KeyError,)): raise KeyError('foo') with self.assertRaises(KeyError): - with reraise_errors(errors=(ValueError, )): + with reraise_errors(errors=(ValueError,)): raise KeyError('bar') diff --git a/celery/tests/security/test_serialization.py b/celery/tests/security/test_serialization.py index 50bc4bfab..e66ae6fdc 100644 --- a/celery/tests/security/test_serialization.py +++ b/celery/tests/security/test_serialization.py @@ -4,6 +4,7 @@ import base64 from kombu.serialization import registry +from kombu.utils.encoding import bytes_to_str from celery.exceptions import SecurityError from celery.security.serialization import SecureSerializer, register_auth @@ -59,6 +60,6 @@ def test_register_auth(self): def test_lots_of_sign(self): for i in range(1000): - rdata = base64.urlsafe_b64encode(os.urandom(265)) + rdata = bytes_to_str(base64.urlsafe_b64encode(os.urandom(265))) s = self._get_s(KEY1, CERT1, [CERT1]) self.assertEqual(s.deserialize(s.serialize(rdata)), rdata) diff --git 
a/celery/tests/tasks/test_canvas.py b/celery/tests/tasks/test_canvas.py index 8ecbbbbc9..ea2c45952 100644 --- a/celery/tests/tasks/test_canvas.py +++ b/celery/tests/tasks/test_canvas.py @@ -1,5 +1,6 @@ from __future__ import absolute_import +from celery._state import _task_stack from celery.canvas import ( Signature, chain, @@ -11,18 +12,33 @@ chunks, _maybe_group, maybe_signature, + maybe_unroll_group, ) from celery.result import EagerResult -from celery.tests.case import AppCase, Mock +from celery.tests.case import ( + AppCase, ContextMock, MagicMock, Mock, depends_on_current_app, +) SIG = Signature({'task': 'TASK', - 'args': ('A1', ), + 'args': ('A1',), 'kwargs': {'K1': 'V1'}, 'options': {'task_id': 'TASK_ID'}, 'subtask_type': ''}) +class test_maybe_unroll_group(AppCase): + + def test_when_no_len_and_no_length_hint(self): + g = MagicMock(name='group') + g.tasks.__len__.side_effect = TypeError() + g.tasks.__length_hint__ = Mock() + g.tasks.__length_hint__.return_value = 0 + self.assertIs(maybe_unroll_group(g), g) + g.tasks.__length_hint__.side_effect = AttributeError() + self.assertIs(maybe_unroll_group(g), g) + + class CanvasCase(AppCase): def setup(self): @@ -54,14 +70,38 @@ def test_getitem_property_class(self): def test_getitem_property(self): self.assertEqual(SIG.task, 'TASK') - self.assertEqual(SIG.args, ('A1', )) + self.assertEqual(SIG.args, ('A1',)) self.assertEqual(SIG.kwargs, {'K1': 'V1'}) self.assertEqual(SIG.options, {'task_id': 'TASK_ID'}) self.assertEqual(SIG.subtask_type, '') + def test_call(self): + x = Signature('foo', (1, 2), {'arg1': 33}, app=self.app) + x.type = Mock(name='type') + x(3, 4, arg2=66) + x.type.assert_called_with(3, 4, 1, 2, arg1=33, arg2=66) + + def test_link_on_scalar(self): + x = Signature('TASK', link=Signature('B')) + self.assertTrue(x.options['link']) + x.link(Signature('C')) + self.assertIsInstance(x.options['link'], list) + self.assertIn(Signature('B'), x.options['link']) + self.assertIn(Signature('C'), x.options['link']) 
+ + def test_json(self): + x = Signature('TASK', link=Signature('B', app=self.app), app=self.app) + self.assertDictEqual(x.__json__(), dict(x)) + + @depends_on_current_app + def test_reduce(self): + x = Signature('TASK', (2, 4), app=self.app) + fun, args = x.__reduce__() + self.assertEqual(fun(*args), x) + def test_replace(self): x = Signature('TASK', ('A'), {}) - self.assertTupleEqual(x.replace(args=('B', )).args, ('B', )) + self.assertTupleEqual(x.replace(args=('B',)).args, ('B',)) self.assertDictEqual( x.replace(kwargs={'FOO': 'BAR'}).kwargs, {'FOO': 'BAR'}, @@ -122,7 +162,7 @@ def test_INVERT(self): def test_merge_immutable(self): x = self.add.si(2, 2, foo=1) - args, kwargs, options = x._merge((4, ), {'bar': 2}, {'task_id': 3}) + args, kwargs, options = x._merge((4,), {'bar': 2}, {'task_id': 3}) self.assertTupleEqual(args, (2, 2)) self.assertDictEqual(kwargs, {'foo': 1}) self.assertDictEqual(options, {'task_id': 3}) @@ -163,6 +203,7 @@ def test_apply(self): s.apply_async(foo=1) s.type.apply_async.assert_called_with( (), {'task': self.add.s(), 'it': args}, foo=1, + route_name=self.add.name, ) self.assertEqual(type.from_dict(dict(s)), s) @@ -184,12 +225,12 @@ def test_chunks(self): gr = x.group.return_value = Mock() x.apply_async() - gr.apply_async.assert_called_with((), {}) - + gr.apply_async.assert_called_with((), {}, route_name=self.add.name) + gr.apply_async.reset_mock() x() - gr.assert_called_with() + gr.apply_async.assert_called_with((), {}, route_name=self.add.name) - self.app.conf.CELERY_ALWAYS_EAGER = True + self.app.conf.task_always_eager = True chunks.apply_chunks(app=self.app, **x['kwargs']) @@ -201,13 +242,164 @@ def test_repr(self): repr(x), '%s(2, 2) | %s(2)' % (self.add.name, self.add.name), ) + def test_apply_async(self): + c = self.add.s(2, 2) | self.add.s(4) | self.add.s(8) + result = c.apply_async() + self.assertTrue(result.parent) + self.assertTrue(result.parent.parent) + self.assertIsNone(result.parent.parent.parent) + + def 
test_group_to_chord__freeze_parent_id(self): + def using_freeze(c): + c.freeze(parent_id='foo', root_id='root') + return c._frozen[0] + self.assert_group_to_chord_parent_ids(using_freeze) + + def assert_group_to_chord_parent_ids(self, freezefun): + c = ( + self.add.s(5, 5) | + group([self.add.s(i, i) for i in range(5)], app=self.app) | + self.add.si(10, 10) | + self.add.si(20, 20) | + self.add.si(30, 30) + ) + tasks = freezefun(c) + self.assertEqual(tasks[-1].parent_id, 'foo') + self.assertEqual(tasks[-1].root_id, 'root') + self.assertEqual(tasks[-2].parent_id, tasks[-1].id) + self.assertEqual(tasks[-2].root_id, 'root') + self.assertEqual(tasks[-2].body.parent_id, tasks[-2].tasks.id) + self.assertEqual(tasks[-2].body.parent_id, tasks[-2].id) + self.assertEqual(tasks[-2].body.root_id, 'root') + self.assertEqual(tasks[-2].tasks.tasks[0].parent_id, tasks[-1].id) + self.assertEqual(tasks[-2].tasks.tasks[0].root_id, 'root') + self.assertEqual(tasks[-2].tasks.tasks[1].parent_id, tasks[-1].id) + self.assertEqual(tasks[-2].tasks.tasks[1].root_id, 'root') + self.assertEqual(tasks[-2].tasks.tasks[2].parent_id, tasks[-1].id) + self.assertEqual(tasks[-2].tasks.tasks[2].root_id, 'root') + self.assertEqual(tasks[-2].tasks.tasks[3].parent_id, tasks[-1].id) + self.assertEqual(tasks[-2].tasks.tasks[3].root_id, 'root') + self.assertEqual(tasks[-2].tasks.tasks[4].parent_id, tasks[-1].id) + self.assertEqual(tasks[-2].tasks.tasks[4].root_id, 'root') + self.assertEqual(tasks[-3].parent_id, tasks[-2].body.id) + self.assertEqual(tasks[-3].root_id, 'root') + self.assertEqual(tasks[-4].parent_id, tasks[-3].id) + self.assertEqual(tasks[-4].root_id, 'root') + + def test_splices_chains(self): + c = chain( + self.add.s(5, 5), + chain(self.add.s(6), self.add.s(7), self.add.s(8), app=self.app), + app=self.app, + ) + c.freeze() + tasks, _ = c._frozen + self.assertEqual(len(tasks), 4) + + def test_from_dict_no_tasks(self): + self.assertTrue(chain.from_dict( + dict(chain(app=self.app)), 
app=self.app)) + + @depends_on_current_app + def test_app_falls_back_to_default(self): + from celery._state import current_app + self.assertIs(chain().app, current_app) + + def test_handles_dicts(self): + c = chain( + self.add.s(5, 5), dict(self.add.s(8)), app=self.app, + ) + c.freeze() + tasks, _ = c._frozen + for task in tasks: + self.assertIsInstance(task, Signature) + self.assertIs(task.app, self.app) + + def test_group_to_chord(self): + c = ( + self.add.s(5) | + group([self.add.s(i, i) for i in range(5)], app=self.app) | + self.add.s(10) | + self.add.s(20) | + self.add.s(30) + ) + c._use_link = True + tasks, results = c.prepare_steps((), c.tasks) + + self.assertEqual(tasks[-1].args[0], 5) + self.assertIsInstance(tasks[-2], chord) + self.assertEqual(len(tasks[-2].tasks), 5) + self.assertEqual(tasks[-2].parent_id, tasks[-1].id) + self.assertEqual(tasks[-2].root_id, tasks[-1].id) + self.assertEqual(tasks[-2].body.args[0], 10) + self.assertEqual(tasks[-2].body.parent_id, tasks[-2].id) + + self.assertEqual(tasks[-3].args[0], 20) + self.assertEqual(tasks[-3].root_id, tasks[-1].id) + self.assertEqual(tasks[-3].parent_id, tasks[-2].body.id) + + self.assertEqual(tasks[-4].args[0], 30) + self.assertEqual(tasks[-4].parent_id, tasks[-3].id) + self.assertEqual(tasks[-4].root_id, tasks[-1].id) + + self.assertTrue(tasks[-2].body.options['link']) + self.assertTrue(tasks[-2].body.options['link'][0].options['link']) + + c2 = self.add.s(2, 2) | group(self.add.s(i, i) for i in range(10)) + c2._use_link = True + tasks2, _ = c2.prepare_steps((), c2.tasks) + self.assertIsInstance(tasks2[0], group) + + def test_group_to_chord__protocol_2(self): + c = ( + group([self.add.s(i, i) for i in range(5)], app=self.app) | + self.add.s(10) | + self.add.s(20) | + self.add.s(30) + ) + c._use_link = False + tasks, _ = c.prepare_steps((), c.tasks) + self.assertIsInstance(tasks[-1], chord) + + c2 = self.add.s(2, 2) | group(self.add.s(i, i) for i in range(10)) + c2._use_link = False + tasks2, _ = 
c2.prepare_steps((), c2.tasks) + self.assertIsInstance(tasks2[0], group) + + def test_apply_options(self): + + class static(Signature): + + def clone(self, *args, **kwargs): + return self + + def s(*args, **kwargs): + return static(self.add, args, kwargs, type=self.add, app=self.app) + + c = s(2, 2) | s(4) | s(8) + r1 = c.apply_async(task_id='some_id') + self.assertEqual(r1.id, 'some_id') + + c.apply_async(group_id='some_group_id') + self.assertEqual(c.tasks[-1].options['group_id'], 'some_group_id') + + c.apply_async(chord='some_chord_id') + self.assertEqual(c.tasks[-1].options['chord'], 'some_chord_id') + + c.apply_async(link=[s(32)]) + self.assertListEqual(c.tasks[-1].options['link'], [s(32)]) + + c.apply_async(link_error=[s('error')]) + for task in c.tasks: + self.assertListEqual(task.options['link_error'], [s('error')]) + def test_reverse(self): x = self.add.s(2, 2) | self.add.s(2) self.assertIsInstance(signature(x), chain) self.assertIsInstance(signature(dict(x)), chain) def test_always_eager(self): - self.app.conf.CELERY_ALWAYS_EAGER = True + self.app.conf.task_always_eager = True self.assertEqual(~(self.add.s(4, 4) | self.add.s(8)), 16) def test_apply(self): @@ -224,6 +416,39 @@ def test_empty_chain_returns_none(self): self.assertIsNone(chain(app=self.app)()) self.assertIsNone(chain(app=self.app).apply_async()) + def test_root_id_parent_id(self): + self.app.conf.task_protocol = 2 + c = chain(self.add.si(i, i) for i in range(4)) + c.freeze() + tasks, _ = c._frozen + for i, task in enumerate(tasks): + self.assertEqual(task.root_id, tasks[-1].id) + try: + self.assertEqual(task.parent_id, tasks[i + 1].id) + except IndexError: + assert i == len(tasks) - 1 + else: + valid_parents = i + self.assertEqual(valid_parents, len(tasks) - 2) + + self.assert_sent_with_ids(tasks[-1], tasks[-1].id, 'foo', + parent_id='foo') + self.assertTrue(tasks[-2].options['parent_id']) + self.assert_sent_with_ids(tasks[-2], tasks[-1].id, tasks[-1].id) + 
self.assert_sent_with_ids(tasks[-3], tasks[-1].id, tasks[-2].id) + self.assert_sent_with_ids(tasks[-4], tasks[-1].id, tasks[-3].id) + + def assert_sent_with_ids(self, task, rid, pid, **options): + self.app.amqp.send_task_message = Mock(name='send_task_message') + self.app.backend = Mock() + self.app.producer_or_acquire = ContextMock() + + task.apply_async(**options) + self.assertTrue(self.app.amqp.send_task_message.called) + message = self.app.amqp.send_task_message.call_args[0][2] + self.assertEqual(message.headers['parent_id'], pid) + self.assertEqual(message.headers['root_id'], rid) + def test_call_no_tasks(self): x = chain() self.assertFalse(x()) @@ -238,7 +463,7 @@ def test_from_dict_no_args__with_args(self): x = dict(self.add.s(2, 2) | self.add.s(4)) x['args'] = None self.assertIsInstance(chain.from_dict(x), chain) - x['args'] = (2, ) + x['args'] = (2,) self.assertIsInstance(chain.from_dict(x), chain) def test_accepts_generator_argument(self): @@ -251,18 +476,84 @@ class test_group(CanvasCase): def test_repr(self): x = group([self.add.s(2, 2), self.add.s(4, 4)]) - self.assertEqual(repr(x), repr(x.tasks)) + self.assertTrue(repr(x)) def test_reverse(self): x = group([self.add.s(2, 2), self.add.s(4, 4)]) self.assertIsInstance(signature(x), group) self.assertIsInstance(signature(dict(x)), group) + def test_group_with_group_argument(self): + g1 = group(self.add.s(2, 2), self.add.s(4, 4), app=self.app) + g2 = group(g1, app=self.app) + self.assertIs(g2.tasks, g1.tasks) + def test_maybe_group_sig(self): self.assertListEqual( - _maybe_group(self.add.s(2, 2)), [self.add.s(2, 2)], + _maybe_group(self.add.s(2, 2), self.app), [self.add.s(2, 2)], ) + def test_apply(self): + x = group([self.add.s(4, 4), self.add.s(8, 8)]) + res = x.apply() + self.assertEqual(res.get(), [8, 16]) + + def test_apply_async(self): + x = group([self.add.s(4, 4), self.add.s(8, 8)]) + x.apply_async() + + def test_prepare_with_dict(self): + x = group([self.add.s(4, 4), dict(self.add.s(8, 8))], 
app=self.app) + x.apply_async() + + def test_group_in_group(self): + g1 = group(self.add.s(2, 2), self.add.s(4, 4), app=self.app) + g2 = group(self.add.s(8, 8), g1, self.add.s(16, 16), app=self.app) + g2.apply_async() + + def test_set_immutable(self): + g1 = group(Mock(name='t1'), Mock(name='t2'), app=self.app) + g1.set_immutable(True) + for task in g1.tasks: + task.set_immutable.assert_called_with(True) + + def test_link(self): + g1 = group(Mock(name='t1'), Mock(name='t2'), app=self.app) + sig = Mock(name='sig') + g1.link(sig) + g1.tasks[0].link.assert_called_with(sig.clone().set(immutable=True)) + + def test_link_error(self): + g1 = group(Mock(name='t1'), Mock(name='t2'), app=self.app) + sig = Mock(name='sig') + g1.link_error(sig) + g1.tasks[0].link_error.assert_called_with( + sig.clone().set(immutable=True), + ) + + def test_apply_empty(self): + x = group(app=self.app) + x.apply() + res = x.apply_async() + self.assertFalse(res) + self.assertFalse(res.results) + + def test_apply_async_with_parent(self): + _task_stack.push(self.add) + try: + self.add.push_request(called_directly=False) + try: + assert not self.add.request.children + x = group([self.add.s(4, 4), self.add.s(8, 8)]) + res = x() + self.assertTrue(self.add.request.children) + self.assertIn(res, self.add.request.children) + self.assertEqual(len(self.add.request.children), 1) + finally: + self.add.pop_request() + finally: + _task_stack.pop() + def test_from_dict(self): x = group([self.add.s(2, 2), self.add.s(4, 4)]) x['args'] = (2, 2) @@ -273,6 +564,9 @@ def test_from_dict(self): def test_call_empty_group(self): x = group(app=self.app) self.assertFalse(len(x())) + x.delay() + x.apply_async() + x() def test_skew(self): g = group([self.add.s(i, i) for i in range(10)]) @@ -300,6 +594,41 @@ def test_clone_clones_body(self): z = y.clone() self.assertIsNone(z.kwargs.get('body')) + def test_argument_is_group(self): + x = chord(group(self.add.s(2, 2), self.add.s(4, 4), app=self.app)) + self.assertTrue(x.tasks) + 
+ def test_set_parent_id(self): + x = chord(group(self.add.s(2, 2))) + x.tasks = [self.add.s(2, 2)] + x.set_parent_id('pid') + + def test_app_when_app(self): + app = Mock(name='app') + x = chord([self.add.s(4, 4)], app=app) + self.assertIs(x.app, app) + + def test_app_when_app_in_task(self): + t1 = Mock(name='t1') + t2 = Mock(name='t2') + x = chord([t1, self.add.s(4, 4)]) + self.assertIs(x.app, x.tasks[0].app) + t1.app = None + x = chord([t1], body=t2) + self.assertIs(x.app, t2._app) + + @depends_on_current_app + def test_app_fallback_to_current(self): + from celery._state import current_app + t1 = Mock(name='t1') + t1.app = t1._app = None + x = chord([t1], body=t1) + self.assertIs(x.app, current_app) + + def test_set_immutable(self): + x = chord([Mock(name='t1'), Mock(name='t2')], app=self.app) + x.set_immutable(True) + def test_links_to_body(self): x = chord([self.add.s(2, 2), self.add.s(4, 4)], body=self.mul.s(4)) x.link(self.div.s(2)) @@ -319,6 +648,12 @@ def test_repr(self): x.kwargs['body'] = None self.assertIn('without body', repr(x)) + def test_freeze_tasks_is_not_group(self): + x = chord([self.add.s(2, 2)], body=self.add.s(), app=self.app) + x.freeze() + x.tasks = [self.add.s(2, 2)] + x.freeze() + class test_maybe_signature(CanvasCase): diff --git a/celery/tests/tasks/test_chord.py b/celery/tests/tasks/test_chord.py index 47e771841..d5e243101 100644 --- a/celery/tests/tasks/test_chord.py +++ b/celery/tests/tasks/test_chord.py @@ -2,7 +2,7 @@ from contextlib import contextmanager -from celery import group +from celery import group, uuid from celery import canvas from celery import result from celery.exceptions import ChordError, Retry @@ -10,7 +10,9 @@ from celery.result import AsyncResult, GroupResult, EagerResult from celery.tests.case import AppCase, Mock -passthru = lambda x: x + +def passthru(x): + return x class ChordCase(AppCase): @@ -72,11 +74,20 @@ class AlwaysReady(TSR): with self._chord_context(AlwaysReady) as (cb, retry, _): 
cb.type.apply_async.assert_called_with( - ([2, 4, 8, 6], ), {}, task_id=cb.id, + ([2, 4, 8, 6],), {}, task_id=cb.id, ) # did not retry self.assertFalse(retry.call_count) + def test_deps_ready_fails(self): + GroupResult = Mock(name='GroupResult') + GroupResult.return_value.ready.side_effect = KeyError('foo') + unlock_chord = self.app.tasks['celery.chord_unlock'] + + with self.assertRaises(KeyError): + unlock_chord('groupid', Mock(), result=[Mock()], + GroupResult=GroupResult, result_from_tuple=Mock()) + def test_callback_fails(self): class AlwaysReady(TSR): @@ -142,7 +153,7 @@ def callback(*args, **kwargs): fail_current = self.app.backend.fail_from_current_stack = Mock() try: with patch_unlock_retry(self.app) as (unlock, retry): - subtask, canvas.maybe_signature = ( + signature, canvas.maybe_signature = ( canvas.maybe_signature, passthru, ) if setup: @@ -160,7 +171,7 @@ def callback(*args, **kwargs): except Retry: pass finally: - canvas.maybe_signature = subtask + canvas.maybe_signature = signature yield callback_s, retry, fail_current finally: result.GroupResult = pts @@ -192,31 +203,82 @@ def addX(x, y): def sumX(n): return sum(n) - self.app.conf.CELERY_ALWAYS_EAGER = True + self.app.conf.task_always_eager = True x = chord(addX.s(i, i) for i in range(10)) body = sumX.s() result = x(body) self.assertEqual(result.get(), sum(i + i for i in range(10))) def test_apply(self): - self.app.conf.CELERY_ALWAYS_EAGER = False + self.app.conf.task_always_eager = False from celery import chord m = Mock() - m.app.conf.CELERY_ALWAYS_EAGER = False + m.app.conf.task_always_eager = False m.AsyncResult = AsyncResult - prev, chord._type = chord._type, m + prev, chord.run = chord.run, m try: x = chord(self.add.s(i, i) for i in range(10)) body = self.add.s(2) result = x(body) self.assertTrue(result.id) - # does not modify original subtask + # does not modify original signature with self.assertRaises(KeyError): body.options['task_id'] - self.assertTrue(chord._type.called) + 
self.assertTrue(chord.run.called) finally: - chord._type = prev + chord.run = prev + + +class test_add_to_chord(AppCase): + + def setup(self): + + @self.app.task(shared=False) + def add(x, y): + return x + y + self.add = add + + @self.app.task(shared=False, bind=True) + def adds(self, sig, lazy=False): + return self.add_to_chord(sig, lazy) + self.adds = adds + + def test_add_to_chord(self): + self.app.backend = Mock(name='backend') + + sig = self.add.s(2, 2) + sig.delay = Mock(name='sig.delay') + self.adds.request.group = uuid() + self.adds.request.id = uuid() + + with self.assertRaises(ValueError): + # task not part of chord + self.adds.run(sig) + self.adds.request.chord = self.add.s() + + res1 = self.adds.run(sig, True) + self.assertEqual(res1, sig) + self.assertTrue(sig.options['task_id']) + self.assertEqual(sig.options['group_id'], self.adds.request.group) + self.assertEqual(sig.options['chord'], self.adds.request.chord) + self.assertFalse(sig.delay.called) + self.app.backend.add_to_chord.assert_called_with( + self.adds.request.group, sig.freeze(), + ) + + self.app.backend.reset_mock() + sig2 = self.add.s(4, 4) + sig2.delay = Mock(name='sig2.delay') + res2 = self.adds.run(sig2) + self.assertEqual(res2, sig2.delay.return_value) + self.assertTrue(sig2.options['task_id']) + self.assertEqual(sig2.options['group_id'], self.adds.request.group) + self.assertEqual(sig2.options['chord'], self.adds.request.chord) + sig2.delay.assert_called_with() + self.app.backend.add_to_chord.assert_called_with( + self.adds.request.group, sig2.freeze(), + ) class test_Chord_task(ChordCase): @@ -227,7 +289,7 @@ def test_run(self): self.app.backend.cleanup.__name__ = 'cleanup' Chord = self.app.tasks['celery.chord'] - body = dict() - Chord(group(self.add.subtask((i, i)) for i in range(5)), body) - Chord([self.add.subtask((j, j)) for j in range(5)], body) + body = self.add.signature() + Chord(group(self.add.signature((i, i)) for i in range(5)), body) + Chord([self.add.signature((j, j)) for 
j in range(5)], body) self.assertEqual(self.app.backend.apply_chord.call_count, 2) diff --git a/celery/tests/tasks/test_result.py b/celery/tests/tasks/test_result.py index dbaf3f4d4..64829a443 100644 --- a/celery/tests/tasks/test_result.py +++ b/celery/tests/tasks/test_result.py @@ -3,18 +3,24 @@ from contextlib import contextmanager from celery import states -from celery.exceptions import IncompleteStream, TimeoutError +from celery.backends.base import SyncBackendMixin +from celery.exceptions import ( + ImproperlyConfigured, IncompleteStream, TimeoutError, +) from celery.five import range from celery.result import ( AsyncResult, EagerResult, - TaskSetResult, + ResultSet, result_from_tuple, + assert_will_not_block, ) from celery.utils import uuid from celery.utils.serialization import pickle -from celery.tests.case import AppCase, Mock, depends_on_current_app, patch +from celery.tests.case import ( + AppCase, Mock, call, depends_on_current_app, patch, +) def mock_task(name, state, result): @@ -44,6 +50,8 @@ def make_mock_group(app, size=10): class test_AsyncResult(AppCase): def setup(self): + self.app.conf.result_cache_max = 100 + self.app.conf.result_serializer = 'pickle' self.task1 = mock_task('task1', states.SUCCESS, 'the') self.task2 = mock_task('task2', states.SUCCESS, 'quick') self.task3 = mock_task('task3', states.FAILURE, KeyError('brown')) @@ -57,12 +65,30 @@ def mytask(): pass self.mytask = mytask + @patch('celery.result.task_join_will_block') + def test_assert_will_not_block(self, task_join_will_block): + task_join_will_block.return_value = True + with self.assertRaises(RuntimeError): + assert_will_not_block() + task_join_will_block.return_value = False + assert_will_not_block() + + def test_without_id(self): + with self.assertRaises(ValueError): + AsyncResult(None, app=self.app) + def test_compat_properties(self): x = self.app.AsyncResult('1') self.assertEqual(x.task_id, x.id) x.task_id = '2' self.assertEqual(x.id, '2') + @depends_on_current_app + def 
test_reduce_direct(self): + x = AsyncResult('1', app=self.app) + fun, args = x.__reduce__() + self.assertEqual(fun(*args), x) + def test_children(self): x = self.app.AsyncResult('1') children = [EagerResult(str(i), i, states.SUCCESS) for i in range(3)] @@ -73,16 +99,17 @@ def test_children(self): def test_propagates_for_parent(self): x = self.app.AsyncResult(uuid()) - x.backend = Mock() + x.backend = Mock(name='backend') x.backend.get_task_meta.return_value = {} + x.backend.wait_for_pending.return_value = 84 x.parent = EagerResult(uuid(), KeyError('foo'), states.FAILURE) with self.assertRaises(KeyError): x.get(propagate=True) - self.assertFalse(x.backend.wait_for.called) + self.assertFalse(x.backend.wait_for_pending.called) x.parent = EagerResult(uuid(), 42, states.SUCCESS) - x.get(propagate=True) - self.assertTrue(x.backend.wait_for.called) + self.assertEqual(x.get(propagate=True), 84) + self.assertTrue(x.backend.wait_for_pending.called) def test_get_children(self): tid = uuid() @@ -147,14 +174,22 @@ def test_eq_not_implemented(self): @depends_on_current_app def test_reduce(self): - a1 = self.app.AsyncResult('uuid', task_name=self.mytask.name) + a1 = self.app.AsyncResult('uuid') restored = pickle.loads(pickle.dumps(a1)) self.assertEqual(restored.id, 'uuid') - self.assertEqual(restored.task_name, self.mytask.name) a2 = self.app.AsyncResult('uuid') self.assertEqual(pickle.loads(pickle.dumps(a2)).id, 'uuid') + def test_maybe_set_cache_empty(self): + self.app.AsyncResult('uuid')._maybe_set_cache(None) + + def test_set_cache__children(self): + r1 = self.app.AsyncResult('id1') + r2 = self.app.AsyncResult('id2') + r1._set_cache({'children': [r2.as_tuple()]}) + self.assertIn(r2, r1.children) + def test_successful(self): ok_res = self.app.AsyncResult(self.task1['id']) nok_res = self.app.AsyncResult(self.task3['id']) @@ -212,13 +247,22 @@ def test_get_traceback(self): pending_res = self.app.AsyncResult(uuid()) self.assertFalse(pending_res.traceback) + def 
test_get__backend_gives_None(self): + res = self.app.AsyncResult(self.task1['id']) + res.backend.wait_for = Mock(name='wait_for') + res.backend.wait_for.return_value = None + self.assertIsNone(res.get()) + def test_get(self): ok_res = self.app.AsyncResult(self.task1['id']) ok2_res = self.app.AsyncResult(self.task2['id']) nok_res = self.app.AsyncResult(self.task3['id']) nok2_res = self.app.AsyncResult(self.task4['id']) - self.assertEqual(ok_res.get(), 'the') + callback = Mock(name='callback') + + self.assertEqual(ok_res.get(callback=callback), 'the') + callback.assert_called_with(ok_res.id, 'the') self.assertEqual(ok2_res.get(), 'quick') with self.assertRaises(KeyError): nok_res.get() @@ -226,6 +270,21 @@ def test_get(self): self.assertIsInstance(nok2_res.result, KeyError) self.assertEqual(ok_res.info, 'the') + def test_eq_ne(self): + r1 = self.app.AsyncResult(self.task1['id']) + r2 = self.app.AsyncResult(self.task1['id']) + r3 = self.app.AsyncResult(self.task2['id']) + self.assertEqual(r1, r2) + self.assertNotEqual(r1, r3) + self.assertEqual(r1, r2.id) + self.assertNotEqual(r1, r3.id) + + @depends_on_current_app + def test_reduce_restore(self): + r1 = self.app.AsyncResult(self.task1['id']) + fun, args = r1.__reduce__() + self.assertEqual(fun(*args), r1) + def test_get_timeout(self): res = self.app.AsyncResult(self.task4['id']) # has RETRY state with self.assertRaises(TimeoutError): @@ -261,8 +320,11 @@ def test_resultset_repr(self): [self.app.AsyncResult(t) for t in ['1', '2', '3']]))) def test_eq_other(self): - self.assertFalse(self.app.ResultSet([1, 3, 3]) == 1) - self.assertTrue(self.app.ResultSet([1]) == self.app.ResultSet([1])) + self.assertFalse(self.app.ResultSet( + [self.app.AsyncResult(t) for t in [1, 3, 3]]) == 1) + rs1 = self.app.ResultSet([self.app.AsyncResult(1)]) + rs2 = self.app.ResultSet([self.app.AsyncResult(1)]) + self.assertTrue(rs1 == rs2) def test_get(self): x = self.app.ResultSet([self.app.AsyncResult(t) for t in [1, 2, 3]]) @@ -276,11 +338,41 
@@ def test_get(self): x.get() self.assertTrue(x.join_native.called) + def test_eq_ne(self): + g1 = self.app.ResultSet([ + self.app.AsyncResult('id1'), + self.app.AsyncResult('id2'), + ]) + g2 = self.app.ResultSet([ + self.app.AsyncResult('id1'), + self.app.AsyncResult('id2'), + ]) + g3 = self.app.ResultSet([ + self.app.AsyncResult('id3'), + self.app.AsyncResult('id1'), + ]) + self.assertEqual(g1, g2) + self.assertNotEqual(g1, g3) + self.assertNotEqual(g1, object()) + + def test_takes_app_from_first_task(self): + x = ResultSet([self.app.AsyncResult('id1')]) + self.assertIs(x.app, x.results[0].app) + x.app = self.app + self.assertIs(x.app, self.app) + + def test_get_empty(self): + x = self.app.ResultSet([]) + self.assertIsNone(x.supports_native_join) + x.join = Mock(name='join') + x.get() + self.assertTrue(x.join.called) + def test_add(self): - x = self.app.ResultSet([1]) - x.add(2) + x = self.app.ResultSet([self.app.AsyncResult(1)]) + x.add(self.app.AsyncResult(2)) self.assertEqual(len(x), 2) - x.add(2) + x.add(self.app.AsyncResult(2)) self.assertEqual(len(x), 2) @contextmanager @@ -387,7 +479,7 @@ def get(self, **kwargs): return self.result -class SimpleBackend(object): +class SimpleBackend(SyncBackendMixin): ids = [] def __init__(self, ids=[]): @@ -398,31 +490,6 @@ def get_many(self, *args, **kwargs): for i, id in enumerate(self.ids)) -class test_TaskSetResult(AppCase): - - def setup(self): - self.size = 10 - self.ts = TaskSetResult(uuid(), make_mock_group(self.app, self.size)) - - def test_total(self): - self.assertEqual(self.ts.total, self.size) - - def test_compat_properties(self): - self.assertEqual(self.ts.taskset_id, self.ts.id) - self.ts.taskset_id = 'foo' - self.assertEqual(self.ts.taskset_id, 'foo') - - def test_compat_subtasks_kwarg(self): - x = TaskSetResult(uuid(), subtasks=[1, 2, 3]) - self.assertEqual(x.results, [1, 2, 3]) - - def test_itersubtasks(self): - it = self.ts.itersubtasks() - - for i, t in enumerate(it): - self.assertEqual(t.get(), i) - - 
class test_GroupResult(AppCase): def setup(self): @@ -438,6 +505,24 @@ def test_is_pickleable(self): ts2 = self.app.GroupResult(uuid(), [self.app.AsyncResult(uuid())]) self.assertEqual(pickle.loads(pickle.dumps(ts2)), ts2) + @depends_on_current_app + def test_reduce(self): + ts = self.app.GroupResult(uuid(), [self.app.AsyncResult(uuid())]) + fun, args = ts.__reduce__() + ts2 = fun(*args) + self.assertEqual(ts2.id, ts.id) + self.assertEqual(ts, ts2) + + def test_eq_ne(self): + ts = self.app.GroupResult(uuid(), [self.app.AsyncResult(uuid())]) + ts2 = self.app.GroupResult(ts.id, ts.results) + ts3 = self.app.GroupResult(uuid(), [self.app.AsyncResult(uuid())]) + ts4 = self.app.GroupResult(ts.id, [self.app.AsyncResult(uuid())]) + self.assertEqual(ts, ts2) + self.assertNotEqual(ts, ts3) + self.assertNotEqual(ts, ts4) + self.assertNotEqual(ts, object()) + def test_len(self): self.assertEqual(len(self.ts), self.size) @@ -445,7 +530,7 @@ def test_eq_other(self): self.assertFalse(self.ts == 1) @depends_on_current_app - def test_reduce(self): + def test_pickleable(self): self.assertTrue(pickle.loads(pickle.dumps(self.ts))) def test_iterate_raises(self): @@ -477,8 +562,8 @@ def test_save_restore(self): ts.save() with self.assertRaises(AttributeError): ts.save(backend=object()) - self.assertEqual(self.app.GroupResult.restore(ts.id).subtasks, - ts.subtasks) + self.assertEqual(self.app.GroupResult.restore(ts.id).results, + ts.results) ts.delete() self.assertIsNone(self.app.GroupResult.restore(ts.id)) with self.assertRaises(AttributeError): @@ -486,12 +571,18 @@ def test_save_restore(self): def test_join_native(self): backend = SimpleBackend() - subtasks = [self.app.AsyncResult(uuid(), backend=backend) - for i in range(10)] - ts = self.app.GroupResult(uuid(), subtasks) - backend.ids = [subtask.id for subtask in subtasks] + results = [self.app.AsyncResult(uuid(), backend=backend) + for i in range(10)] + ts = self.app.GroupResult(uuid(), results) + ts.app.backend = backend + 
backend.ids = [result.id for result in results] res = ts.join_native() self.assertEqual(res, list(range(10))) + callback = Mock(name='callback') + self.assertFalse(ts.join_native(callback=callback)) + callback.assert_has_calls([ + call(r.id, i) for i, r in enumerate(ts.results) + ]) def test_join_native_raises(self): ts = self.app.GroupResult(uuid(), [self.app.AsyncResult(uuid())]) @@ -523,10 +614,11 @@ def test_children_is_results(self): def test_iter_native(self): backend = SimpleBackend() - subtasks = [self.app.AsyncResult(uuid(), backend=backend) - for i in range(10)] - ts = self.app.GroupResult(uuid(), subtasks) - backend.ids = [subtask.id for subtask in subtasks] + results = [self.app.AsyncResult(uuid(), backend=backend) + for i in range(10)] + ts = self.app.GroupResult(uuid(), results) + ts.app.backend = backend + backend.ids = [result.id for result in results] self.assertEqual(len(list(ts.iter_native())), 10) def test_iterate_yields(self): @@ -559,6 +651,9 @@ def test_join_timeout(self): ar4.get = Mock() ts2 = self.app.GroupResult(uuid(), [ar4]) self.assertTrue(ts2.join(timeout=0.1)) + callback = Mock(name='callback') + self.assertFalse(ts2.join(timeout=0.1, callback=callback)) + callback.assert_called_with(ar4.id, ar4.get()) def test_iter_native_when_empty_group(self): ts = self.app.GroupResult(uuid(), []) @@ -583,6 +678,17 @@ def test_successful(self): def test_failed(self): self.assertFalse(self.ts.failed()) + def test_maybe_throw(self): + self.ts.results = [Mock(name='r1')] + self.ts.maybe_throw() + self.ts.results[0].maybe_throw.assert_called_with( + callback=None, propagate=True, + ) + + def test_join__on_message(self): + with self.assertRaises(ImproperlyConfigured): + self.ts.join(on_message=Mock()) + def test_waiting(self): self.assertFalse(self.ts.waiting()) @@ -605,12 +711,13 @@ def test_result(self): class test_failed_AsyncResult(test_GroupResult): def setup(self): + self.app.conf.result_serializer = 'pickle' self.size = 11 - subtasks = 
make_mock_group(self.app, 10) + results = make_mock_group(self.app, 10) failed = mock_task('ts11', states.FAILURE, KeyError('Baz')) save_result(self.app, failed) failed_res = self.app.AsyncResult(failed['id']) - self.ts = self.app.GroupResult(uuid(), subtasks + [failed_res]) + self.ts = self.app.GroupResult(uuid(), results + [failed_res]) def test_completed_count(self): self.assertEqual(self.ts.completed_count(), len(self.ts) - 1) diff --git a/celery/tests/tasks/test_tasks.py b/celery/tests/tasks/test_tasks.py index 93a782ecc..1a02d9d18 100644 --- a/celery/tests/tasks/test_tasks.py +++ b/celery/tests/tasks/test_tasks.py @@ -6,13 +6,17 @@ from celery import Task -from celery.exceptions import Retry +from celery import group +from celery.app.task import _reprtask +from celery.exceptions import Ignore, Retry from celery.five import items, range, string_t from celery.result import EagerResult from celery.utils import uuid from celery.utils.timeutils import parse_iso8601 -from celery.tests.case import AppCase, depends_on_current_app, patch +from celery.tests.case import ( + AppCase, ContextMock, Mock, depends_on_current_app, patch, +) def return_True(*args, **kwargs): @@ -100,6 +104,20 @@ def retry_task_customexc(self, arg1, arg2, kwarg=1, **kwargs): raise self.retry(countdown=0, exc=exc) self.retry_task_customexc = retry_task_customexc + @self.app.task(bind=True, autoretry_for=(ZeroDivisionError,), + shared=False) + def autoretry_task_no_kwargs(self, a, b): + self.iterations += 1 + return a/b + self.autoretry_task_no_kwargs = autoretry_task_no_kwargs + + @self.app.task(bind=True, autoretry_for=(ZeroDivisionError,), + retry_kwargs={'max_retries': 5}, shared=False) + def autoretry_task(self, a, b): + self.iterations += 1 + return a/b + self.autoretry_task = autoretry_task + class MyCustomException(Exception): """Random custom exception.""" @@ -124,10 +142,31 @@ def test_retry_no_args(self): self.retry_task_noargs.apply(propagate=True).get() 
self.assertEqual(self.retry_task_noargs.iterations, 4) + def test_signature_from_request__passes_headers(self): + self.retry_task.push_request() + self.retry_task.request.headers = {'custom': 10.1} + sig = self.retry_task.signature_from_request() + self.assertEqual(sig.options['headers']['custom'], 10.1) + + def test_signature_from_request__delivery_info(self): + self.retry_task.push_request() + self.retry_task.request.delivery_info = { + 'exchange': 'testex', + 'routing_key': 'testrk', + } + sig = self.retry_task.signature_from_request() + self.assertEqual(sig.options['exchange'], 'testex') + self.assertEqual(sig.options['routing_key'], 'testrk') + def test_retry_kwargs_can_be_empty(self): self.retry_task_mockapply.push_request() try: with self.assertRaises(Retry): + import sys + try: + sys.exc_clear() + except AttributeError: + pass self.retry_task_mockapply.retry(args=[4, 4], kwargs=None) finally: self.retry_task_mockapply.pop_request() @@ -188,6 +227,18 @@ def test_max_retries_exceeded(self): result.get() self.assertEqual(self.retry_task.iterations, 2) + def test_autoretry_no_kwargs(self): + self.autoretry_task_no_kwargs.max_retries = 3 + self.autoretry_task_no_kwargs.iterations = 0 + self.autoretry_task_no_kwargs.apply((1, 0)) + self.assertEqual(self.autoretry_task_no_kwargs.iterations, 4) + + def test_autoretry(self): + self.autoretry_task.max_retries = 3 + self.autoretry_task.iterations = 0 + self.autoretry_task.apply((1, 0)) + self.assertEqual(self.autoretry_task.iterations, 6) + class test_canvas_utils(TasksCase): @@ -222,6 +273,20 @@ def xxx(): pass self.assertIs(pickle.loads(pickle.dumps(xxx)), xxx.app.tasks[xxx.name]) + @patch('celery.app.task.current_app') + @depends_on_current_app + def test_bind__no_app(self, current_app): + class XTask(Task): + _app = None + XTask._app = None + XTask.__bound__ = False + XTask.bind = Mock(name='bind') + self.assertIs(XTask.app, current_app) + XTask.bind.assert_called_with(current_app) + + def 
test_reprtask__no_fmt(self): + self.assertTrue(_reprtask(self.mytask)) + def test_AsyncResult(self): task_id = uuid() result = self.retry_task.AsyncResult(task_id) @@ -230,7 +295,7 @@ def test_AsyncResult(self): def assertNextTaskDataEqual(self, consumer, presult, task_name, test_eta=False, test_expires=False, **kwargs): - next_task = consumer.queues[0].get(accept=['pickle']) + next_task = consumer.queues[0].get(accept=['pickle', 'json']) task_data = next_task.decode() self.assertEqual(task_data['id'], presult.id) self.assertEqual(task_data['task'], task_name) @@ -256,12 +321,12 @@ class IncompleteTask(Task): IncompleteTask().run() def test_task_kwargs_must_be_dictionary(self): - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): self.increment_counter.apply_async([], 'str') def test_task_args_must_be_list(self): with self.assertRaises(ValueError): - self.increment_counter.apply_async('str', {}) + self.increment_counter.apply_async('s', {}) def test_regular_task(self): self.assertIsInstance(self.mytask, Task) @@ -328,6 +393,47 @@ def test_regular_task(self): self.mytask.backend.mark_as_done(presult.id, result=None) self.assertTrue(presult.successful()) + def test_send_event(self): + mytask = self.mytask._get_current_object() + mytask.app.events = Mock(name='events') + mytask.app.events.attach_mock(ContextMock(), 'default_dispatcher') + mytask.request.id = 'fb' + mytask.send_event('task-foo', id=3122) + mytask.app.events.default_dispatcher().send.assert_called_with( + 'task-foo', uuid='fb', id=3122, + ) + + def test_replace(self): + sig1 = Mock(name='sig1') + with self.assertRaises(Ignore): + self.mytask.replace(sig1) + + def test_replace__group(self): + c = group([self.mytask.s()], app=self.app) + c.freeze = Mock(name='freeze') + c.delay = Mock(name='delay') + self.mytask.request.id = 'id' + self.mytask.request.group = 'group' + self.mytask.request.root_id = 'root_id', + with self.assertRaises(Ignore): + self.mytask.replace(c) + + def 
test_send_error_email_enabled(self): + mytask = self.increment_counter._get_current_object() + mytask.send_error_emails = True + mytask.disable_error_emails = False + mytask.ErrorMail = Mock(name='ErrorMail') + context = Mock(name='context') + exc = Mock(name='context') + mytask.send_error_email(context, exc, foo=1) + mytask.ErrorMail.assert_called_with(mytask, foo=1) + mytask.ErrorMail().send.assert_called_with(context, exc) + + def test_add_trail__no_trail(self): + mytask = self.increment_counter._get_current_object() + mytask.trail = False + mytask.add_trail('foo') + def test_repr_v2_compat(self): self.mytask.__v2_compat__ = True self.assertIn('v2 compatible', repr(self.mytask)) @@ -353,15 +459,6 @@ def test_context_get(self): finally: self.mytask.pop_request() - def test_task_class_repr(self): - self.assertIn('class Task of', repr(self.mytask.app.Task)) - self.mytask.app.Task._app = None - self.assertIn('unbound', repr(self.mytask.app.Task, )) - - def test_bind_no_magic_kwargs(self): - self.mytask.accept_magic_kwargs = None - self.mytask.bind(self.mytask.app) - def test_annotate(self): with patch('celery.app.task.resolve_all_annotations') as anno: anno.return_value = [{'FOO': 'BAR'}] @@ -381,11 +478,6 @@ def test_after_return(self): finally: self.mytask.pop_request() - def test_send_task_sent_event(self): - with self.app.connection() as conn: - self.app.conf.CELERY_SEND_TASK_SENT_EVENT = True - self.assertTrue(self.app.amqp.TaskProducer(conn).send_sent_event) - def test_update_state(self): @self.app.task(shared=False) @@ -429,8 +521,8 @@ def test_apply_throw(self): with self.assertRaises(KeyError): self.raising.apply(throw=True) - def test_apply_with_CELERY_EAGER_PROPAGATES_EXCEPTIONS(self): - self.app.conf.CELERY_EAGER_PROPAGATES_EXCEPTIONS = True + def test_apply_with_task_eager_propagates(self): + self.app.conf.task_eager_propagates = True with self.assertRaises(KeyError): self.raising.apply() diff --git a/celery/tests/tasks/test_trace.py 
b/celery/tests/tasks/test_trace.py index 12c6280ef..47563a73b 100644 --- a/celery/tests/tasks/test_trace.py +++ b/celery/tests/tasks/test_trace.py @@ -1,22 +1,35 @@ from __future__ import absolute_import -from celery import uuid +from kombu.exceptions import EncodeError + +from celery import group, uuid from celery import signals from celery import states -from celery.exceptions import Ignore, Retry +from celery.exceptions import Ignore, Retry, Reject from celery.app.trace import ( TraceInfo, - eager_trace_task, + build_tracer, + get_log_policy, + log_policy_reject, + log_policy_ignore, + log_policy_internal, + log_policy_expected, + log_policy_unexpected, trace_task, + _trace_task_ret, + _fast_trace_task, setup_worker_optimizations, reset_worker_optimizations, ) from celery.tests.case import AppCase, Mock, patch -def trace(app, task, args=(), kwargs={}, propagate=False, **opts): - return eager_trace_task(task, 'id-1', args, kwargs, - propagate=propagate, app=app, **opts) +def trace(app, task, args=(), kwargs={}, + propagate=False, eager=True, request=None, **opts): + t = build_tracer(task.name, task, + eager=eager, propagate=propagate, app=app, **opts) + ret = t('id-1', args, kwargs, request) + return ret.retval, ret.info class TraceCase(AppCase): @@ -57,6 +70,33 @@ def add_with_success(x, y): self.trace(add_with_success, (2, 2), {}) self.assertTrue(add_with_success.on_success.called) + def test_get_log_policy(self): + einfo = Mock(name='einfo') + einfo.internal = False + self.assertIs( + get_log_policy(self.add, einfo, Reject()), + log_policy_reject, + ) + self.assertIs( + get_log_policy(self.add, einfo, Ignore()), + log_policy_ignore, + ) + self.add.throws = (TypeError,) + self.assertIs( + get_log_policy(self.add, einfo, KeyError()), + log_policy_unexpected, + ) + self.assertIs( + get_log_policy(self.add, einfo, TypeError()), + log_policy_expected, + ) + einfo2 = Mock(name='einfo2') + einfo2.internal = True + self.assertIs( + get_log_policy(self.add, einfo2, 
KeyError()), + log_policy_internal, + ) + def test_trace_after_return(self): @self.app.task(shared=False, after_return=Mock()) @@ -100,8 +140,14 @@ def add(x, y): return x + y add.backend = Mock() - self.trace(add, (2, 2), {}, request={'chord': uuid()}) - add.backend.on_chord_part_return.assert_called_with(add, 'SUCCESS', 4) + request = {'chord': uuid()} + self.trace(add, (2, 2), {}, request=request) + self.assertTrue(add.backend.mark_as_done.called) + args, kwargs = add.backend.mark_as_done.call_args + self.assertEqual(args[0], 'id-1') + self.assertEqual(args[1], 4) + self.assertEqual(args[2].chord, request['chord']) + self.assertFalse(args[3]) def test_when_backend_cleanup_raises(self): @@ -125,25 +171,125 @@ def ignored(): retval, info = self.trace(ignored, (), {}) self.assertEqual(info.state, states.IGNORED) + def test_when_Reject(self): + + @self.app.task(shared=False) + def rejecting(): + raise Reject() + + retval, info = self.trace(rejecting, (), {}) + self.assertEqual(info.state, states.REJECTED) + + def test_backend_cleanup_raises(self): + self.add.backend.process_cleanup = Mock() + self.add.backend.process_cleanup.side_effect = RuntimeError() + self.trace(self.add, (2, 2), {}) + + @patch('celery.canvas.maybe_signature') + def test_callbacks__scalar(self, maybe_signature): + sig = Mock(name='sig') + request = {'callbacks': [sig], 'root_id': 'root'} + maybe_signature.return_value = sig + retval, _ = self.trace(self.add, (2, 2), {}, request=request) + sig.apply_async.assert_called_with( + (4,), parent_id='id-1', root_id='root', + ) + + @patch('celery.canvas.maybe_signature') + def test_chain_proto2(self, maybe_signature): + sig = Mock(name='sig') + sig2 = Mock(name='sig2') + request = {'chain': [sig2, sig], 'root_id': 'root'} + maybe_signature.return_value = sig + retval, _ = self.trace(self.add, (2, 2), {}, request=request) + sig.apply_async.assert_called_with( + (4, ), parent_id='id-1', root_id='root', + chain=[sig2], + ) + + 
@patch('celery.canvas.maybe_signature') + def test_callbacks__EncodeError(self, maybe_signature): + sig = Mock(name='sig') + request = {'callbacks': [sig], 'root_id': 'root'} + maybe_signature.return_value = sig + sig.apply_async.side_effect = EncodeError() + retval, einfo = self.trace(self.add, (2, 2), {}, request=request) + self.assertEqual(einfo.state, states.FAILURE) + + @patch('celery.canvas.maybe_signature') + @patch('celery.app.trace.group.apply_async') + def test_callbacks__sigs(self, group_, maybe_signature): + sig1 = Mock(name='sig') + sig2 = Mock(name='sig2') + sig3 = group([Mock(name='g1'), Mock(name='g2')], app=self.app) + sig3.apply_async = Mock(name='gapply') + request = {'callbacks': [sig1, sig3, sig2], 'root_id': 'root'} + + def passt(s, *args, **kwargs): + return s + maybe_signature.side_effect = passt + retval, _ = self.trace(self.add, (2, 2), {}, request=request) + group_.assert_called_with( + (4,), parent_id='id-1', root_id='root', + ) + sig3.apply_async.assert_called_with( + (4,), parent_id='id-1', root_id='root', + ) + + @patch('celery.canvas.maybe_signature') + @patch('celery.app.trace.group.apply_async') + def test_callbacks__only_groups(self, group_, maybe_signature): + sig1 = group([Mock(name='g1'), Mock(name='g2')], app=self.app) + sig2 = group([Mock(name='g3'), Mock(name='g4')], app=self.app) + sig1.apply_async = Mock(name='gapply') + sig2.apply_async = Mock(name='gapply') + request = {'callbacks': [sig1, sig2], 'root_id': 'root'} + + def passt(s, *args, **kwargs): + return s + maybe_signature.side_effect = passt + retval, _ = self.trace(self.add, (2, 2), {}, request=request) + sig1.apply_async.assert_called_with( + (4,), parent_id='id-1', root_id='root', + ) + sig2.apply_async.assert_called_with( + (4,), parent_id='id-1', root_id='root', + ) + def test_trace_SystemExit(self): with self.assertRaises(SystemExit): - self.trace(self.raises, (SystemExit(), ), {}) + self.trace(self.raises, (SystemExit(),), {}) def test_trace_Retry(self): exc 
= Retry('foo', 'bar') - _, info = self.trace(self.raises, (exc, ), {}) + _, info = self.trace(self.raises, (exc,), {}) self.assertEqual(info.state, states.RETRY) self.assertIs(info.retval, exc) def test_trace_exception(self): exc = KeyError('foo') - _, info = self.trace(self.raises, (exc, ), {}) + _, info = self.trace(self.raises, (exc,), {}) self.assertEqual(info.state, states.FAILURE) self.assertIs(info.retval, exc) + def test_trace_task_ret__no_content_type(self): + _trace_task_ret( + self.add.name, 'id1', {}, ((2, 2), {}, {}), None, None, + app=self.app, + ) + + def test_fast_trace_task__no_content_type(self): + self.app.tasks[self.add.name].__trace__ = build_tracer( + self.add.name, self.add, app=self.app, + ) + _fast_trace_task( + self.add.name, 'id1', {}, ((2, 2), {}, {}), None, None, + app=self.app, _loc=[self.app.tasks, {}, 'hostname'] + ) + def test_trace_exception_propagate(self): with self.assertRaises(KeyError): - self.trace(self.raises, (KeyError('foo'), ), {}, propagate=True) + self.trace(self.raises, (KeyError('foo'),), {}, propagate=True) @patch('celery.app.trace.build_tracer') @patch('celery.app.trace.report_internal_error') @@ -164,17 +310,26 @@ def xtask(): class test_TraceInfo(TraceCase): class TI(TraceInfo): - __slots__ = TraceInfo.__slots__ + ('__dict__', ) + __slots__ = TraceInfo.__slots__ + ('__dict__',) def test_handle_error_state(self): x = self.TI(states.FAILURE) x.handle_failure = Mock() - x.handle_error_state(self.add_cast) + x.handle_error_state(self.add_cast, self.add_cast.request) x.handle_failure.assert_called_with( - self.add_cast, + self.add_cast, self.add_cast.request, store_errors=self.add_cast.store_errors_even_if_ignored, + call_errbacks=True, ) + @patch('celery.app.trace.ExceptionInfo') + def test_handle_reject(self, ExceptionInfo): + x = self.TI(states.FAILURE) + x._log_error = Mock(name='log_error') + req = Mock(name='req') + x.handle_reject(self.add, req) + x._log_error.assert_called_with(self.add, req, ExceptionInfo()) + 
class test_stackprotection(AppCase): diff --git a/celery/tests/utils/test_datastructures.py b/celery/tests/utils/test_datastructures.py index f26fe86f7..f8ff56cda 100644 --- a/celery/tests/utils/test_datastructures.py +++ b/celery/tests/utils/test_datastructures.py @@ -3,6 +3,9 @@ import pickle import sys +from collections import Mapping +from itertools import count + from billiard.einfo import ExceptionInfo from time import time @@ -14,18 +17,15 @@ DependencyGraph, ) from celery.five import items +from celery.utils.objects import Bunch -from celery.tests.case import Case, Mock, WhateverIO, SkipTest, patch - - -class Object(object): - pass +from celery.tests.case import Case, Mock, WhateverIO, SkipTest class test_DictAttribute(Case): def test_get_set_keys_values_items(self): - x = DictAttribute(Object()) + x = DictAttribute(Bunch()) x['foo'] = 'The quick brown fox' self.assertEqual(x['foo'], 'The quick brown fox') self.assertEqual(x['foo'], x.obj.foo) @@ -43,19 +43,20 @@ def test_get_set_keys_values_items(self): self.assertIn('The quick yellow fox', list(x.values())) def test_setdefault(self): - x = DictAttribute(Object()) - self.assertEqual(x.setdefault('foo', 'NEW'), 'NEW') - self.assertEqual(x.setdefault('foo', 'XYZ'), 'NEW') + x = DictAttribute(Bunch()) + x.setdefault('foo', 'NEW') + self.assertEqual(x['foo'], 'NEW') + x.setdefault('foo', 'XYZ') + self.assertEqual(x['foo'], 'NEW') def test_contains(self): - x = DictAttribute(Object()) + x = DictAttribute(Bunch()) x['foo'] = 1 self.assertIn('foo', x) self.assertNotIn('bar', x) def test_items(self): - obj = Object() - obj.attr1 = 1 + obj = Bunch(attr1=1) x = DictAttribute(obj) x['attr2'] = 2 self.assertEqual(x['attr1'], 1) @@ -71,8 +72,10 @@ def setUp(self): 'both': 1}]) def test_setdefault(self): - self.assertEqual(self.view.setdefault('both', 36), 2) - self.assertEqual(self.view.setdefault('new', 36), 36) + self.view.setdefault('both', 36) + self.assertEqual(self.view['both'], 2) + self.view.setdefault('new', 
36) + self.assertEqual(self.view['new'], 36) def test_get(self): self.assertEqual(self.view.get('both'), 2) @@ -116,8 +119,7 @@ def test_add_defaults_dict(self): self.assertEqual(self.view.foo, 10) def test_add_defaults_object(self): - defaults = Object() - defaults.foo = 10 + defaults = Bunch(foo=10) self.view.add_defaults(defaults) self.assertEqual(self.view.foo, 10) @@ -158,7 +160,7 @@ def test_exception_info(self): self.assertEqual(str(einfo), einfo.traceback) self.assertIsInstance(einfo.exception, LookupError) self.assertTupleEqual( - einfo.exception.args, ('The quick brown fox jumps...', ), + einfo.exception.args, ('The quick brown fox jumps...',), ) self.assertTrue(einfo.traceback) @@ -185,44 +187,50 @@ def test_add(self): self.assertIn(n, s) self.assertNotIn('foo', s) + s = LimitedSet(maxlen=10) + for i in range(150): + s.add(i) + self.assertLessEqual(len(s), 10) + + # make sure heap is not leaking: + self.assertLessEqual( + len(s._heap), + len(s) * (100. + s.max_heap_percent_overload) / 100, + ) + def test_purge(self): - s = LimitedSet(maxlen=None) + # purge now enforces rules + # cant purge(1) now. but .purge(now=...) 
still works + s = LimitedSet(maxlen=10) [s.add(i) for i in range(10)] s.maxlen = 2 - s.purge(1) - self.assertEqual(len(s), 9) - s.purge(None) + s.purge() self.assertEqual(len(s), 2) # expired - s = LimitedSet(maxlen=None, expires=1) + s = LimitedSet(maxlen=10, expires=1) [s.add(i) for i in range(10)] s.maxlen = 2 - s.purge(1, now=lambda: time() + 100) - self.assertEqual(len(s), 9) - s.purge(None, now=lambda: time() + 100) - self.assertEqual(len(s), 2) + s.purge(now=time() + 100) + self.assertEqual(len(s), 0) # not expired s = LimitedSet(maxlen=None, expires=1) [s.add(i) for i in range(10)] s.maxlen = 2 - s.purge(1, now=lambda: time() - 100) - self.assertEqual(len(s), 10) - s.purge(None, now=lambda: time() - 100) - self.assertEqual(len(s), 10) + s.purge(now=lambda: time() - 100) + self.assertEqual(len(s), 2) - s = LimitedSet(maxlen=None) - [s.add(i) for i in range(10)] - s.maxlen = 2 - with patch('celery.datastructures.heappop') as hp: - hp.side_effect = IndexError() - s.purge() - hp.assert_called_with(s._heap) - with patch('celery.datastructures.heappop') as hp: - s._data = dict((i * 2, i * 2) for i in range(10)) - s.purge() - self.assertEqual(hp.call_count, 10) + # expired -> minsize + s = LimitedSet(maxlen=10, minlen=10, expires=1) + [s.add(i) for i in range(20)] + s.minlen = 3 + s.purge(now=time() + 3) + self.assertEqual(s.minlen, len(s)) + self.assertLessEqual( + len(s._heap), + s.maxlen * (100. 
+ s.max_heap_percent_overload) / 100, + ) def test_pickleable(self): s = LimitedSet(maxlen=2) @@ -255,6 +263,7 @@ def test_discard(self): s.add('foo') s.discard('foo') self.assertNotIn('foo', s) + self.assertEqual(len(s._data), 0) s.discard('foo') def test_clear(self): @@ -279,11 +288,54 @@ def test_update(self): s2.update(['do', 're']) self.assertItemsEqual(list(s2), ['do', 're']) + s1 = LimitedSet(maxlen=10, expires=None) + s2 = LimitedSet(maxlen=10, expires=None) + s3 = LimitedSet(maxlen=10, expires=None) + s4 = LimitedSet(maxlen=10, expires=None) + s5 = LimitedSet(maxlen=10, expires=None) + for i in range(12): + s1.add(i) + s2.add(i*i) + s3.update(s1) + s3.update(s2) + s4.update(s1.as_dict()) + s4.update(s2.as_dict()) + s5.update(s1._data) # revoke is using this + s5.update(s2._data) + self.assertEqual(s3, s4) + self.assertEqual(s3, s5) + s2.update(s4) + s4.update(s2) + self.assertEqual(s2, s4) + + def test_iterable_and_ordering(self): + s = LimitedSet(maxlen=35, expires=None) + # we use a custom clock here, as time.time() does not have enough + # precision when called quickly (can return the same value twice). 
+ clock = count(1) + for i in reversed(range(15)): + s.add(i, now=next(clock)) + j = 40 + for i in s: + self.assertLess(i, j) # each item is smaller and smaller + j = i + self.assertEqual(i, 0) # last item is zero + + def test_pop_and_ordering_again(self): + s = LimitedSet(maxlen=5) + for i in range(10): + s.add(i) + j = -1 + for _ in range(5): + i = s.pop() + self.assertLess(j, i) + i = s.pop() + self.assertEqual(i, None) def test_as_dict(self): s = LimitedSet(maxlen=2) s.add('foo') - self.assertIsInstance(s.as_dict(), dict) + self.assertIsInstance(s.as_dict(), Mapping) class test_AttributeDict(Case): diff --git a/celery/tests/utils/test_debug.py b/celery/tests/utils/test_debug.py new file mode 100644 index 000000000..739954a66 --- /dev/null +++ b/celery/tests/utils/test_debug.py @@ -0,0 +1,98 @@ +from __future__ import absolute_import, unicode_literals + +from celery.utils import debug + +from celery.tests.case import Case, Mock, patch + + +class test_on_blocking(Case): + + @patch('inspect.getframeinfo') + def test_on_blocking(self, getframeinfo): + frame = Mock(name='frame') + with self.assertRaises(RuntimeError): + debug._on_blocking(1, frame) + getframeinfo.assert_called_with(frame) + + +class test_blockdetection(Case): + + @patch('celery.utils.debug.signals') + def test_context(self, signals): + with debug.blockdetection(10): + signals.arm_alarm.assert_called_with(10) + signals.__setitem__.assert_called_with('ALRM', debug._on_blocking) + signals.__setitem__.assert_called_with('ALRM', signals['ALRM']) + signals.reset_alarm.assert_called_with() + + +class test_sample_mem(Case): + + @patch('celery.utils.debug.mem_rss') + def test_sample_mem(self, mem_rss): + prev, debug._mem_sample = debug._mem_sample, [] + try: + debug.sample_mem() + self.assertIs(debug._mem_sample[0], mem_rss()) + finally: + debug._mem_sample = prev + + +class test_sample(Case): + + def test_sample(self): + x = list(range(100)) + self.assertEqual( + list(debug.sample(x, 10)), + [0, 10, 20, 30, 
40, 50, 60, 70, 80, 90], + ) + x = list(range(91)) + self.assertEqual( + list(debug.sample(x, 10)), + [0, 9, 18, 27, 36, 45, 54, 63, 72, 81], + ) + + +class test_hfloat(Case): + + def test_hfloat(self): + self.assertEqual(str(debug.hfloat(10, 5)), "10") + self.assertEqual(str(debug.hfloat(10.45645234234, 5)), "10.456") + + +class test_humanbytes(Case): + + def test_humanbytes(self): + self.assertEqual(debug.humanbytes(2 ** 20), "1MB") + self.assertEqual(debug.humanbytes(4 * 2 ** 20), "4MB") + self.assertEqual(debug.humanbytes(2 ** 16), "64kB") + self.assertEqual(debug.humanbytes(2 ** 16), "64kB") + self.assertEqual(debug.humanbytes(2 ** 8), "256b") + + +class test_mem_rss(Case): + + @patch('celery.utils.debug.ps') + @patch('celery.utils.debug.humanbytes') + def test_mem_rss(self, humanbytes, ps): + ret = debug.mem_rss() + ps.assert_called_with() + ps().get_memory_info.assert_called_with() + humanbytes.assert_called_with(ps().get_memory_info().rss) + self.assertIs(ret, humanbytes()) + ps.return_value = None + self.assertIsNone(debug.mem_rss()) + + +class test_ps(Case): + + @patch('celery.utils.debug.Process') + @patch('os.getpid') + def test_ps(self, getpid, Process): + prev, debug._process = debug._process, None + try: + debug.ps() + Process.assert_called_with(getpid()) + self.assertIs(debug._process, Process()) + finally: + debug._process = prev diff --git a/celery/tests/utils/test_dispatcher.py b/celery/tests/utils/test_dispatcher.py index 72a36f3b3..9a3dcd8ab 100644 --- a/celery/tests/utils/test_dispatcher.py +++ b/celery/tests/utils/test_dispatcher.py @@ -57,18 +57,22 @@ def _testIsClean(self, signal): def test_exact(self): a_signal.connect(receiver_1_arg, sender=self) - expected = [(receiver_1_arg, 'test')] - result = a_signal.send(sender=self, val='test') - self.assertEqual(result, expected) - a_signal.disconnect(receiver_1_arg, sender=self) + try: + expected = [(receiver_1_arg, 'test')] + result = a_signal.send(sender=self, val='test') + 
self.assertEqual(result, expected) + finally: + a_signal.disconnect(receiver_1_arg, sender=self) self._testIsClean(a_signal) def test_ignored_sender(self): a_signal.connect(receiver_1_arg) - expected = [(receiver_1_arg, 'test')] - result = a_signal.send(sender=self, val='test') - self.assertEqual(result, expected) - a_signal.disconnect(receiver_1_arg) + try: + expected = [(receiver_1_arg, 'test')] + result = a_signal.send(sender=self, val='test') + self.assertEqual(result, expected) + finally: + a_signal.disconnect(receiver_1_arg) self._testIsClean(a_signal) def test_garbage_collected(self): @@ -83,19 +87,22 @@ def test_garbage_collected(self): def test_multiple_registration(self): a = Callable() - a_signal.connect(a) - a_signal.connect(a) - a_signal.connect(a) - a_signal.connect(a) - a_signal.connect(a) - a_signal.connect(a) - result = a_signal.send(sender=self, val='test') - self.assertEqual(len(result), 1) - self.assertEqual(len(a_signal.receivers), 1) - del a - del result - garbage_collect() - self._testIsClean(a_signal) + result = None + try: + a_signal.connect(a) + a_signal.connect(a) + a_signal.connect(a) + a_signal.connect(a) + a_signal.connect(a) + a_signal.connect(a) + result = a_signal.send(sender=self, val='test') + self.assertEqual(len(result), 1) + self.assertEqual(len(a_signal.receivers), 1) + finally: + del a + del result + garbage_collect() + self._testIsClean(a_signal) def test_uid_registration(self): @@ -106,9 +113,11 @@ def uid_based_receiver_2(**kwargs): pass a_signal.connect(uid_based_receiver_1, dispatch_uid='uid') - a_signal.connect(uid_based_receiver_2, dispatch_uid='uid') - self.assertEqual(len(a_signal.receivers), 1) - a_signal.disconnect(dispatch_uid='uid') + try: + a_signal.connect(uid_based_receiver_2, dispatch_uid='uid') + self.assertEqual(len(a_signal.receivers), 1) + finally: + a_signal.disconnect(dispatch_uid='uid') self._testIsClean(a_signal) def test_robust(self): @@ -117,22 +126,25 @@ def fails(val, **kwargs): raise 
ValueError('this') a_signal.connect(fails) - result = a_signal.send_robust(sender=self, val='test') - err = result[0][1] - self.assertTrue(isinstance(err, ValueError)) - self.assertEqual(err.args, ('this',)) - a_signal.disconnect(fails) + try: + a_signal.send(sender=self, val='test') + finally: + a_signal.disconnect(fails) self._testIsClean(a_signal) def test_disconnection(self): receiver_1 = Callable() receiver_2 = Callable() receiver_3 = Callable() - a_signal.connect(receiver_1) - a_signal.connect(receiver_2) - a_signal.connect(receiver_3) - a_signal.disconnect(receiver_1) - del receiver_2 - garbage_collect() - a_signal.disconnect(receiver_3) + try: + try: + a_signal.connect(receiver_1) + a_signal.connect(receiver_2) + a_signal.connect(receiver_3) + finally: + a_signal.disconnect(receiver_1) + del receiver_2 + garbage_collect() + finally: + a_signal.disconnect(receiver_3) self._testIsClean(a_signal) diff --git a/celery/tests/utils/test_functional.py b/celery/tests/utils/test_functional.py index 79085417c..2b37e140b 100644 --- a/celery/tests/utils/test_functional.py +++ b/celery/tests/utils/test_functional.py @@ -1,20 +1,38 @@ from __future__ import absolute_import import pickle +import sys + +from itertools import count from kombu.utils.functional import lazy from celery.five import THREAD_TIMEOUT_MAX, items, range, nextfun from celery.utils.functional import ( + DummyContext, LRUCache, + fun_takes_argument, + head_from_fun, firstmethod, first, + maybe_list, + memoize, mlazy, padlist, - maybe_list, + regen, ) -from celery.tests.case import Case +from celery.tests.case import Case, SkipTest + + +class test_DummyContext(Case): + + def test_context(self): + with DummyContext(): + pass + with self.assertRaises(KeyError): + with DummyContext(): + raise KeyError() class test_LRUCache(Case): @@ -62,7 +80,14 @@ def test_least_recently_used(self): x[7] = 7 self.assertEqual(list(x.keys()), [3, 6, 7]) + def test_update_larger_than_cache_size(self): + x = LRUCache(2) + 
x.update({x: x for x in range(100)}) + self.assertEqual(list(x.keys()), [98, 99]) + def assertSafeIter(self, method, interval=0.01, size=10000): + if sys.version_info >= (3, 5): + raise SkipTest('Fails on Py3.5') from threading import Thread, Event from time import sleep x = LRUCache(size) @@ -79,7 +104,7 @@ def __init__(self, cache): def run(self): while not self.__is_shutdown.isSet(): try: - self.cache.data.popitem(last=False) + self.cache.popitem(last=False) except KeyError: break self.__is_stopped.set() @@ -168,6 +193,24 @@ def test_maybe_list(self): self.assertIsNone(maybe_list(None)) +class test_memoize(Case): + + def test_memoize(self): + counter = count(1) + + @memoize(maxsize=2) + def x(i): + return next(counter) + + self.assertEqual(x(1), 1) + self.assertEqual(x(1), 1) + self.assertEqual(x(2), 2) + self.assertEqual(x(3), 3) + self.assertEqual(x(1), 4) + x.clear() + self.assertEqual(x(3), 5) + + class test_mlazy(Case): def test_is_memoized(self): @@ -178,3 +221,117 @@ def test_is_memoized(self): self.assertTrue(p.evaluated) self.assertEqual(p(), 20) self.assertEqual(repr(p), '20') + + +class test_regen(Case): + + def test_regen_list(self): + l = [1, 2] + r = regen(iter(l)) + self.assertIs(regen(l), l) + self.assertEqual(r, l) + self.assertEqual(r, l) + self.assertEqual(r.__length_hint__(), 0) + + fun, args = r.__reduce__() + self.assertEqual(fun(*args), l) + + def test_regen_gen(self): + g = regen(iter(list(range(10)))) + self.assertEqual(g[7], 7) + self.assertEqual(g[6], 6) + self.assertEqual(g[5], 5) + self.assertEqual(g[4], 4) + self.assertEqual(g[3], 3) + self.assertEqual(g[2], 2) + self.assertEqual(g[1], 1) + self.assertEqual(g[0], 0) + self.assertEqual(g.data, list(range(10))) + self.assertEqual(g[8], 8) + self.assertEqual(g[0], 0) + g = regen(iter(list(range(10)))) + self.assertEqual(g[0], 0) + self.assertEqual(g[1], 1) + self.assertEqual(g.data, list(range(10))) + g = regen(iter([1])) + self.assertEqual(g[0], 1) + with 
self.assertRaises(IndexError): + g[1] + self.assertEqual(g.data, [1]) + + g = regen(iter(list(range(10)))) + self.assertEqual(g[-1], 9) + self.assertEqual(g[-2], 8) + self.assertEqual(g[-3], 7) + self.assertEqual(g[-4], 6) + self.assertEqual(g[-5], 5) + self.assertEqual(g[5], 5) + self.assertEqual(g.data, list(range(10))) + + self.assertListEqual(list(iter(g)), list(range(10))) + + +class test_head_from_fun(Case): + + def test_from_cls(self): + class X(object): + def __call__(x, y, kwarg=1): + pass + + g = head_from_fun(X()) + with self.assertRaises(TypeError): + g(1) + g(1, 2) + g(1, 2, kwarg=3) + + def test_from_fun(self): + def f(x, y, kwarg=1): + pass + g = head_from_fun(f) + with self.assertRaises(TypeError): + g(1) + g(1, 2) + g(1, 2, kwarg=3) + + def test_from_fun_with_hints(self): + local = {} + fun = ('def f_hints(x: int, y: int, kwarg: int=1):' + ' pass') + try: + exec(fun, {}, local) + except SyntaxError: + # py2 + return + f_hints = local['f_hints'] + + g = head_from_fun(f_hints) + with self.assertRaises(TypeError): + g(1) + g(1, 2) + g(1, 2, kwarg=3) + + +class test_fun_takes_argument(Case): + + def test_starkwargs(self): + self.assertTrue(fun_takes_argument('foo', lambda **kw: 1)) + + def test_named(self): + self.assertTrue(fun_takes_argument('foo', lambda a, foo, bar: 1)) + + def fun(a, b, c, d): + return 1 + + self.assertTrue(fun_takes_argument('foo', fun, position=4)) + + def test_starargs(self): + self.assertTrue(fun_takes_argument('foo', lambda a, *args: 1)) + + def test_does_not(self): + self.assertFalse(fun_takes_argument('foo', lambda a, bar, baz: 1)) + self.assertFalse(fun_takes_argument('foo', lambda: 1)) + + def fun(a, b, foo): + return 1 + + self.assertFalse(fun_takes_argument('foo', fun, position=4)) diff --git a/celery/tests/utils/test_imports.py b/celery/tests/utils/test_imports.py index e7d88bc09..f477d8f62 100644 --- a/celery/tests/utils/test_imports.py +++ b/celery/tests/utils/test_imports.py @@ -19,9 +19,10 @@ def 
test_find_module(self): imp.return_value = None with self.assertRaises(NotAPackage): find_module('foo.bar.baz', imp=imp) + self.assertTrue(find_module('celery.worker.request')) def test_qualname(self): - Class = type('Fox', (object, ), {'__module__': 'quick.brown'}) + Class = type('Fox', (object,), {'__module__': 'quick.brown'}) self.assertEqual(qualname(Class), 'quick.brown.Fox') self.assertEqual(qualname(Class()), 'quick.brown.Fox') diff --git a/celery/tests/utils/test_local.py b/celery/tests/utils/test_local.py index 2b50efcda..febcb8a97 100644 --- a/celery/tests/utils/test_local.py +++ b/celery/tests/utils/test_local.py @@ -31,6 +31,12 @@ def test_std_class_attributes(self): self.assertEqual(Proxy.__module__, 'celery.local') self.assertIsInstance(Proxy.__doc__, str) + def test_doc(self): + def real(): + pass + x = Proxy(real, __doc__='foo') + self.assertEqual(x.__doc__, 'foo') + def test_name(self): def real(): @@ -341,6 +347,7 @@ def test_callbacks(self): self.assertTrue(object.__getattribute__(p, '__pending__')) self.assertTrue(repr(p)) + self.assertTrue(p.__evaluated__()) with self.assertRaises(AttributeError): object.__getattribute__(p, '__pending__') cbA.assert_called_with(p) diff --git a/celery/tests/utils/test_mail.py b/celery/tests/utils/test_mail.py index 4006fb0b5..3d9a17c42 100644 --- a/celery/tests/utils/test_mail.py +++ b/celery/tests/utils/test_mail.py @@ -1,6 +1,6 @@ from __future__ import absolute_import -from celery.utils.mail import Message, Mailer, SSLError +from celery.utils.mail import Message, Mailer, SSLError, ErrorMail from celery.tests.case import Case, Mock, patch @@ -46,8 +46,38 @@ def test_send(self, SMTP): mailer = Mailer(use_ssl=False, use_tls=False) mailer._send(msg) - client.sendmail.assert_called_With(msg.sender, msg.to, str(msg)) + client.sendmail.assert_called_with(msg.sender, msg.to, str(msg)) client.quit.side_effect = SSLError() mailer._send(msg) client.close.assert_called_with() + + +class test_ErrorMail(Case): + + def 
setUp(self): + self.task = Mock(name='task') + self.mailer = ErrorMail( + self.task, subject='foo{foo} ', body='bar{bar} ', + ) + + def test_should_send(self): + self.assertTrue(self.mailer.should_send(Mock(), Mock())) + + def test_format_subject(self): + self.assertEqual( + self.mailer.format_subject({'foo': 'FOO'}), + 'fooFOO', + ) + + def test_format_body(self): + self.assertEqual( + self.mailer.format_body({'bar': 'BAR'}), + 'barBAR', + ) + + def test_send(self): + self.mailer.send({'foo': 'FOO', 'bar': 'BAR'}, KeyError()) + self.task.app.mail_admins.assert_called_with( + 'fooFOO', 'barBAR', fail_silently=True, + ) diff --git a/celery/tests/utils/test_objects.py b/celery/tests/utils/test_objects.py new file mode 100644 index 000000000..303d14966 --- /dev/null +++ b/celery/tests/utils/test_objects.py @@ -0,0 +1,13 @@ +from __future__ import absolute_import, unicode_literals + +from celery.utils.objects import Bunch + +from celery.tests.case import Case + + +class test_Bunch(Case): + + def test(self): + x = Bunch(foo='foo', bar=2) + self.assertEqual(x.foo, 'foo') + self.assertEqual(x.bar, 2) diff --git a/celery/tests/utils/test_pickle.py b/celery/tests/utils/test_pickle.py index 6b65bb3c5..59ce6b8e7 100644 --- a/celery/tests/utils/test_pickle.py +++ b/celery/tests/utils/test_pickle.py @@ -29,7 +29,7 @@ def test_pickle_regular_exception(self): exception = unpickled.get('exception') self.assertTrue(exception) self.assertIsInstance(exception, RegularException) - self.assertTupleEqual(exception.args, ('RegularException raised', )) + self.assertTupleEqual(exception.args, ('RegularException raised',)) def test_pickle_arg_override_exception(self): diff --git a/celery/tests/utils/test_platforms.py b/celery/tests/utils/test_platforms.py index 4e27efd7b..4dd6704f9 100644 --- a/celery/tests/utils/test_platforms.py +++ b/celery/tests/utils/test_platforms.py @@ -4,6 +4,7 @@ import os import sys import signal +import tempfile from celery import _find_option_with_arg from 
celery import platforms @@ -11,7 +12,9 @@ from celery.platforms import ( get_fdmax, ignore_errno, + check_privileges, set_process_title, + set_mp_process_title, signals, maybe_drop_privileges, setuid, @@ -27,6 +30,7 @@ setgroups, _setgroups_hack, close_open_fds, + fd_by_path, ) try: @@ -55,6 +59,24 @@ def test_short_opt(self): ) +class test_fd_by_path(Case): + + def setUp(self): + if sys.platform == 'win32': + raise SkipTest('win32: skip') + + def test_finds(self): + test_file = tempfile.NamedTemporaryFile() + try: + keep = fd_by_path([test_file.name]) + self.assertEqual(keep, [test_file.file.fileno()]) + with patch('os.open') as _open: + _open.side_effect = OSError() + self.assertFalse(fd_by_path([test_file.name])) + finally: + test_file.close() + + class test_close_open_fds(Case): def test_closes(self): @@ -88,13 +110,27 @@ def test_otherwise(self): class test_set_process_title(Case): - def when_no_setps(self): - prev = platforms._setproctitle = platforms._setproctitle, None + def test_no_setps(self): + prev, platforms._setproctitle = platforms._setproctitle, None try: set_process_title('foo') finally: platforms._setproctitle = prev + @patch('celery.platforms.set_process_title') + @patch('celery.platforms.current_process') + def test_mp_no_hostname(self, current_process, set_process_title): + current_process().name = 'Foo' + set_mp_process_title('foo', info='hello') + set_process_title.assert_called_with('foo:Foo', info='hello') + + @patch('celery.platforms.set_process_title') + @patch('celery.platforms.current_process') + def test_mp_hostname(self, current_process, set_process_title): + current_process().name = 'Foo' + set_mp_process_title('foo', hostname='a@q.com', info='hello') + set_process_title.assert_called_with('foo: a@q.com:Foo', info='hello') + class test_Signals(Case): @@ -135,9 +171,15 @@ def test_ignore(self, set): signals.ignore('SIGTERM') set.assert_called_with(signals.signum('TERM'), signals.ignored) + @patch('signal.signal') + def 
test_reset(self, set): + signals.reset('SIGINT') + set.assert_called_with(signals.signum('INT'), signals.default) + @patch('signal.signal') def test_setitem(self, set): - handle = lambda *a: a + def handle(*args): + return args signals['INT'] = handle set.assert_called_with(signal.SIGINT, handle) @@ -168,13 +210,28 @@ def test_when_actual(self, getrlimit): class test_maybe_drop_privileges(Case): + def test_on_windows(self): + prev, sys.platform = sys.platform, 'win32' + try: + maybe_drop_privileges() + finally: + sys.platform = prev + + @patch('os.getegid') + @patch('os.getgid') + @patch('os.geteuid') + @patch('os.getuid') @patch('celery.platforms.parse_uid') + @patch('celery.platforms.parse_gid') @patch('pwd.getpwuid') @patch('celery.platforms.setgid') @patch('celery.platforms.setuid') @patch('celery.platforms.initgroups') def test_with_uid(self, initgroups, setuid, setgid, - getpwuid, parse_uid): + getpwuid, parse_gid, parse_uid, getuid, geteuid, + getgid, getegid): + geteuid.return_value = 10 + getuid.return_value = 10 class pw_struct(object): pw_gid = 50001 @@ -185,6 +242,7 @@ def raise_on_second_call(*args, **kwargs): setuid.side_effect = raise_on_second_call getpwuid.return_value = pw_struct() parse_uid.return_value = 5001 + parse_gid.return_value = 5001 maybe_drop_privileges(uid='user') parse_uid.assert_called_with('user') getpwuid.assert_called_with(5001) @@ -192,6 +250,40 @@ def raise_on_second_call(*args, **kwargs): initgroups.assert_called_with(5001, 50001) setuid.assert_has_calls([call(5001), call(0)]) + setuid.side_effect = raise_on_second_call + + def to_root_on_second_call(mock, first): + return_value = [first] + + def on_first_call(*args, **kwargs): + ret, return_value[0] = return_value[0], 0 + return ret + mock.side_effect = on_first_call + to_root_on_second_call(geteuid, 10) + to_root_on_second_call(getuid, 10) + with self.assertRaises(AssertionError): + maybe_drop_privileges(uid='user') + + getuid.return_value = getuid.side_effect = None + 
geteuid.return_value = geteuid.side_effect = None + getegid.return_value = 0 + getgid.return_value = 0 + setuid.side_effect = raise_on_second_call + with self.assertRaises(AssertionError): + maybe_drop_privileges(gid='group') + + getuid.reset_mock() + geteuid.reset_mock() + setuid.reset_mock() + getuid.side_effect = geteuid.side_effect = None + + def raise_on_second_call(*args, **kwargs): + setuid.side_effect = OSError() + setuid.side_effect.errno = errno.ENOENT + setuid.side_effect = raise_on_second_call + with self.assertRaises(OSError): + maybe_drop_privileges(uid='user') + @patch('celery.platforms.parse_uid') @patch('celery.platforms.parse_gid') @patch('celery.platforms.setgid') @@ -372,7 +464,7 @@ class test_DaemonContext(Case): @patch('os.dup2') def test_open(self, dup2, open, close, closer, umask, chdir, _exit, setsid, fork): - x = DaemonContext(workdir='/opt/workdir') + x = DaemonContext(workdir='/opt/workdir', umask=0o22) x.stdfds = [0, 1, 2] fork.return_value = 0 @@ -385,7 +477,7 @@ def test_open(self, dup2, open, close, closer, umask, chdir, self.assertFalse(_exit.called) chdir.assert_called_with(x.workdir) - umask.assert_called_with(x.umask) + umask.assert_called_with(0o22) self.assertTrue(dup2.called) fork.reset_mock() @@ -409,6 +501,20 @@ def test_open(self, dup2, open, close, closer, umask, chdir, pass x.after_chdir.assert_called_with() + x = DaemonContext(workdir='/opt/workdir', umask="0755") + self.assertEqual(x.umask, 493) + x = DaemonContext(workdir='/opt/workdir', umask="493") + self.assertEqual(x.umask, 493) + + x.redirect_to_null(None) + + with patch('celery.platforms.mputil') as mputil: + x = DaemonContext(after_forkers=True) + x.open() + mputil._run_after_forkers.assert_called_with() + x = DaemonContext(after_forkers=False) + x.open() + class test_Pidfile(Case): @patch('celery.platforms.Pidfile') @@ -699,3 +805,21 @@ def test_setgroups_raises_EPERM(self, hack, sysconf, getgroups): with self.assertRaises(OSError): setgroups(list(range(400))) 
getgroups.assert_called_with() + + +class test_check_privileges(Case): + + def test_suspicious(self): + class Obj(object): + fchown = 13 + prev, platforms.os = platforms.os, Obj() + try: + with self.assertRaises(AssertionError): + check_privileges({'pickle'}) + finally: + platforms.os = prev + prev, platforms.os = platforms.os, object() + try: + check_privileges({'pickle'}) + finally: + platforms.os = prev diff --git a/celery/tests/utils/test_saferepr.py b/celery/tests/utils/test_saferepr.py new file mode 100644 index 000000000..ce2b81df5 --- /dev/null +++ b/celery/tests/utils/test_saferepr.py @@ -0,0 +1,188 @@ +from __future__ import absolute_import, unicode_literals + +import re + +from decimal import Decimal +from pprint import pprint + +from celery.five import items, long_t, text_t, values + +from celery.utils.saferepr import saferepr + +from celery.tests.case import Case + +D_NUMBERS = { + b'integer': 1, + b'float': 1.3, + b'decimal': Decimal("1.3"), + b'long': long_t(4), + b'complex': complex(13.3), +} +D_INT_KEYS = {v: k for k, v in items(D_NUMBERS)} + +QUICK_BROWN_FOX = 'The quick brown fox jumps over the lazy dog.' +B_QUICK_BROWN_FOX = b'The quick brown fox jumps over the lazy dog.' 
+ +D_TEXT = { + b'foo': QUICK_BROWN_FOX, + b'bar': B_QUICK_BROWN_FOX, + b'baz': B_QUICK_BROWN_FOX, + b'xuzzy': B_QUICK_BROWN_FOX, +} + +L_NUMBERS = list(values(D_NUMBERS)) + +D_TEXT_LARGE = { + b'bazxuzzyfoobarlongverylonglong': QUICK_BROWN_FOX * 30, +} + +D_ALL = { + b'numbers': D_NUMBERS, + b'intkeys': D_INT_KEYS, + b'text': D_TEXT, + b'largetext': D_TEXT_LARGE, +} + +D_D_TEXT = {b'rest': D_TEXT} + +RE_OLD_SET_REPR = re.compile(r'(?'.format(self) class X(object): - def __init__(self, app, heartbeat=None, on_task_message=None): + def __init__(self, app, heartbeat=None, on_task_message=None, + transport_driver_type=None): hub = Hub() ( self.obj, @@ -41,7 +62,9 @@ def __init__(self, app, heartbeat=None, on_task_message=None): ) self.consumer.callbacks = [] self.obj.strategies = {} - self.connection.connection_errors = (socket.error, ) + self.connection.connection_errors = (socket.error,) + if transport_driver_type: + self.connection.transport.driver_type = transport_driver_type self.hub.readers = {} self.hub.writers = {} self.hub.consolidate = set() @@ -54,7 +77,8 @@ def __init__(self, app, heartbeat=None, on_task_message=None): self.Hub = self.hub self.blueprint.state = RUN # need this for create_task_handler - _consumer = Consumer(Mock(), timer=Mock(), app=app) + self._consumer = _consumer = Consumer( + Mock(), timer=Mock(), controller=Mock(), app=app) _consumer.on_task_message = on_task_message or [] self.obj.create_task_handler = _consumer.create_task_handler self.on_unknown_message = self.obj.on_unknown_message = Mock( @@ -107,7 +131,7 @@ def get_task_callback(*args, **kwargs): x = X(*args, **kwargs) x.blueprint.state = CLOSE asynloop(*x.args) - return x, x.consumer.callbacks[0] + return x, x.consumer.on_message class test_asynloop(AppCase): @@ -119,6 +143,19 @@ def add(x, y): return x + y self.add = add + def test_drain_after_consume(self): + x, _ = get_task_callback(self.app, transport_driver_type='amqp') + self.assertIn( + _quick_drain, [p.fun for p in 
x.hub._ready], + ) + + def test_pool_did_not_start_at_startup(self): + x = X(self.app) + x.obj.restart_count = 0 + x.obj.pool.did_start_ok.return_value = False + with self.assertRaises(WorkerLostError): + asynloop(*x.args) + def test_setup_heartbeat(self): x = X(self.app, heartbeat=10) x.hub.call_repeatedly = Mock(name='x.hub.call_repeatedly()') @@ -132,45 +169,49 @@ def test_setup_heartbeat(self): def task_context(self, sig, **kwargs): x, on_task = get_task_callback(self.app, **kwargs) - body = body_from_sig(self.app, sig) - message = Mock() - strategy = x.obj.strategies[sig.task] = Mock() - return x, on_task, body, message, strategy + message = task_message_from_sig(self.app, sig) + strategy = x.obj.strategies[sig.task] = Mock(name='strategy') + return x, on_task, message, strategy def test_on_task_received(self): - _, on_task, body, msg, strategy = self.task_context(self.add.s(2, 2)) - on_task(body, msg) + x, on_task, msg, strategy = self.task_context(self.add.s(2, 2)) + on_task(msg) strategy.assert_called_with( - msg, body, msg.ack_log_error, msg.reject_log_error, [], + msg, None, + PromiseEqual(x._consumer.call_soon, msg.ack_log_error), + PromiseEqual(x._consumer.call_soon, msg.reject_log_error), [], ) def test_on_task_received_executes_on_task_message(self): cbs = [Mock(), Mock(), Mock()] - _, on_task, body, msg, strategy = self.task_context( + x, on_task, msg, strategy = self.task_context( self.add.s(2, 2), on_task_message=cbs, ) - on_task(body, msg) + on_task(msg) strategy.assert_called_with( - msg, body, msg.ack_log_error, msg.reject_log_error, cbs, + msg, None, + PromiseEqual(x._consumer.call_soon, msg.ack_log_error), + PromiseEqual(x._consumer.call_soon, msg.reject_log_error), + cbs, ) def test_on_task_message_missing_name(self): - x, on_task, body, msg, strategy = self.task_context(self.add.s(2, 2)) - body.pop('task') - on_task(body, msg) - x.on_unknown_message.assert_called_with(body, msg) + x, on_task, msg, strategy = self.task_context(self.add.s(2, 
2)) + msg.headers.pop('task') + on_task(msg) + x.on_unknown_message.assert_called_with(msg.decode(), msg) def test_on_task_not_registered(self): - x, on_task, body, msg, strategy = self.task_context(self.add.s(2, 2)) + x, on_task, msg, strategy = self.task_context(self.add.s(2, 2)) exc = strategy.side_effect = KeyError(self.add.name) - on_task(body, msg) - x.on_unknown_task.assert_called_with(body, msg, exc) + on_task(msg) + x.on_invalid_task.assert_called_with(None, msg, exc) def test_on_task_InvalidTaskError(self): - x, on_task, body, msg, strategy = self.task_context(self.add.s(2, 2)) + x, on_task, msg, strategy = self.task_context(self.add.s(2, 2)) exc = strategy.side_effect = InvalidTaskError() - on_task(body, msg) - x.on_invalid_task.assert_called_with(body, msg, exc) + on_task(msg) + x.on_invalid_task.assert_called_with(None, msg, exc) def test_should_terminate(self): x = X(self.app) @@ -180,27 +221,27 @@ def test_should_terminate(self): with self.assertRaises(WorkerTerminate): asynloop(*x.args) finally: - state.should_terminate = False + state.should_terminate = None def test_should_terminate_hub_close_raises(self): x = X(self.app) # XXX why aren't the errors propagated?!? 
- state.should_terminate = True + state.should_terminate = EX_FAILURE x.hub.close.side_effect = MemoryError() try: with self.assertRaises(WorkerTerminate): asynloop(*x.args) finally: - state.should_terminate = False + state.should_terminate = None def test_should_stop(self): x = X(self.app) - state.should_stop = True + state.should_stop = 303 try: with self.assertRaises(WorkerShutdown): asynloop(*x.args) finally: - state.should_stop = False + state.should_stop = None def test_updates_qos(self): x = X(self.app) @@ -217,7 +258,7 @@ def test_updates_qos(self): x.hub.on_tick.add(x.closer(mod=2)) asynloop(*x.args) x.qos.update.assert_called_with() - x.hub.fire_timers.assert_called_with(propagate=(socket.error, )) + x.hub.fire_timers.assert_called_with(propagate=(socket.error,)) def test_poll_empty(self): x = X(self.app) @@ -414,3 +455,26 @@ def test_ignores_socket_errors_when_closed(self): x = X(self.app) x.close_then_error(x.connection.drain_events) self.assertIsNone(synloop(*x.args)) + + +class test_quick_drain(AppCase): + + def setup(self): + self.connection = Mock(name='connection') + + def test_drain(self): + _quick_drain(self.connection, timeout=33.3) + self.connection.drain_events.assert_called_with(timeout=33.3) + + def test_drain_error(self): + exc = KeyError() + exc.errno = 313 + self.connection.drain_events.side_effect = exc + with self.assertRaises(KeyError): + _quick_drain(self.connection, timeout=33.3) + + def test_drain_error_EAGAIN(self): + exc = KeyError() + exc.errno = errno.EAGAIN + self.connection.drain_events.side_effect = exc + _quick_drain(self.connection, timeout=33.3) diff --git a/celery/tests/worker/test_request.py b/celery/tests/worker/test_request.py index 488ea72f4..72c4a7d41 100644 --- a/celery/tests/worker/test_request.py +++ b/celery/tests/worker/test_request.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals -import anyjson +import numbers import os import signal import socket @@ -10,7 
+10,6 @@ from datetime import datetime, timedelta from billiard.einfo import ExceptionInfo -from kombu.transport.base import Message from kombu.utils.encoding import from_utf8, default_encode from celery import states @@ -27,16 +26,19 @@ from celery.exceptions import ( Ignore, InvalidTaskError, + Reject, Retry, TaskRevokedError, Terminated, WorkerLostError, ) -from celery.five import keys, monotonic +from celery.five import monotonic from celery.signals import task_revoked from celery.utils import uuid -from celery.worker import job as module -from celery.worker.job import Request, logger as req_logger +from celery.worker import request as module +from celery.worker.request import ( + Request, create_request_cls, logger as req_logger, +) from celery.worker.state import revoked from celery.tests.case import ( @@ -44,12 +46,46 @@ Case, Mock, SkipTest, + TaskMessage, assert_signal_called, - body_from_sig, + task_message_from_sig, patch, ) +class RequestCase(AppCase): + + def setup(self): + self.app.conf.result_serializer = 'pickle' + + @self.app.task(shared=False) + def add(x, y, **kw_): + return x + y + self.add = add + + @self.app.task(shared=False) + def mytask(i, **kwargs): + return i ** i + self.mytask = mytask + + @self.app.task(shared=False) + def mytask_raising(i): + raise KeyError(i) + self.mytask_raising = mytask_raising + + def xRequest(self, name=None, id=None, args=None, kwargs=None, + on_ack=None, on_reject=None, Request=Request, **head): + args = [1] if args is None else args + kwargs = {'f': 'x'} if kwargs is None else kwargs + on_ack = on_ack or Mock(name='on_ack') + on_reject = on_reject or Mock(name='on_reject') + message = TaskMessage( + name or self.mytask.name, id, args=args, kwargs=kwargs, **head + ) + return Request(message, app=self.app, + on_ack=on_ack, on_reject=on_reject) + + class test_mro_lookup(Case): def test_order(self): @@ -71,7 +107,7 @@ def mro(cls): A.x = 10 self.assertEqual(mro_lookup(C, 'x'), A) - self.assertIsNone(mro_lookup(C, 
'x', stop=(A, ))) + self.assertIsNone(mro_lookup(C, 'x', stop={A})) B.x = 10 self.assertEqual(mro_lookup(C, 'x'), B) C.x = 10 @@ -85,7 +121,7 @@ def jail(app, task_id, name, args, kwargs): task.__trace__ = None # rebuild return trace_task( task, task_id, args, kwargs, request=request, eager=False, app=app, - ) + ).retval class test_default_encode(AppCase): @@ -124,7 +160,7 @@ def test_retry_semipredicate(self): self.assertEqual(ret.exc, exc) -class test_trace_task(AppCase): +class test_trace_task(RequestCase): def setup(self): @@ -138,14 +174,14 @@ def mytask_raising(i): raise KeyError(i) self.mytask_raising = mytask_raising - @patch('celery.app.trace._logger') + @patch('celery.app.trace.logger') def test_process_cleanup_fails(self, _logger): self.mytask.backend = Mock() self.mytask.backend.process_cleanup = Mock(side_effect=KeyError()) tid = uuid() ret = jail(self.app, tid, self.mytask.name, [2], {}) self.assertEqual(ret, 4) - self.assertTrue(self.mytask.backend.store_result.called) + self.assertTrue(self.mytask.backend.mark_as_done.called) self.assertIn('Process cleanup failed', _logger.error.call_args[0][0]) def test_process_cleanup_BaseException(self): @@ -161,7 +197,7 @@ def test_execute_jail_success(self): def test_marked_as_started(self): _started = [] - def store_result(tid, meta, state, **kwars): + def store_result(tid, meta, state, **kwargs): if state == states.STARTED: _started.append(tid) self.mytask.backend.store_result = Mock(name='store_result') @@ -182,7 +218,7 @@ def test_execute_jail_failure(self): self.app, uuid(), self.mytask_raising.name, [4], {}, ) self.assertIsInstance(ret, ExceptionInfo) - self.assertTupleEqual(ret.exception.args, (4, )) + self.assertTupleEqual(ret.exception.args, (4,)) def test_execute_ignore_result(self): @@ -206,36 +242,26 @@ def send(self, event, **fields): self.sent.append(event) -class test_Request(AppCase): - - def setup(self): - - @self.app.task(shared=False) - def add(x, y, **kw_): - return x + y - self.add = add - 
- @self.app.task(shared=False) - def mytask(i, **kwargs): - return i ** i - self.mytask = mytask - - @self.app.task(shared=False) - def mytask_raising(i): - raise KeyError(i) - self.mytask_raising = mytask_raising +class test_Request(RequestCase): def get_request(self, sig, Request=Request, **kwargs): return Request( - body_from_sig(self.app, sig), - on_ack=Mock(), - eventer=Mock(), + task_message_from_sig(self.app, sig), + on_ack=Mock(name='on_ack'), + on_reject=Mock(name='on_reject'), + eventer=Mock(name='eventer'), app=self.app, - connection_errors=(socket.error, ), + connection_errors=(socket.error,), task=sig.type, **kwargs ) + def test_shadow(self): + self.assertEqual( + self.get_request(self.add.s(2, 2).set(shadow='fooxyz')).name, + 'fooxyz', + ) + def test_invalid_eta_raises_InvalidTaskError(self): with self.assertRaises(InvalidTaskError): self.get_request(self.add.s(2, 2).set(eta='12345')) @@ -245,8 +271,9 @@ def test_invalid_expires_raises_InvalidTaskError(self): self.get_request(self.add.s(2, 2).set(expires='12345')) def test_valid_expires_with_utc_makes_aware(self): - with patch('celery.worker.job.maybe_make_aware') as mma: - self.get_request(self.add.s(2, 2).set(expires=10)) + with patch('celery.worker.request.maybe_make_aware') as mma: + self.get_request(self.add.s(2, 2).set(expires=10), + maybe_make_aware=mma) self.assertTrue(mma.called) def test_maybe_expire_when_expires_is_None(self): @@ -273,7 +300,7 @@ def test_on_failure_Termianted(self): uuid=req.id, terminated=True, signum='9', expired=False, ) - def test_log_error_propagates_MemoryError(self): + def test_on_failure_propagates_MemoryError(self): einfo = None try: raise MemoryError() @@ -282,9 +309,9 @@ def test_log_error_propagates_MemoryError(self): self.assertIsNotNone(einfo) req = self.get_request(self.add.s(2, 2)) with self.assertRaises(MemoryError): - req._log_error(einfo) + req.on_failure(einfo) - def test_log_error_when_Ignore(self): + def test_on_failure_Ignore_acknowledges(self): 
einfo = None try: raise Ignore() @@ -292,48 +319,71 @@ def test_log_error_when_Ignore(self): einfo = ExceptionInfo(internal=True) self.assertIsNotNone(einfo) req = self.get_request(self.add.s(2, 2)) - req._log_error(einfo) + req.on_failure(einfo) req.on_ack.assert_called_with(req_logger, req.connection_errors) + def test_on_failure_Reject_rejects(self): + einfo = None + try: + raise Reject() + except Reject: + einfo = ExceptionInfo(internal=True) + self.assertIsNotNone(einfo) + req = self.get_request(self.add.s(2, 2)) + req.on_failure(einfo) + req.on_reject.assert_called_with( + req_logger, req.connection_errors, False, + ) + + def test_on_failure_Reject_rejects_with_requeue(self): + einfo = None + try: + raise Reject(requeue=True) + except Reject: + einfo = ExceptionInfo(internal=True) + self.assertIsNotNone(einfo) + req = self.get_request(self.add.s(2, 2)) + req.on_failure(einfo) + req.on_reject.assert_called_with( + req_logger, req.connection_errors, True, + ) + + def test_on_failure_WorkerLostError_rejects_with_requeue(self): + einfo = None + try: + raise WorkerLostError() + except: + einfo = ExceptionInfo(internal=True) + req = self.get_request(self.add.s(2, 2)) + req.task.acks_late = True + req.task.reject_on_worker_lost = True + req.delivery_info['redelivered'] = False + req.on_failure(einfo) + req.on_reject.assert_called_with( + req_logger, req.connection_errors, True) + + def test_on_failure_WorkerLostError_redelivered_None(self): + einfo = None + try: + raise WorkerLostError() + except: + einfo = ExceptionInfo(internal=True) + req = self.get_request(self.add.s(2, 2)) + req.task.acks_late = True + req.task.reject_on_worker_lost = True + req.delivery_info['redelivered'] = None + req.on_failure(einfo) + req.on_reject.assert_called_with( + req_logger, req.connection_errors, False) + def test_tzlocal_is_cached(self): req = self.get_request(self.add.s(2, 2)) req._tzlocal = 'foo' self.assertEqual(req.tzlocal, 'foo') - def test_execute_magic_kwargs(self): - task 
= self.add.s(2, 2) - task.freeze() - req = self.get_request(task) - self.add.accept_magic_kwargs = True - pool = Mock() - req.execute_using_pool(pool) - self.assertTrue(pool.apply_async.called) - args = pool.apply_async.call_args[1]['args'] - self.assertEqual(args[0], task.task) - self.assertEqual(args[1], task.id) - self.assertEqual(args[2], task.args) - kwargs = args[3] - self.assertEqual(kwargs.get('task_name'), task.task) - - def xRequest(self, body=None, **kwargs): - body = dict({'task': self.mytask.name, - 'id': uuid(), - 'args': [1], - 'kwargs': {'f': 'x'}}, **body or {}) - return Request(body, app=self.app, **kwargs) - def test_task_wrapper_repr(self): self.assertTrue(repr(self.xRequest())) - @patch('celery.worker.job.kwdict') - def test_kwdict(self, kwdict): - prev, module.NEEDS_KWDICT = module.NEEDS_KWDICT, True - try: - self.xRequest() - self.assertTrue(kwdict.called) - finally: - module.NEEDS_KWDICT = prev - def test_sets_store_errors(self): self.mytask.ignore_result = True job = self.xRequest() @@ -350,12 +400,7 @@ def test_send_event(self): self.assertIn('task-frobulated', job.eventer.sent) def test_on_retry(self): - job = Request({ - 'task': self.mytask.name, - 'id': uuid(), - 'args': [1], - 'kwargs': {'f': 'x'}, - }, app=self.app) + job = self.get_request(self.mytask.s(1, f='x')) job.eventer = MockEventDispatcher() try: raise Retry('foo', KeyError('moofoobar')) @@ -372,12 +417,7 @@ def test_on_retry(self): job.on_failure(einfo) def test_compat_properties(self): - job = Request({ - 'task': self.mytask.name, - 'id': uuid(), - 'args': [1], - 'kwargs': {'f': 'x'}, - }, app=self.app) + job = self.xRequest() self.assertEqual(job.task_id, job.id) self.assertEqual(job.task_name, job.name) job.task_id = 'ID' @@ -385,15 +425,27 @@ def test_compat_properties(self): job.task_name = 'NAME' self.assertEqual(job.name, 'NAME') + def test_terminate__pool_ref(self): + pool = Mock() + signum = signal.SIGTERM + job = self.get_request(self.mytask.s(1, f='x')) + 
job._apply_result = Mock(name='_apply_result') + with assert_signal_called( + task_revoked, sender=job.task, request=job, + terminated=True, expired=False, signum=signum): + job.time_start = monotonic() + job.worker_pid = 314 + job.terminate(pool, signal='TERM') + job._apply_result().terminate.assert_called_with(signum) + + job._apply_result = Mock(name='_apply_result2') + job._apply_result.return_value = None + job.terminate(pool, signal='TERM') + def test_terminate__task_started(self): pool = Mock() signum = signal.SIGTERM - job = Request({ - 'task': self.mytask.name, - 'id': uuid(), - 'args': [1], - 'kwrgs': {'f': 'x'}, - }, app=self.app) + job = self.get_request(self.mytask.s(1, f='x')) with assert_signal_called( task_revoked, sender=job.task, request=job, terminated=True, expired=False, signum=signum): @@ -404,12 +456,7 @@ def test_terminate__task_started(self): def test_terminate__task_reserved(self): pool = Mock() - job = Request({ - 'task': self.mytask.name, - 'id': uuid(), - 'args': [1], - 'kwargs': {'f': 'x'}, - }, app=self.app) + job = self.get_request(self.mytask.s(1, f='x')) job.time_start = None job.terminate(pool, signal='TERM') self.assertFalse(pool.terminate_job.called) @@ -417,13 +464,9 @@ def test_terminate__task_reserved(self): job.terminate(pool, signal='TERM') def test_revoked_expires_expired(self): - job = Request({ - 'task': self.mytask.name, - 'id': uuid(), - 'args': [1], - 'kwargs': {'f': 'x'}, - 'expires': datetime.utcnow() - timedelta(days=1), - }, app=self.app) + job = self.get_request(self.mytask.s(1, f='x').set( + expires=datetime.utcnow() - timedelta(days=1) + )) with assert_signal_called( task_revoked, sender=job.task, request=job, terminated=False, expired=True, signum=None): @@ -435,9 +478,9 @@ def test_revoked_expires_expired(self): ) def test_revoked_expires_not_expired(self): - job = self.xRequest({ - 'expires': datetime.utcnow() + timedelta(days=1), - }) + job = self.xRequest( + expires=datetime.utcnow() + timedelta(days=1), + 
) job.revoked() self.assertNotIn(job.id, revoked) self.assertNotEqual( @@ -447,47 +490,15 @@ def test_revoked_expires_not_expired(self): def test_revoked_expires_ignore_result(self): self.mytask.ignore_result = True - job = self.xRequest({ - 'expires': datetime.utcnow() - timedelta(days=1), - }) + job = self.xRequest( + expires=datetime.utcnow() - timedelta(days=1), + ) job.revoked() self.assertIn(job.id, revoked) self.assertNotEqual( self.mytask.backend.get_status(job.id), states.REVOKED, ) - def test_send_email(self): - app = self.app - mail_sent = [False] - - def mock_mail_admins(*args, **kwargs): - mail_sent[0] = True - - def get_ei(): - try: - raise KeyError('moofoobar') - except: - return ExceptionInfo() - - app.mail_admins = mock_mail_admins - self.mytask.send_error_emails = True - job = self.xRequest() - einfo = get_ei() - job.on_failure(einfo) - self.assertTrue(mail_sent[0]) - - einfo = get_ei() - mail_sent[0] = False - self.mytask.send_error_emails = False - job.on_failure(einfo) - self.assertFalse(mail_sent[0]) - - einfo = get_ei() - mail_sent[0] = False - self.mytask.send_error_emails = True - job.on_failure(einfo) - self.assertTrue(mail_sent[0]) - def test_already_revoked(self): job = self.xRequest() job._already_revoked = True @@ -510,10 +521,10 @@ def test_execute_does_not_execute_revoked(self): def test_execute_acks_late(self): self.mytask_raising.acks_late = True - job = self.xRequest({ - 'task': self.mytask_raising.name, - 'kwargs': {}, - }) + job = self.xRequest( + name=self.mytask_raising.name, + kwargs={}, + ) job.execute() self.assertTrue(job.acknowledged) job.execute() @@ -555,10 +566,10 @@ def test_on_accepted_terminates(self): def test_on_success_acks_early(self): job = self.xRequest() job.time_start = 1 - job.on_success(42) + job.on_success((0, 42, 0.001)) prev, module._does_info = module._does_info, False try: - job.on_success(42) + job.on_success((0, 42, 0.001)) self.assertFalse(job.acknowledged) finally: module._does_info = prev @@ 
-570,7 +581,7 @@ def test_on_success_BaseException(self): try: raise SystemExit() except SystemExit: - job.on_success(ExceptionInfo()) + job.on_success((1, ExceptionInfo(), 0.01)) else: assert False @@ -579,7 +590,7 @@ def test_on_success_eventer(self): job.time_start = 1 job.eventer = Mock() job.eventer.send = Mock() - job.on_success(42) + job.on_success((0, 42, 0.001)) self.assertTrue(job.eventer.send.called) def test_on_success_when_failure(self): @@ -589,14 +600,14 @@ def test_on_success_when_failure(self): try: raise KeyError('foo') except Exception: - job.on_success(ExceptionInfo()) + job.on_success((1, ExceptionInfo(), 0.001)) self.assertTrue(job.on_failure.called) def test_on_success_acks_late(self): job = self.xRequest() job.time_start = 1 self.mytask.acks_late = True - job.on_success(42) + job.on_success((0, 42, 0.001)) self.assertTrue(job.acknowledged) def test_on_failure_WorkerLostError(self): @@ -634,15 +645,18 @@ def test_on_failure_acks_late(self): self.assertTrue(job.acknowledged) def test_from_message_invalid_kwargs(self): - body = dict(task=self.mytask.name, id=1, args=(), kwargs='foo') + m = TaskMessage(self.mytask.name, args=(), kwargs='foo') + req = Request(m, app=self.app) with self.assertRaises(InvalidTaskError): - Request(body, message=None, app=self.app) + raise req.execute().exception - @patch('celery.worker.job.error') - @patch('celery.worker.job.warn') + @patch('celery.worker.request.error') + @patch('celery.worker.request.warn') def test_on_timeout(self, warn, error): job = self.xRequest() + job.acknowledge = Mock(name='ack') + job.task.acks_late = True job.on_timeout(soft=True, timeout=1337) self.assertIn('Soft time limit', warn.call_args[0][0]) job.on_timeout(soft=False, timeout=1337) @@ -650,6 +664,7 @@ def test_on_timeout(self, warn, error): self.assertEqual( self.mytask.backend.get_status(job.id), states.FAILURE, ) + job.acknowledge.assert_called_with() self.mytask.ignore_result = True job = self.xRequest() @@ -658,41 +673,71 @@ 
def test_on_timeout(self, warn, error): self.mytask.backend.get_status(job.id), states.PENDING, ) + job = self.xRequest() + job.acknowledge = Mock(name='ack') + job.task.acks_late = False + job.on_timeout(soft=True, timeout=1335) + self.assertFalse(job.acknowledge.called) + def test_fast_trace_task(self): from celery.app import trace setup_worker_optimizations(self.app) self.assertIs(trace.trace_task_ret, trace._fast_trace_task) + tid = uuid() + message = TaskMessage(self.mytask.name, tid, args=[4]) + assert len(message.payload) == 3 try: self.mytask.__trace__ = build_tracer( self.mytask.name, self.mytask, self.app.loader, 'test', app=self.app, ) - res = trace.trace_task_ret(self.mytask.name, uuid(), [4], {}) - self.assertEqual(res, 4 ** 4) + failed, res, runtime = trace.trace_task_ret( + self.mytask.name, tid, message.headers, message.body, + message.content_type, message.content_encoding) + self.assertFalse(failed) + self.assertEqual(res, repr(4 ** 4)) + self.assertIsNotNone(runtime) + self.assertIsInstance(runtime, numbers.Real) finally: reset_worker_optimizations() self.assertIs(trace.trace_task_ret, trace._trace_task_ret) delattr(self.mytask, '__trace__') - res = trace.trace_task_ret( - self.mytask.name, uuid(), [4], {}, app=self.app, + failed, res, runtime = trace.trace_task_ret( + self.mytask.name, tid, message.headers, message.body, + message.content_type, message.content_encoding, app=self.app, ) - self.assertEqual(res, 4 ** 4) + self.assertFalse(failed) + self.assertEqual(res, repr(4 ** 4)) + self.assertIsNotNone(runtime) + self.assertIsInstance(runtime, numbers.Real) def test_trace_task_ret(self): self.mytask.__trace__ = build_tracer( self.mytask.name, self.mytask, self.app.loader, 'test', app=self.app, ) - res = _trace_task_ret(self.mytask.name, uuid(), [4], {}, app=self.app) - self.assertEqual(res, 4 ** 4) + tid = uuid() + message = TaskMessage(self.mytask.name, tid, args=[4]) + _, R, _ = _trace_task_ret( + self.mytask.name, tid, message.headers, + 
message.body, message.content_type, + message.content_encoding, app=self.app, + ) + self.assertEqual(R, repr(4 ** 4)) def test_trace_task_ret__no_trace(self): try: delattr(self.mytask, '__trace__') except AttributeError: pass - res = _trace_task_ret(self.mytask.name, uuid(), [4], {}, app=self.app) - self.assertEqual(res, 4 ** 4) + tid = uuid() + message = TaskMessage(self.mytask.name, tid, args=[4]) + _, R, _ = _trace_task_ret( + self.mytask.name, tid, message.headers, + message.body, message.content_type, + message.content_encoding, app=self.app, + ) + self.assertEqual(R, repr(4 ** 4)) def test_trace_catches_exception(self): @@ -705,7 +750,7 @@ def raising(): with self.assertWarnsRegex(RuntimeWarning, r'Exception raised outside'): - res = trace_task(raising, uuid(), [], {}, app=self.app) + res = trace_task(raising, uuid(), [], {}, app=self.app)[0] self.assertIsInstance(res, ExceptionInfo) def test_worker_task_trace_handle_retry(self): @@ -718,11 +763,15 @@ def test_worker_task_trace_handle_retry(self): raise Retry(str(exc), exc=exc) except Retry as exc: w = TraceInfo(states.RETRY, exc) - w.handle_retry(self.mytask, store_errors=False) + w.handle_retry( + self.mytask, self.mytask.request, store_errors=False, + ) self.assertEqual( self.mytask.backend.get_status(tid), states.PENDING, ) - w.handle_retry(self.mytask, store_errors=True) + w.handle_retry( + self.mytask, self.mytask.request, store_errors=True, + ) self.assertEqual( self.mytask.backend.get_status(tid), states.RETRY, ) @@ -738,82 +787,54 @@ def test_worker_task_trace_handle_failure(self): raise ValueError('foo') except Exception as exc: w = TraceInfo(states.FAILURE, exc) - w.handle_failure(self.mytask, store_errors=False) + w.handle_failure( + self.mytask, self.mytask.request, store_errors=False, + ) self.assertEqual( self.mytask.backend.get_status(tid), states.PENDING, ) - w.handle_failure(self.mytask, store_errors=True) + w.handle_failure( + self.mytask, self.mytask.request, store_errors=True, + ) 
self.assertEqual( self.mytask.backend.get_status(tid), states.FAILURE, ) finally: self.mytask.pop_request() - def test_task_wrapper_mail_attrs(self): - job = self.xRequest({'args': [], 'kwargs': {}}) - x = job.success_msg % { - 'name': job.name, - 'id': job.id, - 'return_value': 10, - 'runtime': 0.3641, - } - self.assertTrue(x) - x = job.error_msg % { - 'name': job.name, - 'id': job.id, - 'exc': 'FOOBARBAZ', - 'description': 'raised unexpected', - 'traceback': 'foobarbaz', - } - self.assertTrue(x) - def test_from_message(self): us = 'æØåveéðƒeæ' - body = {'task': self.mytask.name, 'id': uuid(), - 'args': [2], 'kwargs': {us: 'bar'}} - m = Message(None, body=anyjson.dumps(body), backend='foo', - content_type='application/json', - content_encoding='utf-8') - job = Request(m.decode(), message=m, app=self.app) + tid = uuid() + m = TaskMessage(self.mytask.name, tid, args=[2], kwargs={us: 'bar'}) + job = Request(m, app=self.app) self.assertIsInstance(job, Request) - self.assertEqual(job.name, body['task']) - self.assertEqual(job.id, body['id']) - self.assertEqual(job.args, body['args']) - us = from_utf8(us) - if sys.version_info < (2, 6): - self.assertEqual(next(keys(job.kwargs)), us) - self.assertIsInstance(next(keys(job.kwargs)), str) + self.assertEqual(job.name, self.mytask.name) + self.assertEqual(job.id, tid) + self.assertIs(job.message, m) def test_from_message_empty_args(self): - body = {'task': self.mytask.name, 'id': uuid()} - m = Message(None, body=anyjson.dumps(body), backend='foo', - content_type='application/json', - content_encoding='utf-8') - job = Request(m.decode(), message=m, app=self.app) + tid = uuid() + m = TaskMessage(self.mytask.name, tid, args=[], kwargs={}) + job = Request(m, app=self.app) self.assertIsInstance(job, Request) - self.assertEqual(job.args, []) - self.assertEqual(job.kwargs, {}) def test_from_message_missing_required_fields(self): - body = {} - m = Message(None, body=anyjson.dumps(body), backend='foo', - 
content_type='application/json', - content_encoding='utf-8') + m = TaskMessage(self.mytask.name) + m.headers.clear() with self.assertRaises(KeyError): - Request(m.decode(), message=m, app=self.app) + Request(m, app=self.app) def test_from_message_nonexistant_task(self): - body = {'task': 'cu.mytask.doesnotexist', 'id': uuid(), - 'args': [2], 'kwargs': {'æØåveéðƒeæ': 'bar'}} - m = Message(None, body=anyjson.dumps(body), backend='foo', - content_type='application/json', - content_encoding='utf-8') + m = TaskMessage( + 'cu.mytask.doesnotexist', + args=[2], kwargs={'æØåveéðƒeæ': 'bar'}, + ) with self.assertRaises(KeyError): - Request(m.decode(), message=m, app=self.app) + Request(m, app=self.app) def test_execute(self): tid = uuid() - job = self.xRequest({'id': tid, 'args': [4], 'kwargs': {}}) + job = self.xRequest(id=tid, args=[4], kwargs={}) self.assertEqual(job.execute(), 256) meta = self.mytask.backend.get_task_meta(tid) self.assertEqual(meta['status'], states.SUCCESS) @@ -826,38 +847,17 @@ def mytask_no_kwargs(i): return i ** i tid = uuid() - job = self.xRequest({ - 'task': mytask_no_kwargs.name, - 'id': tid, - 'args': [4], - 'kwargs': {}, - }) + job = self.xRequest( + name=mytask_no_kwargs.name, + id=tid, + args=[4], + kwargs={}, + ) self.assertEqual(job.execute(), 256) meta = mytask_no_kwargs.backend.get_task_meta(tid) self.assertEqual(meta['result'], 256) self.assertEqual(meta['status'], states.SUCCESS) - def test_execute_success_some_kwargs(self): - scratch = {'task_id': None} - - @self.app.task(shared=False, accept_magic_kwargs=True) - def mytask_some_kwargs(i, task_id): - scratch['task_id'] = task_id - return i ** i - - tid = uuid() - job = self.xRequest({ - 'task': mytask_some_kwargs.name, - 'id': tid, - 'args': [4], - 'kwargs': {}, - }) - self.assertEqual(job.execute(), 256) - meta = mytask_some_kwargs.backend.get_task_meta(tid) - self.assertEqual(scratch.get('task_id'), tid) - self.assertEqual(meta['result'], 256) - self.assertEqual(meta['status'], 
states.SUCCESS) - def test_execute_ack(self): scratch = {'ACK': False} @@ -865,7 +865,7 @@ def on_ack(*args, **kwargs): scratch['ACK'] = True tid = uuid() - job = self.xRequest({'id': tid, 'args': [4]}, on_ack=on_ack) + job = self.xRequest(id=tid, args=[4], on_ack=on_ack) self.assertEqual(job.execute(), 256) meta = self.mytask.backend.get_task_meta(tid) self.assertTrue(scratch['ACK']) @@ -874,20 +874,21 @@ def on_ack(*args, **kwargs): def test_execute_fail(self): tid = uuid() - job = self.xRequest({ - 'task': self.mytask_raising.name, - 'id': tid, - 'args': [4], - 'kwargs': {}, - }) + job = self.xRequest( + name=self.mytask_raising.name, + id=tid, + args=[4], + kwargs={}, + ) self.assertIsInstance(job.execute(), ExceptionInfo) + assert self.mytask_raising.backend.serializer == 'pickle' meta = self.mytask_raising.backend.get_task_meta(tid) self.assertEqual(meta['status'], states.FAILURE) self.assertIsInstance(meta['result'], KeyError) def test_execute_using_pool(self): tid = uuid() - job = self.xRequest({'id': tid, 'args': [4]}) + job = self.xRequest(id=tid, args=[4]) class MockPool(BasePool): target = None @@ -908,55 +909,165 @@ def apply_async(self, target, args=None, kwargs=None, self.assertTrue(p.target) self.assertEqual(p.args[0], self.mytask.name) self.assertEqual(p.args[1], tid) - self.assertEqual(p.args[2], [4]) - self.assertIn('f', p.args[3]) - self.assertIn([4], p.args) + self.assertEqual(p.args[3], job.message.body) - job.task.accept_magic_kwargs = False - job.execute_using_pool(p) - - def test_default_kwargs(self): - self.maxDiff = 3000 - tid = uuid() - job = self.xRequest({'id': tid, 'args': [4]}) - self.assertDictEqual( - job.extend_with_default_kwargs(), { - 'f': 'x', - 'logfile': None, - 'loglevel': None, - 'task_id': job.id, - 'task_retries': 0, - 'task_is_eager': False, - 'delivery_info': { - 'exchange': None, - 'routing_key': None, - 'priority': 0, - 'redelivered': False, - }, - 'task_name': job.name}) - - @patch('celery.worker.job.logger') - def 
_test_on_failure(self, exception, logger): - app = self.app + def _test_on_failure(self, exception, **kwargs): tid = uuid() - job = self.xRequest({'id': tid, 'args': [4]}) + job = self.xRequest(id=tid, args=[4]) + job.send_event = Mock(name='send_event') + job.task.backend.mark_as_failure = Mock(name='mark_as_failure') try: raise exception - except Exception: + except type(exception): exc_info = ExceptionInfo() - app.conf.CELERY_SEND_TASK_ERROR_EMAILS = True - job.on_failure(exc_info) - self.assertTrue(logger.log.called) - context = logger.log.call_args[0][2] - self.assertEqual(self.mytask.name, context['name']) - self.assertIn(tid, context['id']) + job.on_failure(exc_info, **kwargs) + self.assertTrue(job.send_event.called) + return job def test_on_failure(self): self._test_on_failure(Exception('Inside unit tests')) - def test_on_failure_unicode_exception(self): + def test_on_failure__unicode_exception(self): self._test_on_failure(Exception('Бобры атакуют')) - def test_on_failure_utf8_exception(self): + def test_on_failure__utf8_exception(self): self._test_on_failure(Exception( from_utf8('Бобры атакуют'))) + + def test_on_failure__WorkerLostError(self): + exc = WorkerLostError() + job = self._test_on_failure(exc) + job.task.backend.mark_as_failure.assert_called_with( + job.id, exc, request=job, store_result=True, + ) + + def test_on_failure__return_ok(self): + self._test_on_failure(KeyError(), return_ok=True) + + def test_reject(self): + job = self.xRequest(id=uuid()) + job.on_reject = Mock(name='on_reject') + job.acknowleged = False + job.reject(requeue=True) + job.on_reject.assert_called_with( + req_logger, job.connection_errors, True, + ) + self.assertTrue(job.acknowledged) + job.on_reject.reset_mock() + job.reject(requeue=True) + self.assertFalse(job.on_reject.called) + + def test_group(self): + gid = uuid() + job = self.xRequest(id=uuid(), group=gid) + self.assertEqual(job.group, gid) + + +class test_create_request_class(RequestCase): + + def setup(self): + 
RequestCase.setup(self) + self.task = Mock(name='task') + self.pool = Mock(name='pool') + self.eventer = Mock(name='eventer') + + def create_request_cls(self, **kwargs): + return create_request_cls( + Request, self.task, self.pool, 'foo', self.eventer, **kwargs + ) + + def zRequest(self, Request=None, revoked_tasks=None, ref=None, **kwargs): + return self.xRequest( + Request=Request or self.create_request_cls( + ref=ref, + revoked_tasks=revoked_tasks, + ), + **kwargs) + + def test_on_success(self): + self.zRequest(id=uuid()).on_success((False, "hey", 3.1222)) + + def test_on_success__SystemExit(self, + errors=(SystemExit, KeyboardInterrupt)): + for exc in errors: + einfo = None + try: + raise exc() + except exc: + einfo = ExceptionInfo() + with self.assertRaises(exc): + self.zRequest(id=uuid()).on_success((True, einfo, 1.0)) + + def test_on_success__calls_failure(self): + job = self.zRequest(id=uuid()) + einfo = Mock(name='einfo') + job.on_failure = Mock(name='on_failure') + job.on_success((True, einfo, 1.0)) + job.on_failure.assert_called_with(einfo, return_ok=True) + + def test_on_success__acks_late_enabled(self): + self.task.acks_late = True + job = self.zRequest(id=uuid()) + job.acknowledge = Mock(name='ack') + job.on_success((False, 'foo', 1.0)) + job.acknowledge.assert_called_with() + + def test_on_success__acks_late_disabled(self): + self.task.acks_late = False + job = self.zRequest(id=uuid()) + job.acknowledge = Mock(name='ack') + job.on_success((False, 'foo', 1.0)) + self.assertFalse(job.acknowledge.called) + + def test_on_success__no_events(self): + self.eventer = None + job = self.zRequest(id=uuid()) + job.send_event = Mock(name='send_event') + job.on_success((False, 'foo', 1.0)) + self.assertFalse(job.send_event.called) + + def test_on_success__with_events(self): + job = self.zRequest(id=uuid()) + job.send_event = Mock(name='send_event') + job.on_success((False, 'foo', 1.0)) + job.send_event.assert_called_with( + 'task-succeeded', result='foo', 
runtime=1.0, + ) + + def test_execute_using_pool__revoked(self): + tid = uuid() + job = self.zRequest(id=tid, revoked_tasks={tid}) + job.revoked = Mock() + job.revoked.return_value = True + with self.assertRaises(TaskRevokedError): + job.execute_using_pool(self.pool) + + def test_execute_using_pool__expired(self): + tid = uuid() + job = self.zRequest(id=tid, revoked_tasks=set()) + job.expires = 1232133 + job.revoked = Mock() + job.revoked.return_value = True + with self.assertRaises(TaskRevokedError): + job.execute_using_pool(self.pool) + + def test_execute_using_pool(self): + from celery.app.trace import trace_task_ret as trace + weakref_ref = Mock(name='weakref.ref') + job = self.zRequest(id=uuid(), revoked_tasks=set(), ref=weakref_ref) + job.execute_using_pool(self.pool) + self.pool.apply_async.assert_called_with( + trace, + args=(job.type, job.id, job.request_dict, job.body, + job.content_type, job.content_encoding), + accept_callback=job.on_accepted, + timeout_callback=job.on_timeout, + callback=job.on_success, + error_callback=job.on_failure, + soft_timeout=self.task.soft_time_limit, + timeout=self.task.time_limit, + correlation_id=job.id, + ) + self.assertTrue(job._apply_result) + weakref_ref.assert_called_with(self.pool.apply_async()) + self.assertIs(job._apply_result, weakref_ref()) diff --git a/celery/tests/worker/test_state.py b/celery/tests/worker/test_state.py index ede9a00a1..707fb1fe8 100644 --- a/celery/tests/worker/test_state.py +++ b/celery/tests/worker/test_state.py @@ -48,13 +48,42 @@ class MyPersistent(state.Persistent): class test_maybe_shutdown(AppCase): def teardown(self): - state.should_stop = False - state.should_terminate = False + state.should_stop = None + state.should_terminate = None def test_should_stop(self): state.should_stop = True with self.assertRaises(WorkerShutdown): state.maybe_shutdown() + state.should_stop = 0 + with self.assertRaises(WorkerShutdown): + state.maybe_shutdown() + state.should_stop = False + try: + 
state.maybe_shutdown() + except SystemExit: + raise RuntimeError('should not have exited') + state.should_stop = None + try: + state.maybe_shutdown() + except SystemExit: + raise RuntimeError('should not have exited') + + state.should_stop = 0 + try: + state.maybe_shutdown() + except SystemExit as exc: + self.assertEqual(exc.code, 0) + else: + raise RuntimeError('should have exited') + + state.should_stop = 303 + try: + state.maybe_shutdown() + except SystemExit as exc: + self.assertEqual(exc.code, 303) + else: + raise RuntimeError('should have exited') def test_should_terminate(self): state.should_terminate = True diff --git a/celery/tests/worker/test_strategy.py b/celery/tests/worker/test_strategy.py index 7edf78bff..143bed25c 100644 --- a/celery/tests/worker/test_strategy.py +++ b/celery/tests/worker/test_strategy.py @@ -5,13 +5,57 @@ from kombu.utils.limits import TokenBucket +from celery.exceptions import InvalidTaskError from celery.worker import state +from celery.worker.strategy import proto1_to_proto2 from celery.utils.timeutils import rate -from celery.tests.case import AppCase, Mock, patch, body_from_sig +from celery.tests.case import ( + AppCase, Mock, TaskMessage, TaskMessage1, patch, task_message_from_sig, +) -class test_default_strategy(AppCase): +class test_proto1_to_proto2(AppCase): + + def setup(self): + self.message = Mock(name='message') + self.body = { + 'args': (1,), + 'kwargs': {'foo': 'baz'}, + 'utc': False, + 'taskset': '123', + } + + def test_message_without_args(self): + self.body.pop('args') + with self.assertRaises(InvalidTaskError): + proto1_to_proto2(self.message, self.body) + + def test_message_without_kwargs(self): + self.body.pop('kwargs') + with self.assertRaises(InvalidTaskError): + proto1_to_proto2(self.message, self.body) + + def test_message_kwargs_not_mapping(self): + self.body['kwargs'] = (2,) + with self.assertRaises(InvalidTaskError): + proto1_to_proto2(self.message, self.body) + + def test_message_no_taskset_id(self): + 
self.body.pop('taskset') + self.assertTrue(proto1_to_proto2(self.message, self.body)) + + def test_message(self): + body, headers, decoded, utc = proto1_to_proto2(self.message, self.body) + self.assertTupleEqual(body, ((1,), {'foo': 'baz'}, { + 'callbacks': None, 'errbacks': None, 'chord': None, 'chain': None, + })) + self.assertDictEqual(headers, dict(self.body, group='123')) + self.assertTrue(decoded) + self.assertFalse(utc) + + +class test_default_strategy_proto2(AppCase): def setup(self): @self.app.task(shared=False) @@ -20,20 +64,27 @@ def add(x, y): self.add = add + def get_message_class(self): + return TaskMessage + + def prepare_message(self, message): + return message + class Context(object): - def __init__(self, sig, s, reserved, consumer, message, body): + def __init__(self, sig, s, reserved, consumer, message): self.sig = sig self.s = s self.reserved = reserved self.consumer = consumer self.message = message - self.body = body - def __call__(self, **kwargs): + def __call__(self, callbacks=[], **kwargs): return self.s( - self.message, self.body, - self.message.ack, self.message.reject, [], **kwargs + self.message, + (self.message.payload + if not self.message.headers.get('id') else None), + self.message.ack, self.message.reject, callbacks, **kwargs ) def was_reserved(self): @@ -71,15 +122,17 @@ def _context(self, sig, if limit: bucket = TokenBucket(rate(limit), capacity=1) consumer.task_buckets[sig.task] = bucket + consumer.controller.state.revoked = set() consumer.disable_rate_limits = not rate_limits consumer.event_dispatcher.enabled = events s = sig.type.start_strategy(self.app, consumer, task_reserved=reserved) self.assertTrue(s) - message = Mock() - body = body_from_sig(self.app, sig, utc=utc) - - yield self.Context(sig, s, reserved, consumer, message, body) + message = task_message_from_sig( + self.app, sig, utc=utc, TaskMessage=self.get_message_class(), + ) + message = self.prepare_message(message) + yield self.Context(sig, s, reserved, consumer, 
message) def test_when_logging_disabled(self): with patch('celery.worker.strategy.logger') as logger: @@ -96,6 +149,14 @@ def test_task_strategy(self): C.consumer.on_task_request.assert_called_with(req) self.assertTrue(C.event_sent()) + def test_callbacks(self): + with self._context(self.add.s(2, 2)) as C: + callbacks = [Mock(name='cb1'), Mock(name='cb2')] + C(callbacks=callbacks) + req = C.get_request() + for callback in callbacks: + callback.assert_called_with(req) + def test_when_events_disabled(self): with self._context(self.add.s(2, 2), events=False) as C: C() @@ -129,11 +190,28 @@ def test_when_rate_limited__limits_disabled(self): def test_when_revoked(self): task = self.add.s(2, 2) task.freeze() - state.revoked.add(task.id) try: with self._context(task) as C: + C.consumer.controller.state.revoked.add(task.id) + state.revoked.add(task.id) C() with self.assertRaises(ValueError): C.get_request() finally: state.revoked.discard(task.id) + + +class test_default_strategy_proto1(test_default_strategy_proto2): + + def get_message_class(self): + return TaskMessage1 + + +class test_default_strategy_proto1__no_utc(test_default_strategy_proto2): + + def get_message_class(self): + return TaskMessage1 + + def prepare_message(self, message): + message.payload['utc'] = False + return message diff --git a/celery/tests/worker/test_worker.py b/celery/tests/worker/test_worker.py index b700a6ca6..dcfc06336 100644 --- a/celery/tests/worker/test_worker.py +++ b/celery/tests/worker/test_worker.py @@ -2,6 +2,7 @@ import os import socket +import sys from collections import deque from datetime import datetime, timedelta @@ -12,31 +13,31 @@ from kombu.common import QoS, ignore_errors from kombu.transport.base import Message -from celery.app.defaults import DEFAULTS -from celery.bootsteps import RUN, CLOSE, StartStopStep +from celery.bootsteps import RUN, CLOSE, TERMINATE, StartStopStep from celery.concurrency.base import BasePool -from celery.datastructures import AttributeDict from 
celery.exceptions import ( WorkerShutdown, WorkerTerminate, TaskRevokedError, + InvalidTaskError, ImproperlyConfigured, ) from celery.five import Empty, range, Queue as FastQueue +from celery.platforms import EX_FAILURE from celery.utils import uuid from celery.worker import components from celery.worker import consumer from celery.worker.consumer import Consumer as __Consumer -from celery.worker.job import Request +from celery.worker.request import Request from celery.utils import worker_direct from celery.utils.serialization import pickle from celery.utils.timer2 import Timer -from celery.tests.case import AppCase, Mock, SkipTest, patch, restore_logging +from celery.tests.case import AppCase, Mock, SkipTest, TaskMessage, patch def MockStep(step=None): step = Mock() if step is None else step step.blueprint = Mock() step.blueprint.name = 'MockNS' - step.name = 'MockStep(%s)' % (id(step), ) + step.name = 'MockStep(%s)' % (id(step),) return step @@ -48,7 +49,7 @@ def mock_event_dispatcher(): class PlaceHolder(object): - pass + pass def find_step(obj, typ): @@ -61,6 +62,7 @@ def __init__(self, *args, **kwargs): kwargs.setdefault('without_mingle', True) # disable Mingle step kwargs.setdefault('without_gossip', True) # disable Gossip step kwargs.setdefault('without_heartbeat', True) # disable Heart step + kwargs.setdefault('controller', Mock()) super(Consumer, self).__init__(*args, **kwargs) @@ -70,6 +72,7 @@ class _MyKombuConsumer(Consumer): def __init__(self, *args, **kwargs): kwargs.setdefault('pool', BasePool(2)) + kwargs.setdefault('controller', Mock()) super(_MyKombuConsumer, self).__init__(*args, **kwargs) def restart_heartbeat(self): @@ -123,6 +126,13 @@ def create_message(channel, **data): return m +def create_task_message(channel, *args, **kwargs): + m = TaskMessage(*args, **kwargs) + m.channel = channel + m.delivery_info = {'consumer_tag': 'mock'} + return m + + class test_Consumer(AppCase): def setup(self): @@ -144,7 +154,7 @@ def test_info(self): 
l.connection = Mock() l.connection.info.return_value = {'foo': 'bar'} l.controller = l.app.WorkController() - l.controller.pool = Mock() + l.pool = l.controller.pool = Mock() l.controller.pool.info.return_value = [Mock(), Mock()] l.controller.consumer = l info = l.controller.stats() @@ -158,6 +168,8 @@ def test_start_when_closed(self): def test_connection(self): l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.controller = l.app.WorkController() + l.pool = l.controller.pool = Mock() l.blueprint.start(l) self.assertIsInstance(l.connection, Connection) @@ -202,55 +214,64 @@ def test_close_connection(self): self.assertTrue(eventer.close.call_count) self.assertTrue(heart.closed) - @patch('celery.worker.consumer.warn') + @patch('celery.worker.consumer.consumer.warn') def test_receive_message_unknown(self, warn): l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) l.blueprint.state = RUN l.steps.pop() - backend = Mock() - m = create_message(backend, unknown={'baz': '!!!'}) + channel = Mock() + m = create_message(channel, unknown={'baz': '!!!'}) l.event_dispatcher = mock_event_dispatcher() l.node = MockNode() callback = self._get_on_message(l) - callback(m.decode(), m) + callback(m) self.assertTrue(warn.call_count) @patch('celery.worker.strategy.to_timestamp') def test_receive_message_eta_OverflowError(self, to_timestamp): to_timestamp.side_effect = OverflowError() l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.controller = l.app.WorkController() + l.pool = l.controller.pool = Mock() l.blueprint.state = RUN l.steps.pop() - m = create_message(Mock(), task=self.foo_task.name, - args=('2, 2'), - kwargs={}, - eta=datetime.now().isoformat()) + m = create_task_message( + Mock(), self.foo_task.name, + args=('2, 2'), kwargs={}, + eta=datetime.now().isoformat(), + ) l.event_dispatcher = mock_event_dispatcher() l.node = MockNode() l.update_strategies() l.qos = Mock() callback = self._get_on_message(l) - 
callback(m.decode(), m) + callback(m) self.assertTrue(m.acknowledged) - @patch('celery.worker.consumer.error') + @patch('celery.worker.consumer.consumer.error') def test_receive_message_InvalidTaskError(self, error): l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) l.blueprint.state = RUN l.event_dispatcher = mock_event_dispatcher() l.steps.pop() - m = create_message(Mock(), task=self.foo_task.name, - args=(1, 2), kwargs='foobarbaz', id=1) + l.controller = l.app.WorkController() + l.pool = l.controller.pool = Mock() + m = create_task_message( + Mock(), self.foo_task.name, + args=(1, 2), kwargs='foobarbaz', id=1) l.update_strategies() l.event_dispatcher = mock_event_dispatcher() + strat = l.strategies[self.foo_task.name] = Mock(name='strategy') + strat.side_effect = InvalidTaskError() callback = self._get_on_message(l) - callback(m.decode(), m) + callback(m) + self.assertTrue(error.called) self.assertIn('Received invalid task message', error.call_args[0][0]) - @patch('celery.worker.consumer.crit') + @patch('celery.worker.consumer.consumer.crit') def test_on_decode_error(self, crit): l = Consumer(self.buffer.put, timer=self.timer, app=self.app) @@ -274,18 +295,22 @@ def _get_on_message(self, l): with self.assertRaises(WorkerShutdown): l.loop(*l.loop_args()) - self.assertTrue(l.task_consumer.register_callback.called) - return l.task_consumer.register_callback.call_args[0][0] + self.assertTrue(l.task_consumer.on_message) + return l.task_consumer.on_message def test_receieve_message(self): l = Consumer(self.buffer.put, timer=self.timer, app=self.app) + l.controller = l.app.WorkController() + l.pool = l.controller.pool = Mock() l.blueprint.state = RUN l.event_dispatcher = mock_event_dispatcher() - m = create_message(Mock(), task=self.foo_task.name, - args=[2, 4, 8], kwargs={}) + m = create_task_message( + Mock(), self.foo_task.name, + args=[2, 4, 8], kwargs={}, + ) l.update_strategies() callback = self._get_on_message(l) - callback(m.decode(), m) + 
callback(m) in_bucket = self.buffer.get_nowait() self.assertIsInstance(in_bucket, Request) @@ -306,7 +331,9 @@ def loop(self, *args, **kwargs): l = MockConsumer(self.buffer.put, timer=self.timer, send_events=False, pool=BasePool(), app=self.app) - l.channel_errors = (KeyError, ) + l.controller = l.app.WorkController() + l.pool = l.controller.pool = Mock() + l.channel_errors = (KeyError,) with self.assertRaises(KeyError): l.start() l.timer.stop() @@ -324,14 +351,16 @@ def loop(self, *args, **kwargs): l = MockConsumer(self.buffer.put, timer=self.timer, send_events=False, pool=BasePool(), app=self.app) + l.controller = l.app.WorkController() + l.pool = l.controller.pool = Mock() - l.connection_errors = (KeyError, ) + l.connection_errors = (KeyError,) self.assertRaises(SyntaxError, l.start) l.timer.stop() def test_loop_ignores_socket_timeout(self): - class Connection(self.app.connection().__class__): + class Connection(self.app.connection_for_read().__class__): obj = None def drain_events(self, **kwargs): @@ -347,7 +376,7 @@ def drain_events(self, **kwargs): def test_loop_when_socket_error(self): - class Connection(self.app.connection().__class__): + class Connection(self.app.connection_for_read().__class__): obj = None def drain_events(self, **kwargs): @@ -369,7 +398,7 @@ def drain_events(self, **kwargs): def test_loop(self): - class Connection(self.app.connection().__class__): + class Connection(self.app.connection_for_read().__class__): obj = None def drain_events(self, **kwargs): @@ -395,8 +424,8 @@ def drain_events(self, **kwargs): def test_ignore_errors(self): l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) - l.connection_errors = (AttributeError, KeyError, ) - l.channel_errors = (SyntaxError, ) + l.connection_errors = (AttributeError, KeyError,) + l.channel_errors = (SyntaxError,) ignore_errors(l, Mock(side_effect=AttributeError('foo'))) ignore_errors(l, Mock(side_effect=KeyError('foo'))) ignore_errors(l, 
Mock(side_effect=SyntaxError('foo'))) @@ -406,6 +435,8 @@ def test_ignore_errors(self): def test_apply_eta_task(self): from celery.worker import state l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.controller = l.app.WorkController() + l.pool = l.controller.pool = Mock() l.qos = QoS(None, 10) task = object() @@ -417,10 +448,12 @@ def test_apply_eta_task(self): def test_receieve_message_eta_isoformat(self): l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.controller = l.app.WorkController() + l.pool = l.controller.pool = Mock() l.blueprint.state = RUN l.steps.pop() - m = create_message( - Mock(), task=self.foo_task.name, + m = create_task_message( + Mock(), self.foo_task.name, eta=(datetime.now() + timedelta(days=1)).isoformat(), args=[2, 4, 8], kwargs={}, ) @@ -432,7 +465,7 @@ def test_receieve_message_eta_isoformat(self): l.enabled = False l.update_strategies() callback = self._get_on_message(l) - callback(m.decode(), m) + callback(m) l.timer.stop() l.timer.join(1) @@ -469,51 +502,61 @@ def test_revoke(self): l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) l.blueprint.state = RUN l.steps.pop() - backend = Mock() + channel = Mock() id = uuid() - t = create_message(backend, task=self.foo_task.name, args=[2, 4, 8], - kwargs={}, id=id) + t = create_task_message( + channel, self.foo_task.name, + args=[2, 4, 8], kwargs={}, id=id, + ) from celery.worker.state import revoked revoked.add(id) callback = self._get_on_message(l) - callback(t.decode(), t) + callback(t) self.assertTrue(self.buffer.empty()) def test_receieve_message_not_registered(self): l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) l.blueprint.state = RUN l.steps.pop() - backend = Mock() - m = create_message(backend, task='x.X.31x', args=[2, 4, 8], kwargs={}) + channel = Mock(name='channel') + m = create_task_message( + channel, 'x.X.31x', args=[2, 4, 8], kwargs={}, + ) l.event_dispatcher = mock_event_dispatcher() 
callback = self._get_on_message(l) - self.assertFalse(callback(m.decode(), m)) + self.assertFalse(callback(m)) with self.assertRaises(Empty): self.buffer.get_nowait() self.assertTrue(self.timer.empty()) - @patch('celery.worker.consumer.warn') - @patch('celery.worker.consumer.logger') + @patch('celery.worker.consumer.consumer.warn') + @patch('celery.worker.consumer.consumer.logger') def test_receieve_message_ack_raises(self, logger, warn): l = Consumer(self.buffer.put, timer=self.timer, app=self.app) + l.controller = l.app.WorkController() + l.pool = l.controller.pool = Mock() l.blueprint.state = RUN - backend = Mock() - m = create_message(backend, args=[2, 4, 8], kwargs={}) + channel = Mock() + m = create_task_message( + channel, self.foo_task.name, + args=[2, 4, 8], kwargs={}, + ) + m.headers = None l.event_dispatcher = mock_event_dispatcher() - l.connection_errors = (socket.error, ) + l.update_strategies() + l.connection_errors = (socket.error,) m.reject = Mock() m.reject.side_effect = socket.error('foo') callback = self._get_on_message(l) - self.assertFalse(callback(m.decode(), m)) + self.assertFalse(callback(m)) self.assertTrue(warn.call_count) with self.assertRaises(Empty): self.buffer.get_nowait() self.assertTrue(self.timer.empty()) - m.reject.assert_called_with(requeue=False) - self.assertTrue(logger.critical.call_count) + m.reject_log_error.assert_called_with(logger, l.connection_errors) def test_receive_message_eta(self): import sys @@ -526,13 +569,15 @@ def pp(*args, **kwargs): pp('TEST RECEIVE MESSAGE ETA') pp('+CREATE MYKOMBUCONSUMER') l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.controller = l.app.WorkController() + l.pool = l.controller.pool = Mock() pp('-CREATE MYKOMBUCONSUMER') l.steps.pop() l.event_dispatcher = mock_event_dispatcher() - backend = Mock() + channel = Mock(name='channel') pp('+ CREATE MESSAGE') - m = create_message( - backend, task=self.foo_task.name, + m = create_task_message( + channel, 
self.foo_task.name, args=[2, 4, 8], kwargs={}, eta=(datetime.now() + timedelta(days=1)).isoformat(), ) @@ -542,12 +587,12 @@ def pp(*args, **kwargs): pp('+ BLUEPRINT START 1') l.blueprint.start(l) pp('- BLUEPRINT START 1') - p = l.app.conf.BROKER_CONNECTION_RETRY - l.app.conf.BROKER_CONNECTION_RETRY = False + p = l.app.conf.broker_connection_retry + l.app.conf.broker_connection_retry = False pp('+ BLUEPRINT START 2') l.blueprint.start(l) pp('- BLUEPRINT START 2') - l.app.conf.BROKER_CONNECTION_RETRY = p + l.app.conf.broker_connection_retry = p pp('+ BLUEPRINT RESTART') l.blueprint.restart(l) pp('- BLUEPRINT RESTART') @@ -556,7 +601,7 @@ def pp(*args, **kwargs): callback = self._get_on_message(l) pp('- GET ON MESSAGE') pp('+ CALLBACK') - callback(m.decode(), m) + callback(m) pp('- CALLBACK') finally: pp('+ STOP TIMER') @@ -586,7 +631,7 @@ def test_reset_pidbox_node(self): chan = con.node.channel = Mock() l.connection = Mock() chan.close.side_effect = socket.error('foo') - l.connection_errors = (socket.error, ) + l.connection_errors = (socket.error,) con.reset() chan.close.assert_called_with() @@ -671,7 +716,7 @@ def close(self): def test_connect_errback(self, sleep, connect): l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) from kombu.transport.memory import Transport - Transport.connection_errors = (ChannelError, ) + Transport.connection_errors = (ChannelError,) def effect(): if connect.call_count > 1: @@ -708,6 +753,8 @@ def reset_connection(self): init_callback = Mock() l = _Consumer(self.buffer.put, timer=self.timer, init_callback=init_callback, app=self.app) + l.controller = l.app.WorkController() + l.pool = l.controller.pool = Mock() l.task_consumer = Mock() l.broadcast_consumer = Mock() l.qos = _QoS() @@ -730,6 +777,8 @@ def raises_KeyError(*args, **kwargs): init_callback.reset_mock() l = _Consumer(self.buffer.put, timer=self.timer, app=self.app, send_events=False, init_callback=init_callback) + l.controller = l.app.WorkController() + 
l.pool = l.controller.pool = Mock() l.qos = _QoS() l.task_consumer = Mock() l.broadcast_consumer = Mock() @@ -741,8 +790,9 @@ def raises_KeyError(*args, **kwargs): def test_reset_connection_with_no_node(self): l = Consumer(self.buffer.put, timer=self.timer, app=self.app) + l.controller = l.app.WorkController() + l.pool = l.controller.pool = Mock() l.steps.pop() - self.assertEqual(None, l.pool) l.blueprint.start(l) @@ -775,13 +825,24 @@ def test_on_consumer_ready(self): self.worker.on_consumer_ready(Mock()) def test_setup_queues_worker_direct(self): - self.app.conf.CELERY_WORKER_DIRECT = True + self.app.conf.worker_direct = True self.app.amqp.__dict__['queues'] = Mock() self.worker.setup_queues({}) self.app.amqp.queues.select_add.assert_called_with( worker_direct(self.worker.hostname), ) + def test_setup_queues__missing_queue(self): + self.app.amqp.queues.select = Mock(name='select') + self.app.amqp.queues.deselect = Mock(name='deselect') + self.app.amqp.queues.select.side_effect = KeyError() + self.app.amqp.queues.deselect.side_effect = KeyError() + with self.assertRaises(ImproperlyConfigured): + self.worker.setup_queues("x,y", exclude="foo,bar") + self.app.amqp.queues.select = Mock(name='select') + with self.assertRaises(ImproperlyConfigured): + self.worker.setup_queues("x,y", exclude="foo,bar") + def test_send_worker_shutdown(self): with patch('celery.signals.worker_shutdown') as ws: self.worker._send_worker_shutdown() @@ -815,7 +876,7 @@ def test_shutdown_no_blueprint(self): self.worker.blueprint = None self.worker._shutdown() - @patch('celery.platforms.create_pidlock') + @patch('celery.worker.create_pidlock') def test_use_pidfile(self, create_pidlock): create_pidlock.return_value = Mock() worker = self.create_worker(pidfile='pidfilelockfilepid') @@ -825,47 +886,6 @@ def test_use_pidfile(self, create_pidlock): worker.stop() self.assertTrue(worker.pidlock.release.called) - @patch('celery.platforms.signals') - @patch('celery.platforms.set_mp_process_title') - def 
test_process_initializer(self, set_mp_process_title, _signals): - with restore_logging(): - from celery import signals - from celery._state import _tls - from celery.concurrency.prefork import ( - process_initializer, WORKER_SIGRESET, WORKER_SIGIGNORE, - ) - - def on_worker_process_init(**kwargs): - on_worker_process_init.called = True - on_worker_process_init.called = False - signals.worker_process_init.connect(on_worker_process_init) - - def Loader(*args, **kwargs): - loader = Mock(*args, **kwargs) - loader.conf = {} - loader.override_backends = {} - return loader - - with self.Celery(loader=Loader) as app: - app.conf = AttributeDict(DEFAULTS) - process_initializer(app, 'awesome.worker.com') - _signals.ignore.assert_any_call(*WORKER_SIGIGNORE) - _signals.reset.assert_any_call(*WORKER_SIGRESET) - self.assertTrue(app.loader.init_worker.call_count) - self.assertTrue(on_worker_process_init.called) - self.assertIs(_tls.current_app, app) - set_mp_process_title.assert_called_with( - 'celeryd', hostname='awesome.worker.com', - ) - - with patch('celery.app.trace.setup_worker_optimizations') as S: - os.environ['FORKED_BY_MULTIPROCESSING'] = "1" - try: - process_initializer(app, 'luke.worker.com') - S.assert_called_with(app) - finally: - os.environ.pop('FORKED_BY_MULTIPROCESSING', None) - def test_attrs(self): worker = self.worker self.assertIsNotNone(worker.timer) @@ -925,10 +945,12 @@ def test_on_timer_tick(self): def test_process_task(self): worker = self.worker worker.pool = Mock() - backend = Mock() - m = create_message(backend, task=self.foo_task.name, args=[4, 8, 10], - kwargs={}) - task = Request(m.decode(), message=m, app=self.app) + channel = Mock() + m = create_task_message( + channel, self.foo_task.name, + args=[4, 8, 10], kwargs={}, + ) + task = Request(m, app=self.app) worker._process_task(task) self.assertEqual(worker.pool.apply_async.call_count, 1) worker.pool.stop() @@ -937,10 +959,12 @@ def test_process_task_raise_base(self): worker = self.worker 
worker.pool = Mock() worker.pool.apply_async.side_effect = KeyboardInterrupt('Ctrl+C') - backend = Mock() - m = create_message(backend, task=self.foo_task.name, args=[4, 8, 10], - kwargs={}) - task = Request(m.decode(), message=m, app=self.app) + channel = Mock() + m = create_task_message( + channel, self.foo_task.name, + args=[4, 8, 10], kwargs={}, + ) + task = Request(m, app=self.app) worker.steps = [] worker.blueprint.state = RUN with self.assertRaises(KeyboardInterrupt): @@ -950,10 +974,12 @@ def test_process_task_raise_WorkerTerminate(self): worker = self.worker worker.pool = Mock() worker.pool.apply_async.side_effect = WorkerTerminate() - backend = Mock() - m = create_message(backend, task=self.foo_task.name, args=[4, 8, 10], - kwargs={}) - task = Request(m.decode(), message=m, app=self.app) + channel = Mock() + m = create_task_message( + channel, self.foo_task.name, + args=[4, 8, 10], kwargs={}, + ) + task = Request(m, app=self.app) worker.steps = [] worker.blueprint.state = RUN with self.assertRaises(SystemExit): @@ -963,10 +989,12 @@ def test_process_task_raise_regular(self): worker = self.worker worker.pool = Mock() worker.pool.apply_async.side_effect = KeyError('some exception') - backend = Mock() - m = create_message(backend, task=self.foo_task.name, args=[4, 8, 10], - kwargs={}) - task = Request(m.decode(), message=m, app=self.app) + channel = Mock() + m = create_task_message( + channel, self.foo_task.name, + args=[4, 8, 10], kwargs={}, + ) + task = Request(m, app=self.app) worker._process_task(task) worker.pool.stop() @@ -1018,6 +1046,23 @@ def test_signal_consumer_close(self): worker.consumer.close.side_effect = AttributeError() worker.signal_consumer_close() + def test_rusage__no_resource(self): + from celery import worker + prev, worker.resource = worker.resource, None + try: + self.worker.pool = Mock(name='pool') + with self.assertRaises(NotImplementedError): + self.worker.rusage() + self.worker.stats() + finally: + worker.resource = prev + + def 
test_repr(self): + self.assertTrue(repr(self.worker)) + + def test_str(self): + self.assertEqual(str(self.worker), self.worker.hostname) + def test_start__stop(self): worker = self.worker worker.blueprint.shutdown_complete.set() @@ -1033,7 +1078,7 @@ def test_start__stop(self): for w in worker.steps: self.assertTrue(w.start.call_count) worker.consumer = Mock() - worker.stop() + worker.stop(exitcode=3) for stopstep in worker.steps: self.assertTrue(stopstep.close.call_count) self.assertTrue(stopstep.stop.call_count) @@ -1048,6 +1093,24 @@ def test_start__stop(self): worker.start() worker.stop() + def test_start__KeyboardInterrupt(self): + worker = self.worker + worker.blueprint = Mock(name='blueprint') + worker.blueprint.start.side_effect = KeyboardInterrupt() + worker.stop = Mock(name='stop') + worker.start() + worker.stop.assert_called_with(exitcode=EX_FAILURE) + + def test_register_with_event_loop(self): + worker = self.worker + hub = Mock(name='hub') + worker.blueprint = Mock(name='blueprint') + worker.register_with_event_loop(hub) + worker.blueprint.send_all.assert_called_with( + worker, 'register_with_event_loop', args=(hub,), + description='hub.register', + ) + def test_step_raises(self): worker = self.worker step = Mock() @@ -1055,7 +1118,7 @@ def test_step_raises(self): step.start.side_effect = TypeError() worker.stop = Mock() worker.start() - worker.stop.assert_called_with() + worker.stop.assert_called_with(exitcode=EX_FAILURE) def test_state(self): self.assertTrue(self.worker.state) @@ -1074,12 +1137,8 @@ def test_start__terminate(self): worker.terminate() for step in worker.steps: self.assertTrue(step.terminate.call_count) - - def test_Queues_pool_no_sem(self): - w = Mock() - w.pool_cls.uses_semaphore = False - components.Queues(w).create(w) - self.assertIs(w.process_task, w._process_task) + worker.blueprint.state = TERMINATE + worker.terminate() def test_Hub_crate(self): w = Mock() @@ -1095,6 +1154,12 @@ def test_Pool_crate_threaded(self): pool = 
components.Pool(w) pool.create(w) + def test_Pool_pool_no_sem(self): + w = Mock() + w.pool_cls.uses_semaphore = False + components.Pool(w).create(w) + self.assertIs(w.process_task, w._process_task) + def test_Pool_create(self): from kombu.async.semaphore import LaxBoundedSemaphore w = Mock() @@ -1123,6 +1188,7 @@ def timers(self): pool = components.Pool(w) pool.create(w) pool.register_with_event_loop(w, w.hub) - self.assertIsInstance(w.semaphore, LaxBoundedSemaphore) - P = w.pool - P.start() + if sys.platform != 'win32': + self.assertIsInstance(w.semaphore, LaxBoundedSemaphore) + P = w.pool + P.start() diff --git a/celery/utils/__init__.py b/celery/utils/__init__.py index 24205090b..44d553887 100644 --- a/celery/utils/__init__.py +++ b/celery/utils/__init__.py @@ -6,7 +6,7 @@ Utility functions. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import numbers import os @@ -19,7 +19,6 @@ from collections import Callable from functools import partial, wraps -from inspect import getargspec from pprint import pprint from kombu.entity import Exchange, Queue @@ -27,6 +26,8 @@ from celery.exceptions import CPendingDeprecationWarning, CDeprecationWarning from celery.five import WhateverIO, items, reraise, string_t +from .functional import memoize + __all__ = ['worker_direct', 'warn_deprecated', 'deprecated', 'lpmerge', 'is_iterable', 'isatty', 'cry', 'maybe_reraise', 'strtobool', 'jsonify', 'gen_task_name', 'nodename', 'nodesplit', @@ -34,7 +35,6 @@ PY3 = sys.version_info[0] == 3 - PENDING_DEPRECATION_FMT = """ {description} is scheduled for deprecation in \ version {deprecation} and removal in version v{removal}. \ @@ -46,17 +46,23 @@ version {removal}. {alternative} """ +UNKNOWN_SIMPLE_FORMAT_KEY = """ +Unknown format %{0} in string {1!r}. +Possible causes: Did you forget to escape the expand sign (use '%%{0!r}'), +or did you escape and the value was expanded twice? 
(%%N -> %N -> %hostname)? +""".strip() + #: Billiard sets this when execv is enabled. #: We use it to find out the name of the original ``__main__`` #: module, so that we can properly rewrite the name of the #: task to be that of ``App.main``. -MP_MAIN_FILE = os.environ.get('MP_MAIN_FILE') or None +MP_MAIN_FILE = os.environ.get('MP_MAIN_FILE') #: Exchange for worker direct queues. -WORKER_DIRECT_EXCHANGE = Exchange('C.dq') +WORKER_DIRECT_EXCHANGE = Exchange('C.dq2') #: Format for worker direct queue names. -WORKER_DIRECT_QUEUE_FORMAT = '{hostname}.dq' +WORKER_DIRECT_QUEUE_FORMAT = '{hostname}.dq2' #: Separator for worker node name and hostname. NODENAME_SEP = '@' @@ -64,6 +70,8 @@ NODENAME_DEFAULT = 'celery' RE_FORMAT = re.compile(r'%(\w)') +gethostname = memoize(1, Cache=dict)(socket.gethostname) + def worker_direct(hostname): """Return :class:`kombu.Queue` that is a direct route to @@ -76,9 +84,11 @@ def worker_direct(hostname): """ if isinstance(hostname, Queue): return hostname - return Queue(WORKER_DIRECT_QUEUE_FORMAT.format(hostname=hostname), - WORKER_DIRECT_EXCHANGE, - hostname, auto_delete=True) + return Queue( + WORKER_DIRECT_QUEUE_FORMAT.format(hostname=hostname), + WORKER_DIRECT_EXCHANGE, + hostname, + ) def warn_deprecated(description=None, deprecation=None, @@ -176,8 +186,8 @@ def lpmerge(L, R): """In place left precedent dictionary merge. 
Keeps values from `L`, if the value in `R` is :const:`None`.""" - set = L.__setitem__ - [set(k, v) for k, v in items(R) if v is not None] + setitem = L.__setitem__ + [setitem(k, v) for k, v in items(R) if v is not None] return L @@ -189,14 +199,6 @@ def is_iterable(obj): return True -def fun_takes_kwargs(fun, kwlist=[]): - # deprecated - S = getattr(fun, 'argspec', getargspec(fun)) - if S.keywords is not None: - return kwlist - return [kw for kw in kwlist if kw in S.args] - - def isatty(fh): try: return fh.isatty() @@ -214,7 +216,7 @@ def cry(out=None, sepchr='=', seplen=49): # pragma: no cover # get a map of threads by their ID so we can print their names # during the traceback dump - tmap = dict((t.ident, t) for t in threading.enumerate()) + tmap = {t.ident: t for t in threading.enumerate()} sep = sepchr * seplen for tid, frame in items(sys._current_frames()): @@ -276,9 +278,10 @@ def jsonify(obj, elif isinstance(obj, (tuple, list)): return [_jsonify(v) for v in obj] elif isinstance(obj, dict): - return dict((k, _jsonify(v, key=k)) - for k, v in items(obj) - if (keyfilter(k) if keyfilter else 1)) + return { + k: _jsonify(v, key=k) for k, v in items(obj) + if (keyfilter(k) if keyfilter else 1) + } elif isinstance(obj, datetime.datetime): # See "Date Time String Format" in the ECMA-262 specification. 
r = obj.isoformat() @@ -306,6 +309,7 @@ def jsonify(obj, def gen_task_name(app, name, module_name): """Generate task name from name/module pair.""" + module_name = module_name or '__main__' try: module = sys.modules[module_name] except KeyError: @@ -332,7 +336,7 @@ def nodename(name, hostname): def anon_nodename(hostname=None, prefix='gen'): return nodename(''.join([prefix, str(os.getpid())]), - hostname or socket.gethostname()) + hostname or gethostname()) def nodesplit(nodename): @@ -345,13 +349,13 @@ def nodesplit(nodename): def default_nodename(hostname): name, host = nodesplit(hostname or '') - return nodename(name or NODENAME_DEFAULT, host or socket.gethostname()) + return nodename(name or NODENAME_DEFAULT, host or gethostname()) def node_format(s, nodename, **extra): name, host = nodesplit(nodename) return host_format( - s, host, n=name or NODENAME_DEFAULT, **extra) + s, host, name or NODENAME_DEFAULT, p=nodename, **extra) def _fmt_process_index(prefix='', default='0'): @@ -361,9 +365,10 @@ def _fmt_process_index(prefix='', default='0'): _fmt_process_index_with_prefix = partial(_fmt_process_index, '-', '') -def host_format(s, host=None, **extra): - host = host or socket.gethostname() - name, _, domain = host.partition('.') +def host_format(s, host=None, name=None, **extra): + host = host or gethostname() + hname, _, domain = host.partition('.') + name = name or hname keys = dict({ 'h': host, 'n': name, 'd': domain, 'i': _fmt_process_index, 'I': _fmt_process_index_with_prefix, @@ -376,7 +381,11 @@ def simple_format(s, keys, pattern=RE_FORMAT, expand=r'\1'): keys.setdefault('%', '%') def resolve(match): - resolver = keys[match.expand(expand)] + key = match.expand(expand) + try: + resolver = keys[key] + except KeyError: + raise ValueError(UNKNOWN_SIMPLE_FORMAT_KEY.format(key, s)) if isinstance(resolver, Callable): return resolver() return resolver @@ -393,5 +402,5 @@ def resolve(match): instantiate, import_from_cwd ) from .functional import chunks, noop # noqa 
-from kombu.utils import cached_property, kwdict, uuid # noqa +from kombu.utils import cached_property, uuid # noqa gen_unique_id = uuid diff --git a/celery/utils/abstract.py b/celery/utils/abstract.py new file mode 100644 index 000000000..f8357393d --- /dev/null +++ b/celery/utils/abstract.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.abstract + ~~~~~~~~~~~~~~~~~~~~~ + + Abstract classes. + +""" +from __future__ import absolute_import, unicode_literals + +from abc import ABCMeta, abstractmethod, abstractproperty +from collections import Callable + +from celery.five import with_metaclass + +__all__ = ['CallableTask', 'CallableSignature'] + + +def _hasattr(C, attr): + return any(attr in B.__dict__ for B in C.__mro__) + + +@with_metaclass(ABCMeta) +class _AbstractClass(object): + __required_attributes__ = frozenset() + + @classmethod + def _subclasshook_using(cls, parent, C): + return ( + cls is parent and + all(_hasattr(C, attr) for attr in cls.__required_attributes__) + ) or NotImplemented + + +class CallableTask(_AbstractClass, Callable): # pragma: no cover + __required_attributes__ = frozenset({ + 'delay', 'apply_async', 'apply', + }) + + @abstractmethod + def delay(self, *args, **kwargs): + pass + + @abstractmethod + def apply_async(self, *args, **kwargs): + pass + + @abstractmethod + def apply(self, *args, **kwargs): + pass + + @classmethod + def __subclasshook__(cls, C): + return cls._subclasshook_using(CallableTask, C) + + +class CallableSignature(CallableTask): # pragma: no cover + __required_attributes__ = frozenset({ + 'clone', 'freeze', 'set', 'link', 'link_error', '__or__', + }) + + @abstractproperty + def name(self): + pass + + @abstractproperty + def type(self): + pass + + @abstractproperty + def app(self): + pass + + @abstractproperty + def id(self): + pass + + @abstractproperty + def task(self): + pass + + @abstractproperty + def args(self): + pass + + @abstractproperty + def kwargs(self): + pass + + @abstractproperty + def 
options(self): + pass + + @abstractproperty + def subtask_type(self): + pass + + @abstractproperty + def chord_size(self): + pass + + @abstractproperty + def immutable(self): + pass + + @abstractmethod + def clone(self, args=None, kwargs=None): + pass + + @abstractmethod + def freeze(self, id=None, group_id=None, chord=None, root_id=None): + pass + + @abstractmethod + def set(self, immutable=None, **options): + pass + + @abstractmethod + def link(self, callback): + pass + + @abstractmethod + def link_error(self, errback): + pass + + @abstractmethod + def __or__(self, other): + pass + + @abstractmethod + def __invert__(self): + pass + + @classmethod + def __subclasshook__(cls, C): + return cls._subclasshook_using(CallableSignature, C) diff --git a/celery/utils/compat.py b/celery/utils/compat.py deleted file mode 100644 index 6f6296489..000000000 --- a/celery/utils/compat.py +++ /dev/null @@ -1 +0,0 @@ -from celery.five import * # noqa diff --git a/celery/utils/debug.py b/celery/utils/debug.py index 79ac4e1e3..50a2b8282 100644 --- a/celery/utils/debug.py +++ b/celery/utils/debug.py @@ -31,7 +31,7 @@ (2 ** 30.0, 'GB'), (2 ** 20.0, 'MB'), (2 ** 10.0, 'kB'), - (0.0, '{0!d}b'), + (0.0, 'b'), ) _process = None @@ -78,7 +78,7 @@ def sample_mem(): return current_rss -def _memdump(samples=10): +def _memdump(samples=10): # pragma: no cover S = _mem_sample prev = list(S) if len(S) <= samples else sample(S, samples) _mem_sample[:] = [] @@ -88,7 +88,7 @@ def _memdump(samples=10): return prev, after_collect -def memdump(samples=10, file=None): +def memdump(samples=10, file=None): # pragma: no cover """Dump memory statistics. 
Will print a sample of all RSS memory samples added by @@ -151,7 +151,7 @@ def mem_rss(): return humanbytes(p.get_memory_info().rss) -def ps(): +def ps(): # pragma: no cover """Return the global :class:`psutil.Process` instance, or :const:`None` if :mod:`psutil` is not installed.""" global _process diff --git a/celery/utils/dispatch/signal.py b/celery/utils/dispatch/signal.py index 7d4b337a9..2f0d6c832 100644 --- a/celery/utils/dispatch/signal.py +++ b/celery/utils/dispatch/signal.py @@ -5,17 +5,23 @@ import weakref from . import saferef -from celery.five import range +from celery.five import range, text_t from celery.local import PromiseProxy, Proxy +from celery.utils.log import get_logger __all__ = ['Signal'] +logger = get_logger(__name__) + WEAKREF_TYPES = (weakref.ReferenceType, saferef.BoundMethodWeakref) def _make_id(target): # pragma: no cover if isinstance(target, Proxy): target = target._get_current_object() + if isinstance(target, (bytes, text_t)): + # see Issue #2475 + return target if hasattr(target, '__func__'): return (id(target.__self__), id(target.__func__)) return id(target) @@ -162,42 +168,12 @@ def send(self, sender, **named): if not self.receivers: return responses - for receiver in self._live_receivers(_make_id(sender)): - response = receiver(signal=self, sender=sender, **named) - responses.append((receiver, response)) - return responses - - def send_robust(self, sender, **named): - """Send signal from sender to all connected receivers catching errors. - - :param sender: The sender of the signal. Can be any python object - (normally one registered with a connect if you actually want - something to occur). - - :keyword \*\*named: Named arguments which will be passed to receivers. - These arguments must be a subset of the argument names defined in - :attr:`providing_args`. - - :returns: a list of tuple pairs: `[(receiver, response), … ]`. 
- - :raises DispatcherKeyError: - - if any receiver raises an error (specifically any subclass of - :exc:`Exception`), the error instance is returned as the result - for that receiver. - - """ - responses = [] - if not self.receivers: - return responses - - # Call each receiver with whatever arguments it can accept. - # Return a list of tuple pairs [(receiver, response), … ]. for receiver in self._live_receivers(_make_id(sender)): try: response = receiver(signal=self, sender=sender, **named) - except Exception as err: - responses.append((receiver, err)) + except Exception as exc: + logger.error('Signal handler %r raised: %r', + receiver, exc, exc_info=1) else: responses.append((receiver, response)) return responses diff --git a/celery/utils/encoding.py b/celery/utils/encoding.py index 3ddcd35eb..03da6d9eb 100644 --- a/celery/utils/encoding.py +++ b/celery/utils/encoding.py @@ -6,7 +6,7 @@ This module has moved to :mod:`kombu.utils.encoding`. """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals from kombu.utils.encoding import ( # noqa default_encode, default_encoding, bytes_t, bytes_to_str, str_t, diff --git a/celery/utils/functional.py b/celery/utils/functional.py index faa272b32..4f7e6b147 100644 --- a/celery/utils/functional.py +++ b/celery/utils/functional.py @@ -6,26 +6,46 @@ Utilities for functions. 
""" -from __future__ import absolute_import +from __future__ import absolute_import, print_function, unicode_literals import sys import threading -from functools import wraps -from itertools import islice +from collections import OrderedDict +from functools import partial, wraps +from inspect import isfunction +from itertools import chain, islice -from kombu.utils import cached_property -from kombu.utils.functional import lazy, maybe_evaluate, is_list, maybe_list -from kombu.utils.compat import OrderedDict +from kombu.utils.functional import ( + dictfilter, lazy, maybe_evaluate, is_list, maybe_list, +) +from vine import promise -from celery.five import UserDict, UserList, items, keys +from celery.five import UserDict, UserList, getfullargspec, keys, range __all__ = ['LRUCache', 'is_list', 'maybe_list', 'memoize', 'mlazy', 'noop', 'first', 'firstmethod', 'chunks', 'padlist', 'mattrgetter', 'uniq', - 'regen', 'dictfilter', 'lazy', 'maybe_evaluate'] + 'regen', 'dictfilter', 'lazy', 'maybe_evaluate', 'head_from_fun'] + +IS_PY3 = sys.version_info[0] == 3 +IS_PY2 = sys.version_info[0] == 2 KEYWORD_MARK = object() +FUNHEAD_TEMPLATE = """ +def {fun_name}({fun_args}): + return {fun_value} +""" + + +class DummyContext(object): + + def __enter__(self): + return self + + def __exit__(self, *exc_info): + pass + class LRUCache(UserDict): """LRU Cache implementation using a doubly linked list to track access. 
@@ -45,7 +65,7 @@ def __init__(self, limit=None): def __getitem__(self, key): with self.mutex: value = self[key] = self.data.pop(key) - return value + return value def update(self, *args, **kwargs): with self.mutex: @@ -53,9 +73,12 @@ def update(self, *args, **kwargs): data.update(*args, **kwargs) if limit and len(data) > limit: # pop additional items in case limit exceeded - # negative overflow will lead to an empty list - for item in islice(iter(data), len(data) - limit): - data.pop(item) + for _ in range(len(data) - limit): + data.popitem(last=False) + + def popitem(self, last=True): + with self.mutex: + return self.data.popitem(last) def __setitem__(self, key, value): # remove least recently used key. @@ -68,24 +91,28 @@ def __iter__(self): return iter(self.data) def _iterate_items(self): - for k in self: - try: - yield (k, self.data[k]) - except KeyError: # pragma: no cover - pass + with self.mutex: + for k in self: + try: + yield (k, self.data[k]) + except KeyError: # pragma: no cover + pass iteritems = _iterate_items def _iterate_values(self): - for k in self: - try: - yield self.data[k] - except KeyError: # pragma: no cover - pass + with self.mutex: + for k in self: + try: + yield self.data[k] + except KeyError: # pragma: no cover + pass + itervalues = _iterate_values def _iterate_keys(self): # userdict.keys in py3k calls __getitem__ - return keys(self.data) + with self.mutex: + return keys(self.data) iterkeys = _iterate_keys def incr(self, key, delta=1): @@ -94,7 +121,7 @@ def incr(self, key, delta=1): # integer as long as it exists and we can cast it newval = int(self.data.pop(key)) + delta self[key] = str(newval) - return newval + return newval def __getstate__(self): d = dict(vars(self)) @@ -124,7 +151,6 @@ def items(self): def memoize(maxsize=None, keyfun=None, Cache=LRUCache): def _memoize(fun): - mutex = threading.Lock() cache = Cache(limit=maxsize) @wraps(fun) @@ -132,15 +158,13 @@ def _M(*args, **kwargs): if keyfun: key = keyfun(args, kwargs) else: 
- key = args + (KEYWORD_MARK, ) + tuple(sorted(kwargs.items())) + key = args + (KEYWORD_MARK,) + tuple(sorted(kwargs.items())) try: - with mutex: - value = cache[key] + value = cache[key] except KeyError: value = fun(*args, **kwargs) _M.misses += 1 - with mutex: - cache[key] = value + cache[key] = value else: _M.hits += 1 return value @@ -188,6 +212,17 @@ def noop(*args, **kwargs): pass +def pass1(arg, *args, **kwargs): + return arg + + +def evaluate_promises(it): + for value in it: + if isinstance(value, promise): + value = value() + yield value + + def first(predicate, it): """Return the first element in `iterable` that `predicate` Gives a :const:`True` value for. @@ -196,12 +231,13 @@ def first(predicate, it): """ return next( - (v for v in it if (predicate(v) if predicate else v is not None)), + (v for v in evaluate_promises(it) if ( + predicate(v) if predicate is not None else v is not None)), None, ) -def firstmethod(method): +def firstmethod(method, on_call=None): """Return a function that with a list of instances, finds the first instance that gives a value for the given method. @@ -213,13 +249,14 @@ def firstmethod(method): def _matcher(it, *args, **kwargs): for obj in it: try: - answer = getattr(maybe_evaluate(obj), method)(*args, **kwargs) + meth = getattr(maybe_evaluate(obj), method) + reply = (on_call(meth, *args, **kwargs) if on_call + else meth(*args, **kwargs)) except AttributeError: pass else: - if answer is not None: - return answer - + if reply is not None: + return reply return _matcher @@ -239,7 +276,6 @@ def chunks(it, n): [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10]] """ - # XXX This function is not used anymore, at least not by Celery itself. 
for first in it: yield [first] + list(islice(it, n - 1)) @@ -265,8 +301,7 @@ def padlist(container, size, default=None): def mattrgetter(*attrs): """Like :func:`operator.itemgetter` but return :const:`None` on missing attributes instead of raising :exc:`AttributeError`.""" - return lambda obj: dict((attr, getattr(obj, attr, None)) - for attr in attrs) + return lambda obj: {attr: getattr(obj, attr, None) for attr in attrs} def uniq(it): @@ -286,21 +321,85 @@ def regen(it): class _regen(UserList, list): # must be subclass of list so that json can encode. + def __init__(self, it): self.__it = it + self.__index = 0 + self.__consumed = [] def __reduce__(self): - return list, (self.data, ) + return list, (self.data,) def __length_hint__(self): return self.__it.__length_hint__() - @cached_property - def data(self): - return list(self.__it) - + def __iter__(self): + return chain(self.__consumed, self.__it) + + def __getitem__(self, index): + if index < 0: + return self.data[index] + try: + return self.__consumed[index] + except IndexError: + try: + for i in range(self.__index, index + 1): + self.__consumed.append(next(self.__it)) + except StopIteration: + raise IndexError(index) + else: + return self.__consumed[index] -def dictfilter(d=None, **kw): - """Remove all keys from dict ``d`` whose value is :const:`None`""" - d = kw if d is None else (dict(d, **kw) if kw else d) - return dict((k, v) for k, v in items(d) if v is not None) + @property + def data(self): + try: + self.__consumed.extend(list(self.__it)) + except StopIteration: + pass + return self.__consumed + + +def _argsfromspec(spec, replace_defaults=True): + if spec.defaults: + split = len(spec.defaults) + defaults = (list(range(len(spec.defaults))) if replace_defaults + else spec.defaults) + positional = spec.args[:-split] + optional = list(zip(spec.args[-split:], defaults)) + else: + positional, optional = spec.args, [] + return ', '.join(filter(None, [ + ', '.join(positional), + ', '.join('{0}={1}'.format(k, v) 
for k, v in optional), + '*{0}'.format(spec.varargs) if spec.varargs else None, + '**{0}'.format(spec.varkw) if spec.varkw else None, + ])) + + +def head_from_fun(fun, bound=False, debug=False): + if not isfunction(fun) and hasattr(fun, '__call__'): + name, fun = fun.__class__.__name__, fun.__call__ + else: + name = fun.__name__ + definition = FUNHEAD_TEMPLATE.format( + fun_name=name, + fun_args=_argsfromspec(getfullargspec(fun)), + fun_value=1, + ) + if debug: # pragma: no cover + print(definition, file=sys.stderr) + namespace = {'__name__': 'headof_{0}'.format(name)} + exec(definition, namespace) + result = namespace[name] + result._source = definition + if bound: + return partial(result, object()) + return result + + +def fun_takes_argument(name, fun, position=None): + spec = getfullargspec(fun) + return ( + spec.varkw or spec.varargs or + (len(spec.args) >= position if position else name in spec.args) + ) diff --git a/celery/utils/imports.py b/celery/utils/imports.py index 22a2fdcd3..e82db0c6e 100644 --- a/celery/utils/imports.py +++ b/celery/utils/imports.py @@ -6,7 +6,7 @@ Utilities related to importing modules and symbols by name. """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import imp as _imp import importlib diff --git a/celery/utils/iso8601.py b/celery/utils/iso8601.py index c951cf6ea..98a336170 100644 --- a/celery/utils/iso8601.py +++ b/celery/utils/iso8601.py @@ -1,5 +1,4 @@ -""" -Originally taken from pyiso8601 (http://code.google.com/p/pyiso8601/) +"""Originally taken from pyiso8601 (http://code.google.com/p/pyiso8601/) Modified to match the behavior of dateutil.parser: @@ -31,7 +30,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
""" -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import re @@ -69,9 +68,9 @@ def parse_iso8601(datestring): hours = -hours minutes = -minutes tz = FixedOffset(minutes + hours * 60) - frac = groups['fraction'] or 0 return datetime( - int(groups['year']), int(groups['month']), int(groups['day']), - int(groups['hour']), int(groups['minute']), int(groups['second']), - int(frac), tz + int(groups['year']), int(groups['month']), + int(groups['day']), int(groups['hour'] or 0), + int(groups['minute'] or 0), int(groups['second'] or 0), + int(groups['fraction'] or 0), tz ) diff --git a/celery/utils/log.py b/celery/utils/log.py index 2cef63877..743a9a663 100644 --- a/celery/utils/log.py +++ b/celery/utils/log.py @@ -6,7 +6,7 @@ Logging utilities. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import logging import numbers @@ -16,7 +16,6 @@ import traceback from contextlib import contextmanager -from billiard import current_process, util as mputil from kombu.five import values from kombu.log import get_logger as _get_logger, LOG_LEVELS from kombu.utils.encoding import safe_str @@ -27,7 +26,7 @@ __all__ = ['ColorFormatter', 'LoggingProxy', 'base_logger', 'set_in_sighandler', 'in_sighandler', 'get_logger', - 'get_task_logger', 'mlevel', 'ensure_process_aware_logger', + 'get_task_logger', 'mlevel', 'get_multiprocessing_logger', 'reset_multiprocessing_logger'] _process_aware = False @@ -60,7 +59,7 @@ def iter_open_logger_fds(): try: for handler in logger.handlers: try: - if handler not in seen: + if handler not in seen: # pragma: no cover yield handler.stream seen.add(handler) except AttributeError: @@ -78,9 +77,9 @@ def in_sighandler(): set_in_sighandler(False) -def logger_isa(l, p): +def logger_isa(l, p, max=1000): this, seen = l, set() - while this: + for _ in range(max): if this == p: return True else: @@ -90,13 +89,17 @@ def logger_isa(l, 
p): ) seen.add(this) this = this.parent + if not this: + break + else: # pragma: no cover + raise RuntimeError('Logger hierarchy exceeds {0}'.format(max)) return False def get_logger(name): l = _get_logger(name) if logging.root not in (l, l.parent) and l is not base_logger: - if not logger_isa(l, base_logger): + if not logger_isa(l, base_logger): # pragma: no cover l.parent = base_logger return l task_logger = get_logger('celery.task') @@ -135,11 +138,13 @@ def formatException(self, ei): return r def format(self, record): - sformat = logging.Formatter.format + msg = logging.Formatter.format(self, record) color = self.colors.get(record.levelname) + # reset exception info later for other handlers... + einfo = sys.exc_info() if record.exc_info == 1 else record.exc_info + if color and self.use_color: - msg = record.msg try: # safe_str will repr the color object # and color will break on non-string objects @@ -147,18 +152,22 @@ def format(self, record): # Issue #427 try: if isinstance(msg, string_t): - record.msg = text_t(color(safe_str(msg))) - else: - record.msg = safe_str(color(msg)) - except UnicodeDecodeError: - record.msg = safe_str(msg) # skip colors + return text_t(color(safe_str(msg))) + return safe_str(color(msg)) + except UnicodeDecodeError: # pragma: no cover + return safe_str(msg) # skip colors except Exception as exc: - record.msg = ''.format( - type(msg), exc) - record.exc_info = True - return sformat(self, record) + prev_msg, record.exc_info, record.msg = ( + record.msg, 1, ''.format( + type(msg), exc + ), + ) + try: + return logging.Formatter.format(self, record) + finally: + record.msg, record.exc_info = prev_msg, einfo else: - return safe_str(sformat(self, record)) + return safe_str(msg) class LoggingProxy(object): @@ -246,46 +255,34 @@ def isatty(self): return False -def ensure_process_aware_logger(force=False): - """Make sure process name is recorded when loggers are used.""" - global _process_aware - if force or not _process_aware: - 
logging._acquireLock() - try: - _process_aware = True - Logger = logging.getLoggerClass() - if getattr(Logger, '_process_aware', False): # pragma: no cover - return - - class ProcessAwareLogger(Logger): - _signal_safe = True - _process_aware = True - - def makeRecord(self, *args, **kwds): - record = Logger.makeRecord(self, *args, **kwds) - record.processName = current_process()._name - return record - - def log(self, *args, **kwargs): - if _in_sighandler: - return - return Logger.log(self, *args, **kwargs) - logging.setLoggerClass(ProcessAwareLogger) - finally: - logging._releaseLock() - - def get_multiprocessing_logger(): - return mputil.get_logger() if mputil else None + try: + from billiard import util + except ImportError: # pragma: no cover + pass + else: + return util.get_logger() def reset_multiprocessing_logger(): - if mputil and hasattr(mputil, '_logger'): - mputil._logger = None + try: + from billiard import util + except ImportError: # pragma: no cover + pass + else: + if hasattr(util, '_logger'): # pragma: no cover + util._logger = None + + +def current_process(): + try: + from billiard import process + except ImportError: # pragma: no cover + pass + else: + return process.current_process() def current_process_index(base=1): - if current_process: - index = getattr(current_process(), 'index', None) - return index + base if index is not None else index -ensure_process_aware_logger() + index = getattr(current_process(), 'index', None) + return index + base if index is not None else index diff --git a/celery/utils/mail.py b/celery/utils/mail.py index 00c5f29a9..0f0ec2082 100644 --- a/celery/utils/mail.py +++ b/celery/utils/mail.py @@ -6,7 +6,7 @@ How task error emails are formatted and sent. 
""" -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import smtplib import socket @@ -42,7 +42,7 @@ class SendmailWarning(UserWarning): class Message(object): def __init__(self, to=None, sender=None, subject=None, - body=None, charset='us-ascii'): + body=None, charset='utf-8'): self.to = maybe_list(to) self.sender = sender self.subject = subject diff --git a/celery/utils/objects.py b/celery/utils/objects.py index b2ad646b3..1fac84ddc 100644 --- a/celery/utils/objects.py +++ b/celery/utils/objects.py @@ -6,12 +6,19 @@ Object related utilities including introspection, etc. """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals __all__ = ['mro_lookup'] -def mro_lookup(cls, attr, stop=(), monkey_patched=[]): +class Bunch(object): + """Object that enables you to modify attributes.""" + + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + +def mro_lookup(cls, attr, stop=set(), monkey_patched=[]): """Return the first node by MRO order that defines an attribute. :keyword stop: A list of types that if reached will stop the search. @@ -25,8 +32,8 @@ def mro_lookup(cls, attr, stop=(), monkey_patched=[]): for node in cls.mro(): if node in stop: try: - attr = node.__dict__[attr] - module_origin = attr.__module__ + value = node.__dict__[attr] + module_origin = value.__module__ except (AttributeError, KeyError): pass else: @@ -35,3 +42,50 @@ def mro_lookup(cls, attr, stop=(), monkey_patched=[]): return if attr in node.__dict__: return node + + +class FallbackContext(object): + """The built-in ``@contextmanager`` utility does not work well + when wrapping other contexts, as the traceback is wrong when + the wrapped context raises. 
+ + This solves this problem and can be used instead of ``@contextmanager`` + in this example:: + + @contextmanager + def connection_or_default_connection(connection=None): + if connection: + # user already has a connection, should not close + # after use + yield connection + else: + # must have new connection, and also close the connection + # after the block returns + with create_new_connection() as connection: + yield connection + + This wrapper can be used instead for the above like this:: + + def connection_or_default_connection(connection=None): + return FallbackContext(connection, create_new_connection) + + """ + + def __init__(self, provided, fallback, *fb_args, **fb_kwargs): + self.provided = provided + self.fallback = fallback + self.fb_args = fb_args + self.fb_kwargs = fb_kwargs + self._context = None + + def __enter__(self): + if self.provided is not None: + return self.provided + context = self._context = self.fallback( + *self.fb_args, **self.fb_kwargs + ).__enter__() + return context + + def __exit__(self, *exc_info): + if self._context is not None: + return self._context.__exit__(*exc_info) diff --git a/celery/utils/saferepr.py b/celery/utils/saferepr.py new file mode 100644 index 000000000..93acba08d --- /dev/null +++ b/celery/utils/saferepr.py @@ -0,0 +1,206 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.saferepr + ~~~~~~~~~~~~~~~~~~~~~ + + Streaming, truncating, non-recursive version of :func:`repr`. + + Differences from regular :func:`repr`: + + - Sets are represented the Python 3 way: ``{1, 2}`` vs ``set([1, 2])``. + - Unicode strings does not have the ``u'`` prefix, even on Python 2. + - Empty set formatted as ``set()`` (Python 3), not ``set([])`` (Python 2). + - Longs do not have the ``L`` suffix. + + Very slow with no limits, super quick with limits. 
+ +""" +from __future__ import absolute_import, unicode_literals + +import sys + +from collections import Iterable, Mapping, deque, namedtuple + +from decimal import Decimal +from itertools import chain +from numbers import Number +from pprint import _recursion + +from kombu.utils.encoding import bytes_to_str + +from celery.five import items, text_t + +from .text import truncate, truncate_bytes + +__all__ = ['saferepr', 'reprstream'] + +IS_PY3 = sys.version_info[0] == 3 + +if IS_PY3: # pragma: no cover + range_t = (range, ) +else: + class range_t(object): # noqa + pass + +_literal = namedtuple('_literal', ('value', 'truncate', 'direction')) +_key = namedtuple('_key', ('value',)) +_quoted = namedtuple('_quoted', ('value',)) +_dirty = namedtuple('_dirty', ('objid',)) + +chars_t = (bytes, text_t) +literal_t = (_literal, _key) +safe_t = (Number,) +set_t = (frozenset, set) + +LIT_DICT_START = _literal('{', False, +1) +LIT_DICT_KVSEP = _literal(': ', True, 0) +LIT_DICT_END = _literal('}', False, -1) +LIT_LIST_START = _literal('[', False, +1) +LIT_LIST_END = _literal(']', False, -1) +LIT_LIST_SEP = _literal(', ', True, 0) +LIT_SET_START = _literal('{', False, +1) +LIT_SET_END = _literal('}', False, -1) +LIT_TUPLE_START = _literal('(', False, +1) +LIT_TUPLE_END = _literal(')', False, -1) +LIT_TUPLE_END_SV = _literal(',)', False, -1) + + +def saferepr(o, maxlen=None, maxlevels=3, seen=None): + return ''.join(_saferepr( + o, maxlen=maxlen, maxlevels=maxlevels, seen=seen + )) + + +def _chaindict(mapping, + LIT_DICT_KVSEP=LIT_DICT_KVSEP, + LIT_LIST_SEP=LIT_LIST_SEP): + size = len(mapping) + for i, (k, v) in enumerate(items(mapping)): + yield _key(k) + yield LIT_DICT_KVSEP + yield v + if i < (size - 1): + yield LIT_LIST_SEP + + +def _chainlist(it, LIT_LIST_SEP=LIT_LIST_SEP): + size = len(it) + for i, v in enumerate(it): + yield v + if i < (size - 1): + yield LIT_LIST_SEP + + +def _repr_empty_set(s): + return '%s()' % (type(s).__name__,) + + +def _saferepr(o, maxlen=None, 
maxlevels=3, seen=None): + stack = deque([iter([o])]) + for token, it in reprstream(stack, seen=seen, maxlevels=maxlevels): + if maxlen is not None and maxlen <= 0: + yield ', ...' + # move rest back to stack, so that we can include + # dangling parens. + stack.append(it) + break + if isinstance(token, _literal): + val = token.value + elif isinstance(token, _key): + val = saferepr(token.value, maxlen, maxlevels) + elif isinstance(token, _quoted): + val = token.value + if IS_PY3 and isinstance(val, bytes): # pragma: no cover + val = "b'%s'" % (bytes_to_str(truncate_bytes(val, maxlen)),) + else: + val = "'%s'" % (truncate(val, maxlen),) + else: + val = truncate(token, maxlen) + yield val + if maxlen is not None: + maxlen -= len(val) + for rest1 in stack: + # maxlen exceeded, process any dangling parens. + for rest2 in rest1: + if isinstance(rest2, _literal) and not rest2.truncate: + yield rest2.value + + +def _reprseq(val, lit_start, lit_end, builtin_type, chainer): + if type(val) is builtin_type: # noqa + return lit_start, lit_end, chainer(val) + return ( + _literal('%s(%s' % (type(val).__name__, lit_start.value), False, +1), + _literal('%s)' % (lit_end.value,), False, -1), + chainer(val) + ) + + +def reprstream(stack, seen=None, maxlevels=3, level=0, isinstance=isinstance): + seen = seen or set() + append = stack.append + popleft = stack.popleft + is_in_seen = seen.__contains__ + discard_from_seen = seen.discard + add_to_seen = seen.add + + while stack: + lit_start = lit_end = None + it = popleft() + for val in it: + orig = val + if isinstance(val, _dirty): + discard_from_seen(val.objid) + continue + elif isinstance(val, _literal): + level += val.direction + yield val, it + elif isinstance(val, _key): + yield val, it + elif isinstance(val, Decimal): + yield repr(val), it + elif isinstance(val, safe_t): + yield text_t(val), it + elif isinstance(val, chars_t): + yield _quoted(val), it + elif isinstance(val, range_t): # pragma: no cover + yield repr(val), it + else: + 
if isinstance(val, set_t): + if not val: + yield _repr_empty_set(val), it + continue + lit_start, lit_end, val = _reprseq( + val, LIT_SET_START, LIT_SET_END, set, _chainlist, + ) + elif isinstance(val, tuple): + lit_start, lit_end, val = ( + LIT_TUPLE_START, + LIT_TUPLE_END_SV if len(val) == 1 else LIT_TUPLE_END, + _chainlist(val)) + elif isinstance(val, Mapping): + lit_start, lit_end, val = ( + LIT_DICT_START, LIT_DICT_END, _chaindict(val)) + elif isinstance(val, Iterable): + lit_start, lit_end, val = ( + LIT_LIST_START, LIT_LIST_END, _chainlist(val)) + else: + # other type of object + yield repr(val), it + continue + + if maxlevels and level >= maxlevels: + yield "%s...%s" % (lit_start.value, lit_end.value), it + continue + + objid = id(orig) + if is_in_seen(objid): + yield _recursion(orig), it + continue + add_to_seen(objid) + + # Recurse into the new list/tuple/dict/etc by tacking + # the rest of our iterable onto the new it: this way + # it works similar to a linked list. + append(chain([lit_start], val, [_dirty(objid), lit_end], it)) + break diff --git a/celery/utils/serialization.py b/celery/utils/serialization.py index d5509f1c2..6a2c28c8e 100644 --- a/celery/utils/serialization.py +++ b/celery/utils/serialization.py @@ -6,8 +6,9 @@ Utilities for safely pickling exceptions. 
""" -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals +from base64 import b64encode as base64encode, b64decode as base64decode from inspect import getmro from itertools import takewhile @@ -16,6 +17,8 @@ except ImportError: import pickle # noqa +from kombu.utils.encoding import bytes_to_str, str_to_bytes + from .encoding import safe_repr __all__ = ['UnpickleableExceptionWrapper', 'subclass_exception', @@ -31,7 +34,7 @@ def subclass_exception(name, parent, module): # noqa - return type(name, (parent, ), {'__module__': module}) + return type(name, (parent,), {'__module__': module}) def find_pickleable_exception(exc, loads=pickle.loads, @@ -83,7 +86,7 @@ class UnpickleableExceptionWrapper(Exception): **Example** - .. code-block:: python + .. code-block:: pycon >>> def pickle_it(raising_function): ... try: @@ -165,3 +168,11 @@ def get_pickled_exception(exc): if isinstance(exc, UnpickleableExceptionWrapper): return exc.restore() return exc + + +def b64encode(s): + return bytes_to_str(base64encode(str_to_bytes(s))) + + +def b64decode(s): + return base64decode(str_to_bytes(s)) diff --git a/celery/utils/sysinfo.py b/celery/utils/sysinfo.py index 65073a6f9..19264baa5 100644 --- a/celery/utils/sysinfo.py +++ b/celery/utils/sysinfo.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import os diff --git a/celery/utils/term.py b/celery/utils/term.py index f6f08d44c..a71be76b5 100644 --- a/celery/utils/term.py +++ b/celery/utils/term.py @@ -21,11 +21,14 @@ OP_SEQ = '\033[%dm' RESET_SEQ = '\033[0m' COLOR_SEQ = '\033[1;%dm' -fg = lambda s: COLOR_SEQ % s IS_WINDOWS = platform.system() == 'Windows' +def fg(s): + return COLOR_SEQ % s + + class colored(object): """Terminal colored text. 
diff --git a/celery/utils/text.py b/celery/utils/text.py index ffd2d72fa..851c9f86e 100644 --- a/celery/utils/text.py +++ b/celery/utils/text.py @@ -6,7 +6,7 @@ Text formatting utilities """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals from textwrap import fill @@ -62,11 +62,17 @@ def indent(t, indent=0, sep='\n'): return sep.join(' ' * indent + p for p in t.split(sep)) -def truncate(text, maxlen=128, suffix='...'): +def truncate(s, maxlen=128, suffix='...'): """Truncates text to a maximum number of characters.""" - if len(text) >= maxlen: - return text[:maxlen].rsplit(' ', 1)[0] + suffix - return text + if maxlen and len(s) >= maxlen: + return s[:maxlen].rsplit(' ', 1)[0] + suffix + return s + + +def truncate_bytes(s, maxlen=128, suffix=b'...'): + if maxlen and len(s) >= maxlen: + return s[:maxlen].rsplit(b' ', 1)[0] + suffix + return s def pluralize(n, text, suffix='s'): @@ -84,3 +90,7 @@ def pretty(value, width=80, nl_width=80, sep='\n', **kw): ) else: return pformat(value, width=width, **kw) + + +def match_case(s, other): + return s.upper() if other.isupper() else s.lower() diff --git a/celery/utils/threads.py b/celery/utils/threads.py index 5d4237329..1016496f9 100644 --- a/celery/utils/threads.py +++ b/celery/utils/threads.py @@ -6,7 +6,7 @@ Threading utilities. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import os import socket diff --git a/celery/utils/timer2.py b/celery/utils/timer2.py index d462c6574..cfeb034e2 100644 --- a/celery/utils/timer2.py +++ b/celery/utils/timer2.py @@ -6,7 +6,7 @@ Scheduler for Python functions. 
""" -from __future__ import absolute_import +from __future__ import absolute_import, print_function, unicode_literals import os import sys @@ -86,8 +86,8 @@ def run(self): os._exit(1) def stop(self): + self._is_shutdown.set() if self.running: - self._is_shutdown.set() self._is_stopped.wait() self.join(THREAD_TIMEOUT_MAX) self.running = False diff --git a/celery/utils/timeutils.py b/celery/utils/timeutils.py index 5b75b83a8..76a01020d 100644 --- a/celery/utils/timeutils.py +++ b/celery/utils/timeutils.py @@ -6,19 +6,19 @@ This module contains various utilities related to dates and times. """ -from __future__ import absolute_import +from __future__ import absolute_import, print_function, unicode_literals import numbers import os +import sys import time as _time from calendar import monthrange from datetime import date, datetime, timedelta, tzinfo from kombu.utils import cached_property, reprcall -from kombu.utils.compat import timedelta_seconds -from pytz import timezone as _timezone, AmbiguousTimeError +from pytz import timezone as _timezone, AmbiguousTimeError, FixedOffset from celery.five import string_t @@ -26,12 +26,15 @@ from .iso8601 import parse_iso8601 from .text import pluralize -__all__ = ['LocalTimezone', 'timezone', 'maybe_timedelta', 'timedelta_seconds', +__all__ = ['LocalTimezone', 'timezone', 'maybe_timedelta', 'delta_resolution', 'remaining', 'rate', 'weekday', 'humanize_seconds', 'maybe_iso8601', 'is_naive', 'make_aware', 'localize', 'to_utc', 'maybe_make_aware', 'ffwd', 'utcoffset', 'adjust_timestamp', 'maybe_s_to_ms'] +PY3 = sys.version_info[0] == 3 +PY33 = sys.version_info >= (3, 3) + C_REMDEBUG = os.environ.get('C_REMDEBUG', False) DAYNAMES = 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat' @@ -50,15 +53,13 @@ _local_timezone = None -__timezone__ = -_time.timezone -__altzone__ = -_time.altzone - class LocalTimezone(tzinfo): """Local time implementation taken from Python's docs. Used only when UTC is not enabled. 
""" + _offset_cache = {} def __init__(self): # This code is moved in __init__ to execute it as late as possible @@ -72,23 +73,34 @@ def __init__(self): tzinfo.__init__(self) def __repr__(self): - return '' + return ''.format( + int(self.DSTOFFSET.total_seconds() / 3600), + ) def utcoffset(self, dt): - if self._isdst(dt): - return self.DSTOFFSET - else: - return self.STDOFFSET + return self.DSTOFFSET if self._isdst(dt) else self.STDOFFSET def dst(self, dt): - if self._isdst(dt): - return self.DSTDIFF - else: - return ZERO + return self.DSTDIFF if self._isdst(dt) else ZERO def tzname(self, dt): return _time.tzname[self._isdst(dt)] + if PY3: # pragma: no cover + + def fromutc(self, dt): + # The base tzinfo class no longer implements a DST + # offset aware .fromutc() in Python 3 (Issue #2306). + + # I'd rather rely on pytz to do this, than port + # the C code from cpython's fromutc [asksol] + offset = int(self.utcoffset(dt).seconds / 60.0) + try: + tz = self._offset_cache[offset] + except KeyError: + tz = self._offset_cache[offset] = FixedOffset(offset) + return tz.fromutc(dt.replace(tzinfo=tz)) + def _isdst(self, dt): tt = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, @@ -110,8 +122,17 @@ def to_local(self, dt, local=None, orig=None): dt = make_aware(dt, orig or self.utc) return localize(dt, self.tz_or_local(local)) - def to_system(self, dt): - return localize(dt, self.local) + if PY33: # pragma: no cover + + def to_system(self, dt): + # tz=None is a special case since Python 3.3, and will + # convert to the current local timezone (Issue #2306). + return dt.astimezone(tz=None) + + else: + + def to_system(self, dt): # noqa + return localize(dt, self.local) def to_local_fallback(self, dt): if is_naive(dt): @@ -149,7 +170,7 @@ def delta_resolution(dt, delta): which will just return the original datetime. 
""" - delta = timedelta_seconds(delta) + delta = max(delta.total_seconds(), 0) resolutions = ((3, lambda x: x / 86400), (4, lambda x: x / 3600), @@ -334,10 +355,10 @@ def _fields(self, **extra): }, **extra) -def utcoffset(): - if _time.daylight: - return __altzone__ // 3600 - return __timezone__ // 3600 +def utcoffset(time=_time, localtime=_time.localtime): + if localtime().tm_isdst: + return time.altzone // 3600 + return time.timezone // 3600 def adjust_timestamp(ts, offset, here=utcoffset): diff --git a/celery/worker/__init__.py b/celery/worker/__init__.py index 217902d2e..fe99af132 100644 --- a/celery/worker/__init__.py +++ b/celery/worker/__init__.py @@ -26,12 +26,12 @@ from celery import bootsteps from celery.bootsteps import RUN, TERMINATE from celery import concurrency as _concurrency -from celery import platforms from celery import signals from celery.exceptions import ( ImproperlyConfigured, WorkerTerminate, TaskRevokedError, ) from celery.five import string_t, values +from celery.platforms import EX_FAILURE, create_pidlock from celery.utils import default_nodename, worker_direct from celery.utils.imports import reload_from_cwd from celery.utils.log import mlevel, worker_logger as logger @@ -46,15 +46,15 @@ SELECT_UNKNOWN_QUEUE = """\ Trying to select queue subset of {0!r}, but queue {1} is not -defined in the CELERY_QUEUES setting. +defined in the `task_queues` setting. If you want to automatically declare unknown queues you can -enable the CELERY_CREATE_MISSING_QUEUES setting. +enable the `task_create_missing_queues` setting. """ DESELECT_UNKNOWN_QUEUE = """\ Trying to deselect queue subset of {0!r}, but queue {1} is not -defined in the CELERY_QUEUES setting. +defined in the `task_queues` setting. """ @@ -73,12 +73,14 @@ class WorkController(object): pool = None semaphore = None + #: contains the exit code if a :exc:`SystemExit` event is handled. 
+ exitcode = None + class Blueprint(bootsteps.Blueprint): """Worker bootstep blueprint.""" name = 'Worker' - default_steps = set([ + default_steps = { 'celery.worker.components:Hub', - 'celery.worker.components:Queues', 'celery.worker.components:Pool', 'celery.worker.components:Beat', 'celery.worker.components:Timer', @@ -86,8 +88,7 @@ class Blueprint(bootsteps.Blueprint): 'celery.worker.components:Consumer', 'celery.worker.autoscale:WorkerComponent', 'celery.worker.autoreload:WorkerComponent', - - ]) + } def __init__(self, app=None, hostname=None, **kwargs): self.app = app or self.app @@ -121,7 +122,7 @@ def setup_instance(self, queues=None, ready_callback=None, pidfile=None, self.ready_callback = ready_callback or self.on_consumer_ready # this connection is not established, only used for params - self._conninfo = self.app.connection() + self._conninfo = self.app.connection_for_read() self.use_eventloop = ( self.should_use_eventloop() if use_eventloop is None else use_eventloop @@ -151,7 +152,7 @@ def on_after_init(self, **kwargs): def on_start(self): if self.pidfile: - self.pidlock = platforms.create_pidlock(self.pidfile) + self.pidlock = create_pidlock(self.pidfile) def on_consumer_ready(self, consumer): pass @@ -179,20 +180,20 @@ def setup_queues(self, include, exclude=None): except KeyError as exc: raise ImproperlyConfigured( DESELECT_UNKNOWN_QUEUE.format(exclude, exc)) - if self.app.conf.CELERY_WORKER_DIRECT: + if self.app.conf.worker_direct: self.app.amqp.queues.select_add(worker_direct(self.hostname)) def setup_includes(self, includes): # Update celery_include to have all known task modules, so that we # ensure all task modules are imported in case an execv happens. 
- prev = tuple(self.app.conf.CELERY_INCLUDE) + prev = tuple(self.app.conf.include) if includes: prev += tuple(includes) [self.app.loader.import_task_module(m) for m in includes] self.include = includes - task_modules = set(task.__class__.__module__ - for task in values(self.app.tasks)) - self.app.conf.CELERY_INCLUDE = tuple(set(prev) | task_modules) + task_modules = {task.__class__.__module__ + for task in values(self.app.tasks)} + self.app.conf.include = tuple(set(prev) | task_modules) def prepare_args(self, **kwargs): return kwargs @@ -207,14 +208,16 @@ def start(self): except WorkerTerminate: self.terminate() except Exception as exc: - logger.error('Unrecoverable error: %r', exc, exc_info=True) - self.stop() - except (KeyboardInterrupt, SystemExit): - self.stop() + logger.critical('Unrecoverable error: %r', exc, exc_info=True) + self.stop(exitcode=EX_FAILURE) + except SystemExit as exc: + self.stop(exitcode=exc.code) + except KeyboardInterrupt: + self.stop(exitcode=EX_FAILURE) def register_with_event_loop(self, hub): self.blueprint.send_all( - self, 'register_with_event_loop', args=(hub, ), + self, 'register_with_event_loop', args=(hub,), description='hub.register', ) @@ -242,10 +245,13 @@ def signal_consumer_close(self): def should_use_eventloop(self): return (detect_environment() == 'default' and - self._conninfo.is_evented and not self.app.IS_WINDOWS) + self._conninfo.transport.implements.async and + not self.app.IS_WINDOWS) - def stop(self, in_sighandler=False): + def stop(self, in_sighandler=False, exitcode=None): """Graceful shutdown of the worker server.""" + if exitcode is not None: + self.exitcode = exitcode if self.blueprint.state == RUN: self.signal_consumer_close() if not in_sighandler or self.pool.signal_safe: @@ -281,7 +287,10 @@ def reload(self, modules=None, reload=False, reloader=None): if self.consumer: self.consumer.update_strategies() self.consumer.reset_rate_limits() - self.pool.restart() + try: + self.pool.restart() + except 
NotImplementedError: + pass def info(self): return {'total': self.state.total_count, @@ -323,7 +332,8 @@ def stats(self): def __repr__(self): return ''.format( - self=self, state=self.blueprint.human_state(), + self=self, + state=self.blueprint.human_state() if self.blueprint else 'INIT', ) def __str__(self): @@ -333,7 +343,7 @@ def __str__(self): def state(self): return state - def setup_defaults(self, concurrency=None, loglevel=None, logfile=None, + def setup_defaults(self, concurrency=None, loglevel='WARN', logfile=None, send_events=None, pool_cls=None, consumer_cls=None, timer_cls=None, timer_precision=None, autoscaler_cls=None, autoreloader_cls=None, @@ -342,47 +352,44 @@ def setup_defaults(self, concurrency=None, loglevel=None, logfile=None, schedule_filename=None, scheduler_cls=None, task_time_limit=None, task_soft_time_limit=None, max_tasks_per_child=None, prefetch_multiplier=None, - disable_rate_limits=None, worker_lost_wait=None, **_kw): - self.concurrency = self._getopt('concurrency', concurrency) - self.loglevel = self._getopt('log_level', loglevel) - self.logfile = self._getopt('log_file', logfile) - self.send_events = self._getopt('send_events', send_events) - self.pool_cls = self._getopt('pool', pool_cls) - self.consumer_cls = self._getopt('consumer', consumer_cls) - self.timer_cls = self._getopt('timer', timer_cls) - self.timer_precision = self._getopt('timer_precision', timer_precision) - self.autoscaler_cls = self._getopt('autoscaler', autoscaler_cls) - self.autoreloader_cls = self._getopt('autoreloader', autoreloader_cls) - self.pool_putlocks = self._getopt('pool_putlocks', pool_putlocks) - self.pool_restarts = self._getopt('pool_restarts', pool_restarts) - self.force_execv = self._getopt('force_execv', force_execv) - self.state_db = self._getopt('state_db', state_db) - self.schedule_filename = self._getopt( - 'schedule_filename', schedule_filename, - ) - self.scheduler_cls = self._getopt( - 'celerybeat_scheduler', scheduler_cls, + 
disable_rate_limits=None, worker_lost_wait=None, + max_memory_per_child=None, **_kw): + either = self.app.either + self.loglevel = loglevel + self.logfile = logfile + + self.concurrency = either('worker_concurrency', concurrency) + self.send_events = either('worker_send_task_events', send_events) + self.pool_cls = either('worker_pool', pool_cls) + self.consumer_cls = either('worker_consumer', consumer_cls) + self.timer_cls = either('worker_timer', timer_cls) + self.timer_precision = either( + 'worker_timer_precision', timer_precision, ) - self.task_time_limit = self._getopt( - 'task_time_limit', task_time_limit, + self.autoscaler_cls = either('worker_autoscaler', autoscaler_cls) + self.autoreloader_cls = either('worker_autoreloader', autoreloader_cls) + self.pool_putlocks = either('worker_pool_putlocks', pool_putlocks) + self.pool_restarts = either('worker_pool_restarts', pool_restarts) + self.force_execv = either('worker_force_execv', force_execv) + self.state_db = either('worker_state_db', state_db) + self.schedule_filename = either( + 'beat_schedule_filename', schedule_filename, ) - self.task_soft_time_limit = self._getopt( + self.scheduler_cls = either('beat_scheduler', scheduler_cls) + self.task_time_limit = either('task_time_limit', task_time_limit) + self.task_soft_time_limit = either( 'task_soft_time_limit', task_soft_time_limit, ) - self.max_tasks_per_child = self._getopt( - 'max_tasks_per_child', max_tasks_per_child, + self.max_tasks_per_child = either( + 'worker_max_tasks_per_child', max_tasks_per_child, ) - self.prefetch_multiplier = int(self._getopt( - 'prefetch_multiplier', prefetch_multiplier, - )) - self.disable_rate_limits = self._getopt( - 'disable_rate_limits', disable_rate_limits, + self.max_memory_per_child = either( + 'worker_max_memory_per_child', max_memory_per_child, ) - self.worker_lost_wait = self._getopt( - 'worker_lost_wait', worker_lost_wait, + self.prefetch_multiplier = int(either( + 'worker_prefetch_multiplier', prefetch_multiplier, 
+ )) + self.disable_rate_limits = either( + 'worker_disable_rate_limits', disable_rate_limits, ) - - def _getopt(self, key, value): - if value is not None: - return value - return self.app.conf.find_value_for_key(key, namespace='celeryd') + self.worker_lost_wait = either('worker_lost_wait', worker_lost_wait) diff --git a/celery/worker/autoreload.py b/celery/worker/autoreload.py index 8ade32fb2..3613e2004 100644 --- a/celery/worker/autoreload.py +++ b/celery/worker/autoreload.py @@ -46,7 +46,7 @@ class WorkerComponent(bootsteps.StartStopStep): label = 'Autoreloader' conditional = True - requires = (Pool, ) + requires = (Pool,) def __init__(self, w, autoreload=None, **kwargs): self.enabled = w.autoreload = autoreload @@ -107,8 +107,8 @@ def register_with_event_loop(self, hub): def find_changes(self): maybe_modified = self._maybe_modified - modified = dict((f, mt) for f, mt in self._mtimes() - if maybe_modified(f, mt)) + modified = {f: mt for f, mt in self._mtimes() + if maybe_modified(f, mt)} if modified: self.on_change(modified) self.modify_times.update(modified) @@ -131,7 +131,7 @@ class KQueueMonitor(BaseMonitor): def __init__(self, *args, **kwargs): super(KQueueMonitor, self).__init__(*args, **kwargs) - self.filemap = dict((f, None) for f in self.files) + self.filemap = {f: None for f in self.files} self.fdmap = {} def register_with_event_loop(self, hub): @@ -257,13 +257,14 @@ def __init__(self, controller, modules=None, monitor_cls=None, **options): def on_init(self): files = self.file_to_module - files.update(dict( - (module_file(sys.modules[m]), m) for m in self.modules)) + files.update({ + module_file(sys.modules[m]): m for m in self.modules + }) self._monitor = self.Monitor( files, self.on_change, shutdown_event=self._is_shutdown, **self.options) - self._hashes = dict([(f, file_hash(f)) for f in files]) + self._hashes = {f: file_hash(f) for f in files} def register_with_event_loop(self, hub): if self._monitor is None: diff --git a/celery/worker/autoscale.py 
b/celery/worker/autoscale.py index 14afc2e95..9b94d17c7 100644 --- a/celery/worker/autoscale.py +++ b/celery/worker/autoscale.py @@ -39,7 +39,7 @@ class WorkerComponent(bootsteps.StartStopStep): label = 'Autoscaler' conditional = True - requires = (Pool, ) + requires = (Pool,) def __init__(self, w, **kwargs): self.enabled = w.autoscale @@ -71,7 +71,7 @@ def __init__(self, pool, max_concurrency, self.max_concurrency = max_concurrency self.min_concurrency = min_concurrency self.keepalive = keepalive - self._last_action = None + self._last_scale_up = None self.worker = worker assert self.keepalive, 'cannot scale down too fast.' @@ -81,29 +81,30 @@ def body(self): self.maybe_scale() sleep(1.0) - def _maybe_scale(self): + def _maybe_scale(self, req=None): procs = self.processes cur = min(self.qty, self.max_concurrency) if cur > procs: self.scale_up(cur - procs) return True - elif cur < procs: - self.scale_down((procs - cur) - self.min_concurrency) + cur = max(self.qty, self.min_concurrency) + if cur < procs: + self.scale_down(procs - cur) return True - def maybe_scale(self): - if self._maybe_scale(): + def maybe_scale(self, req=None): + if self._maybe_scale(req): self.pool.maintain_pool() def update(self, max=None, min=None): with self.mutex: if max is not None: - if max < self.max_concurrency: + if max < self.processes: self._shrink(self.processes - max) self.max_concurrency = max if min is not None: - if min > self.min_concurrency: - self._grow(min - self.min_concurrency) + if min > self.processes: + self._grow(min - self.processes) self.min_concurrency = min return self.max_concurrency, self.min_concurrency @@ -112,7 +113,6 @@ def force_scale_up(self, n): new = self.processes + n if new > self.max_concurrency: self.max_concurrency = new - self.min_concurrency += 1 self._grow(n) def force_scale_down(self, n): @@ -123,13 +123,12 @@ def force_scale_down(self, n): self._shrink(min(n, self.processes)) def scale_up(self, n): - self._last_action = monotonic() + 
self._last_scale_up = monotonic() return self._grow(n) def scale_down(self, n): - if n and self._last_action and ( - monotonic() - self._last_action > self.keepalive): - self._last_action = monotonic() + if self._last_scale_up and ( + monotonic() - self._last_scale_up > self.keepalive): return self._shrink(n) def _grow(self, n): diff --git a/celery/worker/components.py b/celery/worker/components.py index d23a3b6b8..469db8995 100644 --- a/celery/worker/components.py +++ b/celery/worker/components.py @@ -19,9 +19,11 @@ from celery._state import _set_task_join_will_block from celery.exceptions import ImproperlyConfigured from celery.five import string_t +from celery.platforms import IS_WINDOWS from celery.utils.log import worker_logger as logger -__all__ = ['Timer', 'Hub', 'Queues', 'Pool', 'Beat', 'StateDB', 'Consumer'] + +__all__ = ['Timer', 'Hub', 'Pool', 'Beat', 'StateDB', 'Consumer'] ERR_B_GREEN = """\ -B option doesn't work with eventlet/gevent pools: \ @@ -29,7 +31,7 @@ """ W_POOL_SETTING = """ -The CELERYD_POOL setting should not be used to select the eventlet/gevent +The worker_pool setting should not be used to select the eventlet/gevent pools, instead you *must use the -P* argument so that patches are applied as early as possible. """ @@ -60,7 +62,7 @@ def on_timer_tick(self, delay): class Hub(bootsteps.StartStopStep): - requires = (Timer, ) + requires = (Timer,) def __init__(self, w, **kwargs): w.hub = None @@ -71,7 +73,9 @@ def include_if(self, w): def create(self, w): w.hub = get_event_loop() if w.hub is None: - w.hub = set_event_loop(_Hub(w.timer)) + required_hub = getattr(w._conninfo, 'requires_hub', None) + w.hub = set_event_loop(( + required_hub if required_hub else _Hub)(w.timer)) self._patch_thread_primitives(w) return self @@ -90,25 +94,12 @@ def _patch_thread_primitives(self, w): # multiprocessing's ApplyResult uses this lock. 
try: from billiard import pool - except ImportError: + except ImportError: # pragma: no cover pass else: pool.Lock = DummyLock -class Queues(bootsteps.Step): - """This bootstep initializes the internal queues - used by the worker.""" - label = 'Queues (intra)' - requires = (Hub, ) - - def create(self, w): - w.process_task = w._process_task - if w.use_eventloop: - if w.pool_putlocks and w.pool_cls.uses_semaphore: - w.process_task = w._process_task_sem - - class Pool(bootsteps.StartStopStep): """Bootstep managing the worker pool. @@ -123,7 +114,7 @@ class Pool(bootsteps.StartStopStep): * min_concurrency """ - requires = (Queues, ) + requires = (Hub,) def __init__(self, w, autoscale=None, autoreload=None, no_execv=False, optimization=None, **kwargs): @@ -148,22 +139,27 @@ def terminate(self, w): if w.pool: w.pool.terminate() - def create(self, w, semaphore=None, max_restarts=None): - if w.app.conf.CELERYD_POOL in ('eventlet', 'gevent'): + def create(self, w, semaphore=None, max_restarts=None, + green_pools={'eventlet', 'gevent'}): + if w.app.conf.worker_pool in green_pools: # pragma: no cover warnings.warn(UserWarning(W_POOL_SETTING)) - threaded = not w.use_eventloop + threaded = not w.use_eventloop or IS_WINDOWS procs = w.min_concurrency forking_enable = w.no_execv if w.force_execv else True + w.process_task = w._process_task if not threaded: semaphore = w.semaphore = LaxBoundedSemaphore(procs) w._quick_acquire = w.semaphore.acquire w._quick_release = w.semaphore.release max_restarts = 100 + if w.pool_putlocks and w.pool_cls.uses_semaphore: + w.process_task = w._process_task_sem allow_restart = self.autoreload_enabled or w.pool_restarts pool = w.pool = self.instantiate( w.pool_cls, w.min_concurrency, initargs=(w.app, w.hostname), maxtasksperchild=w.max_tasks_per_child, + max_memory_per_child=w.max_memory_per_child, timeout=w.task_time_limit, soft_timeout=w.task_soft_time_limit, putlocks=w.pool_putlocks and threaded, @@ -174,6 +170,7 @@ def create(self, w, 
semaphore=None, max_restarts=None): forking_enable=forking_enable, semaphore=semaphore, sched_strategy=self.optimization, + app=w.app, ) _set_task_join_will_block(pool.task_join_will_block) return pool @@ -203,7 +200,7 @@ def create(self, w): from celery.beat import EmbeddedService if w.pool_cls.__module__.endswith(('gevent', 'eventlet')): raise ImproperlyConfigured(ERR_B_GREEN) - b = w.beat = EmbeddedService(app=w.app, + b = w.beat = EmbeddedService(w.app, schedule_filename=w.schedule_filename, scheduler_cls=w.scheduler_cls) return b diff --git a/celery/worker/consumer/__init__.py b/celery/worker/consumer/__init__.py new file mode 100644 index 000000000..086ee9a47 --- /dev/null +++ b/celery/worker/consumer/__init__.py @@ -0,0 +1,17 @@ +from __future__ import absolute_import, unicode_literals + +from .consumer import Consumer + +from .agent import Agent +from .connection import Connection +from .control import Control +from .events import Events +from .gossip import Gossip +from .heart import Heart +from .mingle import Mingle +from .tasks import Tasks + +__all__ = [ + 'Consumer', 'Agent', 'Connection', 'Control', + 'Events', 'Gossip', 'Heart', 'Mingle', 'Tasks', +] diff --git a/celery/worker/consumer/agent.py b/celery/worker/consumer/agent.py new file mode 100644 index 000000000..9c1801a13 --- /dev/null +++ b/celery/worker/consumer/agent.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import, unicode_literals + +from celery import bootsteps + +from .connection import Connection + +__all__ = ['Agent'] + + +class Agent(bootsteps.StartStopStep): + + conditional = True + requires = (Connection,) + + def __init__(self, c, **kwargs): + self.agent_cls = self.enabled = c.app.conf.worker_agent + + def create(self, c): + agent = c.agent = self.instantiate(self.agent_cls, c.connection) + return agent diff --git a/celery/worker/consumer/connection.py b/celery/worker/consumer/connection.py new file mode 100644 index 000000000..e54aa248e --- /dev/null +++ 
b/celery/worker/consumer/connection.py @@ -0,0 +1,33 @@ +from __future__ import absolute_import, unicode_literals + +from kombu.common import ignore_errors + +from celery import bootsteps +from celery.utils.log import get_logger + +__all__ = ['Connection'] +logger = get_logger(__name__) +info = logger.info + + +class Connection(bootsteps.StartStopStep): + + def __init__(self, c, **kwargs): + c.connection = None + + def start(self, c): + c.connection = c.connect() + info('Connected to %s', c.connection.as_uri()) + + def shutdown(self, c): + # We must set self.connection to None here, so + # that the green pidbox thread exits. + connection, c.connection = c.connection, None + if connection: + ignore_errors(connection, connection.close) + + def info(self, c, params='N/A'): + if c.connection: + params = c.connection.info() + params.pop('password', None) # don't send password. + return {'broker': params} diff --git a/celery/worker/consumer.py b/celery/worker/consumer/consumer.py similarity index 51% rename from celery/worker/consumer.py rename to celery/worker/consumer/consumer.py index 16fa0ff4e..c189718fb 100644 --- a/celery/worker/consumer.py +++ b/celery/worker/consumer/consumer.py @@ -11,51 +11,38 @@ from __future__ import absolute_import import errno -import kombu import logging import os -import socket from collections import defaultdict -from functools import partial -from heapq import heappush -from operator import itemgetter from time import sleep from billiard.common import restart_state from billiard.exceptions import RestartFreqExceeded from kombu.async.semaphore import DummyLock -from kombu.common import QoS, ignore_errors +from kombu.five import buffer_t, items from kombu.syn import _detect_environment -from kombu.utils.compat import get_errno from kombu.utils.encoding import safe_repr, bytes_t from kombu.utils.limits import TokenBucket +from vine import ppartial, promise from celery import bootsteps +from celery import signals from celery.app.trace 
import build_tracer -from celery.canvas import signature -from celery.exceptions import InvalidTaskError -from celery.five import items, values +from celery.exceptions import InvalidTaskError, NotRegistered +from celery.utils import gethostname from celery.utils.functional import noop from celery.utils.log import get_logger +from celery.utils.objects import Bunch from celery.utils.text import truncate from celery.utils.timeutils import humanize_seconds, rate -from . import heartbeat, loops, pidbox -from .state import task_reserved, maybe_shutdown, revoked, reserved_requests +from celery.worker import loops +from celery.worker.state import ( + task_reserved, maybe_shutdown, reserved_requests, +) -try: - buffer_t = buffer -except NameError: # pragma: no cover - # Py3 does not have buffer, but we only need isinstance. - - class buffer_t(object): # noqa - pass - -__all__ = [ - 'Consumer', 'Connection', 'Events', 'Heart', 'Control', - 'Tasks', 'Evloop', 'Agent', 'Mingle', 'Gossip', 'dump_body', -] +__all__ = ['Consumer', 'Evloop', 'dump_body'] CLOSE = bootsteps.CLOSE logger = get_logger(__name__) @@ -123,10 +110,10 @@ class buffer_t(object): # noqa delivery_info:{3} headers={4}}} """ -MINGLE_GET_FIELDS = itemgetter('clock', 'revoked') - def dump_body(m, body): + # v2 protocol does not deserialize body + body = m.body if body is None else body if isinstance(body, buffer_t): body = bytes_t(body) return '{0} ({1}b)'.format(truncate(safe_repr(body), 1024), @@ -134,6 +121,7 @@ def dump_body(m, body): class Consumer(object): + Strategies = dict #: set when consumer is shutting down. 
@@ -155,15 +143,15 @@ class Consumer(object): class Blueprint(bootsteps.Blueprint): name = 'Consumer' default_steps = [ - 'celery.worker.consumer:Connection', - 'celery.worker.consumer:Mingle', - 'celery.worker.consumer:Events', - 'celery.worker.consumer:Gossip', - 'celery.worker.consumer:Heart', - 'celery.worker.consumer:Control', - 'celery.worker.consumer:Tasks', - 'celery.worker.consumer:Evloop', - 'celery.worker.consumer:Agent', + 'celery.worker.consumer.connection:Connection', + 'celery.worker.consumer.mingle:Mingle', + 'celery.worker.consumer.events:Events', + 'celery.worker.consumer.gossip:Gossip', + 'celery.worker.consumer.heart:Heart', + 'celery.worker.consumer.control:Control', + 'celery.worker.consumer.tasks:Tasks', + 'celery.worker.consumer.consumer:Evloop', + 'celery.worker.consumer.agent:Agent', ] def shutdown(self, parent): @@ -178,20 +166,21 @@ def __init__(self, on_task_request, self.app = app self.controller = controller self.init_callback = init_callback - self.hostname = hostname or socket.gethostname() + self.hostname = hostname or gethostname() self.pid = os.getpid() self.pool = pool self.timer = timer self.strategies = self.Strategies() - conninfo = self.app.connection() - self.connection_errors = conninfo.connection_errors - self.channel_errors = conninfo.channel_errors + self.conninfo = self.app.connection_for_read() + self.connection_errors = self.conninfo.connection_errors + self.channel_errors = self.conninfo.channel_errors self._restart_state = restart_state(maxR=5, maxT=1) self._does_info = logger.isEnabledFor(logging.INFO) + self._limit_order = 0 self.on_task_request = on_task_request self.on_task_message = set() - self.amqheartbeat_rate = self.app.conf.BROKER_HEARTBEAT_CHECKRATE + self.amqheartbeat_rate = self.app.conf.broker_heartbeat_checkrate self.disable_rate_limits = disable_rate_limits self.initial_prefetch_count = initial_prefetch_count self.prefetch_multiplier = prefetch_multiplier @@ -205,7 +194,7 @@ def __init__(self, 
on_task_request, if self.hub: self.amqheartbeat = amqheartbeat if self.amqheartbeat is None: - self.amqheartbeat = self.app.conf.BROKER_HEARTBEAT + self.amqheartbeat = self.app.conf.broker_heartbeat else: self.amqheartbeat = 0 @@ -216,7 +205,9 @@ def __init__(self, on_task_request, # there's a gevent bug that causes timeouts to not be reset, # so if the connection timeout is exceeded once, it can NEVER # connect again. - self.app.conf.BROKER_CONNECTION_TIMEOUT = None + self.app.conf.broker_connection_timeout = None + + self._pending_operations = [] self.steps = [] self.blueprint = self.Blueprint( @@ -224,6 +215,21 @@ def __init__(self, on_task_request, ) self.blueprint.apply(self, **dict(worker_options or {}, **kwargs)) + def call_soon(self, p, *args, **kwargs): + p = ppartial(p, *args, **kwargs) + if self.hub: + return self.hub.call_soon(p) + self._pending_operations.append(p) + return p + + def perform_pending_operations(self): + if not self.hub: + while self._pending_operations: + try: + self._pending_operations.pop()() + except Exception as exc: + error('Pending callback raised: %r', exc, exc_info=1) + def bucket_for_task(self, type): limit = rate(getattr(type, 'rate_limit', None)) return TokenBucket(limit, capacity=1) if limit else None @@ -259,25 +265,35 @@ def _update_qos_eventually(self, index): else self.qos.increment_eventually)( abs(index) * self.prefetch_multiplier) + def _limit_move_to_pool(self, request): + task_reserved(request) + self.on_task_request(request) + def _limit_task(self, request, bucket, tokens): if not bucket.can_consume(tokens): hold = bucket.expected_time(tokens) + pri = self._limit_order = (self._limit_order + 1) % 10 self.timer.call_after( - hold, self._limit_task, (request, bucket, tokens), + hold, self._limit_move_to_pool, (request,), + priority=pri, ) else: task_reserved(request) self.on_task_request(request) def start(self): - blueprint, loop = self.blueprint, self.loop + blueprint = self.blueprint while blueprint.state != 
CLOSE: self.restart_count += 1 maybe_shutdown() try: blueprint.start(self) except self.connection_errors as exc: - if isinstance(exc, OSError) and get_errno(exc) == errno.EMFILE: + # If we're not retrying connections, no need to catch + # connection errors + if not self.app.conf.broker_connection_retry: + raise + if isinstance(exc, OSError) and exc.errno == errno.EMFILE: raise # Too many open files maybe_shutdown() try: @@ -296,7 +312,7 @@ def start(self): def register_with_event_loop(self, hub): self.blueprint.send_all( - self, 'register_with_event_loop', args=(hub, ), + self, 'register_with_event_loop', args=(hub,), description='Hub.register', ) @@ -350,10 +366,10 @@ def connect(self): """Establish the broker connection. Will retry establishing the connection if the - :setting:`BROKER_CONNECTION_RETRY` setting is enabled + :setting:`broker_connection_retry` setting is enabled """ - conn = self.app.connection(heartbeat=self.amqheartbeat) + conn = self.app.connection_for_read(heartbeat=self.amqheartbeat) # Callback called for each retry while the connection # can't be established. @@ -365,25 +381,33 @@ def _error_handler(exc, interval, next_step=CONNECTION_RETRY_STEP): # remember that the connection is lazy, it won't establish # until needed. - if not self.app.conf.BROKER_CONNECTION_RETRY: + if not self.app.conf.broker_connection_retry: # retry disabled, just call connect directly. 
conn.connect() return conn conn = conn.ensure_connection( - _error_handler, self.app.conf.BROKER_CONNECTION_MAX_RETRIES, + _error_handler, self.app.conf.broker_connection_max_retries, callback=maybe_shutdown, ) if self.hub: conn.transport.register_with_event_loop(conn.connection, self.hub) return conn + def _flush_events(self): + if self.event_dispatcher: + self.event_dispatcher.flush() + + def on_send_event_buffered(self): + if self.hub: + self.hub._ready.add(self._flush_events) + def add_task_queue(self, queue, exchange=None, exchange_type=None, routing_key=None, **options): cset = self.task_consumer queues = self.app.amqp.queues # Must use in' here, as __missing__ will automatically - # create queues when CELERY_CREATE_MISSING_QUEUES is enabled. + # create queues when :setting:`task_create_missing_queues` is enabled. # (Issue #1079) if queue in queues: q = queues[queue] @@ -401,7 +425,7 @@ def add_task_queue(self, queue, exchange=None, exchange_type=None, info('Started consuming from %s', queue) def cancel_task_queue(self, queue): - info('Cancelling queue %s', queue) + info('Canceling queue %s', queue) self.app.amqp.queues.deselect(queue) self.task_consumer.cancel_by_queue(queue) @@ -422,14 +446,39 @@ def _message_report(self, body, message): def on_unknown_message(self, body, message): warn(UNKNOWN_FORMAT, self._message_report(body, message)) message.reject_log_error(logger, self.connection_errors) + signals.task_rejected.send(sender=self, message=message, exc=None) def on_unknown_task(self, body, message, exc): error(UNKNOWN_TASK_ERROR, exc, dump_body(message, body), exc_info=True) + try: + id_, name = message.headers['id'], message.headers['task'] + root_id = message.headers.get('root_id') + except KeyError: # proto1 + id_, name = body['id'], body['task'] + root_id = None + request = Bunch( + name=name, chord=None, root_id=root_id, + correlation_id=message.properties.get('correlation_id'), + reply_to=message.properties.get('reply_to'), + errbacks=None, + ) 
message.reject_log_error(logger, self.connection_errors) + self.app.backend.mark_as_failure( + id_, NotRegistered(name), request=request, + ) + if self.event_dispatcher: + self.event_dispatcher.send( + 'task-failed', uuid=id_, + exception='NotRegistered({0!r})'.format(name), + ) + signals.task_unknown.send( + sender=self, message=message, exc=exc, name=name, id=id_, + ) def on_invalid_task(self, body, message, exc): error(INVALID_TASK_ERROR, exc, dump_body(message, body), exc_info=True) message.reject_log_error(logger, self.connection_errors) + signals.task_rejected.send(sender=self, message=message, exc=exc) def update_strategies(self): loader = self.app.loader @@ -438,28 +487,50 @@ def update_strategies(self): task.__trace__ = build_tracer(name, task, loader, self.hostname, app=self.app) - def create_task_handler(self): + def create_task_handler(self, promise=promise): strategies = self.strategies on_unknown_message = self.on_unknown_message on_unknown_task = self.on_unknown_task on_invalid_task = self.on_invalid_task callbacks = self.on_task_message + call_soon = self.call_soon - def on_task_received(body, message): + def on_task_received(message): + # payload will only be set for v1 protocol, since v2 + # will defer deserializing the message body to the pool. 
+ payload = None try: - name = body['task'] - except (KeyError, TypeError): - return on_unknown_message(body, message) - + type_ = message.headers['task'] # protocol v2 + except TypeError: + return on_unknown_message(None, message) + except KeyError: + try: + payload = message.decode() + except Exception as exc: + return self.on_decode_error(message, exc) + try: + type_, payload = payload['task'], payload # protocol v1 + except (TypeError, KeyError): + return on_unknown_message(payload, message) try: - strategies[name](message, body, - message.ack_log_error, - message.reject_log_error, - callbacks) + strategy = strategies[type_] except KeyError as exc: - on_unknown_task(body, message, exc) - except InvalidTaskError as exc: - on_invalid_task(body, message, exc) + return on_unknown_task(None, message, exc) + else: + try: + strategy( + message, payload, + promise(call_soon, (message.ack_log_error,)), + promise(call_soon, (message.reject_log_error,)), + callbacks, + ) + except InvalidTaskError as exc: + return on_invalid_task(payload, message, exc) + except MemoryError: + raise + except Exception as exc: + # XXX handle as internal error? + return on_invalid_task(payload, message, exc) return on_task_received @@ -469,330 +540,8 @@ def __repr__(self): ) -class Connection(bootsteps.StartStopStep): - - def __init__(self, c, **kwargs): - c.connection = None - - def start(self, c): - c.connection = c.connect() - info('Connected to %s', c.connection.as_uri()) - - def shutdown(self, c): - # We must set self.connection to None here, so - # that the green pidbox thread exits. - connection, c.connection = c.connection, None - if connection: - ignore_errors(connection, connection.close) - - def info(self, c, params='N/A'): - if c.connection: - params = c.connection.info() - params.pop('password', None) # don't send password. 
- return {'broker': params} - - -class Events(bootsteps.StartStopStep): - requires = (Connection, ) - - def __init__(self, c, send_events=None, **kwargs): - self.send_events = True - self.groups = None if send_events else ['worker'] - c.event_dispatcher = None - - def start(self, c): - # flush events sent while connection was down. - prev = self._close(c) - dis = c.event_dispatcher = c.app.events.Dispatcher( - c.connect(), hostname=c.hostname, - enabled=self.send_events, groups=self.groups, - ) - if prev: - dis.extend_buffer(prev) - dis.flush() - - def stop(self, c): - pass - - def _close(self, c): - if c.event_dispatcher: - dispatcher = c.event_dispatcher - # remember changes from remote control commands: - self.groups = dispatcher.groups - - # close custom connection - if dispatcher.connection: - ignore_errors(c, dispatcher.connection.close) - ignore_errors(c, dispatcher.close) - c.event_dispatcher = None - return dispatcher - - def shutdown(self, c): - self._close(c) - - -class Heart(bootsteps.StartStopStep): - requires = (Events, ) - - def __init__(self, c, without_heartbeat=False, **kwargs): - self.enabled = not without_heartbeat - c.heart = None - - def start(self, c): - c.heart = heartbeat.Heart(c.timer, c.event_dispatcher) - c.heart.start() - - def stop(self, c): - c.heart = c.heart and c.heart.stop() - shutdown = stop - - -class Mingle(bootsteps.StartStopStep): - label = 'Mingle' - requires = (Events, ) - compatible_transports = set(['amqp', 'redis']) - - def __init__(self, c, without_mingle=False, **kwargs): - self.enabled = not without_mingle and self.compatible_transport(c.app) - - def compatible_transport(self, app): - with app.connection() as conn: - return conn.transport.driver_type in self.compatible_transports - - def start(self, c): - info('mingle: searching for neighbors') - I = c.app.control.inspect(timeout=1.0, connection=c.connection) - replies = I.hello(c.hostname, revoked._data) or {} - replies.pop(c.hostname, None) - if replies: - 
info('mingle: sync with %s nodes', - len([reply for reply, value in items(replies) if value])) - for reply in values(replies): - if reply: - try: - other_clock, other_revoked = MINGLE_GET_FIELDS(reply) - except KeyError: # reply from pre-3.1 worker - pass - else: - c.app.clock.adjust(other_clock) - revoked.update(other_revoked) - info('mingle: sync complete') - else: - info('mingle: all alone') - - -class Tasks(bootsteps.StartStopStep): - requires = (Mingle, ) - - def __init__(self, c, **kwargs): - c.task_consumer = c.qos = None - - def start(self, c): - c.update_strategies() - c.task_consumer = c.app.amqp.TaskConsumer( - c.connection, on_decode_error=c.on_decode_error, - ) - c.qos = QoS(c.task_consumer.qos, c.initial_prefetch_count) - c.qos.update() # set initial prefetch count - - def stop(self, c): - if c.task_consumer: - debug('Cancelling task consumer...') - ignore_errors(c, c.task_consumer.cancel) - - def shutdown(self, c): - if c.task_consumer: - self.stop(c) - debug('Closing consumer channel...') - ignore_errors(c, c.task_consumer.close) - c.task_consumer = None - - def info(self, c): - return {'prefetch_count': c.qos.value if c.qos else 'N/A'} - - -class Agent(bootsteps.StartStopStep): - conditional = True - requires = (Connection, ) - - def __init__(self, c, **kwargs): - self.agent_cls = self.enabled = c.app.conf.CELERYD_AGENT - - def create(self, c): - agent = c.agent = self.instantiate(self.agent_cls, c.connection) - return agent - - -class Control(bootsteps.StartStopStep): - requires = (Tasks, ) - - def __init__(self, c, **kwargs): - self.is_green = c.pool is not None and c.pool.is_green - self.box = (pidbox.gPidbox if self.is_green else pidbox.Pidbox)(c) - self.start = self.box.start - self.stop = self.box.stop - self.shutdown = self.box.shutdown - - def include_if(self, c): - return c.app.conf.CELERY_ENABLE_REMOTE_CONTROL - - -class Gossip(bootsteps.ConsumerStep): - label = 'Gossip' - requires = (Mingle, ) - _cons_stamp_fields = itemgetter( - 'id', 
'clock', 'hostname', 'pid', 'topic', 'action', 'cver', - ) - compatible_transports = set(['amqp', 'redis']) - - def __init__(self, c, without_gossip=False, interval=5.0, **kwargs): - self.enabled = not without_gossip and self.compatible_transport(c.app) - self.app = c.app - c.gossip = self - self.Receiver = c.app.events.Receiver - self.hostname = c.hostname - self.full_hostname = '.'.join([self.hostname, str(c.pid)]) - - self.timer = c.timer - if self.enabled: - self.state = c.app.events.State( - on_node_join=self.on_node_join, - on_node_leave=self.on_node_leave, - max_tasks_in_memory=1, - ) - if c.hub: - c._mutex = DummyLock() - self.update_state = self.state.event - self.interval = interval - self._tref = None - self.consensus_requests = defaultdict(list) - self.consensus_replies = {} - self.event_handlers = { - 'worker.elect': self.on_elect, - 'worker.elect.ack': self.on_elect_ack, - } - self.clock = c.app.clock - - self.election_handlers = { - 'task': self.call_task - } - - def compatible_transport(self, app): - with app.connection() as conn: - return conn.transport.driver_type in self.compatible_transports - - def election(self, id, topic, action=None): - self.consensus_replies[id] = [] - self.dispatcher.send( - 'worker-elect', - id=id, topic=topic, action=action, cver=1, - ) - - def call_task(self, task): - try: - signature(task, app=self.app).apply_async() - except Exception as exc: - error('Could not call task: %r', exc, exc_info=1) - - def on_elect(self, event): - try: - (id_, clock, hostname, pid, - topic, action, _) = self._cons_stamp_fields(event) - except KeyError as exc: - return error('election request missing field %s', exc, exc_info=1) - heappush( - self.consensus_requests[id_], - (clock, '%s.%s' % (hostname, pid), topic, action), - ) - self.dispatcher.send('worker-elect-ack', id=id_) - - def start(self, c): - super(Gossip, self).start(c) - self.dispatcher = c.event_dispatcher - - def on_elect_ack(self, event): - id = event['id'] - try: - replies = 
self.consensus_replies[id] - except KeyError: - return # not for us - alive_workers = self.state.alive_workers() - replies.append(event['hostname']) - - if len(replies) >= len(alive_workers): - _, leader, topic, action = self.clock.sort_heap( - self.consensus_requests[id], - ) - if leader == self.full_hostname: - info('I won the election %r', id) - try: - handler = self.election_handlers[topic] - except KeyError: - error('Unknown election topic %r', topic, exc_info=1) - else: - handler(action) - else: - info('node %s elected for %r', leader, id) - self.consensus_requests.pop(id, None) - self.consensus_replies.pop(id, None) - - def on_node_join(self, worker): - debug('%s joined the party', worker.hostname) - - def on_node_leave(self, worker): - debug('%s left', worker.hostname) - - def on_node_lost(self, worker): - info('missed heartbeat from %s', worker.hostname) - - def register_timer(self): - if self._tref is not None: - self._tref.cancel() - self._tref = self.timer.call_repeatedly(self.interval, self.periodic) - - def periodic(self): - workers = self.state.workers - dirty = set() - for worker in values(workers): - if not worker.alive: - dirty.add(worker) - self.on_node_lost(worker) - for worker in dirty: - workers.pop(worker.hostname, None) - - def get_consumers(self, channel): - self.register_timer() - ev = self.Receiver(channel, routing_key='worker.#') - return [kombu.Consumer( - channel, - queues=[ev.queue], - on_message=partial(self.on_message, ev.event_from_message), - no_ack=True - )] - - def on_message(self, prepare, message): - _type = message.delivery_info['routing_key'] - - # For redis when `fanout_patterns=False` (See Issue #1882) - if _type.split('.', 1)[0] == 'task': - return - try: - handler = self.event_handlers[_type] - except KeyError: - pass - else: - return handler(message.payload) - - hostname = (message.headers.get('hostname') or - message.payload['hostname']) - if hostname != self.hostname: - type, event = prepare(message.payload) - obj, 
subject = self.update_state(event) - else: - self.clock.forward() - - class Evloop(bootsteps.StartStopStep): + label = 'event loop' last = True diff --git a/celery/worker/consumer/control.py b/celery/worker/consumer/control.py new file mode 100644 index 000000000..f99b2fc7e --- /dev/null +++ b/celery/worker/consumer/control.py @@ -0,0 +1,27 @@ +from __future__ import absolute_import, unicode_literals + +from celery import bootsteps +from celery.utils.log import get_logger + +from celery.worker import pidbox + +from .tasks import Tasks + +__all__ = ['Control'] +logger = get_logger(__name__) + + +class Control(bootsteps.StartStopStep): + + requires = (Tasks,) + + def __init__(self, c, **kwargs): + self.is_green = c.pool is not None and c.pool.is_green + self.box = (pidbox.gPidbox if self.is_green else pidbox.Pidbox)(c) + self.start = self.box.start + self.stop = self.box.stop + self.shutdown = self.box.shutdown + + def include_if(self, c): + return (c.app.conf.worker_enable_remote_control and + c.conninfo.supports_exchange_type('fanout')) diff --git a/celery/worker/consumer/events.py b/celery/worker/consumer/events.py new file mode 100644 index 000000000..0f32f203d --- /dev/null +++ b/celery/worker/consumer/events.py @@ -0,0 +1,56 @@ +from __future__ import absolute_import, unicode_literals + +from kombu.common import ignore_errors + +from celery import bootsteps + +from .connection import Connection + +__all__ = ['Events'] + + +class Events(bootsteps.StartStopStep): + + requires = (Connection,) + + def __init__(self, c, send_events=True, + without_heartbeat=False, without_gossip=False, **kwargs): + self.groups = None if send_events else ['worker'] + self.send_events = ( + send_events or + not without_gossip or + not without_heartbeat + ) + c.event_dispatcher = None + + def start(self, c): + # flush events sent while connection was down. 
+ prev = self._close(c) + dis = c.event_dispatcher = c.app.events.Dispatcher( + c.connect(), hostname=c.hostname, + enabled=self.send_events, groups=self.groups, + buffer_group=['task'] if c.hub else None, + on_send_buffered=c.on_send_event_buffered if c.hub else None, + ) + if prev: + dis.extend_buffer(prev) + dis.flush() + + def stop(self, c): + pass + + def _close(self, c): + if c.event_dispatcher: + dispatcher = c.event_dispatcher + # remember changes from remote control commands: + self.groups = dispatcher.groups + + # close custom connection + if dispatcher.connection: + ignore_errors(c, dispatcher.connection.close) + ignore_errors(c, dispatcher.close) + c.event_dispatcher = None + return dispatcher + + def shutdown(self, c): + self._close(c) diff --git a/celery/worker/consumer/gossip.py b/celery/worker/consumer/gossip.py new file mode 100644 index 000000000..8289ad89c --- /dev/null +++ b/celery/worker/consumer/gossip.py @@ -0,0 +1,195 @@ +from __future__ import absolute_import, unicode_literals + +from collections import defaultdict +from functools import partial +from heapq import heappush +from operator import itemgetter + +from kombu import Consumer +from kombu.async.semaphore import DummyLock + +from celery import bootsteps +from celery.five import values +from celery.utils.log import get_logger +from celery.utils.objects import Bunch + +from .mingle import Mingle + +__all__ = ['Gossip'] +logger = get_logger(__name__) +debug, info, error = logger.debug, logger.info, logger.error + + +class Gossip(bootsteps.ConsumerStep): + + label = 'Gossip' + requires = (Mingle,) + _cons_stamp_fields = itemgetter( + 'id', 'clock', 'hostname', 'pid', 'topic', 'action', 'cver', + ) + compatible_transports = {'amqp', 'redis'} + + def __init__(self, c, without_gossip=False, + interval=5.0, heartbeat_interval=2.0, **kwargs): + self.enabled = not without_gossip and self.compatible_transport(c.app) + self.app = c.app + c.gossip = self + self.Receiver = c.app.events.Receiver + 
self.hostname = c.hostname + self.full_hostname = '.'.join([self.hostname, str(c.pid)]) + self.on = Bunch( + node_join=set(), + node_leave=set(), + node_lost=set(), + ) + + self.timer = c.timer + if self.enabled: + self.state = c.app.events.State( + on_node_join=self.on_node_join, + on_node_leave=self.on_node_leave, + max_tasks_in_memory=1, + ) + if c.hub: + c._mutex = DummyLock() + self.update_state = self.state.event + self.interval = interval + self.heartbeat_interval = heartbeat_interval + self._tref = None + self.consensus_requests = defaultdict(list) + self.consensus_replies = {} + self.event_handlers = { + 'worker.elect': self.on_elect, + 'worker.elect.ack': self.on_elect_ack, + } + self.clock = c.app.clock + + self.election_handlers = { + 'task': self.call_task + } + + def compatible_transport(self, app): + with app.connection_for_read() as conn: + return conn.transport.driver_type in self.compatible_transports + + def election(self, id, topic, action=None): + self.consensus_replies[id] = [] + self.dispatcher.send( + 'worker-elect', + id=id, topic=topic, action=action, cver=1, + ) + + def call_task(self, task): + try: + self.app.signature(task).apply_async() + except Exception as exc: + error('Could not call task: %r', exc, exc_info=1) + + def on_elect(self, event): + try: + (id_, clock, hostname, pid, + topic, action, _) = self._cons_stamp_fields(event) + except KeyError as exc: + return error('election request missing field %s', exc, exc_info=1) + heappush( + self.consensus_requests[id_], + (clock, '%s.%s' % (hostname, pid), topic, action), + ) + self.dispatcher.send('worker-elect-ack', id=id_) + + def start(self, c): + super(Gossip, self).start(c) + self.dispatcher = c.event_dispatcher + + def on_elect_ack(self, event): + id = event['id'] + try: + replies = self.consensus_replies[id] + except KeyError: + return # not for us + alive_workers = self.state.alive_workers() + replies.append(event['hostname']) + + if len(replies) >= len(alive_workers): + _, 
leader, topic, action = self.clock.sort_heap( + self.consensus_requests[id], + ) + if leader == self.full_hostname: + info('I won the election %r', id) + try: + handler = self.election_handlers[topic] + except KeyError: + error('Unknown election topic %r', topic, exc_info=1) + else: + handler(action) + else: + info('node %s elected for %r', leader, id) + self.consensus_requests.pop(id, None) + self.consensus_replies.pop(id, None) + + def on_node_join(self, worker): + debug('%s joined the party', worker.hostname) + self._call_handlers(self.on.node_join, worker) + + def on_node_leave(self, worker): + debug('%s left', worker.hostname) + self._call_handlers(self.on.node_leave, worker) + + def on_node_lost(self, worker): + info('missed heartbeat from %s', worker.hostname) + self._call_handlers(self.on.node_lost, worker) + + def _call_handlers(self, handlers, *args, **kwargs): + for handler in handlers: + try: + handler(*args, **kwargs) + except Exception as exc: + error('Ignored error from handler %r: %r', + handler, exc, exc_info=1) + + def register_timer(self): + if self._tref is not None: + self._tref.cancel() + self._tref = self.timer.call_repeatedly(self.interval, self.periodic) + + def periodic(self): + workers = self.state.workers + dirty = set() + for worker in values(workers): + if not worker.alive: + dirty.add(worker) + self.on_node_lost(worker) + for worker in dirty: + workers.pop(worker.hostname, None) + + def get_consumers(self, channel): + self.register_timer() + ev = self.Receiver(channel, routing_key='worker.#', + queue_ttl=self.heartbeat_interval) + return [Consumer( + channel, + queues=[ev.queue], + on_message=partial(self.on_message, ev.event_from_message), + no_ack=True + )] + + def on_message(self, prepare, message): + _type = message.delivery_info['routing_key'] + + # For redis when `fanout_patterns=False` (See Issue #1882) + if _type.split('.', 1)[0] == 'task': + return + try: + handler = self.event_handlers[_type] + except KeyError: + pass + 
else: + return handler(message.payload) + + hostname = (message.headers.get('hostname') or + message.payload['hostname']) + if hostname != self.hostname: + type, event = prepare(message.payload) + self.update_state(event) + else: + self.clock.forward() diff --git a/celery/worker/consumer/heart.py b/celery/worker/consumer/heart.py new file mode 100644 index 000000000..0f0173c63 --- /dev/null +++ b/celery/worker/consumer/heart.py @@ -0,0 +1,30 @@ +from __future__ import absolute_import, unicode_literals + +from celery import bootsteps + +from celery.worker import heartbeat + +from .events import Events + +__all__ = ['Heart'] + + +class Heart(bootsteps.StartStopStep): + + requires = (Events,) + + def __init__(self, c, + without_heartbeat=False, heartbeat_interval=None, **kwargs): + self.enabled = not without_heartbeat + self.heartbeat_interval = heartbeat_interval + c.heart = None + + def start(self, c): + c.heart = heartbeat.Heart( + c.timer, c.event_dispatcher, self.heartbeat_interval, + ) + c.heart.start() + + def stop(self, c): + c.heart = c.heart and c.heart.stop() + shutdown = stop diff --git a/celery/worker/consumer/mingle.py b/celery/worker/consumer/mingle.py new file mode 100644 index 000000000..2ca059149 --- /dev/null +++ b/celery/worker/consumer/mingle.py @@ -0,0 +1,65 @@ +from __future__ import absolute_import, unicode_literals + +from operator import itemgetter + +from celery import bootsteps +from celery.five import items +from celery.utils.log import get_logger + +from .events import Events + +__all__ = ['Mingle'] + +MINGLE_GET_FIELDS = itemgetter('clock', 'revoked') + +logger = get_logger(__name__) +debug, info, exception = logger.debug, logger.info, logger.exception + + +class Mingle(bootsteps.StartStopStep): + + label = 'Mingle' + requires = (Events,) + compatible_transports = {'amqp', 'redis'} + + def __init__(self, c, without_mingle=False, **kwargs): + self.enabled = not without_mingle and self.compatible_transport(c.app) + + def 
compatible_transport(self, app): + with app.connection_for_read() as conn: + return conn.transport.driver_type in self.compatible_transports + + def start(self, c): + info('mingle: searching for neighbors') + I = c.app.control.inspect(timeout=1.0, connection=c.connection) + our_revoked = c.controller.state.revoked + replies = I.hello(c.hostname, our_revoked._data) or {} + replies.pop(c.hostname, None) # delete my own response + if replies: + info('mingle: sync with %s nodes', + len([reply for reply, value in items(replies) if value])) + [self.on_node_reply(c, nodename, reply) + for nodename, reply in items(replies) if reply] + info('mingle: sync complete') + else: + info('mingle: all alone') + + def on_node_reply(self, c, nodename, reply): + debug('mingle: processing reply from %s', nodename) + try: + self.sync_with_node(c, **reply) + except MemoryError: + raise + except Exception as exc: + exception('mingle: sync with %s failed: %r', nodename, exc) + + def sync_with_node(self, c, clock=None, revoked=None, **kwargs): + self.on_clock_event(c, clock) + self.on_revoked_received(c, revoked) + + def on_clock_event(self, c, clock): + c.app.clock.adjust(clock) if clock else c.app.clock.forward() + + def on_revoked_received(self, c, revoked): + if revoked: + c.controller.state.revoked.update(revoked) diff --git a/celery/worker/consumer/tasks.py b/celery/worker/consumer/tasks.py new file mode 100644 index 000000000..2a4f9b785 --- /dev/null +++ b/celery/worker/consumer/tasks.py @@ -0,0 +1,59 @@ +from __future__ import absolute_import, unicode_literals + +from kombu.common import QoS, ignore_errors + +from celery import bootsteps +from celery.utils.log import get_logger + +from .mingle import Mingle + +__all__ = ['Tasks'] +logger = get_logger(__name__) +debug = logger.debug + + +class Tasks(bootsteps.StartStopStep): + + requires = (Mingle,) + + def __init__(self, c, **kwargs): + c.task_consumer = c.qos = None + + def start(self, c): + c.update_strategies() + + # - RabbitMQ 
3.3 completely redefines how basic_qos works.. + # This will detect if the new qos semantics is in effect, + and if so make sure the 'apply_global' flag is set on qos updates. + qos_global = not c.connection.qos_semantics_matches_spec + + # set initial prefetch count + c.connection.default_channel.basic_qos( + 0, c.initial_prefetch_count, qos_global, + ) + + c.task_consumer = c.app.amqp.TaskConsumer( + c.connection, on_decode_error=c.on_decode_error, + ) + + def set_prefetch_count(prefetch_count): + return c.task_consumer.qos( + prefetch_count=prefetch_count, + apply_global=qos_global, + ) + c.qos = QoS(set_prefetch_count, c.initial_prefetch_count) + + def stop(self, c): + if c.task_consumer: + debug('Canceling task consumer...') + ignore_errors(c, c.task_consumer.cancel) + + def shutdown(self, c): + if c.task_consumer: + self.stop(c) + debug('Closing consumer channel...') + ignore_errors(c, c.task_consumer.close) + c.task_consumer = None + + def info(self, c): + return {'prefetch_count': c.qos.value if c.qos else 'N/A'} diff --git a/celery/worker/control.py b/celery/worker/control.py index fcaf04081..f223ff154 100644 --- a/celery/worker/control.py +++ b/celery/worker/control.py @@ -11,10 +11,11 @@ import io import tempfile +from billiard.common import TERM_SIGNAME from kombu.utils.encoding import safe_repr from celery.exceptions import WorkerShutdown -from celery.five import UserDict, items +from celery.five import UserDict, items, string_t from celery.platforms import signals as _signals from celery.utils import timeutils from celery.utils.functional import maybe_list @@ -22,14 +23,22 @@ from celery.utils import jsonify from . 
import state as worker_state +from .request import Request from .state import revoked -from .job import Request __all__ = ['Panel'] DEFAULT_TASK_INFO_ITEMS = ('exchange', 'routing_key', 'rate_limit') logger = get_logger(__name__) +def ok(value): + return {'ok': value} + + +def nok(value): + return {'error': value} + + class Panel(UserDict): data = dict() # Global registry. @@ -52,21 +61,15 @@ def _find_requests_by_id(ids, requests): @Panel.register def query_task(state, ids, **kwargs): ids = maybe_list(ids) - - def reqinfo(state, req): - return state, req.info() - - reqs = dict((req.id, ('reserved', req.info())) - for req in _find_requests_by_id( - ids, worker_state.reserved_requests)) - reqs.update(dict( - (req.id, ('active', req.info())) + return dict({ + req.id: ('reserved', req.info()) for req in _find_requests_by_id( - ids, worker_state.active_requests, - ) - )) - - return reqs + ids, state.tset(worker_state.reserved_requests)) + }, **{ + req.id: ('active', req.info()) + for req in _find_requests_by_id( + ids, state.tset(worker_state.active_requests)) + }) @Panel.register @@ -79,12 +82,12 @@ def revoke(state, task_id, terminate=False, signal=None, **kwargs): revoked.update(task_ids) if terminate: - signum = _signals.signum(signal or 'TERM') + signum = _signals.signum(signal or TERM_SIGNAME) # reserved_requests changes size during iteration # so need to consume the items first, then terminate after. 
requests = set(_find_requests_by_id( task_ids, - worker_state.reserved_requests, + state.tset(worker_state.reserved_requests), )) for request in requests: if request.id not in terminated: @@ -95,27 +98,27 @@ def revoke(state, task_id, terminate=False, signal=None, **kwargs): break if not terminated: - return {'ok': 'terminate: tasks unknown'} - return {'ok': 'terminate: {0}'.format(', '.join(terminated))} + return ok('terminate: tasks unknown') + return ok('terminate: {0}'.format(', '.join(terminated))) idstr = ', '.join(task_ids) logger.info('Tasks flagged as revoked: %s', idstr) - return {'ok': 'tasks {0} flagged as revoked'.format(idstr)} + return ok('tasks {0} flagged as revoked'.format(idstr)) @Panel.register def report(state): - return {'ok': state.app.bugreport()} + return ok(state.app.bugreport()) @Panel.register def enable_events(state): dispatcher = state.consumer.event_dispatcher - if 'task' not in dispatcher.groups: + if dispatcher.groups and 'task' not in dispatcher.groups: dispatcher.groups.add('task') logger.info('Events of group {task} enabled by remote.') - return {'ok': 'task events enabled'} - return {'ok': 'task events already enabled'} + return ok('task events enabled') + return ok('task events already enabled') @Panel.register @@ -124,8 +127,8 @@ def disable_events(state): if 'task' in dispatcher.groups: dispatcher.groups.discard('task') logger.info('Events of group {task} disabled by remote.') - return {'ok': 'task events disabled'} - return {'ok': 'task events already disabled'} + return ok('task events disabled') + return ok('task events already disabled') @Panel.register @@ -149,24 +152,24 @@ def rate_limit(state, task_name, rate_limit, **kwargs): try: timeutils.rate(rate_limit) except ValueError as exc: - return {'error': 'Invalid rate limit string: {0!r}'.format(exc)} + return nok('Invalid rate limit string: {0!r}'.format(exc)) try: state.app.tasks[task_name].rate_limit = rate_limit except KeyError: logger.error('Rate limit attempt for 
unknown task %s', task_name, exc_info=True) - return {'error': 'unknown task'} + return nok('unknown task') state.consumer.reset_rate_limits() if not rate_limit: logger.info('Rate limits disabled for tasks of type %s', task_name) - return {'ok': 'rate limit disabled successfully'} + return ok('rate limit disabled successfully') logger.info('New rate limit for tasks of type %s: %s.', task_name, rate_limit) - return {'ok': 'new rate limit set successfully'} + return ok('new rate limit set successfully') @Panel.register @@ -176,14 +179,14 @@ def time_limit(state, task_name=None, hard=None, soft=None, **kwargs): except KeyError: logger.error('Change time limit attempt for unknown task %s', task_name, exc_info=True) - return {'error': 'unknown task'} + return nok('unknown task') task.soft_time_limit = soft task.time_limit = hard logger.info('New time limits for tasks of type %s: soft=%s hard=%s', task_name, soft, hard) - return {'ok': 'time limits set successfully'} + return ok('time limits set successfully') @Panel.register @@ -205,7 +208,10 @@ def prepare_entries(): @Panel.register def dump_reserved(state, safe=False, **kwargs): - reserved = worker_state.reserved_requests - worker_state.active_requests + reserved = ( + state.tset(worker_state.reserved_requests) - + state.tset(worker_state.active_requests) + ) if not reserved: return [] return [request.info(safe=safe) for request in reserved] @@ -214,7 +220,7 @@ def dump_reserved(state, safe=False, **kwargs): @Panel.register def dump_active(state, safe=False, **kwargs): return [request.info(safe=safe) - for request in worker_state.active_requests] + for request in state.tset(worker_state.active_requests)] @Panel.register @@ -228,7 +234,7 @@ def objgraph(state, num=200, max_depth=10, type='Request'): # pragma: no cover import objgraph except ImportError: raise ImportError('Requires the objgraph library') - print('Dumping graph for type %r' % (type, )) + logger.info('Dumping graph for type %r', type) with 
tempfile.NamedTemporaryFile(prefix='cobjg', suffix='.png', delete=False) as fh: objects = objgraph.by_type(type)[:num] @@ -275,25 +281,29 @@ def hello(state, from_node, revoked=None, **kwargs): @Panel.register -def dump_tasks(state, taskinfoitems=None, **kwargs): - tasks = state.app.tasks +def dump_tasks(state, taskinfoitems=None, builtins=False, **kwargs): + reg = state.app.tasks taskinfoitems = taskinfoitems or DEFAULT_TASK_INFO_ITEMS + tasks = reg if builtins else ( + task for task in reg if not task.startswith('celery.')) + def _extract_info(task): - fields = dict((field, str(getattr(task, field, None))) - for field in taskinfoitems - if getattr(task, field, None) is not None) + fields = { + field: str(getattr(task, field, None)) for field in taskinfoitems + if getattr(task, field, None) is not None + } if fields: info = ['='.join(f) for f in items(fields)] return '{0} [{1}]'.format(task.name, ' '.join(info)) return task.name - return [_extract_info(tasks[task]) for task in sorted(tasks)] + return [_extract_info(reg[task]) for task in sorted(tasks)] @Panel.register def ping(state, **kwargs): - return {'ok': 'pong'} + return ok('pong') @Panel.register @@ -303,7 +313,7 @@ def pool_grow(state, n=1, **kwargs): else: state.consumer.pool.grow(n) state.consumer._update_prefetch_count(n) - return {'ok': 'pool will grow'} + return ok('pool will grow') @Panel.register @@ -313,14 +323,14 @@ def pool_shrink(state, n=1, **kwargs): else: state.consumer.pool.shrink(n) state.consumer._update_prefetch_count(-n) - return {'ok': 'pool will shrink'} + return ok('pool will shrink') @Panel.register def pool_restart(state, modules=None, reload=False, reloader=None, **kwargs): - if state.app.conf.CELERYD_POOL_RESTARTS: + if state.app.conf.worker_pool_restarts: state.consumer.controller.reload(modules, reload, reloader=reloader) - return {'ok': 'reload started'} + return ok('reload started') else: raise ValueError('Pool restarts not enabled') @@ -330,7 +340,7 @@ def autoscale(state, 
max=None, min=None): autoscaler = state.consumer.controller.autoscaler if autoscaler: max_, min_ = autoscaler.update(max, min) - return {'ok': 'autoscale now min={0} max={1}'.format(max_, min_)} + return ok('autoscale now max={0} min={1}'.format(max_, min_)) raise ValueError('Autoscale not enabled') @@ -343,15 +353,19 @@ def shutdown(state, msg='Got shutdown from remote', **kwargs): @Panel.register def add_consumer(state, queue, exchange=None, exchange_type=None, routing_key=None, **options): - state.consumer.add_task_queue(queue, exchange, exchange_type, - routing_key, **options) - return {'ok': 'add consumer {0}'.format(queue)} + state.consumer.call_soon( + state.consumer.add_task_queue, + queue, exchange, exchange_type, routing_key, **options + ) + return ok('add consumer {0}'.format(queue)) @Panel.register def cancel_consumer(state, queue=None, **_): - state.consumer.cancel_task_queue(queue) - return {'ok': 'no longer consuming from {0}'.format(queue)} + state.consumer.call_soon( + state.consumer.cancel_task_queue, queue, + ) + return ok('no longer consuming from {0}'.format(queue)) @Panel.register @@ -364,7 +378,7 @@ def active_queues(state): def _wanted_config_key(key): - return key.isupper() and not key.startswith('__') + return isinstance(key, string_t) and not key.startswith('__') @Panel.register diff --git a/celery/worker/heartbeat.py b/celery/worker/heartbeat.py index cf46ab0c8..fe2550541 100644 --- a/celery/worker/heartbeat.py +++ b/celery/worker/heartbeat.py @@ -47,7 +47,7 @@ def start(self): if self.eventer.enabled: self._send('worker-online') self.tref = self.timer.call_repeatedly( - self.interval, self._send, ('worker-heartbeat', ), + self.interval, self._send, ('worker-heartbeat',), ) def stop(self): diff --git a/celery/worker/job.py b/celery/worker/job.py deleted file mode 100644 index b277520e3..000000000 --- a/celery/worker/job.py +++ /dev/null @@ -1,587 +0,0 @@ -# -*- coding: utf-8 -*- -""" - celery.worker.job - ~~~~~~~~~~~~~~~~~ - - This 
module defines the :class:`Request` class, - which specifies how tasks are executed. - -""" -from __future__ import absolute_import, unicode_literals - -import logging -import socket -import sys - -from billiard.einfo import ExceptionInfo -from datetime import datetime -from weakref import ref - -from kombu.utils import kwdict, reprcall -from kombu.utils.encoding import safe_repr, safe_str - -from celery import signals -from celery.app.trace import trace_task, trace_task_ret -from celery.exceptions import ( - Ignore, TaskRevokedError, InvalidTaskError, - SoftTimeLimitExceeded, TimeLimitExceeded, - WorkerLostError, Terminated, Retry, Reject, -) -from celery.five import items, monotonic, string, string_t -from celery.platforms import signals as _signals -from celery.utils import fun_takes_kwargs -from celery.utils.functional import noop -from celery.utils.log import get_logger -from celery.utils.serialization import get_pickled_exception -from celery.utils.text import truncate -from celery.utils.timeutils import maybe_iso8601, timezone, maybe_make_aware - -from . import state - -__all__ = ['Request'] - -IS_PYPY = hasattr(sys, 'pypy_version_info') - -logger = get_logger(__name__) -debug, info, warn, error = (logger.debug, logger.info, - logger.warning, logger.error) -_does_info = False -_does_debug = False - -#: Max length of result representation -RESULT_MAXLEN = 128 - - -def __optimize__(): - # this is also called by celery.app.trace.setup_worker_optimizations - global _does_debug - global _does_info - _does_debug = logger.isEnabledFor(logging.DEBUG) - _does_info = logger.isEnabledFor(logging.INFO) -__optimize__() - -# Localize -tz_utc = timezone.utc -tz_or_local = timezone.tz_or_local -send_revoked = signals.task_revoked.send - -task_accepted = state.task_accepted -task_ready = state.task_ready -revoked_tasks = state.revoked - -NEEDS_KWDICT = sys.version_info <= (2, 6) - -#: Use when no message object passed to :class:`Request`. 
-DEFAULT_FIELDS = { - 'headers': None, - 'reply_to': None, - 'correlation_id': None, - 'delivery_info': { - 'exchange': None, - 'routing_key': None, - 'priority': 0, - 'redelivered': False, - }, -} - - -class Request(object): - """A request for task execution.""" - if not IS_PYPY: # pragma: no cover - __slots__ = ( - 'app', 'name', 'id', 'args', 'kwargs', 'on_ack', - 'hostname', 'eventer', 'connection_errors', 'task', 'eta', - 'expires', 'request_dict', 'acknowledged', 'on_reject', - 'utc', 'time_start', 'worker_pid', '_already_revoked', - '_terminate_on_ack', '_apply_result', - '_tzlocal', '__weakref__', '__dict__', - ) - - #: Format string used to log task success. - success_msg = """\ - Task %(name)s[%(id)s] succeeded in %(runtime)ss: %(return_value)s - """ - - #: Format string used to log task failure. - error_msg = """\ - Task %(name)s[%(id)s] %(description)s: %(exc)s - """ - - #: Format string used to log internal error. - internal_error_msg = """\ - Task %(name)s[%(id)s] %(description)s: %(exc)s - """ - - ignored_msg = """\ - Task %(name)s[%(id)s] %(description)s - """ - - rejected_msg = """\ - Task %(name)s[%(id)s] %(exc)s - """ - - #: Format string used to log task retry. 
- retry_msg = """Task %(name)s[%(id)s] retry: %(exc)s""" - - def __init__(self, body, on_ack=noop, - hostname=None, eventer=None, app=None, - connection_errors=None, request_dict=None, - message=None, task=None, on_reject=noop, **opts): - self.app = app - name = self.name = body['task'] - self.id = body['id'] - self.args = body.get('args', []) - self.kwargs = body.get('kwargs', {}) - try: - self.kwargs.items - except AttributeError: - raise InvalidTaskError( - 'Task keyword arguments is not a mapping') - if NEEDS_KWDICT: - self.kwargs = kwdict(self.kwargs) - eta = body.get('eta') - expires = body.get('expires') - utc = self.utc = body.get('utc', False) - self.on_ack = on_ack - self.on_reject = on_reject - self.hostname = hostname or socket.gethostname() - self.eventer = eventer - self.connection_errors = connection_errors or () - self.task = task or self.app.tasks[name] - self.acknowledged = self._already_revoked = False - self.time_start = self.worker_pid = self._terminate_on_ack = None - self._apply_result = None - self._tzlocal = None - - # timezone means the message is timezone-aware, and the only timezone - # supported at this point is UTC. 
- if eta is not None: - try: - self.eta = maybe_iso8601(eta) - except (AttributeError, ValueError, TypeError) as exc: - raise InvalidTaskError( - 'invalid eta value {0!r}: {1}'.format(eta, exc)) - if utc: - self.eta = maybe_make_aware(self.eta, self.tzlocal) - else: - self.eta = None - if expires is not None: - try: - self.expires = maybe_iso8601(expires) - except (AttributeError, ValueError, TypeError) as exc: - raise InvalidTaskError( - 'invalid expires value {0!r}: {1}'.format(expires, exc)) - if utc: - self.expires = maybe_make_aware(self.expires, self.tzlocal) - else: - self.expires = None - - if message: - delivery_info = message.delivery_info or {} - properties = message.properties or {} - body.update({ - 'headers': message.headers, - 'reply_to': properties.get('reply_to'), - 'correlation_id': properties.get('correlation_id'), - 'delivery_info': { - 'exchange': delivery_info.get('exchange'), - 'routing_key': delivery_info.get('routing_key'), - 'priority': delivery_info.get('priority'), - 'redelivered': delivery_info.get('redelivered'), - } - - }) - else: - body.update(DEFAULT_FIELDS) - self.request_dict = body - - @property - def delivery_info(self): - return self.request_dict['delivery_info'] - - def extend_with_default_kwargs(self): - """Extend the tasks keyword arguments with standard task arguments. - - Currently these are `logfile`, `loglevel`, `task_id`, - `task_name`, `task_retries`, and `delivery_info`. - - See :meth:`celery.task.base.Task.run` for more information. - - Magic keyword arguments are deprecated and will be removed - in version 4.0. 
- - """ - kwargs = dict(self.kwargs) - default_kwargs = {'logfile': None, # deprecated - 'loglevel': None, # deprecated - 'task_id': self.id, - 'task_name': self.name, - 'task_retries': self.request_dict.get('retries', 0), - 'task_is_eager': False, - 'delivery_info': self.delivery_info} - fun = self.task.run - supported_keys = fun_takes_kwargs(fun, default_kwargs) - extend_with = dict((key, val) for key, val in items(default_kwargs) - if key in supported_keys) - kwargs.update(extend_with) - return kwargs - - def execute_using_pool(self, pool, **kwargs): - """Used by the worker to send this task to the pool. - - :param pool: A :class:`celery.concurrency.base.TaskPool` instance. - - :raises celery.exceptions.TaskRevokedError: if the task was revoked - and ignored. - - """ - uuid = self.id - task = self.task - if self.revoked(): - raise TaskRevokedError(uuid) - - hostname = self.hostname - kwargs = self.kwargs - if task.accept_magic_kwargs: - kwargs = self.extend_with_default_kwargs() - request = self.request_dict - request.update({'hostname': hostname, 'is_eager': False, - 'delivery_info': self.delivery_info, - 'group': self.request_dict.get('taskset')}) - timeout, soft_timeout = request.get('timelimit', (None, None)) - timeout = timeout or task.time_limit - soft_timeout = soft_timeout or task.soft_time_limit - result = pool.apply_async( - trace_task_ret, - args=(self.name, uuid, self.args, kwargs, request), - accept_callback=self.on_accepted, - timeout_callback=self.on_timeout, - callback=self.on_success, - error_callback=self.on_failure, - soft_timeout=soft_timeout, - timeout=timeout, - correlation_id=uuid, - ) - # cannot create weakref to None - self._apply_result = ref(result) if result is not None else result - return result - - def execute(self, loglevel=None, logfile=None): - """Execute the task in a :func:`~celery.app.trace.trace_task`. - - :keyword loglevel: The loglevel used by the task. - :keyword logfile: The logfile used by the task. 
- - """ - if self.revoked(): - return - - # acknowledge task as being processed. - if not self.task.acks_late: - self.acknowledge() - - kwargs = self.kwargs - if self.task.accept_magic_kwargs: - kwargs = self.extend_with_default_kwargs() - request = self.request_dict - request.update({'loglevel': loglevel, 'logfile': logfile, - 'hostname': self.hostname, 'is_eager': False, - 'delivery_info': self.delivery_info}) - retval = trace_task(self.task, self.id, self.args, kwargs, request, - hostname=self.hostname, loader=self.app.loader, - app=self.app) - self.acknowledge() - return retval - - def maybe_expire(self): - """If expired, mark the task as revoked.""" - if self.expires: - now = datetime.now(tz_or_local(self.tzlocal) if self.utc else None) - if now > self.expires: - revoked_tasks.add(self.id) - return True - - def terminate(self, pool, signal=None): - signal = _signals.signum(signal or 'TERM') - if self.time_start: - pool.terminate_job(self.worker_pid, signal) - self._announce_revoked('terminated', True, signal, False) - else: - self._terminate_on_ack = pool, signal - if self._apply_result is not None: - obj = self._apply_result() # is a weakref - if obj is not None: - obj.terminate(signal) - - def _announce_revoked(self, reason, terminated, signum, expired): - task_ready(self) - self.send_event('task-revoked', - terminated=terminated, signum=signum, expired=expired) - if self.store_errors: - self.task.backend.mark_as_revoked(self.id, reason, request=self) - self.acknowledge() - self._already_revoked = True - send_revoked(self.task, request=self, - terminated=terminated, signum=signum, expired=expired) - - def revoked(self): - """If revoked, skip task and mark state.""" - expired = False - if self._already_revoked: - return True - if self.expires: - expired = self.maybe_expire() - if self.id in revoked_tasks: - info('Discarding revoked task: %s[%s]', self.name, self.id) - self._announce_revoked( - 'expired' if expired else 'revoked', False, None, expired, - ) - 
return True - return False - - def send_event(self, type, **fields): - if self.eventer and self.eventer.enabled: - self.eventer.send(type, uuid=self.id, **fields) - - def on_accepted(self, pid, time_accepted): - """Handler called when task is accepted by worker pool.""" - self.worker_pid = pid - self.time_start = time_accepted - task_accepted(self) - if not self.task.acks_late: - self.acknowledge() - self.send_event('task-started') - if _does_debug: - debug('Task accepted: %s[%s] pid:%r', self.name, self.id, pid) - if self._terminate_on_ack is not None: - self.terminate(*self._terminate_on_ack) - - def on_timeout(self, soft, timeout): - """Handler called if the task times out.""" - task_ready(self) - if soft: - warn('Soft time limit (%ss) exceeded for %s[%s]', - timeout, self.name, self.id) - exc = SoftTimeLimitExceeded(timeout) - else: - error('Hard time limit (%ss) exceeded for %s[%s]', - timeout, self.name, self.id) - exc = TimeLimitExceeded(timeout) - - if self.store_errors: - self.task.backend.mark_as_failure(self.id, exc, request=self) - - if self.task.acks_late: - self.acknowledge() - - def on_success(self, ret_value, now=None, nowfun=monotonic): - """Handler called if the task was successfully processed.""" - if isinstance(ret_value, ExceptionInfo): - if isinstance(ret_value.exception, ( - SystemExit, KeyboardInterrupt)): - raise ret_value.exception - return self.on_failure(ret_value) - task_ready(self) - - if self.task.acks_late: - self.acknowledge() - - if self.eventer and self.eventer.enabled: - now = nowfun() - runtime = self.time_start and (now - self.time_start) or 0 - self.send_event('task-succeeded', - result=safe_repr(ret_value), runtime=runtime) - - if _does_info: - now = now or nowfun() - runtime = self.time_start and (now - self.time_start) or 0 - info(self.success_msg.strip(), { - 'id': self.id, 'name': self.name, - 'return_value': self.repr_result(ret_value), - 'runtime': runtime}) - - def on_retry(self, exc_info): - """Handler called if the 
task should be retried.""" - if self.task.acks_late: - self.acknowledge() - - self.send_event('task-retried', - exception=safe_repr(exc_info.exception.exc), - traceback=safe_str(exc_info.traceback)) - - if _does_info: - info(self.retry_msg.strip(), - {'id': self.id, 'name': self.name, - 'exc': exc_info.exception}) - - def on_failure(self, exc_info): - """Handler called if the task raised an exception.""" - task_ready(self) - send_failed_event = True - - if not exc_info.internal: - exc = exc_info.exception - - if isinstance(exc, Retry): - return self.on_retry(exc_info) - - # These are special cases where the process would not have had - # time to write the result. - if self.store_errors: - if isinstance(exc, WorkerLostError): - self.task.backend.mark_as_failure( - self.id, exc, request=self, - ) - elif isinstance(exc, Terminated): - self._announce_revoked( - 'terminated', True, string(exc), False) - send_failed_event = False # already sent revoked event - # (acks_late) acknowledge after result stored. 
- if self.task.acks_late: - self.acknowledge() - self._log_error(exc_info, send_failed_event=send_failed_event) - - def _log_error(self, einfo, send_failed_event=True): - einfo.exception = get_pickled_exception(einfo.exception) - eobj = einfo.exception - exception, traceback, exc_info, internal, sargs, skwargs = ( - safe_repr(eobj), - safe_str(einfo.traceback), - einfo.exc_info, - einfo.internal, - safe_repr(self.args), - safe_repr(self.kwargs), - ) - task = self.task - if task.throws and isinstance(eobj, task.throws): - severity, exc_info = logging.INFO, None - description = 'raised expected' - else: - severity = logging.ERROR - description = 'raised unexpected' - format = self.error_msg - if send_failed_event: - self.send_event( - 'task-failed', exception=exception, traceback=traceback, - ) - - if internal: - if isinstance(einfo.exception, MemoryError): - raise MemoryError('Process got: %s' % (einfo.exception, )) - elif isinstance(einfo.exception, Reject): - format = self.rejected_msg - description = 'rejected' - severity = logging.WARN - exc_info = einfo - self.reject(requeue=einfo.exception.requeue) - elif isinstance(einfo.exception, Ignore): - format = self.ignored_msg - description = 'ignored' - severity = logging.INFO - exc_info = None - self.acknowledge() - else: - format = self.internal_error_msg - description = 'INTERNAL ERROR' - severity = logging.CRITICAL - - context = { - 'hostname': self.hostname, - 'id': self.id, - 'name': self.name, - 'exc': exception, - 'traceback': traceback, - 'args': sargs, - 'kwargs': skwargs, - 'description': description, - } - - logger.log(severity, format.strip(), context, - exc_info=exc_info, - extra={'data': {'id': self.id, - 'name': self.name, - 'args': sargs, - 'kwargs': skwargs, - 'hostname': self.hostname, - 'internal': internal}}) - - task.send_error_email(context, einfo.exception) - - def acknowledge(self): - """Acknowledge task.""" - if not self.acknowledged: - self.on_ack(logger, self.connection_errors) - 
self.acknowledged = True - - def reject(self, requeue=False): - if not self.acknowledged: - self.on_reject(logger, self.connection_errors, requeue) - self.acknowledged = True - - def repr_result(self, result, maxlen=RESULT_MAXLEN): - # 46 is the length needed to fit - # 'the quick brown fox jumps over the lazy dog' :) - if not isinstance(result, string_t): - result = safe_repr(result) - return truncate(result) if len(result) > maxlen else result - - def info(self, safe=False): - return {'id': self.id, - 'name': self.name, - 'args': self.args if safe else safe_repr(self.args), - 'kwargs': self.kwargs if safe else safe_repr(self.kwargs), - 'hostname': self.hostname, - 'time_start': self.time_start, - 'acknowledged': self.acknowledged, - 'delivery_info': self.delivery_info, - 'worker_pid': self.worker_pid} - - def __str__(self): - return '{0.name}[{0.id}]{1}{2}'.format(self, - ' eta:[{0}]'.format(self.eta) if self.eta else '', - ' expires:[{0}]'.format(self.expires) if self.expires else '') - shortinfo = __str__ - - def __repr__(self): - return '<{0} {1}: {2}>'.format( - type(self).__name__, self.id, - reprcall(self.name, self.args, self.kwargs)) - - @property - def tzlocal(self): - if self._tzlocal is None: - self._tzlocal = self.app.conf.CELERY_TIMEZONE - return self._tzlocal - - @property - def store_errors(self): - return (not self.task.ignore_result - or self.task.store_errors_even_if_ignored) - - @property - def task_id(self): - # XXX compat - return self.id - - @task_id.setter # noqa - def task_id(self, value): - self.id = value - - @property - def task_name(self): - # XXX compat - return self.name - - @task_name.setter # noqa - def task_name(self, value): - self.name = value - - @property - def reply_to(self): - # used by rpc backend when failures reported by parent process - return self.request_dict['reply_to'] - - @property - def correlation_id(self): - # used similarly to reply_to - return self.request_dict['correlation_id'] diff --git 
a/celery/worker/loops.py b/celery/worker/loops.py index 0891f51a6..8365f221f 100644 --- a/celery/worker/loops.py +++ b/celery/worker/loops.py @@ -7,6 +7,7 @@ """ from __future__ import absolute_import +import errno import socket from celery.bootsteps import RUN @@ -21,23 +22,30 @@ error = logger.error +def _quick_drain(connection, timeout=0.1): + try: + connection.drain_events(timeout=timeout) + except Exception as exc: + exc_errno = getattr(exc, 'errno', None) + if exc_errno is not None and exc_errno != errno.EAGAIN: + raise + + def asynloop(obj, connection, consumer, blueprint, hub, qos, heartbeat, clock, hbrate=2.0, RUN=RUN): """Non-blocking event loop consuming messages until connection is lost, or shutdown is requested.""" update_qos = qos.update - readers, writers = hub.readers, hub.writers hbtick = connection.heartbeat_check errors = connection.connection_errors heartbeat = connection.get_heartbeat_interval() # negotiated - hub_add, hub_remove = hub.add, hub.remove on_task_received = obj.create_task_handler() if heartbeat and connection.supports_heartbeats: hub.call_repeatedly(heartbeat / hbrate, hbtick, hbrate) - consumer.callbacks = [on_task_received] + consumer.on_message = on_task_received consumer.consume() obj.on_ready() obj.controller.register_with_event_loop(hub) @@ -49,6 +57,12 @@ def asynloop(obj, connection, consumer, blueprint, hub, qos, if not obj.restart_count and not obj.pool.did_start_ok(): raise WorkerLostError('Could not start worker processes') + # consumer.consume() may have prefetched up to our + # limit - drain an event so we are in a clean state + # prior to starting our event loop. + if connection.transport.driver_type == 'amqp': + hub.call_soon(_quick_drain, connection) + # FIXME: Use loop.run_forever # Tried and works, but no time to test properly before release. 
hub.propagate_errors = errors @@ -57,10 +71,14 @@ def asynloop(obj, connection, consumer, blueprint, hub, qos, try: while blueprint.state == RUN and obj.connection: # shutdown if signal handlers told us to. - if state.should_stop: - raise WorkerShutdown() - elif state.should_terminate: - raise WorkerTerminate() + should_stop, should_terminate = ( + state.should_stop, state.should_terminate, + ) + # False == EX_OK, so must use is not False + if should_stop is not None and should_stop is not False: + raise WorkerShutdown(should_stop) + elif should_terminate is not None and should_stop is not False: + raise WorkerTerminate(should_terminate) # We only update QoS when there is no more messages to read. # This groups together qos calls, and makes sure that remote @@ -86,7 +104,8 @@ def synloop(obj, connection, consumer, blueprint, hub, qos, """Fallback blocking event loop for transports that doesn't support AIO.""" on_task_received = obj.create_task_handler() - consumer.register_callback(on_task_received) + perform_pending_operations = obj.perform_pending_operations + consumer.on_message = on_task_received consumer.consume() obj.on_ready() @@ -96,6 +115,7 @@ def synloop(obj, connection, consumer, blueprint, hub, qos, if qos.prev != qos.value: qos.update() try: + perform_pending_operations() connection.drain_events(timeout=2.0) except socket.timeout: pass diff --git a/celery/worker/pidbox.py b/celery/worker/pidbox.py index 99c7a1a39..374aaca1f 100644 --- a/celery/worker/pidbox.py +++ b/celery/worker/pidbox.py @@ -7,6 +7,7 @@ from kombu.utils.encoding import safe_str from celery.datastructures import AttributeDict +from celery.utils.functional import pass1 from celery.utils.log import get_logger from . 
import control @@ -26,13 +27,18 @@ def __init__(self, c): self.node = c.app.control.mailbox.Node( safe_str(c.hostname), handlers=control.Panel.data, - state=AttributeDict(app=c.app, hostname=c.hostname, consumer=c), + state=AttributeDict( + app=c.app, + hostname=c.hostname, + consumer=c, + tset=pass1 if c.controller.use_eventloop else set), ) self._forward_clock = self.c.app.clock.forward def on_message(self, body, message): - self._forward_clock() # just increase clock as clients usually don't - # have a valid clock to adjust with. + # just increase clock as clients usually don't + # have a valid clock to adjust with. + self._forward_clock() try: self.node.handle_message(body, message) except KeyError as exc: @@ -65,7 +71,7 @@ def _close_channel(self, c): def shutdown(self, c): self.on_stop() if self.consumer: - debug('Cancelling broadcast consumer...') + debug('Canceling broadcast consumer...') ignore_errors(c, self.consumer.cancel) self.stop(self.c) diff --git a/celery/worker/request.py b/celery/worker/request.py new file mode 100644 index 000000000..06921efc1 --- /dev/null +++ b/celery/worker/request.py @@ -0,0 +1,545 @@ +# -*- coding: utf-8 -*- +""" + celery.worker.request + ~~~~~~~~~~~~~~~~~~~~~ + + This module defines the :class:`Request` class, + which specifies how tasks are executed. 
+ +""" +from __future__ import absolute_import, unicode_literals + +import logging +import sys + +from datetime import datetime +from weakref import ref + +from billiard.common import TERM_SIGNAME +from kombu.utils.encoding import safe_repr, safe_str + +from celery import signals +from celery.app.trace import trace_task, trace_task_ret +from celery.exceptions import ( + Ignore, TaskRevokedError, InvalidTaskError, + SoftTimeLimitExceeded, TimeLimitExceeded, + WorkerLostError, Terminated, Retry, Reject, +) +from celery.five import string +from celery.platforms import signals as _signals +from celery.utils import cached_property, gethostname +from celery.utils.functional import noop +from celery.utils.log import get_logger +from celery.utils.timeutils import maybe_iso8601, timezone, maybe_make_aware +from celery.utils.serialization import get_pickled_exception + +from . import state + +__all__ = ['Request'] + +IS_PYPY = hasattr(sys, 'pypy_version_info') + +logger = get_logger(__name__) +debug, info, warn, error = (logger.debug, logger.info, + logger.warning, logger.error) +_does_info = False +_does_debug = False + + +def __optimize__(): + # this is also called by celery.app.trace.setup_worker_optimizations + global _does_debug + global _does_info + _does_debug = logger.isEnabledFor(logging.DEBUG) + _does_info = logger.isEnabledFor(logging.INFO) +__optimize__() + +# Localize +tz_utc = timezone.utc +tz_or_local = timezone.tz_or_local +send_revoked = signals.task_revoked.send + +task_accepted = state.task_accepted +task_ready = state.task_ready +revoked_tasks = state.revoked + + +class Request(object): + """A request for task execution.""" + acknowledged = False + time_start = None + worker_pid = None + time_limits = (None, None) + _already_revoked = False + _terminate_on_ack = None + _apply_result = None + _tzlocal = None + + if not IS_PYPY: # pragma: no cover + __slots__ = ( + 'app', 'type', 'name', 'id', 'root_id', 'parent_id', + 'on_ack', 'body', 'hostname', 
'eventer', 'connection_errors', + 'task', 'eta', 'expires', 'request_dict', 'on_reject', 'utc', + 'content_type', 'content_encoding', 'argsrepr', 'kwargsrepr', + '_decoded', + '__weakref__', '__dict__', + ) + + def __init__(self, message, on_ack=noop, + hostname=None, eventer=None, app=None, + connection_errors=None, request_dict=None, + task=None, on_reject=noop, body=None, + headers=None, decoded=False, utc=True, + maybe_make_aware=maybe_make_aware, + maybe_iso8601=maybe_iso8601, **opts): + if headers is None: + headers = message.headers + if body is None: + body = message.body + self.app = app + self.message = message + self.body = body + self.utc = utc + self._decoded = decoded + if decoded: + self.content_type = self.content_encoding = None + else: + self.content_type, self.content_encoding = ( + message.content_type, message.content_encoding, + ) + + self.id = headers['id'] + type = self.type = self.name = headers['task'] + self.root_id = headers.get('root_id') + self.parent_id = headers.get('parent_id') + if 'shadow' in headers: + self.name = headers['shadow'] or self.name + if 'timelimit' in headers: + self.time_limits = headers['timelimit'] + self.argsrepr = headers.get('argsrepr', '') + self.kwargsrepr = headers.get('kwargsrepr', '') + self.on_ack = on_ack + self.on_reject = on_reject + self.hostname = hostname or gethostname() + self.eventer = eventer + self.connection_errors = connection_errors or () + self.task = task or self.app.tasks[type] + + # timezone means the message is timezone-aware, and the only timezone + # supported at this point is UTC. 
+ eta = headers.get('eta') + if eta is not None: + try: + eta = maybe_iso8601(eta) + except (AttributeError, ValueError, TypeError) as exc: + raise InvalidTaskError( + 'invalid eta value {0!r}: {1}'.format(eta, exc)) + self.eta = maybe_make_aware(eta, self.tzlocal) + else: + self.eta = None + + expires = headers.get('expires') + if expires is not None: + try: + expires = maybe_iso8601(expires) + except (AttributeError, ValueError, TypeError) as exc: + raise InvalidTaskError( + 'invalid expires value {0!r}: {1}'.format(expires, exc)) + self.expires = maybe_make_aware(expires, self.tzlocal) + else: + self.expires = None + + delivery_info = message.delivery_info or {} + properties = message.properties or {} + headers.update({ + 'reply_to': properties.get('reply_to'), + 'correlation_id': properties.get('correlation_id'), + 'delivery_info': { + 'exchange': delivery_info.get('exchange'), + 'routing_key': delivery_info.get('routing_key'), + 'priority': properties.get('priority'), + 'redelivered': delivery_info.get('redelivered'), + } + + }) + self.request_dict = headers + + @property + def delivery_info(self): + return self.request_dict['delivery_info'] + + def execute_using_pool(self, pool, **kwargs): + """Used by the worker to send this task to the pool. + + :param pool: A :class:`celery.concurrency.base.TaskPool` instance. + + :raises celery.exceptions.TaskRevokedError: if the task was revoked + and ignored. 
+ + """ + task_id = self.id + task = self.task + if self.revoked(): + raise TaskRevokedError(task_id) + + time_limit, soft_time_limit = self.time_limits + time_limit = time_limit or task.time_limit + soft_time_limit = soft_time_limit or task.soft_time_limit + result = pool.apply_async( + trace_task_ret, + args=(self.type, task_id, self.request_dict, self.body, + self.content_type, self.content_encoding), + accept_callback=self.on_accepted, + timeout_callback=self.on_timeout, + callback=self.on_success, + error_callback=self.on_failure, + soft_timeout=soft_time_limit, + timeout=time_limit, + correlation_id=task_id, + ) + # cannot create weakref to None + self._apply_result = ref(result) if result is not None else result + return result + + def execute(self, loglevel=None, logfile=None): + """Execute the task in a :func:`~celery.app.trace.trace_task`. + + :keyword loglevel: The loglevel used by the task. + :keyword logfile: The logfile used by the task. + + """ + if self.revoked(): + return + + # acknowledge task as being processed. 
+ if not self.task.acks_late: + self.acknowledge() + + request = self.request_dict + args, kwargs, embed = self._payload + request.update({'loglevel': loglevel, 'logfile': logfile, + 'hostname': self.hostname, 'is_eager': False, + 'args': args, 'kwargs': kwargs}, **embed or {}) + retval = trace_task(self.task, self.id, args, kwargs, request, + hostname=self.hostname, loader=self.app.loader, + app=self.app)[0] + self.acknowledge() + return retval + + def maybe_expire(self): + """If expired, mark the task as revoked.""" + if self.expires: + now = datetime.now(self.expires.tzinfo) + if now > self.expires: + revoked_tasks.add(self.id) + return True + + def terminate(self, pool, signal=None): + signal = _signals.signum(signal or TERM_SIGNAME) + if self.time_start: + pool.terminate_job(self.worker_pid, signal) + self._announce_revoked('terminated', True, signal, False) + else: + self._terminate_on_ack = pool, signal + if self._apply_result is not None: + obj = self._apply_result() # is a weakref + if obj is not None: + obj.terminate(signal) + + def _announce_revoked(self, reason, terminated, signum, expired): + task_ready(self) + self.send_event('task-revoked', + terminated=terminated, signum=signum, expired=expired) + self.task.backend.mark_as_revoked( + self.id, reason, request=self, store_result=self.store_errors, + ) + self.acknowledge() + self._already_revoked = True + send_revoked(self.task, request=self, + terminated=terminated, signum=signum, expired=expired) + + def revoked(self): + """If revoked, skip task and mark state.""" + expired = False + if self._already_revoked: + return True + if self.expires: + expired = self.maybe_expire() + if self.id in revoked_tasks: + info('Discarding revoked task: %s[%s]', self.name, self.id) + self._announce_revoked( + 'expired' if expired else 'revoked', False, None, expired, + ) + return True + return False + + def send_event(self, type, **fields): + if self.eventer and self.eventer.enabled: + self.eventer.send(type, 
uuid=self.id, **fields) + + def on_accepted(self, pid, time_accepted): + """Handler called when task is accepted by worker pool.""" + self.worker_pid = pid + self.time_start = time_accepted + task_accepted(self) + if not self.task.acks_late: + self.acknowledge() + self.send_event('task-started') + if _does_debug: + debug('Task accepted: %s[%s] pid:%r', self.name, self.id, pid) + if self._terminate_on_ack is not None: + self.terminate(*self._terminate_on_ack) + + def on_timeout(self, soft, timeout): + """Handler called if the task times out.""" + task_ready(self) + if soft: + warn('Soft time limit (%ss) exceeded for %s[%s]', + soft, self.name, self.id) + exc = SoftTimeLimitExceeded(soft) + else: + error('Hard time limit (%ss) exceeded for %s[%s]', + timeout, self.name, self.id) + exc = TimeLimitExceeded(timeout) + + self.task.backend.mark_as_failure( + self.id, exc, request=self, store_result=self.store_errors, + ) + + if self.task.acks_late: + self.acknowledge() + + def on_success(self, failed__retval__runtime, **kwargs): + """Handler called if the task was successfully processed.""" + failed, retval, runtime = failed__retval__runtime + if failed: + if isinstance(retval.exception, (SystemExit, KeyboardInterrupt)): + raise retval.exception + return self.on_failure(retval, return_ok=True) + task_ready(self) + + if self.task.acks_late: + self.acknowledge() + + self.send_event('task-succeeded', result=retval, runtime=runtime) + + def on_retry(self, exc_info): + """Handler called if the task should be retried.""" + if self.task.acks_late: + self.acknowledge() + + self.send_event('task-retried', + exception=safe_repr(exc_info.exception.exc), + traceback=safe_str(exc_info.traceback)) + + def on_failure(self, exc_info, send_failed_event=True, return_ok=False): + """Handler called if the task raised an exception.""" + task_ready(self) + if isinstance(exc_info.exception, MemoryError): + raise MemoryError('Process got: %s' % (exc_info.exception,)) + elif 
isinstance(exc_info.exception, Reject): + return self.reject(requeue=exc_info.exception.requeue) + elif isinstance(exc_info.exception, Ignore): + return self.acknowledge() + + exc = exc_info.exception + + if isinstance(exc, Retry): + return self.on_retry(exc_info) + + # These are special cases where the process would not have had + # time to write the result. + if isinstance(exc, Terminated): + self._announce_revoked( + 'terminated', True, string(exc), False) + send_failed_event = False # already sent revoked event + elif isinstance(exc, WorkerLostError) or not return_ok: + self.task.backend.mark_as_failure( + self.id, exc, request=self, store_result=self.store_errors, + ) + # (acks_late) acknowledge after result stored. + if self.task.acks_late: + requeue = self.delivery_info.get('redelivered', None) is False + reject = ( + self.task.reject_on_worker_lost and + isinstance(exc, WorkerLostError) + ) + if reject: + self.reject(requeue=requeue) + send_failed_event = False + else: + self.acknowledge() + + if send_failed_event: + self.send_event( + 'task-failed', + exception=safe_repr(get_pickled_exception(exc_info.exception)), + traceback=exc_info.traceback, + ) + + if not return_ok: + error('Task handler raised error: %r', exc, + exc_info=exc_info.exc_info) + + def acknowledge(self): + """Acknowledge task.""" + if not self.acknowledged: + self.on_ack(logger, self.connection_errors) + self.acknowledged = True + + def reject(self, requeue=False): + if not self.acknowledged: + self.on_reject(logger, self.connection_errors, requeue) + self.acknowledged = True + self.send_event('task-rejected', requeue=requeue) + + def info(self, safe=False): + return { + 'id': self.id, + 'name': self.name, + 'args': self.argsrepr, + 'kwargs': self.kwargsrepr, + 'type': self.type, + 'body': self.body, + 'hostname': self.hostname, + 'time_start': self.time_start, + 'acknowledged': self.acknowledged, + 'delivery_info': self.delivery_info, + 'worker_pid': self.worker_pid, + } + + def 
__str__(self): + return ' '.join([ + self.humaninfo(), + ' eta:[{0}]'.format(self.eta) if self.eta else '', + ' expires:[{0}]'.format(self.expires) if self.expires else '', + ]) + shortinfo = __str__ + + def humaninfo(self): + return '{0.name}[{0.id}]'.format(self) + + def __repr__(self): + return '<{0}: {1} {2} {3}>'.format( + type(self).__name__, self.humaninfo(), + self.argsrepr, self.kwargsrepr, + ) + + @property + def tzlocal(self): + if self._tzlocal is None: + self._tzlocal = self.app.conf.timezone + return self._tzlocal + + @property + def store_errors(self): + return (not self.task.ignore_result or + self.task.store_errors_even_if_ignored) + + @property + def task_id(self): + # XXX compat + return self.id + + @task_id.setter # noqa + def task_id(self, value): + self.id = value + + @property + def task_name(self): + # XXX compat + return self.name + + @task_name.setter # noqa + def task_name(self, value): + self.name = value + + @property + def reply_to(self): + # used by rpc backend when failures reported by parent process + return self.request_dict['reply_to'] + + @property + def correlation_id(self): + # used similarly to reply_to + return self.request_dict['correlation_id'] + + @cached_property + def _payload(self): + return self.body if self._decoded else self.message.payload + + @cached_property + def chord(self): + # used by backend.mark_as_failure when failure is reported + # by parent process + _, _, embed = self._payload + return embed.get('chord') + + @cached_property + def errbacks(self): + # used by backend.mark_as_failure when failure is reported + # by parent process + _, _, embed = self._payload + return embed.get('errbacks') + + @cached_property + def group(self): + # used by backend.on_chord_part_return when failures reported + # by parent process + return self.request_dict['group'] + + +def create_request_cls(base, task, pool, hostname, eventer, + ref=ref, revoked_tasks=revoked_tasks, + task_ready=task_ready): + from celery.app.trace 
import trace_task_ret as trace + default_time_limit = task.time_limit + default_soft_time_limit = task.soft_time_limit + apply_async = pool.apply_async + acks_late = task.acks_late + events = eventer and eventer.enabled + + class Request(base): + + def execute_using_pool(self, pool, **kwargs): + task_id = self.id + if (self.expires or task_id in revoked_tasks) and self.revoked(): + raise TaskRevokedError(task_id) + + time_limit, soft_time_limit = self.time_limits + time_limit = time_limit or default_time_limit + soft_time_limit = soft_time_limit or default_soft_time_limit + result = apply_async( + trace, + args=(self.type, task_id, self.request_dict, self.body, + self.content_type, self.content_encoding), + accept_callback=self.on_accepted, + timeout_callback=self.on_timeout, + callback=self.on_success, + error_callback=self.on_failure, + soft_timeout=soft_time_limit, + timeout=time_limit, + correlation_id=task_id, + ) + # cannot create weakref to None + self._apply_result = ref(result) if result is not None else result + return result + + def on_success(self, failed__retval__runtime, **kwargs): + failed, retval, runtime = failed__retval__runtime + if failed: + if isinstance(retval.exception, ( + SystemExit, KeyboardInterrupt)): + raise retval.exception + return self.on_failure(retval, return_ok=True) + task_ready(self) + + if acks_late: + self.acknowledge() + + if events: + self.send_event( + 'task-succeeded', result=retval, runtime=runtime, + ) + + return Request diff --git a/celery/worker/state.py b/celery/worker/state.py index 8abaa5d73..4e86e723a 100644 --- a/celery/worker/state.py +++ b/celery/worker/state.py @@ -9,7 +9,7 @@ statistics, and revoked tasks. 
""" -from __future__ import absolute_import +from __future__ import absolute_import, print_function import os import sys @@ -27,8 +27,7 @@ __all__ = ['SOFTWARE_INFO', 'reserved_requests', 'active_requests', 'total_count', 'revoked', 'task_reserved', 'maybe_shutdown', - 'task_accepted', 'task_ready', 'task_reserved', 'task_ready', - 'Persistent'] + 'task_accepted', 'task_reserved', 'task_ready', 'Persistent'] #: Worker software/platform information. SOFTWARE_INFO = {'sw_ident': 'py-celery', @@ -42,10 +41,10 @@ #: being expired when the max limit has been exceeded. REVOKE_EXPIRES = 10800 -#: set of all reserved :class:`~celery.worker.job.Request`'s. +#: set of all reserved :class:`~celery.worker.request.Request`'s. reserved_requests = set() -#: set of currently active :class:`~celery.worker.job.Request`'s. +#: set of currently active :class:`~celery.worker.request.Request`'s. active_requests = set() #: count of tasks accepted by the worker, sorted by type. @@ -60,15 +59,23 @@ #: Update global state when a task has been reserved. 
task_reserved = reserved_requests.add -should_stop = False -should_terminate = False +should_stop = None +should_terminate = None + + +def reset_state(): + reserved_requests.clear() + active_requests.clear() + total_count.clear() + all_total_count[:] = [0] + revoked.clear() def maybe_shutdown(): - if should_stop: - raise WorkerShutdown() - elif should_terminate: - raise WorkerTerminate() + if should_stop is not None and should_stop is not False: + raise WorkerShutdown(should_stop) + elif should_terminate is not None and should_terminate is not False: + raise WorkerTerminate(should_terminate) def task_accepted(request, _all_total_count=all_total_count): @@ -90,7 +97,7 @@ def task_ready(request): if C_BENCH: # pragma: no cover import atexit - from billiard import current_process + from billiard.process import current_process from celery.five import monotonic from celery.utils.debug import memdump, sample_mem diff --git a/celery/worker/strategy.py b/celery/worker/strategy.py index 0b0d327c3..d087743e6 100644 --- a/celery/worker/strategy.py +++ b/celery/worker/strategy.py @@ -11,12 +11,14 @@ import logging from kombu.async.timer import to_timestamp -from kombu.utils.encoding import safe_repr +from kombu.five import buffer_t +from celery.exceptions import InvalidTaskError from celery.utils.log import get_logger +from celery.utils.saferepr import saferepr from celery.utils.timeutils import timezone -from .job import Request +from .request import Request, create_request_cls from .state import task_reserved __all__ = ['default'] @@ -24,12 +26,45 @@ logger = get_logger(__name__) +def proto1_to_proto2(message, body): + """Converts Task message protocol 1 arguments to protocol 2. 
+ + Returns tuple of ``(body, headers, already_decoded_status, utc)`` + + """ + try: + args, kwargs = body['args'], body['kwargs'] + kwargs.items + except KeyError: + raise InvalidTaskError('Message does not have args/kwargs') + except AttributeError: + raise InvalidTaskError( + 'Task keyword arguments must be a mapping', + ) + body.update( + argsrepr=saferepr(args), + kwargsrepr=saferepr(kwargs), + headers=message.headers, + ) + try: + body['group'] = body['taskset'] + except KeyError: + pass + embed = { + 'callbacks': body.get('callbacks'), + 'errbacks': body.get('errbacks'), + 'chord': body.get('chord'), + 'chain': None, + } + return (args, kwargs, embed), body, True, body.get('utc', True) + + def default(task, app, consumer, info=logger.info, error=logger.error, task_reserved=task_reserved, - to_system_tz=timezone.to_system): + to_system_tz=timezone.to_system, bytes=bytes, buffer_t=buffer_t, + proto1_to_proto2=proto1_to_proto2): hostname = consumer.hostname eventer = consumer.event_dispatcher - Req = Request connection_errors = consumer.connection_errors _does_info = logger.isEnabledFor(logging.INFO) events = eventer and eventer.enabled @@ -37,18 +72,31 @@ def default(task, app, consumer, call_at = consumer.timer.call_at apply_eta_task = consumer.apply_eta_task rate_limits_enabled = not consumer.disable_rate_limits - bucket = consumer.task_buckets[task.name] + get_bucket = consumer.task_buckets.__getitem__ handle = consumer.on_task_request limit_task = consumer._limit_task + body_can_be_buffer = consumer.pool.body_can_be_buffer + Req = create_request_cls(Request, task, consumer.pool, hostname, eventer) + + revoked_tasks = consumer.controller.state.revoked def task_message_handler(message, body, ack, reject, callbacks, to_timestamp=to_timestamp): - req = Req(body, on_ack=ack, on_reject=reject, - app=app, hostname=hostname, - eventer=eventer, task=task, - connection_errors=connection_errors, - message=message) - if req.revoked(): + if body is None: + body, 
headers, decoded, utc = ( + message.body, message.headers, False, True, + ) + if not body_can_be_buffer: + body = bytes(body) if isinstance(body, buffer_t) else body + else: + body, headers, decoded, utc = proto1_to_proto2(message, body) + req = Req( + message, + on_ack=ack, on_reject=reject, app=app, hostname=hostname, + eventer=eventer, task=task, connection_errors=connection_errors, + body=body, headers=headers, decoded=decoded, utc=utc, + ) + if (req.expires or req.id in revoked_tasks) and req.revoked(): return if _does_info: @@ -58,7 +106,8 @@ def task_message_handler(message, body, ack, reject, callbacks, send_event( 'task-received', uuid=req.id, name=req.name, - args=safe_repr(req.args), kwargs=safe_repr(req.kwargs), + args=req.argsrepr, kwargs=req.kwargsrepr, + root_id=req.root_id, parent_id=req.parent_id, retries=req.request_dict.get('retries', 0), eta=req.eta and req.eta.isoformat(), expires=req.expires and req.expires.isoformat(), @@ -76,14 +125,15 @@ def task_message_handler(message, body, ack, reject, callbacks, req.acknowledge() else: consumer.qos.increment_eventually() - call_at(eta, apply_eta_task, (req, ), priority=6) + call_at(eta, apply_eta_task, (req,), priority=6) else: if rate_limits_enabled: + bucket = get_bucket(task.name) if bucket: return limit_task(req, bucket, 1) task_reserved(req) if callbacks: - [callback() for callback in callbacks] + [callback(req) for callback in callbacks] handle(req) return task_message_handler diff --git a/docs/.templates/page.html b/docs/.templates/page.html index e4d1c2132..89292a458 100644 --- a/docs/.templates/page.html +++ b/docs/.templates/page.html @@ -2,14 +2,14 @@ {% block body %}
- {% if version == "3.2" or version == "4.0" %} + {% if version == "4.0" %}

This document is for Celery's development version, which can be significantly different from previous releases. Get old docs here: - 3.0. + 3.1.

- {% else %} + {% else %}

This document describes the current stable version of Celery ({{ version }}). For development docs, go here. diff --git a/docs/.templates/sidebarintro.html b/docs/.templates/sidebarintro.html index 16cca544a..cc68b8f24 100644 --- a/docs/.templates/sidebarintro.html +++ b/docs/.templates/sidebarintro.html @@ -2,14 +2,12 @@

diff --git a/docs/.templates/sidebarlogo.html b/docs/.templates/sidebarlogo.html index 16cca544a..cc68b8f24 100644 --- a/docs/.templates/sidebarlogo.html +++ b/docs/.templates/sidebarlogo.html @@ -2,14 +2,12 @@

diff --git a/docs/AUTHORS.txt b/docs/AUTHORS.txt index 3d53ce911..2f88710de 100644 --- a/docs/AUTHORS.txt +++ b/docs/AUTHORS.txt @@ -21,6 +21,7 @@ Ben Firshman Brad Jasper Branko Čibej Brendon Crawford +Brian Bouterse Brian Rosner Bryan Berg Chase Seibert @@ -68,6 +69,7 @@ Jeff Terrace Jerzy Kozera Jesper Noehr John Watson +John Whitlock Jonas Haag Jonas Obrist Jonatan Heyman @@ -87,6 +89,7 @@ Marcin Kuźmiński Marcin Lulek Mark Hellewell Mark Lavin +Mark Parncutt Mark Stover Mark Thurman Martin Galpin @@ -103,6 +106,7 @@ Miguel Hernandez Martos Mikhail Gusarov Mikhail Korobov Mitar +Môshe van der Sterre Neil Chintomby Noah Kantrowitz Norman Richards diff --git a/docs/_ext/applyxrefs.py b/docs/_ext/applyxrefs.py index deed5d90b..a9a9d8c2a 100644 --- a/docs/_ext/applyxrefs.py +++ b/docs/_ext/applyxrefs.py @@ -49,7 +49,6 @@ def has_target(fn): if not readok: return (True, None) - #print fn, len(lines) if len(lines) < 1: print("Not touching empty file %s." % fn) return (True, None) @@ -71,7 +70,6 @@ def main(argv=None): files.extend([(dirpath, f) for f in filenames]) files.sort() files = [os.path.join(p, fn) for p, fn in files if fn.endswith('.txt')] - #print files for fn in files: if fn in DONT_TOUCH: diff --git a/docs/_ext/celerydocs.py b/docs/_ext/celerydocs.py index 2cbc97f60..d2c170c08 100644 --- a/docs/_ext/celerydocs.py +++ b/docs/_ext/celerydocs.py @@ -5,6 +5,7 @@ APPATTRS = { 'amqp': 'celery.app.amqp.AMQP', 'backend': 'celery.backends.base.BaseBackend', + 'conf': 'celery.app.utils.Settings', 'control': 'celery.app.control.Control', 'events': 'celery.events.Events', 'loader': 'celery.app.loaders.base.BaseLoader', @@ -13,15 +14,32 @@ 'tasks': 'celery.app.registry.Registry', 'AsyncResult': 'celery.result.AsyncResult', + 'ResultSet': 'celery.result.ResultSet', 'GroupResult': 'celery.result.GroupResult', 'Worker': 'celery.apps.worker.Worker', 'WorkController': 'celery.worker.WorkController', 'Beat': 'celery.apps.beat.Beat', 'Task': 'celery.app.task.Task', - 
'send_task': 'celery.Celery.send_task', - 'connection': 'celery.Celery.connection', + 'signature': 'celery.canvas.Signature', } +APPDIRECT = { + 'on_configure', 'on_after_configure', 'on_after_finalize', + 'set_current', 'set_default', 'close', 'on_init', 'start', + 'worker_main', 'task', 'gen_task_name', 'finalize', + 'add_defaults', 'config_from_object', 'config_from_envvar', + 'config_from_cmdline', 'setup_security', 'autodiscover_tasks', + 'send_task', 'connection', 'connection_or_acquire', + 'producer_or_acquire', 'prepare_config', 'now', 'mail_admins', + 'select_queues', 'either', 'bugreport', 'create_task_cls', + 'subclass_with_self', 'annotations', 'current_task', 'oid', + 'timezone', '__reduce_keys__', 'fixups', 'finalized', 'configured', + 'add_periodic_task', + 'autofinalize', 'steps', 'user_options', 'main', 'clock', +} + +APPATTRS.update({x: 'celery.Celery.{0}'.format(x) for x in APPDIRECT}) + ABBRS = { 'Celery': 'celery.Celery', } @@ -43,19 +61,21 @@ def shorten(S, newtarget, src_dict): return S[2:] elif S.startswith('@'): if src_dict is APPATTRS: - return '.'.join([pkg_of(newtarget), S[1:]]) + return '.'.join(['app', S[1:]]) return S[1:] return S -def get_abbr(pre, rest, type): +def get_abbr(pre, rest, type, orig=None): if pre: for d in APPATTRS, ABBRS: try: return d[pre], rest, d except KeyError: pass - raise KeyError(pre) + raise KeyError('Unknown abbreviation: {0} ({1})'.format( + '.'.join([pre, rest]) if orig is None else orig, type, + )) else: for d in APPATTRS, ABBRS: try: @@ -66,6 +86,7 @@ def get_abbr(pre, rest, type): def resolve(S, type): + orig = S if S.startswith('@'): S = S.lstrip('@-') try: @@ -73,7 +94,7 @@ def resolve(S, type): except ValueError: pre, rest = '', S - target, rest, src = get_abbr(pre, rest, type) + target, rest, src = get_abbr(pre, rest, type, orig) return '.'.join([target, rest]) if rest else target, src return S, None diff --git a/docs/_ext/literals_to_xrefs.py b/docs/_ext/literals_to_xrefs.py index 
38dad0b74..debd8953b 100644 --- a/docs/_ext/literals_to_xrefs.py +++ b/docs/_ext/literals_to_xrefs.py @@ -146,8 +146,8 @@ def colorize(text='', opts=(), **kwargs): """ color_names = ('black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan', 'white') - foreground = dict([(color_names[x], '3%s' % x) for x in range(8)]) - background = dict([(color_names[x], '4%s' % x) for x in range(8)]) + foreground = {color_names[x]: '3%s' % x for x in range(8)} + background = {color_names[x]: '4%s' % x for x in range(8)} RESET = '0' opt_dict = {'bold': '1', diff --git a/docs/conf.py b/docs/conf.py index 914aee712..05352f36e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -10,7 +10,7 @@ # absolute, like shown here. sys.path.insert(0, os.path.join(this, os.pardir)) sys.path.append(os.path.join(this, '_ext')) -import celery +import celery # noqa # General configuration # --------------------- @@ -54,7 +54,7 @@ def linkcode_resolve(domain, info): # General information about the project. project = 'Celery' -copyright = '2009-2013, Ask Solem & Contributors' +copyright = '2009-2016, Ask Solem & Contributors' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -67,20 +67,17 @@ def linkcode_resolve(domain, info): exclude_trees = ['.build'] -#unused_docs = [ -# 'xreftest.rst', -# 'tutorials/otherqueues', -#] - # If true, '()' will be appended to :func: etc. cross-reference text. 
add_function_parentheses = True intersphinx_mapping = { 'python': ('http://docs.python.org/dev', None), - 'kombu': ('http://kombu.readthedocs.org/en/latest/', None), - 'djcelery': ('http://django-celery.readthedocs.org/en/latest', None), + 'kombu': ('http://kombu.readthedocs.org/en/master/', None), + 'djcelery': ('http://django-celery.readthedocs.org/en/master', None), 'cyme': ('http://cyme.readthedocs.org/en/latest', None), 'amqp': ('http://amqp.readthedocs.org/en/latest', None), + 'vine': ('http://vine.readthedocs.org/en/latest', None), + 'flower': ('http://flower.readthedocs.org/en/latest', None), } # The name of the Pygments (syntax highlighting) style to use. @@ -115,7 +112,7 @@ def linkcode_resolve(domain, info): 'sourcelink.html', 'searchbox.html'], } -### Issuetracker +# ## Issuetracker github_project = 'celery/celery' @@ -125,7 +122,7 @@ def linkcode_resolve(domain, info): epub_title = 'Celery Manual, Version {0}'.format(version) epub_author = 'Ask Solem' epub_publisher = 'Celery Project' -epub_copyright = '2009-2013' +epub_copyright = '2009-2014' # The language of the text. It defaults to the language option # or en if the language is not set. @@ -141,13 +138,13 @@ def linkcode_resolve(domain, info): # A unique identification for the text. epub_uid = 'Celery Manual, Version {0}'.format(version) -# HTML files that should be inserted before the pages created by sphinx. -# The format is a list of tuples containing the path and title. -#epub_pre_files = [] +# ## HTML files that should be inserted before the pages created by sphinx. +# ## The format is a list of tuples containing the path and title. +# epub_pre_files = [] -# HTML files shat should be inserted after the pages created by sphinx. -# The format is a list of tuples containing the path and title. -#epub_post_files = [] +# ## HTML files that should be inserted after the pages created by sphinx. +# ## The format is a list of tuples containing the path and title.
+# epub_post_files = [] # A list of files that should not be packed into the epub file. epub_exclude_files = ['search.html'] diff --git a/docs/configuration.rst b/docs/configuration.rst index 3f787f270..56a22ba16 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -24,29 +24,186 @@ It should contain all you need to run a basic Celery set-up. .. code-block:: python ## Broker settings. - BROKER_URL = 'amqp://guest:guest@localhost:5672//' + broker_url = 'amqp://guest:guest@localhost:5672//' # List of modules to import when celery starts. - CELERY_IMPORTS = ('myapp.tasks', ) + imports = ('myapp.tasks',) ## Using the database to store task state and results. - CELERY_RESULT_BACKEND = 'db+sqlite:///results.db' - - CELERY_ANNOTATIONS = {'tasks.add': {'rate_limit': '10/s'}} - + result_backend = 'db+sqlite:///results.db' + + task_annotations = {'tasks.add': {'rate_limit': '10/s'}} + + +.. _conf-old-settings-map: + +New lowercase settings +====================== + +Version 4.0 introduced new lower case settings and setting organization. + +The major difference between previous versions, apart from the lower case +names, are the renaming of some prefixes, like ``celerybeat_`` to ``beat_``, +``celeryd_`` to ``worker_``, and most of the top level ``celery_`` settings +have been moved into a new ``task_`` prefix. + +Celery will still be able to read old configuration files, so there is no +rush in moving to the new settings format. 
+ +===================================== ============================================== +**Setting name** **Replace with** +===================================== ============================================== +``CELERY_ACCEPT_CONTENT`` :setting:`accept_content` +``ADMINS`` :setting:`admins` +``CELERY_ENABLE_UTC`` :setting:`enable_utc` +``CELERY_IMPORTS`` :setting:`imports` +``CELERY_INCLUDE`` :setting:`include` +``SERVER_EMAIL`` :setting:`server_email` +``CELERY_TIMEZONE`` :setting:`timezone` +``CELERYBEAT_MAX_LOOP_INTERVAL`` :setting:`beat_max_loop_interval` +``CELERYBEAT_SCHEDULE`` :setting:`beat_schedule` +``CELERYBEAT_SCHEDULER`` :setting:`beat_scheduler` +``CELERYBEAT_SCHEDULE_FILENAME`` :setting:`beat_schedule_filename` +``CELERYBEAT_SYNC_EVERY`` :setting:`beat_sync_every` +``BROKER_URL`` :setting:`broker_url` +``BROKER_TRANSPORT`` :setting:`broker_transport` +``BROKER_TRANSPORT_OPTIONS`` :setting:`broker_transport_options` +``BROKER_CONNECTION_TIMEOUT`` :setting:`broker_connection_timeout` +``BROKER_CONNECTION_RETRY`` :setting:`broker_connection_retry` +``BROKER_CONNECTION_MAX_RETRIES`` :setting:`broker_connection_max_retries` +``BROKER_FAILOVER_STRATEGY`` :setting:`broker_failover_strategy` +``BROKER_HEARTBEAT`` :setting:`broker_heartbeat` +``BROKER_LOGIN_METHOD`` :setting:`broker_login_method` +``BROKER_POOL_LIMIT`` :setting:`broker_pool_limit` +``BROKER_USE_SSL`` :setting:`broker_use_ssl` +``CELERY_CACHE_BACKEND`` :setting:`cache_backend` +``CELERY_CACHE_BACKEND_OPTIONS`` :setting:`cache_backend_options` +``CASSANDRA_COLUMN_FAMILY`` :setting:`cassandra_table` +``CASSANDRA_ENTRY_TTL`` :setting:`cassandra_entry_ttl` +``CASSANDRA_KEYSPACE`` :setting:`cassandra_keyspace` +``CASSANDRA_PORT`` :setting:`cassandra_port` +``CASSANDRA_READ_CONSISTENCY`` :setting:`cassandra_read_consistency` +``CASSANDRA_SERVERS`` :setting:`cassandra_servers` +``CASSANDRA_WRITE_CONSISTENCY`` :setting:`cassandra_write_consistency` +``CELERY_COUCHBASE_BACKEND_SETTINGS`` 
:setting:`couchbase_backend_settings` +``EMAIL_HOST`` :setting:`email_host` +``EMAIL_HOST_USER`` :setting:`email_host_user` +``EMAIL_HOST_PASSWORD`` :setting:`email_host_password` +``EMAIL_PORT`` :setting:`email_port` +``EMAIL_TIMEOUT`` :setting:`email_timeout` +``EMAIL_USE_SSL`` :setting:`email_use_ssl` +``EMAIL_USE_TLS`` :setting:`email_use_tls` +``CELERY_MONGODB_BACKEND_SETTINGS`` :setting:`mongodb_backend_settings` +``CELERY_EVENT_QUEUE_EXPIRES`` :setting:`event_queue_expires` +``CELERY_EVENT_QUEUE_TTL`` :setting:`event_queue_ttl` +``CELERY_EVENT_SERIALIZER`` :setting:`event_serializer` +``CELERY_REDIS_DB`` :setting:`redis_db` +``CELERY_REDIS_HOST`` :setting:`redis_host` +``CELERY_REDIS_MAX_CONNECTIONS`` :setting:`redis_max_connections` +``CELERY_REDIS_PASSWORD`` :setting:`redis_password` +``CELERY_REDIS_PORT`` :setting:`redis_port` +``CELERY_RESULT_BACKEND`` :setting:`result_backend` +``CELERY_MAX_CACHED_RESULTS`` :setting:`result_cache_max` +``CELERY_MESSAGE_COMPRESSION`` :setting:`result_compression` +``CELERY_RESULT_EXCHANGE`` :setting:`result_exchange` +``CELERY_RESULT_EXCHANGE_TYPE`` :setting:`result_exchange_type` +``CELERY_TASK_RESULT_EXPIRES`` :setting:`result_expires` +``CELERY_RESULT_PERSISTENT`` :setting:`result_persistent` +``CELERY_RESULT_SERIALIZER`` :setting:`result_serializer` +``CELERY_RESULT_DBURI`` :setting:`sqlalchemy_dburi` +``CELERY_RESULT_ENGINE_OPTIONS`` :setting:`sqlalchemy_engine_options` +``-*-_DB_SHORT_LIVED_SESSIONS`` :setting:`sqlalchemy_short_lived_sessions` +``CELERY_RESULT_DB_TABLE_NAMES`` :setting:`sqlalchemy_db_names` +``CELERY_SECURITY_CERTIFICATE`` :setting:`security_certificate` +``CELERY_SECURITY_CERT_STORE`` :setting:`security_cert_store` +``CELERY_SECURITY_KEY`` :setting:`security_key` +``CELERY_ACKS_LATE`` :setting:`task_acks_late` +``CELERY_ALWAYS_EAGER`` :setting:`task_always_eager` +``CELERY_ANNOTATIONS`` :setting:`task_annotations` +``CELERY_MESSAGE_COMPRESSION`` :setting:`task_compression` 
+``CELERY_CREATE_MISSING_QUEUES`` :setting:`task_create_missing_queues` +``CELERY_DEFAULT_DELIVERY_MODE`` :setting:`task_default_delivery_mode` +``CELERY_DEFAULT_EXCHANGE`` :setting:`task_default_exchange` +``CELERY_DEFAULT_EXCHANGE_TYPE`` :setting:`task_default_exchange_type` +``CELERY_DEFAULT_QUEUE`` :setting:`task_default_queue` +``CELERY_DEFAULT_RATE_LIMIT`` :setting:`task_default_rate_limit` +``CELERY_DEFAULT_ROUTING_KEY`` :setting:`task_default_routing_key` +``-'-_EAGER_PROPAGATES_EXCEPTIONS`` :setting:`task_eager_propagates` +``CELERY_IGNORE_RESULT`` :setting:`task_ignore_result` +``CELERY_TASK_PUBLISH_RETRY`` :setting:`task_publish_retry` +``CELERY_TASK_PUBLISH_RETRY_POLICY`` :setting:`task_publish_retry_policy` +``CELERY_QUEUES`` :setting:`task_queues` +``CELERY_ROUTES`` :setting:`task_routes` +``CELERY_SEND_TASK_ERROR_EMAILS`` :setting:`task_send_error_emails` +``CELERY_SEND_TASK_SENT_EVENT`` :setting:`task_send_sent_event` +``CELERY_TASK_SERIALIZER`` :setting:`task_serializer` +``CELERYD_TASK_SOFT_TIME_LIMIT`` :setting:`task_soft_time_limit` +``CELERYD_TASK_TIME_LIMIT`` :setting:`task_time_limit` +``CELERY_TRACK_STARTED`` :setting:`task_track_started` +``CELERYD_AGENT`` :setting:`worker_agent` +``CELERYD_AUTOSCALER`` :setting:`worker_autoscaler` +``CELERYD_AUTORELOADER`` :setting:`worker_autoreloader` +``CELERYD_CONCURRENCY`` :setting:`worker_concurrency` +``CELERYD_CONSUMER`` :setting:`worker_consumer` +``CELERY_WORKER_DIRECT`` :setting:`worker_direct` +``CELERY_DISABLE_RATE_LIMITS`` :setting:`worker_disable_rate_limits` +``CELERY_ENABLE_REMOTE_CONTROL`` :setting:`worker_enable_remote_control` +``CELERYD_FORCE_EXECV`` :setting:`worker_force_execv` +``CELERYD_HIJACK_ROOT_LOGGER`` :setting:`worker_hijack_root_logger` +``CELERYD_LOG_COLOR`` :setting:`worker_log_color` +``CELERYD_LOG_FORMAT`` :setting:`worker_log_format` +``CELERYD_WORKER_LOST_WAIT`` :setting:`worker_lost_wait` +``CELERYD_MAX_TASKS_PER_CHILD`` :setting:`worker_max_tasks_per_child`
+``CELERYD_POOL`` :setting:`worker_pool` +``CELERYD_POOL_PUTLOCKS`` :setting:`worker_pool_putlocks` +``CELERYD_POOL_RESTARTS`` :setting:`worker_pool_restarts` +``CELERYD_PREFETCH_MULTIPLIER`` :setting:`worker_prefetch_multiplier` +``CELERYD_REDIRECT_STDOUTS`` :setting:`worker_redirect_stdouts` +``CELERYD_REDIRECT_STDOUTS_LEVEL`` :setting:`worker_redirect_stdouts_level` +``CELERYD_SEND_EVENTS`` :setting:`worker_send_task_events` +``CELERYD_STATE_DB`` :setting:`worker_state_db` +``CELERYD_TASK_LOG_FORMAT`` :setting:`worker_task_log_format` +``CELERYD_TIMER`` :setting:`worker_timer` +``CELERYD_TIMER_PRECISION`` :setting:`worker_timer_precision` +===================================== ============================================== Configuration Directives ======================== .. _conf-datetime: +General settings +---------------- + +.. setting:: accept_content + +accept_content +~~~~~~~~~~~~~~ + +A whitelist of content-types/serializers to allow. + +If a message is received that is not in this list then +the message will be discarded with an error. + +By default any content type is enabled (including pickle and yaml) +so make sure untrusted parties do not have access to your broker. +See :ref:`guide-security` for more. + +Example:: + + # using serializer name + accept_content = ['json'] + + # or the actual content-type (MIME) + accept_content = ['application/json'] + Time and date settings ---------------------- -.. setting:: CELERY_ENABLE_UTC +.. setting:: enable_utc -CELERY_ENABLE_UTC -~~~~~~~~~~~~~~~~~ +enable_utc +~~~~~~~~~~ .. versionadded:: 2.5 @@ -59,52 +216,48 @@ upgraded. Enabled by default since version 3.0. -.. setting:: CELERY_TIMEZONE +.. setting:: timezone -CELERY_TIMEZONE -~~~~~~~~~~~~~~~ +timezone +~~~~~~~~ Configure Celery to use a custom time zone. The timezone value can be any time zone supported by the `pytz`_ library. If not set the UTC timezone is used. 
For backwards compatibility -there is also a :setting:`CELERY_ENABLE_UTC` setting, and this is set +there is also a :setting:`enable_utc` setting, and this is set to false the system local timezone is used instead. .. _`pytz`: http://pypi.python.org/pypi/pytz/ - - .. _conf-tasks: Task settings ------------- -.. setting:: CELERY_ANNOTATIONS +.. setting:: task_annotations -CELERY_ANNOTATIONS -~~~~~~~~~~~~~~~~~~ +task_annotations +~~~~~~~~~~~~~~~~ This setting can be used to rewrite any task attribute from the configuration. The setting can be a dict, or a list of annotation objects that filter for tasks and return a map of attributes to change. - This will change the ``rate_limit`` attribute for the ``tasks.add`` task: .. code-block:: python - CELERY_ANNOTATIONS = {'tasks.add': {'rate_limit': '10/s'}} + task_annotations = {'tasks.add': {'rate_limit': '10/s'}} or change the same for all tasks: .. code-block:: python - CELERY_ANNOTATIONS = {'*': {'rate_limit': '10/s'}} - + task_annotations = {'*': {'rate_limit': '10/s'}} You can change methods too, for example the ``on_failure`` handler: @@ -113,8 +266,7 @@ You can change methods too, for example the ``on_failure`` handler: def my_on_failure(self, exc, task_id, args, kwargs, einfo): print('Oh no! Task failed: {0!r}'.format(exc)) - CELERY_ANNOTATIONS = {'*': {'on_failure': my_on_failure}} - + task_annotations = {'*': {'on_failure': my_on_failure}} If you need more flexibility then you can use objects instead of a dict to choose which tasks to annotate: @@ -127,66 +279,226 @@ instead of a dict to choose which tasks to annotate: if task.name.startswith('tasks.'): return {'rate_limit': '10/s'} - CELERY_ANNOTATIONS = (MyAnnotate(), {…}) + task_annotations = (MyAnnotate(), {…}) +.. setting:: task_compression +task_compression +~~~~~~~~~~~~~~~~ -.. _conf-concurrency: +Default compression used for task messages. 
+Can be ``gzip``, ``bzip2`` (if available), or any custom +compression schemes registered in the Kombu compression registry. -Concurrency settings --------------------- +The default is to send uncompressed messages. -.. setting:: CELERYD_CONCURRENCY +.. setting:: task_protocol -CELERYD_CONCURRENCY -~~~~~~~~~~~~~~~~~~~ +task_protocol +~~~~~~~~~~~~~ -The number of concurrent worker processes/threads/green threads executing -tasks. +Default task message protocol version. +Supports protocols: 1 and 2 (default is 1 for backwards compatibility). -If you're doing mostly I/O you can have more processes, -but if mostly CPU-bound, try to keep it close to the -number of CPUs on your machine. If not set, the number of CPUs/cores -on the host will be used. +.. setting:: task_serializer -Defaults to the number of available CPUs. +task_serializer +~~~~~~~~~~~~~~~ -.. setting:: CELERYD_PREFETCH_MULTIPLIER +A string identifying the default serialization method to use. Can be +`pickle` (default), `json`, `yaml`, `msgpack` or any custom serialization +methods that have been registered with :mod:`kombu.serialization.registry`. -CELERYD_PREFETCH_MULTIPLIER -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. seealso:: -How many messages to prefetch at a time multiplied by the number of -concurrent processes. The default is 4 (four messages for each -process). The default setting is usually a good choice, however -- if you -have very long running tasks waiting in the queue and you have to start the -workers, note that the first worker to start will receive four times the -number of messages initially. Thus the tasks may not be fairly distributed -to the workers. + :ref:`calling-serializers`. -.. note:: +.. setting:: task_publish_retry - Tasks with ETA/countdown are not affected by prefetch limits. +task_publish_retry +~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.2 + +Decides if publishing task messages will be retried in the case +of connection loss or other connection errors. 
+See also :setting:`task_publish_retry_policy`. + +Enabled by default. + +.. setting:: task_publish_retry_policy + +task_publish_retry_policy +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.2 + +Defines the default policy when retrying publishing a task message in +the case of connection loss or other connection errors. + +See :ref:`calling-retry` for more information. +.. _conf-task-execution: + +Task execution settings +----------------------- + +.. setting:: task_always_eager + +task_always_eager +~~~~~~~~~~~~~~~~~ + +If this is :const:`True`, all tasks will be executed locally by blocking until +the task returns. ``apply_async()`` and ``Task.delay()`` will return +an :class:`~celery.result.EagerResult` instance, which emulates the API +and behavior of :class:`~celery.result.AsyncResult`, except the result +is already evaluated. + +That is, tasks will be executed locally instead of being sent to +the queue. + +.. setting:: task_eager_propagates + +task_eager_propagates +~~~~~~~~~~~~~~~~~~~~~ + +If this is :const:`True`, eagerly executed tasks (applied by `task.apply()`, +or when the :setting:`task_always_eager` setting is enabled), will +propagate exceptions. + +It's the same as always running ``apply()`` with ``throw=True``. + +.. setting:: task_ignore_result + +task_ignore_result +~~~~~~~~~~~~~~~~~~ + +Whether to store the task return values or not (tombstones). +If you still want to store errors, just not successful return values, +you can set :setting:`task_store_errors_even_if_ignored`. + +.. setting:: task_store_errors_even_if_ignored + +task_store_errors_even_if_ignored +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If set, the worker stores all task errors in the result store even if +:attr:`Task.ignore_result ` is on. + +.. setting:: task_track_started + +task_track_started +~~~~~~~~~~~~~~~~~~ + +If :const:`True` the task will report its status as 'started' when the +task is executed by a worker. 
The default value is :const:`False` as +the normal behaviour is to not report that level of granularity. Tasks +are either pending, finished, or waiting to be retried. Having a 'started' +state can be useful for when there are long running tasks and there is a +need to report which task is currently running. + +.. setting:: task_time_limit + +task_time_limit +~~~~~~~~~~~~~~~ + +Task hard time limit in seconds. The worker processing the task will +be killed and replaced with a new one when this is exceeded. + +.. setting:: task_soft_time_limit + +task_soft_time_limit +~~~~~~~~~~~~~~~~~~~~ + +Task soft time limit in seconds. + +The :exc:`~@SoftTimeLimitExceeded` exception will be +raised when this is exceeded. The task can catch this to +e.g. clean up before the hard time limit comes. + +Example: + +.. code-block:: python + + from celery.exceptions import SoftTimeLimitExceeded + + @app.task + def mytask(): + try: + return do_work() + except SoftTimeLimitExceeded: + cleanup_in_a_hurry() + +.. setting:: task_acks_late + +task_acks_late +~~~~~~~~~~~~~~ + +Late ack means the task messages will be acknowledged **after** the task +has been executed, not *just before*, which is the default behavior. + +.. seealso:: + + FAQ: :ref:`faq-acks_late-vs-retry`. + +.. setting:: task_reject_on_worker_lost + +task_reject_on_worker_lost +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Even if :setting:`task_acks_late` is enabled, the worker will +acknowledge tasks when the worker process executing them abruptly +exits or is signaled (e.g. :sig:`KILL`/:sig:`INT`, etc). + +Setting this to true allows the message to be requeued instead, +so that the task will execute again by the same worker, or another +worker. + +.. warning:: + + Enabling this can cause message loops; make sure you know + what you're doing. + +.. setting:: task_default_rate_limit + +task_default_rate_limit +~~~~~~~~~~~~~~~~~~~~~~~ + +The global default rate limit for tasks.
+
+This value is used for tasks that do not have a custom rate limit.
+The default is no rate limit.
+
+.. seealso::
+
+    The :setting:`worker_disable_rate_limits` setting can
+    disable all rate limits.
 
 .. _conf-result-backend:
 
 Task result backend settings
 ----------------------------
 
-.. setting:: CELERY_RESULT_BACKEND
+.. setting:: result_backend
 
-CELERY_RESULT_BACKEND
-~~~~~~~~~~~~~~~~~~~~~
-:Deprecated aliases: ``CELERY_BACKEND``
+result_backend
+~~~~~~~~~~~~~~
 
 The backend used to store task results (tombstones).
 Disabled by default.
 Can be one of the following:
 
+* rpc
+    Send results back as AMQP messages
+    See :ref:`conf-rpc-result-backend`.
+
 * database
     Use a relational database supported by `SQLAlchemy`_.
     See :ref:`conf-database-result-backend`.
 
+* redis
+    Use `Redis`_ to store the results.
+    See :ref:`conf-redis-result-backend`.
+
 * cache
     Use `memcached`_ to store the results.
     See :ref:`conf-cache-result-backend`.
@@ -195,18 +507,14 @@ Can be one of the following:
     Use `MongoDB`_ to store the results.
     See :ref:`conf-mongodb-result-backend`.
 
-* redis
-    Use `Redis`_ to store the results.
-    See :ref:`conf-redis-result-backend`.
-
-* amqp
-    Send results back as AMQP messages
-    See :ref:`conf-amqp-result-backend`.
-
 * cassandra
     Use `Cassandra`_ to store the results.
     See :ref:`conf-cassandra-result-backend`.
 
+* elasticsearch
+    Use `Elasticsearch`_ to store the results.
+    See :ref:`conf-elasticsearch-result-backend`.
+
 * ironcache
     Use `IronCache`_ to store the results.
     See :ref:`conf-ironcache-result-backend`.
@@ -215,6 +523,18 @@ Can be one of the following:
     Use `Couchbase`_ to store the results.
     See :ref:`conf-couchbase-result-backend`.
 
+* couchdb
+    Use `CouchDB`_ to store the results.
+    See :ref:`conf-couchdb-result-backend`.
+
+* amqp
+    Older AMQP backend (badly) emulating a database-based backend.
+    See :ref:`conf-amqp-result-backend`.
+
+* filesystem
+    Use a shared directory to store the results.
+    See :ref:`conf-filesystem-result-backend`.
+
 .. 
warning: While the AMQP result backend is very efficient, you must make sure @@ -225,19 +545,70 @@ Can be one of the following: .. _`MongoDB`: http://mongodb.org .. _`Redis`: http://redis.io .. _`Cassandra`: http://cassandra.apache.org/ +.. _`Elasticsearch`: https://aws.amazon.com/elasticsearch-service/ .. _`IronCache`: http://www.iron.io/cache +.. _`CouchDB`: http://www.couchdb.com/ .. _`Couchbase`: http://www.couchbase.com/ +.. setting:: result_serializer -.. setting:: CELERY_RESULT_SERIALIZER - -CELERY_RESULT_SERIALIZER -~~~~~~~~~~~~~~~~~~~~~~~~ +result_serializer +~~~~~~~~~~~~~~~~~ Result serialization format. Default is ``pickle``. See :ref:`calling-serializers` for information about supported serialization formats. +.. setting:: result_compression + +result_compression +~~~~~~~~~~~~~~~~~~ + +Optional compression method used for task results. +Supports the same options as the :setting:`task_serializer` setting. + +Default is no compression. + +.. setting:: result_expires + +result_expires +~~~~~~~~~~~~~~ + +Time (in seconds, or a :class:`~datetime.timedelta` object) for when after +stored task tombstones will be deleted. + +A built-in periodic task will delete the results after this time +(``celery.backend_cleanup``), assuming that ``celery beat`` is +enabled. The task runs daily at 4am. + +A value of :const:`None` or 0 means results will never expire (depending +on backend specifications). + +Default is to expire after 1 day. + +.. note:: + + For the moment this only works with the amqp, database, cache, redis and MongoDB + backends. + + When using the database or MongoDB backends, `celery beat` must be + running for the results to be expired. + +.. setting:: result_cache_max + +result_cache_max +~~~~~~~~~~~~~~~~ + +Enables client caching of results, which can be useful for the old 'amqp' +backend where the result is unavailable as soon as one result instance +consumes it. + +This is the total number of results to cache before older results are evicted. 
+A value of 0 or None means no limit, and a value of :const:`-1` +will disable the cache. + +Disabled by default. + .. _conf-database-result-backend: Database backend settings @@ -247,26 +618,26 @@ Database URL Examples ~~~~~~~~~~~~~~~~~~~~~ To use the database backend you have to configure the -:setting:`CELERY_RESULT_BACKEND` setting with a connection URL and the ``db+`` +:setting:`result_backend` setting with a connection URL and the ``db+`` prefix: .. code-block:: python - CELERY_RESULT_BACKEND = 'db+scheme://user:password@host:port/dbname' + result_backend = 'db+scheme://user:password@host:port/dbname' -Examples: +Examples:: # sqlite (filename) - CELERY_RESULT_BACKEND = 'db+sqlite:///results.sqlite' + result_backend = 'db+sqlite:///results.sqlite' # mysql - CELERY_RESULT_BACKEND = 'db+mysql://scott:tiger@localhost/foo' + result_backend = 'db+mysql://scott:tiger@localhost/foo' # postgresql - CELERY_RESULT_BACKEND = 'db+postgresql://scott:tiger@localhost/mydatabase' + result_backend = 'db+postgresql://scott:tiger@localhost/mydatabase' # oracle - CELERY_RESULT_BACKEND = 'db+oracle://scott:tiger@127.0.0.1:1521/sidname' + result_backend = 'db+oracle://scott:tiger@127.0.0.1:1521/sidname' .. code-block:: python @@ -280,28 +651,31 @@ strings (which is the part of the URI that comes after the ``db+`` prefix). .. _`Connection String`: http://www.sqlalchemy.org/docs/core/engines.html#database-urls -.. setting:: CELERY_RESULT_DBURI +.. setting:: sqlalchemy_dburi -CELERY_RESULT_DBURI -~~~~~~~~~~~~~~~~~~~ +sqlalchemy_dburi +~~~~~~~~~~~~~~~~ This setting is no longer used as it's now possible to specify -the database URL directly in the :setting:`CELERY_RESULT_BACKEND` setting. +the database URL directly in the :setting:`result_backend` setting. -.. setting:: CELERY_RESULT_ENGINE_OPTIONS +.. 
setting:: sqlalchemy_engine_options
 
-CELERY_RESULT_ENGINE_OPTIONS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+sqlalchemy_engine_options
+~~~~~~~~~~~~~~~~~~~~~~~~~
 
 To specify additional SQLAlchemy database engine options you can use
-the :setting:`CELERY_RESULT_ENGINE_OPTIONS` setting::
+the :setting:`sqlalchemy_engine_options` setting::
 
     # echo enables verbose logging from SQLAlchemy.
-    CELERY_RESULT_ENGINE_OPTIONS = {'echo': True}
+    sqlalchemy_engine_options = {'echo': True}
 
+.. setting:: sqlalchemy_short_lived_sessions
+
+sqlalchemy_short_lived_sessions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. setting:: CELERY_RESULT_DB_SHORT_LIVED_SESSIONS
 
-    CELERY_RESULT_DB_SHORT_LIVED_SESSIONS = True
+    sqlalchemy_short_lived_sessions = True
 
 Short lived sessions are disabled by default. If enabled they can drastically
 reduce performance, especially on systems processing lots of tasks. This option is useful
@@ -310,10 +684,10 @@ going stale through inactivity. For example, intermittent errors like
 `(OperationalError) (2006, 'MySQL server has gone away')` can be fixed by enabling
 short lived sessions. This option only affects the database backend.
 
-Specifying Table Names
-~~~~~~~~~~~~~~~~~~~~~~
+.. setting:: sqlalchemy_table_names
 
-.. setting:: CELERY_RESULT_DB_TABLENAMES
+sqlalchemy_table_names
+~~~~~~~~~~~~~~~~~~~~~~
 
 When SQLAlchemy is configured as the result backend, Celery automatically
 creates two tables to store result metadata for tasks. This setting allows
@@ -322,43 +696,20 @@ you to customize the table names:
 
 .. code-block:: python
 
     # use custom table names for the database result backend.
-    CELERY_RESULT_DB_TABLENAMES = {
+    sqlalchemy_table_names = {
         'task': 'myapp_taskmeta',
         'group': 'myapp_groupmeta',
     }
 
-.. _conf-amqp-result-backend:
+.. _conf-rpc-result-backend:
 
-AMQP backend settings
----------------------
+RPC backend settings
+--------------------
 
-.. note::
+.. setting:: result_persistent
 
-    The AMQP backend requires RabbitMQ 1.1.0 or higher to automatically
-    expire results. 
If you are running an older version of RabbitmQ - you should disable result expiration like this: - - CELERY_TASK_RESULT_EXPIRES = None - -.. setting:: CELERY_RESULT_EXCHANGE - -CELERY_RESULT_EXCHANGE -~~~~~~~~~~~~~~~~~~~~~~ - -Name of the exchange to publish results in. Default is `celeryresults`. - -.. setting:: CELERY_RESULT_EXCHANGE_TYPE - -CELERY_RESULT_EXCHANGE_TYPE -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The exchange type of the result exchange. Default is to use a `direct` -exchange. - -.. setting:: CELERY_RESULT_PERSISTENT - -CELERY_RESULT_PERSISTENT -~~~~~~~~~~~~~~~~~~~~~~~~ +result_persistent +~~~~~~~~~~~~~~~~~ If set to :const:`True`, result messages will be persistent. This means the messages will not be lost after a broker restart. The default is for the @@ -369,8 +720,8 @@ Example configuration .. code-block:: python - CELERY_RESULT_BACKEND = 'amqp' - CELERY_TASK_RESULT_EXPIRES = 18000 # 5 hours. + result_backend = 'rpc://' + result_persistent = False .. _conf-cache-result-backend: @@ -386,42 +737,47 @@ Using a single memcached server: .. code-block:: python - CELERY_RESULT_BACKEND = 'cache+memcached://127.0.0.1:11211/' + result_backend = 'cache+memcached://127.0.0.1:11211/' Using multiple memcached servers: .. code-block:: python - CELERY_RESULT_BACKEND = """ + result_backend = """ cache+memcached://172.19.26.240:11211;172.19.26.242:11211/ """.strip() -.. setting:: CELERY_CACHE_BACKEND_OPTIONS - The "memory" backend stores the cache in memory only: - CELERY_CACHE_BACKEND = 'memory' +.. code-block:: python + + result_backend = 'cache' + cache_backend = 'memory' -CELERY_CACHE_BACKEND_OPTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. setting:: cache_backend_options + +cache_backend_options +~~~~~~~~~~~~~~~~~~~~~ -You can set pylibmc options using the :setting:`CELERY_CACHE_BACKEND_OPTIONS` +You can set pylibmc options using the :setting:`cache_backend_options` setting: .. 
code-block:: python - CELERY_CACHE_BACKEND_OPTIONS = {'binary': True, - 'behaviors': {'tcp_nodelay': True}} + cache_backend_options = { + 'binary': True, + 'behaviors': {'tcp_nodelay': True}, + } .. _`pylibmc`: http://sendapatch.se/projects/pylibmc/ -.. setting:: CELERY_CACHE_BACKEND +.. setting:: cache_backend -CELERY_CACHE_BACKEND -~~~~~~~~~~~~~~~~~~~~ +cache_backend +~~~~~~~~~~~~~ This setting is no longer used as it's now possible to specify -the cache backend directly in the :setting:`CELERY_RESULT_BACKEND` setting. +the cache backend directly in the :setting:`result_backend` setting. .. _conf-redis-result-backend: @@ -438,24 +794,24 @@ Configuring the backend URL To install the redis package use `pip` or `easy_install`: - .. code-block:: bash + .. code-block:: console $ pip install redis -This backend requires the :setting:`CELERY_RESULT_BACKEND` +This backend requires the :setting:`result_backend` setting to be set to a Redis URL:: - CELERY_RESULT_BACKEND = 'redis://:password@host:port/db' + result_backend = 'redis://:password@host:port/db' For example:: - CELERY_RESULT_BACKEND = 'redis://localhost/0' + result_backend = 'redis://localhost/0' which is the same as:: - CELERY_RESULT_BACKEND = 'redis://' + result_backend = 'redis://' -The fields of the URL is defined as folows: +The fields of the URL are defined as follows: - *host* @@ -474,14 +830,24 @@ The db can include an optional leading slash. Password used to connect to the database. -.. setting:: CELERY_REDIS_MAX_CONNECTIONS +.. setting:: redis_max_connections -CELERY_REDIS_MAX_CONNECTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +redis_max_connections +~~~~~~~~~~~~~~~~~~~~~ Maximum number of connections available in the Redis connection pool used for sending and retrieving results. +.. setting:: redis_socket_timeout + +redis_socket_timeout +~~~~~~~~~~~~~~~~~~~~ + +Socket timeout for connections to Redis from the result backend +in seconds (int/float) + +Default is 5 seconds. + .. 
_conf-mongodb-result-backend: MongoDB backend settings @@ -492,9 +858,9 @@ MongoDB backend settings The MongoDB backend requires the :mod:`pymongo` library: http://github.com/mongodb/mongo-python-driver/tree/master -.. setting:: CELERY_MONGODB_BACKEND_SETTINGS +.. setting:: mongodb_backend_settings -CELERY_MONGODB_BACKEND_SETTINGS +mongodb_backend_settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is a dict supporting the following keys: @@ -526,108 +892,201 @@ Example configuration .. code-block:: python - CELERY_RESULT_BACKEND = 'mongodb://192.168.1.100:30000/' - CELERY_MONGODB_BACKEND_SETTINGS = { + result_backend = 'mongodb://192.168.1.100:30000/' + mongodb_backend_settings = { 'database': 'mydb', 'taskmeta_collection': 'my_taskmeta_collection', } .. _conf-cassandra-result-backend: -Cassandra backend settings +cassandra backend settings -------------------------- .. note:: - The Cassandra backend requires the :mod:`pycassa` library: - http://pypi.python.org/pypi/pycassa/ + This Cassandra backend driver requires :mod:`cassandra-driver`. + https://pypi.python.org/pypi/cassandra-driver - To install the pycassa package use `pip` or `easy_install`: + To install, use `pip` or `easy_install`: - .. code-block:: bash + .. code-block:: console - $ pip install pycassa + $ pip install cassandra-driver This backend requires the following configuration directives to be set. -.. setting:: CASSANDRA_SERVERS +.. setting:: cassandra_servers -CASSANDRA_SERVERS +cassandra_servers ~~~~~~~~~~~~~~~~~ -List of ``host:port`` Cassandra servers. e.g.:: +List of ``host`` Cassandra servers. e.g.:: + + cassandra_servers = ['localhost'] + + +.. setting:: cassandra_port + +cassandra_port +~~~~~~~~~~~~~~ - CASSANDRA_SERVERS = ['localhost:9160'] +Port to contact the Cassandra servers on. Default is 9042. -.. setting:: CASSANDRA_KEYSPACE +.. setting:: cassandra_keyspace -CASSANDRA_KEYSPACE +cassandra_keyspace ~~~~~~~~~~~~~~~~~~ The keyspace in which to store the results. 
e.g.:: - CASSANDRA_KEYSPACE = 'tasks_keyspace' + cassandra_keyspace = 'tasks_keyspace' -.. setting:: CASSANDRA_COLUMN_FAMILY +.. setting:: cassandra_table -CASSANDRA_COLUMN_FAMILY -~~~~~~~~~~~~~~~~~~~~~~~ +cassandra_table +~~~~~~~~~~~~~~~ -The column family in which to store the results. e.g.:: +The table (column family) in which to store the results. e.g.:: - CASSANDRA_COLUMN_FAMILY = 'tasks' + cassandra_table = 'tasks' -.. setting:: CASSANDRA_READ_CONSISTENCY +.. setting:: cassandra_read_consistency -CASSANDRA_READ_CONSISTENCY +cassandra_read_consistency ~~~~~~~~~~~~~~~~~~~~~~~~~~ -The read consistency used. Values can be ``ONE``, ``QUORUM`` or ``ALL``. +The read consistency used. Values can be ``ONE``, ``TWO``, ``THREE``, ``QUORUM``, ``ALL``, +``LOCAL_QUORUM``, ``EACH_QUORUM``, ``LOCAL_ONE``. -.. setting:: CASSANDRA_WRITE_CONSISTENCY +.. setting:: cassandra_write_consistency -CASSANDRA_WRITE_CONSISTENCY +cassandra_write_consistency ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The write consistency used. Values can be ``ONE``, ``QUORUM`` or ``ALL``. +The write consistency used. Values can be ``ONE``, ``TWO``, ``THREE``, ``QUORUM``, ``ALL``, +``LOCAL_QUORUM``, ``EACH_QUORUM``, ``LOCAL_ONE``. + +.. setting:: cassandra_entry_ttl + +cassandra_entry_ttl +~~~~~~~~~~~~~~~~~~~ + +Time-to-live for status entries. They will expire and be removed after that many seconds +after adding. Default (None) means they will never expire. -.. setting:: CASSANDRA_DETAILED_MODE +.. setting:: cassandra_auth_provider -CASSANDRA_DETAILED_MODE +cassandra_auth_provider ~~~~~~~~~~~~~~~~~~~~~~~ -Enable or disable detailed mode. Default is :const:`False`. -This mode allows to use the power of Cassandra wide columns to -store all states for a task as a wide column, instead of only the last one. +AuthProvider class within ``cassandra.auth`` module to use. Values can be +``PlainTextAuthProvider`` or ``SaslAuthProvider``. 
-To use this mode, you need to configure your ColumnFamily to -use the ``TimeUUID`` type as a comparator:: +.. setting:: cassandra_auth_kwargs - create column family task_results with comparator = TimeUUIDType; +cassandra_auth_kwargs +~~~~~~~~~~~~~~~~~~~~~ -CASSANDRA_OPTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Named arguments to pass into the auth provider. e.g.:: + + cassandra_auth_kwargs = { + username: 'cassandra', + password: 'cassandra' + } + +Example configuration +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python -Options to be passed to the `pycassa connection pool`_ (optional). + cassandra_servers = ['localhost'] + cassandra_keyspace = 'celery' + cassandra_table = 'tasks' + cassandra_read_consistency = 'ONE' + cassandra_write_consistency = 'ONE' + cassandra_entry_ttl = 86400 -.. _`pycassa connection pool`: http://pycassa.github.com/pycassa/api/pycassa/pool.html +.. _conf-elasticsearch-result-backend: + +Elasticsearch backend settings +------------------------------ + +To use `Elasticsearch`_ as the result backend you simply need to +configure the :setting:`result_backend` setting with the correct URL. Example configuration ~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python - CASSANDRA_SERVERS = ['localhost:9160'] - CASSANDRA_KEYSPACE = 'celery' - CASSANDRA_COLUMN_FAMILY = 'task_results' - CASSANDRA_READ_CONSISTENCY = 'ONE' - CASSANDRA_WRITE_CONSISTENCY = 'ONE' - CASSANDRA_DETAILED_MODE = True - CASSANDRA_OPTIONS = { - 'timeout': 300, - 'max_retries': 10 - } + result_backend = 'elasticsearch://example.com:9200/index_name/doc_type' + +.. _conf-riak-result-backend: + +Riak backend settings +--------------------- + +.. note:: + + The Riak backend requires the :mod:`riak` library: + http://pypi.python.org/pypi/riak/ + To install the riak package use `pip` or `easy_install`: + + .. 
code-block:: console
+
+        $ pip install riak
+
+This backend requires the :setting:`result_backend`
+setting to be set to a Riak URL::
+
+    result_backend = 'riak://host:port/bucket'
+
+For example::
+
+    result_backend = 'riak://localhost/celery'
+
+which is the same as::
+
+    result_backend = 'riak://'
+
+The fields of the URL are defined as follows:
+
+- *host*
+
+Host name or IP address of the Riak server. e.g. `'localhost'`.
+
+- *port*
+
+Port to the Riak server using the protobuf protocol. Default is 8087.
+
+- *bucket*
+
+Bucket name to use. Default is `celery`.
+The bucket needs to be a string with ascii characters only.
+
+Alternatively, this backend can be configured with the following configuration directives.
+
+.. setting:: riak_backend_settings
+
+riak_backend_settings
+~~~~~~~~~~~~~~~~~~~~~
+
+This is a dict supporting the following keys:
+
+* host
+    The host name of the Riak server. Defaults to "localhost".
+
+* port
+    The port the Riak server is listening to. Defaults to 8087.
+
+* bucket
+    The bucket name to connect to. Defaults to "celery".
+
+* protocol
+    The protocol to use to connect to the Riak server. This is not configurable
+    via :setting:`result_backend`.
 
 .. _conf-ironcache-result-backend:
 
 IronCache backend settings
@@ -641,13 +1100,13 @@ IronCache backend settings
 
 To install the iron_celery package use `pip` or `easy_install`:
 
-    .. code-block:: bash
+    .. code-block:: console
 
         $ pip install iron_celery
 
-IronCache is configured via the URL provided in :setting:`CELERY_RESULT_BACKEND`, for example::
+IronCache is configured via the URL provided in :setting:`result_backend`, for example::
 
-    CELERY_RESULT_BACKEND = 'ironcache://project_id:token@'
+    result_backend = 'ironcache://project_id:token@'
 
 Or to change the cache name::
 
@@ -655,7 +1114,6 @@ Or to change the cache name::
 
 For more information, see: https://github.com/iron-io/iron_celery
-
 .. 
_conf-couchbase-result-backend: Couchbase backend settings @@ -668,20 +1126,19 @@ Couchbase backend settings To install the couchbase package use `pip` or `easy_install`: - .. code-block:: bash + .. code-block:: console $ pip install couchbase -This backend can be configured via the :setting:`CELERY_RESULT_BACKEND` +This backend can be configured via the :setting:`result_backend` set to a couchbase URL:: - CELERY_RESULT_BACKEND = 'couchbase://username:password@host:port/bucket' + result_backend = 'couchbase://username:password@host:port/bucket' +.. setting:: couchbase_backend_settings -.. setting:: CELERY_COUCHBASE_BACKEND_SETTINGS - -CELERY_COUCHBASE_BACKEND_SETTINGS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +couchbase_backend_settings +~~~~~~~~~~~~~~~~~~~~~~~~~~ This is a dict supporting the following keys: @@ -701,559 +1158,602 @@ This is a dict supporting the following keys: * password Password to authenticate to the Couchbase server (optional). +.. _conf-couchdb-result-backend: -.. _conf-messaging: +CouchDB backend settings +------------------------ -Message Routing ---------------- +.. note:: -.. _conf-messaging-routing: + The CouchDB backend requires the :mod:`pycouchdb` library: + https://pypi.python.org/pypi/pycouchdb -.. setting:: CELERY_QUEUES + To install the couchbase package use `pip` or `easy_install`: -CELERY_QUEUES -~~~~~~~~~~~~~ + .. code-block:: console -The mapping of queues the worker consumes from. This is a dictionary -of queue name/options. See :ref:`guide-routing` for more information. + $ pip install pycouchdb -The default is a queue/exchange/binding key of ``celery``, with -exchange type ``direct``. +This backend can be configured via the :setting:`result_backend` +set to a couchdb URL:: -You don't have to care about this unless you want custom routing facilities. + result_backend = 'couchdb://username:password@host:port/container' -.. 
setting:: CELERY_ROUTES +The URL is formed out of the following parts: -CELERY_ROUTES -~~~~~~~~~~~~~ +* username + User name to authenticate to the CouchDB server as (optional). -A list of routers, or a single router used to route tasks to queues. -When deciding the final destination of a task the routers are consulted -in order. See :ref:`routers` for more information. +* password + Password to authenticate to the CouchDB server (optional). -.. setting:: CELERY_QUEUE_HA_POLICY +* host + Host name of the CouchDB server. Defaults to ``localhost``. -CELERY_QUEUE_HA_POLICY -~~~~~~~~~~~~~~~~~~~~~~ -:brokers: RabbitMQ +* port + The port the CouchDB server is listening to. Defaults to ``8091``. -This will set the default HA policy for a queue, and the value -can either be a string (usually ``all``): +* container + The default container the CouchDB server is writing to. + Defaults to ``default``. -.. code-block:: python +.. _conf-amqp-result-backend: - CELERY_QUEUE_HA_POLICY = 'all' +AMQP backend settings +--------------------- -Using 'all' will replicate the queue to all current nodes, -Or you can give it a list of nodes to replicate to: +.. admonition:: Do not use in production. -.. code-block:: python + This is the old AMQP result backend that creates one queue per task, + if you want to send results back as message please consider using the + RPC backend instead, or if you need the results to be persistent + use a result backend designed for that purpose (e.g. Redis, or a database). - CELERY_QUEUE_HA_POLICY = ['rabbit@host1', 'rabbit@host2'] +.. note:: + The AMQP backend requires RabbitMQ 1.1.0 or higher to automatically + expire results. If you are running an older version of RabbitMQ + you should disable result expiration like this: -Using a list will implicitly set ``x-ha-policy`` to 'nodes' and -``x-ha-policy-params`` to the given list of nodes. + result_expires = None -See http://www.rabbitmq.com/ha.html for more information. +.. setting:: result_exchange -.. 
setting:: CELERY_WORKER_DIRECT +result_exchange +~~~~~~~~~~~~~~~ -CELERY_WORKER_DIRECT -~~~~~~~~~~~~~~~~~~~~ +Name of the exchange to publish results in. Default is `celeryresults`. -This option enables so that every worker has a dedicated queue, -so that tasks can be routed to specific workers. +.. setting:: result_exchange_type -The queue name for each worker is automatically generated based on -the worker hostname and a ``.dq`` suffix, using the ``C.dq`` exchange. +result_exchange_type +~~~~~~~~~~~~~~~~~~~~ -For example the queue name for the worker with node name ``w1@example.com`` -becomes:: +The exchange type of the result exchange. Default is to use a `direct` +exchange. - w1@example.com.dq +result_persistent +~~~~~~~~~~~~~~~~~ -Then you can route the task to the task by specifying the hostname -as the routing key and the ``C.dq`` exchange:: +If set to :const:`True`, result messages will be persistent. This means the +messages will not be lost after a broker restart. The default is for the +results to be transient. - CELERY_ROUTES = { - 'tasks.add': {'exchange': 'C.dq', 'routing_key': 'w1@example.com'} - } +Example configuration +~~~~~~~~~~~~~~~~~~~~~ -.. setting:: CELERY_CREATE_MISSING_QUEUES +.. code-block:: python -CELERY_CREATE_MISSING_QUEUES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result_backend = 'amqp' + result_expires = 18000 # 5 hours. -If enabled (default), any queues specified that are not defined in -:setting:`CELERY_QUEUES` will be automatically created. See -:ref:`routing-automatic`. +.. _conf-filesystem-result-backend: -.. setting:: CELERY_DEFAULT_QUEUE +Filesystem backend settings +--------------------------- -CELERY_DEFAULT_QUEUE -~~~~~~~~~~~~~~~~~~~~ +This backend can be configured using a file URL, for example:: -The name of the default queue used by `.apply_async` if the message has -no route or no custom queue has been specified. 
+    result_backend = 'file:///var/celery/results'
+
+The configured directory needs to be shared and writeable by all servers using
+the backend.
 
-This queue must be listed in :setting:`CELERY_QUEUES`.
-If :setting:`CELERY_QUEUES` is not specified then it is automatically
-created containing one queue entry, where this name is used as the name of
-that queue.
+If you are trying Celery on a single system you can simply use the backend
+without any further configuration. For larger clusters you could use NFS,
+`GlusterFS`_, CIFS, `HDFS`_ (using FUSE) or any other filesystem.
 
-The default is: `celery`.
+.. _`GlusterFS`: http://www.gluster.org/
+.. _`HDFS`: http://hadoop.apache.org/
 
-.. seealso::
-    :ref:`routing-changing-default-queue`
+.. _conf-messaging:
 
-.. setting:: CELERY_DEFAULT_EXCHANGE
+Message Routing
+---------------
 
-CELERY_DEFAULT_EXCHANGE
-~~~~~~~~~~~~~~~~~~~~~~~
+.. _conf-messaging-routing:
 
-Name of the default exchange to use when no custom exchange is
-specified for a key in the :setting:`CELERY_QUEUES` setting.
+.. setting:: task_queues
 
-The default is: `celery`.
+task_queues
+~~~~~~~~~~~
 
-.. setting:: CELERY_DEFAULT_EXCHANGE_TYPE
+Most users will not want to specify this setting and should rather use
+the :ref:`automatic routing facilities <routing-automatic>`.
 
-CELERY_DEFAULT_EXCHANGE_TYPE
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If you really want to configure advanced routing, this setting should
+be a list of :class:`kombu.Queue` objects the worker will consume from.
 
-Default exchange type used when no custom exchange type is specified
-for a key in the :setting:`CELERY_QUEUES` setting.
-The default is: `direct`.
+Note that workers can override this setting via the `-Q` option,
+or individual queues from this list (by name) can be excluded using
+the `-X` option.
 
-.. setting:: CELERY_DEFAULT_ROUTING_KEY
+Also see :ref:`routing-basics` for more information.
-CELERY_DEFAULT_ROUTING_KEY
-~~~~~~~~~~~~~~~~~~~~~~~~~~
+The default is a queue/exchange/binding key of ``celery``, with
+exchange type ``direct``.
 
-The default routing key used when no custom routing key
-is specified for a key in the :setting:`CELERY_QUEUES` setting.
+See also :setting:`task_routes`.
 
-The default is: `celery`.
+.. setting:: task_routes
 
-.. setting:: CELERY_DEFAULT_DELIVERY_MODE
+task_routes
+~~~~~~~~~~~
 
-CELERY_DEFAULT_DELIVERY_MODE
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A list of routers, or a single router used to route tasks to queues.
+When deciding the final destination of a task the routers are consulted
+in order.
 
-Can be `transient` or `persistent`. The default is to send
-persistent messages.
+A router can be specified as either:
 
-.. _conf-broker-settings:
+* A router class instance.
+* A string which provides the path to a router class
+* A dict containing router specification:
+    Will be converted to a :class:`celery.routes.MapRoute` instance.
+* A list of ``(pattern, route)`` tuples:
+    Will be converted to a :class:`celery.routes.MapRoute` instance.
 
-Broker Settings
----------------
+Examples:
 
-.. setting:: CELERY_ACCEPT_CONTENT
+.. code-block:: python
 
-CELERY_ACCEPT_CONTENT
-~~~~~~~~~~~~~~~~~~~~~
+    task_routes = {
+        'celery.ping': 'default',
+        'mytasks.add': 'cpu-bound',
+        'feed.tasks.*': 'feeds',  # <-- glob pattern
+        re.compile(r'(image|video)\.tasks\..*'): 'media',  # <-- regex
+        'video.encode': {
+            'queue': 'video',
+            'exchange': 'media',
+            'routing_key': 'media.video.encode',
+        },
+    }
 
-A whitelist of content-types/serializers to allow.
+    task_routes = ('myapp.tasks.Router', {'celery.ping': 'default'})
 
-If a message is received that is not in this list then
-the message will be discarded with an error.
+Where ``myapp.tasks.Router`` could be:
 
-By default any content type is enabled (including pickle and yaml)
-so make sure untrusted parties do not have access to your broker.
-See :ref:`guide-security` for more.
+.. 
code-block:: python -Example:: + class Router(object): - # using serializer name - CELERY_ACCEPT_CONTENT = ['json'] + def route_for_task(self, task, args=None, kwargs=None): + if task == 'celery.ping': + return {'queue': 'default'} - # or the actual content-type (MIME) - CELERY_ACCEPT_CONTENT = ['application/json'] +``route_for_task`` may return a string or a dict. A string then means +it's a queue name in :setting:`task_queues`, a dict means it's a custom route. -.. setting:: BROKER_FAILOVER_STRATEGY +When sending tasks, the routers are consulted in order. The first +router that doesn't return ``None`` is the route to use. The message options +is then merged with the found route settings, where the routers settings +have priority. -BROKER_FAILOVER_STRATEGY -~~~~~~~~~~~~~~~~~~~~~~~~ +Example if :func:`~celery.execute.apply_async` has these arguments: -Default failover strategy for the broker Connection object. If supplied, -may map to a key in 'kombu.connection.failover_strategies', or be a reference -to any method that yields a single item from a supplied list. +.. code-block:: python -Example:: + Task.apply_async(immediate=False, exchange='video', + routing_key='video.compress') - # Random failover strategy - def random_failover_strategy(servers): - it = list(it) # don't modify callers list - shuffle = random.shuffle - for _ in repeat(None): - shuffle(it) - yield it[0] +and a router returns: - BROKER_FAILOVER_STRATEGY=random_failover_strategy +.. code-block:: python -.. setting:: BROKER_TRANSPORT + {'immediate': True, 'exchange': 'urgent'} -BROKER_FAILOVER_STRATEGY -~~~~~~~~~~~~~~~~~~~~~~~~ +the final message options will be: -Default failover strategy for the broker Connection object. If supplied, -may map to a key in 'kombu.connection.failover_strategies', or be a reference -to any method that yields a single item from a supplied list. +.. 
code-block:: python -Example:: + immediate=True, exchange='urgent', routing_key='video.compress' - # Random failover strategy - def random_failover_strategy(servers): - it = list(it) # don't modify callers list - shuffle = random.shuffle - for _ in repeat(None): - shuffle(it) - yield it[0] +(and any default message options defined in the +:class:`~celery.task.base.Task` class) - BROKER_FAILOVER_STRATEGY=random_failover_strategy +Values defined in :setting:`task_routes` have precedence over values defined in +:setting:`task_queues` when merging the two. +With the follow settings: -BROKER_TRANSPORT -~~~~~~~~~~~~~~~~ -:Aliases: ``BROKER_BACKEND`` -:Deprecated aliases: ``CARROT_BACKEND`` +.. code-block:: python -.. setting:: BROKER_URL + task_queues = { + 'cpubound': { + 'exchange': 'cpubound', + 'routing_key': 'cpubound', + }, + } -BROKER_URL -~~~~~~~~~~ + task_routes = { + 'tasks.add': { + 'queue': 'cpubound', + 'routing_key': 'tasks.add', + 'serializer': 'json', + }, + } -Default broker URL. This must be an URL in the form of:: +The final routing options for ``tasks.add`` will become: - transport://userid:password@hostname:port/virtual_host +.. code-block:: javascript -Only the scheme part (``transport://``) is required, the rest -is optional, and defaults to the specific transports default values. + {'exchange': 'cpubound', + 'routing_key': 'tasks.add', + 'serializer': 'json'} -The transport part is the broker implementation to use, and the -default is ``amqp``, which uses ``librabbitmq`` by default or falls back to -``pyamqp`` if that is not installed. Also there are many other choices including -``redis``, ``beanstalk``, ``sqlalchemy``, ``django``, ``mongodb``, -``couchdb``. -It can also be a fully qualified path to your own transport implementation. +See :ref:`routers` for more examples. -See :ref:`kombu:connection-urls` in the Kombu documentation for more -information. +.. setting:: task_queue_ha_policy -.. 
setting:: BROKER_HEARTBEAT +task_queue_ha_policy +~~~~~~~~~~~~~~~~~~~~ +:brokers: RabbitMQ -BROKER_HEARTBEAT -~~~~~~~~~~~~~~~~ -:transports supported: ``pyamqp`` +This will set the default HA policy for a queue, and the value +can either be a string (usually ``all``): -It's not always possible to detect connection loss in a timely -manner using TCP/IP alone, so AMQP defines something called heartbeats -that's is used both by the client and the broker to detect if -a connection was closed. +.. code-block:: python -Hartbeats are disabled by default. + task_queue_ha_policy = 'all' -If the heartbeat value is 10 seconds, then -the heartbeat will be monitored at the interval specified -by the :setting:`BROKER_HEARTBEAT_CHECKRATE` setting, which by default is -double the rate of the heartbeat value -(so for the default 10 seconds, the heartbeat is checked every 5 seconds). +Using 'all' will replicate the queue to all current nodes, +Or you can give it a list of nodes to replicate to: -.. setting:: BROKER_HEARTBEAT_CHECKRATE +.. code-block:: python -BROKER_HEARTBEAT_CHECKRATE -~~~~~~~~~~~~~~~~~~~~~~~~~~ -:transports supported: ``pyamqp`` + task_queue_ha_policy = ['rabbit@host1', 'rabbit@host2'] -At intervals the worker will monitor that the broker has not missed -too many heartbeats. The rate at which this is checked is calculated -by dividing the :setting:`BROKER_HEARTBEAT` value with this value, -so if the heartbeat is 10.0 and the rate is the default 2.0, the check -will be performed every 5 seconds (twice the heartbeat sending rate). +Using a list will implicitly set ``x-ha-policy`` to 'nodes' and +``x-ha-policy-params`` to the given list of nodes. -.. setting:: BROKER_USE_SSL +See http://www.rabbitmq.com/ha.html for more information. -BROKER_USE_SSL -~~~~~~~~~~~~~~ +.. setting:: worker_direct -Use SSL to connect to the broker. Off by default. This may not be supported -by all transports. +worker_direct +~~~~~~~~~~~~~ -.. 
setting:: BROKER_POOL_LIMIT +This option enables so that every worker has a dedicated queue, +so that tasks can be routed to specific workers. -BROKER_POOL_LIMIT -~~~~~~~~~~~~~~~~~ +The queue name for each worker is automatically generated based on +the worker hostname and a ``.dq`` suffix, using the ``C.dq`` exchange. -.. versionadded:: 2.3 +For example the queue name for the worker with node name ``w1@example.com`` +becomes:: -The maximum number of connections that can be open in the connection pool. + w1@example.com.dq -The pool is enabled by default since version 2.5, with a default limit of ten -connections. This number can be tweaked depending on the number of -threads/greenthreads (eventlet/gevent) using a connection. For example -running eventlet with 1000 greenlets that use a connection to the broker, -contention can arise and you should consider increasing the limit. +Then you can route the task to the task by specifying the hostname +as the routing key and the ``C.dq`` exchange:: -If set to :const:`None` or 0 the connection pool will be disabled and -connections will be established and closed for every use. + task_routes = { + 'tasks.add': {'exchange': 'C.dq', 'routing_key': 'w1@example.com'} + } -Default (since 2.5) is to use a pool of 10 connections. +.. setting:: task_create_missing_queues -.. setting:: BROKER_CONNECTION_TIMEOUT +task_create_missing_queues +~~~~~~~~~~~~~~~~~~~~~~~~~~ -BROKER_CONNECTION_TIMEOUT -~~~~~~~~~~~~~~~~~~~~~~~~~ +If enabled (default), any queues specified that are not defined in +:setting:`task_queues` will be automatically created. See +:ref:`routing-automatic`. -The default timeout in seconds before we give up establishing a connection -to the AMQP server. Default is 4 seconds. +.. setting:: task_default_queue -.. 
setting:: BROKER_CONNECTION_RETRY +task_default_queue +~~~~~~~~~~~~~~~~~~ -BROKER_CONNECTION_RETRY -~~~~~~~~~~~~~~~~~~~~~~~ +The name of the default queue used by `.apply_async` if the message has +no route or no custom queue has been specified. -Automatically try to re-establish the connection to the AMQP broker if lost. +This queue must be listed in :setting:`task_queues`. +If :setting:`task_queues` is not specified then it is automatically +created containing one queue entry, where this name is used as the name of +that queue. -The time between retries is increased for each retry, and is -not exhausted before :setting:`BROKER_CONNECTION_MAX_RETRIES` is -exceeded. +The default is: `celery`. -This behavior is on by default. +.. seealso:: -.. setting:: BROKER_CONNECTION_MAX_RETRIES + :ref:`routing-changing-default-queue` -BROKER_CONNECTION_MAX_RETRIES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. setting:: task_default_exchange -Maximum number of retries before we give up re-establishing a connection -to the AMQP broker. +task_default_exchange +~~~~~~~~~~~~~~~~~~~~~ -If this is set to :const:`0` or :const:`None`, we will retry forever. +Name of the default exchange to use when no custom exchange is +specified for a key in the :setting:`task_queues` setting. -Default is 100 retries. +The default is: `celery`. -.. setting:: BROKER_LOGIN_METHOD +.. setting:: task_default_exchange_type -BROKER_LOGIN_METHOD -~~~~~~~~~~~~~~~~~~~ +task_default_exchange_type +~~~~~~~~~~~~~~~~~~~~~~~~~~ -Set custom amqp login method, default is ``AMQPLAIN``. +Default exchange type used when no custom exchange type is specified +for a key in the :setting:`task_queues` setting. +The default is: `direct`. -.. setting:: BROKER_TRANSPORT_OPTIONS +.. setting:: task_default_routing_key -BROKER_TRANSPORT_OPTIONS +task_default_routing_key ~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 2.2 +The default routing key used when no custom routing key +is specified for a key in the :setting:`task_queues` setting. 
-A dict of additional options passed to the underlying transport. +The default is: `celery`. -See your transport user manual for supported options (if any). +.. setting:: task_default_delivery_mode -Example setting the visibility timeout (supported by Redis and SQS -transports): +task_default_delivery_mode +~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. code-block:: python +Can be `transient` or `persistent`. The default is to send +persistent messages. - BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 18000} # 5 hours +.. _conf-broker-settings: -.. _conf-task-execution: +Broker Settings +--------------- -Task execution settings ------------------------ +.. setting:: broker_url -.. setting:: CELERY_ALWAYS_EAGER +broker_url +~~~~~~~~~~ -CELERY_ALWAYS_EAGER -~~~~~~~~~~~~~~~~~~~ +Default broker URL. This must be an URL in the form of:: -If this is :const:`True`, all tasks will be executed locally by blocking until -the task returns. ``apply_async()`` and ``Task.delay()`` will return -an :class:`~celery.result.EagerResult` instance, which emulates the API -and behavior of :class:`~celery.result.AsyncResult`, except the result -is already evaluated. + transport://userid:password@hostname:port/virtual_host -That is, tasks will be executed locally instead of being sent to -the queue. +Only the scheme part (``transport://``) is required, the rest +is optional, and defaults to the specific transports default values. -.. setting:: CELERY_EAGER_PROPAGATES_EXCEPTIONS +The transport part is the broker implementation to use, and the +default is ``amqp``, which uses ``librabbitmq`` by default or falls back to +``pyamqp`` if that is not installed. Also there are many other choices including +``redis``, ``beanstalk``, ``sqlalchemy``, ``django``, ``mongodb``, +``couchdb``. +It can also be a fully qualified path to your own transport implementation. -CELERY_EAGER_PROPAGATES_EXCEPTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +More than broker URL, of the same transport, can also be specified. 
+The broker URLs can be passed in as a single string that is semicolon delimited:: -If this is :const:`True`, eagerly executed tasks (applied by `task.apply()`, -or when the :setting:`CELERY_ALWAYS_EAGER` setting is enabled), will -propagate exceptions. + broker_url = 'transport://userid:password@hostname:port//;transport://userid:password@hostname:port//' -It's the same as always running ``apply()`` with ``throw=True``. +Or as a list:: -.. setting:: CELERY_IGNORE_RESULT + broker_url = [ + 'transport://userid:password@localhost:port//', + 'transport://userid:password@hostname:port//' + ] -CELERY_IGNORE_RESULT -~~~~~~~~~~~~~~~~~~~~ +The brokers will then be used in the :setting:`broker_failover_strategy`. -Whether to store the task return values or not (tombstones). -If you still want to store errors, just not successful return values, -you can set :setting:`CELERY_STORE_ERRORS_EVEN_IF_IGNORED`. +See :ref:`kombu:connection-urls` in the Kombu documentation for more +information. -.. setting:: CELERY_MESSAGE_COMPRESSION +.. setting:: broker_read_url -CELERY_MESSAGE_COMPRESSION -~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. setting:: broker_write_url -Default compression used for task messages. -Can be ``gzip``, ``bzip2`` (if available), or any custom -compression schemes registered in the Kombu compression registry. +broker_read_url / broker_write_url +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These settings can be configured, instead of :setting:`broker_url` to specify +different connection parameters for broker connections used for consuming and +producing. + +Example:: + + broker_read_url = 'amqp://user:pass@broker.example.com:56721' + broker_write_url = 'amqp://user:pass@broker.example.com:56722' + +Both options can also be specified as a list for failover alternates, see +:setting:`broker_url` for more information. + +.. setting:: broker_failover_strategy + +broker_failover_strategy +~~~~~~~~~~~~~~~~~~~~~~~~ + +Default failover strategy for the broker Connection object. 
If supplied, +may map to a key in 'kombu.connection.failover_strategies', or be a reference +to any method that yields a single item from a supplied list. -The default is to send uncompressed messages. +Example:: -.. setting:: CELERY_TASK_RESULT_EXPIRES + # Random failover strategy + def random_failover_strategy(servers): + it = list(it) # don't modify callers list + shuffle = random.shuffle + for _ in repeat(None): + shuffle(it) + yield it[0] -CELERY_TASK_RESULT_EXPIRES -~~~~~~~~~~~~~~~~~~~~~~~~~~ + broker_failover_strategy = random_failover_strategy -Time (in seconds, or a :class:`~datetime.timedelta` object) for when after -stored task tombstones will be deleted. +.. setting:: broker_heartbeat -A built-in periodic task will delete the results after this time -(:class:`celery.task.backend_cleanup`). +broker_heartbeat +~~~~~~~~~~~~~~~~ +:transports supported: ``pyamqp`` -A value of :const:`None` or 0 means results will never expire (depending -on backend specifications). +It's not always possible to detect connection loss in a timely +manner using TCP/IP alone, so AMQP defines something called heartbeats +that's is used both by the client and the broker to detect if +a connection was closed. -Default is to expire after 1 day. +Heartbeats are disabled by default. -.. note:: +If the heartbeat value is 10 seconds, then +the heartbeat will be monitored at the interval specified +by the :setting:`broker_heartbeat_checkrate` setting, which by default is +double the rate of the heartbeat value +(so for the default 10 seconds, the heartbeat is checked every 5 seconds). - For the moment this only works with the amqp, database, cache, redis and MongoDB - backends. +.. setting:: broker_heartbeat_checkrate - When using the database or MongoDB backends, `celery beat` must be - running for the results to be expired. +broker_heartbeat_checkrate +~~~~~~~~~~~~~~~~~~~~~~~~~~ +:transports supported: ``pyamqp`` -.. 
setting:: CELERY_MAX_CACHED_RESULTS +At intervals the worker will monitor that the broker has not missed +too many heartbeats. The rate at which this is checked is calculated +by dividing the :setting:`broker_heartbeat` value with this value, +so if the heartbeat is 10.0 and the rate is the default 2.0, the check +will be performed every 5 seconds (twice the heartbeat sending rate). -CELERY_MAX_CACHED_RESULTS -~~~~~~~~~~~~~~~~~~~~~~~~~ +.. setting:: broker_use_ssl -Result backends caches ready results used by the client. +broker_use_ssl +~~~~~~~~~~~~~~ +:transports supported: ``pyamqp``, ``redis`` -This is the total number of results to cache before older results are evicted. -The default is 5000. 0 or None means no limit, and a value of :const:`-1` -will disable the cache. +Toggles SSL usage on broker connection and SSL settings. -.. setting:: CELERY_CHORD_PROPAGATES +If ``True`` the connection will use SSL with default SSL settings. +If set to a dict, will configure SSL connection according to the specified +policy. The format used is python `ssl.wrap_socket() +options `_. -CELERY_CHORD_PROPAGATES -~~~~~~~~~~~~~~~~~~~~~~~ +Default is ``False`` (no SSL). -.. versionadded:: 3.0.14 +Note that SSL socket is generally served on a separate port by the broker. -This setting defines what happens when a task part of a chord raises an -exception: +Example providing a client cert and validating the server cert against a custom +certificate authority: -- If propagate is True the chord callback will change state to FAILURE - with the exception value set to a :exc:`~@ChordError` - instance containing information about the error and the task that failed. +.. code-block:: python - This is the default behavior in Celery 3.1+ + import ssl -- If propagate is False the exception value will instead be forwarded - to the chord callback. 
+ broker_use_ssl = { + 'keyfile': '/var/ssl/private/worker-key.pem', + 'certfile': '/var/ssl/amqp-server-cert.pem', + 'ca_certs': '/var/ssl/myca.pem', + 'cert_reqs': ssl.CERT_REQUIRED + } - This was the default behavior before version 3.1. +.. warning:: -.. setting:: CELERY_TRACK_STARTED + Be careful using ``broker_use_ssl=True``. It is possible that your default + configuration will not validate the server cert at all. Please read Python + `ssl module security + considerations `_. -CELERY_TRACK_STARTED -~~~~~~~~~~~~~~~~~~~~ +.. setting:: broker_pool_limit -If :const:`True` the task will report its status as "started" when the -task is executed by a worker. The default value is :const:`False` as -the normal behaviour is to not report that level of granularity. Tasks -are either pending, finished, or waiting to be retried. Having a "started" -state can be useful for when there are long running tasks and there is a -need to report which task is currently running. +broker_pool_limit +~~~~~~~~~~~~~~~~~ -.. setting:: CELERY_TASK_SERIALIZER +.. versionadded:: 2.3 -CELERY_TASK_SERIALIZER -~~~~~~~~~~~~~~~~~~~~~~ +The maximum number of connections that can be open in the connection pool. -A string identifying the default serialization method to use. Can be -`pickle` (default), `json`, `yaml`, `msgpack` or any custom serialization -methods that have been registered with :mod:`kombu.serialization.registry`. +The pool is enabled by default since version 2.5, with a default limit of ten +connections. This number can be tweaked depending on the number of +threads/greenthreads (eventlet/gevent) using a connection. For example +running eventlet with 1000 greenlets that use a connection to the broker, +contention can arise and you should consider increasing the limit. -.. seealso:: +If set to :const:`None` or 0 the connection pool will be disabled and +connections will be established and closed for every use. - :ref:`calling-serializers`. 
+Default (since 2.5) is to use a pool of 10 connections. -.. setting:: CELERY_TASK_PUBLISH_RETRY +.. setting:: broker_connection_timeout -CELERY_TASK_PUBLISH_RETRY +broker_connection_timeout ~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 2.2 +The default timeout in seconds before we give up establishing a connection +to the AMQP server. Default is 4 seconds. -Decides if publishing task messages will be retried in the case -of connection loss or other connection errors. -See also :setting:`CELERY_TASK_PUBLISH_RETRY_POLICY`. +.. setting:: broker_connection_retry -Enabled by default. +broker_connection_retry +~~~~~~~~~~~~~~~~~~~~~~~ -.. setting:: CELERY_TASK_PUBLISH_RETRY_POLICY +Automatically try to re-establish the connection to the AMQP broker if lost. -CELERY_TASK_PUBLISH_RETRY_POLICY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The time between retries is increased for each retry, and is +not exhausted before :setting:`broker_connection_max_retries` is +exceeded. -.. versionadded:: 2.2 +This behavior is on by default. -Defines the default policy when retrying publishing a task message in -the case of connection loss or other connection errors. +.. setting:: broker_connection_max_retries -See :ref:`calling-retry` for more information. +broker_connection_max_retries +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. setting:: CELERY_DEFAULT_RATE_LIMIT +Maximum number of retries before we give up re-establishing a connection +to the AMQP broker. -CELERY_DEFAULT_RATE_LIMIT -~~~~~~~~~~~~~~~~~~~~~~~~~ +If this is set to :const:`0` or :const:`None`, we will retry forever. -The global default rate limit for tasks. +Default is 100 retries. -This value is used for tasks that does not have a custom rate limit -The default is no rate limit. +.. setting:: broker_login_method -.. setting:: CELERY_DISABLE_RATE_LIMITS +broker_login_method +~~~~~~~~~~~~~~~~~~~ -CELERY_DISABLE_RATE_LIMITS -~~~~~~~~~~~~~~~~~~~~~~~~~~ +Set custom amqp login method, default is ``AMQPLAIN``. 
-Disable all rate limits, even if tasks has explicit rate limits set. +.. setting:: broker_transport_options -.. setting:: CELERY_ACKS_LATE +broker_transport_options +~~~~~~~~~~~~~~~~~~~~~~~~ -CELERY_ACKS_LATE -~~~~~~~~~~~~~~~~ +.. versionadded:: 2.2 -Late ack means the task messages will be acknowledged **after** the task -has been executed, not *just before*, which is the default behavior. +A dict of additional options passed to the underlying transport. -.. seealso:: +See your transport user manual for supported options (if any). - FAQ: :ref:`faq-acks_late-vs-retry`. +Example setting the visibility timeout (supported by Redis and SQS +transports): + +.. code-block:: python + + broker_transport_options = {'visibility_timeout': 18000} # 5 hours .. _conf-worker: Worker ------ -.. setting:: CELERY_IMPORTS +.. setting:: imports -CELERY_IMPORTS -~~~~~~~~~~~~~~ +imports +~~~~~~~ A sequence of modules to import when the worker starts. @@ -1262,39 +1762,61 @@ to import signal handlers and additional remote control commands, etc. The modules will be imported in the original order. -.. setting:: CELERY_INCLUDE +.. setting:: include -CELERY_INCLUDE -~~~~~~~~~~~~~~ +include +~~~~~~~ -Exact same semantics as :setting:`CELERY_IMPORTS`, but can be used as a means +Exact same semantics as :setting:`imports`, but can be used as a means to have different import categories. The modules in this setting are imported after the modules in -:setting:`CELERY_IMPORTS`. +:setting:`imports`. -.. setting:: CELERYD_FORCE_EXECV +.. _conf-concurrency: -CELERYD_FORCE_EXECV -~~~~~~~~~~~~~~~~~~~ +.. setting:: worker_concurrency -On Unix the prefork pool will fork, so that child processes -start with the same memory as the parent process. +worker_concurrency +~~~~~~~~~~~~~~~~~~ -This can cause problems as there is a known deadlock condition -with pthread locking primitives when `fork()` is combined with threads. +The number of concurrent worker processes/threads/green threads executing +tasks. 
-You should enable this setting if you are experiencing hangs (deadlocks), -especially in combination with time limits or having a max tasks per child limit. +If you're doing mostly I/O you can have more processes, +but if mostly CPU-bound, try to keep it close to the +number of CPUs on your machine. If not set, the number of CPUs/cores +on the host will be used. -This option will be enabled by default in a later version. +Defaults to the number of available CPUs. -This is not a problem on Windows, as it does not have `fork()`. +.. setting:: worker_prefetch_multiplier -.. setting:: CELERYD_WORKER_LOST_WAIT +worker_prefetch_multiplier +~~~~~~~~~~~~~~~~~~~~~~~~~~ -CELERYD_WORKER_LOST_WAIT -~~~~~~~~~~~~~~~~~~~~~~~~ +How many messages to prefetch at a time multiplied by the number of +concurrent processes. The default is 4 (four messages for each +process). The default setting is usually a good choice, however -- if you +have very long running tasks waiting in the queue and you have to start the +workers, note that the first worker to start will receive four times the +number of messages initially. Thus the tasks may not be fairly distributed +to the workers. + +To disable prefetching, set :setting:`worker_prefetch_multiplier` to 1. +Changing that setting to 0 will allow the worker to keep consuming +as many messages as it wants. + +For more on prefetching, read :ref:`optimizing-prefetch-limit` + +.. note:: + + Tasks with ETA/countdown are not affected by prefetch limits. + +.. setting:: worker_lost_wait + +worker_lost_wait +~~~~~~~~~~~~~~~~ In some cases a worker may be killed without proper cleanup, and the worker may have published a result before terminating. @@ -1303,58 +1825,36 @@ raising a :exc:`@WorkerLostError` exception. Default is 10.0 -.. setting:: CELERYD_MAX_TASKS_PER_CHILD +.. 
setting:: worker_max_tasks_per_child -CELERYD_MAX_TASKS_PER_CHILD +worker_max_tasks_per_child ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Maximum number of tasks a pool worker process can execute before it's replaced with a new one. Default is no limit. -.. setting:: CELERYD_TASK_TIME_LIMIT - -CELERYD_TASK_TIME_LIMIT -~~~~~~~~~~~~~~~~~~~~~~~ - -Task hard time limit in seconds. The worker processing the task will -be killed and replaced with a new one when this is exceeded. - -.. setting:: CELERYD_TASK_SOFT_TIME_LIMIT - -CELERYD_TASK_SOFT_TIME_LIMIT -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Task soft time limit in seconds. - -The :exc:`~@SoftTimeLimitExceeded` exception will be -raised when this is exceeded. The task can catch this to -e.g. clean up before the hard time limit comes. - -Example: - -.. code-block:: python +.. setting:: worker_max_memory_per_child - from celery.exceptions import SoftTimeLimitExceeded +worker_max_memory_per_child +~~~~~~~~~~~~~~~~~~~~~~~~~~~ - @app.task - def mytask(): - try: - return do_work() - except SoftTimeLimitExceeded: - cleanup_in_a_hurry() +Maximum amount of resident memory that may be consumed by a +worker before it will be replaced by a new worker. If a single +task causes a worker to exceed this limit, the task will be +completed, and the worker will be replaced afterwards. Default: +no limit. -.. setting:: CELERY_STORE_ERRORS_EVEN_IF_IGNORED +.. setting:: worker_disable_rate_limits -CELERY_STORE_ERRORS_EVEN_IF_IGNORED -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +worker_disable_rate_limits +~~~~~~~~~~~~~~~~~~~~~~~~~~ -If set, the worker stores all task errors in the result store even if -:attr:`Task.ignore_result ` is on. +Disable all rate limits, even if tasks has explicit rate limits set. -.. setting:: CELERYD_STATE_DB +.. setting:: worker_state_db -CELERYD_STATE_DB -~~~~~~~~~~~~~~~~ +worker_state_db +~~~~~~~~~~~~~~~ Name of the file used to stores persistent worker state (like revoked tasks). 
Can be a relative or absolute path, but be aware that the suffix `.db` @@ -1365,10 +1865,10 @@ Can also be set via the :option:`--statedb` argument to Not enabled by default. -.. setting:: CELERYD_TIMER_PRECISION +.. setting:: worker_timer_precision -CELERYD_TIMER_PRECISION -~~~~~~~~~~~~~~~~~~~~~~~ +worker_timer_precision +~~~~~~~~~~~~~~~~~~~~~~ Set the maximum time in seconds that the ETA scheduler can sleep between rechecking the schedule. Default is 1 second. @@ -1376,94 +1876,92 @@ rechecking the schedule. Default is 1 second. Setting this value to 1 second means the schedulers precision will be 1 second. If you need near millisecond precision you can set this to 0.1. -.. setting:: CELERY_ENABLE_REMOTE_CONTROL +.. setting:: worker_enable_remote_control -CELERY_ENABLE_REMOTE_CONTROL +worker_enable_remote_control ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Specify if remote control of the workers is enabled. Default is :const:`True`. - .. _conf-error-mails: Error E-Mails ------------- -.. setting:: CELERY_SEND_TASK_ERROR_EMAILS +.. setting:: task_send_error_emails -CELERY_SEND_TASK_ERROR_EMAILS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +task_send_error_emails +~~~~~~~~~~~~~~~~~~~~~~ The default value for the `Task.send_error_emails` attribute, which if set to :const:`True` means errors occurring during task execution will be -sent to :setting:`ADMINS` by email. +sent to :setting:`admins` by email. Disabled by default. -.. setting:: ADMINS +.. setting:: admins -ADMINS +admins ~~~~~~ List of `(name, email_address)` tuples for the administrators that should receive error emails. -.. setting:: SERVER_EMAIL +.. setting:: server_email -SERVER_EMAIL +server_email ~~~~~~~~~~~~ The email address this worker sends emails from. Default is celery@localhost. -.. setting:: EMAIL_HOST +.. setting:: email_host -EMAIL_HOST +email_host ~~~~~~~~~~ The mail server to use. Default is ``localhost``. -.. setting:: EMAIL_HOST_USER +.. 
setting:: email_host_user -EMAIL_HOST_USER +email_host_user ~~~~~~~~~~~~~~~ User name (if required) to log on to the mail server with. -.. setting:: EMAIL_HOST_PASSWORD +.. setting:: email_host_password -EMAIL_HOST_PASSWORD +email_host_password ~~~~~~~~~~~~~~~~~~~ Password (if required) to log on to the mail server with. -.. setting:: EMAIL_PORT +.. setting:: email_port -EMAIL_PORT +email_port ~~~~~~~~~~ The port the mail server is listening on. Default is `25`. +.. setting:: email_use_ssl -.. setting:: EMAIL_USE_SSL - -EMAIL_USE_SSL +email_use_ssl ~~~~~~~~~~~~~ Use SSL when connecting to the SMTP server. Disabled by default. -.. setting:: EMAIL_USE_TLS +.. setting:: email_use_tls -EMAIL_USE_TLS +email_use_tls ~~~~~~~~~~~~~ Use TLS when connecting to the SMTP server. Disabled by default. -.. setting:: EMAIL_TIMEOUT +.. setting:: email_timeout -EMAIL_TIMEOUT +email_timeout ~~~~~~~~~~~~~ Timeout in seconds for when we give up trying to connect @@ -1471,6 +1969,14 @@ to the SMTP server when sending emails. The default is 2 seconds. +.. setting:: email_charset + +email_charset +~~~~~~~~~~~~~ +.. versionadded:: 4.0 + +Charset for outgoing emails. Default is 'utf-8'. + .. _conf-example-error-mail-config: Example E-Mail configuration @@ -1482,39 +1988,40 @@ george@vandelay.com and kramer@vandelay.com: .. code-block:: python # Enables error emails. - CELERY_SEND_TASK_ERROR_EMAILS = True + task_send_error_emails = True # Name and email addresses of recipients - ADMINS = ( + admins = ( ('George Costanza', 'george@vandelay.com'), ('Cosmo Kramer', 'kosmo@vandelay.com'), ) # Email address used as sender (From field). - SERVER_EMAIL = 'no-reply@vandelay.com' + server_email = 'no-reply@vandelay.com' # Mailserver configuration - EMAIL_HOST = 'mail.vandelay.com' - EMAIL_PORT = 25 - # EMAIL_HOST_USER = 'servers' - # EMAIL_HOST_PASSWORD = 's3cr3t' + email_host = 'mail.vandelay.com' + email_port = 25 + # email_host_user = 'servers' + # email_host_password = 's3cr3t' .. 
_conf-events: Events ------ -.. setting:: CELERY_SEND_EVENTS +.. setting:: worker_send_task_events -CELERY_SEND_EVENTS -~~~~~~~~~~~~~~~~~~ +worker_send_task_events +~~~~~~~~~~~~~~~~~~~~~~~ -Send events so the worker can be monitored by tools like `celerymon`. +Send task-related events so that tasks can be monitored using tools like +`flower`. Sets the default value for the workers :option:`-E` argument. -.. setting:: CELERY_SEND_TASK_SENT_EVENT +.. setting:: task_send_sent_event -CELERY_SEND_TASK_SENT_EVENT -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +task_send_sent_event +~~~~~~~~~~~~~~~~~~~~ .. versionadded:: 2.2 @@ -1523,9 +2030,9 @@ tracked before they are consumed by a worker. Disabled by default. -.. setting:: CELERY_EVENT_QUEUE_TTL +.. setting:: event_queue_ttl -CELERY_EVENT_QUEUE_TTL +event_queue_ttl ~~~~~~~~~~~~~~~~~~~~~~ :transports supported: ``amqp`` @@ -1537,83 +2044,50 @@ will be deleted after 10 seconds. Disabled by default. -.. setting:: CELERY_EVENT_QUEUE_EXPIRES +.. setting:: event_queue_expires -CELERY_EVENT_QUEUE_EXPIRES -~~~~~~~~~~~~~~~~~~~~~~~~~~ +event_queue_expires +~~~~~~~~~~~~~~~~~~~ :transports supported: ``amqp`` - -Expiry time in seconds (int/float) for when a monitor clients +Expiry time in seconds (int/float) for when after a monitor clients event queue will be deleted (``x-expires``). Default is never, relying on the queue autodelete setting. -.. setting:: CELERY_EVENT_SERIALIZER +.. setting:: event_serializer -CELERY_EVENT_SERIALIZER -~~~~~~~~~~~~~~~~~~~~~~~ +event_serializer +~~~~~~~~~~~~~~~~ Message serialization format used when sending event messages. Default is ``json``. See :ref:`calling-serializers`. -.. _conf-broadcast: - -Broadcast Commands ------------------- - -.. setting:: CELERY_BROADCAST_QUEUE - -CELERY_BROADCAST_QUEUE -~~~~~~~~~~~~~~~~~~~~~~ - -Name prefix for the queue used when listening for broadcast messages. -The workers host name will be appended to the prefix to create the final -queue name. - -Default is ``celeryctl``. - -.. 
setting:: CELERY_BROADCAST_EXCHANGE - -CELERY_BROADCAST_EXCHANGE -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Name of the exchange used for broadcast messages. - -Default is ``celeryctl``. - -.. setting:: CELERY_BROADCAST_EXCHANGE_TYPE - -CELERY_BROADCAST_EXCHANGE_TYPE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Exchange type used for broadcast messages. Default is ``fanout``. - .. _conf-logging: Logging ------- -.. setting:: CELERYD_HIJACK_ROOT_LOGGER +.. setting:: worker_hijack_root_logger -CELERYD_HIJACK_ROOT_LOGGER -~~~~~~~~~~~~~~~~~~~~~~~~~~ +worker_hijack_root_logger +~~~~~~~~~~~~~~~~~~~~~~~~~ .. versionadded:: 2.2 By default any previously configured handlers on the root logger will be removed. If you want to customize your own logging handlers, then you can disable this behavior by setting -`CELERYD_HIJACK_ROOT_LOGGER = False`. +`worker_hijack_root_logger = False`. .. note:: Logging can also be customized by connecting to the :signal:`celery.signals.setup_logging` signal. -.. setting:: CELERYD_LOG_COLOR +.. setting:: worker_log_color -CELERYD_LOG_COLOR +worker_log_color ~~~~~~~~~~~~~~~~~ Enables/disables colors in logging output by the Celery apps. @@ -1623,25 +2097,26 @@ By default colors are enabled if 1) the app is logging to a real terminal, and not a file. 2) the app is not running on Windows. -.. setting:: CELERYD_LOG_FORMAT +.. setting:: worker_log_format -CELERYD_LOG_FORMAT -~~~~~~~~~~~~~~~~~~ +worker_log_format +~~~~~~~~~~~~~~~~~ The format to use for log messages. -Default is `[%(asctime)s: %(levelname)s/%(processName)s] %(message)s` +Default is:: + + [%(asctime)s: %(levelname)s/%(processName)s] %(message)s See the Python :mod:`logging` module for more information about log formats. -.. setting:: CELERYD_TASK_LOG_FORMAT +.. setting:: worker_task_log_format -CELERYD_TASK_LOG_FORMAT -~~~~~~~~~~~~~~~~~~~~~~~ +worker_task_log_format +~~~~~~~~~~~~~~~~~~~~~~ -The format to use for log messages logged in tasks. 
Can be overridden using -the :option:`--loglevel` option to :mod:`~celery.bin.worker`. +The format to use for log messages logged in tasks. Default is:: @@ -1651,9 +2126,9 @@ Default is:: See the Python :mod:`logging` module for more information about log formats. -.. setting:: CELERY_REDIRECT_STDOUTS +.. setting:: worker_redirect_stdouts -CELERY_REDIRECT_STDOUTS +worker_redirect_stdouts ~~~~~~~~~~~~~~~~~~~~~~~ If enabled `stdout` and `stderr` will be redirected @@ -1662,9 +2137,9 @@ to the current logger. Enabled by default. Used by :program:`celery worker` and :program:`celery beat`. -.. setting:: CELERY_REDIRECT_STDOUTS_LEVEL +.. setting:: worker_redirect_stdouts_level -CELERY_REDIRECT_STDOUTS_LEVEL +worker_redirect_stdouts_level ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The log level output to `stdout` and `stderr` is logged as. @@ -1678,30 +2153,30 @@ Default is :const:`WARNING`. Security -------- -.. setting:: CELERY_SECURITY_KEY +.. setting:: security_key -CELERY_SECURITY_KEY -~~~~~~~~~~~~~~~~~~~ +security_key +~~~~~~~~~~~~ .. versionadded:: 2.5 The relative or absolute path to a file containing the private key used to sign messages when :ref:`message-signing` is used. -.. setting:: CELERY_SECURITY_CERTIFICATE +.. setting:: security_certificate -CELERY_SECURITY_CERTIFICATE -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +security_certificate +~~~~~~~~~~~~~~~~~~~~ .. versionadded:: 2.5 The relative or absolute path to an X.509 certificate file used to sign messages when :ref:`message-signing` is used. -.. setting:: CELERY_SECURITY_CERT_STORE +.. setting:: security_cert_store -CELERY_SECURITY_CERT_STORE -~~~~~~~~~~~~~~~~~~~~~~~~~~ +security_cert_store +~~~~~~~~~~~~~~~~~~~ .. versionadded:: 2.5 @@ -1714,35 +2189,36 @@ The directory containing X.509 certificates used for Custom Component Classes (advanced) ----------------------------------- -.. setting:: CELERYD_POOL +.. setting:: worker_pool -CELERYD_POOL -~~~~~~~~~~~~ +worker_pool +~~~~~~~~~~~ Name of the pool class used by the worker. .. 
admonition:: Eventlet/Gevent Never use this option to select the eventlet or gevent pool. - You must use the `-P` option instead, otherwise the monkey patching - will happen too late and things will break in strange and silent ways. + You must use the `-P` option to :program:`celery worker` instead, to + ensure the monkey patches are not applied too late, causing things + to break in strange ways. Default is ``celery.concurrency.prefork:TaskPool``. -.. setting:: CELERYD_POOL_RESTARTS +.. setting:: worker_pool_restarts -CELERYD_POOL_RESTARTS -~~~~~~~~~~~~~~~~~~~~~ +worker_pool_restarts +~~~~~~~~~~~~~~~~~~~~ If enabled the worker pool can be restarted using the :control:`pool_restart` remote control command. Disabled by default. -.. setting:: CELERYD_AUTOSCALER +.. setting:: worker_autoscaler -CELERYD_AUTOSCALER -~~~~~~~~~~~~~~~~~~ +worker_autoscaler +~~~~~~~~~~~~~~~~~ .. versionadded:: 2.2 @@ -1750,60 +2226,60 @@ Name of the autoscaler class to use. Default is ``celery.worker.autoscale:Autoscaler``. -.. setting:: CELERYD_AUTORELOADER +.. setting:: worker_autoreloader -CELERYD_AUTORELOADER -~~~~~~~~~~~~~~~~~~~~ +worker_autoreloader +~~~~~~~~~~~~~~~~~~~ Name of the autoreloader class used by the worker to reload Python modules and files that have changed. Default is: ``celery.worker.autoreload:Autoreloader``. -.. setting:: CELERYD_CONSUMER +.. setting:: worker_consumer -CELERYD_CONSUMER -~~~~~~~~~~~~~~~~ +worker_consumer +~~~~~~~~~~~~~~~ Name of the consumer class used by the worker. Default is :class:`celery.worker.consumer.Consumer` -.. setting:: CELERYD_TIMER +.. setting:: worker_timer -CELERYD_TIMER -~~~~~~~~~~~~~~~~~~~~~ +worker_timer +~~~~~~~~~~~~ Name of the ETA scheduler class used by the worker. -Default is :class:`celery.utils.timer2.Timer`, or one overrided +Default is :class:`kombu.async.hub.timer.Timer`, or one overridden by the pool implementation. ..
_conf-celerybeat: -Periodic Task Server: celery beat ---------------------------------- +Beat Settings (:program:`celery beat`) +-------------------------------------- -.. setting:: CELERYBEAT_SCHEDULE +.. setting:: beat_schedule -CELERYBEAT_SCHEDULE -~~~~~~~~~~~~~~~~~~~ +beat_schedule +~~~~~~~~~~~~~ The periodic task schedule used by :mod:`~celery.bin.beat`. See :ref:`beat-entries`. -.. setting:: CELERYBEAT_SCHEDULER +.. setting:: beat_scheduler -CELERYBEAT_SCHEDULER -~~~~~~~~~~~~~~~~~~~~ +beat_scheduler +~~~~~~~~~~~~~~ The default scheduler class. Default is ``celery.beat:PersistentScheduler``. Can also be set via the :option:`-S` argument to :mod:`~celery.bin.beat`. -.. setting:: CELERYBEAT_SCHEDULE_FILENAME +.. setting:: beat_schedule_filename -CELERYBEAT_SCHEDULE_FILENAME -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +beat_schedule_filename +~~~~~~~~~~~~~~~~~~~~~~ Name of the file used by `PersistentScheduler` to store the last run times of periodic tasks. Can be a relative or absolute path, but be aware that the @@ -1812,10 +2288,10 @@ suffix `.db` may be appended to the file name (depending on Python version). Can also be set via the :option:`--schedule` argument to :mod:`~celery.bin.beat`. -.. setting:: CELERYBEAT_SYNC_EVERY +.. setting:: beat_sync_every -CELERYBEAT_SYNC_EVERY -~~~~~~~~~~~~~~~~~~~~~ +beat_sync_every +~~~~~~~~~~~~~~~ The number of periodic tasks that can be called before another database sync is issued. @@ -1823,10 +2299,10 @@ Defaults to 0 (sync based on timing - default of 3 minutes as determined by scheduler.sync_every). If set to 1, beat will call sync after every task message sent. -.. setting:: CELERYBEAT_MAX_LOOP_INTERVAL +.. setting:: beat_max_loop_interval -CELERYBEAT_MAX_LOOP_INTERVAL -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +beat_max_loop_interval +~~~~~~~~~~~~~~~~~~~~~~ The maximum number of seconds :mod:`~celery.bin.beat` can sleep between checking the schedule. @@ -1840,22 +2316,3 @@ changes to the schedule into account. 
Also when running celery beat embedded (:option:`-B`) on Jython as a thread the max interval is overridden and set to 1 so that it's possible to shut down in a timely manner. - - -.. _conf-celerymon: - -Monitor Server: celerymon -------------------------- - - -.. setting:: CELERYMON_LOG_FORMAT - -CELERYMON_LOG_FORMAT -~~~~~~~~~~~~~~~~~~~~ - -The format to use for log messages. - -Default is `[%(asctime)s: %(levelname)s/%(processName)s] %(message)s` - -See the Python :mod:`logging` module for more information about log -formats. diff --git a/docs/contributing.rst b/docs/contributing.rst index 0bb7693de..438bf9891 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -187,7 +187,7 @@ the developers fix the bug. A bug could be fixed by some other improvements and fixes - it might not have an existing report in the bug tracker. Make sure you're using the latest releases of -celery, billiard and kombu. +celery, billiard, kombu, amqp and vine. 5) **Collect information about the bug.** @@ -214,7 +214,7 @@ spelling or other errors on the website/docs/code. D) Include the output from the `celery report` command: - .. code-block:: bash + .. code-block:: console $ celery -A proj report @@ -247,6 +247,7 @@ issue tracker. * Celery: http://github.com/celery/celery/issues/ * Kombu: http://github.com/celery/kombu/issues * pyamqp: http://github.com/celery/pyamqp/issues +* vine: http://github.com/celery/vine/issues * librabbitmq: http://github.com/celery/librabbitmq/issues * Django-Celery: http://github.com/celery/django-celery/issues @@ -402,14 +403,14 @@ is in the Github Guide: `Fork a Repo`_. After you have cloned the repository you should checkout your copy to a directory on your machine: -.. code-block:: bash +.. code-block:: console $ git clone git@github.com:username/celery.git When the repository is cloned enter the directory to set up easy access to upstream changes: -.. code-block:: bash +.. 
code-block:: console $ cd celery $ git remote add upstream git://github.com/celery/celery.git @@ -418,7 +419,7 @@ to upstream changes: If you need to pull in new changes from upstream you should always use the :option:`--rebase` option to ``git pull``: -.. code-block:: bash +.. code-block:: console git pull --rebase upstream master @@ -446,16 +447,25 @@ To run the Celery test suite you need to install a few dependencies. A complete list of the dependencies needed are located in :file:`requirements/test.txt`. -Installing the test requirements: +If you're working on the development version, then you need to +install the development requirements first: -.. code-block:: bash +.. code-block:: console + + $ pip install -U -r requirements/dev.txt + +Both the stable and the development version have testing related +dependencies, so install these next: + +.. code-block:: console $ pip install -U -r requirements/test.txt + $ pip install -U -r requirements/default.txt -When installation of dependencies is complete you can execute +After installing the dependencies required, you can now execute the test suite by calling ``nosetests``: -.. code-block:: bash +.. code-block:: console $ nosetests @@ -480,7 +490,7 @@ Some useful options to :program:`nosetests` are: If you want to run the tests for a single test file only you can do so like this: -.. code-block:: bash +.. code-block:: console $ nosetests celery.tests.test_worker.test_worker_job @@ -510,13 +520,13 @@ To calculate test coverage you must first install the :mod:`coverage` module. Installing the :mod:`coverage` module: -.. code-block:: bash +.. code-block:: console $ pip install -U coverage Code coverage in HTML: -.. code-block:: bash +.. code-block:: console $ nosetests --with-coverage --cover-html @@ -525,7 +535,7 @@ The coverage output will then be located at Code coverage in XML (Cobertura-style): -.. code-block:: bash +.. 
code-block:: console $ nosetests --with-coverage --cover-xml --cover-xml-file=coverage.xml @@ -541,16 +551,16 @@ distribution. To run the tests for all supported Python versions simply execute: -.. code-block:: bash +.. code-block:: console $ tox If you only want to test specific Python versions use the :option:`-e` option: -.. code-block:: bash +.. code-block:: console - $ tox -e py26 + $ tox -e 2.7 Building the documentation -------------------------- @@ -558,14 +568,14 @@ To build the documentation you need to install the dependencies listed in :file:`requirements/docs.txt`: -.. code-block:: bash +.. code-block:: console $ pip install -U -r requirements/docs.txt After these dependencies are installed you should be able to build the docs by running: -.. code-block:: bash +.. code-block:: console $ cd docs $ rm -rf .build @@ -584,7 +594,7 @@ can be found in :file:`requirements/pkgutils.txt`. Installing the dependencies: -.. code-block:: bash +.. code-block:: console $ pip install -U -r requirements/pkgutils.txt @@ -594,16 +604,16 @@ pyflakes & PEP8 To ensure that your changes conform to PEP8 and to run pyflakes execute: -.. code-block:: bash +.. code-block:: console - $ paver flake8 + $ make flakecheck -To not return a negative exit code when this command fails use the -:option:`-E` option, this can be convenient while developing: +To not return a negative exit code when this command fails use +the ``flakes`` target instead: -.. code-block:: bash +.. code-block:: console - $ paver flake8 -E + $ make flakes API reference ~~~~~~~~~~~~~ @@ -611,10 +621,10 @@ To make sure that all modules have a corresponding section in the API reference please execute: -.. code-block:: bash +.. code-block:: console - $ paver autodoc - $ paver verifyindex + $ make apicheck + $ make indexcheck If files are missing you can add them by copying an existing reference file.
@@ -628,24 +638,24 @@ and this module is considered part of the public API, use the following steps: Use an existing file as a template: -.. code-block:: bash +.. code-block:: console $ cd docs/reference/ $ cp celery.schedules.rst celery.worker.awesome.rst Edit the file using your favorite editor: -.. code-block:: bash +.. code-block:: console $ vim celery.worker.awesome.rst - # change every occurance of ``celery.schedules`` to + # change every occurrence of ``celery.schedules`` to # ``celery.worker.awesome`` Edit the index using your favorite editor: -.. code-block:: bash +.. code-block:: console $ vim index.rst @@ -654,7 +664,7 @@ Edit the index using your favorite editor: Commit your changes: -.. code-block:: bash +.. code-block:: console # Add the file to git $ git add celery.worker.awesome.rst @@ -838,10 +848,10 @@ that require 3rd party libraries must be added. After you've made changes to this file you need to render the distro :file:`README` file: - .. code-block:: bash + .. code-block:: console $ pip install -U requirements/pkgutils.txt - $ paver readme + $ make readme That's all that needs to be done, but remember that if your feature @@ -916,6 +926,7 @@ celery :git: https://github.com/celery/celery :CI: http://travis-ci.org/#!/celery/celery +:Windows-CI: https://ci.appveyor.com/project/ask/celery :PyPI: http://pypi.python.org/pypi/celery :docs: http://docs.celeryproject.org @@ -926,6 +937,7 @@ Messaging library. :git: https://github.com/celery/kombu :CI: http://travis-ci.org/#!/celery/kombu +:Windows-CI: https://ci.appveyor.com/project/ask/kombu :PyPI: http://pypi.python.org/pypi/kombu :docs: http://kombu.readthedocs.org @@ -936,9 +948,21 @@ Python AMQP 0.9.1 client. :git: https://github.com/celery/py-amqp :CI: http://travis-ci.org/#!/celery/py-amqp +:Windows-CI: https://ci.appveyor.com/project/ask/py-amqp :PyPI: http://pypi.python.org/pypi/amqp :docs: http://amqp.readthedocs.org +vine +---- + +Promise/deferred implementation. 
+ +:git: https://github.com/celery/vine/ +:CI: http://travis-ci.org/#!/celery/vine/ +:Windows-CI: https://ci.appveyor.com/project/ask/vine +:PyPI: http://pypi.python.org/pypi/vine +:docs: http://vine.readthedocs.org + billiard -------- @@ -946,6 +970,8 @@ Fork of multiprocessing containing improvements that will eventually be merged into the Python stdlib. :git: https://github.com/celery/billiard +:CI: http://travis-ci.org/#!/celery/billiard/ +:Windows-CI: https://ci.appveyor.com/project/ask/billiard :PyPI: http://pypi.python.org/pypi/billiard librabbitmq @@ -1042,22 +1068,22 @@ The version number must be updated two places: After you have changed these files you must render the :file:`README` files. There is a script to convert sphinx syntax -to generic reStructured Text syntax, and the paver task `readme` +to generic reStructured Text syntax, and the make target `readme` does this for you: -.. code-block:: bash +.. code-block:: console - $ paver readme + $ make readme Now commit the changes: -.. code-block:: bash +.. code-block:: console $ git commit -a -m "Bumps version to X.Y.Z" and make a new version tag: -.. code-block:: bash +.. code-block:: console $ git tag vX.Y.Z $ git push --tags @@ -1067,10 +1093,9 @@ Releasing Commands to make a new public stable release:: - $ paver releaseok # checks pep8, autodoc index, runs tests and more - $ paver removepyc # Remove .pyc files - $ git clean -xdn # Check that there's no left-over files in the repo - $ python setup.py sdist upload # Upload package to PyPI + $ make distcheck # checks pep8, autodoc index, runs tests and more + $ make dist # NOTE: Runs git clean -xdf and removes files not in the repo. + $ python setup.py sdist bdist_wheel upload # Upload package to PyPI If this is a new release series then you also need to do the following: diff --git a/docs/copyright.rst b/docs/copyright.rst index c7b95e8d0..2295029a8 100644 --- a/docs/copyright.rst +++ b/docs/copyright.rst @@ -7,21 +7,22 @@ by Ask Solem .. 
|copy| unicode:: U+000A9 .. COPYRIGHT SIGN -Copyright |copy| 2009-2013, Ask Solem. +Copyright |copy| 2009-2016, Ask Solem. All rights reserved. This material may be copied or distributed only subject to the terms and conditions set forth in the `Creative Commons -Attribution-Noncommercial-Share Alike 3.0 United States License -`_. You must -give the original author credit. You may not use this work for -commercial purposes. If you alter, transform, or build upon this -work, you may distribute the resulting work only under the same or -similar license to this one. +Attribution-ShareAlike 4.0 International` +`_ license. + +You may share and adapt the material, even for commercial purposes, but +you must give the original author credit. +If you alter, transform, or build upon this +work, you may distribute the resulting work only under the same license or +a license compatible to this one. .. note:: While the *Celery* documentation is offered under the - Creative Commons *attribution-nonconmmercial-share alike 3.0 united - states* license, the Celery *software* is offered under the - less restrictive + Creative Commons *Attribution-ShareAlike 4.0 International* license + the Celery *software* is offered under the `BSD License (3 Clause) `_ diff --git a/docs/django/first-steps-with-django.rst b/docs/django/first-steps-with-django.rst index e25022e85..d033f0741 100644 --- a/docs/django/first-steps-with-django.rst +++ b/docs/django/first-steps-with-django.rst @@ -55,7 +55,7 @@ first we import absolute imports from the future, so that our from __future__ import absolute_import -Then we set the default :envvar:`DJANGO_SETTINGS_MODULE` +Then we set the default :envvar:`DJANGO_SETTINGS_MODULE` environment variable for the :program:`celery` command-line program: .. code-block:: python @@ -76,7 +76,13 @@ but there's probably no reason for that when using Django. We also add the Django settings module as a configuration source for Celery. 
This means that you don't have to use multiple configuration files, and instead configure Celery directly -from the Django settings. +from the Django settings; but you can also separate them if wanted. + +The uppercase namespace means that all Celery configuration options +must be specified in uppercase instead of lowercase, and start with +``CELERY_``, so e.g. the :setting:`task_always_eager`` setting +becomes ``CELERY_TASK_ALWAYS_EAGER``, and the :setting:`broker_url` +setting becomes ``CELERY_BROKER_URL``. You can pass the object directly here, but using a string is better since then the worker doesn't have to serialize the object when using Windows @@ -84,7 +90,7 @@ or execv: .. code-block:: python - app.config_from_object('django.conf:settings') + app.config_from_object('django.conf:settings', namespace='CELERY') Next, a common practice for reusable apps is to define all tasks in a separate ``tasks.py`` module, and Celery does have a way to @@ -92,20 +98,21 @@ autodiscover these modules: .. code-block:: python - app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) + app.autodiscover_tasks() -With the line above Celery will automatically discover tasks in reusable -apps if you follow the ``tasks.py`` convention:: +With the line above Celery will automatically discover tasks from all +of your installed apps, following the ``tasks.py`` convention:: - app1/ - - app1/tasks.py - - app1/models.py + - tasks.py + - models.py - app2/ - - app2/tasks.py - - app2/models.py + - tasks.py + - models.py + This way you do not have to manually add the individual modules -to the :setting:`CELERY_IMPORTS` setting. The ``lambda`` so that the +to the :setting:`CELERY_IMPORTS ` setting. The ``lambda`` so that the autodiscovery can happen only when needed, and so that importing your module will not evaluate the Django settings object. @@ -136,14 +143,14 @@ concrete app instance: Using the Django ORM/Cache as a result backend. 
----------------------------------------------- -The ``django-celery`` library defines result backends that -uses the Django ORM and Django Cache frameworks. +The `django-celery <https://github.com/celery/django-celery>`_ library defines +result backends that use the Django ORM and Django Cache frameworks. To use this with your project you need to follow these four steps: 1. Install the ``django-celery`` library: - .. code-block:: bash + .. code-block:: console $ pip install django-celery @@ -158,13 +165,13 @@ To use this with your project you need to follow these four steps: If you are using south_ for schema migrations, you'll want to: - .. code-block:: bash + .. code-block:: console $ python manage.py migrate djcelery For those who are not using south, a normal ``syncdb`` will work: - .. code-block:: bash + .. code-block:: console $ python manage.py syncdb @@ -175,7 +182,7 @@ To use this with your project you need to follow these four steps: .. code-block:: python app.conf.update( - CELERY_RESULT_BACKEND='djcelery.backends.database:DatabaseBackend', + result_backend='djcelery.backends.database:DatabaseBackend', ) For the cache backend you can use: @@ -183,7 +190,7 @@ To use this with your project you need to follow these four steps: .. code-block:: python app.conf.update( - CELERY_RESULT_BACKEND='djcelery.backends.cache:CacheBackend', + result_backend='djcelery.backends.cache:CacheBackend', ) If you have connected Celery to your Django settings then you can @@ -211,7 +218,7 @@ as a daemon - see :ref:`daemonizing` - but for testing and development it is useful to be able to start a worker instance by using the ``celery worker`` manage command, much as you would use Django's runserver: -.. code-block:: bash +.. code-block:: console $ celery -A proj worker -l info @@ -219,7 +226,7 @@ development it is useful to be able to start a worker instance by using the For a complete listing of the command-line options available, use the help command: -..
code-block:: bash +.. code-block:: console $ celery help diff --git a/docs/faq.rst b/docs/faq.rst index d1cfc0ddf..c374f9748 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -99,7 +99,7 @@ many performance and stability improvements. It is an eventual goal that these improvements will be merged back into Python one day. It is also used for compatibility with older Python versions -that doesn't come with the multiprocessing module. +that don't come with the multiprocessing module. .. _`billiard`: http://pypi.python.org/pypi/billiard @@ -129,22 +129,9 @@ broker this is a natural dependency. .. _`amqp`: http://pypi.python.org/pypi/amqp -- `anyjson`_ - -anyjson is an utility library to select the best possible -JSON implementation. - -.. _`anyjson`: http://pypi.python.org/pypi/anyjson - - .. note:: - For compatibility reasons additional packages may be installed - if you are running on older Python versions, - for example Python 2.6 depends on the ``importlib``, - and ``ordereddict`` libraries. - - Also, to handle the dependencies for popular configuration + To handle the dependencies for popular configuration choices Celery defines a number of "bundle" packages, see :ref:`bundles`. @@ -288,9 +275,16 @@ most systems), it usually contains a message describing the reason. Does it work on FreeBSD? ------------------------ -**Answer:** The prefork pool requires a working POSIX semaphore -implementation which isn't enabled in FreeBSD by default. You have to enable -POSIX semaphores in the kernel and manually recompile multiprocessing. +**Answer:** Depends + +When using the RabbitMQ (AMQP) and Redis transports it should work +out of the box. + +For other transports the compatibility prefork pool is +used which requires a working POSIX semaphore implementation, +this is enabled in FreeBSD by default since FreeBSD 8.x. +For older version of FreeBSD, you have to enable +POSIX semaphores in the kernel and manually recompile billiard. 
Luckily, Viktor Petersson has written a tutorial to get you started with Celery on FreeBSD here: @@ -312,7 +306,7 @@ Why aren't my tasks processed? **Answer:** With RabbitMQ you can see how many consumers are currently receiving tasks by running the following command: -.. code-block:: bash +.. code-block:: console $ rabbitmqctl list_queues -p name messages consumers Listing queues ... @@ -372,24 +366,24 @@ How do I purge all waiting tasks? **Answer:** You can use the ``celery purge`` command to purge all configured task queues: -.. code-block:: bash +.. code-block:: console - $ celery purge + $ celery -A proj purge or programatically: -.. code-block:: python +.. code-block:: pycon - >>> from celery import current_app as celery - >>> celery.control.purge() + >>> from proj.celery import app + >>> app.control.purge() 1753 If you only want to purge messages from a specific queue you have to use the AMQP API or the :program:`celery amqp` utility: -.. code-block:: bash +.. code-block:: console - $ celery amqp queue.purge + $ celery -A proj amqp queue.purge The number 1753 is the number of messages deleted. @@ -432,7 +426,7 @@ using the tasks current result backend. If you need to specify a custom result backend, or you want to use the current application's default backend you can use -:class:`@Celery.AsyncResult`: +:class:`@AsyncResult`: >>> result = app.AsyncResult(task_id) >>> result.get() @@ -452,19 +446,22 @@ It is essential that you protect against unauthorized access to your broker, databases and other services transmitting pickled data. -For the task messages you can set the :setting:`CELERY_TASK_SERIALIZER` -setting to "json" or "yaml" instead of pickle. There is -currently no alternative solution for task results (but writing a -custom result backend using JSON is a simple task) - Note that this is not just something you should be aware of with Celery, for example also Django uses pickle for its cache client. 
+For the task messages you can set the :setting:`task_serializer` +setting to "json" or "yaml" instead of pickle. + +Similarly for task results you can set :setting:`result_serializer`. + +For more details of the formats used and the lookup order when +checking which format to use for a task see :ref:`calling-serializers` + Can messages be encrypted? -------------------------- **Answer**: Some AMQP brokers supports using SSL (including RabbitMQ). -You can enable this using the :setting:`BROKER_USE_SSL` setting. +You can enable this using the :setting:`broker_use_ssl` setting. It is also possible to add additional encryption and security to messages, if you have a need for this then you should contact the :ref:`mailing-list`. @@ -520,13 +517,13 @@ as a message. If you don't collect these results, they will build up and RabbitMQ will eventually run out of memory. Results expire after 1 day by default. It may be a good idea -to lower this value by configuring the :setting:`CELERY_TASK_RESULT_EXPIRES` +to lower this value by configuring the :setting:`result_expires` setting. If you don't use the results for a task, make sure you set the `ignore_result` option: -.. code-block python +.. code-block:: python @app.task(ignore_result=True) def mytask(): @@ -568,7 +565,7 @@ Tasks How can I reuse the same connection when calling tasks? ------------------------------------------------------- -**Answer**: See the :setting:`BROKER_POOL_LIMIT` setting. +**Answer**: See the :setting:`broker_pool_limit` setting. The connection pool is enabled by default since version 2.5. .. _faq-sudo-subprocess: @@ -594,7 +591,7 @@ Why do workers delete tasks from the queue if they are unable to process them? **Answer**: The worker rejects unknown tasks, messages with encoding errors and messages -that doesn't contain the proper fields (as per the task message protocol). +that don't contain the proper fields (as per the task message protocol). 
If it did not reject them they could be redelivered again and again, causing a loop. @@ -607,12 +604,11 @@ queue for exchange, so that rejected messages is moved there. Can I call a task by name? ----------------------------- -**Answer**: Yes. Use :func:`celery.execute.send_task`. +**Answer**: Yes. Use :meth:`@send_task`. You can also call a task by name from any language that has an AMQP client. - >>> from celery.execute import send_task - >>> send_task("tasks.add", args=[2, 2], kwargs={}) + >>> app.send_task('tasks.add', args=[2, 2], kwargs={}) .. _faq-get-current-task-id: @@ -693,8 +689,8 @@ Can I cancel the execution of a task? or if you only have the task id:: - >>> from celery import current_app as celery - >>> celery.control.revoke(task_id) + >>> from proj.celery import app + >>> app.control.revoke(task_id) .. _faq-node-not-receiving-broadcast-commands: @@ -709,10 +705,10 @@ control commands will be received in round-robin between them. To work around this you can explicitly set the nodename for every worker using the :option:`-n` argument to :mod:`~celery.bin.worker`: -.. code-block:: bash +.. code-block:: console - $ celery worker -n worker1@%h - $ celery worker -n worker2@%h + $ celery -A proj worker -n worker1@%h + $ celery -A proj worker -n worker2@%h where ``%h`` is automatically expanded into the current hostname. @@ -726,6 +722,21 @@ and a worker can bind to as many queues as it wants. See :doc:`userguide/routing` for more information. +.. _faq-disable-prefetch: + +Can I disable prefetching of tasks? +----------------------------------- + +**Answer**: The term prefetch must have confused you, as in Celery it's only used +to describe the task prefetching *limits*. + +Disabling the prefetch limits is possible, but that means the worker will +consume as many tasks as it can, as fast as possible.
+ +A discussion on prefetch limits, and configuration settings for a worker +that only reserves one task at a time is found here: +:ref:`optimizing-prefetch-limit`. + .. _faq-change-periodic-task-interval-at-runtime: Can I change the interval of a periodic task at runtime? @@ -750,13 +761,15 @@ create a new schedule subclass and override Does celery support task priorities? ------------------------------------ -**Answer**: No. In theory, yes, as AMQP supports priorities. However -RabbitMQ doesn't implement them yet. +**Answer**: Yes. -The usual way to prioritize work in Celery, is to route high priority tasks -to different servers. In the real world this may actually work better than per message -priorities. You can use this in combination with rate limiting to achieve a -highly responsive system. +RabbitMQ supports priorities since version 3.5.0. +Redis transport emulates support of priorities. + +You can also prioritize work by routing high priority tasks +to different workers. In the real world this may actually work better +than per message priorities. You can use this in combination with rate +limiting to achieve a responsive system. .. _faq-acks_late-vs-retry: @@ -768,7 +781,7 @@ to use both. `Task.retry` is used to retry tasks, notably for expected errors that is catchable with the `try:` block. The AMQP transaction is not used -for these errors: **if the task raises an exception it is still acknowledged!**. +for these errors: **if the task raises an exception it is still acknowledged!** The `acks_late` setting would be used when you need the task to be executed again if the worker (for some reason) crashes mid-execution. @@ -794,7 +807,7 @@ scenario of course, but you can probably imagine something far more sinister. So for ease of programming we have less reliability; It's a good default, users who require it and know what they are doing can still enable acks_late (and in the future hopefully -use manual acknowledgement) +use manual acknowledgement). 
In addition `Task.retry` has features not available in AMQP transactions: delay between retries, max retries, etc. @@ -812,18 +825,8 @@ Can I schedule tasks to execute at a specific time? **Answer**: Yes. You can use the `eta` argument of :meth:`Task.apply_async`. -Or to schedule a periodic task at a specific time, use the -:class:`celery.schedules.crontab` schedule behavior: - - -.. code-block:: python - - from celery.schedules import crontab - from celery.task import periodic_task +See also :ref:`guide-beat`. - @periodic_task(run_every=crontab(hour=7, minute=30, day_of_week="mon")) - def every_monday_morning(): - print("This is run every Monday morning at 7:30") .. _faq-safe-worker-shutdown: @@ -835,9 +838,23 @@ executing jobs and shut down as soon as possible. No tasks should be lost. You should never stop :mod:`~celery.bin.worker` with the :sig:`KILL` signal (:option:`-9`), unless you've tried :sig:`TERM` a few times and waited a few -minutes to let it get a chance to shut down. As if you do tasks may be -terminated mid-execution, and they will not be re-run unless you have the -`acks_late` option set (`Task.acks_late` / :setting:`CELERY_ACKS_LATE`). +minutes to let it get a chance to shut down. + +Also make sure you kill the main worker process, not its child processes. +You can direct a kill signal to a specific child process if you know the +process is currently executing a task the worker shutdown is depending on, +but this also means that a ``WorkerLostError`` state will be set for the +task so the task will not run again. + +Identifying the type of process is easier if you have installed the +``setproctitle`` module: + +.. code-block:: console + + $ pip install setproctitle + +With this library installed you will be able to see the type of process in ps +listings, but the worker must be restarted for this to take effect. .. 
seealso:: diff --git a/docs/getting-started/brokers/beanstalk.rst b/docs/getting-started/brokers/beanstalk.rst index 4854310a0..c31c630b2 100644 --- a/docs/getting-started/brokers/beanstalk.rst +++ b/docs/getting-started/brokers/beanstalk.rst @@ -22,7 +22,7 @@ For the Beanstalk support you have to install additional dependencies. You can install both Celery and these dependencies in one go using the ``celery[beanstalk]`` :ref:`bundle `: -.. code-block:: bash +.. code-block:: console $ pip install -U celery[beanstalk] @@ -34,7 +34,7 @@ Configuration Configuration is easy, set the transport, and configure the location of your Beanstalk database:: - BROKER_URL = 'beanstalk://localhost:11300' + broker_url = 'beanstalk://localhost:11300' Where the URL is in the format of:: diff --git a/docs/getting-started/brokers/couchdb.rst b/docs/getting-started/brokers/couchdb.rst index d731ef061..b7dba3e05 100644 --- a/docs/getting-started/brokers/couchdb.rst +++ b/docs/getting-started/brokers/couchdb.rst @@ -20,7 +20,7 @@ For the CouchDB support you have to install additional dependencies. You can install both Celery and these dependencies in one go using the ``celery[couchdb]`` :ref:`bundle `: -.. code-block:: bash +.. code-block:: console $ pip install -U celery[couchdb] @@ -32,7 +32,7 @@ Configuration Configuration is easy, set the transport, and configure the location of your CouchDB database:: - BROKER_URL = 'couchdb://localhost:5984/database_name' + broker_url = 'couchdb://localhost:5984/database_name' Where the URL is in the format of:: diff --git a/docs/getting-started/brokers/django.rst b/docs/getting-started/brokers/django.rst index d4358d710..df4669ea1 100644 --- a/docs/getting-started/brokers/django.rst +++ b/docs/getting-started/brokers/django.rst @@ -26,15 +26,15 @@ configuration values. #. Set your broker transport:: - BROKER_URL = 'django://' + CELERY_BROKER_URL = 'django://' #. 
Add :mod:`kombu.transport.django` to `INSTALLED_APPS`:: - INSTALLED_APPS = ('kombu.transport.django', ) + INSTALLED_APPS = ('kombu.transport.django',) #. Sync your database schema: -.. code-block:: bash +.. code-block:: console $ python manage.py syncdb diff --git a/docs/getting-started/brokers/ironmq.rst b/docs/getting-started/brokers/ironmq.rst index 49ddcf46f..4816bebba 100644 --- a/docs/getting-started/brokers/ironmq.rst +++ b/docs/getting-started/brokers/ironmq.rst @@ -11,7 +11,7 @@ Installation For IronMQ support, you'll need the [iron_celery](http://github.com/iron-io/iron_celery) library: -.. code-block:: bash +.. code-block:: console $ pip install iron_celery @@ -31,7 +31,7 @@ First, you'll need to import the iron_celery library right after you import Cele You have to specify IronMQ in the broker URL:: - BROKER_URL = 'ironmq://ABCDEFGHIJKLMNOPQRST:ZYXK7NiynGlTogH8Nj+P9nlE73sq3@' + broker_url = 'ironmq://ABCDEFGHIJKLMNOPQRST:ZYXK7NiynGlTogH8Nj+P9nlE73sq3@' where the URL format is:: diff --git a/docs/getting-started/brokers/mongodb.rst b/docs/getting-started/brokers/mongodb.rst index 394736893..cd4d478b7 100644 --- a/docs/getting-started/brokers/mongodb.rst +++ b/docs/getting-started/brokers/mongodb.rst @@ -20,7 +20,7 @@ For the MongoDB support you have to install additional dependencies. You can install both Celery and these dependencies in one go using the ``celery[mongodb]`` :ref:`bundle `: -.. code-block:: bash +.. 
code-block:: console $ pip install -U celery[mongodb] @@ -32,7 +32,7 @@ Configuration Configuration is easy, set the transport, and configure the location of your MongoDB database:: - BROKER_URL = 'mongodb://localhost:27017/database_name' + broker_url = 'mongodb://localhost:27017/database_name' Where the URL is in the format of:: diff --git a/docs/getting-started/brokers/rabbitmq.rst b/docs/getting-started/brokers/rabbitmq.rst index 1df16560a..cf2902885 100644 --- a/docs/getting-started/brokers/rabbitmq.rst +++ b/docs/getting-started/brokers/rabbitmq.rst @@ -12,9 +12,11 @@ Installation & Configuration RabbitMQ is the default broker so it does not require any additional dependencies or initial configuration, other than the URL location of -the broker instance you want to use:: +the broker instance you want to use: - >>> BROKER_URL = 'amqp://guest:guest@localhost:5672//' +.. code-block:: python + + broker_url = 'amqp://guest:guest@localhost:5672//' For a description of broker URLs and a full list of the various broker configuration options available to Celery, @@ -46,15 +48,19 @@ Setting up RabbitMQ To use celery we need to create a RabbitMQ user, a virtual host and allow that user access to that virtual host: -.. code-block:: bash +.. code-block:: console $ sudo rabbitmqctl add_user myuser mypassword -.. code-block:: bash +.. code-block:: console $ sudo rabbitmqctl add_vhost myvhost -.. code-block:: bash +.. code-block:: console + + $ sudo rabbitmqctl set_user_tags myuser mytag + +.. code-block:: console $ sudo rabbitmqctl set_permissions -p myvhost myuser ".*" ".*" ".*" @@ -75,13 +81,13 @@ shiny package management system for OS X. First, install homebrew using the one-line command provided by the `Homebrew documentation`_: -.. code-block:: bash +.. code-block:: console ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)" Finally, we can install rabbitmq using :program:`brew`: -.. code-block:: bash +.. 
code-block:: console $ brew install rabbitmq @@ -90,6 +96,12 @@ Finally, we can install rabbitmq using :program:`brew`: .. _rabbitmq-osx-system-hostname: +After you have installed rabbitmq with brew you need to add the following to your path to be able to start and stop the broker. Add it to your .bash_profile or .profile. + +.. code-block:: console + + PATH=$PATH:/usr/local/sbin + Configuring the system host name ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -99,7 +111,7 @@ to communicate with nodes. Use the :program:`scutil` command to permanently set your host name: -.. code-block:: bash +.. code-block:: console $ sudo scutil --set HostName myhost.local @@ -111,7 +123,7 @@ back into an IP address:: If you start the rabbitmq server, your rabbit node should now be `rabbit@myhost`, as verified by :program:`rabbitmqctl`: -.. code-block:: bash +.. code-block:: console $ sudo rabbitmqctl status Status of node rabbit@myhost ... @@ -136,21 +148,21 @@ Starting/Stopping the RabbitMQ server To start the server: -.. code-block:: bash +.. code-block:: console $ sudo rabbitmq-server you can also run it in the background by adding the :option:`-detached` option (note: only one dash): -.. code-block:: bash +.. code-block:: console $ sudo rabbitmq-server -detached Never use :program:`kill` to stop the RabbitMQ server, but rather use the :program:`rabbitmqctl` command: -.. code-block:: bash +.. code-block:: console $ sudo rabbitmqctl stop diff --git a/docs/getting-started/brokers/redis.rst b/docs/getting-started/brokers/redis.rst index 6a1d6e31f..c2329efed 100644 --- a/docs/getting-started/brokers/redis.rst +++ b/docs/getting-started/brokers/redis.rst @@ -13,7 +13,7 @@ For the Redis support you have to install additional dependencies. You can install both Celery and these dependencies in one go using the ``celery[redis]`` :ref:`bundle `: -.. code-block:: bash +.. 
code-block:: console $ pip install -U celery[redis] @@ -25,7 +25,7 @@ Configuration Configuration is easy, just configure the location of your Redis database:: - BROKER_URL = 'redis://localhost:6379/0' + broker_url = 'redis://localhost:6379/0' Where the URL is in the format of:: @@ -34,6 +34,15 @@ Where the URL is in the format of:: all fields after the scheme are optional, and will default to localhost on port 6379, using database 0. +If a unix socket connection should be used, the URL needs to be in the format:: + + redis+socket:///path/to/redis.sock + +Specifying a different database number when using a unix socket is possible +by adding the ``virtual_host`` parameter to the URL:: + + redis+socket:///path/to/redis.sock?virtual_host=db_number + .. _redis-visibility_timeout: Visibility Timeout @@ -43,9 +52,9 @@ The visibility timeout defines the number of seconds to wait for the worker to acknowledge the task before the message is redelivered to another worker. Be sure to see :ref:`redis-caveats` below. -This option is set via the :setting:`BROKER_TRANSPORT_OPTIONS` setting:: +This option is set via the :setting:`broker_transport_options` setting:: - BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 3600} # 1 hour. + broker_transport_options = {'visibility_timeout': 3600} # 1 hour. The default visibility timeout for Redis is 1 hour. @@ -57,7 +66,7 @@ Results If you also want to store the state and return values of tasks in Redis, you should configure these settings:: - CELERY_RESULT_BACKEND = 'redis://localhost:6379/0' + result_backend = 'redis://localhost:6379/0' For a complete list of options supported by the Redis result backend, see :ref:`conf-redis-result-backend` @@ -69,72 +78,77 @@ Caveats .. _redis-caveat-fanout-prefix: -- Broadcast messages will be seen by all virtual hosts by default. +Fanout prefix +------------- + +Broadcast messages will be seen by all virtual hosts by default. 
- You have to set a transport option to prefix the messages so that - they will only be received by the active virtual host:: +You have to set a transport option to prefix the messages so that +they will only be received by the active virtual host:: - BROKER_TRANSPORT_OPTIONS = {'fanout_prefix': True} + broker_transport_options = {'fanout_prefix': True} - Note that you will not be able to communicate with workers running older - versions or workers that does not have this setting enabled. +Note that you will not be able to communicate with workers running older +versions or workers that does not have this setting enabled. - This setting will be the default in the future, so better to migrate - sooner rather than later. +This setting will be the default in the future, so better to migrate +sooner rather than later. .. _redis-caveat-fanout-patterns: -- Workers will receive all task related events by default. +Fanout patterns +--------------- - To avoid this you must set the ``fanout_patterns`` fanout option so that - the workers may only subscribe to worker related events:: +Workers will receive all task related events by default. - BROKER_TRANSPORT_OPTIONS = {'fanout_patterns': True} +To avoid this you must set the ``fanout_patterns`` fanout option so that +the workers may only subscribe to worker related events:: - Note that this change is backward incompatible so all workers in the - cluster must have this option enabled, or else they will not be able to - communicate. + broker_transport_options = {'fanout_patterns': True} - This option will be enabled by default in the future. +Note that this change is backward incompatible so all workers in the +cluster must have this option enabled, or else they will not be able to +communicate. -- If a task is not acknowledged within the :ref:`redis-visibility_timeout` - the task will be redelivered to another worker and executed. +This option will be enabled by default in the future. 
- This causes problems with ETA/countdown/retry tasks where the - time to execute exceeds the visibility timeout; in fact if that - happens it will be executed again, and again in a loop. +Visibility timeout +------------------ - So you have to increase the visibility timeout to match - the time of the longest ETA you are planning to use. +If a task is not acknowledged within the :ref:`redis-visibility_timeout` +the task will be redelivered to another worker and executed. - Note that Celery will redeliver messages at worker shutdown, - so having a long visibility timeout will only delay the redelivery - of 'lost' tasks in the event of a power failure or forcefully terminated - workers. +This causes problems with ETA/countdown/retry tasks where the +time to execute exceeds the visibility timeout; in fact if that +happens it will be executed again, and again in a loop. - Periodic tasks will not be affected by the visibility timeout, - as this is a concept separate from ETA/countdown. +So you have to increase the visibility timeout to match +the time of the longest ETA you are planning to use. - You can increase this timeout by configuring a transport option - with the same name:: +Note that Celery will redeliver messages at worker shutdown, +so having a long visibility timeout will only delay the redelivery +of 'lost' tasks in the event of a power failure or forcefully terminated +workers. - BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 43200} +Periodic tasks will not be affected by the visibility timeout, +as this is a concept separate from ETA/countdown. - The value must be an int describing the number of seconds. +You can increase this timeout by configuring a transport option +with the same name:: + broker_transport_options = {'visibility_timeout': 43200} -- Monitoring events (as used by flower and other tools) are global - and is not affected by the virtual host setting. +The value must be an int describing the number of seconds. 
- This is caused by a limitation in Redis. The Redis PUB/SUB channels - are global and not affected by the database number. +Key eviction +------------ -- Redis may evict keys from the database in some situations +Redis may evict keys from the database in some situations - If you experience an error like:: +If you experience an error like:: - InconsistencyError, Probably the key ('_kombu.binding.celery') has been - removed from the Redis database. + InconsistencyError, Probably the key ('_kombu.binding.celery') has been + removed from the Redis database. - you may want to configure the redis-server to not evict keys by setting - the ``timeout`` parameter to 0. +you may want to configure the redis-server to not evict keys by setting +the ``timeout`` parameter to 0 in the redis configuration file. diff --git a/docs/getting-started/brokers/sqlalchemy.rst b/docs/getting-started/brokers/sqlalchemy.rst index 0f8cb7b6a..37f8d7f57 100644 --- a/docs/getting-started/brokers/sqlalchemy.rst +++ b/docs/getting-started/brokers/sqlalchemy.rst @@ -24,34 +24,34 @@ Configuration Celery needs to know the location of your database, which should be the usual SQLAlchemy connection string, but with 'sqla+' prepended to it:: - BROKER_URL = 'sqla+sqlite:///celerydb.sqlite' + broker_url = 'sqla+sqlite:///celerydb.sqlite' -This transport uses only the :setting:`BROKER_URL` setting, which have to be +This transport uses only the :setting:`broker_url` setting, which have to be an SQLAlchemy database URI. Please see `SQLAlchemy: Supported Databases`_ for a table of supported databases. -Here's a list of examples using a selection of other `SQLAlchemy Connection String`_'s: +Here's a list of examples using a selection of other `SQLAlchemy Connection Strings`_: .. 
code-block:: python # sqlite (filename) - BROKER_URL = 'sqla+sqlite:///celerydb.sqlite' + broker_url = 'sqla+sqlite:///celerydb.sqlite' # mysql - BROKER_URL = 'sqla+mysql://scott:tiger@localhost/foo' + broker_url = 'sqla+mysql://scott:tiger@localhost/foo' # postgresql - BROKER_URL = 'sqla+postgresql://scott:tiger@localhost/mydatabase' + broker_url = 'sqla+postgresql://scott:tiger@localhost/mydatabase' # oracle - BROKER_URL = 'sqla+oracle://scott:tiger@127.0.0.1:1521/sidname' + broker_url = 'sqla+oracle://scott:tiger@127.0.0.1:1521/sidname' .. _`SQLAlchemy: Supported Databases`: http://www.sqlalchemy.org/docs/core/engines.html#supported-databases -.. _`SQLAlchemy Connection String`: +.. _`SQLAlchemy Connection Strings`: http://www.sqlalchemy.org/docs/core/engines.html#database-urls .. _sqlalchemy-results-configuration: diff --git a/docs/getting-started/brokers/sqs.rst b/docs/getting-started/brokers/sqs.rst index 9f2331471..cc44b280f 100644 --- a/docs/getting-started/brokers/sqs.rst +++ b/docs/getting-started/brokers/sqs.rst @@ -18,7 +18,7 @@ Installation For the Amazon SQS support you have to install the `boto`_ library: -.. code-block:: bash +.. code-block:: console $ pip install -U boto @@ -32,7 +32,7 @@ Configuration You have to specify SQS in the broker URL:: - BROKER_URL = 'sqs://ABCDEFGHIJKLMNOPQRST:ZYXK7NiynGlTogH8Nj+P9nlE73sq3@' + broker_url = 'sqs://ABCDEFGHIJKLMNOPQRST:ZYXK7NiynGlTogH8Nj+P9nlE73sq3@' where the URL format is:: @@ -57,9 +57,9 @@ Region ------ The default region is ``us-east-1`` but you can select another region -by configuring the :setting:`BROKER_TRANSPORT_OPTIONS` setting:: +by configuring the :setting:`broker_transport_options` setting:: - BROKER_TRANSPORT_OPTIONS = {'region': 'eu-west-1'} + broker_transport_options = {'region': 'eu-west-1'} .. seealso:: @@ -74,9 +74,9 @@ The visibility timeout defines the number of seconds to wait for the worker to acknowledge the task before the message is redelivered to another worker. 
Also see caveats below. -This option is set via the :setting:`BROKER_TRANSPORT_OPTIONS` setting:: +This option is set via the :setting:`broker_transport_options` setting:: - BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 3600} # 1 hour. + broker_transport_options = {'visibility_timeout': 3600} # 1 hour. The default visibility timeout is 30 seconds. @@ -91,10 +91,10 @@ sleep for one second whenever there are no more messages to read. You should note that **more frequent polling is also more expensive, so increasing the polling interval can save you money**. -The polling interval can be set via the :setting:`BROKER_TRANSPORT_OPTIONS` +The polling interval can be set via the :setting:`broker_transport_options` setting:: - BROKER_TRANSPORT_OPTIONS = {'polling_interval': 0.3} + broker_transport_options = {'polling_interval': 0.3} Very frequent polling intervals can cause *busy loops*, which results in the worker using a lot of CPU time. If you need sub-millisecond precision you @@ -106,9 +106,9 @@ Queue Prefix By default Celery will not assign any prefix to the queue names, If you have other services using SQS you can configure it do so -using the :setting:`BROKER_TRANSPORT_OPTIONS` setting:: +using the :setting:`broker_transport_options` setting:: - BROKER_TRANSPORT_OPTIONS = {'queue_name_prefix': 'celery-'} + broker_transport_options = {'queue_name_prefix': 'celery-'} .. _sqs-caveats: @@ -137,7 +137,7 @@ Caveats The maximum visibility timeout supported by AWS as of this writing is 12 hours (43200 seconds):: - BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 43200} + broker_transport_options = {'visibility_timeout': 43200} - SQS does not yet support worker remote control commands. 
diff --git a/docs/getting-started/first-steps-with-celery.rst b/docs/getting-started/first-steps-with-celery.rst index c79f5dcbd..661b8bf0c 100644 --- a/docs/getting-started/first-steps-with-celery.rst +++ b/docs/getting-started/first-steps-with-celery.rst @@ -16,7 +16,7 @@ In this tutorial you will learn the absolute basics of using Celery. You will learn about; - Choosing and installing a message transport (broker). -- Installing Celery and creating your first task +- Installing Celery and creating your first task. - Starting the worker and calling tasks. - Keeping track of tasks as they transition through different states, and inspecting return values. @@ -37,7 +37,7 @@ showcase Celery's capabilities. Choosing a Broker ================= -Celery requires a solution to send and receive messages, usually this +Celery requires a solution to send and receive messages; usually this comes in the form of a separate service called a *message broker*. There are several choices available, including: @@ -56,7 +56,7 @@ Detailed information about using RabbitMQ with Celery: If you are using Ubuntu or Debian install RabbitMQ by executing this command: -.. code-block:: bash +.. code-block:: console $ sudo apt-get install rabbitmq-server @@ -111,15 +111,15 @@ Installing Celery Celery is on the Python Package Index (PyPI), so it can be installed with standard Python tools like ``pip`` or ``easy_install``: -.. code-block:: bash +.. code-block:: console $ pip install celery Application =========== -The first thing you need is a Celery instance, this is called the celery -application or just app in short. Since this instance is used as +The first thing you need is a Celery instance, which is called the celery +application or just "app" for short. Since this instance is used as the entry-point for everything you want to do in Celery, like creating tasks and managing workers, it must be possible for other modules to import it. 
@@ -157,7 +157,7 @@ Running the celery worker server You now run the worker by executing our program with the ``worker`` argument: -.. code-block:: bash +.. code-block:: console $ celery -A tasks worker --loglevel=info @@ -173,13 +173,13 @@ for more information). For a complete listing of the command-line options available, do: -.. code-block:: bash +.. code-block:: console $ celery worker --help There are also several other commands available, and help is also available: -.. code-block:: bash +.. code-block:: console $ celery help @@ -223,14 +223,14 @@ built-in result backends to choose from: `SQLAlchemy`_/`Django`_ ORM, .. _`SQLAlchemy`: http://www.sqlalchemy.org/ .. _`Django`: http://djangoproject.com -For this example you will use the `amqp` result backend, which sends states -as messages. The backend is specified via the ``backend`` argument to -:class:`@Celery`, (or via the :setting:`CELERY_RESULT_BACKEND` setting if +For this example you will use the `rpc` result backend, which sends states +back as transient messages. The backend is specified via the ``backend`` argument to +:class:`@Celery`, (or via the :setting:`result_backend` setting if you choose to use a configuration module):: - app = Celery('tasks', backend='amqp', broker='amqp://') + app = Celery('tasks', backend='rpc://', broker='amqp://') -or if you want to use Redis as the result backend, but still use RabbitMQ as +Or if you want to use Redis as the result backend, but still use RabbitMQ as the message broker (a popular combination):: app = Celery('tasks', backend='redis://localhost', broker='amqp://') @@ -275,12 +275,12 @@ See :mod:`celery.result` for the complete result object reference. Configuration ============= -Celery, like a consumer appliance doesn't need much to be operated. +Celery, like a consumer appliance, doesn't need much to be operated. It has an input and an output, where you must connect the input to a broker and maybe the output to a result backend if so wanted. 
But if you look closely at the back there's a lid revealing loads of sliders, dials and buttons: this is the configuration. -The default configuration should be good enough for most uses, but there's +The default configuration should be good enough for most uses, but there are many things to tweak so Celery works just the way you want it to. Reading about the options available is a good idea to get familiar with what can be configured. You can read about the options in the @@ -289,22 +289,22 @@ can be configured. You can read about the options in the The configuration can be set on the app directly or by using a dedicated configuration module. As an example you can configure the default serializer used for serializing -task payloads by changing the :setting:`CELERY_TASK_SERIALIZER` setting: +task payloads by changing the :setting:`task_serializer` setting: .. code-block:: python - app.conf.CELERY_TASK_SERIALIZER = 'json' + app.conf.task_serializer = 'json' If you are configuring many settings at once you can use ``update``: .. code-block:: python app.conf.update( - CELERY_TASK_SERIALIZER='json', - CELERY_ACCEPT_CONTENT=['json'], # Ignore other content - CELERY_RESULT_SERIALIZER='json', - CELERY_TIMEZONE='Europe/Oslo', - CELERY_ENABLE_UTC=True, + task_serializer='json', + accept_content=['json'], # Ignore other content + result_serializer='json', + timezone='Europe/Oslo', + enable_utc=True, ) For larger projects using a dedicated configuration module is useful, @@ -316,7 +316,7 @@ you can also imagine your SysAdmin making simple changes to the configuration in the event of system trouble. You can tell your Celery instance to use a configuration module, -by calling the :meth:`~@Celery.config_from_object` method: +by calling the :meth:`@config_from_object` method: .. code-block:: python @@ -332,32 +332,32 @@ current directory or on the Python path, it could look like this: .. 
code-block:: python - BROKER_URL = 'amqp://' - CELERY_RESULT_BACKEND = 'amqp://' + broker_url = 'amqp://' + result_backend = 'rpc://' - CELERY_TASK_SERIALIZER = 'json' - CELERY_RESULT_SERIALIZER = 'json' - CELERY_ACCEPT_CONTENT=['json'] - CELERY_TIMEZONE = 'Europe/Oslo' - CELERY_ENABLE_UTC = True + task_serializer = 'json' + result_serializer = 'json' + accept_content = ['json'] + timezone = 'Europe/Oslo' + enable_utc = True To verify that your configuration file works properly, and doesn't contain any syntax errors, you can try to import it: -.. code-block:: bash +.. code-block:: console $ python -m celeryconfig For a complete reference of configuration options, see :ref:`configuration`. -To demonstrate the power of configuration files, this how you would +To demonstrate the power of configuration files, this is how you would route a misbehaving task to a dedicated queue: :file:`celeryconfig.py`: .. code-block:: python - CELERY_ROUTES = { + task_routes = { 'tasks.add': 'low-priority', } @@ -369,7 +369,7 @@ instead, so that only 10 tasks of this type can be processed in a minute .. code-block:: python - CELERY_ANNOTATIONS = { + task_annotations = { 'tasks.add': {'rate_limit': '10/m'} } @@ -377,14 +377,14 @@ If you are using RabbitMQ or Redis as the broker then you can also direct the workers to set a new rate limit for the task at runtime: -.. code-block:: bash +.. code-block:: console - $ celery control rate_limit tasks.add 10/m + $ celery -A tasks control rate_limit tasks.add 10/m worker@example.com: OK new rate limit set successfully See :ref:`guide-routing` to read more about task routing, -and the :setting:`CELERY_ANNOTATIONS` setting for more about annotations, +and the :setting:`task_annotations` setting for more about annotations, or :ref:`guide-monitoring` for more about remote control commands, and how to monitor what your workers are doing. @@ -411,7 +411,7 @@ Worker does not start: Permission Error A simple workaround is to create a symbolic link: - .. 
code-block:: bash + .. code-block:: console # ln -s /run/shm /dev/shm @@ -435,7 +435,7 @@ the task id after all). Enabling this option will force the worker to skip updating states. -2) Make sure the :setting:`CELERY_IGNORE_RESULT` setting is not enabled. +2) Make sure the :setting:`task_ignore_result` setting is not enabled. 3) Make sure that you do not have any old workers still running. diff --git a/docs/getting-started/introduction.rst b/docs/getting-started/introduction.rst index afd31a5e4..ad8472497 100644 --- a/docs/getting-started/introduction.rst +++ b/docs/getting-started/introduction.rst @@ -14,19 +14,19 @@ What is a Task Queue? Task queues are used as a mechanism to distribute work across threads or machines. -A task queue's input is a unit of work, called a task, dedicated worker -processes then constantly monitor the queue for new work to perform. +A task queue's input is a unit of work called a task. Dedicated worker +processes constantly monitor task queues for new work to perform. Celery communicates via messages, usually using a broker -to mediate between clients and workers. To initiate a task a client puts a -message on the queue, the broker then delivers the message to a worker. +to mediate between clients and workers. To initiate a task, a client adds a +message to the queue, which the broker then delivers to a worker. A Celery system can consist of multiple workers and brokers, giving way to high availability and horizontal scaling. Celery is written in Python, but the protocol can be implemented in any language. So far there's RCelery_ for the Ruby programming language, -node-celery_ for Node.js and a `PHP client`_, but language interoperability can also be achieved +node-celery_ for Node.js and a `PHP client`_. Language interoperability can also be achieved by :ref:`using webhooks `. .. _RCelery: http://leapfrogdevelopment.github.com/rcelery/ @@ -37,9 +37,9 @@ What do I need? =============== .. 
sidebar:: Version Requirements - :subtitle: Celery version 3.0 runs on + :subtitle: Celery version 4.0 runs on - - Python ❨2.5, 2.6, 2.7, 3.2, 3.3❩ + - Python ❨2.7, 3.4, 3.5❩ - PyPy ❨1.8, 1.9❩ - Jython ❨2.5, 2.7❩. @@ -134,7 +134,7 @@ Celery is… - AMQP, Redis - memcached, MongoDB - SQLAlchemy, Django ORM - - Apache Cassandra + - Apache Cassandra, IronCache, Elasticsearch - **Serialization** @@ -281,7 +281,7 @@ Quickjump - :ref:`see a list of running workers ` - :ref:`purge all messages ` - :ref:`inspect what the workers are doing ` - - :ref:`see what tasks a worker has registerd ` + - :ref:`see what tasks a worker has registered ` - :ref:`migrate tasks to a new broker ` - :ref:`see a list of event message types ` - :ref:`contribute to Celery ` diff --git a/docs/getting-started/next-steps.rst b/docs/getting-started/next-steps.rst index d25282d16..29cc8ed84 100644 --- a/docs/getting-started/next-steps.rst +++ b/docs/getting-started/next-steps.rst @@ -70,15 +70,15 @@ you simply import this instance. Starting the worker ------------------- -The :program:`celery` program can be used to start the worker: +The :program:`celery` program can be used to start the worker (you need to run the worker in the directory above proj): -.. code-block:: bash +.. code-block:: console - $ celery worker --app=proj -l info + $ celery -A proj worker -l info When the worker starts you should see a banner and some messages:: - -------------- celery@halcyon.local v3.1 (Cipater) + -------------- celery@halcyon.local v4.0 (0today8) ---- **** ----- --- * *** * -- [Configuration] -- * - **** --- . broker: amqp://guest@localhost:5672// @@ -92,7 +92,7 @@ When the worker starts you should see a banner and some messages:: [2012-06-08 16:23:51,078: WARNING/MainProcess] celery@halcyon.local has started. 
--- The *broker* is the URL you specifed in the broker argument in our ``celery`` +-- The *broker* is the URL you specified in the broker argument in our ``celery`` module, you can also specify a different broker on the command-line by using the :option:`-b` option. @@ -122,13 +122,13 @@ the :ref:`Monitoring and Management guide `. tasks from. The worker can be told to consume from several queues at once, and this is used to route messages to specific workers as a means for Quality of Service, separation of concerns, -and emulating priorities, all described in the :ref:`Routing Guide +and prioritization, all described in the :ref:`Routing Guide `. You can get a complete list of command-line arguments by passing in the `--help` flag: -.. code-block:: bash +.. code-block:: console $ celery worker --help @@ -149,31 +149,31 @@ described in detail in the :ref:`daemonization tutorial `. The daemonization scripts uses the :program:`celery multi` command to start one or more workers in the background: -.. code-block:: bash +.. code-block:: console $ celery multi start w1 -A proj -l info - celery multi v3.1.1 (Cipater) + celery multi v4.0.0 (0today8) > Starting nodes... > w1.halcyon.local: OK You can restart it too: -.. code-block:: bash +.. code-block:: console - $ celery multi restart w1 -A proj -l info - celery multi v3.1.1 (Cipater) + $ celery multi restart w1 -A proj -l info + celery multi v4.0.0 (0today8) > Stopping nodes... > w1.halcyon.local: TERM -> 64024 > Waiting for 1 node..... > w1.halcyon.local: OK > Restarting node w1.halcyon.local: OK - celery multi v3.1.1 (Cipater) + celery multi v4.0.0 (0today8) > Stopping nodes... > w1.halcyon.local: TERM -> 64052 or stop it: -.. code-block:: bash +.. code-block:: console $ celery multi stop w1 -A proj -l info @@ -181,7 +181,7 @@ The ``stop`` command is asynchronous so it will not wait for the worker to shutdown. 
You will probably want to use the ``stopwait`` command instead which will ensure all currently executing tasks is completed: -.. code-block:: bash +.. code-block:: console $ celery multi stopwait w1 -A proj -l info @@ -196,18 +196,18 @@ By default it will create pid and log files in the current directory, to protect against multiple workers launching on top of each other you are encouraged to put these in a dedicated directory: -.. code-block:: bash +.. code-block:: console $ mkdir -p /var/run/celery $ mkdir -p /var/log/celery $ celery multi start w1 -A proj -l info --pidfile=/var/run/celery/%n.pid \ - --logfile=/var/log/celery/%n.pid + --logfile=/var/log/celery/%n%I.log With the multi command you can start multiple workers, and there is a powerful command-line syntax to specify arguments for different workers too, e.g: -.. code-block:: bash +.. code-block:: console $ celery multi start 10 -A proj -l info -Q:1-3 images,video -Q:4,5 data \ -Q default -L:4,5 debug @@ -250,17 +250,23 @@ for larger projects. Calling Tasks ============= -You can call a task using the :meth:`delay` method:: +You can call a task using the :meth:`delay` method: + +.. code-block:: pycon >>> add.delay(2, 2) This method is actually a star-argument shortcut to another method called -:meth:`apply_async`:: +:meth:`apply_async`: + +.. code-block:: pycon >>> add.apply_async((2, 2)) The latter enables you to specify execution options like the time to run -(countdown), the queue it should be sent to and so on:: +(countdown), the queue it should be sent to and so on: + +.. code-block:: pycon >>> add.apply_async((2, 2), queue='lopri', countdown=10) @@ -268,14 +274,16 @@ In the above example the task will be sent to a queue named ``lopri`` and the task will execute, at the earliest, 10 seconds after the message was sent. Applying the task directly will execute the task in the current process, -so that no message is sent:: +so that no message is sent: + +.. 
code-block:: pycon >>> add(2, 2) 4 These three methods - :meth:`delay`, :meth:`apply_async`, and applying (``__call__``), represents the Celery calling API, which are also used for -subtasks. +signatures. A more detailed overview of the Calling API can be found in the :ref:`Calling User Guide `. @@ -296,32 +304,43 @@ have. Also note that result backends are not used for monitoring tasks and work for that Celery uses dedicated event messages (see :ref:`guide-monitoring`). If you have a result backend configured you can retrieve the return -value of a task:: +value of a task: + +.. code-block:: pycon >>> res = add.delay(2, 2) >>> res.get(timeout=1) 4 -You can find the task's id by looking at the :attr:`id` attribute:: +You can find the task's id by looking at the :attr:`id` attribute: + +.. code-block:: pycon >>> res.id d6b3aea2-fb9b-4ebc-8da4-848818db9114 You can also inspect the exception and traceback if the task raised an -exception, in fact ``result.get()`` will propagate any errors by default:: +exception, in fact ``result.get()`` will propagate any errors by default: + +.. code-block:: pycon >>> res = add.delay(2) >>> res.get(timeout=1) + +.. code-block:: pytb + Traceback (most recent call last): File "", line 1, in File "/opt/devel/celery/celery/result.py", line 113, in get interval=interval) File "/opt/devel/celery/celery/backends/amqp.py", line 138, in wait_for - raise self.exception_to_python(meta['result']) + raise meta['result'] TypeError: add() takes exactly 2 arguments (1 given) If you don't wish for the errors to propagate then you can disable that -by passing the ``propagate`` argument:: +by passing the ``propagate`` argument: + +.. code-block:: pycon >>> res.get(propagate=False) TypeError('add() takes exactly 2 arguments (1 given)',) @@ -337,7 +356,9 @@ use the corresponding methods on the result instance:: False So how does it know if the task has failed or not? It can find out by looking -at the tasks *state*:: +at the tasks *state*: + +.. 
code-block:: pycon >>> res.state 'FAILURE' @@ -348,12 +369,14 @@ states. The stages of a typical task can be:: PENDING -> STARTED -> SUCCESS The started state is a special state that is only recorded if the -:setting:`CELERY_TRACK_STARTED` setting is enabled, or if the +:setting:`task_track_started` setting is enabled, or if the ``@task(track_started=True)`` option is set for the task. The pending state is actually not a recorded state, but rather the default state for any task id that is unknown, which you can see -from this example:: +from this example: + +.. code-block:: pycon >>> from proj.celery import app @@ -380,19 +403,23 @@ Calling tasks is described in detail in the You just learned how to call a task using the tasks ``delay`` method, and this is often all you need, but sometimes you may want to pass the signature of a task invocation to another process or as an argument to another -function, for this Celery uses something called *subtasks*. +function, for this Celery uses something called *signatures*. -A subtask wraps the arguments and execution options of a single task +A signature wraps the arguments and execution options of a single task invocation in a way such that it can be passed to functions or even serialized and sent across the wire. -You can create a subtask for the ``add`` task using the arguments ``(2, 2)``, -and a countdown of 10 seconds like this:: +You can create a signature for the ``add`` task using the arguments ``(2, 2)``, +and a countdown of 10 seconds like this: - >>> add.subtask((2, 2), countdown=10) +.. code-block:: pycon + + >>> add.signature((2, 2), countdown=10) tasks.add(2, 2) -There is also a shortcut using star arguments:: +There is also a shortcut using star arguments: + +.. 
code-block:: pycon >>> add.s(2, 2) tasks.add(2, 2) @@ -400,12 +427,14 @@ There is also a shortcut using star arguments:: And there's that calling API again… ----------------------------------- -Subtask instances also supports the calling API, which means that they +Signature instances also support the calling API, which means that they have the ``delay`` and ``apply_async`` methods. -But there is a difference in that the subtask may already have +But there is a difference in that the signature may already have an argument signature specified. The ``add`` task takes two arguments, -so a subtask specifying two arguments would make a complete signature:: +so a signature specifying two arguments would make a complete signature: + +.. code-block:: pycon >>> s1 = add.s(2, 2) >>> res = s1.delay() @@ -413,13 +442,17 @@ so a subtask specifying two arguments would make a complete signature:: But, you can also make incomplete signatures to create what we call -*partials*:: +*partials*: + +.. code-block:: pycon # incomplete partial: add(?, 2) >>> s2 = add.s(2) -``s2`` is now a partial subtask that needs another argument to be complete, -and this can be resolved when calling the subtask:: +``s2`` is now a partial signature that needs another argument to be complete, +and this can be resolved when calling the signature: + +.. code-block:: pycon # resolves the partial: add(8, 2) >>> res = s2.delay(8) @@ -430,19 +463,21 @@ Here you added the argument 8, which was prepended to the existing argument 2 forming a complete signature of ``add(8, 2)``. Keyword arguments can also be added later, these are then merged with any -existing keyword arguments, but with new arguments taking precedence:: +existing keyword arguments, but with new arguments taking precedence: + +.. code-block:: pycon >>> s3 = add.s(2, 2, debug=True) >>> s3.delay(debug=False) # debug is now False.
-As stated subtasks supports the calling API, which means that: +As stated signatures support the calling API, which means that: -- ``subtask.apply_async(args=(), kwargs={}, **options)`` +- ``sig.apply_async(args=(), kwargs={}, **options)`` - Calls the subtask with optional partial arguments and partial + Calls the signature with optional partial arguments and partial keyword arguments. Also supports partial execution options. -- ``subtask.delay(*args, **kwargs)`` +- ``sig.delay(*args, **kwargs)`` Star argument version of ``apply_async``. Any arguments will be prepended to the arguments in the signature, and keyword arguments is merged with any @@ -466,7 +501,7 @@ The Primitives - :ref:`starmap ` - :ref:`chunks ` -The primitives are subtasks themselves, so that they can be combined +These primitives are signature objects themselves, so they can be combined in any number of ways to compose complex workflows. .. note:: @@ -484,7 +519,7 @@ A :class:`~celery.group` calls a list of tasks in parallel, and it returns a special result instance that lets you inspect the results as a group, and retrieve the return values in order. -.. code-block:: python +.. code-block:: pycon >>> from celery import group >>> from proj.tasks import add @@ -494,7 +529,7 @@ as a group, and retrieve the return values in order. - Partial group -.. code-block:: python +.. code-block:: pycon >>> g = group(add.s(i) for i in xrange(10)) >>> g(10).get() @@ -506,7 +541,7 @@ Chains Tasks can be linked together so that after one task returns the other is called: -.. code-block:: python +.. code-block:: pycon >>> from celery import chain >>> from proj.tasks import add, mul @@ -518,9 +553,9 @@ is called: or a partial chain: -.. code-block:: python +.. code-block:: pycon - # (? + 4) * 8 + >>> # (? + 4) * 8 >>> g = chain(add.s(4) | mul.s(8)) >>> g(4).get() 64 @@ -528,7 +563,7 @@ or a partial chain: Chains can also be written like this: -.. code-block:: python +..
code-block:: pycon >>> (add.s(4, 4) | mul.s(8))().get() 64 @@ -538,7 +573,7 @@ Chords A chord is a group with a callback: -.. code-block:: python +.. code-block:: pycon >>> from celery import chord >>> from proj.tasks import add, xsum @@ -550,13 +585,13 @@ A chord is a group with a callback: A group chained to another task will be automatically converted to a chord: -.. code-block:: python +.. code-block:: pycon >>> (group(add.s(i, i) for i in xrange(10)) | xsum.s())().get() 90 -Since these primitives are all of the subtask type they +Since these primitives are all of the signature type they can be combined almost however you want, e.g:: >>> upload_document.s(file) | group(apply_filter.s() for filter in filters) @@ -570,17 +605,21 @@ Routing Celery supports all of the routing facilities provided by AMQP, but it also supports simple routing where messages are sent to named queues. -The :setting:`CELERY_ROUTES` setting enables you to route tasks by name -and keep everything centralized in one location:: +The :setting:`task_routes` setting enables you to route tasks by name +and keep everything centralized in one location: + +.. code-block:: python app.conf.update( - CELERY_ROUTES = { + task_routes = { 'proj.tasks.add': {'queue': 'hipri'}, }, ) You can also specify the queue at runtime -with the ``queue`` argument to ``apply_async``:: +with the ``queue`` argument to ``apply_async``: + +.. code-block:: pycon >>> from proj.tasks import add >>> add.apply_async((2, 2), queue='hipri') @@ -588,7 +627,7 @@ with the ``queue`` argument to ``apply_async``:: You can then make a worker consume from this queue by specifying the :option:`-Q` option: -.. code-block:: bash +.. code-block:: console $ celery -A proj worker -Q hipri @@ -597,7 +636,7 @@ for example you can make the worker consume from both the default queue, and the ``hipri`` queue, where the default queue is named ``celery`` for historical reasons: -.. code-block:: bash +.. 
code-block:: console $ celery -A proj worker -Q hipri,celery @@ -615,7 +654,7 @@ you can control and inspect the worker at runtime. For example you can see what tasks the worker is currently working on: -.. code-block:: bash +.. code-block:: console $ celery -A proj inspect active @@ -626,7 +665,7 @@ You can also specify one or more workers to act on the request using the :option:`--destination` option, which is a comma separated list of worker host names: -.. code-block:: bash +.. code-block:: console $ celery -A proj inspect active --destination=celery@example.com @@ -638,47 +677,47 @@ does not change anything in the worker, it only replies information and statistics about what is going on inside the worker. For a list of inspect commands you can execute: -.. code-block:: bash +.. code-block:: console $ celery -A proj inspect --help Then there is the :program:`celery control` command, which contains commands that actually changes things in the worker at runtime: -.. code-block:: bash +.. code-block:: console $ celery -A proj control --help For example you can force workers to enable event messages (used for monitoring tasks and workers): -.. code-block:: bash +.. code-block:: console $ celery -A proj control enable_events When events are enabled you can then start the event dumper to see what the workers are doing: -.. code-block:: bash +.. code-block:: console $ celery -A proj events --dump or you can start the curses interface: -.. code-block:: bash +.. code-block:: console $ celery -A proj events when you're finished monitoring you can disable events again: -.. code-block:: bash +.. code-block:: console $ celery -A proj control disable_events The :program:`celery status` command also uses remote control commands and shows a list of online workers in the cluster: -.. code-block:: bash +.. code-block:: console $ celery -A proj status @@ -693,9 +732,11 @@ All times and dates, internally and in messages uses the UTC timezone. 
When the worker receives a message, for example with a countdown set it converts that UTC time to local time. If you wish to use a different timezone than the system timezone then you must -configure that using the :setting:`CELERY_TIMEZONE` setting:: +configure that using the :setting:`timezone` setting: + +.. code-block:: python - app.conf.CELERY_TIMEZONE = 'Europe/London' + app.conf.timezone = 'Europe/London' Optimization ============ @@ -711,7 +752,7 @@ for throughput then you should read the :ref:`Optimizing Guide If you're using RabbitMQ then you should install the :mod:`librabbitmq` module, which is an AMQP client implemented in C: -.. code-block:: bash +.. code-block:: console $ pip install librabbitmq diff --git a/docs/glossary.rst b/docs/glossary.rst index ecc4561a2..c66daf2ae 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -18,6 +18,32 @@ Glossary ack Short for :term:`acknowledged`. + early acknowledgement + + Task is :term:`acknowledged` just-in-time before being executed, + meaning the task will not be redelivered to another worker if the + machine loses power, or the worker instance is abruptly killed, + mid-execution. + + Configured using :setting:`task_acks_late`. + + late acknowledgement + + Task is :term:`acknowledged` after execution (both if successful, or + if the task is raising an error), which means the task will be + redelivered to another worker in the event of the machine losing + power, or the worker instance being killed mid-execution. + + Configured using :setting:`task_acks_late`. + + early ack + + Short for :term:`early acknowledgement` + + late ack + + Short for :term:`late acknowledgement` + request Task messages are converted to *requests* within the worker. The request information is also available as the task's @@ -51,9 +77,11 @@ Glossary Idempotence is a mathematical property that describes a function that can be called multiple times without changing the result.
Practically it means that a function can be repeated many times without - unintented effects, but not necessarily side-effect free in the pure + unintended effects, but not necessarily side-effect free in the pure sense (compare to :term:`nullipotent`). + Further reading: http://en.wikipedia.org/wiki/Idempotent + nullipotent describes a function that will have the same effect, and give the same result, even if called zero or multiple times (side-effect free). @@ -74,7 +102,7 @@ Glossary prefetch multiplier The :term:`prefetch count` is configured by using the - :setting:`CELERYD_PREFETCH_MULTIPLIER` setting, which is multiplied + :setting:`worker_prefetch_multiplier` setting, which is multiplied by the number of pool slots (threads/processes/greenthreads). prefetch count diff --git a/docs/history/changelog-1.0.rst b/docs/history/changelog-1.0.rst index f10ff9451..cf0fdf143 100644 --- a/docs/history/changelog-1.0.rst +++ b/docs/history/changelog-1.0.rst @@ -20,13 +20,13 @@ If you've already used the AMQP backend this means you have to delete the previous definitions: - .. code-block:: bash + .. code-block:: console $ camqadm exchange.delete celeryresults or: - .. code-block:: bash + .. code-block:: console $ python manage.py camqadm exchange.delete celeryresults @@ -506,7 +506,7 @@ Fixes If you're using Celery with Django, you can't use `project.settings` as the settings module name, but the following should work: - .. code-block:: bash + .. code-block:: console $ python manage.py celeryd --settings=settings @@ -534,7 +534,7 @@ Fixes Excellent for deleting queues/bindings/exchanges, experimentation and testing: - .. code-block:: bash + .. code-block:: console $ camqadm 1> help @@ -543,7 +543,7 @@ Fixes When using Django, use the management command instead: - .. code-block:: bash + .. code-block:: console $ python manage.py camqadm 1> help @@ -711,7 +711,7 @@ Backward incompatible changes To launch the periodic task scheduler you have to run celerybeat: - .. 
code-block:: bash + .. code-block:: console $ celerybeat @@ -720,7 +720,7 @@ Backward incompatible changes If you only have one worker server you can embed it into the worker like this: - .. code-block:: bash + .. code-block:: console $ celeryd --beat # Embed celerybeat in celeryd. @@ -1552,7 +1552,7 @@ arguments, so be sure to flush your task queue before you upgrade. * You can now run the celery daemon by using `manage.py`: - .. code-block:: bash + .. code-block:: console $ python manage.py celeryd @@ -1693,7 +1693,7 @@ arguments, so be sure to flush your task queue before you upgrade. * Now using the Sphinx documentation system, you can build the html documentation by doing: - .. code-block:: bash + .. code-block:: console $ cd docs $ make html diff --git a/docs/history/changelog-2.0.rst b/docs/history/changelog-2.0.rst index 93f7d5a6a..b55afa688 100644 --- a/docs/history/changelog-2.0.rst +++ b/docs/history/changelog-2.0.rst @@ -278,13 +278,13 @@ Documentation If you've already hit this problem you may have to delete the declaration: - .. code-block:: bash + .. code-block:: console $ camqadm exchange.delete celerycrq or: - .. code-block:: bash + .. code-block:: console $ python manage.py camqadm exchange.delete celerycrq @@ -387,7 +387,7 @@ Documentation Use the `-S|--statedb` argument to the worker to enable it: - .. code-block:: bash + .. code-block:: console $ celeryd --statedb=/var/run/celeryd @@ -599,7 +599,7 @@ Backward incompatible changes If you've already used celery with this backend chances are you have to delete the previous declaration: - .. code-block:: bash + .. code-block:: console $ camqadm exchange.delete celeryresults @@ -638,7 +638,7 @@ News If you run `celeryev` with the `-d` switch it will act as an event dumper, simply dumping the events it receives to standard out: - .. code-block:: bash + .. code-block:: console $ celeryev -d -> celeryev: starting capture... 
@@ -742,7 +742,7 @@ News This feature is added for easily setting up routing using the `-Q` option to the worker: - .. code-block:: bash + .. code-block:: console $ celeryd -Q video, image @@ -887,7 +887,7 @@ News command would make the worker only consume from the `image` and `video` queues: - .. code-block:: bash + .. code-block:: console $ celeryd -Q image,video @@ -916,25 +916,25 @@ News Before you run the tests you need to install the test requirements: - .. code-block:: bash + .. code-block:: console $ pip install -r requirements/test.txt Running all tests: - .. code-block:: bash + .. code-block:: console $ nosetests Specifying the tests to run: - .. code-block:: bash + .. code-block:: console $ nosetests celery.tests.test_task Producing HTML coverage: - .. code-block:: bash + .. code-block:: console $ nosetests --with-coverage3 @@ -947,7 +947,7 @@ News Some examples: - .. code-block:: bash + .. code-block:: console # Advanced example with 10 workers: # * Three of the workers processes the images and video queue diff --git a/docs/history/changelog-2.1.rst b/docs/history/changelog-2.1.rst index 57b898fcd..5d4856c00 100644 --- a/docs/history/changelog-2.1.rst +++ b/docs/history/changelog-2.1.rst @@ -223,7 +223,7 @@ News Example using celeryctl to start consuming from queue "queue", in exchange "exchange", of type "direct" using binding key "key": - .. code-block:: bash + .. code-block:: console $ celeryctl inspect add_consumer queue exchange direct key $ celeryctl inspect cancel_consumer queue @@ -234,7 +234,7 @@ News Another example using :class:`~celery.task.control.inspect`: - .. code-block:: python + .. code-block:: pycon >>> from celery.task.control import inspect >>> inspect.add_consumer(queue="queue", exchange="exchange", @@ -296,7 +296,7 @@ Important Notes To do this use :program:`python` to find the location of this module: - .. code-block:: bash + .. 
code-block:: console $ python >>> import celery.platform @@ -306,7 +306,7 @@ Important Notes Here the compiled module is in :file:`/opt/devel/celery/celery/`, to remove the offending files do: - .. code-block:: bash + .. code-block:: console $ rm -f /opt/devel/celery/celery/platform.py* @@ -345,13 +345,13 @@ News 1. Create the new database tables: - .. code-block:: bash + .. code-block:: console $ python manage.py syncdb 2. Start the django-celery snapshot camera: - .. code-block:: bash + .. code-block:: console $ python manage.py celerycam @@ -403,7 +403,7 @@ News Some examples: - .. code-block:: bash + .. code-block:: console $ celeryctl apply tasks.add -a '[2, 2]' --countdown=10 @@ -482,7 +482,7 @@ News Example: - .. code-block:: bash + .. code-block:: console $ celeryd -I app1.tasks,app2.tasks @@ -692,7 +692,7 @@ Experimental multi can now be used to start, stop and restart worker nodes: - .. code-block:: bash + .. code-block:: console $ celeryd-multi start jerry elaine george kramer @@ -701,7 +701,7 @@ Experimental use the `--pidfile` and `--logfile` arguments with the `%n` format: - .. code-block:: bash + .. code-block:: console $ celeryd-multi start jerry elaine george kramer \ --logfile=/var/log/celeryd@%n.log \ @@ -709,20 +709,20 @@ Experimental Stopping: - .. code-block:: bash + .. code-block:: console $ celeryd-multi stop jerry elaine george kramer Restarting. The nodes will be restarted one by one as the old ones are shutdown: - .. code-block:: bash + .. code-block:: console $ celeryd-multi restart jerry elaine george kramer Killing the nodes (**WARNING**: Will discard currently executing tasks): - .. code-block:: bash + .. 
code-block:: console $ celeryd-multi kill jerry elaine george kramer diff --git a/docs/history/changelog-2.2.rst b/docs/history/changelog-2.2.rst index 2f8ba7894..a93613bf7 100644 --- a/docs/history/changelog-2.2.rst +++ b/docs/history/changelog-2.2.rst @@ -371,7 +371,7 @@ Fixes objects with a broken ``__repr__`` does not crash the worker, or otherwise make errors hard to understand (Issue #298). -* Remote control command ``active_queues``: did not account for queues added +* Remote control command :control:`active_queues`: did not account for queues added at runtime. In addition the dictionary replied by this command now has a different @@ -666,7 +666,7 @@ Important Notes If you telnet the port specified you will be presented with a ``pdb`` shell: - .. code-block:: bash + .. code-block:: console $ telnet localhost 6900 Connected to localhost. @@ -711,7 +711,7 @@ Important Notes If you would like to remove the old exchange you can do so by executing the following command: - .. code-block:: bash + .. code-block:: console $ camqadm exchange.delete celeryevent @@ -721,7 +721,7 @@ Important Notes Configuration options must appear after the last argument, separated by two dashes: - .. code-block:: bash + .. code-block:: console $ celery worker -l info -I tasks -- broker.host=localhost broker.vhost=/app @@ -924,7 +924,7 @@ News For example: - .. code-block:: bash + .. code-block:: console $ celery worker --config=celeryconfig.py --loader=myloader.Loader diff --git a/docs/history/changelog-2.3.rst b/docs/history/changelog-2.3.rst index 90a4454f5..d38dd51c9 100644 --- a/docs/history/changelog-2.3.rst +++ b/docs/history/changelog-2.3.rst @@ -287,7 +287,7 @@ News Example use: - .. code-block:: bash + .. 
code-block:: console $ celery multi start 4 -c 2 -- broker.host=amqp.example.com \ broker.vhost=/ \ diff --git a/docs/history/changelog-2.4.rst b/docs/history/changelog-2.4.rst index 64866b87c..1cfbd7f4e 100644 --- a/docs/history/changelog-2.4.rst +++ b/docs/history/changelog-2.4.rst @@ -205,7 +205,7 @@ Important Notes Also, programs now support the :option:`-b|--broker` option to specify a broker URL on the command-line: - .. code-block:: bash + .. code-block:: console $ celery worker -b redis://localhost diff --git a/docs/history/changelog-2.5.rst b/docs/history/changelog-2.5.rst index fa395a2c7..77936ab34 100644 --- a/docs/history/changelog-2.5.rst +++ b/docs/history/changelog-2.5.rst @@ -76,7 +76,7 @@ News @task_sent.connect def on_task_sent(**kwargs): - print("sent task: %r" % (kwargs, )) + print("sent task: %r" % (kwargs,)) - Invalid task messages are now rejected instead of acked. @@ -94,10 +94,10 @@ News Example: - .. code-block:: python + .. code-block:: pycon - >>> s = add.subtask((5, )) - >>> new = s.clone(args=(10, ), countdown=5}) + >>> s = add.subtask((5,)) + >>> new = s.clone(args=(10,), countdown=5}) >>> new.args (10, 5) @@ -145,7 +145,7 @@ Fixes Like with the worker it is now possible to configure celery settings on the command-line for celery control|inspect - .. code-block:: bash + .. code-block:: console $ celery inspect -- broker.pool_limit=30 diff --git a/docs/history/changelog-3.0.rst b/docs/history/changelog-3.0.rst index 76994ed37..0dee20c78 100644 --- a/docs/history/changelog-3.0.rst +++ b/docs/history/changelog-3.0.rst @@ -353,7 +353,7 @@ If you're looking for versions prior to 3.0.x you should go to :ref:`history`. - The ``pool_restart`` remote control command now reports an error if the :setting:`CELERYD_POOL_RESTARTS` setting is not set. -- ``celery.conf.add_defaults`` can now be used with non-dict objects. +- :meth:`@add_defaults`` can now be used with non-dict objects. - Fixed compatibility problems in the Proxy class (Issue #1087). 
@@ -596,7 +596,7 @@ If you're looking for versions prior to 3.0.x you should go to :ref:`history`. - ``subtask.id`` added as an alias to ``subtask['options'].id`` - .. code-block:: python + .. code-block:: pycon >>> s = add.s(2, 2) >>> s.id = 'my-id' @@ -690,9 +690,9 @@ If you're looking for versions prior to 3.0.x you should go to :ref:`history`. Previously it would incorrectly add a regular result instead of a group result, but now this works: - .. code-block:: python + .. code-block:: pycon - # [4 + 4, 4 + 8, 16 + 8] + >>> # [4 + 4, 4 + 8, 16 + 8] >>> res = (add.s(2, 2) | group(add.s(4), add.s(8), add.s(16)))() >>> res >> c1 = (add.s(2) | add.s(4)) >>> c2 = (add.s(8) | add.s(16)) >>> c3 = (c1 | c2) - # 8 + 2 + 4 + 8 + 16 + >>> # 8 + 2 + 4 + 8 + 16 >>> assert c3(8).get() == 38 - Subtasks can now be used with unregistered tasks. @@ -891,7 +891,7 @@ If you're looking for versions prior to 3.0.x you should go to :ref:`history`. Users can force paths to be created by calling the ``create-paths`` subcommand: - .. code-block:: bash + .. code-block:: console $ sudo /etc/init.d/celeryd create-paths @@ -971,7 +971,7 @@ If you're looking for versions prior to 3.0.x you should go to :ref:`history`. Previously calling a chord/group/chain would modify the ids of subtasks so that: - .. code-block:: python + .. code-block:: pycon >>> c = chord([add.s(2, 2), add.s(4, 4)], xsum.s()) >>> c() @@ -1077,7 +1077,7 @@ If you're looking for versions prior to 3.0.x you should go to :ref:`history`. You can do this by executing the following command: - .. code-block:: bash + .. code-block:: console $ python manage.py shell >>> from djcelery.models import PeriodicTask @@ -1108,7 +1108,7 @@ If you're looking for versions prior to 3.0.x you should go to :ref:`history`. - App instances now supports the with statement. 
- This calls the new :meth:`~celery.Celery.close` method at exit, which + This calls the new :meth:`@close` method at exit, which cleans up after the app like closing pool connections. Note that this is only necessary when dynamically creating apps, @@ -1411,16 +1411,16 @@ If you're looking for versions prior to 3.0.x you should go to :ref:`history`. } } -- New :meth:`@Celery.add_defaults` method can add new default configuration +- New :meth:`@add_defaults` method can add new default configuration dicts to the applications configuration. For example:: config = {'FOO': 10} - celery.add_defaults(config) + app.add_defaults(config) - is the same as ``celery.conf.update(config)`` except that data will not be + is the same as ``app.conf.update(config)`` except that data will not be copied, and that it will not be pickled when the worker spawns child processes. @@ -1429,7 +1429,7 @@ If you're looking for versions prior to 3.0.x you should go to :ref:`history`. def initialize_config(): # insert heavy stuff that can't be done at import time here. - celery.add_defaults(initialize_config) + app.add_defaults(initialize_config) which means the same as the above except that it will not happen until the celery configuration is actually used. @@ -1437,8 +1437,8 @@ If you're looking for versions prior to 3.0.x you should go to :ref:`history`. As an example, Celery can lazily use the configuration of a Flask app:: flask_app = Flask() - celery = Celery() - celery.add_defaults(lambda: flask_app.config) + app = Celery() + app.add_defaults(lambda: flask_app.config) - Revoked tasks were not marked as revoked in the result backend (Issue #871). @@ -1455,8 +1455,8 @@ If you're looking for versions prior to 3.0.x you should go to :ref:`history`. - New method names: - - ``Celery.default_connection()`` ➠ :meth:`~@Celery.connection_or_acquire`. - - ``Celery.default_producer()`` ➠ :meth:`~@Celery.producer_or_acquire`. + - ``Celery.default_connection()`` ➠ :meth:`~@connection_or_acquire`. 
+ - ``Celery.default_producer()`` ➠ :meth:`~@producer_or_acquire`. The old names still work for backward compatibility. diff --git a/docs/history/changelog-3.1.rst b/docs/history/changelog-3.1.rst new file mode 100644 index 000000000..d9263f2b3 --- /dev/null +++ b/docs/history/changelog-3.1.rst @@ -0,0 +1,1586 @@ +.. _changelog-3.1: + +================ + Change history +================ + +This document contains change notes for bugfix releases in the 3.1.x series +(Cipater), please see :ref:`whatsnew-3.1` for an overview of what's +new in Celery 3.1. + +.. _version-3.1.21: + +3.1.21 +====== +:release-date: 2016-03-04 11:16 A.M PST +:release-by: Ask Solem + +- **Requirements** + + - Now depends on :ref:`Kombu 3.0.34 `. + + - Now depends on :mod:`billiard` 3.3.0.23. + +- **Prefork pool**: Fixes 100% CPU loop on Linux epoll (Issue #1845). + + Also potential fix for: Issue #2142, Issue #2606 + +- **Prefork pool**: Fixes memory leak related to processes exiting + (Issue #2927). + +- **Worker**: Fixes crash at startup when trying to censor passwords + in MongoDB and Cache result backend URLs (Issue #3079, Issue #3045, + Issue #3049, Issue #3068, Issue #3073). + + Fix contributed by Maxime Verger. + +- **Task**: An exception is now raised if countdown/expires is less + than -2147483648 (Issue #3078). + +- **Programs**: :program:`celery shell --ipython` now compatible with newer + IPython versions. + +- **Programs**: The DuplicateNodeName warning emitted by inspect/control + now includes a list of the node names returned. + + Contributed by Sebastian Kalinowski. + +- **Utils**: The ``.discard(item)`` method of + :class:`~celery.datastructures.LimitedSet` did not actually remove the item + (Issue #3087). + + Fix contributed by Dave Smith. + +- **Worker**: Node name formatting now emits less confusing error message + for unmatched format keys (Issue #3016). + +- **Results**: amqp/rpc backends: Fixed deserialization of JSON exceptions + (Issue #2518). 
+ + Fix contributed by Allard Hoeve. + +- **Prefork pool**: The `process inqueue damaged` error message now includes + the original exception raised. + +- **Documentation**: Includes improvements by: + + - Jeff Widman. + +.. _version-3.1.20: + +3.1.20 +====== +:release-date: 2016-01-22 06:50 P.M UTC +:release-by: Ask Solem + +- **Requirements** + + - Now depends on :ref:`Kombu 3.0.33 `. + + - Now depends on :mod:`billiard` 3.3.0.22. + + Includes binary wheels for Microsoft Windows x86 and x86_64! + +- **Task**: Error emails now uses ``utf-8`` charset by default (Issue #2737). + +- **Task**: Retry now forwards original message headers (Issue #3017). + +- **Worker**: Bootsteps can now hook into ``on_node_join``/``leave``/``lost``. + + See :ref:`extending-consumer-gossip` for an example. + +- **Events**: Fixed handling of DST timezones (Issue #2983). + +- **Results**: Redis backend stopped respecting certain settings. + + Contributed by Jeremy Llewellyn. + +- **Results**: Database backend now properly supports JSON exceptions + (Issue #2441). + +- **Results**: Redis ``new_join`` did not properly call task errbacks on chord + error (Issue #2796). + +- **Results**: Restores Redis compatibility with redis-py < 2.10.0 + (Issue #2903). + +- **Results**: Fixed rare issue with chord error handling (Issue #2409). + +- **Tasks**: Using queue-name values in :setting:`CELERY_ROUTES` now works + again (Issue #2987). + +- **General**: Result backend password now sanitized in report output + (Issue #2812, Issue #2004). + +- **Configuration**: Now gives helpful error message when the result backend + configuration points to a module, and not a class (Issue #2945). + +- **Results**: Exceptions sent by JSON serialized workers are now properly + handled by pickle configured workers. + +- **Programs**: ``celery control autoscale`` now works (Issue #2950). + +- **Programs**: ``celery beat --detached`` now runs after fork callbacks. 
+ +- **General**: Fix for LRU cache implementation on Python 3.5 (Issue #2897). + + Contributed by Dennis Brakhane. + + Python 3.5's ``OrderedDict`` does not allow mutation while it is being + iterated over. This breaks "update" if it is called with a dict + larger than the maximum size. + + This commit changes the code to a version that does not iterate over + the dict, and should also be a little bit faster. + +- **Init scripts**: The beat init script now properly reports service as down + when no pid file can be found. + + Fix contributed by Eric Zarowny. + +- **Beat**: Added cleaning of corrupted scheduler files for some storage + backend errors (Issue #2985). + + Fix contributed by Aleksandr Kuznetsov. + +- **Beat**: Now syncs the schedule even if the schedule is empty. + + Fix contributed by Colin McIntosh. + +- **Supervisord**: Set higher process priority in supervisord example. + + Contributed by George Tantiras. + +- **Documentation**: Includes improvements by: + + Bryson + Caleb Mingle + Christopher Martin + Dieter Adriaenssens + Jason Veatch + Jeremy Cline + Juan Rossi + Kevin Harvey + Kevin McCarthy + Kirill Pavlov + Marco Buttu + Mayflower + Mher Movsisyan + Michael Floering + michael-k + Nathaniel Varona + Rudy Attias + Ryan Luckie + Steven Parker + squfrans + Tadej Janež + TakesxiSximada + Tom S + +.. _version-3.1.19: + +3.1.19 +====== +:release-date: 2015-10-26 01:00 P.M UTC +:release-by: Ask Solem + +- **Requirements** + + - Now depends on :ref:`Kombu 3.0.29 `. + + - Now depends on :mod:`billiard` 3.3.0.21. + +- **Results**: Fixed MongoDB result backend URL parsing problem + (Issue celery/kombu#375). + +- **Worker**: Task request now properly sets ``priority`` in delivery_info. + + Fix contributed by Gerald Manipon. + +- **Beat**: PyPy shelve may raise ``KeyError`` when setting keys + (Issue #2862). + +- **Programs**: :program:`celery beat --detach` now working on PyPy. + + Fix contributed by Krzysztof Bujniewicz.
+ +- **Results**: Redis result backend now ensures all pipelines are cleaned up. + + Contributed by Justin Patrin. + +- **Results**: Redis result backend now allows for timeout to be set in the + query portion of the result backend URL. + + E.g. ``CELERY_RESULT_BACKEND = 'redis://?timeout=10'`` + + Contributed by Justin Patrin. + +- **Results**: ``result.get`` now properly handles failures where the + exception value is set to :const:`None` (Issue #2560). + +- **Prefork pool**: Fixed attribute error ``proc.dead``. + +- **Worker**: Fixed worker hanging when gossip/heartbeat disabled + (Issue #1847). + + Fix contributed by Aaron Webber and Bryan Helmig. + +- **Results**: MongoDB result backend now supports pymongo 3.x + (Issue #2744). + + Fix contributed by Sukrit Khera. + +- **Results**: RPC/amqp backends did not deserialize exceptions properly + (Issue #2691). + + Fix contributed by Sukrit Khera. + +- **Programs**: Fixed problem with :program:`celery amqp`'s + ``basic_publish`` (Issue #2013). + +- **Worker**: Embedded beat now properly sets app for thread/process + (Issue #2594). + +- **Documentation**: Many improvements and typos fixed. + + Contributions by: + + Carlos Garcia-Dubus + D. Yu + jerry + Jocelyn Delalande + Josh Kupershmidt + Juan Rossi + kanemra + Paul Pearce + Pavel Savchenko + Sean Wang + Seungha Kim + Zhaorong Ma + +.. _version-3.1.18: + +3.1.18 +====== +:release-date: 2015-04-22 05:30 P.M UTC +:release-by: Ask Solem + +- **Requirements** + + - Now depends on :ref:`Kombu 3.0.25 `. + + - Now depends on :mod:`billiard` 3.3.0.20. + +- **Django**: Now supports Django 1.8 (Issue #2536). + + Fix contributed by Bence Tamas and Mickaël Penhard. + +- **Results**: MongoDB result backend now compatible with pymongo 3.0. + + Fix contributed by Fatih Sucu. + +- **Tasks**: Fixed bug only happening when a task has multiple callbacks + (Issue #2515). + + Fix contributed by NotSqrt. + +- **Commands**: Preload options now support ``--arg value`` syntax. 
+ + Fix contributed by John Anderson. + +- **Compat**: A typo caused ``celery.log.setup_logging_subsystem`` to be + undefined. + + Fix contributed by Gunnlaugur Thor Briem. + +- **init scripts**: The celerybeat generic init script now uses + ``/bin/sh`` instead of bash (Issue #2496). + + Fix contributed by Jelle Verstraaten. + +- **Django**: Fixed a :exc:`TypeError` sometimes occurring in logging + when validating models. + + Fix contributed by Alexander. + +- **Commands**: Worker now supports new ``--executable`` argument that can + be used with ``--detach``. + + Contributed by Bert Vanderbauwhede. + +- **Canvas**: Fixed crash in chord unlock fallback task (Issue #2404). + +- **Worker**: Fixed rare crash occurring with ``--autoscale`` enabled + (Issue #2411). + +- **Django**: Properly recycle worker Django database connections when the + Django ``CONN_MAX_AGE`` setting is enabled (Issue #2453). + + Fix contributed by Luke Burden. + +.. _version-3.1.17: + +3.1.17 +====== +:release-date: 2014-11-19 03:30 P.M UTC +:release-by: Ask Solem + +.. admonition:: Do not enable the :setting:`CELERYD_FORCE_EXECV` setting! + + Please review your configuration and disable this option if you're using the + RabbitMQ or Redis transport. + + Keeping this option enabled after 3.1 means the async based prefork pool will + be disabled, which can easily cause instability. + +- **Requirements** + + - Now depends on :ref:`Kombu 3.0.24 `. + + Includes the new Qpid transport coming in Celery 3.2, backported to + support those who may still require Python 2.6 compatibility. + + - Now depends on :mod:`billiard` 3.3.0.19. + + - ``celery[librabbitmq]`` now depends on librabbitmq 1.6.1. + +- **Task**: The timing of ETA/countdown tasks were off after the example ``LocalTimezone`` + implementation in the Python documentation no longer works in Python 3.4. + (Issue #2306). + +- **Task**: Raising :exc:`~celery.exceptions.Ignore` no longer sends + ``task-failed`` event (Issue #2365). 
+ +- **Redis result backend**: Fixed unbound local errors. + + Fix contributed by Thomas French. + +- **Task**: Callbacks were not called properly if ``link`` was a list of + signatures (Issue #2350). + +- **Canvas**: chain and group now handle json serialized signatures + (Issue #2076). + +- **Results**: ``.join_native()`` would accidentally treat the ``STARTED`` + state as being ready (Issue #2326). + + This could lead to the chord callback being called with invalid arguments + when using chords with the :setting:`CELERY_TRACK_STARTED` setting + enabled. + +- **Canvas**: The ``chord_size`` attribute is now set for all canvas primitives, + making sure more combinations will work with the ``new_join`` optimization + for Redis (Issue #2339). + +- **Task**: Fixed problem with app not being properly propagated to + ``trace_task`` in all cases. + + Fix contributed by kristaps. + +- **Worker**: Expires from task message now associated with a timezone. + + Fix contributed by Albert Wang. + +- **Cassandra result backend**: Fixed problems when using detailed mode. + + When using the Cassandra backend in detailed mode, a regression + caused errors when attempting to retrieve results. + + Fix contributed by Gino Ledesma. + +- **Mongodb Result backend**: Pickling the backend instance will now include + the original url (Issue #2347). + + Fix contributed by Sukrit Khera. + +- **Task**: Exception info was not properly set for tasks raising + :exc:`~celery.exceptions.Reject` (Issue #2043). + +- **Worker**: Duplicates are now removed when loading the set of revoked tasks + from the worker state database (Issue #2336). + +- **celery.contrib.rdb**: Fixed problems with ``rdb.set_trace`` calling stop + from the wrong frame. + + Fix contributed by llllllllll. + +- **Canvas**: ``chain`` and ``chord`` can now be immutable. + +- **Canvas**: ``chord.apply_async`` will now keep partial args set in + ``self.args`` (Issue #2299).
+ +- **Results**: Small refactoring so that results are decoded the same way in + all result backends. + +- **Logging**: The ``processName`` format was introduced in Py2.6.2 so for + compatibility this format is now excluded when using earlier versions + (Issue #1644). + +.. _version-3.1.16: + +3.1.16 +====== +:release-date: 2014-10-03 06:00 P.M UTC +:release-by: Ask Solem + +- **Worker**: 3.1.15 broke ``-Ofair`` behavior (Issue #2286). + + This regression could result in all tasks executing + in a single child process if ``-Ofair`` was enabled. + +- **Canvas**: ``celery.signature`` now properly forwards app argument + in all cases. + +- **Task**: ``.retry()`` did not raise the exception correctly + when called without a current exception. + + Fix contributed by Andrea Rabbaglietti. + +- **Worker**: The ``enable_events`` remote control command + disabled worker-related events by mistake (Issue #2272). + + Fix contributed by Konstantinos Koukopoulos. + +- **Django**: Adds support for Django 1.7 class names in INSTALLED_APPS + when using ``app.autodiscover_tasks()`` (Issue #2248). + +- **Sphinx**: ``celery.contrib.sphinx`` now uses ``getfullargspec`` + on Python 3 (Issue #2302). + +- **Redis/Cache Backends**: Chords will now run at most once if one or more tasks + in the chord are executed multiple times for some reason. + +.. _version-3.1.15: + +3.1.15 +====== +:release-date: 2014-09-14 11:00 P.M UTC +:release-by: Ask Solem + +- **Django**: Now makes sure ``django.setup()`` is called + before importing any task modules (Django 1.7 compatibility, Issue #2227) + +- **Results**: ``result.get()`` was misbehaving by calling + ``backend.get_task_meta`` in a finally call leading to + AMQP result backend queues not being properly cleaned up (Issue #2245). + +.. _version-3.1.14: + +3.1.14 +====== +:release-date: 2014-09-08 03:00 P.M UTC +:release-by: Ask Solem + +- **Requirements** + + - Now depends on :ref:`Kombu 3.0.22 `. 
+ +- **Init scripts**: The generic worker init scripts ``status`` command + now gets an accurate pidfile list (Issue #1942). + +- **Init scripts**: The generic beat script now implements the ``status`` + command. + + Contributed by John Whitlock. + +- **Commands**: Multi now writes informational output to stdout instead of stderr. + +- **Worker**: Now ignores not implemented error for ``pool.restart`` + (Issue #2153). + +- **Task**: Retry no longer raises retry exception when executed in eager + mode (Issue #2164). + +- **AMQP Result backend**: Now ensured ``on_interval`` is called at least + every second for blocking calls to properly propagate parent errors. + +- **Django**: Compatibility with Django 1.7 on Windows (Issue #2126). + +- **Programs**: `--umask` argument can be now specified in both octal (if starting + with 0) or decimal. + + +.. _version-3.1.13: + +3.1.13 +====== + +Security Fixes +-------------- + +* [Security: `CELERYSA-0002`_] Insecure default umask. + + The built-in utility used to daemonize the Celery worker service sets + an insecure umask by default (umask 0). + + This means that any files or directories created by the worker will + end up having world-writable permissions. + + Special thanks to Red Hat for originally discovering and reporting the + issue! + + This version will no longer set a default umask by default, so if unset + the umask of the parent process will be used. + +.. _`CELERYSA-0002`: + http://github.com/celery/celery/tree/master/docs/sec/CELERYSA-0002.txt + +News +---- + +- **Requirements** + + - Now depends on :ref:`Kombu 3.0.21 `. + + - Now depends on :mod:`billiard` 3.3.0.18. + + +- **App**: ``backend`` argument now also sets the :setting:`CELERY_RESULT_BACKEND` + setting. + +- **Task**: ``signature_from_request`` now propagates ``reply_to`` so that + the RPC backend works with retried tasks (Issue #2113). + +- **Task**: ``retry`` will no longer attempt to requeue the task if sending + the retry message fails. 
+ + Unrelated exceptions being raised could cause a message loop, so it was + better to remove this behavior. + +- **Beat**: Accounts for standard 1ms drift by always waking up 0.010s + earlier. + + This will adjust the latency so that the periodic tasks will not move + 1ms after every invocation. + +- Documentation fixes + + Contributed by Yuval Greenfield, Lucas Wiman, nicholsonjf + +- **Worker**: Removed an outdated assert statement that could lead to errors + being masked (Issue #2086). + + + +.. _version-3.1.12: + +3.1.12 +====== +:release-date: 2014-06-09 10:12 P.M UTC +:release-by: Ask Solem + +- **Requirements** + + Now depends on :ref:`Kombu 3.0.19 `. + +- **App**: Connections were not being closed after fork due to an error in the + after fork handler (Issue #2055). + + This could manifest itself by causing framing errors when using RabbitMQ. + (``Unexpected frame``). + +- **Django**: ``django.setup()`` was being called too late when + using Django 1.7 (Issue #1802). + +- **Django**: Fixed problems with event timezones when using Django + (``Substantial drift``). + + Celery did not take into account that Django modifies the + ``time.timezone`` attributes and friends. + +- **Canvas**: ``Signature.link`` now works when the link option is a scalar + value (Issue #2019). + +- **Prefork pool**: Fixed race conditions for when file descriptors are + removed from the event loop. + + Fix contributed by Roger Hu. + +- **Prefork pool**: Improved solution for dividing tasks between child + processes. + + This change should improve performance when there are many child + processes, and also decrease the chance that two subsequent tasks are + written to the same child process. + +- **Worker**: Now ignores unknown event types, instead of crashing. + + Fix contributed by Illes Solt. + +- **Programs**: :program:`celery worker --detach` no longer closes open file + descriptors when :envvar:`C_FAKEFORK` is used so that the workers output + can be seen.
+ +- **Programs**: The default working directory for :program:`celery worker + --detach` is now the current working directory, not ``/``. + +- **Canvas**: ``signature(s, app=app)`` did not upgrade serialized signatures + to their original class (``subtask_type``) when the ``app`` keyword argument + was used. + +- **Control**: The ``duplicate nodename`` warning emitted by control commands + now shows the duplicate node name. + +- **Tasks**: Can now call ``ResultSet.get()`` on a result set without members. + + Fix contributed by Alexey Kotlyarov. + +- **App**: Fixed strange traceback mangling issue for + ``app.connection_or_acquire``. + +- **Programs**: The :program:`celery multi stopwait` command is now documented + in usage. + +- **Other**: Fixed cleanup problem with ``PromiseProxy`` when an error is + raised while trying to evaluate the promise. + +- **Other**: The utility used to censor configuration values now handles + non-string keys. + + Fix contributed by Luke Pomfrey. + +- **Other**: The ``inspect conf`` command did not handle non-string keys well. + + Fix contributed by Jay Farrimond. + +- **Programs**: Fixed argument handling problem in + :program:`celery worker --detach`. + + Fix contributed by Dmitry Malinovsky. + +- **Programs**: :program:`celery worker --detach` did not forward working + directory option (Issue #2003). + +- **Programs**: :program:`celery inspect registered` no longer includes + the list of built-in tasks. + +- **Worker**: The ``requires`` attribute for boot steps were not being handled + correctly (Issue #2002). + +- **Eventlet**: The eventlet pool now supports the ``pool_grow`` and + ``pool_shrink`` remote control commands. + + Contributed by Mher Movsisyan. + +- **Eventlet**: The eventlet pool now implements statistics for + :program:``celery inspect stats``. + + Contributed by Mher Movsisyan. + +- **Documentation**: Clarified ``Task.rate_limit`` behavior. + + Contributed by Jonas Haag. 
+ +- **Documentation**: ``AbortableTask`` examples now updated to use the new + API (Issue #1993). + +- **Documentation**: The security documentation examples used an out of date + import. + + Fix contributed by Ian Dees. + +- **Init scripts**: The CentOS init scripts did not quote + :envvar:`CELERY_CHDIR`. + + Fix contributed by ffeast. + +.. _version-3.1.11: + +3.1.11 +====== +:release-date: 2014-04-16 11:00 P.M UTC +:release-by: Ask Solem + +- **Now compatible with RabbitMQ 3.3.0** + + You need to run Celery 3.1.11 or later when using RabbitMQ 3.3, + and if you use the ``librabbitmq`` module you also have to upgrade + to librabbitmq 1.5.0: + + .. code-block:: bash + + $ pip install -U librabbitmq + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.0.15 `. + + - Now depends on `billiard 3.3.0.17`_. + + - Bundle ``celery[librabbitmq]`` now depends on :mod:`librabbitmq` 1.5.0. + +.. _`billiard 3.3.0.17`: + https://github.com/celery/billiard/blob/master/CHANGES.txt + +- **Tasks**: The :setting:`CELERY_DEFAULT_DELIVERY_MODE` setting was being + ignored (Issue #1953). + +- **Worker**: New :option:`--heartbeat-interval` can be used to change the + time (in seconds) between sending event heartbeats. + + Contributed by Matthew Duggan and Craig Northway. + +- **App**: Fixed memory leaks occurring when creating lots of temporary + app instances (Issue #1949). + +- **MongoDB**: SSL configuration with non-MongoDB transport breaks MongoDB + results backend (Issue #1973). + + Fix contributed by Brian Bouterse. + +- **Logging**: The color formatter accidentally modified ``record.msg`` + (Issue #1939). + +- **Results**: Fixed problem with task trails being stored multiple times, + causing ``result.collect()`` to hang (Issue #1936, Issue #1943). + +- **Results**: ``ResultSet`` now implements a ``.backend`` attribute for + compatibility with ``AsyncResult``. + +- **Results**: ``.forget()`` now also clears the local cache. 
+ +- **Results**: Fixed problem with multiple calls to ``result._set_cache`` + (Issue #1940). + +- **Results**: ``join_native`` populated result cache even if disabled. + +- **Results**: The YAML result serializer should now be able to handle storing + exceptions. + +- **Worker**: No longer sends task error emails for expected errors (in + ``@task(throws=(..., )))``. + +- **Canvas**: Fixed problem with exception deserialization when using + the JSON serializer (Issue #1987). + +- **Eventlet**: Fixes crash when ``celery.contrib.batches`` attempted to + cancel a non-existing timer (Issue #1984). + +- Can now import ``celery.version_info_t``, and ``celery.five`` (Issue #1968). + + +.. _version-3.1.10: + +3.1.10 +====== +:release-date: 2014-03-22 09:40 P.M UTC +:release-by: Ask Solem + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.0.14 `. + +- **Results**: + + Reliability improvements to the SQLAlchemy database backend. Previously the + connection from the MainProcess was improperly shared with the workers. + (Issue #1786) + +- **Redis:** Important note about events (Issue #1882). + + There is a new transport option for Redis that enables monitors + to filter out unwanted events. Enabling this option in the workers + will increase performance considerably: + + .. code-block:: python + + BROKER_TRANSPORT_OPTIONS = {'fanout_patterns': True} + + Enabling this option means that your workers will not be able to see + workers with the option disabled (or is running an older version of + Celery), so if you do enable it then make sure you do so on all + nodes. + + See :ref:`redis-caveats`. + + This will be the default in Celery 3.2. + +- **Results**: The :class:`@AsyncResult` object now keeps a local cache + of the final state of the task. + + This means that the global result cache can finally be disabled, + and you can do so by setting :setting:`CELERY_MAX_CACHED_RESULTS` to + :const:`-1`. 
The lifetime of the cache will then be bound to the + lifetime of the result object, which will be the default behavior + in Celery 3.2. + +- **Events**: The "Substantial drift" warning message is now logged once + per node name only (Issue #1802). + +- **Worker**: Ability to use one log file per child process when using the + prefork pool. + + This can be enabled by using the new ``%i`` and ``%I`` format specifiers + for the log file name. See :ref:`worker-files-process-index`. + +- **Redis**: New experimental chord join implementation. + + This is an optimization for chords when using the Redis result backend, + where the join operation is now considerably faster and using less + resources than the previous strategy. + + The new option can be set in the result backend URL: + + CELERY_RESULT_BACKEND = 'redis://localhost?new_join=1' + + This must be enabled manually as it's incompatible + with workers and clients not using it, so be sure to enable + the option in all clients and workers if you decide to use it. + +- **Multi**: With ``-opt:index`` (e.g. :option:`-c:1`) the index now always refers + to the position of a node in the argument list. + + This means that referring to a number will work when specifying a list + of node names and not just for a number range: + + .. code-block:: bash + + celery multi start A B C D -c:1 4 -c:2-4 8 + + In this example ``1`` refers to node A (as it's the first node in the + list). + +- **Signals**: The sender argument to ``Signal.connect`` can now be a proxy + object, which means that it can be used with the task decorator + (Issue #1873). + +- **Task**: A regression caused the ``queue`` argument to ``Task.retry`` to be + ignored (Issue #1892). + +- **App**: Fixed error message for :meth:`~@Celery.config_from_envvar`. + + Fix contributed by Dmitry Malinovsky. + +- **Canvas**: Chords can now contain a group of other chords (Issue #1921). 
+ +- **Canvas**: Chords can now be combined when using the amqp result backend + (a chord where the callback is also a chord). + +- **Canvas**: Calling ``result.get()`` for a chain task will now complete + even if one of the tasks in the chain is ``ignore_result=True`` + (Issue #1905). + +- **Canvas**: Worker now also logs chord errors. + +- **Canvas**: A chord task raising an exception will now result in + any errbacks (``link_error``) to the chord callback to also be called. + +- **Results**: Reliability improvements to the SQLAlchemy database backend + (Issue #1786). + + Previously the connection from the ``MainProcess`` was improperly + inherited by child processes. + + Fix contributed by Ionel Cristian Mărieș. + +- **Task**: Task callbacks and errbacks are now called using the group + primitive. + +- **Task**: ``Task.apply`` now properly sets ``request.headers`` + (Issue #1874). + +- **Worker**: Fixed ``UnicodeEncodeError`` occurring when worker is started + by `supervisord`. + + Fix contributed by Codeb Fan. + +- **Beat**: No longer attempts to upgrade a newly created database file + (Issue #1923). + +- **Beat**: New setting :setting:`CELERYBEAT_SYNC_EVERY` can be used + to control file sync by specifying the number of tasks to send between + each sync. + + Contributed by Chris Clark. + +- **Commands**: :program:`celery inspect memdump` no longer crashes + if the :mod:`psutil` module is not installed (Issue #1914). + +- **Worker**: Remote control commands now always accept json serialized + messages (Issue #1870). + +- **Worker**: Gossip will now drop any task related events it receives + by mistake (Issue #1882). + + +.. _version-3.1.9: + +3.1.9 +===== +:release-date: 2014-02-10 06:43 P.M UTC +:release-by: Ask Solem + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.0.12 `. + +- **Prefork pool**: Better handling of exiting child processes. + + Fix contributed by Ionel Cristian Mărieș.
+ +- **Prefork pool**: Now makes sure all file descriptors are removed + from the hub when a process is cleaned up. + + Fix contributed by Ionel Cristian Mărieș. + +- **New Sphinx extension**: for autodoc documentation of tasks: + :mod:`celery.contrib.sphinx` (Issue #1833). + +- **Django**: Now works with Django 1.7a1. + +- **Task**: Task.backend is now a property that forwards to ``app.backend`` + if no custom backend has been specified for the task (Issue #1821). + +- **Generic init scripts**: Fixed bug in stop command. + + Fix contributed by Rinat Shigapov. + +- **Generic init scripts**: Fixed compatibility with GNU :manpage:`stat`. + + Fix contributed by Paul Kilgo. + +- **Generic init scripts**: Fixed compatibility with the minimal + :program:`dash` shell (Issue #1815). + +- **Commands**: The :program:`celery amqp basic.publish` command was not + working properly. + + Fix contributed by Andrey Voronov. + +- **Commands**: No longer emits an error message if the pidfile exists + and the process is still alive (Issue #1855). + +- **Commands**: Better error message for missing arguments to preload + options (Issue #1860). + +- **Commands**: :program:`celery -h` did not work because of a bug in the + argument parser (Issue #1849). + +- **Worker**: Improved error message for message decoding errors. + +- **Time**: Now properly parses the `Z` timezone specifier in ISO 8601 date + strings. + + Fix contributed by Martin Davidsson. + +- **Worker**: Now uses the *negotiated* heartbeat value to calculate + how often to run the heartbeat checks. + +- **Beat**: Fixed problem with beat hanging after the first schedule + iteration (Issue #1822). + + Fix contributed by Roger Hu. + +- **Signals**: The header argument to :signal:`before_task_publish` is now + always a dictionary instance so that signal handlers can add headers. + +- **Worker**: A list of message headers is now included in message related + errors. + +..
_version-3.1.8: + +3.1.8 +===== +:release-date: 2014-01-17 10:45 P.M UTC +:release-by: Ask Solem + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.0.10 `. + + - Now depends on `billiard 3.3.0.14`_. + +.. _`billiard 3.3.0.14`: + https://github.com/celery/billiard/blob/master/CHANGES.txt + +- **Worker**: The event loop was not properly reinitialized at consumer restart + which would force the worker to continue with a closed ``epoll`` instance on + Linux, resulting in a crash. + +- **Events:** Fixed issue with both heartbeats and task events that could + result in the data not being kept in sorted order. + + As a result this would force the worker to log "heartbeat missed" + events even though the remote node was sending heartbeats in a timely manner. + +- **Results:** The pickle serializer no longer converts group results to tuples, + and will keep the original type (*Issue #1750*). + +- **Results:** ``ResultSet.iterate`` is now pending deprecation. + + The method will be deprecated in version 3.2 and removed in version 3.3. + + Use ``result.get(callback=)`` (or ``result.iter_native()`` where available) + instead. + +- **Worker**\|eventlet/gevent: A regression caused ``Ctrl+C`` to be ineffective + for shutdown. + +- **Redis result backend:** Now using a pipeline to store state changes + for improved performance. + + Contributed by Pepijn de Vos. + +- **Redis result backend:** Will now retry storing the result if disconnected. + +- **Worker**\|gossip: Fixed attribute error occurring when another node leaves. + + Fix contributed by Brodie Rao. + +- **Generic init scripts:** Now runs a check at startup to verify + that any configuration scripts are owned by root and that they + are not world/group writeable. + + The init script configuration is a shell script executed by root, + so this is a preventive measure to ensure that users do not + leave this file vulnerable to changes by unprivileged users. + + .. 
note:: + + Note that upgrading celery will not update the init scripts, + instead you need to manually copy the improved versions from the + source distribution: + https://github.com/celery/celery/tree/3.1/extra/generic-init.d + +- **Commands**: The :program:`celery purge` command now warns that the operation + will delete all tasks and prompts the user for confirmation. + + A new :option:`-f` was added that can be used to disable + interactive mode. + +- **Task**: ``.retry()`` did not raise the value provided in the ``exc`` argument + when called outside of an error context (*Issue #1755*). + +- **Commands:** The :program:`celery multi` command did not forward command + line configuration to the target workers. + + The change means that multi will forward the special ``--`` argument and + configuration content at the end of the arguments line to the specified + workers. + + Example using command-line configuration to set a broker heartbeat + from :program:`celery multi`: + + .. code-block:: bash + + $ celery multi start 1 -c3 -- broker.heartbeat=30 + + Fix contributed by Antoine Legrand. + +- **Canvas:** ``chain.apply_async()`` now properly forwards execution options. + + Fix contributed by Konstantin Podshumok. + +- **Redis result backend:** Now takes ``connection_pool`` argument that can be + used to change the connection pool class/constructor. + +- **Worker:** Now truncates very long arguments and keyword arguments logged by + the pool at debug severity. + +- **Worker:** The worker now closes all open files on :sig:`SIGHUP` (regression) + (*Issue #1768*). + + Fix contributed by Brodie Rao + +- **Worker:** Will no longer accept remote control commands while the + worker startup phase is incomplete (*Issue #1741*). + +- **Commands:** The output of the event dump utility + (:program:`celery events -d`) can now be piped into other commands. + +- **Documentation:** The RabbitMQ installation instructions for OS X was + updated to use modern homebrew practices. 
+ + Contributed by Jon Chen. + +- **Commands:** The :program:`celery inspect conf` utility now works. + +- **Commands:** The :option:`-no-color` argument was not respected by + all commands (*Issue #1799*). + +- **App:** Fixed rare bug with ``autodiscover_tasks()`` (*Issue #1797*). + +- **Distribution:** The sphinx docs will now always add the parent directory + to path so that the current celery source code is used as a basis for + API documentation (*Issue #1782*). + +- **Documentation:** Supervisord examples contained an extraneous '-' in a + `--logfile` argument example. + + Fix contributed by Mohammad Almeer. + +.. _version-3.1.7: + +3.1.7 +===== +:release-date: 2013-12-17 06:00 P.M UTC +:release-by: Ask Solem + +.. _v317-important: + +Important Notes +--------------- + +Init script security improvements +--------------------------------- + +Where the generic init scripts (for ``celeryd``, and ``celerybeat``) before +delegated the responsibility of dropping privileges to the target application, +it will now use ``su`` instead, so that the Python program is not trusted +with superuser privileges. + +This is not in reaction to any known exploit, but it will +limit the possibility of a privilege escalation bug being abused in the +future. + +You have to upgrade the init scripts manually from this directory: +https://github.com/celery/celery/tree/3.1/extra/generic-init.d + +AMQP result backend +~~~~~~~~~~~~~~~~~~~ + +The 3.1 release accidentally left the amqp backend configured to be +non-persistent by default. + +Upgrading from 3.0 would give a "not equivalent" error when attempting to +set or retrieve results for a task. That is unless you manually set the +persistence setting:: + + CELERY_RESULT_PERSISTENT = True + +This version restores the previous value so if you already forced +the upgrade by removing the existing exchange you must either +keep the configuration by setting ``CELERY_RESULT_PERSISTENT = False`` +or delete the ``celeryresults`` exchange again. 
+ +Synchronous subtasks +~~~~~~~~~~~~~~~~~~~~ + +Tasks waiting for the result of a subtask will now emit +a :exc:`RuntimeWarning` warning when using the prefork pool, +and in 3.2 this will result in an exception being raised. + +It's not legal for tasks to block by waiting for subtasks +as this is likely to lead to resource starvation and eventually +deadlock when using the prefork pool (see also :ref:`task-synchronous-subtasks`). + +If you really know what you are doing you can avoid the warning (and +the future exception being raised) by moving the operation in a whitelist +block: + +.. code-block:: python + + from celery.result import allow_join_result + + @app.task + def misbehaving(): + result = other_task.delay() + with allow_join_result(): + result.get() + +Note also that if you wait for the result of a subtask in any form +when using the prefork pool you must also disable the pool prefetching +behavior with the worker :ref:`-Ofair option `. + +.. _v317-fixes: + +Fixes +----- + +- Now depends on :ref:`Kombu 3.0.8 `. + +- Now depends on :mod:`billiard` 3.3.0.13 + +- Events: Fixed compatibility with non-standard json libraries + that sends float as :class:`decimal.Decimal` (Issue #1731) + +- Events: State worker objects now always defines attributes: + ``active``, ``processed``, ``loadavg``, ``sw_ident``, ``sw_ver`` + and ``sw_sys``. + +- Worker: Now keeps count of the total number of tasks processed, + not just by type (``all_active_count``). + +- Init scripts: Fixed problem with reading configuration file + when the init script is symlinked to a runlevel (e.g. ``S02celeryd``). + (Issue #1740). + + This also removed a rarely used feature where you can symlink the script + to provide alternative configurations. You instead copy the script + and give it a new name, but perhaps a better solution is to provide + arguments to ``CELERYD_OPTS`` to separate them: + + .. 
code-block:: bash + + CELERYD_NODES="X1 X2 Y1 Y2" + CELERYD_OPTS="-A:X1 x -A:X2 x -A:Y1 y -A:Y2 y" + +- Fallback chord unlock task is now always called after the chord header + (Issue #1700). + + This means that the unlock task will not be started if there's + an error sending the header. + +- Celery command: Fixed problem with arguments for some control commands. + + Fix contributed by Konstantin Podshumok. + +- Fixed bug in ``utcoffset`` where the offset when in DST would be + completely wrong (Issue #1743). + +- Worker: Errors occurring while attempting to serialize the result of a + task will now cause the task to be marked with failure and a + :class:`kombu.exceptions.EncodingError` error. + + Fix contributed by Ionel Cristian Mărieș. + +- Worker with ``-B`` argument did not properly shut down the beat instance. + +- Worker: The ``%n`` and ``%h`` formats are now also supported by the + :option:`--logfile`, :option:`--pidfile` and :option:`--statedb` arguments. + + Example: + + .. code-block:: bash + + $ celery -A proj worker -n foo@%h --logfile=%n.log --statedb=%n.db + +- Redis/Cache result backends: Will now timeout if keys evicted while trying + to join a chord. + +- The fallback unlock chord task now raises :exc:`Retry` so that the + retry event is properly logged by the worker. + +- Multi: Will no longer apply Eventlet/gevent monkey patches (Issue #1717). + +- Redis result backend: Now supports UNIX sockets. + + Like the Redis broker transport the result backend now also supports + using ``redis+socket:///tmp/redis.sock`` URLs. + + Contributed by Alcides Viamontes Esquivel. + +- Events: Events sent by clients were mistaken for worker related events + (Issue #1714). + + For ``events.State`` the tasks now have a ``Task.client`` attribute + that is set when a ``task-sent`` event is being received. + + Also, a client's logical clock is not in sync with the cluster so + they live in a "time bubble". 
So for this reason monitors will no + longer attempt to merge with the clock of an event sent by a client, + instead it will fake the value by using the current clock with + a skew of -1. + +- Prefork pool: The method used to find terminated processes was flawed + in that it did not also take into account missing popen objects. + +- Canvas: ``group`` and ``chord`` now works with anon signatures as long + as the group/chord object is associated with an app instance (Issue #1744). + + You can pass the app by using ``group(..., app=app)``. + +.. _version-3.1.6: + +3.1.6 +===== +:release-date: 2013-12-02 06:00 P.M UTC +:release-by: Ask Solem + +- Now depends on :mod:`billiard` 3.3.0.10. + +- Now depends on :ref:`Kombu 3.0.7 `. + +- Fixed problem where Mingle caused the worker to hang at startup + (Issue #1686). + +- Beat: Would attempt to drop privileges twice (Issue #1708). + +- Windows: Fixed error with ``geteuid`` not being available (Issue #1676). + +- Tasks can now provide a list of expected error classes (Issue #1682). + + The list should only include errors that the task is expected to raise + during normal operation:: + + @task(throws=(KeyError, HttpNotFound)) + + What happens when an exceptions is raised depends on the type of error: + + - Expected errors (included in ``Task.throws``) + + Will be logged using severity ``INFO``, and traceback is excluded. + + - Unexpected errors + + Will be logged using severity ``ERROR``, with traceback included. + +- Cache result backend now compatible with Python 3 (Issue #1697). + +- CentOS init script: Now compatible with sys-v style init symlinks. + + Fix contributed by Jonathan Jordan. + +- Events: Fixed problem when task name is not defined (Issue #1710). + + Fix contributed by Mher Movsisyan. + +- Task: Fixed unbound local errors (Issue #1684). + + Fix contributed by Markus Ullmann. + +- Canvas: Now unrolls groups with only one task (optimization) (Issue #1656). + +- Task: Fixed problem with eta and timezones. 
+ + Fix contributed by Alexander Koval. + +- Django: Worker now performs model validation (Issue #1681). + +- Task decorator now emits less confusing errors when used with + incorrect arguments (Issue #1692). + +- Task: New method ``Task.send_event`` can be used to send custom events + to Flower and other monitors. + +- Fixed a compatibility issue with non-abstract task classes + +- Events from clients now uses new node name format (``gen@``). + +- Fixed rare bug with Callable not being defined at interpreter shutdown + (Issue #1678). + + Fix contributed by Nick Johnson. + +- Fixed Python 2.6 compatibility (Issue #1679). + +.. _version-3.1.5: + +3.1.5 +===== +:release-date: 2013-11-21 06:20 P.M UTC +:release-by: Ask Solem + +- Now depends on :ref:`Kombu 3.0.6 `. + +- Now depends on :mod:`billiard` 3.3.0.8 + +- App: ``config_from_object`` is now lazy (Issue #1665). + +- App: ``autodiscover_tasks`` is now lazy. + + Django users should now wrap access to the settings object + in a lambda:: + + app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) + + this ensures that the settings object is not prepared + prematurely. + +- Fixed regression for ``--app`` argument experienced by + some users (Issue #1653). + +- Worker: Now respects the ``--uid`` and ``--gid`` arguments + even if ``--detach`` is not enabled. + +- Beat: Now respects the ``--uid`` and ``--gid`` arguments + even if ``--detach`` is not enabled. + +- Python 3: Fixed unorderable error occuring with the worker ``-B`` + argument enabled. + +- ``celery.VERSION`` is now a named tuple. + +- ``maybe_signature(list)`` is now applied recursively (Issue #1645). + +- ``celery shell`` command: Fixed ``IPython.frontend`` deprecation warning. + +- The default app no longer includes the builtin fixups. + + This fixes a bug where ``celery multi`` would attempt + to load the Django settings module before entering + the target working directory. + +- The Django daemonization tutorial was changed. 
+ + Users no longer have to explicitly export ``DJANGO_SETTINGS_MODULE`` + in :file:`/etc/default/celeryd` when the new project layout is used. + +- Redis result backend: expiry value can now be 0 (Issue #1661). + +- Censoring settings now accounts for non-string keys (Issue #1663). + +- App: New ``autofinalize`` option. + + Apps are automatically finalized when the task registry is accessed. + You can now disable this behavior so that an exception is raised + instead. + + Example: + + .. code-block:: python + + app = Celery(autofinalize=False) + + # raises RuntimeError + tasks = app.tasks + + @app.task + def add(x, y): + return x + y + + # raises RuntimeError + add.delay(2, 2) + + app.finalize() + # no longer raises: + tasks = app.tasks + add.delay(2, 2) + +- The worker did not send monitoring events during shutdown. + +- Worker: Mingle and gossip is now automatically disabled when + used with an unsupported transport (Issue #1664). + +- ``celery`` command: Preload options now supports + the rare ``--opt value`` format (Issue #1668). + +- ``celery`` command: Accidentally removed options + appearing before the subcommand, these are now moved to the end + instead. + +- Worker now properly responds to ``inspect stats`` commands + even if received before startup is complete (Issue #1659). + +- :signal:`task_postrun` is now sent within a finally block, to make + sure the signal is always sent. + +- Beat: Fixed syntax error in string formatting. + + Contributed by nadad. + +- Fixed typos in the documentation. + + Fixes contributed by Loic Bistuer, sunfinite. + +- Nested chains now works properly when constructed using the + ``chain`` type instead of the ``|`` operator (Issue #1656). + +.. _version-3.1.4: + +3.1.4 +===== +:release-date: 2013-11-15 11:40 P.M UTC +:release-by: Ask Solem + +- Now depends on :ref:`Kombu 3.0.5 `. + +- Now depends on :mod:`billiard` 3.3.0.7 + +- Worker accidentally set a default socket timeout of 5 seconds. 
+ +- Django: Fixup now sets the default app so that threads will use + the same app instance (e.g. for manage.py runserver). + +- Worker: Fixed Unicode error crash at startup experienced by some users. + +- Calling ``.apply_async`` on an empty chain now works again (Issue #1650). + +- The ``celery multi show`` command now generates the same arguments + as the start command does. + +- The ``--app`` argument could end up using a module object instead + of an app instance (with a resulting crash). + +- Fixed a syntax error problem in the celerybeat init script. + + Fix contributed by Vsevolod. + +- Tests now passing on PyPy 2.1 and 2.2. + +.. _version-3.1.3: + +3.1.3 +===== +:release-date: 2013-11-13 00:55 A.M UTC +:release-by: Ask Solem + +- Fixed compatibility problem with Python 2.7.0 - 2.7.5 (Issue #1637) + + ``unpack_from`` started supporting ``memoryview`` arguments + in Python 2.7.6. + +- Worker: :option:`-B` argument accidentally closed files used + for logging. + +- Task decorated tasks now keep their docstring (Issue #1636) + +.. _version-3.1.2: + +3.1.2 +===== +:release-date: 2013-11-12 08:00 P.M UTC +:release-by: Ask Solem + +- Now depends on :mod:`billiard` 3.3.0.6 + +- No longer needs the billiard C extension to be installed. + +- The worker silently ignored task errors. + +- Django: Fixed ``ImproperlyConfigured`` error raised + when no database backend specified. + + Fix contributed by j0hnsmith + +- Prefork pool: Now using ``_multiprocessing.read`` with ``memoryview`` + if available. + +- ``close_open_fds`` now uses ``os.closerange`` if available. + +- ``get_fdmax`` now takes value from ``sysconfig`` if possible. + +.. _version-3.1.1: + +3.1.1 +===== +:release-date: 2013-11-11 06:30 P.M UTC +:release-by: Ask Solem + +- Now depends on :mod:`billiard` 3.3.0.4. + +- Python 3: Fixed compatibility issues. + +- Windows: Accidentally showed warning that the billiard C extension + was not installed (Issue #1630). 
+ +- Django: Tutorial updated with a solution that sets a default + :envvar:`DJANGO_SETTINGS_MODULE` so that it doesn't have to be typed + in with the :program:`celery` command. + + Also fixed typos in the tutorial, and added the settings + required to use the Django database backend. + + Thanks to Chris Ward, orarbel. + +- Django: Fixed a problem when using the Django settings in Django 1.6. + +- Django: Fixup should not be applied if the django loader is active. + +- Worker: Fixed attribute error for ``human_write_stats`` when using the + compatibility prefork pool implementation. + +- Worker: Fixed compatibility with billiard without C extension. + +- Inspect.conf: Now supports a ``with_defaults`` argument. + +- Group.restore: The backend argument was not respected. + +.. _version-3.1.0: + +3.1.0 +======= +:release-date: 2013-11-09 11:00 P.M UTC +:release-by: Ask Solem + +See :ref:`whatsnew-3.1`. diff --git a/docs/history/index.rst b/docs/history/index.rst index 673532de1..cf6d0f96c 100644 --- a/docs/history/index.rst +++ b/docs/history/index.rst @@ -13,6 +13,7 @@ version please visit :ref:`changelog`. .. toctree:: :maxdepth: 2 + changelog-3.1 changelog-3.0 changelog-2.5 changelog-2.4 diff --git a/docs/images/worker_graph_full.png b/docs/images/worker_graph_full.png index 867bcfb74..ea104a53e 100644 Binary files a/docs/images/worker_graph_full.png and b/docs/images/worker_graph_full.png differ diff --git a/docs/includes/installation.txt b/docs/includes/installation.txt index 54ec954b0..25ae7eef9 100644 --- a/docs/includes/installation.txt +++ b/docs/includes/installation.txt @@ -26,11 +26,11 @@ You can specify these in your requirements or on the ``pip`` comand-line by using brackets. Multiple bundles can be specified by separating them by commas. -.. code-block:: bash +.. 
code-block:: console - $ pip install celery[librabbitmq] + $ pip install "celery[librabbitmq]" - $ pip install celery[librabbitmq,redis,auth,msgpack] + $ pip install "celery[librabbitmq,redis,auth,msgpack]" The following bundles are available: @@ -75,10 +75,13 @@ Transports and Backends for using Amazon SQS as a message transport (*experimental*). :celery[memcache]: - for using memcached as a result backend. + for using memcached as a result backend (using pylibmc) + +:celery[pymemcache]: + for using memcached as a result backend (pure-python implementation). :celery[cassandra]: - for using Apache Cassandra as a result backend. + for using Apache Cassandra as a result backend with DataStax driver. :celery[couchdb]: for using CouchDB as a message transport (*experimental*). @@ -86,6 +89,12 @@ Transports and Backends :celery[couchbase]: for using CouchBase as a result backend. +:celery[elasticsearch] + for using Elasticsearch as a result backend. + +:celery[riak]: + for using Riak as a result backend. + :celery[beanstalk]: for using Beanstalk as a message transport (*experimental*). diff --git a/docs/includes/introduction.txt b/docs/includes/introduction.txt index c96304ff1..2c37e4a4f 100644 --- a/docs/includes/introduction.txt +++ b/docs/includes/introduction.txt @@ -1,4 +1,4 @@ -:Version: 3.1.10 (Cipater) +:Version: 4.0.0rc1 (0today8) :Web: http://celeryproject.org/ :Download: http://pypi.python.org/pypi/celery/ :Source: http://github.com/celery/celery/ @@ -38,7 +38,7 @@ What do I need? Celery version 3.0 runs on, -- Python (2.5, 2.6, 2.7, 3.2, 3.3) +- Python (2.7, 3.4, 3.5) - PyPy (1.8, 1.9) - Jython (2.5, 2.7). @@ -133,7 +133,7 @@ It supports… - AMQP, Redis - memcached, MongoDB - SQLAlchemy, Django ORM - - Apache Cassandra, IronCache + - Apache Cassandra, IronCache, Elasticsearch - **Serialization** @@ -177,7 +177,7 @@ development easier, and sometimes they add important hooks like closing database connections at ``fork``. .. 
_`Django`: http://djangoproject.com/ -.. _`Pylons`: http://pylonshq.com/ +.. _`Pylons`: http://pylonsproject.org/ .. _`Flask`: http://flask.pocoo.org/ .. _`web2py`: http://web2py.com/ .. _`Bottle`: http://bottlepy.org/ diff --git a/docs/index.rst b/docs/index.rst index 86e47949b..bb0418df7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -49,6 +49,7 @@ Contents tutorials/index faq changelog + whatsnew-4.0 whatsnew-3.1 whatsnew-3.0 whatsnew-2.5 diff --git a/docs/internals/app-overview.rst b/docs/internals/app-overview.rst index 33dd4e815..4dd82a791 100644 --- a/docs/internals/app-overview.rst +++ b/docs/internals/app-overview.rst @@ -57,8 +57,8 @@ Getting access to the configuration: .. code-block:: python - celery.conf.CELERY_ALWAYS_EAGER = True - celery.conf["CELERY_ALWAYS_EAGER"] = True + celery.conf.task_always_eager = True + celery.conf["task_always_eager"] = True Controlling workers:: @@ -98,29 +98,11 @@ Deprecations Inferior to the ping remote control command. Will be removed in Celery 2.3. -Removed deprecations -==================== - -* `celery.utils.timedelta_seconds` - Use: :func:`celery.utils.timeutils.timedelta_seconds` - -* `celery.utils.defaultdict` - Use: :func:`celery.utils.compat.defaultdict` - -* `celery.utils.all` - Use: :func:`celery.utils.compat.all` - -* `celery.task.apply_async` - Use app.send_task - -* `celery.task.tasks` - Use :data:`celery.registry.tasks` - Aliases (Pending deprecation) ============================= * celery.task.base - * .Task -> {app.create_task_cls} + * .Task -> {app.Task / :class:`celery.app.task.Task`} * celery.task.sets * .TaskSet -> {app.TaskSet} @@ -153,15 +135,15 @@ Aliases (Pending deprecation) * celery.conf.* -> {app.conf} **NOTE**: All configuration keys are now named the same - as in the configuration. So the key "CELERY_ALWAYS_EAGER" + as in the configuration. 
So the key "task_always_eager" is accessed as:: - >>> app.conf.CELERY_ALWAYS_EAGER + >>> app.conf.task_always_eager instead of:: >>> from celery import conf - >>> conf.ALWAYS_EAGER + >>> conf.always_eager * .get_queues -> {app.amqp.get_queues} @@ -226,7 +208,7 @@ App Dependency Tree * celery.apps.worker.Worker * celery.worker.WorkerController * celery.worker.consumer.Consumer - * celery.worker.job.TaskRequest + * celery.worker.request.Request * celery.events.EventDispatcher * celery.worker.control.ControlDispatch * celery.woker.control.registry.Panel diff --git a/docs/internals/deprecation.rst b/docs/internals/deprecation.rst index 687c5ed0c..4d0900ea6 100644 --- a/docs/internals/deprecation.rst +++ b/docs/internals/deprecation.rst @@ -7,33 +7,9 @@ .. contents:: :local: -.. _deprecations-v3.2: +.. _deprecations-v5.0: -Removals for version 3.2 -======================== - -- Module ``celery.task.trace`` has been renamed to ``celery.app.trace`` - as the ``celery.task`` package is being phased out. The compat module - will be removed in version 3.2 so please change any import from:: - - from celery.task.trace import … - - to:: - - from celery.app.trace import … - -- ``AsyncResult.serializable()`` and ``celery.result.from_serializable`` - will be removed. - - Use instead:: - - >>> tup = result.as_tuple() - >>> from celery.result import result_from_tuple - >>> result = result_from_tuple(tup) - -.. _deprecations-v4.0: - -Removals for version 4.0 +Removals for version 5.0 ======================== Old Task API @@ -92,47 +68,6 @@ on the class, but have to instantiate the task first:: >>> MyTask().delay() # WORKS! -TaskSet -~~~~~~~ - -TaskSet has been renamed to group and TaskSet will be removed in version 4.0. 
- -Old:: - - >>> from celery.task import TaskSet - - >>> TaskSet(add.subtask((i, i)) for i in xrange(10)).apply_async() - -New:: - - >>> from celery import group - >>> group(add.s(i, i) for i in xrange(10))() - - -Magic keyword arguments -~~~~~~~~~~~~~~~~~~~~~~~ - -The magic keyword arguments accepted by tasks will be removed -in 4.0, so you should start rewriting any tasks -using the ``celery.decorators`` module and depending -on keyword arguments being passed to the task, -for example:: - - from celery.decorators import task - - @task() - def add(x, y, task_id=None): - print("My task id is %r" % (task_id, )) - -should be rewritten into:: - - from celery import task - - @task(bind=True) - def add(self, x, y): - print("My task id is {0.request.id}".format(self)) - - Task attributes --------------- @@ -145,42 +80,7 @@ The task attributes: - ``delivery_mode`` - ``priority`` -is deprecated and must be set by :setting:`CELERY_ROUTES` instead. - -:mod:`celery.result` --------------------- - -- ``BaseAsyncResult`` -> ``AsyncResult``. - -- ``TaskSetResult`` -> ``GroupResult``. - -- ``TaskSetResult.total`` -> ``len(GroupResult)`` - -- ``TaskSetResult.taskset_id`` -> ``GroupResult.id`` - -Apply to: :class:`~celery.result.AsyncResult`, -:class:`~celery.result.EagerResult`:: - -- ``Result.wait()`` -> ``Result.get()`` - -- ``Result.task_id()`` -> ``Result.id`` - -- ``Result.status`` -> ``Result.state``. - -:mod:`celery.loader` --------------------- - -- ``current_loader()`` -> ``current_app.loader`` - -- ``load_settings()`` -> ``current_app.conf`` - - -Task_sent signal ----------------- - -The :signal:`task_sent` signal will be removed in version 4.0. -Please use the :signal:`before_task_publish` and :signal:`after_task_publush` -signals instead. +is deprecated and must be set by :setting:`task_routes` instead. 
Modules to Remove @@ -228,55 +128,64 @@ Settings ===================================== ===================================== **Setting name** **Replace with** ===================================== ===================================== -``BROKER_HOST`` :setting:`BROKER_URL` -``BROKER_PORT`` :setting:`BROKER_URL` -``BROKER_USER`` :setting:`BROKER_URL` -``BROKER_PASSWORD`` :setting:`BROKER_URL` -``BROKER_VHOST`` :setting:`BROKER_URL` +``BROKER_HOST`` :setting:`broker_url` +``BROKER_PORT`` :setting:`broker_url` +``BROKER_USER`` :setting:`broker_url` +``BROKER_PASSWORD`` :setting:`broker_url` +``BROKER_VHOST`` :setting:`broker_url` ===================================== ===================================== - ``REDIS`` Result Backend Settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ===================================== ===================================== **Setting name** **Replace with** ===================================== ===================================== -``CELERY_REDIS_HOST`` :setting:`CELERY_RESULT_BACKEND` -``CELERY_REDIS_PORT`` :setting:`CELERY_RESULT_BACKEND` -``CELERY_REDIS_DB`` :setting:`CELERY_RESULT_BACKEND` -``CELERY_REDIS_PASSWORD`` :setting:`CELERY_RESULT_BACKEND` -``REDIS_HOST`` :setting:`CELERY_RESULT_BACKEND` -``REDIS_PORT`` :setting:`CELERY_RESULT_BACKEND` -``REDIS_DB`` :setting:`CELERY_RESULT_BACKEND` -``REDIS_PASSWORD`` :setting:`CELERY_RESULT_BACKEND` +``CELERY_REDIS_HOST`` :setting:`result_backend` +``CELERY_REDIS_PORT`` :setting:`result_backend` +``CELERY_REDIS_DB`` :setting:`result_backend` +``CELERY_REDIS_PASSWORD`` :setting:`result_backend` +``REDIS_HOST`` :setting:`result_backend` +``REDIS_PORT`` :setting:`result_backend` +``REDIS_DB`` :setting:`result_backend` +``REDIS_PASSWORD`` :setting:`result_backend` ===================================== ===================================== -Logging Settings -~~~~~~~~~~~~~~~~ -===================================== ===================================== -**Setting name** **Replace with** 
-===================================== ===================================== -``CELERYD_LOG_LEVEL`` :option:`--loglevel` -``CELERYD_LOG_FILE`` :option:`--logfile`` -``CELERYBEAT_LOG_LEVEL`` :option:`--loglevel` -``CELERYBEAT_LOG_FILE`` :option:`--loglevel`` -``CELERYMON_LOG_LEVEL`` :option:`--loglevel` -``CELERYMON_LOG_FILE`` :option:`--loglevel`` -===================================== ===================================== +Task_sent signal +---------------- + +The :signal:`task_sent` signal will be removed in version 4.0. +Please use the :signal:`before_task_publish` and :signal:`after_task_publish` +signals instead. -Other Settings -~~~~~~~~~~~~~~ +Result +------ + +Apply to: :class:`~celery.result.AsyncResult`, +:class:`~celery.result.EagerResult`:: + +- ``Result.wait()`` -> ``Result.get()`` + +- ``Result.task_id()`` -> ``Result.id`` + +- ``Result.status`` -> ``Result.state``. + +.. _deprecations-v3.1: + + +Settings +~~~~~~~~ ===================================== ===================================== **Setting name** **Replace with** ===================================== ===================================== ``CELERY_TASK_ERROR_WITELIST`` Annotate ``Task.ErrorMail`` -``CELERY_AMQP_TASK_RESULT_EXPIRES`` :setting:`CELERY_TASK_RESULT_EXPIRES` +``CELERY_AMQP_TASK_RESULT_EXPIRES`` :setting:`result_expires` ===================================== ===================================== + .. 
_deprecations-v2.0: Removals for version 2.0 @@ -287,12 +196,12 @@ Removals for version 2.0 ===================================== ===================================== **Setting name** **Replace with** ===================================== ===================================== -`CELERY_AMQP_CONSUMER_QUEUES` `CELERY_QUEUES` -`CELERY_AMQP_CONSUMER_QUEUES` `CELERY_QUEUES` -`CELERY_AMQP_EXCHANGE` `CELERY_DEFAULT_EXCHANGE` -`CELERY_AMQP_EXCHANGE_TYPE` `CELERY_DEFAULT_AMQP_EXCHANGE_TYPE` -`CELERY_AMQP_CONSUMER_ROUTING_KEY` `CELERY_QUEUES` -`CELERY_AMQP_PUBLISHER_ROUTING_KEY` `CELERY_DEFAULT_ROUTING_KEY` +`CELERY_AMQP_CONSUMER_QUEUES` `task_queues` +`CELERY_AMQP_CONSUMER_QUEUES` `task_queues` +`CELERY_AMQP_EXCHANGE` `task_default_exchange` +`CELERY_AMQP_EXCHANGE_TYPE` `task_default_exchange_type` +`CELERY_AMQP_CONSUMER_ROUTING_KEY` `task_queues` +`CELERY_AMQP_PUBLISHER_ROUTING_KEY` `task_default_routing_key` ===================================== ===================================== * :envvar:`CELERY_LOADER` definitions without class name. @@ -303,4 +212,4 @@ Removals for version 2.0 * :meth:`TaskSet.run`. Use :meth:`celery.task.base.TaskSet.apply_async` instead. -* The module :mod:`celery.task.rest`; use :mod:`celery.task.http` instead. +* The module :mod:`celery.task.rest`; use :mod:`celery.task.http` instead. diff --git a/docs/internals/guide.rst b/docs/internals/guide.rst index 941f7b11d..ae35f6347 100644 --- a/docs/internals/guide.rst +++ b/docs/internals/guide.rst @@ -64,7 +64,7 @@ Naming Sometimes it makes sense to have a class mask as a function, and there is precedence for this in the stdlib (e.g. :class:`~contextlib.contextmanager`). Celery examples include - :class:`~celery.subtask`, :class:`~celery.chord`, + :class:`~celery.signature`, :class:`~celery.chord`, ``inspect``, :class:`~kombu.utils.functional.promise` and more.. 
- Factory functions and methods must be `CamelCase` (excluding verbs): @@ -108,7 +108,7 @@ A subclass can change the default value: and the value can be set at instantiation: -.. code-block:: python +.. code-block:: pycon >>> producer = TaskProducer(serializer='msgpack') @@ -305,3 +305,30 @@ Module Overview - celery.contrib Additional public code that doesn't fit into any other namespace. + +Worker overview +=============== + +* `celery.bin.worker:Worker` + + This is the command-line interface to the worker. + + Responsibilities: + * Daemonization when `--detach` set, + * dropping privileges when using `--uid`/`--gid` arguments + * Installs "concurrency patches" (eventlet/gevent monkey patches). + + ``app.worker_main(argv)`` calls + ``instantiate('celery.bin.worker:Worker')(app).execute_from_commandline(argv)`` + +* `app.Worker` -> `celery.apps.worker:Worker` + + Responsibilities: + * sets up logging and redirects stdouts + * installs signal handlers (`TERM`/`HUP`/`STOP`/`USR1` (cry)/`USR2` (rdb)) + * prints banner and warnings (e.g. pickle warning) + * handles the ``--purge`` argument + +* `app.WorkController` -> `celery.worker.WorkController` + + This is the real worker, built up around bootsteps. diff --git a/docs/internals/index.rst b/docs/internals/index.rst index d10ed013e..4521a22fa 100644 --- a/docs/internals/index.rst +++ b/docs/internals/index.rst @@ -14,6 +14,5 @@ deprecation worker protocol - protov2 app-overview reference/index diff --git a/docs/internals/protocol.rst b/docs/internals/protocol.rst index f80e6e8ff..8a6922d65 100644 --- a/docs/internals/protocol.rst +++ b/docs/internals/protocol.rst @@ -1,16 +1,192 @@ +.. _message-protocol: + +=================== + Message Protocol +=================== + +.. contents:: + :local: + +.. _message-protocol-task: .. _internals-task-message-protocol: +Task messages +============= + +.. _message-protocol-task-v2: + +Version 2 +--------- + +Definition +~~~~~~~~~~ + +.. 
code-block:: python + + properties = { + 'correlation_id': uuid task_id, + 'content_type': string mimetype, + 'content_encoding': string encoding, + + # optional + 'reply_to': string queue_or_url, + } + headers = { + 'lang': string 'py', + 'task': string task, + 'id': uuid task_id, + 'root_id': uuid root_id, + 'parent_id': uuid parent_id, + 'group': uuid group_id, + + # optional + 'meth': string method_name, + 'shadow': string alias_name, + 'eta': iso8601 eta, + 'expires': iso8601 expires, + 'retries': int retries, + 'timelimit': (soft, hard), + 'argsrepr': str repr(args), + 'kwargsrepr': str repr(kwargs), + 'origin': str nodename, + } + + body = ( + object[] args, + Mapping kwargs, + Mapping embed { + 'callbacks': Signature[] callbacks, + 'errbacks': Signature[] errbacks, + 'chain': Signature[] chain, + 'chord': Signature chord_callback, + } + ) + +Example +~~~~~~~ + +This example sends a task message using version 2 of the protocol: + +.. code-block:: python + + # chain: add(add(add(2, 2), 4), 8) == 2 + 2 + 4 + 8 + + import json + import os + import socket + + task_id = uuid() + args = (2, 2) + kwargs = {} + basic_publish( + message=json.dumps((args, kwargs, None)), + application_headers={ + 'lang': 'py', + 'task': 'proj.tasks.add', + 'argsrepr': repr(args), + 'kwargsrepr': repr(kwargs), + 'origin': '@'.join([str(os.getpid()), socket.gethostname()]) + }, + properties={ + 'correlation_id': task_id, + 'content_type': 'application/json', + 'content_encoding': 'utf-8', + } + ) + +Changes from version 1 +~~~~~~~~~~~~~~~~~~~~~~ + +- Protocol version detected by the presence of a ``task`` message header. + +- Support for multiple languages via the ``lang`` header. + + Worker may redirect the message to a worker that supports + the language. + +- Metadata moved to headers. + + This means that workers/intermediates can inspect the message + and make decisions based on the headers without decoding + the payload (which may be language specific, e.g. 
serialized by the + Python specific pickle serializer). + +- Always UTC + + There's no ``utc`` flag anymore, so any time information missing timezone + will be expected to be in UTC time. + +- Body is only for language specific data. + + - Python stores args/kwargs and embedded signatures in body. + + - If a message uses raw encoding then the raw data + will be passed as a single argument to the function. + + - Java/C, etc. can use a thrift/protobuf document as the body + +- Dispatches to actor based on ``task``, ``meth`` headers + + ``meth`` is unused by python, but may be used in the future + to specify class+method pairs. + +- Chain gains a dedicated field. + + Reducing the chain into a recursive ``callbacks`` argument + causes problems when the recursion limit is exceeded. + + This is fixed in the new message protocol by specifying + a list of signatures, each task will then pop a task off the list + when sending the next message: + + .. code-block:: python + + execute_task(message) + chain = embed['chain'] + if chain: + sig = maybe_signature(chain.pop()) + sig.apply_async(chain=chain) + +- ``correlation_id`` replaces ``task_id`` field. + +- ``root_id`` and ``parent_id`` fields helps keep track of workflows. + +- ``shadow`` lets you specify a different name for logs, monitors + can be used for e.g. meta tasks that calls any function: + + .. code-block:: python + + from celery.utils.imports import qualname + + class PickleTask(Task): + abstract = True + + def unpack_args(self, fun, args=()): + return fun, args + + def apply_async(self, args, kwargs, **options): + fun, real_args = self.unpack_args(*args) + return super(PickleTask, self).apply_async( + (fun, real_args, kwargs), shadow=qualname(fun), **options + ) + + @app.task(base=PickleTask) + def call(fun, args, kwargs): + return fun(*args, **kwargs) + + +.. _message-protocol-task-v1: .. _task-message-protocol-v1: -======================= - Task Messages -======================= +Version 1 +--------- -.. 
contents:: - :local: +In version 1 of the protocol all fields are stored in the message body, +which means workers and intermediate consumers must deserialize the payload +to read the fields. -Message format -============== +Message body +~~~~~~~~~~~~ * task :`string`: @@ -56,22 +232,13 @@ Message format will be expired when the message is received and the expiration date has been exceeded. -Extensions -========== - -Extensions are additional keys in the message body that the worker may or -may not support. If the worker finds an extension key it doesn't support -it should optimally reject the message so another worker gets a chance -to process it. - - * taskset :`string`: The taskset this task is part of (if any). * chord - :`subtask`: + :`Signature`: .. versionadded:: 2.3 @@ -88,18 +255,18 @@ to process it. should be used. * callbacks - :`subtask`: + :`Signature`: .. versionadded:: 3.0 - A list of subtasks to apply if the task exited successfully. + A list of signatures to call if the task exited successfully. * errbacks - :`subtask`: + :`Signature`: .. versionadded:: 3.0 - A list of subtasks to apply if an error occurs while executing the task. + A list of signatures to call if an error occurs while executing the task. * timelimit :`(float, float)`: @@ -116,9 +283,9 @@ to process it. Example message -=============== +~~~~~~~~~~~~~~~ -This is an example invocation of the `celery.task.PingTask` task in JSON +This is an example invocation of a `celery.task.ping` task in JSON format: .. code-block:: javascript @@ -130,8 +297,8 @@ format: "retries": 0, "eta": "2009-11-17T12:30:56.527191"} -Serialization -============= +Task Serialization +------------------ Several types of serialization formats are supported using the `content_type` message header. @@ -146,3 +313,82 @@ The MIME-types supported by default are shown in the following table. pickle application/x-python-serialize msgpack application/x-msgpack =============== ================================= + +.. 
_message-protocol-event: + +Event Messages +============== + +Event messages are always JSON serialized and can contain arbitrary message +body fields. + +Since version 4.0, the body can consist of either a single mapping (one event), +or a list of mappings (multiple events). + +There are also standard fields that must always be present in an event +message: + +Standard body fields +-------------------- + +- *string* ``type`` + + The type of event. This is a string containing the *category* and + *action* separated by a dash delimiter (e.g. ``task-succeeded``). + +- *string* ``hostname`` + + The fully qualified hostname of where the event occurred. + +- *unsigned long long* ``clock`` + + The logical clock value for this event (Lamport timestamp). + +- *float* ``timestamp`` + + The UNIX timestamp corresponding to the time of when the event occurred. + +- *signed short* ``utcoffset`` + + This field describes the timezone of the originating host, and is + specified as the number of hours ahead of/behind UTC. E.g. ``-2`` or + ``+1``. + +- *unsigned long long* ``pid`` + + The process id of the process the event originated in. + +Standard event types +-------------------- + +For a list of standard event types and their fields see the +:ref:`event-reference`. + +Example message +--------------- + +These are the message fields for a ``task-succeeded`` event: + +..
code-block:: python + + properties = { + 'routing_key': 'task.succeeded', + 'exchange': 'celeryev', + 'content_type': 'application/json', + 'content_encoding': 'utf-8', + 'delivery_mode': 1, + } + headers = { + 'hostname': 'worker1@george.vandelay.com', + } + body = { + 'type': 'task-succeeded', + 'hostname': 'worker1@george.vandelay.com', + 'pid': 6335, + 'clock': 393912923921, + 'timestamp': 1401717709.101747, + 'utcoffset': -1, + 'uuid': '9011d855-fdd1-4f8f-adb3-a413b499eafb', + 'retval': '4', + 'runtime': 0.0003212, + } diff --git a/docs/internals/protov2.rst b/docs/internals/protov2.rst deleted file mode 100644 index e0bb1ff89..000000000 --- a/docs/internals/protov2.rst +++ /dev/null @@ -1,146 +0,0 @@ -.. _protov2draft: - -======================================== - Task Message Protocol v2 (Draft Spec.) -======================================== - -Notes -===== - -- Support for multiple languages via the ``lang`` header. - - Worker may redirect the message to a worker that supports - the language. - -- Metadata moved to headers. - - This means that workers/intermediates can inspect the message - and make decisions based on the headers without decoding - the payload (which may be language specific, e.g. serialized by the - Python specific pickle serializer). - -- Body is only for language specific data. - - - Python stores args/kwargs in body. - - - If a message uses raw encoding then the raw data - will be passed as a single argument to the function. - - - Java/C, etc. can use a thrift/protobuf document as the body - -- Dispatches to actor based on ``c_type``, ``c_meth`` headers - - ``c_meth`` is unused by python, but may be used in the future - to specify class+method pairs. - -- Chain gains a dedicated field. - - Reducing the chain into a recursive ``callbacks`` argument - causes problems when the recursion limit is exceeded.
- - This is fixed in the new message protocol by specifying - a list of signatures, each task will then pop a task off the list - when sending the next message:: - - execute_task(message) - chain = message.headers['chain'] - if chain: - sig = maybe_signature(chain.pop()) - sig.apply_async(chain=chain) - -- ``correlation_id`` replaces ``task_id`` field. - - -- ``c_shadow`` lets you specify a different name for logs, monitors - can be used for e.g. meta tasks that calls any function:: - - from celery.utils.imports import qualname - - class PickleTask(Task): - abstract = True - - def unpack_args(self, fun, args=()): - return fun, args - - def apply_async(self, args, kwargs, **options): - fun, real_args = self.unpack_args(*args) - return super(PickleTask, self).apply_async( - (fun, real_args, kwargs), shadow=qualname(fun), **options - ) - - @app.task(base=PickleTask) - def call(fun, args, kwargs): - return fun(*args, **kwargs) - - - -Undecided ---------- - -- May consider moving callbacks/errbacks/chain into body. - - Will huge lists in headers cause overhead? - The downside of keeping them in the body is that intermediates - won't be able to introspect these values. - -Definition -========== - -.. 
code-block:: python - - # protocol v2 implies UTC=True - # 'class' header existing means protocol is v2 - - properties = { - 'correlation_id': (uuid)task_id, - 'content_type': (string)mime, - 'content_encoding': (string)encoding, - - # optional - 'reply_to': (string)queue_or_url, - } - headers = { - 'lang': (string)'py' - 'c_type': (string)task, - - # optional - 'c_meth': (string)unused, - 'c_shadow': (string)replace_name, - 'eta': (iso8601)eta, - 'expires'; (iso8601)expires, - 'callbacks': (list)Signature, - 'errbacks': (list)Signature, - 'chain': (list)Signature, # non-recursive, reversed list of signatures - 'group': (uuid)group_id, - 'chord': (uuid)chord_id, - 'retries': (int)retries, - 'timelimit': (tuple)(soft, hard), - } - - body = (args, kwargs) - -Example -======= - -.. code-block:: python - - # chain: add(add(add(2, 2), 4), 8) == 2 + 2 + 4 + 8 - - task_id = uuid() - basic_publish( - message=json.dumps([[2, 2], {}]), - application_headers={ - 'lang': 'py', - 'c_type': 'proj.tasks.add', - 'chain': [ - # reversed chain list - {'task': 'proj.tasks.add', 'args': (8, )}, - {'task': 'proj.tasks.add', 'args': (4, )}, - ] - } - properties={ - 'correlation_id': task_id, - 'content_type': 'application/json', - 'content_encoding': 'utf-8', - } - ) diff --git a/docs/internals/reference/celery.backends.couchdb.rst b/docs/internals/reference/celery.backends.couchdb.rst new file mode 100644 index 000000000..bd836abc4 --- /dev/null +++ b/docs/internals/reference/celery.backends.couchdb.rst @@ -0,0 +1,11 @@ +=========================================== + celery.backends.couchdb +=========================================== + +.. contents:: + :local: +.. currentmodule:: celery.backends.couchdb + +.. 
automodule:: celery.backends.couchdb + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.elasticsearch.txt b/docs/internals/reference/celery.backends.elasticsearch.txt new file mode 100644 index 000000000..ae06fa19f --- /dev/null +++ b/docs/internals/reference/celery.backends.elasticsearch.txt @@ -0,0 +1,11 @@ +=========================================== + celery.backends.elasticsearch +=========================================== + +.. contents:: + :local: +.. currentmodule:: celery.backends.elasticsearch + +.. automodule:: celery.backends.elasticsearch + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.filesystem.rst b/docs/internals/reference/celery.backends.filesystem.rst new file mode 100644 index 000000000..c5560d6b8 --- /dev/null +++ b/docs/internals/reference/celery.backends.filesystem.rst @@ -0,0 +1,11 @@ +========================================== + celery.backends.filesystem +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.backends.filesystem + +.. automodule:: celery.backends.filesystem + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.riak.rst b/docs/internals/reference/celery.backends.riak.rst new file mode 100644 index 000000000..edbdb1c2d --- /dev/null +++ b/docs/internals/reference/celery.backends.riak.rst @@ -0,0 +1,11 @@ +=========================================== + celery.backends.riak +=========================================== + +.. contents:: + :local: +.. currentmodule:: celery.backends.riak + +.. 
automodule:: celery.backends.riak + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.abstract.rst b/docs/internals/reference/celery.utils.abstract.rst new file mode 100644 index 000000000..70ec49749 --- /dev/null +++ b/docs/internals/reference/celery.utils.abstract.rst @@ -0,0 +1,11 @@ +=========================================== + celery.utils.abstract +=========================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.abstract + +.. automodule:: celery.utils.abstract + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.compat.rst b/docs/internals/reference/celery.utils.compat.rst deleted file mode 100644 index 851851f09..000000000 --- a/docs/internals/reference/celery.utils.compat.rst +++ /dev/null @@ -1,11 +0,0 @@ -============================================ - celery.utils.compat -============================================ - -.. contents:: - :local: -.. currentmodule:: celery.utils.compat - -.. automodule:: celery.utils.compat - :members: - :undoc-members: diff --git a/docs/internals/reference/celery.utils.saferepr.rst b/docs/internals/reference/celery.utils.saferepr.rst new file mode 100644 index 000000000..e01790857 --- /dev/null +++ b/docs/internals/reference/celery.utils.saferepr.rst @@ -0,0 +1,11 @@ +=========================================== + celery.utils.saferepr +=========================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.saferepr + +.. 
automodule:: celery.utils.saferepr + :members: + :undoc-members: diff --git a/docs/internals/reference/index.rst b/docs/internals/reference/index.rst index 31b606139..864f7fd70 100644 --- a/docs/internals/reference/index.rst +++ b/docs/internals/reference/index.rst @@ -26,12 +26,16 @@ celery.backends.base celery.backends.rpc celery.backends.database - celery.backends.cache celery.backends.amqp + celery.backends.cache + celery.backends.couchdb celery.backends.mongodb + celery.backends.elasticsearch celery.backends.redis + celery.backends.riak celery.backends.cassandra celery.backends.couchbase + celery.backends.filesystem celery.app.trace celery.app.annotations celery.app.routes @@ -46,12 +50,13 @@ celery.backends.database.models celery.backends.database.session celery.utils + celery.utils.abstract celery.utils.functional celery.utils.objects celery.utils.term celery.utils.timeutils celery.utils.iso8601 - celery.utils.compat + celery.utils.saferepr celery.utils.serialization celery.utils.sysinfo celery.utils.threads diff --git a/docs/internals/worker.rst b/docs/internals/worker.rst index 30eb64185..c1695cb48 100644 --- a/docs/internals/worker.rst +++ b/docs/internals/worker.rst @@ -35,7 +35,7 @@ Receives messages from the broker using `Kombu`_. .. _`Kombu`: http://pypi.python.org/pypi/kombu When a message is received it's converted into a -:class:`celery.worker.job.TaskRequest` object. +:class:`celery.worker.request.Request` object. Tasks with an ETA, or rate-limit are entered into the `timer`, messages that can be immediately processed are sent to the execution pool. diff --git a/docs/reference/celery.app.amqp.rst b/docs/reference/celery.app.amqp.rst index 467552820..5257acdbf 100644 --- a/docs/reference/celery.app.amqp.rst +++ b/docs/reference/celery.app.amqp.rst @@ -17,7 +17,11 @@ .. attribute:: Consumer - Base Consumer class used. Default is :class:`kombu.compat.Consumer`. + Base Consumer class used. Default is :class:`kombu.Consumer`. + + .. 
attribute:: Producer + + Base Producer class used. Default is :class:`kombu.Producer`. .. attribute:: queues @@ -25,13 +29,13 @@ .. automethod:: Queues .. automethod:: Router - .. autoattribute:: TaskConsumer - .. autoattribute:: TaskProducer .. automethod:: flush_routes + .. autoattribute:: create_task_message + .. autoattribute:: send_task_message .. autoattribute:: default_queue .. autoattribute:: default_exchange - .. autoattribute:: publisher_pool + .. autoattribute:: producer_pool .. autoattribute:: router .. autoattribute:: routes @@ -41,10 +45,3 @@ .. autoclass:: Queues :members: :undoc-members: - - TaskPublisher - ------------- - - .. autoclass:: TaskPublisher - :members: - :undoc-members: diff --git a/docs/reference/celery.bin.logtool.rst b/docs/reference/celery.bin.logtool.rst new file mode 100644 index 000000000..3242835ce --- /dev/null +++ b/docs/reference/celery.bin.logtool.rst @@ -0,0 +1,11 @@ +===================================================== + celery.bin.logtool +===================================================== + +.. contents:: + :local: +.. currentmodule:: celery.bin.logtool + +.. automodule:: celery.bin.logtool + :members: + :undoc-members: diff --git a/docs/reference/celery.contrib.methods.rst b/docs/reference/celery.contrib.methods.rst deleted file mode 100644 index 539234e1f..000000000 --- a/docs/reference/celery.contrib.methods.rst +++ /dev/null @@ -1,5 +0,0 @@ -.. currentmodule:: celery.contrib.methods - -.. automodule:: celery.contrib.methods - :members: - :undoc-members: diff --git a/docs/reference/celery.rst b/docs/reference/celery.rst index d87cfdca1..4890bfdce 100644 --- a/docs/reference/celery.rst +++ b/docs/reference/celery.rst @@ -29,366 +29,90 @@ and creating Celery applications. .. versionadded:: 2.5 -.. class:: Celery(main='__main__', broker='amqp://localhost//', …) +.. autoclass:: Celery - :param main: Name of the main module if running as `__main__`. - This is used as a prefix for task names. 
- :keyword broker: URL of the default broker used. - :keyword loader: The loader class, or the name of the loader class to use. - Default is :class:`celery.loaders.app.AppLoader`. - :keyword backend: The result store backend class, or the name of the - backend class to use. Default is the value of the - :setting:`CELERY_RESULT_BACKEND` setting. - :keyword amqp: AMQP object or class name. - :keyword events: Events object or class name. - :keyword log: Log object or class name. - :keyword control: Control object or class name. - :keyword set_as_current: Make this the global current app. - :keyword tasks: A task registry or the name of a registry class. - :keyword include: List of modules every worker should import. - :keyword fixups: List of fixup plug-ins (see e.g. - :mod:`celery.fixups.django`). - :keyword autofinalize: If set to False a :exc:`RuntimeError` - will be raised if the task registry or tasks are used before - the app is finalized. - .. attribute:: Celery.main + .. autoattribute:: user_options - Name of the `__main__` module. Required for standalone scripts. + .. autoattribute:: steps - If set this will be used instead of `__main__` when automatically - generating task names. + .. autoattribute:: current_task - .. attribute:: Celery.conf + .. autoattribute:: amqp - Current configuration. + .. autoattribute:: backend - .. attribute:: user_options + .. autoattribute:: loader - Custom options for command-line programs. - See :ref:`extending-commandoptions` + .. autoattribute:: control + .. autoattribute:: events + .. autoattribute:: log + .. autoattribute:: tasks + .. autoattribute:: pool + .. autoattribute:: Task + .. autoattribute:: timezone - .. attribute:: steps + .. automethod:: close - Custom bootsteps to extend and modify the worker. - See :ref:`extending-bootsteps`. + .. automethod:: signature - .. attribute:: Celery.current_task + .. automethod:: bugreport - The instance of the task that is being executed, or :const:`None`. + .. 
automethod:: config_from_object - .. attribute:: Celery.amqp + .. automethod:: config_from_envvar - AMQP related functionality: :class:`~@amqp`. + .. automethod:: autodiscover_tasks - .. attribute:: Celery.backend + .. automethod:: add_defaults - Current backend instance. + .. automethod:: setup_security - .. attribute:: Celery.loader + .. automethod:: start - Current loader instance. + .. automethod:: task - .. attribute:: Celery.control + .. automethod:: send_task - Remote control: :class:`~@control`. + .. autoattribute:: AsyncResult - .. attribute:: Celery.events + .. autoattribute:: GroupResult - Consuming and sending events: :class:`~@events`. + .. automethod:: worker_main - .. attribute:: Celery.log + .. autoattribute:: Worker - Logging: :class:`~@log`. + .. autoattribute:: WorkController - .. attribute:: Celery.tasks + .. autoattribute:: Beat - Task registry. + .. automethod:: connection - Accessing this attribute will also finalize the app. + .. automethod:: connection_or_acquire - .. attribute:: Celery.pool + .. automethod:: producer_or_acquire - Broker connection pool: :class:`~@pool`. - This attribute is not related to the workers concurrency pool. + .. automethod:: mail_admins - .. attribute:: Celery.Task + .. automethod:: select_queues - Base task class for this app. + .. automethod:: now - .. attribute:: Celery.timezone + .. automethod:: set_current - Current timezone for this app. - This is a cached property taking the time zone from the - :setting:`CELERY_TIMEZONE` setting. + .. automethod:: finalize - .. method:: Celery.close + .. data:: on_configure - Close any open pool connections and do any other steps necessary - to clean up after the application. + Signal sent when app is loading configuration. - Only necessary for dynamically created apps for which you can - use the with statement instead:: + .. 
data:: on_after_configure - with Celery(set_as_current=False) as app: - with app.connection() as conn: - pass + Signal sent after app has prepared the configuration. - .. method:: Celery.signature + .. data:: on_after_finalize - Return a new :class:`~celery.canvas.Signature` bound to this app. - See :meth:`~celery.signature` - - .. method:: Celery.bugreport - - Return a string with information useful for the Celery core - developers when reporting a bug. - - .. method:: Celery.config_from_object(obj, silent=False, force=False) - - Reads configuration from object, where object is either - an object or the name of a module to import. - - :keyword silent: If true then import errors will be ignored. - - :keyword force: Force reading configuration immediately. - By default the configuration will be read only when required. - - .. code-block:: python - - >>> celery.config_from_object("myapp.celeryconfig") - - >>> from myapp import celeryconfig - >>> celery.config_from_object(celeryconfig) - - .. method:: Celery.config_from_envvar(variable_name, - silent=False, force=False) - - Read configuration from environment variable. - - The value of the environment variable must be the name - of a module to import. - - .. code-block:: python - - >>> os.environ["CELERY_CONFIG_MODULE"] = "myapp.celeryconfig" - >>> celery.config_from_envvar("CELERY_CONFIG_MODULE") - - .. method:: Celery.autodiscover_tasks(packages, related_name="tasks") - - With a list of packages, try to import modules of a specific name (by - default 'tasks'). - - For example if you have an (imagined) directory tree like this:: - - foo/__init__.py - tasks.py - models.py - - bar/__init__.py - tasks.py - models.py - - baz/__init__.py - models.py - - Then calling ``app.autodiscover_tasks(['foo', bar', 'baz'])`` will - result in the modules ``foo.tasks`` and ``bar.tasks`` being imported. - - :param packages: List of packages to search. 
- This argument may also be a callable, in which case the - value returned is used (for lazy evaluation). - - :keyword related_name: The name of the module to find. Defaults - to "tasks", which means it look for "module.tasks" for every - module in ``packages``. - :keyword force: By default this call is lazy so that the actual - autodiscovery will not happen until an application imports the - default modules. Forcing will cause the autodiscovery to happen - immediately. - - - .. method:: Celery.add_defaults(d) - - Add default configuration from dict ``d``. - - If the argument is a callable function then it will be regarded - as a promise, and it won't be loaded until the configuration is - actually needed. - - This method can be compared to:: - - >>> celery.conf.update(d) - - with a difference that 1) no copy will be made and 2) the dict will - not be transferred when the worker spawns child processes, so - it's important that the same configuration happens at import time - when pickle restores the object on the other side. - - .. method:: Celery.setup_security(…) - - Setup the message-signing serializer. - This will affect all application instances (a global operation). - - Disables untrusted serializers and if configured to use the ``auth`` - serializer will register the auth serializer with the provided settings - into the Kombu serializer registry. - - :keyword allowed_serializers: List of serializer names, or content_types - that should be exempt from being disabled. - :keyword key: Name of private key file to use. - Defaults to the :setting:`CELERY_SECURITY_KEY` setting. - :keyword cert: Name of certificate file to use. - Defaults to the :setting:`CELERY_SECURITY_CERTIFICATE` setting. - :keyword store: Directory containing certificates. - Defaults to the :setting:`CELERY_SECURITY_CERT_STORE` setting. - :keyword digest: Digest algorithm used when signing messages. - Default is ``sha1``. 
- :keyword serializer: Serializer used to encode messages after - they have been signed. See :setting:`CELERY_TASK_SERIALIZER` for - the serializers supported. - Default is ``json``. - - .. method:: Celery.start(argv=None) - - Run :program:`celery` using `argv`. - - Uses :data:`sys.argv` if `argv` is not specified. - - .. method:: Celery.task(fun, …) - - Decorator to create a task class out of any callable. - - Examples: - - .. code-block:: python - - @app.task - def refresh_feed(url): - return … - - with setting extra options: - - .. code-block:: python - - @app.task(exchange="feeds") - def refresh_feed(url): - return … - - .. admonition:: App Binding - - For custom apps the task decorator will return a proxy - object, so that the act of creating the task is not performed - until the task is used or the task registry is accessed. - - If you are depending on binding to be deferred, then you must - not access any attributes on the returned object until the - application is fully set up (finalized). - - - .. method:: Celery.send_task(name[, args[, kwargs[, …]]]) - - Send task by name. - - :param name: Name of task to call (e.g. `"tasks.add"`). - :keyword result_cls: Specify custom result class. Default is - using :meth:`AsyncResult`. - - Otherwise supports the same arguments as :meth:`@-Task.apply_async`. - - .. attribute:: Celery.AsyncResult - - Create new result instance. See :class:`~celery.result.AsyncResult`. - - .. attribute:: Celery.GroupResult - - Create new group result instance. - See :class:`~celery.result.GroupResult`. - - .. method:: Celery.worker_main(argv=None) - - Run :program:`celery worker` using `argv`. - - Uses :data:`sys.argv` if `argv` is not specified. - - .. attribute:: Celery.Worker - - Worker application. See :class:`~@Worker`. - - .. attribute:: Celery.WorkController - - Embeddable worker. See :class:`~@WorkController`. - - .. attribute:: Celery.Beat - - Celerybeat scheduler application. - See :class:`~@Beat`. - - .. 
method:: Celery.connection(url=default, [ssl, [transport_options={}]]) - - Establish a connection to the message broker. - - :param url: Either the URL or the hostname of the broker to use. - - :keyword hostname: URL, Hostname/IP-address of the broker. - If an URL is used, then the other argument below will - be taken from the URL instead. - :keyword userid: Username to authenticate as. - :keyword password: Password to authenticate with - :keyword virtual_host: Virtual host to use (domain). - :keyword port: Port to connect to. - :keyword ssl: Defaults to the :setting:`BROKER_USE_SSL` setting. - :keyword transport: defaults to the :setting:`BROKER_TRANSPORT` - setting. - - :returns :class:`kombu.Connection`: - - .. method:: Celery.connection_or_acquire(connection=None) - - For use within a with-statement to get a connection from the pool - if one is not already provided. - - :keyword connection: If not provided, then a connection will be - acquired from the connection pool. - - .. method:: Celery.producer_or_acquire(producer=None) - - For use within a with-statement to get a producer from the pool - if one is not already provided - - :keyword producer: If not provided, then a producer will be - acquired from the producer pool. - - .. method:: Celery.mail_admins(subject, body, fail_silently=False) - - Sends an email to the admins in the :setting:`ADMINS` setting. - - .. method:: Celery.select_queues(queues=[]) - - Select a subset of queues, where queues must be a list of queue - names to keep. - - .. method:: Celery.now() - - Return the current time and date as a :class:`~datetime.datetime` - object. - - .. method:: Celery.set_current() - - Makes this the current app for this thread. - - .. method:: Celery.finalize() - - Finalizes the app by loading built-in tasks, - and evaluating pending task decorators - - .. method:: Celery.on_configure() - - Optional callback for when the first time the configured is required. - - .. 
attribute:: Celery.Pickler - - Helper class used to pickle this application. + Signal sent after app has been finalized. Canvas primitives ----------------- @@ -408,7 +132,7 @@ See :ref:`guide-canvas` for more about creating task workflows. A group is lazy so you must call it to take action and evaluate the group. - Will return a `group` task that when called will then call of the + Will return a `group` task that when called will then call all of the tasks in the group (and return a :class:`GroupResult` instance that can be used to inspect the state of the group). @@ -462,7 +186,7 @@ See :ref:`guide-canvas` for more about creating task workflows. Signatures can also be created from tasks:: - >>> add.subtask(args=(), kwargs={}, options={}) + >>> add.signature(args=(), kwargs={}, options={}) or the ``.s()`` shortcut:: diff --git a/docs/reference/celery.worker.job.rst b/docs/reference/celery.worker.request.rst similarity index 57% rename from docs/reference/celery.worker.job.rst rename to docs/reference/celery.worker.request.rst index 36fc1a7b3..8821d6bef 100644 --- a/docs/reference/celery.worker.job.rst +++ b/docs/reference/celery.worker.request.rst @@ -1,11 +1,11 @@ ===================================== - celery.worker.job + celery.worker.request ===================================== .. contents:: :local: -.. currentmodule:: celery.worker.job +.. currentmodule:: celery.worker.request -.. automodule:: celery.worker.job +.. 
automodule:: celery.worker.request :members: :undoc-members: diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 5f1c72a08..2f104e89c 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -39,7 +39,6 @@ celery.contrib.migrate celery.contrib.sphinx celery.contrib.rdb - celery.contrib.methods celery.events celery.events.state celery.beat @@ -47,7 +46,7 @@ celery.apps.beat celery.worker celery.worker.consumer - celery.worker.job + celery.worker.request celery.worker.state celery.worker.strategy celery.bin.base @@ -55,6 +54,7 @@ celery.bin.worker celery.bin.beat celery.bin.events + celery.bin.logtool celery.bin.amqp celery.bin.multi celery.bin.graph diff --git a/docs/sec/CELERYSA-0002.txt b/docs/sec/CELERYSA-0002.txt new file mode 100644 index 000000000..7938da59c --- /dev/null +++ b/docs/sec/CELERYSA-0002.txt @@ -0,0 +1,90 @@ +========================================= + CELERYSA-0002: Celery Security Advisory +========================================= +:contact: security@celeryproject.org +:CVE id: TBA +:date: 2014-07-10 05:00:00 P.M UTC + +Details +======= + +:package: celery +:vulnerability: Environment error +:problem type: local +:risk: low +:versions-affected: 2.5, 3.0, 3.1 + +Description +=========== + +The built-in utility used to daemonize the Celery worker service sets +an insecure umask by default (umask 0). + +This means that any files or directories created by the worker will +end up having world-writable permissions. + +In practice this means that local users will be able to modify and possibly +corrupt the files created by user tasks. + +This is not immediately exploitable but can be if those files are later +evaluated as a program, for example a task that creates Python program files +that are later executed. + +Patches are now available for all maintained versions (see below), +and users are urged to upgrade, even if not directly +affected. 
+ +Acknowledgements +================ + +Special thanks to Red Hat for originally discovering and reporting the issue. + +Systems affected +================ + +Users of Celery versions 3.0, and 3.1, except the recently +released 3.1.13, are affected if daemonizing the +Celery programs using the `--detach` argument or using the `celery multi` program +to start workers in the background, without setting a custom `--umask` +argument. + +Solution +======== + +NOTE: + Not all users of Celery will use it to create files, but if you do + then files may already have been created with insecure permissions. + + So after upgrading, or using the workaround, then please make sure + that files already created are not world writable. + +To work around the issue you can set a custom umask using the ``--umask`` +argument: + + $ celery worker -l info --detach --umask=18 # (022) + +Or you can upgrade to a more recent version: + +- Users of the 3.1 series should upgrade to 3.1.13: + + * ``pip install -U celery``, or + * ``easy_install -U celery``, or + * http://pypi.python.org/pypi/celery/3.1.13 + +- Users of the 3.0 series should upgrade to 3.0.25: + + * ``pip install -U celery==3.0.25``, or + * ``easy_install -U celery==3.0.25``, or + * http://pypi.python.org/pypi/celery/3.0.25 + +Distribution package maintainers are urged to provide their users +with updated packages. + +Please direct questions to the celery-users mailing-list: +http://groups.google.com/group/celery-users/, + +or if you are planning to report a new security related issue we request that +you keep the information confidential by contacting +security@celeryproject.org instead. + +Thank you! diff --git a/docs/tutorials/daemonizing.rst b/docs/tutorials/daemonizing.rst index 0c644584c..9895338e0 100644 --- a/docs/tutorials/daemonizing.rst +++ b/docs/tutorials/daemonizing.rst @@ -52,13 +52,17 @@ must also export them (e.g. 
``export DISPLAY=":0"``) instead they can use the :program:`celery multi` utility (or :program:`celery worker --detach`): - .. code-block:: bash + .. code-block:: console $ celery multi start worker1 \ + -A proj \ --pidfile="$HOME/run/celery/%n.pid" \ - --logfile="$HOME/log/celery/%n.log" + --logfile="$HOME/log/celery/%n%I.log" - $ celery multi restart worker1 --pidfile="$HOME/run/celery/%n.pid" + $ celery multi restart worker1 \ + -A proj \ + --logfile="$HOME/log/celery/%n%I.log" \ + --pidfile="$HOME/run/celery/%n.pid $ celery multi stopwait worker1 --pidfile="$HOME/run/celery/%n.pid" @@ -74,11 +78,13 @@ This is an example configuration for a Python project. .. code-block:: bash # Names of nodes to start - # most will only start one node: + # most people will only start one node: CELERYD_NODES="worker1" # but you can also start multiple and configure settings - # for each in CELERYD_OPTS (see `celery multi --help` for examples). - CELERYD_NODES="worker1 worker2 worker3" + # for each in CELERYD_OPTS (see `celery multi --help` for examples): + #CELERYD_NODES="worker1 worker2 worker3" + # alternatively, you can specify the number of nodes to start: + #CELERYD_NODES=10 # Absolute or relative path to the 'celery' command: CELERY_BIN="/usr/local/bin/celery" @@ -96,9 +102,12 @@ This is an example configuration for a Python project. # Extra command-line arguments to the worker CELERYD_OPTS="--time-limit=300 --concurrency=8" - # %N will be replaced with the first part of the nodename. - CELERYD_LOG_FILE="/var/log/celery/%N.log" - CELERYD_PID_FILE="/var/run/celery/%N.pid" + # Set logging level to DEBUG + #CELERYD_LOG_LEVEL="DEBUG" + + # %n will be replaced with the first part of the nodename. + CELERYD_LOG_FILE="/var/log/celery/%n%I.log" + CELERYD_PID_FILE="/var/run/celery/%n.pid" # Workers should run as an unprivileged user. # You need to create this user manually (or you can choose @@ -110,6 +119,19 @@ This is an example configuration for a Python project. 
# and owned by the userid/group configured. CELERY_CREATE_DIRS=1 +Using a login shell +~~~~~~~~~~~~~~~~~~~ + +You can inherit the environment of the ``CELERYD_USER`` by using a login +shell: + +.. code-block:: bash + + CELERYD_SU_ARGS="-l" + +Note that this is not recommended, and that you should only use this option +when absolutely necessary. + .. _generic-initd-celeryd-django-example: Example Django configuration @@ -153,10 +175,12 @@ Available options directory. * CELERYD_PID_FILE - Full path to the PID file. Default is /var/run/celery/%N.pid + Full path to the PID file. Default is /var/run/celery/%n.pid * CELERYD_LOG_FILE - Full path to the worker log file. Default is /var/log/celery/%N.log + Full path to the worker log file. Default is /var/log/celery/%n%I.log + **Note**: Using `%I` is important when using the prefork pool as having + multiple processes share the same log file will lead to race conditions. * CELERYD_LOG_LEVEL Worker log level. Default is INFO. @@ -211,7 +235,7 @@ This is an example configuration for a Python project: CELERYBEAT_CHDIR="/opt/Myproject/" # Extra arguments to celerybeat - CELERYBEAT_OPTS="--schedule=/var/run/celerybeat-schedule" + CELERYBEAT_OPTS="--schedule=/var/run/celery/celerybeat-schedule" .. _generic-initd-celerybeat-django-example: @@ -265,7 +289,7 @@ Available options * CELERY_CREATE_LOGDIR Always create logfile directory. By default only enable when no custom logfile location set. - + .. _daemon-systemd-generic: Usage systemd @@ -279,10 +303,10 @@ Service file: celery.service :Usage: `systemctl {start|stop|restart|status} celery.service` :Configuration file: /etc/conf.d/celery -To create a temporary folders for the log and pid files change user and group in +To create a temporary folders for the log and pid files change user and group in /usr/lib/tmpfiles.d/celery.conf. -To configure user, group, chdir change settings User, Group and WorkingDirectory defines -in /usr/lib/systemd/system/celery.service. 
+To configure user, group, chdir change settings User, Group and WorkingDirectory defines +in /usr/lib/systemd/system/celery.service. .. _generic-systemd-celery-example: @@ -311,9 +335,11 @@ This is an example configuration for a Python project: # Extra command-line arguments to the worker CELERYD_OPTS="--time-limit=300 --concurrency=8" - # %N will be replaced with the first part of the nodename. - CELERYD_LOG_FILE="/var/log/celery/%N.log" - CELERYD_PID_FILE="/var/run/celery/%N.pid" + # - %n will be replaced with the first part of the nodename. + # - %I will be replaced with the current child process index + # and is important when using the prefork pool to avoid race conditions. + CELERYD_LOG_FILE="/var/log/celery/%n%I.log" + CELERYD_PID_FILE="/var/run/celery/%n.pid" .. _generic-systemd-celeryd-django-example: @@ -339,9 +365,10 @@ This is an example configuration for those using `django-celery`: # Extra command-line arguments to the worker CELERYD_OPTS="--time-limit=300 --concurrency=8" - # %N will be replaced with the first part of the nodename. - CELERYD_LOG_FILE="/var/log/celery/%N.log" - CELERYD_PID_FILE="/var/run/celery/%N.pid" + # - %n will be replaced with the first part of the nodename. + # - %I will be replaced with the current child process index + CELERYD_LOG_FILE="/var/log/celery/%n%I.log" + CELERYD_PID_FILE="/var/run/celery/%n.pid" To add an environment variable such as DJANGO_SETTINGS_MODULE use the Environment in celery.service. @@ -354,7 +381,7 @@ Troubleshooting If you can't get the init scripts to work, you should try running them in *verbose mode*: -.. code-block:: bash +.. code-block:: console # sh -x /etc/init.d/celeryd start @@ -367,9 +394,9 @@ not be able to see them anywhere. For this situation you can use the :envvar:`C_FAKEFORK` environment variable to skip the daemonization step: -.. code-block:: bash +.. 
code-block:: console - C_FAKEFORK=1 sh -x /etc/init.d/celeryd start + # C_FAKEFORK=1 sh -x /etc/init.d/celeryd start and now you should be able to see the errors. @@ -410,9 +437,3 @@ Windows See this excellent external tutorial: http://www.calazan.com/windows-tip-run-applications-in-the-background-using-task-scheduler/ - -CentOS -====== -In CentOS we can take advantage of built-in service helpers, such as the -pid-based status checker function in ``/etc/init.d/functions``. -See the sample script in http://github.com/celery/celery/tree/3.1/extra/centos/. diff --git a/docs/tutorials/debugging.rst b/docs/tutorials/debugging.rst index 7eb8e5cc9..942d565d8 100644 --- a/docs/tutorials/debugging.rst +++ b/docs/tutorials/debugging.rst @@ -52,7 +52,7 @@ information:: If you telnet the port specified you will be presented with a `pdb` shell: -.. code-block:: bash +.. code-block:: console $ telnet localhost 6900 Connected to localhost. diff --git a/docs/tutorials/task-cookbook.rst b/docs/tutorials/task-cookbook.rst index ad772a751..d5bde5f26 100644 --- a/docs/tutorials/task-cookbook.rst +++ b/docs/tutorials/task-cookbook.rst @@ -23,41 +23,51 @@ a Django model called `Feed`. We ensure that it's not possible for two or more workers to import the same feed at the same time by setting a cache key consisting of the MD5 checksum of the feed URL. -The cache key expires after some time in case something unexpected happens -(you never know, right?) +The cache key expires after some time in case something unexpected happens, +and something always will... + +For this reason your task's runtime should not exceed the timeout. + +.. 
code-block:: python from celery import task + from celery.five import monotonic from celery.utils.log import get_task_logger + from contextlib import contextmanager from django.core.cache import cache - from django.utils.hashcompat import md5_constructor as md5 + from hashlib import md5 from djangofeeds.models import Feed logger = get_task_logger(__name__) - LOCK_EXPIRE = 60 * 5 # Lock expires in 5 minutes - - @task - def import_feed(feed_url): + LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes + + @contextmanager + def memcache_lock(lock_id, oid): + timeout_at = monotonic() + LOCK_EXPIRE - 3 + # cache.add fails if the key already exists + status = cache.add(lock_id, oid, LOCK_EXPIRE) + try: + yield status + finally: + # memcache delete is very slow, but we have to use it to take + # advantage of using add() for atomic locking + if monotonic() < timeout_at: + # do not release the lock if we exceeded the timeout + # to lessen the chance of releasing an expired lock + # owned by someone else. + cache.delete(lock_id) + + @task(bind=True) + def import_feed(self, feed_url): # The cache key consists of the task name and the MD5 digest # of the feed URL. 
- feed_url_digest = md5(feed_url).hexdigest() + feed_url_hexdigest = md5(feed_url.encode('utf-8')).hexdigest() lock_id = '{0}-lock-{1}'.format(self.name, feed_url_hexdigest) - - # cache.add fails if if the key already exists - acquire_lock = lambda: cache.add(lock_id, 'true', LOCK_EXPIRE) - # memcache delete is very slow, but we have to use it to take - # advantage of using add() for atomic locking - release_lock = lambda: cache.delete(lock_id) - logger.debug('Importing feed: %s', feed_url) - if acquire_lock(): - try: - feed = Feed.objects.import_feed(feed_url) - finally: - release_lock() - return feed.url - + with memcache_lock(lock_id, self.app.oid) as acquired: + if acquired: + return Feed.objects.import_feed(feed_url).url logger.debug( 'Feed %s is already being imported by another worker', feed_url) diff --git a/docs/userguide/application.rst b/docs/userguide/application.rst index 4ebc142ef..5cff4a2bc 100644 --- a/docs/userguide/application.rst +++ b/docs/userguide/application.rst @@ -12,12 +12,12 @@ The Celery library must be instantiated before use, this instance is called an application (or *app* for short). The application is thread-safe so that multiple Celery applications -with different configuration, components and tasks can co-exist in the +with different configurations, components and tasks can co-exist in the same process space. Let's create one now: -.. code-block:: python +.. code-block:: pycon >>> from celery import Celery >>> app = Celery() @@ -32,18 +32,18 @@ current main module (``__main__``), and the memory address of the object Main Name ========= -Only one of these is important, and that is the main module name, -let's look at why that is. +Only one of these is important, and that is the main module name. +Let's look at why that is. When you send a task message in Celery, that message will not contain any source code, but only the name of the task you want to execute. 
-This works similarly to how host names works on the internet: every worker +This works similarly to how host names work on the internet: every worker maintains a mapping of task names to their actual functions, called the *task registry*. Whenever you define a task, that task will also be added to the local registry: -.. code-block:: python +.. code-block:: pycon >>> @app.task ... def add(x, y): @@ -67,7 +67,8 @@ This is only a problem in a limited set of use cases: #. If the module that the task is defined in is run as a program. #. If the application is created in the Python shell (REPL). -For example here, where the tasks module is also used to start a worker: +For example here, where the tasks module is also used to start a worker +with :meth:`@worker_main`: :file:`tasks.py`: @@ -92,7 +93,7 @@ the tasks will be named starting with "``tasks``" (the real name of the module): You can specify another name for the main module: -.. code-block:: python +.. code-block:: pycon >>> app = Celery('tasks') >>> app.main @@ -114,20 +115,20 @@ There are several options you can set that will change how Celery works. These options can be set directly on the app instance, or you can use a dedicated configuration module. -The configuration is available as :attr:`@Celery.conf`:: +The configuration is available as :attr:`@conf`:: - >>> app.conf.CELERY_TIMEZONE + >>> app.conf.timezone 'Europe/London' where you can also set configuration values directly:: - >>> app.conf.CELERY_ENABLE_UTC = True + >>> app.conf.enable_utc = True and update several keys at once by using the ``update`` method:: >>> app.conf.update( - ... CELERY_ENABLE_UTC=True, - ... CELERY_TIMEZONE='Europe/London', + ... enable_utc=True, + ... timezone='Europe/London', ...) The configuration object consists of multiple dictionaries @@ -137,7 +138,7 @@ that are consulted in order: #. The configuration module (if any) #. The default configuration (:mod:`celery.app.defaults`). 
-You can even add new default sources by using the :meth:`@Celery.add_defaults` +You can even add new default sources by using the :meth:`@add_defaults` method. .. seealso:: @@ -148,13 +149,13 @@ method. ``config_from_object`` ---------------------- -The :meth:`@Celery.config_from_object` method loads configuration +The :meth:`@config_from_object` method loads configuration from a configuration object. This can be a configuration module, or any object with configuration attributes. -Note that any configuration that was previous set will be reset when -:meth:`~@Celery.config_from_object` is called. If you want to set additional +Note that any configuration that was previously set will be reset when +:meth:`~@config_from_object` is called. If you want to set additional configuration you should do so after. Example 1: Using the name of a module @@ -174,15 +175,15 @@ The ``celeryconfig`` module may then look like this: .. code-block:: python - CELERY_ENABLE_UTC = True - CELERY_TIMEZONE = 'Europe/London' + enable_utc = True + timezone = 'Europe/London' Example 2: Using a configuration module ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. tip:: - Using the name of a module is recomended + Using the name of a module is recommended as this means that the module doesn't need to be serialized when the prefork pool is used. 
If you're experiencing configuration pickle errors then please try using @@ -206,8 +207,8 @@ Example 3: Using a configuration class/object app = Celery() class Config: - CELERY_ENABLE_UTC = True - CELERY_TIMEZONE = 'Europe/London' + enable_utc = True + timezone = 'Europe/London' app.config_from_object(Config) # or using the fully qualified name of the object: @@ -216,7 +217,7 @@ Example 3: Using a configuration class/object ``config_from_envvar`` ---------------------- -The :meth:`@Celery.config_from_envvar` takes the configuration module name +The :meth:`@config_from_envvar` takes the configuration module name from an environment variable For example -- to load configuration from a module specified in the @@ -235,7 +236,7 @@ environment variable named :envvar:`CELERY_CONFIG_MODULE`: You can then specify the configuration module to use via the environment: -.. code-block:: bash +.. code-block:: console $ CELERY_CONFIG_MODULE="celeryconfig.prod" celery worker -l info @@ -251,7 +252,7 @@ passwords and API keys. Celery comes with several utilities used for presenting the configuration, one is :meth:`~celery.app.utils.Settings.humanize`: -.. code-block:: python +.. code-block:: pycon >>> app.conf.humanize(with_defaults=False, censored=True) @@ -262,7 +263,7 @@ default keys and values by changing the ``with_defaults`` argument. If you instead want to work with the configuration as a dictionary, then you can use the :meth:`~celery.app.utils.Settings.table` method: -.. code-block:: python +.. code-block:: pycon >>> app.conf.table(with_defaults=False, censored=True) @@ -288,9 +289,9 @@ Creating a :class:`@Celery` instance will only do the following: #. Create the task registry. #. Set itself as the current app (but not if the ``set_as_current`` argument was disabled) - #. Call the :meth:`@Celery.on_init` callback (does nothing by default). + #. Call the :meth:`@on_init` callback (does nothing by default). 
-The :meth:`~@Celery.task` decorator does not actually create the +The :meth:`@task` decorator does not actually create the tasks at the point when it's called, instead it will defer the creation of the task to happen either when the task is used, or after the application has been *finalized*, @@ -298,7 +299,7 @@ application has been *finalized*, This example shows how the task is not created until you use the task, or access an attribute (in this case :meth:`repr`): -.. code-block:: python +.. code-block:: pycon >>> @app.task >>> def add(x, y): @@ -317,7 +318,7 @@ you use the task, or access an attribute (in this case :meth:`repr`): True *Finalization* of the app happens either explicitly by calling -:meth:`@Celery.finalize` -- or implicitly by accessing the :attr:`~@Celery.tasks` +:meth:`@finalize` -- or implicitly by accessing the :attr:`@tasks` attribute. Finalizing the object will: @@ -332,7 +333,7 @@ Finalizing the object will: #. Make sure all tasks are bound to the current app. - Tasks are bound to apps so that it can read default + Tasks are bound to an app so that they can read default values from the configuration. .. _default-app: @@ -409,7 +410,7 @@ In development you can set the :envvar:`CELERY_TRACE_APP` environment variable to raise an exception if the app chain breaks: -.. code-block:: bash +.. code-block:: console $ CELERY_TRACE_APP=1 celery worker -l info @@ -422,14 +423,14 @@ chain breaks: For example, in the beginning it was possible to use any callable as a task: - .. code-block:: python + .. 
code-block:: pycon def hello(to): return 'hello {0}'.format(to) >>> from celery.execute import apply_async - >>> apply_async(hello, ('world!', )) + >>> apply_async(hello, ('world!',)) or you could also create a ``Task`` class to set certain options, or override other behavior @@ -464,8 +465,8 @@ chain breaks: Abstract Tasks ============== -All tasks created using the :meth:`~@Celery.task` decorator -will inherit from the applications base :attr:`~@Celery.Task` class. +All tasks created using the :meth:`~@task` decorator +will inherit from the application's base :attr:`~@Task` class. You can specify a different base class with the ``base`` argument: @@ -504,9 +505,9 @@ Once a task is bound to an app it will read configuration to set default values and so on. It's also possible to change the default base class for an application -by changing its :meth:`@Celery.Task` attribute: +by changing its :meth:`@Task` attribute: -.. code-block:: python +.. code-block:: pycon >>> from celery import Celery, Task @@ -520,7 +521,7 @@ by changing its :meth:`@Celery.Task` attribute: >>> app.Task - >>> @x.task + >>> @app.task ... def add(x, y): ... return x + y diff --git a/docs/userguide/calling.rst b/docs/userguide/calling.rst index 5d2150cbb..f7ce4352e 100644 --- a/docs/userguide/calling.rst +++ b/docs/userguide/calling.rst @@ -39,15 +39,16 @@ The API defines a standard set of execution options, as well as three methods: .. topic:: Quick Cheat Sheet - ``T.delay(arg, kwarg=value)`` - always a shortcut to ``.apply_async``. + Star arguments shortcut to ``.apply_async``. + (``.delay(*args, **kwargs)`` calls ``.apply_async(args, kwargs)``). - - ``T.apply_async((arg, ), {'kwarg': value})`` + - ``T.apply_async((arg,), {'kwarg': value})`` - ``T.apply_async(countdown=10)`` executes 10 seconds from now. 
- ``T.apply_async(eta=now + timedelta(seconds=10))`` - executes 10 seconds from now, specifed using ``eta`` + executes 10 seconds from now, specified using ``eta`` - ``T.apply_async(countdown=60, expires=120)`` executes in one minute from now, but expires after 2 minutes. @@ -95,7 +96,7 @@ called `add`, returning the sum of two arguments: .. topic:: There's another way… You will learn more about this later while reading about the :ref:`Canvas - `, but :class:`~celery.subtask`'s are objects used to pass around + `, but :class:`~celery.signature`'s are objects used to pass around the signature of a task invocation, (for example to send it over the network), and they also support the Calling API: @@ -118,8 +119,8 @@ as a partial argument: .. sidebar:: What is ``s``? - The ``add.s`` call used here is called a subtask, I talk - more about subtasks in the :ref:`canvas guide `, + The ``add.s`` call used here is called a signature, I talk + more about signatures in the :ref:`canvas guide `, where you can also learn about :class:`~celery.chain`, which is a simpler way to chain tasks together. @@ -160,7 +161,9 @@ option: In addition, both the ``link`` and ``link_error`` options can be expressed -as a list:: +as a list: + +.. code-block:: python add.apply_async((2, 2), link=[add.s(16), other_task.s()]) @@ -177,7 +180,7 @@ The ETA (estimated time of arrival) lets you set a specific date and time that is the earliest time at which your task will be executed. `countdown` is a shortcut to set eta by seconds into the future. -.. code-block:: python +.. code-block:: pycon >>> result = add.apply_async((2, 2), countdown=3) >>> result.get() # this takes at least 3 seconds to return @@ -195,7 +198,7 @@ While `countdown` is an integer, `eta` must be a :class:`~datetime.datetime` object, specifying an exact date and time (including millisecond precision, and timezone information): -.. code-block:: python +.. 
code-block:: pycon >>> from datetime import datetime, timedelta @@ -211,7 +214,7 @@ The `expires` argument defines an optional expiry time, either as seconds after task publish, or a specific date and time using :class:`~datetime.datetime`: -.. code-block:: python +.. code-block:: pycon >>> # Task expires after one minute from now. >>> add.apply_async((10, 10), expires=60) @@ -245,8 +248,8 @@ To disable retry you can set the ``retry`` execution option to :const:`False`: .. hlist:: :columns: 2 - - :setting:`CELERY_TASK_PUBLISH_RETRY` - - :setting:`CELERY_TASK_PUBLISH_RETRY_POLICY` + - :setting:`task_publish_retry` + - :setting:`task_publish_retry_policy` Retry Policy ------------ @@ -313,12 +316,17 @@ so every message in Celery has a ``content_type`` header that describes the serialization method used to encode it. The default serializer is :mod:`pickle`, but you can -change this using the :setting:`CELERY_TASK_SERIALIZER` setting, +change this using the :setting:`task_serializer` setting, or for each individual task, or even per message. There's built-in support for :mod:`pickle`, `JSON`, `YAML` and `msgpack`, and you can also add your own custom serializers by registering -them into the Kombu serializer registry (see ref:`kombu:guide-serialization`). +them into the Kombu serializer registry + +.. seealso:: + + :ref:`Message Serialization ` in the Kombu user + guide. Each option has its advantages and disadvantages. @@ -375,12 +383,12 @@ to use when sending a task: 1. The `serializer` execution option. 2. The :attr:`@-Task.serializer` attribute - 3. The :setting:`CELERY_TASK_SERIALIZER` setting. + 3. The :setting:`task_serializer` setting. Example setting a custom serializer for a single task invocation: -.. code-block:: python +.. code-block:: pycon >>> add.apply_async((10, 10), serializer='json') @@ -398,7 +406,7 @@ to use when sending a task: 1. The `compression` execution option. 2. The :attr:`@-Task.compression` attribute. - 3. 
The :setting:`CELERY_MESSAGE_COMPRESSION` attribute. + 3. The :setting:`task_compression` attribute. Example specifying the compression used when calling a task:: @@ -417,7 +425,7 @@ Connections The connection pool is enabled by default since version 2.5. - See the :setting:`BROKER_POOL_LIMIT` setting for more information. + See the :setting:`broker_pool_limit` setting for more information. You can handle the connection manually by creating a publisher: @@ -437,12 +445,12 @@ publisher: Though this particular example is much better expressed as a group: -.. code-block:: python +.. code-block:: pycon >>> from celery import group >>> numbers = [(2, 2), (4, 4), (8, 8), (16, 16)] - >>> res = group(add.subtask(n) for i in numbers).apply_async() + >>> res = group(add.s(i, j) for i, j in numbers).apply_async() >>> res.get() [4, 8, 16, 32] @@ -461,14 +469,14 @@ Simple routing (name <-> name) is accomplished using the ``queue`` option:: You can then assign workers to the ``priority.high`` queue by using the workers :option:`-Q` argument: -.. code-block:: bash +.. code-block:: console - $ celery worker -l info -Q celery,priority.high + $ celery -A proj worker -l info -Q celery,priority.high .. seealso:: Hard-coding queue names in code is not recommended, the best practice - is to use configuration routers (:setting:`CELERY_ROUTES`). + is to use configuration routers (:setting:`task_routes`). To find out more about routing, please see :ref:`guide-routing`. @@ -490,6 +498,6 @@ AMQP's full routing capabilities. Interested parties may read the - priority - A number between `0` and `9`, where `0` is the highest priority. + A number between `0` and `255`, where `255` is the highest priority. 
- Supported by: redis, beanstalk + Supported by: rabbitmq, redis (priority reversed, 0 is highest), beanstalk diff --git a/docs/userguide/canvas.rst b/docs/userguide/canvas.rst index 0afff4dbf..97cb06449 100644 --- a/docs/userguide/canvas.rst +++ b/docs/userguide/canvas.rst @@ -26,10 +26,9 @@ A :func:`~celery.signature` wraps the arguments, keyword arguments, and executio of a single task invocation in a way such that it can be passed to functions or even serialized and sent across the wire. -Signatures are often nicknamed "subtasks" because they describe a task to be called -within a task. +- You can create a signature for the ``add`` task using its name like this: -- You can create a signature for the ``add`` task using its name like this:: + .. code-block:: pycon >>> from celery import signature >>> signature('tasks.add', args=(2, 2), countdown=10) @@ -38,24 +37,32 @@ within a task. This task has a signature of arity 2 (two arguments): ``(2, 2)``, and sets the countdown execution option to 10. -- or you can create one using the task's ``subtask`` method:: +- or you can create one using the task's ``signature`` method: - >>> add.subtask((2, 2), countdown=10) + .. code-block:: pycon + + >>> add.signature((2, 2), countdown=10) tasks.add(2, 2) -- There is also a shortcut using star arguments:: +- There is also a shortcut using star arguments: + + .. code-block:: pycon >>> add.s(2, 2) tasks.add(2, 2) -- Keyword arguments are also supported:: +- Keyword arguments are also supported: + + .. code-block:: pycon >>> add.s(2, 2, debug=True) tasks.add(2, 2, debug=True) -- From any signature instance you can inspect the different fields:: +- From any signature instance you can inspect the different fields: - >>> s = add.subtask((2, 2), {'debug': True}, countdown=10) + .. code-block:: pycon + + >>> s = add.signature((2, 2), {'debug': True}, countdown=10) >>> s.args (2, 2) >>> s.kwargs @@ -66,42 +73,55 @@ within a task. 
- It supports the "Calling API" which means it supports ``delay`` and ``apply_async`` or being called directly. - Calling the signature will execute the task inline in the current process:: + Calling the signature will execute the task inline in the current process: + + .. code-block:: pycon >>> add(2, 2) 4 >>> add.s(2, 2)() 4 - ``delay`` is our beloved shortcut to ``apply_async`` taking star-arguments:: + ``delay`` is our beloved shortcut to ``apply_async`` taking star-arguments: + + .. code-block:: pycon >>> result = add.delay(2, 2) >>> result.get() 4 - ``apply_async`` takes the same arguments as the :meth:`Task.apply_async <@Task.apply_async>` method:: + ``apply_async`` takes the same arguments as the + :meth:`Task.apply_async <@Task.apply_async>` method: + + .. code-block:: pycon >>> add.apply_async(args, kwargs, **options) - >>> add.subtask(args, kwargs, **options).apply_async() + >>> add.signature(args, kwargs, **options).apply_async() >>> add.apply_async((2, 2), countdown=1) - >>> add.subtask((2, 2), countdown=1).apply_async() + >>> add.signature((2, 2), countdown=1).apply_async() - You can't define options with :meth:`~@Task.s`, but a chaining - ``set`` call takes care of that:: + ``set`` call takes care of that: + + .. code-block:: pycon - >>> add.s(2, 2).set(countdown=1) - proj.tasks.add(2, 2) + >>> add.s(2, 2).set(countdown=1) + proj.tasks.add(2, 2) Partials -------- -With a signature, you can execute the task in a worker:: +With a signature, you can execute the task in a worker: + +.. code-block:: pycon >>> add.s(2, 2).delay() >>> add.s(2, 2).apply_async(countdown=1) -Or you can call it directly in the current process:: +Or you can call it directly in the current process: + +.. 
code-block:: pycon >>> add.s(2, 2)() 4 @@ -109,32 +129,40 @@ Or you can call it directly in the current process:: Specifying additional args, kwargs or options to ``apply_async``/``delay`` creates partials: -- Any arguments added will be prepended to the args in the signature:: +- Any arguments added will be prepended to the args in the signature: - >>> partial = add.s(2) # incomplete signature - >>> partial.delay(4) # 2 + 4 - >>> partial.apply_async((4, )) # same + .. code-block:: pycon + + >>> partial = add.s(2) # incomplete signature + >>> partial.delay(4) # 4 + 2 + >>> partial.apply_async((4,)) # same - Any keyword arguments added will be merged with the kwargs in the signature, - with the new keyword arguments taking precedence:: + with the new keyword arguments taking precedence: - >>> s = add.s(2, 2) - >>> s.delay(debug=True) # -> add(2, 2, debug=True) - >>> s.apply_async(kwargs={'debug': True}) # same + .. code-block:: pycon + + >>> s = add.s(2, 2) + >>> s.delay(debug=True) # -> add(2, 2, debug=True) + >>> s.apply_async(kwargs={'debug': True}) # same - Any options added will be merged with the options in the signature, - with the new options taking precedence:: + with the new options taking precedence: + + .. code-block:: pycon + + >>> s = add.signature((2, 2), countdown=10) + >>> s.apply_async(countdown=1) # countdown is now 1 - >>> s = add.subtask((2, 2), countdown=10) - >>> s.apply_async(countdown=1) # countdown is now 1 +You can also clone signatures to create derivatives: -You can also clone signatures to create derivates: +.. code-block:: pycon >>> s = add.s(2) proj.tasks.add(2) - >>> s.clone(args=(4, ), kwargs={'debug': True}) - proj.tasks.add(2, 4, debug=True) + >>> s.clone(args=(4,), kwargs={'debug': True}) + proj.tasks.add(4, 2, debug=True) Immutability ------------ @@ -145,11 +173,15 @@ Partials are meant to be used with callbacks, any tasks linked or chord callbacks will be applied with the result of the parent task. 
Sometimes you want to specify a callback that does not take additional arguments, and in that case you can set the signature -to be immutable:: +to be immutable: - >>> add.apply_async((2, 2), link=reset_buffers.subtask(immutable=True)) +.. code-block:: pycon -The ``.si()`` shortcut can also be used to create immutable signatures:: + >>> add.apply_async((2, 2), link=reset_buffers.signature(immutable=True)) + +The ``.si()`` shortcut can also be used to create immutable signatures: + +.. code-block:: pycon >>> add.apply_async((2, 2), link=reset_buffers.si()) @@ -160,7 +192,9 @@ so it's not possible to call the signature with partial args/kwargs. In this tutorial I sometimes use the prefix operator `~` to signatures. You probably shouldn't use it in your production code, but it's a handy shortcut - when experimenting in the Python shell:: + when experimenting in the Python shell: + + .. code-block:: pycon >>> ~sig @@ -176,7 +210,9 @@ Callbacks .. versionadded:: 3.0 Callbacks can be added to any task using the ``link`` argument -to ``apply_async``:: +to ``apply_async``: + +.. code-block:: pycon add.apply_async((2, 2), link=other_task.s()) @@ -186,18 +222,24 @@ and it will be applied with the return value of the parent task as argument. As I mentioned earlier, any arguments you add to a signature, will be prepended to the arguments specified by the signature itself! -If you have the signature:: +If you have the signature: + +.. code-block:: pycon >>> sig = add.s(10) -then `sig.delay(result)` becomes:: +then `sig.delay(result)` becomes: + +.. code-block:: pycon >>> add.apply_async(args=(result, 10)) ... Now let's call our ``add`` task with a callback using partial -arguments:: +arguments: + +.. code-block:: pycon >>> add.apply_async((2, 2), link=add.s(8)) @@ -233,7 +275,9 @@ The Primitives a temporary task where a list of arguments is applied to the task. E.g. 
``task.map([1, 2])`` results in a single task being called, applying the arguments in order to the task function so - that the result is:: + that the result is: + + .. code-block:: python res = [task(1), task(2)] @@ -241,13 +285,17 @@ The Primitives Works exactly like map except the arguments are applied as ``*args``. For example ``add.starmap([(2, 2), (4, 4)])`` results in a single - task calling:: + task calling: + + .. code-block:: python res = [add(2, 2), add(4, 4)] - ``chunks`` - Chunking splits a long list of arguments into parts, e.g the operation:: + Chunking splits a long list of arguments into parts, e.g the operation: + + .. code-block:: pycon >>> items = zip(xrange(1000), xrange(1000)) # 1000 items >>> add.chunks(items, 10) @@ -266,16 +314,18 @@ Here's some examples: Here's a simple chain, the first task executes passing its return value to the next task in the chain, and so on. - .. code-block:: python + .. code-block:: pycon >>> from celery import chain - # 2 + 2 + 4 + 8 + >>> # 2 + 2 + 4 + 8 >>> res = chain(add.s(2, 2), add.s(4), add.s(8))() >>> res.get() 16 - This can also be written using pipes:: + This can also be written using pipes: + + .. code-block:: pycon >>> (add.s(2, 2) | add.s(4) | add.s(8))().get() 16 @@ -287,15 +337,21 @@ Here's some examples: for example if you don't want the result of the previous task in a chain. In that case you can mark the signature as immutable, so that the arguments - cannot be changed:: + cannot be changed: + + .. code-block:: pycon - >>> add.subtask((2, 2), immutable=True) + >>> add.signature((2, 2), immutable=True) - There's also an ``.si`` shortcut for this:: + There's also an ``.si`` shortcut for this: + + .. code-block:: pycon >>> add.si(2, 2) - Now you can create a chain of independent tasks instead:: + Now you can create a chain of independent tasks instead: + + .. 
code-block:: pycon >>> res = (add.si(2, 2) | add.si(4, 4) | add.s(8, 8))() >>> res.get() @@ -309,7 +365,9 @@ Here's some examples: - Simple group - You can easily create a group of tasks to execute in parallel:: + You can easily create a group of tasks to execute in parallel: + + .. code-block:: pycon >>> from celery import group >>> res = group(add.s(i, i) for i in xrange(10))() @@ -320,7 +378,9 @@ Here's some examples: The chord primitive enables us to add callback to be called when all of the tasks in a group have finished executing, which is often - required for algorithms that aren't embarrassingly parallel:: + required for algorithms that aren't embarrassingly parallel: + + .. code-block:: pycon >>> from celery import chord >>> res = chord((add.s(i, i) for i in xrange(10)), xsum.s())() @@ -332,7 +392,9 @@ Here's some examples: into a list and sent to the ``xsum`` task. The body of a chord can also be immutable, so that the return value - of the group is not passed on to the callback:: + of the group is not passed on to the callback: + + .. code-block:: pycon >>> chord((import_contact.s(c) for c in contacts), ... notify_complete.si(import_id)).apply_async() @@ -341,7 +403,9 @@ Here's some examples: - Blow your mind by combining - Chains can be partial too:: + Chains can be partial too: + + .. code-block:: pycon >>> c1 = (add.s(4) | mul.s(8)) @@ -350,7 +414,9 @@ Here's some examples: >>> res.get() 160 - Which means that you can combine chains:: + Which means that you can combine chains: + + .. code-block:: pycon # ((4 + 16) * 2 + 4) * 8 >>> c2 = (add.s(4, 16) | mul.s(2) | (add.s(4) | mul.s(8))) @@ -360,7 +426,9 @@ Here's some examples: 352 Chaining a group together with another task will automatically - upgrade it to be a chord:: + upgrade it to be a chord: + + .. 
code-block:: pycon >>> c3 = (group(add.s(i, i) for i in xrange(10)) | xsum.s()) >>> res = c3() @@ -368,7 +436,9 @@ Here's some examples: 90 Groups and chords accepts partial arguments too, so in a chain - the return value of the previous task is forwarded to all tasks in the group:: + the return value of the previous task is forwarded to all tasks in the group: + + .. code-block:: pycon >>> new_user_workflow = (create_user.s() | group( @@ -381,7 +451,9 @@ Here's some examples: If you don't want to forward arguments to the group then - you can make the signatures in the group immutable:: + you can make the signatures in the group immutable: + + .. code-block:: pycon >>> res = (add.s(4, 4) | group(add.si(i, i) for i in xrange(10)))() >>> res.get() @@ -409,7 +481,9 @@ Chains .. versionadded:: 3.0 Tasks can be linked together, which in practice means adding -a callback task:: +a callback task: + +.. code-block:: pycon >>> res = add.apply_async((2, 2), link=mul.s(16)) >>> res.get() @@ -419,8 +493,10 @@ The linked task will be applied with the result of its parent task as the first argument, which in the above case will result in ``mul(4, 16)`` since the result is 4. -The results will keep track of what subtasks a task applies, -and this can be accessed from the result instance:: +The results will keep track of any subtasks called by the original task, +and this can be accessed from the result instance: + +.. code-block:: pycon >>> res.children [] @@ -430,7 +506,9 @@ and this can be accessed from the result instance:: The result instance also has a :meth:`~@AsyncResult.collect` method that treats the result as a graph, enabling you to iterate over -the results:: +the results: + +.. 
code-block:: pycon >>> list(res.collect()) [(, 4), @@ -440,23 +518,29 @@ By default :meth:`~@AsyncResult.collect` will raise an :exc:`~@IncompleteStream` exception if the graph is not fully formed (one of the tasks has not completed yet), but you can get an intermediate representation of the graph -too:: +too: + +.. code-block:: pycon >>> for result, value in res.collect(intermediate=True)): .... You can link together as many tasks as you like, -and signatures can be linked too:: +and signatures can be linked too: + +.. code-block:: pycon >>> s = add.s(2, 2) >>> s.link(mul.s(4)) >>> s.link(log_result.s()) -You can also add *error callbacks* using the ``link_error`` argument:: +You can also add *error callbacks* using the ``link_error`` argument: + +.. code-block:: pycon >>> add.apply_async((2, 2), link_error=log_error.s()) - >>> add.subtask((2, 2), link_error=log_error.s()) + >>> add.signature((2, 2), link_error=log_error.s()) Since exceptions can only be serialized when pickle is used the error callbacks take the id of the parent task as argument instead: @@ -479,25 +563,29 @@ To make it even easier to link tasks together there is a special signature called :class:`~celery.chain` that lets you chain tasks together: -.. code-block:: python +.. code-block:: pycon >>> from celery import chain >>> from proj.tasks import add, mul - # (4 + 4) * 8 * 10 + >>> # (4 + 4) * 8 * 10 >>> res = chain(add.s(4, 4), mul.s(8), mul.s(10)) proj.tasks.add(4, 4) | proj.tasks.mul(8) | proj.tasks.mul(10) Calling the chain will call the tasks in the current process -and return the result of the last task in the chain:: +and return the result of the last task in the chain: + +.. code-block:: pycon >>> res = chain(add.s(4, 4), mul.s(8), mul.s(10))() >>> res.get() 640 It also sets ``parent`` attributes so that you can -work your way up the chain to get intermediate results:: +work your way up the chain to get intermediate results: + +.. 
code-block:: pycon >>> res.parent.get() 64 @@ -509,7 +597,9 @@ work your way up the chain to get intermediate results:: -Chains can also be made using the ``|`` (pipe) operator:: +Chains can also be made using the ``|`` (pipe) operator: + +.. code-block:: pycon >>> (add.s(2, 2) | mul.s(8) | mul.s(10)).apply_async() @@ -519,7 +609,7 @@ Graphs In addition you can work with the result graph as a :class:`~celery.datastructures.DependencyGraph`: -.. code-block:: python +.. code-block:: pycon >>> res = chain(add.s(4, 4), mul.s(8), mul.s(10))() @@ -530,7 +620,9 @@ In addition you can work with the result graph as a 285fa253-fcf8-42ef-8b95-0078897e83e6(1) 463afec2-5ed4-4036-b22d-ba067ec64f52(0) -You can even convert these graphs to *dot* format:: +You can even convert these graphs to *dot* format: + +.. code-block:: pycon >>> with open('graph.dot', 'w') as fh: ... res.parent.parent.graph.to_dot(fh) @@ -538,7 +630,7 @@ You can even convert these graphs to *dot* format:: and create images: -.. code-block:: bash +.. code-block:: console $ dot -Tpng graph.dot -o graph.png @@ -553,7 +645,9 @@ Groups A group can be used to execute several tasks in parallel. -The :class:`~celery.group` function takes a list of signatures:: +The :class:`~celery.group` function takes a list of signatures: + +.. code-block:: pycon >>> from celery import group >>> from proj.tasks import add @@ -564,14 +658,18 @@ The :class:`~celery.group` function takes a list of signatures:: If you **call** the group, the tasks will be applied one after one in the current process, and a :class:`~celery.result.GroupResult` instance is returned which can be used to keep track of the results, -or tell how many tasks are ready and so on:: +or tell how many tasks are ready and so on: + +.. code-block:: pycon >>> g = group(add.s(2, 2), add.s(4, 4)) >>> res = g() >>> res.get() [4, 8] -Group also supports iterators:: +Group also supports iterators: + +.. 
code-block:: pycon >>> group(add.s(i, i) for i in xrange(100))() @@ -583,7 +681,9 @@ Group Results The group task returns a special result too, this result works just like normal task results, except -that it works on the group as a whole:: +that it works on the group as a whole: + +.. code-block:: pycon >>> from celery import group >>> from tasks import add @@ -656,7 +756,7 @@ Chords Tasks used within a chord must *not* ignore their results. If the result backend is disabled for *any* task (header or body) in your chord you should read ":ref:`chord-important-notes`". - + A chord is a task that only executes after all of the tasks in a group have finished executing. @@ -680,7 +780,9 @@ already a standard function): Now you can use a chord to calculate each addition step in parallel, and then -get the sum of the resulting numbers:: +get the sum of the resulting numbers: + +.. code-block:: pycon >>> from celery import chord >>> from tasks import add, tsum @@ -691,9 +793,11 @@ get the sum of the resulting numbers:: This is obviously a very contrived example, the overhead of messaging and -synchronization makes this a lot slower than its Python counterpart:: +synchronization makes this a lot slower than its Python counterpart: + +.. code-block:: pycon - sum(i + i for i in xrange(100)) + >>> sum(i + i for i in xrange(100)) The synchronization step is costly, so you should avoid using chords as much as possible. Still, the chord is a powerful primitive to have in your toolbox @@ -701,7 +805,7 @@ as synchronization is a required step for many parallel algorithms. Let's break the chord expression down: -.. code-block:: python +.. code-block:: pycon >>> callback = tsum.s() >>> header = [add.s(i, i) for i in range(100)] @@ -724,33 +828,27 @@ Error handling So what happens if one of the tasks raises an exception? -This was not documented for some time and before version 3.1 -the exception value will be forwarded to the chord callback. 
- - -From 3.1 errors will propagate to the callback, so the callback will not be executed +Errors will propagate to the callback, so the callback will not be executed instead the callback changes to failure state, and the error is set to the :exc:`~@ChordError` exception: -.. code-block:: python +.. code-block:: pycon >>> c = chord([add.s(4, 4), raising_task.s(), add.s(8, 8)]) >>> result = c() >>> result.get() + +.. code-block:: pytb + Traceback (most recent call last): File "", line 1, in File "*/celery/result.py", line 120, in get interval=interval) File "*/celery/backends/amqp.py", line 150, in wait_for - raise self.exception_to_python(meta['result']) + raise meta['result'] celery.exceptions.ChordError: Dependency 97de6f3f-ea67-4517-a21c-d867c61fcb47 raised ValueError('something something',) -If you're running 3.0.14 or later you can enable the new behavior via -the :setting:`CELERY_CHORD_PROPAGATES` setting:: - - CELERY_CHORD_PROPAGATES = True - While the traceback may be different depending on which result backend is being used, you can see the error description includes the id of the task that failed and a string representation of the original exception. You can also @@ -767,8 +865,8 @@ Important Notes ~~~~~~~~~~~~~~~ Tasks used within a chord must *not* ignore their results. In practice this -means that you must enable a :const:`CELERY_RESULT_BACKEND` in order to use -chords. Additionally, if :const:`CELERY_IGNORE_RESULT` is set to :const:`True` +means that you must enable a :const:`result_backend` in order to use +chords. Additionally, if :const:`task_ignore_result` is set to :const:`True` in your configuration, be sure that the individual tasks to be used within the chord are defined with :const:`ignore_result=False`. This applies to both Task subclasses and decorated tasks. @@ -845,7 +943,7 @@ They differ from group in that For example using ``map``: -.. code-block:: python +.. 
code-block:: pycon >>> from proj.tasks import add @@ -860,7 +958,9 @@ is the same as having a task doing: def temp(): return [xsum(range(10)), xsum(range(100))] -and using ``starmap``:: +and using ``starmap``: + +.. code-block:: pycon >>> ~add.starmap(zip(range(10), range(10))) [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] @@ -875,7 +975,9 @@ is the same as having a task doing: Both ``map`` and ``starmap`` are signature objects, so they can be used as other signatures and combined in groups etc., for example -to call the starmap after 10 seconds:: +to call the starmap after 10 seconds: + +.. code-block:: pycon >>> add.starmap(zip(range(10), range(10))).apply_async(countdown=10) @@ -895,14 +997,14 @@ it may considerably increase performance. To create a chunks signature you can use :meth:`@Task.chunks`: -.. code-block:: python +.. code-block:: pycon >>> add.chunks(zip(range(100), range(100)), 10) As with :class:`~celery.group` the act of sending the messages for the chunks will happen in the current process when called: -.. code-block:: python +.. code-block:: pycon >>> from proj.tasks import add @@ -921,16 +1023,22 @@ the chunks will happen in the current process when called: while calling ``.apply_async`` will create a dedicated task so that the individual tasks are applied in a worker -instead:: +instead: + +.. code-block:: pycon - >>> add.chunks(zip(range(100), range(100), 10)).apply_async() + >>> add.chunks(zip(range(100), range(100)), 10).apply_async() -You can also convert chunks to a group:: +You can also convert chunks to a group: - >>> group = add.chunks(zip(range(100), range(100), 10)).group() +.. code-block:: pycon + + >>> group = add.chunks(zip(range(100), range(100)), 10).group() and with the group skew the countdown of each task by increments -of one:: +of one: + +.. 
code-block:: pycon >>> group.skew(start=1, stop=10)() diff --git a/docs/userguide/concurrency/eventlet.rst b/docs/userguide/concurrency/eventlet.rst index d1545b211..01f98bfb3 100644 --- a/docs/userguide/concurrency/eventlet.rst +++ b/docs/userguide/concurrency/eventlet.rst @@ -42,9 +42,9 @@ Enabling Eventlet You can enable the Eventlet pool by using the ``-P`` option to :program:`celery worker`: -.. code-block:: bash +.. code-block:: console - $ celery worker -P eventlet -c 1000 + $ celery -A proj worker -P eventlet -c 1000 .. _eventlet-examples: diff --git a/docs/userguide/extending.rst b/docs/userguide/extending.rst index 41878034f..0713a93c1 100644 --- a/docs/userguide/extending.rst +++ b/docs/userguide/extending.rst @@ -46,7 +46,7 @@ whenever the connection is established: def send_me_a_message(self, who='world!', producer=None): with app.producer_or_acquire(producer) as producer: - producer.send( + producer.publish( {'hello': who}, serializer='json', exchange=my_queue.exchange, @@ -65,7 +65,7 @@ whenever the connection is established: mechanisms. The first one is the ``callbacks`` argument which accepts a list of callbacks with a ``(body, message)`` signature, the second one is the ``on_message`` argument which takes a single - callback with a ``(message, )`` signature. The latter will not + callback with a ``(message,)`` signature. The latter will not automatically decode and deserialize the payload which is useful in many cases: @@ -106,6 +106,7 @@ and the worker currently defines two blueprints: **Worker**, and **Consumer** ---------------------------------------------------------- +.. _extending-worker_blueprint: Worker ====== @@ -118,21 +119,31 @@ to the Consumer blueprint. The :class:`~celery.worker.WorkController` is the core worker implementation, and contains several methods and attributes that you can use in your bootstep. +.. _extending-worker_blueprint-attributes: + Attributes ---------- +.. _extending-worker-app: + .. 
attribute:: app The current app instance. +.. _extending-worker-hostname: + .. attribute:: hostname The workers node name (e.g. `worker1@example.com`) +.. _extending-worker-blueprint: + .. attribute:: blueprint This is the worker :class:`~celery.bootsteps.Blueprint`. +.. _extending-worker-hub: + .. attribute:: hub Event loop object (:class:`~kombu.async.Hub`). You can use @@ -146,7 +157,9 @@ Attributes .. code-block:: python class WorkerStep(bootsteps.StartStopStep): - requires = ('celery.worker.components:Hub', ) + requires = ('celery.worker.components:Hub',) + +.. _extending-worker-pool: .. attribute:: pool @@ -158,7 +171,9 @@ Attributes .. code-block:: python class WorkerStep(bootsteps.StartStopStep): - requires = ('celery.worker.components:Pool', ) + requires = ('celery.worker.components:Pool',) + +.. _extending-worker-timer: .. attribute:: timer @@ -169,7 +184,9 @@ Attributes .. code-block:: python class WorkerStep(bootsteps.StartStopStep): - requires = ('celery.worker.components:Timer', ) + requires = ('celery.worker.components:Timer',) + +.. _extending-worker-statedb: .. attribute:: statedb @@ -183,7 +200,9 @@ Attributes .. code-block:: python class WorkerStep(bootsteps.StartStopStep): - requires = ('celery.worker.components:Statedb', ) + requires = ('celery.worker.components:Statedb',) + +.. _extending-worker-autoscaler: .. attribute:: autoscaler @@ -197,7 +216,9 @@ Attributes .. code-block:: python class WorkerStep(bootsteps.StartStopStep): - requires = ('celery.worker.autoscaler:Autoscaler', ) + requires = ('celery.worker.autoscaler:Autoscaler',) + +.. _extending-worker-autoreloader: .. attribute:: autoreloader @@ -210,7 +231,10 @@ Attributes .. 
code-block:: python class WorkerStep(bootsteps.StartStopStep): - requires = ('celery.worker.autoreloader:Autoreloader', ) + requires = ('celery.worker.autoreloader:Autoreloader',) + +Example worker bootstep +----------------------- An example Worker bootstep could be: @@ -219,7 +243,7 @@ An example Worker bootstep could be: from celery import bootsteps class ExampleWorkerStep(bootsteps.StartStopStep): - requires = ('Pool', ) + requires = ('Pool',) def __init__(self, worker, **kwargs): print('Called when the WorkController instance is constructed') @@ -243,7 +267,6 @@ An example Worker bootstep could be: Every method is passed the current ``WorkController`` instance as the first argument. - Another example could use the timer to wake up at regular intervals: .. code-block:: python @@ -252,7 +275,7 @@ Another example could use the timer to wake up at regular intervals: class DeadlockDetection(bootsteps.StartStopStep): - requires = ('Timer', ) + requires = ('Timer',) def __init__(self, worker, deadlock_timeout=3600): self.timeout = deadlock_timeout @@ -262,7 +285,7 @@ Another example could use the timer to wake up at regular intervals: def start(self, worker): # run every 30 seconds. self.tref = worker.timer.call_repeatedly( - 30.0, self.detect, (worker, ), priority=10, + 30.0, self.detect, (worker,), priority=10, ) def stop(self, worker): @@ -276,6 +299,8 @@ Another example could use the timer to wake up at regular intervals: if req.time_start and time() - req.time_start > self.timeout: raise SystemExit() +.. _extending-consumer_blueprint: + Consumer ======== @@ -289,25 +314,37 @@ be possible to restart your blueprint. An additional 'shutdown' method is defined for consumer bootsteps, this method is called when the worker is shutdown. +.. _extending-consumer-attributes: + Attributes ---------- +.. _extending-consumer-app: + .. attribute:: app The current app instance. +.. _extending-consumer-controller: + .. 
attribute:: controller The parent :class:`~@WorkController` object that created this consumer. +.. _extending-consumer-hostname: + .. attribute:: hostname The workers node name (e.g. `worker1@example.com`) +.. _extending-consumer-blueprint: + .. attribute:: blueprint This is the worker :class:`~celery.bootsteps.Blueprint`. +.. _extending-consumer-hub: + .. attribute:: hub Event loop object (:class:`~kombu.async.Hub`). You can use @@ -321,8 +358,9 @@ Attributes .. code-block:: python class WorkerStep(bootsteps.StartStopStep): - requires = ('celery.worker:Hub', ) + requires = ('celery.worker:Hub',) +.. _extending-consumer-connection: .. attribute:: connection @@ -334,7 +372,9 @@ Attributes .. code-block:: python class Step(bootsteps.StartStopStep): - requires = ('celery.worker.consumer:Connection', ) + requires = ('celery.worker.consumer:Connection',) + +.. _extending-consumer-event_dispatcher: .. attribute:: event_dispatcher @@ -345,29 +385,84 @@ Attributes .. code-block:: python class Step(bootsteps.StartStopStep): - requires = ('celery.worker.consumer:Events', ) + requires = ('celery.worker.consumer:Events',) + +.. _extending-consumer-gossip: .. attribute:: gossip Worker to worker broadcast communication - (class:`~celery.worker.consumer.Gossip`). + (:class:`~celery.worker.consumer.Gossip`). A consumer bootstep must require the `Gossip` bootstep to use this. .. 
code-block:: python

-        class Step(bootsteps.StartStopStep):
-            requires = ('celery.worker.consumer:Events', )
+        class RatelimitStep(bootsteps.StartStopStep):
+            """Rate limit tasks based on the number of workers in the
+            cluster."""
+            requires = ('celery.worker.consumer:Gossip',)
+
+            def start(self, c):
+                self.c = c
+                self.c.gossip.on.node_join.add(self.on_cluster_size_change)
+                self.c.gossip.on.node_leave.add(self.on_cluster_size_change)
+                self.c.gossip.on.node_lost.add(self.on_node_lost)
+                self.tasks = [
+                    self.app.tasks['proj.tasks.add'],
+                    self.app.tasks['proj.tasks.mul'],
+                ]
+                self.last_size = None
+
+            def on_cluster_size_change(self, worker):
+                cluster_size = len(self.c.gossip.state.alive_workers())
+                if cluster_size != self.last_size:
+                    for task in self.tasks:
+                        task.rate_limit = 1.0 / cluster_size
+                    self.c.reset_rate_limits()
+                    self.last_size = cluster_size
+
+            def on_node_lost(self, worker):
+                # may have processed heartbeat too late, so wake up soon
+                # in order to see if the worker recovered.
+                self.c.timer.call_after(10.0, self.on_cluster_size_change)
+
+    **Callbacks**
+
+    - ``gossip.on.node_join``
+
+        Called whenever a new node joins the cluster, providing a
+        :class:`~celery.events.state.Worker` instance.
+
+    - ``gossip.on.node_leave``
+
+        Called whenever a new node leaves the cluster (shuts down),
+        providing a :class:`~celery.events.state.Worker` instance.
+
+    - ``gossip.on.node_lost``
+
+        Called whenever heartbeat was missed for a worker instance in the
+        cluster (heartbeat not received or processed in time),
+        providing a :class:`~celery.events.state.Worker` instance.
+
+        This does not necessarily mean the worker is actually offline, so use a
+        timeout mechanism if the default heartbeat timeout is not sufficient.
+
+.. _extending-consumer-pool:

 .. attribute:: pool

     The current process/eventlet/gevent/thread pool.
     See :class:`celery.concurrency.base.BasePool`.

+.. _extending-consumer-timer:
+
 .. 
attribute:: timer :class:`Timer >> app = Celery() >>> app.steps['worker'].add(MyWorkerStep) # < add class, do not instantiate @@ -554,7 +662,7 @@ information about the boot process:: [2013-05-29 16:18:20,511: DEBUG/MainProcess] | Worker: Building graph... is in init [2013-05-29 16:18:20,511: DEBUG/MainProcess] | Worker: New boot order: - {Hub, Queues (intra), Pool, Autoreloader, Timer, StateDB, + {Hub, Pool, Autoreloader, Timer, StateDB, Autoscaler, InfoStep, Beat, Consumer} [2013-05-29 16:18:20,514: DEBUG/MainProcess] | Consumer: Preparing bootsteps. [2013-05-29 16:18:20,514: DEBUG/MainProcess] | Consumer: Building graph... @@ -613,7 +721,7 @@ Command-specific options ~~~~~~~~~~~~~~~~~~~~~~~~ You can add additional command-line options to the ``worker``, ``beat`` and -``events`` commands by modifying the :attr:`~@Celery.user_options` attribute of the +``events`` commands by modifying the :attr:`~@user_options` attribute of the application instance. Celery commands uses the :mod:`optparse` module to parse command-line @@ -768,7 +876,7 @@ will take some time so other transports still use a threading-based solution. Add callback to be called when ``fd`` is readable. - The callback will stay registered until explictly removed using + The callback will stay registered until explicitly removed using :meth:`hub.remove(fd) `, or the fd is automatically discarded because it's no longer valid. diff --git a/docs/userguide/monitoring.rst b/docs/userguide/monitoring.rst index e5cffe837..0009f1946 100644 --- a/docs/userguide/monitoring.rst +++ b/docs/userguide/monitoring.rst @@ -31,13 +31,13 @@ and manage worker nodes (and to some degree tasks). To list all the commands available do: -.. code-block:: bash +.. code-block:: console $ celery help or to get help for a specific command do: -.. code-block:: bash +.. code-block:: console $ celery --help @@ -56,51 +56,52 @@ Commands * **status**: List active nodes in this cluster - .. code-block:: bash + .. 
code-block:: console - $ celery status + $ celery -A proj status * **result**: Show the result of a task - .. code-block:: bash + .. code-block:: console - $ celery result -t tasks.add 4e196aa4-0141-4601-8138-7aa33db0f577 + $ celery -A proj result -t tasks.add 4e196aa4-0141-4601-8138-7aa33db0f577 Note that you can omit the name of the task as long as the task doesn't use a custom result backend. * **purge**: Purge messages from all configured task queues. - .. code-block:: bash - - $ celery purge - .. warning:: There is no undo for this operation, and messages will be permanently deleted! + .. code-block:: console + + $ celery -A proj purge + + * **inspect active**: List active tasks - .. code-block:: bash + .. code-block:: console - $ celery inspect active + $ celery -A proj inspect active These are all the tasks that are currently being executed. * **inspect scheduled**: List scheduled ETA tasks - .. code-block:: bash + .. code-block:: console - $ celery inspect scheduled + $ celery -A proj inspect scheduled These are tasks reserved by the worker because they have the `eta` or `countdown` argument set. * **inspect reserved**: List reserved tasks - .. code-block:: bash + .. code-block:: console - $ celery inspect reserved + $ celery -A proj inspect reserved This will list all tasks that have been prefetched by the worker, and is currently waiting to be executed (does not include tasks @@ -108,39 +109,39 @@ Commands * **inspect revoked**: List history of revoked tasks - .. code-block:: bash + .. code-block:: console - $ celery inspect revoked + $ celery -A proj inspect revoked * **inspect registered**: List registered tasks - .. code-block:: bash + .. code-block:: console - $ celery inspect registered + $ celery -A proj inspect registered * **inspect stats**: Show worker statistics (see :ref:`worker-statistics`) - .. code-block:: bash + .. code-block:: console - $ celery inspect stats + $ celery -A proj inspect stats * **control enable_events**: Enable events - .. 
code-block:: bash + .. code-block:: console - $ celery control enable_events + $ celery -A proj control enable_events * **control disable_events**: Disable events - .. code-block:: bash + .. code-block:: console - $ celery control disable_events + $ celery -A proj control disable_events * **migrate**: Migrate tasks from one broker to another (**EXPERIMENTAL**). - .. code-block:: bash + .. code-block:: console - $ celery migrate redis://localhost amqp://localhost + $ celery -A proj migrate redis://localhost amqp://localhost This command will migrate all the tasks on one broker to another. As this command is new and experimental you should be sure to have @@ -162,11 +163,11 @@ By default the inspect and control commands operates on all workers. You can specify a single, or a list of workers by using the `--destination` argument: -.. code-block:: bash +.. code-block:: console - $ celery inspect -d w1,w2 reserved + $ celery -A proj inspect -d w1,w2 reserved - $ celery control -d w1,w2 enable_events + $ celery -A proj control -d w1,w2 enable_events .. _monitoring-flower: @@ -187,16 +188,16 @@ Features - Real-time monitoring using Celery Events - - Task progress and history. + - Task progress and history - Ability to show task details (arguments, start time, runtime, and more) - Graphs and statistics - Remote Control - - View worker status and statistics. - - Shutdown and restart worker instances. - - Control worker pool size and autoscale settings. - - View and modify the queues a worker instance consumes from. 
+ - View worker status and statistics + - Shutdown and restart worker instances + - Control worker pool size and autoscale settings + - View and modify the queues a worker instance consumes from - View currently running tasks - View scheduled tasks (ETA/countdown) - View reserved and revoked tasks @@ -205,6 +206,25 @@ Features - Revoke or terminate tasks - HTTP API + + - List workers + - Shut down a worker + - Restart worker’s pool + - Grow worker’s pool + - Shrink worker’s pool + - Autoscale worker pool + - Start consuming from a queue + - Stop consuming from a queue + - List tasks + - List (seen) task types + - Get a task info + - Execute a task + - Execute a task by name + - Get a task result + - Change soft and hard time limits for a task + - Change rate limit for a task + - Revoke a task + - OpenID authentication **Screenshots** @@ -224,25 +244,25 @@ Usage You can use pip to install Flower: -.. code-block:: bash +.. code-block:: console $ pip install flower Running the flower command will start a web-server that you can visit: -.. code-block:: bash +.. code-block:: console - $ celery flower + $ celery -A proj flower The default port is http://localhost:5555, but you can change this using the `--port` argument: -.. code-block:: bash +.. code-block:: console - $ celery flower --port=5555 + $ celery -A proj flower --port=5555 Broker URL can also be passed through the `--broker` argument : -.. code-block:: bash +.. code-block:: console $ celery flower --broker=amqp://guest:guest@localhost:5672// or @@ -250,10 +270,15 @@ Broker URL can also be passed through the `--broker` argument : Then, you can visit flower in your web browser : -.. code-block:: bash +.. code-block:: console $ open http://localhost:5555 +Flower has many more features than are detailed here, including +authorization options. Check out the `official documentation`_ for more +information. + +.. _official documentation: http://flower.readthedocs.org/en/latest/ .. 
_monitoring-celeryev: @@ -271,9 +296,9 @@ probably want to use Flower instead. Starting: -.. code-block:: bash +.. code-block:: console - $ celery events + $ celery -A proj events You should see a screen like: @@ -283,19 +308,19 @@ You should see a screen like: `celery events` is also used to start snapshot cameras (see :ref:`monitoring-snapshots`: -.. code-block:: bash +.. code-block:: console - $ celery events --camera= --frequency=1.0 + $ celery -A proj events --camera= --frequency=1.0 and it includes a tool to dump events to :file:`stdout`: -.. code-block:: bash +.. code-block:: console - $ celery events --dump + $ celery -A proj events --dump For a complete list of options use ``--help``: -.. code-block:: bash +.. code-block:: console $ celery events --help @@ -330,7 +355,7 @@ Inspecting queues Finding the number of tasks in a queue: -.. code-block:: bash +.. code-block:: console $ rabbitmqctl list_queues name messages messages_ready \ messages_unacknowledged @@ -345,13 +370,13 @@ not acknowledged yet (meaning it is in progress, or has been reserved). Finding the number of workers currently consuming from a queue: -.. code-block:: bash +.. code-block:: console $ rabbitmqctl list_queues name consumers Finding the amount of memory allocated to a queue: -.. code-block:: bash +.. code-block:: console $ rabbitmqctl list_queues name memory @@ -374,13 +399,13 @@ Inspecting queues Finding the number of tasks in a queue: -.. code-block:: bash +.. code-block:: console $ redis-cli -h HOST -p PORT -n DATABASE_NUMBER llen QUEUE_NAME The default queue is named `celery`. To get all available queues, invoke: -.. code-block:: bash +.. code-block:: console $ redis-cli -h HOST -p PORT -n DATABASE_NUMBER keys \* @@ -455,9 +480,9 @@ for example if you want to capture state every 2 seconds using the camera ``myapp.Camera`` you run :program:`celery events` with the following arguments: -.. code-block:: bash +.. 
code-block:: console - $ celery events -c myapp.Camera --frequency=2.0 + $ celery -A proj events -c myapp.Camera --frequency=2.0 .. _monitoring-camera: @@ -479,6 +504,7 @@ Here is an example camera, dumping the snapshot to screen: from celery.events.snapshot import Polaroid class DumpCam(Polaroid): + clear_after = True # clear after flush (incl, state.event_count). def on_shutter(self, state): if not state.event_count: @@ -486,7 +512,7 @@ Here is an example camera, dumping the snapshot to screen: return print('Workers: {0}'.format(pformat(state.workers, indent=4))) print('Tasks: {0}'.format(pformat(state.tasks, indent=4))) - print('Total: {0.event_count} events, %s {0.task_count}'.format( + print('Total: {0.event_count} events, {0.task_count} tasks'.format( state)) See the API reference for :mod:`celery.events.state` to read more @@ -495,9 +521,9 @@ about state objects. Now you can use this cam with :program:`celery events` by specifying it with the :option:`-c` option: -.. code-block:: bash +.. 
code-block:: console - $ celery events -c myapp.DumpCam --frequency=2.0 + $ celery -A proj events -c myapp.DumpCam --frequency=2.0 Or you can use it programmatically like this: @@ -559,7 +585,7 @@ Combining these you can easily process events in real-time: task = state.tasks.get(event['uuid']) print('TASK FAILED: %s[%s] %s' % ( - task.name, task.uuid, task.info(), )) + task.name, task.uuid, task.info(),)) with app.connection() as connection: recv = app.events.Receiver(connection, handlers={ @@ -595,7 +621,7 @@ You can listen to specific events by specifying the handlers: task = state.tasks.get(event['uuid']) print('TASK FAILED: %s[%s] %s' % ( - task.name, task.uuid, task.info(), )) + task.name, task.uuid, task.info(),)) with app.connection() as connection: recv = app.events.Receiver(connection, handlers={ @@ -625,10 +651,10 @@ task-sent ~~~~~~~~~ :signature: ``task-sent(uuid, name, args, kwargs, retries, eta, expires, - queue, exchange, routing_key)`` + queue, exchange, routing_key, root_id, parent_id)`` Sent when a task message is published and -the :setting:`CELERY_SEND_TASK_SENT_EVENT` setting is enabled. +the :setting:`task_send_sent_event` setting is enabled. .. event:: task-received @@ -636,7 +662,7 @@ task-received ~~~~~~~~~~~~~ :signature: ``task-received(uuid, name, args, kwargs, retries, eta, hostname, - timestamp)`` + timestamp, root_id, parent_id)`` Sent when the worker receives a task. @@ -671,6 +697,16 @@ task-failed Sent if the execution of the task failed. +.. event:: task-rejected + +task-rejected +~~~~~~~~~~~~~ + +:signature: ``task-rejected(uuid, requeued)`` + +The task was rejected by the worker, possibly to be requeued or moved to a +dead letter queue. + .. event:: task-revoked task-revoked @@ -709,7 +745,7 @@ worker-online The worker has connected to the broker and is online. -- `hostname`: Hostname of the worker. +- `hostname`: Nodename of the worker. - `timestamp`: Event timestamp. - `freq`: Heartbeat frequency in seconds (float). 
- `sw_ident`: Name of worker software (e.g. ``py-celery``). @@ -727,7 +763,7 @@ worker-heartbeat Sent every minute, if the worker has not sent a heartbeat in 2 minutes, it is considered to be offline. -- `hostname`: Hostname of the worker. +- `hostname`: Nodename of the worker. - `timestamp`: Event timestamp. - `freq`: Heartbeat frequency in seconds (float). - `sw_ident`: Name of worker software (e.g. ``py-celery``). diff --git a/docs/userguide/optimizing.rst b/docs/userguide/optimizing.rst index 590143c63..a7c0446b5 100644 --- a/docs/userguide/optimizing.rst +++ b/docs/userguide/optimizing.rst @@ -60,7 +60,7 @@ librabbitmq If you're using RabbitMQ (AMQP) as the broker then you can install the :mod:`librabbitmq` module to use an optimized client written in C: -.. code-block:: bash +.. code-block:: console $ pip install librabbitmq @@ -75,7 +75,7 @@ Broker Connection Pools The broker connection pool is enabled by default since version 2.5. -You can tweak the :setting:`BROKER_POOL_LIMIT` setting to minimize +You can tweak the :setting:`broker_pool_limit` setting to minimize contention, and the value should be based on the number of active threads/greenthreads using broker connections. @@ -96,19 +96,28 @@ to improve performance: from kombu import Exchange, Queue - CELERY_QUEUES = ( + task_queues = ( Queue('celery', routing_key='celery'), - Queue('transient', routing_key='transient', - delivery_mode=1), + Queue('transient', Exchange('transient', delivery_mode=1), + routing_key='transient', durable=False), ) +or by using :setting:`task_routes`: + +.. code-block:: python + + task_routes = { + 'proj.tasks.add': {'queue': 'celery', 'delivery_mode': 'transient'} + } + + The ``delivery_mode`` changes how the messages to this queue are delivered. A value of 1 means that the message will not be written to disk, and a value of 2 (default) means that the message can be written to disk. 
To direct a task to your new transient queue you can specify the queue -argument (or use the :setting:`CELERY_ROUTES` setting): .. code-block:: python @@ -136,7 +145,7 @@ available worker nodes that may be able to process them sooner [*]_, or that the messages may not even fit in memory. The workers' default prefetch count is the -:setting:`CELERYD_PREFETCH_MULTIPLIER` setting multiplied by the number +:setting:`worker_prefetch_multiplier` setting multiplied by the number of concurrency slots[*]_ (processes/threads/greenthreads). If you have many tasks with a long duration you want @@ -160,34 +169,43 @@ the tasks according to the run-time. (see :ref:`guide-routing`). nodes starting. If there are 3 offline nodes and one active node, all messages will be delivered to the active node. -.. [*] This is the concurrency setting; :setting:`CELERYD_CONCURRENCY` or the +.. [*] This is the concurrency setting; :setting:`worker_concurrency` or the :option:`-c` option to the :program:`celery worker` program. Reserve one task at a time -------------------------- -When using early acknowledgement (default), a prefetch multiplier of 1 -means the worker will reserve at most one extra task for every active -worker process. +The task message is only deleted from the queue after the task is +:term:`acknowledged`, so if the worker crashes before acknowledging the task, +it can be redelivered to another worker (or the same after recovery). + +When using the default of early acknowledgement, having a prefetch multiplier setting +of 1 means the worker will reserve at most one extra task for every +worker process: or in other words, if the worker is started with `-c 10`, +the worker may reserve at most 20 tasks (10 unacknowledged tasks executing, and 10 +unacknowledged reserved tasks) at any time. 
-When users ask if it's possible to disable "prefetching of tasks", often -what they really want is to have a worker only reserve as many tasks as there -are child processes. +Often users ask if disabling "prefetching of tasks" is possible, but what +they really mean by that is to have a worker only reserve as many tasks as +there are worker processes (10 unacknowledged tasks for `-c 10`) -But this is not possible without enabling late acknowledgements -acknowledgements; A task that has been started, will be -retried if the worker crashes mid execution so the task must be `idempotent`_ -(see also notes at :ref:`faq-acks_late-vs-retry`). +That is possible, but not without also enabling +:term:`late acknowledgments`. Using this option over the +default behavior means a task that has already started executing will be +retried in the event of a power failure or the worker instance being killed +abruptly, so this also means the task must be :term:`idempotent` -.. _`idempotent`: http://en.wikipedia.org/wiki/Idempotent +.. seealso:: + + Notes at :ref:`faq-acks_late-vs-retry`. You can enable this behavior by using the following configuration options: .. code-block:: python - CELERY_ACKS_LATE = True - CELERYD_PREFETCH_MULTIPLIER = 1 + task_acks_late = True + worker_prefetch_multiplier = 1 .. _prefork-pool-prefetch: @@ -201,15 +219,17 @@ tasks. 
This benefits performance but it also means that tasks may be stuck waiting for long running tasks to complete:: - -> send T1 to Process A + -> send task T1 to process A # A executes T1 - -> send T2 to Process B + -> send task T2 to process B # B executes T2 - <- T2 complete + <- T2 complete sent by process B - -> send T3 to Process A - # A still executing T1, T3 stuck in local buffer and - # will not start until T1 returns + -> send task T3 to process A + # A still executing T1, T3 stuck in local buffer and will not start until + # T1 returns, and other queued tasks will not be sent to idle processes + <- T1 complete sent by process A + # A executes T3 The worker will send tasks to the process as long as the pipe buffer is writable. The pipe buffer size varies based on the operating system: some may @@ -219,9 +239,22 @@ size is 1MB (can only be changed system wide). You can disable this prefetching behavior by enabling the :option:`-Ofair` worker option: -.. code-block:: bash +.. code-block:: console $ celery -A proj worker -l info -Ofair -With this option enabled the worker will only write to workers that are -available for work, disabling the prefetch behavior. +With this option enabled the worker will only write to processes that are +available for work, disabling the prefetch behavior:: + +-> send task T1 to process A +# A executes T1 +-> send task T2 to process B +# B executes T2 +<- T2 complete sent by process B + +-> send T3 to process B +# B executes T3 + +<- T3 complete sent by process B +<- T1 complete sent by process A + diff --git a/docs/userguide/periodic-tasks.rst b/docs/userguide/periodic-tasks.rst index 6b829887f..319fefc29 100644 --- a/docs/userguide/periodic-tasks.rst +++ b/docs/userguide/periodic-tasks.rst @@ -13,7 +13,7 @@ Introduction :program:`celery beat` is a scheduler. It kicks off tasks at regular intervals, which are then executed by the worker nodes available in the cluster. 
-By default the entries are taken from the :setting:`CELERYBEAT_SCHEDULE` setting, +By default the entries are taken from the :setting:`beat_schedule` setting, but custom stores can also be used, like storing the entries in an SQL database. @@ -28,18 +28,18 @@ Time Zones ========== The periodic task schedules uses the UTC time zone by default, -but you can change the time zone used using the :setting:`CELERY_TIMEZONE` +but you can change the time zone used using the :setting:`timezone` setting. An example time zone could be `Europe/London`: .. code-block:: python - CELERY_TIMEZONE = 'Europe/London' + timezone = 'Europe/London' This setting must be added to your app, either by configuration it directly -using (``app.conf.CELERY_TIMEZONE = 'Europe/London'``), or by adding +using (``app.conf.timezone = 'Europe/London'``), or by adding it to your configuration module if you have set one up using ``app.config_from_object``. See :ref:`celerytut-configuration` for more information about configuration options. @@ -58,12 +58,12 @@ schedule manually. For Django users the time zone specified in the ``TIME_ZONE`` setting will be used, or you can specify a custom time zone for Celery alone - by using the :setting:`CELERY_TIMEZONE` setting. + by using the :setting:`timezone` setting. The database scheduler will not reset when timezone related settings change, so you must do this manually: - .. code-block:: bash + .. code-block:: console $ python manage.py shell >>> from djcelery.models import PeriodicTask @@ -74,24 +74,55 @@ schedule manually. Entries ======= -To schedule a task periodically you have to add an entry to the -:setting:`CELERYBEAT_SCHEDULE` setting. +To call a task periodically you have to add an entry to the +beat schedule list. + +.. code-block:: python + + from celery import Celery + from celery.schedules import crontab + + app = Celery() + + @app.on_after_configure.connect + def setup_periodic_tasks(sender, **kwargs): + # Calls test('hello') every 10 seconds. 
+ sender.add_periodic_task(10.0, test.s('hello'), name='add every 10') + + # Calls test('world') every 30 seconds + sender.add_periodic_task(30.0, test.s('world'), expires=10) + + # Executes every Monday morning at 7:30 A.M + sender.add_periodic_task( + crontab(hour=7, minute=30, day_of_week=1), + test.s('Happy Mondays!'), + ) + + @app.task + def test(arg): + print(arg) + + +Setting these up from within the ``on_after_configure`` handler means +that we will not evaluate the app at module level when using ``test.s()``. + +The `@add_periodic_task` function will add the entry to the +:setting:`beat_schedule` setting behind the scenes, which also +can be used to set up periodic tasks manually: Example: Run the `tasks.add` task every 30 seconds. .. code-block:: python - from datetime import timedelta - - CELERYBEAT_SCHEDULE = { + beat_schedule = { 'add-every-30-seconds': { 'task': 'tasks.add', - 'schedule': timedelta(seconds=30), + 'schedule': 30.0, 'args': (16, 16) }, } - CELERY_TIMEZONE = 'UTC' + timezone = 'UTC' .. note:: @@ -100,6 +131,9 @@ Example: Run the `tasks.add` task every 30 seconds. please see :ref:`celerytut-configuration`. You can either set these options on your app directly or you can keep a separate module for configuration. + + If you want to use a single item tuple for `args`, don't forget + that the constructor is a comma and not a pair of parentheses. Using a :class:`~datetime.timedelta` for the schedule means the task will be sent in 30 second intervals (the first task will be sent 30 seconds @@ -169,7 +203,7 @@ the :class:`~celery.schedules.crontab` schedule type: from celery.schedules import crontab - CELERYBEAT_SCHEDULE = { + beat_schedule = { # Executes every Monday morning at 7:30 A.M 'add-every-monday-morning': { 'task': 'tasks.add', @@ -188,7 +222,8 @@ The syntax of these crontab expressions are very flexible. Some examples: | ``crontab(minute=0, hour=0)`` | Execute daily at midnight. 
| +-----------------------------------------+--------------------------------------------+ | ``crontab(minute=0, hour='*/3')`` | Execute every three hours: | -| | 3am, 6am, 9am, noon, 3pm, 6pm, 9pm. | +| | midnight, 3am, 6am, 9am, | +| | noon, 3pm, 6pm, 9pm. | +-----------------------------------------+--------------------------------------------+ | ``crontab(minute=0,`` | Same as previous. | | ``hour='0,3,6,9,12,15,18,21')`` | | @@ -219,24 +254,126 @@ The syntax of these crontab expressions are very flexible. Some examples: | ``crontab(minute=0, hour='*/3,8-17')`` | Execute every hour divisible by 3, and | | | every hour during office hours (8am-5pm). | +-----------------------------------------+--------------------------------------------+ -| ``crontab(day_of_month='2')`` | Execute on the second day of every month. | +| ``crontab(0, 0, day_of_month='2')`` | Execute on the second day of every month. | | | | +-----------------------------------------+--------------------------------------------+ -| ``crontab(day_of_month='2-30/3')`` | Execute on every even numbered day. | -| | | +| ``crontab(0, 0,`` | Execute on every even numbered day. | +| ``day_of_month='2-30/3')`` | | +-----------------------------------------+--------------------------------------------+ -| ``crontab(day_of_month='1-7,15-21')`` | Execute on the first and third weeks of | -| | the month. | +| ``crontab(0, 0,`` | Execute on the first and third weeks of | +| ``day_of_month='1-7,15-21')`` | the month. | +-----------------------------------------+--------------------------------------------+ -| ``crontab(day_of_month='11',`` | Execute on 11th of May every year. | -| ``month_of_year='5')`` | | +| ``crontab(0, 0, day_of_month='11',`` | Execute on 11th of May every year. | +| ``month_of_year='5')`` | | +-----------------------------------------+--------------------------------------------+ -| ``crontab(month_of_year='*/3')`` | Execute on the first month of every | -| | quarter. 
| +| ``crontab(0, 0,`` | Execute on the first month of every | +| ``month_of_year='*/3')`` | quarter. | +-----------------------------------------+--------------------------------------------+ See :class:`celery.schedules.crontab` for more documentation. +.. _beat-solar: + +Solar schedules +================= + +If you have a task that should be executed according to sunrise, +sunset, dawn or dusk, you can use the +:class:`~celery.schedules.solar` schedule type: + +.. code-block:: python + + from celery.schedules import solar + + beat_schedule = { + # Executes at sunset in Melbourne + 'add-at-melbourne-sunset': { + 'task': 'tasks.add', + 'schedule': solar('sunset', -37.81753, 144.96715), + 'args': (16, 16), + }, + } + +The arguments are simply: ``solar(event, latitude, longitude)`` + +Be sure to use the correct sign for latitude and longitude: + ++---------------+-------------------+----------------------+ +| **Sign** | **Argument** | **Meaning** | ++---------------+-------------------+----------------------+ +| ``+`` | ``latitude`` | North | ++---------------+-------------------+----------------------+ +| ``-`` | ``latitude`` | South | ++---------------+-------------------+----------------------+ +| ``+`` | ``longitude`` | East | ++---------------+-------------------+----------------------+ +| ``-`` | ``longitude`` | West | ++---------------+-------------------+----------------------+ + +Possible event types are: + ++-----------------------------------------+--------------------------------------------+ +| **Event** | **Meaning** | ++-----------------------------------------+--------------------------------------------+ +| ``dawn_astronomical`` | Execute at the moment after which the sky | +| | is no longer completely dark. This is when | +| | the sun is 18 degrees below the horizon. 
| ++-----------------------------------------+--------------------------------------------+ +| ``dawn_nautical`` | Execute when there is enough sunlight for | +| | the horizon and some objects to be | +| | distinguishable; formally, when the sun is | +| | 12 degrees below the horizon. | ++-----------------------------------------+--------------------------------------------+ +| ``dawn_civil`` | Execute when there is enough light for | +| | objects to be distinguishable so that | +| | outdoor activities can commence; | +| | formally, when the Sun is 6 degrees below | +| | the horizon. | ++-----------------------------------------+--------------------------------------------+ +| ``sunrise`` | Execute when the upper edge of the sun | +| | appears over the eastern horizon in the | +| | morning. | ++-----------------------------------------+--------------------------------------------+ +| ``solar_noon`` | Execute when the sun is highest above the | +| | horizon on that day. | ++-----------------------------------------+--------------------------------------------+ +| ``sunset`` | Execute when the trailing edge of the sun | +| | disappears over the western horizon in the | +| | evening. | ++-----------------------------------------+--------------------------------------------+ +| ``dusk_civil`` | Execute at the end of civil twilight, when | +| | objects are still distinguishable and some | +| | stars and planets are visible. Formally, | +| | when the sun is 6 degrees below the | +| | horizon. | ++-----------------------------------------+--------------------------------------------+ +| ``dusk_nautical`` | Execute when the sun is 12 degrees below | +| | the horizon. Objects are no longer | +| | distinguishable, and the horizon is no | +| | longer visible to the naked eye. 
| ++-----------------------------------------+--------------------------------------------+ +| ``dusk_astronomical`` | Execute at the moment after which the sky | +| | becomes completely dark; formally, when | +| | the sun is 18 degrees below the horizon. | ++-----------------------------------------+--------------------------------------------+ + +All solar events are calculated using UTC, and are therefore +unaffected by your timezone setting. + +In polar regions, the sun may not rise or set every day. The scheduler +is able to handle these cases, i.e. a ``sunrise`` event won't run on a day +when the sun doesn't rise. The one exception is ``solar_noon``, which is +formally defined as the moment the sun transits the celestial meridian, +and will occur every day even if the sun is below the horizon. + +Twilight is defined as the period between dawn and sunrise, and between +sunset and dusk. You can schedule an event according to "twilight" +depending on your definition of twilight (civil, nautical or astronomical), +and whether you want the event to take place at the beginning or end +of twilight, using the appropriate event from the list above. + +See :class:`celery.schedules.solar` for more documentation. + .. _beat-starting: Starting the Scheduler @@ -244,27 +381,27 @@ Starting the Scheduler To start the :program:`celery beat` service: -.. code-block:: bash +.. code-block:: console - $ celery beat + $ celery -A proj beat You can also start embed `beat` inside the worker by enabling workers `-B` option, this is convenient if you will never run more than one worker node, but it's not commonly used and for that reason is not recommended for production use: -.. code-block:: bash +.. 
code-block:: console - $ celery worker -B + $ celery -A proj worker -B Beat needs to store the last run times of the tasks in a local database file (named `celerybeat-schedule` by default), so it needs access to write in the current directory, or alternatively you can specify a custom location for this file: -.. code-block:: bash +.. code-block:: console - $ celery beat -s /home/celery/var/run/celerybeat-schedule + $ celery -A proj beat -s /home/celery/var/run/celerybeat-schedule .. note:: @@ -284,7 +421,7 @@ which is simply keeping track of the last run times in a local database file `django-celery` also ships with a scheduler that stores the schedule in the Django database: -.. code-block:: bash +.. code-block:: console $ celery -A proj beat -S djcelery.schedulers.DatabaseScheduler diff --git a/docs/userguide/remote-tasks.rst b/docs/userguide/remote-tasks.rst index e5f4aa8c7..d36867e43 100644 --- a/docs/userguide/remote-tasks.rst +++ b/docs/userguide/remote-tasks.rst @@ -18,13 +18,17 @@ If you need to call into another language, framework or similar, you can do so by using HTTP callback tasks. The HTTP callback tasks uses GET/POST data to pass arguments and returns -result as a JSON response. The scheme to call a task is:: +result as a JSON response. The scheme to call a task is: - GET http://example.com/mytask/?arg1=a&arg2=b&arg3=c +.. code-block:: http -or using POST:: + GET HTTP/1.1 http://example.com/mytask/?arg1=a&arg2=b&arg3=c - POST http://example.com/mytask +or using POST: + +.. code-block:: http + + POST HTTP/1.1 http://example.com/mytask .. note:: @@ -33,11 +37,15 @@ or using POST:: Whether to use GET or POST is up to you and your requirements. The web page should then return a response in the following format -if the execution was successful:: +if the execution was successful: + +.. code-block:: javascript {'status': 'success', 'retval': …} -or if there was an error:: +or if there was an error: + +.. 
code-block:: javascript {'status': 'failure', 'reason': 'Invalid moon alignment.'} @@ -45,7 +53,7 @@ Enabling the HTTP task ---------------------- To enable the HTTP dispatch task you have to add :mod:`celery.task.http` -to :setting:`CELERY_IMPORTS`, or start the worker with ``-I +to :setting:`imports`, or start the worker with ``-I celery.task.http``. @@ -59,7 +67,7 @@ With this information you could define a simple task in Django: .. code-block:: python from django.http import HttpResponse - from anyjson import serialize + from json import dumps def multiply(request): @@ -67,7 +75,7 @@ With this information you could define a simple task in Django: y = int(request.GET['y']) result = x * y response = {'status': 'success', 'retval': result} - return HttpResponse(serialize(response), mimetype='application/json') + return HttpResponse(dumps(response), mimetype='application/json') .. _webhook-rails-example: @@ -97,13 +105,17 @@ Calling webhook tasks To call a task you can use the :class:`~celery.task.http.URL` class: +.. code-block:: pycon + >>> from celery.task.http import URL >>> res = URL('http://example.com/multiply').get_async(x=10, y=10) :class:`~celery.task.http.URL` is a shortcut to the :class:`HttpDispatchTask`. You can subclass this to extend the -functionality. +functionality: + +.. code-block:: pycon >>> from celery.task.http import HttpDispatchTask >>> res = HttpDispatchTask.delay( diff --git a/docs/userguide/routing.rst b/docs/userguide/routing.rst index f0a27a5d4..5c485b5ea 100644 --- a/docs/userguide/routing.rst +++ b/docs/userguide/routing.rst @@ -25,34 +25,55 @@ Automatic routing ----------------- The simplest way to do routing is to use the -:setting:`CELERY_CREATE_MISSING_QUEUES` setting (on by default). +:setting:`task_create_missing_queues` setting (on by default). With this setting on, a named queue that is not already defined in -:setting:`CELERY_QUEUES` will be created automatically. 
This makes it easy to +:setting:`task_queues` will be created automatically. This makes it easy to perform simple routing tasks. Say you have two servers, `x`, and `y` that handles regular tasks, and one server `z`, that only handles feed related tasks. You can use this configuration:: - CELERY_ROUTES = {'feed.tasks.import_feed': {'queue': 'feeds'}} + task_routes = {'feed.tasks.import_feed': {'queue': 'feeds'}} With this route enabled import feed tasks will be routed to the `"feeds"` queue, while all other tasks will be routed to the default queue (named `"celery"` for historical reasons). -Now you can start server `z` to only process the feeds queue like this: +Alternatively, you can use glob pattern matching, or even regular expressions, +to match all tasks in the ``feed.tasks`` namespace:: -.. code-block:: bash + task_routes = {'feed.tasks.*': {'queue': 'feeds'}} - user@z:/$ celery worker -Q feeds +If the order in which the patterns are matched is important you should +specify a tuple as the task router instead:: + + task_routes = ([ + ('feed.tasks.*', {'queue': 'feeds'}), + ('web.tasks.*', {'queue': 'web'}), + (re.compile(r'(video|image)\.tasks\..*'), {'queue': 'media'}), + ],) + +.. note:: + + The :setting:`task_routes` setting can either be a dictionary, or a + list of router objects, so in this case we need to specify the setting + as a tuple containing a list. + +After installing the router, you can start server `z` to only process the feeds +queue like this: + +.. code-block:: console + + user@z:/$ celery -A proj worker -Q feeds You can specify as many queues as you want, so you can make this server process the default queue as well: -.. code-block:: bash +.. code-block:: console - user@z:/$ celery worker -Q feeds,celery + user@z:/$ celery -A proj worker -Q feeds,celery .. 
_routing-changing-default-queue: @@ -66,8 +87,8 @@ configuration: from kombu import Exchange, Queue - CELERY_DEFAULT_QUEUE = 'default' - CELERY_QUEUES = ( + task_default_queue = 'default' + task_queues = ( Queue('default', Exchange('default'), routing_key='default'), ) @@ -82,15 +103,15 @@ are declared. A queue named `"video"` will be created with the following settings: -.. code-block:: python +.. code-block:: javascript {'exchange': 'video', 'exchange_type': 'direct', 'routing_key': 'video'} -The non-AMQP backends like `ghettoq` does not support exchanges, so they -require the exchange to have the same name as the queue. Using this design -ensures it will work for them as well. +The non-AMQP backends like `Redis` or `Django-models` do not support exchanges, +so they require the exchange to have the same name as the queue. Using this +design ensures it will work for them as well. .. _routing-manual: @@ -105,27 +126,27 @@ configuration: from kombu import Queue - CELERY_DEFAULT_QUEUE = 'default' - CELERY_QUEUES = ( + task_default_queue = 'default' + task_queues = ( Queue('default', routing_key='task.#'), Queue('feed_tasks', routing_key='feed.#'), ) - CELERY_DEFAULT_EXCHANGE = 'tasks' - CELERY_DEFAULT_EXCHANGE_TYPE = 'topic' - CELERY_DEFAULT_ROUTING_KEY = 'task.default' + task_default_exchange = 'tasks' + task_default_exchange_type = 'topic' + task_default_routing_key = 'task.default' -:setting:`CELERY_QUEUES` is a list of :class:`~kombu.entitity.Queue` +:setting:`task_queues` is a list of :class:`~kombu.entity.Queue` instances. If you don't set the exchange or exchange type values for a key, these -will be taken from the :setting:`CELERY_DEFAULT_EXCHANGE` and -:setting:`CELERY_DEFAULT_EXCHANGE_TYPE` settings. +will be taken from the :setting:`task_default_exchange` and +:setting:`task_default_exchange_type` settings. To route a task to the `feed_tasks` queue, you can add an entry in the -:setting:`CELERY_ROUTES` setting: .. 
code-block:: python - CELERY_ROUTES = { + task_routes = { 'feeds.tasks.import_feed': { 'queue': 'feed_tasks', 'routing_key': 'feed.import', @@ -145,23 +166,23 @@ You can also override this using the `routing_key` argument to To make server `z` consume from the feed queue exclusively you can start it with the ``-Q`` option: -.. code-block:: bash +.. code-block:: console - user@z:/$ celery worker -Q feed_tasks --hostname=z@%h + user@z:/$ celery -A proj worker -Q feed_tasks --hostname=z@%h Servers `x` and `y` must be configured to consume from the default queue: -.. code-block:: bash +.. code-block:: console - user@x:/$ celery worker -Q default --hostname=x@%h - user@y:/$ celery worker -Q default --hostname=y@%h + user@x:/$ celery -A proj worker -Q default --hostname=x@%h + user@y:/$ celery -A proj worker -Q default --hostname=y@%h If you want, you can even have your feed processing worker handle regular tasks as well, maybe in times when there's a lot of work to do: -.. code-block:: python +.. code-block:: console - user@z:/$ celery worker -Q feed_tasks,default --hostname=z@%h + user@z:/$ celery -A proj worker -Q feed_tasks,default --hostname=z@%h If you have another queue but on another exchange you want to add, just specify a custom exchange and exchange type: @@ -170,7 +191,7 @@ just specify a custom exchange and exchange type: from kombu import Exchange, Queue - CELERY_QUEUES = ( + task_queues = ( Queue('feed_tasks', routing_key='feed.#'), Queue('regular_tasks', routing_key='task.#'), Queue('image_tasks', exchange=Exchange('mediatasks', type='direct'), @@ -209,7 +230,7 @@ metadata -- like the number of retries or an ETA. This is an example task message represented as a Python dictionary: -.. code-block:: python +.. code-block:: javascript {'task': 'myapp.tasks.add', 'id': '54086c5e-6193-4575-8308-dbab76798756', @@ -249,7 +270,7 @@ The steps required to send and receive messages are: 3. Bind the queue to the exchange. 
Celery automatically creates the entities necessary for the queues in -:setting:`CELERY_QUEUES` to work (except if the queue's `auto_declare` +:setting:`task_queues` to work (except if the queue's `auto_declare` setting is set to :const:`False`). Here's an example queue configuration with three queues; @@ -259,14 +280,14 @@ One for video, one for images and one default queue for everything else: from kombu import Exchange, Queue - CELERY_QUEUES = ( + task_queues = ( Queue('default', Exchange('default'), routing_key='default'), Queue('videos', Exchange('media'), routing_key='media.video'), Queue('images', Exchange('media'), routing_key='media.image'), ) - CELERY_DEFAULT_QUEUE = 'default' - CELERY_DEFAULT_EXCHANGE_TYPE = 'direct' - CELERY_DEFAULT_ROUTING_KEY = 'default' + task_default_queue = 'default' + task_default_exchange_type = 'direct' + task_default_routing_key = 'default' .. _amqp-exchange-types: @@ -313,6 +334,8 @@ Related API commands Declares an exchange by name. + See :meth:`amqp:Channel.exchange_declare `. + :keyword passive: Passive means the exchange won't be created, but you can use this to check if the exchange already exists. @@ -327,22 +350,31 @@ Related API commands Declares a queue by name. + See :meth:`amqp:Channel.queue_declare ` + Exclusive queues can only be consumed from by the current connection. Exclusive also implies `auto_delete`. .. method:: queue.bind(queue_name, exchange_name, routing_key) Binds a queue to an exchange with a routing key. + Unbound queues will not receive messages, so this is necessary. + See :meth:`amqp:Channel.queue_bind ` + .. method:: queue.delete(name, if_unused=False, if_empty=False) Deletes a queue and its binding. + See :meth:`amqp:Channel.queue_delete ` + .. method:: exchange.delete(name, if_unused=False) Deletes an exchange. + See :meth:`amqp:Channel.exchange_delete ` + .. note:: Declaring does not necessarily mean "create". 
When you declare you @@ -365,9 +397,9 @@ but different implementation may not implement all commands. You can write commands directly in the arguments to :program:`celery amqp`, or just start with no arguments to start it in shell-mode: -.. code-block:: bash +.. code-block:: console - $ celery amqp + $ celery -A proj amqp -> connecting to amqp://guest@localhost:5672/. -> connected. 1> @@ -379,9 +411,9 @@ hit the `tab` key to show a list of possible matches. Let's create a queue you can send messages to: -.. code-block:: bash +.. code-block:: console - $ celery amqp + $ celery -A proj amqp 1> exchange.declare testexchange direct ok. 2> queue.declare testqueue @@ -395,17 +427,21 @@ the routing key ``testkey``. From now on all messages sent to the exchange ``testexchange`` with routing key ``testkey`` will be moved to this queue. You can send a message by -using the ``basic.publish`` command:: +using the ``basic.publish`` command: + +.. code-block:: console 4> basic.publish 'This is a message!' testexchange testkey ok. Now that the message is sent you can retrieve it again. You can use the ``basic.get``` command here, which polls for new messages on the queue -(which is alright for maintainence tasks, for services you'd want to use +(which is alright for maintenance tasks, for services you'd want to use ``basic.consume`` instead) -Pop a message off the queue:: +Pop a message off the queue: + +.. code-block:: console 5> basic.get testqueue {'body': 'This is a message!', @@ -428,12 +464,16 @@ This tag is used to acknowledge the message. Also note that delivery tags are not unique across connections, so in another client the delivery tag `1` might point to a different message than in this channel. -You can acknowledge the message you received using ``basic.ack``:: +You can acknowledge the message you received using ``basic.ack``: + +.. code-block:: console 6> basic.ack 1 ok. 
-To clean up after our test session you should delete the entities you created:: +To clean up after our test session you should delete the entities you created: + +.. code-block:: console 7> queue.delete testqueue ok. 0 messages deleted. @@ -451,7 +491,7 @@ Routing Tasks Defining queues --------------- -In Celery available queues are defined by the :setting:`CELERY_QUEUES` setting. +In Celery available queues are defined by the :setting:`task_queues` setting. Here's an example queue configuration with three queues; One for video, one for images and one default queue for everything else: @@ -461,21 +501,21 @@ One for video, one for images and one default queue for everything else: default_exchange = Exchange('default', type='direct') media_exchange = Exchange('media', type='direct') - CELERY_QUEUES = ( + task_queues = ( Queue('default', default_exchange, routing_key='default'), Queue('videos', media_exchange, routing_key='media.video'), Queue('images', media_exchange, routing_key='media.image') ) - CELERY_DEFAULT_QUEUE = 'default' - CELERY_DEFAULT_EXCHANGE = 'default' - CELERY_DEFAULT_ROUTING_KEY = 'default' + task_default_queue = 'default' + task_default_exchange = 'default' + task_default_routing_key = 'default' -Here, the :setting:`CELERY_DEFAULT_QUEUE` will be used to route tasks that +Here, the :setting:`task_default_queue` will be used to route tasks that doesn't have an explicit route. The default exchange, exchange type and routing key will be used as the default routing values for tasks, and as the default values for entries -in :setting:`CELERY_QUEUES`. +in :setting:`task_queues`. .. _routing-task-destination: @@ -484,7 +524,7 @@ Specifying task destination The destination for a task is decided by the following (in order): -1. The :ref:`routers` defined in :setting:`CELERY_ROUTES`. +1. The :ref:`routers` defined in :setting:`task_routes`. 2. The routing arguments to :func:`Task.apply_async`. 3. 
Routing related attributes defined on the :class:`~celery.task.base.Task` itself. @@ -516,7 +556,7 @@ All you need to define a new router is to create a class with a return None If you return the ``queue`` key, it will expand with the defined settings of -that queue in :setting:`CELERY_QUEUES`: +that queue in :setting:`task_queues`: .. code-block:: javascript @@ -532,26 +572,32 @@ becomes --> 'routing_key': 'video.compress'} -You install router classes by adding them to the :setting:`CELERY_ROUTES` -setting:: +You install router classes by adding them to the :setting:`task_routes` +setting: - CELERY_ROUTES = (MyRouter(), ) +.. code-block:: python -Router classes can also be added by name:: + task_routes = (MyRouter(),) - CELERY_ROUTES = ('myapp.routers.MyRouter', ) +Router classes can also be added by name: + +.. code-block:: python + + task_routes = ('myapp.routers.MyRouter',) For simple task name -> route mappings like the router example above, -you can simply drop a dict into :setting:`CELERY_ROUTES` to get the +you can simply drop a dict into :setting:`task_routes` to get the same behavior: .. code-block:: python - CELERY_ROUTES = ({'myapp.tasks.compress_video': { - 'queue': 'video', - 'routing_key': 'video.compress' - }}, ) + task_routes = ( + {'myapp.tasks.compress_video': { + 'queue': 'video', + 'routing_key': 'video.compress', + }}, + ) The routers will then be traversed in order, it will stop at the first router returning a true value, and use that as the final route for the task. 
@@ -567,13 +613,30 @@ copies of tasks to all workers connected to it: from kombu.common import Broadcast - CELERY_QUEUES = (Broadcast('broadcast_tasks'), ) - - CELERY_ROUTES = {'tasks.reload_cache': {'queue': 'broadcast_tasks'}} + task_queues = (Broadcast('broadcast_tasks'),) + task_routes = {'tasks.reload_cache': {'queue': 'broadcast_tasks'}} -Now the ``tasks.reload_tasks`` task will be sent to every +Now the ``tasks.reload_cache`` task will be sent to every worker consuming from this queue. +Here is another example of broadcast routing, this time with +a celerybeat schedule: + +.. code-block:: python + + from kombu.common import Broadcast + from celery.schedules import crontab + + task_queues = (Broadcast('broadcast_tasks'),) + + beat_schedule = {'test-task': { + 'task': 'tasks.reload_cache', + 'schedule': crontab(minute=0, hour='*/3'), + 'options': {'exchange': 'broadcast_tasks'} + }, + } + + .. admonition:: Broadcast & Results Note that Celery result does not define what happens if two diff --git a/docs/userguide/security.rst b/docs/userguide/security.rst index 027ad5489..f1ebe3e18 100644 --- a/docs/userguide/security.rst +++ b/docs/userguide/security.rst @@ -46,6 +46,9 @@ If your broker supports fine-grained access control, like RabbitMQ, this is something you should look at enabling. See for example http://www.rabbitmq.com/access-control.html. +If supported by your broker backend, you can enable end-to-end SSL encryption +and authentication using :setting:`broker_use_ssl`. + Client ------ @@ -101,7 +104,7 @@ unauthenticated. .. [*] http://nadiana.com/python-pickle-insecure You can disable untrusted content by specifying -a white-list of accepted content-types in the :setting:`CELERY_ACCEPT_CONTENT` +a white-list of accepted content-types in the :setting:`accept_content` setting: .. versionadded:: 3.0.18 @@ -114,7 +117,7 @@ setting: .. 
code-block:: python - CELERY_ACCEPT_CONTENT = ['json'] + accept_content = ['json'] This accepts a list of serializer names and content-types, so you could @@ -122,7 +125,7 @@ also specify the content type for json: .. code-block:: python - CELERY_ACCEPT_CONTENT = ['application/json'] + accept_content = ['application/json'] Celery also comes with a special `auth` serializer that validates communication between Celery clients and workers, making sure @@ -148,12 +151,12 @@ and then later verified by the worker using a public certificate. Optimally certificates should be signed by an official `Certificate Authority`_, but they can also be self-signed. -To enable this you should configure the :setting:`CELERY_TASK_SERIALIZER` +To enable this you should configure the :setting:`task_serializer` setting to use the `auth` serializer. Also required is configuring the paths used to locate private keys and certificates on the file-system: -the :setting:`CELERY_SECURITY_KEY`, -:setting:`CELERY_SECURITY_CERTIFICATE` and :setting:`CELERY_SECURITY_CERT_STORE` +the :setting:`security_key`, +:setting:`security_certificate` and :setting:`security_cert_store` settings respectively. With these configured it is also necessary to call the :func:`celery.setup_security` function. Note that this will also @@ -165,11 +168,13 @@ with the private key and certificate files located in `/etc/ssl`. .. code-block:: python - CELERY_SECURITY_KEY = '/etc/ssl/private/worker.key' - CELERY_SECURITY_CERTIFICATE = '/etc/ssl/certs/worker.pem' - CELERY_SECURITY_CERT_STORE = '/etc/ssl/certs/*.pem' - from celery import setup_security - setup_security() + app = Celery() + app.conf.update( + security_key='/etc/ssl/private/worker.key', + security_certificate='/etc/ssl/certs/worker.pem', + security_cert_store='/etc/ssl/certs/*.pem', + ) + app.setup_security() .. note:: @@ -197,7 +202,7 @@ Logs are usually the first place to look for evidence of security breaches, but they are useless if they can be tampered with.
A good solution is to set up centralized logging with a dedicated logging -server. Acess to it should be restricted. +server. Access to it should be restricted. In addition to having all of the logs in a single place, if configured correctly, it can make it harder for intruders to tamper with your logs. diff --git a/docs/userguide/signals.rst b/docs/userguide/signals.rst index 4d6d72e69..40d9f7096 100644 --- a/docs/userguide/signals.rst +++ b/docs/userguide/signals.rst @@ -10,7 +10,7 @@ Signals Signals allows decoupled applications to receive notifications when certain actions occur elsewhere in the application. -Celery ships with many signals that you application can hook into +Celery ships with many signals that your application can hook into to augment behavior of certain actions. .. _signal-basics: @@ -28,9 +28,12 @@ Example connecting to the :signal:`after_task_publish` signal: from celery.signals import after_task_publish @after_task_publish.connect - def task_sent_handler(sender=None, body=None, **kwargs): - print('after_task_publish for task id {body[id]}'.format( - body=body, + def task_sent_handler(sender=None, headers=None, body=None, **kwargs): + # information about task are located in headers for task messages + # using the task protocol version 2. + info = headers if 'task' in headers else body + print('after_task_publish for task id {info[id]}'.format( + info=info, )) @@ -44,9 +47,12 @@ is published: .. code-block:: python @after_task_publish.connect(sender='proj.tasks.add') - def task_sent_handler(sender=None, body=None, **kwargs): - print('after_task_publish for task id {body[id]}'.format( - body=body, + def task_sent_handler(sender=None, headers=None, body=None, **kwargs): + # information about task are located in headers for task messages + # using the task protocol version 2. 
+ info = headers if 'task' in headers else body + print('after_task_publish for task id {info[id]}'.format( + info=info, )) Signals use the same implementation as django.core.dispatch. As a result other @@ -75,14 +81,14 @@ Note that this is executed in the process sending the task. Sender is the name of the task being sent. -Provides arguements: +Provides arguments: * body Task message body. This is a mapping containing the task message fields - (see :ref:`task-message-protocol-v1`). + (see :ref:`message-protocol-task-v1`). * exchange @@ -103,7 +109,7 @@ Provides arguements: * declare List of entities (:class:`~kombu.Exchange`, - :class:`~kombu.Queue` or :class:~`kombu.binding` to declare before + :class:`~kombu.Queue` or :class:`~kombu.binding` to declare before publishing the message. Can be modified. * retry_policy @@ -123,9 +129,16 @@ Sender is the name of the task being sent. Provides arguments: +* headers + + The task message headers, see :ref:`message-protocol-task-v2` + and :ref:`message-protocol-task-v1`. + for a reference of possible fields that can be defined. + * body - The task message body, see :ref:`task-message-protocol-v1` + The task message body, see :ref:`message-protocol-task-v2` + and :ref:`message-protocol-task-v1`. for a reference of possible fields that can be defined. * exchange @@ -271,7 +284,7 @@ Provides arguments: * request - This is a :class:`~celery.worker.job.Request` instance, and not + This is a :class:`~celery.worker.request.Request` instance, and not ``task.request``. When using the prefork pool this signal is dispatched in the parent process, so ``task.request`` is not available and should not be used. Use this object instead, which should have many @@ -287,6 +300,53 @@ Provides arguments: * expired Set to :const:`True` if the task expired. +.. signal:: task_unknown + +task_unknown +~~~~~~~~~~~~ + +Dispatched when a worker receives a message for a task that is not registered. 
+ +Sender is the worker :class:`~celery.worker.consumer.Consumer`. + +Provides arguments: + +* name + + Name of task not found in registry. + +* id + + The task id found in the message. + +* message + + Raw message object. + +* exc + + The error that occurred. + +.. signal:: task_rejected + +task_rejected +~~~~~~~~~~~~~ + +Dispatched when a worker receives an unknown type of message to one of its +task queues. + +Sender is the worker :class:`~celery.worker.consumer.Consumer`. + +Provides arguments: + +* message + + Raw message object. + +* exc + + The error that occurred (if any). + App Signals ----------- @@ -296,7 +356,7 @@ import_modules ~~~~~~~~~~~~~~ This signal is sent when a program (worker, beat, shell) etc, asks -for modules in the :setting:`CELERY_INCLUDE` and :setting:`CELERY_IMPORTS` +for modules in the :setting:`include` and :setting:`imports` settings to be imported. Sender is the app instance. @@ -330,7 +390,7 @@ used to route a task to any specific worker: Provides arguments: * sender - Hostname of the worker. + Nodename of the worker. * instance This is the :class:`celery.apps.worker.Worker` instance to be initialized. 
@@ -356,7 +416,7 @@ to setup worker specific configuration: @celeryd_init.connect(sender='worker12@example.com') def configure_worker12(conf=None, **kwargs): - conf.CELERY_DEFAULT_RATE_LIMIT = '10/m' + conf.task_default_rate_limit = '10/m' or to set up configuration for multiple workers you can omit specifying a sender when you connect: @@ -368,9 +428,9 @@ sender when you connect: @celeryd_init.connect def configure_workers(sender=None, conf=None, **kwargs): if sender in ('worker1@example.com', 'worker2@example.com'): - conf.CELERY_DEFAULT_RATE_LIMIT = '10/m' + conf.task_default_rate_limit = '10/m' if sender == 'worker3@example.com': - conf.CELERYD_PREFETCH_MULTIPLIER = 0 + conf.worker_prefetch_multiplier = 0 Provides arguments: diff --git a/docs/userguide/tasks.rst b/docs/userguide/tasks.rst index b984d5480..0579aca0b 100644 --- a/docs/userguide/tasks.rst +++ b/docs/userguide/tasks.rst @@ -19,7 +19,7 @@ many messages in advance and even if the worker is killed -- caused by power fai or otherwise -- the message will be redelivered to another worker. Ideally task functions should be :term:`idempotent`, which means that -the function will not cause unintented effects even if called +the function will not cause unintended effects even if called multiple times with the same arguments. Since the worker cannot detect if your tasks are idempotent, the default behavior is to acknowledge the message in advance, before it's executed, @@ -45,7 +45,7 @@ Basics ====== You can easily create a task from any callable by using -the :meth:`~@Celery.task` decorator: +the :meth:`~@task` decorator: .. code-block:: python @@ -66,14 +66,15 @@ these can be specified as arguments to the decorator: User.objects.create(username=username, password=password) - .. sidebar:: How do I import the task decorator? And what is "app"? The task decorator is available on your :class:`@Celery` application instance, if you don't know what that is then please read :ref:`first-steps`. 
If you're using Django or are still using the "old" module based celery API, - then you can import the task decorator like this:: + then you can import the task decorator like this: + + .. code-block:: python from celery import task @@ -96,6 +97,42 @@ these can be specified as arguments to the decorator: def add(x, y): return x + y +Bound tasks +----------- + +A task being bound means the first argument to the task will always +be the task instance (``self``), just like Python bound methods: + +.. code-block:: python + + logger = get_task_logger(__name__) + + @task(bind=True) + def add(self, x, y): + logger.info(self.request.id) + +Bound tasks are needed for retries (using :meth:`@Task.retry`), for +accessing information about the current task request, and for any additional +functionality you add to custom task base classes. + +Task inheritance +---------------- + +The ``base`` argument to the task decorator specifies the base class of the task: + +.. code-block:: python + + import celery + + class MyTask(celery.Task): + + def on_failure(self, exc, task_id, args, kwargs, einfo): + print('{0!r} failed: {1!r}'.format(task_id, exc)) + + @task(base=MyTask) + def add(x, y): + raise KeyError() + .. _task-names: Names @@ -106,7 +143,7 @@ will be generated out of the function name if a custom name is not provided. For example: -.. code-block:: python +.. code-block:: pycon >>> @app.task(name='sum-of-two-numbers') >>> def add(x, y): @@ -119,13 +156,15 @@ A best practice is to use the module name as a namespace, this way names won't collide if there's already a task with that name defined in another module. -.. code-block:: python +.. code-block:: pycon >>> @app.task(name='tasks.add') >>> def add(x, y): ... return x + y -You can tell the name of the task by investigating its name attribute:: +You can tell the name of the task by investigating its name attribute: + +..
code-block:: pycon >>> add.name 'tasks.add' @@ -150,7 +189,7 @@ if the module name is "tasks.py": Automatic naming and relative imports ------------------------------------- -Relative imports and automatic name generation does not go well together, +Relative imports and automatic name generation do not go well together, so if you're using relative imports you should set the name explicitly. For example if the client imports the module "myapp.tasks" as ".tasks", and @@ -168,7 +207,7 @@ If you install the app under the name ``project.myapp`` then the tasks module will be imported as ``project.myapp.tasks``, so you must make sure you always import the tasks using the same name: -.. code-block:: python +.. code-block:: pycon >>> from project.myapp.tasks import mytask # << GOOD @@ -177,7 +216,7 @@ so you must make sure you always import the tasks using the same name: The second example will cause the task to be named differently since the worker and the client imports the modules under different names: -.. code-block:: python +.. code-block:: pycon >>> from project.myapp.tasks import mytask >>> mytask.name @@ -215,6 +254,55 @@ on the automatic naming: def add(x, y): return x + y +.. _task-name-generator-info: + +Changing the automatic naming behavior +-------------------------------------- + +.. versionadded:: 4.0 + +There are some cases when the default automatic naming is not suitable. +Consider you have many tasks within many different modules:: + + project/ + /__init__.py + /celery.py + /moduleA/ + /__init__.py + /tasks.py + /moduleB/ + /__init__.py + /tasks.py + +Using the default automatic naming, each task will have a generated name +like `moduleA.tasks.taskA`, `moduleA.tasks.taskB`, `moduleB.tasks.test` +and so on. You may want to get rid of having `tasks` in all task names. +As pointed above, you can explicitly give names for all tasks, or you +can change the automatic naming behavior by overriding +:meth:`@gen_task_name`. 
Continuing with the example, `celery.py` +may contain: + +.. code-block:: python + + from celery import Celery + + class MyCelery(Celery): + + def gen_task_name(self, name, module): + if module.endswith('.tasks'): + module = module[:-6] + return super(MyCelery, self).gen_task_name(name, module) + + app = MyCelery('main') + +So each task will have a name like `moduleA.taskA`, `moduleA.taskB` and +`moduleB.test`. + +.. warning:: + + Make sure that your :meth:`@gen_task_name` is a pure function, which means + that for the same input it must always return the same output. + .. _task-request-info: Context @@ -243,18 +331,18 @@ The request defines the following attributes: the client, and not by a worker. :eta: The original ETA of the task (if any). - This is in UTC time (depending on the :setting:`CELERY_ENABLE_UTC` + This is in UTC time (depending on the :setting:`enable_utc` setting). :expires: The original expiry time of the task (if any). - This is in UTC time (depending on the :setting:`CELERY_ENABLE_UTC` + This is in UTC time (depending on the :setting:`enable_utc` setting). :logfile: The file the worker logs to. See `Logging`_. :loglevel: The current log level used. -:hostname: Hostname of the worker instance executing the task. +:hostname: Node name of the worker instance executing the task. :delivery_info: Additional message delivery information. This is a mapping containing the exchange and routing key used to deliver this @@ -266,11 +354,11 @@ The request defines the following attributes: :called_directly: This flag is set to true if the task was not executed by the worker. -:callbacks: A list of subtasks to be called if this task returns successfully. +:callbacks: A list of signatures to be called if this task returns successfully. -:errback: A list of subtasks to be called if this task fails. +:errback: A list of signatures to be called if this task fails. -:utc: Set to true the caller has utc enabled (:setting:`CELERY_ENABLE_UTC`). 
+:utc: Set to true the caller has utc enabled (:setting:`enable_utc`). .. versionadded:: 3.1 @@ -327,8 +415,34 @@ for which documentation can be found in the :mod:`logging` module. You can also use :func:`print`, as anything written to standard -out/-err will be redirected to logging system (you can disable this, -see :setting:`CELERY_REDIRECT_STDOUTS`). +out/-err will be redirected to the logging system (you can disable this, +see :setting:`worker_redirect_stdouts`). + +.. note:: + + The worker will not update the redirection if you create a logger instance + somewhere in your task or task module. + + If you want to redirect ``sys.stdout`` and ``sys.stderr`` to a custom + logger you have to enable this manually, for example: + + .. code-block:: python + + import sys + + logger = get_task_logger(__name__) + + @app.task(bind=True) + def add(self, x, y): + old_outs = sys.stdout, sys.stderr + rlevel = self.app.conf.worker_redirect_stdouts_level + try: + self.app.log.redirect_stdouts_to_logger(logger, rlevel) + print('Adding {0} + {1}'.format(x, y)) + return x + y + finally: + sys.stdout, sys.stderr = old_outs + .. _task-retry: @@ -383,7 +497,7 @@ but this will not happen if: - An ``exc`` argument was not given. - In this case the :exc:`~@MaxRetriesExceeded` + In this case the :exc:`~@MaxRetriesExceededError` exception will be raised. - There is no current exception @@ -421,6 +535,45 @@ override this default. raise self.retry(exc=exc, countdown=60) # override the default and # retry in 1 minute +Autoretrying +------------ + +.. versionadded:: 4.0 + +Sometimes you may want to retry a task on particular exception. To do so, +you should wrap a task body with `try-except` statement, for example: + +.. code-block:: python + + @app.task + def div(a, b): + try: + return a / b + except ZeroDivisionError as exc: + raise div.retry(exc=exc) + +This may not be acceptable all the time, since you may have a lot of such +tasks. 
+ +Fortunately, you can tell Celery to automatically retry a task using +`autoretry_for` argument in `~@Celery.task` decorator: + +.. code-block:: python + + @app.task(autoretry_for=(ZeroDivisionError,)) + def div(a, b): + return a / b + +If you want to specify custom arguments for internal `~@Task.retry` +call, pass `retry_kwargs` argument to `~@Celery.task` decorator: + +.. code-block:: python + + @app.task(autoretry_for=(ZeroDivisionError,), + retry_kwargs={'max_retries': 5}) + def div(a, b): + return a / b + .. _task-options: List of Options @@ -462,7 +615,7 @@ General .. attribute:: Task.max_retries The maximum number of attempted retries before giving up. - If the number of retries exceeds this value a :exc:`~@MaxRetriesExceeded` + If the number of retries exceeds this value a :exc:`~@MaxRetriesExceededError` exception will be raised. *NOTE:* You have to call :meth:`~@Task.retry` manually, as it will not automatically retry on exception.. @@ -472,7 +625,7 @@ General .. attribute:: Task.throws - Optional list of expected error classes that should not be regarded + Optional tuple of expected error classes that should not be regarded as an actual error. Errors in this list will be reported as a failure to the result backend, @@ -514,10 +667,19 @@ General If it is an integer or float, it is interpreted as "tasks per second". The rate limits can be specified in seconds, minutes or hours - by appending `"/s"`, `"/m"` or `"/h"` to the value. - Example: `"100/m"` (hundred tasks a minute). Default is the - :setting:`CELERY_DEFAULT_RATE_LIMIT` setting, which if not specified means - rate limiting for tasks is disabled by default. + by appending `"/s"`, `"/m"` or `"/h"` to the value. Tasks will be evenly + distributed over the specified time frame. + + Example: `"100/m"` (hundred tasks a minute). This will enforce a minimum + delay of 600ms between starting two tasks on the same worker instance.
+ + Default is the :setting:`task_default_rate_limit` setting, + which if not specified means rate limiting for tasks is disabled by default. + + Note that this is a *per worker instance* rate limit, and not a global + rate limit. To enforce a global rate limit (e.g. for an API with a + maximum number of requests per second), you must restrict to a given + queue. .. attribute:: Task.time_limit @@ -543,7 +705,7 @@ General .. attribute:: Task.send_error_emails Send an email whenever a task of this type fails. - Defaults to the :setting:`CELERY_SEND_TASK_ERROR_EMAILS` setting. + Defaults to the :setting:`task_send_error_emails` setting. See :ref:`conf-error-mails` for more information. .. attribute:: Task.ErrorMail @@ -554,8 +716,8 @@ General .. attribute:: Task.serializer A string identifying the default serialization - method to use. Defaults to the :setting:`CELERY_TASK_SERIALIZER` - setting. Can be `pickle` `json`, `yaml`, or any custom + method to use. Defaults to the :setting:`task_serializer` + setting. Can be `pickle`, `json`, `yaml`, or any custom serialization methods that have been registered with :mod:`kombu.serialization.registry`. @@ -565,7 +727,7 @@ General A string identifying the default compression scheme to use. - Defaults to the :setting:`CELERY_MESSAGE_COMPRESSION` setting. + Defaults to the :setting:`task_compression` setting. Can be `gzip`, or `bzip2`, or any custom compression schemes that have been registered with the :mod:`kombu.compression` registry. @@ -573,8 +735,9 @@ General .. attribute:: Task.backend - The result store backend to use for this task. Defaults to the - :setting:`CELERY_RESULT_BACKEND` setting. + The result store backend to use for this task. An instance of one of the + backend classes in `celery.backends`. Defaults to `app.backend` which is + defined by the :setting:`result_backend` setting. .. 
attribute:: Task.acks_late @@ -586,7 +749,7 @@ General crashes in the middle of execution, which may be acceptable for some applications. - The global default can be overridden by the :setting:`CELERY_ACKS_LATE` + The global default can be overridden by the :setting:`task_acks_late` setting. .. _task-track-started: @@ -605,7 +768,7 @@ General will be available in the state metadata (e.g. `result.info['pid']`) The global default can be overridden by the - :setting:`CELERY_TRACK_STARTED` setting. + :setting:`task_track_started` setting. .. seealso:: @@ -648,48 +811,31 @@ Result Backends If you want to keep track of tasks or need the return values, then Celery must store or send the states somewhere so that they can be retrieved later. There are several built-in result backends to choose from: SQLAlchemy/Django ORM, -Memcached, RabbitMQ (amqp), MongoDB, and Redis -- or you can define your own. +Memcached, RabbitMQ/QPid (rpc), MongoDB, and Redis -- or you can define your own. No backend works well for every use case. You should read about the strengths and weaknesses of each backend, and choose the most appropriate for your needs. - .. seealso:: :ref:`conf-result-backend` -RabbitMQ Result Backend -~~~~~~~~~~~~~~~~~~~~~~~ +RPC Result Backend (RabbitMQ/QPid) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The RabbitMQ result backend (amqp) is special as it does not actually *store* +The RPC result backend (`rpc://`) is special as it does not actually *store* the states, but rather sends them as messages. This is an important difference as it -means that a result *can only be retrieved once*; If you have two processes -waiting for the same result, one of the processes will never receive the -result! +means that a result *can only be retrieved once*, and *only by the client +that initiated the task*. Two different processes can not wait for the same result. Even with that limitation, it is an excellent choice if you need to receive state changes in real-time. 
Using messaging means the client does not have to poll for new states. -There are several other pitfalls you should be aware of when using the -RabbitMQ result backend: - -* Every new task creates a new queue on the server, with thousands of tasks - the broker may be overloaded with queues and this will affect performance in - negative ways. If you're using RabbitMQ then each queue will be a separate - Erlang process, so if you're planning to keep many results simultaneously you - may have to increase the Erlang process limit, and the maximum number of file - descriptors your OS allows. - -* Old results will be cleaned automatically, based on the - :setting:`CELERY_TASK_RESULT_EXPIRES` setting. By default this is set to - expire after 1 day: if you have a very busy cluster you should lower - this value. - -For a list of options supported by the RabbitMQ result backend, please see -:ref:`conf-amqp-result-backend`. - +The messages are transient (non-persistent) by default, so the results will +disappear if the broker restarts. You can configure the result backend to send +persistent messages using the :setting:`result_persistent` setting. Database Result Backend ~~~~~~~~~~~~~~~~~~~~~~~ @@ -709,7 +855,6 @@ limitations. transaction is committed. It is recommended that you change to the `READ-COMMITTED` isolation level. - .. _task-builtin-states: Built-in States @@ -788,13 +933,16 @@ The name of the state is usually an uppercase string. As an example you could have a look at :mod:`abortable tasks <~celery.contrib.abortable>` which defines its own custom :state:`ABORTED` state. -Use :meth:`~@Task.update_state` to update a task's state:: +Use :meth:`~@Task.update_state` to update a task's state: + +..
code-block:: python @app.task(bind=True) def upload_files(self, filenames): for i, file in enumerate(filenames): - self.update_state(state='PROGRESS', - meta={'current': i, 'total': len(filenames)}) + if not self.request.called_directly: + self.update_state(state='PROGRESS', + meta={'current': i, 'total': len(filenames)}) Here I created the state `"PROGRESS"`, which tells any application @@ -902,7 +1050,8 @@ Example that stores results manually: @app.task(bind=True) def get_tweets(self, user): timeline = twitter.get_timeline(user) - self.update_state(state=states.SUCCESS, meta=timeline) + if not self.request.called_directly: + self.update_state(state=states.SUCCESS, meta=timeline) raise Ignore() .. _task-semipred-reject: @@ -1078,7 +1227,7 @@ base class for new task types. abstract = True def after_return(self, *args, **kwargs): - print('Task returned: {0!r}'.format(self.request) + print('Task returned: {0!r}'.format(self.request)) @app.task(base=DebugTask) @@ -1145,25 +1294,22 @@ Handlers The return value of this handler is ignored. -on_retry -~~~~~~~~ - .. _task-how-they-work: How it works ============ -Here comes the technical details, this part isn't something you need to know, +Here come the technical details. This part isn't something you need to know, but you may be interested. All defined tasks are listed in a registry. The registry contains a list of task names and their task classes. You can investigate this registry yourself: -.. code-block:: python +.. code-block:: pycon - >>> from celery import current_app - >>> current_app.tasks + >>> from proj.celery import app + >>> app.tasks {'celery.chord_unlock': <@task: celery.chord_unlock>, 'celery.backend_cleanup': @@ -1175,7 +1321,7 @@ This is the list of tasks built-in to celery. Note that tasks will only be registered when the module they are defined in is imported. The default loader imports any modules listed in the -:setting:`CELERY_IMPORTS` setting. +:setting:`imports` setting. 
The entity responsible for registering your task in the registry is the metaclass: :class:`~celery.task.base.TaskType`. @@ -1219,7 +1365,7 @@ wastes time and resources. def mytask(…): something() -Results can even be disabled globally using the :setting:`CELERY_IGNORE_RESULT` +Results can even be disabled globally using the :setting:`task_ignore_result` setting. .. _task-disable-rate-limits: @@ -1231,12 +1377,12 @@ Disabling rate limits altogether is recommended if you don't have any tasks using them. This is because the rate limit subsystem introduces quite a lot of complexity. -Set the :setting:`CELERY_DISABLE_RATE_LIMITS` setting to globally disable +Set the :setting:`worker_disable_rate_limits` setting to globally disable rate limits: .. code-block:: python - CELERY_DISABLE_RATE_LIMITS = True + worker_disable_rate_limits = True You find additional optimization tips in the :ref:`Optimizing Guide `. @@ -1280,7 +1426,7 @@ Make your design asynchronous instead, for example by using *callbacks*. def update_page_info(url): # fetch_page -> parse_page -> store_page - chain = fetch_page.s() | parse_page.s() | store_page_info.s(url) + chain = fetch_page.s(url) | parse_page.s() | store_page_info.s(url) chain() @app.task() @@ -1297,7 +1443,7 @@ Make your design asynchronous instead, for example by using *callbacks*. Here I instead created a chain of tasks by linking together -different :func:`~celery.subtask`'s. +different :func:`~celery.signature`'s. You can read about chains and other powerful constructs at :ref:`designing-workflows`. @@ -1312,8 +1458,8 @@ Granularity ----------- The task granularity is the amount of computation needed by each subtask. -In general it is better to split the problem up into many small tasks, than -have a few long running tasks. +In general it is better to split the problem up into many small tasks rather +than have a few long running tasks. 
With smaller tasks you can process more tasks in parallel and the tasks won't run long enough to block the worker from processing other waiting tasks. @@ -1395,7 +1541,9 @@ that automatically expands some abbreviations in it: article.save() First, an author creates an article and saves it, then the author -clicks on a button that initiates the abbreviation task:: +clicks on a button that initiates the abbreviation task: + +.. code-block:: pycon >>> article = Article.objects.get(id=102) >>> expand_abbreviations.delay(article) @@ -1416,6 +1564,8 @@ re-fetch the article in the task body: article.body.replace('MyCorp', 'My Corporation') article.save() +.. code-block:: pycon + >>> expand_abbreviations(article_id) There might even be performance benefits to this approach, as sending large @@ -1462,12 +1612,26 @@ depending on state from the current transaction*: transaction.commit() expand_abbreviations.delay(article.pk) +.. note:: + Django 1.6 (and later) now enables autocommit mode by default, + and ``commit_on_success``/``commit_manually`` are deprecated. + + This means each SQL query is wrapped and executed in individual + transactions, making it less likely to experience the + problem described above. + + However, enabling ``ATOMIC_REQUESTS`` on the database + connection will bring back the transaction-per-request model and the + race condition along with it. In this case, the simple solution is + using the ``@transaction.non_atomic_requests`` decorator to go back + to autocommit for that view only. + .. _task-example: Example ======= -Let's take a real wold example; A blog where comments posted needs to be +Let's take a real world example: a blog where comments posted need to be filtered for spam. When the comment is created, the spam filter runs in the background, so the user doesn't have to wait for it to finish. 
diff --git a/docs/userguide/workers.rst b/docs/userguide/workers.rst index ef98f7db4..ffff5be32 100644 --- a/docs/userguide/workers.rst +++ b/docs/userguide/workers.rst @@ -21,28 +21,28 @@ Starting the worker You can start the worker in the foreground by executing the command: -.. code-block:: bash +.. code-block:: console - $ celery --app=app worker -l info + $ celery -A proj worker -l info For a full list of available command-line options see :mod:`~celery.bin.worker`, or simply do: -.. code-block:: bash +.. code-block:: console $ celery worker --help You can also start multiple workers on the same machine. If you do so be sure to give a unique name to each individual worker by specifying a -host name with the :option:`--hostname|-n` argument: +node name with the :option:`--hostname|-n` argument: -.. code-block:: bash +.. code-block:: console - $ celery worker --loglevel=INFO --concurrency=10 -n worker1.%h - $ celery worker --loglevel=INFO --concurrency=10 -n worker2.%h - $ celery worker --loglevel=INFO --concurrency=10 -n worker3.%h + $ celery -A proj worker --loglevel=INFO --concurrency=10 -n worker1.%h + $ celery -A proj worker --loglevel=INFO --concurrency=10 -n worker2.%h + $ celery -A proj worker --loglevel=INFO --concurrency=10 -n worker3.%h -The hostname argument can expand the following variables: +The ``hostname`` argument can expand the following variables: - ``%h``: Hostname including domain name. - ``%n``: Hostname only. @@ -55,6 +55,10 @@ these will expand to: - ``worker1.%n`` -> ``worker1.george`` - ``worker1.%d`` -> ``worker1.example.com`` +.. admonition:: Note for :program:`supervisord` users. + + The ``%`` sign must be escaped by adding a second one: `%%h`. + .. _worker-stopping: Stopping the worker @@ -77,7 +81,7 @@ Also as processes can't override the :sig:`KILL` signal, the worker will not be able to reap its children, so make sure to do so manually. This command usually does the trick: -.. code-block:: bash +.. 
code-block:: console $ ps auxww | grep 'celery worker' | awk '{print $2}' | xargs kill -9 @@ -90,10 +94,10 @@ To restart the worker you should send the `TERM` signal and start a new instance. The easiest way to manage workers for development is by using `celery multi`: - .. code-block:: bash +.. code-block:: console - $ celery multi start 1 -A proj -l info -c4 --pidfile=/var/run/celery/%n.pid - $ celery multi restart 1 --pidfile=/var/run/celery/%n.pid + $ celery multi start 1 -A proj -l info -c4 --pidfile=/var/run/celery/%n.pid + $ celery multi restart 1 --pidfile=/var/run/celery/%n.pid For production deployments you should be using init scripts or other process supervision systems (see :ref:`daemonizing`). @@ -103,7 +107,7 @@ restart the worker using the :sig:`HUP` signal, but note that the worker will be responsible for restarting itself so this is prone to problems and is not recommended in production: -.. code-block:: bash +.. code-block:: console $ kill -HUP $pid @@ -145,16 +149,18 @@ can contain variables that the worker will expand: Node name replacements ---------------------- +- ``%p``: Full node name. - ``%h``: Hostname including domain name. - ``%n``: Hostname only. - ``%d``: Domain name only. - ``%i``: Prefork pool process index or 0 if MainProcess. - ``%I``: Prefork pool process index with separator. -E.g. if the current hostname is ``george.example.com`` then +E.g. if the current hostname is ``george@foo.example.com`` then these will expand to: -- ``--logfile=%h.log`` -> :file:`george.example.com.log` +- ``--logfile-%p.log`` -> :file:`george@foo.example.com.log` +- ``--logfile=%h.log`` -> :file:`foo.example.com.log` - ``--logfile=%n.log`` -> :file:`george.log` - ``--logfile=%d`` -> :file:`example.com.log` @@ -187,8 +193,8 @@ is the *process index* not the process count or pid. 
three log files: - :file:`worker1.log` (main process) - - :file:`worker1-1.log`` (pool process 1) - - :file:`worker1-2.log`` (pool process 2) + - :file:`worker1-1.log` (pool process 1) + - :file:`worker1-2.log` (pool process 2) .. _worker-concurrency: @@ -261,14 +267,18 @@ Some remote control commands also have higher-level interfaces using :meth:`~@control.broadcast` in the background, like :meth:`~@control.rate_limit` and :meth:`~@control.ping`. -Sending the :control:`rate_limit` command and keyword arguments:: +Sending the :control:`rate_limit` command and keyword arguments: + +.. code-block:: pycon >>> app.control.broadcast('rate_limit', ... arguments={'task_name': 'myapp.mytask', ... 'rate_limit': '200/m'}) This will send the command asynchronously, without waiting for a reply. -To request a reply you have to use the `reply` argument:: +To request a reply you have to use the `reply` argument: + +.. code-block:: pycon >>> app.control.broadcast('rate_limit', { ... 'task_name': 'myapp.mytask', 'rate_limit': '200/m'}, reply=True) @@ -277,7 +287,9 @@ To request a reply you have to use the `reply` argument:: {'worker3.example.com': 'New rate limit set successfully'}] Using the `destination` argument you can specify a list of workers -to receive the command:: +to receive the command: + +.. code-block:: pycon >>> app.control.broadcast('rate_limit', { ... 'task_name': 'myapp.mytask', @@ -290,12 +302,16 @@ Of course, using the higher-level interface to set rate limits is much more convenient, but there are commands that can only be requested using :meth:`~@control.broadcast`. +Commands +======== + .. 
control:: revoke -Revoking tasks -============== -pool support: all -broker support: *amqp, redis* +``revoke``: Revoking tasks +-------------------------- +:pool support: all, terminate only supported by prefork +:broker support: *amqp, redis* +:command: :program:`celery -A proj control revoke ` All worker nodes keeps a memory of revoked task ids, either in-memory or persistent on disk (see :ref:`worker-persistent-revokes`). @@ -310,7 +326,7 @@ the `terminate` option is set. a task is stuck. It's not for terminating the task, it's for terminating the process that is executing the task, and that process may have already started processing another task at the point - when the signal is sent, so for this rason you must never call this + when the signal is sent, so for this reason you must never call this programatically. If `terminate` is set the worker child process processing the task @@ -323,7 +339,7 @@ Terminating a task also revokes it. **Example** -:: +.. code-block:: pycon >>> result.revoke() @@ -351,7 +367,7 @@ several tasks at once. **Example** -:: +.. code-block:: pycon >>> app.control.revoke([ ... '7993b0aa-1f0b-4780-9af0-c47c0858b3f2', @@ -377,15 +393,15 @@ of revoked ids will also vanish. If you want to preserve this list between restarts you need to specify a file for these to be stored in by using the `--statedb` argument to :program:`celery worker`: -.. code-block:: bash +.. code-block:: console - celery -A proj worker -l info --statedb=/var/run/celery/worker.state + $ celery -A proj worker -l info --statedb=/var/run/celery/worker.state or if you use :program:`celery multi` you will want to create one file per worker instance so then you can use the `%n` format to expand the current node name: -.. code-block:: bash +.. 
code-block:: console celery multi start 2 -l info --statedb=/var/run/celery/%n.state @@ -435,8 +451,8 @@ time limit kills it: except SoftTimeLimitExceeded: clean_up_in_a_hurry() -Time limits can also be set using the :setting:`CELERYD_TASK_TIME_LIMIT` / -:setting:`CELERYD_TASK_SOFT_TIME_LIMIT` settings. +Time limits can also be set using the :setting:`task_time_limit` / +:setting:`task_soft_time_limit` settings. .. note:: @@ -455,7 +471,9 @@ and hard time limits for a task — named ``time_limit``. Example changing the time limit for the ``tasks.crawl_the_web`` task to have a soft time limit of one minute, and a hard time limit of -two minutes:: +two minutes: + +.. code-block:: pycon >>> app.control.time_limit('tasks.crawl_the_web', soft=60, hard=120, reply=True) @@ -476,7 +494,7 @@ Changing rate-limits at runtime Example changing the rate limit for the `myapp.mytask` task to execute at most 200 tasks of that type every minute: -.. code-block:: python +.. code-block:: pycon >>> app.control.rate_limit('myapp.mytask', '200/m') @@ -484,7 +502,7 @@ The above does not specify a destination, so the change request will affect all worker instances in the cluster. If you only want to affect a specific list of workers you can include the ``destination`` argument: -.. code-block:: python +.. code-block:: pycon >>> app.control.rate_limit('myapp.mytask', '200/m', ... destination=['celery@worker1.example.com']) @@ -492,7 +510,7 @@ list of workers you can include the ``destination`` argument: .. warning:: This won't affect workers with the - :setting:`CELERY_DISABLE_RATE_LIMITS` setting enabled. + :setting:`worker_disable_rate_limits` setting enabled. .. _worker-maxtasksperchild: @@ -510,7 +528,23 @@ This is useful if you have memory leaks you have no control over for example from closed source C extensions. The option can be set using the workers `--maxtasksperchild` argument -or using the :setting:`CELERYD_MAX_TASKS_PER_CHILD` setting. 
+or using the :setting:`worker_max_tasks_per_child` setting. + +Max memory per child setting +============================ + +.. versionadded:: TODO + +pool support: *prefork* + +With this option you can configure the maximum amount of resident +memory a worker may consume before it's replaced by a new process. + +This is useful if you have memory leaks you have no control over +for example from closed source C extensions. + +The option can be set using the worker's `--maxmemperchild` argument +or using the :setting:`worker_max_memory_per_child` setting. .. _worker-autoscaling: @@ -539,7 +573,7 @@ numbers: the maximum and minimum number of pool processes:: You can also define your own rules for the autoscaler by subclassing :class:`~celery.worker.autoscaler.Autoscaler`. Some ideas for metrics include load average or the amount of memory available. -You can specify a custom autoscaler with the :setting:`CELERYD_AUTOSCALER` setting. +You can specify a custom autoscaler with the :setting:`worker_autoscaler` setting. .. _worker-queues: @@ -548,20 +582,20 @@ Queues A worker instance can consume from any number of queues. By default it will consume from all queues defined in the -:setting:`CELERY_QUEUES` setting (which if not specified defaults to the +:setting:`task_queues` setting (which if not specified defaults to the queue named ``celery``). You can specify what queues to consume from at startup, by giving a comma separated list of queues to the :option:`-Q` option: -.. code-block:: bash +.. code-block:: console - $ celery worker -l info -Q foo,bar,baz + $ celery -A proj worker -l info -Q foo,bar,baz -If the queue name is defined in :setting:`CELERY_QUEUES` it will use that +If the queue name is defined in :setting:`task_queues` it will use that configuration, but if it's not defined in the list of queues Celery will automatically generate a new queue for you (depending on the -:setting:`CELERY_CREATE_MISSING_QUEUES` option).
+:setting:`task_create_missing_queues` option). You can also tell the worker to start and stop consuming from a queue at runtime using the remote control commands :control:`add_consumer` and @@ -578,20 +612,22 @@ to start consuming from a queue. This operation is idempotent. To tell all workers in the cluster to start consuming from a queue named "``foo``" you can use the :program:`celery control` program: -.. code-block:: bash +.. code-block:: console - $ celery control add_consumer foo + $ celery -A proj control add_consumer foo -> worker1.local: OK started consuming from u'foo' If you want to specify a specific worker you can use the :option:`--destination`` argument: -.. code-block:: bash +.. code-block:: console + + $ celery -A proj control add_consumer foo -d worker1.local - $ celery control add_consumer foo -d worker1.local +The same can be accomplished dynamically using the :meth:`@control.add_consumer` method: -The same can be accomplished dynamically using the :meth:`@control.add_consumer` method:: +.. code-block:: pycon >>> app.control.add_consumer('foo', reply=True) [{u'worker1.local': {u'ok': u"already consuming from u'foo'"}}] @@ -603,7 +639,9 @@ The same can be accomplished dynamically using the :meth:`@control.add_consumer` By now I have only shown examples using automatic queues, If you need more control you can also specify the exchange, routing_key and -even other options:: +even other options: + +.. code-block:: pycon >>> app.control.add_consumer( ... queue='baz', @@ -620,8 +658,8 @@ even other options:: .. control:: cancel_consumer -Queues: Cancelling consumers ----------------------------- +Queues: Canceling consumers +--------------------------- You can cancel a consumer by queue name using the :control:`cancel_consumer` control command. @@ -629,22 +667,22 @@ control command. To force all workers in the cluster to cancel consuming from a queue you can use the :program:`celery control` program: -.. code-block:: bash +.. 
code-block:: console - $ celery control cancel_consumer foo + $ celery -A proj control cancel_consumer foo The :option:`--destination` argument can be used to specify a worker, or a list of workers, to act on the command: -.. code-block:: bash +.. code-block:: console - $ celery control cancel_consumer foo -d worker1.local + $ celery -A proj control cancel_consumer foo -d worker1.local You can also cancel consumers programmatically using the :meth:`@control.cancel_consumer` method: -.. code-block:: bash +.. code-block:: pycon >>> app.control.cancel_consumer('foo', reply=True) [{u'worker1.local': {u'ok': u"no longer consuming from u'foo'"}}] @@ -657,23 +695,25 @@ Queues: List of active queues You can get a list of queues that a worker consumes from by using the :control:`active_queues` control command: -.. code-block:: bash +.. code-block:: console - $ celery inspect active_queues + $ celery -A proj inspect active_queues [...] Like all other remote control commands this also supports the :option:`--destination` argument used to specify which workers should reply to the request: -.. code-block:: bash +.. code-block:: console - $ celery inspect active_queues -d worker1.local + $ celery -A proj inspect active_queues -d worker1.local [...] This can also be done programmatically by using the -:meth:`@control.inspect.active_queues` method:: +:meth:`@control.inspect.active_queues` method: + +.. code-block:: pycon >>> app.control.inspect().active_queues() [...] @@ -692,8 +732,8 @@ pool support: *prefork, eventlet, gevent, threads, solo* Starting :program:`celery worker` with the :option:`--autoreload` option will enable the worker to watch for file system changes to all imported task -modules imported (and also any non-task modules added to the -:setting:`CELERY_IMPORTS` setting or the :option:`-I|--include` option). +modules (and also any non-task modules added to the +:setting:`imports` setting or the :option:`-I|--include` option).
This is an experimental feature intended for use in development only, using auto-reload in production is discouraged as the behavior of reloading @@ -718,7 +758,7 @@ implementations: to install the :mod:`pyinotify` library you have to run the following command: - .. code-block:: bash + .. code-block:: console $ pip install pyinotify @@ -732,7 +772,7 @@ implementations: You can force an implementation by setting the :envvar:`CELERYD_FSNOTIFY` environment variable: -.. code-block:: bash +.. code-block:: console $ env CELERYD_FSNOTIFY=stat celery worker -l info --autoreload @@ -745,7 +785,7 @@ Pool Restart Command .. versionadded:: 2.5 -Requires the :setting:`CELERYD_POOL_RESTARTS` setting to be enabled. +Requires the :setting:`worker_pool_restarts` setting to be enabled. The remote control command :control:`pool_restart` sends restart requests to the workers child processes. It is particularly useful for forcing @@ -758,14 +798,14 @@ Example Running the following command will result in the `foo` and `bar` modules being imported by the worker processes: -.. code-block:: python +.. code-block:: pycon >>> app.control.broadcast('pool_restart', ... arguments={'modules': ['foo', 'bar']}) Use the ``reload`` argument to reload modules it has already imported: -.. code-block:: python +.. code-block:: pycon >>> app.control.broadcast('pool_restart', ... arguments={'modules': ['foo'], @@ -774,7 +814,7 @@ Use the ``reload`` argument to reload modules it has already imported: If you don't specify any modules then all known tasks modules will be imported/reloaded: -.. code-block:: python +.. code-block:: pycon >>> app.control.broadcast('pool_restart', arguments={'reload': True}) @@ -806,18 +846,18 @@ Inspecting workers uses remote control commands under the hood. You can also use the ``celery`` command to inspect workers, -and it supports the same commands as the :class:`@Celery.control` interface. +and it supports the same commands as the :class:`@control` interface. -.. 
code-block:: python +.. code-block:: pycon - # Inspect all nodes. + >>> # Inspect all nodes. >>> i = app.control.inspect() - # Specify multiple nodes to inspect. + >>> # Specify multiple nodes to inspect. >>> i = app.control.inspect(['worker1.example.com', 'worker2.example.com']) - # Specify a single node to inspect. + >>> # Specify a single node to inspect. >>> i = app.control.inspect('worker1.example.com') .. _worker-inspect-registered-tasks: @@ -826,7 +866,9 @@ Dump of registered tasks ------------------------ You can get a list of tasks registered in the worker using the -:meth:`~@control.inspect.registered`:: +:meth:`~@control.inspect.registered`: + +.. code-block:: pycon >>> i.registered() [{'worker1.example.com': ['tasks.add', @@ -838,7 +880,9 @@ Dump of currently executing tasks --------------------------------- You can get a list of active tasks using -:meth:`~@control.inspect.active`:: +:meth:`~@control.inspect.active`: + +.. code-block:: pycon >>> i.active() [{'worker1.example.com': @@ -853,7 +897,9 @@ Dump of scheduled (ETA) tasks ----------------------------- You can get a list of tasks waiting to be scheduled by using -:meth:`~@control.inspect.scheduled`:: +:meth:`~@control.inspect.scheduled`: + +.. code-block:: pycon >>> i.scheduled() [{'worker1.example.com': @@ -879,11 +925,13 @@ You can get a list of tasks waiting to be scheduled by using Dump of reserved tasks ---------------------- -Reserved tasks are tasks that has been received, but is still waiting to be +Reserved tasks are tasks that have been received, but are still waiting to be executed. You can get a list of these using -:meth:`~@control.inspect.reserved`:: +:meth:`~@control.inspect.reserved`: + +.. code-block:: pycon >>> i.reserved() [{'worker1.example.com': @@ -902,7 +950,7 @@ The remote control command ``inspect stats`` (or :meth:`~@control.inspect.stats`) will give you a long list of useful (or not so useful) statistics about the worker: -.. code-block:: bash +.. 
code-block:: console $ celery -A proj inspect stats @@ -922,7 +970,7 @@ The output will include the following fields: * ``hostname`` - Hostname of the remote broker. + Node name of the remote broker. * ``insist`` @@ -1086,8 +1134,8 @@ The output will include the following fields: - ``total`` - List of task names and a total number of times that task have been - executed since worker start. + Map of task names and the total number of tasks with that type + the worker has accepted since startup. Additional Commands @@ -1100,7 +1148,7 @@ Remote shutdown This command will gracefully shut down the worker remotely: -.. code-block:: python +.. code-block:: pycon >>> app.control.broadcast('shutdown') # shutdown all workers >>> app.control.broadcast('shutdown, destination="worker1@example.com") @@ -1115,7 +1163,7 @@ The workers reply with the string 'pong', and that's just about it. It will use the default one second timeout for replies unless you specify a custom timeout: -.. code-block:: python +.. code-block:: pycon >>> app.control.ping(timeout=0.5) [{'worker1.example.com': 'pong'}, @@ -1123,7 +1171,9 @@ a custom timeout: {'worker3.example.com': 'pong'}] :meth:`~@control.ping` also supports the `destination` argument, -so you can specify which workers to ping:: +so you can specify which workers to ping: + +.. code-block:: pycon >>> ping(['worker2.example.com', 'worker3.example.com']) [{'worker2.example.com': 'pong'}, @@ -1141,7 +1191,7 @@ You can enable/disable events by using the `enable_events`, `disable_events` commands. This is useful to temporarily monitor a worker using :program:`celery events`/:program:`celerymon`. -.. code-block:: python +.. code-block:: pycon >>> app.control.enable_events() >>> app.control.disable_events() diff --git a/docs/whatsnew-2.5.rst b/docs/whatsnew-2.5.rst index 08dc3135f..b57ac0d5c 100644 --- a/docs/whatsnew-2.5.rst +++ b/docs/whatsnew-2.5.rst @@ -64,7 +64,7 @@ race condition leading to an annoying warning. 
The :program:`camqadm` command can be used to delete the previous exchange: - .. code-block:: bash + .. code-block:: console $ camqadm exchange.delete celeryresults @@ -240,7 +240,7 @@ implementations: to install the :mod:`pyinotify` library you have to run the following command: - .. code-block:: bash + .. code-block:: console $ pip install pyinotify @@ -254,7 +254,7 @@ implementations: You can force an implementation by setting the :envvar:`CELERYD_FSNOTIFY` environment variable: -.. code-block:: bash +.. code-block:: console $ env CELERYD_FSNOTIFY=stat celeryd -l info --autoreload @@ -288,7 +288,7 @@ You can change methods too, for example the ``on_failure`` handler: .. code-block:: python def my_on_failure(self, exc, task_id, args, kwargs, einfo): - print('Oh no! Task failed: %r' % (exc, )) + print('Oh no! Task failed: %r' % (exc,)) CELERY_ANNOTATIONS = {'*': {'on_failure': my_on_failure}} @@ -378,7 +378,7 @@ In Other News Additional configuration must be added at the end of the argument list followed by ``--``, for example: - .. code-block:: bash + .. code-block:: console $ celerybeat -l info -- celerybeat.max_loop_interval=10.0 @@ -428,7 +428,7 @@ In Other News **Examples**: - .. code-block:: bash + .. code-block:: console $ celeryctl migrate redis://localhost amqp://localhost $ celeryctl migrate amqp://localhost//v1 amqp://localhost//v2 diff --git a/docs/whatsnew-3.0.rst b/docs/whatsnew-3.0.rst index bd0136eb0..165bb54ab 100644 --- a/docs/whatsnew-3.0.rst +++ b/docs/whatsnew-3.0.rst @@ -96,7 +96,7 @@ has been removed, and that makes it incompatible with earlier versions. You can manually delete the old exchanges if you want, using the :program:`celery amqp` command (previously called ``camqadm``): -.. code-block:: bash +.. 
code-block:: console $ celery amqp exchange.delete celeryd.pidbox $ celery amqp exchange.delete reply.celeryd.pidbox @@ -128,7 +128,7 @@ All Celery's command-line programs are now available from a single You can see a list of subcommands and options by running: -.. code-block:: bash +.. code-block:: console $ celery help @@ -168,7 +168,7 @@ The setup.py install script will try to remove the old package, but if that doesn't work for some reason you have to remove it manually. This command helps: -.. code-block:: bash +.. code-block:: console $ rm -r $(dirname $(python -c ' import celery;print(celery.__file__)'))/app/task/ @@ -303,19 +303,19 @@ Tasks can now have callbacks and errbacks, and dependencies are recorded which can than be used to produce an image: - .. code-block:: bash + .. code-block:: console $ dot -Tpng graph.dot -o graph.png - A new special subtask called ``chain`` is also included: - .. code-block:: python + .. code-block:: pycon >>> from celery import chain # (2 + 2) * 8 / 2 >>> res = chain(add.subtask((2, 2)), - mul.subtask((8, )), + mul.subtask((8,)), div.subtask((2,))).apply_async() >>> res.get() == 16 @@ -351,7 +351,9 @@ The priority field is a number in the range of 0 - 9, where The priority range is collapsed into four steps by default, since it is unlikely that nine steps will yield more benefit than using four steps. The number of steps can be configured by setting the ``priority_steps`` -transport option, which must be a list of numbers in **sorted order**:: +transport option, which must be a list of numbers in **sorted order**: + +.. code-block:: pycon >>> BROKER_TRANSPORT_OPTIONS = { ... 'priority_steps': [0, 2, 4, 6, 8, 9], @@ -393,28 +395,34 @@ accidentally changed while switching to using blocking pop. - A new shortcut has been added to tasks: - :: + .. code-block:: pycon >>> task.s(arg1, arg2, kw=1) - as a shortcut to:: + as a shortcut to: + + .. 
code-block:: pycon >>> task.subtask((arg1, arg2), {'kw': 1}) -- Tasks can be chained by using the ``|`` operator:: +- Tasks can be chained by using the ``|`` operator: + + .. code-block:: pycon >>> (add.s(2, 2), pow.s(2)).apply_async() - Subtasks can be "evaluated" using the ``~`` operator: - :: + .. code-block:: pycon >>> ~add.s(2, 2) 4 >>> ~(add.s(2, 2) | pow.s(2)) - is the same as:: + is the same as: + + .. code-block:: pycon >>> chain(add.s(2, 2), pow.s(2)).apply_async().get() @@ -434,7 +442,9 @@ accidentally changed while switching to using blocking pop. It's now a pure dict subclass with properties for attribute access to the relevant keys. -- The repr's now outputs how the sequence would like imperatively:: +- The repr's now outputs how the sequence would like imperatively: + + .. code-block:: pycon >>> from celery import chord @@ -457,7 +467,7 @@ New remote control commands These commands were previously experimental, but they have proven stable and is now documented as part of the offical API. -- ``add_consumer``/``cancel_consumer`` +- :control:`add_consumer`/:control:`cancel_consumer` Tells workers to consume from a new queue, or cancel consuming from a queue. This command has also been changed so that the worker remembers @@ -467,7 +477,7 @@ stable and is now documented as part of the offical API. These commands are available programmatically as :meth:`@control.add_consumer` / :meth:`@control.cancel_consumer`: - .. code-block:: python + .. code-block:: pycon >>> celery.control.add_consumer(queue_name, ... destination=['w1.example.com']) @@ -476,7 +486,7 @@ stable and is now documented as part of the offical API. or using the :program:`celery control` command: - .. code-block:: bash + .. code-block:: console $ celery control -d w1.example.com add_consumer queue $ celery control -d w1.example.com cancel_consumer queue @@ -486,45 +496,45 @@ stable and is now documented as part of the offical API. 
Remember that a control command without *destination* will be sent to **all workers**. -- ``autoscale`` +- :control:`autoscale` Tells workers with `--autoscale` enabled to change autoscale max/min concurrency settings. This command is available programmatically as :meth:`@control.autoscale`: - .. code-block:: python + .. code-block:: pycon >>> celery.control.autoscale(max=10, min=5, ... destination=['w1.example.com']) or using the :program:`celery control` command: - .. code-block:: bash + .. code-block:: console $ celery control -d w1.example.com autoscale 10 5 -- ``pool_grow``/``pool_shrink`` +- :control:`pool_grow`/:control:`pool_shrink` Tells workers to add or remove pool processes. These commands are available programmatically as :meth:`@control.pool_grow` / :meth:`@control.pool_shrink`: - .. code-block:: python + .. code-block:: pycon >>> celery.control.pool_grow(2, destination=['w1.example.com']) >>> celery.contorl.pool_shrink(2, destination=['w1.example.com']) or using the :program:`celery control` command: - .. code-block:: bash + .. code-block:: console $ celery control -d w1.example.com pool_grow 2 $ celery control -d w1.example.com pool_shrink 2 -- :program:`celery control` now supports ``rate_limit`` & ``time_limit`` - commands. +- :program:`celery control` now supports :control:`rate_limit` and + :control:`time_limit` commands. See ``celery control --help`` for details. @@ -537,12 +547,16 @@ Immutable subtasks ------------------ ``subtask``'s can now be immutable, which means that the arguments -will not be modified when calling callbacks:: +will not be modified when calling callbacks: + +.. code-block:: pycon >>> chain(add.s(2, 2), clear_static_electricity.si()) means it will not receive the argument of the parent task, -and ``.si()`` is a shortcut to:: +and ``.si()`` is a shortcut to: + +.. 
code-block:: pycon >>> clear_static_electricity.subtask(immutable=True) @@ -602,7 +616,9 @@ Task registry no longer global Every Celery instance now has its own task registry. -You can make apps share registries by specifying it:: +You can make apps share registries by specifying it: + +.. code-block:: pycon >>> app1 = Celery() >>> app2 = Celery(tasks=app1.tasks) @@ -610,7 +626,9 @@ You can make apps share registries by specifying it:: Note that tasks are shared between registries by default, so that tasks will be added to every subsequently created task registry. As an alternative tasks can be private to specific task registries -by setting the ``shared`` argument to the ``@task`` decorator:: +by setting the ``shared`` argument to the ``@task`` decorator: + +.. code-block:: python @celery.task(shared=False) def add(x, y): @@ -625,7 +643,9 @@ by default, it will first be bound (and configured) when a concrete subclass is created. This means that you can safely import and make task base classes, -without also initializing the app environment:: +without also initializing the app environment: + +.. code-block:: python from celery.task import Task @@ -633,9 +653,11 @@ without also initializing the app environment:: abstract = True def __call__(self, *args, **kwargs): - print('CALLING %r' % (self, )) + print('CALLING %r' % (self,)) return self.run(*args, **kwargs) +.. code-block:: pycon + >>> DebugTask @@ -656,7 +678,7 @@ decorator executes inline like before, however for custom apps the @task decorator now returns a special PromiseProxy object that is only evaluated on access. -All promises will be evaluated when `app.finalize` is called, or implicitly +All promises will be evaluated when :meth:`@finalize` is called, or implicitly when the task registry is first used. @@ -676,7 +698,7 @@ E.g. if you have a project named 'proj' where the celery app is located in 'from proj.celery import app', then the following will be equivalent: -.. code-block:: bash +.. 
code-block:: console $ celery worker --app=proj $ celery worker --app=proj.celery: @@ -687,7 +709,7 @@ In Other News - New :setting:`CELERYD_WORKER_LOST_WAIT` to control the timeout in seconds before :exc:`billiard.WorkerLostError` is raised - when a worker can not be signalled (Issue #595). + when a worker can not be signaled (Issue #595). Contributed by Brendon Crawford. @@ -697,7 +719,9 @@ In Other News descriptors that creates a new subclass on access. This means that e.g. ``app.Worker`` is an actual class - and will work as expected when:: + and will work as expected when: + + .. code-block:: python class Worker(app.Worker): ... @@ -715,7 +739,9 @@ In Other News - Result backends can now be set using an URL - Currently only supported by redis. Example use:: + Currently only supported by redis. Example use: + + .. code-block:: python CELERY_RESULT_BACKEND = 'redis://localhost/1' @@ -742,7 +768,7 @@ In Other News @wraps(fun) def _inner(*args, **kwargs): - print('ARGS: %r' % (args, )) + print('ARGS: %r' % (args,)) return _inner CELERY_ANNOTATIONS = { @@ -754,20 +780,22 @@ In Other News - Bugreport now available as a command and broadcast command - - Get it from a Python repl:: + - Get it from a Python repl: + + .. code-block:: pycon - >>> import celery - >>> print(celery.bugreport()) + >>> import celery + >>> print(celery.bugreport()) - Using the ``celery`` command line program: - .. code-block:: bash + .. code-block:: console $ celery report - Get it from remote workers: - .. code-block:: bash + .. code-block:: console $ celery inspect report @@ -788,7 +816,9 @@ In Other News Returns a list of the results applying the task function to every item in the sequence. - Example:: + Example: + + .. code-block:: pycon >>> from celery import xstarmap @@ -799,12 +829,16 @@ In Other News - ``group.skew(start=, stop=, step=)`` - Skew will skew the countdown for the individual tasks in a group, - e.g. 
with a group:: + Skew will skew the countdown for the individual tasks in a group, + e.g. with a group: + + .. code-block:: pycon >>> g = group(add.s(i, i) for i in xrange(10)) - Skewing the tasks from 0 seconds to 10 seconds:: + Skewing the tasks from 0 seconds to 10 seconds: + + .. code-block:: pycon >>> g.skew(stop=10) diff --git a/docs/whatsnew-3.1.rst b/docs/whatsnew-3.1.rst index 6ac166166..a411e61da 100644 --- a/docs/whatsnew-3.1.rst +++ b/docs/whatsnew-3.1.rst @@ -73,10 +73,10 @@ these transports or donate resources to improve them, but as the situation is now I don't think the quality is up to date with the rest of the code-base so I cannot recommend them for production use. -The next version of Celery 3.2 will focus on performance and removing +The next version of Celery 4.0 will focus on performance and removing rarely used parts of the library. Work has also started on a new message protocol, supporting multiple languages and more. The initial draft can -be found :ref:`here `. +be found :ref:`here `. This has probably been the hardest release I've worked on, so no introduction to this changelog would be complete without a massive @@ -101,13 +101,13 @@ requiring the ``2to3`` porting tool. .. note:: - This is also the last version to support Python 2.6! From Celery 3.2 and + This is also the last version to support Python 2.6! From Celery 4.0 and onwards Python 2.7 or later will be required. Last version to enable Pickle by default ---------------------------------------- -Starting from Celery 3.2 the default serializer will be json. +Starting from Celery 4.0 the default serializer will be json. If you depend on pickle being accepted you should be prepared for this change by explicitly allowing your worker @@ -138,7 +138,7 @@ Everyone should move to the new :program:`celery` umbrella command, so we are incrementally deprecating the old command names. In this version we've removed all commands that are not used -in init scripts. 
The rest will be removed in 3.2. +in init scripts. The rest will be removed in 4.0. +-------------------+--------------+-------------------------------------+ | Program | New Status | Replacement | @@ -159,7 +159,7 @@ in init scripts. The rest will be removed in 3.2. If this is not a new installation then you may want to remove the old commands: -.. code-block:: bash +.. code-block:: console $ pip uninstall celery $ # repeat until it fails @@ -218,7 +218,7 @@ implementation. - Rare race conditions fixed - Most of these bugs were never reported to us, but was discovered while + Most of these bugs were never reported to us, but were discovered while running the new stress test suite. Caveats @@ -250,7 +250,7 @@ Caveats You can disable this prefetching behavior by enabling the :option:`-Ofair` worker option: - .. code-block:: bash + .. code-block:: console $ celery -A proj worker -l info -Ofair @@ -314,27 +314,27 @@ but if you would like to experiment with it you should know that: app.config_from_object('django.conf:settings') Neither will it automatically traverse your installed apps to find task - modules, but this still available as an option you must enable: + modules. If you want this behavior, you must explictly pass a list of Django instances to the Celery app: .. code-block:: python from django.conf import settings - app.autodiscover_tasks(settings.INSTALLED_APPS) + app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) - You no longer use ``manage.py`` Instead you use the :program:`celery` command directly: - .. code-block:: bash + .. code-block:: console - celery -A proj worker -l info + $ celery -A proj worker -l info For this to work your app module must store the :envvar:`DJANGO_SETTINGS_MODULE` environment variable, see the example in the :ref:`Django guide `. 
To get started with the new API you should first read the :ref:`first-steps` -tutorial, and then you should read the Django specific instructions in +tutorial, and then you should read the Django-specific instructions in :ref:`django-first-steps`. The fixes and improvements applied by the django-celery library are now @@ -375,7 +375,7 @@ but starting with this version that field is also used to order them. Also, events now record timezone information by including a new ``utcoffset`` field in the event message. This is a signed integer telling the difference from UTC time in hours, -so e.g. an even sent from the Europe/London timezone in daylight savings +so e.g. an event sent from the Europe/London timezone in daylight savings time will have an offset of 1. :class:`@events.Receiver` will automatically convert the timestamps @@ -389,8 +389,8 @@ to the local timezone. starts. If all of the workers are shutdown the clock value will be lost - and reset to 0, to protect against this you should specify - a :option:`--statedb` so that the worker can persist the clock + and reset to 0. To protect against this, you should specify + :option:`--statedb` so that the worker can persist the clock value at shutdown. You may notice that the logical clock is an integer value and @@ -410,14 +410,14 @@ If a custom name is not specified then the worker will use the name 'celery' by default, resulting in a fully qualified node name of 'celery@hostname': -.. code-block:: bash +.. code-block:: console $ celery worker -n example.com celery@example.com To also set the name you must include the @: -.. code-block:: bash +.. code-block:: console $ celery worker -n worker1@example.com worker1@example.com @@ -431,7 +431,7 @@ Remember that the ``-n`` argument also supports simple variable substitutions, so if the current hostname is *george.example.com* then the ``%h`` macro will expand into that: -.. code-block:: bash +.. 
code-block:: console $ celery worker -n worker1@%h worker1@george.example.com @@ -499,8 +499,8 @@ and you can write extensions that take advantage of this already. Some ideas include consensus protocols, reroute task to best worker (based on resource usage or data locality) or restarting workers when they crash. -We believe that this is a small addition but one that really opens -up for amazing possibilities. +We believe that although this is a small addition, it opens +amazing possibilities. You can disable this bootstep using the ``--without-gossip`` argument. @@ -556,7 +556,7 @@ Time limits can now be set by the client Two new options have been added to the Calling API: ``time_limit`` and ``soft_time_limit``: -.. code-block:: python +.. code-block:: pycon >>> res = add.apply_async((2, 2), time_limit=10, soft_time_limit=8) @@ -605,7 +605,7 @@ setuptools extras. You install extras by specifying them inside brackets: -.. code-block:: bash +.. code-block:: console $ pip install celery[redis,mongodb] @@ -659,9 +659,9 @@ This means that: now does the same as calling the task directly: -.. code-block:: python +.. code-block:: pycon - add(2, 2) + >>> add(2, 2) In Other News ------------- @@ -685,7 +685,7 @@ In Other News Regular signature: - .. code-block:: python + .. code-block:: pycon >>> s = add.s(2, 2) >>> result = s.freeze() @@ -696,7 +696,7 @@ In Other News Group: - .. code-block:: python + .. code-block:: pycon >>> g = group(add.s(2, 2), add.s(4, 4)) >>> result = g.freeze() @@ -706,10 +706,17 @@ In Other News >>> g() +- Chord exception behavior defined (Issue #1172). + + From this version the chord callback will change state to FAILURE + when a task part of a chord raises an exception. + + See more at :ref:`chord-errors`. + - New ability to specify additional command line options to the worker and beat programs. 
- The :attr:`@Celery.user_options` attribute can be used + The :attr:`@user_options` attribute can be used to add additional command-line arguments, and expects optparse-style options: @@ -760,9 +767,9 @@ In Other News A dispatcher instantiated as follows: - .. code-block:: python + .. code-block:: pycon - app.events.Dispatcher(connection, groups=['worker']) + >>> app.events.Dispatcher(connection, groups=['worker']) will only send worker related events and silently drop any attempts to send events related to any other group. @@ -807,7 +814,7 @@ In Other News Example: - .. code-block:: bash + .. code-block:: console $ celery inspect conf @@ -916,7 +923,7 @@ In Other News You can create graphs from the currently installed bootsteps: - .. code-block:: bash + .. code-block:: console # Create graph of currently installed bootsteps in both the worker # and consumer namespaces. @@ -930,7 +937,7 @@ In Other News Or graphs of workers in a cluster: - .. code-block:: bash + .. code-block:: console # Create graph from the current cluster $ celery graph workers | dot -T png -o workers.png @@ -979,11 +986,11 @@ In Other News The :envvar:`C_IMPDEBUG` can be set to trace imports as they occur: - .. code-block:: bash + .. code-block:: console $ C_IMDEBUG=1 celery worker -l info - .. code-block:: bash + .. code-block:: console $ C_IMPDEBUG=1 celery shell @@ -1046,7 +1053,7 @@ In Other News This is the mapping of parsed command line arguments, and can be used to prepare new preload arguments (``app.user_options['preload']``). -- New callback: ``Celery.on_configure``. +- New callback: :meth:`@on_configure`. This callback is called when an app is about to be configured (a configuration key is required). @@ -1072,8 +1079,9 @@ In Other News (Issue #1555). The revoked signal is dispatched after the task request is removed from - the stack, so it must instead use the :class:`~celery.worker.job.Request` - object to get information about the task. 
+ the stack, so it must instead use the + :class:`~celery.worker.request.Request` object to get information + about the task. - Worker: New :option:`-X` command line argument to exclude queues (Issue #1399). @@ -1081,7 +1089,7 @@ In Other News The :option:`-X` argument is the inverse of the :option:`-Q` argument and accepts a list of queues to exclude (not consume from): - .. code-block:: bash + .. code-block:: console # Consume from all queues in CELERY_QUEUES, but not the 'foo' queue. $ celery worker -A proj -l info -X foo @@ -1090,13 +1098,13 @@ In Other News This means that you can now do: - .. code-block:: bash + .. code-block:: console $ C_FAKEFORK=1 celery multi start 10 or: - .. code-block:: bash + .. code-block:: console $ C_FAKEFORK=1 /etc/init.d/celeryd start @@ -1204,8 +1212,9 @@ Fixes - Eventlet/gevent/solo/threads pools now properly handles :exc:`BaseException` errors raised by tasks. -- Autoscale and ``pool_grow``/``pool_shrink`` remote control commands - will now also automatically increase and decrease the consumer prefetch count. +- :control:`autoscale` and :control:`pool_grow`/:control:`pool_shrink` remote + control commands will now also automatically increase and decrease the + consumer prefetch count. Fix contributed by Daniel M. Taub. @@ -1235,7 +1244,7 @@ Internal changes - Result backends (:class:`celery.backends.base.BaseBackend`) - :class:`celery.worker.WorkController` - :class:`celery.worker.Consumer` - - :class:`celery.worker.job.Request` + - :class:`celery.worker.request.Request` This means that you have to pass a specific app when instantiating these classes. @@ -1255,7 +1264,7 @@ Internal changes This removes a lot of duplicate functionality. - The ``Celery.with_default_connection`` method has been removed in favor - of ``with app.connection_or_acquire``. 
+ of ``with app.connection_or_acquire`` (:meth:`@connection_or_acquire`) - The ``celery.results.BaseDictBackend`` class has been removed and is replaced by :class:`celery.results.BaseBackend`. diff --git a/docs/whatsnew-4.0.rst b/docs/whatsnew-4.0.rst new file mode 100644 index 000000000..49a82672f --- /dev/null +++ b/docs/whatsnew-4.0.rst @@ -0,0 +1,739 @@ +.. _whatsnew-4.0: + +=========================================== + What's new in Celery 4.0 (0Today8) +=========================================== +:Author: Ask Solem (ask at celeryproject.org) + +.. sidebar:: Change history + + What's new documents describe the changes in major versions, + we also have a :ref:`changelog` that lists the changes in bugfix + releases (0.0.x), while older series are archived under the :ref:`history` + section. + +Celery is a simple, flexible and reliable distributed system to +process vast amounts of messages, while providing operations with +the tools required to maintain such a system. + +It's a task queue with focus on real-time processing, while also +supporting task scheduling. + +Celery has a large and diverse community of users and contributors, +you should come join us :ref:`on IRC ` +or :ref:`our mailing-list `. + +To read more about Celery you should go read the :ref:`introduction `. + +While this version is backward compatible with previous versions +it's important that you read the following section. + +This version is officially supported on CPython 2.7, 3.4 and 3.5. +and also supported on PyPy. + +.. _`website`: http://celeryproject.org/ + +.. topic:: Table of Contents + + Make sure you read the important notes before upgrading to this version. + +.. contents:: + :local: + :depth: 2 + +Preface +======= + + +.. 
_v320-important: + +Important Notes +=============== + +Dropped support for Python 2.6 +------------------------------ + +Celery now requires Python 2.7 or later, +and also drops support for Python 3.3 so supported versions are: + +- CPython 2.7 +- CPython 3.4 +- CPython 3.5 +- PyPy 4.0 (pypy2) +- PyPy 2.4 (pypy3) +- Jython 2.7.0 + +JSON is now the default serializer +---------------------------------- + +The Task base class no longer automatically registers tasks +---------------------------------------------------------- + +The metaclass has been removed blah blah + +Arguments now verified when calling a task +------------------------------------------ + +Redis Events not backward compatible +------------------------------------ + +The Redis ``fanout_patterns`` and ``fanout_prefix`` transport +options are now enabled by default, which means that workers +running 4.0 cannot see workers running 3.1 and vice versa. + +They should still execute tasks as normal, so this is only +related to monitoring events. + +To avoid this situation you can reconfigure the 3.1 workers (and clients) +to enable these settings before you mix them with workers and clients +running 4.x: + +.. code-block:: python + + BROKER_TRANSPORT_OPTIONS = { + 'fanout_patterns': True, + 'fanout_prefix': True, + } + +Lowercase setting names +----------------------- + +In the pursuit of beauty all settings have been renamed to be in all +lowercase, in a consistent naming scheme. + +This change is fully backwards compatible so you can still use the uppercase +setting names. + +The loader will try to detect if your configuration is using the new format, +and act accordingly, but this also means that you are not allowed to mix and +match new and old setting names, that is unless you provide a value for both +alternatives. + +The major difference between previous versions, apart from the lower case +names, is the renaming of some prefixes, like ``celerybeat_`` to ``beat_``, +``celeryd_`` to ``worker_``. 
+ +The ``celery_`` prefix has also been removed, and task related settings +from this namespace is now prefixed by ``task_``, worker related settings +with ``worker_``. + +Apart from this most of the settings will be the same in lowercase, apart from +a few special ones: + +===================================== ========================================================== +**Setting name** **Replace with** +===================================== ========================================================== +``CELERY_MAX_CACHED_RESULTS`` :setting:`result_cache_max` +``CELERY_MESSAGE_COMPRESSION`` :setting:`result_compression`/:setting:`task_compression`. +``CELERY_TASK_RESULT_EXPIRES`` :setting:`result_expires` +``CELERY_RESULT_DBURI`` :setting:`sqlalchemy_dburi` +``CELERY_RESULT_ENGINE_OPTIONS`` :setting:`sqlalchemy_engine_options` +``-*-_DB_SHORT_LIVED_SESSIONS`` :setting:`sqlalchemy_short_lived_sessions` +``CELERY_RESULT_DB_TABLE_NAMES`` :setting:`sqlalchemy_db_names` +``CELERY_ACKS_LATE`` :setting:`task_acks_late` +``CELERY_ALWAYS_EAGER`` :setting:`task_always_eager` +``CELERY_ANNOTATIONS`` :setting:`task_annotations` +``CELERY_MESSAGE_COMPRESSION`` :setting:`task_compression` +``CELERY_CREATE_MISSING_QUEUES`` :setting:`task_create_missing_queues` +``CELERY_DEFAULT_DELIVERY_MODE`` :setting:`task_default_delivery_mode` +``CELERY_DEFAULT_EXCHANGE`` :setting:`task_default_exchange` +``CELERY_DEFAULT_EXCHANGE_TYPE`` :setting:`task_default_exchange_type` +``CELERY_DEFAULT_QUEUE`` :setting:`task_default_queue` +``CELERY_DEFAULT_RATE_LIMIT`` :setting:`task_default_rate_limit` +``CELERY_DEFAULT_ROUTING_KEY`` :setting:`task_default_routing_key` +``-"-_EAGER_PROPAGATES_EXCEPTIONS`` :setting:`task_eager_propagates` +``CELERY_IGNORE_RESULT`` :setting:`task_ignore_result` +``CELERY_TASK_PUBLISH_RETRY`` :setting:`task_publish_retry` +``CELERY_TASK_PUBLISH_RETRY_POLICY`` :setting:`task_publish_retry_policy` +``CELERY_QUEUES`` :setting:`task_queues` +``CELERY_ROUTES`` 
:setting:`task_routes` +``CELERY_SEND_TASK_ERROR_EMAILS`` :setting:`task_send_error_emails` +``CELERY_SEND_TASK_SENT_EVENT`` :setting:`task_send_sent_event` +``CELERY_TASK_SERIALIZER`` :setting:`task_serializer` +``CELERYD_TASK_SOFT_TIME_LIMIT`` :setting:`task_soft_time_limit` +``CELERYD_TASK_TIME_LIMIT`` :setting:`task_time_limit` +``CELERY_TRACK_STARTED`` :setting:`task_track_started` +``CELERY_DISABLE_RATE_LIMITS`` :setting:`worker_disable_rate_limits` +``CELERY_ENABLE_REMOTE_CONTROL`` :setting:`worker_enable_remote_control` +``CELERYD_SEND_EVENTS`` :setting:`worker_send_task_events` +===================================== ========================================================== + +You can see a full table of the changes in :ref:`conf-old-settings-map`. + +Django: Autodiscover no longer takes arguments. +----------------------------------------------- + +Celery's Django support will instead automatically find your installed apps, +which means app configurations will work. + +# e436454d02dcbba4f4410868ad109c54047c2c15 + +Old command-line programs removed +--------------------------------- + +Installing Celery will no longer install the ``celeryd``, +``celerybeat`` and ``celeryd-multi`` programs. + +This was announced with the release of Celery 3.1, but you may still +have scripts pointing to the old names, so make sure you update them +to use the new umbrella command. 
+ ++-------------------+--------------+-------------------------------------+ +| Program | New Status | Replacement | ++===================+==============+=====================================+ +| ``celeryd`` | **REMOVED** | :program:`celery worker` | ++-------------------+--------------+-------------------------------------+ +| ``celerybeat`` | **REMOVED** | :program:`celery beat` | ++-------------------+--------------+-------------------------------------+ +| ``celeryd-multi`` | **REMOVED** | :program:`celery multi` | ++-------------------+--------------+-------------------------------------+ + +.. _v320-news: + +News +==== + +New Task Message Protocol +========================= + +# e71652d384b1b5df2a4e6145df9f0efb456bc71c + + +``TaskProducer`` replaced by ``app.amqp.create_task_message`` and +``app.amqp.send_task_message``. + +- Worker stores results for internal errors like ``ContentDisallowed``, and + exceptions occurring outside of the task function. + +- Worker stores results and sends monitoring events for unknown task names + +- shadow + +- argsrepr + +- Support for very long chains + +- parent_id / root_id + + +Prefork: Tasks now log from the child process +============================================= + +Logging of task success/failure now happens from the child process +actually executing the task, which means that logging utilities +like Sentry can get full information about tasks that fail, including +variables in the traceback. + +Prefork: One logfile per child process +====================================== + +Init scrips and :program:`celery multi` now uses the `%I` logfile format +option (e.g. :file:`/var/log/celery/%n%I.log`) to ensure each child +process has a separate log file to avoid race conditions. + +You are encouraged to upgrade your init scripts and multi arguments +to do so also. 
+ +Canvas Refactor +=============== + +# BLALBLABLA +d79dcd8e82c5e41f39abd07ffed81ca58052bcd2 +1e9dd26592eb2b93f1cb16deb771cfc65ab79612 +e442df61b2ff1fe855881c1e2ff9acc970090f54 +0673da5c09ac22bdd49ba811c470b73a036ee776 + +- Now unrolls groups within groups into a single group (Issue #1509). +- chunks/map/starmap tasks now routes based on the target task +- chords and chains can now be immutable. +- Fixed bug where serialized signature were not converted back into + signatures (Issue #2078) + + Fix contributed by Ross Deane. + +- Fixed problem where chains and groups did not work when using JSON + serialization (Issue #2076). + + Fix contributed by Ross Deane. + +- Creating a chord no longer results in multiple values for keyword + argument 'task_id'" (Issue #2225). + + Fix contributed by Aneil Mallavarapu + +- Fixed issue where the wrong result is returned when a chain + contains a chord as the penultimate task. + + Fix contributed by Aneil Mallavarapu + +- Special case of ``group(A.s() | group(B.s() | C.s()))`` now works. + +- Chain: Fixed bug with incorrect id set when a subtask is also a chain. + +Schedule tasks based on sunrise, sunset, dawn and dusk. +======================================================= + +See :ref:`beat-solar` for more information. + +Contributed by Mark Parncutt. + +App can now configure periodic tasks +==================================== + +# bc18d0859c1570f5eb59f5a969d1d32c63af764b +# 132d8d94d38f4050db876f56a841d5a5e487b25b + +RabbitMQ Priority queue support +=============================== + +# 1d4cbbcc921aa34975bde4b503b8df9c2f1816e0 + +Contributed by Gerald Manipon. + +Prefork: Limits for child process resident memory size. +======================================================= + +This version introduces the new :setting:`worker_max_memory_per_child` setting, +which BLA BLA BLA + +# 5cae0e754128750a893524dcba4ae030c414de33 + +Contributed by Dave Smith. + +Redis: New optimized chord join implementation. 
+=============================================== + +This was an experimental feature introduced in Celery 3.1, +but is now enabled by default. + +?new_join BLABLABLA + +Riak Result Backend +=================== + +Contributed by Gilles Dartiguelongue, Alman One and NoKriK. + +Bla bla + +- blah blah + +CouchDB Result Backend +====================== + +Contributed by Nathan Van Gheem + +New Cassandra Backend +===================== + +The new Cassandra backend utilizes the python-driver library. +Old backend is deprecated and everyone using cassandra is required to upgrade +to be using the new driver. + +# XXX What changed? + + +Event Batching +============== + +Events are now buffered in the worker and sent as a list, and +events are sent as transient messages by default so that they are not written +to disk by RabbitMQ. + +03399b4d7c26fb593e61acf34f111b66b340ba4e + + +Task.replace +============ + +Task.replace changed, removes Task.replace_in_chord. + +The two methods had almost the same functionality, but the old Task.replace +would force the new task to inherit the callbacks/errbacks of the existing +task. + +If you replace a node in a tree, then you would not expect the new node to +inherit the children of the old node, so this seems like unexpected +behavior. + +So self.replace(sig) now works for any task, in addition sig can now +be a group. + +Groups are automatically converted to a chord, where the callback +will "accumulate" the results of the group tasks. + +A new builtin task (`celery.accumulate` was added for this purpose) + +Closes #817 + + +Optimized Beat implementation +============================= + +heapq +20340d79b55137643d5ac0df063614075385daaa + +Contributed by Ask Solem and Alexander Koshelev. + + +Task Autoretry Decorator +======================== + +75246714dd11e6c463b9dc67f4311690643bff24 + +Contributed by Dmitry Malinovsky. + + +:setting:`task_routes` can now contain glob patterns and regexes. 
+================================================================= + +See examples in :setting:`task_routes` and :ref:`routing-automatic`. + +In Other News +------------- + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.1 `. + + - Now depends on :mod:`billiard` version 3.4. + + - No longer depends on ``anyjson`` :sadface: + +- **Programs**: ``%n`` format for :program:`celery multi` is now synonym with + ``%N`` to be consistent with :program:`celery worker`. + +- **Programs**: celery inspect/control now supports ``--json`` argument to + give output in json format. + +- **Programs**: :program:`celery inspect registered` now ignores built-in + tasks. + +- **Programs**: New :program:`celery logtool`: Utility for filtering and parsing + celery worker logfiles + +- **Redis Transport**: The Redis transport now supports the + :setting:`broker_use_ssl` option. + +- **Worker**: Worker now only starts the remote control command consumer if the + broker transport used actually supports them. + +- **Worker**: Gossip now sets ``x-message-ttl`` for event queue to heartbeat_interval s. + (Issue #2005). + +- **Worker**: Now preserves exit code (Issue #2024). + +- **Worker**: Loglevel for unrecoverable errors changed from ``error`` to + ``critical``. + +- **Worker**: Improved rate limiting accuracy. + +- **Worker**: Account for missing timezone information in task expires field. + + Fix contributed by Albert Wang. + +- **Worker**: The worker no longer has a ``Queues`` bootsteps, as it is now + superfluous. + +- **Tasks**: New :setting:`task_reject_on_worker_lost` setting, and + :attr:`~@Task.reject_on_worker_lost` task attribute decides what happens + when the child worker process executing a late ack task is terminated. + + Contributed by Michael Permana. 
+ +- **App**: New signals for app configuration/finalization: + + - :data:`app.on_configure <@on_configure>` + - :data:`app.on_after_configure <@on_after_configure>` + - :data:`app.on_after_finalize <@on_after_finalize>` + +- **Task**: New task signals for rejected task messages: + + - :data:`celery.signals.task_rejected`. + - :data:`celery.signals.task_unknown`. + +- **Events**: Event messages now use the RabbitMQ ``x-message-ttl`` option + to ensure older event messages are discarded. + + The default is 5 seconds, but can be changed using the + :setting:`event_queue_ttl` setting. + +- **Events**: Event monitors now set the :setting:`event_queue_expires` + setting by default. + + The queues will now expire after 60 seconds after the monitor stops + consuming from it. + +- **Canvas**: ``chunks``/``map``/``starmap`` are now routed based on the target task. + +- **Canvas**: ``Signature.link`` now works when argument is scalar (not a list) + (Issue #2019). + +- **App**: The application can now change how task names are generated using + the :meth:`~@gen_task_name` method. + + Contributed by Dmitry Malinovsky. + +- **Tasks**: ``Task.subtask`` renamed to ``Task.signature`` with alias. + +- **Tasks**: ``Task.subtask_from_request`` renamed to + ``Task.signature_from_request`` with alias. + +- **Tasks**: The ``delivery_mode`` attribute for :class:`kombu.Queue` is now + respected (Issue #1953). + +- **Tasks**: Routes in :setting:`task_routes` can now specify a + :class:`~kombu.Queue` instance directly. + + Example: + + .. code-block:: python + + task_routes = {'proj.tasks.add': {'queue': Queue('add')}} + +- **Tasks**: ``AsyncResult`` now raises :exc:`ValueError` if task_id is None. + (Issue #1996). + +- **Tasks**: ``result.get()`` now supports an ``on_message`` argument to set a + callback to be called for every message received. + +- **Tasks**: New abstract classes added: + + - :class:`~celery.utils.abstract.CallableTask` + + Looks like a task. 
+ + - :class:`~celery.utils.abstract.CallableSignature` + + Looks like a task signature. + +- **Programs**: :program:`celery multi` now passes through `%i` and `%I` log + file formats. + +- **Programs**: A new command line option :option:`--executable` is now + available for daemonizing programs. + + Contributed by Bert Vanderbauwhede. + +- **Programs**: :program:`celery worker` supports new + :option:`--prefetch-multiplier` option. + + Contributed by Mickaël Penhard. + +- **Prefork**: Prefork pool now uses ``poll`` instead of ``select`` where + available (Issue #2373). + +- **Tasks**: New :setting:`email_charset` setting allows for changing + the charset used for outgoing error emails. + + Contributed by Vladimir Gorbunov. + +- **Worker**: Now respects :setting:`broker_connection_retry` setting. + + Fix contributed by Nat Williams. + +- **Worker**: Autoscale did not always update keepalive when scaling down. + + Fix contributed by Philip Garnero. + +- **General**: Dates are now always timezone aware even if + :setting:`enable_utc` is disabled (Issue #943). + + Fix contributed by Omer Katz. + +- **Result Backends**: The redis result backend now has a default socket + timeout of 5 seconds. + + The default can be changed using the new :setting:`redis_socket_timeout` + setting. + + Contributed by Raghuram Srinivasan. + +- **Result Backends**: RPC Backend result queues are now auto delete by + default (Issue #2001). + +- **Result Backends**: MongoDB now supports setting the + :setting:`result_serializer` setting to ``bson`` to use the MongoDB + library's own serializer. + + Contributed by Davide Quarta. + +- **Result Backends**: SQLAlchemy result backend now ignores all result + engine options when using NullPool (Issue #1930). + +- **Result Backends**: MongoDB URI handling has been improved to use + database name, user and password from the URI if provided. + + Contributed by Samuel Jaillet. 
+ +- **Result Backends**: Fix problem with rpc/amqp backends where exception + was not deserialized properly with the json serializer (Issue #2518). + + Fix contributed by Allard Hoeve. + +- **General**: All Celery exceptions/warnings now inherit from common + :class:`~celery.exceptions.CeleryException`/:class:`~celery.exceptions.CeleryWarning`. + (Issue #2643). + +- **Tasks**: Task retry now also throws in eager mode. + + Fix contributed by Feanil Patel. + +- Apps can now define how tasks are named (:meth:`@gen_task_name`). + + Contributed by Dmitry Malinovsky + +- Module ``celery.worker.job`` renamed to :mod:`celery.worker.request`. + +- Beat: ``Scheduler.Publisher``/``.publisher`` renamed to + ``.Producer``/``.producer``. + +Unscheduled Removals +==================== + +- The experimental :mod:`celery.contrib.methods` feature has been removed, + as there were far many bugs in the implementation to be useful. + +- The CentOS init scripts have been removed. + + These did not really add any features over the generic init scripts, + so you are encouraged to use them instead, or something like + ``supervisord``. + + +.. _v320-removals: + +Scheduled Removals +================== + +Modules +------- + +- Module ``celery.worker.job`` has been renamed to :mod:`celery.worker.request`. + + This was an internal module so should not have any effect. + It is now part of the public API so should not change again. + +- Module ``celery.task.trace`` has been renamed to ``celery.app.trace`` + as the ``celery.task`` package is being phased out. The compat module + will be removed in version 4.0 so please change any import from:: + + from celery.task.trace import … + + to:: + + from celery.app.trace import … + +- Old compatibility aliases in the :mod:`celery.loaders` module + has been removed. 
+ + - Removed ``celery.loaders.current_loader()``, use: ``current_app.loader`` + + - Removed ``celery.loaders.load_settings()``, use: ``current_app.conf`` + +Result +------ + +- ``AsyncResult.serializable()`` and ``celery.result.from_serializable`` + have been removed: + + Use instead: + + .. code-block:: pycon + + >>> tup = result.as_tuple() + >>> from celery.result import result_from_tuple + >>> result = result_from_tuple(tup) + +- Removed ``BaseAsyncResult``, use ``AsyncResult`` for instance checks + instead. + +- Removed ``TaskSetResult``, use ``GroupResult`` instead. + + - ``TaskSetResult.total`` -> ``len(GroupResult)`` + + - ``TaskSetResult.taskset_id`` -> ``GroupResult.id`` + + +TaskSet +------- + +TaskSet has been renamed to group and TaskSet will be removed in version 4.0. + +Old:: + + >>> from celery.task import TaskSet + + >>> TaskSet(add.subtask((i, i)) for i in xrange(10)).apply_async() + +New:: + + >>> from celery import group + >>> group(add.s(i, i) for i in xrange(10))() + + +Magic keyword arguments +----------------------- + +Support for the very old magic keyword arguments accepted by tasks has finally +been removed in 4.0. 
+ +If you are still using these you have to rewrite any task still +using the old ``celery.decorators`` module and depending +on keyword arguments being passed to the task, +for example:: + + from celery.decorators import task + + @task() + def add(x, y, task_id=None): + print("My task id is %r" % (task_id,)) + +should be rewritten into:: + + from celery import task + + @task(bind=True) + def add(self, x, y): + print("My task id is {0.request.id}".format(self)) + +Settings +-------- + +The following settings have been removed, and are no longer supported: + +Logging Settings +~~~~~~~~~~~~~~~~ + +===================================== ===================================== +**Setting name** **Replace with** +===================================== ===================================== +``CELERYD_LOG_LEVEL`` :option:`--loglevel` +``CELERYD_LOG_FILE`` :option:`--logfile` +``CELERYBEAT_LOG_LEVEL`` :option:`--loglevel` +``CELERYBEAT_LOG_FILE`` :option:`--logfile` +``CELERYMON_LOG_LEVEL`` celerymon is deprecated, use flower. +``CELERYMON_LOG_FILE`` celerymon is deprecated, use flower. +``CELERYMON_LOG_FORMAT`` celerymon is deprecated, use flower. +===================================== ===================================== + +Task Settings +~~~~~~~~~~~~~~ + +===================================== ===================================== +**Setting name** **Replace with** +===================================== ===================================== +``CELERY_CHORD_PROPAGATES`` N/a +===================================== ===================================== + +.. _v320-deprecations: + +Deprecations +============ + +See the :ref:`deprecation-timeline`. + +.. 
_v320-fixes: + +Fixes +===== + diff --git a/examples/app/myapp.py b/examples/app/myapp.py index 51a624b2a..d2939b567 100644 --- a/examples/app/myapp.py +++ b/examples/app/myapp.py @@ -1,6 +1,6 @@ """myapp.py -Usage: +Usage:: (window1)$ python myapp.py worker -l info @@ -27,11 +27,12 @@ app = Celery( 'myapp', broker='amqp://guest@localhost//', - # add result backend here if needed. - #backend='rpc' + # ## add result backend here if needed. + # backend='rpc' ) -@app.task() + +@app.task def add(x, y): return x + y diff --git a/examples/celery_http_gateway/settings.py b/examples/celery_http_gateway/settings.py index 750f18a7b..a56e1061a 100644 --- a/examples/celery_http_gateway/settings.py +++ b/examples/celery_http_gateway/settings.py @@ -5,7 +5,6 @@ DEBUG = True TEMPLATE_DEBUG = DEBUG -CARROT_BACKEND = 'amqp' CELERY_RESULT_BACKEND = 'database' BROKER_URL = 'amqp://guest:guest@localhost:5672//' diff --git a/examples/django/README.rst b/examples/django/README.rst index 9eebc02ad..e8e091e96 100644 --- a/examples/django/README.rst +++ b/examples/django/README.rst @@ -27,10 +27,34 @@ Example generic app. This is decoupled from the rest of the project by using the ``@shared_task`` decorator. This decorator returns a proxy that always points to the currently active Celery instance. +Installing requirements +======================= + +The settings file assumes that ``rabbitmq-server`` is running on ``localhost`` +using the default ports. More information here: + +http://docs.celeryproject.org/en/latest/getting-started/brokers/rabbitmq.html + +In addition, some Python requirements must also be satisfied: + +.. code-block:: console + + $ pip install -r requirements.txt Starting the worker =================== -.. code-block:: bash +.. code-block:: console $ celery -A proj worker -l info + +Running a task +=================== + +.. 
code-block:: console + + $ python ./manage.py shell + >>> from demoapp.tasks import add, mul, xsum + >>> res = add.delay(2,3) + >>> res.get() + 5 diff --git a/examples/django/proj/__init__.py b/examples/django/proj/__init__.py index b64e43e83..ff99efb2c 100644 --- a/examples/django/proj/__init__.py +++ b/examples/django/proj/__init__.py @@ -3,3 +3,5 @@ # This will make sure the app is always imported when # Django starts so that shared_task will use this app. from .celery import app as celery_app + +__all__ = ['celery_app'] diff --git a/examples/django/proj/celery.py b/examples/django/proj/celery.py index aebb10850..f35ee8299 100644 --- a/examples/django/proj/celery.py +++ b/examples/django/proj/celery.py @@ -4,17 +4,19 @@ from celery import Celery -from django.conf import settings - # set the default Django settings module for the 'celery' program. os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'proj.settings') +from django.conf import settings # noqa + app = Celery('proj') # Using a string here means the worker will not have to # pickle the object when using Windows. -app.config_from_object('django.conf:settings') -app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) +app.config_from_object('django.conf:settings', namespace='CELERY') + +# load task modules from all registered Django app configs. +app.autodiscover_tasks() @app.task(bind=True) diff --git a/examples/django/proj/settings.py b/examples/django/proj/settings.py index c64ec949c..8ed566b37 100644 --- a/examples/django/proj/settings.py +++ b/examples/django/proj/settings.py @@ -6,11 +6,12 @@ # Celery settings -BROKER_URL = 'amqp://guest:guest@localhost//' +CELERY_BROKER_URL = 'amqp://guest:guest@localhost//' #: Only add pickle to this list if your broker is secured #: from unwanted access (see userguide/security.html) CELERY_ACCEPT_CONTENT = ['json'] +CELERY_RESULT_BACKEND = 'db+sqlite:///results.sqlite' # Django settings for proj project. 
@@ -131,6 +132,8 @@ 'django.contrib.sites', 'django.contrib.messages', 'django.contrib.staticfiles', + 'django.contrib.admin', + 'kombu.transport.django', 'demoapp', # Uncomment the next line to enable the admin: # 'django.contrib.admin', diff --git a/examples/django/proj/wsgi.py b/examples/django/proj/wsgi.py index 446fcc9d9..6a65b3ff8 100644 --- a/examples/django/proj/wsgi.py +++ b/examples/django/proj/wsgi.py @@ -20,7 +20,7 @@ # This application object is used by any WSGI server configured to use this # file. This includes Django's development server, if the WSGI_APPLICATION # setting points here. -from django.core.wsgi import get_wsgi_application +from django.core.wsgi import get_wsgi_application # noqa application = get_wsgi_application() # Apply WSGI middleware here. diff --git a/examples/django/requirements.txt b/examples/django/requirements.txt new file mode 100644 index 000000000..77a33d8e4 --- /dev/null +++ b/examples/django/requirements.txt @@ -0,0 +1,2 @@ +django==1.8.4 +sqlalchemy==1.0.9 diff --git a/examples/eventlet/README.rst b/examples/eventlet/README.rst index 6bf00e9fa..eb64b7081 100644 --- a/examples/eventlet/README.rst +++ b/examples/eventlet/README.rst @@ -46,7 +46,7 @@ To open several URLs at once you can do:: >>> result = group(urlopen.s(url) ... for url in LIST_OF_URLS).apply_async() >>> for incoming_result in result.iter_native(): - ... print(incoming_result, ) + ... 
print(incoming_result) * `webcrawler.crawl` diff --git a/examples/eventlet/bulk_task_producer.py b/examples/eventlet/bulk_task_producer.py index 2002160c0..891a900fc 100644 --- a/examples/eventlet/bulk_task_producer.py +++ b/examples/eventlet/bulk_task_producer.py @@ -3,8 +3,6 @@ from eventlet.queue import LightQueue from eventlet.event import Event -from celery import current_app - monkey_patch() @@ -12,7 +10,7 @@ class Receipt(object): result = None def __init__(self, callback=None): - self.callback = None + self.callback = callback self.ready = Event() def finished(self, result): @@ -27,9 +25,16 @@ def wait(self, timeout=None): class ProducerPool(object): + """Usage:: + + >>> app = Celery(broker='amqp://') + >>> ProducerPool(app) + + """ Receipt = Receipt - def __init__(self, size=20): + def __init__(self, app, size=20): + self.app = app self.size = size self.inqueue = LightQueue() self._running = None @@ -48,13 +53,12 @@ def _run(self): ] def _producer(self): - connection = current_app.connection() - publisher = current_app.amqp.TaskProducer(connection) inqueue = self.inqueue - while 1: - task, args, kwargs, options, receipt = inqueue.get() - result = task.apply_async(args, kwargs, - publisher=publisher, - **options) - receipt.finished(result) + with self.app.producer_or_acquire() as producer: + while 1: + task, args, kwargs, options, receipt = inqueue.get() + result = task.apply_async(args, kwargs, + producer=producer, + **options) + receipt.finished(result) diff --git a/examples/eventlet/celeryconfig.py b/examples/eventlet/celeryconfig.py index a816c004f..9e3d1ec7f 100644 --- a/examples/eventlet/celeryconfig.py +++ b/examples/eventlet/celeryconfig.py @@ -2,13 +2,13 @@ import sys sys.path.insert(0, os.getcwd()) -## Start worker with -P eventlet -# Never use the CELERYD_POOL setting as that will patch +# ## Start worker with -P eventlet +# Never use the worker_pool setting as that will patch # the worker too late. 
-BROKER_URL = 'amqp://guest:guest@localhost:5672//' -CELERY_DISABLE_RATE_LIMITS = True -CELERY_RESULT_BACKEND = 'amqp' -CELERY_TASK_RESULT_EXPIRES = 30 * 60 +broker_url = 'amqp://guest:guest@localhost:5672//' +worker_disable_rate_limits = True +result_backend = 'amqp' +result_expires = 30 * 60 -CELERY_IMPORTS = ('tasks', 'webcrawler') +imports = ('tasks', 'webcrawler') diff --git a/examples/gevent/celeryconfig.py b/examples/gevent/celeryconfig.py index 36d6a6c46..a7ea06aa4 100644 --- a/examples/gevent/celeryconfig.py +++ b/examples/gevent/celeryconfig.py @@ -2,12 +2,11 @@ import sys sys.path.insert(0, os.getcwd()) -### Note: Start worker with -P gevent, -# do not use the CELERYD_POOL option. +# ## Note: Start worker with -P gevent, +# do not use the worker_pool option. -BROKER_URL = 'amqp://guest:guest@localhost:5672//' -CELERY_DISABLE_RATE_LIMITS = True -CELERY_RESULT_BACKEND = 'amqp' -CELERY_TASK_RESULT_EXPIRES = 30 * 60 +broker_url = 'amqp://guest:guest@localhost:5672//' +result_backend = 'amqp' +result_expires = 30 * 60 -CELERY_IMPORTS = ('tasks', ) +imports = ('tasks',) diff --git a/examples/httpexample/views.py b/examples/httpexample/views.py index 5069255e6..e1f4bf0f5 100644 --- a/examples/httpexample/views.py +++ b/examples/httpexample/views.py @@ -1,6 +1,6 @@ from django.http import HttpResponse -from anyjson import dumps +from json import dumps def multiply(request): diff --git a/examples/next-steps/proj/celery.py b/examples/next-steps/proj/celery.py index db98708bd..d200c2d35 100644 --- a/examples/next-steps/proj/celery.py +++ b/examples/next-steps/proj/celery.py @@ -9,7 +9,7 @@ # Optional configuration, see the application user guide. 
app.conf.update( - CELERY_TASK_RESULT_EXPIRES=3600, + result_expires=3600, ) if __name__ == '__main__': diff --git a/examples/next-steps/setup.py b/examples/next-steps/setup.py index 7eaccf9da..0132b3509 100644 --- a/examples/next-steps/setup.py +++ b/examples/next-steps/setup.py @@ -15,6 +15,6 @@ zip_safe=False, install_requires=[ 'celery>=3.0', - #'requests', + # 'requests', ], ) diff --git a/examples/resultgraph/tasks.py b/examples/resultgraph/tasks.py index bb14d2798..3c6dd81b0 100644 --- a/examples/resultgraph/tasks.py +++ b/examples/resultgraph/tasks.py @@ -16,7 +16,7 @@ # when the second task is ready.) # # >>> unlock_graph.apply_async((A.apply_async(), -# ... A_callback.subtask()), countdown=1) +# ... A_callback.s()), countdown=1) from celery import chord, group, task, signature, uuid diff --git a/extra/appveyor/install.ps1 b/extra/appveyor/install.ps1 new file mode 100644 index 000000000..3f0562825 --- /dev/null +++ b/extra/appveyor/install.ps1 @@ -0,0 +1,85 @@ +# Sample script to install Python and pip under Windows +# Authors: Olivier Grisel and Kyle Kastner +# License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ + +$BASE_URL = "https://www.python.org/ftp/python/" +$GET_PIP_URL = "https://bootstrap.pypa.io/get-pip.py" +$GET_PIP_PATH = "C:\get-pip.py" + + +function DownloadPython ($python_version, $platform_suffix) { + $webclient = New-Object System.Net.WebClient + $filename = "python-" + $python_version + $platform_suffix + ".msi" + $url = $BASE_URL + $python_version + "/" + $filename + + $basedir = $pwd.Path + "\" + $filepath = $basedir + $filename + if (Test-Path $filename) { + Write-Host "Reusing" $filepath + return $filepath + } + + # Download and retry up to 5 times in case of network transient errors. 
+ Write-Host "Downloading" $filename "from" $url + $retry_attempts = 3 + for($i=0; $i -lt $retry_attempts; $i++){ + try { + $webclient.DownloadFile($url, $filepath) + break + } + Catch [Exception]{ + Start-Sleep 1 + } + } + Write-Host "File saved at" $filepath + return $filepath +} + + +function InstallPython ($python_version, $architecture, $python_home) { + Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home + if (Test-Path $python_home) { + Write-Host $python_home "already exists, skipping." + return $false + } + if ($architecture -eq "32") { + $platform_suffix = "" + } else { + $platform_suffix = ".amd64" + } + $filepath = DownloadPython $python_version $platform_suffix + Write-Host "Installing" $filepath "to" $python_home + $args = "/qn /i $filepath TARGETDIR=$python_home" + Write-Host "msiexec.exe" $args + Start-Process -FilePath "msiexec.exe" -ArgumentList $args -Wait -Passthru + Write-Host "Python $python_version ($architecture) installation complete" + return $true +} + + +function InstallPip ($python_home) { + $pip_path = $python_home + "/Scripts/pip.exe" + $python_path = $python_home + "/python.exe" + if (-not(Test-Path $pip_path)) { + Write-Host "Installing pip..." + $webclient = New-Object System.Net.WebClient + $webclient.DownloadFile($GET_PIP_URL, $GET_PIP_PATH) + Write-Host "Executing:" $python_path $GET_PIP_PATH + Start-Process -FilePath "$python_path" -ArgumentList "$GET_PIP_PATH" -Wait -Passthru + } else { + Write-Host "pip already installed." 
+ } +} + +function InstallPackage ($python_home, $pkg) { + $pip_path = $python_home + "/Scripts/pip.exe" + & $pip_path install $pkg +} + +function main () { + InstallPython $env:PYTHON_VERSION $env:PYTHON_ARCH $env:PYTHON + InstallPip $env:PYTHON + InstallPackage $env:PYTHON wheel +} + +main diff --git a/extra/appveyor/run_with_compiler.cmd b/extra/appveyor/run_with_compiler.cmd new file mode 100644 index 000000000..3a472bc83 --- /dev/null +++ b/extra/appveyor/run_with_compiler.cmd @@ -0,0 +1,47 @@ +:: To build extensions for 64 bit Python 3, we need to configure environment +:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: +:: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) +:: +:: To build extensions for 64 bit Python 2, we need to configure environment +:: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: +:: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) +:: +:: 32 bit builds do not require specific environment configurations. 
+:: +:: Note: this script needs to be run with the /E:ON and /V:ON flags for the +:: cmd interpreter, at least for (SDK v7.0) +:: +:: More details at: +:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows +:: http://stackoverflow.com/a/13751649/163740 +:: +:: Author: Olivier Grisel +:: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ +@ECHO OFF + +SET COMMAND_TO_RUN=%* +SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows + +SET MAJOR_PYTHON_VERSION="%PYTHON_VERSION:~0,1%" +IF %MAJOR_PYTHON_VERSION% == "2" ( + SET WINDOWS_SDK_VERSION="v7.0" +) ELSE IF %MAJOR_PYTHON_VERSION% == "3" ( + SET WINDOWS_SDK_VERSION="v7.1" +) ELSE ( + ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" + EXIT 1 +) + +IF "%PYTHON_ARCH%"=="64" ( + ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture + SET DISTUTILS_USE_SDK=1 + SET MSSdk=1 + "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% + "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release + ECHO Executing: %COMMAND_TO_RUN% + call %COMMAND_TO_RUN% || EXIT 1 +) ELSE ( + ECHO Using default MSVC build environment for 32 bit architecture + ECHO Executing: %COMMAND_TO_RUN% + call %COMMAND_TO_RUN% || EXIT 1 +) diff --git a/extra/centos/celeryd b/extra/centos/celeryd deleted file mode 100644 index 8b43b6112..000000000 --- a/extra/centos/celeryd +++ /dev/null @@ -1,265 +0,0 @@ -#!/bin/sh -# ============================================ -# celeryd - Starts the Celery worker daemon. 
-# ============================================ -# -# :Usage: /etc/init.d/celeryd {start|stop|restart|status} -# :Configuration file: /etc/sysconfig/celeryd -# -# See http://docs.celeryproject.org/en/latest/tutorials/daemonizing.html - -### BEGIN INIT INFO -# Provides: celeryd -# Required-Start: $network $local_fs $remote_fs -# Required-Stop: $network $local_fs $remote_fs -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: celery task worker daemon -### END INIT INFO -# -# -# To implement separate init scripts, do NOT copy this script. Instead, -# symlink it. I.e., if my new application, "little-worker" needs an init, I -# should just use: -# -# ln -s /etc/init.d/celeryd /etc/init.d/little-worker -# -# You can then configure this by manipulating /etc/sysconfig/little-worker. -# -# Setting `prog` here allows you to symlink this init script, making it easy -# to run multiple processes on the system. - -# If we're invoked via SysV-style runlevel scripts we need to follow the -# link from rcX.d before working out the script name. -if [[ `dirname $0` == /etc/rc*.d ]]; then - target="$(readlink $0)" -else - target=$0 -fi - -prog="$(basename $target)" - -# Source the centos service helper functions -source /etc/init.d/functions -# NOTE: "set -e" does not work with the above functions, -# which use non-zero return codes as non-error return conditions - -# some commands work asyncronously, so we'll wait this many seconds -SLEEP_SECONDS=5 - -DEFAULT_PID_FILE="/var/run/celery/$prog-%n.pid" -DEFAULT_LOG_FILE="/var/log/celery/$prog-%n.log" -DEFAULT_LOG_LEVEL="INFO" -DEFAULT_NODES="celery" -DEFAULT_CELERYD="-m celery.bin.celeryd_detach" - -CELERY_DEFAULTS=${CELERY_DEFAULTS:-"/etc/sysconfig/$prog"} - -test -f "$CELERY_DEFAULTS" && . "$CELERY_DEFAULTS" - -# Set CELERY_CREATE_DIRS to always create log/pid dirs. 
-CELERY_CREATE_DIRS=${CELERY_CREATE_DIRS:-0} -CELERY_CREATE_RUNDIR=$CELERY_CREATE_DIRS -CELERY_CREATE_LOGDIR=$CELERY_CREATE_DIRS -if [ -z "$CELERYD_PID_FILE" ]; then - CELERYD_PID_FILE="$DEFAULT_PID_FILE" - CELERY_CREATE_RUNDIR=1 -fi -if [ -z "$CELERYD_LOG_FILE" ]; then - CELERYD_LOG_FILE="$DEFAULT_LOG_FILE" - CELERY_CREATE_LOGDIR=1 -fi - -CELERYD_LOG_LEVEL=${CELERYD_LOG_LEVEL:-${CELERYD_LOGLEVEL:-$DEFAULT_LOG_LEVEL}} -CELERYD_MULTI=${CELERYD_MULTI:-"celeryd-multi"} -CELERYD=${CELERYD:-$DEFAULT_CELERYD} -CELERYD_NODES=${CELERYD_NODES:-$DEFAULT_NODES} - -# This is used to change how Celery loads in the configs. It does not need to -# be set to be run. -export CELERY_LOADER - -if [ -n "$2" ]; then - CELERYD_OPTS="$CELERYD_OPTS $2" -fi - -CELERYD_LOG_DIR=`dirname $CELERYD_LOG_FILE` -CELERYD_PID_DIR=`dirname $CELERYD_PID_FILE` - -# Extra start-stop-daemon options, like user/group. -if [ -n "$CELERYD_USER" ]; then - DAEMON_OPTS="$DAEMON_OPTS --uid=$CELERYD_USER" -fi -if [ -n "$CELERYD_GROUP" ]; then - DAEMON_OPTS="$DAEMON_OPTS --gid=$CELERYD_GROUP" -fi - -if [ -n "$CELERYD_CHDIR" ]; then - DAEMON_OPTS="$DAEMON_OPTS --workdir=\"$CELERYD_CHDIR\"" -fi - -check_dev_null() { - if [ ! -c /dev/null ]; then - echo "/dev/null is not a character device!" - exit 75 # EX_TEMPFAIL - fi -} - - -maybe_die() { - if [ $? -ne 0 ]; then - echo "Exiting: $* (errno $?)" - exit 77 # EX_NOPERM - fi -} - -create_default_dir() { - if [ ! 
-d "$1" ]; then - echo "- Creating default directory: '$1'" - mkdir -p "$1" - maybe_die "Couldn't create directory $1" - echo "- Changing permissions of '$1' to 02755" - chmod 02755 "$1" - maybe_die "Couldn't change permissions for $1" - if [ -n "$CELERYD_USER" ]; then - echo "- Changing owner of '$1' to '$CELERYD_USER'" - chown "$CELERYD_USER" "$1" - maybe_die "Couldn't change owner of $1" - fi - if [ -n "$CELERYD_GROUP" ]; then - echo "- Changing group of '$1' to '$CELERYD_GROUP'" - chgrp "$CELERYD_GROUP" "$1" - maybe_die "Couldn't change group of $1" - fi - fi -} - - -check_paths() { - if [ $CELERY_CREATE_LOGDIR -eq 1 ]; then - create_default_dir "$CELERYD_LOG_DIR" - fi - if [ $CELERY_CREATE_RUNDIR -eq 1 ]; then - create_default_dir "$CELERYD_PID_DIR" - fi -} - -create_paths() { - create_default_dir "$CELERYD_LOG_DIR" - create_default_dir "$CELERYD_PID_DIR" -} - -export PATH="${PATH:+$PATH:}/usr/sbin:/sbin" - - -_get_pid_files() { - [[ ! -d "$CELERYD_PID_DIR" ]] && return - echo $(ls -1 "$CELERYD_PID_DIR"/$prog-*.pid 2> /dev/null) -} - -stop() { - local pid_files=$(_get_pid_files) - [[ -z "$pid_files" ]] && echo "$prog is stopped" && return 0 - - local one_failed= - for pid_file in $pid_files; do - local pid=$(cat "$pid_file") - echo -n $"Stopping $prog (pid $pid): " - - # killproc comes from 'functions' and brings three nice features: - # 1. sending TERM, sleeping, then sleeping more if needed, then sending KILL - # 2. handling 'success' and 'failure' output - # 3. removes stale pid files, if any remain - killproc -p "$pid_file" -d "$SLEEP_SECONDS" $prog || one_failed=true - echo - done - - [[ "$one_failed" ]] && return 1 || return 0 -} - -start() { - echo -n $"Starting $prog: " - - # If Celery is already running, bail out - local pid_files=$(_get_pid_files) - if [[ "$pid_files" ]]; then - echo -n $"$prog is already running. Use 'restart'." 
- failure - echo - return 1 - fi - - $CELERYD_MULTI start $CELERYD_NODES $DAEMON_OPTS \ - --pidfile="$CELERYD_PID_FILE" \ - --logfile="$CELERYD_LOG_FILE" \ - --loglevel="$CELERYD_LOG_LEVEL" \ - --cmd="$CELERYD" \ - --quiet \ - $CELERYD_OPTS - - if [[ "$?" == "0" ]]; then - # Sleep a few seconds to give Celery a chance to initialize itself. - # This is useful to prevent scripts following this one from trying to - # use Celery (or its pid files) too early. - sleep $SLEEP_SECONDS - pid_files=$(_get_pid_files) - if [[ "$pid_files" ]]; then - for pid_file in $pid_files; do - local node=$(basename "$pid_file" .pid) - local pid=$(cat "$pid_file") - echo - echo -n " $node (pid $pid):" - success - done - echo - return 0 - else # celeryd_multi succeeded but no pid files found - failure - fi - else # celeryd_multi did not succeed - failure - fi - echo - return 1 -} - -check_status() { - local pid_files=$(_get_pid_files) - [[ -z "$pid_files" ]] && echo "$prog is stopped" && return 1 - for pid_file in $pid_files; do - local node=$(basename "$pid_file" .pid) - status -p "$pid_file" $"$prog (node $node)" || return 1 # if one node is down celeryd is down - done - return 0 -} - -case "$1" in - start) - check_dev_null - check_paths - start - ;; - - stop) - check_dev_null - check_paths - stop - ;; - - status) - check_status - ;; - - restart) - check_dev_null - check_paths - stop && start - ;; - - *) - echo "Usage: /etc/init.d/$prog {start|stop|restart|status}" - exit 3 - ;; -esac - -exit $? diff --git a/extra/centos/celeryd.sysconfig b/extra/centos/celeryd.sysconfig deleted file mode 100644 index e1d98bd4d..000000000 --- a/extra/centos/celeryd.sysconfig +++ /dev/null @@ -1,24 +0,0 @@ -# In CentOS, contents should be placed in the file /etc/sysconfig/celeryd - -# Names of nodes to start (space-separated) -#CELERYD_NODES="my_application-node_1" - -# Where to chdir at start. This could be the root of a virtualenv. 
-#CELERYD_CHDIR="/path/to/my_application" - -# How to call celeryd-multi -#CELERYD_MULTI="$CELERYD_CHDIR/bin/celeryd-multi" - -# Extra arguments -#CELERYD_OPTS="--app=my_application.path.to.worker --time-limit=300 --concurrency=8 --loglevel=DEBUG" - -# Create log/pid dirs, if they don't already exist -#CELERY_CREATE_DIRS=1 - -# %n will be replaced with the nodename -#CELERYD_LOG_FILE="/path/to/my_application/log/%n.log" -#CELERYD_PID_FILE="/var/run/celery/%n.pid" - -# Workers run as an unprivileged user -#CELERYD_USER=celery -#CELERYD_GROUP=celery diff --git a/extra/centos/test_celeryd.sh b/extra/centos/test_celeryd.sh deleted file mode 100755 index a331c2c83..000000000 --- a/extra/centos/test_celeryd.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/sh - -# If you make changes to the celeryd init script, -# you can use this test script to verify you didn't break the universe - -SERVICE="celeryd" -SERVICE_CMD="sudo /sbin/service $SERVICE" - -run_test() { - local msg="$1" - local cmd="$2" - local expected_retval="${3:-0}" - local n=${#msg} - - echo - echo `printf "%$((${n}+4))s" | tr " " "#"` - echo "# $msg #" - echo `printf "%$((${n}+4))s" | tr " " "#"` - - $cmd - local retval=$? 
- if [[ "$retval" == "$expected_retval" ]]; then - echo "[PASSED]" - else - echo "[FAILED]" - echo "Exit status: $retval, but expected: $expected_retval" - exit $retval - fi -} - -run_test "stop should succeed" "$SERVICE_CMD stop" 0 -run_test "status on a stopped service should return 1" "$SERVICE_CMD status" 1 -run_test "stopping a stopped celery should not fail" "$SERVICE_CMD stop" 0 -run_test "start should succeed" "$SERVICE_CMD start" 0 -run_test "status on a running service should return 0" "$SERVICE_CMD status" 0 -run_test "starting a running service should fail" "$SERVICE_CMD start" 1 -run_test "restarting a running service should succeed" "$SERVICE_CMD restart" 0 -run_test "status on a restarted service should return 0" "$SERVICE_CMD status" 0 -run_test "stop should succeed" "$SERVICE_CMD stop" 0 - -echo "All tests passed!" diff --git a/extra/generic-init.d/celerybeat b/extra/generic-init.d/celerybeat index 00e8b35b3..5cdbd2b64 100755 --- a/extra/generic-init.d/celerybeat +++ b/extra/generic-init.d/celerybeat @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh -e # ========================================================= # celerybeat - Starts the Celery periodic task scheduler. # ========================================================= @@ -20,7 +20,7 @@ # Cannot use set -e/bash -e since the kill -0 command will abort # abnormally in the absence of a valid process ID. #set -e -VERSION=10.0 +VERSION=10.1 echo "celery init v${VERSION}." if [ $(id -u) -ne 0 ]; then @@ -110,6 +110,9 @@ DEFAULT_CELERYBEAT="$CELERY_BIN beat" CELERYBEAT=${CELERYBEAT:-$DEFAULT_CELERYBEAT} CELERYBEAT_LOG_LEVEL=${CELERYBEAT_LOG_LEVEL:-${CELERYBEAT_LOGLEVEL:-$DEFAULT_LOG_LEVEL}} +CELERYBEAT_SU=${CELERYBEAT_SU:-"su"} +CELERYBEAT_SU_ARGS=${CELERYBEAT_SU_ARGS:-""} + # Sets --app argument for CELERY_BIN CELERY_APP_ARG="" if [ ! 
-z "$CELERY_APP" ]; then @@ -202,14 +205,17 @@ create_paths () { create_default_dir "$CELERYBEAT_PID_DIR" } +is_running() { + pid=$1 + ps $pid > /dev/null 2>&1 +} wait_pid () { pid=$1 forever=1 i=0 while [ $forever -gt 0 ]; do - kill -0 $pid 1>/dev/null 2>&1 - if [ $? -eq 1 ]; then + if ! is_running $pid; then echo "OK" forever=0 else @@ -237,7 +243,8 @@ stop_beat () { } _chuid () { - su "$CELERYBEAT_USER" -c "$CELERYBEAT $*" + ${CELERYBEAT_SU} ${CELERYBEAT_SU_ARGS} \ + "$CELERYBEAT_USER" -c "$CELERYBEAT $*" } start_beat () { @@ -247,6 +254,36 @@ start_beat () { } +check_status () { + local failed= + local pid_file=$CELERYBEAT_PID_FILE + if [ ! -e $pid_file ]; then + echo "${SCRIPT_NAME} is down: no pid file found" + failed=true + elif [ ! -r $pid_file ]; then + echo "${SCRIPT_NAME} is in unknown state, user cannot read pid file." + failed=true + else + local pid=`cat "$pid_file"` + local cleaned_pid=`echo "$pid" | sed -e 's/[^0-9]//g'` + if [ -z "$pid" ] || [ "$cleaned_pid" != "$pid" ]; then + echo "${SCRIPT_NAME}: bad pid file ($pid_file)" + failed=true + else + local failed= + kill -0 $pid 2> /dev/null || failed=true + if [ "$failed" ]; then + echo "${SCRIPT_NAME} (pid $pid) is down, but pid file exists!" + failed=true + else + echo "${SCRIPT_NAME} (pid $pid) is up..." 
+ fi + fi + fi + + [ "$failed" ] && exit 1 || exit 0 +} + case "$1" in start) @@ -261,6 +298,9 @@ case "$1" in reload|force-reload) echo "Use start+stop" ;; + status) + check_status + ;; restart) echo "Restarting celery periodic task scheduler" check_paths @@ -277,7 +317,7 @@ case "$1" in check_paths ;; *) - echo "Usage: /etc/init.d/${SCRIPT_NAME} {start|stop|restart|create-paths}" + echo "Usage: /etc/init.d/${SCRIPT_NAME} {start|stop|restart|create-paths|status}" exit 64 # EX_USAGE ;; esac diff --git a/extra/generic-init.d/celeryd b/extra/generic-init.d/celeryd index 0fe704fdf..a67a40c2b 100755 --- a/extra/generic-init.d/celeryd +++ b/extra/generic-init.d/celeryd @@ -28,7 +28,7 @@ # # You can then configure this by manipulating /etc/default/little-worker. # -VERSION=10.0 +VERSION=10.1 echo "celery init v${VERSION}." if [ $(id -u) -ne 0 ]; then echo "Error: This program can only be used by the root user." @@ -37,9 +37,14 @@ if [ $(id -u) -ne 0 ]; then exit 1 fi +origin_is_runlevel_dir () { + set +e + dirname $0 | grep -q "/etc/rc.\.d" + echo $? +} # Can be a runlevel symlink (e.g. S02celeryd) -if [ -L "$0" ]; then +if [ $(origin_is_runlevel_dir) -eq 0 ]; then SCRIPT_FILE=$(readlink "$0") else SCRIPT_FILE="$0" @@ -48,7 +53,7 @@ SCRIPT_NAME="$(basename "$SCRIPT_FILE")" DEFAULT_USER="celery" DEFAULT_PID_FILE="/var/run/celery/%n.pid" -DEFAULT_LOG_FILE="/var/log/celery/%n.log" +DEFAULT_LOG_FILE="/var/log/celery/%n%I.log" DEFAULT_LOG_LEVEL="INFO" DEFAULT_NODES="celery" DEFAULT_CELERYD="-m celery worker --detach" @@ -110,6 +115,12 @@ if [ ! -z "$CELERY_APP" ]; then CELERY_APP_ARG="--app=$CELERY_APP" fi +# Options to su +# can be used to enable login shell (CELERYD_SU_ARGS="-l"), +# or even to use start-stop-daemon instead of su. +CELERYD_SU=${CELERY_SU:-"su"} +CELERYD_SU_ARGS=${CELERYD_SU_ARGS:-""} + CELERYD_USER=${CELERYD_USER:-$DEFAULT_USER} # Set CELERY_CREATE_DIRS to always create log/pid dirs. 
@@ -199,15 +210,21 @@ create_paths() { export PATH="${PATH:+$PATH:}/usr/sbin:/sbin" +_get_pidfiles () { + # note: multi < 3.1.14 output to stderr, not stdout, hence the redirect. + ${CELERYD_MULTI} expand "${CELERYD_PID_FILE}" ${CELERYD_NODES} 2>&1 +} + + _get_pids() { found_pids=0 my_exitcode=0 - for pid_file in "$CELERYD_PID_DIR"/*.pid; do - local pid=`cat "$pid_file"` + for pidfile in $(_get_pidfiles); do + local pid=`cat "$pidfile"` local cleaned_pid=`echo "$pid" | sed -e 's/[^0-9]//g'` if [ -z "$pid" ] || [ "$cleaned_pid" != "$pid" ]; then - echo "bad pid file ($pid_file)" + echo "bad pid file ($pidfile)" one_failed=true my_exitcode=1 else @@ -224,7 +241,7 @@ _get_pids() { _chuid () { - su "$CELERYD_USER" -c "$CELERYD_MULTI $*" + ${CELERYD_SU} ${CELERYD_SU_ARGS} "$CELERYD_USER" -c "$CELERYD_MULTI $*" } @@ -267,6 +284,8 @@ kill_workers() { restart_workers_graceful () { + echo "WARNING: Use with caution in production" + echo "The workers will attempt to restart, but they may not be able to." local worker_pids= worker_pids=`_get_pids` [ "$one_failed" ] && exit 1 @@ -291,27 +310,27 @@ check_status () { found_pids=0 local one_failed= - for pid_file in "$CELERYD_PID_DIR"/*.pid; do - if [ ! -r $pid_file ]; then - echo "${SCRIPT_NAME} is stopped: no pids were found" + for pidfile in $(_get_pidfiles); do + if [ ! -r $pidfile ]; then + echo "${SCRIPT_NAME} down: no pidfiles found" one_failed=true break fi - local node=`basename "$pid_file" .pid` - local pid=`cat "$pid_file"` + local node=`basename "$pidfile" .pid` + local pid=`cat "$pidfile"` local cleaned_pid=`echo "$pid" | sed -e 's/[^0-9]//g'` if [ -z "$pid" ] || [ "$cleaned_pid" != "$pid" ]; then - echo "bad pid file ($pid_file)" + echo "bad pid file ($pidfile)" one_failed=true else local failed= kill -0 $pid 2> /dev/null || failed=true if [ "$failed" ]; then - echo "${SCRIPT_NAME} (node $node) (pid $pid) is stopped, but pid file exists!" + echo "${SCRIPT_NAME} (node $node) (pid $pid) is down, but pidfile exists!" 
one_failed=true else - echo "${SCRIPT_NAME} (node $node) (pid $pid) is running..." + echo "${SCRIPT_NAME} (node $node) (pid $pid) is up..." fi fi done diff --git a/extra/release/attribution.py b/extra/release/attribution.py index d48a46603..dcc70033b 100755 --- a/extra/release/attribution.py +++ b/extra/release/attribution.py @@ -23,11 +23,11 @@ def find_missing_authors(seen): with open("AUTHORS") as authors: known = [author(line) for line in authors.readlines()] - seen_authors = set(filter(proper_name, (t[0] for t in seen))) - known_authors = set(t[0] for t in known) + seen_authors = {t[0] for t in seen if proper_name(t[0])} + known_authors = {t[0] for t in known} # maybe later?: - # seen_emails = set(t[1] for t in seen) - # known_emails = set(t[1] for t in known) + # seen_emails = {t[1] for t in seen} + # known_emails = {t[1] for t in known} pprint(seen_authors - known_authors) diff --git a/extra/release/bump_version.py b/extra/release/bump_version.py index 8e507255a..9415b7046 100755 --- a/extra/release/bump_version.py +++ b/extra/release/bump_version.py @@ -12,11 +12,13 @@ from contextlib import contextmanager from tempfile import NamedTemporaryFile -rq = lambda s: s.strip("\"'") - str_t = str if sys.version_info[0] >= 3 else basestring +def rq(s): + return s.strip("\"'") + + def cmd(*args): return subprocess.Popen(args, stdout=subprocess.PIPE).communicate()[0] diff --git a/extra/release/doc4allmods b/extra/release/doc4allmods index b08b769eb..c36cb6273 100755 --- a/extra/release/doc4allmods +++ b/extra/release/doc4allmods @@ -7,7 +7,6 @@ SKIP_FILES="celery.five.rst celery.task.rst celery.task.base.rst celery.task.sets.rst - celery.task.trace.rst celery.bin.rst celery.bin.celeryd_detach.rst celery.contrib.rst diff --git a/extra/release/sphinx-to-rst.py b/extra/release/sphinx-to-rst.py index 35417a05f..d9b5c0d9c 100755 --- a/extra/release/sphinx-to-rst.py +++ b/extra/release/sphinx-to-rst.py @@ -138,7 +138,7 @@ def resolve_pending_refs(lines): RE_INCLUDE: 
include_file} -def _process(lines): +def _process(lines, encoding='utf-8'): lines = list(lines) # non-destructive for i, line in enumerate(lines): for regex, alt in TO_RST_MAP.items(): @@ -150,18 +150,21 @@ def _process(lines): else: lines[i] = regex.sub(alt, line) lines[i] = deref_all(lines[i]) - return resolve_pending_refs(asciify(lines)) + if encoding == 'ascii': + lines = asciify(lines) + return resolve_pending_refs(lines) -def sphinx_to_rst(fh): - return ''.join(_process(fh)) +def sphinx_to_rst(fh, encoding='utf-8'): + return ''.join(_process(fh, encoding)) if __name__ == '__main__': global dirname dirname = os.path.dirname(sys.argv[1]) + encoding = 'ascii' if '--ascii' in sys.argv else 'utf-8' fh = codecs.open(sys.argv[1], encoding='utf-8') try: - print(sphinx_to_rst(fh)) + print(sphinx_to_rst(fh, encoding).encode('utf-8')) finally: fh.close() diff --git a/extra/release/verify_config_reference.py b/extra/release/verify_config_reference.py index c36af7a20..59b6e56c6 100644 --- a/extra/release/verify_config_reference.py +++ b/extra/release/verify_config_reference.py @@ -1,23 +1,26 @@ -from __future__ import print_function +from __future__ import print_function, unicode_literals from fileinput import input as _input from sys import exit, stderr from celery.app.defaults import NAMESPACES, flatten -ignore = frozenset([ - 'CELERYD_AGENT', - 'CELERYD_POOL_PUTLOCKS', - 'BROKER_HOST', - 'BROKER_USER', - 'BROKER_PASSWORD', - 'BROKER_VHOST', - 'BROKER_PORT', - 'CELERY_REDIS_HOST', - 'CELERY_REDIS_PORT', - 'CELERY_REDIS_DB', - 'CELERY_REDIS_PASSWORD', -]) +ignore = { + 'worker_agent', + 'worker_pool_putlocks', + 'broker_host', + 'broker_user', + 'broker_password', + 'broker_vhost', + 'broker_port', + 'broker_transport', + 'chord_propagates', + 'redis_host', + 'redis_port', + 'redis_db', + 'redis_password', + 'worker_force_execv', +} def is_ignored(setting, option): @@ -27,8 +30,9 @@ def is_ignored(setting, option): def find_undocumented_settings(directive='.. 
setting:: '): settings = dict(flatten(NAMESPACES)) all = set(settings) + inp = (l.decode('utf-8') for l in _input()) documented = set( - line.strip()[len(directive):].strip() for line in _input() + line.strip()[len(directive):].strip() for line in inp if line.strip().startswith(directive) ) return [setting for setting in all ^ documented diff --git a/extra/supervisord/celery.sh b/extra/supervisord/celery.sh new file mode 100644 index 000000000..a5bcee09f --- /dev/null +++ b/extra/supervisord/celery.sh @@ -0,0 +1,3 @@ +#!/bin/bash +source {{ additional variables }} +exec celery --app={{ application_name }}.celery:app worker --loglevel=INFO -n worker.%%h \ No newline at end of file diff --git a/extra/supervisord/celeryd.conf b/extra/supervisord/celeryd.conf index f92293727..0747ff836 100644 --- a/extra/supervisord/celeryd.conf +++ b/extra/supervisord/celeryd.conf @@ -6,6 +6,11 @@ ; Set full path to celery program if using virtualenv command=celery worker -A proj --loglevel=INFO +; Alternatively, +;command=celery --app=your_app.celery:app worker --loglevel=INFO -n worker.%%h +; Or run a script +;command=celery.sh + directory=/path/to/project user=nobody numprocs=1 @@ -24,6 +29,6 @@ stopwaitsecs = 600 ; taking care of its children as well. killasgroup=true -; if rabbitmq is supervised, set its priority higher -; so it starts first -priority=998 +; Set Celery priority higher than default (999) +; so, if rabbitmq is supervised, it will start first. +priority=1000 diff --git a/extra/supervisord/supervisord.conf b/extra/supervisord/supervisord.conf index 26e5fcbff..1bde65a78 100644 --- a/extra/supervisord/supervisord.conf +++ b/extra/supervisord/supervisord.conf @@ -26,9 +26,3 @@ serverurl=unix:///tmp/supervisor.sock ; use unix:// schem for a unix sockets. # Uncomment this line for celeryd for Python ;files=celeryd.conf -# Uncomment this line for celeryd for Django. 
-;files=django/celeryd.conf - - - - diff --git a/extra/systemd/celery.conf b/extra/systemd/celery.conf index d490fe793..6662d43d5 100644 --- a/extra/systemd/celery.conf +++ b/extra/systemd/celery.conf @@ -1,2 +1,13 @@ +# See +# http://docs.celeryproject.org/en/latest/tutorials/daemonizing.html#available-options + +CELERY_APP="proj" +CELERYD_NODES="worker" +CELERYD_OPTS="" +CELERY_BIN="/usr/bin/python2 -m celery" +CELERYD_PID_FILE="/var/run/celery/%n.pid" +CELERYD_LOG_FILE="/var/log/celery/%n%I.log" +CELERYD_LOG_LEVEL="INFO" + d /run/celery 0755 user users - -d /var/log/celery 0755 user users - \ No newline at end of file +d /var/log/celery 0755 user users - diff --git a/extra/systemd/celery.service b/extra/systemd/celery.service index 31f17bdb1..5729d2924 100644 --- a/extra/systemd/celery.service +++ b/extra/systemd/celery.service @@ -1,17 +1,23 @@ [Unit] -Description=Celery Nodes Daemon +Description=Celery workers After=network.target [Service] Type=forking User=user Group=users -#Environment=DJANGO_SETTINGS_MODULE=MyProject.settings EnvironmentFile=-/etc/conf.d/celery WorkingDirectory=/opt/Myproject/ -ExecStart=/usr/bin/python2 ${CELERY_BIN} $CELERYD_MULTI start $CELERYD_NODES --pidfile=${CELERYD_PID_FILE} --logfile=${CELERYD_LOG_FILE} --loglevel="INFO" $CELERYD_OPTS -ExecStop=/usr/bin/python2 ${CELERY_BIN} $CELERYD_MULTI stopwait $CELERYD_NODES --pidfile=${CELERYD_PID_FILE} -ExecReload=/usr/bin/python2 ${CELERY_BIN} $CELERYD_MULTI restart $CELERYD_NODES --pidfile=${CELERYD_PID_FILE} --logfile=${CELERYD_LOG_FILE} --loglevel="INFO" $CELERYD_OPTS +ExecStart=${CELERY_BIN} multi start $CELERYD_NODES \ + -A $CELERY_APP --pidfile=${CELERYD_PID_FILE} \ + --logfile=${CELERYD_LOG_FILE} --loglevel="${CELERYD_LOG_LEVEL}" \ + $CELERYD_OPTS +ExecStop=${CELERY_BIN} multi stopwait $CELERYD_NODES \ + --pidfile=${CELERYD_PID_FILE} +ExecReload=${CELERY_BIN} multi restart $CELERYD_NODES \ + -A ${CELERY_APP} --pidfile=${CELERYD_PID_FILE} \ + --logfile=${CELERYD_LOG_FILE} 
--loglevel="${CELERYD_LOG_LEVEL}" \ + $CELERYD_OPTS [Install] WantedBy=multi-user.target diff --git a/funtests/benchmarks/bench_worker.py b/funtests/benchmarks/bench_worker.py index 8de8a3af6..d6535d6b3 100644 --- a/funtests/benchmarks/bench_worker.py +++ b/funtests/benchmarks/bench_worker.py @@ -1,23 +1,16 @@ -from __future__ import print_function +from __future__ import print_function, unicode_literals import os import sys -import time os.environ.update( NOSETPS='yes', USE_FAST_LOCALS='yes', ) -import anyjson -JSONIMP = os.environ.get('JSONIMP') -if JSONIMP: - anyjson.force_implementation(JSONIMP) - -print('anyjson implementation: {0!r}'.format(anyjson.implementation.name)) - -from celery import Celery, group -from celery.five import range +from celery import Celery # noqa +from celery.five import range # noqa +from kombu.five import monotonic # noqa DEFAULT_ITS = 40000 @@ -27,13 +20,12 @@ app = Celery('bench_worker') app.conf.update( - BROKER_TRANSPORT=BROKER_TRANSPORT, - BROKER_POOL_LIMIT=10, - CELERYD_POOL='solo', - CELERYD_PREFETCH_MULTIPLIER=0, - CELERY_DISABLE_RATE_LIMITS=True, - CELERY_DEFAULT_DELIVERY_MODE=1, - CELERY_QUEUES={ + broker_transport=BROKER_TRANSPORT, + broker_pool_limit=10, + celeryd_pool='solo', + celeryd_prefetch_multiplier=0, + default_delivery_mode=1, + queues={ 'bench.worker': { 'exchange': 'bench.worker', 'routing_key': 'bench.worker', @@ -43,39 +35,43 @@ 'auto_delete': True, } }, - CELERY_TASK_SERIALIZER='json', - CELERY_DEFAULT_QUEUE='bench.worker', - CELERY_BACKEND=None, + task_serializer='json', + default_queue='bench.worker', + result_backend=None, ), def tdiff(then): - return time.time() - then + return monotonic() - then @app.task(cur=0, time_start=None, queue='bench.worker', bare=True) def it(_, n): - i = it.cur # use internal counter, as ordering can be skewed - # by previous runs, or the broker. + # use internal counter, as ordering can be skewed + # by previous runs, or the broker. 
+ i = it.cur if i and not i % 5000: print('({0} so far: {1}s)'.format(i, tdiff(it.subt)), file=sys.stderr) - it.subt = time.time() + it.subt = monotonic() if not i: - it.subt = it.time_start = time.time() - elif i == n - 1: + it.subt = it.time_start = monotonic() + elif i > n - 2: total = tdiff(it.time_start) print('({0} so far: {1}s)'.format(i, tdiff(it.subt)), file=sys.stderr) print('-- process {0} tasks: {1}s total, {2} tasks/s} '.format( n, total, n / (total + .0), )) - sys.exit() + import os + os._exit() it.cur += 1 def bench_apply(n=DEFAULT_ITS): - time_start = time.time() - group(it.s(i, n) for i in range(n))() - print('-- apply {0} tasks: {1}s'.format(n, time.time() - time_start)) + time_start = monotonic() + task = it._get_current_object() + with app.producer_or_acquire() as producer: + [task.apply_async((i, n), producer=producer) for i in range(n)] + print('-- apply {0} tasks: {1}s'.format(n, monotonic() - time_start)) def bench_work(n=DEFAULT_ITS, loglevel='CRITICAL'): diff --git a/funtests/stress/README.rst b/funtests/stress/README.rst index 9a55a9192..7c91b24b6 100644 --- a/funtests/stress/README.rst +++ b/funtests/stress/README.rst @@ -153,6 +153,13 @@ See ``python -m stress --help`` for a list of all available options. Options ======= +Using a different broker +------------------------ +You can set the environment ``CSTRESS_BROKER`` to change the broker used:: + + $ CSTRESS_BROKER='amqp://' celery -A stress worker # … + $ CSTRESS_BROKER='amqp://' python -m stress + Using a different result backend -------------------------------- diff --git a/funtests/stress/run/Vagrantfile b/funtests/stress/run/Vagrantfile new file mode 100644 index 000000000..65b4e1443 --- /dev/null +++ b/funtests/stress/run/Vagrantfile @@ -0,0 +1,125 @@ +# -*- mode: ruby -*- +# vi: set ft=ruby : + +# Vagrantfile API/syntax version. Don't touch unless you know what you're doing! 
+VAGRANTFILE_API_VERSION = "2" + +Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| + # All Vagrant configuration is done here. The most common configuration + # options are documented and commented below. For a complete reference, + # please see the online documentation at vagrantup.com. + + # Every Vagrant virtual environment requires a box to build off of. + config.vm.box = "ubuntu/trusty64" + + config.vm.provision :shell, path: "provision/provision.sh", + privileged: true + + # Disable automatic box update checking. If you disable this, then + # boxes will only be checked for updates when the user runs + # `vagrant box outdated`. This is not recommended. + # config.vm.box_check_update = false + + # Create a forwarded port mapping which allows access to a specific port + # within the machine from a port on the host machine. In the example below, + # accessing "localhost:8080" will access port 80 on the guest machine. + # config.vm.network "forwarded_port", guest: 80, host: 8080 + + # Create a private network, which allows host-only access to the machine + # using a specific IP. + config.vm.network "private_network", ip: "192.168.33.123" + + # Create a public network, which generally matched to bridged network. + # Bridged networks make the machine appear as another physical device on + # your network. + # config.vm.network "public_network" + + # If true, then any SSH connections made will enable agent forwarding. + # Default value: false + # config.ssh.forward_agent = true + + # Share an additional folder to the guest VM. The first argument is + # the path on the host to the actual folder. The second argument is + # the path on the guest to mount the folder. And the optional third + # argument is a set of non-required options. + # config.vm.synced_folder "../data", "/vagrant_data" + + # Provider-specific configuration so you can fine-tune various + # backing providers for Vagrant. These expose provider-specific options. 
+ # Example for VirtualBox: + # + config.vm.provider "virtualbox" do |vb| + # # Don't boot with headless mode + # vb.gui = true + # + # # Use VBoxManage to customize the VM. For example to change memory: + vb.customize ["modifyvm", :id, "--memory", "1024"] + end + # + # View the documentation for the provider you're using for more + # information on available options. + + # Enable provisioning with CFEngine. CFEngine Community packages are + # automatically installed. For example, configure the host as a + # policy server and optionally a policy file to run: + # + # config.vm.provision "cfengine" do |cf| + # cf.am_policy_hub = true + # # cf.run_file = "motd.cf" + # end + # + # You can also configure and bootstrap a client to an existing + # policy server: + # + # config.vm.provision "cfengine" do |cf| + # cf.policy_server_address = "10.0.2.15" + # end + + # Enable provisioning with Puppet stand alone. Puppet manifests + # are contained in a directory path relative to this Vagrantfile. + # You will need to create the manifests directory and a manifest in + # the file default.pp in the manifests_path directory. + # + # config.vm.provision "puppet" do |puppet| + # puppet.manifests_path = "manifests" + # puppet.manifest_file = "site.pp" + # end + + # Enable provisioning with chef solo, specifying a cookbooks path, roles + # path, and data_bags path (all relative to this Vagrantfile), and adding + # some recipes and/or roles. + # + # config.vm.provision "chef_solo" do |chef| + # chef.cookbooks_path = "../my-recipes/cookbooks" + # chef.roles_path = "../my-recipes/roles" + # chef.data_bags_path = "../my-recipes/data_bags" + # chef.add_recipe "mysql" + # chef.add_role "web" + # + # # You may also specify custom JSON attributes: + # chef.json = { :mysql_password => "foo" } + # end + + # Enable provisioning with chef server, specifying the chef server URL, + # and the path to the validation key (relative to this Vagrantfile). + # + # The Opscode Platform uses HTTPS. 
Substitute your organization for + # ORGNAME in the URL and validation key. + # + # If you have your own Chef Server, use the appropriate URL, which may be + # HTTP instead of HTTPS depending on your configuration. Also change the + # validation key to validation.pem. + # + # config.vm.provision "chef_client" do |chef| + # chef.chef_server_url = "https://api.opscode.com/organizations/ORGNAME" + # chef.validation_key_path = "ORGNAME-validator.pem" + # end + # + # If you're using the Opscode platform, your validator client is + # ORGNAME-validator, replacing ORGNAME with your organization name. + # + # If you have your own Chef Server, the default validation client name is + # chef-validator, unless you changed the configuration. + # + # chef.validation_client_name = "ORGNAME-validator" +end diff --git a/funtests/stress/run/provision/celeryd-init.config b/funtests/stress/run/provision/celeryd-init.config new file mode 100644 index 000000000..8669040bc --- /dev/null +++ b/funtests/stress/run/provision/celeryd-init.config @@ -0,0 +1,12 @@ +CELERYD_NODES="worker1" +CELERY_BIN="/usr/local/bin/celery" +CELERY_APP="stress" +CELERYD_CHDIR="/opt/devel/stress" +CELERYD_OPTS="-c10 --maxtasksperchild=256 -Z vagrant1" +CELERYD_LOG_FILE="/var/log/celery/%n%I.log" +CELERYD_PID_FILE="/var/run/celery/%n.pid" + +CELERYD_USER="celery" +CELERYD_GROUP="celery" + +CELERY_CREATE_DIRS=1 diff --git a/funtests/stress/run/provision/provision.sh b/funtests/stress/run/provision/provision.sh new file mode 100644 index 000000000..927261902 --- /dev/null +++ b/funtests/stress/run/provision/provision.sh @@ -0,0 +1,199 @@ +#!/bin/bash + +APT_SOURCES_LST="/etc/apt/sources.list.d/" + +DEVEL_DIR="/opt/devel" + +WGET="wget" +RABBITMQCTL="rabbitmqctl" + +RABBITMQ_APT_URL="http://www.rabbitmq.com/debian/" +RABBITMQ_APT_VER="testing main" +RABBITMQ_APT_KEY="https://www.rabbitmq.com/rabbitmq-signing-key-public.asc" +RABBITMQ_DEB="rabbitmq-server" + +RABBITMQ_USERNAME="testing" +RABBITMQ_PASSWORD="t3s71ng" 
+RABBITMQ_VHOST="/testing" + +REDIS_DEB="redis-server" +REDIS_CONF="/etc/redis/redis.conf" + +GIT_ROOT="${DEVEL_DIR}" + +GITHUB_ROOT="https://github.com/" +CELERY_GITHUB_USER="celery" +CELERY_USER="celery" +CELERY_GROUP="celery" +CELERY_DIR="${GIT_ROOT}/celery" +CELERY_FUNTESTS="${CELERY_DIR}/funtests/stress" +CELERY_CONFIG_SRC="${CELERY_FUNTESTS}/run/provision/celeryd-init.config" +CELERY_CONFIG_DST="/etc/default/celeryd" +STRESS_DIR="${GIT_ROOT}/stress" + + +die () { + echo $* + exit 1 +} + +# --- grent + +add_real_user () { + user_shell=${3:-/bin/bash} + addgroup $2 + echo creating user "$1 group='$2' shell='${user_shell}'" + echo | adduser -q "$1" --shell="${user_shell}" \ + --ingroup="$2" \ + --disabled-password 1>/dev/null 2>&1 + id "$1" || die "Not able to create user" +} + +# --- system + +make_directories () { + mkdir -p "${DEVEL_DIR}" +} + +enable_bash_vi_mode () { + echo "set -o vi" >> /etc/bash.bashrc +} + +configure_system () { + make_directories + enable_bash_vi_mode +} + + +# --- apt + +apt_update() { + apt-get update +} + +add_apt_source () { + echo "deb $1" >> "${APT_SOURCES_LST}/rabbitmq.list" +} + +add_apt_key() { + "$WGET" --quiet -O - "$1" | apt-key add - +} + +apt_install () { + apt-get install -y "$1" +} + +# --- rabbitmq + +rabbitmq_add_user () { + "$RABBITMQCTL" add_user "$1" "$2" +} + +rabbitmq_add_vhost () { + "$RABBITMQCTL" add_vhost "$1" +} + +rabbitmq_set_perm () { + "$RABBITMQCTL" set_permissions -p $1 $2 '.*' '.*' '.*' +} + +install_rabbitmq() { + add_apt_source "${RABBITMQ_APT_URL} ${RABBITMQ_APT_VER}" + add_apt_key "${RABBITMQ_APT_KEY}" + apt_update + apt_install "${RABBITMQ_DEB}" + + rabbitmq_add_user "${RABBITMQ_USERNAME}" "${RABBITMQ_PASSWORD}" + rabbitmq_add_vhost "${RABBITMQ_VHOST}" + rabbitmq_set_perm "${RABBITMQ_VHOST}" "${RABBITMQ_USERNAME}" +} + +# --- redis + +restart_redis () { + service redis-server restart +} + + +install_redis () { + apt_install "${REDIS_DEB}" + sed -i 's/^bind .*$/#bind 127.0.0.1/' "${REDIS_CONF}" + 
restart_redis +} + +# --- git + +install_git () { + apt_install git +} + + +github_clone () { + mkdir "${CELERY_DIR}" + chown "${CELERY_USER}" "${CELERY_DIR}" + (cd "${GIT_ROOT}"; sudo -u celery git clone "${GITHUB_ROOT}/${1}/${2}") +} + +# --- pip + +pip_install () { + pip install -U "$1" +} + +install_pip () { + apt_install python-setuptools + easy_install pip + pip_install virtualenv + apt_install python-dev + pip_install setproctitle +} + +# --- celery + +restart_celery () { + service celeryd restart +} + + +install_celery_service () { + cp "${CELERY_DIR}/extra/generic-init.d/celeryd" /etc/init.d/ + chmod +x "/etc/init.d/celeryd" + update-rc.d celeryd defaults + echo "cp \'${CELERY_CONFIG_SRC}\' \'${CELERY_CONFIG_DST}'" + cp "${CELERY_CONFIG_SRC}" "${CELERY_CONFIG_DST}" + update-rc.d celeryd enable + restart_celery +} + +install_celery () { + pip_install celery + add_real_user "${CELERY_USER}" "${CELERY_GROUP}" + echo github_clone "'${CELERY_GITHUB_USER}'" "'celery'" + github_clone "${CELERY_GITHUB_USER}" celery + (cd ${CELERY_DIR}; pip install -r requirements/dev.txt); + (cd ${CELERY_DIR}; python setup.py develop); +} + +install_stress () { + mkdir "${STRESS_DIR}" + chown "${CELERY_USER}" "${STRESS_DIR}" + cp -r ${CELERY_DIR}/funtests/stress/* "${STRESS_DIR}/" +} + +# --- MAIN + +provision () { + apt_update + configure_system + apt_install powertop + apt_install htop + install_git + install_rabbitmq + install_redis + install_pip + install_celery + install_stress + install_celery_service +} + +provision diff --git a/funtests/stress/stress/__init__.py b/funtests/stress/stress/__init__.py index 747647ffe..d000f8a20 100644 --- a/funtests/stress/stress/__init__.py +++ b/funtests/stress/stress/__init__.py @@ -1,4 +1,20 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import +import os +import time + +from .data import install_json # noqa + +if os.environ.get('C_SLEEP'): + + _orig_sleep = time.sleep + + def _sleep(n): + print('WARNING: Time sleep for 
{0}s'.format(n)) + import traceback + traceback.print_stack() + _orig_sleep(n) + time.sleep = _sleep + from .app import app # noqa diff --git a/funtests/stress/stress/__main__.py b/funtests/stress/stress/__main__.py index 6b6f6ceda..1b5b97579 100644 --- a/funtests/stress/stress/__main__.py +++ b/funtests/stress/stress/__main__.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals from celery.bin.base import Command, Option @@ -31,7 +31,7 @@ def get_options(self): Option('-r', '--repeat', type='float', default=0, help='Number of times to repeat the test suite'), Option('-g', '--group', default='all', - help='Specify test group (all|green)'), + help='Specify test group (all|green|redis)'), Option('--diag', default=False, action='store_true', help='Enable diagnostics (slow)'), Option('-J', '--no-join', default=False, action='store_true', diff --git a/funtests/stress/stress/app.py b/funtests/stress/stress/app.py index e3d72da5e..d4541961c 100644 --- a/funtests/stress/stress/app.py +++ b/funtests/stress/stress/app.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, unicode_literals +import celery import os import sys import signal from time import sleep -from celery import Celery from celery import signals from celery.bin.base import Option from celery.exceptions import SoftTimeLimitExceeded @@ -17,8 +17,10 @@ logger = get_task_logger(__name__) +IS_CELERY_4 = celery.VERSION[0] >= 4 -class App(Celery): + +class App(celery.Celery): template_selected = False def __init__(self, *args, **kwargs): @@ -33,7 +35,8 @@ def __init__(self, *args, **kwargs): ) ) signals.user_preload_options.connect(self.on_preload_parsed) - self.after_configure = None + if IS_CELERY_4: + self.on_configure.connect(self._maybe_use_default_template) def on_preload_parsed(self, options=None, **kwargs): self.use_template(options['template']) @@ -44,16 +47,22 @@ 
def use_template(self, name='default'): use_template(self, name) self.template_selected = True - def _get_config(self): - ret = super(App, self)._get_config() - if self.after_configure: - self.after_configure(ret) - return ret - - def on_configure(self): + def _maybe_use_default_template(self, **kwargs): if not self.template_selected: self.use_template('default') + if not IS_CELERY_4: + after_configure = None + + def _get_config(self): + ret = super(App, self)._get_config() + if self.after_configure: + self.after_configure(ret) + return ret + + def on_configure(self): + self._maybe_use_default_template() + app = App('stress', set_as_current=False) @@ -67,6 +76,16 @@ def add(x, y): return x + y +@app.task(bind=True) +def ids(self, i): + return (self.request.root_id, self.request.parent_id, i) + + +@app.task(bind=True) +def collect_ids(self, ids, i): + return ids, (self.request.root_id, self.request.parent_id, i) + + @app.task def xsum(x): return sum(x) @@ -91,7 +110,7 @@ def exiting(status=0): @app.task -def kill(sig=signal.SIGKILL): +def kill(sig=getattr(signal, 'SIGKILL', None) or signal.SIGTERM): os.kill(os.getpid(), sig) @@ -116,7 +135,8 @@ def retries(self): @app.task -def unicode(): +def print_unicode(): + logger.warning('hå它 valmuefrø') print('hiöäüß') @@ -127,6 +147,17 @@ def segfault(): assert False, 'should not get here' +@app.task(bind=True) +def chord_adds(self, x): + self.add_to_chord(add.s(x, x)) + return 42 + + +@app.task(bind=True) +def chord_replace(self, x): + return self.replace_in_chord(add.s(x, x)) + + @app.task def raising(exc=KeyError()): raise exc diff --git a/funtests/stress/stress/data.py b/funtests/stress/stress/data.py index bc6b37a46..040147203 100644 --- a/funtests/stress/stress/data.py +++ b/funtests/stress/stress/data.py @@ -1,14 +1,45 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -import json - -from celery.utils.debug import humanbytes -from celery.utils.imports import qualname +try: + import simplejson as json 
+except ImportError: + import json # noqa type_registry = {} +class JSONEncoder(json.JSONEncoder): + + def default(self, obj): + try: + return super(JSONEncoder, self).default(obj) + except TypeError: + reducer = getattr(obj, '__to_json__', None) + if reducer: + return reducer() + raise + + +def decode_hook(d): + try: + d = d['py/obj'] + except KeyError: + return d + type_registry[d['type']](**d['attrs']) + + +def install_json(): + json._default_encoder = JSONEncoder() + json._default_decoder.object_hook = decode_hook +install_json() # ugh, ugly but it's a test suite after all + + +# this imports kombu.utils.json, so can only import after install_json() +from celery.utils.debug import humanbytes # noqa +from celery.utils.imports import qualname # noqa + + def json_reduce(obj, attrs): return {'py/obj': {'type': qualname(obj), 'attrs': attrs}} @@ -43,29 +74,3 @@ def __reduce__(self): BIG = Data('BIG', 'x' * 2 ** 20 * 8) SMALL = Data('SMALL', 'e' * 1024) - - -class JSONEncoder(json.JSONEncoder): - - def default(self, obj): - try: - return super(JSONEncoder, self).default(obj) - except TypeError: - reducer = getattr(obj, '__to_json__', None) - if reducer: - return reducer() - raise - - -def decode_hook(d): - try: - d = d['py/obj'] - except KeyError: - return d - type_registry[d['type']](**d['attrs']) - - -def install_json(): - json._default_encoder = JSONEncoder() - json._default_decoder.object_hook = decode_hook -install_json() # ugh, ugly but it's a test suite after all diff --git a/funtests/stress/stress/fbi.py b/funtests/stress/stress/fbi.py index 9b06af654..5f6625166 100644 --- a/funtests/stress/stress/fbi.py +++ b/funtests/stress/stress/fbi.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import socket import sys @@ -64,4 +64,3 @@ def diag(self, ids, file=sys.stderr): self.ffwd() for tid in ids: print(self.state_of(tid), file=file) - #print(self.query(ids), 
file=file) diff --git a/funtests/stress/stress/suite.py b/funtests/stress/stress/suite.py index f2fb02120..daff39cd8 100755 --- a/funtests/stress/stress/suite.py +++ b/funtests/stress/stress/suite.py @@ -1,30 +1,30 @@ from __future__ import absolute_import, print_function, unicode_literals +import inspect import platform import random import socket import sys -from collections import namedtuple +from collections import OrderedDict, defaultdict, namedtuple from itertools import count from time import sleep -from kombu.utils.compat import OrderedDict - -from celery import group, VERSION_BANNER +from celery import VERSION_BANNER, chain, group, uuid from celery.exceptions import TimeoutError -from celery.five import range, values, monotonic +from celery.five import items, monotonic, range, values from celery.utils.debug import blockdetection from celery.utils.text import pluralize, truncate from celery.utils.timeutils import humanize_seconds from .app import ( - marker, _marker, add, any_, exiting, kill, sleeping, - sleeping_ignore_limits, segfault, any_returning, + marker, _marker, add, any_, collect_ids, exiting, ids, kill, sleeping, + sleeping_ignore_limits, any_returning, print_unicode, ) from .data import BIG, SMALL from .fbi import FBI + BANNER = """\ Celery stress-suite v{version} @@ -51,15 +51,21 @@ Inf = float('Inf') +def assert_equal(a, b): + assert a == b, '{0!r} != {1!r}'.format(a, b) + + class StopSuite(Exception): pass def pstatus(p): + runtime = monotonic() - p.runtime + elapsed = monotonic() - p.elapsed return F_PROGRESS.format( p, - runtime=humanize_seconds(monotonic() - p.runtime, now='0 seconds'), - elapsed=humanize_seconds(monotonic() - p.elapsed, now='0 seconds'), + runtime=humanize_seconds(runtime, now=runtime), + elapsed=humanize_seconds(elapsed, now=elapsed), ) @@ -83,7 +89,7 @@ def testgroup(*funs): return OrderedDict((fun.__name__, fun) for fun in funs) -class Suite(object): +class BaseSuite(object): def __init__(self, app, block_timeout=30 * 
60): self.app = app @@ -92,30 +98,26 @@ def __init__(self, app, block_timeout=30 * 60): self.progress = None self.speaker = Speaker() self.fbi = FBI(app) + self.init_groups() - self.groups = { - 'all': testgroup( - self.manyshort, - self.termbysig, - self.bigtasks, - self.bigtasksbigvalue, - self.smalltasks, - self.timelimits, - self.timelimits_soft, - self.revoketermfast, - self.revoketermslow, - self.alwayskilled, - self.alwaysexits, - ), - 'green': testgroup( - self.manyshort, - self.bigtasks, - self.bigtasksbigvalue, - self.smalltasks, - self.alwaysexits, - self.group_with_exit, - ), - } + def init_groups(self): + acc = defaultdict(list) + for attr in dir(self): + if not _is_descriptor(self, attr): + meth = getattr(self, attr) + try: + groups = meth.__func__.__testgroup__ + except AttributeError: + pass + else: + for g in groups: + acc[g].append(meth) + # sort the tests by the order in which they are defined in the class + for g in values(acc): + g[:] = sorted(g, key=lambda m: m.__func__.__testsort__) + self.groups = dict( + (name, testgroup(*tests)) for name, tests in items(acc) + ) def run(self, names=None, iterations=50, offset=0, numtests=None, list_all=False, repeat=0, group='all', @@ -167,11 +169,8 @@ def banner(self, tests): total=len(tests), ) - def manyshort(self): - self.join(group(add.s(i, i) for i in range(1000))(), - timeout=10, propagate=True) - def runtest(self, fun, n=50, index=0, repeats=1): + n = getattr(fun, '__iterations__', None) or n print('{0}: [[[{1}({2})]]]'.format(repeats, fun.__name__, n)) with blockdetection(self.block_timeout): with self.fbi.investigation(): @@ -194,6 +193,8 @@ def runtest(self, fun, n=50, index=0, repeats=1): raise except Exception as exc: print('-> {0!r}'.format(exc)) + import traceback + print(traceback.format_exc()) print(pstatus(self.progress)) else: print(pstatus(self.progress)) @@ -202,7 +203,7 @@ def runtest(self, fun, n=50, index=0, repeats=1): self.speaker.beep() raise finally: - print('{0} {1} iterations 
in {2}s'.format( + print('{0} {1} iterations in {2}'.format( 'failed after' if failed else 'completed', i + 1, humanize_seconds(monotonic() - elapsed), )) @@ -211,26 +212,202 @@ def runtest(self, fun, n=50, index=0, repeats=1): fun, i + 1, n, index, repeats, runtime, elapsed, 1, ) + def missing_results(self, r): + return [res.id for res in r if res.id not in res.backend._cache] + + def join(self, r, propagate=False, max_retries=10, **kwargs): + if self.no_join: + return + received = [] + + def on_result(task_id, value): + received.append(task_id) + + for i in range(max_retries) if max_retries else count(0): + received[:] = [] + try: + return r.get(callback=on_result, propagate=propagate, **kwargs) + except (socket.timeout, TimeoutError) as exc: + waiting_for = self.missing_results(r) + self.speaker.beep() + marker( + 'Still waiting for {0}/{1}: [{2}]: {3!r}'.format( + len(r) - len(received), len(r), + truncate(', '.join(waiting_for)), exc), '!', + ) + self.fbi.diag(waiting_for) + except self.connerrors as exc: + self.speaker.beep() + marker('join: connection lost: {0!r}'.format(exc), '!') + raise StopSuite('Test failed: Missing task results') + + def dump_progress(self): + return pstatus(self.progress) if self.progress else 'No test running' + + +_creation_counter = count(0) + + +def testcase(*groups, **kwargs): + if not groups: + raise ValueError('@testcase requires at least one group name') + + def _mark_as_case(fun): + fun.__testgroup__ = groups + fun.__testsort__ = next(_creation_counter) + fun.__iterations__ = kwargs.get('iterations') + return fun + + return _mark_as_case + + +def _is_descriptor(obj, attr): + try: + cattr = getattr(obj.__class__, attr) + except AttributeError: + pass + else: + return not inspect.ismethod(cattr) and hasattr(cattr, '__get__') + return False + + +class Suite(BaseSuite): + + @testcase('all', 'green', 'redis', iterations=1) + def chain(self): + c = add.s(4, 4) | add.s(8) | add.s(16) + assert_equal(self.join(c()), 32) + + 
@testcase('all', 'green', 'redis', iterations=1) + def chaincomplex(self): + c = ( + add.s(2, 2) | ( + add.s(4) | add.s(8) | add.s(16) + ) | + group(add.s(i) for i in range(4)) + ) + res = c() + assert_equal(res.get(), [32, 33, 34, 35]) + + @testcase('all', 'green', 'redis', iterations=1) + def parentids_chain(self, num=248): + c = chain(ids.si(i) for i in range(num)) + c.freeze() + res = c() + res.get(timeout=5) + self.assert_ids(res, num - 1) + + @testcase('all', 'green', 'redis', iterations=1) + def parentids_group(self): + g = ids.si(1) | ids.si(2) | group(ids.si(i) for i in range(2, 50)) + res = g() + expected_root_id = res.parent.parent.id + expected_parent_id = res.parent.id + values = res.get(timeout=5) + + for i, r in enumerate(values): + root_id, parent_id, value = r + assert_equal(root_id, expected_root_id) + assert_equal(parent_id, expected_parent_id) + assert_equal(value, i + 2) + + def assert_ids(self, res, size): + i, root = size, res + while root.parent: + root = root.parent + node = res + while node: + root_id, parent_id, value = node.get(timeout=5) + assert_equal(value, i) + assert_equal(root_id, root.id) + if node.parent: + assert_equal(parent_id, node.parent.id) + node = node.parent + i -= 1 + + @testcase('redis', iterations=1) + def parentids_chord(self): + self.assert_parentids_chord() + self.assert_parentids_chord(uuid(), uuid()) + + def assert_parentids_chord(self, base_root=None, base_parent=None): + g = ( + ids.si(1) | + ids.si(2) | + group(ids.si(i) for i in range(3, 50)) | + collect_ids.s(i=50) | + ids.si(51) + ) + g.freeze(root_id=base_root, parent_id=base_parent) + res = g.apply_async(root_id=base_root, parent_id=base_parent) + expected_root_id = base_root or res.parent.parent.parent.id + + root_id, parent_id, value = res.get(timeout=5) + assert_equal(value, 51) + assert_equal(root_id, expected_root_id) + assert_equal(parent_id, res.parent.id) + + prev, (root_id, parent_id, value) = res.parent.get(timeout=5) + assert_equal(value, 50) + 
assert_equal(root_id, expected_root_id) + assert_equal(parent_id, res.parent.parent.id) + + for i, p in enumerate(prev): + root_id, parent_id, value = p + assert_equal(root_id, expected_root_id) + assert_equal(parent_id, res.parent.parent.id) + + root_id, parent_id, value = res.parent.parent.get(timeout=5) + assert_equal(value, 2) + assert_equal(parent_id, res.parent.parent.parent.id) + assert_equal(root_id, expected_root_id) + + root_id, parent_id, value = res.parent.parent.parent.get(timeout=5) + assert_equal(value, 1) + assert_equal(root_id, expected_root_id) + assert_equal(parent_id, base_parent) + + @testcase('all', 'green') + def manyshort(self): + self.join(group(add.s(i, i) for i in range(1000))(), + timeout=10, propagate=True) + + @testcase('all', 'green', iterations=1) + def unicodetask(self): + self.join(group(print_unicode.s() for _ in range(5))(), + timeout=1, propagate=True) + + @testcase('all') + def always_timeout(self): + self.join( + group(sleeping.s(1).set(time_limit=0.1) + for _ in range(100))(), + timeout=10, propagate=True, + ) + + @testcase('all') def termbysig(self): self._evil_groupmember(kill) + @testcase('green') def group_with_exit(self): self._evil_groupmember(exiting) - def termbysegfault(self): - self._evil_groupmember(segfault) - + @testcase('all') def timelimits(self): self._evil_groupmember(sleeping, 2, time_limit=1) + @testcase('all') def timelimits_soft(self): self._evil_groupmember(sleeping_ignore_limits, 2, soft_time_limit=1, time_limit=1.1) + @testcase('all') def alwayskilled(self): g = group(kill.s() for _ in range(10)) self.join(g(), timeout=10) + @testcase('all', 'green') def alwaysexits(self): g = group(exiting.s() for _ in range(10)) self.join(g(), timeout=10) @@ -243,20 +420,32 @@ def _evil_groupmember(self, evil_t, *eargs, **opts): self.join(g1(), timeout=10) self.join(g2(), timeout=10) + @testcase('all', 'green') def bigtasksbigvalue(self): g = group(any_returning.s(BIG, sleep=0.3) for i in range(8)) r = g() - 
self.join(r, timeout=10) + try: + self.join(r, timeout=10) + finally: + # very big values so remove results from backend + try: + r.forget() + except NotImplementedError: + pass + @testcase('all', 'green') def bigtasks(self, wait=None): self._revoketerm(wait, False, False, BIG) + @testcase('all', 'green') def smalltasks(self, wait=None): self._revoketerm(wait, False, False, SMALL) + @testcase('all') def revoketermfast(self, wait=None): self._revoketerm(wait, True, False, SMALL) + @testcase('all') def revoketermslow(self, wait=5): self._revoketerm(wait, True, True, BIG) @@ -269,35 +458,3 @@ def _revoketerm(self, wait=None, terminate=True, sleep(random.choice(range(4))) r.revoke(terminate=True) self.join(r, timeout=10) - - def missing_results(self, r): - return [res.id for res in r if res.id not in res.backend._cache] - - def join(self, r, propagate=False, max_retries=10, **kwargs): - if self.no_join: - return - received = [] - - def on_result(task_id, value): - received.append(task_id) - - for i in range(max_retries) if max_retries else count(0): - received[:] = [] - try: - return r.get(callback=on_result, propagate=propagate, **kwargs) - except (socket.timeout, TimeoutError) as exc: - waiting_for = self.missing_results(r) - self.speaker.beep() - marker( - 'Still waiting for {0}/{1}: [{2}]: {3!r}'.format( - len(r) - len(received), len(r), - truncate(', '.join(waiting_for)), exc), '!', - ) - self.fbi.diag(waiting_for) - except self.connerrors as exc: - self.speaker.beep() - marker('join: connection lost: {0!r}'.format(exc), '!') - raise StopSuite('Test failed: Missing task results') - - def dump_progress(self): - return pstatus(self.progress) if self.progress else 'No test running' diff --git a/funtests/stress/stress/templates.py b/funtests/stress/stress/templates.py index 73b8cd3bf..7d3802971 100644 --- a/funtests/stress/stress/templates.py +++ b/funtests/stress/stress/templates.py @@ -1,17 +1,22 @@ from __future__ import absolute_import +import celery import os 
from functools import partial from celery.five import items -from kombu import Exchange, Queue +from kombu import Queue from kombu.utils import symbol_by_name -CSTRESS_QUEUE = os.environ.get('CSTRESS_QUEUE_NAME', 'c.stress') +CSTRESS_TRANS = os.environ.get('CSTRESS_TRANS', False) +default_queue = 'c.stress.trans' if CSTRESS_TRANS else 'c.stress' +CSTRESS_QUEUE = os.environ.get('CSTRESS_QUEUE_NAME', default_queue) templates = {} +IS_CELERY_4 = celery.VERSION[0] >= 4 + def template(name=None): @@ -21,10 +26,23 @@ def _register(cls): return _register -def use_template(app, template='default'): - template = template.split(',') - app.after_configure = partial(mixin_templates, template[1:]) - app.config_from_object(templates[template[0]]) +if IS_CELERY_4: + + def use_template(app, template='default'): + template = template.split(',') + + # mixin the rest of the templates when the config is needed + @app.on_after_configure.connect(weak=False) + def load_template(sender, source, **kwargs): + mixin_templates(template[1:], source) + + app.config_from_object(templates[template[0]]) +else: + + def use_template(app, template='default'): # noqa + template = template.split(',') + app.after_configure = partial(mixin_templates, template[1:]) + app.config_from_object(templates[template[0]]) def mixin_templates(templates, conf): @@ -35,7 +53,7 @@ def mixin_template(template, conf): cls = symbol_by_name(templates[template]) conf.update(dict( (k, v) for k, v in items(vars(cls)) - if k.isupper() and not k.startswith('_') + if not k.startswith('_') )) @@ -46,37 +64,39 @@ def template_names(): @template() class default(object): CELERY_ACCEPT_CONTENT = ['json'] - CELERY_DEFAULT_QUEUE = CSTRESS_QUEUE - CELERY_TASK_SERIALIZER = 'json' + BROKER_URL = os.environ.get('CSTRESS_BROKER', 'pyamqp://') + BROKER_HEARTBEAT = 30 + CELERY_RESULT_BACKEND = os.environ.get('CSTRESS_BACKEND', 'rpc://') CELERY_RESULT_SERIALIZER = 'json' CELERY_RESULT_PERSISTENT = True - CELERY_TASK_RESULT_EXPIRES = 300 - 
CELERY_QUEUES = [ + CELERY_RESULT_EXPIRES = 300 + CELERY_MAX_CACHED_RESULTS = 100 + CELERY_DEFAULT_QUEUE = CSTRESS_QUEUE + CELERY_TASK_QUEUES = [ Queue(CSTRESS_QUEUE, - exchange=Exchange(CSTRESS_QUEUE), - routing_key=CSTRESS_QUEUE), + durable=not CSTRESS_TRANS, + no_ack=CSTRESS_TRANS), ] - CELERY_MAX_CACHED_RESULTS = -1 - BROKER_URL = os.environ.get('CSTRESS_BROKER', 'amqp://') - CELERY_RESULT_BACKEND = os.environ.get('CSTRESS_BACKEND', 'rpc://') - CELERYD_PREFETCH_MULTIPLIER = int(os.environ.get('CSTRESS_PREFETCH', 10)) + CELERY_TASK_SERIALIZER = 'json' CELERY_TASK_PUBLISH_RETRY_POLICY = { 'max_retries': 100, 'interval_max': 2, 'interval_step': 0.1, } + CELERY_TASK_PROTOCOL = 2 + if CSTRESS_TRANS: + CELERY_DEFAULT_DELIVERY_MODE = 1 + CELERYD_PREFETCH_MULTIPLIER = int(os.environ.get('CSTRESS_PREFETCH', 10)) @template() class redis(default): BROKER_URL = os.environ.get('CSTRESS_BROKER', 'redis://') - CELERY_RESULT_BACKEND = os.environ.get( - 'CSTRESS_BACKEND', 'redis://?new_join=1', - ) BROKER_TRANSPORT_OPTIONS = { 'fanout_prefix': True, 'fanout_patterns': True, } + CELERY_RESULT_BACKEND = os.environ.get('CSTRESS_BACKEND', 'redis://') @template() @@ -111,3 +131,27 @@ class events(default): @template() class execv(default): CELERYD_FORCE_EXECV = True + + +@template() +class sqs(default): + BROKER_URL = 'sqs://' + BROKER_TRANSPORT_OPTIONS = { + 'region': os.environ.get('AWS_REGION', 'us-east-1'), + } + + +@template() +class proto1(default): + CELERY_TASK_PROTOCOL = 1 + + +@template() +class vagrant1(default): + BROKER_URL = 'pyamqp://testing:t3s71ng@192.168.33.123//testing' + + +@template() +class vagrant1_redis(redis): + BROKER_URL = 'redis://192.168.33.123' + CELERY_RESULT_BACKEND = 'redis://192.168.33.123' diff --git a/funtests/suite/__init__.py b/funtests/suite/__init__.py index aed92042d..847100058 100644 --- a/funtests/suite/__init__.py +++ b/funtests/suite/__init__.py @@ -1,6 +1,7 @@ import os import sys +sys.path.insert(0, os.getcwd()) sys.path.insert(0, 
os.path.join(os.getcwd(), os.pardir)) config = os.environ.setdefault('CELERY_FUNTEST_CONFIG_MODULE', diff --git a/funtests/suite/config.py b/funtests/suite/config.py index 741df4b40..8f895a1e3 100644 --- a/funtests/suite/config.py +++ b/funtests/suite/config.py @@ -1,18 +1,18 @@ import atexit import os -BROKER_URL = os.environ.get('BROKER_URL') or 'amqp://' -CELERY_RESULT_BACKEND = 'amqp://' -CELERY_SEND_TASK_ERROR_EMAILS = False +broker_url = os.environ.get('BROKER_URL') or 'amqp://' +result_backend = 'amqp://' +send_task_error_emails = False -CELERY_DEFAULT_QUEUE = 'testcelery' -CELERY_DEFAULT_EXCHANGE = 'testcelery' -CELERY_DEFAULT_ROUTING_KEY = 'testcelery' -CELERY_QUEUES = {'testcelery': {'routing_key': 'testcelery'}} +default_queue = 'testcelery' +default_exchange = 'testcelery' +default_routing_key = 'testcelery' +queues = {'testcelery': {'routing_key': 'testcelery'}} -CELERYD_LOG_COLOR = False +log_color = False -CELERY_IMPORTS = ('celery.tests.functional.tasks', ) +imports = ('celery.tests.functional.tasks',) @atexit.register diff --git a/funtests/suite/test_basic.py b/funtests/suite/test_basic.py index cb0471381..5213baf74 100644 --- a/funtests/suite/test_basic.py +++ b/funtests/suite/test_basic.py @@ -1,10 +1,8 @@ +from __future__ import absolute_import + import operator -import os -import sys # funtest config -sys.path.insert(0, os.getcwd()) -sys.path.insert(0, os.path.join(os.getcwd(), os.pardir)) import suite # noqa from celery.five import range diff --git a/funtests/suite/test_leak.py b/funtests/suite/test_leak.py index bd23c45c1..7a3dcc067 100644 --- a/funtests/suite/test_leak.py +++ b/funtests/suite/test_leak.py @@ -1,4 +1,4 @@ -from __future__ import print_function +from __future__ import print_function, unicode_literals import gc import os @@ -6,16 +6,13 @@ import shlex import subprocess -sys.path.insert(0, os.getcwd()) -sys.path.insert(0, os.path.join(os.getcwd(), os.pardir)) - from celery import current_app from celery.five import range from 
celery.tests.case import SkipTest, unittest import suite # noqa -GET_RSIZE = '/bin/ps -p {pid} -o rss=' +GET_RSIZE = b'/bin/ps -p {pid} -o rss=' class Sizes(list): @@ -127,5 +124,6 @@ def task2(): finally: self.app.conf.BROKER_POOL_LIMIT = pool_limit + if __name__ == '__main__': unittest.main() diff --git a/pavement.py b/pavement.py deleted file mode 100644 index 7b077cc27..000000000 --- a/pavement.py +++ /dev/null @@ -1,205 +0,0 @@ -from __future__ import print_function - -import sys -import traceback - -from paver.easy import task, sh, cmdopts, path, needs, options, Bunch -from paver import doctools # noqa -from paver.setuputils import setup # noqa - -PYCOMPILE_CACHES = ['*.pyc', '*$py.class'] - -options( - sphinx=Bunch(builddir='.build'), -) - - -def sphinx_builddir(options): - return path('docs') / options.sphinx.builddir / 'html' - - -@task -def clean_docs(options): - sphinx_builddir(options).rmtree() - - -@task -@needs('clean_docs', 'paver.doctools.html') -def html(options): - destdir = path('Documentation') - destdir.rmtree() - builtdocs = sphinx_builddir(options) - builtdocs.move(destdir) - - -@task -@needs('paver.doctools.html') -def qhtml(options): - destdir = path('Documentation') - builtdocs = sphinx_builddir(options) - sh('rsync -az {0}/ {1}'.format(builtdocs, destdir)) - - -@task -def autodoc(options): - sh('extra/release/doc4allmods celery') - - -@task -def verifyindex(options): - sh('extra/release/verify-reference-index.sh') - - -@task -def verifyconfigref(options): - sh('PYTHONPATH=. {0} extra/release/verify_config_reference.py \ - docs/configuration.rst'.format(sys.executable)) - - -@task -@cmdopts([ - ('noerror', 'E', 'Ignore errors'), -]) -def flake8(options): - noerror = getattr(options, 'noerror', False) - complexity = getattr(options, 'complexity', 22) - sh("""flake8 celery | perl -mstrict -mwarnings -nle' - my $ignore = m/too complex \((\d+)\)/ && $1 le {0}; - if (! 
$ignore) {{ print STDERR; our $FOUND_FLAKE = 1 }} - }}{{exit $FOUND_FLAKE; - '""".format(complexity), ignore_error=noerror) - - -@task -@cmdopts([ - ('noerror', 'E', 'Ignore errors'), -]) -def flakeplus(options): - noerror = getattr(options, 'noerror', False) - sh('flakeplus celery --2.6', ignore_error=noerror) - - -@task -@cmdopts([ - ('noerror', 'E', 'Ignore errors') -]) -def flakes(options): - flake8(options) - flakeplus(options) - - -@task -def clean_readme(options): - path('README').unlink_p() - path('README.rst').unlink_p() - - -@task -def clean_contributing(options): - path('CONTRIBUTING.rst').unlink_p() - - -@task -def verify_readme(options): - with open('README.rst') as fp: - try: - fp.read().encode('ascii') - except Exception: - print('README contains non-ascii characters', file=sys.stderr) - print('Original exception below...', file=sys.stderr) - traceback.print_stack(file=sys.stderr) - sh('false') - - -@task -@needs('clean_readme') -def readme(options): - sh('{0} extra/release/sphinx-to-rst.py docs/templates/readme.txt \ - > README.rst'.format(sys.executable)) - verify_readme() - - -@task -@needs('clean_contributing') -def contributing(options): - sh('{0} extra/release/sphinx-to-rst.py docs/contributing.rst \ - > CONTRIBUTING.rst'.format(sys.executable)) - - -@task -def bump(options): - sh("extra/release/bump_version.py \ - celery/__init__.py docs/includes/introduction.txt \ - --before-commit='paver readme'") - - -@task -@cmdopts([ - ('coverage', 'c', 'Enable coverage'), - ('verbose', 'V', 'Make more noise'), -]) -def test(options): - cmd = 'CELERY_LOADER=default nosetests' - if getattr(options, 'coverage', False): - cmd += ' --with-coverage' - if getattr(options, 'verbose', False): - cmd += ' --verbosity=2' - sh(cmd) - - -@task -@cmdopts([ - ('noerror', 'E', 'Ignore errors'), -]) -def pep8(options): - noerror = getattr(options, 'noerror', False) - return sh("""find . 
-name "*.py" | xargs pep8 | perl -nle'\ - print; $a=1 if $_}{exit($a)'""", ignore_error=noerror) - - -@task -def removepyc(options): - sh('find . -type f -a \\( {0} \\) | xargs rm'.format( - ' -o '.join("-name '{0}'".format(pat) for pat in PYCOMPILE_CACHES))) - sh('find . -type d -name "__pycache__" | xargs rm -r') - - -@task -def update_graphs(options, dest='docs/images/worker_graph_full.png'): - sh('celery graph bootsteps | dot -Tpng -o {dest}'.format( - dest=dest, - )) - - -@task -@needs('removepyc') -def gitclean(options): - sh('git clean -xdn') - - -@task -@needs('removepyc') -def gitcleanforce(options): - sh('git clean -xdf') - - -@task -@needs('flakes', 'autodoc', 'verifyindex', - 'verifyconfigref', 'verify_readme', 'test', 'gitclean') -def releaseok(options): - pass - - -@task -def verify_authors(options): - sh('git shortlog -se | cut -f2 | extra/release/attribution.py') - - -@task -def testloc(options): - sh('sloccount celery/tests') - - -@task -def loc(options): - sh('sloccount celery') diff --git a/requirements/default.txt b/requirements/default.txt index 9895c2c55..ce285a811 100644 --- a/requirements/default.txt +++ b/requirements/default.txt @@ -1,3 +1,6 @@ pytz>dev -billiard>=3.3.0.14,<3.4 -kombu>=3.0.14,<4.0 +billiard>dev +kombu>dev + +# remove before release +amqp>dev diff --git a/requirements/deps/mock.txt b/requirements/deps/mock.txt new file mode 100644 index 000000000..fc5a38307 --- /dev/null +++ b/requirements/deps/mock.txt @@ -0,0 +1 @@ +mock>=1.3 diff --git a/requirements/deps/nose.txt b/requirements/deps/nose.txt new file mode 100644 index 000000000..7331c33c1 --- /dev/null +++ b/requirements/deps/nose.txt @@ -0,0 +1 @@ +nose>=1.3.7 diff --git a/requirements/dev.txt b/requirements/dev.txt index 567243863..0f3f526b2 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,3 +1,4 @@ +https://github.com/celery/vine/zipball/master https://github.com/celery/py-amqp/zipball/master https://github.com/celery/billiard/zipball/master 
https://github.com/celery/kombu/zipball/master diff --git a/requirements/docs.txt b/requirements/docs.txt index 3854f9edf..b0bdf1c0c 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,2 +1,3 @@ Sphinx -SQLAlchemy +-r extras/sqlalchemy.txt +-r dev.txt diff --git a/requirements/extras/beanstalk.rst b/requirements/extras/beanstalk.rst deleted file mode 100644 index c62c81bd2..000000000 --- a/requirements/extras/beanstalk.rst +++ /dev/null @@ -1 +0,0 @@ -beanstalkc diff --git a/requirements/extras/cassandra.txt b/requirements/extras/cassandra.txt index a58d089a5..a94062dad 100644 --- a/requirements/extras/cassandra.txt +++ b/requirements/extras/cassandra.txt @@ -1 +1 @@ -pycassa +cassandra-driver \ No newline at end of file diff --git a/requirements/extras/couchdb.txt b/requirements/extras/couchdb.txt index 3e100d4b3..bc7a1a32b 100644 --- a/requirements/extras/couchdb.txt +++ b/requirements/extras/couchdb.txt @@ -1 +1 @@ -couchdb +pycouchdb diff --git a/requirements/extras/elasticsearch.txt b/requirements/extras/elasticsearch.txt new file mode 100644 index 000000000..174c3f8b3 --- /dev/null +++ b/requirements/extras/elasticsearch.txt @@ -0,0 +1 @@ +elasticsearch diff --git a/requirements/extras/librabbitmq.txt b/requirements/extras/librabbitmq.txt index e3ac1690b..8f9a2dbca 100644 --- a/requirements/extras/librabbitmq.txt +++ b/requirements/extras/librabbitmq.txt @@ -1 +1 @@ -librabbitmq>=1.0.2 +librabbitmq>=1.5.0 diff --git a/requirements/extras/pymemcache.txt b/requirements/extras/pymemcache.txt new file mode 100644 index 000000000..851bfd86d --- /dev/null +++ b/requirements/extras/pymemcache.txt @@ -0,0 +1 @@ +python-memcached diff --git a/requirements/extras/riak.txt b/requirements/extras/riak.txt new file mode 100644 index 000000000..b6bfed133 --- /dev/null +++ b/requirements/extras/riak.txt @@ -0,0 +1 @@ +riak >=2.0 diff --git a/requirements/jython.txt b/requirements/jython.txt index 4427a9a5f..16a2ad15f 100644 --- a/requirements/jython.txt +++ 
b/requirements/jython.txt @@ -1,2 +1,2 @@ -threadpool multiprocessing +-r extras/threads.txt diff --git a/requirements/pkgutils.txt b/requirements/pkgutils.txt index de2162e30..9156799f2 100644 --- a/requirements/pkgutils.txt +++ b/requirements/pkgutils.txt @@ -1,7 +1,6 @@ setuptools>=1.3.2 wheel -paver flake8 flakeplus -tox +tox>=2.1.1 Sphinx-PyPI-upload diff --git a/requirements/security.txt b/requirements/security.txt index 9292484f9..9ae559b69 100644 --- a/requirements/security.txt +++ b/requirements/security.txt @@ -1 +1 @@ -PyOpenSSL +-r extras/auth.txt diff --git a/requirements/test-ci-base.txt b/requirements/test-ci-base.txt new file mode 100644 index 000000000..71fbfea0e --- /dev/null +++ b/requirements/test-ci-base.txt @@ -0,0 +1,7 @@ +coverage>=3.0 +codecov +-r extras/redis.txt +-r extras/mongodb.txt +-r extras/sqlalchemy.txt +-r extras/pymemcache.txt +-r dev.txt diff --git a/requirements/test-ci-default.txt b/requirements/test-ci-default.txt new file mode 100644 index 000000000..6d0b42f6b --- /dev/null +++ b/requirements/test-ci-default.txt @@ -0,0 +1,4 @@ +-r test-ci-base.txt +#: Disabled for Cryptography crashing on 2.7 after interpreter shutdown. 
+#-r extras/auth.txt +-r extras/riak.txt diff --git a/requirements/test-ci.txt b/requirements/test-ci.txt deleted file mode 100644 index 92ee75994..000000000 --- a/requirements/test-ci.txt +++ /dev/null @@ -1,6 +0,0 @@ -coverage>=3.0 -coveralls -redis -#pymongo -#SQLAlchemy -PyOpenSSL diff --git a/requirements/test-pypy3.txt b/requirements/test-pypy3.txt new file mode 100644 index 000000000..dc9901d75 --- /dev/null +++ b/requirements/test-pypy3.txt @@ -0,0 +1 @@ +-r deps/mock.txt diff --git a/requirements/test.txt b/requirements/test.txt index 0d0b3c697..8ad2e9a3c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,3 +1,3 @@ +-r deps/mock.txt +-r deps/nose.txt unittest2>=0.5.1 -nose -mock>=1.0.1 diff --git a/requirements/test3.txt b/requirements/test3.txt index f7ca6c013..881384714 100644 --- a/requirements/test3.txt +++ b/requirements/test3.txt @@ -1,3 +1 @@ -nose -# FIXME required by kombu.tests.case -mock >=1.0.1 +-r deps/nose.txt diff --git a/setup.cfg b/setup.cfg index c8da3bd33..682cb7d93 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,5 +11,8 @@ upload-dir = docs/.build/html [bdist_rpm] requires = pytz >= 2011b - billiard >= 3.3.0.14 - kombu >= 3.0.14 + billiard >= 3.3.0.17 + kombu >= 3.0.15 + +[wheel] +universal = 1 diff --git a/setup.py b/setup.py index 24ed03769..da34e97c1 100644 --- a/setup.py +++ b/setup.py @@ -1,26 +1,53 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -try: - from setuptools import setup, find_packages - from setuptools.command.test import test - is_setuptools = True -except ImportError: - raise - from ez_setup import use_setuptools - use_setuptools() - from setuptools import setup, find_packages # noqa - from setuptools.command.test import test # noqa - is_setuptools = False +from setuptools import setup, find_packages import os +import re import sys import codecs -CELERY_COMPAT_PROGRAMS = int(os.environ.get('CELERY_COMPAT_PROGRAMS', 1)) +try: + import platform + _pyimp = platform.python_implementation +except 
(AttributeError, ImportError): + def _pyimp(): + return 'Python' + +E_UNSUPPORTED_PYTHON = """ +---------------------------------------- + Celery 4.0 requires %s %s or later! +---------------------------------------- + +- For CPython 2.6, PyPy 1.x, Jython 2.6, CPython 3.2->3.3; use Celery 3.1: + + $ pip install 'celery<4' + +- For CPython 2.5, Jython 2.5; use Celery 3.0: + + $ pip install 'celery<3.1' -if sys.version_info < (2, 6): - raise Exception('Celery 3.1 requires Python 2.6 or higher.') +- For CPython 2.4; use Celery 2.2: + + $ pip install 'celery<2.3' +""" + +PYIMP = _pyimp() +PY26_OR_LESS = sys.version_info < (2, 7) +PY3 = sys.version_info[0] == 3 +PY33_OR_LESS = PY3 and sys.version_info < (3, 4) +JYTHON = sys.platform.startswith('java') +PYPY_VERSION = getattr(sys, 'pypy_version_info', None) +PYPY = PYPY_VERSION is not None +PYPY24_ATLEAST = PYPY_VERSION and PYPY_VERSION >= (2, 4) + +if PY26_OR_LESS: + raise Exception(E_UNSUPPORTED_PYTHON % (PYIMP, '2.7')) +elif PY33_OR_LESS and not PYPY24_ATLEAST: + raise Exception(E_UNSUPPORTED_PYTHON % (PYIMP, '3.4')) + +# -*- Upgrading from older versions -*- downgrade_packages = [ 'celery.app.task', @@ -53,7 +80,6 @@ finally: sys.path[:] = orig_path - NAME = 'celery' entrypoints = {} extra = {} @@ -67,46 +93,40 @@ Topic :: Software Development :: Object Brokering Programming Language :: Python Programming Language :: Python :: 2 - Programming Language :: Python :: 2.6 Programming Language :: Python :: 2.7 Programming Language :: Python :: 3 - Programming Language :: Python :: 3.3 Programming Language :: Python :: 3.4 + Programming Language :: Python :: 3.5 Programming Language :: Python :: Implementation :: CPython Programming Language :: Python :: Implementation :: PyPy Programming Language :: Python :: Implementation :: Jython Operating System :: OS Independent - Operating System :: POSIX - Operating System :: Microsoft :: Windows - Operating System :: MacOS :: MacOS X """ classifiers = [s.strip() for s in 
classes.split('\n') if s] -PY3 = sys.version_info[0] == 3 -JYTHON = sys.platform.startswith('java') -PYPY = hasattr(sys, 'pypy_version_info') - # -*- Distribution Meta -*- -import re re_meta = re.compile(r'__(\w+?)__\s*=\s*(.*)') re_vers = re.compile(r'VERSION\s*=.*?\((.*?)\)') re_doc = re.compile(r'^"""(.+?)"""') -rq = lambda s: s.strip("\"'") + + +def rq(s): + return s.strip("\"'") def add_default(m): attr_name, attr_value = m.groups() - return ((attr_name, rq(attr_value)), ) + return ((attr_name, rq(attr_value)),) def add_version(m): v = list(map(rq, m.groups()[0].split(', '))) - return (('VERSION', '.'.join(v[0:3]) + ''.join(v[3:])), ) + return (('VERSION', '.'.join(v[0:3]) + ''.join(v[3:])),) def add_doc(m): - return (('doc', m.groups()[0]), ) + return (('doc', m.groups()[0]),) pats = {re_meta: add_default, re_vers: add_version, @@ -124,20 +144,29 @@ def add_doc(m): # -*- Installation Requires -*- -py_version = sys.version_info - def strip_comments(l): return l.split('#', 1)[0].strip() -def reqs(*f): +def _pip_requirement(req): + if req.startswith('-r '): + _, path = req.split() + return reqs(*path.split('/')) + return [req] + + +def _reqs(*f): return [ - r for r in ( + _pip_requirement(r) for r in ( strip_comments(l) for l in open( os.path.join(os.getcwd(), 'requirements', *f)).readlines() ) if r] + +def reqs(*f): + return [req for subreq in _reqs(*f) for req in subreq] + install_requires = reqs('default.txt') if JYTHON: install_requires.extend(reqs('jython.txt')) @@ -159,22 +188,21 @@ def reqs(*f): 'celery = celery.__main__:main', ] -if CELERY_COMPAT_PROGRAMS: - console_scripts.extend([ - 'celeryd = celery.__main__:_compat_worker', - 'celerybeat = celery.__main__:_compat_beat', - 'celeryd-multi = celery.__main__:_compat_multi', - ]) - -if is_setuptools: - extras = lambda *p: reqs('extras', *p) - # Celery specific - specific_list = ['auth', 'cassandra', 'memcache', 'couchbase', 'threads', - 'eventlet', 'gevent', 'msgpack', 'yaml', 'redis', - 'mongodb', 'sqs', 
'couchdb', 'beanstalk', 'zookeeper', - 'zeromq', 'sqlalchemy', 'librabbitmq', 'pyro', 'slmq'] - extras_require = dict((x, extras(x + '.txt')) for x in specific_list) - extra['extras_require'] = extras_require +# -*- Extras -*- + + +def extras(*p): + return reqs('extras', *p) + +# Celery specific +features = set([ + 'auth', 'cassandra', 'elasticsearch', 'memcache', 'pymemcache', + 'couchbase', 'threads', 'eventlet', 'gevent', 'msgpack', 'yaml', + 'redis', 'mongodb', 'sqs', 'couchdb', 'riak', 'beanstalk', 'zookeeper', + 'zeromq', 'sqlalchemy', 'librabbitmq', 'pyro', 'slmq', +]) +extras_require = dict((x, extras(x + '.txt')) for x in features) +extra['extras_require'] = extras_require # -*- %%% -*- @@ -188,6 +216,7 @@ def reqs(*f): platforms=['any'], license='BSD', packages=find_packages(exclude=['ez_setup', 'tests', 'tests.*']), + include_package_data=False, zip_safe=False, install_requires=install_requires, tests_require=tests_require, diff --git a/tox.ini b/tox.ini index e532ca2d4..b5c1ddcde 100644 --- a/tox.ini +++ b/tox.ini @@ -1,51 +1,35 @@ [tox] -envlist = - 2.6, - 2.7, - 3.3, - 3.4, - pypy +envlist = 2.7,pypy,3.4,3.5,pypy3 [testenv] -sitepackages = False -commands = nosetests +deps= + -r{toxinidir}/requirements/default.txt -[testenv:3.4] -basepython = python3.4 -deps = -r{toxinidir}/requirements/default.txt - -r{toxinidir}/requirements/test3.txt - -r{toxinidir}/requirements/test-ci.txt -commands = {toxinidir}/extra/release/removepyc.sh {toxinidir} - nosetests -xsv --with-coverage --cover-inclusive --cover-erase [] + 2.7,pypy: -r{toxinidir}/requirements/test.txt + 2.7: -r{toxinidir}/requirements/test-ci-default.txt -[testenv:3.3] -basepython = python3.3 -deps = -r{toxinidir}/requirements/default.txt - -r{toxinidir}/requirements/test3.txt - -r{toxinidir}/requirements/test-ci.txt -commands = {toxinidir}/extra/release/removepyc.sh {toxinidir} - nosetests -xsv --with-coverage --cover-inclusive --cover-erase [] + 3.4,3.5,pypy3: -r{toxinidir}/requirements/test3.txt + 
3.4,3.5: -r{toxinidir}/requirements/test-ci-default.txt -[testenv:2.7] -basepython = python2.7 -deps = -r{toxinidir}/requirements/default.txt - -r{toxinidir}/requirements/test.txt - -r{toxinidir}/requirements/test-ci.txt -commands = {toxinidir}/extra/release/removepyc.sh {toxinidir} - nosetests -xsv --with-coverage --cover-inclusive --cover-erase [] + pypy,pypy3: -r{toxinidir}/requirements/test-ci-base.txt + pypy3: -r{toxinidir}/requirements/test-pypy3.txt -[testenv:2.6] -basepython = python2.6 -deps = -r{toxinidir}/requirements/default.txt - -r{toxinidir}/requirements/test.txt - -r{toxinidir}/requirements/test-ci.txt +sitepackages = False +recreate = False commands = {toxinidir}/extra/release/removepyc.sh {toxinidir} - nosetests -xsv --with-coverage --cover-inclusive --cover-erase [] + pip install -U -r{toxinidir}/requirements/dev.txt + nosetests -xsv --with-coverage \ + --cover-inclusive --cover-min-percentage=94 --cover-erase [] -[testenv:pypy] -basepython = pypy -deps = -r{toxinidir}/requirements/default.txt - -r{toxinidir}/requirements/test.txt - -r{toxinidir}/requirements/test-ci.txt -commands = {toxinidir}/extra/release/removepyc.sh {toxinidir} - nosetests -xsv --with-coverage --cover-inclusive --cover-erase [] +basepython = + 2.7: python2.7 + 3.4: python3.4 + 3.5: python3.5 + pypy: pypy + pypy3: pypy3 + +[testenv:docs] +deps = -r{toxinidir}/requirements/docs.txt +commands = + pip install -U -r{toxinidir}/requirements/dev.txt + sphinx-build -W -b linkcheck -d {envtmpdir}/doctrees docs docs/_build/linkcheck