finn pushed to branch finn/48-cancellation-leases at BuildGrid / buildgrid
Commits:
-
d777a234
by Finn at 2018-11-16T15:35:39Z
-
552b1b79
by Finn at 2018-11-16T15:35:39Z
-
b4bdb9f9
by Finn at 2018-11-16T15:35:39Z
-
bedd33f4
by Finn at 2018-11-16T15:35:51Z
-
ca7e42e5
by Finn at 2018-11-16T15:35:51Z
-
d0dae061
by Finn at 2018-11-16T15:35:51Z
-
b4006733
by Finn at 2018-11-16T15:35:51Z
-
e84973c0
by Finn at 2018-11-16T15:35:51Z
17 changed files:
- buildgrid/_app/bots/buildbox.py
- buildgrid/_app/bots/dummy.py
- buildgrid/_app/bots/host.py
- buildgrid/_app/commands/cmd_bot.py
- buildgrid/bot/bot.py
- buildgrid/bot/bot_interface.py → buildgrid/bot/interface.py
- buildgrid/bot/session.py
- buildgrid/server/execution/service.py
- tests/cas/test_client.py
- tests/cas/test_storage.py
- tests/integration/bot_session.py
- tests/integration/bots_service.py
- tests/integration/operations_service.py
- tests/server_instance.py
- + tests/utils/bots_interface.py
- tests/utils/cas.py
- + tests/utils/utils.py
Changes:
| ... | ... | @@ -24,7 +24,7 @@ from buildgrid.settings import HASH_LENGTH |
| 24 | 24 |
from buildgrid.utils import read_file, write_file
|
| 25 | 25 |
|
| 26 | 26 |
|
| 27 |
-def work_buildbox(context, lease):
|
|
| 27 |
+def work_buildbox(lease, context, event):
|
|
| 28 | 28 |
"""Executes a lease for a build action, using buildbox.
|
| 29 | 29 |
"""
|
| 30 | 30 |
local_cas_directory = context.local_cas
|
| ... | ... | @@ -20,7 +20,7 @@ from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_p |
| 20 | 20 |
from buildgrid.utils import get_hostname
|
| 21 | 21 |
|
| 22 | 22 |
|
| 23 |
-def work_dummy(context, lease):
|
|
| 23 |
+def work_dummy(lease, context, event):
|
|
| 24 | 24 |
""" Just returns lease after some random time
|
| 25 | 25 |
"""
|
| 26 | 26 |
action_result = remote_execution_pb2.ActionResult()
|
| ... | ... | @@ -22,7 +22,7 @@ from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_p |
| 22 | 22 |
from buildgrid.utils import get_hostname, output_file_maker, output_directory_maker
|
| 23 | 23 |
|
| 24 | 24 |
|
| 25 |
-def work_host_tools(context, lease):
|
|
| 25 |
+def work_host_tools(lease, context, event):
|
|
| 26 | 26 |
"""Executes a lease for a build action, using host tools.
|
| 27 | 27 |
"""
|
| 28 | 28 |
instance_name = context.parent
|
| ... | ... | @@ -28,8 +28,11 @@ from urllib.parse import urlparse |
| 28 | 28 |
import click
|
| 29 | 29 |
import grpc
|
| 30 | 30 |
|
| 31 |
-from buildgrid.bot import bot, bot_interface
|
|
| 32 |
-from buildgrid.bot.bot_session import BotSession, Device, Worker
|
|
| 31 |
+from buildgrid.bot import bot, interface, session
|
|
| 32 |
+from buildgrid.bot.hardware.interface import HardwareInterface
|
|
| 33 |
+from buildgrid.bot.hardware.device import Device
|
|
| 34 |
+from buildgrid.bot.hardware.worker import Worker
|
|
| 35 |
+ |
|
| 33 | 36 |
|
| 34 | 37 |
from ..bots import buildbox, dummy, host
|
| 35 | 38 |
from ..cli import pass_context
|
| ... | ... | @@ -123,15 +126,13 @@ def cli(context, parent, update_period, remote, client_key, client_cert, server_ |
| 123 | 126 |
context.logger = logging.getLogger(__name__)
|
| 124 | 127 |
context.logger.debug("Starting for remote {}".format(context.remote))
|
| 125 | 128 |
|
| 126 |
- interface = bot_interface.BotInterface(context.channel)
|
|
| 127 |
- |
|
| 129 |
+ bot_interface = interface.BotInterface(context.channel)
|
|
| 128 | 130 |
worker = Worker()
|
| 129 | 131 |
worker.add_device(Device())
|
| 132 |
+ hardware_interface = HardwareInterface(worker)
|
|
| 130 | 133 |
|
| 131 |
- bot_session = BotSession(parent, interface)
|
|
| 132 |
- bot_session.add_worker(worker)
|
|
| 133 |
- |
|
| 134 |
- context.bot_session = bot_session
|
|
| 134 |
+ context.bot_interface = bot_interface
|
|
| 135 |
+ context.hardware_interface = hardware_interface
|
|
| 135 | 136 |
|
| 136 | 137 |
|
| 137 | 138 |
@cli.command('dummy', short_help="Run a dummy session simply returning leases.")
|
| ... | ... | @@ -141,9 +142,10 @@ def run_dummy(context): |
| 141 | 142 |
Creates a session, accepts leases, does fake work and updates the server.
|
| 142 | 143 |
"""
|
| 143 | 144 |
try:
|
| 144 |
- b = bot.Bot(context.bot_session, context.update_period)
|
|
| 145 |
- b.session(dummy.work_dummy,
|
|
| 146 |
- context)
|
|
| 145 |
+ bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
|
|
| 146 |
+ dummy.work_dummy, context)
|
|
| 147 |
+ b = bot.Bot(bot_session, context.update_period)
|
|
| 148 |
+ b.session()
|
|
| 147 | 149 |
except KeyboardInterrupt:
|
| 148 | 150 |
pass
|
| 149 | 151 |
|
| ... | ... | @@ -156,9 +158,10 @@ def run_host_tools(context): |
| 156 | 158 |
result back to CAS.
|
| 157 | 159 |
"""
|
| 158 | 160 |
try:
|
| 159 |
- b = bot.Bot(context.bot_session, context.update_period)
|
|
| 160 |
- b.session(host.work_host_tools,
|
|
| 161 |
- context)
|
|
| 161 |
+ bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
|
|
| 162 |
+ host.work_host_tools, context)
|
|
| 163 |
+ b = bot.Bot(bot_session, context.update_period)
|
|
| 164 |
+ b.session()
|
|
| 162 | 165 |
except KeyboardInterrupt:
|
| 163 | 166 |
pass
|
| 164 | 167 |
|
| ... | ... | @@ -177,8 +180,9 @@ def run_buildbox(context, local_cas, fuse_dir): |
| 177 | 180 |
context.fuse_dir = fuse_dir
|
| 178 | 181 |
|
| 179 | 182 |
try:
|
| 180 |
- b = bot.Bot(context.bot_session, context.update_period)
|
|
| 181 |
- b.session(buildbox.work_buildbox,
|
|
| 182 |
- context)
|
|
| 183 |
+ bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
|
|
| 184 |
+ buildbox.work_buildbox, context)
|
|
| 185 |
+ b = bot.Bot(bot_session, context.update_period)
|
|
| 186 |
+ b.session()
|
|
| 183 | 187 |
except KeyboardInterrupt:
|
| 184 | 188 |
pass
|
| ... | ... | @@ -17,7 +17,7 @@ |
| 17 | 17 |
Bot
|
| 18 | 18 |
====
|
| 19 | 19 |
|
| 20 |
-Creates a bot session.
|
|
| 20 |
+Creates a bot session and sends updates to the server.
|
|
| 21 | 21 |
"""
|
| 22 | 22 |
|
| 23 | 23 |
import asyncio
|
| ... | ... | @@ -35,16 +35,17 @@ class Bot: |
| 35 | 35 |
self._bot_session = bot_session
|
| 36 | 36 |
self._update_period = update_period
|
| 37 | 37 |
|
| 38 |
- def session(self, work, context):
|
|
| 38 |
+ def session(self):
|
|
| 39 | 39 |
loop = asyncio.get_event_loop()
|
| 40 | 40 |
|
| 41 |
- self._bot_session.create_bot_session(work, context)
|
|
| 41 |
+ self._bot_session.create_bot_session()
|
|
| 42 | 42 |
|
| 43 | 43 |
try:
|
| 44 | 44 |
task = asyncio.ensure_future(self._update_bot_session())
|
| 45 | 45 |
loop.run_forever()
|
| 46 | 46 |
except KeyboardInterrupt:
|
| 47 | 47 |
pass
|
| 48 |
+ |
|
| 48 | 49 |
finally:
|
| 49 | 50 |
task.cancel()
|
| 50 | 51 |
loop.close()
|
| ... | ... | @@ -54,5 +55,11 @@ class Bot: |
| 54 | 55 |
Calls the server periodically to inform the server the client has not died.
|
| 55 | 56 |
"""
|
| 56 | 57 |
while True:
|
| 57 |
- self._bot_session.update_bot_session()
|
|
| 58 |
+ try:
|
|
| 59 |
+ self._bot_session.update_bot_session()
|
|
| 60 |
+ |
|
| 61 |
+ except Exception as e:
|
|
| 62 |
+ self.__logger.error(e)
|
|
| 63 |
+ raise
|
|
| 64 |
+ |
|
| 58 | 65 |
await asyncio.sleep(self._update_period)
|
| ... | ... | @@ -15,12 +15,13 @@ |
| 15 | 15 |
|
| 16 | 16 |
"""
|
| 17 | 17 |
Bot Interface
|
| 18 |
-====
|
|
| 18 |
+=============
|
|
| 19 | 19 |
|
| 20 | 20 |
Interface to grpc
|
| 21 | 21 |
"""
|
| 22 | 22 |
|
| 23 | 23 |
import logging
|
| 24 |
+import grpc
|
|
| 24 | 25 |
|
| 25 | 26 |
from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, bots_pb2_grpc
|
| 26 | 27 |
|
| ... | ... | @@ -38,10 +39,18 @@ class BotInterface: |
| 38 | 39 |
def create_bot_session(self, parent, bot_session):
|
| 39 | 40 |
request = bots_pb2.CreateBotSessionRequest(parent=parent,
|
| 40 | 41 |
bot_session=bot_session)
|
| 41 |
- return self._stub.CreateBotSession(request)
|
|
| 42 |
+ try:
|
|
| 43 |
+ return self._stub.CreateBotSession(request)
|
|
| 44 |
+ |
|
| 45 |
+ except grpc.RpcError as e:
|
|
| 46 |
+ self.__logger.error(e)
|
|
| 42 | 47 |
|
| 43 | 48 |
def update_bot_session(self, bot_session, update_mask=None):
|
| 44 | 49 |
request = bots_pb2.UpdateBotSessionRequest(name=bot_session.name,
|
| 45 | 50 |
bot_session=bot_session,
|
| 46 | 51 |
update_mask=update_mask)
|
| 47 |
- return self._stub.UpdateBotSession(request)
|
|
| 52 |
+ try:
|
|
| 53 |
+ return self._stub.UpdateBotSession(request)
|
|
| 54 |
+ |
|
| 55 |
+ except grpc.RpcError as e:
|
|
| 56 |
+ self.__logger.error(e)
|
| ... | ... | @@ -12,9 +12,6 @@ |
| 12 | 12 |
# See the License for the specific language governing permissions and
|
| 13 | 13 |
# limitations under the License.
|
| 14 | 14 |
|
| 15 |
-# Disable broad exception catch
|
|
| 16 |
-# pylint: disable=broad-except
|
|
| 17 |
- |
|
| 18 | 15 |
|
| 19 | 16 |
"""
|
| 20 | 17 |
Bot Session
|
| ... | ... | @@ -22,21 +19,20 @@ Bot Session |
| 22 | 19 |
|
| 23 | 20 |
Allows connections
|
| 24 | 21 |
"""
|
| 25 |
-import asyncio
|
|
| 26 | 22 |
import logging
|
| 27 | 23 |
import platform
|
| 28 |
-import uuid
|
|
| 29 |
- |
|
| 30 |
-import grpc
|
|
| 31 | 24 |
|
| 32 | 25 |
from buildgrid._enums import BotStatus, LeaseState
|
| 26 |
+from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
|
|
| 33 | 27 |
from buildgrid._protos.google.rpc import code_pb2
|
| 34 |
-from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, worker_pb2
|
|
| 35 |
-from buildgrid._exceptions import BotError
|
|
| 28 |
+ |
|
| 29 |
+from buildgrid._exceptions import FailedPreconditionError
|
|
| 30 |
+ |
|
| 31 |
+from .tenantmanager import TenantManager
|
|
| 36 | 32 |
|
| 37 | 33 |
|
| 38 | 34 |
class BotSession:
|
| 39 |
- def __init__(self, parent, interface, worker):
|
|
| 35 |
+ def __init__(self, parent, bots_interface, hardware_interface, work, context=None):
|
|
| 40 | 36 |
""" Unique bot ID within the farm used to identify this bot
|
| 41 | 37 |
Needs to be human readable.
|
| 42 | 38 |
All prior sessions with bot_id of same ID are invalidated.
|
| ... | ... | @@ -45,88 +41,78 @@ class BotSession: |
| 45 | 41 |
"""
|
| 46 | 42 |
self.__logger = logging.getLogger(__name__)
|
| 47 | 43 |
|
| 48 |
- self._context = None
|
|
| 44 |
+ self._bots_interface = bots_interface
|
|
| 45 |
+ self._hardware_interface = hardware_interface
|
|
| 49 | 46 |
|
| 50 |
- self._worker = worker
|
|
| 51 |
- self._interface = interface
|
|
| 52 |
- self._leases = {}
|
|
| 53 |
- self._parent = parent
|
|
| 54 | 47 |
self._status = BotStatus.OK.value
|
| 48 |
+ self._tenant_manager = TenantManager()
|
|
| 55 | 49 |
|
| 50 |
+ self.__parent = parent
|
|
| 56 | 51 |
self.__bot_id = '{}.{}'.format(parent, platform.node())
|
| 57 | 52 |
self.__name = None
|
| 58 | 53 |
|
| 54 |
+ self._work = work
|
|
| 55 |
+ self._context = context
|
|
| 56 |
+ |
|
| 59 | 57 |
@property
|
| 60 | 58 |
def bot_id(self):
|
| 61 | 59 |
return self.__bot_id
|
| 62 | 60 |
|
| 63 |
- def create_bot_session(self, work, context=None):
|
|
| 61 |
+ def create_bot_session(self):
|
|
| 64 | 62 |
self.__logger.debug("Creating bot session")
|
| 65 |
- self._work = work
|
|
| 66 |
- self._context = context
|
|
| 67 | 63 |
|
| 68 |
- session = self._interface.create_bot_session(self._parent, self.get_pb2())
|
|
| 64 |
+ session = self._bots_interface.create_bot_session(self.__parent, self.get_pb2())
|
|
| 69 | 65 |
self.__name = session.name
|
| 70 | 66 |
|
| 71 |
- self.__logger.info("Created bot session with name: [%s]", self._name)
|
|
| 67 |
+ self.__logger.info("Created bot session with name: [%s]", self.__name)
|
|
| 72 | 68 |
|
| 73 | 69 |
for lease in session.leases:
|
| 74 |
- self._update_lease_from_server(lease)
|
|
| 70 |
+ self._register_lease(lease)
|
|
| 75 | 71 |
|
| 76 | 72 |
def update_bot_session(self):
|
| 77 |
- self.__logger.debug("Updating bot session: [%s]", self._bot_id)
|
|
| 78 |
- session = self._interface.update_bot_session(self.get_pb2())
|
|
| 79 |
- for k, v in list(self._leases.items()):
|
|
| 80 |
- if v.state == LeaseState.COMPLETED.value:
|
|
| 81 |
- del self._leases[k]
|
|
| 73 |
+ self.__logger.debug("Updating bot session: [%s]", self.__bot_id)
|
|
| 74 |
+ |
|
| 75 |
+ session = self._bots_interface.update_bot_session(self.get_pb2())
|
|
| 76 |
+ server_ids = []
|
|
| 82 | 77 |
|
| 83 | 78 |
for lease in session.leases:
|
| 84 |
- self._update_lease_from_server(lease)
|
|
| 79 |
+ server_ids.append(lease.id)
|
|
| 85 | 80 |
|
| 86 |
- def get_pb2(self):
|
|
| 87 |
- leases = list(self._leases.values())
|
|
| 88 |
- if not leases:
|
|
| 89 |
- leases = None
|
|
| 81 |
+ lease_state = LeaseState(lease.state)
|
|
| 82 |
+ if lease_state == LeaseState.PENDING:
|
|
| 83 |
+ self._register_lease(lease)
|
|
| 90 | 84 |
|
| 91 |
- return bots_pb2.BotSession(worker=self._worker.get_pb2(),
|
|
| 92 |
- status=self._status,
|
|
| 93 |
- leases=leases,
|
|
| 94 |
- bot_id=self.__bot_id,
|
|
| 95 |
- name=self.__name)
|
|
| 85 |
+ elif lease_state == LeaseState.CANCELLED:
|
|
| 86 |
+ self._tenant_manager.cancel_tenancy(lease.id)
|
|
| 96 | 87 |
|
| 97 |
- def lease_completed(self, lease):
|
|
| 98 |
- lease.state = LeaseState.COMPLETED.value
|
|
| 99 |
- self._leases[lease.id] = lease
|
|
| 88 |
+ closed_lease_ids = [x for x in self._tenant_manager.get_lease_ids() if x not in server_ids]
|
|
| 100 | 89 |
|
| 101 |
- def _update_lease_from_server(self, lease):
|
|
| 102 |
- """
|
|
| 103 |
- State machine for any recieved updates to the leases.
|
|
| 104 |
- """
|
|
| 105 |
- # TODO: Compare with previous state of lease
|
|
| 106 |
- if lease.state == LeaseState.PENDING.value:
|
|
| 107 |
- lease.state = LeaseState.ACTIVE.value
|
|
| 108 |
- self._leases[lease.id] = lease
|
|
| 109 |
- self.update_bot_session()
|
|
| 110 |
- asyncio.ensure_future(self.create_work(lease))
|
|
| 90 |
+ for lease_id in closed_lease_ids:
|
|
| 91 |
+ self._tenant_manager.cancel_tenancy(lease_id)
|
|
| 92 |
+ self._tenant_manager.remove_tenant(lease_id)
|
|
| 111 | 93 |
|
| 112 |
- async def create_work(self, lease):
|
|
| 113 |
- self.__logger.debug("Work created: [%s]", lease.id)
|
|
| 114 |
- loop = asyncio.get_event_loop()
|
|
| 94 |
+ def get_pb2(self):
|
|
| 95 |
+ return bots_pb2.BotSession(worker=self._hardware_interface.get_worker_pb2(),
|
|
| 96 |
+ status=self._status,
|
|
| 97 |
+ leases=self._tenant_manager.get_leases(),
|
|
| 98 |
+ bot_id=self.__bot_id,
|
|
| 99 |
+ name=self.__name)
|
|
| 115 | 100 |
|
| 101 |
+ def _register_lease(self, lease):
|
|
| 102 |
+ lease_id = lease.id
|
|
| 116 | 103 |
try:
|
| 117 |
- lease = await loop.run_in_executor(None, self._work, self._context, lease)
|
|
| 104 |
+ self._tenant_manager.create_tenancy(lease)
|
|
| 118 | 105 |
|
| 119 |
- except grpc.RpcError as e:
|
|
| 106 |
+ except KeyError as e:
|
|
| 120 | 107 |
self.__logger.error(e)
|
| 121 |
- lease.status.CopyFrom(e.code())
|
|
| 122 | 108 |
|
| 123 |
- except BotError as e:
|
|
| 124 |
- self.__logger.error(e)
|
|
| 125 |
- lease.status.code = code_pb2.INTERNAL
|
|
| 109 |
+ else:
|
|
| 110 |
+ try:
|
|
| 111 |
+ self._hardware_interface.configure_hardware(lease.requirements)
|
|
| 126 | 112 |
|
| 127 |
- except Exception as e:
|
|
| 128 |
- self.__logger.error(e)
|
|
| 129 |
- lease.status.code = code_pb2.INTERNAL
|
|
| 113 |
+ except FailedPreconditionError as e:
|
|
| 114 |
+ self.__logger.error(e)
|
|
| 115 |
+ self._tenant_manager.complete_lease(lease_id, status=code_pb2.FailedPreconditionError)
|
|
| 130 | 116 |
|
| 131 |
- self.__logger.debug("Work complete: [%s]", lease.id)
|
|
| 132 |
- self.lease_completed(lease)
|
|
| 117 |
+ else:
|
|
| 118 |
+ self._tenant_manager.create_work(lease_id, self._work, self._context)
|
| ... | ... | @@ -80,7 +80,7 @@ class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer): |
| 80 | 80 |
yield operations_pb2.Operation()
|
| 81 | 81 |
|
| 82 | 82 |
except CancelledError as e:
|
| 83 |
- self.logger.error(e)
|
|
| 83 |
+ self.__logger.error(e)
|
|
| 84 | 84 |
context.set_details(str(e))
|
| 85 | 85 |
context.set_code(grpc.StatusCode.CANCELLED)
|
| 86 | 86 |
yield operations_pb2.Operation()
|
| ... | ... | @@ -118,7 +118,7 @@ class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer): |
| 118 | 118 |
yield operations_pb2.Operation()
|
| 119 | 119 |
|
| 120 | 120 |
except CancelledError as e:
|
| 121 |
- self.logger.error(e)
|
|
| 121 |
+ self.__logger.error(e)
|
|
| 122 | 122 |
context.set_details(str(e))
|
| 123 | 123 |
context.set_code(grpc.StatusCode.CANCELLED)
|
| 124 | 124 |
yield operations_pb2.Operation()
|
| ... | ... | @@ -26,7 +26,8 @@ from buildgrid.client.cas import download, upload |
| 26 | 26 |
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
| 27 | 27 |
from buildgrid.utils import create_digest
|
| 28 | 28 |
|
| 29 |
-from ..utils.cas import serve_cas, run_in_subprocess
|
|
| 29 |
+from ..utils.cas import serve_cas
|
|
| 30 |
+from ..utils.utils import run_in_subprocess
|
|
| 30 | 31 |
|
| 31 | 32 |
|
| 32 | 33 |
INTANCES = ['', 'instance']
|
| ... | ... | @@ -32,7 +32,8 @@ from buildgrid.server.cas.storage.s3 import S3Storage |
| 32 | 32 |
from buildgrid.server.cas.storage.with_cache import WithCacheStorage
|
| 33 | 33 |
from buildgrid.settings import HASH
|
| 34 | 34 |
|
| 35 |
-from ..utils.cas import serve_cas, run_in_subprocess
|
|
| 35 |
+from ..utils.cas import serve_cas
|
|
| 36 |
+from ..utils.utils import run_in_subprocess
|
|
| 36 | 37 |
|
| 37 | 38 |
|
| 38 | 39 |
BLOBS = [(b'abc', b'defg', b'hijk', b'')]
|
| ... | ... | @@ -14,56 +14,174 @@ |
| 14 | 14 |
|
| 15 | 15 |
# pylint: disable=redefined-outer-name
|
| 16 | 16 |
|
| 17 |
-import uuid
|
|
| 17 |
+import asyncio
|
|
| 18 | 18 |
|
| 19 |
+import grpc
|
|
| 19 | 20 |
import pytest
|
| 20 | 21 |
|
| 21 |
-from buildgrid.bot import bot_session
|
|
| 22 |
+from buildgrid._enums import LeaseState
|
|
| 23 |
+from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
|
|
| 24 |
+from buildgrid.bot.hardware.worker import Worker
|
|
| 25 |
+from buildgrid.bot.hardware.interface import HardwareInterface
|
|
| 26 |
+from buildgrid.bot.session import BotSession
|
|
| 27 |
+from buildgrid.bot.interface import BotInterface
|
|
| 22 | 28 |
|
| 29 |
+from ..utils.utils import run_in_subprocess
|
|
| 30 |
+from ..utils.bots_interface import serve_bots_interface
|
|
| 23 | 31 |
|
| 24 |
-@pytest.mark.parametrize("docker_value", ["True", "False"])
|
|
| 25 |
-@pytest.mark.parametrize("os_value", ["nexus7", "nexus8"])
|
|
| 26 |
-def test_create_device(docker_value, os_value):
|
|
| 27 |
- properties = {'docker': docker_value, 'os': os_value}
|
|
| 28 |
- device = bot_session.Device(properties)
|
|
| 29 | 32 |
|
| 30 |
- assert uuid.UUID(device.name, version=4)
|
|
| 31 |
- assert properties == device.properties
|
|
| 33 |
+INSTANCES = ['', 'instance']
|
|
| 32 | 34 |
|
| 33 | 35 |
|
| 34 |
-def test_create_device_key_fail():
|
|
| 35 |
- properties = {'voight': 'kampff'}
|
|
| 36 |
+# Use subprocess to avoid creation of gRPC threads in main process
|
|
| 37 |
+# See https://github.com/grpc/grpc/blob/master/doc/fork_support.md
|
|
| 38 |
+# Multiprocessing uses pickle which protobufs don't work with
|
|
| 39 |
+# Workaround wrapper to send messages as strings
|
|
| 40 |
+class ServerInterface:
|
|
| 36 | 41 |
|
| 37 |
- with pytest.raises(KeyError):
|
|
| 38 |
- bot_session.Device(properties)
|
|
| 42 |
+ def __init__(self, remote):
|
|
| 43 |
+ self.__remote = remote
|
|
| 39 | 44 |
|
| 45 |
+ def create_bot_session(self, parent, bot_session):
|
|
| 40 | 46 |
|
| 41 |
-def test_create_device_value_fail():
|
|
| 42 |
- properties = {'docker': True}
|
|
| 47 |
+ def __create_bot_session(queue, remote, parent, string_bot_session):
|
|
| 48 |
+ bot_session = bots_pb2.BotSession()
|
|
| 49 |
+ bot_session.ParseFromString(string_bot_session)
|
|
| 43 | 50 |
|
| 44 |
- with pytest.raises(ValueError):
|
|
| 45 |
- bot_session.Device(properties)
|
|
| 51 |
+ interface = BotInterface(grpc.insecure_channel(remote))
|
|
| 46 | 52 |
|
| 53 |
+ result = interface.create_bot_session(parent, bot_session)
|
|
| 54 |
+ queue.put(result.SerializeToString())
|
|
| 47 | 55 |
|
| 48 |
-def test_create_worker():
|
|
| 49 |
- properties = {'pool': 'swim'}
|
|
| 50 |
- configs = {'DockerImage': 'Windows'}
|
|
| 51 |
- worker = bot_session.Worker(properties, configs)
|
|
| 56 |
+ string_bot_session = bot_session.SerializeToString()
|
|
| 57 |
+ result = run_in_subprocess(__create_bot_session,
|
|
| 58 |
+ self.__remote, parent, string_bot_session)
|
|
| 52 | 59 |
|
| 53 |
- assert properties == worker.properties
|
|
| 54 |
- assert configs == worker.configs
|
|
| 60 |
+ bot_session = bots_pb2.BotSession()
|
|
| 61 |
+ bot_session.ParseFromString(result)
|
|
| 62 |
+ return bot_session
|
|
| 55 | 63 |
|
| 56 |
- device = bot_session.Device()
|
|
| 57 |
- worker.add_device(device)
|
|
| 64 |
+ def update_bot_session(self, bot_session, update_mask=None):
|
|
| 58 | 65 |
|
| 59 |
- assert worker._devices[0] == device
|
|
| 66 |
+ def __update_bot_session(queue, remote, string_bot_session, update_mask):
|
|
| 67 |
+ bot_session = bots_pb2.BotSession()
|
|
| 68 |
+ bot_session.ParseFromString(string_bot_session)
|
|
| 60 | 69 |
|
| 70 |
+ interface = BotInterface(grpc.insecure_channel(remote))
|
|
| 61 | 71 |
|
| 62 |
-def test_create_worker_key_fail():
|
|
| 63 |
- properties = {'voight': 'kampff'}
|
|
| 64 |
- configs = {'voight': 'kampff'}
|
|
| 72 |
+ result = interface.update_bot_session(bot_session, update_mask)
|
|
| 73 |
+ queue.put(result.SerializeToString())
|
|
| 65 | 74 |
|
| 66 |
- with pytest.raises(KeyError):
|
|
| 67 |
- bot_session.Worker(properties)
|
|
| 68 |
- with pytest.raises(KeyError):
|
|
| 69 |
- bot_session.Worker(configs)
|
|
| 75 |
+ string_bot_session = bot_session.SerializeToString()
|
|
| 76 |
+ result = run_in_subprocess(__update_bot_session,
|
|
| 77 |
+ self.__remote, string_bot_session, update_mask)
|
|
| 78 |
+ |
|
| 79 |
+ bot_session = bots_pb2.BotSession()
|
|
| 80 |
+ bot_session.ParseFromString(result)
|
|
| 81 |
+ return bot_session
|
|
| 82 |
+ |
|
| 83 |
+ |
|
| 84 |
+@pytest.mark.parametrize('instance', INSTANCES)
|
|
| 85 |
+def test_create_bot_session(instance):
|
|
| 86 |
+ |
|
| 87 |
+ with serve_bots_interface([instance]) as server:
|
|
| 88 |
+ interface = ServerInterface(server.remote)
|
|
| 89 |
+ hardware_interface = HardwareInterface(Worker())
|
|
| 90 |
+ session = BotSession(instance, interface, hardware_interface, None)
|
|
| 91 |
+ session.create_bot_session()
|
|
| 92 |
+ assert session.get_pb2() == server.get_bot_session()
|
|
| 93 |
+ |
|
| 94 |
+ |
|
| 95 |
+@pytest.mark.parametrize('instance', INSTANCES)
|
|
| 96 |
+def test_update_bot_session(instance):
|
|
| 97 |
+ |
|
| 98 |
+ with serve_bots_interface([instance]) as server:
|
|
| 99 |
+ interface = ServerInterface(server.remote)
|
|
| 100 |
+ hardware_interface = HardwareInterface(Worker())
|
|
| 101 |
+ session = BotSession(instance, interface, hardware_interface, None)
|
|
| 102 |
+ session.create_bot_session()
|
|
| 103 |
+ assert session.get_pb2() == server.get_bot_session()
|
|
| 104 |
+ session.update_bot_session()
|
|
| 105 |
+ assert session.get_pb2() == server.get_bot_session()
|
|
| 106 |
+ |
|
| 107 |
+ |
|
| 108 |
+@pytest.mark.parametrize('instance', INSTANCES)
|
|
| 109 |
+def test_create_bot_session_with_work(instance):
|
|
| 110 |
+ |
|
| 111 |
+ def __work(lease, context, event):
|
|
| 112 |
+ return lease
|
|
| 113 |
+ |
|
| 114 |
+ with serve_bots_interface([instance]) as server:
|
|
| 115 |
+ interface = ServerInterface(server.remote)
|
|
| 116 |
+ hardware_interface = HardwareInterface(Worker())
|
|
| 117 |
+ session = BotSession(instance, interface, hardware_interface, __work)
|
|
| 118 |
+ server.inject_work()
|
|
| 119 |
+ session.create_bot_session()
|
|
| 120 |
+ |
|
| 121 |
+ assert len(session.get_pb2().leases) == 1
|
|
| 122 |
+ |
|
| 123 |
+ loop = asyncio.get_event_loop()
|
|
| 124 |
+ for task in asyncio.Task.all_tasks():
|
|
| 125 |
+ loop.run_until_complete(task)
|
|
| 126 |
+ |
|
| 127 |
+ assert session.get_pb2().leases[0].state == LeaseState.COMPLETED.value
|
|
| 128 |
+ |
|
| 129 |
+ |
|
| 130 |
+@pytest.mark.parametrize('instance', INSTANCES)
|
|
| 131 |
+def test_update_bot_session_with_work(instance):
|
|
| 132 |
+ |
|
| 133 |
+ def __work(lease, context, event):
|
|
| 134 |
+ return lease
|
|
| 135 |
+ |
|
| 136 |
+ with serve_bots_interface([instance]) as server:
|
|
| 137 |
+ interface = ServerInterface(server.remote)
|
|
| 138 |
+ hardware_interface = HardwareInterface(Worker())
|
|
| 139 |
+ session = BotSession(instance, interface, hardware_interface, __work)
|
|
| 140 |
+ session.create_bot_session()
|
|
| 141 |
+ server.inject_work()
|
|
| 142 |
+ session.update_bot_session()
|
|
| 143 |
+ |
|
| 144 |
+ assert len(session.get_pb2().leases) == 1
|
|
| 145 |
+ |
|
| 146 |
+ loop = asyncio.get_event_loop()
|
|
| 147 |
+ for task in asyncio.Task.all_tasks():
|
|
| 148 |
+ loop.run_until_complete(task)
|
|
| 149 |
+ |
|
| 150 |
+ assert session.get_pb2().leases[0].state == LeaseState.COMPLETED.value
|
|
| 151 |
+ |
|
| 152 |
+ |
|
| 153 |
+@pytest.mark.parametrize('instance', INSTANCES)
|
|
| 154 |
+def test_cancel_leases(instance):
|
|
| 155 |
+ |
|
| 156 |
+ def __work(lease, context, cancel_event):
|
|
| 157 |
+ # while not cancel_event.is_set():
|
|
| 158 |
+ |
|
| 159 |
+ return lease
|
|
| 160 |
+ |
|
| 161 |
+ with serve_bots_interface([instance]) as server:
|
|
| 162 |
+ interface = ServerInterface(server.remote)
|
|
| 163 |
+ hardware_interface = HardwareInterface(Worker())
|
|
| 164 |
+ session = BotSession(instance, interface, hardware_interface, __work)
|
|
| 165 |
+ |
|
| 166 |
+ lease = bots_pb2.Lease()
|
|
| 167 |
+ lease.state = LeaseState.PENDING.value
|
|
| 168 |
+ lease.id = 'foo'
|
|
| 169 |
+ server.inject_work(lease)
|
|
| 170 |
+ session.create_bot_session()
|
|
| 171 |
+ |
|
| 172 |
+ leases_pb2 = session.get_pb2().leases
|
|
| 173 |
+ assert len(leases_pb2) == 1
|
|
| 174 |
+ assert leases_pb2[0].state == LeaseState.ACTIVE.value
|
|
| 175 |
+ |
|
| 176 |
+ server.cancel_lease(leases_pb2[0].id)
|
|
| 177 |
+ session.update_bot_session()
|
|
| 178 |
+ assert len(session.get_pb2().leases) == 1
|
|
| 179 |
+ |
|
| 180 |
+ loop = asyncio.get_event_loop()
|
|
| 181 |
+ for task in asyncio.Task.all_tasks():
|
|
| 182 |
+ try:
|
|
| 183 |
+ loop.run_until_complete(task)
|
|
| 184 |
+ except asyncio.CancelledError:
|
|
| 185 |
+ pass
|
|
| 186 |
+ |
|
| 187 |
+ assert session.get_pb2().leases[0].state == LeaseState.CANCELLED.value
|
| ... | ... | @@ -17,7 +17,6 @@ |
| 17 | 17 |
|
| 18 | 18 |
# pylint: disable=redefined-outer-name
|
| 19 | 19 |
|
| 20 |
-import copy
|
|
| 21 | 20 |
from unittest import mock
|
| 22 | 21 |
|
| 23 | 22 |
import grpc
|
| ... | ... | @@ -150,129 +149,6 @@ def test_update_leases_with_work(bot_session, context, instance): |
| 150 | 149 |
assert response_action == action_digest
|
| 151 | 150 |
|
| 152 | 151 |
|
| 153 |
-def test_update_leases_work_complete(bot_session, context, instance):
|
|
| 154 |
- request = bots_pb2.CreateBotSessionRequest(parent='',
|
|
| 155 |
- bot_session=bot_session)
|
|
| 156 |
- # Create bot session
|
|
| 157 |
- # Simulated the severed binding between client and server
|
|
| 158 |
- response = copy.deepcopy(instance.CreateBotSession(request, context))
|
|
| 159 |
- |
|
| 160 |
- # Inject work
|
|
| 161 |
- action_digest = remote_execution_pb2.Digest(hash='gaff')
|
|
| 162 |
- _inject_work(instance._instances[""]._scheduler, action_digest=action_digest)
|
|
| 163 |
- |
|
| 164 |
- request = bots_pb2.UpdateBotSessionRequest(name=response.name,
|
|
| 165 |
- bot_session=response)
|
|
| 166 |
- response = copy.deepcopy(instance.UpdateBotSession(request, context))
|
|
| 167 |
- |
|
| 168 |
- assert response.leases[0].state == LeaseState.PENDING.value
|
|
| 169 |
- response.leases[0].state = LeaseState.ACTIVE.value
|
|
| 170 |
- |
|
| 171 |
- request = bots_pb2.UpdateBotSessionRequest(name=response.name,
|
|
| 172 |
- bot_session=response)
|
|
| 173 |
- |
|
| 174 |
- response = copy.deepcopy(instance.UpdateBotSession(request, context))
|
|
| 175 |
- |
|
| 176 |
- response.leases[0].state = LeaseState.COMPLETED.value
|
|
| 177 |
- response.leases[0].result.Pack(remote_execution_pb2.ActionResult())
|
|
| 178 |
- |
|
| 179 |
- request = bots_pb2.UpdateBotSessionRequest(name=response.name,
|
|
| 180 |
- bot_session=response)
|
|
| 181 |
- response = copy.deepcopy(instance.UpdateBotSession(request, context))
|
|
| 182 |
- |
|
| 183 |
- assert len(response.leases) is 0
|
|
| 184 |
- |
|
| 185 |
- |
|
| 186 |
-def test_work_rejected_by_bot(bot_session, context, instance):
|
|
| 187 |
- request = bots_pb2.CreateBotSessionRequest(parent='',
|
|
| 188 |
- bot_session=bot_session)
|
|
| 189 |
- # Inject work
|
|
| 190 |
- action_digest = remote_execution_pb2.Digest(hash='gaff')
|
|
| 191 |
- _inject_work(instance._instances[""]._scheduler, action_digest=action_digest)
|
|
| 192 |
- |
|
| 193 |
- # Simulated the severed binding between client and server
|
|
| 194 |
- response = copy.deepcopy(instance.CreateBotSession(request, context))
|
|
| 195 |
- |
|
| 196 |
- # Reject work
|
|
| 197 |
- assert response.leases[0].state == LeaseState.PENDING.value
|
|
| 198 |
- response.leases[0].state = LeaseState.COMPLETED.value
|
|
| 199 |
- request = bots_pb2.UpdateBotSessionRequest(name=response.name,
|
|
| 200 |
- bot_session=response)
|
|
| 201 |
- |
|
| 202 |
- response = instance.UpdateBotSession(request, context)
|
|
| 203 |
- |
|
| 204 |
- context.set_code.assert_called_once_with(grpc.StatusCode.UNIMPLEMENTED)
|
|
| 205 |
- |
|
| 206 |
- |
|
| 207 |
-@pytest.mark.parametrize("state", [LeaseState.LEASE_STATE_UNSPECIFIED, LeaseState.PENDING])
|
|
| 208 |
-def test_work_out_of_sync_from_pending(state, bot_session, context, instance):
|
|
| 209 |
- request = bots_pb2.CreateBotSessionRequest(parent='',
|
|
| 210 |
- bot_session=bot_session)
|
|
| 211 |
- # Inject work
|
|
| 212 |
- action_digest = remote_execution_pb2.Digest(hash='gaff')
|
|
| 213 |
- _inject_work(instance._instances[""]._scheduler, action_digest=action_digest)
|
|
| 214 |
- |
|
| 215 |
- # Simulated the severed binding between client and server
|
|
| 216 |
- response = copy.deepcopy(instance.CreateBotSession(request, context))
|
|
| 217 |
- |
|
| 218 |
- response.leases[0].state = state.value
|
|
| 219 |
- |
|
| 220 |
- request = bots_pb2.UpdateBotSessionRequest(name=response.name,
|
|
| 221 |
- bot_session=response)
|
|
| 222 |
- |
|
| 223 |
- response = instance.UpdateBotSession(request, context)
|
|
| 224 |
- |
|
| 225 |
- context.set_code.assert_called_once_with(grpc.StatusCode.DATA_LOSS)
|
|
| 226 |
- |
|
| 227 |
- |
|
| 228 |
-@pytest.mark.parametrize("state", [LeaseState.LEASE_STATE_UNSPECIFIED, LeaseState.PENDING])
|
|
| 229 |
-def test_work_out_of_sync_from_active(state, bot_session, context, instance):
|
|
| 230 |
- request = bots_pb2.CreateBotSessionRequest(parent='',
|
|
| 231 |
- bot_session=bot_session)
|
|
| 232 |
- # Inject work
|
|
| 233 |
- action_digest = remote_execution_pb2.Digest(hash='gaff')
|
|
| 234 |
- _inject_work(instance._instances[""]._scheduler, action_digest=action_digest)
|
|
| 235 |
- |
|
| 236 |
- # Simulated the severed binding between client and server
|
|
| 237 |
- response = copy.deepcopy(instance.CreateBotSession(request, context))
|
|
| 238 |
- |
|
| 239 |
- response.leases[0].state = LeaseState.ACTIVE.value
|
|
| 240 |
- |
|
| 241 |
- request = copy.deepcopy(bots_pb2.UpdateBotSessionRequest(name=response.name,
|
|
| 242 |
- bot_session=response))
|
|
| 243 |
- |
|
| 244 |
- response = instance.UpdateBotSession(request, context)
|
|
| 245 |
- |
|
| 246 |
- response.leases[0].state = state.value
|
|
| 247 |
- |
|
| 248 |
- request = bots_pb2.UpdateBotSessionRequest(name=response.name,
|
|
| 249 |
- bot_session=response)
|
|
| 250 |
- |
|
| 251 |
- response = instance.UpdateBotSession(request, context)
|
|
| 252 |
- |
|
| 253 |
- context.set_code.assert_called_once_with(grpc.StatusCode.DATA_LOSS)
|
|
| 254 |
- |
|
| 255 |
- |
|
| 256 |
-def test_work_active_to_active(bot_session, context, instance):
|
|
| 257 |
- request = bots_pb2.CreateBotSessionRequest(parent='',
|
|
| 258 |
- bot_session=bot_session)
|
|
| 259 |
- # Inject work
|
|
| 260 |
- action_digest = remote_execution_pb2.Digest(hash='gaff')
|
|
| 261 |
- _inject_work(instance._instances[""]._scheduler, action_digest=action_digest)
|
|
| 262 |
- |
|
| 263 |
- # Simulated the severed binding between client and server
|
|
| 264 |
- response = copy.deepcopy(instance.CreateBotSession(request, context))
|
|
| 265 |
- |
|
| 266 |
- response.leases[0].state = LeaseState.ACTIVE.value
|
|
| 267 |
- |
|
| 268 |
- request = bots_pb2.UpdateBotSessionRequest(name=response.name,
|
|
| 269 |
- bot_session=response)
|
|
| 270 |
- |
|
| 271 |
- response = instance.UpdateBotSession(request, context)
|
|
| 272 |
- |
|
| 273 |
- assert response.leases[0].state == LeaseState.ACTIVE.value
|
|
| 274 |
- |
|
| 275 |
- |
|
| 276 | 152 |
def test_post_bot_event_temp(context, instance):
|
| 277 | 153 |
request = bots_pb2.PostBotEventTempRequest()
|
| 278 | 154 |
instance.PostBotEventTemp(request, context)
|
| ... | ... | @@ -28,10 +28,8 @@ from buildgrid._enums import OperationStage |
| 28 | 28 |
from buildgrid._exceptions import InvalidArgumentError
|
| 29 | 29 |
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
| 30 | 30 |
from buildgrid._protos.google.longrunning import operations_pb2
|
| 31 |
-from buildgrid._protos.google.rpc import status_pb2
|
|
| 32 | 31 |
from buildgrid.server.cas.storage import lru_memory_cache
|
| 33 | 32 |
from buildgrid.server.controller import ExecutionController
|
| 34 |
-from buildgrid.server.job import LeaseState
|
|
| 35 | 33 |
from buildgrid.server.operations import service
|
| 36 | 34 |
from buildgrid.server.operations.service import OperationsService
|
| 37 | 35 |
from buildgrid.utils import create_digest
|
| ... | ... | @@ -166,31 +164,6 @@ def test_list_operations_instance_fail(instance, controller, execute_request, co |
| 166 | 164 |
context.set_code.assert_called_once_with(grpc.StatusCode.INVALID_ARGUMENT)
|
| 167 | 165 |
|
| 168 | 166 |
|
| 169 |
-def test_list_operations_with_result(instance, controller, execute_request, context):
|
|
| 170 |
- response_execute = controller.execution_instance.execute(execute_request.action_digest,
|
|
| 171 |
- execute_request.skip_cache_lookup)
|
|
| 172 |
- |
|
| 173 |
- action_result = remote_execution_pb2.ActionResult()
|
|
| 174 |
- output_file = remote_execution_pb2.OutputFile(path='unicorn')
|
|
| 175 |
- action_result.output_files.extend([output_file])
|
|
| 176 |
- |
|
| 177 |
- controller.operations_instance._scheduler.jobs[response_execute.name].create_lease()
|
|
| 178 |
- controller.operations_instance._scheduler.update_job_lease_state(response_execute.name,
|
|
| 179 |
- LeaseState.COMPLETED,
|
|
| 180 |
- lease_status=status_pb2.Status(),
|
|
| 181 |
- lease_result=_pack_any(action_result))
|
|
| 182 |
- |
|
| 183 |
- request = operations_pb2.ListOperationsRequest(name=instance_name)
|
|
| 184 |
- response = instance.ListOperations(request, context)
|
|
| 185 |
- |
|
| 186 |
- assert response.operations[0].name.split('/')[-1] == response_execute.name
|
|
| 187 |
- |
|
| 188 |
- execute_response = remote_execution_pb2.ExecuteResponse()
|
|
| 189 |
- response.operations[0].response.Unpack(execute_response)
|
|
| 190 |
- |
|
| 191 |
- assert execute_response.result.output_files == action_result.output_files
|
|
| 192 |
- |
|
| 193 |
- |
|
| 194 | 167 |
def test_list_operations_empty(instance, context):
|
| 195 | 168 |
request = operations_pb2.ListOperationsRequest(name=instance_name)
|
| 196 | 169 |
|
| ... | ... | @@ -22,7 +22,7 @@ from buildgrid.server.operations.service import OperationsService |
| 22 | 22 |
from buildgrid.server.bots.service import BotsService
|
| 23 | 23 |
from buildgrid.server.referencestorage.service import ReferenceStorageService
|
| 24 | 24 |
|
| 25 |
-from .utils.cas import run_in_subprocess
|
|
| 25 |
+from .utils.utils import run_in_subprocess
|
|
| 26 | 26 |
|
| 27 | 27 |
|
| 28 | 28 |
config = """
|
| 1 |
+# Copyright (C) 2018 Bloomberg LP
|
|
| 2 |
+#
|
|
| 3 |
+# Licensed under the Apache License, Version 2.0 (the "License");
|
|
| 4 |
+# you may not use this file except in compliance with the License.
|
|
| 5 |
+# You may obtain a copy of the License at
|
|
| 6 |
+#
|
|
| 7 |
+# <http://www.apache.org/licenses/LICENSE-2.0>
|
|
| 8 |
+#
|
|
| 9 |
+# Unless required by applicable law or agreed to in writing, software
|
|
| 10 |
+# distributed under the License is distributed on an "AS IS" BASIS,
|
|
| 11 |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
| 12 |
+# See the License for the specific language governing permissions and
|
|
| 13 |
+# limitations under the License.
|
|
| 14 |
+ |
|
| 15 |
+from concurrent import futures
|
|
| 16 |
+from contextlib import contextmanager
|
|
| 17 |
+import multiprocessing
|
|
| 18 |
+import os
|
|
| 19 |
+import signal
|
|
| 20 |
+import uuid
|
|
| 21 |
+ |
|
| 22 |
+import grpc
|
|
| 23 |
+import pytest_cov
|
|
| 24 |
+ |
|
| 25 |
+from buildgrid._enums import LeaseState
|
|
| 26 |
+from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
|
|
| 27 |
+from buildgrid.server.bots import service
|
|
| 28 |
+ |
|
| 29 |
+ |
|
| 30 |
+@contextmanager
|
|
| 31 |
+def serve_bots_interface(instances):
|
|
| 32 |
+ server = Server(instances)
|
|
| 33 |
+ try:
|
|
| 34 |
+ yield server
|
|
| 35 |
+ finally:
|
|
| 36 |
+ server.quit()
|
|
| 37 |
+ |
|
| 38 |
+ |
|
| 39 |
+class Server:
|
|
| 40 |
+ |
|
| 41 |
+ def __init__(self, instances):
|
|
| 42 |
+ self.instances = instances
|
|
| 43 |
+ |
|
| 44 |
+ self.__queue = multiprocessing.Queue()
|
|
| 45 |
+ self.__bot_session_queue = multiprocessing.Queue()
|
|
| 46 |
+ self.__message_queue = multiprocessing.Queue()
|
|
| 47 |
+ self.__process = multiprocessing.Process(
|
|
| 48 |
+ target=Server.serve,
|
|
| 49 |
+ args=(self.__queue, self.instances,
|
|
| 50 |
+ self.__bot_session_queue, self.__message_queue))
|
|
| 51 |
+ self.__process.start()
|
|
| 52 |
+ |
|
| 53 |
+ self.port = self.__queue.get()
|
|
| 54 |
+ self.remote = 'localhost:{}'.format(self.port)
|
|
| 55 |
+ |
|
| 56 |
+ @classmethod
|
|
| 57 |
+ def serve(cls, queue, instances, bot_session_queue, message_queue):
|
|
| 58 |
+ pytest_cov.embed.cleanup_on_sigterm()
|
|
| 59 |
+ |
|
| 60 |
+ # Use max_workers default from Python 3.5+
|
|
| 61 |
+ max_workers = (os.cpu_count() or 1) * 5
|
|
| 62 |
+ server = grpc.server(futures.ThreadPoolExecutor(max_workers))
|
|
| 63 |
+ port = server.add_insecure_port('localhost:0')
|
|
| 64 |
+ |
|
| 65 |
+ bots_service = service.BotsService(server)
|
|
| 66 |
+ for name in instances:
|
|
| 67 |
+ bots_interface = BotsInterface(bot_session_queue, message_queue)
|
|
| 68 |
+ bots_service.add_instance(name, bots_interface)
|
|
| 69 |
+ |
|
| 70 |
+ server.start()
|
|
| 71 |
+ queue.put(port)
|
|
| 72 |
+ signal.pause()
|
|
| 73 |
+ |
|
| 74 |
+ def get_bot_session(self, timeout=1):
|
|
| 75 |
+ bot_session = bots_pb2.BotSession()
|
|
| 76 |
+ bot_session.ParseFromString(self.__bot_session_queue.get(timeout=timeout))
|
|
| 77 |
+ return bot_session
|
|
| 78 |
+ |
|
| 79 |
+ def inject_work(self, lease=None, timeout=1):
|
|
| 80 |
+ if not lease:
|
|
| 81 |
+ lease = bots_pb2.Lease()
|
|
| 82 |
+ lease.state = LeaseState.PENDING.value
|
|
| 83 |
+ |
|
| 84 |
+ lease_string = lease.SerializeToString()
|
|
| 85 |
+ self.__message_queue.put(('INJECT_WORK', lease_string))
|
|
| 86 |
+ |
|
| 87 |
+ def cancel_lease(self, lease_id):
|
|
| 88 |
+ self.__message_queue.put(('CANCEL_LEASE', lease_id))
|
|
| 89 |
+ |
|
| 90 |
+ def quit(self):
|
|
| 91 |
+ if self.__process:
|
|
| 92 |
+ self.__process.terminate()
|
|
| 93 |
+ self.__process.join()
|
|
| 94 |
+ |
|
| 95 |
+ |
|
| 96 |
+class BotsInterface:
|
|
| 97 |
+ |
|
| 98 |
+ def __init__(self, bot_session_queue, message_queue):
|
|
| 99 |
+ self.__bot_session_queue = bot_session_queue
|
|
| 100 |
+ self.__message_queue = message_queue
|
|
| 101 |
+ |
|
| 102 |
+ def register_instance_with_server(self, instance_name, server):
|
|
| 103 |
+ server.add_bots_interface(self, instance_name)
|
|
| 104 |
+ |
|
| 105 |
+ def create_bot_session(self, parent, bot_session):
|
|
| 106 |
+ name = "{}/{}".format(parent, str(uuid.uuid4()))
|
|
| 107 |
+ bot_session.name = name
|
|
| 108 |
+ |
|
| 109 |
+ while not self.__message_queue.empty():
|
|
| 110 |
+ message = self.__message_queue.get()
|
|
| 111 |
+ if message[0] == 'INJECT_WORK':
|
|
| 112 |
+ lease_string = message[1]
|
|
| 113 |
+ lease = bots_pb2.Lease()
|
|
| 114 |
+ lease.ParseFromString(lease_string)
|
|
| 115 |
+ bot_session.leases.extend([lease])
|
|
| 116 |
+ |
|
| 117 |
+ self.__bot_session_queue.put(bot_session.SerializeToString())
|
|
| 118 |
+ return bot_session
|
|
| 119 |
+ |
|
| 120 |
+ def update_bot_session(self, name, bot_session):
|
|
| 121 |
+ for lease in bot_session.leases:
|
|
| 122 |
+ state = LeaseState(lease.state)
|
|
| 123 |
+ if state == LeaseState.COMPLETED:
|
|
| 124 |
+ lease.Clear()
|
|
| 125 |
+ |
|
| 126 |
+ elif state == LeaseState.CANCELLED:
|
|
| 127 |
+ lease.Clear()
|
|
| 128 |
+ |
|
| 129 |
+ while not self.__message_queue.empty():
|
|
| 130 |
+ message = self.__message_queue.get()
|
|
| 131 |
+ |
|
| 132 |
+ if message[0] == 'INJECT_WORK':
|
|
| 133 |
+ lease_string = message[1]
|
|
| 134 |
+ lease = bots_pb2.Lease()
|
|
| 135 |
+ lease.ParseFromString(lease_string)
|
|
| 136 |
+ bot_session.leases.extend([lease])
|
|
| 137 |
+ |
|
| 138 |
+ elif message[0] == 'CANCEL_LEASE':
|
|
| 139 |
+ lease_id = message[1]
|
|
| 140 |
+ for lease in bot_session.leases:
|
|
| 141 |
+ if lease.id == lease_id:
|
|
| 142 |
+ lease.state = LeaseState.CANCELLED.value
|
|
| 143 |
+ |
|
| 144 |
+ self.__bot_session_queue.put(bot_session.SerializeToString())
|
|
| 145 |
+ return bot_session
|
| ... | ... | @@ -21,7 +21,6 @@ import signal |
| 21 | 21 |
import tempfile
|
| 22 | 22 |
|
| 23 | 23 |
import grpc
|
| 24 |
-import psutil
|
|
| 25 | 24 |
import pytest_cov
|
| 26 | 25 |
|
| 27 | 26 |
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
| ... | ... | @@ -42,43 +41,6 @@ def serve_cas(instances): |
| 42 | 41 |
server.quit()
|
| 43 | 42 |
|
| 44 | 43 |
|
| 45 |
-def kill_process_tree(pid):
|
|
| 46 |
- proc = psutil.Process(pid)
|
|
| 47 |
- children = proc.children(recursive=True)
|
|
| 48 |
- |
|
| 49 |
- def kill_proc(p):
|
|
| 50 |
- try:
|
|
| 51 |
- p.kill()
|
|
| 52 |
- except psutil.AccessDenied:
|
|
| 53 |
- # Ignore this error, it can happen with
|
|
| 54 |
- # some setuid bwrap processes.
|
|
| 55 |
- pass
|
|
| 56 |
- |
|
| 57 |
- # Bloody Murder
|
|
| 58 |
- for child in children:
|
|
| 59 |
- kill_proc(child)
|
|
| 60 |
- kill_proc(proc)
|
|
| 61 |
- |
|
| 62 |
- |
|
| 63 |
-def run_in_subprocess(function, *arguments):
|
|
| 64 |
- queue = multiprocessing.Queue()
|
|
| 65 |
- # Use subprocess to avoid creation of gRPC threads in main process
|
|
| 66 |
- # See https://github.com/grpc/grpc/blob/master/doc/fork_support.md
|
|
| 67 |
- process = multiprocessing.Process(target=function,
|
|
| 68 |
- args=(queue, *arguments))
|
|
| 69 |
- |
|
| 70 |
- try:
|
|
| 71 |
- process.start()
|
|
| 72 |
- |
|
| 73 |
- result = queue.get()
|
|
| 74 |
- process.join()
|
|
| 75 |
- except KeyboardInterrupt:
|
|
| 76 |
- kill_process_tree(process.pid)
|
|
| 77 |
- raise
|
|
| 78 |
- |
|
| 79 |
- return result
|
|
| 80 |
- |
|
| 81 |
- |
|
| 82 | 44 |
class Server:
|
| 83 | 45 |
|
| 84 | 46 |
def __init__(self, instances):
|
| 1 |
+# Copyright (C) 2018 Bloomberg LP
|
|
| 2 |
+#
|
|
| 3 |
+# Licensed under the Apache License, Version 2.0 (the "License");
|
|
| 4 |
+# you may not use this file except in compliance with the License.
|
|
| 5 |
+# You may obtain a copy of the License at
|
|
| 6 |
+#
|
|
| 7 |
+# <http://www.apache.org/licenses/LICENSE-2.0>
|
|
| 8 |
+#
|
|
| 9 |
+# Unless required by applicable law or agreed to in writing, software
|
|
| 10 |
+# distributed under the License is distributed on an "AS IS" BASIS,
|
|
| 11 |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
| 12 |
+# See the License for the specific language governing permissions and
|
|
| 13 |
+# limitations under the License.
|
|
| 14 |
+ |
|
| 15 |
+import multiprocessing
|
|
| 16 |
+import psutil
|
|
| 17 |
+ |
|
| 18 |
+ |
|
| 19 |
+def kill_process_tree(pid):
|
|
| 20 |
+ proc = psutil.Process(pid)
|
|
| 21 |
+ children = proc.children(recursive=True)
|
|
| 22 |
+ |
|
| 23 |
+ def kill_proc(p):
|
|
| 24 |
+ try:
|
|
| 25 |
+ p.kill()
|
|
| 26 |
+ except psutil.AccessDenied:
|
|
| 27 |
+ # Ignore this error, it can happen with
|
|
| 28 |
+ # some setuid bwrap processes.
|
|
| 29 |
+ pass
|
|
| 30 |
+ |
|
| 31 |
+ # Bloody Murder
|
|
| 32 |
+ for child in children:
|
|
| 33 |
+ kill_proc(child)
|
|
| 34 |
+ kill_proc(proc)
|
|
| 35 |
+ |
|
| 36 |
+ |
|
| 37 |
+def run_in_subprocess(function, *arguments, timeout=1):
|
|
| 38 |
+ queue = multiprocessing.Queue()
|
|
| 39 |
+ # Use subprocess to avoid creation of gRPC threads in main process
|
|
| 40 |
+ # See https://github.com/grpc/grpc/blob/master/doc/fork_support.md
|
|
| 41 |
+ process = multiprocessing.Process(target=function,
|
|
| 42 |
+ args=(queue, *arguments))
|
|
| 43 |
+ |
|
| 44 |
+ try:
|
|
| 45 |
+ process.start()
|
|
| 46 |
+ result = queue.get(timeout=timeout)
|
|
| 47 |
+ process.join()
|
|
| 48 |
+ |
|
| 49 |
+ except KeyboardInterrupt:
|
|
| 50 |
+ kill_process_tree(process.pid)
|
|
| 51 |
+ raise
|
|
| 52 |
+ |
|
| 53 |
+ return result
|
