Raoul Hidalgo Charman pushed to branch raoul/smarter-bot-calls at BuildGrid / buildgrid
Commits:
- 8527f25b by Raoul Hidalgo Charman at 2018-11-06T17:59:50Z
5 changed files:
- buildgrid/_app/commands/cmd_bot.py
- buildgrid/bot/bot.py
- buildgrid/bot/bot_interface.py
- buildgrid/bot/bot_session.py
- buildgrid/utils.py
Changes:
| ... | ... | @@ -33,7 +33,6 @@ from buildgrid.bot.bot_session import BotSession, Device, Worker |
| 33 | 33 |
|
| 34 | 34 |
from ..bots import buildbox, dummy, host
|
| 35 | 35 |
from ..cli import pass_context
|
| 36 |
-from ...settings import INTERVAL_BUFFER
|
|
| 37 | 36 |
|
| 38 | 37 |
|
| 39 | 38 |
@click.group(name='bot', short_help="Create and register bot clients.")
|
| ... | ... | @@ -53,7 +52,8 @@ from ...settings import INTERVAL_BUFFER |
| 53 | 52 |
help="Public CAS client certificate for TLS (PEM-encoded)")
|
| 54 | 53 |
@click.option('--cas-server-cert', type=click.Path(exists=True, dir_okay=False), default=None,
|
| 55 | 54 |
help="Public CAS server certificate for TLS (PEM-encoded)")
|
| 56 |
-@click.option('--update-period', type=click.FLOAT, default=30, show_default=True,
|
|
| 55 |
+# TODO change default to 30
|
|
| 56 |
+@click.option('--update-period', type=click.FLOAT, default=5, show_default=True,
|
|
| 57 | 57 |
help="Time period for bot updates to the server in seconds.")
|
| 58 | 58 |
@click.option('--parent', type=click.STRING, default='main', show_default=True,
|
| 59 | 59 |
help="Targeted farm resource.")
|
| ... | ... | @@ -37,10 +37,8 @@ class Bot: |
| 37 | 37 |
def session(self, work, context):
|
| 38 | 38 |
loop = asyncio.get_event_loop()
|
| 39 | 39 |
|
| 40 |
- self._bot_session.create_bot_session(work, context)
|
|
| 41 |
- |
|
| 42 | 40 |
try:
|
| 43 |
- task = asyncio.ensure_future(self._update_bot_session())
|
|
| 41 |
+ task = asyncio.ensure_future(self._update_bot_session(work, context))
|
|
| 44 | 42 |
loop.run_forever()
|
| 45 | 43 |
except KeyboardInterrupt:
|
| 46 | 44 |
pass
|
| ... | ... | @@ -48,9 +46,12 @@ class Bot: |
| 48 | 46 |
task.cancel()
|
| 49 | 47 |
loop.close()
|
| 50 | 48 |
|
| 51 |
- async def _update_bot_session(self):
|
|
| 49 |
+ async def _update_bot_session(self, work, context):
|
|
| 52 | 50 |
"""
|
| 53 | 51 |
Calls the server periodically to inform the server the client has not died.
|
| 54 | 52 |
"""
|
| 55 | 53 |
while True:
|
| 56 |
- self._bot_session.update_bot_session()
|
|
| 54 |
+ sleep = self._bot_session.run_bot_session(work, context)
|
|
| 55 |
+ # If you get rid of this it breaks when actually executing commands
|
|
| 56 |
+ if sleep:
|
|
| 57 |
+ await asyncio.sleep(sleep)
|
| ... | ... | @@ -21,8 +21,10 @@ Interface to grpc |
| 21 | 21 |
"""
|
| 22 | 22 |
|
| 23 | 23 |
import logging
|
| 24 |
+import grpc
|
|
| 24 | 25 |
|
| 25 | 26 |
from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, bots_pb2_grpc
|
| 27 |
+from ..settings import INTERVAL_BUFFER
|
|
| 26 | 28 |
|
| 27 | 29 |
|
| 28 | 30 |
class BotInterface:
|
| ... | ... | @@ -39,10 +41,19 @@ class BotInterface: |
| 39 | 41 |
def create_bot_session(self, parent, bot_session):
|
| 40 | 42 |
request = bots_pb2.CreateBotSessionRequest(parent=parent,
|
| 41 | 43 |
bot_session=bot_session)
|
| 42 |
- return self._stub.CreateBotSession(request)
|
|
| 44 |
+ return self._bot_call(self._stub.CreateBotSession, request)
|
|
| 43 | 45 |
|
| 44 | 46 |
def update_bot_session(self, bot_session, update_mask=None):
|
| 45 | 47 |
request = bots_pb2.UpdateBotSessionRequest(name=bot_session.name,
|
| 46 | 48 |
bot_session=bot_session,
|
| 47 | 49 |
update_mask=update_mask)
|
| 48 |
- return self._stub.UpdateBotSession(request, timeout=self._interval)
|
|
| 50 |
+ return self._bot_call(self._stub.UpdateBotSession, request)
|
|
| 51 |
+ |
|
| 52 |
+ def _bot_call(self, call, request):
|
|
| 53 |
+ try:
|
|
| 54 |
+ response = call(request, timeout=self._interval + INTERVAL_BUFFER)
|
|
| 55 |
+ return response
|
|
| 56 |
+ except grpc.RpcError as e:
|
|
| 57 |
+ if e.code() in grpc.StatusCode:
|
|
| 58 |
+ self.logger.warning("Server responded with error: {}".format(e.code()))
|
|
| 59 |
+ return None
|
| ... | ... | @@ -49,6 +49,7 @@ class BotSession: |
| 49 | 49 |
self._bot_id = '{}.{}'.format(parent, platform.node())
|
| 50 | 50 |
self._context = None
|
| 51 | 51 |
self._interface = interface
|
| 52 |
+ self._connected = False
|
|
| 52 | 53 |
self._leases = {}
|
| 53 | 54 |
self._name = None
|
| 54 | 55 |
self._parent = parent
|
| ... | ... | @@ -63,12 +64,29 @@ class BotSession: |
| 63 | 64 |
def add_worker(self, worker):
|
| 64 | 65 |
self._worker = worker
|
| 65 | 66 |
|
| 67 |
+ def run_bot_session(self, work, context=None):
|
|
| 68 |
+ if self._connected is False:
|
|
| 69 |
+ self.create_bot_session(work, context)
|
|
| 70 |
+ else:
|
|
| 71 |
+ self.update_bot_session()
|
|
| 72 |
+ |
|
| 73 |
+ if self._connected is False:
|
|
| 74 |
+ return self._interface._interval
|
|
| 75 |
+ elif self._leases:
|
|
| 76 |
+ return 0.5
|
|
| 77 |
+ else:
|
|
| 78 |
+ return None
|
|
| 79 |
+ |
|
| 66 | 80 |
def create_bot_session(self, work, context=None):
|
| 67 | 81 |
self.logger.debug("Creating bot session")
|
| 68 | 82 |
self._work = work
|
| 69 | 83 |
self._context = context
|
| 70 | 84 |
|
| 71 | 85 |
session = self._interface.create_bot_session(self._parent, self.get_pb2())
|
| 86 |
+ if session is None:
|
|
| 87 |
+ self._connected = False
|
|
| 88 |
+ return
|
|
| 89 |
+ self._connected = True
|
|
| 72 | 90 |
self._name = session.name
|
| 73 | 91 |
|
| 74 | 92 |
self.logger.info("Created bot session with name: [{}]".format(self._name))
|
| ... | ... | @@ -79,10 +97,15 @@ class BotSession: |
| 79 | 97 |
def update_bot_session(self):
|
| 80 | 98 |
self.logger.debug("Updating bot session: [{}]".format(self._bot_id))
|
| 81 | 99 |
session = self._interface.update_bot_session(self.get_pb2())
|
| 100 |
+ if session is None:
|
|
| 101 |
+ self._connected = False
|
|
| 102 |
+ return
|
|
| 103 |
+ self._connected = True
|
|
| 82 | 104 |
for k, v in list(self._leases.items()):
|
| 83 | 105 |
if v.state == LeaseState.COMPLETED.value:
|
| 84 | 106 |
del self._leases[k]
|
| 85 | 107 |
|
| 108 |
+ print(self._leases)
|
|
| 86 | 109 |
for lease in session.leases:
|
| 87 | 110 |
self._update_lease_from_server(lease)
|
| 88 | 111 |
|
| ... | ... | @@ -16,6 +16,8 @@ |
| 16 | 16 |
from operator import attrgetter
|
| 17 | 17 |
import os
|
| 18 | 18 |
import socket
|
| 19 |
+from contextlib import contextmanager
|
|
| 20 |
+import signal
|
|
| 19 | 21 |
|
| 20 | 22 |
from buildgrid.settings import HASH
|
| 21 | 23 |
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
