Raoul Hidalgo Charman pushed to branch raoul/126-bot-reconnects at BuildGrid / buildgrid
Commits:
-
8b2f48d8
by Raoul Hidalgo Charman at 2018-12-06T18:20:52Z
-
b1d07576
by Raoul Hidalgo Charman at 2018-12-06T18:20:56Z
4 changed files:
- buildgrid/_app/commands/cmd_bot.py
- buildgrid/bot/bot.py
- buildgrid/bot/interface.py
- buildgrid/bot/session.py
Changes:
... | ... | @@ -141,13 +141,10 @@ def run_dummy(context): |
141 | 141 |      """
142 | 142 |      Creates a session, accepts leases, does fake work and updates the server.
143 | 143 |      """
144 |     | -    try:
145 |     | -        bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
146 |     | -                                         dummy.work_dummy, context)
147 |     | -        b = bot.Bot(bot_session, context.update_period)
148 |     | -        b.session()
149 |     | -    except KeyboardInterrupt:
150 |     | -        pass
    | 144 | +    bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
    | 145 | +                                     dummy.work_dummy, context, context.update_period)
    | 146 | +    b = bot.Bot(bot_session)
    | 147 | +    b.session()
151 | 148 |
152 | 149 |
153 | 150 |  @cli.command('host-tools', short_help="Runs commands using the host's tools.")
... | ... | @@ -157,13 +154,10 @@ def run_host_tools(context): |
157 | 154 |      Downloads inputs from CAS, runs build commands using host-tools and uploads
158 | 155 |      result back to CAS.
159 | 156 |      """
160 |     | -    try:
161 |     | -        bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
162 |     | -                                         host.work_host_tools, context)
163 |     | -        b = bot.Bot(bot_session, context.update_period)
164 |     | -        b.session()
165 |     | -    except KeyboardInterrupt:
166 |     | -        pass
    | 157 | +    bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
    | 158 | +                                     host.work_host_tools, context, context.update_period)
    | 159 | +    b = bot.Bot(bot_session)
    | 160 | +    b.session()
167 | 161 |
168 | 162 |
169 | 163 |  @cli.command('buildbox', short_help="Run commands using the BuildBox tool.")
... | ... | @@ -179,10 +173,7 @@ def run_buildbox(context, local_cas, fuse_dir): |
179 | 173 |      context.local_cas = local_cas
180 | 174 |      context.fuse_dir = fuse_dir
181 | 175 |
182 |     | -    try:
183 |     | -        bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
184 |     | -                                         buildbox.work_buildbox, context)
185 |     | -        b = bot.Bot(bot_session, context.update_period)
186 |     | -        b.session()
187 |     | -    except KeyboardInterrupt:
188 |     | -        pass
    | 176 | +    bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
    | 177 | +                                     buildbox.work_buildbox, context, context.update_period)
    | 178 | +    b = bot.Bot(bot_session)
    | 179 | +    b.session()
... | ... | @@ -20,13 +20,10 @@ import logging |
20 | 20 |  class Bot:
21 | 21 |      """Creates a local BotSession."""
22 | 22 |
23 |    | -    def __init__(self, bot_session, update_period=1):
24 |    | -        """
25 |    | -        """
   | 23 | +    def __init__(self, bot_session):
26 | 24 |          self.__logger = logging.getLogger(__name__)
27 | 25 |
28 | 26 |          self.__bot_session = bot_session
29 |    | -        self.__update_period = update_period
30 | 27 |
31 | 28 |          self.__loop = None
32 | 29 |
... | ... | @@ -34,28 +31,16 @@ class Bot: |
34 | 31 |          """Will create a session and periodically call the server."""
35 | 32 |
36 | 33 |          self.__loop = asyncio.get_event_loop()
37 |    | -        self.__bot_session.create_bot_session()
38 | 34 |
39 | 35 |          try:
40 |    | -            task = asyncio.ensure_future(self.__update_bot_session())
   | 36 | +            task = asyncio.ensure_future(self.__bot_session.run())
41 | 37 |              self.__loop.run_until_complete(task)
42 |    | -
43 | 38 |          except KeyboardInterrupt:
44 | 39 |              pass
45 | 40 |
46 | 41 |          self.__kill_everyone()
47 | 42 |          self.__logger.info("Bot shutdown.")
48 | 43 |
49 |    | -    async def __update_bot_session(self):
50 |    | -        """Calls the server periodically to inform the server the client has not died."""
51 |    | -        try:
52 |    | -            while True:
53 |    | -                self.__bot_session.update_bot_session()
54 |    | -                await asyncio.sleep(self.__update_period)
55 |    | -
56 |    | -        except asyncio.CancelledError:
57 |    | -            pass
58 |    | -
59 | 44 |      def __kill_everyone(self):
60 | 45 |          """Cancels and waits for them to stop."""
61 | 46 |          self.__logger.info("Cancelling remaining tasks...")
... | ... | @@ -37,22 +37,25 @@ class BotInterface: |
37 | 37 |          self._stub = bots_pb2_grpc.BotsStub(channel)
38 | 38 |
39 | 39 |      def create_bot_session(self, parent, bot_session):
   | 40 | +        """ Create bot session request
   | 41 | +        Returns BotSession if correct else a grpc StatusCode
   | 42 | +        """
40 | 43 |          request = bots_pb2.CreateBotSessionRequest(parent=parent,
41 | 44 |                                                     bot_session=bot_session)
42 |    | -        try:
43 |    | -            return self._stub.CreateBotSession(request)
44 |    | -
45 |    | -        except grpc.RpcError as e:
46 |    | -            self.__logger.error(e)
47 |    | -            raise
   | 45 | +        return self._bot_call(self._stub.CreateBotSession, request)
48 | 46 |
49 | 47 |      def update_bot_session(self, bot_session, update_mask=None):
   | 48 | +        """ Update bot session request
   | 49 | +        Returns BotSession if correct else a grpc StatusCode
   | 50 | +        """
50 | 51 |          request = bots_pb2.UpdateBotSessionRequest(name=bot_session.name,
51 | 52 |                                                     bot_session=bot_session,
52 | 53 |                                                     update_mask=update_mask)
53 |    | -        try:
54 |    | -            return self._stub.UpdateBotSession(request)
   | 54 | +        return self._bot_call(self._stub.UpdateBotSession, request)
55 | 55 |
   | 56 | +    def _bot_call(self, call, request):
   | 57 | +        try:
   | 58 | +            return call(request)
56 | 59 |          except grpc.RpcError as e:
57 |    | -            self.__logger.error(e)
58 |    | -            raise
   | 60 | +            self.__logger.error(e.code())
   | 61 | +            return e.code()
... | ... | @@ -19,8 +19,10 @@ Bot Session |
19 | 19 |
20 | 20 |  Allows connections
21 | 21 |  """
   | 22 | +import asyncio
22 | 23 |  import logging
23 | 24 |  import platform
   | 25 | +import grpc
24 | 26 |
25 | 27 |  from buildgrid._enums import BotStatus, LeaseState
26 | 28 |  from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
... | ... | @@ -32,7 +34,8 @@ from .tenantmanager import TenantManager |
32 | 34 |
33 | 35 |
34 | 36 |  class BotSession:
35 |    | -    def __init__(self, parent, bots_interface, hardware_interface, work, context=None):
   | 37 | +    def __init__(self, parent, bots_interface, hardware_interface, work,
   | 38 | +                 context=None, update_period=1):
36 | 39 |          """ Unique bot ID within the farm used to identify this bot
37 | 40 |          Needs to be human readable.
38 | 41 |          All prior sessions with bot_id of same ID are invalidated.
... | ... | @@ -54,14 +57,37 @@ class BotSession: |
54 | 57 |          self._work = work
55 | 58 |          self._context = context
56 | 59 |
   | 60 | +        self.__connected = False
   | 61 | +        self.__update_period = update_period
   | 62 | +
57 | 63 |      @property
58 | 64 |      def bot_id(self):
59 | 65 |          return self.__bot_id
60 | 66 |
   | 67 | +    @property
   | 68 | +    def connected(self):
   | 69 | +        return self.__connected
   | 70 | +
   | 71 | +    async def run(self):
   | 72 | +        try:
   | 73 | +            while True:
   | 74 | +                if not self.connected:
   | 75 | +                    self.create_bot_session()
   | 76 | +                else:
   | 77 | +                    self.update_bot_session()
   | 78 | +
   | 79 | +                await asyncio.sleep(self.__update_period)
   | 80 | +        except asyncio.CancelledError:
   | 81 | +            pass
   | 82 | +
61 | 83 |      def create_bot_session(self):
62 | 84 |          self.__logger.debug("Creating bot session")
63 | 85 |
64 | 86 |          session = self._bots_interface.create_bot_session(self.__parent, self.get_pb2())
   | 87 | +        if session in list(grpc.StatusCode):
   | 88 | +            self.__connected = False
   | 89 | +            return
   | 90 | +        self.__connected = True
65 | 91 |          self.__name = session.name
66 | 92 |
67 | 93 |          self.__logger.info("Created bot session with name: [%s]", self.__name)
... | ... | @@ -73,6 +99,10 @@ class BotSession: |
 73 |  99 |          self.__logger.debug("Updating bot session: [%s]", self.__bot_id)
 74 | 100 |
 75 | 101 |          session = self._bots_interface.update_bot_session(self.get_pb2())
    | 102 | +        if session in list(grpc.StatusCode):
    | 103 | +            self.__connected = False
    | 104 | +            return
    | 105 | +        self.__connected = True
 76 | 106 |          server_ids = []
 77 | 107 |
 78 | 108 |          for lease in session.leases: