[Notes] [Git][BuildGrid/buildgrid][raoul/126-bot-reconnects] 2 commits: bot: Add logic to reconnect



Title: GitLab

Raoul Hidalgo Charman pushed to branch raoul/126-bot-reconnects at BuildGrid / buildgrid

Commits:

4 changed files:

Changes:

  • buildgrid/_app/commands/cmd_bot.py
    ... ... @@ -141,13 +141,10 @@ def run_dummy(context):
    141 141
         """
    
    142 142
         Creates a session, accepts leases, does fake work and updates the server.
    
    143 143
         """
    
    144
    -    try:
    
    145
    -        bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
    
    146
    -                                         dummy.work_dummy, context)
    
    147
    -        b = bot.Bot(bot_session, context.update_period)
    
    148
    -        b.session()
    
    149
    -    except KeyboardInterrupt:
    
    150
    -        pass
    
    144
    +    bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
    
    145
    +                                     dummy.work_dummy, context, context.update_period)
    
    146
    +    b = bot.Bot(bot_session)
    
    147
    +    b.session()
    
    151 148
     
    
    152 149
     
    
    153 150
     @cli.command('host-tools', short_help="Runs commands using the host's tools.")
    
    ... ... @@ -157,13 +154,10 @@ def run_host_tools(context):
    157 154
         Downloads inputs from CAS, runs build commands using host-tools and uploads
    
    158 155
         result back to CAS.
    
    159 156
         """
    
    160
    -    try:
    
    161
    -        bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
    
    162
    -                                         host.work_host_tools, context)
    
    163
    -        b = bot.Bot(bot_session, context.update_period)
    
    164
    -        b.session()
    
    165
    -    except KeyboardInterrupt:
    
    166
    -        pass
    
    157
    +    bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
    
    158
    +                                     host.work_host_tools, context, context.update_period)
    
    159
    +    b = bot.Bot(bot_session)
    
    160
    +    b.session()
    
    167 161
     
    
    168 162
     
    
    169 163
     @cli.command('buildbox', short_help="Run commands using the BuildBox tool.")
    
    ... ... @@ -179,10 +173,7 @@ def run_buildbox(context, local_cas, fuse_dir):
    179 173
         context.local_cas = local_cas
    
    180 174
         context.fuse_dir = fuse_dir
    
    181 175
     
    
    182
    -    try:
    
    183
    -        bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
    
    184
    -                                         buildbox.work_buildbox, context)
    
    185
    -        b = bot.Bot(bot_session, context.update_period)
    
    186
    -        b.session()
    
    187
    -    except KeyboardInterrupt:
    
    188
    -        pass
    176
    +    bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
    
    177
    +                                     buildbox.work_buildbox, context, context.update_period)
    
    178
    +    b = bot.Bot(bot_session)
    
    179
    +    b.session()

  • buildgrid/bot/bot.py
    ... ... @@ -20,13 +20,10 @@ import logging
    20 20
     class Bot:
    
    21 21
         """Creates a local BotSession."""
    
    22 22
     
    
    23
    -    def __init__(self, bot_session, update_period=1):
    
    24
    -        """
    
    25
    -        """
    
    23
    +    def __init__(self, bot_session):
    
    26 24
             self.__logger = logging.getLogger(__name__)
    
    27 25
     
    
    28 26
             self.__bot_session = bot_session
    
    29
    -        self.__update_period = update_period
    
    30 27
     
    
    31 28
             self.__loop = None
    
    32 29
     
    
    ... ... @@ -34,28 +31,16 @@ class Bot:
    34 31
             """Will create a session and periodically call the server."""
    
    35 32
     
    
    36 33
             self.__loop = asyncio.get_event_loop()
    
    37
    -        self.__bot_session.create_bot_session()
    
    38 34
     
    
    39 35
             try:
    
    40
    -            task = asyncio.ensure_future(self.__update_bot_session())
    
    36
    +            task = asyncio.ensure_future(self.__bot_session.run())
    
    41 37
                 self.__loop.run_until_complete(task)
    
    42
    -
    
    43 38
             except KeyboardInterrupt:
    
    44 39
                 pass
    
    45 40
     
    
    46 41
             self.__kill_everyone()
    
    47 42
             self.__logger.info("Bot shutdown.")
    
    48 43
     
    
    49
    -    async def __update_bot_session(self):
    
    50
    -        """Calls the server periodically to inform the server the client has not died."""
    
    51
    -        try:
    
    52
    -            while True:
    
    53
    -                self.__bot_session.update_bot_session()
    
    54
    -                await asyncio.sleep(self.__update_period)
    
    55
    -
    
    56
    -        except asyncio.CancelledError:
    
    57
    -            pass
    
    58
    -
    
    59 44
         def __kill_everyone(self):
    
    60 45
             """Cancels and waits for them to stop."""
    
    61 46
             self.__logger.info("Cancelling remaining tasks...")
    

  • buildgrid/bot/interface.py
    ... ... @@ -37,22 +37,25 @@ class BotInterface:
    37 37
             self._stub = bots_pb2_grpc.BotsStub(channel)
    
    38 38
     
    
    39 39
         def create_bot_session(self, parent, bot_session):
    
    40
    +        """ Create bot session request
    
    41
    +        Returns BotSession if correct else a grpc StatusCode
    
    42
    +        """
    
    40 43
             request = bots_pb2.CreateBotSessionRequest(parent=parent,
    
    41 44
                                                        bot_session=bot_session)
    
    42
    -        try:
    
    43
    -            return self._stub.CreateBotSession(request)
    
    44
    -
    
    45
    -        except grpc.RpcError as e:
    
    46
    -            self.__logger.error(e)
    
    47
    -            raise
    
    45
    +        return self._bot_call(self._stub.CreateBotSession, request)
    
    48 46
     
    
    49 47
         def update_bot_session(self, bot_session, update_mask=None):
    
    48
    +        """ Update bot session request
    
    49
    +        Returns BotSession if correct else a grpc StatusCode
    
    50
    +        """
    
    50 51
             request = bots_pb2.UpdateBotSessionRequest(name=bot_session.name,
    
    51 52
                                                        bot_session=bot_session,
    
    52 53
                                                        update_mask=update_mask)
    
    53
    -        try:
    
    54
    -            return self._stub.UpdateBotSession(request)
    
    54
    +        return self._bot_call(self._stub.UpdateBotSession, request)
    
    55 55
     
    
    56
    +    def _bot_call(self, call, request):
    
    57
    +        try:
    
    58
    +            return call(request)
    
    56 59
             except grpc.RpcError as e:
    
    57
    -            self.__logger.error(e)
    
    58
    -            raise
    60
    +            self.__logger.error(e.code())
    
    61
    +            return e.code()

  • buildgrid/bot/session.py
    ... ... @@ -19,8 +19,10 @@ Bot Session
    19 19
     
    
    20 20
     Allows connections
    
    21 21
     """
    
    22
    +import asyncio
    
    22 23
     import logging
    
    23 24
     import platform
    
    25
    +import grpc
    
    24 26
     
    
    25 27
     from buildgrid._enums import BotStatus, LeaseState
    
    26 28
     from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
    
    ... ... @@ -32,7 +34,8 @@ from .tenantmanager import TenantManager
    32 34
     
    
    33 35
     
    
    34 36
     class BotSession:
    
    35
    -    def __init__(self, parent, bots_interface, hardware_interface, work, context=None):
    
    37
    +    def __init__(self, parent, bots_interface, hardware_interface, work,
    
    38
    +                 context=None, update_period=1):
    
    36 39
             """ Unique bot ID within the farm used to identify this bot
    
    37 40
             Needs to be human readable.
    
    38 41
             All prior sessions with bot_id of same ID are invalidated.
    
    ... ... @@ -54,14 +57,37 @@ class BotSession:
    54 57
             self._work = work
    
    55 58
             self._context = context
    
    56 59
     
    
    60
    +        self.__connected = False
    
    61
    +        self.__update_period = update_period
    
    62
    +
    
    57 63
         @property
    
    58 64
         def bot_id(self):
    
    59 65
             return self.__bot_id
    
    60 66
     
    
    67
    +    @property
    
    68
    +    def connected(self):
    
    69
    +        return self.__connected
    
    70
    +
    
    71
    +    async def run(self):
    
    72
    +        try:
    
    73
    +            while True:
    
    74
    +                if not self.connected:
    
    75
    +                    self.create_bot_session()
    
    76
    +                else:
    
    77
    +                    self.update_bot_session()
    
    78
    +
    
    79
    +                await asyncio.sleep(self.__update_period)
    
    80
    +        except asyncio.CancelledError:
    
    81
    +            pass
    
    82
    +
    
    61 83
         def create_bot_session(self):
    
    62 84
             self.__logger.debug("Creating bot session")
    
    63 85
     
    
    64 86
             session = self._bots_interface.create_bot_session(self.__parent, self.get_pb2())
    
    87
    +        if session in list(grpc.StatusCode):
    
    88
    +            self.__connected = False
    
    89
    +            return
    
    90
    +        self.__connected = True
    
    65 91
             self.__name = session.name
    
    66 92
     
    
    67 93
             self.__logger.info("Created bot session with name: [%s]", self.__name)
    
    ... ... @@ -73,6 +99,10 @@ class BotSession:
    73 99
             self.__logger.debug("Updating bot session: [%s]", self.__bot_id)
    
    74 100
     
    
    75 101
             session = self._bots_interface.update_bot_session(self.get_pb2())
    
    102
    +        if session in list(grpc.StatusCode):
    
    103
    +            self.__connected = False
    
    104
    +            return
    
    105
    +        self.__connected = True
    
    76 106
             server_ids = []
    
    77 107
     
    
    78 108
             for lease in session.leases:
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]