Martin Blanchard pushed to branch mablanch/139-emit-build-metrics at BuildGrid / buildgrid
Commits:
- 
e2244954
by Martin Blanchard at 2018-11-14T12:13:10Z
- 
467f13ce
by Martin Blanchard at 2018-11-14T12:13:15Z
- 
6537e098
by Martin Blanchard at 2018-11-14T16:52:59Z
- 
e7446f2e
by Martin Blanchard at 2018-11-15T09:44:40Z
- 
b2df08ec
by Martin Blanchard at 2018-11-15T13:06:36Z
- 
24f70ff3
by Martin Blanchard at 2018-11-15T13:06:57Z
- 
49782b64
by Martin Blanchard at 2018-11-15T13:06:57Z
- 
a43c5625
by Martin Blanchard at 2018-11-15T13:33:02Z
- 
527d993b
by Martin Blanchard at 2018-11-15T13:33:04Z
- 
a7852bbf
by Martin Blanchard at 2018-11-15T13:33:04Z
- 
89168bc7
by Martin Blanchard at 2018-11-15T13:33:04Z
- 
4a509095
by Martin Blanchard at 2018-11-15T13:33:04Z
- 
a99dcb19
by Martin Blanchard at 2018-11-15T15:03:03Z
- 
8c7db057
by Martin Blanchard at 2018-11-15T15:03:20Z
- 
a09a386d
by Martin Blanchard at 2018-11-15T15:03:20Z
- 
97023a78
by Martin Blanchard at 2018-11-15T15:03:20Z
- 
b9a9a841
by Martin Blanchard at 2018-11-15T15:03:20Z
- 
8da1bf57
by Martin Blanchard at 2018-11-15T15:03:20Z
- 
129f3cfd
by Martin Blanchard at 2018-11-15T15:03:42Z
- 
e3e2aa2d
by Martin Blanchard at 2018-11-15T15:03:42Z
- 
7fa63766
by Martin Blanchard at 2018-11-15T15:07:17Z
21 changed files:
- .pylintrc
- buildgrid/_app/cli.py
- buildgrid/_app/commands/cmd_server.py
- buildgrid/_enums.py
- + buildgrid/_protos/buildgrid/__init__.py
- + buildgrid/_protos/buildgrid/v2/__init__.py
- + buildgrid/_protos/buildgrid/v2/monitoring.proto
- + buildgrid/_protos/buildgrid/v2/monitoring_pb2.py
- + buildgrid/_protos/buildgrid/v2/monitoring_pb2_grpc.py
- + buildgrid/server/_monitoring.py
- buildgrid/server/bots/instance.py
- buildgrid/server/bots/service.py
- buildgrid/server/execution/instance.py
- buildgrid/server/execution/service.py
- buildgrid/server/instance.py
- buildgrid/server/job.py
- buildgrid/server/operations/instance.py
- buildgrid/server/operations/service.py
- buildgrid/server/scheduler.py
- buildgrid/settings.py
- setup.py
Changes:
| ... | ... | @@ -460,6 +460,7 @@ known-third-party=boto3, | 
| 460 | 460 |                    enchant,
 | 
| 461 | 461 |                    google,
 | 
| 462 | 462 |                    grpc,
 | 
| 463 | +                  janus,
 | |
| 463 | 464 |                    moto,
 | 
| 464 | 465 |                    yaml
 | 
| 465 | 466 |  | 
| ... | ... | @@ -523,4 +524,4 @@ valid-metaclass-classmethod-first-arg=mcs | 
| 523 | 524 |  | 
| 524 | 525 |  # Exceptions that will emit a warning when being caught. Defaults to
 | 
| 525 | 526 |  # "Exception"
 | 
| 526 | -overgeneral-exceptions=Exception | |
| 527 | +overgeneral-exceptions=Exception | |
| \ No newline at end of file | 
| ... | ... | @@ -27,6 +27,7 @@ import os | 
| 27 | 27 |  import click
 | 
| 28 | 28 |  import grpc
 | 
| 29 | 29 |  | 
| 30 | +from buildgrid.settings import LOG_RECORD_FORMAT
 | |
| 30 | 31 |  from buildgrid.utils import read_file
 | 
| 31 | 32 |  | 
| 32 | 33 |  CONTEXT_SETTINGS = dict(auto_envvar_prefix='BUILDGRID')
 | 
| ... | ... | @@ -138,11 +139,38 @@ class BuildGridCLI(click.MultiCommand): | 
| 138 | 139 |          return mod.cli
 | 
| 139 | 140 |  | 
| 140 | 141 |  | 
| 142 | +class DebugFilter(logging.Filter):
 | |
| 143 | + | |
| 144 | +    def __init__(self, debug_domains, name=''):
 | |
| 145 | +        super().__init__(name=name)
 | |
| 146 | +        self.__domains_tree = {}
 | |
| 147 | + | |
| 148 | +        for domain in debug_domains.split(','):
 | |
| 149 | +            domains_tree = self.__domains_tree
 | |
| 150 | +            for label in domain.split('.'):
 | |
| 151 | +                if label not in domains_tree:
 | |
| 152 | +                    domains_tree[label] = {}
 | |
| 153 | +                domains_tree = domains_tree[label]
 | |
| 154 | + | |
| 155 | +    def filter(self, record):
 | |
| 156 | +        domains_tree = self.__domains_tree
 | |
| 157 | +        for label in record.name.split('.'):
 | |
| 158 | +            if '*' in domains_tree:
 | |
| 159 | +                return True
 | |
| 160 | +            if label not in domains_tree:
 | |
| 161 | +                return False
 | |
| 162 | +            domains_tree = domains_tree[label]
 | |
| 163 | + | |
| 164 | +        return True
 | |
| 165 | + | |
| 166 | + | |
| 141 | 167 |  @click.command(cls=BuildGridCLI, context_settings=CONTEXT_SETTINGS)
 | 
| 168 | +@click.option('--no-print', is_flag=True, show_default=True,
 | |
| 169 | +              help="Do not print log records to stdout/stderr.")
 | |
| 142 | 170 |  @click.option('-v', '--verbose', count=True,
 | 
| 143 | 171 |                help='Increase log verbosity level.')
 | 
| 144 | 172 |  @pass_context
 | 
| 145 | -def cli(context, verbose):
 | |
| 173 | +def cli(context, no_print, verbose):
 | |
| 146 | 174 |      """BuildGrid App"""
 | 
| 147 | 175 |      logger = logging.getLogger()
 | 
| 148 | 176 |  | 
| ... | ... | @@ -152,8 +180,20 @@ def cli(context, verbose): | 
| 152 | 180 |      for log_filter in logger.filters[:]:
 | 
| 153 | 181 |          logger.removeFilter(log_filter)
 | 
| 154 | 182 |  | 
| 155 | -    logging.basicConfig(
 | |
| 156 | -        format='%(asctime)s:%(name)32.32s][%(levelname)5.5s]: %(message)s')
 | |
| 183 | +    # Do not register a stream handler if no-print is requested:
 | |
| 184 | +    if not no_print:
 | |
| 185 | +        log_handler = logging.StreamHandler()
 | |
| 186 | + | |
| 187 | +        # Filter debug messages using BGD_MESSAGE_DEBUG value:
 | |
| 188 | +        debug_domains = os.environ.get('BGD_MESSAGE_DEBUG', None)
 | |
| 189 | +        if debug_domains:
 | |
| 190 | +            log_handler.addFilter(DebugFilter(debug_domains))
 | |
| 191 | + | |
| 192 | +    else:
 | |
| 193 | +        log_handler = logging.NullHandler()
 | |
| 194 | + | |
| 195 | +    logging.basicConfig(format=LOG_RECORD_FORMAT,
 | |
| 196 | +                        handlers=[log_handler])
 | |
| 157 | 197 |  | 
| 158 | 198 |      if verbose == 1:
 | 
| 159 | 199 |          logger.setLevel(logging.WARNING)
 | 
| ... | ... | @@ -161,5 +201,3 @@ def cli(context, verbose): | 
| 161 | 201 |          logger.setLevel(logging.INFO)
 | 
| 162 | 202 |      elif verbose >= 3:
 | 
| 163 | 203 |          logger.setLevel(logging.DEBUG) | 
| 164 | -    else:
 | |
| 165 | -        logger.setLevel(logging.ERROR) | 
| ... | ... | @@ -20,7 +20,6 @@ Server command | 
| 20 | 20 |  Create a BuildGrid server.
 | 
| 21 | 21 |  """
 | 
| 22 | 22 |  | 
| 23 | -import asyncio
 | |
| 24 | 23 |  import logging
 | 
| 25 | 24 |  import sys
 | 
| 26 | 25 |  | 
| ... | ... | @@ -52,18 +51,14 @@ def start(context, config): | 
| 52 | 51 |          click.echo("ERROR: Could not parse config: {}.\n".format(str(e)), err=True)
 | 
| 53 | 52 |          sys.exit(-1)
 | 
| 54 | 53 |  | 
| 55 | -    loop = asyncio.get_event_loop()
 | |
| 56 | 54 |      try:
 | 
| 57 | 55 |          server.start()
 | 
| 58 | -        loop.run_forever()
 | |
| 59 | 56 |  | 
| 60 | 57 |      except KeyboardInterrupt:
 | 
| 61 | 58 |          pass
 | 
| 62 | 59 |  | 
| 63 | 60 |      finally:
 | 
| 64 | -        context.logger.info("Stopping server")
 | |
| 65 | 61 |          server.stop()
 | 
| 66 | -        loop.close()
 | |
| 67 | 62 |  | 
| 68 | 63 |  | 
| 69 | 64 |  def _create_server_from_config(config):
 | 
| ... | ... | @@ -16,9 +16,13 @@ | 
| 16 | 16 |  from enum import Enum
 | 
| 17 | 17 |  | 
| 18 | 18 |  from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
 | 
| 19 | +from buildgrid._protos.buildgrid.v2 import monitoring_pb2
 | |
| 19 | 20 |  from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
 | 
| 20 | 21 |  | 
| 21 | 22 |  | 
| 23 | +# RWAPI enumerations
 | |
| 24 | +# From google/devtools/remoteworkers/v1test2/bots.proto:
 | |
| 25 | + | |
| 22 | 26 |  class BotStatus(Enum):
 | 
| 23 | 27 |      # Initially unknown state.
 | 
| 24 | 28 |      BOT_STATUS_UNSPECIFIED = bots_pb2.BotStatus.Value('BOT_STATUS_UNSPECIFIED')
 | 
| ... | ... | @@ -45,6 +49,9 @@ class LeaseState(Enum): | 
| 45 | 49 |      CANCELLED = bots_pb2.LeaseState.Value('CANCELLED')
 | 
| 46 | 50 |  | 
| 47 | 51 |  | 
| 52 | +# REAPI enumerations
 | |
| 53 | +# From build/bazel/remote/execution/v2/remote_execution.proto:
 | |
| 54 | + | |
| 48 | 55 |  class OperationStage(Enum):
 | 
| 49 | 56 |      # Initially unknown stage.
 | 
| 50 | 57 |      UNKNOWN = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('UNKNOWN')
 | 
| ... | ... | @@ -56,3 +63,41 @@ class OperationStage(Enum): | 
| 56 | 63 |      EXECUTING = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('EXECUTING')
 | 
| 57 | 64 |      # Finished execution.
 | 
| 58 | 65 |      COMPLETED = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('COMPLETED')
 | 
| 66 | + | |
| 67 | + | |
| 68 | +# Internal enumerations
 | |
| 69 | +# From buildgrid.v2/monitoring.proto:
 | |
| 70 | + | |
| 71 | +class LogRecordLevel(Enum):
 | |
| 72 | +    # Initially unknown level.
 | |
| 73 | +    NOTSET = monitoring_pb2.LogRecord.Level.Value('NOTSET')
 | |
| 74 | +    # Debug message severity level.
 | |
| 75 | +    DEBUG = monitoring_pb2.LogRecord.Level.Value('DEBUG')
 | |
| 76 | +    # Information message severity level.
 | |
| 77 | +    INFO = monitoring_pb2.LogRecord.Level.Value('INFO')
 | |
| 78 | +    # Warning message severity level.
 | |
| 79 | +    WARNING = monitoring_pb2.LogRecord.Level.Value('WARNING')
 | |
| 80 | +    # Error message severity level.
 | |
| 81 | +    ERROR = monitoring_pb2.LogRecord.Level.Value('ERROR')
 | |
| 82 | +    # Critical message severity level.
 | |
| 83 | +    CRITICAL = monitoring_pb2.LogRecord.Level.Value('CRITICAL')
 | |
| 84 | + | |
| 85 | + | |
| 86 | +class MetricRecordDomain(Enum):
 | |
| 87 | +    # Initially unknown domain.
 | |
| 88 | +    UNKNOWN = monitoring_pb2.MetricRecord.Domain.Value('UNKNOWN')
 | |
| 89 | +    # A server state related metric.
 | |
| 90 | +    STATE = monitoring_pb2.MetricRecord.Domain.Value('STATE')
 | |
| 91 | +    # A build execution related metric.
 | |
| 92 | +    BUILD = monitoring_pb2.MetricRecord.Domain.Value('BUILD')
 | |
| 93 | + | |
| 94 | + | |
| 95 | +class MetricRecordType(Enum):
 | |
| 96 | +    # Initially unknown type.
 | |
| 97 | +    NONE = monitoring_pb2.MetricRecord.Type.Value('NONE')
 | |
| 98 | +    # A metric for counting.
 | |
| 99 | +    COUNTER = monitoring_pb2.MetricRecord.Type.Value('COUNTER')
 | |
| 100 | +    # A metric for measuring a duration.
 | |
| 101 | +    TIMER = monitoring_pb2.MetricRecord.Type.Value('TIMER')
 | |
| 102 | +    # A metric holding an arbitrary value.
 | |
| 103 | +    GAUGE = monitoring_pb2.MetricRecord.Type.Value('GAUGE') | 
| 1 | +// Copyright (C) 2018 Bloomberg LP
 | |
| 2 | +//
 | |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License");
 | |
| 4 | +// you may not use this file except in compliance with the License.
 | |
| 5 | +// You may obtain a copy of the License at
 | |
| 6 | +//
 | |
| 7 | +//  <http://www.apache.org/licenses/LICENSE-2.0>
 | |
| 8 | +//
 | |
| 9 | +// Unless required by applicable law or agreed to in writing, software
 | |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS,
 | |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| 12 | +// See the License for the specific language governing permissions and
 | |
| 13 | +// limitations under the License.
 | |
| 14 | + | |
| 15 | +syntax = "proto3";
 | |
| 16 | + | |
| 17 | +package buildgrid.v2;
 | |
| 18 | + | |
| 19 | +import "google/api/annotations.proto";
 | |
| 20 | +import "google/protobuf/duration.proto";
 | |
| 21 | +import "google/protobuf/timestamp.proto";
 | |
| 22 | + | |
| 23 | +message BusMessage {
 | |
| 24 | +  // The position of this message in the bus stream.
 | |
| 25 | +  int64 sequence_number = 1;
 | |
| 26 | + | |
| 27 | +  // The carried message.
 | |
| 28 | +  oneof record {
 | |
| 29 | +    LogRecord log_record = 2;
 | |
| 30 | +    MetricRecord metric_record = 3;
 | |
| 31 | +  }
 | |
| 32 | +}
 | |
| 33 | + | |
| 34 | +message LogRecord {
 | |
| 35 | +  // When the record has been created.
 | |
| 36 | +  google.protobuf.Timestamp creation_timestamp = 1;
 | |
| 37 | + | |
| 38 | +  enum Level {
 | |
| 39 | +    NOTSET = 0;
 | |
| 40 | +    // Debug message severity level.
 | |
| 41 | +    DEBUG = 1;
 | |
| 42 | +    // Information message severity level.
 | |
| 43 | +    INFO = 2;
 | |
| 44 | +    // Warning message severity level.
 | |
| 45 | +    WARNING = 3;
 | |
| 46 | +    // Error message severity level.
 | |
| 47 | +    ERROR = 4;
 | |
| 48 | +    // Critical message severity level.
 | |
| 49 | +    CRITICAL = 5;
 | |
| 50 | +  }
 | |
| 51 | + | |
| 52 | +  // The domain name for the record.
 | |
| 53 | +  string domain = 2;
 | |
| 54 | + | |
| 55 | +  // The severity level of the record.
 | |
| 56 | +  Level level = 3;
 | |
| 57 | + | |
| 58 | +  // The human-readable record's message.
 | |
| 59 | +  string message = 4;
 | |
| 60 | + | |
| 61 | +  // An optional list of additional metadata.
 | |
| 62 | +  map<string, string> extra = 5;
 | |
| 63 | +}
 | |
| 64 | + | |
| 65 | +message MetricRecord {
 | |
| 66 | +  // When the metric has been created.
 | |
| 67 | +  google.protobuf.Timestamp creation_timestamp = 1;
 | |
| 68 | + | |
| 69 | +  enum Domain {
 | |
| 70 | +    UNKNOWN = 0;
 | |
| 71 | +    // A server state related metric.
 | |
| 72 | +    STATE = 1;
 | |
| 73 | +    // A build execution related metric.
 | |
| 74 | +    BUILD = 2;
 | |
| 75 | +  }
 | |
| 76 | + | |
| 77 | +  // The domain for the record.
 | |
| 78 | +  Domain domain = 2;
 | |
| 79 | + | |
| 80 | +  enum Type {
 | |
| 81 | +    NONE = 0;
 | |
| 82 | +    // A metric for counting.
 | |
| 83 | +    COUNTER = 1;
 | |
| 84 | +    // A metric for measuring a duration.
 | |
| 85 | +    TIMER = 2;
 | |
| 86 | +    // A metric holding an arbitrary value.
 | |
| 87 | +    GAUGE = 3;
 | |
| 88 | +  }
 | |
| 89 | + | |
| 90 | +  // The type of metric, see Type.
 | |
| 91 | +  Type type = 3;
 | |
| 92 | + | |
| 93 | +  // The name identifying the metric.
 | |
| 94 | +  string name = 4;
 | |
| 95 | + | |
| 96 | +  // The carried value, depending on the metric's type.
 | |
| 97 | +  oneof data {
 | |
| 98 | +    // Set for Type.COUNTER metrics.
 | |
| 99 | +    int32 count = 5;
 | |
| 100 | +    // Set for Type.TIMER metrics.
 | |
| 101 | +    google.protobuf.Duration duration = 6;
 | |
| 102 | +    // Set for Type.GAUGE metrics.
 | |
| 103 | +    int32 value = 7;
 | |
| 104 | +  }
 | |
| 105 | + | |
| 106 | +  // An optional list of additional metadata.
 | |
| 107 | +  map<string, string> extra = 8;
 | |
| 108 | +} | |
| \ No newline at end of file | 
| 1 | +# Generated by the protocol buffer compiler.  DO NOT EDIT!
 | |
| 2 | +# source: buildgrid/v2/monitoring.proto
 | |
| 3 | + | |
| 4 | +import sys
 | |
| 5 | +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
 | |
| 6 | +from google.protobuf import descriptor as _descriptor
 | |
| 7 | +from google.protobuf import message as _message
 | |
| 8 | +from google.protobuf import reflection as _reflection
 | |
| 9 | +from google.protobuf import symbol_database as _symbol_database
 | |
| 10 | +# @@protoc_insertion_point(imports)
 | |
| 11 | + | |
| 12 | +_sym_db = _symbol_database.Default()
 | |
| 13 | + | |
| 14 | + | |
| 15 | +from buildgrid._protos.google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2
 | |
| 16 | +from google.protobuf import duration_pb2 as google_dot_protobuf_dot_duration__pb2
 | |
| 17 | +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
 | |
| 18 | + | |
| 19 | + | |
| 20 | +DESCRIPTOR = _descriptor.FileDescriptor(
 | |
| 21 | +  name='buildgrid/v2/monitoring.proto',
 | |
| 22 | +  package='buildgrid.v2',
 | |
| 23 | +  syntax='proto3',
 | |
| 24 | +  serialized_options=None,
 | |
| 25 | +  serialized_pb=_b('\n\x1d\x62uildgrid/v2/monitoring.proto\x12\x0c\x62uildgrid.v2\x1a\x1cgoogle/api/annotations.proto\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"\x93\x01\n\nBusMessage\x12\x17\n\x0fsequence_number\x18\x01 \x01(\x03\x12-\n\nlog_record\x18\x02 \x01(\x0b\x32\x17.buildgrid.v2.LogRecordH\x00\x12\x33\n\rmetric_record\x18\x03 \x01(\x0b\x32\x1a.buildgrid.v2.MetricRecordH\x00\x42\x08\n\x06record\"\xc3\x02\n\tLogRecord\x12\x36\n\x12\x63reation_timestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0e\n\x06\x64omain\x18\x02 \x01(\t\x12,\n\x05level\x18\x03 \x01(\x0e\x32\x1d.buildgrid.v2.LogRecord.Level\x12\x0f\n\x07message\x18\x04 \x01(\t\x12\x31\n\x05\x65xtra\x18\x05 \x03(\x0b\x32\".buildgrid.v2.LogRecord.ExtraEntry\x1a,\n\nExtraEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"N\n\x05Level\x12\n\n\x06NOTSET\x10\x00\x12\t\n\x05\x44\x45\x42UG\x10\x01\x12\x08\n\x04INFO\x10\x02\x12\x0b\n\x07WARNING\x10\x03\x12\t\n\x05\x45RROR\x10\x04\x12\x0c\n\x08\x43RITICAL\x10\x05\"\xd5\x03\n\x0cMetricRecord\x12\x36\n\x12\x63reation_timestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x31\n\x06\x64omain\x18\x02 \x01(\x0e\x32!.buildgrid.v2.MetricRecord.Domain\x12-\n\x04type\x18\x03 \x01(\x0e\x32\x1f.buildgrid.v2.MetricRecord.Type\x12\x0c\n\x04name\x18\x04 \x01(\t\x12\x0f\n\x05\x63ount\x18\x05 \x01(\x05H\x00\x12-\n\x08\x64uration\x18\x06 \x01(\x0b\x32\x19.google.protobuf.DurationH\x00\x12\x0f\n\x05value\x18\x07 \x01(\x05H\x00\x12\x34\n\x05\x65xtra\x18\x08 \x03(\x0b\x32%.buildgrid.v2.MetricRecord.ExtraEntry\x1a,\n\nExtraEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"+\n\x06\x44omain\x12\x0b\n\x07UNKNOWN\x10\x00\x12\t\n\x05STATE\x10\x01\x12\t\n\x05\x42UILD\x10\x02\"3\n\x04Type\x12\x08\n\x04NONE\x10\x00\x12\x0b\n\x07\x43OUNTER\x10\x01\x12\t\n\x05TIMER\x10\x02\x12\t\n\x05GAUGE\x10\x03\x42\x06\n\x04\x64\x61tab\x06proto3')
 | |
| 26 | +  ,
 | |
| 27 | +  dependencies=[google_dot_api_dot_annotations__pb2.DESCRIPTOR,google_dot_protobuf_dot_duration__pb2.DESCRIPTOR,google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR,])
 | |
| 28 | + | |
| 29 | + | |
| 30 | + | |
| 31 | +_LOGRECORD_LEVEL = _descriptor.EnumDescriptor(
 | |
| 32 | +  name='Level',
 | |
| 33 | +  full_name='buildgrid.v2.LogRecord.Level',
 | |
| 34 | +  filename=None,
 | |
| 35 | +  file=DESCRIPTOR,
 | |
| 36 | +  values=[
 | |
| 37 | +    _descriptor.EnumValueDescriptor(
 | |
| 38 | +      name='NOTSET', index=0, number=0,
 | |
| 39 | +      serialized_options=None,
 | |
| 40 | +      type=None),
 | |
| 41 | +    _descriptor.EnumValueDescriptor(
 | |
| 42 | +      name='DEBUG', index=1, number=1,
 | |
| 43 | +      serialized_options=None,
 | |
| 44 | +      type=None),
 | |
| 45 | +    _descriptor.EnumValueDescriptor(
 | |
| 46 | +      name='INFO', index=2, number=2,
 | |
| 47 | +      serialized_options=None,
 | |
| 48 | +      type=None),
 | |
| 49 | +    _descriptor.EnumValueDescriptor(
 | |
| 50 | +      name='WARNING', index=3, number=3,
 | |
| 51 | +      serialized_options=None,
 | |
| 52 | +      type=None),
 | |
| 53 | +    _descriptor.EnumValueDescriptor(
 | |
| 54 | +      name='ERROR', index=4, number=4,
 | |
| 55 | +      serialized_options=None,
 | |
| 56 | +      type=None),
 | |
| 57 | +    _descriptor.EnumValueDescriptor(
 | |
| 58 | +      name='CRITICAL', index=5, number=5,
 | |
| 59 | +      serialized_options=None,
 | |
| 60 | +      type=None),
 | |
| 61 | +  ],
 | |
| 62 | +  containing_type=None,
 | |
| 63 | +  serialized_options=None,
 | |
| 64 | +  serialized_start=538,
 | |
| 65 | +  serialized_end=616,
 | |
| 66 | +)
 | |
| 67 | +_sym_db.RegisterEnumDescriptor(_LOGRECORD_LEVEL)
 | |
| 68 | + | |
| 69 | +_METRICRECORD_DOMAIN = _descriptor.EnumDescriptor(
 | |
| 70 | +  name='Domain',
 | |
| 71 | +  full_name='buildgrid.v2.MetricRecord.Domain',
 | |
| 72 | +  filename=None,
 | |
| 73 | +  file=DESCRIPTOR,
 | |
| 74 | +  values=[
 | |
| 75 | +    _descriptor.EnumValueDescriptor(
 | |
| 76 | +      name='UNKNOWN', index=0, number=0,
 | |
| 77 | +      serialized_options=None,
 | |
| 78 | +      type=None),
 | |
| 79 | +    _descriptor.EnumValueDescriptor(
 | |
| 80 | +      name='STATE', index=1, number=1,
 | |
| 81 | +      serialized_options=None,
 | |
| 82 | +      type=None),
 | |
| 83 | +    _descriptor.EnumValueDescriptor(
 | |
| 84 | +      name='BUILD', index=2, number=2,
 | |
| 85 | +      serialized_options=None,
 | |
| 86 | +      type=None),
 | |
| 87 | +  ],
 | |
| 88 | +  containing_type=None,
 | |
| 89 | +  serialized_options=None,
 | |
| 90 | +  serialized_start=984,
 | |
| 91 | +  serialized_end=1027,
 | |
| 92 | +)
 | |
| 93 | +_sym_db.RegisterEnumDescriptor(_METRICRECORD_DOMAIN)
 | |
| 94 | + | |
| 95 | +_METRICRECORD_TYPE = _descriptor.EnumDescriptor(
 | |
| 96 | +  name='Type',
 | |
| 97 | +  full_name='buildgrid.v2.MetricRecord.Type',
 | |
| 98 | +  filename=None,
 | |
| 99 | +  file=DESCRIPTOR,
 | |
| 100 | +  values=[
 | |
| 101 | +    _descriptor.EnumValueDescriptor(
 | |
| 102 | +      name='NONE', index=0, number=0,
 | |
| 103 | +      serialized_options=None,
 | |
| 104 | +      type=None),
 | |
| 105 | +    _descriptor.EnumValueDescriptor(
 | |
| 106 | +      name='COUNTER', index=1, number=1,
 | |
| 107 | +      serialized_options=None,
 | |
| 108 | +      type=None),
 | |
| 109 | +    _descriptor.EnumValueDescriptor(
 | |
| 110 | +      name='TIMER', index=2, number=2,
 | |
| 111 | +      serialized_options=None,
 | |
| 112 | +      type=None),
 | |
| 113 | +    _descriptor.EnumValueDescriptor(
 | |
| 114 | +      name='GAUGE', index=3, number=3,
 | |
| 115 | +      serialized_options=None,
 | |
| 116 | +      type=None),
 | |
| 117 | +  ],
 | |
| 118 | +  containing_type=None,
 | |
| 119 | +  serialized_options=None,
 | |
| 120 | +  serialized_start=1029,
 | |
| 121 | +  serialized_end=1080,
 | |
| 122 | +)
 | |
| 123 | +_sym_db.RegisterEnumDescriptor(_METRICRECORD_TYPE)
 | |
| 124 | + | |
| 125 | + | |
| 126 | +_BUSMESSAGE = _descriptor.Descriptor(
 | |
| 127 | +  name='BusMessage',
 | |
| 128 | +  full_name='buildgrid.v2.BusMessage',
 | |
| 129 | +  filename=None,
 | |
| 130 | +  file=DESCRIPTOR,
 | |
| 131 | +  containing_type=None,
 | |
| 132 | +  fields=[
 | |
| 133 | +    _descriptor.FieldDescriptor(
 | |
| 134 | +      name='sequence_number', full_name='buildgrid.v2.BusMessage.sequence_number', index=0,
 | |
| 135 | +      number=1, type=3, cpp_type=2, label=1,
 | |
| 136 | +      has_default_value=False, default_value=0,
 | |
| 137 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 138 | +      is_extension=False, extension_scope=None,
 | |
| 139 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 140 | +    _descriptor.FieldDescriptor(
 | |
| 141 | +      name='log_record', full_name='buildgrid.v2.BusMessage.log_record', index=1,
 | |
| 142 | +      number=2, type=11, cpp_type=10, label=1,
 | |
| 143 | +      has_default_value=False, default_value=None,
 | |
| 144 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 145 | +      is_extension=False, extension_scope=None,
 | |
| 146 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 147 | +    _descriptor.FieldDescriptor(
 | |
| 148 | +      name='metric_record', full_name='buildgrid.v2.BusMessage.metric_record', index=2,
 | |
| 149 | +      number=3, type=11, cpp_type=10, label=1,
 | |
| 150 | +      has_default_value=False, default_value=None,
 | |
| 151 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 152 | +      is_extension=False, extension_scope=None,
 | |
| 153 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 154 | +  ],
 | |
| 155 | +  extensions=[
 | |
| 156 | +  ],
 | |
| 157 | +  nested_types=[],
 | |
| 158 | +  enum_types=[
 | |
| 159 | +  ],
 | |
| 160 | +  serialized_options=None,
 | |
| 161 | +  is_extendable=False,
 | |
| 162 | +  syntax='proto3',
 | |
| 163 | +  extension_ranges=[],
 | |
| 164 | +  oneofs=[
 | |
| 165 | +    _descriptor.OneofDescriptor(
 | |
| 166 | +      name='record', full_name='buildgrid.v2.BusMessage.record',
 | |
| 167 | +      index=0, containing_type=None, fields=[]),
 | |
| 168 | +  ],
 | |
| 169 | +  serialized_start=143,
 | |
| 170 | +  serialized_end=290,
 | |
| 171 | +)
 | |
| 172 | + | |
| 173 | + | |
| 174 | +_LOGRECORD_EXTRAENTRY = _descriptor.Descriptor(
 | |
| 175 | +  name='ExtraEntry',
 | |
| 176 | +  full_name='buildgrid.v2.LogRecord.ExtraEntry',
 | |
| 177 | +  filename=None,
 | |
| 178 | +  file=DESCRIPTOR,
 | |
| 179 | +  containing_type=None,
 | |
| 180 | +  fields=[
 | |
| 181 | +    _descriptor.FieldDescriptor(
 | |
| 182 | +      name='key', full_name='buildgrid.v2.LogRecord.ExtraEntry.key', index=0,
 | |
| 183 | +      number=1, type=9, cpp_type=9, label=1,
 | |
| 184 | +      has_default_value=False, default_value=_b("").decode('utf-8'),
 | |
| 185 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 186 | +      is_extension=False, extension_scope=None,
 | |
| 187 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 188 | +    _descriptor.FieldDescriptor(
 | |
| 189 | +      name='value', full_name='buildgrid.v2.LogRecord.ExtraEntry.value', index=1,
 | |
| 190 | +      number=2, type=9, cpp_type=9, label=1,
 | |
| 191 | +      has_default_value=False, default_value=_b("").decode('utf-8'),
 | |
| 192 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 193 | +      is_extension=False, extension_scope=None,
 | |
| 194 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 195 | +  ],
 | |
| 196 | +  extensions=[
 | |
| 197 | +  ],
 | |
| 198 | +  nested_types=[],
 | |
| 199 | +  enum_types=[
 | |
| 200 | +  ],
 | |
| 201 | +  serialized_options=_b('8\001'),
 | |
| 202 | +  is_extendable=False,
 | |
| 203 | +  syntax='proto3',
 | |
| 204 | +  extension_ranges=[],
 | |
| 205 | +  oneofs=[
 | |
| 206 | +  ],
 | |
| 207 | +  serialized_start=492,
 | |
| 208 | +  serialized_end=536,
 | |
| 209 | +)
 | |
| 210 | + | |
| 211 | +_LOGRECORD = _descriptor.Descriptor(
 | |
| 212 | +  name='LogRecord',
 | |
| 213 | +  full_name='buildgrid.v2.LogRecord',
 | |
| 214 | +  filename=None,
 | |
| 215 | +  file=DESCRIPTOR,
 | |
| 216 | +  containing_type=None,
 | |
| 217 | +  fields=[
 | |
| 218 | +    _descriptor.FieldDescriptor(
 | |
| 219 | +      name='creation_timestamp', full_name='buildgrid.v2.LogRecord.creation_timestamp', index=0,
 | |
| 220 | +      number=1, type=11, cpp_type=10, label=1,
 | |
| 221 | +      has_default_value=False, default_value=None,
 | |
| 222 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 223 | +      is_extension=False, extension_scope=None,
 | |
| 224 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 225 | +    _descriptor.FieldDescriptor(
 | |
| 226 | +      name='domain', full_name='buildgrid.v2.LogRecord.domain', index=1,
 | |
| 227 | +      number=2, type=9, cpp_type=9, label=1,
 | |
| 228 | +      has_default_value=False, default_value=_b("").decode('utf-8'),
 | |
| 229 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 230 | +      is_extension=False, extension_scope=None,
 | |
| 231 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 232 | +    _descriptor.FieldDescriptor(
 | |
| 233 | +      name='level', full_name='buildgrid.v2.LogRecord.level', index=2,
 | |
| 234 | +      number=3, type=14, cpp_type=8, label=1,
 | |
| 235 | +      has_default_value=False, default_value=0,
 | |
| 236 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 237 | +      is_extension=False, extension_scope=None,
 | |
| 238 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 239 | +    _descriptor.FieldDescriptor(
 | |
| 240 | +      name='message', full_name='buildgrid.v2.LogRecord.message', index=3,
 | |
| 241 | +      number=4, type=9, cpp_type=9, label=1,
 | |
| 242 | +      has_default_value=False, default_value=_b("").decode('utf-8'),
 | |
| 243 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 244 | +      is_extension=False, extension_scope=None,
 | |
| 245 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 246 | +    _descriptor.FieldDescriptor(
 | |
| 247 | +      name='extra', full_name='buildgrid.v2.LogRecord.extra', index=4,
 | |
| 248 | +      number=5, type=11, cpp_type=10, label=3,
 | |
| 249 | +      has_default_value=False, default_value=[],
 | |
| 250 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 251 | +      is_extension=False, extension_scope=None,
 | |
| 252 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 253 | +  ],
 | |
| 254 | +  extensions=[
 | |
| 255 | +  ],
 | |
| 256 | +  nested_types=[_LOGRECORD_EXTRAENTRY, ],
 | |
| 257 | +  enum_types=[
 | |
| 258 | +    _LOGRECORD_LEVEL,
 | |
| 259 | +  ],
 | |
| 260 | +  serialized_options=None,
 | |
| 261 | +  is_extendable=False,
 | |
| 262 | +  syntax='proto3',
 | |
| 263 | +  extension_ranges=[],
 | |
| 264 | +  oneofs=[
 | |
| 265 | +  ],
 | |
| 266 | +  serialized_start=293,
 | |
| 267 | +  serialized_end=616,
 | |
| 268 | +)
 | |
| 269 | + | |
| 270 | + | |
| 271 | +_METRICRECORD_EXTRAENTRY = _descriptor.Descriptor(
 | |
| 272 | +  name='ExtraEntry',
 | |
| 273 | +  full_name='buildgrid.v2.MetricRecord.ExtraEntry',
 | |
| 274 | +  filename=None,
 | |
| 275 | +  file=DESCRIPTOR,
 | |
| 276 | +  containing_type=None,
 | |
| 277 | +  fields=[
 | |
| 278 | +    _descriptor.FieldDescriptor(
 | |
| 279 | +      name='key', full_name='buildgrid.v2.MetricRecord.ExtraEntry.key', index=0,
 | |
| 280 | +      number=1, type=9, cpp_type=9, label=1,
 | |
| 281 | +      has_default_value=False, default_value=_b("").decode('utf-8'),
 | |
| 282 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 283 | +      is_extension=False, extension_scope=None,
 | |
| 284 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 285 | +    _descriptor.FieldDescriptor(
 | |
| 286 | +      name='value', full_name='buildgrid.v2.MetricRecord.ExtraEntry.value', index=1,
 | |
| 287 | +      number=2, type=9, cpp_type=9, label=1,
 | |
| 288 | +      has_default_value=False, default_value=_b("").decode('utf-8'),
 | |
| 289 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 290 | +      is_extension=False, extension_scope=None,
 | |
| 291 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 292 | +  ],
 | |
| 293 | +  extensions=[
 | |
| 294 | +  ],
 | |
| 295 | +  nested_types=[],
 | |
| 296 | +  enum_types=[
 | |
| 297 | +  ],
 | |
| 298 | +  serialized_options=_b('8\001'),
 | |
| 299 | +  is_extendable=False,
 | |
| 300 | +  syntax='proto3',
 | |
| 301 | +  extension_ranges=[],
 | |
| 302 | +  oneofs=[
 | |
| 303 | +  ],
 | |
| 304 | +  serialized_start=492,
 | |
| 305 | +  serialized_end=536,
 | |
| 306 | +)
 | |
| 307 | + | |
| 308 | +_METRICRECORD = _descriptor.Descriptor(
 | |
| 309 | +  name='MetricRecord',
 | |
| 310 | +  full_name='buildgrid.v2.MetricRecord',
 | |
| 311 | +  filename=None,
 | |
| 312 | +  file=DESCRIPTOR,
 | |
| 313 | +  containing_type=None,
 | |
| 314 | +  fields=[
 | |
| 315 | +    _descriptor.FieldDescriptor(
 | |
| 316 | +      name='creation_timestamp', full_name='buildgrid.v2.MetricRecord.creation_timestamp', index=0,
 | |
| 317 | +      number=1, type=11, cpp_type=10, label=1,
 | |
| 318 | +      has_default_value=False, default_value=None,
 | |
| 319 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 320 | +      is_extension=False, extension_scope=None,
 | |
| 321 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 322 | +    _descriptor.FieldDescriptor(
 | |
| 323 | +      name='domain', full_name='buildgrid.v2.MetricRecord.domain', index=1,
 | |
| 324 | +      number=2, type=14, cpp_type=8, label=1,
 | |
| 325 | +      has_default_value=False, default_value=0,
 | |
| 326 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 327 | +      is_extension=False, extension_scope=None,
 | |
| 328 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 329 | +    _descriptor.FieldDescriptor(
 | |
| 330 | +      name='type', full_name='buildgrid.v2.MetricRecord.type', index=2,
 | |
| 331 | +      number=3, type=14, cpp_type=8, label=1,
 | |
| 332 | +      has_default_value=False, default_value=0,
 | |
| 333 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 334 | +      is_extension=False, extension_scope=None,
 | |
| 335 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 336 | +    _descriptor.FieldDescriptor(
 | |
| 337 | +      name='name', full_name='buildgrid.v2.MetricRecord.name', index=3,
 | |
| 338 | +      number=4, type=9, cpp_type=9, label=1,
 | |
| 339 | +      has_default_value=False, default_value=_b("").decode('utf-8'),
 | |
| 340 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 341 | +      is_extension=False, extension_scope=None,
 | |
| 342 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 343 | +    _descriptor.FieldDescriptor(
 | |
| 344 | +      name='count', full_name='buildgrid.v2.MetricRecord.count', index=4,
 | |
| 345 | +      number=5, type=5, cpp_type=1, label=1,
 | |
| 346 | +      has_default_value=False, default_value=0,
 | |
| 347 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 348 | +      is_extension=False, extension_scope=None,
 | |
| 349 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 350 | +    _descriptor.FieldDescriptor(
 | |
| 351 | +      name='duration', full_name='buildgrid.v2.MetricRecord.duration', index=5,
 | |
| 352 | +      number=6, type=11, cpp_type=10, label=1,
 | |
| 353 | +      has_default_value=False, default_value=None,
 | |
| 354 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 355 | +      is_extension=False, extension_scope=None,
 | |
| 356 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 357 | +    _descriptor.FieldDescriptor(
 | |
| 358 | +      name='value', full_name='buildgrid.v2.MetricRecord.value', index=6,
 | |
| 359 | +      number=7, type=5, cpp_type=1, label=1,
 | |
| 360 | +      has_default_value=False, default_value=0,
 | |
| 361 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 362 | +      is_extension=False, extension_scope=None,
 | |
| 363 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 364 | +    _descriptor.FieldDescriptor(
 | |
| 365 | +      name='extra', full_name='buildgrid.v2.MetricRecord.extra', index=7,
 | |
| 366 | +      number=8, type=11, cpp_type=10, label=3,
 | |
| 367 | +      has_default_value=False, default_value=[],
 | |
| 368 | +      message_type=None, enum_type=None, containing_type=None,
 | |
| 369 | +      is_extension=False, extension_scope=None,
 | |
| 370 | +      serialized_options=None, file=DESCRIPTOR),
 | |
| 371 | +  ],
 | |
| 372 | +  extensions=[
 | |
| 373 | +  ],
 | |
| 374 | +  nested_types=[_METRICRECORD_EXTRAENTRY, ],
 | |
| 375 | +  enum_types=[
 | |
| 376 | +    _METRICRECORD_DOMAIN,
 | |
| 377 | +    _METRICRECORD_TYPE,
 | |
| 378 | +  ],
 | |
| 379 | +  serialized_options=None,
 | |
| 380 | +  is_extendable=False,
 | |
| 381 | +  syntax='proto3',
 | |
| 382 | +  extension_ranges=[],
 | |
| 383 | +  oneofs=[
 | |
| 384 | +    _descriptor.OneofDescriptor(
 | |
| 385 | +      name='data', full_name='buildgrid.v2.MetricRecord.data',
 | |
| 386 | +      index=0, containing_type=None, fields=[]),
 | |
| 387 | +  ],
 | |
| 388 | +  serialized_start=619,
 | |
| 389 | +  serialized_end=1088,
 | |
| 390 | +)
 | |
| 391 | + | |
| 392 | +_BUSMESSAGE.fields_by_name['log_record'].message_type = _LOGRECORD
 | |
| 393 | +_BUSMESSAGE.fields_by_name['metric_record'].message_type = _METRICRECORD
 | |
| 394 | +_BUSMESSAGE.oneofs_by_name['record'].fields.append(
 | |
| 395 | +  _BUSMESSAGE.fields_by_name['log_record'])
 | |
| 396 | +_BUSMESSAGE.fields_by_name['log_record'].containing_oneof = _BUSMESSAGE.oneofs_by_name['record']
 | |
| 397 | +_BUSMESSAGE.oneofs_by_name['record'].fields.append(
 | |
| 398 | +  _BUSMESSAGE.fields_by_name['metric_record'])
 | |
| 399 | +_BUSMESSAGE.fields_by_name['metric_record'].containing_oneof = _BUSMESSAGE.oneofs_by_name['record']
 | |
| 400 | +_LOGRECORD_EXTRAENTRY.containing_type = _LOGRECORD
 | |
| 401 | +_LOGRECORD.fields_by_name['creation_timestamp'].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP
 | |
| 402 | +_LOGRECORD.fields_by_name['level'].enum_type = _LOGRECORD_LEVEL
 | |
| 403 | +_LOGRECORD.fields_by_name['extra'].message_type = _LOGRECORD_EXTRAENTRY
 | |
| 404 | +_LOGRECORD_LEVEL.containing_type = _LOGRECORD
 | |
| 405 | +_METRICRECORD_EXTRAENTRY.containing_type = _METRICRECORD
 | |
| 406 | +_METRICRECORD.fields_by_name['creation_timestamp'].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP
 | |
| 407 | +_METRICRECORD.fields_by_name['domain'].enum_type = _METRICRECORD_DOMAIN
 | |
| 408 | +_METRICRECORD.fields_by_name['type'].enum_type = _METRICRECORD_TYPE
 | |
| 409 | +_METRICRECORD.fields_by_name['duration'].message_type = google_dot_protobuf_dot_duration__pb2._DURATION
 | |
| 410 | +_METRICRECORD.fields_by_name['extra'].message_type = _METRICRECORD_EXTRAENTRY
 | |
| 411 | +_METRICRECORD_DOMAIN.containing_type = _METRICRECORD
 | |
| 412 | +_METRICRECORD_TYPE.containing_type = _METRICRECORD
 | |
| 413 | +_METRICRECORD.oneofs_by_name['data'].fields.append(
 | |
| 414 | +  _METRICRECORD.fields_by_name['count'])
 | |
| 415 | +_METRICRECORD.fields_by_name['count'].containing_oneof = _METRICRECORD.oneofs_by_name['data']
 | |
| 416 | +_METRICRECORD.oneofs_by_name['data'].fields.append(
 | |
| 417 | +  _METRICRECORD.fields_by_name['duration'])
 | |
| 418 | +_METRICRECORD.fields_by_name['duration'].containing_oneof = _METRICRECORD.oneofs_by_name['data']
 | |
| 419 | +_METRICRECORD.oneofs_by_name['data'].fields.append(
 | |
| 420 | +  _METRICRECORD.fields_by_name['value'])
 | |
| 421 | +_METRICRECORD.fields_by_name['value'].containing_oneof = _METRICRECORD.oneofs_by_name['data']
 | |
| 422 | +DESCRIPTOR.message_types_by_name['BusMessage'] = _BUSMESSAGE
 | |
| 423 | +DESCRIPTOR.message_types_by_name['LogRecord'] = _LOGRECORD
 | |
| 424 | +DESCRIPTOR.message_types_by_name['MetricRecord'] = _METRICRECORD
 | |
| 425 | +_sym_db.RegisterFileDescriptor(DESCRIPTOR)
 | |
| 426 | + | |
| 427 | +BusMessage = _reflection.GeneratedProtocolMessageType('BusMessage', (_message.Message,), dict(
 | |
| 428 | +  DESCRIPTOR = _BUSMESSAGE,
 | |
| 429 | +  __module__ = 'buildgrid.v2.monitoring_pb2'
 | |
| 430 | +  # @@protoc_insertion_point(class_scope:buildgrid.v2.BusMessage)
 | |
| 431 | +  ))
 | |
| 432 | +_sym_db.RegisterMessage(BusMessage)
 | |
| 433 | + | |
| 434 | +LogRecord = _reflection.GeneratedProtocolMessageType('LogRecord', (_message.Message,), dict(
 | |
| 435 | + | |
| 436 | +  ExtraEntry = _reflection.GeneratedProtocolMessageType('ExtraEntry', (_message.Message,), dict(
 | |
| 437 | +    DESCRIPTOR = _LOGRECORD_EXTRAENTRY,
 | |
| 438 | +    __module__ = 'buildgrid.v2.monitoring_pb2'
 | |
| 439 | +    # @@protoc_insertion_point(class_scope:buildgrid.v2.LogRecord.ExtraEntry)
 | |
| 440 | +    ))
 | |
| 441 | +  ,
 | |
| 442 | +  DESCRIPTOR = _LOGRECORD,
 | |
| 443 | +  __module__ = 'buildgrid.v2.monitoring_pb2'
 | |
| 444 | +  # @@protoc_insertion_point(class_scope:buildgrid.v2.LogRecord)
 | |
| 445 | +  ))
 | |
| 446 | +_sym_db.RegisterMessage(LogRecord)
 | |
| 447 | +_sym_db.RegisterMessage(LogRecord.ExtraEntry)
 | |
| 448 | + | |
| 449 | +MetricRecord = _reflection.GeneratedProtocolMessageType('MetricRecord', (_message.Message,), dict(
 | |
| 450 | + | |
| 451 | +  ExtraEntry = _reflection.GeneratedProtocolMessageType('ExtraEntry', (_message.Message,), dict(
 | |
| 452 | +    DESCRIPTOR = _METRICRECORD_EXTRAENTRY,
 | |
| 453 | +    __module__ = 'buildgrid.v2.monitoring_pb2'
 | |
| 454 | +    # @@protoc_insertion_point(class_scope:buildgrid.v2.MetricRecord.ExtraEntry)
 | |
| 455 | +    ))
 | |
| 456 | +  ,
 | |
| 457 | +  DESCRIPTOR = _METRICRECORD,
 | |
| 458 | +  __module__ = 'buildgrid.v2.monitoring_pb2'
 | |
| 459 | +  # @@protoc_insertion_point(class_scope:buildgrid.v2.MetricRecord)
 | |
| 460 | +  ))
 | |
| 461 | +_sym_db.RegisterMessage(MetricRecord)
 | |
| 462 | +_sym_db.RegisterMessage(MetricRecord.ExtraEntry)
 | |
| 463 | + | |
| 464 | + | |
| 465 | +_LOGRECORD_EXTRAENTRY._options = None
 | |
| 466 | +_METRICRECORD_EXTRAENTRY._options = None
 | |
| 467 | +# @@protoc_insertion_point(module_scope) | 
| 1 | +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
 | |
| 2 | +import grpc
 | |
| 3 | + | 
| 1 | +# Copyright (C) 2018 Bloomberg LP
 | |
| 2 | +#
 | |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License");
 | |
| 4 | +# you may not use this file except in compliance with the License.
 | |
| 5 | +# You may obtain a copy of the License at
 | |
| 6 | +#
 | |
| 7 | +#  <http://www.apache.org/licenses/LICENSE-2.0>
 | |
| 8 | +#
 | |
| 9 | +# Unless required by applicable law or agreed to in writing, software
 | |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS,
 | |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| 12 | +# See the License for the specific language governing permissions and
 | |
| 13 | +# limitations under the License.
 | |
| 14 | + | |
| 15 | + | |
| 16 | +import asyncio
 | |
| 17 | + | |
| 18 | +from google.protobuf import json_format
 | |
| 19 | + | |
| 20 | +from buildgrid._protos.buildgrid.v2 import monitoring_pb2
 | |
| 21 | + | |
| 22 | + | |
| 23 | +class MonitoringBus:
 | |
| 24 | + | |
| 25 | +    def __init__(self, loop):
 | |
| 26 | +        self.__event_loop = loop
 | |
| 27 | +        self.__streaming_task = None
 | |
| 28 | + | |
| 29 | +        self.__message_queue = asyncio.Queue(loop=loop)
 | |
| 30 | +        self.__sequence_number = 1
 | |
| 31 | + | |
| 32 | +    # --- Public API ---
 | |
| 33 | + | |
| 34 | +    def start(self):
 | |
| 35 | +        """Starts the monitoring bus worker task."""
 | |
| 36 | +        self.__streaming_task = asyncio.ensure_future(
 | |
| 37 | +            self._streaming_worker(), loop=self.__event_loop)
 | |
| 38 | + | |
| 39 | +    def stop(self):
 | |
| 40 | +        """Cancels the monitoring bus worker task."""
 | |
| 41 | +        if self.__streaming_task is not None:
 | |
| 42 | +            self.__streaming_task.cancel()
 | |
| 43 | + | |
| 44 | +    async def send_record(self, record):
 | |
| 45 | +        """Publishes a record onto the bus asynchronously.
 | |
| 46 | + | |
| 47 | +        Args:
 | |
| 48 | +            record (Message): The LogRecord or MetricRecord message to publish.
 | |
| 49 | +        """
 | |
| 50 | +        await self.__message_queue.put(record)
 | |
| 51 | + | |
| 52 | +    def send_record_nowait(self, record):
 | |
| 53 | +        """Publishes a record onto the bus.
 | |
| 54 | + | |
| 55 | +        Args:
 | |
| 56 | +            record (Message): The LogRecord or MetricRecord message to publish.
 | |
| 57 | +        """
 | |
| 58 | +        self.__message_queue.put_nowait(record)
 | |
| 59 | + | |
| 60 | +    # --- Private API ---
 | |
| 61 | + | |
| 62 | +    async def _streaming_worker(self):
 | |
| 63 | +        """Handles bus messages streaming work."""
 | |
| 64 | +        async def __streaming_worker():
 | |
| 65 | +            record = await self.__message_queue.get()
 | |
| 66 | + | |
| 67 | +            message = monitoring_pb2.BusMessage()
 | |
| 68 | +            message.sequence_number = self.__sequence_number
 | |
| 69 | + | |
| 70 | +            if record.DESCRIPTOR is monitoring_pb2.LogRecord.DESCRIPTOR:
 | |
| 71 | +                message.log_record.CopyFrom(record)
 | |
| 72 | + | |
| 73 | +            elif record.DESCRIPTOR is monitoring_pb2.MetricRecord.DESCRIPTOR:
 | |
| 74 | +                message.metric_record.CopyFrom(record)
 | |
| 75 | + | |
| 76 | +            else:
 | |
| 77 | +                return False
 | |
| 78 | + | |
| 79 | +            # print(json_format.MessageToJson(message))
 | |
| 80 | + | |
| 81 | +            return True
 | |
| 82 | + | |
| 83 | +        try:
 | |
| 84 | +            while True:
 | |
| 85 | +                if await __streaming_worker():
 | |
| 86 | +                    self.__sequence_number += 1
 | |
| 87 | + | |
| 88 | +        except asyncio.CancelledError:
 | |
| 89 | +            pass | 
| ... | ... | @@ -37,6 +37,10 @@ class BotsInterface: | 
| 37 | 37 |          self._bot_sessions = {}
 | 
| 38 | 38 |          self._scheduler = scheduler
 | 
| 39 | 39 |  | 
| 40 | +    @property
 | |
| 41 | +    def scheduler(self):
 | |
| 42 | +        return self._scheduler
 | |
| 43 | + | |
| 40 | 44 |      def register_instance_with_server(self, instance_name, server):
 | 
| 41 | 45 |          server.add_bots_interface(self, instance_name)
 | 
| 42 | 46 |  | 
| ... | ... | @@ -23,8 +23,9 @@ import logging | 
| 23 | 23 |  | 
| 24 | 24 |  import grpc
 | 
| 25 | 25 |  | 
| 26 | -from google.protobuf.empty_pb2 import Empty
 | |
| 26 | +from google.protobuf import empty_pb2, timestamp_pb2
 | |
| 27 | 27 |  | 
| 28 | +from buildgrid._enums import BotStatus
 | |
| 28 | 29 |  from buildgrid._exceptions import InvalidArgumentError, OutOfSyncError
 | 
| 29 | 30 |  from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
 | 
| 30 | 31 |  from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2_grpc
 | 
| ... | ... | @@ -32,24 +33,87 @@ from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2_grp | 
| 32 | 33 |  | 
| 33 | 34 |  class BotsService(bots_pb2_grpc.BotsServicer):
 | 
| 34 | 35 |  | 
| 35 | -    def __init__(self, server):
 | |
| 36 | +    def __init__(self, server, monitor=True):
 | |
| 36 | 37 |          self.__logger = logging.getLogger(__name__)
 | 
| 37 | 38 |  | 
| 39 | +        self.__bots_by_status = None
 | |
| 40 | +        self.__bots_by_instance = None
 | |
| 41 | +        self.__bots = None
 | |
| 42 | + | |
| 38 | 43 |          self._instances = {}
 | 
| 39 | 44 |  | 
| 40 | 45 |          bots_pb2_grpc.add_BotsServicer_to_server(self, server)
 | 
| 41 | 46 |  | 
| 42 | -    def add_instance(self, name, instance):
 | |
| 43 | -        self._instances[name] = instance
 | |
| 47 | +        self._is_instrumented = monitor
 | |
| 48 | + | |
| 49 | +        if self._is_instrumented:
 | |
| 50 | +            self.__bots_by_status = {}
 | |
| 51 | +            self.__bots_by_instance = {}
 | |
| 52 | +            self.__bots = {}
 | |
| 53 | + | |
| 54 | +            self.__bots_by_status[BotStatus.OK] = set()
 | |
| 55 | +            self.__bots_by_status[BotStatus.UNHEALTHY] = set()
 | |
| 56 | +            self.__bots_by_status[BotStatus.HOST_REBOOTING] = set()
 | |
| 57 | +            self.__bots_by_status[BotStatus.BOT_TERMINATING] = set()
 | |
| 58 | + | |
| 59 | +    # --- Public API ---
 | |
| 60 | + | |
| 61 | +    def add_instance(self, instance_name, instance):
 | |
| 62 | +        """Registers a new servicer instance.
 | |
| 63 | + | |
| 64 | +        Args:
 | |
| 65 | +            instance_name (str): The new instance's name.
 | |
| 66 | +            instance (BotsInterface): The new instance itself.
 | |
| 67 | +        """
 | |
| 68 | +        self._instances[instance_name] = instance
 | |
| 69 | + | |
| 70 | +        if self._is_instrumented:
 | |
| 71 | +            self.__bots_by_instance[instance_name] = 0
 | |
| 72 | + | |
| 73 | +    def get_scheduler(self, instance_name):
 | |
| 74 | +        """Retrieves a reference to the scheduler for an instance.
 | |
| 75 | + | |
| 76 | +        Args:
 | |
| 77 | +            instance_name (str): The name of the instance to query.
 | |
| 78 | + | |
| 79 | +        Returns:
 | |
| 80 | +            Scheduler: A reference to the scheduler for `instance_name`.
 | |
| 81 | + | |
| 82 | +        Raises:
 | |
| 83 | +            InvalidArgumentError: If no instance named `instance_name` exists.
 | |
| 84 | +        """
 | |
| 85 | +        instance = self._get_instance(instance_name)
 | |
| 86 | + | |
| 87 | +        return instance.scheduler
 | |
| 88 | + | |
| 89 | +    # --- Public API: Servicer ---
 | |
| 44 | 90 |  | 
| 45 | 91 |      def CreateBotSession(self, request, context):
 | 
| 92 | +        """Handles CreateBotSessionRequest messages.
 | |
| 93 | + | |
| 94 | +        Args:
 | |
| 95 | +            request (CreateBotSessionRequest): The incoming RPC request.
 | |
| 96 | +            context (grpc.ServicerContext): Context for the RPC call.
 | |
| 97 | +        """
 | |
| 46 | 98 |          self.__logger.debug("CreateBotSession request from [%s]", context.peer())
 | 
| 47 | 99 |  | 
| 100 | +        instance_name = request.parent
 | |
| 101 | +        bot_status = BotStatus(request.bot_session.status)
 | |
| 102 | +        bot_id = request.bot_session.bot_id
 | |
| 103 | + | |
| 48 | 104 |          try:
 | 
| 49 | -            parent = request.parent
 | |
| 50 | -            instance = self._get_instance(request.parent)
 | |
| 51 | -            return instance.create_bot_session(parent,
 | |
| 52 | -                                               request.bot_session)
 | |
| 105 | +            instance = self._get_instance(instance_name)
 | |
| 106 | +            bot_session = instance.create_bot_session(instance_name,
 | |
| 107 | +                                                      request.bot_session)
 | |
| 108 | +            now = timestamp_pb2.Timestamp()
 | |
| 109 | +            now.GetCurrentTime()
 | |
| 110 | + | |
| 111 | +            if self._is_instrumented:
 | |
| 112 | +                self.__bots[bot_id] = now
 | |
| 113 | +                self.__bots_by_instance[instance_name] += 1
 | |
| 114 | +                self.__bots_by_status[bot_status].add(bot_id)
 | |
| 115 | + | |
| 116 | +            return bot_session
 | |
| 53 | 117 |  | 
| 54 | 118 |          except InvalidArgumentError as e:
 | 
| 55 | 119 |              self.__logger.error(e)
 | 
| ... | ... | @@ -59,17 +123,36 @@ class BotsService(bots_pb2_grpc.BotsServicer): | 
| 59 | 123 |          return bots_pb2.BotSession()
 | 
| 60 | 124 |  | 
| 61 | 125 |      def UpdateBotSession(self, request, context):
 | 
| 126 | +        """Handles UpdateBotSessionRequest messages.
 | |
| 127 | + | |
| 128 | +        Args:
 | |
| 129 | +            request (UpdateBotSessionRequest): The incoming RPC request.
 | |
| 130 | +            context (grpc.ServicerContext): Context for the RPC call.
 | |
| 131 | +        """
 | |
| 62 | 132 |          self.__logger.debug("UpdateBotSession request from [%s]", context.peer())
 | 
| 63 | 133 |  | 
| 134 | +        names = request.name.split("/")
 | |
| 135 | +        bot_status = BotStatus(request.bot_session.status)
 | |
| 136 | +        bot_id = request.bot_session.bot_id
 | |
| 137 | + | |
| 64 | 138 |          try:
 | 
| 65 | -            names = request.name.split("/")
 | |
| 66 | -            # Operation name should be in format:
 | |
| 67 | -            # {instance/name}/{uuid}
 | |
| 68 | -            instance_name = ''.join(names[0:-1])
 | |
| 139 | +            instance_name = '/'.join(names[:-1])
 | |
| 69 | 140 |  | 
| 70 | 141 |              instance = self._get_instance(instance_name)
 | 
| 71 | -            return instance.update_bot_session(request.name,
 | |
| 72 | -                                               request.bot_session)
 | |
| 142 | +            bot_session = instance.update_bot_session(request.name,
 | |
| 143 | +                                                      request.bot_session)
 | |
| 144 | + | |
| 145 | +            if self._is_instrumented:
 | |
| 146 | +                self.__bots[bot_id].GetCurrentTime()
 | |
| 147 | +                if bot_id not in self.__bots_by_status[bot_status]:
 | |
| 148 | +                    self.__bots_by_status[BotStatus.OK].discard(bot_id)
 | |
| 149 | +                    self.__bots_by_status[BotStatus.UNHEALTHY].discard(bot_id)
 | |
| 150 | +                    self.__bots_by_status[BotStatus.HOST_REBOOTING].discard(bot_id)
 | |
| 151 | +                    self.__bots_by_status[BotStatus.BOT_TERMINATING].discard(bot_id)
 | |
| 152 | + | |
| 153 | +                    self.__bots_by_status[bot_status].add(bot_id)
 | |
| 154 | + | |
| 155 | +            return bot_session
 | |
| 73 | 156 |  | 
| 74 | 157 |          except InvalidArgumentError as e:
 | 
| 75 | 158 |              self.__logger.error(e)
 | 
| ... | ... | @@ -89,10 +172,46 @@ class BotsService(bots_pb2_grpc.BotsServicer): | 
| 89 | 172 |          return bots_pb2.BotSession()
 | 
| 90 | 173 |  | 
| 91 | 174 |      def PostBotEventTemp(self, request, context):
 | 
| 175 | +        """Handles PostBotEventTempRequest messages.
 | |
| 176 | + | |
| 177 | +        Args:
 | |
| 178 | +            request (PostBotEventTempRequest): The incoming RPC request.
 | |
| 179 | +            context (grpc.ServicerContext): Context for the RPC call.
 | |
| 180 | +        """
 | |
| 92 | 181 |          self.__logger.debug("PostBotEventTemp request from [%s]", context.peer())
 | 
| 93 | 182 |  | 
| 94 | 183 |          context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 | 
| 95 | -        return Empty()
 | |
| 184 | + | |
| 185 | +        return empty_pb2.Empty()
 | |
| 186 | + | |
| 187 | +    # --- Public API: Monitoring ---
 | |
| 188 | + | |
| 189 | +    @property
 | |
| 190 | +    def is_instrumented(self):
 | |
| 191 | +        return self._is_instrumented
 | |
| 192 | + | |
| 193 | +    def query_n_bots(self):
 | |
| 194 | +        if self.__bots is not None:
 | |
| 195 | +            return len(self.__bots)
 | |
| 196 | +        return 0
 | |
| 197 | + | |
| 198 | +    def query_n_bots_for_instance(self, instance_name):
 | |
| 199 | +        try:
 | |
| 200 | +            if self.__bots_by_instance is not None:
 | |
| 201 | +                return self.__bots_by_instance[instance_name]
 | |
| 202 | +        except KeyError:
 | |
| 203 | +            pass
 | |
| 204 | +        return 0
 | |
| 205 | + | |
| 206 | +    def query_n_bots_for_status(self, bot_status):
 | |
| 207 | +        try:
 | |
| 208 | +            if self.__bots_by_status is not None:
 | |
| 209 | +                return len(self.__bots_by_status[bot_status])
 | |
| 210 | +        except KeyError:
 | |
| 211 | +            pass
 | |
| 212 | +        return 0
 | |
| 213 | + | |
| 214 | +    # --- Private API ---
 | |
| 96 | 215 |  | 
| 97 | 216 |      def _get_instance(self, name):
 | 
| 98 | 217 |          try:
 | 
| ... | ... | @@ -35,6 +35,10 @@ class ExecutionInstance: | 
| 35 | 35 |          self._storage = storage
 | 
| 36 | 36 |          self._scheduler = scheduler
 | 
| 37 | 37 |  | 
| 38 | +    @property
 | |
| 39 | +    def scheduler(self):
 | |
| 40 | +        return self._scheduler
 | |
| 41 | + | |
| 38 | 42 |      def register_instance_with_server(self, instance_name, server):
 | 
| 39 | 43 |          server.add_execution_instance(self, instance_name)
 | 
| 40 | 44 |  | 
| ... | ... | @@ -33,30 +33,84 @@ from buildgrid._protos.google.longrunning import operations_pb2 | 
| 33 | 33 |  | 
| 34 | 34 |  class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer):
 | 
| 35 | 35 |  | 
| 36 | -    def __init__(self, server):
 | |
| 36 | +    def __init__(self, server, monitor=True):
 | |
| 37 | 37 |          self.__logger = logging.getLogger(__name__)
 | 
| 38 | 38 |  | 
| 39 | +        self.__peers_by_instance = None
 | |
| 40 | +        self.__peers = None
 | |
| 41 | + | |
| 39 | 42 |          self._instances = {}
 | 
| 43 | + | |
| 40 | 44 |          remote_execution_pb2_grpc.add_ExecutionServicer_to_server(self, server)
 | 
| 41 | 45 |  | 
| 42 | -    def add_instance(self, name, instance):
 | |
| 43 | -        self._instances[name] = instance
 | |
| 46 | +        self._is_instrumented = monitor
 | |
| 47 | + | |
| 48 | +        if self._is_instrumented:
 | |
| 49 | +            self.__peers_by_instance = {}
 | |
| 50 | +            self.__peers = {}
 | |
| 51 | + | |
| 52 | +    # --- Public API ---
 | |
| 53 | + | |
| 54 | +    def add_instance(self, instance_name, instance):
 | |
| 55 | +        """Registers a new servicer instance.
 | |
| 56 | + | |
| 57 | +        Args:
 | |
| 58 | +            instance_name (str): The new instance's name.
 | |
| 59 | +            instance (ExecutionInstance): The new instance itself.
 | |
| 60 | +        """
 | |
| 61 | +        self._instances[instance_name] = instance
 | |
| 62 | + | |
| 63 | +        if self._is_instrumented:
 | |
| 64 | +            self.__peers_by_instance[instance_name] = set()
 | |
| 65 | + | |
| 66 | +    def get_scheduler(self, instance_name):
 | |
| 67 | +        """Retrieves a reference to the scheduler for an instance.
 | |
| 68 | + | |
| 69 | +        Args:
 | |
| 70 | +            instance_name (str): The name of the instance to query.
 | |
| 71 | + | |
| 72 | +        Returns:
 | |
| 73 | +            Scheduler: A reference to the scheduler for `instance_name`.
 | |
| 74 | + | |
| 75 | +        Raises:
 | |
| 76 | +            InvalidArgumentError: If no instance named `instance_name` exists.
 | |
| 77 | +        """
 | |
| 78 | +        instance = self._get_instance(instance_name)
 | |
| 79 | + | |
| 80 | +        return instance.scheduler
 | |
| 81 | + | |
| 82 | +    # --- Public API: Servicer ---
 | |
| 44 | 83 |  | 
| 45 | 84 |      def Execute(self, request, context):
 | 
| 85 | +        """Handles ExecuteRequest messages.
 | |
| 86 | + | |
| 87 | +        Args:
 | |
| 88 | +            request (ExecuteRequest): The incoming RPC request.
 | |
| 89 | +            context (grpc.ServicerContext): Context for the RPC call.
 | |
| 90 | +        """
 | |
| 46 | 91 |          self.__logger.debug("Execute request from [%s]", context.peer())
 | 
| 47 | 92 |  | 
| 93 | +        instance_name = request.instance_name
 | |
| 94 | +        message_queue = queue.Queue()
 | |
| 95 | +        peer = context.peer()
 | |
| 96 | + | |
| 48 | 97 |          try:
 | 
| 49 | -            message_queue = queue.Queue()
 | |
| 50 | -            instance = self._get_instance(request.instance_name)
 | |
| 98 | +            instance = self._get_instance(instance_name)
 | |
| 51 | 99 |              operation = instance.execute(request.action_digest,
 | 
| 52 | 100 |                                           request.skip_cache_lookup,
 | 
| 53 | 101 |                                           message_queue)
 | 
| 54 | 102 |  | 
| 55 | -            context.add_callback(partial(instance.unregister_message_client,
 | |
| 56 | -                                         operation.name, message_queue))
 | |
| 103 | +            context.add_callback(partial(self._rpc_termination_callback,
 | |
| 104 | +                                         peer, instance_name, operation.name, message_queue))
 | |
| 105 | + | |
| 106 | +            if self._is_instrumented:
 | |
| 107 | +                if peer not in self.__peers:
 | |
| 108 | +                    self.__peers_by_instance[instance_name].add(peer)
 | |
| 109 | +                    self.__peers[peer] = 1
 | |
| 110 | +                else:
 | |
| 111 | +                    self.__peers[peer] += 1
 | |
| 57 | 112 |  | 
| 58 | -            instanced_op_name = "{}/{}".format(request.instance_name,
 | |
| 59 | -                                               operation.name)
 | |
| 113 | +            instanced_op_name = "{}/{}".format(instance_name, operation.name)
 | |
| 60 | 114 |  | 
| 61 | 115 |              self.__logger.info("Operation name: [%s]", instanced_op_name)
 | 
| 62 | 116 |  | 
| ... | ... | @@ -80,23 +134,37 @@ class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer): | 
| 80 | 134 |              yield operations_pb2.Operation()
 | 
| 81 | 135 |  | 
| 82 | 136 |      def WaitExecution(self, request, context):
 | 
| 137 | +        """Handles WaitExecutionRequest messages.
 | |
| 138 | + | |
| 139 | +        Args:
 | |
| 140 | +            request (WaitExecutionRequest): The incoming RPC request.
 | |
| 141 | +            context (grpc.ServicerContext): Context for the RPC call.
 | |
| 142 | +        """
 | |
| 83 | 143 |          self.__logger.debug("WaitExecution request from [%s]", context.peer())
 | 
| 84 | 144 |  | 
| 85 | -        try:
 | |
| 86 | -            names = request.name.split("/")
 | |
| 145 | +        names = request.name.split('/')
 | |
| 146 | +        instance_name = '/'.join(names[:-1])
 | |
| 147 | +        operation_name = names[-1]
 | |
| 148 | +        message_queue = queue.Queue()
 | |
| 149 | +        peer = context.peer()
 | |
| 87 | 150 |  | 
| 88 | -            # Operation name should be in format:
 | |
| 89 | -            # {instance/name}/{operation_id}
 | |
| 90 | -            instance_name = ''.join(names[0:-1])
 | |
| 151 | +        try:
 | |
| 152 | +            if instance_name != request.instance_name:
 | |
| 153 | +                raise InvalidArgumentError("Invalid operation [{}] for instance [{}]"
 | |
| 154 | +                                            .format(request.name, instance_name))
 | |
| 91 | 155 |  | 
| 92 | -            message_queue = queue.Queue()
 | |
| 93 | -            operation_name = names[-1]
 | |
| 94 | 156 |              instance = self._get_instance(instance_name)
 | 
| 95 | 157 |  | 
| 96 | 158 |              instance.register_message_client(operation_name, message_queue)
 | 
| 159 | +            context.add_callback(partial(self._rpc_termination_callback,
 | |
| 160 | +                                         peer, instance_name, operation_name, message_queue))
 | |
| 97 | 161 |  | 
| 98 | -            context.add_callback(partial(instance.unregister_message_client,
 | |
| 99 | -                                         operation_name, message_queue))
 | |
| 162 | +            if self._is_instrumented:
 | |
| 163 | +                if peer not in self.__peers:
 | |
| 164 | +                    self.__peers_by_instance[instance_name].add(peer)
 | |
| 165 | +                    self.__peers[peer] = 1
 | |
| 166 | +                else:
 | |
| 167 | +                    self.__peers[peer] += 1
 | |
| 100 | 168 |  | 
| 101 | 169 |              for operation in instance.stream_operation_updates(message_queue,
 | 
| 102 | 170 |                                                                 operation_name):
 | 
| ... | ... | @@ -111,9 +179,42 @@ class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer): | 
| 111 | 179 |              context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
 | 
| 112 | 180 |              yield operations_pb2.Operation()
 | 
| 113 | 181 |  | 
| 182 | +    # --- Private API ---
 | |
| 183 | + | |
| 184 | +    def _rpc_termination_callback(self, peer, instance_name, job_name, message_queue):
 | |
| 185 | +        instance = self._get_instance(instance_name)
 | |
| 186 | + | |
| 187 | +        instance.unregister_message_client(job_name, message_queue)
 | |
| 188 | + | |
| 189 | +        if self._is_instrumented:
 | |
| 190 | +            if self.__peers[peer] > 1:
 | |
| 191 | +                self.__peers[peer] -= 1
 | |
| 192 | +            else:
 | |
| 193 | +                self.__peers_by_instance[instance_name].remove(peer)
 | |
| 194 | +                del self.__peers[peer]
 | |
| 195 | + | |
| 114 | 196 |      def _get_instance(self, name):
 | 
| 115 | 197 |          try:
 | 
| 116 | 198 |              return self._instances[name]
 | 
| 117 | 199 |  | 
| 118 | 200 |          except KeyError:
 | 
| 119 | 201 |              raise InvalidArgumentError("Instance doesn't exist on server: [{}]".format(name))
 | 
| 202 | + | |
| 203 | +    # --- Public API: Monitoring ---
 | |
| 204 | + | |
| 205 | +    @property
 | |
| 206 | +    def is_instrumented(self):
 | |
| 207 | +        return self._is_instrumented
 | |
| 208 | + | |
| 209 | +    def query_n_clients(self):
 | |
| 210 | +        if self.__peers is not None:
 | |
| 211 | +            return len(self.__peers)
 | |
| 212 | +        return 0
 | |
| 213 | + | |
| 214 | +    def query_n_clients_for_instance(self, instance_name):
 | |
| 215 | +        try:
 | |
| 216 | +            if self.__peers_by_instance is not None:
 | |
| 217 | +                return len(self.__peers_by_instance[instance_name])
 | |
| 218 | +        except KeyError:
 | |
| 219 | +            pass
 | |
| 220 | +        return 0 | 
| ... | ... | @@ -13,18 +13,27 @@ | 
| 13 | 13 |  # limitations under the License.
 | 
| 14 | 14 |  | 
| 15 | 15 |  | 
| 16 | +import asyncio
 | |
| 16 | 17 |  from concurrent import futures
 | 
| 18 | +from datetime import datetime, timedelta
 | |
| 17 | 19 |  import logging
 | 
| 20 | +from logging.handlers import QueueHandler
 | |
| 18 | 21 |  import os
 | 
| 22 | +import time
 | |
| 19 | 23 |  | 
| 20 | 24 |  import grpc
 | 
| 25 | +import janus
 | |
| 21 | 26 |  | 
| 22 | -from .cas.service import ByteStreamService, ContentAddressableStorageService
 | |
| 23 | -from .actioncache.service import ActionCacheService
 | |
| 24 | -from .execution.service import ExecutionService
 | |
| 25 | -from .operations.service import OperationsService
 | |
| 26 | -from .bots.service import BotsService
 | |
| 27 | -from .referencestorage.service import ReferenceStorageService
 | |
| 27 | +from buildgrid._enums import LogRecordLevel, MetricRecordDomain, MetricRecordType
 | |
| 28 | +from buildgrid._protos.buildgrid.v2 import monitoring_pb2
 | |
| 29 | +from buildgrid.server.actioncache.service import ActionCacheService
 | |
| 30 | +from buildgrid.server.bots.service import BotsService
 | |
| 31 | +from buildgrid.server.cas.service import ByteStreamService, ContentAddressableStorageService
 | |
| 32 | +from buildgrid.server.execution.service import ExecutionService
 | |
| 33 | +from buildgrid.server._monitoring import MonitoringBus
 | |
| 34 | +from buildgrid.server.operations.service import OperationsService
 | |
| 35 | +from buildgrid.server.referencestorage.service import ReferenceStorageService
 | |
| 36 | +from buildgrid.settings import MONITORING_PERIOD
 | |
| 28 | 37 |  | 
| 29 | 38 |  | 
| 30 | 39 |  class BuildGridServer:
 | 
| ... | ... | @@ -34,7 +43,7 @@ class BuildGridServer: | 
| 34 | 43 |      requisite services.
 | 
| 35 | 44 |      """
 | 
| 36 | 45 |  | 
| 37 | -    def __init__(self, max_workers=None):
 | |
| 46 | +    def __init__(self, max_workers=None, monitor=True):
 | |
| 38 | 47 |          """Initializes a new :class:`BuildGridServer` instance.
 | 
| 39 | 48 |  | 
| 40 | 49 |          Args:
 | 
| ... | ... | @@ -46,9 +55,19 @@ class BuildGridServer: | 
| 46 | 55 |              # Use max_workers default from Python 3.5+
 | 
| 47 | 56 |              max_workers = (os.cpu_count() or 1) * 5
 | 
| 48 | 57 |  | 
| 49 | -        server = grpc.server(futures.ThreadPoolExecutor(max_workers))
 | |
| 58 | +        self.__grpc_executor = futures.ThreadPoolExecutor(max_workers)
 | |
| 59 | +        self.__grpc_server = grpc.server(self.__grpc_executor)
 | |
| 50 | 60 |  | 
| 51 | -        self._server = server
 | |
| 61 | +        self.__main_loop = asyncio.get_event_loop()
 | |
| 62 | + | |
| 63 | +        self.__monitoring_bus = None
 | |
| 64 | +        self.__build_metadata_queues = None
 | |
| 65 | +        self.__logging_queue = None
 | |
| 66 | +        self.__logging_handler = None
 | |
| 67 | + | |
| 68 | +        self.__state_monitoring_task = None
 | |
| 69 | +        self.__build_monitoring_tasks = None
 | |
| 70 | +        self.__logging_task = None
 | |
| 52 | 71 |  | 
| 53 | 72 |          self._execution_service = None
 | 
| 54 | 73 |          self._bots_service = None
 | 
| ... | ... | @@ -58,15 +77,73 @@ class BuildGridServer: | 
| 58 | 77 |          self._cas_service = None
 | 
| 59 | 78 |          self._bytestream_service = None
 | 
| 60 | 79 |  | 
| 80 | +        self._schedulers = {}
 | |
| 81 | +        self._instances = set()
 | |
| 82 | + | |
| 83 | +        self._is_instrumented = monitor
 | |
| 84 | + | |
| 85 | +        if self._is_instrumented:
 | |
| 86 | +            self.__monitoring_bus = MonitoringBus(self.__main_loop)
 | |
| 87 | +            self.__build_metadata_queues = None
 | |
| 88 | +            self.__logging_queue = janus.Queue(loop=self.__main_loop)
 | |
| 89 | +            self.__logging_handler = QueueHandler(self.__logging_queue.sync_q)
 | |
| 90 | + | |
| 91 | +            self.__build_monitoring_tasks = []
 | |
| 92 | + | |
| 93 | +            logging.getLogger().addHandler(self.__logging_handler)
 | |
| 94 | + | |
| 95 | +    # --- Public API ---
 | |
| 96 | + | |
| 61 | 97 |      def start(self):
 | 
| 62 | -        """Starts the server.
 | |
| 98 | +        """Starts the BuildGrid server.
 | |
| 63 | 99 |          """
 | 
| 64 | -        self._server.start()
 | |
| 100 | +        self.__grpc_server.start()
 | |
| 101 | + | |
| 102 | +        if self._is_instrumented:
 | |
| 103 | +            self.__monitoring_bus.start()
 | |
| 104 | + | |
| 105 | +            self.__state_monitoring_task = asyncio.ensure_future(
 | |
| 106 | +                self._state_monitoring_worker(period=MONITORING_PERIOD),
 | |
| 107 | +                loop=self.__main_loop)
 | |
| 108 | + | |
| 109 | +            for instance_name, scheduler in self._schedulers.items():
 | |
| 110 | +                if not scheduler.is_instrumented:
 | |
| 111 | +                    continue
 | |
| 112 | + | |
| 113 | +                message_queue = janus.Queue(loop=self.__main_loop)
 | |
| 114 | +                scheduler.register_build_metadata_watcher(message_queue.sync_q)
 | |
| 115 | + | |
| 116 | +                self.__build_monitoring_tasks.append(asyncio.ensure_future(
 | |
| 117 | +                    self._build_monitoring_worker(instance_name, message_queue),
 | |
| 118 | +                    loop=self.__main_loop))
 | |
| 119 | + | |
| 120 | +            self.__logging_task = asyncio.ensure_future(
 | |
| 121 | +                self._logging_worker(), loop=self.__main_loop)
 | |
| 122 | + | |
| 123 | +        self.__main_loop.run_forever()
 | |
| 65 | 124 |  | 
| 66 | 125 |      def stop(self, grace=0):
 | 
| 67 | -        """Stops the server.
 | |
| 126 | +        """Stops the BuildGrid server.
 | |
| 127 | + | |
| 128 | +        Args:
 | |
| 129 | +            grace (int, optional): A duration of time in seconds. Defaults to 0.
 | |
| 68 | 130 |          """
 | 
| 69 | -        self._server.stop(grace)
 | |
| 131 | +        if self._is_instrumented:
 | |
| 132 | +            if self.__state_monitoring_task is not None:
 | |
| 133 | +                self.__state_monitoring_task.cancel()
 | |
| 134 | + | |
| 135 | +            for build_monitoring_task in self.__build_monitoring_tasks:
 | |
| 136 | +                build_monitoring_task.cancel()
 | |
| 137 | + | |
| 138 | +            if self.__logging_task is not None:
 | |
| 139 | +                self.__logging_task.cancel()
 | |
| 140 | + | |
| 141 | +            self.__monitoring_bus.stop()
 | |
| 142 | + | |
| 143 | +        self.__grpc_server.stop(grace)
 | |
| 144 | + | |
| 145 | +        if grace > 0:
 | |
| 146 | +            time.sleep(grace)
 | |
| 70 | 147 |  | 
| 71 | 148 |      def add_port(self, address, credentials):
 | 
| 72 | 149 |          """Adds a port to the server.
 | 
| ... | ... | @@ -80,11 +157,11 @@ class BuildGridServer: | 
| 80 | 157 |          """
 | 
| 81 | 158 |          if credentials is not None:
 | 
| 82 | 159 |              self.__logger.info("Adding secure connection on: [%s]", address)
 | 
| 83 | -            self._server.add_secure_port(address, credentials)
 | |
| 160 | +            self.__grpc_server.add_secure_port(address, credentials)
 | |
| 84 | 161 |  | 
| 85 | 162 |          else:
 | 
| 86 | 163 |              self.__logger.info("Adding insecure connection on [%s]", address)
 | 
| 87 | -            self._server.add_insecure_port(address)
 | |
| 164 | +            self.__grpc_server.add_insecure_port(address)
 | |
| 88 | 165 |  | 
| 89 | 166 |      def add_execution_instance(self, instance, instance_name):
 | 
| 90 | 167 |          """Adds an :obj:`ExecutionInstance` to the service.
 | 
| ... | ... | @@ -96,10 +173,12 @@ class BuildGridServer: | 
| 96 | 173 |              instance_name (str): Instance name.
 | 
| 97 | 174 |          """
 | 
| 98 | 175 |          if self._execution_service is None:
 | 
| 99 | -            self._execution_service = ExecutionService(self._server)
 | |
| 100 | - | |
| 176 | +            self._execution_service = ExecutionService(self.__grpc_server)
 | |
| 101 | 177 |          self._execution_service.add_instance(instance_name, instance)
 | 
| 102 | 178 |  | 
| 179 | +        self._schedulers[instance_name] = instance.scheduler
 | |
| 180 | +        self._instances.add(instance_name)
 | |
| 181 | + | |
| 103 | 182 |      def add_bots_interface(self, instance, instance_name):
 | 
| 104 | 183 |          """Adds a :obj:`BotsInterface` to the service.
 | 
| 105 | 184 |  | 
| ... | ... | @@ -110,10 +189,11 @@ class BuildGridServer: | 
| 110 | 189 |              instance_name (str): Instance name.
 | 
| 111 | 190 |          """
 | 
| 112 | 191 |          if self._bots_service is None:
 | 
| 113 | -            self._bots_service = BotsService(self._server)
 | |
| 114 | - | |
| 192 | +            self._bots_service = BotsService(self.__grpc_server)
 | |
| 115 | 193 |          self._bots_service.add_instance(instance_name, instance)
 | 
| 116 | 194 |  | 
| 195 | +        self._instances.add(instance_name)
 | |
| 196 | + | |
| 117 | 197 |      def add_operations_instance(self, instance, instance_name):
 | 
| 118 | 198 |          """Adds an :obj:`OperationsInstance` to the service.
 | 
| 119 | 199 |  | 
| ... | ... | @@ -124,8 +204,7 @@ class BuildGridServer: | 
| 124 | 204 |              instance_name (str): Instance name.
 | 
| 125 | 205 |          """
 | 
| 126 | 206 |          if self._operations_service is None:
 | 
| 127 | -            self._operations_service = OperationsService(self._server)
 | |
| 128 | - | |
| 207 | +            self._operations_service = OperationsService(self.__grpc_server)
 | |
| 129 | 208 |          self._operations_service.add_instance(instance_name, instance)
 | 
| 130 | 209 |  | 
| 131 | 210 |      def add_reference_storage_instance(self, instance, instance_name):
 | 
| ... | ... | @@ -138,8 +217,7 @@ class BuildGridServer: | 
| 138 | 217 |              instance_name (str): Instance name.
 | 
| 139 | 218 |          """
 | 
| 140 | 219 |          if self._reference_storage_service is None:
 | 
| 141 | -            self._reference_storage_service = ReferenceStorageService(self._server)
 | |
| 142 | - | |
| 220 | +            self._reference_storage_service = ReferenceStorageService(self.__grpc_server)
 | |
| 143 | 221 |          self._reference_storage_service.add_instance(instance_name, instance)
 | 
| 144 | 222 |  | 
| 145 | 223 |      def add_action_cache_instance(self, instance, instance_name):
 | 
| ... | ... | @@ -152,8 +230,7 @@ class BuildGridServer: | 
| 152 | 230 |              instance_name (str): Instance name.
 | 
| 153 | 231 |          """
 | 
| 154 | 232 |          if self._action_cache_service is None:
 | 
| 155 | -            self._action_cache_service = ActionCacheService(self._server)
 | |
| 156 | - | |
| 233 | +            self._action_cache_service = ActionCacheService(self.__grpc_server)
 | |
| 157 | 234 |          self._action_cache_service.add_instance(instance_name, instance)
 | 
| 158 | 235 |  | 
| 159 | 236 |      def add_cas_instance(self, instance, instance_name):
 | 
| ... | ... | @@ -166,8 +243,7 @@ class BuildGridServer: | 
| 166 | 243 |              instance_name (str): Instance name.
 | 
| 167 | 244 |          """
 | 
| 168 | 245 |          if self._cas_service is None:
 | 
| 169 | -            self._cas_service = ContentAddressableStorageService(self._server)
 | |
| 170 | - | |
| 246 | +            self._cas_service = ContentAddressableStorageService(self.__grpc_server)
 | |
| 171 | 247 |          self._cas_service.add_instance(instance_name, instance)
 | 
| 172 | 248 |  | 
| 173 | 249 |      def add_bytestream_instance(self, instance, instance_name):
 | 
| ... | ... | @@ -180,6 +256,246 @@ class BuildGridServer: | 
| 180 | 256 |              instance_name (str): Instance name.
 | 
| 181 | 257 |          """
 | 
| 182 | 258 |          if self._bytestream_service is None:
 | 
| 183 | -            self._bytestream_service = ByteStreamService(self._server)
 | |
| 184 | - | |
| 259 | +            self._bytestream_service = ByteStreamService(self.__grpc_server)
 | |
| 185 | 260 |          self._bytestream_service.add_instance(instance_name, instance)
 | 
| 261 | + | |
| 262 | +    # --- Public API: Monitoring ---
 | |
| 263 | + | |
| 264 | +    @property
 | |
| 265 | +    def is_instrumented(self):
 | |
| 266 | +        return self._is_instrumented
 | |
| 267 | + | |
| 268 | +    # --- Private API ---
 | |
| 269 | + | |
| 270 | +    async def _logging_worker(self):
 | |
| 271 | +        """Publishes log records to the monitoring bus."""
 | |
| 272 | +        async def __logging_worker():
 | |
| 273 | +            log_record = await self.__logging_queue.async_q.get()
 | |
| 274 | + | |
| 275 | +            # Emit a log record:
 | |
| 276 | +            record = monitoring_pb2.LogRecord()
 | |
| 277 | +            creation_time = datetime.fromtimestamp(log_record.created)
 | |
| 278 | + | |
| 279 | +            record.creation_timestamp.FromDatetime(creation_time)
 | |
| 280 | +            record.domain = log_record.name
 | |
| 281 | +            record.level = int(log_record.levelno / 10)
 | |
| 282 | +            record.message = log_record.message
 | |
| 283 | +            # logging.LogRecord.extra must be a str to str dict:
 | |
| 284 | +            if 'extra' in log_record.__dict__ and log_record.extra:
 | |
| 285 | +                record.extra.update(log_record.extra)
 | |
| 286 | + | |
| 287 | +            await self.__monitoring_bus.send_record(record)
 | |
| 288 | + | |
| 289 | +        try:
 | |
| 290 | +            while True:
 | |
| 291 | +                await __logging_worker()
 | |
| 292 | + | |
| 293 | +        except asyncio.CancelledError:
 | |
| 294 | +            pass
 | |
| 295 | +        except BaseException as e:
 | |
| 296 | +             print(f'__logging_worker: {e}')
 | |
| 297 | + | |
| 298 | +    async def _build_monitoring_worker(self, instance_name, message_queue):
 | |
| 299 | +        """Publishes builds metadata to the monitoring bus."""
 | |
| 300 | +        async def __build_monitoring_worker():
 | |
| 301 | +            metadata = await message_queue.async_q.get()
 | |
| 302 | + | |
| 303 | +            # Emit build inputs fetching time record:
 | |
| 304 | +            fetch_start = metadata.input_fetch_start_timestamp.ToDatetime()
 | |
| 305 | +            fetch_completed = metadata.input_fetch_completed_timestamp.ToDatetime()
 | |
| 306 | +            input_fetch_time =  fetch_completed - fetch_start
 | |
| 307 | +            timer_record = self._forge_timer_metric_record(
 | |
| 308 | +                MetricRecordDomain.BUILD, 'inputs-fetching-time', input_fetch_time,
 | |
| 309 | +                extra={ 'instance-name': instance_name or 'void' })
 | |
| 310 | + | |
| 311 | +            await self.__monitoring_bus.send_record(timer_record)
 | |
| 312 | + | |
| 313 | +            # Emit build execution time record:
 | |
| 314 | +            execution_start = metadata.execution_start_timestamp.ToDatetime()
 | |
| 315 | +            execution_completed = metadata.execution_completed_timestamp.ToDatetime()
 | |
| 316 | +            execution_time =  execution_completed - execution_start
 | |
| 317 | +            timer_record = self._forge_timer_metric_record(
 | |
| 318 | +                MetricRecordDomain.BUILD, 'execution-time', execution_time,
 | |
| 319 | +                extra={ 'instance-name': instance_name or 'void' })
 | |
| 320 | + | |
| 321 | +            await self.__monitoring_bus.send_record(timer_record)
 | |
| 322 | + | |
| 323 | +            # Emit build outputs uploading time record:
 | |
| 324 | +            upload_start = metadata.output_upload_start_timestamp.ToDatetime()
 | |
| 325 | +            upload_completed = metadata.output_upload_completed_timestamp.ToDatetime()
 | |
| 326 | +            output_upload_time =  upload_completed - upload_start
 | |
| 327 | +            timer_record = self._forge_timer_metric_record(
 | |
| 328 | +                MetricRecordDomain.BUILD, 'outputs-uploading-time', output_upload_time,
 | |
| 329 | +                extra={ 'instance-name': instance_name or 'void' })
 | |
| 330 | + | |
| 331 | +            await self.__monitoring_bus.send_record(timer_record)
 | |
| 332 | + | |
| 333 | +            # Emit total build handling time record:
 | |
| 334 | +            queued = metadata.queued_timestamp.ToDatetime()
 | |
| 335 | +            worker_completed = metadata.worker_completed_timestamp.ToDatetime()
 | |
| 336 | +            total_handling_time =  worker_completed - queued
 | |
| 337 | +            timer_record = self._forge_timer_metric_record(
 | |
| 338 | +                MetricRecordDomain.BUILD, 'total-handling-time', total_handling_time,
 | |
| 339 | +                extra={ 'instance-name': instance_name or 'void' })
 | |
| 340 | + | |
| 341 | +            await self.__monitoring_bus.send_record(timer_record)
 | |
| 342 | + | |
| 343 | +        try:
 | |
| 344 | +            while True:
 | |
| 345 | +                await __build_monitoring_worker()
 | |
| 346 | + | |
| 347 | +        except asyncio.CancelledError:
 | |
| 348 | +            pass
 | |
| 349 | +        except BaseException as e:
 | |
| 350 | +             print(f'__build_monitoring_worker: {e}')
 | |
| 351 | + | |
| 352 | +    async def _state_monitoring_worker(self, period=1.0):
 | |
| 353 | +        """Periodically publishes state metrics to the monitoring bus."""
 | |
| 354 | +        async def __state_monitoring_worker():
 | |
| 355 | +            # Emit total clients count record:
 | |
| 356 | +            _, record = self._query_n_clients()
 | |
| 357 | +            await self.__monitoring_bus.send_record(record)
 | |
| 358 | + | |
| 359 | +            # Emit total bots count record:
 | |
| 360 | +            _, record = self._query_n_bots()
 | |
| 361 | +            await self.__monitoring_bus.send_record(record)
 | |
| 362 | + | |
| 363 | +            queue_times = []
 | |
| 364 | +            # Emits records by instance:
 | |
| 365 | +            for instance_name in self._instances:
 | |
| 366 | +                # Emit instance clients count record:
 | |
| 367 | +                _, record = self._query_n_clients_for_instance(instance_name)
 | |
| 368 | +                await self.__monitoring_bus.send_record(record)
 | |
| 369 | + | |
| 370 | +                # Emit instance bots count record:
 | |
| 371 | +                _, record = self._query_n_bots_for_instance(instance_name)
 | |
| 372 | +                await self.__monitoring_bus.send_record(record)
 | |
| 373 | + | |
| 374 | +                # Emit instance average queue time record:
 | |
| 375 | +                queue_time, record = self._query_am_queue_time_for_instance(instance_name)
 | |
| 376 | +                await self.__monitoring_bus.send_record(record)
 | |
| 377 | +                if queue_time:
 | |
| 378 | +                    queue_times.append(queue_time)
 | |
| 379 | + | |
| 380 | +            # Emit overall average queue time record:
 | |
| 381 | +            if len(queue_times) > 0:
 | |
| 382 | +                am_queue_time = sum(queue_times, timedelta()) / len(queue_times)
 | |
| 383 | +            else:
 | |
| 384 | +                am_queue_time = timedelta()
 | |
| 385 | +            record = self._forge_timer_metric_record(
 | |
| 386 | +                MetricRecordDomain.STATE,
 | |
| 387 | +                'average-queue-time',
 | |
| 388 | +                am_queue_time)
 | |
| 389 | + | |
| 390 | +            await self.__monitoring_bus.send_record(record)
 | |
| 391 | + | |
| 392 | +            print('---')
 | |
| 393 | +            n_clients = self._execution_service.query_n_clients()
 | |
| 394 | +            n_bots = self._bots_service.query_n_bots()
 | |
| 395 | +            print('Totals: n_clients={}, n_bots={}, am_queue_time={}'
 | |
| 396 | +                  .format(n_clients, n_bots, am_queue_time))
 | |
| 397 | +            print('Per instances:')
 | |
| 398 | +            for instance_name in self._instances:
 | |
| 399 | +                n_clients = self._execution_service.query_n_clients_for_instance(instance_name)
 | |
| 400 | +                n_bots = self._bots_service.query_n_bots_for_instance(instance_name)
 | |
| 401 | +                am_queue_time = self._execution_service.get_scheduler(instance_name).query_am_queue_time()
 | |
| 402 | +                instance_name = instance_name or 'void'
 | |
| 403 | +                print(' - {}: n_clients={}, n_bots={}, am_queue_time={}'
 | |
| 404 | +                      .format(instance_name, n_clients, n_bots, am_queue_time))
 | |
| 405 | +            print('---')
 | |
| 406 | + | |
| 407 | +        try:
 | |
| 408 | +            while True:
 | |
| 409 | +                start = time.time()
 | |
| 410 | +                await __state_monitoring_worker()
 | |
| 411 | + | |
| 412 | +                end = time.time()
 | |
| 413 | +                await asyncio.sleep(period - (end - start))
 | |
| 414 | + | |
| 415 | +        except asyncio.CancelledError:
 | |
| 416 | +            pass
 | |
| 417 | +        except BaseException as e:
 | |
| 418 | +             print(f'__state_monitoring_worker: {e}')
 | |
| 419 | + | |
| 420 | +    def _forge_counter_metric_record(self, domain, name, count, extra=None):
 | |
| 421 | +        counter_record = monitoring_pb2.MetricRecord()
 | |
| 422 | + | |
| 423 | +        counter_record.creation_timestamp.GetCurrentTime()
 | |
| 424 | +        counter_record.domain = domain.value
 | |
| 425 | +        counter_record.type = MetricRecordType.COUNTER.value
 | |
| 426 | +        counter_record.name = name
 | |
| 427 | +        counter_record.count = count
 | |
| 428 | +        if extra is not None:
 | |
| 429 | +            counter_record.extra.update(extra)
 | |
| 430 | + | |
| 431 | +        return counter_record
 | |
| 432 | + | |
| 433 | +    def _forge_timer_metric_record(self, domain, name, duration, extra=None):
 | |
| 434 | +        timer_record = monitoring_pb2.MetricRecord()
 | |
| 435 | + | |
| 436 | +        timer_record.creation_timestamp.GetCurrentTime()
 | |
| 437 | +        timer_record.domain = domain.value
 | |
| 438 | +        timer_record.type = MetricRecordType.TIMER.value
 | |
| 439 | +        timer_record.name = name
 | |
| 440 | +        timer_record.duration.FromTimedelta(duration)
 | |
| 441 | +        if extra is not None:
 | |
| 442 | +            timer_record.extra.update(extra)
 | |
| 443 | + | |
| 444 | +        return timer_record
 | |
| 445 | + | |
| 446 | +    def _forge_gauge_metric_record(self, domain, name, value, extra=None):
 | |
| 447 | +        gauge_record = monitoring_pb2.MetricRecord()
 | |
| 448 | + | |
| 449 | +        gauge_record.creation_timestamp.GetCurrentTime()
 | |
| 450 | +        gauge_record.domain = domain.value
 | |
| 451 | +        gauge_record.type = MetricRecordType.GAUGE.value
 | |
| 452 | +        gauge_record.name = name
 | |
| 453 | +        gauge_record.value = value
 | |
| 454 | +        if extra is not None:
 | |
| 455 | +            gauge_record.extra.update(extra)
 | |
| 456 | + | |
| 457 | +        return gauge_record
 | |
| 458 | + | |
| 459 | +    # --- Private API: Monitoring ---
 | |
| 460 | + | |
| 461 | +    def _query_n_clients(self):
 | |
| 462 | +        """Queries the number of clients connected."""
 | |
| 463 | +        n_clients = self._execution_service.query_n_clients()
 | |
| 464 | +        gauge_record = self._forge_gauge_metric_record(
 | |
| 465 | +            MetricRecordDomain.STATE, 'clients-count', n_clients)
 | |
| 466 | + | |
| 467 | +        return n_clients, gauge_record
 | |
| 468 | +    def _query_n_clients_for_instance(self, instance_name):
 | |
| 469 | +        """Queries the number of clients connected for a given instance."""
 | |
| 470 | +        n_clients = self._execution_service.query_n_clients_for_instance(instance_name)
 | |
| 471 | +        gauge_record = self._forge_gauge_metric_record(
 | |
| 472 | +            MetricRecordDomain.STATE, 'clients-count', n_clients,
 | |
| 473 | +            extra={ 'instance-name': instance_name or 'void' })
 | |
| 474 | + | |
| 475 | +        return n_clients, gauge_record
 | |
| 476 | + | |
| 477 | +    def _query_n_bots(self):
 | |
| 478 | +        """Queries the number of bots connected."""
 | |
| 479 | +        n_bots = self._bots_service.query_n_bots()
 | |
| 480 | +        gauge_record = self._forge_gauge_metric_record(
 | |
| 481 | +            MetricRecordDomain.STATE, 'bots-count', n_bots)
 | |
| 482 | + | |
| 483 | +        return n_bots, gauge_record
 | |
| 484 | + | |
| 485 | +    def _query_n_bots_for_instance(self, instance_name):
 | |
| 486 | +        """Queries the number of bots connected for a given instance."""
 | |
| 487 | +        n_bots = self._bots_service.query_n_bots_for_instance(instance_name)
 | |
| 488 | +        gauge_record = self._forge_gauge_metric_record(
 | |
| 489 | +            MetricRecordDomain.STATE, 'bots-count', n_bots,
 | |
| 490 | +            extra={ 'instance-name': instance_name or 'void' })
 | |
| 491 | + | |
| 492 | +        return n_bots, gauge_record
 | |
| 493 | + | |
| 494 | +    def _query_am_queue_time_for_instance(self, instance_name):
 | |
| 495 | +        """Queries the average job's queue time for a given instance."""
 | |
| 496 | +        am_queue_time = self._schedulers[instance_name].query_am_queue_time()
 | |
| 497 | +        timer_record = self._forge_timer_metric_record(
 | |
| 498 | +            MetricRecordDomain.STATE, 'average-queue-time', am_queue_time,
 | |
| 499 | +            extra={ 'instance-name': instance_name or 'void' })
 | |
| 500 | + | |
| 501 | +        return am_queue_time, timer_record | 
| ... | ... | @@ -13,10 +13,11 @@ | 
| 13 | 13 |  # limitations under the License.
 | 
| 14 | 14 |  | 
| 15 | 15 |  | 
| 16 | +from datetime import datetime
 | |
| 16 | 17 |  import logging
 | 
| 17 | 18 |  import uuid
 | 
| 18 | 19 |  | 
| 19 | -from google.protobuf import timestamp_pb2
 | |
| 20 | +from google.protobuf import duration_pb2, timestamp_pb2
 | |
| 20 | 21 |  | 
| 21 | 22 |  from buildgrid._enums import LeaseState, OperationStage
 | 
| 22 | 23 |  from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
 | 
| ... | ... | @@ -37,6 +38,7 @@ class Job: | 
| 37 | 38 |          self.__execute_response = None
 | 
| 38 | 39 |          self.__operation_metadata = remote_execution_pb2.ExecuteOperationMetadata()
 | 
| 39 | 40 |          self.__queued_timestamp = timestamp_pb2.Timestamp()
 | 
| 41 | +        self.__queued_time_duration = duration_pb2.Duration()
 | |
| 40 | 42 |          self.__worker_start_timestamp = timestamp_pb2.Timestamp()
 | 
| 41 | 43 |          self.__worker_completed_timestamp = timestamp_pb2.Timestamp()
 | 
| 42 | 44 |  | 
| ... | ... | @@ -50,6 +52,8 @@ class Job: | 
| 50 | 52 |          self._operation.done = False
 | 
| 51 | 53 |          self._n_tries = 0
 | 
| 52 | 54 |  | 
| 55 | +    # --- Public API ---
 | |
| 56 | + | |
| 53 | 57 |      @property
 | 
| 54 | 58 |      def name(self):
 | 
| 55 | 59 |          return self._name
 | 
| ... | ... | @@ -73,6 +77,13 @@ class Job: | 
| 73 | 77 |          else:
 | 
| 74 | 78 |              return None
 | 
| 75 | 79 |  | 
| 80 | +    @property
 | |
| 81 | +    def holds_cached_action_result(self):
 | |
| 82 | +        if self.__execute_response is not None:
 | |
| 83 | +            return self.__execute_response.cached_result
 | |
| 84 | +        else:
 | |
| 85 | +            return False
 | |
| 86 | + | |
| 76 | 87 |      @property
 | 
| 77 | 88 |      def operation(self):
 | 
| 78 | 89 |          return self._operation
 | 
| ... | ... | @@ -179,7 +190,7 @@ class Job: | 
| 179 | 190 |                  result.Unpack(action_result)
 | 
| 180 | 191 |  | 
| 181 | 192 |              action_metadata = action_result.execution_metadata
 | 
| 182 | -            action_metadata.queued_timestamp.CopyFrom(self.__worker_start_timestamp)
 | |
| 193 | +            action_metadata.queued_timestamp.CopyFrom(self.__queued_timestamp)
 | |
| 183 | 194 |              action_metadata.worker_start_timestamp.CopyFrom(self.__worker_start_timestamp)
 | 
| 184 | 195 |              action_metadata.worker_completed_timestamp.CopyFrom(self.__worker_completed_timestamp)
 | 
| 185 | 196 |  | 
| ... | ... | @@ -204,6 +215,10 @@ class Job: | 
| 204 | 215 |                  self.__queued_timestamp.GetCurrentTime()
 | 
| 205 | 216 |              self._n_tries += 1
 | 
| 206 | 217 |  | 
| 218 | +        elif self.__operation_metadata.stage == OperationStage.EXECUTING.value:
 | |
| 219 | +            queue_in, queue_out = self.__queued_timestamp.ToDatetime(), datetime.now()
 | |
| 220 | +            self.__queued_time_duration.FromTimedelta(queue_out - queue_in)
 | |
| 221 | + | |
| 207 | 222 |          elif self.__operation_metadata.stage == OperationStage.COMPLETED.value:
 | 
| 208 | 223 |              if self.__execute_response is not None:
 | 
| 209 | 224 |                  self._operation.response.Pack(self.__execute_response)
 | 
| ... | ... | @@ -213,3 +228,11 @@ class Job: | 
| 213 | 228 |  | 
| 214 | 229 |          for queue in self._operation_update_queues:
 | 
| 215 | 230 |              queue.put(self._operation)
 | 
| 231 | + | |
| 232 | +    # --- Public API: Monitoring ---
 | |
| 233 | + | |
| 234 | +    def query_queue_time(self):
 | |
| 235 | +        return self.__queued_time_duration.ToTimedelta()
 | |
| 236 | + | |
| 237 | +    def query_n_retries(self):
 | |
| 238 | +        return self._n_tries - 1 if self._n_tries > 0 else 0 | 
| ... | ... | @@ -32,6 +32,10 @@ class OperationsInstance: | 
| 32 | 32 |  | 
| 33 | 33 |          self._scheduler = scheduler
 | 
| 34 | 34 |  | 
| 35 | +    @property
 | |
| 36 | +    def scheduler(self):
 | |
| 37 | +        return self._scheduler
 | |
| 38 | + | |
| 35 | 39 |      def register_instance_with_server(self, instance_name, server):
 | 
| 36 | 40 |          server.add_operations_instance(self, instance_name)
 | 
| 37 | 41 |  | 
| ... | ... | @@ -38,8 +38,18 @@ class OperationsService(operations_pb2_grpc.OperationsServicer): | 
| 38 | 38 |  | 
| 39 | 39 |          operations_pb2_grpc.add_OperationsServicer_to_server(self, server)
 | 
| 40 | 40 |  | 
| 41 | -    def add_instance(self, name, instance):
 | |
| 42 | -        self._instances[name] = instance
 | |
| 41 | +    # --- Public API ---
 | |
| 42 | + | |
| 43 | +    def add_instance(self, instance_name, instance):
 | |
| 44 | +        """Registers a new servicer instance.
 | |
| 45 | + | |
| 46 | +        Args:
 | |
| 47 | +            instance_name (str): The new instance's name.
 | |
| 48 | +            instance (OperationsInstance): The new instance itself.
 | |
| 49 | +        """
 | |
| 50 | +        self._instances[instance_name] = instance
 | |
| 51 | + | |
| 52 | +    # --- Public API: Servicer ---
 | |
| 43 | 53 |  | 
| 44 | 54 |      def GetOperation(self, request, context):
 | 
| 45 | 55 |          self.__logger.debug("GetOperation request from [%s]", context.peer())
 | 
| ... | ... | @@ -132,6 +142,8 @@ class OperationsService(operations_pb2_grpc.OperationsServicer): | 
| 132 | 142 |  | 
| 133 | 143 |          return Empty()
 | 
| 134 | 144 |  | 
| 145 | +    # --- Private API ---
 | |
| 146 | + | |
| 135 | 147 |      def _parse_instance_name(self, name):
 | 
| 136 | 148 |          """ If the instance name is not blank, 'name' will have the form
 | 
| 137 | 149 |          {instance_name}/{operation_uuid}. Otherwise, it will just be
 | 
| ... | ... | @@ -20,24 +20,40 @@ Schedules jobs. | 
| 20 | 20 |  """
 | 
| 21 | 21 |  | 
| 22 | 22 |  from collections import deque
 | 
| 23 | +from datetime import timedelta
 | |
| 23 | 24 |  import logging
 | 
| 24 | 25 |  | 
| 26 | +from buildgrid._enums import LeaseState, OperationStage
 | |
| 25 | 27 |  from buildgrid._exceptions import NotFoundError
 | 
| 26 | 28 |  | 
| 27 | -from .job import OperationStage, LeaseState
 | |
| 28 | - | |
| 29 | 29 |  | 
| 30 | 30 |  class Scheduler:
 | 
| 31 | 31 |  | 
| 32 | 32 |      MAX_N_TRIES = 5
 | 
| 33 | 33 |  | 
| 34 | -    def __init__(self, action_cache=None):
 | |
| 34 | +    def __init__(self, action_cache=None, monitor=True):
 | |
| 35 | 35 |          self.__logger = logging.getLogger(__name__)
 | 
| 36 | 36 |  | 
| 37 | +        self.__build_metadata_queues = None
 | |
| 38 | +        self.__queue_times_by_priority = None
 | |
| 39 | +        self.__queue_time_average = None
 | |
| 40 | +        self.__retries_by_error = None
 | |
| 41 | +        self.__retries_count = 0
 | |
| 42 | + | |
| 37 | 43 |          self._action_cache = action_cache
 | 
| 38 | 44 |          self.jobs = {}
 | 
| 39 | 45 |          self.queue = deque()
 | 
| 40 | 46 |  | 
| 47 | +        self._is_instrumented = monitor
 | |
| 48 | + | |
| 49 | +        if self._is_instrumented:
 | |
| 50 | +            self.__build_metadata_queues = []
 | |
| 51 | +            self.__queue_time_average = 0, timedelta()
 | |
| 52 | +            self.__queue_times_by_priority = {}
 | |
| 53 | +            self.__retries_by_error = {}
 | |
| 54 | + | |
| 55 | +    # --- Public API ---
 | |
| 56 | + | |
| 41 | 57 |      def register_client(self, job_name, queue):
 | 
| 42 | 58 |          self.jobs[job_name].register_client(queue)
 | 
| 43 | 59 |  | 
| ... | ... | @@ -66,18 +82,22 @@ class Scheduler: | 
| 66 | 82 |              operation_stage = OperationStage.QUEUED
 | 
| 67 | 83 |              self.queue.append(job)
 | 
| 68 | 84 |  | 
| 69 | -        job.update_operation_stage(operation_stage)
 | |
| 85 | +        self._update_job_operation_stage(job.name, operation_stage)
 | |
| 70 | 86 |  | 
| 71 | 87 |      def retry_job(self, job_name):
 | 
| 72 | -        if job_name in self.jobs:
 | |
| 73 | -            job = self.jobs[job_name]
 | |
| 74 | -            if job.n_tries >= self.MAX_N_TRIES:
 | |
| 75 | -                # TODO: Decide what to do with these jobs
 | |
| 76 | -                job.update_operation_stage(OperationStage.COMPLETED)
 | |
| 77 | -                # TODO: Mark these jobs as done
 | |
| 78 | -            else:
 | |
| 79 | -                job.update_operation_stage(OperationStage.QUEUED)
 | |
| 80 | -                self.queue.appendleft(job)
 | |
| 88 | +        job = self.jobs[job_name]
 | |
| 89 | + | |
| 90 | +        operation_stage = None
 | |
| 91 | +        if job.n_tries >= self.MAX_N_TRIES:
 | |
| 92 | +            # TODO: Decide what to do with these jobs
 | |
| 93 | +            operation_stage = OperationStage.COMPLETED
 | |
| 94 | +            # TODO: Mark these jobs as done
 | |
| 95 | + | |
| 96 | +        else:
 | |
| 97 | +            operation_stage = OperationStage.QUEUED
 | |
| 98 | +            self.queue.appendleft(job)
 | |
| 99 | + | |
| 100 | +        self._update_job_operation_stage(job_name, operation_stage)
 | |
| 81 | 101 |  | 
| 82 | 102 |      def list_jobs(self):
 | 
| 83 | 103 |          return self.jobs.values()
 | 
| ... | ... | @@ -112,13 +132,14 @@ class Scheduler: | 
| 112 | 132 |          """
 | 
| 113 | 133 |          job = self.jobs[job_name]
 | 
| 114 | 134 |  | 
| 135 | +        operation_stage = None
 | |
| 115 | 136 |          if lease_state == LeaseState.PENDING:
 | 
| 116 | 137 |              job.update_lease_state(LeaseState.PENDING)
 | 
| 117 | -            job.update_operation_stage(OperationStage.QUEUED)
 | |
| 138 | +            operation_stage = OperationStage.QUEUED
 | |
| 118 | 139 |  | 
| 119 | 140 |          elif lease_state == LeaseState.ACTIVE:
 | 
| 120 | 141 |              job.update_lease_state(LeaseState.ACTIVE)
 | 
| 121 | -            job.update_operation_stage(OperationStage.EXECUTING)
 | |
| 142 | +            operation_stage = OperationStage.EXECUTING
 | |
| 122 | 143 |  | 
| 123 | 144 |          elif lease_state == LeaseState.COMPLETED:
 | 
| 124 | 145 |              job.update_lease_state(LeaseState.COMPLETED,
 | 
| ... | ... | @@ -127,7 +148,9 @@ class Scheduler: | 
| 127 | 148 |              if self._action_cache is not None and not job.do_not_cache:
 | 
| 128 | 149 |                  self._action_cache.update_action_result(job.action_digest, job.action_result)
 | 
| 129 | 150 |  | 
| 130 | -            job.update_operation_stage(OperationStage.COMPLETED)
 | |
| 151 | +            operation_stage = OperationStage.COMPLETED
 | |
| 152 | + | |
| 153 | +        self._update_job_operation_stage(job_name, operation_stage)
 | |
| 131 | 154 |  | 
| 132 | 155 |      def get_job_lease(self, job_name):
 | 
| 133 | 156 |          """Returns the lease associated to job, if any have been emitted yet."""
 | 
| ... | ... | @@ -136,3 +159,91 @@ class Scheduler: | 
| 136 | 159 |      def get_job_operation(self, job_name):
 | 
| 137 | 160 |          """Returns the operation associated to job."""
 | 
| 138 | 161 |          return self.jobs[job_name].operation
 | 
| 162 | + | |
| 163 | +    # --- Public API: Monitoring ---
 | |
| 164 | + | |
| 165 | +    @property
 | |
| 166 | +    def is_instrumented(self):
 | |
| 167 | +        return self._is_instrumented
 | |
| 168 | + | |
| 169 | +    def register_build_metadata_watcher(self, message_queue):
 | |
| 170 | +        if self.__build_metadata_queues is not None:
 | |
| 171 | +            self.__build_metadata_queues.append(message_queue)
 | |
| 172 | + | |
| 173 | +    def query_n_jobs(self):
 | |
| 174 | +        return len(self.jobs)
 | |
| 175 | + | |
| 176 | +    def query_n_operations(self):
 | |
| 177 | +        return len(self.jobs)
 | |
| 178 | + | |
| 179 | +    def query_n_operations_by_stage(self):
 | |
| 180 | +        return len(self.jobs)
 | |
| 181 | + | |
| 182 | +    def query_n_leases(self):
 | |
| 183 | +        return len(self.jobs)
 | |
| 184 | + | |
| 185 | +    def query_n_leases_by_state(self):
 | |
| 186 | +        return len(self.jobs)
 | |
| 187 | + | |
| 188 | +    def query_n_retries(self):
 | |
| 189 | +        return self.__retries_count
 | |
| 190 | + | |
| 191 | +    def query_n_retries_for_error(self, error_type):
 | |
| 192 | +        try:
 | |
| 193 | +            if self.__retries_by_error is not None:
 | |
| 194 | +                return self.__retries_by_error[error_type]
 | |
| 195 | +        except KeyError:
 | |
| 196 | +            pass
 | |
| 197 | +        return 0
 | |
| 198 | + | |
| 199 | +    def query_am_queue_time(self):
 | |
| 200 | +        if self.__queue_time_average is not None:
 | |
| 201 | +            return self.__queue_time_average[1]
 | |
| 202 | +        return 0
 | |
| 203 | + | |
| 204 | +    def query_am_queue_time_for_priority(self, priority_level):
 | |
| 205 | +        try:
 | |
| 206 | +            if self.__queue_times_by_priority is not None:
 | |
| 207 | +                return self.__queue_times_by_priority[priority_level]
 | |
| 208 | +        except KeyError:
 | |
| 209 | +            pass
 | |
| 210 | +        return 0
 | |
| 211 | + | |
| 212 | +    # --- Private API ---
 | |
| 213 | + | |
| 214 | +    def _update_job_operation_stage(self, job_name, operation_stage):
 | |
| 215 | +        """Requests a stage transition for the job's :class:Operations.
 | |
| 216 | + | |
| 217 | +        Args:
 | |
| 218 | +            job_name (str): name of the job to query.
 | |
| 219 | +            operation_stage (OperationStage): the stage to transition to.
 | |
| 220 | +        """
 | |
| 221 | +        job = self.jobs[job_name]
 | |
| 222 | + | |
| 223 | +        if operation_stage == OperationStage.CACHE_CHECK:
 | |
| 224 | +            job.update_operation_stage(OperationStage.CACHE_CHECK)
 | |
| 225 | + | |
| 226 | +        elif operation_stage == OperationStage.QUEUED:
 | |
| 227 | +            job.update_operation_stage(OperationStage.QUEUED)
 | |
| 228 | + | |
| 229 | +        elif operation_stage == OperationStage.EXECUTING:
 | |
| 230 | +            job.update_operation_stage(OperationStage.EXECUTING)
 | |
| 231 | + | |
| 232 | +        elif operation_stage == OperationStage.COMPLETED:
 | |
| 233 | +            job.update_operation_stage(OperationStage.COMPLETED)
 | |
| 234 | + | |
| 235 | +            if self._is_instrumented:
 | |
| 236 | +                average_order, average_time = self.__queue_time_average
 | |
| 237 | + | |
| 238 | +                average_order += 1
 | |
| 239 | +                if average_order <= 1:
 | |
| 240 | +                    average_time = job.query_queue_time()
 | |
| 241 | +                else:
 | |
| 242 | +                    queue_time = job.query_queue_time()
 | |
| 243 | +                    average_time = average_time + ((queue_time - average_time) / average_order)
 | |
| 244 | + | |
| 245 | +                self.__queue_time_average = average_order, average_time
 | |
| 246 | + | |
| 247 | +                if not job.holds_cached_action_result:
 | |
| 248 | +                    for message_queue in self.__build_metadata_queues:
 | |
| 249 | +                        message_queue.put(job.action_result.execution_metadata) | 
| 1 | +# Copyright (C) 2018 Bloomberg LP
 | |
| 2 | +#
 | |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License");
 | |
| 4 | +# you may not use this file except in compliance with the License.
 | |
| 5 | +# You may obtain a copy of the License at
 | |
| 6 | +#
 | |
| 7 | +#  <http://www.apache.org/licenses/LICENSE-2.0>
 | |
| 8 | +#
 | |
| 9 | +# Unless required by applicable law or agreed to in writing, software
 | |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS,
 | |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| 12 | +# See the License for the specific language governing permissions and
 | |
| 13 | +# limitations under the License.
 | |
| 14 | + | |
| 15 | + | |
| 1 | 16 |  import hashlib
 | 
| 2 | 17 |  | 
| 3 | 18 |  | 
| 4 | -# The hash function that CAS uses
 | |
| 19 | +# Hash function used for computing digests:
 | |
| 5 | 20 |  HASH = hashlib.sha256
 | 
| 21 | + | |
| 22 | +# Length in bytes of a hash string returned by HASH:
 | |
| 6 | 23 |  HASH_LENGTH = HASH().digest_size * 2
 | 
| 24 | + | |
| 25 | +# Period, in seconds, for the monitoring cycle:
 | |
| 26 | +MONITORING_PERIOD = 5.0
 | |
| 27 | + | |
| 28 | +# String format for log records:
 | |
| 29 | +LOG_RECORD_FORMAT = '%(asctime)s:%(name)36.36s][%(levelname)5.5s]: %(message)s'
 | |
| 30 | +# The different log record attributes are documented here:
 | |
| 31 | +# https://docs.python.org/3/library/logging.html#logrecord-attributes | 
| ... | ... | @@ -112,13 +112,15 @@ setup( | 
| 112 | 112 |      license="Apache License, Version 2.0",
 | 
| 113 | 113 |      description="A remote execution service",
 | 
| 114 | 114 |      packages=find_packages(),
 | 
| 115 | +    python_requires='>= 3.5.3',  # janus requirement
 | |
| 115 | 116 |      install_requires=[
 | 
| 116 | -        'protobuf',
 | |
| 117 | -        'grpcio',
 | |
| 118 | -        'Click',
 | |
| 119 | -        'PyYAML',
 | |
| 120 | 117 |          'boto3 < 1.8.0',
 | 
| 121 | 118 |          'botocore < 1.11.0',
 | 
| 119 | +        'click',
 | |
| 120 | +        'grpcio',
 | |
| 121 | +        'janus',
 | |
| 122 | +        'protobuf',
 | |
| 123 | +        'pyyaml',
 | |
| 122 | 124 |      ],
 | 
| 123 | 125 |      entry_points={
 | 
| 124 | 126 |          'console_scripts': [
 | 
