Martin Blanchard pushed to branch mablanch/83-executed-action-metadata at BuildGrid / buildgrid
Commits:
-
79cd4f43
by Martin Blanchard at 2018-10-26T10:41:26Z
-
687469de
by Martin Blanchard at 2018-10-26T10:48:44Z
4 changed files:
- buildgrid/_app/commands/cmd_operation.py
- + buildgrid/_enums.py
- buildgrid/bot/bot_session.py
- buildgrid/server/job.py
Changes:
| ... | ... | @@ -20,15 +20,21 @@ Operations command |
| 20 | 20 |
Check the status of operations
|
| 21 | 21 |
"""
|
| 22 | 22 |
|
| 23 |
+from collections import OrderedDict
|
|
| 23 | 24 |
import logging
|
| 25 |
+from operator import attrgetter
|
|
| 24 | 26 |
from urllib.parse import urlparse
|
| 25 | 27 |
import sys
|
| 28 |
+from textwrap import indent
|
|
| 26 | 29 |
|
| 27 | 30 |
import click
|
| 31 |
+from google.protobuf import json_format
|
|
| 28 | 32 |
import grpc
|
| 29 | 33 |
|
| 34 |
+from buildgrid._enums import OperationStage
|
|
| 30 | 35 |
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
|
| 31 | 36 |
from buildgrid._protos.google.longrunning import operations_pb2, operations_pb2_grpc
|
| 37 |
+from buildgrid._protos.google.rpc import code_pb2
|
|
| 32 | 38 |
|
| 33 | 39 |
from ..cli import pass_context
|
| 34 | 40 |
|
| ... | ... | @@ -65,45 +71,145 @@ def cli(context, remote, instance_name, client_key, client_cert, server_cert): |
| 65 | 71 |
context.logger.debug("Starting for remote {}".format(context.remote))
|
| 66 | 72 |
|
| 67 | 73 |
|
| 74 |
+def _print_operation_status(operation, print_details=False):
|
|
| 75 |
+ metadata = remote_execution_pb2.ExecuteOperationMetadata()
|
|
| 76 |
+ # The metadata is expected to be an ExecuteOperationMetadata message:
|
|
| 77 |
+ assert operation.metadata.Is(metadata.DESCRIPTOR)
|
|
| 78 |
+ operation.metadata.Unpack(metadata)
|
|
| 79 |
+ |
|
| 80 |
+ stage = OperationStage(metadata.stage)
|
|
| 81 |
+ |
|
| 82 |
+ if not operation.done:
|
|
| 83 |
+ if stage == OperationStage.CACHE_CHECK:
|
|
| 84 |
+ click.echo('CacheCheck: {}: Querying action-cache (stage={})'
|
|
| 85 |
+ .format(operation.name, metadata.stage))
|
|
| 86 |
+ elif stage == OperationStage.QUEUED:
|
|
| 87 |
+ click.echo('Queued: {}: Waiting for execution (stage={})'
|
|
| 88 |
+ .format(operation.name, metadata.stage))
|
|
| 89 |
+ elif stage == OperationStage.EXECUTING:
|
|
| 90 |
+ click.echo('Executing: {}: Currently running (stage={})'
|
|
| 91 |
+ .format(operation.name, metadata.stage))
|
|
| 92 |
+ else:
|
|
| 93 |
+ click.echo('Error: {}: In an invalid state (stage={})'
|
|
| 94 |
+ .format(operation.name, metadata.stage), err=True)
|
|
| 95 |
+ return
|
|
| 96 |
+ |
|
| 97 |
+ assert stage == OperationStage.COMPLETED
|
|
| 98 |
+ |
|
| 99 |
+ response = remote_execution_pb2.ExecuteResponse()
|
|
| 100 |
+ # The response is expected to be an ExecuteResponse message:
|
|
| 101 |
+ assert operation.response.Is(response.DESCRIPTOR)
|
|
| 102 |
+ operation.response.Unpack(response)
|
|
| 103 |
+ |
|
| 104 |
+ if response.status.code != code_pb2.OK:
|
|
| 105 |
+ click.echo('Failure: {}: {} (code={})'
|
|
| 106 |
+ .format(operation.name, response.status.message, response.status.code))
|
|
| 107 |
+ else:
|
|
| 108 |
+ if response.result.exit_code != 0:
|
|
| 109 |
+ click.echo('Success: {}: Completed with failure (stage={}, exit_code={})'
|
|
| 110 |
+ .format(operation.name, metadata.stage, response.result.exit_code))
|
|
| 111 |
+ else:
|
|
| 112 |
+ click.echo('Success: {}: Completed succesfully (stage={}, exit_code={})'
|
|
| 113 |
+ .format(operation.name, metadata.stage, response.result.exit_code))
|
|
| 114 |
+ |
|
| 115 |
+ if print_details:
|
|
| 116 |
+ metadata = response.result.execution_metadata
|
|
| 117 |
+ click.echo(indent('worker={}'.format(metadata.worker), ' '))
|
|
| 118 |
+ |
|
| 119 |
+ queued = metadata.queued_timestamp.ToDatetime()
|
|
| 120 |
+ click.echo(indent('queued_at={}'.format(queued), ' '))
|
|
| 121 |
+ |
|
| 122 |
+ worker_start = metadata.worker_start_timestamp.ToDatetime()
|
|
| 123 |
+ worker_completed = metadata.worker_completed_timestamp.ToDatetime()
|
|
| 124 |
+ click.echo(indent('work_duration={}'.format(worker_completed - worker_start), ' '))
|
|
| 125 |
+ |
|
| 126 |
+ fetch_start = metadata.input_fetch_start_timestamp.ToDatetime()
|
|
| 127 |
+ fetch_completed = metadata.input_fetch_completed_timestamp.ToDatetime()
|
|
| 128 |
+ click.echo(indent('fetch_duration={}'.format(fetch_completed - fetch_start), ' '))
|
|
| 129 |
+ |
|
| 130 |
+ execution_start = metadata.execution_start_timestamp.ToDatetime()
|
|
| 131 |
+ execution_completed = metadata.execution_completed_timestamp.ToDatetime()
|
|
| 132 |
+ click.echo(indent('exection_duration={}'.format(execution_completed - execution_start), ' '))
|
|
| 133 |
+ |
|
| 134 |
+ upload_start = metadata.output_upload_start_timestamp.ToDatetime()
|
|
| 135 |
+ upload_completed = metadata.output_upload_completed_timestamp.ToDatetime()
|
|
| 136 |
+ click.echo(indent('upload_duration={}'.format(upload_completed - upload_start), ' '))
|
|
| 137 |
+ |
|
| 138 |
+ click.echo(indent('total_duration={}'.format(worker_completed - queued), ' '))
|
|
| 139 |
+ |
|
| 140 |
+ |
|
| 68 | 141 |
@cli.command('status', short_help="Get the status of an operation.")
|
| 69 | 142 |
@click.argument('operation-name', nargs=1, type=click.STRING, required=True)
|
| 143 |
+@click.option('--json', is_flag=True, show_default=True,
|
|
| 144 |
+ help="Print operations status in JSON format.")
|
|
| 70 | 145 |
@pass_context
|
| 71 |
-def status(context, operation_name):
|
|
| 72 |
- context.logger.info("Getting operation status...")
|
|
| 146 |
+def status(context, operation_name, json):
|
|
| 73 | 147 |
stub = operations_pb2_grpc.OperationsStub(context.channel)
|
| 74 |
- |
|
| 75 | 148 |
request = operations_pb2.GetOperationRequest(name=operation_name)
|
| 76 | 149 |
|
| 77 |
- response = stub.GetOperation(request)
|
|
| 78 |
- context.logger.info(response)
|
|
| 150 |
+ operation = stub.GetOperation(request)
|
|
| 151 |
+ |
|
| 152 |
+ if not json:
|
|
| 153 |
+ _print_operation_status(operation, print_details=True)
|
|
| 154 |
+ else:
|
|
| 155 |
+ click.echo(json_format.MessageToJson(operation))
|
|
| 79 | 156 |
|
| 80 | 157 |
|
| 81 | 158 |
@cli.command('list', short_help="List operations.")
|
| 159 |
+@click.option('--json', is_flag=True, show_default=True,
|
|
| 160 |
+ help="Print operations list in JSON format.")
|
|
| 82 | 161 |
@pass_context
|
| 83 |
-def lists(context):
|
|
| 84 |
- context.logger.info("Getting list of operations")
|
|
| 162 |
+def lists(context, json):
|
|
| 85 | 163 |
stub = operations_pb2_grpc.OperationsStub(context.channel)
|
| 86 |
- |
|
| 87 | 164 |
request = operations_pb2.ListOperationsRequest(name=context.instance_name)
|
| 88 | 165 |
|
| 89 | 166 |
response = stub.ListOperations(request)
|
| 90 | 167 |
|
| 91 | 168 |
if not response.operations:
|
| 92 |
- context.logger.warning("No operations to list")
|
|
| 169 |
+ click.echo('Error: No operations to list.', err=True)
|
|
| 93 | 170 |
return
|
| 94 | 171 |
|
| 95 |
- for op in response.operations:
|
|
| 96 |
- context.logger.info(op)
|
|
| 172 |
+ operations_map = OrderedDict([
|
|
| 173 |
+ (OperationStage.CACHE_CHECK, []),
|
|
| 174 |
+ (OperationStage.QUEUED, []),
|
|
| 175 |
+ (OperationStage.EXECUTING, []),
|
|
| 176 |
+ (OperationStage.COMPLETED, [])
|
|
| 177 |
+ ])
|
|
| 178 |
+ |
|
| 179 |
+ for operation in response.operations:
|
|
| 180 |
+ metadata = remote_execution_pb2.ExecuteOperationMetadata()
|
|
| 181 |
+ # The metadata is expected to be an ExecuteOperationMetadata message:
|
|
| 182 |
+ assert operation.metadata.Is(metadata.DESCRIPTOR)
|
|
| 183 |
+ operation.metadata.Unpack(metadata)
|
|
| 184 |
+ |
|
| 185 |
+ stage = OperationStage(metadata.stage)
|
|
| 186 |
+ |
|
| 187 |
+ operations_map[stage].append(operation)
|
|
| 188 |
+ |
|
| 189 |
+ for operations in operations_map.values():
|
|
| 190 |
+ operations.sort(key=attrgetter('name'))
|
|
| 191 |
+ for operation in operations:
|
|
| 192 |
+ if not json:
|
|
| 193 |
+ _print_operation_status(operation)
|
|
| 194 |
+ else:
|
|
| 195 |
+ click.echo(json_format.MessageToJson(operation))
|
|
| 97 | 196 |
|
| 98 | 197 |
|
| 99 | 198 |
@cli.command('wait', short_help="Streams an operation until it is complete.")
|
| 100 | 199 |
@click.argument('operation-name', nargs=1, type=click.STRING, required=True)
|
| 200 |
+@click.option('--json', is_flag=True, show_default=True,
|
|
| 201 |
+ help="Print operations statuses in JSON format.")
|
|
| 101 | 202 |
@pass_context
|
| 102 |
-def wait(context, operation_name):
|
|
| 203 |
+def wait(context, operation_name, json):
|
|
| 103 | 204 |
stub = remote_execution_pb2_grpc.ExecutionStub(context.channel)
|
| 104 | 205 |
request = remote_execution_pb2.WaitExecutionRequest(name=operation_name)
|
| 105 | 206 |
|
| 106 |
- response = stub.WaitExecution(request)
|
|
| 207 |
+ operation_iterator = stub.WaitExecution(request)
|
|
| 107 | 208 |
|
| 108 |
- for stream in response:
|
|
| 109 |
- context.logger.info(stream)
|
|
| 209 |
+ for operation in operation_iterator:
|
|
| 210 |
+ if not json and operation.done:
|
|
| 211 |
+ _print_operation_status(operation, print_details=True)
|
|
| 212 |
+ elif not json:
|
|
| 213 |
+ _print_operation_status(operation)
|
|
| 214 |
+ else:
|
|
| 215 |
+ click.echo(json_format.MessageToJson(operation))
|
| 1 |
+# Copyright (C) 2018 Bloomberg LP
|
|
| 2 |
+#
|
|
| 3 |
+# Licensed under the Apache License, Version 2.0 (the "License");
|
|
| 4 |
+# you may not use this file except in compliance with the License.
|
|
| 5 |
+# You may obtain a copy of the License at
|
|
| 6 |
+#
|
|
| 7 |
+# <http://www.apache.org/licenses/LICENSE-2.0>
|
|
| 8 |
+#
|
|
| 9 |
+# Unless required by applicable law or agreed to in writing, software
|
|
| 10 |
+# distributed under the License is distributed on an "AS IS" BASIS,
|
|
| 11 |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
| 12 |
+# See the License for the specific language governing permissions and
|
|
| 13 |
+# limitations under the License.
|
|
| 14 |
+ |
|
| 15 |
+ |
|
| 16 |
+from enum import Enum
|
|
| 17 |
+ |
|
| 18 |
+from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
|
| 19 |
+from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
|
|
| 20 |
+ |
|
| 21 |
+ |
|
| 22 |
+class BotStatus(Enum):
|
|
| 23 |
+ # Initially unknown state.
|
|
| 24 |
+ BOT_STATUS_UNSPECIFIED = bots_pb2.BotStatus.Value('BOT_STATUS_UNSPECIFIED')
|
|
| 25 |
+ # The bot is healthy, and will accept leases as normal.
|
|
| 26 |
+ OK = bots_pb2.BotStatus.Value('OK')
|
|
| 27 |
+ # The bot is unhealthy and will not accept new leases.
|
|
| 28 |
+ UNHEALTHY = bots_pb2.BotStatus.Value('UNHEALTHY')
|
|
| 29 |
+ # The bot has been asked to reboot the host.
|
|
| 30 |
+ HOST_REBOOTING = bots_pb2.BotStatus.Value('HOST_REBOOTING')
|
|
| 31 |
+ # The bot has been asked to shut down.
|
|
| 32 |
+ BOT_TERMINATING = bots_pb2.BotStatus.Value('BOT_TERMINATING')
|
|
| 33 |
+ |
|
| 34 |
+ |
|
| 35 |
+class LeaseState(Enum):
|
|
| 36 |
+ # Initially unknown state.
|
|
| 37 |
+ LEASE_STATE_UNSPECIFIED = bots_pb2.LeaseState.Value('LEASE_STATE_UNSPECIFIED')
|
|
| 38 |
+ # The server expects the bot to accept this lease.
|
|
| 39 |
+ PENDING = bots_pb2.LeaseState.Value('PENDING')
|
|
| 40 |
+ # The bot has accepted this lease.
|
|
| 41 |
+ ACTIVE = bots_pb2.LeaseState.Value('ACTIVE')
|
|
| 42 |
+ # The bot is no longer leased.
|
|
| 43 |
+ COMPLETED = bots_pb2.LeaseState.Value('COMPLETED')
|
|
| 44 |
+ # The bot should immediately release all resources associated with the lease.
|
|
| 45 |
+ CANCELLED = bots_pb2.LeaseState.Value('CANCELLED')
|
|
| 46 |
+ |
|
| 47 |
+ |
|
| 48 |
+class OperationStage(Enum):
|
|
| 49 |
+ # Initially unknown stage.
|
|
| 50 |
+ UNKNOWN = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('UNKNOWN')
|
|
| 51 |
+ # Checking the result against the cache.
|
|
| 52 |
+ CACHE_CHECK = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('CACHE_CHECK')
|
|
| 53 |
+ # Currently idle, awaiting a free machine to execute.
|
|
| 54 |
+ QUEUED = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('QUEUED')
|
|
| 55 |
+ # Currently being executed by a worker.
|
|
| 56 |
+ EXECUTING = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('EXECUTING')
|
|
| 57 |
+ # Finished execution.
|
|
| 58 |
+ COMPLETED = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('COMPLETED')
|
| ... | ... | @@ -26,49 +26,15 @@ import asyncio |
| 26 | 26 |
import logging
|
| 27 | 27 |
import platform
|
| 28 | 28 |
import uuid
|
| 29 |
-from enum import Enum
|
|
| 30 | 29 |
|
| 31 | 30 |
import grpc
|
| 32 | 31 |
|
| 32 |
+from buildgrid._enums import BotStatus, LeaseState
|
|
| 33 | 33 |
from buildgrid._protos.google.rpc import code_pb2
|
| 34 | 34 |
from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, worker_pb2
|
| 35 | 35 |
from buildgrid._exceptions import BotError
|
| 36 | 36 |
|
| 37 | 37 |
|
| 38 |
-class BotStatus(Enum):
|
|
| 39 |
- # Default value.
|
|
| 40 |
- BOT_STATUS_UNSPECIFIED = bots_pb2.BotStatus.Value('BOT_STATUS_UNSPECIFIED')
|
|
| 41 |
- |
|
| 42 |
- # The bot is healthy, and will accept leases as normal.
|
|
| 43 |
- OK = bots_pb2.BotStatus.Value('OK')
|
|
| 44 |
- |
|
| 45 |
- # The bot is unhealthy and will not accept new leases.
|
|
| 46 |
- UNHEALTHY = bots_pb2.BotStatus.Value('UNHEALTHY')
|
|
| 47 |
- |
|
| 48 |
- # The bot has been asked to reboot the host.
|
|
| 49 |
- HOST_REBOOTING = bots_pb2.BotStatus.Value('HOST_REBOOTING')
|
|
| 50 |
- |
|
| 51 |
- # The bot has been asked to shut down.
|
|
| 52 |
- BOT_TERMINATING = bots_pb2.BotStatus.Value('BOT_TERMINATING')
|
|
| 53 |
- |
|
| 54 |
- |
|
| 55 |
-class LeaseState(Enum):
|
|
| 56 |
- # Default value.
|
|
| 57 |
- LEASE_STATE_UNSPECIFIED = bots_pb2.LeaseState.Value('LEASE_STATE_UNSPECIFIED')
|
|
| 58 |
- |
|
| 59 |
- # The server expects the bot to accept this lease.
|
|
| 60 |
- PENDING = bots_pb2.LeaseState.Value('PENDING')
|
|
| 61 |
- |
|
| 62 |
- # The bot has accepted this lease.
|
|
| 63 |
- ACTIVE = bots_pb2.LeaseState.Value('ACTIVE')
|
|
| 64 |
- |
|
| 65 |
- # The bot is no longer leased.
|
|
| 66 |
- COMPLETED = bots_pb2.LeaseState.Value('COMPLETED')
|
|
| 67 |
- |
|
| 68 |
- # The bot should immediately release all resources associated with the lease.
|
|
| 69 |
- CANCELLED = bots_pb2.LeaseState.Value('CANCELLED')
|
|
| 70 |
- |
|
| 71 |
- |
|
| 72 | 38 |
class BotSession:
|
| 73 | 39 |
def __init__(self, parent, interface):
|
| 74 | 40 |
""" Unique bot ID within the farm used to identify this bot
|
| ... | ... | @@ -15,41 +15,15 @@ |
| 15 | 15 |
|
| 16 | 16 |
import logging
|
| 17 | 17 |
import uuid
|
| 18 |
-from enum import Enum
|
|
| 19 | 18 |
|
| 20 | 19 |
from google.protobuf import timestamp_pb2
|
| 21 | 20 |
|
| 21 |
+from buildgrid._enums import LeaseState, OperationStage
|
|
| 22 | 22 |
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
| 23 | 23 |
from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
|
| 24 | 24 |
from buildgrid._protos.google.longrunning import operations_pb2
|
| 25 | 25 |
|
| 26 | 26 |
|
| 27 |
-class OperationStage(Enum):
|
|
| 28 |
- # Initially unknown stage.
|
|
| 29 |
- UNKNOWN = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('UNKNOWN')
|
|
| 30 |
- # Checking the result against the cache.
|
|
| 31 |
- CACHE_CHECK = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('CACHE_CHECK')
|
|
| 32 |
- # Currently idle, awaiting a free machine to execute.
|
|
| 33 |
- QUEUED = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('QUEUED')
|
|
| 34 |
- # Currently being executed by a worker.
|
|
| 35 |
- EXECUTING = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('EXECUTING')
|
|
| 36 |
- # Finished execution.
|
|
| 37 |
- COMPLETED = remote_execution_pb2.ExecuteOperationMetadata.Stage.Value('COMPLETED')
|
|
| 38 |
- |
|
| 39 |
- |
|
| 40 |
-class LeaseState(Enum):
|
|
| 41 |
- # Initially unknown state.
|
|
| 42 |
- LEASE_STATE_UNSPECIFIED = bots_pb2.LeaseState.Value('LEASE_STATE_UNSPECIFIED')
|
|
| 43 |
- # The server expects the bot to accept this lease.
|
|
| 44 |
- PENDING = bots_pb2.LeaseState.Value('PENDING')
|
|
| 45 |
- # The bot has accepted this lease.
|
|
| 46 |
- ACTIVE = bots_pb2.LeaseState.Value('ACTIVE')
|
|
| 47 |
- # The bot is no longer leased.
|
|
| 48 |
- COMPLETED = bots_pb2.LeaseState.Value('COMPLETED')
|
|
| 49 |
- # The bot should immediately release all resources associated with the lease.
|
|
| 50 |
- CANCELLED = bots_pb2.LeaseState.Value('CANCELLED')
|
|
| 51 |
- |
|
| 52 |
- |
|
| 53 | 27 |
class Job:
|
| 54 | 28 |
|
| 55 | 29 |
def __init__(self, action, action_digest):
|
