Source code for recommenders.utils.k8s_utils

# Copyright (c) Recommenders contributors.
# Licensed under the MIT License.

from math import ceil, floor
import logging

logger = logging.getLogger(__name__)


[docs]def qps_to_replicas( target_qps, processing_time, max_qp_replica=1, target_utilization=0.7 ): """Provide a rough estimate of the number of replicas to support a given load (queries per second) Args: target_qps (int): target queries per second that you want to support processing_time (float): the estimated amount of time (in seconds) your service call takes max_qp_replica (int): maximum number of concurrent queries per replica target_utilization (float): proportion of CPU utilization you think is ideal Returns: int: Number of estimated replicas required to support a target number of queries per second. """ concurrent_queries = target_qps * processing_time / target_utilization replicas = ceil(concurrent_queries / max_qp_replica) logger.info( "Approximately {} replicas are estimated to support {} queries per second.".format( replicas, target_qps ) ) return replicas
[docs]def replicas_to_qps( num_replicas, processing_time, max_qp_replica=1, target_utilization=0.7 ): """Provide a rough estimate of the queries per second supported by a number of replicas Args: num_replicas (int): number of replicas processing_time (float): the estimated amount of time (in seconds) your service call takes max_qp_replica (int): maximum number of concurrent queries per replica target_utilization (float): proportion of CPU utilization you think is ideal Returns: int: queries per second supported by the number of replicas """ qps = floor(num_replicas * max_qp_replica * target_utilization / processing_time) logger.info( "Approximately {} queries per second are supported by {} replicas.".format( qps, num_replicas ) ) return qps
[docs]def nodes_to_replicas(n_cores_per_node, n_nodes=3, cpu_cores_per_replica=0.1): """Provide a rough estimate of the number of replicas supported by a given number of nodes with n_cores_per_node cores each Args: n_cores_per_node (int): Total number of cores per node within an AKS cluster that you want to use n_nodes (int): Number of nodes (i.e. VMs) used in the AKS cluster cpu_cores_per_replica (float): Cores assigned to each replica. This can be fractional and corresponds to the cpu_cores argument passed to AksWebservice.deploy_configuration() Returns: int: Total number of replicas supported by the configuration """ n_cores_avail = (n_cores_per_node - 0.5) * n_nodes - 4.45 replicas = floor(n_cores_avail / cpu_cores_per_replica) logger.info( "Approximately {} replicas are supported by {} nodes with {} cores each.".format( replicas, n_nodes, n_cores_per_node ) ) return replicas