使用GRPC调用TensorFlow-Serving服务
docker启动服务:
docker run -p 8502:8500 --mount type=bind,source=/home/recommend/hh/esmm,target=/models/search_multiply_task_model -e MODEL_NAME=search_multiply_task_model -t tensorflow/serving:2.2.0
其中,本机的8502端口对应Docker的8500端口(GRPC端口)。注意:上面的启动命令只映射了GRPC端口;若要使用HTTP接口,需在命令中额外加上 -p 8501:8501,使本机8501端口对应Docker的8501端口(HTTP端口)。
python客户端调用代码:
"""gRPC client for a TensorFlow-Serving model (search_multiply_task_model).

Sends one hard-coded example to the Predict endpoint and prints the score.
"""
import time

import grpc
import tensorflow as tf
from tensorflow import make_tensor_proto
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc


def predict_test(batch_size, serving_config):
    """Send a single PredictRequest to a TF-Serving gRPC endpoint.

    Args:
        batch_size: kept for interface compatibility; the request below always
            carries one hard-coded example (an effective batch of 1).
        serving_config: dict with keys
            'hostport'           -- "host:port" of the serving gRPC endpoint,
            'max_message_length' -- max gRPC send/receive size in bytes,
            'timeout'            -- Predict deadline in seconds,
            'model_name'         -- served model name,
            'signature_name'     -- SavedModel signature to invoke.

    Returns:
        The PredictResponse protobuf returned by the server.
    """
    # grpc.max_send_message_length / grpc.max_receive_message_length both
    # default to 4 MB; raise them so larger payloads are not rejected.
    channel = grpc.insecure_channel(
        serving_config['hostport'],
        options=[
            ('grpc.max_send_message_length', serving_config['max_message_length']),
            ('grpc.max_receive_message_length', serving_config['max_message_length']),
        ])
    try:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

        # One hard-coded sample per model input; shapes must match the
        # SavedModel signature (1x1, 1x30, 1x32, 1x35 respectively).
        sku_id = [[77233.0]]
        sku_list = [[139204, 61577, 60372, 91450, 50226, 50227, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
        category = [[142, 21, 13, 2, 0, 0, 8, 2, 0, 4, 1, 1, 3, 0, 17, 1, 0,
                     7, 0, 1618, 0, 37, 0, 873, 23244, 318, 3422, 3101, 0, 4, 0, 0]]
        numeric = [[1629.0, 1792.0, 131.0, 3528.0, 0.502, 8542.0, 4038.0,
                    2394.0, 699.0, 0.0, 799.0, 4700.0, 13242.0, 7521.0,
                    3845.0, 1569.0, 0.978, 557.133, 0.509, 0.520, 0.693,
                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                    1.0, 0.0, 0.0]]

        request = predict_pb2.PredictRequest()
        request.model_spec.name = serving_config['model_name']
        request.model_spec.signature_name = serving_config['signature_name']
        request.inputs['sku_id'].CopyFrom(
            make_tensor_proto(sku_id, shape=[1, 1], dtype=tf.float32))
        request.inputs['sku_list'].CopyFrom(
            make_tensor_proto(sku_list, shape=[1, 30], dtype=tf.float32))
        request.inputs['category'].CopyFrom(
            make_tensor_proto(category, shape=[1, 32], dtype=tf.float32))
        request.inputs['numeric'].CopyFrom(
            make_tensor_proto(numeric, shape=[1, 35], dtype=tf.float32))

        start = int(time.time() * 1000)
        # Second positional arg of Predict is the deadline in seconds.
        result = stub.Predict(request, serving_config['timeout'])
        print('time_cost:{}'.format(int(time.time() * 1000) - start))
        return result
    finally:
        # Always release the channel, even if Predict raises
        # (the original leaked it on error).
        channel.close()


if __name__ == "__main__":
    serving_config = {
        # NOTE(review): "420" is not a valid IPv4 octet — presumably a
        # redacted placeholder; replace with the real serving host.
        "hostport": "10.15.420.50:8502",
        "max_message_length": 10 * 1024 * 1024,
        "timeout": 300,
        "signature_name": "serving_default",
        "model_name": "search_multiply_task_model",
    }
    predict_result = predict_test(1, serving_config)
    # print(predict_result)
    print('predict_score:{}'.format(predict_result.outputs['dense_3'].float_val))
上一篇:
通过多线程提高代码的执行效率例子