使用GRPC调用TensorFlow-Serving服务
docker启动服务:
docker run -p 8502:8500 --mount type=bind,source=/home/recommend/hh/esmm,target=/models/search_multiply_task_model -e MODEL_NAME=search_multiply_task_model -t tensorflow/serving:2.2.0
其中,本机的8502端口映射到Docker容器的8500端口(gRPC端口)。注意:上面的启动命令只发布了8502端口;如需使用HTTP接口,还需在命令中追加 -p 8501:8501,将本机8501端口映射到容器的8501端口(HTTP端口)。
python客户端调用代码:
import time
import grpc
import numpy as np
from tensorflow_serving.apis import model_service_pb2_grpc, model_management_pb2, get_model_status_pb2, predict_pb2, prediction_service_pb2_grpc
from tensorflow_serving.config import model_server_config_pb2
from tensorflow import make_tensor_proto
from tensorflow.core.framework import types_pb2
import tensorflow as tf
def predict_test(batch_size, serving_config):
    """Send one PredictRequest to a TensorFlow-Serving gRPC endpoint.

    Args:
        batch_size: Nominal batch size. Currently unused — the sample
            feature tensors below are hard-coded to batch 1.
        serving_config: dict with keys 'hostport' (str, "host:port"),
            'max_message_length' (int, bytes), 'model_name' (str),
            'signature_name' (str) and 'timeout' (number, seconds).

    Returns:
        The PredictResponse produced by the serving stub.
    """
    # grpc caps both send and receive message sizes at 4 MB by default;
    # raise both limits so larger payloads are not rejected.
    channel = grpc.insecure_channel(
        serving_config['hostport'],
        options=[
            ('grpc.max_send_message_length', serving_config['max_message_length']),
            ('grpc.max_receive_message_length', serving_config['max_message_length']),
        ])
    try:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

        # Hard-coded sample features for a single request (batch size 1).
        sku_id = [[77233.0]]
        sku_list = [[139204, 61577, 60372, 91450, 50226, 50227, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
        category = [[142, 21, 13, 2, 0, 0, 8, 2, 0, 4, 1, 1, 3, 0, 17, 1, 0, 7,
                     0, 1618, 0, 37, 0, 873, 23244, 318, 3422, 3101, 0, 4, 0, 0]]
        numeric = [[1629.0, 1792.0, 131.0, 3528.0, 0.502, 8542.0, 4038.0, 2394.0,
                    699.0, 0.0, 799.0, 4700.0, 13242.0, 7521.0, 3845.0, 1569.0,
                    0.978, 557.133, 0.509, 0.520, 0.693, 0.0, 0.0, 0.0, 0.0, 0.0,
                    0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]]

        request = predict_pb2.PredictRequest()
        request.model_spec.name = serving_config['model_name']
        request.model_spec.signature_name = serving_config['signature_name']
        # Input names must match the tensor names in the model signature.
        request.inputs['sku_id'].CopyFrom(make_tensor_proto(
            sku_id, shape=[1, 1], dtype=tf.float32))
        request.inputs['sku_list'].CopyFrom(make_tensor_proto(
            sku_list, shape=[1, 30], dtype=tf.float32))
        request.inputs['category'].CopyFrom(make_tensor_proto(
            category, shape=[1, 32], dtype=tf.float32))
        request.inputs['numeric'].CopyFrom(make_tensor_proto(
            numeric, shape=[1, 35], dtype=tf.float32))

        start = int(time.time() * 1000)
        # The second positional argument of Predict is the RPC timeout in seconds.
        result = stub.Predict(request, serving_config['timeout'])
        print('time_cost:{}'.format(int(time.time() * 1000) - start))
        return result
    finally:
        # Close the channel even if the RPC raises (e.g. DEADLINE_EXCEEDED),
        # so the connection is not leaked.
        channel.close()
if __name__ == "__main__":
serving_config = {
"hostport": "10.15.420.50:8502",
"max_message_length": 10 * 1024 * 1024,
"timeout": 300,
"signature_name": "serving_default",
"model_name": "search_multiply_task_model"
}
predict_result = predict_test(1,serving_config)
# print(predict_result)
print(predict_score:{}.format(predict_result.outputs[dense_3].float_val))
上一篇:
通过多线程提高代码的执行效率例子
