I have a problem: I want to make a prediction with TensorFlow Serving, but unfortunately, as soon as I call the TFServing API, the Docker container crashes with the following error:
2022-10-05 08:22:19.091237: I tensorflow_serving/model_servers/server.cc:442] Exporting HTTP/REST API at:localhost:8601 ...
terminate called after throwing an instance of 'std::bad_alloc'
what(): std::bad_alloc
I am running TFServing inside a Docker container, and the call comes from a Flask server. What is causing this? The VM has 16 GB of RAM.
server.py
from flask import Flask
from keras_preprocessing.sequence import pad_sequences
from dotenv import load_dotenv
from loguru import logger
import numpy as np
import requests
import string
import pickle5 as pickle
import nltk
import re
import os
app = Flask(__name__)
load_dotenv()
@app.route("/test")
def index():
txt = "This is a text"
output = get_prediction_probability(txt)
return output
def text_wrangling(text):
    x = text.lower()
    x = remove_URL(x)
    x = remove_punct(x)
    x = remove_stopwords(x)
    # load the fitted tokenizer and turn the text into a padded sequence
    with open('tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)
    x = tokenizer.texts_to_sequences([x])
    x = pad_sequences(x, maxlen=int(os.getenv('NLP__MAXLEN')))
    return x
def remove_URL(text):
    # strip URLs of the form http(s)://... or www....
    url = re.compile(r"https?://\S+|www\.\S+")
    return url.sub(r"", text)
def remove_punct(text):
    translator = str.maketrans("", "", string.punctuation)
    return text.translate(translator)
def remove_stopwords(text):
    nltk.download('stopwords')
    from nltk.corpus import stopwords
    stop = set(stopwords.words("english"))
    filtered_words = [word.lower() for word in text.split() if word.lower() not in stop]
    return " ".join(filtered_words)
def get_prediction_probability(txt):
    x = text_wrangling(txt)
    logger.info("Text wrangling done")
    data = {
        "instances": [
            x.tolist()
        ]
    }
    logger.info("Get prediction from model")
    response = requests.post(
        "http://localhost:8601/v1/models/nlp_model/labels/production:predict",
        json=data,
    )
    predictions = np.asarray(response.json()['predictions'])
    probability = predictions.max(axis=1)
    pred = predictions.argmax(axis=1)
    # map the predicted class index back to its original label
    with open('labelenconder.pickle', 'rb') as handle:
        le = pickle.load(handle)
    pred = le.classes_[pred]
    prediction = pred[0]
    return {
        "prediction": prediction,
        "probability": probability[0]
    }
if __name__ == '__main__':
    app.run(host='0.0.0.0')
Dockerfile
FROM tensorflow/serving
EXPOSE 8601
docker-compose.yml
version: '3'
services:
  tfserving:
    container_name: tfserving
    build: ..
    ports:
      - "8601:8601"
    volumes:
      - ./model.config:/models/model.config
      - ../model:/models/model
    environment:
      - TENSORFLOW_SERVING_REST_API_PORT=8601
      - TENSORFLOW_SERVING_MODEL_NAME=model
      - TENSORFLOW_MODEL_BASE_PATH=/models/model/
    entrypoint: [ "bash", "-c", "tensorflow_model_server --rest_api_port=8601 --allow_version_labels_for_unavailable_models --model_config_file=/models/model.config" ]
model.config
model_config_list {
  config {
    name: 'nlp_model'
    base_path: '/models/model/'
    model_platform: 'tensorflow'
    model_version_policy {
      specific {
        versions: 1
        versions: 2
      }
    }
    version_labels {
      key: 'production'
      value: 1
    }
    version_labels {
      key: 'beta'
      value: 2
    }
  }
}
CodePudding user response:
This is a known bug that has already been reported and fixed in TensorFlow Serving 2.11 (not yet released at the time of writing). In the meantime, you can use the nightly release from Docker Hub.
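For example, a minimal sketch of the change, assuming the Dockerfile shown above (double-check the exact tag name against the tensorflow/serving tags on Docker Hub):

# use the nightly build, which contains the fix
FROM tensorflow/serving:nightly
EXPOSE 8601

Then rebuild and restart the container so the new image is actually used.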
You can find the issue here: #2048.
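Once the container is up, you can sanity-check that the model survives startup before wiring in Flask. A minimal sketch using TensorFlow Serving's REST model-status endpoint, with the host, port, and model name taken from the config above:

import requests

# query the model status endpoint; a response listing the versions
# as AVAILABLE confirms the server is serving the model
status = requests.get("http://localhost:8601/v1/models/nlp_model")
print(status.json())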