# Image for the spaCy based NLP pipeline.
#
# Build order: OS packages -> pyFlow (Python 2.7) -> spaCy and its models
# (Python 3.7) -> pipeline scripts. The container's entry point is the `nlp`
# script copied to /usr/local/bin; with no arguments it prints its help text.
#
# NOTE(review): debian:buster is past its end of life; confirm that the apt
# mirrors are still reachable from the build host or plan a base image bump
# (python2.7 / python3.7 package names are specific to this release).
FROM debian:buster-slim

LABEL authors="Patrick Jentsch , Stephan Porada "

ENV LANG=C.UTF-8

# Install all OS level dependencies in a single layer: the package index is
# refreshed, used and removed within the same RUN, so no later layer depends
# on a stale cached index and the apt lists never end up in the image.
#   ca-certificates  lets wget verify the TLS certificate of the pyFlow download
#   procps, zip      further dependencies of the pipeline
#   python2.7        interpreter used to build and install pyFlow
#   python3.7, pip   interpreter and installer used for spaCy
RUN apt-get update \
 && apt-get install --no-install-recommends --yes \
      ca-certificates \
      procps \
      python2.7 \
      python3-pip \
      python3.7 \
      wget \
      zip \
 && rm -rf /var/lib/apt/lists/*

# Install the NLP pipeline and its dependencies #

## Install pyFlow ##
ENV PYFLOW_VERSION=1.1.20
# Download the release tarball (with certificate verification), build and
# install it with Python 2.7, then delete the sources in the same layer.
# The build runs in a subshell so the `cd` does not leak into later commands.
RUN wget --quiet \
      "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" \
 && tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \
 && (cd "pyflow-${PYFLOW_VERSION}" && python2.7 setup.py build install) \
 && rm -r "pyflow-${PYFLOW_VERSION}" "pyflow-${PYFLOW_VERSION}.tar.gz"

## Install spaCy ##
ENV SPACY_VERSION=3.0.5
# pip is invoked as `python3 -m pip` so that, after the self-upgrade, the
# upgraded pip is the one that runs. --no-cache-dir keeps pip's download
# cache out of the image layers.
RUN python3 -m pip install --no-cache-dir \
      chardet \
      setuptools \
      wheel \
 && python3 -m pip install --no-cache-dir --upgrade pip \
 && python3 -m pip install --no-cache-dir "spacy==${SPACY_VERSION}"

# Only models that include the following components are compatible:
# lemmatizer, ner, parser, senter, tagger
ENV SPACY_MODELS="de_core_news_md,en_core_web_md,it_core_news_md,nl_core_news_md,pl_core_news_md,zh_core_web_md"
ENV SPACY_MODELS_VERSION=3.0.0
# Download every model listed in SPACY_MODELS (comma separated) at the pinned
# version. `set -e` aborts the build as soon as one download fails; without it
# only the exit status of the last model would be reported. Arguments after
# --direct are handed to `pip install` by spaCy's download command.
RUN set -e; \
    for spacy_model in $(echo "${SPACY_MODELS}" | tr "," "\n"); do \
      python3 -m spacy download "${spacy_model}-${SPACY_MODELS_VERSION}" --direct --no-cache-dir; \
    done

## Install Pipeline ##
COPY nlp spacy-nlp vrt-creator /usr/local/bin/

ENTRYPOINT ["nlp"]
CMD ["--help"]