diff --git a/.github/workflows/test_exporters_common.yml b/.github/workflows/test_exporters_common.yml
index 7df8ce4f7f..9cc3ce77de 100644
--- a/.github/workflows/test_exporters_common.yml
+++ b/.github/workflows/test_exporters_common.yml
@@ -36,7 +36,8 @@ jobs:
         run: |
           pip install --upgrade pip
           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-          pip install .[tests,exporters]
+          pip install optimum-onnx@git+https://github.com/huggingface/optimum-onnx.git
+          pip install .[tests]
       - name: Test with pytest
         run: |
diff --git a/.github/workflows/test_pipelines.yml b/.github/workflows/test_pipelines.yml
new file mode 100644
index 0000000000..7a7cb500bf
--- /dev/null
+++ b/.github/workflows/test_pipelines.yml
@@ -0,0 +1,45 @@
+name: Optimum Pipelines / Python - Test
+
+on:
+ push:
+ branches: [main]
+ pull_request:
+ branches: [main]
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+ cancel-in-progress: true
+
+env:
+ UV_SYSTEM_PYTHON: 1
+ UV_TORCH_BACKEND: auto
+ TRANSFORMERS_IS_CI: true
+
+jobs:
+ build:
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: [3.9]
+ runs-on: [ubuntu-22.04]
+
+ runs-on: ${{ matrix.runs-on }}
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Setup Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install dependencies
+ run: |
+ pip install --upgrade pip uv
+ uv pip install --no-cache-dir optimum-onnx[onnxruntime]@git+https://github.com/huggingface/optimum-onnx.git
+ uv pip install --no-cache-dir .[tests]
+
+ - name: Test with pytest
+ run: |
+ pytest tests/pipelines -vvvv --durations=0
diff --git a/docs/source/quicktour.mdx b/docs/source/quicktour.mdx
index 0ebfd0fd75..4f9666cd14 100644
--- a/docs/source/quicktour.mdx
+++ b/docs/source/quicktour.mdx
@@ -129,42 +129,6 @@ To train transformers on Habana's Gaudi processors, 🤗 Optimum provides a `Gau
You can find more examples in the [documentation](https://huggingface.co/docs/optimum/habana/quickstart) and in the [examples](https://github.com/huggingface/optimum-habana/tree/main/examples).
-
-#### ONNX Runtime
-
-To train transformers with ONNX Runtime's acceleration features, 🤗 Optimum provides a `ORTTrainer` that is very similar to the 🤗 Transformers [Trainer](https://huggingface.co/docs/transformers/main_classes/trainer). Here is a simple example:
-
-```diff
-- from transformers import Trainer, TrainingArguments
-+ from optimum.onnxruntime import ORTTrainer, ORTTrainingArguments
-
- # Download a pretrained model from the Hub
- model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
-
- # Define the training arguments
-- training_args = TrainingArguments(
-+ training_args = ORTTrainingArguments(
- output_dir="path/to/save/folder/",
- optim="adamw_ort_fused",
- ...
- )
-
- # Create a ONNX Runtime Trainer
-- trainer = Trainer(
-+ trainer = ORTTrainer(
- model=model,
- args=training_args,
- train_dataset=train_dataset,
-+ feature="text-classification", # The model type to export to ONNX
- ...
- )
-
- # Use ONNX Runtime for training!
- trainer.train()
-```
-
-You can find more examples in the [documentation](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/trainer) and in the [examples](https://github.com/huggingface/optimum/tree/main/examples/onnxruntime/training).
-
## Out of the box ONNX export
The Optimum library handles out of the box the ONNX export of Transformers and Diffusers models!
diff --git a/optimum/configuration_utils.py b/optimum/configuration_utils.py
index 60f7c99617..41c565a22d 100644
--- a/optimum/configuration_utils.py
+++ b/optimum/configuration_utils.py
@@ -342,6 +342,7 @@ def to_dict(self) -> Dict[str, Any]:
output["transformers_version"] = transformers_version_str
output["optimum_version"] = __version__
- self.dict_torch_dtype_to_str(output)
+ if hasattr(self, "dict_torch_dtype_to_str"):
+ self.dict_torch_dtype_to_str(output)
return output
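
As a quick illustration of why the call is now guarded: `dict_torch_dtype_to_str` is inherited from the `transformers` config classes in most releases, but may be absent. Below is a minimal sketch of what such a helper is expected to do when present (illustrative only, not the `transformers` implementation, which only rewrites `torch_dtype` entries):

```python
import torch


def dict_torch_dtype_to_str(d: dict) -> None:
    # Replace torch.dtype values (e.g. torch.float16) with their short string
    # names ("float16") so the config dict can be serialized to JSON.
    for key, value in d.items():
        if isinstance(value, torch.dtype):
            d[key] = str(value).split(".")[1]
        elif isinstance(value, dict):
            dict_torch_dtype_to_str(value)


output = {"torch_dtype": torch.float16, "optimum_version": "2.0.0"}
dict_torch_dtype_to_str(output)
assert output["torch_dtype"] == "float16"
```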
diff --git a/optimum/exporters/utils.py b/optimum/exporters/utils.py
index 6931bf51c4..960910c276 100644
--- a/optimum/exporters/utils.py
+++ b/optimum/exporters/utils.py
@@ -26,8 +26,8 @@
 from ..utils import (
     DIFFUSERS_MINIMUM_VERSION,
-    check_if_diffusers_greater,
     is_diffusers_available,
+    is_diffusers_version,
     logging,
 )
 from ..utils.import_utils import _diffusers_version
@@ -38,7 +38,7 @@
 if is_diffusers_available():
-    if not check_if_diffusers_greater(DIFFUSERS_MINIMUM_VERSION.base_version):
+    if is_diffusers_version("<", DIFFUSERS_MINIMUM_VERSION.base_version):
         raise ImportError(
             f"We found an older version of diffusers {_diffusers_version} but we require diffusers to be >= {DIFFUSERS_MINIMUM_VERSION}. "
             "Please update diffusers by running `pip install --upgrade diffusers`"
diff --git a/optimum/pipelines/__init__.py b/optimum/pipelines/__init__.py
index 892f4bd391..5ed7b6b6d6 100644
--- a/optimum/pipelines/__init__.py
+++ b/optimum/pipelines/__init__.py
@@ -1,5 +1,5 @@
# coding=utf-8
-# Copyright 2023 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,10 +12,282 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Pipelines running different backends."""
-from .pipelines_base import (
-    MAPPING_LOADING_FUNC,
-    ORT_SUPPORTED_TASKS,
-    load_ort_pipeline,
-    pipeline,
+from typing import TYPE_CHECKING, Any, Optional, Union
+
+import torch
+
+from optimum.utils.import_utils import (
+    is_ipex_available,
+    is_onnxruntime_available,
+    is_openvino_available,
+    is_optimum_intel_available,
+    is_optimum_onnx_available,
)
+from optimum.utils.logging import get_logger
+
+
+logger = get_logger(__name__)
+
+
+if TYPE_CHECKING:
+    from transformers import (
+        BaseImageProcessor,
+        FeatureExtractionMixin,
+        Pipeline,
+        PretrainedConfig,
+        PreTrainedModel,
+        PreTrainedTokenizer,
+        PreTrainedTokenizerFast,
+        ProcessorMixin,
+        TFPreTrainedModel,
+    )
+
+
+# The docstring is simply a copy of transformers.pipelines.pipeline's doc with minor modifications
+# to reflect the fact that this pipeline loads accelerated models using Optimum.
+def pipeline(
+ task: Optional[str] = None,
+ model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None,
+ config: Optional[Union[str, "PretrainedConfig"]] = None,
+ tokenizer: Optional[Union[str, "PreTrainedTokenizer", "PreTrainedTokenizerFast"]] = None,
+    feature_extractor: Optional[Union[str, "FeatureExtractionMixin"]] = None,
+ image_processor: Optional[Union[str, "BaseImageProcessor"]] = None,
+ processor: Optional[Union[str, "ProcessorMixin"]] = None,
+ framework: Optional[str] = None,
+ revision: Optional[str] = None,
+ use_fast: bool = True,
+ token: Optional[Union[str, bool]] = None,
+ device: Optional[Union[int, str, "torch.device"]] = None,
+ device_map: Optional[Union[str, dict[str, Union[int, str]]]] = None,
+ torch_dtype: Optional[Union[str, "torch.dtype"]] = "auto",
+ trust_remote_code: Optional[bool] = None,
+ model_kwargs: Optional[dict[str, Any]] = None,
+ pipeline_class: Optional[Any] = None,
+ accelerator: Optional[str] = None,
+ **kwargs: Any,
+) -> "Pipeline":
+ """Utility factory method to build a [`Pipeline`] with an Optimum accelerated model, similar to `transformers.pipeline`.
+ A pipeline consists of:
+ - One or more components for pre-processing model inputs, such as a [tokenizer](tokenizer),
+ [image_processor](image_processor), [feature_extractor](feature_extractor), or [processor](processors).
+ - A [model](model) that generates predictions from the inputs.
+ - Optional post-processing steps to refine the model's output, which can also be handled by processors.
+
+ While there are such optional arguments as `tokenizer`, `feature_extractor`, `image_processor`, and `processor`,
+ they shouldn't be specified all at once. If these components are not provided, `pipeline` will try to load
+ required ones automatically. In case you want to provide these components explicitly, please refer to a
+ specific pipeline in order to get more details regarding what components are required.
+
+ Args:
+ task (`str`):
+ The task defining which pipeline will be returned. Currently accepted tasks are:
+ - `"audio-classification"`: will return a [`AudioClassificationPipeline`].
+ - `"automatic-speech-recognition"`: will return a [`AutomaticSpeechRecognitionPipeline`].
+ - `"depth-estimation"`: will return a [`DepthEstimationPipeline`].
+ - `"document-question-answering"`: will return a [`DocumentQuestionAnsweringPipeline`].
+ - `"feature-extraction"`: will return a [`FeatureExtractionPipeline`].
+ - `"fill-mask"`: will return a [`FillMaskPipeline`]:.
+ - `"image-classification"`: will return a [`ImageClassificationPipeline`].
+ - `"image-feature-extraction"`: will return an [`ImageFeatureExtractionPipeline`].
+ - `"image-segmentation"`: will return a [`ImageSegmentationPipeline`].
+ - `"image-text-to-text"`: will return a [`ImageTextToTextPipeline`].
+ - `"image-to-image"`: will return a [`ImageToImagePipeline`].
+ - `"image-to-text"`: will return a [`ImageToTextPipeline`].
+ - `"mask-generation"`: will return a [`MaskGenerationPipeline`].
+ - `"object-detection"`: will return a [`ObjectDetectionPipeline`].
+ - `"question-answering"`: will return a [`QuestionAnsweringPipeline`].
+ - `"summarization"`: will return a [`SummarizationPipeline`].
+ - `"table-question-answering"`: will return a [`TableQuestionAnsweringPipeline`].
+ - `"text2text-generation"`: will return a [`Text2TextGenerationPipeline`].
+ - `"text-classification"` (alias `"sentiment-analysis"` available): will return a
+ [`TextClassificationPipeline`].
+ - `"text-generation"`: will return a [`TextGenerationPipeline`]:.
+ - `"text-to-audio"` (alias `"text-to-speech"` available): will return a [`TextToAudioPipeline`]:.
+ - `"token-classification"` (alias `"ner"` available): will return a [`TokenClassificationPipeline`].
+ - `"translation"`: will return a [`TranslationPipeline`].
+ - `"translation_xx_to_yy"`: will return a [`TranslationPipeline`].
+ - `"video-classification"`: will return a [`VideoClassificationPipeline`].
+ - `"visual-question-answering"`: will return a [`VisualQuestionAnsweringPipeline`].
+ - `"zero-shot-classification"`: will return a [`ZeroShotClassificationPipeline`].
+ - `"zero-shot-image-classification"`: will return a [`ZeroShotImageClassificationPipeline`].
+ - `"zero-shot-audio-classification"`: will return a [`ZeroShotAudioClassificationPipeline`].
+ - `"zero-shot-object-detection"`: will return a [`ZeroShotObjectDetectionPipeline`].
+        model (`str`, [`ORTModel`] or [`OVModel`], *optional*):
+            The model that will be used by the pipeline to make predictions. This can be a model identifier or an
+            actual instance of an accelerated model inheriting from [`ORTModel`] or [`OVModel`].
+ If not provided, the default for the `task` will be loaded.
+ config (`str` or [`PretrainedConfig`], *optional*):
+ The configuration that will be used by the pipeline to instantiate the model. This can be a model
+ identifier or an actual pretrained model configuration inheriting from [`PretrainedConfig`].
+ If not provided, the default configuration file for the requested model will be used. That means that if
+ `model` is given, its default configuration will be used. However, if `model` is not supplied, this
+ `task`'s default model's config is used instead.
+ tokenizer (`str` or [`PreTrainedTokenizer`], *optional*):
+ The tokenizer that will be used by the pipeline to encode data for the model. This can be a model
+ identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`].
+ If not provided, the default tokenizer for the given `model` will be loaded (if it is a string). If `model`
+ is not specified or not a string, then the default tokenizer for `config` is loaded (if it is a string).
+ However, if `config` is also not given or not a string, then the default tokenizer for the given `task`
+ will be loaded.
+ feature_extractor (`str` or [`PreTrainedFeatureExtractor`], *optional*):
+ The feature extractor that will be used by the pipeline to encode data for the model. This can be a model
+ identifier or an actual pretrained feature extractor inheriting from [`PreTrainedFeatureExtractor`].
+ Feature extractors are used for non-NLP models, such as Speech or Vision models as well as multi-modal
+ models. Multi-modal models will also require a tokenizer to be passed.
+ If not provided, the default feature extractor for the given `model` will be loaded (if it is a string). If
+ `model` is not specified or not a string, then the default feature extractor for `config` is loaded (if it
+ is a string). However, if `config` is also not given or not a string, then the default feature extractor
+ for the given `task` will be loaded.
+ image_processor (`str` or [`BaseImageProcessor`], *optional*):
+ The image processor that will be used by the pipeline to preprocess images for the model. This can be a
+ model identifier or an actual image processor inheriting from [`BaseImageProcessor`].
+ Image processors are used for Vision models and multi-modal models that require image inputs. Multi-modal
+ models will also require a tokenizer to be passed.
+ If not provided, the default image processor for the given `model` will be loaded (if it is a string). If
+ `model` is not specified or not a string, then the default image processor for `config` is loaded (if it is
+ a string).
+ processor (`str` or [`ProcessorMixin`], *optional*):
+ The processor that will be used by the pipeline to preprocess data for the model. This can be a model
+ identifier or an actual processor inheriting from [`ProcessorMixin`].
+ Processors are used for multi-modal models that require multi-modal inputs, for example, a model that
+ requires both text and image inputs.
+ If not provided, the default processor for the given `model` will be loaded (if it is a string). If `model`
+ is not specified or not a string, then the default processor for `config` is loaded (if it is a string).
+ framework (`str`, *optional*):
+ The framework to use, either `"pt"` for PyTorch or `"tf"` for TensorFlow. The specified framework must be
+ installed.
+ If no framework is specified, will default to the one currently installed. If no framework is specified and
+ both frameworks are installed, will default to the framework of the `model`, or to PyTorch if no model is
+ provided.
+ revision (`str`, *optional*, defaults to `"main"`):
+ When passing a task name or a string model identifier: The specific model version to use. It can be a
+ branch name, a tag name, or a commit id, since we use a git-based system for storing models and other
+ artifacts on huggingface.co, so `revision` can be any identifier allowed by git.
+ use_fast (`bool`, *optional*, defaults to `True`):
+ Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]).
+        token (`str` or `bool`, *optional*):
+ The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
+ when running `hf auth login` (stored in `~/.huggingface`).
+ device (`int` or `str` or `torch.device`):
+ Defines the device (*e.g.*, `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like `1`) on which this
+ pipeline will be allocated.
+        device_map (`str` or `dict[str, Union[int, str, torch.device]]`, *optional*):
+ Sent directly as `model_kwargs` (just a simpler shortcut). When `accelerate` library is present, set
+ `device_map="auto"` to compute the most optimized `device_map` automatically (see
+ [here](https://huggingface.co/docs/accelerate/main/en/package_reference/big_modeling#accelerate.cpu_offload)
+ for more information).
+
+            Do not use `device_map` AND `device` at the same time, as they will conflict.
+
+ torch_dtype (`str` or `torch.dtype`, *optional*):
+ Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model
+ (`torch.float16`, `torch.bfloat16`, ... or `"auto"`).
+ trust_remote_code (`bool`, *optional*, defaults to `False`):
+ Whether or not to allow for custom code defined on the Hub in their own modeling, configuration,
+ tokenization or even pipeline files. This option should only be set to `True` for repositories you trust
+ and in which you have read the code, as it will execute code present on the Hub on your local machine.
+ model_kwargs (`dict[str, Any]`, *optional*):
+ Additional dictionary of keyword arguments passed along to the model's `from_pretrained(...,
+ **model_kwargs)` function.
+ pipeline_class (`type`, *optional*):
+ Can be used to force using a custom pipeline class. If not provided, the default pipeline class for the
+ specified task will be used.
+        accelerator (`str`, *optional*):
+            The accelerator to use, either `"ort"` for ONNX Runtime, `"ov"` for OpenVINO, or `"ipex"` for Intel
+            Extension for PyTorch. If not specified, the first available backend is used, checked in the order `"ov"`, `"ort"`, `"ipex"`.
+ kwargs (`dict[str, Any]`, *optional*):
+ Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
+ corresponding pipeline class for possible values).
+ Returns:
+ [`Pipeline`]: A suitable pipeline for the task.
+ Examples:
+ ```python
+ >>> from optimum.pipelines import pipeline
+ >>> # Sentiment analysis pipeline with default model, using OpenVINO
+ >>> analyzer = pipeline("sentiment-analysis", accelerator="ov")
+ >>> # Question answering pipeline, specifying the checkpoint identifier, with IPEX
+ >>> oracle = pipeline(
+ ... "question-answering", model="distilbert/distilbert-base-cased-distilled-squad", tokenizer="google-bert/bert-base-cased", accelerator="ipex"
+ ... )
+ >>> # Named entity recognition pipeline, passing in a specific model and tokenizer, with ONNX Runtime
+ >>> model = ORTModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
+ >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
+ >>> recognizer = pipeline("ner", model=model, tokenizer=tokenizer)
+ ```
+ """
+
+ if accelerator is None:
+        # probably needs to check a couple of other things here, like the target device, type(model), etc.
+ if is_optimum_intel_available() and is_openvino_available():
+ logger.info(
+ "`accelerator` not specified. Using OpenVINO (`ov`) as the accelerator since `optimum-intel[openvino]` is installed."
+ )
+ accelerator = "ov"
+ elif is_optimum_onnx_available() and is_onnxruntime_available():
+ logger.info(
+ "`accelerator` not specified. Using ONNX Runtime (`ort`) as the accelerator since `optimum-onnx[onnxruntime]` is installed."
+ )
+ accelerator = "ort"
+ elif is_optimum_intel_available() and is_ipex_available():
+ logger.info(
+ "`accelerator` not specified. Using IPEX (`ipex`) as the accelerator since `optimum-intel[ipex]` is installed."
+ )
+ accelerator = "ipex"
+ else:
+ raise ImportError(
+ "You need to install either `optimum-onnx[onnxruntime]` to use ONNX Runtime as an accelerator, "
+ "or `optimum-intel[openvino]` to use OpenVINO as an accelerator, "
+ "or `optimum-intel[ipex]` to use IPEX as an accelerator."
+ )
+
+ if accelerator == "ort":
+ from optimum.onnxruntime import pipeline as ort_pipeline
+
+ return ort_pipeline(
+ task=task,
+ model=model,
+ config=config,
+ tokenizer=tokenizer,
+ feature_extractor=feature_extractor,
+ image_processor=image_processor,
+ processor=processor,
+ framework=framework,
+ revision=revision,
+ use_fast=use_fast,
+ token=token,
+ device=device,
+ device_map=device_map,
+ torch_dtype=torch_dtype,
+ trust_remote_code=trust_remote_code,
+ model_kwargs=model_kwargs,
+ pipeline_class=pipeline_class,
+ **kwargs,
+ )
+ elif accelerator in ["ov", "ipex"]:
+ from optimum.intel import pipeline as intel_pipeline
+
+ return intel_pipeline(
+ task=task,
+ model=model,
+ config=config,
+ tokenizer=tokenizer,
+ feature_extractor=feature_extractor,
+ image_processor=image_processor,
+ processor=processor,
+ framework=framework,
+ revision=revision,
+ use_fast=use_fast,
+ token=token,
+ device=device,
+ device_map=device_map,
+ torch_dtype=torch_dtype,
+ trust_remote_code=trust_remote_code,
+ model_kwargs=model_kwargs,
+ pipeline_class=pipeline_class,
+ accelerator=accelerator,
+ **kwargs,
+ )
+ else:
+ raise ValueError(f"Accelerator {accelerator} not recognized. Please use 'ort', 'ov' or 'ipex'.")
diff --git a/optimum/pipelines/pipelines_base.py b/optimum/pipelines/pipelines_base.py
deleted file mode 100644
index 80d81e0930..0000000000
--- a/optimum/pipelines/pipelines_base.py
+++ /dev/null
@@ -1,364 +0,0 @@
-# coding=utf-8
-# Copyright 2022 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Pipelines running different backends."""
-
-from typing import Any, Dict, Optional, Union
-
-from transformers import (
- AudioClassificationPipeline,
- AutoConfig,
- AutoFeatureExtractor,
- AutoImageProcessor,
- AutomaticSpeechRecognitionPipeline,
- AutoTokenizer,
- FeatureExtractionPipeline,
- FillMaskPipeline,
- ImageClassificationPipeline,
- ImageSegmentationPipeline,
- ImageToImagePipeline,
- ImageToTextPipeline,
- Pipeline,
- PreTrainedTokenizer,
- PreTrainedTokenizerFast,
- QuestionAnsweringPipeline,
- SequenceFeatureExtractor,
- SummarizationPipeline,
- Text2TextGenerationPipeline,
- TextClassificationPipeline,
- TextGenerationPipeline,
- TokenClassificationPipeline,
- TranslationPipeline,
- ZeroShotClassificationPipeline,
-)
-from transformers import pipeline as transformers_pipeline
-from transformers.feature_extraction_utils import PreTrainedFeatureExtractor
-from transformers.image_processing_utils import BaseImageProcessor
-from transformers.pipelines import (
- FEATURE_EXTRACTOR_MAPPING,
- IMAGE_PROCESSOR_MAPPING,
- TOKENIZER_MAPPING,
- check_task,
- get_default_model_and_revision,
-)
-from transformers.pipelines import SUPPORTED_TASKS as TRANSFORMERS_SUPPORTED_TASKS
-
-from ..utils import is_onnxruntime_available
-
-
-if is_onnxruntime_available():
- from ..onnxruntime import (
- ORTModelForAudioClassification,
- ORTModelForCausalLM,
- ORTModelForFeatureExtraction,
- ORTModelForImageClassification,
- ORTModelForImageToImage,
- ORTModelForMaskedLM,
- ORTModelForQuestionAnswering,
- ORTModelForSemanticSegmentation,
- ORTModelForSeq2SeqLM,
- ORTModelForSequenceClassification,
- ORTModelForSpeechSeq2Seq,
- ORTModelForTokenClassification,
- ORTModelForVision2Seq,
- )
- from ..onnxruntime.modeling_ort import ORTModel
-
- ORT_SUPPORTED_TASKS = {
- "feature-extraction": {
- "impl": FeatureExtractionPipeline,
- "class": (ORTModelForFeatureExtraction,),
- "default": "distilbert-base-cased",
- "type": "text", # feature extraction is only supported for text at the moment
- },
- "fill-mask": {
- "impl": FillMaskPipeline,
- "class": (ORTModelForMaskedLM,),
- "default": "bert-base-cased",
- "type": "text",
- },
- "image-classification": {
- "impl": ImageClassificationPipeline,
- "class": (ORTModelForImageClassification,),
- "default": "google/vit-base-patch16-224",
- "type": "image",
- },
- "image-segmentation": {
- "impl": ImageSegmentationPipeline,
- "class": (ORTModelForSemanticSegmentation,),
- "default": "nvidia/segformer-b0-finetuned-ade-512-512",
- "type": "image",
- },
- "question-answering": {
- "impl": QuestionAnsweringPipeline,
- "class": (ORTModelForQuestionAnswering,),
- "default": "distilbert-base-cased-distilled-squad",
- "type": "text",
- },
- "text-classification": {
- "impl": TextClassificationPipeline,
- "class": (ORTModelForSequenceClassification,),
- "default": "distilbert-base-uncased-finetuned-sst-2-english",
- "type": "text",
- },
- "text-generation": {
- "impl": TextGenerationPipeline,
- "class": (ORTModelForCausalLM,),
- "default": "distilgpt2",
- "type": "text",
- },
- "token-classification": {
- "impl": TokenClassificationPipeline,
- "class": (ORTModelForTokenClassification,),
- "default": "dbmdz/bert-large-cased-finetuned-conll03-english",
- "type": "text",
- },
- "zero-shot-classification": {
- "impl": ZeroShotClassificationPipeline,
- "class": (ORTModelForSequenceClassification,),
- "default": "facebook/bart-large-mnli",
- "type": "text",
- },
- "summarization": {
- "impl": SummarizationPipeline,
- "class": (ORTModelForSeq2SeqLM,),
- "default": "t5-base",
- "type": "text",
- },
- "translation": {
- "impl": TranslationPipeline,
- "class": (ORTModelForSeq2SeqLM,),
- "default": "t5-small",
- "type": "text",
- },
- "text2text-generation": {
- "impl": Text2TextGenerationPipeline,
- "class": (ORTModelForSeq2SeqLM,),
- "default": "t5-small",
- "type": "text",
- },
- "automatic-speech-recognition": {
- "impl": AutomaticSpeechRecognitionPipeline,
- "class": (ORTModelForSpeechSeq2Seq,),
- "default": "openai/whisper-tiny.en",
- "type": "multimodal",
- },
- "image-to-text": {
- "impl": ImageToTextPipeline,
- "class": (ORTModelForVision2Seq,),
- "default": "nlpconnect/vit-gpt2-image-captioning",
- "type": "multimodal",
- },
- "audio-classification": {
- "impl": AudioClassificationPipeline,
- "class": (ORTModelForAudioClassification,),
- "default": "superb/hubert-base-superb-ks",
- "type": "audio",
- },
- "image-to-image": {
- "impl": ImageToImagePipeline,
- "class": (ORTModelForImageToImage,),
- "default": "caidas/swin2SR-classical-sr-x2-64",
- "type": "image",
- },
- }
-else:
- ORT_SUPPORTED_TASKS = {}
-
-
-def load_ort_pipeline(
- model,
- targeted_task,
- load_tokenizer,
- tokenizer,
- feature_extractor,
- load_feature_extractor,
- image_processor,
- load_image_processor,
- SUPPORTED_TASKS,
- subfolder: str = "",
- token: Optional[Union[bool, str]] = None,
- revision: str = "main",
- model_kwargs: Optional[Dict[str, Any]] = None,
- config: AutoConfig = None,
- **kwargs,
-):
- if model_kwargs is None:
- model_kwargs = {}
-
- if isinstance(model, str):
- model_id = model
- model = SUPPORTED_TASKS[targeted_task]["class"][0].from_pretrained(
- model, revision=revision, subfolder=subfolder, token=token, **model_kwargs
- )
- elif isinstance(model, ORTModel):
- if tokenizer is None and load_tokenizer:
- for preprocessor in model.preprocessors:
- if isinstance(preprocessor, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
- tokenizer = preprocessor
- break
- if tokenizer is None:
- raise ValueError(
- "Could not automatically find a tokenizer for the ORTModel, you must pass a tokenizer explictly"
- )
- if feature_extractor is None and load_feature_extractor:
- for preprocessor in model.preprocessors:
- if isinstance(preprocessor, SequenceFeatureExtractor):
- feature_extractor = preprocessor
- break
- if feature_extractor is None:
- raise ValueError(
- "Could not automatically find a feature extractor for the ORTModel, you must pass a "
- "feature_extractor explictly"
- )
- if image_processor is None and load_image_processor:
- for preprocessor in model.preprocessors:
- if isinstance(preprocessor, BaseImageProcessor):
- image_processor = preprocessor
- break
- if image_processor is None:
- raise ValueError(
- "Could not automatically find a feature extractor for the ORTModel, you must pass a "
- "image_processor explictly"
- )
-
- model_id = None
- else:
- raise ValueError(
- f"""Model {model} is not supported. Please provide a valid model either as string or ORTModel.
- You can also provide non model then a default one will be used"""
- )
- return model, model_id, tokenizer, feature_extractor, image_processor
-
-
-MAPPING_LOADING_FUNC = {
- "ort": load_ort_pipeline,
-}
-
-
-def pipeline(
- task: str = None,
- model: Optional[Any] = None,
- tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
- feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
- image_processor: Optional[Union[str, BaseImageProcessor]] = None,
- use_fast: bool = True,
- token: Optional[Union[str, bool]] = None,
- accelerator: Optional[str] = "ort",
- revision: Optional[str] = None,
- trust_remote_code: Optional[bool] = None,
- *model_kwargs,
- **kwargs,
-) -> Pipeline:
- targeted_task = "translation" if task.startswith("translation") else task
-
- if accelerator == "ort":
- if targeted_task not in list(ORT_SUPPORTED_TASKS.keys()):
- raise ValueError(
- f"Task {targeted_task} is not supported for the ONNX Runtime pipeline. Supported tasks are { list(ORT_SUPPORTED_TASKS.keys())}"
- )
-
- supported_tasks = ORT_SUPPORTED_TASKS if accelerator == "ort" else TRANSFORMERS_SUPPORTED_TASKS
-
- if model is None:
- if accelerator != "ort":
- _, target_task, task_options = check_task(task)
- model, default_revision = get_default_model_and_revision(target_task, "pt", task_options)
- revision = revision or default_revision
- else:
- model = supported_tasks[targeted_task]["default"]
-
- hub_kwargs = {
- "revision": revision,
- "token": token,
- "trust_remote_code": trust_remote_code,
- "_commit_hash": None,
- }
-
- config = kwargs.get("config", None)
- if config is None and isinstance(model, str):
- config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **kwargs)
- hub_kwargs["_commit_hash"] = config._commit_hash
-
- no_feature_extractor_tasks = set()
- no_tokenizer_tasks = set()
- no_image_processor_tasks = set()
- for _task, values in supported_tasks.items():
- if values["type"] == "text":
- no_feature_extractor_tasks.add(_task)
- no_image_processor_tasks.add(_task)
- elif values["type"] in {"image", "video"}:
- no_tokenizer_tasks.add(_task)
- elif values["type"] in {"audio"}:
- no_tokenizer_tasks.add(_task)
- no_image_processor_tasks.add(_task)
- elif values["type"] not in ["multimodal", "audio", "video"]:
- raise ValueError(f"SUPPORTED_TASK {_task} contains invalid type {values['type']}")
-
- model_config = config or model.config
- load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None
- load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None
- load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None
-
- # copied from transformers.pipelines.__init__.py l.609
- if targeted_task in no_tokenizer_tasks:
- # These will never require a tokenizer.
- # the model on the other hand might have a tokenizer, but
- # the files could be missing from the hub, instead of failing
- # on such repos, we just force to not load it.
- load_tokenizer = False
-
- if targeted_task in no_feature_extractor_tasks:
- load_feature_extractor = False
-
- if targeted_task in no_image_processor_tasks:
- load_image_processor = False
-
- if load_image_processor and load_feature_extractor:
- load_feature_extractor = False
-
- model, model_id, tokenizer, feature_extractor, image_processor = MAPPING_LOADING_FUNC[accelerator](
- model,
- targeted_task,
- load_tokenizer,
- tokenizer,
- feature_extractor,
- load_feature_extractor,
- image_processor,
- load_image_processor,
- SUPPORTED_TASKS=supported_tasks,
- config=config,
- hub_kwargs=hub_kwargs,
- token=token,
- *model_kwargs,
- **kwargs,
- )
-
- use_fast = kwargs.get(use_fast, "True")
- if tokenizer is None and load_tokenizer:
- tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=use_fast, **hub_kwargs)
- if feature_extractor is None and load_feature_extractor:
- feature_extractor = AutoFeatureExtractor.from_pretrained(model_id, use_fast=use_fast, **hub_kwargs)
- if image_processor is None and load_image_processor:
- image_processor = AutoImageProcessor.from_pretrained(model_id, **hub_kwargs)
-
- return transformers_pipeline(
- task,
- model=model,
- tokenizer=tokenizer,
- feature_extractor=feature_extractor,
- image_processor=image_processor,
- use_fast=use_fast,
- **kwargs,
- )
diff --git a/optimum/utils/__init__.py b/optimum/utils/__init__.py
index e6ad5b6b2e..15e04d0e85 100644
--- a/optimum/utils/__init__.py
+++ b/optimum/utils/__init__.py
@@ -32,19 +32,19 @@
     ORT_QUANTIZE_MINIMUM_VERSION,
     TORCH_MINIMUM_VERSION,
     TRANSFORMERS_MINIMUM_VERSION,
-    check_if_diffusers_greater,
-    check_if_pytorch_greater,
-    check_if_torch_greater,
-    check_if_transformers_greater,
     is_accelerate_available,
     is_auto_gptq_available,
     is_datasets_available,
     is_diffusers_available,
     is_diffusers_version,
     is_gptqmodel_available,
+    is_ipex_available,
     is_onnx_available,
     is_onnxruntime_available,
     is_onnxslim_available,
+    is_openvino_available,
+    is_optimum_intel_available,
+    is_optimum_onnx_available,
     is_pydantic_available,
     is_sentence_transformers_available,
     is_tensorrt_available,
diff --git a/optimum/utils/import_utils.py b/optimum/utils/import_utils.py
index e1086508ea..c97d538a7e 100644
--- a/optimum/utils/import_utils.py
+++ b/optimum/utils/import_utils.py
@@ -81,21 +81,24 @@ def _is_package_available(
     return package_exists
+_timm_available = _is_package_available("timm")
_onnx_available = _is_package_available("onnx")
+_datasets_available = _is_package_available("datasets")
+_tensorrt_available = _is_package_available("tensorrt")
_pydantic_available = _is_package_available("pydantic")
-_accelerate_available = _is_package_available("accelerate")
+_openvino_available = _is_package_available("openvino")
_auto_gptq_available = _is_package_available("auto_gptq")
_gptqmodel_available = _is_package_available("gptqmodel")
-_timm_available = _is_package_available("timm")
+_accelerate_available = _is_package_available("accelerate")
+_optimum_onnx_available = _is_package_available("optimum.onnx")
+_optimum_intel_available = _is_package_available("optimum.intel")
+_ipex_available = _is_package_available("intel_extension_for_pytorch")
_sentence_transformers_available = _is_package_available("sentence_transformers")
-_datasets_available = _is_package_available("datasets")
-_tensorrt_available = _is_package_available("tensorrt")
_diffusers_available, _diffusers_version = _is_package_available("diffusers", return_version=True)
_transformers_available, _transformers_version = _is_package_available("transformers", return_version=True)
_torch_available, _torch_version = _is_package_available("torch", return_version=True)
-_onnxruntime_available, _onnxruntime_version = _is_package_available(
+_onnxruntime_available = _is_package_available(
     "onnxruntime",
-    return_version=True,
     pkg_distributions=[
         "onnxruntime-gpu",
         "onnxruntime-rocm",
@@ -191,9 +194,7 @@ def is_torch_version(operation: str, reference_version: str):
     if not _torch_available:
         return False
-    import torch
-
-    return compare_versions(version.parse(version.parse(torch.__version__).base_version), operation, reference_version)
+    return compare_versions(version.parse(_torch_version), operation, reference_version)
_is_torch_onnx_support_available = _torch_available and is_torch_version(">=", TORCH_MINIMUM_VERSION.base_version)
@@ -277,72 +278,20 @@ def is_onnxslim_available():
     return _onnxslim_available
-@contextmanager
-def check_if_pytorch_greater(target_version: str, message: str):
- r"""
- A context manager that does nothing except checking if the PyTorch version is greater than `pt_version`
- """
- import torch
+def is_ipex_available():
+    return _ipex_available
- if not version.parse(torch.__version__) >= version.parse(target_version):
- raise ImportError(
- f"Found an incompatible version of PyTorch. Found version {torch.__version__}, but only {target_version} and above are supported. {message}"
- )
- try:
- yield
- finally:
- pass
+def is_openvino_available():
+    return _openvino_available
-# TODO : Remove check_if_transformers_greater, check_if_diffusers_greater, check_if_torch_greater
-def check_if_transformers_greater(target_version: Union[str, version.Version]) -> bool:
- """
- Checks whether the current install of transformers is greater than or equal to the target version.
-
- Args:
- target_version (`Union[str, packaging.version.Version]`): version used as the reference for comparison.
- Returns:
- bool: whether the check is True or not.
- """
- import transformers
+def is_optimum_onnx_available():
+    return _optimum_onnx_available
- if isinstance(target_version, str):
- target_version = version.parse(target_version)
-
- return version.parse(transformers.__version__) >= target_version
-
-
-def check_if_diffusers_greater(target_version: str) -> bool:
- """
- Checks whether the current install of diffusers is greater than or equal to the target version.
-
- Args:
- target_version (str): version used as the reference for comparison.
-
- Returns:
- bool: whether the check is True or not.
- """
- if not _diffusers_available:
- return False
-
- return version.parse(_diffusers_version) >= version.parse(target_version)
-
-
-def check_if_torch_greater(target_version: str) -> bool:
- """
- Checks whether the current install of torch is greater than or equal to the target version.
-
- Args:
- target_version (str): version used as the reference for comparison.
-
- Returns:
- bool: whether the check is True or not.
- """
- if not _torch_available:
- return False
- return version.parse(_torch_version) >= version.parse(target_version)
+def is_optimum_intel_available():
+    return _optimum_intel_available
@contextmanager
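
The new availability helpers are thin accessors over the module-level probes above. A hedged sketch of how they are meant to guard optional backend imports, mirroring the dispatch in `optimum/pipelines/__init__.py` (assumes `optimum-onnx[onnxruntime]` provides the `optimum.onnxruntime` namespace when installed):

```python
from optimum.utils.import_utils import is_onnxruntime_available, is_optimum_onnx_available


def load_ort_model_class():
    # Guard the optional backend import; fail with an actionable message instead
    # of an opaque ModuleNotFoundError deep inside the import chain.
    if not (is_optimum_onnx_available() and is_onnxruntime_available()):
        raise ImportError("Install `optimum-onnx[onnxruntime]` to use the ONNX Runtime backend.")
    from optimum.onnxruntime import ORTModelForSequenceClassification

    return ORTModelForSequenceClassification
```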
diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py
new file mode 100644
index 0000000000..9151462af3
--- /dev/null
+++ b/tests/pipelines/test_pipelines.py
@@ -0,0 +1,279 @@
+# coding=utf-8
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from typing import Any, Dict
+
+import numpy as np
+from huggingface_hub.constants import HF_HUB_CACHE
+from PIL import Image
+from transformers import AutoTokenizer
+from transformers.pipelines import Pipeline
+
+from optimum.pipelines import pipeline as optimum_pipeline
+from optimum.utils.testing_utils import remove_directory
+
+
+GENERATE_KWARGS = {"max_new_tokens": 10, "min_new_tokens": 5, "do_sample": True}
+
+
+class ORTPipelineTest(unittest.TestCase):
+ """Test ORT pipelines for all supported tasks"""
+
+ def _create_dummy_text(self) -> str:
+ """Create dummy text input for text-based tasks"""
+ return "This is a test sentence for the pipeline."
+
+ def _create_dummy_image(self) -> Image.Image:
+ """Create dummy image input for image-based tasks"""
+ np_image = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
+ return Image.fromarray(np_image)
+
+ def _create_dummy_audio(self) -> Dict[str, Any]:
+ """Create dummy audio input for audio-based tasks"""
+ sample_rate = 16000
+ audio_array = np.random.randn(sample_rate).astype(np.float32)
+ return {"array": audio_array, "sampling_rate": sample_rate}
+
+ def test_text_classification_pipeline(self):
+ """Test text classification ORT pipeline"""
+ pipe = optimum_pipeline(task="text-classification", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ text = self._create_dummy_text()
+ result = pipe(text)
+
+ self.assertIsInstance(result, list)
+ self.assertGreater(len(result), 0)
+ self.assertIn("label", result[0])
+ self.assertIn("score", result[0])
+
+ def test_token_classification_pipeline(self):
+ """Test token classification ORT pipeline"""
+ pipe = optimum_pipeline(task="token-classification", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ text = self._create_dummy_text()
+ result = pipe(text)
+
+ self.assertIsInstance(result, list)
+ if len(result) > 0:
+ self.assertIn("entity", result[0])
+ self.assertIn("score", result[0])
+ self.assertIn("word", result[0])
+
+ def test_question_answering_pipeline(self):
+ """Test question answering ORT pipeline"""
+ pipe = optimum_pipeline(task="question-answering", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ question = "What animal jumps?"
+ context = "The quick brown fox jumps over the lazy dog."
+ result = pipe(question=question, context=context)
+
+ self.assertIsInstance(result, dict)
+ self.assertIn("answer", result)
+ self.assertIn("score", result)
+ self.assertIn("start", result)
+ self.assertIn("end", result)
+
+ def test_fill_mask_pipeline(self):
+ """Test fill mask ORT pipeline"""
+ pipe = optimum_pipeline(task="fill-mask", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ text = "The weather is today."
+ result = pipe(text)
+
+ self.assertIsInstance(result, list)
+ self.assertGreater(len(result), 0)
+ self.assertIn("token_str", result[0])
+ self.assertIn("score", result[0])
+
+ def test_feature_extraction_pipeline(self):
+ """Test feature extraction ORT pipeline"""
+ pipe = optimum_pipeline(task="feature-extraction", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ text = self._create_dummy_text()
+ result = pipe(text)
+
+ self.assertIsInstance(result, list)
+ self.assertIsInstance(result[0], list)
+ self.assertIsInstance(result[0][0], list)
+
+ def test_text_generation_pipeline(self):
+ """Test text generation ORT pipeline"""
+ pipe = optimum_pipeline(task="text-generation", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ text = "The future of AI is"
+ result = pipe(text, **GENERATE_KWARGS)
+
+ self.assertIsInstance(result, list)
+ self.assertGreater(len(result), 0)
+ self.assertIn("generated_text", result[0])
+ self.assertTrue(result[0]["generated_text"].startswith(text))
+
+ def test_summarization_pipeline(self):
+ """Test summarization ORT pipeline"""
+ pipe = optimum_pipeline(task="summarization", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ text = "The quick brown fox jumps over the lazy dog."
+ result = pipe(text, **GENERATE_KWARGS)
+
+ self.assertIsInstance(result, list)
+ self.assertGreater(len(result), 0)
+ self.assertIn("summary_text", result[0])
+
+ def test_translation_pipeline(self):
+ """Test translation ORT pipeline"""
+ pipe = optimum_pipeline(task="translation_en_to_de", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ text = "Hello, how are you?"
+ result = pipe(text, **GENERATE_KWARGS)
+
+ self.assertIsInstance(result, list)
+ self.assertGreater(len(result), 0)
+ self.assertIn("translation_text", result[0])
+
+ def test_text2text_generation_pipeline(self):
+ """Test text2text generation ORT pipeline"""
+ pipe = optimum_pipeline(task="text2text-generation", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ text = "translate English to German: Hello, how are you?"
+ result = pipe(text, **GENERATE_KWARGS)
+
+ self.assertIsInstance(result, list)
+ self.assertGreater(len(result), 0)
+ self.assertIn("generated_text", result[0])
+
+ def test_zero_shot_classification_pipeline(self):
+ """Test zero shot classification ORT pipeline"""
+ pipe = optimum_pipeline(task="zero-shot-classification", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ text = "This is a great movie with excellent acting."
+ candidate_labels = ["positive", "negative", "neutral"]
+ result = pipe(text, candidate_labels)
+
+ self.assertIsInstance(result, dict)
+ self.assertIn("labels", result)
+ self.assertIn("scores", result)
+ self.assertEqual(len(result["labels"]), len(candidate_labels))
+
+ def test_image_classification_pipeline(self):
+ """Test image classification ORT pipeline"""
+ pipe = optimum_pipeline(task="image-classification", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ image = self._create_dummy_image()
+ result = pipe(image)
+
+ self.assertIsInstance(result, list)
+ self.assertGreater(len(result), 0)
+ self.assertIn("label", result[0])
+ self.assertIn("score", result[0])
+
+ def test_image_segmentation_pipeline(self):
+ """Test image segmentation ORT pipeline"""
+ pipe = optimum_pipeline(task="image-segmentation", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ image = self._create_dummy_image()
+ result = pipe(image)
+
+ self.assertIsInstance(result, list)
+ if len(result) > 0:
+ self.assertIn("label", result[0])
+ self.assertIn("score", result[0])
+ self.assertIn("mask", result[0])
+
+ def test_image_to_text_pipeline(self):
+ """Test image to text ORT pipeline"""
+ pipe = optimum_pipeline(task="image-to-text", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ image = self._create_dummy_image()
+ result = pipe(image, generate_kwargs=GENERATE_KWARGS)
+
+ self.assertIsInstance(result, list)
+ self.assertGreater(len(result), 0)
+ self.assertIn("generated_text", result[0])
+
+ def test_image_to_image_pipeline(self):
+ """Test image to image ORT pipeline"""
+ pipe = optimum_pipeline(task="image-to-image", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ image = self._create_dummy_image()
+ result = pipe(image)
+
+ self.assertIsInstance(result, Image.Image)
+
+ # TODO: Enable when fixed in optimum-onnx
+ # def test_automatic_speech_recognition_pipeline(self):
+ # """Test automatic speech recognition ORT pipeline"""
+ # pipe = optimum_pipeline(task="automatic-speech-recognition", accelerator="ort")
+ # audio = self._create_dummy_audio()
+ # result = pipe(audio, generate_kwargs=GENERATE_KWARGS)
+
+ # self.assertIsInstance(result, dict)
+ # self.assertIn("text", result)
+
+ def test_audio_classification_pipeline(self):
+ """Test audio classification ORT pipeline"""
+ pipe = optimum_pipeline(task="audio-classification", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ audio = self._create_dummy_audio()
+ result = pipe(audio)
+
+ self.assertIsInstance(result, list)
+ self.assertGreater(len(result), 0)
+ self.assertIn("label", result[0])
+ self.assertIn("score", result[0])
+
+ def test_pipeline_with_ort_model(self):
+ """Test ORT pipeline with a model already in ONNX format"""
+ from optimum.onnxruntime import ORTModelForFeatureExtraction
+
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
+ model = ORTModelForFeatureExtraction.from_pretrained("distilbert-base-cased", export=True)
+ pipe = optimum_pipeline(task="feature-extraction", model=model, tokenizer=tokenizer, accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ text = self._create_dummy_text()
+ result = pipe(text)
+
+ self.assertIsInstance(result, list)
+ self.assertIsInstance(result[0], list)
+ self.assertIsInstance(result[0][0], list)
+
+ def test_pipeline_with_model_id(self):
+ """Test ORT pipeline with a custom model id"""
+ pipe = optimum_pipeline(task="feature-extraction", model="distilbert-base-cased", accelerator="ort")
+ self.assertIsInstance(pipe, Pipeline)
+ text = self._create_dummy_text()
+ result = pipe(text)
+
+ self.assertIsInstance(result, list)
+ self.assertIsInstance(result[0], list)
+
+ def test_pipeline_with_invalid_task(self):
+ """Test ORT pipeline with an unsupported task"""
+ with self.assertRaises(KeyError) as context:
+ _ = optimum_pipeline(task="invalid-task", accelerator="ort")
+ self.assertIn("Unknown task invalid-task", str(context.exception))
+
+ def test_pipeline_with_invalid_accelerator(self):
+ """Test ORT pipeline with an unsupported accelerator"""
+ with self.assertRaises(ValueError) as context:
+ _ = optimum_pipeline(task="feature-extraction", accelerator="invalid-accelerator")
+ self.assertIn("Accelerator invalid-accelerator not recognized", str(context.exception))
+
+ def tearDown(self):
+ remove_directory(HF_HUB_CACHE)
+
+
+if __name__ == "__main__":
+ unittest.main()