diff --git a/.github/workflows/test_exporters_common.yml b/.github/workflows/test_exporters_common.yml index 7df8ce4f7f..9cc3ce77de 100644 --- a/.github/workflows/test_exporters_common.yml +++ b/.github/workflows/test_exporters_common.yml @@ -36,7 +36,8 @@ jobs: run: | pip install --upgrade pip pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu - pip install .[tests,exporters] + pip install optimum-onnx@git+https://github.com/huggingface/optimum-onnx.git + pip install .[tests] - name: Test with pytest run: | diff --git a/.github/workflows/test_pipelines.yml b/.github/workflows/test_pipelines.yml new file mode 100644 index 0000000000..7a7cb500bf --- /dev/null +++ b/.github/workflows/test_pipelines.yml @@ -0,0 +1,45 @@ +name: Optimum Pipelines / Python - Test + +on: + push: + branches: [main] + pull_request: + branches: [main] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +env: + UV_SYSTEM_PYTHON: 1 + UV_TORCH_BACKEND: auto + TRANSFORMERS_IS_CI: true + +jobs: + build: + strategy: + fail-fast: false + matrix: + python-version: [3.9] + runs-on: [ubuntu-22.04] + + runs-on: ${{ matrix.runs-on }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + pip install --upgrade pip uv + uv pip install --no-cache-dir optimum-onnx[onnxruntime]@git+https://github.com/huggingface/optimum-onnx.git + uv pip install --no-cache-dir .[tests] + + - name: Test with pytest + run: | + pytest tests/pipelines -vvvv --durations=0 diff --git a/docs/source/quicktour.mdx b/docs/source/quicktour.mdx index 0ebfd0fd75..4f9666cd14 100644 --- a/docs/source/quicktour.mdx +++ b/docs/source/quicktour.mdx @@ -129,42 +129,6 @@ To train transformers on Habana's Gaudi processors, 🤗 Optimum provides a `Gau You can find more examples in the [documentation](https://huggingface.co/docs/optimum/habana/quickstart) and in the [examples](https://github.com/huggingface/optimum-habana/tree/main/examples). - -#### ONNX Runtime - -To train transformers with ONNX Runtime's acceleration features, 🤗 Optimum provides a `ORTTrainer` that is very similar to the 🤗 Transformers [Trainer](https://huggingface.co/docs/transformers/main_classes/trainer). Here is a simple example: - -```diff -- from transformers import Trainer, TrainingArguments -+ from optimum.onnxruntime import ORTTrainer, ORTTrainingArguments - - # Download a pretrained model from the Hub - model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased") - - # Define the training arguments -- training_args = TrainingArguments( -+ training_args = ORTTrainingArguments( - output_dir="path/to/save/folder/", - optim="adamw_ort_fused", - ... - ) - - # Create a ONNX Runtime Trainer -- trainer = Trainer( -+ trainer = ORTTrainer( - model=model, - args=training_args, - train_dataset=train_dataset, -+ feature="text-classification", # The model type to export to ONNX - ... - ) - - # Use ONNX Runtime for training! - trainer.train() -``` - -You can find more examples in the [documentation](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/trainer) and in the [examples](https://github.com/huggingface/optimum/tree/main/examples/onnxruntime/training). 
- ## Out of the box ONNX export The Optimum library handles out of the box the ONNX export of Transformers and Diffusers models! diff --git a/optimum/configuration_utils.py b/optimum/configuration_utils.py index 60f7c99617..41c565a22d 100644 --- a/optimum/configuration_utils.py +++ b/optimum/configuration_utils.py @@ -342,6 +342,7 @@ def to_dict(self) -> Dict[str, Any]: output["transformers_version"] = transformers_version_str output["optimum_version"] = __version__ - self.dict_torch_dtype_to_str(output) + if hasattr(self, "dict_torch_dtype_to_str"): + self.dict_torch_dtype_to_str(output) return output diff --git a/optimum/exporters/utils.py b/optimum/exporters/utils.py index 6931bf51c4..960910c276 100644 --- a/optimum/exporters/utils.py +++ b/optimum/exporters/utils.py @@ -26,8 +26,8 @@ from ..utils import ( DIFFUSERS_MINIMUM_VERSION, - check_if_diffusers_greater, is_diffusers_available, + is_diffusers_version, logging, ) from ..utils.import_utils import _diffusers_version @@ -38,7 +38,7 @@ if is_diffusers_available(): - if not check_if_diffusers_greater(DIFFUSERS_MINIMUM_VERSION.base_version): + if is_diffusers_version("<", DIFFUSERS_MINIMUM_VERSION.base_version): raise ImportError( f"We found an older version of diffusers {_diffusers_version} but we require diffusers to be >= {DIFFUSERS_MINIMUM_VERSION}. " "Please update diffusers by running `pip install --upgrade diffusers`" diff --git a/optimum/pipelines/__init__.py b/optimum/pipelines/__init__.py index 892f4bd391..5ed7b6b6d6 100644 --- a/optimum/pipelines/__init__.py +++ b/optimum/pipelines/__init__.py @@ -1,5 +1,5 @@ # coding=utf-8 -# Copyright 2023 The HuggingFace Team. All rights reserved. +# Copyright 2025 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,10 +12,282 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Pipelines running different backends.""" -from .pipelines_base import ( - MAPPING_LOADING_FUNC, - ORT_SUPPORTED_TASKS, - load_ort_pipeline, - pipeline, +from typing import TYPE_CHECKING, Any, Optional, Union + +import torch + +from optimum.utils.import_utils import ( + is_ipex_available, + is_onnxruntime_available, + is_openvino_available, + is_optimum_intel_available, + is_optimum_onnx_available, ) +from optimum.utils.logging import get_logger + + +logger = get_logger(__name__) + + +if TYPE_CHECKING: + from transformers import ( + BaseImageProcessor, + FeatureExtractionMixin, + Pipeline, + PretrainedConfig, + PreTrainedModel, + PreTrainedTokenizer, + PreTrainedTokenizerFast, + ProcessorMixin, + TFPreTrainedModel, + ) + + +# The docstring is simply a copy of transformers.pipelines.pipeline's doc with minor modifications +# to reflect the fact that this pipeline loads Accelerated models using optimum. 
+def pipeline( + task: Optional[str] = None, + model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, + config: Optional[Union[str, "PretrainedConfig"]] = None, + tokenizer: Optional[Union[str, "PreTrainedTokenizer", "PreTrainedTokenizerFast"]] = None, + feature_extractor: Optional[Union[str, "FeatureExtractionMixin "]] = None, + image_processor: Optional[Union[str, "BaseImageProcessor"]] = None, + processor: Optional[Union[str, "ProcessorMixin"]] = None, + framework: Optional[str] = None, + revision: Optional[str] = None, + use_fast: bool = True, + token: Optional[Union[str, bool]] = None, + device: Optional[Union[int, str, "torch.device"]] = None, + device_map: Optional[Union[str, dict[str, Union[int, str]]]] = None, + torch_dtype: Optional[Union[str, "torch.dtype"]] = "auto", + trust_remote_code: Optional[bool] = None, + model_kwargs: Optional[dict[str, Any]] = None, + pipeline_class: Optional[Any] = None, + accelerator: Optional[str] = None, + **kwargs: Any, +) -> "Pipeline": + """Utility factory method to build a [`Pipeline`] with an Optimum accelerated model, similar to `transformers.pipeline`. + A pipeline consists of: + - One or more components for pre-processing model inputs, such as a [tokenizer](tokenizer), + [image_processor](image_processor), [feature_extractor](feature_extractor), or [processor](processors). + - A [model](model) that generates predictions from the inputs. + - Optional post-processing steps to refine the model's output, which can also be handled by processors. + + While there are such optional arguments as `tokenizer`, `feature_extractor`, `image_processor`, and `processor`, + they shouldn't be specified all at once. If these components are not provided, `pipeline` will try to load + required ones automatically. In case you want to provide these components explicitly, please refer to a + specific pipeline in order to get more details regarding what components are required. + + Args: + task (`str`): + The task defining which pipeline will be returned. Currently accepted tasks are: + - `"audio-classification"`: will return a [`AudioClassificationPipeline`]. + - `"automatic-speech-recognition"`: will return a [`AutomaticSpeechRecognitionPipeline`]. + - `"depth-estimation"`: will return a [`DepthEstimationPipeline`]. + - `"document-question-answering"`: will return a [`DocumentQuestionAnsweringPipeline`]. + - `"feature-extraction"`: will return a [`FeatureExtractionPipeline`]. + - `"fill-mask"`: will return a [`FillMaskPipeline`]:. + - `"image-classification"`: will return a [`ImageClassificationPipeline`]. + - `"image-feature-extraction"`: will return an [`ImageFeatureExtractionPipeline`]. + - `"image-segmentation"`: will return a [`ImageSegmentationPipeline`]. + - `"image-text-to-text"`: will return a [`ImageTextToTextPipeline`]. + - `"image-to-image"`: will return a [`ImageToImagePipeline`]. + - `"image-to-text"`: will return a [`ImageToTextPipeline`]. + - `"mask-generation"`: will return a [`MaskGenerationPipeline`]. + - `"object-detection"`: will return a [`ObjectDetectionPipeline`]. + - `"question-answering"`: will return a [`QuestionAnsweringPipeline`]. + - `"summarization"`: will return a [`SummarizationPipeline`]. + - `"table-question-answering"`: will return a [`TableQuestionAnsweringPipeline`]. + - `"text2text-generation"`: will return a [`Text2TextGenerationPipeline`]. + - `"text-classification"` (alias `"sentiment-analysis"` available): will return a + [`TextClassificationPipeline`]. 
+ - `"text-generation"`: will return a [`TextGenerationPipeline`]:. + - `"text-to-audio"` (alias `"text-to-speech"` available): will return a [`TextToAudioPipeline`]:. + - `"token-classification"` (alias `"ner"` available): will return a [`TokenClassificationPipeline`]. + - `"translation"`: will return a [`TranslationPipeline`]. + - `"translation_xx_to_yy"`: will return a [`TranslationPipeline`]. + - `"video-classification"`: will return a [`VideoClassificationPipeline`]. + - `"visual-question-answering"`: will return a [`VisualQuestionAnsweringPipeline`]. + - `"zero-shot-classification"`: will return a [`ZeroShotClassificationPipeline`]. + - `"zero-shot-image-classification"`: will return a [`ZeroShotImageClassificationPipeline`]. + - `"zero-shot-audio-classification"`: will return a [`ZeroShotAudioClassificationPipeline`]. + - `"zero-shot-object-detection"`: will return a [`ZeroShotObjectDetectionPipeline`]. + model (`str` or [`ORTModel` or `OVModel`], *optional*): + The model that will be used by the pipeline to make predictions. This can be a model identifier or an + actual instance of a ONNX Runtime model inheriting from [`ORTModel` or `OVModel`]. + If not provided, the default for the `task` will be loaded. + config (`str` or [`PretrainedConfig`], *optional*): + The configuration that will be used by the pipeline to instantiate the model. This can be a model + identifier or an actual pretrained model configuration inheriting from [`PretrainedConfig`]. + If not provided, the default configuration file for the requested model will be used. That means that if + `model` is given, its default configuration will be used. However, if `model` is not supplied, this + `task`'s default model's config is used instead. + tokenizer (`str` or [`PreTrainedTokenizer`], *optional*): + The tokenizer that will be used by the pipeline to encode data for the model. This can be a model + identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`]. + If not provided, the default tokenizer for the given `model` will be loaded (if it is a string). If `model` + is not specified or not a string, then the default tokenizer for `config` is loaded (if it is a string). + However, if `config` is also not given or not a string, then the default tokenizer for the given `task` + will be loaded. + feature_extractor (`str` or [`PreTrainedFeatureExtractor`], *optional*): + The feature extractor that will be used by the pipeline to encode data for the model. This can be a model + identifier or an actual pretrained feature extractor inheriting from [`PreTrainedFeatureExtractor`]. + Feature extractors are used for non-NLP models, such as Speech or Vision models as well as multi-modal + models. Multi-modal models will also require a tokenizer to be passed. + If not provided, the default feature extractor for the given `model` will be loaded (if it is a string). If + `model` is not specified or not a string, then the default feature extractor for `config` is loaded (if it + is a string). However, if `config` is also not given or not a string, then the default feature extractor + for the given `task` will be loaded. + image_processor (`str` or [`BaseImageProcessor`], *optional*): + The image processor that will be used by the pipeline to preprocess images for the model. This can be a + model identifier or an actual image processor inheriting from [`BaseImageProcessor`]. + Image processors are used for Vision models and multi-modal models that require image inputs. 
Multi-modal
+            models will also require a tokenizer to be passed.
+            If not provided, the default image processor for the given `model` will be loaded (if it is a string). If
+            `model` is not specified or not a string, then the default image processor for `config` is loaded (if it is
+            a string).
+        processor (`str` or [`ProcessorMixin`], *optional*):
+            The processor that will be used by the pipeline to preprocess data for the model. This can be a model
+            identifier or an actual processor inheriting from [`ProcessorMixin`].
+            Processors are used for multi-modal models that require multi-modal inputs, for example, a model that
+            requires both text and image inputs.
+            If not provided, the default processor for the given `model` will be loaded (if it is a string). If `model`
+            is not specified or not a string, then the default processor for `config` is loaded (if it is a string).
+        framework (`str`, *optional*):
+            The framework to use, either `"pt"` for PyTorch or `"tf"` for TensorFlow. The specified framework must be
+            installed.
+            If no framework is specified, will default to the one currently installed. If no framework is specified and
+            both frameworks are installed, will default to the framework of the `model`, or to PyTorch if no model is
+            provided.
+        revision (`str`, *optional*, defaults to `"main"`):
+            When passing a task name or a string model identifier: The specific model version to use. It can be a
+            branch name, a tag name, or a commit id, since we use a git-based system for storing models and other
+            artifacts on huggingface.co, so `revision` can be any identifier allowed by git.
+        use_fast (`bool`, *optional*, defaults to `True`):
+            Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]).
+        token (`str` or *bool*, *optional*):
+            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
+            when running `hf auth login` (stored in `~/.huggingface`).
+        device (`int` or `str` or `torch.device`):
+            Defines the device (*e.g.*, `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like `1`) on which this
+            pipeline will be allocated.
+        device_map (`str` or `dict[str, Union[int, str, torch.device]]`, *optional*):
+            Sent directly as `model_kwargs` (just a simpler shortcut). When `accelerate` library is present, set
+            `device_map="auto"` to compute the most optimized `device_map` automatically (see
+            [here](https://huggingface.co/docs/accelerate/main/en/package_reference/big_modeling#accelerate.cpu_offload)
+            for more information).
+
+            Do not use `device_map` AND `device` at the same time as they will conflict.
+
+        torch_dtype (`str` or `torch.dtype`, *optional*):
+            Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model
+            (`torch.float16`, `torch.bfloat16`, ... or `"auto"`).
+        trust_remote_code (`bool`, *optional*, defaults to `False`):
+            Whether or not to allow for custom code defined on the Hub in their own modeling, configuration,
+            tokenization or even pipeline files. This option should only be set to `True` for repositories you trust
+            and in which you have read the code, as it will execute code present on the Hub on your local machine.
+        model_kwargs (`dict[str, Any]`, *optional*):
+            Additional dictionary of keyword arguments passed along to the model's `from_pretrained(...,
+            **model_kwargs)` function.
+        pipeline_class (`type`, *optional*):
+            Can be used to force using a custom pipeline class.
If not provided, the default pipeline class for the + specified task will be used. + accelerator (`str`, *optional*): + The accelerator to use, either `"ort"` for ONNX Runtime, `"ov"` for OpenVINO, or `"ipex"` for Intel + Extension for PyTorch. If no accelerator is specified, will default to the one currently installed/available. + kwargs (`dict[str, Any]`, *optional*): + Additional keyword arguments passed along to the specific pipeline init (see the documentation for the + corresponding pipeline class for possible values). + Returns: + [`Pipeline`]: A suitable pipeline for the task. + Examples: + ```python + >>> from optimum.pipelines import pipeline + >>> # Sentiment analysis pipeline with default model, using OpenVINO + >>> analyzer = pipeline("sentiment-analysis", accelerator="ov") + >>> # Question answering pipeline, specifying the checkpoint identifier, with IPEX + >>> oracle = pipeline( + ... "question-answering", model="distilbert/distilbert-base-cased-distilled-squad", tokenizer="google-bert/bert-base-cased", accelerator="ipex" + ... ) + >>> # Named entity recognition pipeline, passing in a specific model and tokenizer, with ONNX Runtime + >>> model = ORTModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english") + >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased") + >>> recognizer = pipeline("ner", model=model, tokenizer=tokenizer) + ``` + """ + + if accelerator is None: + # probably needs to check for couple of stuff here, like target device, type(model) etc. + if is_optimum_intel_available() and is_openvino_available(): + logger.info( + "`accelerator` not specified. Using OpenVINO (`ov`) as the accelerator since `optimum-intel[openvino]` is installed." + ) + accelerator = "ov" + elif is_optimum_onnx_available() and is_onnxruntime_available(): + logger.info( + "`accelerator` not specified. Using ONNX Runtime (`ort`) as the accelerator since `optimum-onnx[onnxruntime]` is installed." + ) + accelerator = "ort" + elif is_optimum_intel_available() and is_ipex_available(): + logger.info( + "`accelerator` not specified. Using IPEX (`ipex`) as the accelerator since `optimum-intel[ipex]` is installed." + ) + accelerator = "ipex" + else: + raise ImportError( + "You need to install either `optimum-onnx[onnxruntime]` to use ONNX Runtime as an accelerator, " + "or `optimum-intel[openvino]` to use OpenVINO as an accelerator, " + "or `optimum-intel[ipex]` to use IPEX as an accelerator." 
+ ) + + if accelerator == "ort": + from optimum.onnxruntime import pipeline as ort_pipeline + + return ort_pipeline( + task=task, + model=model, + config=config, + tokenizer=tokenizer, + feature_extractor=feature_extractor, + image_processor=image_processor, + processor=processor, + framework=framework, + revision=revision, + use_fast=use_fast, + token=token, + device=device, + device_map=device_map, + torch_dtype=torch_dtype, + trust_remote_code=trust_remote_code, + model_kwargs=model_kwargs, + pipeline_class=pipeline_class, + **kwargs, + ) + elif accelerator in ["ov", "ipex"]: + from optimum.intel import pipeline as intel_pipeline + + return intel_pipeline( + task=task, + model=model, + config=config, + tokenizer=tokenizer, + feature_extractor=feature_extractor, + image_processor=image_processor, + processor=processor, + framework=framework, + revision=revision, + use_fast=use_fast, + token=token, + device=device, + device_map=device_map, + torch_dtype=torch_dtype, + trust_remote_code=trust_remote_code, + model_kwargs=model_kwargs, + pipeline_class=pipeline_class, + accelerator=accelerator, + **kwargs, + ) + else: + raise ValueError(f"Accelerator {accelerator} not recognized. Please use 'ort', 'ov' or 'ipex'.") diff --git a/optimum/pipelines/pipelines_base.py b/optimum/pipelines/pipelines_base.py deleted file mode 100644 index 80d81e0930..0000000000 --- a/optimum/pipelines/pipelines_base.py +++ /dev/null @@ -1,364 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Pipelines running different backends.""" - -from typing import Any, Dict, Optional, Union - -from transformers import ( - AudioClassificationPipeline, - AutoConfig, - AutoFeatureExtractor, - AutoImageProcessor, - AutomaticSpeechRecognitionPipeline, - AutoTokenizer, - FeatureExtractionPipeline, - FillMaskPipeline, - ImageClassificationPipeline, - ImageSegmentationPipeline, - ImageToImagePipeline, - ImageToTextPipeline, - Pipeline, - PreTrainedTokenizer, - PreTrainedTokenizerFast, - QuestionAnsweringPipeline, - SequenceFeatureExtractor, - SummarizationPipeline, - Text2TextGenerationPipeline, - TextClassificationPipeline, - TextGenerationPipeline, - TokenClassificationPipeline, - TranslationPipeline, - ZeroShotClassificationPipeline, -) -from transformers import pipeline as transformers_pipeline -from transformers.feature_extraction_utils import PreTrainedFeatureExtractor -from transformers.image_processing_utils import BaseImageProcessor -from transformers.pipelines import ( - FEATURE_EXTRACTOR_MAPPING, - IMAGE_PROCESSOR_MAPPING, - TOKENIZER_MAPPING, - check_task, - get_default_model_and_revision, -) -from transformers.pipelines import SUPPORTED_TASKS as TRANSFORMERS_SUPPORTED_TASKS - -from ..utils import is_onnxruntime_available - - -if is_onnxruntime_available(): - from ..onnxruntime import ( - ORTModelForAudioClassification, - ORTModelForCausalLM, - ORTModelForFeatureExtraction, - ORTModelForImageClassification, - ORTModelForImageToImage, - ORTModelForMaskedLM, - ORTModelForQuestionAnswering, - ORTModelForSemanticSegmentation, - ORTModelForSeq2SeqLM, - ORTModelForSequenceClassification, - ORTModelForSpeechSeq2Seq, - ORTModelForTokenClassification, - ORTModelForVision2Seq, - ) - from ..onnxruntime.modeling_ort import ORTModel - - ORT_SUPPORTED_TASKS = { - "feature-extraction": { - "impl": FeatureExtractionPipeline, - "class": (ORTModelForFeatureExtraction,), - "default": "distilbert-base-cased", - "type": "text", # feature extraction is only supported for text at the moment - }, - "fill-mask": { - "impl": FillMaskPipeline, - "class": (ORTModelForMaskedLM,), - "default": "bert-base-cased", - "type": "text", - }, - "image-classification": { - "impl": ImageClassificationPipeline, - "class": (ORTModelForImageClassification,), - "default": "google/vit-base-patch16-224", - "type": "image", - }, - "image-segmentation": { - "impl": ImageSegmentationPipeline, - "class": (ORTModelForSemanticSegmentation,), - "default": "nvidia/segformer-b0-finetuned-ade-512-512", - "type": "image", - }, - "question-answering": { - "impl": QuestionAnsweringPipeline, - "class": (ORTModelForQuestionAnswering,), - "default": "distilbert-base-cased-distilled-squad", - "type": "text", - }, - "text-classification": { - "impl": TextClassificationPipeline, - "class": (ORTModelForSequenceClassification,), - "default": "distilbert-base-uncased-finetuned-sst-2-english", - "type": "text", - }, - "text-generation": { - "impl": TextGenerationPipeline, - "class": (ORTModelForCausalLM,), - "default": "distilgpt2", - "type": "text", - }, - "token-classification": { - "impl": TokenClassificationPipeline, - "class": (ORTModelForTokenClassification,), - "default": "dbmdz/bert-large-cased-finetuned-conll03-english", - "type": "text", - }, - "zero-shot-classification": { - "impl": ZeroShotClassificationPipeline, - "class": (ORTModelForSequenceClassification,), - "default": "facebook/bart-large-mnli", - "type": "text", - }, - "summarization": { - "impl": SummarizationPipeline, - "class": (ORTModelForSeq2SeqLM,), - "default": "t5-base", 
- "type": "text", - }, - "translation": { - "impl": TranslationPipeline, - "class": (ORTModelForSeq2SeqLM,), - "default": "t5-small", - "type": "text", - }, - "text2text-generation": { - "impl": Text2TextGenerationPipeline, - "class": (ORTModelForSeq2SeqLM,), - "default": "t5-small", - "type": "text", - }, - "automatic-speech-recognition": { - "impl": AutomaticSpeechRecognitionPipeline, - "class": (ORTModelForSpeechSeq2Seq,), - "default": "openai/whisper-tiny.en", - "type": "multimodal", - }, - "image-to-text": { - "impl": ImageToTextPipeline, - "class": (ORTModelForVision2Seq,), - "default": "nlpconnect/vit-gpt2-image-captioning", - "type": "multimodal", - }, - "audio-classification": { - "impl": AudioClassificationPipeline, - "class": (ORTModelForAudioClassification,), - "default": "superb/hubert-base-superb-ks", - "type": "audio", - }, - "image-to-image": { - "impl": ImageToImagePipeline, - "class": (ORTModelForImageToImage,), - "default": "caidas/swin2SR-classical-sr-x2-64", - "type": "image", - }, - } -else: - ORT_SUPPORTED_TASKS = {} - - -def load_ort_pipeline( - model, - targeted_task, - load_tokenizer, - tokenizer, - feature_extractor, - load_feature_extractor, - image_processor, - load_image_processor, - SUPPORTED_TASKS, - subfolder: str = "", - token: Optional[Union[bool, str]] = None, - revision: str = "main", - model_kwargs: Optional[Dict[str, Any]] = None, - config: AutoConfig = None, - **kwargs, -): - if model_kwargs is None: - model_kwargs = {} - - if isinstance(model, str): - model_id = model - model = SUPPORTED_TASKS[targeted_task]["class"][0].from_pretrained( - model, revision=revision, subfolder=subfolder, token=token, **model_kwargs - ) - elif isinstance(model, ORTModel): - if tokenizer is None and load_tokenizer: - for preprocessor in model.preprocessors: - if isinstance(preprocessor, (PreTrainedTokenizer, PreTrainedTokenizerFast)): - tokenizer = preprocessor - break - if tokenizer is None: - raise ValueError( - "Could not automatically find a tokenizer for the ORTModel, you must pass a tokenizer explictly" - ) - if feature_extractor is None and load_feature_extractor: - for preprocessor in model.preprocessors: - if isinstance(preprocessor, SequenceFeatureExtractor): - feature_extractor = preprocessor - break - if feature_extractor is None: - raise ValueError( - "Could not automatically find a feature extractor for the ORTModel, you must pass a " - "feature_extractor explictly" - ) - if image_processor is None and load_image_processor: - for preprocessor in model.preprocessors: - if isinstance(preprocessor, BaseImageProcessor): - image_processor = preprocessor - break - if image_processor is None: - raise ValueError( - "Could not automatically find a feature extractor for the ORTModel, you must pass a " - "image_processor explictly" - ) - - model_id = None - else: - raise ValueError( - f"""Model {model} is not supported. Please provide a valid model either as string or ORTModel. 
- You can also provide non model then a default one will be used""" - ) - return model, model_id, tokenizer, feature_extractor, image_processor - - -MAPPING_LOADING_FUNC = { - "ort": load_ort_pipeline, -} - - -def pipeline( - task: str = None, - model: Optional[Any] = None, - tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, - image_processor: Optional[Union[str, BaseImageProcessor]] = None, - use_fast: bool = True, - token: Optional[Union[str, bool]] = None, - accelerator: Optional[str] = "ort", - revision: Optional[str] = None, - trust_remote_code: Optional[bool] = None, - *model_kwargs, - **kwargs, -) -> Pipeline: - targeted_task = "translation" if task.startswith("translation") else task - - if accelerator == "ort": - if targeted_task not in list(ORT_SUPPORTED_TASKS.keys()): - raise ValueError( - f"Task {targeted_task} is not supported for the ONNX Runtime pipeline. Supported tasks are { list(ORT_SUPPORTED_TASKS.keys())}" - ) - - supported_tasks = ORT_SUPPORTED_TASKS if accelerator == "ort" else TRANSFORMERS_SUPPORTED_TASKS - - if model is None: - if accelerator != "ort": - _, target_task, task_options = check_task(task) - model, default_revision = get_default_model_and_revision(target_task, "pt", task_options) - revision = revision or default_revision - else: - model = supported_tasks[targeted_task]["default"] - - hub_kwargs = { - "revision": revision, - "token": token, - "trust_remote_code": trust_remote_code, - "_commit_hash": None, - } - - config = kwargs.get("config", None) - if config is None and isinstance(model, str): - config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **kwargs) - hub_kwargs["_commit_hash"] = config._commit_hash - - no_feature_extractor_tasks = set() - no_tokenizer_tasks = set() - no_image_processor_tasks = set() - for _task, values in supported_tasks.items(): - if values["type"] == "text": - no_feature_extractor_tasks.add(_task) - no_image_processor_tasks.add(_task) - elif values["type"] in {"image", "video"}: - no_tokenizer_tasks.add(_task) - elif values["type"] in {"audio"}: - no_tokenizer_tasks.add(_task) - no_image_processor_tasks.add(_task) - elif values["type"] not in ["multimodal", "audio", "video"]: - raise ValueError(f"SUPPORTED_TASK {_task} contains invalid type {values['type']}") - - model_config = config or model.config - load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None - load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None - load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None - - # copied from transformers.pipelines.__init__.py l.609 - if targeted_task in no_tokenizer_tasks: - # These will never require a tokenizer. - # the model on the other hand might have a tokenizer, but - # the files could be missing from the hub, instead of failing - # on such repos, we just force to not load it. 
- load_tokenizer = False - - if targeted_task in no_feature_extractor_tasks: - load_feature_extractor = False - - if targeted_task in no_image_processor_tasks: - load_image_processor = False - - if load_image_processor and load_feature_extractor: - load_feature_extractor = False - - model, model_id, tokenizer, feature_extractor, image_processor = MAPPING_LOADING_FUNC[accelerator]( - model, - targeted_task, - load_tokenizer, - tokenizer, - feature_extractor, - load_feature_extractor, - image_processor, - load_image_processor, - SUPPORTED_TASKS=supported_tasks, - config=config, - hub_kwargs=hub_kwargs, - token=token, - *model_kwargs, - **kwargs, - ) - - use_fast = kwargs.get(use_fast, "True") - if tokenizer is None and load_tokenizer: - tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=use_fast, **hub_kwargs) - if feature_extractor is None and load_feature_extractor: - feature_extractor = AutoFeatureExtractor.from_pretrained(model_id, use_fast=use_fast, **hub_kwargs) - if image_processor is None and load_image_processor: - image_processor = AutoImageProcessor.from_pretrained(model_id, **hub_kwargs) - - return transformers_pipeline( - task, - model=model, - tokenizer=tokenizer, - feature_extractor=feature_extractor, - image_processor=image_processor, - use_fast=use_fast, - **kwargs, - ) diff --git a/optimum/utils/__init__.py b/optimum/utils/__init__.py index e6ad5b6b2e..15e04d0e85 100644 --- a/optimum/utils/__init__.py +++ b/optimum/utils/__init__.py @@ -32,19 +32,19 @@ ORT_QUANTIZE_MINIMUM_VERSION, TORCH_MINIMUM_VERSION, TRANSFORMERS_MINIMUM_VERSION, - check_if_diffusers_greater, - check_if_pytorch_greater, - check_if_torch_greater, - check_if_transformers_greater, is_accelerate_available, is_auto_gptq_available, is_datasets_available, is_diffusers_available, is_diffusers_version, is_gptqmodel_available, + is_ipex_available, is_onnx_available, is_onnxruntime_available, is_onnxslim_available, + is_openvino_available, + is_optimum_intel_available, + is_optimum_onnx_available, is_pydantic_available, is_sentence_transformers_available, is_tensorrt_available, diff --git a/optimum/utils/import_utils.py b/optimum/utils/import_utils.py index e1086508ea..c97d538a7e 100644 --- a/optimum/utils/import_utils.py +++ b/optimum/utils/import_utils.py @@ -81,21 +81,24 @@ def _is_package_available( return package_exists +_timm_available = _is_package_available("timm") _onnx_available = _is_package_available("onnx") +_datasets_available = _is_package_available("datasets") +_tensorrt_available = _is_package_available("tensorrt") _pydantic_available = _is_package_available("pydantic") -_accelerate_available = _is_package_available("accelerate") +_openvino_available = _is_package_available("openvino") _auto_gptq_available = _is_package_available("auto_gptq") _gptqmodel_available = _is_package_available("gptqmodel") -_timm_available = _is_package_available("timm") +_accelerate_available = _is_package_available("accelerate") +_optimum_onnx_available = _is_package_available("optimum.onnx") +_optimum_intel_available = _is_package_available("optimum.intel") +_ipex_available = _is_package_available("intel_extension_for_pytorch") _sentence_transformers_available = _is_package_available("sentence_transformers") -_datasets_available = _is_package_available("datasets") -_tensorrt_available = _is_package_available("tensorrt") _diffusers_available, _diffusers_version = _is_package_available("diffusers", return_version=True) _transformers_available, _transformers_version = _is_package_available("transformers", 
return_version=True) _torch_available, _torch_version = _is_package_available("torch", return_version=True) -_onnxruntime_available, _onnxruntime_version = _is_package_available( +_onnxruntime_available = _is_package_available( "onnxruntime", - return_version=True, pkg_distributions=[ "onnxruntime-gpu", "onnxruntime-rocm", @@ -191,9 +194,7 @@ def is_torch_version(operation: str, reference_version: str): if not _torch_available: return False - import torch - - return compare_versions(version.parse(version.parse(torch.__version__).base_version), operation, reference_version) + return compare_versions(version.parse(_torch_version), operation, reference_version) _is_torch_onnx_support_available = _torch_available and is_torch_version(">=", TORCH_MINIMUM_VERSION.base_version) @@ -277,72 +278,20 @@ def is_onnxslim_available(): return _onnxslim_available -@contextmanager -def check_if_pytorch_greater(target_version: str, message: str): - r""" - A context manager that does nothing except checking if the PyTorch version is greater than `pt_version` - """ - import torch +def is_ipex_available(): + return _ipex_available - if not version.parse(torch.__version__) >= version.parse(target_version): - raise ImportError( - f"Found an incompatible version of PyTorch. Found version {torch.__version__}, but only {target_version} and above are supported. {message}" - ) - try: - yield - finally: - pass +def is_openvino_available(): + return _openvino_available -# TODO : Remove check_if_transformers_greater, check_if_diffusers_greater, check_if_torch_greater -def check_if_transformers_greater(target_version: Union[str, version.Version]) -> bool: - """ - Checks whether the current install of transformers is greater than or equal to the target version. - - Args: - target_version (`Union[str, packaging.version.Version]`): version used as the reference for comparison. - Returns: - bool: whether the check is True or not. - """ - import transformers +def is_optimum_onnx_available(): + return _optimum_onnx_available - if isinstance(target_version, str): - target_version = version.parse(target_version) - - return version.parse(transformers.__version__) >= target_version - - -def check_if_diffusers_greater(target_version: str) -> bool: - """ - Checks whether the current install of diffusers is greater than or equal to the target version. - - Args: - target_version (str): version used as the reference for comparison. - - Returns: - bool: whether the check is True or not. - """ - if not _diffusers_available: - return False - - return version.parse(_diffusers_version) >= version.parse(target_version) - - -def check_if_torch_greater(target_version: str) -> bool: - """ - Checks whether the current install of torch is greater than or equal to the target version. - - Args: - target_version (str): version used as the reference for comparison. - - Returns: - bool: whether the check is True or not. - """ - if not _torch_available: - return False - return version.parse(_torch_version) >= version.parse(target_version) +def is_optimum_intel_available(): + return _optimum_intel_available @contextmanager diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py new file mode 100644 index 0000000000..9151462af3 --- /dev/null +++ b/tests/pipelines/test_pipelines.py @@ -0,0 +1,279 @@ +# coding=utf-8 +# Copyright 2023 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from typing import Any, Dict
+
+import numpy as np
+from huggingface_hub.constants import HF_HUB_CACHE
+from PIL import Image
+from transformers import AutoTokenizer
+from transformers.pipelines import Pipeline
+
+from optimum.pipelines import pipeline as optimum_pipeline
+from optimum.utils.testing_utils import remove_directory
+
+
+GENERATE_KWARGS = {"max_new_tokens": 10, "min_new_tokens": 5, "do_sample": True}
+
+
+class ORTPipelineTest(unittest.TestCase):
+    """Test ORT pipelines for all supported tasks"""
+
+    def _create_dummy_text(self) -> str:
+        """Create dummy text input for text-based tasks"""
+        return "This is a test sentence for the pipeline."
+
+    def _create_dummy_image(self) -> Image.Image:
+        """Create dummy image input for image-based tasks"""
+        np_image = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
+        return Image.fromarray(np_image)
+
+    def _create_dummy_audio(self) -> Dict[str, Any]:
+        """Create dummy audio input for audio-based tasks"""
+        sample_rate = 16000
+        audio_array = np.random.randn(sample_rate).astype(np.float32)
+        return {"array": audio_array, "sampling_rate": sample_rate}
+
+    def test_text_classification_pipeline(self):
+        """Test text classification ORT pipeline"""
+        pipe = optimum_pipeline(task="text-classification", accelerator="ort")
+        self.assertIsInstance(pipe, Pipeline)
+        text = self._create_dummy_text()
+        result = pipe(text)
+
+        self.assertIsInstance(result, list)
+        self.assertGreater(len(result), 0)
+        self.assertIn("label", result[0])
+        self.assertIn("score", result[0])
+
+    def test_token_classification_pipeline(self):
+        """Test token classification ORT pipeline"""
+        pipe = optimum_pipeline(task="token-classification", accelerator="ort")
+        self.assertIsInstance(pipe, Pipeline)
+        text = self._create_dummy_text()
+        result = pipe(text)
+
+        self.assertIsInstance(result, list)
+        if len(result) > 0:
+            self.assertIn("entity", result[0])
+            self.assertIn("score", result[0])
+            self.assertIn("word", result[0])
+
+    def test_question_answering_pipeline(self):
+        """Test question answering ORT pipeline"""
+        pipe = optimum_pipeline(task="question-answering", accelerator="ort")
+        self.assertIsInstance(pipe, Pipeline)
+        question = "What animal jumps?"
+        context = "The quick brown fox jumps over the lazy dog."
+        result = pipe(question=question, context=context)
+
+        self.assertIsInstance(result, dict)
+        self.assertIn("answer", result)
+        self.assertIn("score", result)
+        self.assertIn("start", result)
+        self.assertIn("end", result)
+
+    def test_fill_mask_pipeline(self):
+        """Test fill mask ORT pipeline"""
+        pipe = optimum_pipeline(task="fill-mask", accelerator="ort")
+        self.assertIsInstance(pipe, Pipeline)
+        text = f"The weather is {pipe.tokenizer.mask_token} today."  # fill-mask input must contain the model's mask token
+ result = pipe(text) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + self.assertIn("token_str", result[0]) + self.assertIn("score", result[0]) + + def test_feature_extraction_pipeline(self): + """Test feature extraction ORT pipeline""" + pipe = optimum_pipeline(task="feature-extraction", accelerator="ort") + self.assertIsInstance(pipe, Pipeline) + text = self._create_dummy_text() + result = pipe(text) + + self.assertIsInstance(result, list) + self.assertIsInstance(result[0], list) + self.assertIsInstance(result[0][0], list) + + def test_text_generation_pipeline(self): + """Test text generation ORT pipeline""" + pipe = optimum_pipeline(task="text-generation", accelerator="ort") + self.assertIsInstance(pipe, Pipeline) + text = "The future of AI is" + result = pipe(text, **GENERATE_KWARGS) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + self.assertIn("generated_text", result[0]) + self.assertTrue(result[0]["generated_text"].startswith(text)) + + def test_summarization_pipeline(self): + """Test summarization ORT pipeline""" + pipe = optimum_pipeline(task="summarization", accelerator="ort") + self.assertIsInstance(pipe, Pipeline) + text = "The quick brown fox jumps over the lazy dog." + result = pipe(text, **GENERATE_KWARGS) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + self.assertIn("summary_text", result[0]) + + def test_translation_pipeline(self): + """Test translation ORT pipeline""" + pipe = optimum_pipeline(task="translation_en_to_de", accelerator="ort") + self.assertIsInstance(pipe, Pipeline) + text = "Hello, how are you?" + result = pipe(text, **GENERATE_KWARGS) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + self.assertIn("translation_text", result[0]) + + def test_text2text_generation_pipeline(self): + """Test text2text generation ORT pipeline""" + pipe = optimum_pipeline(task="text2text-generation", accelerator="ort") + self.assertIsInstance(pipe, Pipeline) + text = "translate English to German: Hello, how are you?" + result = pipe(text, **GENERATE_KWARGS) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + self.assertIn("generated_text", result[0]) + + def test_zero_shot_classification_pipeline(self): + """Test zero shot classification ORT pipeline""" + pipe = optimum_pipeline(task="zero-shot-classification", accelerator="ort") + self.assertIsInstance(pipe, Pipeline) + text = "This is a great movie with excellent acting." 
+ candidate_labels = ["positive", "negative", "neutral"] + result = pipe(text, candidate_labels) + + self.assertIsInstance(result, dict) + self.assertIn("labels", result) + self.assertIn("scores", result) + self.assertEqual(len(result["labels"]), len(candidate_labels)) + + def test_image_classification_pipeline(self): + """Test image classification ORT pipeline""" + pipe = optimum_pipeline(task="image-classification", accelerator="ort") + self.assertIsInstance(pipe, Pipeline) + image = self._create_dummy_image() + result = pipe(image) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + self.assertIn("label", result[0]) + self.assertIn("score", result[0]) + + def test_image_segmentation_pipeline(self): + """Test image segmentation ORT pipeline""" + pipe = optimum_pipeline(task="image-segmentation", accelerator="ort") + self.assertIsInstance(pipe, Pipeline) + image = self._create_dummy_image() + result = pipe(image) + + self.assertIsInstance(result, list) + if len(result) > 0: + self.assertIn("label", result[0]) + self.assertIn("score", result[0]) + self.assertIn("mask", result[0]) + + def test_image_to_text_pipeline(self): + """Test image to text ORT pipeline""" + pipe = optimum_pipeline(task="image-to-text", accelerator="ort") + self.assertIsInstance(pipe, Pipeline) + image = self._create_dummy_image() + result = pipe(image, generate_kwargs=GENERATE_KWARGS) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + self.assertIn("generated_text", result[0]) + + def test_image_to_image_pipeline(self): + """Test image to image ORT pipeline""" + pipe = optimum_pipeline(task="image-to-image", accelerator="ort") + self.assertIsInstance(pipe, Pipeline) + image = self._create_dummy_image() + result = pipe(image) + + self.assertIsInstance(result, Image.Image) + + # TODO: Enable when fixed in optimum-onnx + # def test_automatic_speech_recognition_pipeline(self): + # """Test automatic speech recognition ORT pipeline""" + # pipe = optimum_pipeline(task="automatic-speech-recognition", accelerator="ort") + # audio = self._create_dummy_audio() + # result = pipe(audio, generate_kwargs=GENERATE_KWARGS) + + # self.assertIsInstance(result, dict) + # self.assertIn("text", result) + + def test_audio_classification_pipeline(self): + """Test audio classification ORT pipeline""" + pipe = optimum_pipeline(task="audio-classification", accelerator="ort") + self.assertIsInstance(pipe, Pipeline) + audio = self._create_dummy_audio() + result = pipe(audio) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + self.assertIn("label", result[0]) + self.assertIn("score", result[0]) + + def test_pipeline_with_ort_model(self): + """Test ORT pipeline with a model already in ONNX format""" + from optimum.onnxruntime import ORTModelForFeatureExtraction + + tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased") + model = ORTModelForFeatureExtraction.from_pretrained("distilbert-base-cased", export=True) + pipe = optimum_pipeline(task="feature-extraction", model=model, tokenizer=tokenizer, accelerator="ort") + self.assertIsInstance(pipe, Pipeline) + text = self._create_dummy_text() + result = pipe(text) + + self.assertIsInstance(result, list) + self.assertIsInstance(result[0], list) + self.assertIsInstance(result[0][0], list) + + def test_pipeline_with_model_id(self): + """Test ORT pipeline with a custom model id""" + pipe = optimum_pipeline(task="feature-extraction", model="distilbert-base-cased", accelerator="ort") + self.assertIsInstance(pipe, 
Pipeline) + text = self._create_dummy_text() + result = pipe(text) + + self.assertIsInstance(result, list) + self.assertIsInstance(result[0], list) + + def test_pipeline_with_invalid_task(self): + """Test ORT pipeline with an unsupported task""" + with self.assertRaises(KeyError) as context: + _ = optimum_pipeline(task="invalid-task", accelerator="ort") + self.assertIn("Unknown task invalid-task", str(context.exception)) + + def test_pipeline_with_invalid_accelerator(self): + """Test ORT pipeline with an unsupported accelerator""" + with self.assertRaises(ValueError) as context: + _ = optimum_pipeline(task="feature-extraction", accelerator="invalid-accelerator") + self.assertIn("Accelerator invalid-accelerator not recognized", str(context.exception)) + + def tearDown(self): + remove_directory(HF_HUB_CACHE) + + +if __name__ == "__main__": + unittest.main()
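
A minimal usage sketch of the accelerator auto-detection path introduced in `optimum/pipelines/__init__.py`, assuming `optimum-onnx[onnxruntime]` is the only accelerator backend installed (the checkpoint name is only illustrative):

```python
from optimum.pipelines import pipeline

# No `accelerator` argument: the factory inspects the installed backends and,
# with optimum-onnx[onnxruntime] present, falls back to ONNX Runtime ("ort").
classifier = pipeline(
    task="text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",  # illustrative checkpoint
)

print(classifier("Optimum pipelines pick the accelerator automatically."))
```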