Propagate library_name parameter in from_pretrained to export (#2328)

tomaarsen · echarlaix · web-flow · commit 689c0b5d38aa · 2025-07-29T14:17:42.000+02:00
* Propagate library_name parameter in from_pretrained to export

Required to avoid automatic inference of the library_name

* Use class attribute for ORTModel instead

Under modeling_diffusion it looks like ORTModel isn't used

* Add test case

* Update optimum/onnxruntime/modeling_ort.py

---------

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py
@@ -148,6 +148,7 @@ class ORTModel(ORTSessionMixin, OptimizedModel):
 
     model_type = "onnx_model"
     auto_model_class = AutoModel
+    _library_name: Optional[str] = None
 
     def __init__(
         self,
@@ -431,6 +432,7 @@ def _export(
             local_files_only=local_files_only,
             force_download=force_download,
             trust_remote_code=trust_remote_code,
+            library_name=cls._library_name,
         )
         maybe_save_preprocessors(model_id, model_save_path, src_subfolder=subfolder)
 
@@ -628,6 +630,7 @@ class ORTModelForFeatureExtraction(ORTModel):
     """
 
     auto_model_class = AutoModel
+    _library_name: Optional[str] = "transformers"
 
     @add_start_docstrings_to_model_forward(
         ONNX_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
@@ -753,6 +756,7 @@ class ORTModelForMaskedLM(ORTModel):
     """
 
     auto_model_class = AutoModelForMaskedLM
+    _library_name: Optional[str] = "transformers"
 
     @add_start_docstrings_to_model_forward(
         ONNX_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
@@ -855,6 +859,7 @@ class ORTModelForQuestionAnswering(ORTModel):
     """
 
     auto_model_class = AutoModelForQuestionAnswering
+    _library_name: Optional[str] = "transformers"
 
     @add_start_docstrings_to_model_forward(
         ONNX_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
@@ -974,6 +979,7 @@ class ORTModelForSequenceClassification(ORTModel):
     """
 
     auto_model_class = AutoModelForSequenceClassification
+    _library_name: Optional[str] = "transformers"
 
     @add_start_docstrings_to_model_forward(
         ONNX_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
@@ -1077,6 +1083,7 @@ class ORTModelForTokenClassification(ORTModel):
     """
 
     auto_model_class = AutoModelForTokenClassification
+    _library_name: Optional[str] = "transformers"
 
     @add_start_docstrings_to_model_forward(
         ONNX_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
@@ -1173,6 +1180,7 @@ class ORTModelForMultipleChoice(ORTModel):
     """
 
     auto_model_class = AutoModelForMultipleChoice
+    _library_name: Optional[str] = "transformers"
 
     @add_start_docstrings_to_model_forward(
         ONNX_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
@@ -1376,6 +1384,7 @@ class ORTModelForSemanticSegmentation(ORTModel):
     """
 
     auto_model_class = AutoModelForSemanticSegmentation
+    _library_name: Optional[str] = "transformers"
 
     @add_start_docstrings_to_model_forward(
         ONNX_IMAGE_INPUTS_DOCSTRING.format("batch_size, num_channels, height, width")
@@ -1479,6 +1488,7 @@ class ORTModelForAudioClassification(ORTModel):
     """
 
     auto_model_class = AutoModelForAudioClassification
+    _library_name: Optional[str] = "transformers"
 
     @add_start_docstrings_to_model_forward(
         ONNX_AUDIO_INPUTS_DOCSTRING.format("batch_size, sequence_length")
@@ -1577,6 +1587,7 @@ class ORTModelForCTC(ORTModel):
     """
 
     auto_model_class = AutoModelForCTC
+    _library_name: Optional[str] = "transformers"
 
     @add_start_docstrings_to_model_forward(
         ONNX_AUDIO_INPUTS_DOCSTRING.format("batch_size, sequence_length")
@@ -1681,6 +1692,7 @@ class ORTModelForAudioXVector(ORTModel):
     """
 
     auto_model_class = AutoModelForAudioXVector
+    _library_name: Optional[str] = "transformers"
 
     @add_start_docstrings_to_model_forward(
         ONNX_AUDIO_INPUTS_DOCSTRING.format("batch_size, sequence_length")
@@ -1770,6 +1782,7 @@ class ORTModelForAudioFrameClassification(ORTModel):
     """
 
     auto_model_class = AutoModelForAudioFrameClassification
+    _library_name: Optional[str] = "transformers"
 
     @add_start_docstrings_to_model_forward(
         ONNX_AUDIO_INPUTS_DOCSTRING.format("batch_size, sequence_length")
@@ -1850,6 +1863,7 @@ class ORTModelForImageToImage(ORTModel):
     """
 
     auto_model_class = AutoModelForImageToImage
+    _library_name: Optional[str] = "transformers"
 
     @add_start_docstrings_to_model_forward(
         ONNX_IMAGE_INPUTS_DOCSTRING.format("batch_size, num_channels, height, width")
diff --git a/optimum/onnxruntime/modeling_seq2seq.py b/optimum/onnxruntime/modeling_seq2seq.py
@@ -1280,6 +1280,7 @@ def _export(
             local_files_only=local_files_only,
             force_download=force_download,
             trust_remote_code=trust_remote_code,
+            library_name=cls._library_name,
         )
         maybe_save_preprocessors(model_id, model_save_path, src_subfolder=subfolder)
 
diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py
@@ -1340,6 +1340,21 @@ def test_compare_to_io_binding(self, model_arch):
 
         gc.collect()
 
+    def test_load_sentence_transformers_model_as_fill_mask(self):
+        """Check that `ORTModelForMaskedLM` loads this checkpoint and serves a fill-mask pipeline."""
+        model_id = "sparse-encoder-testing/splade-bert-tiny-nq"
+        onnx_model = ORTModelForMaskedLM.from_pretrained(model_id)
+        tokenizer = get_preprocessor(model_id)
+        # No explicit `device`: the test has no GPU guard, so the pipeline must stay on the model's device.
+        pipe = pipeline("fill-mask", model=onnx_model, tokenizer=tokenizer)
+        outputs = pipe(f"The capital of France is {tokenizer.mask_token}.")
+
+        self.assertEqual(pipe.device, onnx_model.device)
+        self.assertGreaterEqual(outputs[0]["score"], 0.0)
+        self.assertIsInstance(outputs[0]["token_str"], str)
+
+        gc.collect()
+
 
 class ORTModelForSequenceClassificationIntegrationTest(ORTModelTestMixin):
     SUPPORTED_ARCHITECTURES = [

Original file line number	Diff line number	Diff line change
`@@ -1280,6 +1280,7 @@ def _export(`
`1280`	`1280`	`local_files_only=local_files_only,`
`1281`	`1281`	`force_download=force_download,`
`1282`	`1282`	`trust_remote_code=trust_remote_code,`
	`1283`	`+ library_name=cls._library_name,`
`1283`	`1284`	`)`
`1284`	`1285`	`maybe_save_preprocessors(model_id, model_save_path, src_subfolder=subfolder)`
`1285`	`1286`