firebase · rlazo · Feb 8, 2025 · Jan 30, 2025 · Jan 31, 2025 · Feb 1, 2025
diff --git a/firebase-vertexai/CHANGELOG.md b/firebase-vertexai/CHANGELOG.md
@@ -1,5 +1,5 @@
 # Unreleased
-
+* [changed] Added support for modality-based token counts in `CountTokensResponse` and `UsageMetadata`. (#6658)
 
 # 16.1.0
 * [changed] Internal improvements to correctly handle empty model responses.
@@ -64,4 +64,3 @@
 * [feature] Added support for `responseMimeType` in `GenerationConfig`.
 * [changed] Renamed `GoogleGenerativeAIException` to `FirebaseVertexAIException`.
 * [changed] Updated the KDocs for various classes and functions.
-
diff --git a/firebase-vertexai/api.txt b/firebase-vertexai/api.txt
@@ -165,12 +165,30 @@ package com.google.firebase.vertexai.type {
     method public static com.google.firebase.vertexai.type.Content content(String? role = "user", kotlin.jvm.functions.Function1<? super com.google.firebase.vertexai.type.Content.Builder,kotlin.Unit> init);
   }
 
+  public final class ContentModality {
+    method public int getOrdinal();
+    property public final int ordinal;
+    field public static final com.google.firebase.vertexai.type.ContentModality AUDIO;
+    field public static final com.google.firebase.vertexai.type.ContentModality.Companion Companion;
+    field public static final com.google.firebase.vertexai.type.ContentModality DOCUMENT;
+    field public static final com.google.firebase.vertexai.type.ContentModality IMAGE;
+    field public static final com.google.firebase.vertexai.type.ContentModality TEXT;
+    field public static final com.google.firebase.vertexai.type.ContentModality UNSPECIFIED;
+    field public static final com.google.firebase.vertexai.type.ContentModality VIDEO;
+  }
+
+  public static final class ContentModality.Companion {
+  }
+
   public final class CountTokensResponse {
-    ctor public CountTokensResponse(int totalTokens, Integer? totalBillableCharacters = null);
+    ctor public CountTokensResponse(int totalTokens, Integer? totalBillableCharacters = null, java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? promptTokensDetails = null);
     method public operator int component1();
     method public operator Integer? component2();
+    method public operator java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? component3();
+    method public java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? getPromptTokensDetails();
     method public Integer? getTotalBillableCharacters();
     method public int getTotalTokens();
+    property public final java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? promptTokensDetails;
     property public final Integer? totalBillableCharacters;
     property public final int totalTokens;
   }
@@ -369,6 +387,15 @@ package com.google.firebase.vertexai.type {
   public final class InvalidStateException extends com.google.firebase.vertexai.type.FirebaseVertexAIException {
   }
 
+  public final class ModalityTokenCount {
+    method public operator com.google.firebase.vertexai.type.ContentModality component1();
+    method public operator int component2();
+    method public com.google.firebase.vertexai.type.ContentModality getModality();
+    method public int getTokenCount();
+    property public final com.google.firebase.vertexai.type.ContentModality modality;
+    property public final int tokenCount;
+  }
+
   public interface Part {
   }
 
@@ -549,12 +576,16 @@ package com.google.firebase.vertexai.type {
   }
 
   public final class UsageMetadata {
-    ctor public UsageMetadata(int promptTokenCount, Integer? candidatesTokenCount, int totalTokenCount);
+    ctor public UsageMetadata(int promptTokenCount, Integer? candidatesTokenCount, int totalTokenCount, java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? promptTokensDetails, java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? candidatesTokensDetails);
     method public Integer? getCandidatesTokenCount();
+    method public java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? getCandidatesTokensDetails();
     method public int getPromptTokenCount();
+    method public java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? getPromptTokensDetails();
     method public int getTotalTokenCount();
     property public final Integer? candidatesTokenCount;
+    property public final java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? candidatesTokensDetails;
     property public final int promptTokenCount;
+    property public final java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? promptTokensDetails;
     property public final int totalTokenCount;
   }
 

diff --git a/firebase-vertexai/gradle.properties b/firebase-vertexai/gradle.properties
@@ -12,5 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-version=16.1.1
+version=16.2.0
 latestReleasedVersion=16.1.0
diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ContentModality.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ContentModality.kt
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.firebase.vertexai.type
+
+import com.google.firebase.vertexai.common.util.FirstOrdinalSerializer
+import kotlinx.serialization.KSerializer
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+
+/**
+ * Content part modality.
+ *
+ * Instances are limited to the constants declared in the companion object.
+ *
+ * @property ordinal Integer identifying the modality; each predefined constant has its own value.
+ */
+public class ContentModality private constructor(public val ordinal: Int) {
+
+  /** Serializable counterpart of [ContentModality]; converted with [toPublic]. */
+  @Serializable(Internal.Serializer::class)
+  internal enum class Internal {
+    @SerialName("MODALITY_UNSPECIFIED") UNSPECIFIED,
+    TEXT,
+    IMAGE,
+    VIDEO,
+    AUDIO,
+    DOCUMENT;
+
+    internal object Serializer : KSerializer<Internal> by FirstOrdinalSerializer(Internal::class)
+
+    /** Returns the public [ContentModality] constant that corresponds to this entry. */
+    internal fun toPublic() =
+      when (this) {
+        // No `else` branch: the `when` is exhaustive, so an entry added without a mapping is a
+        // compile error instead of being silently reported as UNSPECIFIED.
+        UNSPECIFIED -> ContentModality.UNSPECIFIED
+        TEXT -> ContentModality.TEXT
+        IMAGE -> ContentModality.IMAGE
+        VIDEO -> ContentModality.VIDEO
+        AUDIO -> ContentModality.AUDIO
+        DOCUMENT -> ContentModality.DOCUMENT
+      }
+  }
+
+  public companion object {
+    /** Unspecified modality. */
+    @JvmField public val UNSPECIFIED: ContentModality = ContentModality(0)
+
+    /** Plain text. */
+    @JvmField public val TEXT: ContentModality = ContentModality(1)
+
+    /** Image. */
+    @JvmField public val IMAGE: ContentModality = ContentModality(2)
+
+    /** Video. */
+    @JvmField public val VIDEO: ContentModality = ContentModality(3)
+
+    /** Audio. */
+    @JvmField public val AUDIO: ContentModality = ContentModality(4)
+
+    /** Document, e.g. PDF. */
+    @JvmField public val DOCUMENT: ContentModality = ContentModality(5)
+  }
+}
diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/CountTokensResponse.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/CountTokensResponse.kt
@@ -30,21 +30,36 @@ import kotlinx.serialization.Serializable
  * to the model as a prompt. **Important:** this property does not include billable image, video or
  * other non-text input. See
  * [Vertex AI pricing](https://cloud.google.com/vertex-ai/generative-ai/pricing) for details.
+ * @property promptTokensDetails Per-modality breakdown of the tokens consumed by the prompt, or
+ * `null` when none was provided.
  */
 public class CountTokensResponse(
   public val totalTokens: Int,
-  public val totalBillableCharacters: Int? = null
+  public val totalBillableCharacters: Int? = null,
+  public val promptTokensDetails: List<ModalityTokenCount>? = null,
 ) {
   public operator fun component1(): Int = totalTokens
 
   public operator fun component2(): Int? = totalBillableCharacters
 
+  public operator fun component3(): List<ModalityTokenCount>? = promptTokensDetails
+
+  /** Serializable form of [CountTokensResponse]. */
   @Serializable
-  internal data class Internal(val totalTokens: Int, val totalBillableCharacters: Int? = null) :
-    Response {
+  internal data class Internal(
+    val totalTokens: Int,
+    val totalBillableCharacters: Int? = null,
+    val promptTokensDetails: List<ModalityTokenCount.Internal>? = null
+  ) : Response {
 
+    /** Builds the public response; a missing `totalBillableCharacters` becomes 0. */
     internal fun toPublic(): CountTokensResponse {
-      return CountTokensResponse(totalTokens, totalBillableCharacters ?: 0)
+      val details = promptTokensDetails?.map { detail -> detail.toPublic() }
+      return CountTokensResponse(
+        totalTokens = totalTokens,
+        totalBillableCharacters = totalBillableCharacters ?: 0,
+        promptTokensDetails = details
+      )
     }
   }
 }
diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ModalityTokenCount.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ModalityTokenCount.kt
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.firebase.vertexai.type
+
+import kotlinx.serialization.Serializable
+
+/**
+ * Number of tokens attributed to one content modality.
+ *
+ * @property modality The modality the tokens belong to.
+ * @property tokenCount The number of tokens counted for [modality].
+ */
+public class ModalityTokenCount
+private constructor(public val modality: ContentModality, public val tokenCount: Int) {
+
+  public operator fun component1(): ContentModality = modality
+
+  public operator fun component2(): Int = tokenCount
+
+  /** Serializable form of [ModalityTokenCount]; `tokenCount` may be absent. */
+  @Serializable
+  internal data class Internal(
+    val modality: ContentModality.Internal,
+    val tokenCount: Int? = null
+  ) {
+    /** Converts to the public type, reporting a missing count as 0. */
+    internal fun toPublic(): ModalityTokenCount {
+      val publicModality = modality.toPublic()
+      val count = tokenCount ?: 0
+      return ModalityTokenCount(publicModality, count)
+    }
+  }
+}
diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/UsageMetadata.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/UsageMetadata.kt
@@ -24,21 +24,34 @@ import kotlinx.serialization.Serializable
  * @param promptTokenCount Number of tokens in the request.
  * @param candidatesTokenCount Number of tokens in the response(s).
  * @param totalTokenCount Total number of tokens.
+ * @param promptTokensDetails Per-modality breakdown of the tokens consumed by the prompt.
+ * @param candidatesTokensDetails Per-modality breakdown of the tokens consumed by the candidates.
  */
 public class UsageMetadata(
   public val promptTokenCount: Int,
   public val candidatesTokenCount: Int?,
-  public val totalTokenCount: Int
+  public val totalTokenCount: Int,
+  public val promptTokensDetails: List<ModalityTokenCount>?,
+  public val candidatesTokensDetails: List<ModalityTokenCount>?,
 ) {
 
   @Serializable
   internal data class Internal(
     val promptTokenCount: Int? = null,
     val candidatesTokenCount: Int? = null,
     val totalTokenCount: Int? = null,
+    val promptTokensDetails: List<ModalityTokenCount.Internal>? = null,
+    val candidatesTokensDetails: List<ModalityTokenCount.Internal>? = null,
   ) {
 
+    /** Converts to the public type: missing counts become 0, missing breakdowns stay `null`. */
     internal fun toPublic(): UsageMetadata =
-      UsageMetadata(promptTokenCount ?: 0, candidatesTokenCount ?: 0, totalTokenCount ?: 0)
+      UsageMetadata(
+        promptTokenCount = promptTokenCount ?: 0,
+        candidatesTokenCount = candidatesTokenCount ?: 0,
+        totalTokenCount = totalTokenCount ?: 0,
+        promptTokensDetails = promptTokensDetails?.map { detail -> detail.toPublic() },
+        candidatesTokensDetails = candidatesTokensDetails?.map { detail -> detail.toPublic() }
+      )
   }
 }
diff --git a/firebase-vertexai/src/test/java/com/google/firebase/vertexai/UnarySnapshotTests.kt b/firebase-vertexai/src/test/java/com/google/firebase/vertexai/UnarySnapshotTests.kt
@@ -17,6 +17,7 @@
 package com.google.firebase.vertexai
 
 import com.google.firebase.vertexai.type.BlockReason
+import com.google.firebase.vertexai.type.ContentModality
 import com.google.firebase.vertexai.type.FinishReason
 import com.google.firebase.vertexai.type.FunctionCallPart
 import com.google.firebase.vertexai.type.HarmCategory
@@ -34,7 +35,6 @@ import com.google.firebase.vertexai.util.goldenUnaryFile
 import com.google.firebase.vertexai.util.shouldNotBeNullOrEmpty
 import io.kotest.assertions.throwables.shouldThrow
 import io.kotest.inspectors.forAtLeastOne
-import io.kotest.matchers.collections.shouldContain
 import io.kotest.matchers.collections.shouldNotBeEmpty
 import io.kotest.matchers.nulls.shouldNotBeNull
 import io.kotest.matchers.should
@@ -70,15 +70,28 @@ internal class UnarySnapshotTests {
     }
 
   @Test
-  fun `long reply`() =
-    goldenUnaryFile("unary-success-basic-reply-long.json") {
+  fun `response with detailed token-based usageMetadata`() =
+    goldenUnaryFile("unary-success-basic-response-long-usage-metadata.json") {
       withTimeout(testTimeout) {
         val response = model.generateContent("prompt")
 
         response.candidates.isEmpty() shouldBe false
         response.candidates.first().finishReason shouldBe FinishReason.STOP
         response.candidates.first().content.parts.isEmpty() shouldBe false
-        response.candidates.first().safetyRatings.isEmpty() shouldBe false
+        response.usageMetadata shouldNotBe null
+        response.usageMetadata?.apply {
+          totalTokenCount shouldBe 1913
+          candidatesTokenCount shouldBe 76
+          // Fail (rather than silently skip the checks) when a breakdown is missing.
+          promptTokensDetails.shouldNotBeNull().forAtLeastOne {
+            it.modality shouldBe ContentModality.IMAGE
+            it.tokenCount shouldBe 1806
+          }
+          candidatesTokensDetails.shouldNotBeNull().forAtLeastOne {
+            it.modality shouldBe ContentModality.TEXT
+            it.tokenCount shouldBe 76
+          }
+        }
       }
     }
 
@@ -469,6 +481,22 @@ internal class UnarySnapshotTests {
       }
     }
 
+  @Test
+  fun `countTokens with modality fields returned`() =
+    goldenUnaryFile("unary-success-detailed-token-response.json") {
+      withTimeout(testTimeout) {
+        val tokenInfo = model.countTokens("prompt")
+
+        tokenInfo.totalTokens shouldBe 1837
+        tokenInfo.totalBillableCharacters shouldBe 117
+        // The breakdown must be present and include the image tokens.
+        tokenInfo.promptTokensDetails.shouldNotBeNull().forAtLeastOne { detail ->
+          detail.modality shouldBe ContentModality.IMAGE
+          detail.tokenCount shouldBe 1806
+        }
+      }
+    }
+
   @Test
   fun `countTokens succeeds with no billable characters`() =
     goldenUnaryFile("unary-success-no-billable-characters.json") {

diff --git a/firebase-vertexai/update_responses.sh b/firebase-vertexai/update_responses.sh
@@ -17,7 +17,7 @@
 # This script replaces mock response files for Vertex AI unit tests with a fresh
 # clone of the shared repository of Vertex AI test data.
 
-RESPONSES_VERSION='v5.*' # The major version of mock responses to use
+RESPONSES_VERSION='v6.*' # The major version of mock responses to use
 REPO_NAME="vertexai-sdk-test-data"
 REPO_LINK="https://github.com/FirebaseExtended/$REPO_NAME.git"