
Google Provider API

The replit-ai package provides a shim for the Google Vertex AI SDK that lets you swap to and from the original libraries by changing only the imports at the top of your code. Both the input parameters and the responses from these APIs are intended to be 100% compatible.

To move off the Vertex AI SDK, remove all initialization code and replace all vertexai imports with replit.ai.modelfarm.google.

Before:

import vertexai
from vertexai.language_models import TextGenerationModel

# project_id and location come from your Google Cloud configuration
vertexai.init(project=project_id, location=location)

parameters = {
    "temperature": 0.2,
    "max_output_tokens": 256,
    "top_p": 0.8,
    "top_k": 40,
}

model = TextGenerationModel.from_pretrained("text-bison@001")
response = model.predict(
    "Give me ten interview questions for the role of program manager.",
    **parameters,
)
print(f"Response from Model: {response.text}")

After:

from replit.ai.modelfarm.google.language_models import TextGenerationModel

parameters = {
    "temperature": 0.2,
    "max_output_tokens": 256,
    "top_p": 0.8,
    "top_k": 40,
}

model = TextGenerationModel.from_pretrained("text-bison@001")
response = model.predict(
    "Give me ten interview questions for the role of program manager.",
    **parameters,
)
print(f"Response from Model: {response.text}")

Note that the Replit-provided Google-specific APIs support both synchronous and asynchronous operations; the asynchronous variants are built on the asyncio library.
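Each synchronous method has an asynchronous counterpart whose name carries an async_ prefix (for example, predict and async_predict in the Completion section below). As a minimal sketch of driving one of these coroutines with asyncio:

import asyncio
from replit.ai.modelfarm.google.language_models import TextGenerationModel

async def main():
    model = TextGenerationModel.from_pretrained("text-bison@001")
    # async_predict is the asyncio counterpart of predict
    response = await model.async_predict("Say hello.", max_output_tokens=32)
    print(response.text)

asyncio.run(main())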

Chat

Please refer to the Google Vertex AI documentation for chat.

Examples

Synchronous Usage

from replit.ai.modelfarm.google.preview.language_models import (
    ChatModel,
    InputOutputTextPair,
    ChatSession,
    ChatMessage,
    TextGenerationResponse,
)

chat_model = ChatModel.from_pretrained("chat-bison@001")

chat: ChatSession = chat_model.start_chat(
    context="My name is Miles. You are an astronomer, knowledgeable about the solar system.",
    examples=[
        InputOutputTextPair(
            input_text="How many moons does Mars have?",
            output_text="The planet Mars has two moons, Phobos and Deimos.",
        ),
    ],
)

# synchronous, non-streamed response
response: TextGenerationResponse = chat.send_message(
    "How many planets are there in the solar system?",
    temperature=0.2,
    max_output_tokens=256,
    top_p=0.95,
    top_k=40,
)

# synchronous, streamed response
responses = chat.send_message_stream(
    "Tell me something about each planet in the Solar System: ",
    temperature=0.2,
    max_output_tokens=1024,
    top_p=0.95,
    top_k=40,
)

for response in responses:
    print(response)

# chat keeps a message_history: a list of ChatMessage objects reflecting the session's state
chat_history: list[ChatMessage] = chat.message_history
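Continuing from the session above, the history can be replayed directly. This is a minimal sketch that assumes ChatMessage carries the Vertex-compatible author and content fields:

# minimal sketch: replay the conversation so far
# (assumes ChatMessage exposes the Vertex-compatible `author` and `content` fields)
for message in chat.message_history:
    print(f"{message.author}: {message.content}")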

Asynchronous Usage

import asyncio
from replit.ai.modelfarm.google.preview.language_models import (
    ChatModel,
    InputOutputTextPair,
    ChatSession,
    ChatMessage,
    TextGenerationResponse,
)


async def main():
    chat_model = ChatModel.from_pretrained("chat-bison@001")

    chat: ChatSession = chat_model.start_chat(
        context="My name is Miles. You are an astronomer, knowledgeable about the solar system.",
        examples=[
            InputOutputTextPair(
                input_text="How many moons does Mars have?",
                output_text="The planet Mars has two moons, Phobos and Deimos.",
            ),
        ],
    )

    # asynchronous, non-streaming call
    response: TextGenerationResponse = await chat.async_send_message(
        "How many planets are there in the solar system?",
        temperature=0.2,
        max_output_tokens=1024,
        top_p=0.95,
        top_k=40,
    )

    # asynchronous streaming call
    responses = chat.async_send_message_stream(
        "Tell me something about each planet.",
        temperature=0.2,
        max_output_tokens=1024,
        top_p=0.95,
        top_k=40,
    )

    async for async_response in responses:
        print(async_response)

asyncio.run(main())

Embedding

Please refer to the Google Vertex AI documentation for embeddings.

Examples

Synchronous Usage

from replit.ai.modelfarm.google.language_models import TextEmbeddingModel, TextEmbedding

model = TextEmbeddingModel("textembedding-gecko")

# synchronous call
response: list[TextEmbedding] = model.get_embeddings(["Hello, world!"])

# TextEmbedding has two attributes: statistics and values
print(response[0].statistics)
print(response[0].values)

The TextEmbedding object looks like the following:

>>> print(response[0].__dict__)
{
    "statistics": TextEmbeddingStatistics(token_count=4, truncated=False),
    "values": [
        0.010413173586130142,
        0.012456662021577358,
        # .. truncated
        -0.048689305782318115,
        -0.007193463854491711,
    ],
}
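To illustrate how the values vector is typically consumed, here is a minimal sketch that scores two embeddings with cosine similarity. The cosine_similarity helper is our own, not part of replit-ai:

import math
from replit.ai.modelfarm.google.language_models import TextEmbeddingModel

def cosine_similarity(a: list[float], b: list[float]) -> float:
    # hypothetical helper, not part of replit-ai: standard cosine similarity
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

model = TextEmbeddingModel("textembedding-gecko")
first, second = model.get_embeddings(["Hello, world!", "Goodbye, world!"])
print(cosine_similarity(first.values, second.values))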

Asynchronous Usage

import asyncio
from replit.ai.modelfarm.google.language_models import TextEmbeddingModel, TextEmbedding

async def main():
    model = TextEmbeddingModel("textembedding-gecko")

    # asynchronous call
    response: list[TextEmbedding] = await model.async_get_embeddings(
        ["Hello, world!"])
    print(response)

asyncio.run(main())

Completion

Please refer to the Google Vertex AI documentation for completion.

Examples

Synchronous Usage

from replit.ai.modelfarm.google.language_models import TextGenerationModel, TextGenerationResponse

model: TextGenerationModel = TextGenerationModel.from_pretrained(
    "text-bison@001")

response: TextGenerationResponse = model.predict(
    "Give me ten interview questions for the role of program manager.",
    temperature=0.8,
    top_p=0.8,
    top_k=40,
    max_output_tokens=256)
print(response)

# synchronous streaming
responses = model.predict_streaming(
    prompt="Give me ten interview questions for the role of a software engineer.")

for response in responses:
    print(response)

The TextGenerationResponse looks like the following:

>>> print(response.model_dump())
{
    "is_blocked": False,
    "raw_prediction_response": {
        "content": "1. What is your experience with project management?\n2. What is your process for managing a project?\n3. How do you handle unexpected challenges or roadblocks?\n4. How do you communicate with stakeholders and keep them updated on the progress of the project?\n5. What are your strengths and weaknesses as a project manager?\n6. What are your salary expectations?\n7. What are your career goals?\n8. Why are you interested in this position?\n9. What do you know about our company?\n10. What questions do you have for me?",
        "metadata": {
            "safetyAttributes": {
                "blocked": False,
                "categories": ["Finance"],
                "scores": [0.1],
            },
            "citationMetadata": {"citations": []},
        },
    },
    "safety_attributes": {"Finance": 0.1},
    "text": "1. What is your experience with project management?\n2. What is your process for managing a project?\n3. How do you handle unexpected challenges or roadblocks?\n4. How do you communicate with stakeholders and keep them updated on the progress of the project?\n5. What are your strengths and weaknesses as a project manager?\n6. What are your salary expectations?\n7. What are your career goals?\n8. Why are you interested in this position?\n9. What do you know about our company?\n10. What questions do you have for me?",
}
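Putting those fields to use, here is a minimal sketch that guards on is_blocked and inspects safety_attributes before reading text:

from replit.ai.modelfarm.google.language_models import TextGenerationModel

model = TextGenerationModel.from_pretrained("text-bison@001")
response = model.predict(
    "Give me ten interview questions for the role of program manager.")

# skip blocked responses and surface safety scores before using the text
if response.is_blocked:
    print("Blocked; safety attributes:", response.safety_attributes)
else:
    print(response.text)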

Asynchronous Usage

import asyncio
from replit.ai.modelfarm.google.language_models import TextGenerationModel, TextGenerationResponse

async def main():
    model: TextGenerationModel = TextGenerationModel.from_pretrained(
        "text-bison@001")

    # synchronous call (works inside a coroutine, but blocks the event loop)
    response: TextGenerationResponse = model.predict(
        "Give me ten interview questions for the role of program manager.",
        temperature=0.8,
        top_p=0.8,
        top_k=40,
        max_output_tokens=256)
    print(response.model_dump())

    # asynchronous, non-streaming call
    response: TextGenerationResponse = await model.async_predict(
        "How many planets are there in the solar system?",
        temperature=0.2,
        max_output_tokens=1024,
        top_p=0.95,
        top_k=40)

    # asynchronous streaming call
    responses = model.async_predict_streaming(
        "How many planets are there in the solar system?",
        temperature=0.2,
        max_output_tokens=1024,
        top_p=0.95,
        top_k=40)

    async for async_response in responses:
        # each streamed chunk is a TextGenerationResponse
        print(async_response)

asyncio.run(main())