SDKs
Python SDK
Use NemoRouter with the OpenAI Python SDK
Python SDK
NemoRouter is fully compatible with the OpenAI Python SDK. You don't need a separate SDK — just install openai and point it at NemoRouter.
Installation
pip install openai
Setup
from openai import OpenAI

client = OpenAI(
    api_key="sk-nemo-your-key-here",
    base_url="https://api.nemorouter.ai/v1",
)
Or use an environment variable (recommended for production):
export NEMOROUTER_API_KEY="sk-nemo-your-key-here"
import os
from openai import OpenAI

client = OpenAI(
    api_key=os.environ["NEMOROUTER_API_KEY"],
    base_url="https://api.nemorouter.ai/v1",
)
Chat Completions
Basic Request
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What are the benefits of using an LLM gateway?"},
    ],
    temperature=0.7,
    max_tokens=512,
)
print(response.choices[0].message.content)
Multi-Turn Conversation
messages = [
    {"role": "system", "content": "You are a knowledgeable AI tutor."},
    {"role": "user", "content": "Explain machine learning in simple terms."},
]

# First turn
response = client.chat.completions.create(model="gpt-4o", messages=messages)
assistant_message = response.choices[0].message
messages.append({"role": "assistant", "content": assistant_message.content})

# Second turn
messages.append({"role": "user", "content": "Now explain neural networks."})
response = client.chat.completions.create(model="gpt-4o", messages=messages)
print(response.choices[0].message.content)
Switch Models Instantly
One of NemoRouter's key benefits — switch providers by changing the model name:
# OpenAI
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}],
)

# Anthropic
response = client.chat.completions.create(
    model="claude-4-sonnet",
    messages=[{"role": "user", "content": "Hello!"}],
)

# Google
response = client.chat.completions.create(
    model="gemini-2.5-pro",
    messages=[{"role": "user", "content": "Hello!"}],
)
Streaming
Stream responses token by token for real-time output:
stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": "Write a short story about a robot learning to cook."},
    ],
    stream=True,
)

for chunk in stream:
    content = chunk.choices[0].delta.content
    if content is not None:
        print(content, end="", flush=True)

print() # Final newline
Collecting Streamed Content
full_response = ""
stream = client.chat.completions.create(
    model="claude-4-sonnet",
    messages=[{"role": "user", "content": "List 5 Python best practices."}],
    stream=True,
)

for chunk in stream:
    content = chunk.choices[0].delta.content
    if content is not None:
        full_response += content
        print(content, end="", flush=True)

print(f"\n\nTotal length: {len(full_response)} characters")
Async Support
The OpenAI SDK includes an async client for use with asyncio:
import asyncio
from openai import AsyncOpenAI

client = AsyncOpenAI(
    api_key="sk-nemo-your-key-here",
    base_url="https://api.nemorouter.ai/v1",
)

async def main():
    response = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())
Async Streaming
async def stream_response():
    stream = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Write a haiku about APIs."}],
        stream=True,
    )
    async for chunk in stream:
        content = chunk.choices[0].delta.content
        if content is not None:
            print(content, end="", flush=True)

asyncio.run(stream_response())
Embeddings
response = client.embeddings.create(
    model="text-embedding-3-small",
    input="NemoRouter is an enterprise LLM gateway.",
)
embedding = response.data[0].embedding
print(f"Dimensions: {len(embedding)}")
Batch Embeddings
texts = [
    "First document about AI gateways.",
    "Second document about API management.",
    "Third document about cost optimization.",
]

response = client.embeddings.create(
    model="text-embedding-3-small",
    input=texts,
)

for i, item in enumerate(response.data):
    print(f"Text {i}: {len(item.embedding)} dimensions")
Function Calling
import json

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City and state, e.g. San Francisco, CA",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                    },
                },
                "required": ["location"],
            },
        },
    }
]

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="auto",
)

message = response.choices[0].message
if message.tool_calls:
    for tool_call in message.tool_calls:
        print(f"Function: {tool_call.function.name}")
        print(f"Arguments: {tool_call.function.arguments}")
JSON Mode
Force the model to output valid JSON:
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "system",
            "content": "You output JSON. Return a list of 3 programming languages with name and year_created fields.",
        },
        {"role": "user", "content": "List programming languages."},
    ],
    response_format={"type": "json_object"},
)

data = json.loads(response.choices[0].message.content)
print(json.dumps(data, indent=2))
Error Handling
Handle common errors gracefully:
from openai import (
    APIConnectionError,
    AuthenticationError,
    RateLimitError,
    APIStatusError,
)

try:
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}],
    )
except AuthenticationError:
    print("Invalid API key. Check your NEMOROUTER_API_KEY.")
except RateLimitError:
    print("Rate limit exceeded. Implement backoff or upgrade your plan.")
except APIConnectionError:
    print("Could not connect to NemoRouter. Check your network.")
except APIStatusError as e:
    print(f"API error: {e.status_code} — {e.message}")
Listing Models
models = client.models.list()
for model in models.data:
    print(f"{model.id} (owned by {model.owned_by})")
Complete Example
Here's a complete script that demonstrates the core NemoRouter workflow:
import os
from openai import OpenAI

# Initialize client
client = OpenAI(
    api_key=os.environ["NEMOROUTER_API_KEY"],
    base_url="https://api.nemorouter.ai/v1",
)

# List available models
print("Available models:")
models = client.models.list()
for model in models.data[:5]:
    print(f" - {model.id}")

# Chat completion
print("\nChat completion:")
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "What is an LLM gateway in one sentence?"},
    ],
    max_tokens=100,
)
print(f" {response.choices[0].message.content}")
print(f" Tokens used: {response.usage.total_tokens}")

# Embedding
print("\nEmbedding:")
emb_response = client.embeddings.create(
    model="text-embedding-3-small",
    input="NemoRouter simplifies LLM access.",
)
print(f" Dimensions: {len(emb_response.data[0].embedding)}")

print("\nDone!")
Next Steps
- Node.js SDK — Node.js integration guide
- cURL Examples — Command-line examples
- Chat Completions API — Full API reference
- Authentication — API key best practices