LiteLLM Integration with HoneyHive
This guide demonstrates how to integrate HoneyHive tracing with LiteLLM, a unified interface for calling 100+ LLMs using the OpenAI format, to monitor and optimize your LLM operations.
Prerequisites
- A HoneyHive account and API key
- Python 3.8+
- Basic understanding of LLMs and tracing
Installation
First, install the required packages:
pip install honeyhive litellm
Setup and Configuration
Initialize HoneyHive Tracer
Start by initializing the HoneyHive tracer at the beginning of your application:
import os
from honeyhive import HoneyHiveTracer
# Set your API keys
HONEYHIVE_API_KEY = "your honeyhive api key"
OPENAI_API_KEY = "your openai api key"
# Initialize HoneyHive tracer
HoneyHiveTracer.init(
    api_key=HONEYHIVE_API_KEY,
    project="your project name",
    source="dev",
    session_name="litellm_example"
)
Next, set up LiteLLM with your API keys:
import litellm
# Set API keys
litellm.api_key = OPENAI_API_KEY
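LiteLLM can also pick up provider credentials from standard environment variables (for example OPENAI_API_KEY), which keeps keys out of your application code. A minimal sketch of that alternative, assuming the variable is exported in your shell:
import os
import litellm
# No explicit litellm.api_key needed: LiteLLM reads OPENAI_API_KEY from the environment
assert os.environ.get("OPENAI_API_KEY"), "export OPENAI_API_KEY before running"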
Tracing LiteLLM Operations
Initialize LiteLLM with Tracing
Use the @trace decorator to monitor LiteLLM initialization:
from honeyhive import trace
@trace
def initialize_litellm():
    """Initialize LiteLLM with configuration."""
    try:
        # Set verbose mode for more detailed logs
        litellm.set_verbose = True
        # Configure model list for fallbacks (optional)
        litellm.model_list = [
            {
                "model_name": "gpt-4o-mini",
                "litellm_params": {
                    "model": "gpt-4o-mini",
                    "api_key": OPENAI_API_KEY
                }
            }
        ]
        print("LiteLLM initialized successfully")
    except Exception as e:
        print(f"Error initializing LiteLLM: {e}")
        raise
Generate Completions with Tracing
Trace the completion generation process:
@trace
def generate_completion(prompt, model="gpt-4o-mini", temperature=0.7, max_tokens=500):
    """Generate a completion using LiteLLM with tracing."""
    try:
        response = litellm.completion(
            model=model,
            messages=[
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_tokens=max_tokens
        )
        completion_text = response.choices[0].message.content
        print(f"Generated completion with {len(completion_text)} characters")
        return completion_text
    except Exception as e:
        print(f"Error generating completion: {e}")
        raise
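For instance, calling the traced helper directly records a span under the active HoneyHive session (prompt text and token limit here are only illustrative):
summary = generate_completion("Give a one-sentence definition of observability.", max_tokens=60)
print(summary)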
Generate Chat Completions with Tracing
Trace chat completion operations:
@trace
def generate_chat_completion(messages, model="gpt-3.5-turbo", temperature=0.7, max_tokens=500):
    """Generate a chat completion using LiteLLM with tracing."""
    try:
        response = litellm.completion(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens
        )
        completion_text = response.choices[0].message.content
        print(f"Generated chat completion with {len(completion_text)} characters")
        return completion_text
    except Exception as e:
        print(f"Error generating chat completion: {e}")
        raise
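Because LiteLLM normalizes providers behind the OpenAI format, the same traced helper can target another provider just by changing the model string. A hedged sketch, assuming the matching provider key (here ANTHROPIC_API_KEY) is set in your environment and the model identifier is available to your account:
# Same traced helper, different provider; the model name below is illustrative
anthropic_reply = generate_chat_completion(
    [{"role": "user", "content": "Summarize what LiteLLM does in one sentence."}],
    model="anthropic/claude-3-haiku-20240307"
)
print(anthropic_reply)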
Generate Embeddings with Tracing
Monitor embedding generation:
@trace
def generate_embedding(text, model="text-embedding-ada-002"):
    """Generate embeddings using LiteLLM with tracing."""
    try:
        response = litellm.embedding(
            model=model,
            input=[text]
        )
        embedding = response.data[0]["embedding"]
        print(f"Generated embedding with {len(embedding)} dimensions")
        return embedding
    except Exception as e:
        print(f"Error generating embedding: {e}")
        raise
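As a quick sanity check on the returned vectors, you can compare two traced embeddings with cosine similarity; this sketch uses only the standard library plus the helper above, and the sample sentences are illustrative:
import math
@trace
def cosine_similarity(a, b):
    """Cosine similarity between two embedding vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)
emb_a = generate_embedding("HoneyHive traces LLM calls.")
emb_b = generate_embedding("HoneyHive provides observability for AI apps.")
print(f"Similarity: {cosine_similarity(emb_a, emb_b):.3f}")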
Complete Example
Here’s a complete example of using LiteLLM with HoneyHive tracing:
import os
import litellm
from honeyhive import HoneyHiveTracer, trace
# Set your API keys
HONEYHIVE_API_KEY = "your honeyhive api key"
OPENAI_API_KEY = "your openai api key"
# Set OpenAI API key for LiteLLM
litellm.api_key = OPENAI_API_KEY
# Initialize HoneyHive tracer
HoneyHiveTracer.init(
    api_key=HONEYHIVE_API_KEY,
    project="your project name",
    source="dev",
    session_name="litellm_example"
)
@trace
def initialize_litellm():
    # Implementation as shown above
    pass
@trace
def generate_completion(prompt, model="gpt-4o-mini", temperature=0.7, max_tokens=500):
    # Implementation as shown above
    pass
@trace
def generate_chat_completion(messages, model="gpt-3.5-turbo", temperature=0.7, max_tokens=500):
    # Implementation as shown above
    pass
@trace
def generate_embedding(text, model="text-embedding-ada-002"):
    # Implementation as shown above
    pass
@trace
def process_with_fallback(messages, primary_model="gpt-3.5-turbo", fallback_model="gpt-4"):
    """Process messages with a fallback model if the primary model fails."""
    try:
        # Try primary model first
        print(f"Attempting to use primary model: {primary_model}")
        return generate_chat_completion(messages, model=primary_model)
    except Exception as primary_error:
        print(f"Primary model failed: {primary_error}")
        try:
            # Fall back to secondary model
            print(f"Falling back to secondary model: {fallback_model}")
            return generate_chat_completion(messages, model=fallback_model)
        except Exception as fallback_error:
            print(f"Fallback model also failed: {fallback_error}")
            raise
@trace
def batch_process_prompts(prompts, model="gpt-3.5-turbo"):
    """Process multiple prompts in batch with tracing."""
    results = []
    for i, prompt in enumerate(prompts):
        try:
            print(f"Processing prompt {i+1}/{len(prompts)}")
            result = generate_completion(prompt, model=model)
            results.append({"prompt": prompt, "completion": result, "status": "success"})
        except Exception as e:
            print(f"Error processing prompt {i+1}: {e}")
            results.append({"prompt": prompt, "completion": None, "status": "error", "error": str(e)})
    return results
def main():
    # Initialize LiteLLM
    initialize_litellm()
    # Example 1: Simple completion
    prompt = "Explain the concept of vector databases in simple terms."
    completion = generate_completion(prompt)
    print("\n=== Simple Completion ===")
    print(completion)
    # Example 2: Chat completion
    messages = [
        {"role": "system", "content": "You are a helpful assistant that explains technical concepts clearly."},
        {"role": "user", "content": "What is HoneyHive and how does it help with AI observability?"}
    ]
    chat_completion = generate_chat_completion(messages)
    print("\n=== Chat Completion ===")
    print(chat_completion)
    # Example 3: Generate embedding
    text = "HoneyHive provides tracing and monitoring for AI applications."
    embedding = generate_embedding(text)
    print("\n=== Embedding ===")
    print(f"Generated embedding with {len(embedding)} dimensions")
    # Example 4: Process with fallback
    fallback_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a short poem about AI observability."}
    ]
    fallback_result = process_with_fallback(fallback_messages)
    print("\n=== Fallback Processing ===")
    print(fallback_result)
    # Example 5: Batch processing
    batch_prompts = [
        "What are vector databases?",
        "Explain the concept of RAG in AI applications.",
        "How does tracing help improve AI applications?"
    ]
    batch_results = batch_process_prompts(batch_prompts)
    print("\n=== Batch Processing Results ===")
    for i, result in enumerate(batch_results):
        print(f"Prompt {i+1} Status: {result['status']}")
if __name__ == "__main__":
    main()
What’s Being Traced
With this integration, HoneyHive captures:
- LiteLLM Initialization: Configuration and setup of LiteLLM
- Completion Generation: Performance metrics for generating completions
- Chat Completion Generation: Metrics for chat-based completions
- Embedding Generation: Performance of embedding operations
- Fallback Processing: Success rates and performance of fallback mechanisms
- Batch Processing: Metrics for processing multiple prompts
Viewing Traces in HoneyHive
After running your application:
- Log into your HoneyHive account
- Navigate to your project
- View the traces in the Sessions tab
- Analyze the performance of each LLM operation
Advanced Features
Tracing with Model Fallbacks
A common pattern with LiteLLM is to fall back to a secondary model when the primary model fails. Tracing this behavior helps you understand failure patterns:
@trace
def process_with_fallback(messages, primary_model="gpt-3.5-turbo", fallback_model="gpt-4"):
    try:
        # Try primary model first
        print(f"Attempting to use primary model: {primary_model}")
        return generate_chat_completion(messages, model=primary_model)
    except Exception as primary_error:
        print(f"Primary model failed: {primary_error}")
        try:
            # Fall back to secondary model
            print(f"Falling back to secondary model: {fallback_model}")
            return generate_chat_completion(messages, model=fallback_model)
        except Exception as fallback_error:
            print(f"Fallback model also failed: {fallback_error}")
            raise
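A quick usage sketch that forces the fallback path by giving the primary an invalid model name (the failing name is purely illustrative):
messages = [{"role": "user", "content": "Give one benefit of model fallbacks."}]
# "gpt-nonexistent" is deliberately invalid so the fallback branch is exercised
reply = process_with_fallback(messages, primary_model="gpt-nonexistent", fallback_model="gpt-4o-mini")
print(reply)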
Tracing Batch Processing
For batch operations, you can trace the entire batch process as well as individual completions:
@trace
def batch_process_prompts(prompts, model="gpt-3.5-turbo"):
    results = []
    for i, prompt in enumerate(prompts):
        try:
            print(f"Processing prompt {i+1}/{len(prompts)}")
            result = generate_completion(prompt, model=model)
            results.append({"prompt": prompt, "completion": result, "status": "success"})
        except Exception as e:
            print(f"Error processing prompt {i+1}: {e}")
            results.append({"prompt": prompt, "completion": None, "status": "error", "error": str(e)})
    return results
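After the run you can tally the traced results, for example to report a success rate alongside the per-prompt spans (prompts and model here are illustrative):
prompts = ["What is tracing?", "What is a span?"]
results = batch_process_prompts(prompts, model="gpt-4o-mini")
succeeded = sum(1 for r in results if r["status"] == "success")
print(f"{succeeded}/{len(results)} prompts succeeded")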
Best Practices
- Use descriptive session names to easily identify different runs (see the sketch after this list)
- Add custom attributes to traces for more detailed analysis
- Trace both successful operations and error handling paths
- Consider tracing with different model configurations to compare performance
- Use HoneyHive’s evaluation capabilities to assess response quality
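For example, a descriptive session name that encodes the environment and a timestamp makes individual runs easy to find in the Sessions tab. A minimal sketch reusing the initialization pattern from above; the naming scheme is only an illustration:
import os
from datetime import datetime
session_name = f"litellm_{os.getenv('ENV', 'dev')}_{datetime.now():%Y%m%d_%H%M}"
HoneyHiveTracer.init(
    api_key=HONEYHIVE_API_KEY,
    project="your project name",
    source="dev",
    session_name=session_name
)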
Troubleshooting
If you encounter issues with tracing:
- Ensure your HoneyHive API key is correct
- Verify that all required packages are installed (a quick sanity check is sketched after this list)
- Check that your LiteLLM API keys are valid
- Review the HoneyHive documentation for additional troubleshooting steps
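A quick sanity-check snippet for the first three items, using only the standard library (illustrative only):
import importlib.util
for pkg in ("honeyhive", "litellm"):
    print(pkg, "installed:", importlib.util.find_spec(pkg) is not None)
print("HoneyHive key set:", bool(HONEYHIVE_API_KEY))
print("OpenAI key set:", bool(OPENAI_API_KEY))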
Next Steps
- Experiment with different LLM providers through LiteLLM
- Add custom metrics to your traces
- Implement A/B testing of different models
- Explore HoneyHive’s evaluation capabilities for your LLM responses
By integrating HoneyHive with LiteLLM, you gain valuable insights into your LLM operations and can optimize for better performance, cost-efficiency, and response quality.