Python and LLMs: A Comprehensive Guide
Table of Contents
1. Introduction
2. Setup and Prerequisites
3. First Steps with LLMs
4. Prompt Engineering and Optimization
5. Practical Applications
6. Deployment and Best Practices
7. Advanced Topics
Best Practices Summary
Resources
1. Introduction
Large Language Models (LLMs) have become an integral part of modern software development. This comprehensive guide will walk you through the process of integrating LLMs into your Python projects, from basic setup to advanced implementations.
2. Setup and Prerequisites
Required Libraries
# Core libraries
!pip install transformers
!pip install torch
!pip install accelerate
!pip install safetensors
# Optional but recommended
!pip install datasets
!pip install evaluate
!pip install sentencepiece
Environment Check
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
def check_environment():
    """Check and report on the working environment"""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    if torch.cuda.is_available():
        print(f"GPU Model: {torch.cuda.get_device_name(0)}")
        print(f"Available GPUs: {torch.cuda.device_count()}")
        print(f"Current CUDA Version: {torch.version.cuda}")

    return device

# Check environment
device = check_environment()
3. First Steps with LLMs
Model Loading and Configuration
def initialize_model(model_name="mistralai/Mistral-7B-Instruct-v0.2", task="text-generation"):
    """
    Load and configure an LLM.

    Args:
        model_name (str): Name of the model to use
        task (str): Pipeline task for the model

    Returns:
        tuple: (model, tokenizer, pipeline)
    """
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Load model; device_map="auto" lets accelerate place the weights
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype=torch.float16
    )

    # Create pipeline (the model is already dispatched, so no device argument is needed)
    gen_pipeline = pipeline(
        task,
        model=model,
        tokenizer=tokenizer
    )

    return model, tokenizer, gen_pipeline
# Example usage
model, tokenizer, generator = initialize_model()
Token Analysis
from collections import Counter

class TokenAnalyzer:
    """Helper class for token analysis"""

    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def analyze_text(self, text):
        """
        Analyze tokens in the given text.

        Args:
            text (str): Text to analyze

        Returns:
            dict: Analysis results
        """
        # Tokenization (note: encode() also adds any special tokens,
        # so token_ids can be longer than the plain token list)
        tokens = self.tokenizer.tokenize(text)
        token_ids = self.tokenizer.encode(text)

        # Token analysis
        analysis = {
            'tokens': tokens,
            'token_ids': token_ids,
            'token_count': len(tokens),
            'unique_tokens': len(set(tokens)),
            'token_frequency': self._get_token_frequency(tokens)
        }

        return analysis

    def _get_token_frequency(self, tokens):
        """Calculate token frequencies"""
        return Counter(tokens)
# Usage example
analyzer = TokenAnalyzer(tokenizer)
analysis = analyzer.analyze_text("Working with LLMs in Python is exciting!")
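The returned dictionary can be inspected directly; a small usage sketch (the exact tokens and counts depend on the tokenizer):
print(f"Token count: {analysis['token_count']}")
print(f"Unique tokens: {analysis['unique_tokens']}")
print(f"Most common tokens: {analysis['token_frequency'].most_common(3)}")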
4. Prompt Engineering and Optimization
Advanced Prompt Templates
class PromptTemplate:
    """Templates for various use cases"""

    @staticmethod
    def create_qa_prompt(context, question):
        return f"""Context: {context}\n\nQuestion: {question}\n\nAnswer:"""

    @staticmethod
    def create_summary_prompt(text, max_words=None):
        word_limit = f" in at most {max_words} words" if max_words else ""
        return f"""Summarize the following text{word_limit}:\n\n{text}\n\nSummary:"""

    @staticmethod
    def create_analysis_prompt(text):
        return f"""Analyze the following text and provide key points:\n\n{text}\n\nAnalysis:"""

    @staticmethod
    def create_structured_output(prompt, output_format):
        return f"""{prompt}\n\nProvide the answer in the following format:\n{output_format}"""
# Usage examples
prompt = PromptTemplate.create_qa_prompt(
    context="Python was developed by Guido van Rossum in 1991.",
    question="When and by whom was Python developed?"
)
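The resulting prompt can be fed straight to the generator created earlier; a minimal sketch (greedy decoding here, and the wording of the answer will vary by model):
response = generator(prompt, max_new_tokens=50, do_sample=False)
print(response[0]['generated_text'])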
Optimization Parameters
def generate_optimized(generator, prompt, **kwargs):
    """
    Generate text with a sensible set of default sampling parameters.
    Any keyword argument overrides the corresponding default.
    """
    default_params = {
        'max_length': 100,
        'num_return_sequences': 1,
        'temperature': 0.7,
        'top_p': 0.9,
        'do_sample': True,
        'no_repeat_ngram_size': 2
    }

    # Merge parameters (caller-supplied values win)
    params = {**default_params, **kwargs}

    # Generate text
    responses = generator(prompt, **params)

    return responses
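A quick usage sketch, overriding one default while keeping the rest (the prompt text is arbitrary):
outputs = generate_optimized(generator, "Explain what a tokenizer does.", temperature=0.3)
print(outputs[0]['generated_text'])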
5. Practical Applications
Sentiment Analysis
def analyze_sentiment(texts, batch_size=32):
    """
    Perform batch sentiment analysis.

    Args:
        texts (list): List of texts to analyze
        batch_size (int): Size of processing batches

    Returns:
        list: Analysis results
    """
    classifier = pipeline('sentiment-analysis')

    results = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        batch_results = classifier(batch)
        results.extend(batch_results)

    return results
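A small usage sketch; each result is a dict with a label and a confidence score (the example sentences are arbitrary):
reviews = ["I love this library!", "The documentation could be better."]
for text, result in zip(reviews, analyze_sentiment(reviews)):
    print(f"{text} -> {result['label']} ({result['score']:.2f})")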
Text Summarization
def summarize_text(text, max_length=130, min_length=30):
    """
    Text summarization function.

    Args:
        text (str): Text to summarize
        max_length (int): Maximum summary length in tokens
        min_length (int): Minimum summary length in tokens

    Returns:
        str: Summarized text
    """
    summarizer = pipeline('summarization')

    summary = summarizer(text,
                         max_length=max_length,
                         min_length=min_length,
                         do_sample=False)

    return summary[0]['summary_text']
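A hypothetical call; long_article stands in for whatever text needs condensing:
long_article = (
    "Large Language Models are neural networks trained on vast amounts of text. "
    "They can generate, summarize, and classify language with little task-specific training. "
) * 5
print(summarize_text(long_article, max_length=60, min_length=20))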
6. Deployment and Best Practices
FastAPI Implementation
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import uvicorn

app = FastAPI()

class PromptRequest(BaseModel):
    text: str
    max_length: int = 100
    temperature: float = 0.7

@app.post("/generate")
async def generate_text(request: PromptRequest):
    try:
        response = generate_optimized(
            generator,
            request.text,
            max_length=request.max_length,
            temperature=request.temperature
        )
        return {"response": response}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
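To serve the app locally, the already-imported uvicorn can run it directly (assuming the code above lives in main.py):
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)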
Docker Configuration
FROM python:3.9-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
# Cache models
RUN python -c "from transformers import AutoTokenizer, AutoModelForCausalLM; AutoTokenizer.from_pretrained('mistralai/Mistral-7B-Instruct-v0.2'); AutoModelForCausalLM.from_pretrained('mistralai/Mistral-7B-Instruct-v0.2')"
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
Memory Optimization
from transformers import AutoConfig

def load_model_efficiently():
    """Memory-efficient model loading"""
    config = AutoConfig.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
    config.gradient_checkpointing = True

    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.2",
        config=config,
        device_map="auto",
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True
    )

    return model
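One quick sanity check is the model's reported memory footprint (an approximate figure from transformers):
model = load_model_efficiently()
print(f"Approximate memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")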
7. Advanced Topics
Model Fine-tuning
Examples and best practices for fine-tuning coming soon.
Multi-Model Ensemble
Combining the outputs of several models and weighting their results, as in the sketch below.
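As a flavour of the idea, a minimal sketch that takes a weighted vote over several sentiment classifiers; the checkpoint names and weights are left to the reader, and the sketch assumes each model returns a single label with a score:
from collections import defaultdict
from transformers import pipeline

def ensemble_sentiment(text, model_names, weights=None):
    """Weighted vote over several sentiment models."""
    weights = weights or [1.0] * len(model_names)
    votes = defaultdict(float)
    for name, weight in zip(model_names, weights):
        classifier = pipeline('sentiment-analysis', model=name)
        result = classifier(text)[0]
        # Lower-case labels so models with different casing conventions can agree
        votes[result['label'].lower()] += weight * result['score']
    # The label with the highest accumulated weighted score wins
    return max(votes, key=votes.get)

# Hypothetical usage (checkpoint names are placeholders):
# ensemble_sentiment("Great library!", ["model-a", "model-b"], weights=[0.6, 0.4])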
Custom Tokenizer Training
Training domain-specific tokenizers on in-domain text, as in the sketch below.
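One common route is to retrain an existing fast tokenizer on in-domain text with train_new_from_iterator; a minimal sketch in which the corpus, vocabulary size, and output path are placeholders:
from transformers import AutoTokenizer

base_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

# Any iterable of in-domain text works here; these strings are placeholders
corpus = ["def train(model, data): ...", "class Pipeline: ..."]

domain_tokenizer = base_tokenizer.train_new_from_iterator(corpus, vocab_size=32000)
domain_tokenizer.save_pretrained("./domain-tokenizer")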
Best Practices Summary
Model Optimization
- Optimize GPU usage
- Use batch processing
- Manage memory efficiently
Security
- Implement input validation (see the sketch after this summary)
- Use rate limiting
- Guard against prompt injection
Performance
- Implement caching mechanisms
- Use asynchronous processing
- Apply load balancing
Quality Control
- Regular output validation
- Metrics tracking
- Implement A/B testing
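As one example of the input-validation point above, the request model can enforce simple bounds before a prompt ever reaches the model (a minimal sketch; the limits are arbitrary):
from pydantic import BaseModel, Field

class ValidatedPromptRequest(BaseModel):
    # Reject empty or overly long prompts and out-of-range sampling values
    text: str = Field(..., min_length=1, max_length=4000)
    max_length: int = Field(100, ge=1, le=1024)
    temperature: float = Field(0.7, ge=0.0, le=2.0)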
Resources