| name | neuron-rag-specialist |
| description | Implement RAG (Retrieval-Augmented Generation) with Neuron AI including vector stores, embeddings providers, document loaders, and retrieval strategies. Use this skill whenever the user mentions RAG, retrieval, vector search, document retrieval, semantic search, knowledge bases, chat with documents, or wants to build AI systems that can query and understand external documents. Also trigger for tasks involving vector databases, embeddings, document chunking, or retrieval strategies. |
Neuron AI RAG Specialist
This skill helps you implement Retrieval-Augmented Generation (RAG) in Neuron AI. The `RAG` class extends the `Agent` class with document retrieval capabilities.
Core RAG Architecture
RAG systems in Neuron AI consist of three main components:
- Vector Store - Stores document embeddings for semantic search
- Embeddings Provider - Converts text to vector embeddings
- Retrieval Strategy - Determines how to search and rank documents
use NeuronAI\RAG\RAG;
use NeuronAI\Providers\AIProviderInterface;
use NeuronAI\Providers\Anthropic\Anthropic;
use NeuronAI\RAG\Embeddings\EmbeddingsProviderInterface;
use NeuronAI\RAG\Embeddings\OpenAIEmbeddingProvider;
use NeuronAI\RAG\VectorStore\VectorStoreInterface;
use NeuronAI\RAG\VectorStore\PineconeVectorStore;
/**
 * Example RAG agent wired to Anthropic, OpenAI embeddings, and Pinecone.
 *
 * All credentials are read from $_ENV.
 */
class MyChatBot extends RAG
{
    /**
     * Builds the Anthropic provider.
     */
    protected function provider(): AIProviderInterface
    {
        $apiKey = $_ENV['ANTHROPIC_API_KEY'];

        return new Anthropic(key: $apiKey, model: 'claude-3-5-sonnet-20241022');
    }

    /**
     * Builds the OpenAI embeddings provider.
     */
    protected function embeddings(): EmbeddingsProviderInterface
    {
        $apiKey = $_ENV['OPENAI_API_KEY'];

        return new OpenAIEmbeddingProvider(key: $apiKey, model: 'text-embedding-3-small');
    }

    /**
     * Builds the Pinecone vector store.
     */
    protected function vectorStore(): VectorStoreInterface
    {
        $apiKey = $_ENV['PINECONE_API_KEY'];
        $indexUrl = $_ENV['PINECONE_INDEX_URL'];

        return new PineconeVectorStore(key: $apiKey, indexUrl: $indexUrl);
    }
}
Vector Stores
Pinecone
use NeuronAI\RAG\VectorStore\PineconeVectorStore;
// Pinecone store: API key and index URL are read from environment variables.
// NOTE(review): `environment` is not passed in the MyChatBot example above — confirm the constructor accepts it.
new PineconeVectorStore(
key: $_ENV['PINECONE_API_KEY'],
indexUrl: $_ENV['PINECONE_INDEX_URL'],
environment: 'us-east-1-aws'
);
Chroma
use NeuronAI\RAG\VectorStore\ChromaVectorStore;
// Chroma store configured with a host, a port, and a collection name.
new ChromaVectorStore(
host: 'localhost',
port: 8000,
collection: 'my_collection'
);
Qdrant
use NeuronAI\RAG\VectorStore\QdrantVectorStore;
// Qdrant store: API key and URL come from environment variables; the collection name is fixed.
new QdrantVectorStore(
apiKey: $_ENV['QDRANT_API_KEY'],
url: $_ENV['QDRANT_URL'],
collection: 'my_collection'
);
Elasticsearch
use NeuronAI\RAG\VectorStore\ElasticsearchVectorStore;
// Elasticsearch store configured with a list of host URLs and an index name.
new ElasticsearchVectorStore(
hosts: ['http://localhost:9200'],
index: 'documents'
);
Typesense
use NeuronAI\RAG\VectorStore\TypesenseVectorStore;
// Typesense store: API key from the environment, a list of node definitions (host/port), and a collection name.
new TypesenseVectorStore(
apiKey: $_ENV['TYPESENSE_API_KEY'],
nodes: [['host' => 'localhost', 'port' => 8108]],
collection: 'documents'
);
Other Vector Stores
- MemoryVectorStore - In-memory for testing
- MilvusVectorStore - Milvus database
- RedisVectorStore - Redis with RediSearch
- WeaviateVectorStore - Weaviate database
- PgVectorStore - PostgreSQL with pgvector extension
Embeddings Providers
OpenAI
use NeuronAI\RAG\Embeddings\OpenAIEmbeddingProvider;
// OpenAI embeddings: API key from the environment, embedding model named explicitly.
new OpenAIEmbeddingProvider(
key: $_ENV['OPENAI_API_KEY'],
model: 'text-embedding-3-small'
);
Ollama
use NeuronAI\RAG\Embeddings\OllamaEmbeddingProvider;
// Ollama embeddings: configured with a base URL (no API key argument) and a model name.
new OllamaEmbeddingProvider(
baseUrl: 'http://localhost:11434',
model: 'nomic-embed-text'
);
Gemini
use NeuronAI\RAG\Embeddings\GeminiEmbeddingProvider;
// Gemini embeddings: API key from the environment, embedding model named explicitly.
new GeminiEmbeddingProvider(
key: $_ENV['GEMINI_API_KEY'],
model: 'text-embedding-004'
);
Voyage
use NeuronAI\RAG\Embeddings\VoyageEmbeddingProvider;
// Voyage embeddings: API key from the environment, embedding model named explicitly.
new VoyageEmbeddingProvider(
key: $_ENV['VOYAGE_API_KEY'],
model: 'voyage-3-lite'
);
Document Loading and Chunking
Text Documents
use NeuronAI\RAG\DocumentLoader\TextLoader;
use NeuronAI\RAG\Chunker\RecursiveCharacterTextSplitter;
// Load the text file into $documents.
$loader = new TextLoader('/path/to/document.txt');
$documents = $loader->load();
// Split the loaded documents; overlap is 200 against a chunk size of 1000 (20%).
$chunker = new RecursiveCharacterTextSplitter(
chunkSize: 1000,
chunkOverlap: 200
);
$chunks = $chunker->chunk($documents);
PDF Documents
use NeuronAI\RAG\DocumentLoader\PDFLoader;
// Same loader pattern as for text files, pointed at a PDF path.
$loader = new PDFLoader('/path/to/document.pdf');
$documents = $loader->load();
HTML Documents
use NeuronAI\RAG\DocumentLoader\HtmlLoader;
// The HTML loader is given a URL here rather than a filesystem path.
$loader = new HtmlLoader('https://example.com/page');
$documents = $loader->load();
Loading Multiple Files
use NeuronAI\RAG\DocumentLoader\DirectoryLoader;
// The directory loader is given a directory path instead of a single file.
$loader = new DirectoryLoader('/path/to/documents');
$documents = $loader->load();
Ingesting Documents
// Build the RAG agent that owns the embeddings provider and the vector store.
$rag = MyChatBot::make();

// Load documents from the ./docs directory.
$loader = new DirectoryLoader('./docs');
$documents = $loader->load();

// Split the documents into overlapping chunks before they are embedded.
$chunker = new RecursiveCharacterTextSplitter(
    chunkSize: 1000,
    chunkOverlap: 200
);
$chunks = $chunker->chunk($documents);

// vectorStore() is declared protected on the agent, so it cannot be called from
// outside the class. Ingest through the agent's public addDocuments() method,
// which embeds the chunks and writes them to the configured vector store.
$rag->addDocuments($chunks);
Retrieval Strategies
Basic Similarity Search
use NeuronAI\RAG\Retrieval\SimilaritySearch;
// Replace the agent's retrieval strategy with plain similarity search.
$rag->setRetrieval(new SimilaritySearch(
k: 5 // Return top 5 documents
));
Hybrid Search (Keyword + Semantic)
use NeuronAI\RAG\Retrieval\HybridSearch;
// Replace the agent's retrieval strategy with hybrid search; alpha weights the two signals.
$rag->setRetrieval(new HybridSearch(
k: 5,
alpha: 0.5 // Balance between semantic (1.0) and keyword (0.0)
));
Max Marginal Relevance (MMR)
use NeuronAI\RAG\Retrieval\MMRSearch;
// Replace the agent's retrieval strategy with MMR: fetchK candidates are narrowed to k results.
$rag->setRetrieval(new MMRSearch(
k: 5,
fetchK: 20, // Fetch 20, return diverse 5
lambdaMult: 0.5 // Diversity parameter
));
Pre and Post Processors
Pre-Processors (Query Transformation)
use NeuronAI\RAG\Processor\QueryExpansionProcessor;
// Register a pre-processor on the agent; it is added alongside any already registered.
$rag->addPreProcessor(new QueryExpansionProcessor(
numQueries: 3 // Generate 3 query variations
));
use NeuronAI\RAG\Processor\HydeProcessor;
// NOTE(review): provider() is declared protected in the MyChatBot example, so this call
// would only work from inside the agent class — confirm how the provider should be obtained here.
$rag->addPreProcessor(new HydeProcessor(
model: $rag->provider() // Generate hypothetical document
));
Post-Processors (Result Enhancement)
use NeuronAI\RAG\Processor\RerankProcessor;
use NeuronAI\RAG\Processor\JinaReranker;
// Register a reranking post-processor backed by Jina; the API key is read from the environment.
$rag->addPostProcessor(new RerankProcessor(
reranker: new JinaReranker(
apiKey: $_ENV['JINA_API_KEY']
),
topK: 5
));
use NeuronAI\RAG\Processor\CompressorProcessor;
// Register a compressing post-processor configured with a 2000-token budget.
$rag->addPostProcessor(new CompressorProcessor(
maxTokens: 2000
));
Using the RAG
Basic Query
use NeuronAI\Chat\Messages\UserMessage;

$rag = MyChatBot::make();

// chat() is given the user's question; getMessage() yields the reply message.
$response = $rag->chat(
    new UserMessage("What are the main features of our product?")
)->getMessage();

echo $response->getContent();
Streaming
use NeuronAI\Chat\Messages\Stream\Chunks\TextChunk;
// Stream the answer and print only the text chunks; other event types are skipped.
$question = new UserMessage("Explain the architecture");
$events = $rag->stream($question)->events();

foreach ($events as $event) {
    if (!$event instanceof TextChunk) {
        continue;
    }

    echo $event->content;
}
Structured Output with RAG
// structured() takes the user message plus the class name describing the desired output.
// NOTE(review): PricingSummary is not defined in this document — it must be supplied by the application.
$summary = $rag->structured(
new UserMessage("Summarize the pricing information"),
PricingSummary::class
);
CLI Generation
vendor/bin/neuron make:rag MyKnowledgeBot
Advanced Configuration
Custom Retrieval
use NeuronAI\RAG\Retrieval\RetrievalInterface;
/**
 * Custom retrieval strategy that delegates to the store's similarity search with k: 3.
 */
class CustomRetrieval implements RetrievalInterface
{
    /**
     * @param string               $query       Text to search for.
     * @param VectorStoreInterface $vectorStore Store that is queried.
     *
     * @return array Whatever similaritySearch() returns for the query.
     */
    public function retrieve(string $query, VectorStoreInterface $vectorStore): array
    {
        $matches = $vectorStore->similaritySearch($query, k: 3);

        return $matches;
    }
}

// Install the custom strategy on the agent.
$rag->setRetrieval(new CustomRetrieval());
Filtering Results
// NOTE(review): withMetadataFilter() is chained on the value returned by chat(), i.e. after
// the chat call has been made — confirm the filter is meant to be applied at this point.
$rag->chat(
new UserMessage("Find documents about pricing")
)->withMetadataFilter([
'category' => 'pricing',
'year' => 2024
]);
Common Patterns
Company Knowledge Base
/**
 * Knowledge-base agent: OpenAI embeddings, Pinecone storage, hybrid retrieval,
 * and a system prompt that restricts answers to the provided context.
 */
class CompanyKnowledgeBot extends RAG
{
    /**
     * Builds the OpenAI embeddings provider from the environment key.
     */
    protected function embeddings(): EmbeddingsProviderInterface
    {
        $apiKey = $_ENV['OPENAI_API_KEY'];

        return new OpenAIEmbeddingProvider(key: $apiKey, model: 'text-embedding-3-small');
    }

    /**
     * Builds the Pinecone vector store from the environment key and index URL.
     */
    protected function vectorStore(): VectorStoreInterface
    {
        $apiKey = $_ENV['PINECONE_API_KEY'];
        $indexUrl = $_ENV['PINECONE_INDEX_URL'];

        return new PineconeVectorStore(key: $apiKey, indexUrl: $indexUrl);
    }

    /**
     * Uses hybrid search with k: 5 and alpha: 0.7.
     */
    protected function retrieval(): RetrievalInterface
    {
        $strategy = new HybridSearch(k: 5, alpha: 0.7);

        return $strategy;
    }

    /**
     * Renders the system prompt (background + constraints) to a string.
     */
    protected function instructions(): string
    {
        $background = [
            "You are a helpful assistant that answers questions",
            "about our company using the provided context.",
        ];
        $constraints = [
            "Only use the provided context to answer.",
            "If the answer is not in the context, say you don't know.",
        ];

        $prompt = new SystemPrompt(background: $background, constraints: $constraints);

        return (string) $prompt;
    }
}
Document Q&A with Reranking
$rag = MyChatBot::make();

// Register the Cohere reranker as a post-processor; key and model come from the environment.
// The call needs two closing parentheses: one for the constructor, one for addPostProcessor().
$rag->addPostProcessor(new CohereRerankerPostProcessor(
    key: $_ENV['COHERE_API_KEY'],
    model: $_ENV['COHERE_MODEL'],
    topN: 5
));

$rag->chat(new UserMessage("Your question here"));
Performance Considerations
Chunk Size Selection
- Smaller chunks (500-1000 tokens): More precise retrieval, more documents to process
- Larger chunks (1500-2000 tokens): More context per document, less precise
- Chunk overlap: 10-20% helps maintain context across chunk boundaries
Top-K Selection
- 3-5 documents: Good for focused queries, faster responses
- 10+ documents: Better for comprehensive answers
Testing RAG
use PHPUnit\Framework\TestCase;
/**
 * Checks that a seeded fact is reflected in the agent's answer.
 *
 * NOTE(review): as written this presumably exercises the real provider, embeddings and
 * vector store configured on MyChatBot — confirm test doubles are used where needed.
 */
class MyChatBotTest extends TestCase
{
    public function testRAGRetrieval(): void
    {
        $rag = MyChatBot::make();

        // vectorStore() is protected on the agent and cannot be called from a test,
        // so seed the knowledge base through the public addDocuments() method.
        $rag->addDocuments([
            new Document('test', 'The product costs $99.'),
        ]);

        $response = $rag->chat(
            new UserMessage("How much does it cost?")
        )->getMessage();

        $this->assertStringContainsString('99', $response->getContent());
    }
}