Cactus

RAG & Embedding

Embeddings, vector search, and retrieval-augmented generation

Embeddings

Generate text, image, and audio embeddings on-device.

import { CactusLM, CactusSTT } from 'cactus-react-native';

// Text embedding
const textResult = await cactusLM.embed({ text: 'Hello, World!' });
console.log('Embedding vector:', textResult.embedding);

// Image embedding (requires vision-capable model)
const imageResult = await cactusLM.imageEmbed({ imagePath: 'path/to/image.jpg' });
console.log('Image embedding:', imageResult.embedding);

// Audio embedding
const audioResult = await cactusSTT.audioEmbed({ audioPath: 'path/to/audio.wav' });
console.log('Audio embedding:', audioResult.embedding);

Using the Hook:

const cactusLM = useCactusLM();

const handleEmbed = async () => {
  const result = await cactusLM.embed({ text: 'Hello, World!' });
  console.log('Embedding:', result.embedding);
};
// Text embeddings
final embedding = model.embed('Hello, world!');

// Image embeddings
final imageEmbedding = model.imageEmbed('/path/to/image.jpg');

// Audio embeddings
final audioEmbedding = model.audioEmbed('/path/to/audio.wav');
// Text embeddings
val embedding = model.embed("Hello, world!")

// Image embeddings
val imageEmbedding = model.imageEmbed("/path/to/image.jpg")

// Audio embeddings
val audioEmbedding = model.audioEmbed("/path/to/audio.wav")

Vector Index

Build and query an on-device vector index for similarity search.

import { CactusIndex } from 'cactus-react-native';

const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();

// Add documents with embeddings
await cactusIndex.add({
  ids: [1, 2, 3],
  documents: ['First doc', 'Second doc', 'Third doc'],
  embeddings: [[0.1, 0.2, ...], [0.3, 0.4, ...], [0.5, 0.6, ...]],
  metadatas: ['meta1', 'meta2', 'meta3']
});

// Query for similar documents
const result = await cactusIndex.query({
  embeddings: [[0.1, 0.2, ...]],
  options: { topK: 5 }
});

console.log('Similar docs:', result.ids);

Using the Hook:

const cactusIndex = useCactusIndex({
  name: 'my-index',
  embeddingDim: 1024
});

useEffect(() => {
  cactusIndex.init();
}, []);
final index = CactusIndex.create('/path/to/index', embeddingDim: 384);

index.add(
  ids: [1, 2],
  documents: ['Document 1', 'Document 2'],
  embeddings: [
    model.embed('Document 1'),
    model.embed('Document 2'),
  ],
);

final results = index.query(model.embed('search query'), topK: 5);
for (final r in results) {
  print('ID: ${r.id}, Score: ${r.score}');
}

index.dispose();
CactusIndex.create("/path/to/index", embeddingDim = 384).use { index ->
    val embeddings = arrayOf(model.embed("doc1"), model.embed("doc2"))

    index.add(
        ids = intArrayOf(1, 2),
        documents = arrayOf("Document 1", "Document 2"),
        embeddings = embeddings
    )

    val results = index.query(model.embed("search query"), topK = 5)
    results.forEach { println("ID: ${it.id}, Score: ${it.score}") }
}

Auto-RAG

Pass a corpus directory at model initialization for automatic retrieval-augmented generation.

final model = Cactus.create(
  '/path/to/model.gguf',
  corpusDir: '/path/to/documents',
);

final result = model.complete('What does the documentation say about X?');
val model = Cactus.create(
    modelPath = "/path/to/model",
    corpusDir = "/path/to/documents"
)

val result = model.complete("What does the documentation say about X?")
cactus_model_t model = cactus_init(
    "path/to/weight/folder",
    "path/to/rag/documents", // auto-RAG corpus directory
);

// Queries automatically retrieve relevant context from the corpus
char response[4096];
cactus_complete(model, messages, response, sizeof(response),
    nullptr, nullptr, nullptr, nullptr);

Tokenization

Tokenize text and score token windows against a given context size.

final tokens = model.tokenize('Hello, world!');
final scores = model.scoreWindow(tokens, 0, tokens.length, 512);
val tokens = model.tokenize("Hello, world!")
val scores = model.scoreWindow(tokens, start = 0, end = tokens.size, context = 512)

Example App

Check out the example app in the React Native repository for complete RAG implementations.