RAG & Embedding
Embeddings, vector search, and retrieval-augmented generation
Embeddings
Generate text, image, and audio embeddings on-device.
Dart:
// Text embeddings
final embedding = model.embed('Hello, world!');
// Image embeddings
final imageEmbedding = model.imageEmbed('/path/to/image.jpg');
// Audio embeddings
final audioEmbedding = model.audioEmbed('/path/to/audio.wav');

Kotlin:
// Text embeddings
val embedding = model.embed("Hello, world!")
// Image embeddings
val imageEmbedding = model.imageEmbed("/path/to/image.jpg")
// Audio embeddings
val audioEmbedding = model.audioEmbed("/path/to/audio.wav")

Auto-RAG
Pass a corpus directory at model initialization for automatic retrieval-augmented generation.
Dart:
final model = Cactus.create(
'/path/to/model.gguf',
corpusDir: '/path/to/documents',
);
final result = model.complete('What does the documentation say about X?');

Kotlin:
val model = Cactus.create(
modelPath = "/path/to/model",
corpusDir = "/path/to/documents"
)
val result = model.complete("What does the documentation say about X?")

C++:
cactus_model_t model = cactus_init(
"path/to/weight/folder",
"path/to/rag/documents", // auto-RAG corpus directory
);
char response[4096];
cactus_complete(model, messages, response, sizeof(response),
nullptr, nullptr, nullptr, nullptr);

Vector Index
Build and query an on-device vector index for similarity search.
Dart:
final index = CactusIndex.create('/path/to/index', embeddingDim: 384);
index.add(
ids: [1, 2],
documents: ['Document 1', 'Document 2'],
embeddings: [
model.embed('Document 1'),
model.embed('Document 2'),
],
);
final results = index.query(model.embed('search query'), topK: 5);
for (final r in results) {
print('ID: ${r.id}, Score: ${r.score}');
}
index.dispose();

Kotlin:
CactusIndex.create("/path/to/index", embeddingDim = 384).use { index ->
val embeddings = arrayOf(model.embed("doc1"), model.embed("doc2"))
index.add(
ids = intArrayOf(1, 2),
documents = arrayOf("Document 1", "Document 2"),
embeddings = embeddings
)
val results = index.query(model.embed("search query"), topK = 5)
results.forEach { println("ID: ${it.id}, Score: ${it.score}") }
}

Tokenization
Dart:
final tokens = model.tokenize('Hello, world!');
final scores = model.scoreWindow(tokens, 0, tokens.length, 512);

Kotlin:
val tokens = model.tokenize("Hello, world!")
val scores = model.scoreWindow(tokens, start = 0, end = tokens.size, context = 512)