API Reference

Complete class and type definitions for each SDK.

Cactus (Dart)

/// Main model handle of the Dart SDK, as listed in this reference.
///
/// Only signatures are shown here; the notes below restate what the
/// signatures themselves declare.
class Cactus {
  /// Static factory: takes the model path and an optional named `corpusDir`.
  static Cactus create(String modelPath, {String? corpusDir});

  /// Completion from a single prompt string. `options` and the `onToken`
  /// callback (called with a String and an int) are named parameters.
  CompletionResult complete(
    String prompt,
    {CompletionOptions options, void Function(String, int)? onToken}
  );

  /// Completion from a list of [Message]s. In addition to `options` and
  /// `onToken`, accepts an optional list of tool definitions as maps.
  CompletionResult completeMessages(
    List<Message> messages,
    {CompletionOptions options, List<Map<String, dynamic>>? tools, void Function(String, int)? onToken}
  );

  /// Transcription from an audio file path or from in-memory PCM bytes.
  /// The expected PCM format is not stated in this listing.
  TranscriptionResult transcribe(String audioPath, {String? prompt, TranscriptionOptions options});
  TranscriptionResult transcribePcm(Uint8List pcmData, {String? prompt, TranscriptionOptions options});

  /// Embedding calls return a list of doubles; `embed` normalizes by default
  /// (`normalize = true`). `ragQuery` returns a String and defaults to `topK = 5`.
  List<double> embed(String text, {bool normalize = true});
  List<double> imageEmbed(String imagePath);
  List<double> audioEmbed(String audioPath);
  String ragQuery(String query, {int topK = 5});

  /// Tokenization returns token ids as ints. `scoreWindow` returns a String;
  /// its format is not described here.
  List<int> tokenize(String text);
  String scoreWindow(List<int> tokens, int start, int end, int context);
  StreamTranscriber createStreamTranscriber();

  /// Lifecycle controls. Exact semantics of each call are not described in
  /// this listing.
  void reset();
  void stop();
  void dispose();

  /// Static accessor returning the last error as a String.
  static String getLastError();
}

Message

/// Chat message passed to `Cactus.completeMessages`.
///
/// This listing exposes three static constructors named `system`, `user`
/// and `assistant`, each taking the message content.
class Message {
  static Message system(String content);
  static Message user(String content);
  static Message assistant(String content);
}

CompletionOptions

/// Generation settings accepted by the completion calls.
///
/// All fields are `final`. Default values are not shown in this listing.
class CompletionOptions {
  final double temperature;
  final double topP;
  final int topK;
  final int maxTokens;
  final List<String> stopSequences;
  final double confidenceThreshold;

  // The type is omitted in this listing; presumably a CompletionOptions
  // instance holding the defaults — TODO confirm.
  static const defaultOptions;
}

CompletionResult

/// Result returned by `Cactus.complete` and `Cactus.completeMessages`.
///
/// NOTE(review): units of the timing fields are not stated here. The C
/// response JSON in this reference reports times in milliseconds; whether
/// these fields use the same unit should be confirmed.
class CompletionResult {
  final String text;
  /// Nullable list of function calls, each represented as a map.
  final List<Map<String, dynamic>>? functionCalls;
  final int promptTokens;
  final int completionTokens;
  final double timeToFirstToken;
  final double totalTime;
  final double prefillTokensPerSecond;
  final double decodeTokensPerSecond;
  final double confidence;
  final bool needsCloudHandoff;
}

TranscriptionResult

/// Result returned by the transcription calls and by [StreamTranscriber].
///
/// `segments` is nullable; the keys of each segment map are not described
/// in this listing.
class TranscriptionResult {
  final String text;
  final List<Map<String, dynamic>>? segments;
  final double totalTime;
}

StreamTranscriber

/// Streaming transcription handle obtained from
/// `Cactus.createStreamTranscriber()`.
///
/// `insert` accepts PCM bytes; `process` and `finalize` both return a
/// [TranscriptionResult]. The expected call order is not spelled out in this
/// listing.
class StreamTranscriber {
  void insert(Uint8List pcmData);
  TranscriptionResult process({String? language});
  TranscriptionResult finalize();
  void dispose();
}

CactusIndex

/// Embedding index with add, delete, query and compact operations.
class CactusIndex {
  /// Static factory: takes the index directory and a required `embeddingDim`.
  static CactusIndex create(String indexDir, {required int embeddingDim});

  /// Adds entries given as parallel lists of ids, documents and embeddings;
  /// `metadatas` is optional.
  /// Presumably all lists must have the same length — TODO confirm.
  void add({
    required List<int> ids,
    required List<String> documents,
    required List<List<double>> embeddings,
    List<String>? metadatas
  });

  void delete(List<int> ids);
  /// Queries with an embedding vector; `topK` defaults to 5.
  List<IndexResult> query(List<double> embedding, {int topK = 5});
  void compact();
  void dispose();
}

/// One entry of the list returned by `CactusIndex.query`: an id and its score.
class IndexResult {
  final int id;
  final double score;
}

Cactus (Kotlin)

/**
 * Entry point of the Kotlin SDK as listed in this reference.
 *
 * `create` takes the model path and an optional corpus directory
 * (defaults to `null`) and returns a [Cactus].
 */
object Cactus {
    fun create(modelPath: String, corpusDir: String? = null): Cactus
}

/**
 * Completion from a single prompt string.
 *
 * `options` defaults to `CompletionOptions()`.
 *
 * NOTE(review): this function is listed outside the `object Cactus` braces;
 * presumably it is called on the instance returned by `create` — confirm.
 */
fun complete(
    prompt: String,
    options: CompletionOptions = CompletionOptions()
): CompletionResult

/**
 * Completion from a list of [Message]s.
 *
 * `options` defaults to `CompletionOptions()`; `tools` (tool definitions as
 * maps) and `callback` (a [TokenCallback]) both default to `null`.
 */
fun complete(
    messages: List<Message>,
    options: CompletionOptions = CompletionOptions(),
    tools: List<Map<String, Any>>? = null,
    callback: TokenCallback? = null
): CompletionResult

/**
 * Transcription from an audio file path.
 *
 * `prompt` and `language` default to `null`; `translate` defaults to `false`.
 */
fun transcribe(
    audioPath: String,
    prompt: String? = null,
    language: String? = null,
    translate: Boolean = false
): TranscriptionResult

/**
 * Transcription from in-memory PCM bytes; same optional parameters as the
 * file-path overload.
 *
 * The expected PCM format is not stated in this listing.
 */
fun transcribe(
    pcmData: ByteArray,
    prompt: String? = null,
    language: String? = null,
    translate: Boolean = false
): TranscriptionResult

// Embedding calls return a FloatArray; `embed` normalizes by default.
fun embed(text: String, normalize: Boolean = true): FloatArray
fun imageEmbed(imagePath: String): FloatArray
fun audioEmbed(audioPath: String): FloatArray
// Returns a String; `topK` defaults to 5.
fun ragQuery(query: String, topK: Int = 5): String
// Token ids are returned as an IntArray.
fun tokenize(text: String): IntArray
// Returns a String; its format is not described in this listing.
fun scoreWindow(tokens: IntArray, start: Int, end: Int, context: Int): String
fun createStreamTranscriber(): StreamTranscriber
// Lifecycle controls; exact semantics of each call are not described here.
fun reset()
fun stop()
fun close()

Message

/**
 * Chat message made of a `role` and a `content` string.
 *
 * The companion object offers factories named `system`, `user` and
 * `assistant`, each taking only the content.
 */
data class Message(val role: String, val content: String) {
    companion object {
        fun system(content: String): Message
        fun user(content: String): Message
        fun assistant(content: String): Message
    }
}

CompletionOptions

/**
 * Generation settings accepted by the `complete` overloads.
 *
 * Every property has a default, so `CompletionOptions()` is a valid call.
 */
data class CompletionOptions(
    val temperature: Float = 0.7f,
    val topP: Float = 0.9f,
    val topK: Int = 40,
    val maxTokens: Int = 512,
    // Empty by default.
    val stopSequences: List<String> = emptyList(),
    val confidenceThreshold: Float = 0f
)

CompletionResult

/**
 * Result returned by the `complete` overloads.
 *
 * NOTE(review): units of the timing properties are not stated here. The C
 * response JSON in this reference reports times in milliseconds; whether
 * these properties use the same unit should be confirmed.
 */
data class CompletionResult(
    val text: String,
    // Nullable list of function calls, each represented as a map.
    val functionCalls: List<Map<String, Any>>?,
    val promptTokens: Int,
    val completionTokens: Int,
    val timeToFirstToken: Double,
    val totalTime: Double,
    val prefillTokensPerSecond: Double,
    val decodeTokensPerSecond: Double,
    val confidence: Double,
    val needsCloudHandoff: Boolean
)

TranscriptionResult

/**
 * Result returned by the `transcribe` overloads and by [StreamTranscriber].
 *
 * `segments` is nullable; the keys of each segment map are not described in
 * this listing.
 */
data class TranscriptionResult(
    val text: String,
    val segments: List<Map<String, Any>>?,
    val totalTime: Double
)

TokenCallback

/**
 * Single-method callback type used as the `callback` parameter of
 * `complete(messages, ...)`.
 *
 * Declared as a `fun interface`, so a lambda can be passed where a
 * [TokenCallback] is expected. `onToken` receives a token string and its id.
 */
fun interface TokenCallback {
    fun onToken(token: String, tokenId: Int)
}

StreamTranscriber

/**
 * Streaming transcription handle returned by `createStreamTranscriber()`.
 *
 * `insert` accepts PCM bytes; `process` (optional `language`, default `null`)
 * and `finalize` both return a [TranscriptionResult]. The expected call
 * order is not spelled out in this listing. Implements `Closeable`.
 */
class StreamTranscriber : Closeable {
    fun insert(pcmData: ByteArray)
    fun process(language: String? = null): TranscriptionResult
    fun finalize(): TranscriptionResult
    override fun close()
}

CactusIndex

/**
 * Embedding index with add, delete, query and compact operations.
 * Implements `Closeable`.
 */
class CactusIndex : Closeable {
    companion object {
        // Factory: takes the index directory and the embedding dimension.
        fun create(indexDir: String, embeddingDim: Int): CactusIndex
    }

    // Adds entries given as parallel arrays; `metadatas` defaults to null.
    // Presumably all arrays must have the same length — TODO confirm.
    fun add(
        ids: IntArray,
        documents: Array<String>,
        embeddings: Array<FloatArray>,
        metadatas: Array<String>? = null
    )

    fun delete(ids: IntArray)
    // Queries with an embedding vector; `topK` defaults to 5.
    fun query(embedding: FloatArray, topK: Int = 5): List<IndexResult>
    fun compact()
    override fun close()
}

/** One entry of the list returned by `CactusIndex.query`: an id and its score. */
data class IndexResult(val id: Int, val score: Float)

Core Functions

// Initialize a model.
// Returns a cactus_model_t handle, which cactus_complete takes as its first
// argument. The value returned on failure is not stated in this listing.
cactus_model_t cactus_init(
    const char* model_path,      // Path to weight folder
    const char* corpus_dir       // Optional: RAG corpus directory (or nullptr)
);

// Run completion.
// The caller supplies the output buffer (`response`, `response_size`).
// `options` and `tools` are JSON strings and may be nullptr.
// `callback` receives a token string, its id and the caller's `user_data`.
// The meaning of the int return value is not stated in this listing.
int cactus_complete(
    cactus_model_t model,        // Model handle from cactus_init
    const char* messages,        // Chat messages as JSON array
    char* response,              // Output buffer
    size_t response_size,        // Buffer size
    const char* options,         // Generation options JSON (or nullptr)
    const char* tools,           // Tool definitions JSON (or nullptr)
    void (*callback)(const char* token, int token_id, void* user_data),
    void* user_data              // User data passed to callback
);

Options JSON

{
    "max_tokens": 512,
    "stop_sequences": ["<|im_end|>"],
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 40
}

Response JSON

{
    "success": true,
    "error": null,
    "cloud_handoff": false,
    "response": "The capital of France is Paris.",
    "function_calls": [],
    "confidence": 0.8193,
    "time_to_first_token_ms": 45.23,
    "total_time_ms": 163.67,
    "prefill_tps": 1621.89,
    "decode_tps": 168.42,
    "ram_usage_mb": 245.67,
    "prefill_tokens": 28,
    "decode_tokens": 50,
    "total_tokens": 78
}

Graph API

// Computation-graph interface: declare tensors and operations, bind input
// data, execute, then read outputs.
class CactusGraph {
    // Graph construction: each call returns a Tensor handle.
    Tensor input(std::vector<int> shape, Precision precision);
    Tensor matmul(Tensor a, Tensor b, bool transpose_b);
    Tensor transpose(Tensor t);

    // Data binding and execution. set_input and get_output use untyped
    // pointers; presumably the caller interprets the buffer according to the
    // tensor's Precision — TODO confirm.
    void set_input(Tensor t, void* data, Precision precision);
    void execute();
    void* get_output(Tensor t);
    void hard_reset();
};

Precision Enum

// Numeric precision selector taken by CactusGraph::input and
// CactusGraph::set_input.
enum class Precision {
    FP32,   // Full precision floating point
    FP16,   // Half precision
    INT8,   // 8-bit quantized
    INT4    // 4-bit quantized
};

See the GitHub repository for the complete source code and additional usage examples.

On this page