Cactus

API Reference

Complete API reference for all Cactus SDKs

Complete class and type definitions for each SDK.

CactusLM

/**
 * Language-model client: model download, lifecycle, chat completion, and
 * text/image embeddings. Every method returns a Promise except `destroy()`.
 */
class CactusLM {
  // Every option is optional, including the outer options object itself.
  // `quantization` is restricted to the literals 'int4' or 'int8'.
  constructor(options?: { model?: string; options?: { quantization?: 'int4' | 'int8'; pro?: boolean } });

  // NOTE(review): presumably called in the order download() -> init() -> ... -> destroy(); confirm.
  download(): Promise<void>;
  init(): Promise<void>;
  destroy(): void;

  // Chat completion. `tools` and `onToken` are optional; `onToken` is called
  // with a single token string. Resolves to a CompletionResult.
  // NOTE(review): `images` entries are presumably file paths or URIs; confirm.
  complete(params: {
    messages: Array<{ role: string; content: string; images?: string[] }>;
    tools?: Tool[];
    onToken?: (token: string) => void;
  }): Promise<CompletionResult>;

  // Embedding helpers; each resolves to an object carrying an `embedding: number[]`.
  embed(params: CactusLMEmbedParams): Promise<CactusLMEmbedResult>;
  imageEmbed(params: { imagePath: string }): Promise<CactusLMImageEmbedResult>;
}

useCactusLM Hook

/**
 * Hook counterpart of CactusLM. Returns download/generation state flags plus
 * `download` and `complete` actions.
 *
 * Unlike CactusLM.complete, this `complete` resolves to void.
 * NOTE(review): the generated text presumably surfaces through `completion`; confirm.
 * NOTE(review): `Message` is not declared in the visible Types section; it
 * presumably matches the `{ role; content; images? }` shape used by
 * CactusLM.complete. Confirm.
 */
function useCactusLM(options?: { model?: string }): {
  isDownloaded: boolean;
  isDownloading: boolean;
  // NOTE(review): range of downloadProgress (0-1 vs 0-100) is not stated here; confirm.
  downloadProgress: number;
  isGenerating: boolean;
  completion: string;
  download(): Promise<void>;
  complete(params: { messages: Message[] }): Promise<void>;
};

CactusSTT

/**
 * Speech-to-text client: one-shot transcription, a streaming API split across
 * streamTranscribeStart / streamTranscribeProcess / streamTranscribeStop, and
 * audio embeddings.
 */
class CactusSTT {
  // Unlike CactusLM, the options object and its `model` field are required.
  constructor(options: { model: string });

  init(): Promise<void>;
  destroy(): void;

  // One-shot transcription with an optional per-token callback.
  // NOTE(review): a string `audio` is presumably a file path and number[] raw samples; confirm.
  transcribe(params: {
    audio: string | number[];
    onToken?: (token: string) => void;
  }): Promise<TranscriptionResult>;

  // Both tuning fields, and the options object itself, are optional.
  streamTranscribeStart(options?: {
    confirmationThreshold?: number;
    minChunkSize?: number;
  }): Promise<void>;

  // Accepts number[] audio only (no string form); resolves to a StreamResult.
  streamTranscribeProcess(params: {
    audio: number[];
  }): Promise<StreamResult>;

  // Takes no arguments; resolves to a TranscriptionResult.
  streamTranscribeStop(): Promise<TranscriptionResult>;

  audioEmbed(params: { audioPath: string }): Promise<CactusSTTAudioEmbedResult>;
}

useCactusSTT Hook

/**
 * Hook counterpart of CactusSTT. `options.model` is required.
 *
 * `transcribe` resolves to void and takes no `onToken` callback here.
 * NOTE(review): the text presumably surfaces through `transcription`; confirm.
 */
function useCactusSTT(options: { model: string }): {
  transcription: string;
  isTranscribing: boolean;
  transcribe(params: { audio: string | number[] }): Promise<void>;
};

CactusVAD

/**
 * Voice-activity-detection client. Only a constructor and `vad()` are listed
 * here; no init()/destroy() pair appears for this class in this reference.
 */
class CactusVAD {
  constructor(options: { model: string });

  // Resolves to a VADResult. The nested `options` object and both of its
  // fields are optional.
  // NOTE(review): `minSpeechDurationMs` is presumably milliseconds per its name;
  // the range of `threshold` is not stated here. Confirm both.
  vad(params: {
    audio: string | number[];
    options?: {
      threshold?: number;
      minSpeechDurationMs?: number;
    };
  }): Promise<VADResult>;
}

useCactusVAD Hook

/**
 * Hook counterpart of CactusVAD.
 *
 * As listed, `vad` resolves to void, accepts no `options`, and the hook
 * exposes no result field.
 * NOTE(review): confirm how detected segments are delivered to the caller.
 */
function useCactusVAD(options: { model: string }): {
  vad(params: { audio: string | number[] }): Promise<void>;
};

CactusIndex

/**
 * Vector index identified by a name and an embedding dimension, both passed
 * positionally to the constructor.
 */
class CactusIndex {
  constructor(name: string, embeddingDim: number);

  init(): Promise<void>;
  destroy(): void;

  // Adds entries; `metadatas` is optional.
  // NOTE(review): ids/documents/embeddings (and metadatas) are presumably
  // parallel arrays of equal length, with each embedding of length
  // `embeddingDim`. Confirm.
  add(params: {
    ids: number[];
    documents: string[];
    embeddings: number[][];
    metadatas?: string[];
  }): Promise<void>;

  // Accepts several query vectors (number[][]) but resolves to a QueryResult
  // whose `ids`/`scores` are flat arrays.
  // NOTE(review): confirm how results for multiple query vectors are laid out,
  // and the default `topK`.
  query(params: {
    embeddings: number[][];
    options?: { topK?: number };
  }): Promise<QueryResult>;
}

useCactusIndex Hook

/**
 * Hook counterpart of CactusIndex. Takes `name` and `embeddingDim` as an
 * options object (the class takes them positionally). Only `init()` is listed
 * in the returned object here.
 */
function useCactusIndex(options: { name: string; embeddingDim: number }): {
  init(): Promise<void>;
};

Types

/** Result type of CactusLM.complete. */
interface CompletionResult {
  response: string;
  cloudHandoff: boolean;
  // Optional; each entry pairs a function name with its arguments object.
  functionCalls?: Array<{ name: string; arguments: Record<string, any> }>;
}

/** Result type of CactusSTT.transcribe and CactusSTT.streamTranscribeStop. */
interface TranscriptionResult {
  text: string;
  // Optional per-segment breakdown.
  // NOTE(review): units of start/end are not stated here; confirm.
  segments?: Array<{ start: number; end: number; text: string }>;
}

/**
 * Result type of CactusSTT.streamTranscribeProcess.
 * NOTE(review): `confirmed` is presumably text that will not change and
 * `pending` text still subject to revision; confirm.
 */
interface StreamResult {
  confirmed: string;
  pending: string;
  cloudResult?: string;
}

/** Result type of CactusVAD.vad. */
interface VADResult {
  // NOTE(review): units of start/end are not stated here; confirm.
  segments: Array<{ start: number; end: number }>;
}

/**
 * Result type of CactusIndex.query.
 * NOTE(review): `ids` and `scores` are presumably parallel arrays; confirm.
 */
interface QueryResult {
  ids: number[];
  scores: number[];
}

/** Parameter type of CactusLM.embed. */
interface CactusLMEmbedParams {
  text: string;
  // Optional. NOTE(review): the default is not stated here; confirm.
  normalize?: boolean;
}

/** Result type of CactusLM.embed. */
interface CactusLMEmbedResult {
  embedding: number[];
}

/** Result type of CactusLM.imageEmbed. */
interface CactusLMImageEmbedResult {
  embedding: number[];
}

/** Result type of CactusSTT.audioEmbed. */
interface CactusSTTAudioEmbedResult {
  embedding: number[];
}

/** Tool definition; element type of the optional `tools` array in CactusLM.complete. */
interface Tool {
  name: string;
  description: string;
  // Parameter schema: `type` is fixed to the literal 'object'; `properties`
  // maps each parameter name to its type and optional description;
  // `required` optionally lists parameter names.
  parameters: {
    type: 'object';
    properties: Record<string, { type: string; description?: string }>;
    required?: string[];
  };
}

Cactus (Dart)

/// Model handle covering completion, transcription, embeddings, RAG queries
/// and tokenization. Every method listed here is synchronous (none returns a
/// Future).
class Cactus {
  /// Creates an instance from a model path; `corpusDir` is optional.
  static Cactus create(String modelPath, {String? corpusDir});

  /// Completion from a single prompt string. `onToken` receives a String and
  /// an int.
  // NOTE(review): the callback arguments are presumably the token text and
  // token id; confirm.
  CompletionResult complete(
    String prompt,
    {CompletionOptions options, void Function(String, int)? onToken}
  );

  /// Completion from a message list, with optional tool definitions.
  CompletionResult completeMessages(
    List<Message> messages,
    {CompletionOptions options, List<Map<String, dynamic>>? tools, void Function(String, int)? onToken}
  );

  /// Transcription from an audio file path or from in-memory PCM bytes.
  // NOTE(review): expected PCM sample format/rate is not stated here; confirm.
  TranscriptionResult transcribe(String audioPath, {String? prompt, TranscriptionOptions options});
  TranscriptionResult transcribePcm(Uint8List pcmData, {String? prompt, TranscriptionOptions options});

  /// Embedding helpers. `normalize` defaults to true; `topK` defaults to 5.
  List<double> embed(String text, {bool normalize = true});
  List<double> imageEmbed(String imagePath);
  List<double> audioEmbed(String audioPath);
  String ragQuery(String query, {int topK = 5});

  List<int> tokenize(String text);
  // NOTE(review): the format of the String returned by scoreWindow is not
  // stated here; confirm.
  String scoreWindow(List<int> tokens, int start, int end, int context);
  StreamTranscriber createStreamTranscriber();

  void reset();
  void stop();
  void dispose();

  /// Static accessor for the most recent error message.
  static String getLastError();
}

Message

/// Chat message. Three static factories are listed, one per role name
/// (system, user, assistant), each taking the message content.
class Message {
  static Message system(String content);
  static Message user(String content);
  static Message assistant(String content);
}

CompletionOptions

/// Immutable generation settings passed as `options` to Cactus.complete and
/// Cactus.completeMessages.
class CompletionOptions {
  final double temperature;
  final double topP;
  final int topK;
  final int maxTokens;
  final List<String> stopSequences;
  final double confidenceThreshold;

  // NOTE(review): the field values of `defaultOptions` are not shown here; confirm.
  static const defaultOptions;
}

CompletionResult

/// Return type of Cactus.complete and Cactus.completeMessages: generated
/// text, optional function calls, token counts, and timing/throughput metrics.
class CompletionResult {
  final String text;
  final List<Map<String, dynamic>>? functionCalls;
  final int promptTokens;
  final int completionTokens;
  // NOTE(review): timeToFirstToken/totalTime are presumably milliseconds (the
  // C response JSON uses *_ms keys); confirm.
  final double timeToFirstToken;
  final double totalTime;
  final double prefillTokensPerSecond;
  final double decodeTokensPerSecond;
  final double confidence;
  final bool needsCloudHandoff;
}

TranscriptionResult

/// Return type of Cactus.transcribe / transcribePcm and of
/// StreamTranscriber.process / finalize.
class TranscriptionResult {
  final String text;
  // NOTE(review): the keys of each segment map are not stated here; confirm.
  final List<Map<String, dynamic>>? segments;
  final double totalTime;
}

StreamTranscriber

/// Streaming transcriber obtained from Cactus.createStreamTranscriber().
// NOTE(review): usage is presumably insert() chunks, process() for interim
// results, finalize() at the end, then dispose(); confirm.
class StreamTranscriber {
  void insert(Uint8List pcmData);
  TranscriptionResult process({String? language});
  TranscriptionResult finalize();
  void dispose();
}

CactusIndex

/// Vector index created from an index directory and a required embedding
/// dimension.
class CactusIndex {
  static CactusIndex create(String indexDir, {required int embeddingDim});

  /// Adds entries; ids, documents and embeddings are required, metadatas is
  /// optional.
  // NOTE(review): the lists are presumably parallel and of equal length; confirm.
  void add({
    required List<int> ids,
    required List<String> documents,
    required List<List<double>> embeddings,
    List<String>? metadatas
  });

  void delete(List<int> ids);
  /// Queries with a single embedding vector; `topK` defaults to 5.
  List<IndexResult> query(List<double> embedding, {int topK = 5});
  void compact();
  void dispose();
}

/// One hit returned by CactusIndex.query: an entry id and its score.
class IndexResult {
  final int id;
  final double score;
}

Cactus (Kotlin)

/**
 * Entry point: `create` builds a model handle from a model path, with an
 * optional corpus directory (defaults to null).
 *
 * NOTE(review): this is declared as `object`, yet `create` returns `Cactus`,
 * and the functions listed after this block appear at top level. They are
 * presumably instance members of the returned `Cactus`; confirm against the SDK.
 */
object Cactus {
    fun create(modelPath: String, corpusDir: String? = null): Cactus
}

/**
 * Completion from a single prompt string. `options` defaults to
 * `CompletionOptions()`. This overload lists no tools or token callback.
 */
fun complete(
    prompt: String,
    options: CompletionOptions = CompletionOptions()
): CompletionResult

/**
 * Completion from a message list. `options` defaults to `CompletionOptions()`;
 * `tools` and `callback` default to null.
 */
fun complete(
    messages: List<Message>,
    options: CompletionOptions = CompletionOptions(),
    tools: List<Map<String, Any>>? = null,
    callback: TokenCallback? = null
): CompletionResult

/**
 * Transcription from an audio file path. `prompt` and `language` default to
 * null; `translate` defaults to false.
 */
fun transcribe(
    audioPath: String,
    prompt: String? = null,
    language: String? = null,
    translate: Boolean = false
): TranscriptionResult

/**
 * Transcription from in-memory PCM bytes; same optional parameters and
 * defaults as the file-path overload.
 *
 * NOTE(review): the expected PCM sample format/rate is not stated here; confirm.
 */
fun transcribe(
    pcmData: ByteArray,
    prompt: String? = null,
    language: String? = null,
    translate: Boolean = false
): TranscriptionResult

// Embedding helpers return FloatArray; `normalize` defaults to true.
fun embed(text: String, normalize: Boolean = true): FloatArray
fun imageEmbed(imagePath: String): FloatArray
fun audioEmbed(audioPath: String): FloatArray
// `topK` defaults to 5.
fun ragQuery(query: String, topK: Int = 5): String
fun tokenize(text: String): IntArray
// NOTE(review): the format of the String returned by scoreWindow is not stated here; confirm.
fun scoreWindow(tokens: IntArray, start: Int, end: Int, context: Int): String
fun createStreamTranscriber(): StreamTranscriber
// Control/lifecycle calls; none takes arguments or declares a return type.
fun reset()
fun stop()
fun close()

Message

/**
 * Chat message holding a role and content string. The companion object lists
 * one factory per role name (system, user, assistant).
 */
data class Message(val role: String, val content: String) {
    companion object {
        fun system(content: String): Message
        fun user(content: String): Message
        fun assistant(content: String): Message
    }
}

CompletionOptions

/**
 * Generation settings for `complete`. Every field has a default, so
 * `CompletionOptions()` is valid: temperature 0.7, topP 0.9, topK 40,
 * maxTokens 512, no stop sequences, confidenceThreshold 0.
 */
data class CompletionOptions(
    val temperature: Float = 0.7f,
    val topP: Float = 0.9f,
    val topK: Int = 40,
    val maxTokens: Int = 512,
    val stopSequences: List<String> = emptyList(),
    val confidenceThreshold: Float = 0f
)

CompletionResult

/**
 * Return type of `complete`: generated text, optional function calls, token
 * counts, and timing/throughput metrics.
 *
 * NOTE(review): timeToFirstToken/totalTime are presumably milliseconds (the C
 * response JSON uses *_ms keys); confirm.
 */
data class CompletionResult(
    val text: String,
    val functionCalls: List<Map<String, Any>>?,
    val promptTokens: Int,
    val completionTokens: Int,
    val timeToFirstToken: Double,
    val totalTime: Double,
    val prefillTokensPerSecond: Double,
    val decodeTokensPerSecond: Double,
    val confidence: Double,
    val needsCloudHandoff: Boolean
)

TranscriptionResult

/**
 * Return type of `transcribe` and of StreamTranscriber.process / finalize.
 *
 * NOTE(review): the keys of each segment map are not stated here; confirm.
 */
data class TranscriptionResult(
    val text: String,
    val segments: List<Map<String, Any>>?,
    val totalTime: Double
)

TokenCallback

/**
 * Functional (SAM) interface accepted as `callback` by the message-list
 * `complete` overload; receives a token string and its integer id.
 */
fun interface TokenCallback {
    fun onToken(token: String, tokenId: Int)
}

StreamTranscriber

/**
 * Streaming transcriber returned by `createStreamTranscriber()`. Implements
 * Closeable.
 *
 * NOTE(review): usage is presumably insert() chunks, process() for interim
 * results, finalize() at the end, then close(); confirm.
 */
class StreamTranscriber : Closeable {
    fun insert(pcmData: ByteArray)
    // `language` defaults to null.
    fun process(language: String? = null): TranscriptionResult
    fun finalize(): TranscriptionResult
    override fun close()
}

CactusIndex

/**
 * Vector index created through the companion `create` from an index directory
 * and an embedding dimension. Implements Closeable.
 */
class CactusIndex : Closeable {
    companion object {
        fun create(indexDir: String, embeddingDim: Int): CactusIndex
    }

    // `metadatas` defaults to null.
    // NOTE(review): ids/documents/embeddings are presumably parallel arrays of
    // equal length; confirm.
    fun add(
        ids: IntArray,
        documents: Array<String>,
        embeddings: Array<FloatArray>,
        metadatas: Array<String>? = null
    )

    fun delete(ids: IntArray)
    // Queries with a single embedding vector; `topK` defaults to 5.
    fun query(embedding: FloatArray, topK: Int = 5): List<IndexResult>
    fun compact()
    override fun close()
}

/** One hit returned by CactusIndex.query: an entry id and its score. */
data class IndexResult(val id: Int, val score: Float)

Core Functions

// Initialize a model
// Returns the cactus_model_t handle that cactus_complete takes as `model`.
// NOTE(review): the value returned on failure is not stated here; confirm.
cactus_model_t cactus_init(
    const char* model_path,      // Path to weight folder
    const char* corpus_dir       // Optional: RAG corpus directory (or nullptr)
);

// Run completion
// Writes its output into the caller-supplied `response` buffer of
// `response_size` bytes.
// NOTE(review): the buffer presumably receives the Response JSON; the meaning
// of the int return value is not stated here. Confirm both.
int cactus_complete(
    cactus_model_t model,        // Model handle from cactus_init
    const char* messages,        // Chat messages as JSON array
    char* response,              // Output buffer
    size_t response_size,        // Buffer size
    const char* options,         // Generation options JSON (or nullptr)
    const char* tools,           // Tool definitions JSON (or nullptr)
    // Per-token callback: receives the token text, its id, and `user_data`.
    // NOTE(review): presumably may be nullptr like options/tools; confirm.
    void (*callback)(const char* token, int token_id, void* user_data),
    void* user_data              // User data passed to callback
);

Options JSON

{
    "max_tokens": 512,
    "stop_sequences": ["<|im_end|>"],
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 40
}

Response JSON

{
    "success": true,
    "error": null,
    "cloud_handoff": false,
    "response": "The capital of France is Paris.",
    "function_calls": [],
    "confidence": 0.8193,
    "time_to_first_token_ms": 45.23,
    "total_time_ms": 163.67,
    "prefill_tps": 1621.89,
    "decode_tps": 168.42,
    "ram_usage_mb": 245.67,
    "prefill_tokens": 28,
    "decode_tokens": 50,
    "total_tokens": 78
}

Graph API

// Computation-graph builder and executor: declare input tensors and ops,
// bind input data, execute, then read outputs.
class CactusGraph {
    // Graph construction: each call returns a Tensor usable in later calls.
    Tensor input(std::vector<int> shape, Precision precision);
    Tensor matmul(Tensor a, Tensor b, bool transpose_b);
    Tensor transpose(Tensor t);

    // Data binding and execution.
    // NOTE(review): ownership/lifetime of the `data` pointer and of the
    // pointer returned by get_output is not stated here; confirm.
    void set_input(Tensor t, void* data, Precision precision);
    void execute();
    void* get_output(Tensor t);
    void hard_reset();
};

Precision Enum

// Numeric precisions accepted by CactusGraph::input and CactusGraph::set_input.
enum class Precision {
    FP32,   // Full precision floating point
    FP16,   // Half precision
    INT8,   // 8-bit quantized
    INT4    // 4-bit quantized
};

See the GitHub repository for the complete source code and additional usage examples.