API Reference
Complete API reference for all Cactus SDKs
Complete class and type definitions for each SDK.
CactusLM (TypeScript)
class CactusLM {
constructor(options?: { model?: string; options?: { quantization?: 'int4' | 'int8'; pro?: boolean } });
download(): Promise<void>;
init(): Promise<void>;
destroy(): void;
complete(params: {
messages: Array<{ role: string; content: string; images?: string[] }>;
tools?: Tool[];
onToken?: (token: string) => void;
}): Promise<CompletionResult>;
embed(params: CactusLMEmbedParams): Promise<CactusLMEmbedResult>;
imageEmbed(params: { imagePath: string }): Promise<CactusLMImageEmbedResult>;
}
useCactusLM Hook
function useCactusLM(options?: { model?: string }): {
isDownloaded: boolean;
isDownloading: boolean;
downloadProgress: number;
isGenerating: boolean;
completion: string;
download(): Promise<void>;
complete(params: { messages: Message[] }): Promise<void>;
};
CactusSTT
class CactusSTT {
constructor(options: { model: string });
init(): Promise<void>;
destroy(): void;
transcribe(params: {
audio: string | number[];
onToken?: (token: string) => void;
}): Promise<TranscriptionResult>;
streamTranscribeStart(options?: {
confirmationThreshold?: number;
minChunkSize?: number;
}): Promise<void>;
streamTranscribeProcess(params: {
audio: number[];
}): Promise<StreamResult>;
streamTranscribeStop(): Promise<TranscriptionResult>;
audioEmbed(params: { audioPath: string }): Promise<CactusSTTAudioEmbedResult>;
}
useCactusSTT Hook
function useCactusSTT(options: { model: string }): {
transcription: string;
isTranscribing: boolean;
transcribe(params: { audio: string | number[] }): Promise<void>;
};
CactusVAD
class CactusVAD {
constructor(options: { model: string });
vad(params: {
audio: string | number[];
options?: {
threshold?: number;
minSpeechDurationMs?: number;
};
}): Promise<VADResult>;
}
useCactusVAD Hook
function useCactusVAD(options: { model: string }): {
vad(params: { audio: string | number[] }): Promise<void>;
};
CactusIndex
class CactusIndex {
constructor(name: string, embeddingDim: number);
init(): Promise<void>;
destroy(): void;
add(params: {
ids: number[];
documents: string[];
embeddings: number[][];
metadatas?: string[];
}): Promise<void>;
query(params: {
embeddings: number[][];
options?: { topK?: number };
}): Promise<QueryResult>;
}
useCactusIndex Hook
function useCactusIndex(options: { name: string; embeddingDim: number }): {
init(): Promise<void>;
};
Types
interface CompletionResult {
response: string;
cloudHandoff: boolean;
functionCalls?: Array<{ name: string; arguments: Record<string, any> }>;
}
interface TranscriptionResult {
text: string;
segments?: Array<{ start: number; end: number; text: string }>;
}
interface StreamResult {
confirmed: string;
pending: string;
cloudResult?: string;
}
interface VADResult {
segments: Array<{ start: number; end: number }>;
}
interface QueryResult {
ids: number[];
scores: number[];
}
interface CactusLMEmbedParams {
text: string;
normalize?: boolean;
}
interface CactusLMEmbedResult {
embedding: number[];
}
interface CactusLMImageEmbedResult {
embedding: number[];
}
interface CactusSTTAudioEmbedResult {
embedding: number[];
}
interface Tool {
name: string;
description: string;
parameters: {
type: 'object';
properties: Record<string, { type: string; description?: string }>;
required?: string[];
};
}
Cactus (Dart)
class Cactus {
static Cactus create(String modelPath, {String? corpusDir});
CompletionResult complete(
String prompt,
{CompletionOptions options, void Function(String, int)? onToken}
);
CompletionResult completeMessages(
List<Message> messages,
{CompletionOptions options, List<Map<String, dynamic>>? tools, void Function(String, int)? onToken}
);
TranscriptionResult transcribe(String audioPath, {String? prompt, TranscriptionOptions options});
TranscriptionResult transcribePcm(Uint8List pcmData, {String? prompt, TranscriptionOptions options});
List<double> embed(String text, {bool normalize = true});
List<double> imageEmbed(String imagePath);
List<double> audioEmbed(String audioPath);
String ragQuery(String query, {int topK = 5});
List<int> tokenize(String text);
String scoreWindow(List<int> tokens, int start, int end, int context);
StreamTranscriber createStreamTranscriber();
void reset();
void stop();
void dispose();
static String getLastError();
}
Message
class Message {
static Message system(String content);
static Message user(String content);
static Message assistant(String content);
}
CompletionOptions
class CompletionOptions {
final double temperature;
final double topP;
final int topK;
final int maxTokens;
final List<String> stopSequences;
final double confidenceThreshold;
static const defaultOptions;
}
CompletionResult
class CompletionResult {
final String text;
final List<Map<String, dynamic>>? functionCalls;
final int promptTokens;
final int completionTokens;
final double timeToFirstToken;
final double totalTime;
final double prefillTokensPerSecond;
final double decodeTokensPerSecond;
final double confidence;
final bool needsCloudHandoff;
}
TranscriptionResult
class TranscriptionResult {
final String text;
final List<Map<String, dynamic>>? segments;
final double totalTime;
}
StreamTranscriber
class StreamTranscriber {
void insert(Uint8List pcmData);
TranscriptionResult process({String? language});
TranscriptionResult finalize();
void dispose();
}
CactusIndex
class CactusIndex {
static CactusIndex create(String indexDir, {required int embeddingDim});
void add({
required List<int> ids,
required List<String> documents,
required List<List<double>> embeddings,
List<String>? metadatas
});
void delete(List<int> ids);
List<IndexResult> query(List<double> embedding, {int topK = 5});
void compact();
void dispose();
}
class IndexResult {
final int id;
final double score;
}
Cactus (Kotlin)
object Cactus {
fun create(modelPath: String, corpusDir: String? = null): Cactus
}
fun complete(
prompt: String,
options: CompletionOptions = CompletionOptions()
): CompletionResult
fun complete(
messages: List<Message>,
options: CompletionOptions = CompletionOptions(),
tools: List<Map<String, Any>>? = null,
callback: TokenCallback? = null
): CompletionResult
fun transcribe(
audioPath: String,
prompt: String? = null,
language: String? = null,
translate: Boolean = false
): TranscriptionResult
fun transcribe(
pcmData: ByteArray,
prompt: String? = null,
language: String? = null,
translate: Boolean = false
): TranscriptionResult
fun embed(text: String, normalize: Boolean = true): FloatArray
fun imageEmbed(imagePath: String): FloatArray
fun audioEmbed(audioPath: String): FloatArray
fun ragQuery(query: String, topK: Int = 5): String
fun tokenize(text: String): IntArray
fun scoreWindow(tokens: IntArray, start: Int, end: Int, context: Int): String
fun createStreamTranscriber(): StreamTranscriber
fun reset()
fun stop()
fun close()
Message
data class Message(val role: String, val content: String) {
companion object {
fun system(content: String): Message
fun user(content: String): Message
fun assistant(content: String): Message
}
}
CompletionOptions
data class CompletionOptions(
val temperature: Float = 0.7f,
val topP: Float = 0.9f,
val topK: Int = 40,
val maxTokens: Int = 512,
val stopSequences: List<String> = emptyList(),
val confidenceThreshold: Float = 0f
)
CompletionResult
data class CompletionResult(
val text: String,
val functionCalls: List<Map<String, Any>>?,
val promptTokens: Int,
val completionTokens: Int,
val timeToFirstToken: Double,
val totalTime: Double,
val prefillTokensPerSecond: Double,
val decodeTokensPerSecond: Double,
val confidence: Double,
val needsCloudHandoff: Boolean
)
TranscriptionResult
data class TranscriptionResult(
val text: String,
val segments: List<Map<String, Any>>?,
val totalTime: Double
)
TokenCallback
fun interface TokenCallback {
fun onToken(token: String, tokenId: Int)
}
StreamTranscriber
class StreamTranscriber : Closeable {
fun insert(pcmData: ByteArray)
fun process(language: String? = null): TranscriptionResult
fun finalize(): TranscriptionResult
override fun close()
}
CactusIndex
class CactusIndex : Closeable {
companion object {
fun create(indexDir: String, embeddingDim: Int): CactusIndex
}
fun add(
ids: IntArray,
documents: Array<String>,
embeddings: Array<FloatArray>,
metadatas: Array<String>? = null
)
fun delete(ids: IntArray)
fun query(embedding: FloatArray, topK: Int = 5): List<IndexResult>
fun compact()
override fun close()
}
data class IndexResult(val id: Int, val score: Float)
Core Functions (C)
// Initialize a model
cactus_model_t cactus_init(
const char* model_path, // Path to weight folder
const char* corpus_dir // Optional: RAG corpus directory (or nullptr)
);
// Run completion
int cactus_complete(
cactus_model_t model, // Model handle from cactus_init
const char* messages, // Chat messages as JSON array
char* response, // Output buffer
size_t response_size, // Buffer size
const char* options, // Generation options JSON (or nullptr)
const char* tools, // Tool definitions JSON (or nullptr)
void (*callback)(const char* token, int token_id, void* user_data),
void* user_data // User data passed to callback
);
Options JSON
{
"max_tokens": 512,
"stop_sequences": ["<|im_end|>"],
"temperature": 0.7,
"top_p": 0.9,
"top_k": 40
}
Response JSON
{
"success": true,
"error": null,
"cloud_handoff": false,
"response": "The capital of France is Paris.",
"function_calls": [],
"confidence": 0.8193,
"time_to_first_token_ms": 45.23,
"total_time_ms": 163.67,
"prefill_tps": 1621.89,
"decode_tps": 168.42,
"ram_usage_mb": 245.67,
"prefill_tokens": 28,
"decode_tokens": 50,
"total_tokens": 78
}
Graph API (C++)
class CactusGraph {
Tensor input(std::vector<int> shape, Precision precision);
Tensor matmul(Tensor a, Tensor b, bool transpose_b);
Tensor transpose(Tensor t);
void set_input(Tensor t, void* data, Precision precision);
void execute();
void* get_output(Tensor t);
void hard_reset();
};
Precision Enum
enum class Precision {
FP32, // Full precision floating point
FP16, // Half precision
INT8, // 8-bit quantized
INT4 // 4-bit quantized
};
See the GitHub repository for the complete source code and additional usage examples.