本页汇总了 LiteRT-LM 的核心集成模式。从底层的硬件探测流程到完整的 C++ Demo,提供了全维度的工程参考。
硬件探测流程是系统启动时的标准逻辑,用于确保模型在当前环境下以最优性能运行。
下面的调用流程揭示了“任务堆叠”与“异步驱动”的分离逻辑,这是高性能 DLL 调用的核心。
// --- 完整 C++ Demo 实现 ---
#include <iostream>
#include <windows.h>
#include <string>
#include <atomic>
// [此处省略部分 typedef 以节省篇幅,实际应用中请参考 api/ 页面]
// Plain-data structs and function-pointer aliases used to call into the
// LiteRT-LM wrapper DLL through GetProcAddress. Field order and types must stay
// exactly as declared here, because the structs are passed by value across the
// DLL boundary. Field semantics are defined by the API page, not by this file.

// Engine creation settings, passed by value to PN_CreateEngine.
struct LiteRtLm_Config {
    const char* model_path;
    const char* backend;
    int max_num_tokens;
    int num_threads;
    int bEnableBenchmark;
    int bOptimizeShader;
};

// Per-request sampling settings, passed by value to PN_RunInference.
struct LiteRtLm_SamplingParams {
    float temperature;
    float top_p;
    int top_k;
    int max_tokens;
    int constraint_type;
    const char* constraint_string;
};

// Payload handed to the streaming callback.
struct LiteRtLm_Result {
    const char* text_chunk;
    const char* full_json_chunk;
    const char* error_msg;
    int bIsDone;
    float tokens_per_sec;
};

// Callback receiving a result plus the opaque user pointer given to PN_RunInference.
using LiteRtLmCallback = void (*)(LiteRtLm_Result result, void* user_ptr);

// Signatures of the DLL entry points; engine and conversation handles are opaque void*.
using PN_CreateEngine = void* (*)(LiteRtLm_Config config);
using PN_DestroyEngine = void (*)(void* engine_ptr);
using PN_CreateConversation = void* (*)(void* engine_ptr);
using PN_AppendUserMessage = void (*)(void* conv_ptr, const char* text);
using PN_RunInference = void (*)(void* conv_ptr, LiteRtLm_SamplingParams params, LiteRtLmCallback callback, void* user_ptr);
using PN_WaitUntilDone = int (*)(void* engine_ptr, int timeout_sec);
// Completion flag for the request in flight: main() clears it before each
// RunInference call and polls it afterwards; MyCallback raises it.
// NOTE(review): atomic presumably because the callback fires on another thread — confirm.
std::atomic<bool> g_IsDone{false};

/// Streaming callback handed to RunInference.
///
/// Writes each text chunk to stdout as soon as it arrives, reports a non-empty
/// error message on stderr (previously dropped silently), and raises g_IsDone
/// once the result is flagged as final.
///
/// @param res  Result for this callback invocation; any pointer field may be null.
/// The second parameter (user pointer) is unused.
void MyCallback(LiteRtLm_Result res, void*) {
    // Flush after every chunk so streamed output shows up immediately.
    if (res.text_chunk) std::cout << res.text_chunk << std::flush;

    // Surface engine errors instead of swallowing them. Whether an error also
    // sets bIsDone is decided by the DLL, so completion handling is unchanged.
    if (res.error_msg && res.error_msg[0] != '\0') {
        std::cerr << "\n[LiteRT-LM error] " << res.error_msg << std::endl;
    }

    if (res.bIsDone) g_IsDone = true;
}
int main() {
HMODULE hDll = LoadLibraryA("litert_lm_wrapper.dll");
if (!hDll) return 1;
auto CreateEngine = (PN_CreateEngine)GetProcAddress(hDll, "LiteRtLm_CreateEngine");
auto CreateConversation = (PN_CreateConversation)GetProcAddress(hDll, "LiteRtLm_CreateConversation");
auto RunInference = (PN_RunInference)GetProcAddress(hDll, "LiteRtLm_RunInference");
auto WaitUntilDone = (PN_WaitUntilDone)GetProcAddress(hDll, "LiteRtLm_WaitUntilDone");
LiteRtLm_Config config = {"D:\\gemma-2b.litertlm", "gpu", 2048, 8, 0, 1};
void* engine = CreateEngine(config);
void* conv = CreateConversation(engine);
std::string input;
while (true) {
std::cout << "\nUser >> ";
if (!std::getline(cin, input) || input == "exit") break;
LiteRtLm_AppendUserMessage(conv, input.c_str());
LiteRtLm_SamplingParams params = {0.7f, 0.9f, 40, 512, 0, nullptr};
std::cout << "AI >> ";
g_IsDone = false;
RunInference(conv, params, MyCallback, nullptr);
while (!g_IsDone) { WaitUntilDone(engine, 1); }
}
FreeLibrary(hDll);
return 0;
}
cl.exe /EHsc main.cpp /link User32.lib