EN 中文

实战集成导引

本页汇总了 LiteRT-LM 的核心集成模式。从底层的硬件探测流程到完整的 C++ Demo，提供了全维度的工程参考。

1. 硬件探测与初始化流

```mermaid
graph TD
    Start[App Startup] --> Probe[GetAvailableBackends]
    Probe --> Detect{检测计算设备}
    Detect -->|Found GPU| ConfigGPU["Config.backend = 'gpu'"]
    Detect -->|CPU Only| ConfigCPU["Config.backend = 'cpu'"]
    ConfigGPU --> Create[CreateEngine]
    ConfigCPU --> Create
```

这是系统启动时的标准逻辑，用于确保模型在当前环境下以最优性能运行。

2. 消息堆叠与推理时序

```mermaid
sequenceDiagram
    participant User as User / App
    participant DLL as Wrapper DLL
    User->>DLL: AppendUserMessage (Stacking)
    User->>DLL: RunInference (Trigger)
    loop Driving Loop
        User->>DLL: WaitUntilDone (Crankshaft Drive)
        DLL-->>User: Streaming Callback (Result)
    end
```

上图揭示了“任务堆叠”与“异步驱动”的分离逻辑，这是高性能 DLL 调用的核心。

3. 完整演示源码 (main.cpp)

WinyunqDebug/main.cpp

// --- 完整 C++ Demo 实现 ---
#include <iostream>
#include <windows.h>
#include <string>
#include <atomic>

// [Some typedefs are omitted here for brevity; in real applications refer to the api/ page.]

// Engine creation settings, passed by value to the CreateEngine export.
struct LiteRtLm_Config {
    const char* model_path;   // path of the model file to load
    const char* backend;      // backend name string (the demo passes "gpu")
    int max_num_tokens;
    int num_threads;
    int bEnableBenchmark;     // int-typed flag
    int bOptimizeShader;      // int-typed flag
};

// Sampling settings, passed by value to the RunInference export.
struct LiteRtLm_SamplingParams {
    float temperature;
    float top_p;
    int top_k;
    int max_tokens;
    int constraint_type;
    const char* constraint_string;  // may be null (the demo passes nullptr)
};

// Payload delivered to the streaming callback.
struct LiteRtLm_Result {
    const char* text_chunk;       // may be null; the demo checks before printing
    const char* full_json_chunk;
    const char* error_msg;
    int bIsDone;                  // non-zero marks completion (the demo stops waiting on it)
    float tokens_per_sec;
};

// Callback signature accepted by RunInference; user_ptr is the pointer supplied to RunInference.
using LiteRtLmCallback = void (*)(LiteRtLm_Result result, void* user_ptr);

// Function-pointer types for the wrapper DLL exports resolved with GetProcAddress.
using PN_CreateEngine = void* (*)(LiteRtLm_Config config);
using PN_DestroyEngine = void (*)(void* engine_ptr);
using PN_CreateConversation = void* (*)(void* engine_ptr);
using PN_AppendUserMessage = void (*)(void* conv_ptr, const char* text);
using PN_RunInference = void (*)(void* conv_ptr, LiteRtLm_SamplingParams params,
                                 LiteRtLmCallback callback, void* user_ptr);
using PN_WaitUntilDone = int (*)(void* engine_ptr, int timeout_sec);

// Completion flag: set by MyCallback when a result arrives with bIsDone set,
// and polled by the wait loop in main().
std::atomic<bool> g_IsDone{false};

// Streaming callback passed to RunInference.
//   res      - result chunk delivered by the wrapper DLL.
//   user_ptr - unused here (the demo passes nullptr).
// Prints any reported error to stderr (previously ignored), echoes the text
// chunk to stdout, and raises g_IsDone once the result is marked as done.
void MyCallback(LiteRtLm_Result res, void*) {
    // Surface failures instead of dropping them silently.
    // NOTE(review): whether the wrapper also sets bIsDone together with error_msg
    // is not visible here, so the completion flag is still driven by bIsDone only.
    if (res.error_msg && res.error_msg[0] != '\0') {
        std::cerr << "\n[LiteRT-LM error] " << res.error_msg << std::endl;
    }
    if (res.text_chunk) std::cout << res.text_chunk << std::flush;
    if (res.bIsDone) g_IsDone = true;
}

int main() {
    HMODULE hDll = LoadLibraryA("litert_lm_wrapper.dll");
    if (!hDll) return 1;

    auto CreateEngine = (PN_CreateEngine)GetProcAddress(hDll, "LiteRtLm_CreateEngine");
    auto CreateConversation = (PN_CreateConversation)GetProcAddress(hDll, "LiteRtLm_CreateConversation");
    auto RunInference = (PN_RunInference)GetProcAddress(hDll, "LiteRtLm_RunInference");
    auto WaitUntilDone = (PN_WaitUntilDone)GetProcAddress(hDll, "LiteRtLm_WaitUntilDone");

    LiteRtLm_Config config = {"D:\\gemma-2b.litertlm", "gpu", 2048, 8, 0, 1};
    void* engine = CreateEngine(config);
    void* conv = CreateConversation(engine);

    std::string input;
    while (true) {
        std::cout << "\nUser >> ";
        if (!std::getline(cin, input) || input == "exit") break;

        LiteRtLm_AppendUserMessage(conv, input.c_str());
        LiteRtLm_SamplingParams params = {0.7f, 0.9f, 40, 512, 0, nullptr};
        
        std::cout << "AI >> ";
        g_IsDone = false;
        RunInference(conv, params, MyCallback, nullptr);

        while (!g_IsDone) { WaitUntilDone(engine, 1); }
    }
    FreeLibrary(hDll);
    return 0;
}

4. 部署与编译

编译指令 (MSVC)

cl.exe /EHsc main.cpp /link User32.lib

运行时依赖

• litert_lm_wrapper.dll
• libLiteRt.dll
• libLiteRtWebGpuAccelerator.dll
• dxcompiler.dll / dxil.dll