This page summarizes the core integration patterns of LiteRT-LM. From the low-level hardware detection process to a complete C++ Demo, it provides a comprehensive engineering reference.
The hardware detection flow is the standard logic run at system startup; it ensures the model runs with optimal performance in the current environment.
This section reveals the isolation logic of "task stacking" and "asynchronous driving," which is core to high-performance DLL invocation.
// --- Full C++ Demo Implementation ---
#include <iostream>
#include <windows.h>
#include <string>
#include <atomic>
// Engine configuration, handed by value to the engine-creation entry point
// (see PN_CreateEngine). The member order must stay as declared because the
// struct crosses the DLL boundary and is filled positionally by the demo.
struct LiteRtLm_Config {
    const char* model_path;    // model file location (the demo passes a .litertlm path)
    const char* backend;       // backend selector string (the demo passes "gpu")
    int max_num_tokens;        // token limit; exact semantics defined by the wrapper — TODO confirm
    int num_threads;           // thread count requested from the wrapper
    int bEnableBenchmark;      // presumably a 0/1 flag stored as int — confirm against the wrapper header
    int bOptimizeShader;       // presumably a 0/1 flag stored as int — confirm against the wrapper header
};
// Per-request sampling settings, handed by value to the inference entry point
// (see PN_RunInference). The member order must stay as declared because the
// struct crosses the DLL boundary and is filled positionally by the demo.
struct LiteRtLm_SamplingParams {
    float temperature;              // sampling temperature
    float top_p;                    // nucleus (top-p) threshold
    int top_k;                      // top-k cutoff
    int max_tokens;                 // generation length limit — exact semantics defined by the wrapper
    int constraint_type;            // constraint mode selector; the demo passes 0 — meaning of values TODO confirm
    const char* constraint_string;  // constraint payload; the demo passes nullptr
};
// One streamed result, delivered by value to a LiteRtLmCallback.
// The member order must stay as declared because the struct crosses the DLL
// boundary.
struct LiteRtLm_Result {
    const char* text_chunk;       // text fragment for this callback; may be null (the demo checks before printing)
    const char* full_json_chunk;  // presumably the same chunk in JSON form — TODO confirm
    const char* error_msg;        // error description, if any — null/empty convention TODO confirm
    int bIsDone;                  // non-zero marks the final result of a request (the demo stops waiting on it)
    float tokens_per_sec;         // throughput figure reported by the wrapper
};
typedef void (*LiteRtLmCallback)(LiteRtLm_Result result, void* user_ptr);
typedef void* (*PN_CreateEngine)(LiteRtLm_Config config);
typedef void (*PN_DestroyEngine)(void* engine_ptr);
typedef void* (*PN_CreateConversation)(void* engine_ptr);
typedef void (*PN_AppendUserMessage)(void* conv_ptr, const char* text);
typedef void (*PN_RunInference)(void* conv_ptr, LiteRtLm_SamplingParams params, LiteRtLmCallback callback, void* user_ptr);
typedef int (*PN_WaitUntilDone)(void* engine_ptr, int timeout_sec);
// Completion flag for the request in flight: main() clears it before each
// RunInference call and polls it afterwards; MyCallback raises it when a
// result arrives with bIsDone set. Atomic because the callback may run on a
// different thread than main() (assumption — confirm with the wrapper docs).
std::atomic_bool g_IsDone{false};
// Streaming callback handed to RunInference.
//
// Prints each non-null text chunk to stdout as it arrives (flushing so partial
// output is visible immediately), reports any non-empty error message on
// stderr, and raises g_IsDone once a result is flagged as done.
// The user pointer is not used by this demo.
void MyCallback(LiteRtLm_Result res, void* /*user_ptr*/) {
    if (res.text_chunk) {
        std::cout << res.text_chunk << std::flush;
    }
    // Surface failures instead of dropping them silently; otherwise a failed
    // request looks like an empty answer.
    // NOTE(review): whether an error result also arrives with bIsDone set is
    // not visible here. If it does not, the caller's wait loop will not end
    // after an error — confirm against the wrapper's contract.
    if (res.error_msg && res.error_msg[0] != '\0') {
        std::cerr << "\n[LiteRT-LM error] " << res.error_msg << std::endl;
    }
    if (res.bIsDone) {
        g_IsDone = true;
    }
}
int main() {
HMODULE hDll = LoadLibraryA("litert_lm_wrapper.dll");
if (!hDll) return 1;
auto CreateEngine = (PN_CreateEngine)GetProcAddress(hDll, "LiteRtLm_CreateEngine");
auto CreateConversation = (PN_CreateConversation)GetProcAddress(hDll, "LiteRtLm_CreateConversation");
auto RunInference = (PN_RunInference)GetProcAddress(hDll, "LiteRtLm_RunInference");
auto WaitUntilDone = (PN_WaitUntilDone)GetProcAddress(hDll, "LiteRtLm_WaitUntilDone");
LiteRtLm_Config config = {"D:\\gemma-2b.litertlm", "gpu", 2048, 8, 0, 1};
void* engine = CreateEngine(config);
void* conv = CreateConversation(engine);
std::string input;
while (true) {
std::cout << "\nUser >> ";
if (!std::getline(cin, input) || input == "exit") break;
LiteRtLm_AppendUserMessage(conv, input.c_str());
LiteRtLm_SamplingParams params = {0.7f, 0.9f, 40, 512, 0, nullptr};
std::cout << "AI >> ";
g_IsDone = false;
RunInference(conv, params, MyCallback, nullptr);
while (!g_IsDone) { WaitUntilDone(engine, 1); }
}
FreeLibrary(hDll);
return 0;
}
cl.exe /EHsc main.cpp /link User32.lib