[{"data":1,"prerenderedAt":628},["ShallowReactive",2],{"docs-en-/en/docs/services/local-llm-services":3},{"id":4,"title":5,"body":6,"description":16,"extension":621,"meta":622,"navigation":623,"path":624,"seo":625,"stem":626,"__hash__":627},"docs/en/docs/services/local-llm-services.md","Local LLM Services",{"type":7,"value":8,"toc":609},"minimark",[9,13,17,22,51,60,64,69,72,101,106,249,253,273,277,280,302,305,392,396,416,420,423,446,450,453,480,483,500,504,507,577,585,588,605],[10,11,5],"h1",{"id":12},"local-llm-services",[14,15,16],"p",{},"Doco Translate supports running translations entirely on your Mac using local AI models. This keeps your documents private — no data leaves your machine.",[18,19,21],"h2",{"id":20},"why-use-local-models","Why Use Local Models?",[23,24,25,33,39,45],"ul",{},[26,27,28,32],"li",{},[29,30,31],"strong",{},"Privacy"," — Your documents never leave your Mac. No text is sent to external servers.",[26,34,35,38],{},[29,36,37],{},"No API costs"," — Run translations without paying per-request fees.",[26,40,41,44],{},[29,42,43],{},"Offline capability"," — Translate documents without an internet connection (after downloading models).",[26,46,47,50],{},[29,48,49],{},"Full control"," — Choose exactly which model to use and configure its behavior.",[52,53,54],"blockquote",{},[14,55,56,59],{},[29,57,58],{},"Trade-off:"," Local models generally produce lower quality translations compared to cloud-based AI services, and translation speed depends on your Mac's hardware (CPU, GPU, and RAM).",[18,61,63],{"id":62},"supported-local-services","Supported Local Services",[65,66,68],"h3",{"id":67},"ollama","Ollama",[14,70,71],{},"Ollama is a popular open-source tool for running large language models locally on macOS, Linux, and Windows.",[23,73,74,84,90],{},[26,75,76,79,80],{},[29,77,78],{},"Default Host:"," ",[81,82,83],"code",{},"http://localhost:11434",[26,85,86,89],{},[29,87,88],{},"API Key:"," Not required (unless authentication is enabled on your Ollama instance)",[26,91,92,79,95],{},[29,93,94],{},"Website:",[96,97,98],"a",{"href":98,"rel":99},"https://ollama.com",[100],"nofollow",[102,103,105],"h4",{"id":104},"setup","Setup",[107,108,109,149,194,212],"ol",{},[26,110,111,114],{},[29,112,113],{},"Install Ollama:",[23,115,116],{},[26,117,118,119,123,124],{},"Download from ",[96,120,122],{"href":98,"rel":121},[100],"ollama.com"," or install via Homebrew:\n",[125,126,131],"pre",{"className":127,"code":128,"language":129,"meta":130,"style":130},"language-bash shiki shiki-themes github-light github-dark","brew install ollama\n","bash","",[81,132,133],{"__ignoreMap":130},[134,135,138,142,146],"span",{"class":136,"line":137},"line",1,[134,139,141],{"class":140},"sScJk","brew",[134,143,145],{"class":144},"sZZnC"," install",[134,147,148],{"class":144}," ollama\n",[26,150,151,154,155,170,173,174],{},[29,152,153],{},"Pull a model:","\nOpen Terminal and run:",[125,156,158],{"className":127,"code":157,"language":129,"meta":130,"style":130},"ollama pull qwen3.6\n",[81,159,160],{"__ignoreMap":130},[134,161,162,164,167],{"class":136,"line":137},[134,163,67],{"class":140},[134,165,166],{"class":144}," pull",[134,168,169],{"class":144}," qwen3.6\n",[171,172],"br",{},"Popular models for translation:",[23,175,176,182,188],{},[26,177,178,181],{},[81,179,180],{},"qwen3.6"," — Strong multilingual support, especially for Chinese and Asian languages",[26,183,184,187],{},[81,185,186],{},"llama3.2"," — General-purpose, good balance of speed and quality",[26,189,190,193],{},[81,191,192],{},"gemma4"," — Google's open model, good for European languages",[26,195,196,199,200],{},[29,197,198],{},"Start Ollama:","\nOllama runs automatically after installation. If not, start it manually:",[125,201,203],{"className":127,"code":202,"language":129,"meta":130,"style":130},"ollama serve\n",[81,204,205],{"__ignoreMap":130},[134,206,207,209],{"class":136,"line":137},[134,208,67],{"class":140},[134,210,211],{"class":144}," serve\n",[26,213,214,217],{},[29,215,216],{},"Configure in Doco Translate:",[23,218,219,226,232,239,242],{},[26,220,221,222,225],{},"Go to ",[29,223,224],{},"Settings → Services → Ollama",".",[26,227,228,229,231],{},"The default host (",[81,230,83],{},") should work out of the box.",[26,233,234,235,238],{},"Use ",[29,236,237],{},"Fetch model list"," to automatically detect models you've pulled.",[26,240,241],{},"Select a model from the dropdown.",[26,243,244,245,248],{},"Click ",[29,246,247],{},"Verify service"," to test the connection.",[102,250,252],{"id":251},"tips-for-ollama","Tips for Ollama",[23,254,255,261,267],{},[26,256,257,260],{},[29,258,259],{},"Model size matters:"," Larger models (70B+) produce better translations but require more RAM and run slower. Start with 7B–8B models for a good balance.",[26,262,263,266],{},[29,264,265],{},"GPU acceleration:"," Ollama automatically uses Apple Silicon GPU acceleration on M-series Macs.",[26,268,269,272],{},[29,270,271],{},"Keep Ollama running:"," Make sure the Ollama service is running before using it in Doco Translate.",[65,274,276],{"id":275},"lm-studio","LM Studio",[14,278,279],{},"LM Studio is a desktop application for discovering, downloading, and running local LLMs with a graphical interface.",[23,281,282,289,294],{},[26,283,284,79,286],{},[29,285,78],{},[81,287,288],{},"http://localhost:1234",[26,290,291,293],{},[29,292,88],{}," Not required",[26,295,296,79,298],{},[29,297,94],{},[96,299,300],{"href":300,"rel":301},"https://lmstudio.ai",[100],[102,303,105],{"id":304},"setup-1",[107,306,307,320,345,367],{},[26,308,309,312],{},[29,310,311],{},"Install LM Studio:",[23,313,314],{},[26,315,118,316,225],{},[96,317,319],{"href":300,"rel":318},[100],"lmstudio.ai",[26,321,322,325],{},[29,323,324],{},"Download a model:",[23,326,327,330,339],{},[26,328,329],{},"Open LM Studio.",[26,331,332,333,335,336,338],{},"Use the search bar to find a model (e.g., ",[81,334,180],{},", ",[81,337,192],{},").",[26,340,244,341,344],{},[29,342,343],{},"Download"," on your preferred model variant.",[26,346,347,350],{},[29,348,349],{},"Start the local server:",[23,351,352,359,362],{},[26,353,354,355,358],{},"In LM Studio, go to the ",[29,356,357],{},"Local Server"," tab (left sidebar).",[26,360,361],{},"Select the model you downloaded.",[26,363,244,364,225],{},[29,365,366],{},"Start Server",[26,368,369,371],{},[29,370,216],{},[23,372,373,378,383,388],{},[26,374,221,375,225],{},[29,376,377],{},"Settings → Services → LM Studio",[26,379,228,380,382],{},[81,381,288],{},") should work if LM Studio's server is running.",[26,384,234,385,387],{},[29,386,237],{}," to detect the loaded model, or enter the model name manually.",[26,389,244,390,248],{},[29,391,247],{},[102,393,395],{"id":394},"tips-for-lm-studio","Tips for LM Studio",[23,397,398,404,410],{},[26,399,400,403],{},[29,401,402],{},"Only one model at a time:"," LM Studio loads one model into memory at a time. Switching models requires unloading the current one first.",[26,405,406,409],{},[29,407,408],{},"Quantization:"," LM Studio supports various quantization levels (Q4, Q5, Q8). Lower quantization (Q4) uses less memory but may reduce quality.",[26,411,412,415],{},[29,413,414],{},"Server must be running:"," The LM Studio local server must be active for Doco Translate to connect.",[18,417,419],{"id":418},"configuring-local-services","Configuring Local Services",[14,421,422],{},"Local service settings in Doco Translate are similar to cloud AI services, with a few differences:",[23,424,425,431,437],{},[26,426,427,430],{},[29,428,429],{},"No API key required"," — Local services don't need authentication by default. If you've configured authentication on your local service, you can enter the credentials.",[26,432,433,436],{},[29,434,435],{},"Custom host"," — You can change the host if your local service runs on a different port or machine.",[26,438,439,442,443,445],{},[29,440,441],{},"Model selection"," — Use ",[29,444,237],{}," to auto-detect available models, or add models manually.",[18,447,449],{"id":448},"custom-local-services","Custom Local Services",[14,451,452],{},"If you run another local LLM server that's compatible with the OpenAI API format:",[107,454,455,464,467,474,477],{},[26,456,221,457,460,461,225],{},[29,458,459],{},"Settings → Services"," and click ",[29,462,463],{},"Custom Service",[26,465,466],{},"Enter a name for your service.",[26,468,469,470,473],{},"Select the ",[29,471,472],{},"OpenAI"," protocol.",[26,475,476],{},"Enter the host address of your local server.",[26,478,479],{},"Configure the model name and other settings as needed.",[14,481,482],{},"This works with any OpenAI-compatible server, including:",[23,484,485,488,491,494,497],{},[26,486,487],{},"vLLM",[26,489,490],{},"text-generation-webui",[26,492,493],{},"LocalAI",[26,495,496],{},"llama.cpp server",[26,498,499],{},"Any custom API server",[18,501,503],{"id":502},"performance-considerations","Performance Considerations",[14,505,506],{},"Translation speed with local models depends on several factors:",[508,509,510,523],"table",{},[511,512,513],"thead",{},[514,515,516,520],"tr",{},[517,518,519],"th",{},"Factor",[517,521,522],{},"Impact",[524,525,526,537,547,557,567],"tbody",{},[514,527,528,534],{},[529,530,531],"td",{},[29,532,533],{},"Model size",[529,535,536],{},"Smaller models (7B) are faster; larger models (70B+) are slower but more accurate",[514,538,539,544],{},[529,540,541],{},[29,542,543],{},"Quantization",[529,545,546],{},"Lower quantization = faster but less accurate",[514,548,549,554],{},[529,550,551],{},[29,552,553],{},"Hardware",[529,555,556],{},"Apple Silicon M-series chips provide the best performance",[514,558,559,564],{},[529,560,561],{},[29,562,563],{},"RAM",[529,565,566],{},"Larger models require more RAM (8B ≈ 5GB, 70B ≈ 40GB)",[514,568,569,574],{},[529,570,571],{},[29,572,573],{},"Concurrency",[529,575,576],{},"Lower concurrency settings (1–2) work better for local models to avoid overloading",[52,578,579],{},[14,580,581,584],{},[29,582,583],{},"Recommendation:"," Start with a 7B–8B model and increase model size only if quality is insufficient. Set Max Concurrent Pages to 1 or 2 for local services to avoid overloading your Mac.",[586,587],"hr",{},[14,589,590,79,593,597,598,79,601],{},[29,591,592],{},"Previous:",[96,594,596],{"href":595},"./ai-services","AI Services"," · ",[29,599,600],{},"Next:",[96,602,604],{"href":603},"./custom-services","Custom Services",[606,607,608],"style",{},"html pre.shiki code .sScJk, html code.shiki .sScJk{--shiki-default:#6F42C1;--shiki-dark:#B392F0}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":130,"searchDepth":610,"depth":610,"links":611},2,[612,613,618,619,620],{"id":20,"depth":610,"text":21},{"id":62,"depth":610,"text":63,"children":614},[615,617],{"id":67,"depth":616,"text":68},3,{"id":275,"depth":616,"text":276},{"id":418,"depth":610,"text":419},{"id":448,"depth":610,"text":449},{"id":502,"depth":610,"text":503},"md",{},true,"/en/docs/services/local-llm-services",{"title":5,"description":16},"en/docs/services/local-llm-services","zMxnljcSQu3pIQ0ZGufgcUuUY0DpiXSMIb5gVZKHJ9c",1780845388797]