<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Llm on Pi Stack</title><link>https://www.pistack.xyz/tags/llm/</link><description>Recent content in Llm on Pi Stack</description><generator>Hugo</generator><language>en-us</language><lastBuildDate>Sat, 11 Apr 2026 00:00:00 +0000</lastBuildDate><atom:link href="https://www.pistack.xyz/tags/llm/index.xml" rel="self" type="application/rss+xml"/><item><title>Ollama vs LM Studio vs LocalAI: Run LLMs Locally in 2026</title><link>https://www.pistack.xyz/posts/ollama-vs-lmstudio-vs-localai/</link><pubDate>Sat, 11 Apr 2026 00:00:00 +0000</pubDate><guid>https://www.pistack.xyz/posts/ollama-vs-lmstudio-vs-localai/</guid><description>&lt;h2 id="why-run-ai-models-locally">Why Run AI Models Locally?&lt;/h2>
&lt;p>Running LLMs on your own hardware gives you:&lt;/p>
&lt;ul>
&lt;li>&lt;strong>Complete Privacy&lt;/strong>: No data sent to cloud providers&lt;/li>
&lt;li>&lt;strong>No API Costs&lt;/strong>: Free after hardware investment&lt;/li>
&lt;li>&lt;strong>Offline Access&lt;/strong>: Works without internet&lt;/li>
&lt;li>&lt;strong>Customization&lt;/strong>: Fine-tune and modify models freely&lt;/li>
&lt;/ul>
&lt;h2 id="quick-comparison">Quick Comparison&lt;/h2>
&lt;table>
 &lt;thead>
 &lt;tr>
 &lt;th>Feature&lt;/th>
 &lt;th>&lt;a href="https://ollama.com/">ollama&lt;/a>&lt;/th>
 &lt;th>LM Studio&lt;/th>
 &lt;th>LocalAI&lt;/th>
 &lt;/tr>
 &lt;/thead>
 &lt;tbody>
 &lt;tr>
 &lt;td>&lt;strong>Primary Use&lt;/strong>&lt;/td>
 &lt;td>CLI &amp;amp; API&lt;/td>
 &lt;td>Desktop GUI&lt;/td>
 &lt;td>OpenAI-compatible API&lt;/td>
 &lt;/tr>
 &lt;tr>
 &lt;td>&lt;strong>Supported OS&lt;/strong>&lt;/td>
 &lt;td>Linux/macOS/WSL&lt;/td>
 &lt;td>&lt;a href="https://www.docker.com/">docker&lt;/a>ac/Linux&lt;/td>
 &lt;td>Linux/Docker&lt;/td>
 &lt;/tr>
 &lt;tr>
 &lt;td>&lt;strong>Model Format&lt;/strong>&lt;/td>
 &lt;td>GGUF&lt;/td>
 &lt;td>GGUF&lt;/td>
 &lt;td>GGUF/GPTQ&lt;/td>
 &lt;/tr>
 &lt;tr>
 &lt;td>&lt;strong>GPU Support&lt;/strong>&lt;/td>
 &lt;td>Metal/CUDA&lt;/td>
 &lt;td>Metal/CUDA&lt;/td>
 &lt;td>CUDA/Vulkan&lt;/td>
 &lt;/tr>
 &lt;tr>
 &lt;td>&lt;strong>API Compatibility&lt;/strong>&lt;/td>
 &lt;td>Native + OpenAI-compatible&lt;/td>
 &lt;td>OpenAI-compatible server&lt;/td>
 &lt;td>OpenAI Drop-in&lt;/td>
 &lt;/tr>
 &lt;tr>
 &lt;td>&lt;strong>Multi-model&lt;/strong>&lt;/td>
 &lt;td>✅ Yes&lt;/td>
 &lt;td>✅ Yes&lt;/td>
 &lt;td>✅ Yes&lt;/td>
 &lt;/tr>
 &lt;tr>
 &lt;td>&lt;strong>Embeddings&lt;/strong>&lt;/td>
 &lt;td>✅ Yes&lt;/td>
 &lt;td>✅ Yes&lt;/td>
 &lt;td>✅ Yes&lt;/td>
 &lt;/tr>
 &lt;tr>
 &lt;td>&lt;strong>Docker Support&lt;/strong>&lt;/td>
 &lt;td>✅ Yes&lt;/td>
 &lt;td>❌ No&lt;/td>
 &lt;td>✅ Native&lt;/td>
 &lt;/tr>
 &lt;tr>
 &lt;td>&lt;strong>License&lt;/strong>&lt;/td>
 &lt;td>MIT&lt;/td>
 &lt;td>Proprietary (free)&lt;/td>
 &lt;td>MIT&lt;/td>
 &lt;/tr>
 &lt;/tbody>
&lt;/table>
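&lt;p>The API row is where the three differ most in practice. As a rough sketch of what that means day to day (assuming default ports and an already-pulled model; exact model names depend on what you have installed), here is the same chat request against Ollama's native endpoint versus LocalAI's OpenAI-style one:&lt;/p>
&lt;div class="highlight">&lt;div class="chroma">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash"># Ollama's native API (default port 11434)
curl http://localhost:11434/api/generate -d &amp;#39;{
  &amp;#34;model&amp;#34;: &amp;#34;llama3.2&amp;#34;,
  &amp;#34;prompt&amp;#34;: &amp;#34;Hello&amp;#34;
}&amp;#39;

# LocalAI speaks the OpenAI chat format (default port 8080),
# so existing OpenAI SDK code can point at it unchanged
curl http://localhost:8080/v1/chat/completions \
  -H &amp;#34;Content-Type: application/json&amp;#34; \
  -d &amp;#39;{
    &amp;#34;model&amp;#34;: &amp;#34;llama3.2&amp;#34;,
    &amp;#34;messages&amp;#34;: [{&amp;#34;role&amp;#34;: &amp;#34;user&amp;#34;, &amp;#34;content&amp;#34;: &amp;#34;Hello&amp;#34;}]
  }&amp;#39;
&lt;/code>&lt;/pre>
&lt;/div>
&lt;/div>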
&lt;hr>
&lt;h2 id="1-ollama-the-developer-favorite">1. Ollama (The Developer Favorite)&lt;/h2>
&lt;p>&lt;strong>Best for&lt;/strong>: CLI users, developers, server deployment&lt;/p></description></item><item><title>Self-Hosted AI Stack: Complete Local AI Setup Guide 2026</title><link>https://www.pistack.xyz/posts/self-hosted-ai-stack/</link><pubDate>Sat, 11 Apr 2026 00:00:00 +0000</pubDate><guid>https://www.pistack.xyz/posts/self-hosted-ai-stack/</guid><description>&lt;h2 id="why-self-host-your-ai">Why Self-Host Your AI?&lt;/h2>
&lt;ul>
&lt;li>&lt;strong>Privacy&lt;/strong>: Your data never leaves your server&lt;/li>
&lt;li>&lt;strong>Cost&lt;/strong>: No per-token API fees&lt;/li>
&lt;li>&lt;strong>Customization&lt;/strong>: Use any open model&lt;/li>
&lt;li>&lt;strong>Reliability&lt;/strong>: Works offline, no rate limits&lt;/li>
&lt;/ul>
&lt;h2 id="the-self-hosted-ai-architecture">The Self-Hosted AI Architecture&lt;/h2>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt"> 1
&lt;/span>&lt;span class="lnt"> 2
&lt;/span>&lt;span class="lnt"> 3
&lt;/span>&lt;span class="lnt"> 4
&lt;/span>&lt;span class="lnt"> 5
&lt;/span>&lt;span class="lnt"> 6
&lt;/span>&lt;span class="lnt"> 7
&lt;/span>&lt;span class="lnt"> 8
&lt;/span>&lt;span class="lnt"> 9
&lt;/span>&lt;span class="lnt">10
&lt;/span>&lt;span class="lnt">11
&lt;/span>&lt;span class="lnt">12
&lt;/span>&lt;span class="lnt">13
&lt;/span>&lt;span class="lnt">14
&lt;/span>&lt;span class="lnt">15
&lt;/span>&lt;span class="lnt">16
&lt;/span>&lt;span class="lnt">17
&lt;/span>&lt;span class="lnt">18
&lt;/span>&lt;span class="lnt">19
&lt;/span>&lt;span class="lnt">20
&lt;/span>&lt;span class="lnt">21
&lt;/span>&lt;span class="lnt">22
&lt;/span>&lt;span class="lnt">23
&lt;/span>&lt;span class="lnt">24
&lt;/span>&lt;span class="lnt">25
&lt;/span>&lt;span class="lnt">26
&lt;/span>&lt;span class="lnt">27
&lt;/span>&lt;span class="lnt">28
&lt;/span>&lt;span class="lnt">29
&lt;/span>&lt;span class="lnt">30
&lt;/span>&lt;span class="lnt">31
&lt;/span>&lt;span class="lnt">32
&lt;/span>&lt;span class="lnt">33
&lt;/span>&lt;span class="lnt">34
&lt;/span>&lt;span class="lnt">35
&lt;/span>&lt;span class="lnt">36
&lt;/span>&lt;span class="lnt">37
&lt;/span>&lt;span class="lnt">38
&lt;/span>&lt;span class="lnt">39
&lt;/span>&lt;span class="lnt">40
&lt;/span>&lt;span class="lnt">41
&lt;/span>&lt;span class="lnt">42
&lt;/span>&lt;span class="lnt">43
&lt;/span>&lt;span class="lnt">44
&lt;/span>&lt;span class="lnt">45
&lt;/span>&lt;span class="lnt">46
&lt;/span>&lt;span class="lnt">47
&lt;/span>&lt;span class="lnt">48
&lt;/span>&lt;span class="lnt">49
&lt;/span>&lt;span class="lnt">50
&lt;/span>&lt;span class="lnt">51
&lt;/span>&lt;span class="lnt">52
&lt;/span>&lt;span class="lnt">53
&lt;/span>&lt;span class="lnt">54
&lt;/span>&lt;span class="lnt">55
&lt;/span>&lt;span class="lnt">56
&lt;/span>&lt;span class="lnt">57
&lt;/span>&lt;span class="lnt">58
&lt;/span>&lt;span class="lnt">59
&lt;/span>&lt;span class="lnt">60
&lt;/span>&lt;span class="lnt">61
&lt;/span>&lt;span class="lnt">62
&lt;/span>&lt;span class="lnt">63
&lt;/span>&lt;span class="lnt">64
&lt;/span>&lt;span class="lnt">65
&lt;/span>&lt;span class="lnt">66
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-fallback" data-lang="fallback">&lt;span class="line">&lt;span class="cl">User → Open WebUI → [ollama](https://ollama.com/) API → LLM (Llama/Mistral/Qwen)
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> ↘ Embeddings → Vector DB → RAG
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> ↘ TTS/STT → Voice Inte[docker](https://www.docker.com/)```
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">## Complete Docker Compose Stack
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">```yaml
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"># ai-stack.yml
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">version: &amp;#39;3.8&amp;#39;
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">services:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> # LLM Inference Engine
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> ollama:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> image: ollama/ollama:latest
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> container_name: ollama
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> restart: unless-stopped
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> ports:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> - &amp;#34;11434:11434&amp;#34;
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> volumes:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> - ollama_data:/root/.ollama
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> deploy:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> resources:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> reservations:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> devices:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> - driver: nvidia
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> count: 1
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> capabilities: [gpu]
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> # Web Interface
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> open-webui:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> image: ghcr.io/open-webui/open-webui:main
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> container_name: open-webui
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> restart: unless-stopped
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> ports:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> - &amp;#34;3000:8080&amp;#34;
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> environment:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> - OLLAMA_BASE_URL=http://ollama:11434
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> - WEBUI_SECRET_KEY=your-secret-key
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> volumes:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> - openwebui_data:/app/backend/data
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> # Embedding Model
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> embedding-model:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> image: ollama/ollama:latest
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> container_name: ollama-embed
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> restart: unless-stopped
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> ports:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> - &amp;#34;11435:11434&amp;#34;
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> volumes:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> - embed_data:/root/.ollama
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> command: ollama serve
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> # Vector Database (Optional)
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> qdrant:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> image: qdrant/qdrant:latest
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> container_name: qdrant
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> restart: unless-stopped
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> ports:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> - &amp;#34;6333:6333&amp;#34;
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> volumes:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> - qdrant_data:/qdrant/storage
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">volumes:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> ollama_data:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> openwebui_data:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> embed_data:
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> qdrant_data:
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="setup-steps">Setup Steps&lt;/h2>
&lt;h3 id="1-start-the-stack">1. Start the Stack&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">docker compose -f ai-stack.yml up -d
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="2-pull-models">2. Pull Models&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;span class="lnt">8
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># Main chat model&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">ollama pull llama3.2
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># Coding assistant&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">ollama pull qwen2.5-coder
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># Embedding model&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">curl http://localhost:11435/api/pull -d &lt;span class="s1">&amp;#39;{&amp;#34;name&amp;#34;: &amp;#34;nomic-embed-text&amp;#34;}&amp;#39;&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="3-access-web-ui">3. Access Web UI&lt;/h3>
&lt;p>Open http://localhost:3000 and create your account.&lt;/p></description></item></channel></rss>