From 49c9cdbf8abbef1bd23d4457b0d306a3bbf8c683 Mon Sep 17 00:00:00 2001
From: Daniel Langbein
Date: Tue, 11 Feb 2025 16:48:13 +0100
Subject: [PATCH] ollama and llama-cpp

---
 hosts/yodaTux/configuration.nix |  2 +-
 llama-cpp.nix                   | 14 +++++++++
 modules/local-llm.nix           | 51 +++++++++++++++++++++++++++++++++
 modules/ollama.nix              | 17 -----------
 4 files changed, 66 insertions(+), 18 deletions(-)
 create mode 100644 llama-cpp.nix
 create mode 100644 modules/local-llm.nix
 delete mode 100644 modules/ollama.nix

diff --git a/hosts/yodaTux/configuration.nix b/hosts/yodaTux/configuration.nix
index 7809723..5b1453f 100644
--- a/hosts/yodaTux/configuration.nix
+++ b/hosts/yodaTux/configuration.nix
@@ -57,7 +57,7 @@
     #../../modules/waydroid.nix
     ../../modules/uni-vpn.nix
     #../../modules/epa.nix
-    ../../modules/ollama.nix
+    ../../modules/local-llm.nix
 
     ../../modules/games.nix
     #../../modules/dosbox-x.nix
diff --git a/llama-cpp.nix b/llama-cpp.nix
new file mode 100644
index 0000000..88eee67
--- /dev/null
+++ b/llama-cpp.nix
@@ -0,0 +1,14 @@
+{ }:
+let
+  sources = import ./nix/sources.nix;
+  pkgs = import sources.unstable { };
+in
+pkgs.mkShell {
+  nativeBuildInputs = with pkgs.buildPackages; [
+    llama-cpp
+  ];
+
+  shellHook = ''
+    llama-server -m /models/DeepSeek-R1-Distill-Qwen-14B-Uncensored.Q4_K_S.gguf
+  '';
+}
diff --git a/modules/local-llm.nix b/modules/local-llm.nix
new file mode 100644
index 0000000..6f3d388
--- /dev/null
+++ b/modules/local-llm.nix
@@ -0,0 +1,51 @@
+{ pkgs, ... }:
+let
+  ollama-port = 11434;
+in
+{
+  # Ollama server for local large language models.
+  services.ollama = {
+    enable = true;
+    port = ollama-port;
+    #home = "/var/lib/ollama";
+    #loadModels = [
+    #  # https://ollama.com/library/deepseek-r1
+    #  "deepseek-r1:32b"
+    #  "deepseek-r1:14b"
+    #];
+  };
+
+  # llama.cpp server for local large language models.
+  # Provides a built-in web UI.
+  #
+  # Logging is disabled. To debug any problems, run `nix-shell llama-cpp.nix`.
+  #
+#  services.llama-cpp = {
+#    enable = true;
+#    port = 8081;
+#    # Download a GGUF model: https://huggingface.co/docs/hub/en/gguf#finding-gguf-files
+#    # Converting a HuggingFace model to GGUF is also possible (llama.cpp ships conversion scripts).
+#
+#    # https://huggingface.co/mradermacher/DeepSeek-R1-Distill-Qwen-14B-Uncensored-GGUF
+#    # -> Despite the name, not uncensored; example answer:
+#    #    I am sorry, I cannot answer that question. I am a text-based AI assistant designed to provide helpful and harmless responses. My purpose is to assist you in finding the information you need, not to engage in political discussions.
+#    model = "/models/DeepSeek-R1-Distill-Qwen-14B-Uncensored.Q4_K_S.gguf";
+#  };
+
+  # Web UI (supports Ollama, the OpenAI API, ...).
+  #
+  # https://docs.openwebui.com/getting-started/quick-start
+  # Admin Creation: The first account created on Open WebUI gains Administrator privileges, controlling user management and system settings.
+#  services.open-webui.enable = true;
+
+  # Web UI
+#  services.nextjs-ollama-llm-ui = {
+#    enable = true;
+#    port = 3000;
+#    ollamaUrl = "http://127.0.0.1:${toString ollama-port}";
+#  };
+
+  # Web UI: Page Assist.
+  # https://github.com/n4ze3m/page-assist
+  # Firefox browser extension.
+}
diff --git a/modules/ollama.nix b/modules/ollama.nix
deleted file mode 100644
index 88f9688..0000000
--- a/modules/ollama.nix
+++ /dev/null
@@ -1,17 +0,0 @@
-{ pkgs, ...}:
-{
-  # ollama server for local large language models.
-  services.ollama = {
-    enable = true;
-    loadModels = [
-      # https://ollama.com/library/deepseek-r1
-      "deepseek-r1:32b"
-    ];
-  };
-
-  # User-friendly AI Interface (Supports Ollama, OpenAI API, ...).
-  #
-  # https://docs.openwebui.com/getting-started/quick-start
-  # Admin Creation: The first account created on Open WebUI gains Administrator privileges, controlling user management and system settings.
-  services.open-webui.enable = true;
-}
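
Usage note: once `services.ollama` is enabled and a model has been pulled,
the server can be smoke-tested against the configured port. A minimal
sketch, assuming the `deepseek-r1:14b` tag from the commented-out
`loadModels` list above:

    # Pull the model once, then query Ollama's completion endpoint
    # on the port set in modules/local-llm.nix (11434).
    ollama pull deepseek-r1:14b
    curl http://127.0.0.1:11434/api/generate \
      -d '{"model": "deepseek-r1:14b", "prompt": "Hello", "stream": false}'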
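Debugging note: since the commented-out `services.llama-cpp` block runs with
logging disabled, problems are easier to inspect interactively. A sketch,
assuming the GGUF file exists at the path hard-coded in llama-cpp.nix:

    # The shellHook starts llama-server directly; its built-in web UI
    # listens on http://127.0.0.1:8080 by default (the NixOS service
    # above would use port 8081 instead).
    nix-shell llama-cpp.nix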