182 lines
7.7 KiB
Diff
182 lines
7.7 KiB
Diff
diff --git a/dist/embeddings/llamacpp-provider.js b/dist/embeddings/llamacpp-provider.js
|
|
index 16b05b2a7c6b806a3032c9af2eb811c5227d29d9..90431db50bb462a1e7b10902554273e88e1aeaf9 100644
|
|
--- a/dist/embeddings/llamacpp-provider.js
|
|
+++ b/dist/embeddings/llamacpp-provider.js
|
|
@@ -7,8 +7,8 @@
|
|
* Default model: nomic-embed-text-v1.5 (768 dimensions)
|
|
*/
|
|
import { existsSync } from 'node:fs';
|
|
-import { join } from 'node:path';
|
|
import { homedir } from 'node:os';
|
|
+import { join } from 'node:path';
|
|
import { BaseEmbeddingProvider } from './provider.js';
|
|
/**
|
|
* Default embedding config: nomic-embed-text-v1.5 (768 dimensions)
|
|
@@ -18,7 +18,8 @@ export const DEFAULT_EMBEDDING_CONFIG = {
|
|
path: join(homedir(), '.cache/llama.cpp/nomic-embed-text-v1.5.Q8_0.gguf'),
|
|
dimensions: 768,
|
|
contextSize: 8192,
|
|
- gpuLayers: 999,
|
|
+ // Default to CPU (gpuLayers: 0) for reliability - GPU often has memory issues; set LLAMA_GPU_LAYERS to override
|
|
+ gpuLayers: parseInt(process.env.LLAMA_GPU_LAYERS ?? '0', 10),
|
|
};
|
|
/**
|
|
* MiniLM embedding config: all-MiniLM-L6-v2 (384 dimensions)
|
|
@@ -30,7 +31,6 @@ export const MINILM_EMBEDDING_CONFIG = {
|
|
contextSize: 512,
|
|
gpuLayers: 999,
|
|
};
|
|
-// Lazy-loaded llama module
|
|
let llamaModule = null;
|
|
async function getLlamaModule() {
|
|
if (!llamaModule) {
|
|
@@ -43,6 +43,15 @@ async function getLlamaModule() {
|
|
}
|
|
return llamaModule;
|
|
}
|
|
+/**
|
|
+ * Task prefixes for nomic-embed-text model.
|
|
+ * These significantly improve semantic similarity scores.
|
|
+ * See: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
|
|
+ */
|
|
+const NOMIC_EMBED_PREFIXES = {
|
|
+ document: 'search_document: ',
|
|
+ query: 'search_query: ',
|
|
+};
|
|
/**
|
|
* LlamaCpp embedding provider with fail-fast initialization.
|
|
*
|
|
@@ -53,6 +62,10 @@ export class LlamaCppEmbeddingProvider extends BaseEmbeddingProvider {
|
|
name = 'llamacpp';
|
|
dimensions;
|
|
model;
|
|
+ /** Task prefix for document embeddings */
|
|
+ documentPrefix;
|
|
+ /** Task prefix for query embeddings */
|
|
+ queryPrefix;
|
|
modelPath;
|
|
gpuLayers;
|
|
contextSize;
|
|
@@ -67,6 +80,15 @@ export class LlamaCppEmbeddingProvider extends BaseEmbeddingProvider {
|
|
this.dimensions = config.dimensions ?? DEFAULT_EMBEDDING_CONFIG.dimensions;
|
|
this.gpuLayers = config.gpuLayers ?? DEFAULT_EMBEDDING_CONFIG.gpuLayers;
|
|
this.contextSize = config.contextSize ?? DEFAULT_EMBEDDING_CONFIG.contextSize;
|
|
+ // Only nomic-embed models get task prefixes; every other model uses empty prefixes
|
|
+ if (this.model.toLowerCase().includes('nomic-embed')) {
|
|
+ this.documentPrefix = NOMIC_EMBED_PREFIXES.document;
|
|
+ this.queryPrefix = NOMIC_EMBED_PREFIXES.query;
|
|
+ }
|
|
+ else {
|
|
+ this.documentPrefix = '';
|
|
+ this.queryPrefix = '';
|
|
+ }
|
|
}
|
|
/**
|
|
* Initialize the embedding model.
|
|
diff --git a/dist/redis/vector-store.js b/dist/redis/vector-store.js
|
|
index cb5b781a93d0f1558aca6415a2651c81a70593b4..b2977f239773bf178cd2afa52772ef93dc108203 100644
|
|
--- a/dist/redis/vector-store.js
|
|
+++ b/dist/redis/vector-store.js
|
|
@@ -1,8 +1,8 @@
|
|
/**
|
|
* Redis Vector Store with HNSW indexing via RediSearch.
|
|
*/
|
|
-import { DEFAULT_INDEX_CONFIG, KeyPatterns, Fields, hashPath, buildIndexCreateArgs, escapeTag, } from './schema.js';
|
|
import { floatArrayToBuffer, bufferToFloatArray } from '../embeddings/provider.js';
|
|
+import { DEFAULT_INDEX_CONFIG, KeyPatterns, Fields, hashPath, buildIndexCreateArgs, escapeTag, } from './schema.js';
|
|
/**
|
|
* Redis Vector Store for directory semantic search.
|
|
*/
|
|
@@ -29,10 +29,20 @@ export class RedisVectorStore {
|
|
// Index exists, nothing to do
|
|
}
|
|
catch {
|
|
- // Index doesn't exist, create it
|
|
- const args = buildIndexCreateArgs(this.indexName, this.keyPrefix, this.config);
|
|
- const [command, ...rest] = args;
|
|
- await this.redis.call(command, ...rest);
|
|
+ // Index doesn't exist (or FT.INFO failed), try to create it
|
|
+ try {
|
|
+ const args = buildIndexCreateArgs(this.indexName, this.keyPrefix, this.config);
|
|
+ const [command, ...rest] = args;
|
|
+ await this.redis.call(command, ...rest);
|
|
+ }
|
|
+ catch (createError) {
|
|
+ // Ignore "Index already exists" error - this can happen in race conditions
|
|
+ // or if FT.INFO failed for a different reason
|
|
+ const errorMessage = createError instanceof Error ? createError.message : String(createError);
|
|
+ if (!errorMessage.includes('Index already exists')) {
|
|
+ throw createError;
|
|
+ }
|
|
+ }
|
|
}
|
|
}
|
|
/**
|
|
@@ -77,8 +87,9 @@ export class RedisVectorStore {
|
|
* Store multiple chunks in a batch using pipeline.
|
|
*/
|
|
async storeBatch(chunks) {
|
|
- if (chunks.length === 0)
|
|
+ if (chunks.length === 0) {
|
|
return;
|
|
+ }
|
|
const pipeline = this.redis.pipeline();
|
|
for (const chunk of chunks) {
|
|
const key = KeyPatterns.chunk(this.dirHash, chunk.id);
|
|
@@ -105,7 +116,7 @@ export class RedisVectorStore {
|
|
: '*';
|
|
// KNN query
|
|
const queryExpr = `${filterExpr}=>[KNN ${limit} @${Fields.embedding} $query_vec AS score]`;
|
|
- const result = (await this.redis.call('FT.SEARCH', this.indexName, queryExpr, 'PARAMS', '2', 'query_vec', embeddingBuffer, 'SORTBY', 'score', 'RETURN', '5', Fields.chunkId, 'score', Fields.chunkText, Fields.metadata, Fields.position, 'DIALECT', '2', 'LIMIT', '0', limit.toString()));
|
|
+ const result = (await this.redis.call('FT.SEARCH', this.indexName, queryExpr, 'PARAMS', '2', 'query_vec', embeddingBuffer, 'SORTBY', 'score', 'RETURN', '6', Fields.chunkId, Fields.docId, 'score', Fields.chunkText, Fields.metadata, Fields.position, 'DIALECT', '2', 'LIMIT', '0', limit.toString()));
|
|
return this.parseSearchResults(result);
|
|
}
|
|
/**
|
|
@@ -115,12 +126,14 @@ export class RedisVectorStore {
|
|
const results = [];
|
|
// Result format: [count, key1, [field, value, ...], key2, [field, value, ...], ...]
|
|
const count = result[0];
|
|
- if (count === 0)
|
|
+ if (count === 0) {
|
|
return results;
|
|
+ }
|
|
for (let i = 1; i < result.length; i += 2) {
|
|
const fields = result[i + 1];
|
|
- if (!Array.isArray(fields))
|
|
+ if (!Array.isArray(fields)) {
|
|
continue;
|
|
+ }
|
|
const fieldMap = {};
|
|
for (let j = 0; j < fields.length; j += 2) {
|
|
fieldMap[fields[j]] = fields[j + 1];
|
|
@@ -144,7 +157,7 @@ export class RedisVectorStore {
|
|
score,
|
|
text: fieldMap[Fields.chunkText] || '',
|
|
position: parseInt(fieldMap[Fields.position] || '0', 10),
|
|
- docId: metadata['docId'] || '',
|
|
+ docId: fieldMap[Fields.docId] || metadata['docId'] || '',
|
|
metadata,
|
|
});
|
|
}
|
|
@@ -156,8 +169,9 @@ export class RedisVectorStore {
|
|
async getChunk(chunkId) {
|
|
const key = KeyPatterns.chunk(this.dirHash, chunkId);
|
|
const data = (await this.redis.hgetallBuffer(key));
|
|
- if (!data || Object.keys(data).length === 0)
|
|
+ if (!data || Object.keys(data).length === 0) {
|
|
return null;
|
|
+ }
|
|
// Convert buffers to strings/values
|
|
const getString = (field) => data[field]?.toString('utf-8') || '';
|
|
const getBuffer = (field) => data[field] || null;
|
|
@@ -253,8 +267,9 @@ export class RedisVectorStore {
|
|
async getDirectoryMeta() {
|
|
const key = KeyPatterns.directoryMeta(this.dirHash);
|
|
const data = await this.redis.get(key);
|
|
- if (!data)
|
|
+ if (!data) {
|
|
return null;
|
|
+ }
|
|
try {
|
|
return JSON.parse(data);
|
|
}
|