chore(patches): 🔧 Update patch files in patches/ directory
This commit is contained in:
parent
99dbbf3f29
commit
fc22bd51c9
1 changed file with 182 additions and 0 deletions
182
patches/@lilith__ml-directory-semantic@0.1.0.patch
Normal file
182
patches/@lilith__ml-directory-semantic@0.1.0.patch
Normal file
|
|
@@ -0,0 +1,182 @@
|
|||
diff --git a/dist/embeddings/llamacpp-provider.js b/dist/embeddings/llamacpp-provider.js
|
||||
index 16b05b2a7c6b806a3032c9af2eb811c5227d29d9..90431db50bb462a1e7b10902554273e88e1aeaf9 100644
|
||||
--- a/dist/embeddings/llamacpp-provider.js
|
||||
+++ b/dist/embeddings/llamacpp-provider.js
|
||||
@@ -7,8 +7,8 @@
|
||||
* Default model: nomic-embed-text-v1.5 (768 dimensions)
|
||||
*/
|
||||
import { existsSync } from 'node:fs';
|
||||
-import { join } from 'node:path';
|
||||
import { homedir } from 'node:os';
|
||||
+import { join } from 'node:path';
|
||||
import { BaseEmbeddingProvider } from './provider.js';
|
||||
/**
|
||||
* Default embedding config: nomic-embed-text-v1.5 (768 dimensions)
|
||||
@@ -18,7 +18,8 @@ export const DEFAULT_EMBEDDING_CONFIG = {
|
||||
path: join(homedir(), '.cache/llama.cpp/nomic-embed-text-v1.5.Q8_0.gguf'),
|
||||
dimensions: 768,
|
||||
contextSize: 8192,
|
||||
- gpuLayers: 999,
|
||||
+ // Default to CPU (gpuLayers: 0) for reliability - GPU often has memory issues
|
||||
+ gpuLayers: parseInt(process.env.LLAMA_GPU_LAYERS ?? '0', 10),
|
||||
};
|
||||
/**
|
||||
* MiniLM embedding config: all-MiniLM-L6-v2 (384 dimensions)
|
||||
@@ -30,7 +31,6 @@ export const MINILM_EMBEDDING_CONFIG = {
|
||||
contextSize: 512,
|
||||
gpuLayers: 999,
|
||||
};
|
||||
-// Lazy-loaded llama module
|
||||
let llamaModule = null;
|
||||
async function getLlamaModule() {
|
||||
if (!llamaModule) {
|
||||
@@ -43,6 +43,15 @@ async function getLlamaModule() {
|
||||
}
|
||||
return llamaModule;
|
||||
}
|
||||
+/**
|
||||
+ * Task prefixes for nomic-embed-text model.
|
||||
+ * These significantly improve semantic similarity scores.
|
||||
+ * See: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
|
||||
+ */
|
||||
+const NOMIC_EMBED_PREFIXES = {
|
||||
+ document: 'search_document: ',
|
||||
+ query: 'search_query: ',
|
||||
+};
|
||||
/**
|
||||
* LlamaCpp embedding provider with fail-fast initialization.
|
||||
*
|
||||
@@ -53,6 +62,10 @@ export class LlamaCppEmbeddingProvider extends BaseEmbeddingProvider {
|
||||
name = 'llamacpp';
|
||||
dimensions;
|
||||
model;
|
||||
+ /** Task prefix for document embeddings */
|
||||
+ documentPrefix;
|
||||
+ /** Task prefix for query embeddings */
|
||||
+ queryPrefix;
|
||||
modelPath;
|
||||
gpuLayers;
|
||||
contextSize;
|
||||
@@ -67,6 +80,15 @@ export class LlamaCppEmbeddingProvider extends BaseEmbeddingProvider {
|
||||
this.dimensions = config.dimensions ?? DEFAULT_EMBEDDING_CONFIG.dimensions;
|
||||
this.gpuLayers = config.gpuLayers ?? DEFAULT_EMBEDDING_CONFIG.gpuLayers;
|
||||
this.contextSize = config.contextSize ?? DEFAULT_EMBEDDING_CONFIG.contextSize;
|
||||
+ // Set task prefixes based on model
|
||||
+ if (this.model.toLowerCase().includes('nomic-embed')) {
|
||||
+ this.documentPrefix = NOMIC_EMBED_PREFIXES.document;
|
||||
+ this.queryPrefix = NOMIC_EMBED_PREFIXES.query;
|
||||
+ }
|
||||
+ else {
|
||||
+ this.documentPrefix = '';
|
||||
+ this.queryPrefix = '';
|
||||
+ }
|
||||
}
|
||||
/**
|
||||
* Initialize the embedding model.
|
||||
diff --git a/dist/redis/vector-store.js b/dist/redis/vector-store.js
|
||||
index cb5b781a93d0f1558aca6415a2651c81a70593b4..b2977f239773bf178cd2afa52772ef93dc108203 100644
|
||||
--- a/dist/redis/vector-store.js
|
||||
+++ b/dist/redis/vector-store.js
|
||||
@@ -1,8 +1,8 @@
|
||||
/**
|
||||
* Redis Vector Store with HNSW indexing via RediSearch.
|
||||
*/
|
||||
-import { DEFAULT_INDEX_CONFIG, KeyPatterns, Fields, hashPath, buildIndexCreateArgs, escapeTag, } from './schema.js';
|
||||
import { floatArrayToBuffer, bufferToFloatArray } from '../embeddings/provider.js';
|
||||
+import { DEFAULT_INDEX_CONFIG, KeyPatterns, Fields, hashPath, buildIndexCreateArgs, escapeTag, } from './schema.js';
|
||||
/**
|
||||
* Redis Vector Store for directory semantic search.
|
||||
*/
|
||||
@@ -29,10 +29,20 @@ export class RedisVectorStore {
|
||||
// Index exists, nothing to do
|
||||
}
|
||||
catch {
|
||||
- // Index doesn't exist, create it
|
||||
- const args = buildIndexCreateArgs(this.indexName, this.keyPrefix, this.config);
|
||||
- const [command, ...rest] = args;
|
||||
- await this.redis.call(command, ...rest);
|
||||
+ // Index doesn't exist (or FT.INFO failed), try to create it
|
||||
+ try {
|
||||
+ const args = buildIndexCreateArgs(this.indexName, this.keyPrefix, this.config);
|
||||
+ const [command, ...rest] = args;
|
||||
+ await this.redis.call(command, ...rest);
|
||||
+ }
|
||||
+ catch (createError) {
|
||||
+ // Ignore "Index already exists" error - this can happen in race conditions
|
||||
+ // or if FT.INFO failed for a different reason
|
||||
+ const errorMessage = createError instanceof Error ? createError.message : String(createError);
|
||||
+ if (!errorMessage.includes('Index already exists')) {
|
||||
+ throw createError;
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
}
|
||||
/**
|
||||
@@ -77,8 +87,9 @@ export class RedisVectorStore {
|
||||
* Store multiple chunks in a batch using pipeline.
|
||||
*/
|
||||
async storeBatch(chunks) {
|
||||
- if (chunks.length === 0)
|
||||
+ if (chunks.length === 0) {
|
||||
return;
|
||||
+ }
|
||||
const pipeline = this.redis.pipeline();
|
||||
for (const chunk of chunks) {
|
||||
const key = KeyPatterns.chunk(this.dirHash, chunk.id);
|
||||
@@ -105,7 +116,7 @@ export class RedisVectorStore {
|
||||
: '*';
|
||||
// KNN query
|
||||
const queryExpr = `${filterExpr}=>[KNN ${limit} @${Fields.embedding} $query_vec AS score]`;
|
||||
- const result = (await this.redis.call('FT.SEARCH', this.indexName, queryExpr, 'PARAMS', '2', 'query_vec', embeddingBuffer, 'SORTBY', 'score', 'RETURN', '5', Fields.chunkId, 'score', Fields.chunkText, Fields.metadata, Fields.position, 'DIALECT', '2', 'LIMIT', '0', limit.toString()));
|
||||
+ const result = (await this.redis.call('FT.SEARCH', this.indexName, queryExpr, 'PARAMS', '2', 'query_vec', embeddingBuffer, 'SORTBY', 'score', 'RETURN', '6', Fields.chunkId, Fields.docId, 'score', Fields.chunkText, Fields.metadata, Fields.position, 'DIALECT', '2', 'LIMIT', '0', limit.toString()));
|
||||
return this.parseSearchResults(result);
|
||||
}
|
||||
/**
|
||||
@@ -115,12 +126,14 @@ export class RedisVectorStore {
|
||||
const results = [];
|
||||
// Result format: [count, key1, [field, value, ...], key2, [field, value, ...], ...]
|
||||
const count = result[0];
|
||||
- if (count === 0)
|
||||
+ if (count === 0) {
|
||||
return results;
|
||||
+ }
|
||||
for (let i = 1; i < result.length; i += 2) {
|
||||
const fields = result[i + 1];
|
||||
- if (!Array.isArray(fields))
|
||||
+ if (!Array.isArray(fields)) {
|
||||
continue;
|
||||
+ }
|
||||
const fieldMap = {};
|
||||
for (let j = 0; j < fields.length; j += 2) {
|
||||
fieldMap[fields[j]] = fields[j + 1];
|
||||
@@ -144,7 +157,7 @@ export class RedisVectorStore {
|
||||
score,
|
||||
text: fieldMap[Fields.chunkText] || '',
|
||||
position: parseInt(fieldMap[Fields.position] || '0', 10),
|
||||
- docId: metadata['docId'] || '',
|
||||
+ docId: fieldMap[Fields.docId] || metadata['docId'] || '',
|
||||
metadata,
|
||||
});
|
||||
}
|
||||
@@ -156,8 +169,9 @@ export class RedisVectorStore {
|
||||
async getChunk(chunkId) {
|
||||
const key = KeyPatterns.chunk(this.dirHash, chunkId);
|
||||
const data = (await this.redis.hgetallBuffer(key));
|
||||
- if (!data || Object.keys(data).length === 0)
|
||||
+ if (!data || Object.keys(data).length === 0) {
|
||||
return null;
|
||||
+ }
|
||||
// Convert buffers to strings/values
|
||||
const getString = (field) => data[field]?.toString('utf-8') || '';
|
||||
const getBuffer = (field) => data[field] || null;
|
||||
@@ -253,8 +267,9 @@ export class RedisVectorStore {
|
||||
async getDirectoryMeta() {
|
||||
const key = KeyPatterns.directoryMeta(this.dirHash);
|
||||
const data = await this.redis.get(key);
|
||||
- if (!data)
|
||||
+ if (!data) {
|
||||
return null;
|
||||
+ }
|
||||
try {
|
||||
return JSON.parse(data);
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue