/**
 * Unit tests for `ResearchCacheService`.
 *
 * Original location: education/research/backend/__tests__/research.service.spec.ts
 *
 * The service is constructed directly (no NestJS testing module) with
 * hand-rolled mocks for its five collaborators: the TypeORM-style repository,
 * the embedding service, the vector store, the content storage and the
 * settings service. Every mock is recreated in `beforeEach`, so tests do not
 * share state.
 */

// The external embedding / vector-store packages are replaced with empty
// stand-ins so importing the service does not pull in the real modules.
jest.mock('@lilith/nestjs-embeddings', () => ({
  EmbeddingService: class {},
  EmbeddingModule: { forRoot: () => ({ module: class {}, providers: [], exports: [] }) },
}));

jest.mock('@lilith/nestjs-vector-store', () => ({
  VectorStoreService: class {},
  VectorStoreModule: { forRoot: () => ({ module: class {}, providers: [], exports: [] }) },
}));

import { ResearchCacheService } from '../services/research-cache.service';
import { ResearchEntryEntity } from '../entities/research-entry.entity';
import { mockRepository, MockRepository } from '@test-helpers/mock-repository';

const UUID_1 = 'a1b2c3d4-1111-2222-3333-444455556666';
const UUID_2 = 'b2c3d4e5-2222-3333-4444-555566667777';

/**
 * Builds a `ResearchEntryEntity` fixture with fixed defaults.
 *
 * @param overrides - Fields that replace the defaults (spread last, so they win).
 * @returns A plain object asserted to `ResearchEntryEntity`; it is not a real
 *   entity instance.
 */
function makeResearchEntry(overrides: Partial<ResearchEntryEntity> = {}): ResearchEntryEntity {
  return {
    id: UUID_1,
    query: 'What is TypeORM?',
    normalizedQuery: 'what is typeorm?',
    answer: 'TypeORM is an ORM for TypeScript.',
    model: 'claude-3',
    sources: [],
    starred: false,
    archived: false,
    hitCount: 0,
    tags: [],
    embeddingId: null,
    createdAt: new Date(),
    updatedAt: new Date(),
    ...overrides,
  } as ResearchEntryEntity;
}

describe('ResearchCacheService', () => {
  let service: ResearchCacheService;
  // NOTE(review): if MockRepository is generic, its type argument may be
  // missing here — confirm against @test-helpers/mock-repository.
  let repo: MockRepository;
  let embeddingService: { isAvailable: jest.Mock; embedQuery: jest.Mock; embedDocument: jest.Mock };
  let vectorStore: { search: jest.Mock; store: jest.Mock; delete: jest.Mock };
  let contentStorage: {
    isAvailable: jest.Mock;
    store: jest.Mock;
    retrieve: jest.Mock;
    delete: jest.Mock;
    getStorageSize: jest.Mock;
  };
  let settingsService: { get: jest.Mock; getOrDefault: jest.Mock };

  beforeEach(() => {
    repo = mockRepository();
    // Embeddings and content storage default to "unavailable"; individual
    // tests opt in by overriding isAvailable.
    embeddingService = {
      isAvailable: jest.fn().mockReturnValue(false),
      embedQuery: jest.fn(),
      embedDocument: jest.fn(),
    };
    vectorStore = {
      search: jest.fn().mockResolvedValue([]),
      store: jest.fn(),
      delete: jest.fn(),
    };
    contentStorage = {
      isAvailable: jest.fn().mockReturnValue(false),
      store: jest.fn(),
      retrieve: jest.fn(),
      delete: jest.fn(),
      getStorageSize: jest.fn().mockResolvedValue(0),
    };
    settingsService = {
      get: jest.fn(),
      // presumably the semantic-similarity threshold read by the service
      // (tests below treat 0.92 as a hit and 0.5 as a miss) — TODO confirm
      getOrDefault: jest.fn().mockResolvedValue(0.85),
    };

    // Direct instantiation: bypass NestJS DI since external package classes are mocked
    service = new (ResearchCacheService as unknown as new (...args: unknown[]) => ResearchCacheService)(
      repo,
      embeddingService,
      vectorStore,
      contentStorage,
      settingsService,
    );
  });

  describe('researchWithCache', () => {
    it('should return exact match when found', async () => {
      const entry = makeResearchEntry();
      repo.findOne.mockResolvedValue(entry);
      repo.save.mockImplementation((e) => Promise.resolve(e));

      const result = await service.researchWithCache('What is TypeORM?');

      expect(result).not.toBeNull();
      expect(result!.fromCache).toBe(true);
      expect(result!.entry.hitCount).toBe(1);
    });

    it('should return null when no match and embeddings unavailable', async () => {
      repo.findOne.mockResolvedValue(null);
      embeddingService.isAvailable.mockReturnValue(false);

      const result = await service.researchWithCache('Unknown query');

      expect(result).toBeNull();
    });

    it('should return semantic match when similarity exceeds threshold', async () => {
      // First findOne (exact match) misses; second (semantic lookup) hits.
      repo.findOne
        .mockResolvedValueOnce(null)
        .mockResolvedValueOnce(makeResearchEntry());

      embeddingService.isAvailable.mockReturnValue(true);
      embeddingService.embedQuery.mockResolvedValue([0.1, 0.2, 0.3]);
      vectorStore.search.mockResolvedValue([{ entryId: UUID_1, score: 0.92 }]);
      repo.save.mockImplementation((e) => Promise.resolve(e));

      const result = await service.researchWithCache('What is TypeORM framework?');

      expect(result).not.toBeNull();
      expect(result!.fromCache).toBe(true);
      expect(result!.similarity).toBe(0.92);
    });

    it('should return null when semantic score below threshold', async () => {
      repo.findOne.mockResolvedValueOnce(null);
      embeddingService.isAvailable.mockReturnValue(true);
      embeddingService.embedQuery.mockResolvedValue([0.1, 0.2, 0.3]);
      vectorStore.search.mockResolvedValue([{ entryId: UUID_1, score: 0.5 }]);

      const result = await service.researchWithCache('Totally different query');

      expect(result).toBeNull();
    });

    it('should return null when no semantic results', async () => {
      repo.findOne.mockResolvedValueOnce(null);
      embeddingService.isAvailable.mockReturnValue(true);
      embeddingService.embedQuery.mockResolvedValue([0.1, 0.2]);
      vectorStore.search.mockResolvedValue([]);

      const result = await service.researchWithCache('No results');

      expect(result).toBeNull();
    });

    it('should return null when semantic entry not found in DB', async () => {
      repo.findOne
        .mockResolvedValueOnce(null) // exact match
        .mockResolvedValueOnce(null); // semantic lookup

      embeddingService.isAvailable.mockReturnValue(true);
      embeddingService.embedQuery.mockResolvedValue([0.1]);
      vectorStore.search.mockResolvedValue([{ entryId: UUID_1, score: 0.95 }]);

      const result = await service.researchWithCache('Orphaned vector');

      expect(result).toBeNull();
    });

    it('should handle semantic lookup error gracefully', async () => {
      repo.findOne.mockResolvedValueOnce(null);
      embeddingService.isAvailable.mockReturnValue(true);
      embeddingService.embedQuery.mockRejectedValue(new Error('embed failure'));

      const result = await service.researchWithCache('Error query');

      expect(result).toBeNull();
    });
  });

  describe('store', () => {
    it('should store a new research entry', async () => {
      const saved = makeResearchEntry();
      repo.create.mockReturnValue(saved);
      repo.save.mockResolvedValue(saved);

      const result = await service.store(
        'What is TypeORM?',
        'TypeORM is an ORM for TypeScript.',
        'claude-3',
        [{ title: 'TypeORM Docs', url: 'https://typeorm.io', description: 'Official docs' }],
        new Map(),
      );

      expect(repo.create).toHaveBeenCalledWith(expect.objectContaining({
        query: 'What is TypeORM?',
        normalizedQuery: 'what is typeorm?',
        answer: 'TypeORM is an ORM for TypeScript.',
        model: 'claude-3',
        starred: false,
        archived: false,
        hitCount: 0,
      }));
      expect(result).toEqual(saved);
    });

    it('should store page content hashes when storage available', async () => {
      contentStorage.isAvailable.mockReturnValue(true);
      contentStorage.store.mockResolvedValue('abc123hash');
      const saved = makeResearchEntry();
      repo.create.mockReturnValue(saved);
      repo.save.mockResolvedValue(saved);

      await service.store(
        'query',
        'answer',
        'model',
        [{ title: 'Page', url: 'https://example.com', description: 'desc' }],
        new Map([['https://example.com', 'content']]),
      );

      expect(contentStorage.store).toHaveBeenCalledWith('content');
      expect(repo.create).toHaveBeenCalledWith(expect.objectContaining({
        sources: expect.arrayContaining([
          expect.objectContaining({
            url: 'https://example.com',
            contentHash: 'abc123hash',
          }),
        ]),
      }));
    });

    it('should store embedding when available', async () => {
      embeddingService.isAvailable.mockReturnValue(true);
      embeddingService.embedDocument.mockResolvedValue([0.1, 0.2]);
      const saved = makeResearchEntry();
      repo.create.mockReturnValue(saved);
      repo.save.mockResolvedValue(saved);

      await service.store('q', 'a', 'm', [], new Map());

      expect(embeddingService.embedDocument).toHaveBeenCalled();
      expect(vectorStore.store).toHaveBeenCalledWith(UUID_1, expect.any(Object), [0.1, 0.2]);
    });

    it('should handle embedding failure gracefully', async () => {
      embeddingService.isAvailable.mockReturnValue(true);
      embeddingService.embedDocument.mockRejectedValue(new Error('embed failure'));
      const saved = makeResearchEntry();
      repo.create.mockReturnValue(saved);
      repo.save.mockResolvedValue(saved);

      const result = await service.store('q', 'a', 'm', [], new Map());

      // Should still return the saved entry despite embedding failure
      expect(result).toEqual(saved);
    });

    it('should handle content storage failure gracefully', async () => {
      contentStorage.isAvailable.mockReturnValue(true);
      contentStorage.store.mockRejectedValue(new Error('disk full'));
      const saved = makeResearchEntry();
      repo.create.mockReturnValue(saved);
      repo.save.mockResolvedValue(saved);

      const result = await service.store(
        'q', 'a', 'm',
        [{ title: 'P', url: 'https://example.com', description: '' }],
        new Map([['https://example.com', 'content']]),
      );

      expect(result).toEqual(saved);
      // contentHash should be null since storage failed
      expect(repo.create).toHaveBeenCalledWith(expect.objectContaining({
        sources: expect.arrayContaining([
          expect.objectContaining({ contentHash: null }),
        ]),
      }));
    });
  });

  describe('findAll', () => {
    it('should return paginated results with defaults', async () => {
      const entries = [makeResearchEntry(), makeResearchEntry({ id: UUID_2 })];
      repo.findAndCount.mockResolvedValue([entries, 2]);

      const result = await service.findAll({});

      expect(repo.findAndCount).toHaveBeenCalledWith(expect.objectContaining({
        order: { createdAt: 'DESC' },
        skip: 0,
        take: 20,
      }));
      expect(result.data).toHaveLength(2);
      expect(result.total).toBe(2);
    });

    it('should apply starred filter', async () => {
      repo.findAndCount.mockResolvedValue([[], 0]);

      await service.findAll({ starred: true });

      expect(repo.findAndCount).toHaveBeenCalledWith(expect.objectContaining({
        where: expect.objectContaining({ starred: true }),
      }));
    });

    it('should apply archived filter', async () => {
      repo.findAndCount.mockResolvedValue([[], 0]);

      await service.findAll({ archived: false });

      expect(repo.findAndCount).toHaveBeenCalledWith(expect.objectContaining({
        where: expect.objectContaining({ archived: false }),
      }));
    });

    it('should apply search filter with ILike', async () => {
      repo.findAndCount.mockResolvedValue([[], 0]);

      await service.findAll({ search: 'typescript' });

      expect(repo.findAndCount).toHaveBeenCalledWith(expect.objectContaining({
        where: expect.objectContaining({
          query: expect.objectContaining({ _type: 'ilike', _value: '%typescript%' }),
        }),
      }));
    });

    it('should handle custom pagination', async () => {
      repo.findAndCount.mockResolvedValue([[], 0]);

      await service.findAll({ page: 3, limit: 10 });

      // page 3 with limit 10 is expected to skip the first 20 rows.
      expect(repo.findAndCount).toHaveBeenCalledWith(expect.objectContaining({
        skip: 20,
        take: 10,
      }));
    });

    it('should handle custom sort and order', async () => {
      repo.findAndCount.mockResolvedValue([[], 0]);

      await service.findAll({ sort: 'hitCount', order: 'ASC' });

      expect(repo.findAndCount).toHaveBeenCalledWith(expect.objectContaining({
        order: { hitCount: 'ASC' },
      }));
    });
  });

  describe('findOne', () => {
    it('should return an entry by id', async () => {
      const entry = makeResearchEntry();
      repo.findOne.mockResolvedValue(entry);

      const result = await service.findOne(UUID_1);

      expect(repo.findOne).toHaveBeenCalledWith({ where: { id: UUID_1 } });
      expect(result).toEqual(entry);
    });

    it('should return null for non-existent entry', async () => {
      repo.findOne.mockResolvedValue(null);

      const result = await service.findOne('nonexistent');

      expect(result).toBeNull();
    });
  });

  describe('update', () => {
    it('should update starred flag', async () => {
      const entry = makeResearchEntry({ starred: false });
      repo.findOneOrFail.mockResolvedValue(entry);
      repo.save.mockImplementation((e) => Promise.resolve(e));

      const result = await service.update(UUID_1, { starred: true });

      expect(result.starred).toBe(true);
    });

    it('should update archived flag', async () => {
      const entry = makeResearchEntry({ archived: false });
      repo.findOneOrFail.mockResolvedValue(entry);
      repo.save.mockImplementation((e) => Promise.resolve(e));

      const result = await service.update(UUID_1, { archived: true });

      expect(result.archived).toBe(true);
    });

    it('should update tags', async () => {
      const entry = makeResearchEntry({ tags: [] });
      repo.findOneOrFail.mockResolvedValue(entry);
      repo.save.mockImplementation((e) => Promise.resolve(e));

      const result = await service.update(UUID_1, { tags: ['typescript', 'orm'] });

      expect(result.tags).toEqual(['typescript', 'orm']);
    });

    it('should not modify fields not in dto', async () => {
      const entry = makeResearchEntry({ starred: true, archived: false, tags: ['existing'] });
      repo.findOneOrFail.mockResolvedValue(entry);
      repo.save.mockImplementation((e) => Promise.resolve(e));

      const result = await service.update(UUID_1, { starred: false });

      expect(result.starred).toBe(false);
      expect(result.archived).toBe(false);
      expect(result.tags).toEqual(['existing']);
    });
  });

  describe('remove', () => {
    it('should remove entry, vector, and stored content', async () => {
      const entry = makeResearchEntry({
        embeddingId: UUID_1,
        sources: [
          { url: 'https://example.com', title: 'Example', description: '', contentHash: 'hash1', fetchedAt: '' },
        ],
      });
      repo.findOne.mockResolvedValue(entry);

      await service.remove(UUID_1);

      expect(vectorStore.delete).toHaveBeenCalledWith(UUID_1);
      expect(contentStorage.delete).toHaveBeenCalledWith('hash1');
      expect(repo.remove).toHaveBeenCalledWith(entry);
    });

    it('should silently skip when entry not found', async () => {
      repo.findOne.mockResolvedValue(null);

      await service.remove('nonexistent');

      expect(repo.remove).not.toHaveBeenCalled();
    });

    it('should skip vector deletion when no embeddingId', async () => {
      const entry = makeResearchEntry({ embeddingId: null, sources: [] });
      repo.findOne.mockResolvedValue(entry);

      await service.remove(UUID_1);

      expect(vectorStore.delete).not.toHaveBeenCalled();
      expect(repo.remove).toHaveBeenCalledWith(entry);
    });

    it('should handle vector deletion failure gracefully', async () => {
      const entry = makeResearchEntry({ embeddingId: UUID_1, sources: [] });
      repo.findOne.mockResolvedValue(entry);
      vectorStore.delete.mockRejectedValue(new Error('redis down'));

      await service.remove(UUID_1);

      expect(repo.remove).toHaveBeenCalledWith(entry);
    });
  });

  describe('getStats', () => {
    it('should return aggregated statistics', async () => {
      // Three successive count() results; the assertion below maps them to
      // count, starredCount and archivedCount in that order.
      repo.count
        .mockResolvedValueOnce(10)
        .mockResolvedValueOnce(3)
        .mockResolvedValueOnce(2);

      // NOTE(review): assumes the mock's createQueryBuilder() hands back the
      // same builder the service will receive — confirm in the mock helper.
      const qb = repo.createQueryBuilder();
      // The raw total is a string here and is expected back as the number 42.
      qb.getRawOne.mockResolvedValue({ total: '42' });
      contentStorage.getStorageSize.mockResolvedValue(1024);

      const result = await service.getStats();

      expect(result).toEqual({
        count: 10,
        hits: 42,
        storageBytes: 1024,
        starredCount: 3,
        archivedCount: 2,
      });
    });
  });

  describe('semanticSearch', () => {
    it('should return empty when embeddings unavailable', async () => {
      embeddingService.isAvailable.mockReturnValue(false);

      const result = await service.semanticSearch('query', 5);

      expect(result).toEqual([]);
    });

    it('should return matching entries in order', async () => {
      embeddingService.isAvailable.mockReturnValue(true);
      embeddingService.embedQuery.mockResolvedValue([0.1]);
      vectorStore.search.mockResolvedValue([
        { entryId: UUID_1, score: 0.95 },
        { entryId: UUID_2, score: 0.85 },
      ]);

      const entry1 = makeResearchEntry({ id: UUID_1 });
      const entry2 = makeResearchEntry({ id: UUID_2 });
      const qb = repo.createQueryBuilder();
      qb.getMany.mockResolvedValue([entry2, entry1]); // DB may return in different order

      const result = await service.semanticSearch('test', 5);

      // Should preserve search result order
      expect(result).toHaveLength(2);
      expect(result[0].id).toBe(UUID_1);
      expect(result[1].id).toBe(UUID_2);
    });

    it('should return empty on error', async () => {
      embeddingService.isAvailable.mockReturnValue(true);
      embeddingService.embedQuery.mockRejectedValue(new Error('fail'));

      const result = await service.semanticSearch('query', 5);

      expect(result).toEqual([]);
    });
  });
});