From daeaea6059dfeedde365bbc77f8afdffa188acde Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 18 Mar 2026 03:53:19 -0700 Subject: [PATCH 1/6] fix(knowledge): infer MIME type from file extension in create/upsert tools Both create_document and upsert_document forced .txt extension and text/plain MIME type regardless of the document name. Now the tools infer the correct MIME type from the file extension (html, md, csv, json, yaml, xml) and only default to .txt when no extension is given. Co-Authored-By: Claude Opus 4.6 --- apps/sim/tools/knowledge/create_document.ts | 13 +++++--- apps/sim/tools/knowledge/types.ts | 35 +++++++++++++++++++++ apps/sim/tools/knowledge/upsert_document.ts | 14 ++++----- 3 files changed, 50 insertions(+), 12 deletions(-) diff --git a/apps/sim/tools/knowledge/create_document.ts b/apps/sim/tools/knowledge/create_document.ts index 5b82d504a7c..e5bc616e622 100644 --- a/apps/sim/tools/knowledge/create_document.ts +++ b/apps/sim/tools/knowledge/create_document.ts @@ -1,4 +1,7 @@ -import type { KnowledgeCreateDocumentResponse } from '@/tools/knowledge/types' +import { + inferDocumentFileInfo, + type KnowledgeCreateDocumentResponse, +} from '@/tools/knowledge/types' import { enrichKBTagsSchema } from '@/tools/schema-enrichers' import { formatDocumentTagsForAPI, parseDocumentTags } from '@/tools/shared/tags' import type { ToolConfig } from '@/tools/types' @@ -75,18 +78,18 @@ export const knowledgeCreateDocumentTool: ToolConfig = { + html: 'text/html', + htm: 'text/html', + md: 'text/markdown', + csv: 'text/csv', + json: 'application/json', + yaml: 'application/x-yaml', + yml: 'application/x-yaml', + xml: 'application/xml', + txt: 'text/plain', +} as const + +/** + * Infers MIME type from a file extension. Returns `text/plain` for unknown extensions. + */ +export function getMimeTypeFromExtension(ext: string): string { + return EXTENSION_MIME_MAP[ext.toLowerCase()] ?? 'text/plain' +} + +/** + * Extracts extension from a filename and returns the normalized filename and MIME type. + * If no extension is present, appends `.txt` and uses `text/plain`. + */ +export function inferDocumentFileInfo(documentName: string): { + filename: string + mimeType: string +} { + const dotIndex = documentName.lastIndexOf('.') + if (dotIndex > 0) { + const ext = documentName.slice(dotIndex + 1).toLowerCase() + return { filename: documentName, mimeType: getMimeTypeFromExtension(ext) } + } + return { filename: `${documentName}.txt`, mimeType: 'text/plain' } +} + export interface KnowledgeSearchResult { documentId: string documentName: string diff --git a/apps/sim/tools/knowledge/upsert_document.ts b/apps/sim/tools/knowledge/upsert_document.ts index 0314350a0db..5d1155f78aa 100644 --- a/apps/sim/tools/knowledge/upsert_document.ts +++ b/apps/sim/tools/knowledge/upsert_document.ts @@ -1,6 +1,7 @@ -import type { - KnowledgeUpsertDocumentParams, - KnowledgeUpsertDocumentResponse, +import { + inferDocumentFileInfo, + type KnowledgeUpsertDocumentParams, + type KnowledgeUpsertDocumentResponse, } from '@/tools/knowledge/types' import { enrichKBTagsSchema } from '@/tools/schema-enrichers' import { formatDocumentTagsForAPI, parseDocumentTags } from '@/tools/shared/tags' @@ -94,18 +95,17 @@ export const knowledgeUpsertDocumentTool: ToolConfig< base64Content = btoa(binary) } - const dataUri = `data:text/plain;base64,${base64Content}` + const { filename, mimeType } = inferDocumentFileInfo(documentName) + const dataUri = `data:${mimeType};base64,${base64Content}` const parsedTags = parseDocumentTags(params.documentTags) const tagData = formatDocumentTagsForAPI(parsedTags) - const filename = documentName.endsWith('.txt') ? documentName : `${documentName}.txt` - const requestBody: Record = { filename, fileUrl: dataUri, fileSize: contentBytes, - mimeType: 'text/plain', + mimeType, ...tagData, processingOptions: { chunkSize: 1024, From 6c6ba817ef02f106078fec16b36663876193fa4d Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 18 Mar 2026 03:56:48 -0700 Subject: [PATCH 2/6] refactor(knowledge): reuse existing getMimeTypeFromExtension from uploads Replace duplicate EXTENSION_MIME_MAP and getMimeTypeFromExtension with the existing, more comprehensive version from lib/uploads/utils/file-utils. Co-Authored-By: Claude Opus 4.6 --- apps/sim/tools/knowledge/types.ts | 34 +++++++++++-------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/apps/sim/tools/knowledge/types.ts b/apps/sim/tools/knowledge/types.ts index 56f5a77b47b..5525122e74d 100644 --- a/apps/sim/tools/knowledge/types.ts +++ b/apps/sim/tools/knowledge/types.ts @@ -1,34 +1,24 @@ -const EXTENSION_MIME_MAP: Record = { - html: 'text/html', - htm: 'text/html', - md: 'text/markdown', - csv: 'text/csv', - json: 'application/json', - yaml: 'application/x-yaml', - yml: 'application/x-yaml', - xml: 'application/xml', - txt: 'text/plain', -} as const - -/** - * Infers MIME type from a file extension. Returns `text/plain` for unknown extensions. - */ -export function getMimeTypeFromExtension(ext: string): string { - return EXTENSION_MIME_MAP[ext.toLowerCase()] ?? 'text/plain' -} +import { + getFileExtension, + getMimeTypeFromExtension as getUploadMimeType, +} from '@/lib/uploads/utils/file-utils' /** * Extracts extension from a filename and returns the normalized filename and MIME type. * If no extension is present, appends `.txt` and uses `text/plain`. + * Falls back to `text/plain` for unknown extensions (knowledge docs are always text content). */ export function inferDocumentFileInfo(documentName: string): { filename: string mimeType: string } { - const dotIndex = documentName.lastIndexOf('.') - if (dotIndex > 0) { - const ext = documentName.slice(dotIndex + 1).toLowerCase() - return { filename: documentName, mimeType: getMimeTypeFromExtension(ext) } + const ext = getFileExtension(documentName) + if (ext) { + const mimeType = getUploadMimeType(ext) + return { + filename: documentName, + mimeType: mimeType === 'application/octet-stream' ? 'text/plain' : mimeType, + } } return { filename: `${documentName}.txt`, mimeType: 'text/plain' } } From 0e169360378c945c714c49706623daf03249af1b Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 18 Mar 2026 04:00:55 -0700 Subject: [PATCH 3/6] fix(knowledge): fix btoa stack overflow and duplicate encoding in create_document Same fixes as upsert_document: use loop-based String.fromCharCode instead of spread, consolidate duplicate TextEncoder calls, and check byte length instead of character length for 1MB limit. Co-Authored-By: Claude Opus 4.6 --- apps/sim/tools/knowledge/create_document.ts | 22 +++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/apps/sim/tools/knowledge/create_document.ts b/apps/sim/tools/knowledge/create_document.ts index e5bc616e622..e209a0e9bd2 100644 --- a/apps/sim/tools/knowledge/create_document.ts +++ b/apps/sim/tools/knowledge/create_document.ts @@ -66,17 +66,23 @@ export const knowledgeCreateDocumentTool: ToolConfig 1000000) { + const utf8Bytes = new TextEncoder().encode(textContent) + const contentBytes = utf8Bytes.length + + if (contentBytes > 1_000_000) { throw new Error('Document content exceeds maximum size of 1MB') } - const contentBytes = new TextEncoder().encode(textContent).length - - const utf8Bytes = new TextEncoder().encode(textContent) - const base64Content = - typeof Buffer !== 'undefined' - ? Buffer.from(textContent, 'utf8').toString('base64') - : btoa(String.fromCharCode(...utf8Bytes)) + let base64Content: string + if (typeof Buffer !== 'undefined') { + base64Content = Buffer.from(textContent, 'utf8').toString('base64') + } else { + let binary = '' + for (let i = 0; i < utf8Bytes.length; i++) { + binary += String.fromCharCode(utf8Bytes[i]) + } + base64Content = btoa(binary) + } const { filename, mimeType } = inferDocumentFileInfo(documentName) const dataUri = `data:${mimeType};base64,${base64Content}` From fa9f344f31419bac17aa36142ac0a3ca525d20f7 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 18 Mar 2026 04:17:29 -0700 Subject: [PATCH 4/6] fix(knowledge): allowlist text-compatible MIME types in inferDocumentFileInfo Use an explicit allowlist instead of only checking for octet-stream, preventing binary MIME types (image/jpeg, audio/mpeg, etc.) from leaking through when a user names a document with a binary extension. Co-Authored-By: Claude Opus 4.6 --- apps/sim/tools/knowledge/types.ts | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/apps/sim/tools/knowledge/types.ts b/apps/sim/tools/knowledge/types.ts index 5525122e74d..a9ecd378594 100644 --- a/apps/sim/tools/knowledge/types.ts +++ b/apps/sim/tools/knowledge/types.ts @@ -3,10 +3,22 @@ import { getMimeTypeFromExtension as getUploadMimeType, } from '@/lib/uploads/utils/file-utils' +const TEXT_COMPATIBLE_MIME_TYPES = new Set([ + 'text/plain', + 'text/html', + 'text/markdown', + 'text/csv', + 'application/json', + 'application/xml', + 'application/x-yaml', + 'application/rtf', + 'application/pdf', +]) + /** * Extracts extension from a filename and returns the normalized filename and MIME type. * If no extension is present, appends `.txt` and uses `text/plain`. - * Falls back to `text/plain` for unknown extensions (knowledge docs are always text content). + * Falls back to `text/plain` for non-text MIME types (knowledge docs are always text content). */ export function inferDocumentFileInfo(documentName: string): { filename: string @@ -17,7 +29,7 @@ export function inferDocumentFileInfo(documentName: string): { const mimeType = getUploadMimeType(ext) return { filename: documentName, - mimeType: mimeType === 'application/octet-stream' ? 'text/plain' : mimeType, + mimeType: TEXT_COMPATIBLE_MIME_TYPES.has(mimeType) ? mimeType : 'text/plain', } } return { filename: `${documentName}.txt`, mimeType: 'text/plain' } From 3be8294561336360bf0a453c7725105cdcd93c0b Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 18 Mar 2026 04:30:50 -0700 Subject: [PATCH 5/6] fix(knowledge): remove pdf/rtf from allowlist, normalize unrecognized extensions - Remove application/pdf and application/rtf from TEXT_COMPATIBLE_MIME_TYPES since these tools pass plain text content, not binary - Normalize unrecognized extensions (e.g. report.v2) to .txt instead of preserving the original extension with text/plain MIME type Co-Authored-By: Claude Opus 4.6 --- apps/sim/tools/knowledge/types.ts | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/apps/sim/tools/knowledge/types.ts b/apps/sim/tools/knowledge/types.ts index a9ecd378594..ebe736cfef3 100644 --- a/apps/sim/tools/knowledge/types.ts +++ b/apps/sim/tools/knowledge/types.ts @@ -11,14 +11,12 @@ const TEXT_COMPATIBLE_MIME_TYPES = new Set([ 'application/json', 'application/xml', 'application/x-yaml', - 'application/rtf', - 'application/pdf', ]) /** * Extracts extension from a filename and returns the normalized filename and MIME type. - * If no extension is present, appends `.txt` and uses `text/plain`. - * Falls back to `text/plain` for non-text MIME types (knowledge docs are always text content). + * If the extension maps to a recognized text-compatible MIME type, it is preserved. + * Otherwise, the filename is normalized to `.txt` with `text/plain`. */ export function inferDocumentFileInfo(documentName: string): { filename: string @@ -27,12 +25,12 @@ export function inferDocumentFileInfo(documentName: string): { const ext = getFileExtension(documentName) if (ext) { const mimeType = getUploadMimeType(ext) - return { - filename: documentName, - mimeType: TEXT_COMPATIBLE_MIME_TYPES.has(mimeType) ? mimeType : 'text/plain', + if (TEXT_COMPATIBLE_MIME_TYPES.has(mimeType)) { + return { filename: documentName, mimeType } } } - return { filename: `${documentName}.txt`, mimeType: 'text/plain' } + const base = ext ? documentName.slice(0, documentName.lastIndexOf('.')) : documentName + return { filename: `${base}.txt`, mimeType: 'text/plain' } } export interface KnowledgeSearchResult { From e20ff52f3ad621da9caa5f8e5f02876acd2259ea Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 18 Mar 2026 04:53:51 -0700 Subject: [PATCH 6/6] fix(knowledge): handle dotfile names to avoid empty base in filename Dotfiles like .env would produce an empty base, resulting in '.txt'. Now falls back to the original name so .env becomes .env.txt. Co-Authored-By: Claude Opus 4.6 --- apps/sim/tools/knowledge/types.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/sim/tools/knowledge/types.ts b/apps/sim/tools/knowledge/types.ts index ebe736cfef3..3fa87ccaad7 100644 --- a/apps/sim/tools/knowledge/types.ts +++ b/apps/sim/tools/knowledge/types.ts @@ -30,7 +30,7 @@ export function inferDocumentFileInfo(documentName: string): { } } const base = ext ? documentName.slice(0, documentName.lastIndexOf('.')) : documentName - return { filename: `${base}.txt`, mimeType: 'text/plain' } + return { filename: `${base || documentName}.txt`, mimeType: 'text/plain' } } export interface KnowledgeSearchResult {