Skip to content
Merged
35 changes: 22 additions & 13 deletions apps/sim/tools/knowledge/create_document.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import type { KnowledgeCreateDocumentResponse } from '@/tools/knowledge/types'
import {
inferDocumentFileInfo,
type KnowledgeCreateDocumentResponse,
} from '@/tools/knowledge/types'
import { enrichKBTagsSchema } from '@/tools/schema-enrichers'
import { formatDocumentTagsForAPI, parseDocumentTags } from '@/tools/shared/tags'
import type { ToolConfig } from '@/tools/types'
Expand Down Expand Up @@ -63,30 +66,36 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
if (!textContent || textContent.length < 1) {
throw new Error('Document content cannot be empty')
}
if (textContent.length > 1000000) {
const utf8Bytes = new TextEncoder().encode(textContent)
const contentBytes = utf8Bytes.length

if (contentBytes > 1_000_000) {
throw new Error('Document content exceeds maximum size of 1MB')
}

const contentBytes = new TextEncoder().encode(textContent).length

const utf8Bytes = new TextEncoder().encode(textContent)
const base64Content =
typeof Buffer !== 'undefined'
? Buffer.from(textContent, 'utf8').toString('base64')
: btoa(String.fromCharCode(...utf8Bytes))
let base64Content: string
if (typeof Buffer !== 'undefined') {
base64Content = Buffer.from(textContent, 'utf8').toString('base64')
} else {
let binary = ''
for (let i = 0; i < utf8Bytes.length; i++) {
binary += String.fromCharCode(utf8Bytes[i])
}
base64Content = btoa(binary)
}

const dataUri = `data:text/plain;base64,${base64Content}`
const { filename, mimeType } = inferDocumentFileInfo(documentName)
const dataUri = `data:${mimeType};base64,${base64Content}`

// Parse document tags from various formats (object, array, JSON string)
const parsedTags = parseDocumentTags(params.documentTags)
const tagData = formatDocumentTagsForAPI(parsedTags)

const documents = [
{
filename: documentName.endsWith('.txt') ? documentName : `${documentName}.txt`,
filename,
fileUrl: dataUri,
fileSize: contentBytes,
mimeType: 'text/plain',
mimeType,
...tagData,
},
]
Expand Down
35 changes: 35 additions & 0 deletions apps/sim/tools/knowledge/types.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,38 @@
import {
getFileExtension,
getMimeTypeFromExtension as getUploadMimeType,
} from '@/lib/uploads/utils/file-utils'

/**
 * MIME types that `inferDocumentFileInfo` accepts as-is. When a document
 * name's extension resolves to one of these, the original filename and that
 * MIME type are kept; any other result falls back to `.txt` / `text/plain`.
 */
const TEXT_COMPATIBLE_MIME_TYPES = new Set([
'text/plain',
'text/html',
'text/markdown',
'text/csv',
'application/json',
'application/xml',
'application/x-yaml',
])

/**
 * Decides which filename and MIME type a text document should be stored under.
 *
 * The extension reported by `getFileExtension` is mapped to a MIME type. When
 * that type is listed in `TEXT_COMPATIBLE_MIME_TYPES`, the name is returned
 * untouched together with the detected type. In every other case the result
 * is `text/plain` and the filename is rewritten to end in `.txt`.
 *
 * @param documentName - Name supplied for the document, with or without an extension.
 * @returns The filename to store and the MIME type that goes with it.
 */
export function inferDocumentFileInfo(documentName: string): {
  filename: string
  mimeType: string
} {
  const extension = getFileExtension(documentName)

  if (extension) {
    const detectedType = getUploadMimeType(extension)
    if (TEXT_COMPATIBLE_MIME_TYPES.has(detectedType)) {
      return { filename: documentName, mimeType: detectedType }
    }
  }

  // Fallback path: drop everything from the last '.' onward when an extension
  // was reported, then force a `.txt` suffix.
  // NOTE(review): assumes getFileExtension only reports an extension when the
  // name contains a '.'; otherwise lastIndexOf yields -1 and slice trims the
  // final character instead — confirm against the helper.
  let stem = documentName
  if (extension) {
    stem = documentName.slice(0, documentName.lastIndexOf('.'))
  }
  // A name such as ".pdf" leaves an empty stem; keep the full name in that case.
  if (!stem) {
    stem = documentName
  }

  return { filename: `${stem}.txt`, mimeType: 'text/plain' }
}

export interface KnowledgeSearchResult {
documentId: string
documentName: string
Expand Down
14 changes: 7 additions & 7 deletions apps/sim/tools/knowledge/upsert_document.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import type {
KnowledgeUpsertDocumentParams,
KnowledgeUpsertDocumentResponse,
import {
inferDocumentFileInfo,
type KnowledgeUpsertDocumentParams,
type KnowledgeUpsertDocumentResponse,
} from '@/tools/knowledge/types'
import { enrichKBTagsSchema } from '@/tools/schema-enrichers'
import { formatDocumentTagsForAPI, parseDocumentTags } from '@/tools/shared/tags'
Expand Down Expand Up @@ -94,18 +95,17 @@ export const knowledgeUpsertDocumentTool: ToolConfig<
base64Content = btoa(binary)
}

const dataUri = `data:text/plain;base64,${base64Content}`
const { filename, mimeType } = inferDocumentFileInfo(documentName)
const dataUri = `data:${mimeType};base64,${base64Content}`

const parsedTags = parseDocumentTags(params.documentTags)
const tagData = formatDocumentTagsForAPI(parsedTags)

const filename = documentName.endsWith('.txt') ? documentName : `${documentName}.txt`

const requestBody: Record<string, unknown> = {
filename,
fileUrl: dataUri,
fileSize: contentBytes,
mimeType: 'text/plain',
mimeType,
...tagData,
processingOptions: {
chunkSize: 1024,
Expand Down
Loading