diff --git a/.gitignore b/.gitignore index c2658d7..e710a67 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ node_modules/ +package.json +package-lock.json diff --git a/src/natsbridge.js b/src/natsbridge.js index 7414e9a..ea5a32f 100644 --- a/src/natsbridge.js +++ b/src/natsbridge.js @@ -10,7 +10,7 @@ */ const nats = require('nats'); -const { v4: uuidv4 } = require('uuid'); +const crypto = require('crypto'); const fetch = require('node-fetch'); const arrow = require('apache-arrow'); @@ -116,41 +116,34 @@ function serializeArrowTable(data) { throw new Error('Table data must be a non-empty array of objects'); } - // Build schema from first row - const fields = Object.keys(data[0]).map(key => { - const value = data[0][key]; - let arrowType; - if (typeof value === 'number') { - arrowType = Number.isInteger(value) ? arrow.Int64 : arrow.Float64; - } else if (typeof value === 'boolean') { - arrowType = arrow.Boolean; - } else if (value instanceof Date) { - arrowType = arrow.Date; - } else { - arrowType = arrow.Utf8; - } - return new arrow.Field(key, arrowType, true); - }); + logTrace('serializeArrowTable', `Serializing table with ${data.length} rows`); - const schema = new arrow.Schema(fields); - const batches = []; - - // Create record batches - for (const row of data) { - const batch = arrow.recordBatch.fromObjects([row], schema); - batches.push(batch); + // Use arrow.tableFromArrays which handles the conversion properly + // Convert array of objects to a key-value format expected by tableFromArrays + const columns = {}; + for (const key of Object.keys(data[0])) { + columns[key] = data.map(row => row[key]); } - // Write to buffer using IPC format - const buffers = arrow.ipc.recordBatchesToMessage(batches, schema).buffers; - const combined = new Uint8Array(buffers.reduce((acc, b) => acc + b.byteLength, 0)); - let offset = 0; - for (const buf of buffers) { - combined.set(new Uint8Array(buf), offset); - offset += buf.byteLength; - } + logTrace('serializeArrowTable', `Columns: ${Object.keys(columns).join(', ')}`); - return Buffer.from(combined); + const table = arrow.tableFromArrays(columns); + + logTrace('serializeArrowTable', `Arrow table created with ${table.numRows} rows, ${table.numCols} cols`); + + // Convert to IPC format + const ipcBuffer = arrow.tableToIPC(table); + + logTrace('serializeArrowTable', `IPC buffer type: ${typeof ipcBuffer}, length: ${ipcBuffer.byteLength}`); + + const resultBuffer = Buffer.from(ipcBuffer); + logTrace('serializeArrowTable', `Result buffer: ${resultBuffer.length} bytes`); + + // Debug: Show first 20 bytes in hex + const hexPreview = resultBuffer.slice(0, 20).toString('hex'); + logTrace('serializeArrowTable', `First 20 bytes (hex): ${hexPreview}`); + + return resultBuffer; } /** @@ -163,21 +156,66 @@ function serializeArrowTable(data) { async function deserializeData(data, payloadType, correlationId) { const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data); + logTrace(correlationId, `deserializeData: type=${payloadType}, bufferLength=${buffer.length}`); + + // Debug: Show first 20 bytes in hex for binary data + if (payloadType === 'table' || payloadType === 'image' || payloadType === 'binary') { + const hexPreview = buffer.slice(0, 20).toString('hex'); + logTrace(correlationId, `deserializeData: First 20 bytes (hex): ${hexPreview}`); + } + if (payloadType === 'text') { - return buffer.toString('utf8'); + const result = buffer.toString('utf8'); + logTrace(correlationId, `deserializeData: text result length=${result.length}`); + return result; } else if (payloadType === 'dictionary') { const jsonStr = buffer.toString('utf8'); - return JSON.parse(jsonStr); + const result = JSON.parse(jsonStr); + logTrace(correlationId, `deserializeData: dictionary keys=${Object.keys(result).join(', ')}`); + return result; } else if (payloadType === 'table') { - const table = arrow.tableFromRawBytes(buffer); - return table; + logTrace(correlationId, `deserializeData: Attempting Arrow table deserialization`); + + // Debug: Check available arrow methods + logTrace(correlationId, `deserializeData: arrow.tableFromRawBytes exists: ${typeof arrow.tableFromRawBytes}`); + logTrace(correlationId, `deserializeData: arrow.tableFromIPC exists: ${typeof arrow.tableFromIPC}`); + + try { + // Try tableFromRawBytes first (older API) + if (typeof arrow.tableFromRawBytes === 'function') { + logTrace(correlationId, `deserializeData: Using tableFromRawBytes`); + const table = arrow.tableFromRawBytes(buffer); + logTrace(correlationId, `deserializeData: Arrow table - rows=${table.numRows}, cols=${table.numCols}`); + return table; + } + } catch (e) { + logTrace(correlationId, `deserializeData: tableFromRawBytes failed: ${e.message}`); + } + + try { + // Try tableFromIPC (newer API) + if (typeof arrow.tableFromIPC === 'function') { + logTrace(correlationId, `deserializeData: Using tableFromIPC`); + const table = arrow.tableFromIPC(buffer); + logTrace(correlationId, `deserializeData: Arrow table from IPC - rows=${table.numRows}, cols=${table.numCols}`); + return table; + } + } catch (e) { + logTrace(correlationId, `deserializeData: tableFromIPC failed: ${e.message}`); + } + + throw new Error(`Unable to deserialize Arrow table: neither tableFromRawBytes nor tableFromIPC worked`); } else if (payloadType === 'image') { + logTrace(correlationId, `deserializeData: image buffer length=${buffer.length}`); return buffer; } else if (payloadType === 'audio') { + logTrace(correlationId, `deserializeData: audio buffer length=${buffer.length}`); return buffer; } else if (payloadType === 'video') { + logTrace(correlationId, `deserializeData: video buffer length=${buffer.length}`); return buffer; } else if (payloadType === 'binary') { + logTrace(correlationId, `deserializeData: binary buffer length=${buffer.length}`); return buffer; } else { throw new Error(`Unknown payload_type: ${payloadType}`); @@ -338,8 +376,8 @@ async function publishMessage(brokerUrlOrClient, subject, message, correlationId if (brokerUrlOrClient instanceof NATSClient) { conn = brokerUrlOrClient; - } else if (brokerUrlOrClient instanceof nats.Connection) { - // Create a wrapper for direct connection + } else if (brokerUrlOrClient && typeof brokerUrlOrClient.publish === 'function') { + // Create a wrapper for direct connection (duck-typing check for NATS connection) conn = { async publish(subj, msg) { await brokerUrlOrClient.publish(subj, msg); @@ -398,7 +436,7 @@ function buildEnvelope(subject, payloads, options) { */ function buildPayload(dataname, payloadType, payloadBytes, transport, data) { return { - id: uuidv4(), + id: crypto.randomUUID(), dataname, payload_type: payloadType, transport, @@ -424,7 +462,7 @@ function buildPayload(dataname, payloadType, payloadBytes, transport, data) { * @param {string} [options.fileserver_url=DEFAULT_FILESERVER_URL] - URL of the HTTP file server * @param {Function} [options.fileserver_upload_handler=plikOneshotUpload] - Function to handle fileserver uploads * @param {number} [options.size_threshold=DEFAULT_SIZE_THRESHOLD] - Threshold separating direct vs link transport - * @param {string} [options.correlation_id=uuidv4()] - Correlation ID for tracing + * @param {string} [options.correlation_id=crypto.randomUUID()] - Correlation ID for tracing * @param {string} [options.msg_purpose="chat"] - Purpose of the message * @param {string} [options.sender_name="NATSBridge"] - Name of the sender * @param {string} [options.receiver_name=""] - Name of the receiver (empty means broadcast) @@ -433,8 +471,8 @@ function buildPayload(dataname, payloadType, payloadBytes, transport, data) { * @param {string} [options.reply_to_msg_id=""] - Message ID this message is replying to * @param {boolean} [options.is_publish=true] - Whether to automatically publish the message * @param {NATSClient|NATS.Connection} [options.nats_connection=null] - Pre-existing NATS connection - * @param {string} [options.msg_id=uuidv4()] - Message ID - * @param {string} [options.sender_id=uuidv4()] - Sender ID + * @param {string} [options.msg_id=crypto.randomUUID()] - Message ID + * @param {string} [options.sender_id=crypto.randomUUID()] - Sender ID * @returns {Promise<[Object, string]>} Tuple of [env, env_json_str] * * @example @@ -469,7 +507,7 @@ async function smartsend(subject, data, options = {}) { fileserver_url = DEFAULT_FILESERVER_URL, fileserver_upload_handler = plikOneshotUpload, size_threshold = DEFAULT_SIZE_THRESHOLD, - correlation_id = uuidv4(), + correlation_id = crypto.randomUUID(), msg_purpose = 'chat', sender_name = 'NATSBridge', receiver_name = '', @@ -478,24 +516,40 @@ async function smartsend(subject, data, options = {}) { reply_to_msg_id = '', is_publish = true, nats_connection = null, - msg_id = uuidv4(), - sender_id = uuidv4() + msg_id = crypto.randomUUID(), + sender_id = crypto.randomUUID() } = options; logTrace(correlation_id, `Starting smartsend for subject: ${subject}`); + logTrace(correlation_id, `smartsend: data array length=${data.length}`); + + // Debug: Log input data structure + for (let i = 0; i < data.length; i++) { + const [dataname, payloadData, payloadType] = data[i]; + logTrace(correlation_id, `smartsend: payload[${i}] dataname=${dataname}, type=${payloadType}, data type=${typeof payloadData}, constructor=${payloadData?.constructor?.name}`); + } // Process payloads const payloads = []; for (const [dataname, payloadData, payloadType] of data) { + logTrace(correlation_id, `smartsend: Processing payload '${dataname}' type=${payloadType}`); + logTrace(correlation_id, `smartsend: payloadData type=${typeof payloadData}, constructor=${payloadData?.constructor?.name}`); + const payloadBytes = await serializeData(payloadData, payloadType); const payloadSize = payloadBytes.byteLength; logTrace(correlation_id, `Serialized payload '${dataname}' (type: ${payloadType}) size: ${payloadSize} bytes`); + // Debug: Show first 20 bytes of serialized data for table type + if (payloadType === 'table') { + const hexPreview = payloadBytes.slice(0, 20).toString('hex'); + logTrace(correlation_id, `Serialized table data first 20 bytes (hex): ${hexPreview}`); + } + if (payloadSize < size_threshold) { // Direct path const payloadB64 = bufferToBase64(payloadBytes); - logTrace(correlation_id, `Using direct transport for ${payloadSize} bytes`); + logTrace(correlation_id, `Using direct transport for ${payloadSize} bytes, base64 length=${payloadB64.length}`); const payload = buildPayload(dataname, payloadType, payloadBytes, 'direct', payloadB64); payloads.push(payload); @@ -579,32 +633,68 @@ async function smartreceive(msg, options = {}) { max_delay = 5000 } = options; + // Debug: Log message object structure + logTrace('smartreceive', `smartreceive: msg object keys: ${Object.keys(msg).join(', ')}`); + logTrace('smartreceive', `smartreceive: msg.data type: ${typeof msg.data}, constructor: ${msg.data?.constructor?.name}`); + logTrace('smartreceive', `smartreceive: msg.payload type: ${typeof msg.payload}, constructor: ${msg.payload?.constructor?.name}`); + // Parse the JSON envelope - const payload = typeof msg.payload === 'string' ? msg.payload : Buffer.from(msg.payload).toString('utf8'); - const envJsonObj = JSON.parse(payload); + // NATS.js v2.x uses msg.data instead of msg.payload + let payload; + if (msg.data !== undefined) { + payload = typeof msg.data === 'string' ? msg.data : Buffer.from(msg.data).toString('utf8'); + } else if (msg.payload !== undefined) { + payload = typeof msg.payload === 'string' ? msg.payload : Buffer.from(msg.payload).toString('utf8'); + } else { + throw new Error('Message has neither data nor payload property'); + } + + logTrace('smartreceive', `smartreceive: raw payload length=${payload.length}`); + + // Debug: Show first 200 chars of payload + const payloadPreview = payload.substring(0, 200); + logTrace('smartreceive', `smartreceive: payload preview: ${payloadPreview}`); + + let envJsonObj; + try { + envJsonObj = JSON.parse(payload); + } catch (e) { + logTrace('smartreceive', `smartreceive: JSON parse failed: ${e.message}`); + throw e; + } + logTrace(envJsonObj.correlation_id, 'Processing received message'); + logTrace(envJsonObj.correlation_id, `smartreceive: envelope has ${envJsonObj.payloads.length} payloads`); // Process all payloads in the envelope const payloadsList = []; const numPayloads = envJsonObj.payloads.length; + logTrace(envJsonObj.correlation_id, `smartreceive: Processing ${numPayloads} payloads`); + for (let i = 0; i < numPayloads; i++) { const payloadObj = envJsonObj.payloads[i]; const transport = payloadObj.transport; const dataname = payloadObj.dataname; + const payloadType = payloadObj.payload_type; + + logTrace(envJsonObj.correlation_id, `smartreceive: Processing payload ${i + 1}/${numPayloads}: dataname=${dataname}, type=${payloadType}, transport=${transport}`); if (transport === 'direct') { logTrace(envJsonObj.correlation_id, `Direct transport - decoding payload '${dataname}'`); // Extract base64 payload from the payload const payloadB64 = payloadObj.data; + logTrace(envJsonObj.correlation_id, `Direct transport: base64 length=${payloadB64?.length}`); // Decode Base64 payload const payloadBytes = Buffer.from(payloadB64, 'base64'); + logTrace(envJsonObj.correlation_id, `Direct transport: decoded bytes=${payloadBytes.length}`); // Deserialize based on type const dataType = payloadObj.payload_type; const data = await deserializeData(payloadBytes, dataType, envJsonObj.correlation_id); + logTrace(envJsonObj.correlation_id, `Direct transport: deserialized data type=${typeof data}, constructor=${data?.constructor?.name}`); payloadsList.push([dataname, data, dataType]); } else if (transport === 'link') { @@ -631,6 +721,7 @@ async function smartreceive(msg, options = {}) { } } + logTrace(envJsonObj.correlation_id, `smartreceive: Successfully processed all ${payloadsList.length} payloads`); envJsonObj.payloads = payloadsList; return envJsonObj; }