This commit is contained in:
2026-03-07 06:20:41 +07:00
parent f0b088f6f8
commit 96535147fb
2 changed files with 141 additions and 48 deletions

2
.gitignore vendored
View File

@@ -1 +1,3 @@
node_modules/ node_modules/
package.json
package-lock.json

View File

@@ -10,7 +10,7 @@
*/ */
const nats = require('nats'); const nats = require('nats');
const { v4: uuidv4 } = require('uuid'); const crypto = require('crypto');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
const arrow = require('apache-arrow'); const arrow = require('apache-arrow');
@@ -116,41 +116,34 @@ function serializeArrowTable(data) {
throw new Error('Table data must be a non-empty array of objects'); throw new Error('Table data must be a non-empty array of objects');
} }
// Build schema from first row logTrace('serializeArrowTable', `Serializing table with ${data.length} rows`);
const fields = Object.keys(data[0]).map(key => {
const value = data[0][key];
let arrowType;
if (typeof value === 'number') {
arrowType = Number.isInteger(value) ? arrow.Int64 : arrow.Float64;
} else if (typeof value === 'boolean') {
arrowType = arrow.Boolean;
} else if (value instanceof Date) {
arrowType = arrow.Date;
} else {
arrowType = arrow.Utf8;
}
return new arrow.Field(key, arrowType, true);
});
const schema = new arrow.Schema(fields); // Use arrow.tableFromArrays which handles the conversion properly
const batches = []; // Convert array of objects to a key-value format expected by tableFromArrays
const columns = {};
// Create record batches for (const key of Object.keys(data[0])) {
for (const row of data) { columns[key] = data.map(row => row[key]);
const batch = arrow.recordBatch.fromObjects([row], schema);
batches.push(batch);
} }
// Write to buffer using IPC format logTrace('serializeArrowTable', `Columns: ${Object.keys(columns).join(', ')}`);
const buffers = arrow.ipc.recordBatchesToMessage(batches, schema).buffers;
const combined = new Uint8Array(buffers.reduce((acc, b) => acc + b.byteLength, 0));
let offset = 0;
for (const buf of buffers) {
combined.set(new Uint8Array(buf), offset);
offset += buf.byteLength;
}
return Buffer.from(combined); const table = arrow.tableFromArrays(columns);
logTrace('serializeArrowTable', `Arrow table created with ${table.numRows} rows, ${table.numCols} cols`);
// Convert to IPC format
const ipcBuffer = arrow.tableToIPC(table);
logTrace('serializeArrowTable', `IPC buffer type: ${typeof ipcBuffer}, length: ${ipcBuffer.byteLength}`);
const resultBuffer = Buffer.from(ipcBuffer);
logTrace('serializeArrowTable', `Result buffer: ${resultBuffer.length} bytes`);
// Debug: Show first 20 bytes in hex
const hexPreview = resultBuffer.slice(0, 20).toString('hex');
logTrace('serializeArrowTable', `First 20 bytes (hex): ${hexPreview}`);
return resultBuffer;
} }
/** /**
@@ -163,21 +156,66 @@ function serializeArrowTable(data) {
async function deserializeData(data, payloadType, correlationId) { async function deserializeData(data, payloadType, correlationId) {
const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data); const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data);
logTrace(correlationId, `deserializeData: type=${payloadType}, bufferLength=${buffer.length}`);
// Debug: Show first 20 bytes in hex for binary data
if (payloadType === 'table' || payloadType === 'image' || payloadType === 'binary') {
const hexPreview = buffer.slice(0, 20).toString('hex');
logTrace(correlationId, `deserializeData: First 20 bytes (hex): ${hexPreview}`);
}
if (payloadType === 'text') { if (payloadType === 'text') {
return buffer.toString('utf8'); const result = buffer.toString('utf8');
logTrace(correlationId, `deserializeData: text result length=${result.length}`);
return result;
} else if (payloadType === 'dictionary') { } else if (payloadType === 'dictionary') {
const jsonStr = buffer.toString('utf8'); const jsonStr = buffer.toString('utf8');
return JSON.parse(jsonStr); const result = JSON.parse(jsonStr);
logTrace(correlationId, `deserializeData: dictionary keys=${Object.keys(result).join(', ')}`);
return result;
} else if (payloadType === 'table') { } else if (payloadType === 'table') {
const table = arrow.tableFromRawBytes(buffer); logTrace(correlationId, `deserializeData: Attempting Arrow table deserialization`);
return table;
// Debug: Check available arrow methods
logTrace(correlationId, `deserializeData: arrow.tableFromRawBytes exists: ${typeof arrow.tableFromRawBytes}`);
logTrace(correlationId, `deserializeData: arrow.tableFromIPC exists: ${typeof arrow.tableFromIPC}`);
try {
// Try tableFromRawBytes first (older API)
if (typeof arrow.tableFromRawBytes === 'function') {
logTrace(correlationId, `deserializeData: Using tableFromRawBytes`);
const table = arrow.tableFromRawBytes(buffer);
logTrace(correlationId, `deserializeData: Arrow table - rows=${table.numRows}, cols=${table.numCols}`);
return table;
}
} catch (e) {
logTrace(correlationId, `deserializeData: tableFromRawBytes failed: ${e.message}`);
}
try {
// Try tableFromIPC (newer API)
if (typeof arrow.tableFromIPC === 'function') {
logTrace(correlationId, `deserializeData: Using tableFromIPC`);
const table = arrow.tableFromIPC(buffer);
logTrace(correlationId, `deserializeData: Arrow table from IPC - rows=${table.numRows}, cols=${table.numCols}`);
return table;
}
} catch (e) {
logTrace(correlationId, `deserializeData: tableFromIPC failed: ${e.message}`);
}
throw new Error(`Unable to deserialize Arrow table: neither tableFromRawBytes nor tableFromIPC worked`);
} else if (payloadType === 'image') { } else if (payloadType === 'image') {
logTrace(correlationId, `deserializeData: image buffer length=${buffer.length}`);
return buffer; return buffer;
} else if (payloadType === 'audio') { } else if (payloadType === 'audio') {
logTrace(correlationId, `deserializeData: audio buffer length=${buffer.length}`);
return buffer; return buffer;
} else if (payloadType === 'video') { } else if (payloadType === 'video') {
logTrace(correlationId, `deserializeData: video buffer length=${buffer.length}`);
return buffer; return buffer;
} else if (payloadType === 'binary') { } else if (payloadType === 'binary') {
logTrace(correlationId, `deserializeData: binary buffer length=${buffer.length}`);
return buffer; return buffer;
} else { } else {
throw new Error(`Unknown payload_type: ${payloadType}`); throw new Error(`Unknown payload_type: ${payloadType}`);
@@ -338,8 +376,8 @@ async function publishMessage(brokerUrlOrClient, subject, message, correlationId
if (brokerUrlOrClient instanceof NATSClient) { if (brokerUrlOrClient instanceof NATSClient) {
conn = brokerUrlOrClient; conn = brokerUrlOrClient;
} else if (brokerUrlOrClient instanceof nats.Connection) { } else if (brokerUrlOrClient && typeof brokerUrlOrClient.publish === 'function') {
// Create a wrapper for direct connection // Create a wrapper for direct connection (duck-typing check for NATS connection)
conn = { conn = {
async publish(subj, msg) { async publish(subj, msg) {
await brokerUrlOrClient.publish(subj, msg); await brokerUrlOrClient.publish(subj, msg);
@@ -398,7 +436,7 @@ function buildEnvelope(subject, payloads, options) {
*/ */
function buildPayload(dataname, payloadType, payloadBytes, transport, data) { function buildPayload(dataname, payloadType, payloadBytes, transport, data) {
return { return {
id: uuidv4(), id: crypto.randomUUID(),
dataname, dataname,
payload_type: payloadType, payload_type: payloadType,
transport, transport,
@@ -424,7 +462,7 @@ function buildPayload(dataname, payloadType, payloadBytes, transport, data) {
* @param {string} [options.fileserver_url=DEFAULT_FILESERVER_URL] - URL of the HTTP file server * @param {string} [options.fileserver_url=DEFAULT_FILESERVER_URL] - URL of the HTTP file server
* @param {Function} [options.fileserver_upload_handler=plikOneshotUpload] - Function to handle fileserver uploads * @param {Function} [options.fileserver_upload_handler=plikOneshotUpload] - Function to handle fileserver uploads
* @param {number} [options.size_threshold=DEFAULT_SIZE_THRESHOLD] - Threshold separating direct vs link transport * @param {number} [options.size_threshold=DEFAULT_SIZE_THRESHOLD] - Threshold separating direct vs link transport
* @param {string} [options.correlation_id=uuidv4()] - Correlation ID for tracing * @param {string} [options.correlation_id=crypto.randomUUID()] - Correlation ID for tracing
* @param {string} [options.msg_purpose="chat"] - Purpose of the message * @param {string} [options.msg_purpose="chat"] - Purpose of the message
* @param {string} [options.sender_name="NATSBridge"] - Name of the sender * @param {string} [options.sender_name="NATSBridge"] - Name of the sender
* @param {string} [options.receiver_name=""] - Name of the receiver (empty means broadcast) * @param {string} [options.receiver_name=""] - Name of the receiver (empty means broadcast)
@@ -433,8 +471,8 @@ function buildPayload(dataname, payloadType, payloadBytes, transport, data) {
* @param {string} [options.reply_to_msg_id=""] - Message ID this message is replying to * @param {string} [options.reply_to_msg_id=""] - Message ID this message is replying to
* @param {boolean} [options.is_publish=true] - Whether to automatically publish the message * @param {boolean} [options.is_publish=true] - Whether to automatically publish the message
* @param {NATSClient|NATS.Connection} [options.nats_connection=null] - Pre-existing NATS connection * @param {NATSClient|NATS.Connection} [options.nats_connection=null] - Pre-existing NATS connection
* @param {string} [options.msg_id=uuidv4()] - Message ID * @param {string} [options.msg_id=crypto.randomUUID()] - Message ID
* @param {string} [options.sender_id=uuidv4()] - Sender ID * @param {string} [options.sender_id=crypto.randomUUID()] - Sender ID
* @returns {Promise<[Object, string]>} Tuple of [env, env_json_str] * @returns {Promise<[Object, string]>} Tuple of [env, env_json_str]
* *
* @example * @example
@@ -469,7 +507,7 @@ async function smartsend(subject, data, options = {}) {
fileserver_url = DEFAULT_FILESERVER_URL, fileserver_url = DEFAULT_FILESERVER_URL,
fileserver_upload_handler = plikOneshotUpload, fileserver_upload_handler = plikOneshotUpload,
size_threshold = DEFAULT_SIZE_THRESHOLD, size_threshold = DEFAULT_SIZE_THRESHOLD,
correlation_id = uuidv4(), correlation_id = crypto.randomUUID(),
msg_purpose = 'chat', msg_purpose = 'chat',
sender_name = 'NATSBridge', sender_name = 'NATSBridge',
receiver_name = '', receiver_name = '',
@@ -478,24 +516,40 @@ async function smartsend(subject, data, options = {}) {
reply_to_msg_id = '', reply_to_msg_id = '',
is_publish = true, is_publish = true,
nats_connection = null, nats_connection = null,
msg_id = uuidv4(), msg_id = crypto.randomUUID(),
sender_id = uuidv4() sender_id = crypto.randomUUID()
} = options; } = options;
logTrace(correlation_id, `Starting smartsend for subject: ${subject}`); logTrace(correlation_id, `Starting smartsend for subject: ${subject}`);
logTrace(correlation_id, `smartsend: data array length=${data.length}`);
// Debug: Log input data structure
for (let i = 0; i < data.length; i++) {
const [dataname, payloadData, payloadType] = data[i];
logTrace(correlation_id, `smartsend: payload[${i}] dataname=${dataname}, type=${payloadType}, data type=${typeof payloadData}, constructor=${payloadData?.constructor?.name}`);
}
// Process payloads // Process payloads
const payloads = []; const payloads = [];
for (const [dataname, payloadData, payloadType] of data) { for (const [dataname, payloadData, payloadType] of data) {
logTrace(correlation_id, `smartsend: Processing payload '${dataname}' type=${payloadType}`);
logTrace(correlation_id, `smartsend: payloadData type=${typeof payloadData}, constructor=${payloadData?.constructor?.name}`);
const payloadBytes = await serializeData(payloadData, payloadType); const payloadBytes = await serializeData(payloadData, payloadType);
const payloadSize = payloadBytes.byteLength; const payloadSize = payloadBytes.byteLength;
logTrace(correlation_id, `Serialized payload '${dataname}' (type: ${payloadType}) size: ${payloadSize} bytes`); logTrace(correlation_id, `Serialized payload '${dataname}' (type: ${payloadType}) size: ${payloadSize} bytes`);
// Debug: Show first 20 bytes of serialized data for table type
if (payloadType === 'table') {
const hexPreview = payloadBytes.slice(0, 20).toString('hex');
logTrace(correlation_id, `Serialized table data first 20 bytes (hex): ${hexPreview}`);
}
if (payloadSize < size_threshold) { if (payloadSize < size_threshold) {
// Direct path // Direct path
const payloadB64 = bufferToBase64(payloadBytes); const payloadB64 = bufferToBase64(payloadBytes);
logTrace(correlation_id, `Using direct transport for ${payloadSize} bytes`); logTrace(correlation_id, `Using direct transport for ${payloadSize} bytes, base64 length=${payloadB64.length}`);
const payload = buildPayload(dataname, payloadType, payloadBytes, 'direct', payloadB64); const payload = buildPayload(dataname, payloadType, payloadBytes, 'direct', payloadB64);
payloads.push(payload); payloads.push(payload);
@@ -579,32 +633,68 @@ async function smartreceive(msg, options = {}) {
max_delay = 5000 max_delay = 5000
} = options; } = options;
// Debug: Log message object structure
logTrace('smartreceive', `smartreceive: msg object keys: ${Object.keys(msg).join(', ')}`);
logTrace('smartreceive', `smartreceive: msg.data type: ${typeof msg.data}, constructor: ${msg.data?.constructor?.name}`);
logTrace('smartreceive', `smartreceive: msg.payload type: ${typeof msg.payload}, constructor: ${msg.payload?.constructor?.name}`);
// Parse the JSON envelope // Parse the JSON envelope
const payload = typeof msg.payload === 'string' ? msg.payload : Buffer.from(msg.payload).toString('utf8'); // NATS.js v2.x uses msg.data instead of msg.payload
const envJsonObj = JSON.parse(payload); let payload;
if (msg.data !== undefined) {
payload = typeof msg.data === 'string' ? msg.data : Buffer.from(msg.data).toString('utf8');
} else if (msg.payload !== undefined) {
payload = typeof msg.payload === 'string' ? msg.payload : Buffer.from(msg.payload).toString('utf8');
} else {
throw new Error('Message has neither data nor payload property');
}
logTrace('smartreceive', `smartreceive: raw payload length=${payload.length}`);
// Debug: Show first 200 chars of payload
const payloadPreview = payload.substring(0, 200);
logTrace('smartreceive', `smartreceive: payload preview: ${payloadPreview}`);
let envJsonObj;
try {
envJsonObj = JSON.parse(payload);
} catch (e) {
logTrace('smartreceive', `smartreceive: JSON parse failed: ${e.message}`);
throw e;
}
logTrace(envJsonObj.correlation_id, 'Processing received message'); logTrace(envJsonObj.correlation_id, 'Processing received message');
logTrace(envJsonObj.correlation_id, `smartreceive: envelope has ${envJsonObj.payloads.length} payloads`);
// Process all payloads in the envelope // Process all payloads in the envelope
const payloadsList = []; const payloadsList = [];
const numPayloads = envJsonObj.payloads.length; const numPayloads = envJsonObj.payloads.length;
logTrace(envJsonObj.correlation_id, `smartreceive: Processing ${numPayloads} payloads`);
for (let i = 0; i < numPayloads; i++) { for (let i = 0; i < numPayloads; i++) {
const payloadObj = envJsonObj.payloads[i]; const payloadObj = envJsonObj.payloads[i];
const transport = payloadObj.transport; const transport = payloadObj.transport;
const dataname = payloadObj.dataname; const dataname = payloadObj.dataname;
const payloadType = payloadObj.payload_type;
logTrace(envJsonObj.correlation_id, `smartreceive: Processing payload ${i + 1}/${numPayloads}: dataname=${dataname}, type=${payloadType}, transport=${transport}`);
if (transport === 'direct') { if (transport === 'direct') {
logTrace(envJsonObj.correlation_id, `Direct transport - decoding payload '${dataname}'`); logTrace(envJsonObj.correlation_id, `Direct transport - decoding payload '${dataname}'`);
// Extract base64 payload from the payload // Extract base64 payload from the payload
const payloadB64 = payloadObj.data; const payloadB64 = payloadObj.data;
logTrace(envJsonObj.correlation_id, `Direct transport: base64 length=${payloadB64?.length}`);
// Decode Base64 payload // Decode Base64 payload
const payloadBytes = Buffer.from(payloadB64, 'base64'); const payloadBytes = Buffer.from(payloadB64, 'base64');
logTrace(envJsonObj.correlation_id, `Direct transport: decoded bytes=${payloadBytes.length}`);
// Deserialize based on type // Deserialize based on type
const dataType = payloadObj.payload_type; const dataType = payloadObj.payload_type;
const data = await deserializeData(payloadBytes, dataType, envJsonObj.correlation_id); const data = await deserializeData(payloadBytes, dataType, envJsonObj.correlation_id);
logTrace(envJsonObj.correlation_id, `Direct transport: deserialized data type=${typeof data}, constructor=${data?.constructor?.name}`);
payloadsList.push([dataname, data, dataType]); payloadsList.push([dataname, data, dataType]);
} else if (transport === 'link') { } else if (transport === 'link') {
@@ -631,6 +721,7 @@ async function smartreceive(msg, options = {}) {
} }
} }
logTrace(envJsonObj.correlation_id, `smartreceive: Successfully processed all ${payloadsList.length} payloads`);
envJsonObj.payloads = payloadsList; envJsonObj.payloads = payloadsList;
return envJsonObj; return envJsonObj;
} }