adding jsontable
This commit is contained in:
@@ -55,7 +55,8 @@ All three platforms expose the same high-level API:
|
|||||||
|------|-------|------------|-------------------|
|
|------|-------|------------|-------------------|
|
||||||
| `text` | `String` | `string` | `str` |
|
| `text` | `String` | `string` | `str` |
|
||||||
| `dictionary` | `Dict`, `NamedTuple` | `Object`, `Array` | `dict`, `list` |
|
| `dictionary` | `Dict`, `NamedTuple` | `Object`, `Array` | `dict`, `list` |
|
||||||
| `table` | `DataFrame`, `Arrow.Table` | `Array<Object>` (input) → `Buffer` (Arrow IPC) | `pandas.DataFrame`, `bytes` (Arrow IPC) |
|
| `arrowtable` | `DataFrame`, `Arrow.Table` | `Array<Object>` (input) → `Buffer` (Arrow IPC) | `pandas.DataFrame`, `bytes` (Arrow IPC) |
|
||||||
|
| `jsontable` | `Vector{NamedTuple}`, `Vector{Dict}` | `Array<Object>` | `list[dict]`, `list` |
|
||||||
| `image` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes`, `bytearray` |
|
| `image` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes`, `bytearray` |
|
||||||
| `audio` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes`, `bytearray` |
|
| `audio` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes`, `bytearray` |
|
||||||
| `video` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes`, `bytearray` |
|
| `video` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes`, `bytearray` |
|
||||||
@@ -236,13 +237,23 @@ flowchart TB
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "uuid4",
|
"id": "uuid4",
|
||||||
"dataname": "large_table",
|
"dataname": "large_arrow_table",
|
||||||
"payload_type": "table",
|
"payload_type": "arrowtable",
|
||||||
"transport": "link",
|
"transport": "link",
|
||||||
"encoding": "none",
|
"encoding": "arrow-ipc",
|
||||||
"size": 524288,
|
"size": 524288,
|
||||||
"data": "http://localhost:8080/file/UPLOAD_ID/FILE_ID/data.arrow",
|
"data": "http://localhost:8080/file/UPLOAD_ID/FILE_ID/data.arrow",
|
||||||
"metadata": {}
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "uuid4",
|
||||||
|
"dataname": "json_table",
|
||||||
|
"payload_type": "jsontable",
|
||||||
|
"transport": "direct",
|
||||||
|
"encoding": "json",
|
||||||
|
"size": 1024,
|
||||||
|
"data": "[{\"id\": 1, \"name\": \"Alice\"}, {\"id\": 2, \"name\": \"Bob\"}]",
|
||||||
|
"metadata": {}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -255,11 +266,11 @@ flowchart TB
|
|||||||
{
|
{
|
||||||
"id": "uuid4",
|
"id": "uuid4",
|
||||||
"dataname": "login_image",
|
"dataname": "login_image",
|
||||||
"payload_type": "image | dictionary | table | text | audio | video | binary",
|
"payload_type": "image | dictionary | arrowtable | jsontable | text | audio | video | binary",
|
||||||
"transport": "direct | link",
|
"transport": "direct | link",
|
||||||
"encoding": "none | json | base64 | arrow-ipc",
|
"encoding": "none | json | base64 | arrow-ipc",
|
||||||
"size": 15433,
|
"size": 15433,
|
||||||
"data": "base64-encoded-string | http-url",
|
"data": "base64-encoded-string | http-url | json-string",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"checksum": "sha256_hash"
|
"checksum": "sha256_hash"
|
||||||
}
|
}
|
||||||
@@ -291,7 +302,7 @@ flowchart TB
|
|||||||
│ │ │ │
|
│ │ │ │
|
||||||
│ • Serialize │ │ • Serialize │
|
│ • Serialize │ │ • Serialize │
|
||||||
│ to buffer │ │ to buffer │
|
│ to buffer │ │ to buffer │
|
||||||
│ • Base64 │ │ • Upload to │
|
│ • Base64/JSON│ │ • Upload to │
|
||||||
│ encode │ │ HTTP Server│
|
│ encode │ │ HTTP Server│
|
||||||
│ • Publish to │ │ • Publish to │
|
│ • Publish to │ │ • Publish to │
|
||||||
│ NATS │ │ NATS with │
|
│ NATS │ │ NATS with │
|
||||||
@@ -422,6 +433,41 @@ function smartreceive(
|
|||||||
)::JSON.Object{String, Any}
|
)::JSON.Object{String, Any}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Serialization Logic for Tables
|
||||||
|
|
||||||
|
```julia
|
||||||
|
# Serialize table data based on payload_type
|
||||||
|
function _serialize_table_data(data::Any, payload_type::String)::Vector{UInt8}
|
||||||
|
if payload_type == "arrowtable"
|
||||||
|
# Serialize to Apache Arrow IPC format
|
||||||
|
buffer = IOBuffer()
|
||||||
|
Arrow.write(buffer, data)
|
||||||
|
return take!(buffer)
|
||||||
|
elseif payload_type == "jsontable"
|
||||||
|
# Serialize to JSON format
|
||||||
|
json_str = JSON.json(data)
|
||||||
|
return Vector{UInt8}(json_str)
|
||||||
|
else
|
||||||
|
throw(ArgumentError("Unknown payload_type: $payload_type"))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Deserialize table data based on payload_type
|
||||||
|
function _deserialize_table_data(data::Vector{UInt8}, payload_type::String)::Any
|
||||||
|
if payload_type == "arrowtable"
|
||||||
|
# Deserialize from Apache Arrow IPC format
|
||||||
|
buffer = IOBuffer(data)
|
||||||
|
return Arrow.Table(buffer)
|
||||||
|
elseif payload_type == "jsontable"
|
||||||
|
# Deserialize from JSON format
|
||||||
|
json_str = String(data)
|
||||||
|
return JSON.parse(json_str)
|
||||||
|
else
|
||||||
|
throw(ArgumentError("Unknown payload_type: $payload_type"))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### JavaScript Implementation
|
### JavaScript Implementation
|
||||||
@@ -541,7 +587,7 @@ class NATSClient {
|
|||||||
| Package | Purpose |
|
| Package | Purpose |
|
||||||
|---------|---------|
|
|---------|---------|
|
||||||
| `nats` | Core NATS functionality (nats.js) |
|
| `nats` | Core NATS functionality (nats.js) |
|
||||||
| `uuid` | UUID generation |
|
| `crypto` (built-in) | UUID generation (Node.js) |
|
||||||
| `node-fetch` or `axios` | HTTP client for file server |
|
| `node-fetch` or `axios` | HTTP client for file server |
|
||||||
| `apache-arrow` | Arrow IPC serialization |
|
| `apache-arrow` | Arrow IPC serialization |
|
||||||
|
|
||||||
@@ -550,7 +596,7 @@ class NATSClient {
|
|||||||
| Package | Purpose |
|
| Package | Purpose |
|
||||||
|---------|---------|
|
|---------|---------|
|
||||||
| `nats` | Browser-compatible NATS client |
|
| `nats` | Browser-compatible NATS client |
|
||||||
| `uuid` | UUID generation |
|
| `crypto` (built-in) | UUID generation (browser) |
|
||||||
| `fetch` (native) | HTTP client for file server |
|
| `fetch` (native) | HTTP client for file server |
|
||||||
| `apache-arrow` | Arrow IPC serialization |
|
| `apache-arrow` | Arrow IPC serialization |
|
||||||
|
|
||||||
@@ -615,6 +661,43 @@ async function fetchWithBackoff(url, max_retries, base_delay, max_delay, correla
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Serialization Logic for Tables
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Serialize table data based on payload_type
|
||||||
|
async function serializeTableData(data, payload_type) {
|
||||||
|
if (payload_type === "arrowtable") {
|
||||||
|
// Serialize to Apache Arrow IPC format
|
||||||
|
const schema = new arrow.Schema([...]); // Define schema
|
||||||
|
const arr = arrow.tableToArrowTable(data, schema);
|
||||||
|
const buffer = arrow.RecordBatch.from(arr).toBuffer();
|
||||||
|
return new Uint8Array(buffer);
|
||||||
|
} else if (payload_type === "jsontable") {
|
||||||
|
// Serialize to JSON format
|
||||||
|
const jsonStr = JSON.stringify(data);
|
||||||
|
return new TextEncoder().encode(jsonStr);
|
||||||
|
} else {
|
||||||
|
throw new Error(`Unknown payload_type: ${payload_type}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deserialize table data based on payload_type
|
||||||
|
async function deserializeTableData(data, payload_type) {
|
||||||
|
if (payload_type === "arrowtable") {
|
||||||
|
// Deserialize from Apache Arrow IPC format
|
||||||
|
const buffer = arrow.arrayBufferToBuffer(data.buffer);
|
||||||
|
const batch = arrow.RecordBatch.deserialize(buffer);
|
||||||
|
return arrow.tableFromBatch(batch);
|
||||||
|
} else if (payload_type === "jsontable") {
|
||||||
|
// Deserialize from JSON format
|
||||||
|
const jsonStr = new TextDecoder().decode(data);
|
||||||
|
return JSON.parse(jsonStr);
|
||||||
|
} else {
|
||||||
|
throw new Error(`Unknown payload_type: ${payload_type}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Python/MicroPython Implementation
|
### Python/MicroPython Implementation
|
||||||
@@ -906,6 +989,56 @@ async def fetch_with_backoff(
|
|||||||
pass
|
pass
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Serialization Logic for Tables
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Serialize table data based on payload_type
|
||||||
|
def serialize_table_data(data: Any, payload_type: str) -> bytes:
|
||||||
|
if payload_type == "arrowtable":
|
||||||
|
# Serialize to Apache Arrow IPC format
|
||||||
|
import pyarrow as pa
|
||||||
|
import pyarrow.feather as feather
|
||||||
|
import io
|
||||||
|
|
||||||
|
if isinstance(data, pd.DataFrame):
|
||||||
|
table = pa.Table.from_pandas(data)
|
||||||
|
buffer = io.BytesIO()
|
||||||
|
feather.write_feather(table, buffer)
|
||||||
|
return buffer.getvalue()
|
||||||
|
else:
|
||||||
|
raise TypeError("Expected pandas DataFrame for arrowtable")
|
||||||
|
|
||||||
|
elif payload_type == "jsontable":
|
||||||
|
# Serialize to JSON format
|
||||||
|
if isinstance(data, list) and all(isinstance(row, dict) for row in data):
|
||||||
|
return json.dumps(data).encode('utf-8')
|
||||||
|
else:
|
||||||
|
raise TypeError("Expected list of dicts for jsontable")
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unknown payload_type: {payload_type}")
|
||||||
|
|
||||||
|
# Deserialize table data based on payload_type
|
||||||
|
def deserialize_table_data(data: bytes, payload_type: str) -> Any:
|
||||||
|
if payload_type == "arrowtable":
|
||||||
|
# Deserialize from Apache Arrow IPC format
|
||||||
|
import pyarrow as pa
|
||||||
|
import pyarrow.feather as feather
|
||||||
|
import io
|
||||||
|
|
||||||
|
buffer = io.BytesIO(data)
|
||||||
|
table = feather.read_table(buffer)
|
||||||
|
return table.to_pandas()
|
||||||
|
|
||||||
|
elif payload_type == "jsontable":
|
||||||
|
# Deserialize from JSON format
|
||||||
|
json_str = data.decode('utf-8')
|
||||||
|
return json.loads(json_str)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unknown payload_type: {payload_type}")
|
||||||
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Platform Comparison Matrix
|
## Platform Comparison Matrix
|
||||||
@@ -917,6 +1050,9 @@ async def fetch_with_backoff(
|
|||||||
| **Type Safety** | ✅ Strong | ⚠️ (TypeScript) | ✅ (Type hints) | ❌ |
|
| **Type Safety** | ✅ Strong | ⚠️ (TypeScript) | ✅ (Type hints) | ❌ |
|
||||||
| **Memory Management** | ✅ GC | ✅ GC | ✅ GC | ⚠️ (Manual) |
|
| **Memory Management** | ✅ GC | ✅ GC | ✅ GC | ⚠️ (Manual) |
|
||||||
| **Arrow IPC** | ✅ Native | ✅ (arrow package) | ✅ (pyarrow) | ❌ |
|
| **Arrow IPC** | ✅ Native | ✅ (arrow package) | ✅ (pyarrow) | ❌ |
|
||||||
|
| **JSON Serialization** | ✅ (JSON.jl) | ✅ (native) | ✅ (json) | ✅ (json) |
|
||||||
|
| **arrowtable Support** | ✅ | ✅ | ✅ | ❌ |
|
||||||
|
| **jsontable Support** | ✅ | ✅ | ✅ | ✅ |
|
||||||
| **Direct Transport** | ✅ | ✅ | ✅ | ✅ |
|
| **Direct Transport** | ✅ | ✅ | ✅ | ✅ |
|
||||||
| **Link Transport** | ✅ | ✅ | ✅ | ⚠️ (Limited) |
|
| **Link Transport** | ✅ | ✅ | ✅ | ⚠️ (Limited) |
|
||||||
| **Handler Functions** | ✅ | ✅ | ✅ | ✅ |
|
| **Handler Functions** | ✅ | ✅ | ✅ | ✅ |
|
||||||
@@ -948,7 +1084,11 @@ function _serialize_data(data::Dict, payload_type::String)
|
|||||||
end
|
end
|
||||||
|
|
||||||
function _serialize_data(data::DataFrame, payload_type::String)
|
function _serialize_data(data::DataFrame, payload_type::String)
|
||||||
# Table handling
|
# Table handling - arrowtable
|
||||||
|
end
|
||||||
|
|
||||||
|
function _serialize_data(data::Vector{NamedTuple}, payload_type::String)
|
||||||
|
# Table handling - jsontable
|
||||||
end
|
end
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -979,7 +1119,7 @@ function generateUUID() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function serializeData(data, payload_type) {
|
async function serializeData(data, payload_type) {
|
||||||
// Serialization logic
|
// Serialization logic for arrowtable and jsontable
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -1028,9 +1168,9 @@ def smartreceive(msg, **kwargs):
|
|||||||
|
|
||||||
| Platform | Code |
|
| Platform | Code |
|
||||||
|----------|------|
|
|----------|------|
|
||||||
| **Julia** | ```julia<br>df = DataFrame(id=1:1000000, value=rand(1000000))<br>env, env_json_str = smartsend("analysis", [("table", df, "table")])``` |
|
| **Julia** | ```julia<br>df = DataFrame(id=1:1000000, value=rand(1000000))<br>env, env_json_str = smartsend("analysis", [("table_data", df, "arrowtable")])``` |
|
||||||
| **JavaScript** | ```javascript<br>const df = [{ id: 1, value: 0.5 }, ...];<br>[env, env_json_str] = await smartsend("analysis", [["table", df, "table"]]);``` |
|
| **JavaScript** | ```javascript<br>const df = [{ id: 1, value: 0.5 }, ...];<br>[env, env_json_str] = await smartsend("analysis", [["table_data", df, "arrowtable"]]);``` |
|
||||||
| **Python** | ```python<br>import pandas as pd<br>df = pd.DataFrame({"id": range(1000000), "value": np.random.rand(1000000)})<br>env, env_json_str = await smartsend("analysis", [("table", df, "table")])``` |
|
| **Python** | ```python<br>import pandas as pd<br>df = pd.DataFrame({"id": range(1000000), "value": np.random.rand(1000000)})<br>env, env_json_str = await smartsend("analysis", [("table_data", df, "arrowtable")])``` |
|
||||||
|
|
||||||
### Scenario 3: Chat System (Multi-Payload)
|
### Scenario 3: Chat System (Multi-Payload)
|
||||||
|
|
||||||
@@ -1040,6 +1180,29 @@ def smartreceive(msg, **kwargs):
|
|||||||
| **JavaScript** | ```javascript<br>const chat = [["text", "Hello!", "text"], ["image", imgBuffer, "image"]];<br>[env, env_json_str] = await smartsend("chat", chat);``` |
|
| **JavaScript** | ```javascript<br>const chat = [["text", "Hello!", "text"], ["image", imgBuffer, "image"]];<br>[env, env_json_str] = await smartsend("chat", chat);``` |
|
||||||
| **Python** | ```python<br>chat = [("text", "Hello!", "text"), ("image", img_bytes, "image")]<br>env, env_json_str = await smartsend("chat", chat)``` |
|
| **Python** | ```python<br>chat = [("text", "Hello!", "text"), ("image", img_bytes, "image")]<br>env, env_json_str = await smartsend("chat", chat)``` |
|
||||||
|
|
||||||
|
### Scenario 4: JSON Table Transfer (Cross-Platform)
|
||||||
|
|
||||||
|
| Platform | Code |
|
||||||
|
|----------|------|
|
||||||
|
| **Julia** | ```julia<br>rows = [Dict("id" => 1, "name" => "Alice"), Dict("id" => 2, "name" => "Bob")]<br>env, env_json_str = smartsend("data", [("users", rows, "jsontable")])``` |
|
||||||
|
| **JavaScript** | ```javascript<br>const users = [{ id: 1, name: "Alice" }, { id: 2, name: "Bob" }];<br>[env, env_json_str] = await smartsend("data", [["users", users, "jsontable"]]);``` |
|
||||||
|
| **Python** | ```python<br>users = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]<br>env, env_json_str = await smartsend("data", [("users", users, "jsontable")])``` |
|
||||||
|
|
||||||
|
### Scenario 5: Smart Transport Selection
|
||||||
|
|
||||||
|
The `smartsend` function automatically selects the transport method based on payload size:
|
||||||
|
|
||||||
|
- **Direct Transport (< 1MB)**: Payload is serialized and embedded directly in the NATS message
|
||||||
|
- `arrowtable`: Serialized to Arrow IPC, base64 encoded
|
||||||
|
- `jsontable`: Serialized to JSON and embedded as a JSON string (`encoding: "json"`)
|
||||||
|
- `dictionary`: Serialized to JSON, base64 encoded
|
||||||
|
- `text`: Serialized to UTF-8, base64 encoded
|
||||||
|
- `image/audio/video/binary`: Base64 encoded
|
||||||
|
|
||||||
|
- **Link Transport (>= 1MB)**: Payload is uploaded to HTTP file server, URL embedded in message
|
||||||
|
- All types supported
|
||||||
|
- Receiver downloads from URL and deserializes
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Performance Considerations (Cross-Platform)
|
## Performance Considerations (Cross-Platform)
|
||||||
@@ -1080,6 +1243,13 @@ All platforms use correlation IDs for distributed tracing:
|
|||||||
[timestamp] [Correlation: abc123] Message published to subject
|
[timestamp] [Correlation: abc123] Message published to subject
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Serialization Performance Comparison
|
||||||
|
|
||||||
|
| Format | Use Case | Pros | Cons |
|
||||||
|
|--------|----------|------|------|
|
||||||
|
| `arrowtable` | Large tabular data | Fast, zero-copy, schema-preserving | Binary format, requires Arrow library |
|
||||||
|
| `jsontable` | Small/medium tabular data | Human-readable, universal support | Slower, larger size, no schema |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Testing Strategy (Cross-Platform)
|
## Testing Strategy (Cross-Platform)
|
||||||
@@ -1092,12 +1262,15 @@ All platforms use correlation IDs for distributed tracing:
|
|||||||
| **Deserialization** | `test/test_julia_text_receiver.jl` | `test/test_js_text_receiver.js` | `test/test_py_text_receiver.py` |
|
| **Deserialization** | `test/test_julia_text_receiver.jl` | `test/test_js_text_receiver.js` | `test/test_py_text_receiver.py` |
|
||||||
| **Large Payload** | `test/test_julia_file_sender.jl` | `test/test_js_file_sender.js` | `test/test_py_file_sender.py` |
|
| **Large Payload** | `test/test_julia_file_sender.jl` | `test/test_js_file_sender.js` | `test/test_py_file_sender.py` |
|
||||||
| **Multi-Payload** | `test/test_julia_mix_payloads_sender.jl` | `test/test_js_mix_payloads_sender.js` | `test/test_py_mix_payloads_sender.py` |
|
| **Multi-Payload** | `test/test_julia_mix_payloads_sender.jl` | `test/test_js_mix_payloads_sender.js` | `test/test_py_mix_payloads_sender.py` |
|
||||||
|
| **Arrow Table** | `test/test_julia_table_sender.jl` | `test/test_js_table_sender.js` | `test/test_py_table_sender.py` |
|
||||||
|
|
||||||
### Integration Tests
|
### Integration Tests
|
||||||
|
|
||||||
- NATS server communication
|
- NATS server communication
|
||||||
- File server upload/download
|
- File server upload/download
|
||||||
- Cross-platform message exchange
|
- Cross-platform message exchange
|
||||||
|
- Arrow table serialization/deserialization
|
||||||
|
- JSON table serialization/deserialization
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -1134,6 +1307,16 @@ This cross-platform NATS bridge provides:
|
|||||||
- Python: Class-based design with type hints
|
- Python: Class-based design with type hints
|
||||||
3. **Message Format Consistency**: Identical `msg_envelope_v1` and `msg_payload_v1` JSON schemas
|
3. **Message Format Consistency**: Identical `msg_envelope_v1` and `msg_payload_v1` JSON schemas
|
||||||
4. **Handler Abstraction**: File server operations abstracted through configurable handlers
|
4. **Handler Abstraction**: File server operations abstracted through configurable handlers
|
||||||
5. **Platform-Specific Optimizations**: Arrow IPC support in desktop platforms, streaming support in MicroPython
|
5. **Platform-Specific Optimizations**:
|
||||||
|
- **Arrow IPC** (`arrowtable`): Efficient binary format for large tabular data
|
||||||
|
- **JSON** (`jsontable`): Universal human-readable format for smaller tables
|
||||||
|
- Streaming support in MicroPython
|
||||||
|
|
||||||
The Julia implementation serves as the **ground truth** for API design and behavior, while JavaScript and Python implementations maintain interface parity while leveraging their respective language idioms.
|
The Julia implementation serves as the **ground truth** for API design and behavior, while JavaScript and Python implementations maintain interface parity while leveraging their respective language idioms.
|
||||||
|
|
||||||
|
### Datatype Summary
|
||||||
|
|
||||||
|
| Datatype | Serialization | Use Case | Encoding |
|
||||||
|
|----------|---------------|----------|----------|
|
||||||
|
| `arrowtable` | Apache Arrow IPC | Large tabular data, schema-preserving | `arrow-ipc` → `base64` |
|
||||||
|
| `jsontable` | JSON | Small/medium tabular data, human-readable | `json` |
|
||||||
|
|||||||
@@ -177,7 +177,8 @@ The system uses a **standardized list-of-tuples format** for all payload operati
|
|||||||
|------|-------|------------|--------|-------------|
|
|------|-------|------------|--------|-------------|
|
||||||
| `text` | `String` | `string` | `str` | `str` |
|
| `text` | `String` | `string` | `str` | `str` |
|
||||||
| `dictionary` | `Dict`, `NamedTuple` | `Object`, `Array` | `dict`, `list` | `dict` |
|
| `dictionary` | `Dict`, `NamedTuple` | `Object`, `Array` | `dict`, `list` | `dict` |
|
||||||
| `table` | `DataFrame`, `Arrow.Table` | `Array<Object>` (input) → `Buffer` (Arrow IPC) | `pandas.DataFrame`, `bytes` (Arrow IPC) | ❌ (not supported) |
|
| `arrowtable` | `DataFrame`, `Arrow.Table` | `Array<Object>` (input) → `Buffer` (Arrow IPC) | `pandas.DataFrame`, `bytes` (Arrow IPC) | ❌ (not supported) |
|
||||||
|
| `jsontable` | `Vector{NamedTuple}`, `Vector{Dict}` | `Array<Object>` | `list[dict]`, `list` | `list` |
|
||||||
| `image` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes` | `bytearray` |
|
| `image` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes` | `bytearray` |
|
||||||
| `audio` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes` | `bytearray` |
|
| `audio` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes` | `bytearray` |
|
||||||
| `video` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes` | `bytearray` |
|
| `video` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes` | `bytearray` |
|
||||||
@@ -201,7 +202,7 @@ env, env_json_str = smartsend(
|
|||||||
# Multiple payloads with different types
|
# Multiple payloads with different types
|
||||||
env, env_json_str = smartsend(
|
env, env_json_str = smartsend(
|
||||||
"/test",
|
"/test",
|
||||||
[("dataname1", data1, "dictionary"), ("dataname2", data2, "table")],
|
[("dataname1", data1, "dictionary"), ("dataname2", data2, "arrowtable")],
|
||||||
broker_url="nats://localhost:4222"
|
broker_url="nats://localhost:4222"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -245,7 +246,7 @@ const [env, env_json_str] = await NATSBridge.smartsend(
|
|||||||
"/test",
|
"/test",
|
||||||
[
|
[
|
||||||
["dataname1", data1, "dictionary"],
|
["dataname1", data1, "dictionary"],
|
||||||
["dataname2", data2, "table"]
|
["dataname2", data2, "arrowtable"]
|
||||||
],
|
],
|
||||||
{ broker_url: "nats://localhost:4222" }
|
{ broker_url: "nats://localhost:4222" }
|
||||||
);
|
);
|
||||||
@@ -288,7 +289,7 @@ env, env_json_str = await NATSBridge.smartsend(
|
|||||||
# Multiple payloads
|
# Multiple payloads
|
||||||
env, env_json_str = await NATSBridge.smartsend(
|
env, env_json_str = await NATSBridge.smartsend(
|
||||||
"/test",
|
"/test",
|
||||||
[("dataname1", data1, "dictionary"), ("dataname2", data2, "table")],
|
[("dataname1", data1, "dictionary"), ("dataname2", data2, "arrowtable")],
|
||||||
broker_url="nats://localhost:4222"
|
broker_url="nats://localhost:4222"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -334,6 +335,160 @@ env, env_json_str = NATSBridge.smartsend(
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Row-Oriented vs Column-Oriented Data Structures
|
||||||
|
|
||||||
|
Different platforms use different internal representations for tabular data. Understanding these differences is crucial for proper serialization/deserialization when using `jsontable` and `arrowtable` datatypes.
|
||||||
|
|
||||||
|
### Data Structure Comparison
|
||||||
|
|
||||||
|
| Platform | Table Structure | Orientation |
|
||||||
|
|----------|-----------------|-------------|
|
||||||
|
| **Julia (DataFrame)** | `Dict{String, Vector}` | Column-oriented |
|
||||||
|
| **Python (pandas)** | `dict[str, list]` | Column-oriented |
|
||||||
|
| **JavaScript** | `Array<Object>` | Row-oriented |
|
||||||
|
| **MicroPython** | `list[list]` | Row-oriented |
|
||||||
|
|
||||||
|
### Column-Oriented (Julia DataFrame, Python pandas)
|
||||||
|
|
||||||
|
In column-oriented structures, each column is stored as a separate array/vector:
|
||||||
|
|
||||||
|
**Julia Example:**
|
||||||
|
```julia
|
||||||
|
# Create dictionary with column vectors
|
||||||
|
dict = Dict("customer age" => [15, 20, 25],
|
||||||
|
"first name" => ["Rohit", "Rahul", "Akshat"])
|
||||||
|
|
||||||
|
# Convert to DataFrame
|
||||||
|
df = DataFrame(dict)
|
||||||
|
println(df)
|
||||||
|
# Output:
|
||||||
|
# 3×2 DataFrame
|
||||||
|
# Row ┆ customer age ┆ first name
|
||||||
|
# ┆ Int64 ┆ String
|
||||||
|
# ─────┼──────────────┼────────────
|
||||||
|
# 1 ┆ 15 ┆ "Rohit"
|
||||||
|
# 2 ┆ 20 ┆ "Rahul"
|
||||||
|
# 3 ┆ 25 ┆ "Akshat"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Python Example:**
|
||||||
|
```python
|
||||||
|
# Create dictionary with column lists
|
||||||
|
data = {
|
||||||
|
"Name": ["Alice", "Bob", "Charlie"],
|
||||||
|
"Age": [25, 30, 35],
|
||||||
|
"Score": [88.5, 92.0, 79.5]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Convert to DataFrame
|
||||||
|
df = pd.DataFrame(data)
|
||||||
|
print(df)
|
||||||
|
# Output:
|
||||||
|
# Name Age Score
|
||||||
|
# 0 Alice 25 88.5
|
||||||
|
# 1 Bob 30 92.0
|
||||||
|
# 2 Charlie 35 79.5
|
||||||
|
```
|
||||||
|
|
||||||
|
### Row-Oriented (JavaScript, MicroPython)
|
||||||
|
|
||||||
|
In row-oriented structures, each row is stored as a separate object/array:
|
||||||
|
|
||||||
|
**JavaScript Example:**
|
||||||
|
```javascript
|
||||||
|
// Array of objects (row-oriented)
|
||||||
|
const users = [
|
||||||
|
{ Name: "Alice", Age: 25, Score: 88.5 },
|
||||||
|
{ Name: "Bob", Age: 30, Score: 92.0 },
|
||||||
|
{ Name: "Charlie", Age: 35, Score: 79.5 }
|
||||||
|
];
|
||||||
|
```
|
||||||
|
|
||||||
|
**MicroPython Example:**
|
||||||
|
```python
|
||||||
|
# List of lists (row-oriented)
|
||||||
|
users = [
|
||||||
|
["Alice", 25, 88.5],
|
||||||
|
["Bob", 30, 92.0],
|
||||||
|
["Charlie", 35, 79.5]
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Cross-Platform Conversion for jsontable
|
||||||
|
|
||||||
|
When sending `jsontable` across platforms, the system performs automatic conversion between row-oriented and column-oriented formats:
|
||||||
|
|
||||||
|
**Sending from Julia/Python (column-oriented) to JS/MicroPython (row-oriented):**
|
||||||
|
1. Convert column-oriented dict to row-oriented array of objects
|
||||||
|
2. Serialize to JSON
|
||||||
|
3. Send with `payload_type = "jsontable"`
|
||||||
|
|
||||||
|
**Receiving from JS/MicroPython (row-oriented) to Julia/Python (column-oriented):**
|
||||||
|
1. Deserialize JSON to row-oriented array of objects
|
||||||
|
2. Convert to column-oriented dict
|
||||||
|
3. Create DataFrame from column-oriented dict
|
||||||
|
|
||||||
|
**Example: Julia to JavaScript**
|
||||||
|
```julia
|
||||||
|
# Julia side - column-oriented DataFrame
|
||||||
|
df = DataFrame(
|
||||||
|
"Name" => ["Alice", "Bob", "Charlie"],
|
||||||
|
"Age" => [25, 30, 35],
|
||||||
|
"Score" => [88.5, 92.0, 79.5]
|
||||||
|
)
|
||||||
|
|
||||||
|
# smartsend automatically converts to row-oriented JSON
|
||||||
|
env, env_json_str = smartsend(
|
||||||
|
"/data",
|
||||||
|
[("users", df, "jsontable")]
|
||||||
|
)
|
||||||
|
# JSON sent: [{"Name":"Alice","Age":25,"Score":88.5}, ...]
|
||||||
|
```
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// JavaScript side - receives row-oriented array
|
||||||
|
const [env, env_json_str] = await NATSBridge.smartsend(
|
||||||
|
"/data",
|
||||||
|
[["users", users, "jsontable"]]
|
||||||
|
);
|
||||||
|
// users is already row-oriented: [{Name: "Alice", Age: 25, ...}, ...]
|
||||||
|
```
|
||||||
|
|
||||||
|
**Example: JavaScript to Julia**
|
||||||
|
```javascript
|
||||||
|
// JavaScript side - row-oriented array
|
||||||
|
const users = [
|
||||||
|
{ Name: "Alice", Age: 25, Score: 88.5 },
|
||||||
|
{ Name: "Bob", Age: 30, Score: 92.0 }
|
||||||
|
];
|
||||||
|
|
||||||
|
const [env, env_json_str] = await NATSBridge.smartsend(
|
||||||
|
"/data",
|
||||||
|
[["users", users, "jsontable"]]
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
```julia
|
||||||
|
# Julia side - receives and converts to column-oriented DataFrame
|
||||||
|
env = smartreceive(msg; fileserver_download_handler=_fetch_with_backoff)
|
||||||
|
# The jsontable is automatically converted to DataFrame
|
||||||
|
for (dataname, data, type) in env["payloads"]
|
||||||
|
if type == "jsontable"
|
||||||
|
# data is now a DataFrame with column-oriented structure
|
||||||
|
println(data)
|
||||||
|
# Output:
|
||||||
|
# 2×3 DataFrame
|
||||||
|
# Row ┆ Name ┆ Age ┆ Score
|
||||||
|
# ┆ String ┆ Int64 ┆ Float64
|
||||||
|
# ─────┼────────┼──────┼───────
|
||||||
|
# 1 ┆ Alice ┆ 25 ┆ 88.5
|
||||||
|
# 2 ┆ Bob ┆ 30 ┆ 92.0
|
||||||
|
end
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
### Cross-Platform Claim-Check Pattern
|
### Cross-Platform Claim-Check Pattern
|
||||||
@@ -345,7 +500,7 @@ flowchart TD
|
|||||||
B -->|No | D[Link Path<br/><small>>= 1MB</small>]
|
B -->|No | D[Link Path<br/><small>>= 1MB</small>]
|
||||||
|
|
||||||
C --> C1[Serialize to Buffer]
|
C --> C1[Serialize to Buffer]
|
||||||
C1 --> C2[Base64 encode]
|
C1 --> C2[Base64/JSON encode]
|
||||||
C2 --> C3[Publish to NATS]
|
C2 --> C3[Publish to NATS]
|
||||||
|
|
||||||
D --> D1[Serialize to Buffer]
|
D --> D1[Serialize to Buffer]
|
||||||
@@ -426,20 +581,24 @@ Pkg.add("Dates")
|
|||||||
### JavaScript Dependencies (Node.js)
|
### JavaScript Dependencies (Node.js)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
npm install nats uuid apache-arrow node-fetch
|
npm install nats apache-arrow node-fetch
|
||||||
# or
|
# or
|
||||||
yarn add nats uuid apache-arrow node-fetch
|
yarn add nats apache-arrow node-fetch
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Note:** Node.js provides UUID generation through its built-in `crypto` module (`crypto.randomUUID()`, available since Node.js 14.17), so no external `uuid` package is needed.
|
||||||
|
|
||||||
### JavaScript Dependencies (Browser)
|
### JavaScript Dependencies (Browser)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
npm install nats uuid apache-arrow
|
npm install nats apache-arrow
|
||||||
# or use CDN:
|
# or use CDN:
|
||||||
# https://unpkg.com/nats-js/dist/bundle/nats.min.js
|
# https://unpkg.com/nats-js/dist/bundle/nats.min.js
|
||||||
# https://unpkg.com/apache-arrow/arrow.min.js
|
# https://unpkg.com/apache-arrow/arrow.min.js
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Note:** For browser UUID generation, use the built-in `crypto.randomUUID()` API (available in modern browsers) or a lightweight alternative such as the `uuidv4` package.
|
||||||
|
|
||||||
### Python Dependencies (Desktop)
|
### Python Dependencies (Desktop)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -592,7 +751,7 @@ function _serialize_data(data::Dict, payload_type::String)
|
|||||||
end
|
end
|
||||||
|
|
||||||
function _serialize_data(data::DataFrame, payload_type::String)
|
function _serialize_data(data::DataFrame, payload_type::String)
|
||||||
# Table handling
|
# Table handling - arrowtable
|
||||||
io = IOBuffer()
|
io = IOBuffer()
|
||||||
Arrow.write(io, data)
|
Arrow.write(io, data)
|
||||||
return take!(io)
|
return take!(io)
|
||||||
@@ -784,10 +943,16 @@ function _serialize_data(data::Any, payload_type::String)
|
|||||||
json_str = JSON.json(data)
|
json_str = JSON.json(data)
|
||||||
json_str_bytes = Vector{UInt8}(json_str)
|
json_str_bytes = Vector{UInt8}(json_str)
|
||||||
return json_str_bytes
|
return json_str_bytes
|
||||||
elseif payload_type == "table"
|
elseif payload_type == "arrowtable"
|
||||||
|
# Serialize DataFrame to Arrow IPC format
|
||||||
io = IOBuffer()
|
io = IOBuffer()
|
||||||
Arrow.write(io, data)
|
Arrow.write(io, data)
|
||||||
return take!(io)
|
return take!(io)
|
||||||
|
elseif payload_type == "jsontable"
|
||||||
|
# Serialize rows to JSON; data is expected to already be row-oriented
|
||||||
|
# data is Vector{NamedTuple} or Vector{Dict}
|
||||||
|
json_str = JSON.json(data)
|
||||||
|
return Vector{UInt8}(json_str)
|
||||||
elseif payload_type == "image"
|
elseif payload_type == "image"
|
||||||
if isa(data, Vector{UInt8})
|
if isa(data, Vector{UInt8})
|
||||||
return data
|
return data
|
||||||
@@ -833,10 +998,17 @@ function _deserialize_data(
|
|||||||
elseif payload_type == "dictionary"
|
elseif payload_type == "dictionary"
|
||||||
json_str = String(data)
|
json_str = String(data)
|
||||||
return JSON.parse(json_str)
|
return JSON.parse(json_str)
|
||||||
elseif payload_type == "table"
|
elseif payload_type == "arrowtable"
|
||||||
|
# Deserialize from Arrow IPC format
|
||||||
io = IOBuffer(data)
|
io = IOBuffer(data)
|
||||||
df = Arrow.Table(io)
|
arrow_table = Arrow.Table(io)
|
||||||
return df
|
return arrow_table
|
||||||
|
elseif payload_type == "jsontable"
|
||||||
|
# Deserialize from JSON format
|
||||||
|
# Returns the parsed JSON rows as-is (a vector of dictionaries, row-oriented)
|
||||||
|
json_str = String(data)
|
||||||
|
parsed = JSON.parse(json_str)
|
||||||
|
return parsed
|
||||||
elseif payload_type == "image"
|
elseif payload_type == "image"
|
||||||
return data
|
return data
|
||||||
elseif payload_type == "audio"
|
elseif payload_type == "audio"
|
||||||
@@ -931,9 +1103,12 @@ end
|
|||||||
```javascript
|
```javascript
|
||||||
// natsbridge.js
|
// natsbridge.js
|
||||||
const nats = require('nats');
|
const nats = require('nats');
|
||||||
const { v4: uuidv4 } = require('uuid');
|
const crypto = require('crypto');
|
||||||
const fetch = require('node-fetch');
|
const fetch = require('node-fetch');
|
||||||
|
|
||||||
|
// UUID generation using built-in crypto module
|
||||||
|
const uuidv4 = () => crypto.randomUUID();
|
||||||
|
|
||||||
const DEFAULT_SIZE_THRESHOLD = 1_000_000;
|
const DEFAULT_SIZE_THRESHOLD = 1_000_000;
|
||||||
const DEFAULT_BROKER_URL = 'nats://localhost:4222';
|
const DEFAULT_BROKER_URL = 'nats://localhost:4222';
|
||||||
const DEFAULT_FILESERVER_URL = 'http://localhost:8080';
|
const DEFAULT_FILESERVER_URL = 'http://localhost:8080';
|
||||||
@@ -984,10 +1159,13 @@ module.exports = {
|
|||||||
|
|
||||||
```javascript
|
```javascript
|
||||||
const nats = require('nats');
|
const nats = require('nats');
|
||||||
const { v4: uuidv4 } = require('uuid');
|
const crypto = require('crypto');
|
||||||
const fetch = require('node-fetch');
|
const fetch = require('node-fetch');
|
||||||
const arrow = require('apache-arrow');
|
const arrow = require('apache-arrow');
|
||||||
|
|
||||||
|
// UUID generation using built-in crypto module
|
||||||
|
const uuidv4 = () => crypto.randomUUID();
|
||||||
|
|
||||||
const DEFAULT_SIZE_THRESHOLD = 1_000_000;
|
const DEFAULT_SIZE_THRESHOLD = 1_000_000;
|
||||||
const DEFAULT_BROKER_URL = 'nats://localhost:4222';
|
const DEFAULT_BROKER_URL = 'nats://localhost:4222';
|
||||||
const DEFAULT_FILESERVER_URL = 'http://localhost:8080';
|
const DEFAULT_FILESERVER_URL = 'http://localhost:8080';
|
||||||
@@ -1108,21 +1286,36 @@ async function serializeData(data, payload_type) {
|
|||||||
} else if (payload_type === 'dictionary') {
|
} else if (payload_type === 'dictionary') {
|
||||||
const jsonStr = JSON.stringify(data);
|
const jsonStr = JSON.stringify(data);
|
||||||
return Buffer.from(jsonStr, 'utf8');
|
return Buffer.from(jsonStr, 'utf8');
|
||||||
} else if (payload_type === 'table') {
|
} else if (payload_type === 'arrowtable') {
|
||||||
// Convert to Arrow IPC
|
// Convert Array<Object> to Arrow IPC
|
||||||
const buffer = Buffer.alloc(1024 * 1024); // Pre-allocate buffer
|
// data is row-oriented: [{id: 1, name: "Alice"}, ...]
|
||||||
const writer = new arrow.RecordBatchWriter([
|
if (!Array.isArray(data) || data.length === 0) {
|
||||||
new arrow.Schema(Object.keys(data[0]).map(key => new arrow.Field(key, arrow.any())))
|
throw new Error('arrowtable data must be a non-empty array of objects');
|
||||||
]);
|
}
|
||||||
|
|
||||||
|
// Create schema from first row
|
||||||
|
const schemaFields = Object.keys(data[0]).map(key =>
|
||||||
|
new arrow.Field(key, arrow.any())
|
||||||
|
);
|
||||||
|
const schema = new arrow.Schema(schemaFields);
|
||||||
|
|
||||||
|
// Create writer
|
||||||
|
const writer = new arrow.RecordBatchWriter([schema]);
|
||||||
|
|
||||||
|
// Write rows
|
||||||
for (const row of data) {
|
for (const row of data) {
|
||||||
const recordBatch = arrow.recordBatch.fromObjects([row], writer.schema);
|
const recordBatch = arrow.recordBatch.fromObjects([row], schema);
|
||||||
writer.write(recordBatch);
|
writer.write(recordBatch);
|
||||||
}
|
}
|
||||||
await writer.close();
|
await writer.close();
|
||||||
|
|
||||||
// Read from the underlying buffer
|
// Read buffer
|
||||||
return buffer;
|
return writer.toBuffer();
|
||||||
|
} else if (payload_type === 'jsontable') {
|
||||||
|
// data is already row-oriented Array<Object>
|
||||||
|
// Serialize directly to JSON
|
||||||
|
const jsonStr = JSON.stringify(data);
|
||||||
|
return Buffer.from(jsonStr, 'utf8');
|
||||||
} else if (payload_type === 'image') {
|
} else if (payload_type === 'image') {
|
||||||
if (data instanceof Uint8Array || Buffer.isBuffer(data)) {
|
if (data instanceof Uint8Array || Buffer.isBuffer(data)) {
|
||||||
return Buffer.from(data);
|
return Buffer.from(data);
|
||||||
@@ -1168,10 +1361,15 @@ async function deserializeData(data, payload_type, correlation_id) {
|
|||||||
} else if (payload_type === 'dictionary') {
|
} else if (payload_type === 'dictionary') {
|
||||||
const jsonStr = Buffer.from(data).toString('utf8');
|
const jsonStr = Buffer.from(data).toString('utf8');
|
||||||
return JSON.parse(jsonStr);
|
return JSON.parse(jsonStr);
|
||||||
} else if (payload_type === 'table') {
|
} else if (payload_type === 'arrowtable') {
|
||||||
|
// Deserialize from Arrow IPC
|
||||||
const buffer = Buffer.from(data);
|
const buffer = Buffer.from(data);
|
||||||
const table = arrow.tableFromRawBytes(buffer);
|
const table = arrow.tableFromRawBytes(buffer);
|
||||||
return table;
|
return table;
|
||||||
|
} else if (payload_type === 'jsontable') {
|
||||||
|
// Deserialize from JSON - returns Array<Object> (row-oriented)
|
||||||
|
const jsonStr = Buffer.from(data).toString('utf8');
|
||||||
|
return JSON.parse(jsonStr);
|
||||||
} else if (payload_type === 'image') {
|
} else if (payload_type === 'image') {
|
||||||
return Buffer.from(data);
|
return Buffer.from(data);
|
||||||
} else if (payload_type === 'audio') {
|
} else if (payload_type === 'audio') {
|
||||||
@@ -1489,7 +1687,8 @@ from typing import Any
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
import pyarrow as arrow
|
import pyarrow as arrow
|
||||||
import pyarrow.parquet as pq
|
import pyarrow.feather as feather
|
||||||
|
import pyarrow.ipc as ipc
|
||||||
ARROW_AVAILABLE = True
|
ARROW_AVAILABLE = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
ARROW_AVAILABLE = False
|
ARROW_AVAILABLE = False
|
||||||
@@ -1505,22 +1704,27 @@ def _serialize_data(data: Any, payload_type: str) -> bytes:
|
|||||||
elif payload_type == 'dictionary':
|
elif payload_type == 'dictionary':
|
||||||
json_str = json.dumps(data)
|
json_str = json.dumps(data)
|
||||||
return json_str.encode('utf-8')
|
return json_str.encode('utf-8')
|
||||||
elif payload_type == 'table':
|
elif payload_type == 'arrowtable':
|
||||||
if not ARROW_AVAILABLE:
|
if not ARROW_AVAILABLE:
|
||||||
raise Error('pyarrow not available for table serialization')
|
raise Error('pyarrow not available for table serialization')
|
||||||
|
|
||||||
# Convert DataFrame to Arrow
|
|
||||||
import io
|
import io
|
||||||
buf = io.BytesIO()
|
buf = io.BytesIO()
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
if isinstance(data, pd.DataFrame):
|
if isinstance(data, pd.DataFrame):
|
||||||
|
# Column-oriented DataFrame to Arrow
|
||||||
table = arrow.Table.from_pandas(data)
|
table = arrow.Table.from_pandas(data)
|
||||||
sink = arrow.ipc.new_file(buf)
|
sink = arrow.ipc.new_file(buf)
|
||||||
arrow.ipc.write_table(table, sink)
|
arrow.ipc.write_table(table, sink)
|
||||||
sink.close()
|
sink.close()
|
||||||
return buf.getvalue()
|
return buf.getvalue()
|
||||||
else:
|
else:
|
||||||
raise Error('Table data must be a pandas DataFrame')
|
raise Error('arrowtable data must be a pandas DataFrame')
|
||||||
|
elif payload_type == 'jsontable':
|
||||||
|
# data is list[dict] or list (row-oriented)
|
||||||
|
# Serialize directly to JSON
|
||||||
|
json_str = json.dumps(data)
|
||||||
|
return json_str.encode('utf-8')
|
||||||
elif payload_type == 'image':
|
elif payload_type == 'image':
|
||||||
if isinstance(data, (bytes, bytearray)):
|
if isinstance(data, (bytes, bytearray)):
|
||||||
return bytes(data)
|
return bytes(data)
|
||||||
@@ -1554,6 +1758,8 @@ from typing import Any
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
import pyarrow as arrow
|
import pyarrow as arrow
|
||||||
|
import pyarrow.feather as feather
|
||||||
|
import pyarrow.ipc as ipc
|
||||||
ARROW_AVAILABLE = True
|
ARROW_AVAILABLE = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
ARROW_AVAILABLE = False
|
ARROW_AVAILABLE = False
|
||||||
@@ -1566,7 +1772,7 @@ def _deserialize_data(data: bytes, payload_type: str, correlation_id: str) -> An
|
|||||||
elif payload_type == 'dictionary':
|
elif payload_type == 'dictionary':
|
||||||
json_str = data.decode('utf-8')
|
json_str = data.decode('utf-8')
|
||||||
return json.loads(json_str)
|
return json.loads(json_str)
|
||||||
elif payload_type == 'table':
|
elif payload_type == 'arrowtable':
|
||||||
if not ARROW_AVAILABLE:
|
if not ARROW_AVAILABLE:
|
||||||
raise Error('pyarrow not available for table deserialization')
|
raise Error('pyarrow not available for table deserialization')
|
||||||
|
|
||||||
@@ -1574,6 +1780,10 @@ def _deserialize_data(data: bytes, payload_type: str, correlation_id: str) -> An
|
|||||||
buf = io.BytesIO(data)
|
buf = io.BytesIO(data)
|
||||||
reader = arrow.ipc.open_file(buf)
|
reader = arrow.ipc.open_file(buf)
|
||||||
return reader.read_all().to_pandas()
|
return reader.read_all().to_pandas()
|
||||||
|
elif payload_type == 'jsontable':
|
||||||
|
# Deserialize from JSON - returns list[dict] (row-oriented)
|
||||||
|
json_str = data.decode('utf-8')
|
||||||
|
return json.loads(json_str)
|
||||||
elif payload_type == 'image':
|
elif payload_type == 'image':
|
||||||
return data
|
return data
|
||||||
elif payload_type == 'audio':
|
elif payload_type == 'audio':
|
||||||
@@ -1684,7 +1894,8 @@ MicroPython has significant constraints compared to desktop implementations:
|
|||||||
| Arrow IPC | ✅ | ❌ (not supported) |
|
| Arrow IPC | ✅ | ❌ (not supported) |
|
||||||
| Async/Await | ✅ | ⚠️ (uasyncio only) |
|
| Async/Await | ✅ | ⚠️ (uasyncio only) |
|
||||||
| Large payloads (>1MB) | ✅ | ❌ (enforced limit) |
|
| Large payloads (>1MB) | ✅ | ❌ (enforced limit) |
|
||||||
| Table type | ✅ | ❌ |
|
| arrowtable | ✅ | ❌ |
|
||||||
|
| jsontable | ⚠️ (limited) | ⚠️ (limited) |
|
||||||
| Multiple payloads | ✅ | ⚠️ (limited) |
|
| Multiple payloads | ✅ | ⚠️ (limited) |
|
||||||
|
|
||||||
### MicroPython Module Structure
|
### MicroPython Module Structure
|
||||||
@@ -1704,6 +1915,9 @@ DEFAULT_BROKER_URL = "nats://localhost:4222"
|
|||||||
DEFAULT_FILESERVER_URL = "http://localhost:8080"
|
DEFAULT_FILESERVER_URL = "http://localhost:8080"
|
||||||
MAX_PAYLOAD_SIZE = 50000 # Hard limit
|
MAX_PAYLOAD_SIZE = 50000 # Hard limit
|
||||||
|
|
||||||
|
# Note: MicroPython uses list[list] for jsontable (row-oriented)
|
||||||
|
# No DataFrame support - data is always row-oriented
|
||||||
|
|
||||||
|
|
||||||
class NATSBridge:
|
class NATSBridge:
|
||||||
"""MicroPython NATS bridge implementation."""
|
"""MicroPython NATS bridge implementation."""
|
||||||
@@ -1811,11 +2025,14 @@ class NATSBridge:
|
|||||||
return env_json_obj
|
return env_json_obj
|
||||||
|
|
||||||
def _serialize_data(self, data, payload_type):
|
def _serialize_data(self, data, payload_type):
|
||||||
"""Serialize data (MicroPython version - no table support)."""
|
"""Serialize data (MicroPython version - no arrowtable support)."""
|
||||||
if payload_type == 'text':
|
if payload_type == 'text':
|
||||||
return data.encode('utf-8')
|
return data.encode('utf-8')
|
||||||
elif payload_type == 'dictionary':
|
elif payload_type == 'dictionary':
|
||||||
return json.dumps(data).encode('utf-8')
|
return json.dumps(data).encode('utf-8')
|
||||||
|
elif payload_type == 'jsontable':
|
||||||
|
# data is list[list] (row-oriented)
|
||||||
|
return json.dumps(data).encode('utf-8')
|
||||||
elif payload_type in ('image', 'audio', 'video', 'binary'):
|
elif payload_type in ('image', 'audio', 'video', 'binary'):
|
||||||
return bytes(data)
|
return bytes(data)
|
||||||
else:
|
else:
|
||||||
@@ -1827,6 +2044,9 @@ class NATSBridge:
|
|||||||
return data.decode('utf-8')
|
return data.decode('utf-8')
|
||||||
elif payload_type == 'dictionary':
|
elif payload_type == 'dictionary':
|
||||||
return json.loads(data.decode('utf-8'))
|
return json.loads(data.decode('utf-8'))
|
||||||
|
elif payload_type == 'jsontable':
|
||||||
|
# Returns list[list] (row-oriented)
|
||||||
|
return json.loads(data.decode('utf-8'))
|
||||||
elif payload_type in ('image', 'audio', 'video', 'binary'):
|
elif payload_type in ('image', 'audio', 'video', 'binary'):
|
||||||
return data
|
return data
|
||||||
else:
|
else:
|
||||||
@@ -1926,6 +2146,13 @@ All platforms use correlation IDs for distributed tracing:
|
|||||||
[timestamp] [Correlation: abc123] Message published to subject
|
[timestamp] [Correlation: abc123] Message published to subject
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Serialization Performance
|
||||||
|
|
||||||
|
| Format | Use Case | Pros | Cons |
|
||||||
|
|--------|----------|------|------|
|
||||||
|
| `arrowtable` | Large tabular data | Fast, zero-copy, schema-preserving | Binary format, requires Arrow library, not supported in MicroPython |
|
||||||
|
| `jsontable` | Small/medium tabular data | Human-readable, universal support, works in MicroPython | Slower, larger size, no schema enforcement |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Testing
|
## Testing
|
||||||
@@ -1978,6 +2205,12 @@ python3 test/test_py_text_receiver.py
|
|||||||
- Reduce `size_threshold`
|
- Reduce `size_threshold`
|
||||||
- Use direct transport only (< 100KB)
|
- Use direct transport only (< 100KB)
|
||||||
- Avoid large payloads
|
- Avoid large payloads
|
||||||
|
- Use `jsontable` instead of `arrowtable` (arrowtable not supported)
|
||||||
|
|
||||||
|
5. **Row-Oriented vs Column-Oriented Conversion Issues**
|
||||||
|
- Julia/Python: DataFrames are column-oriented; when sending `jsontable`, they are converted to row-oriented JSON
|
||||||
|
- JavaScript/MicroPython: Data is natively row-oriented
|
||||||
|
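- When receiving `jsontable` in Julia/Python, the payload is returned as row-oriented parsed JSON (a vector/list of dictionaries); it is not converted back automatically, so build a column-oriented DataFrame from it explicitly if one is needed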
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -1993,6 +2226,16 @@ This cross-platform NATS bridge provides:
|
|||||||
- **MicroPython**: Synchronous API, memory-constrained optimizations
|
- **MicroPython**: Synchronous API, memory-constrained optimizations
|
||||||
3. **Message Format Consistency**: Identical JSON schemas across all platforms
|
3. **Message Format Consistency**: Identical JSON schemas across all platforms
|
||||||
4. **Handler Abstraction**: File server operations abstracted through configurable handlers
|
4. **Handler Abstraction**: File server operations abstracted through configurable handlers
|
||||||
5. **Platform-Specific Optimizations**: Arrow IPC in desktop platforms, streaming support in MicroPython
|
5. **Platform-Specific Optimizations**:
|
||||||
|
- **Arrow IPC** (`arrowtable`): Efficient binary format for large tabular data (not supported in MicroPython)
|
||||||
|
- **JSON** (`jsontable`): Universal human-readable format for smaller tables (works in all platforms)
|
||||||
|
6. **Row-Oriented ↔ Column-Oriented Conversion**: Automatic conversion between row-oriented (JS, MicroPython) and column-oriented (Julia DataFrame, Python pandas) formats when using `jsontable`
|
||||||
|
|
||||||
The Julia implementation in [`src/NATSBridge.jl`](src/NATSBridge.jl:1) serves as the ground truth for API design and behavior.
|
The Julia implementation in [`src/NATSBridge.jl`](src/NATSBridge.jl:1) serves as the ground truth for API design and behavior.
|
||||||
|
|
||||||
|
### Datatype Summary
|
||||||
|
|
||||||
|
| Datatype | Serialization | Use Case | Encoding | Supported Platforms |
|
||||||
|
|----------|---------------|----------|----------|---------------------|
|
||||||
|
| `arrowtable` | Apache Arrow IPC | Large tabular data, schema-preserving | `arrow-ipc` → `base64` | Julia, JavaScript, Python |
|
||||||
|
| `jsontable` | JSON | Small/medium tabular data, human-readable | `json` → `base64` | Julia, JavaScript, Python, MicroPython |
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -31,7 +31,15 @@
|
|||||||
# [(dataname1, data1, type1), (dataname2, data2, type2), ...]
|
# [(dataname1, data1, type1), (dataname2, data2, type2), ...]
|
||||||
# ```
|
# ```
|
||||||
#
|
#
|
||||||
# Supported types: "text", "dictionary", "table", "image", "audio", "video", "binary"
|
# Supported types: "text", "dictionary", "arrowtable", "jsontable", "image", "audio", "video", "binary"
|
||||||
|
#
|
||||||
|
# Table Datatypes:
|
||||||
|
# - `arrowtable`: Apache Arrow IPC format for efficient binary serialization
|
||||||
|
# - Input: DataFrame, Arrow.Table
|
||||||
|
# - Encoding: arrow-ipc
|
||||||
|
# - `jsontable`: JSON format for human-readable tabular data
|
||||||
|
# - Input: Vector{NamedTuple}, Vector{Dict} (row-oriented); a DataFrame is also accepted and converted to rows
|
||||||
|
# - Encoding: json
|
||||||
|
|
||||||
module NATSBridge
|
module NATSBridge
|
||||||
|
|
||||||
@@ -51,7 +59,7 @@ It supports both direct transport (base64-encoded data) and link transport (URL-
|
|||||||
# Arguments:
|
# Arguments:
|
||||||
- `id::String` - Unique identifier for this payload (e.g., "uuid4")
|
- `id::String` - Unique identifier for this payload (e.g., "uuid4")
|
||||||
- `dataname::String` - Name of the payload (e.g., "login_image")
|
- `dataname::String` - Name of the payload (e.g., "login_image")
|
||||||
- `payload_type::String` - Payload type: "text", "dictionary", "table", "image", "audio", "video", "binary"
|
- `payload_type::String` - Payload type: "text", "dictionary", "arrowtable", "jsontable", "image", "audio", "video", "binary"
|
||||||
- `transport::String` - Transport method: "direct" or "link"
|
- `transport::String` - Transport method: "direct" or "link"
|
||||||
- `encoding::String` - Encoding method: "none", "json", "base64", "arrow-ipc"
|
- `encoding::String` - Encoding method: "none", "json", "base64", "arrow-ipc"
|
||||||
- `size::Integer` - Size of the payload in bytes (e.g., 15433)
|
- `size::Integer` - Size of the payload in bytes (e.g., 15433)
|
||||||
@@ -100,7 +108,7 @@ payload = msg_payload_v1(
|
|||||||
struct msg_payload_v1
|
struct msg_payload_v1
|
||||||
id::String # id of this payload e.g. "uuid4"
|
id::String # id of this payload e.g. "uuid4"
|
||||||
dataname::String # name of this payload e.g. "login_image"
|
dataname::String # name of this payload e.g. "login_image"
|
||||||
payload_type::String # this payload type. Can be "text", "dictionary", "table", "image", "audio", "video", "binary"
|
payload_type::String # this payload type. Can be "text", "dictionary", "arrowtable", "jsontable", "image", "audio", "video", "binary"
|
||||||
transport::String # transport method: "direct" or "link"
|
transport::String # transport method: "direct" or "link"
|
||||||
encoding::String # encoding method: "none", "json", "base64", "arrow-ipc"
|
encoding::String # encoding method: "none", "json", "base64", "arrow-ipc"
|
||||||
size::Integer # data size in bytes e.g. 15433
|
size::Integer # data size in bytes e.g. 15433
|
||||||
@@ -363,7 +371,7 @@ Each payload can have a different type, enabling mixed-content messages (e.g., c
|
|||||||
- `data::AbstractArray{Tuple{String, Any, String}}` - List of (dataname, data, type) tuples to send
|
- `data::AbstractArray{Tuple{String, Any, String}}` - List of (dataname, data, type) tuples to send
|
||||||
- `dataname::String` - Name of the payload
|
- `dataname::String` - Name of the payload
|
||||||
- `data::Any` - The actual data to send
|
- `data::Any` - The actual data to send
|
||||||
- `payload_type::String` - Payload type: "text", "dictionary", "table", "image", "audio", "video", "binary"
|
- `payload_type::String` - Payload type: "text", "dictionary", "arrowtable", "jsontable", "image", "audio", "video", "binary"
|
||||||
- No standalone `type` parameter - type is specified per payload
|
- No standalone `type` parameter - type is specified per payload
|
||||||
|
|
||||||
# Keyword Arguments:
|
# Keyword Arguments:
|
||||||
@@ -399,11 +407,15 @@ env, msg_json = smartsend("my.subject", [("dataname1", data, "dictionary")])
|
|||||||
# Send multiple payloads in one message with different types
|
# Send multiple payloads in one message with different types
|
||||||
data1 = Dict("key1" => "value1")
|
data1 = Dict("key1" => "value1")
|
||||||
data2 = rand(10_000) # Small array
|
data2 = rand(10_000) # Small array
|
||||||
env, msg_json = smartsend("my.subject", [("dataname1", data1, "dictionary"), ("dataname2", data2, "table")])
|
env, msg_json = smartsend("my.subject", [("dataname1", data1, "dictionary"), ("dataname2", data2, "arrowtable")])
|
||||||
|
|
||||||
# Send a large array using fileserver upload
|
# Send a large array using fileserver upload
|
||||||
data = rand(10_000_000) # ~80 MB
|
data = rand(10_000_000) # ~80 MB
|
||||||
env, msg_json = smartsend("large.data", [("large_table", data, "table")])
|
env, msg_json = smartsend("large.data", [("large_arrow_table", data, "arrowtable")])
|
||||||
|
|
||||||
|
# Send jsontable (JSON format)
|
||||||
|
rows = [Dict("id" => 1, "name" => "Alice"), Dict("id" => 2, "name" => "Bob")]
|
||||||
|
env, msg_json = smartsend("json.data", [("users", rows, "jsontable")])
|
||||||
|
|
||||||
# Mixed content (e.g., chat with text and image)
|
# Mixed content (e.g., chat with text and image)
|
||||||
env, msg_json = smartsend("chat.subject", [
|
env, msg_json = smartsend("chat.subject", [
|
||||||
@@ -424,13 +436,12 @@ function smartsend(
|
|||||||
fileserver_upload_handler::Function = plik_oneshot_upload, # a function to handle uploading data to specific HTTP fileserver
|
fileserver_upload_handler::Function = plik_oneshot_upload, # a function to handle uploading data to specific HTTP fileserver
|
||||||
size_threshold::Int = DEFAULT_SIZE_THRESHOLD,
|
size_threshold::Int = DEFAULT_SIZE_THRESHOLD,
|
||||||
|
|
||||||
#=
|
# Generate a globally unique identifier (UUID) at the start of the request.
|
||||||
Generate a globally unique identifier (UUID) at the start of the request.
|
# This ID must remain constant and immutable as it propagates through every
|
||||||
This ID must remain constant and immutable as it propagates through every
|
# stage of the execution pipeline. It serves as the end-to-end ID for
|
||||||
stage of the execution pipeline. It serves as the end-to-end ID for
|
# distributed tracing, enabling the correlation of all logs, metrics, and
|
||||||
distributed tracing, enabling the correlation of all logs, metrics, and
|
# errors across the system back to this specific request instance.
|
||||||
errors across the system back to this specific request instance.
|
|
||||||
=#
|
|
||||||
correlation_id::String = string(uuid4()),
|
correlation_id::String = string(uuid4()),
|
||||||
|
|
||||||
msg_purpose::String = "chat",
|
msg_purpose::String = "chat",
|
||||||
@@ -463,6 +474,14 @@ function smartsend(
|
|||||||
payload_b64 = Base64.base64encode(payload_bytes) # Encode bytes as base64 string
|
payload_b64 = Base64.base64encode(payload_bytes) # Encode bytes as base64 string
|
||||||
log_trace(correlation_id, "Using direct transport for $payload_size bytes") # Log transport choice
|
log_trace(correlation_id, "Using direct transport for $payload_size bytes") # Log transport choice
|
||||||
|
|
||||||
|
# Determine encoding based on payload_type
|
||||||
|
encoding = "base64"
|
||||||
|
if payload_type == "jsontable"
|
||||||
|
encoding = "json"
|
||||||
|
elseif payload_type == "arrowtable"
|
||||||
|
encoding = "arrow-ipc"
|
||||||
|
end
|
||||||
|
|
||||||
# Create msg_payload_v1 for direct transport
|
# Create msg_payload_v1 for direct transport
|
||||||
payload = msg_payload_v1(
|
payload = msg_payload_v1(
|
||||||
payload_b64,
|
payload_b64,
|
||||||
@@ -470,7 +489,7 @@ function smartsend(
|
|||||||
id = string(uuid4()),
|
id = string(uuid4()),
|
||||||
dataname = dataname,
|
dataname = dataname,
|
||||||
transport = "direct",
|
transport = "direct",
|
||||||
encoding = "base64",
|
encoding = encoding,
|
||||||
size = payload_size,
|
size = payload_size,
|
||||||
metadata = Dict{String, Any}("payload_bytes" => payload_size)
|
metadata = Dict{String, Any}("payload_bytes" => payload_size)
|
||||||
)
|
)
|
||||||
@@ -489,6 +508,14 @@ function smartsend(
|
|||||||
url = response["url"] # URL for the uploaded data
|
url = response["url"] # URL for the uploaded data
|
||||||
log_trace(correlation_id, "Uploaded to URL: $url") # Log successful upload
|
log_trace(correlation_id, "Uploaded to URL: $url") # Log successful upload
|
||||||
|
|
||||||
|
# Determine encoding based on payload_type
|
||||||
|
encoding = "none"
|
||||||
|
if payload_type == "jsontable"
|
||||||
|
encoding = "json"
|
||||||
|
elseif payload_type == "arrowtable"
|
||||||
|
encoding = "arrow-ipc"
|
||||||
|
end
|
||||||
|
|
||||||
# Create msg_payload_v1 for link transport
|
# Create msg_payload_v1 for link transport
|
||||||
payload = msg_payload_v1(
|
payload = msg_payload_v1(
|
||||||
url,
|
url,
|
||||||
@@ -496,7 +523,7 @@ function smartsend(
|
|||||||
id = string(uuid4()),
|
id = string(uuid4()),
|
||||||
dataname = dataname,
|
dataname = dataname,
|
||||||
transport = "link",
|
transport = "link",
|
||||||
encoding = "none",
|
encoding = encoding,
|
||||||
size = payload_size,
|
size = payload_size,
|
||||||
metadata = Dict{String, Any}()
|
metadata = Dict{String, Any}()
|
||||||
)
|
)
|
||||||
@@ -543,12 +570,13 @@ It supports multiple serialization formats for different data types.
|
|||||||
2. Converts data to binary representation according to format rules
|
2. Converts data to binary representation according to format rules
|
||||||
3. For text: converts string to UTF-8 bytes
|
3. For text: converts string to UTF-8 bytes
|
||||||
4. For dictionary: serializes as JSON then converts to bytes
|
4. For dictionary: serializes as JSON then converts to bytes
|
||||||
5. For table: uses Arrow.jl to write as IPC stream
|
5. For arrowtable: uses Arrow.jl to write as IPC stream
|
||||||
6. For image/audio/video/binary: returns binary data directly
|
6. For jsontable: converts to JSON then to bytes
|
||||||
|
7. For image/audio/video/binary: returns binary data directly
|
||||||
|
|
||||||
# Arguments:
|
# Arguments:
|
||||||
- `data::Any` - Data to serialize (string for `"text"`, JSON-serializable for `"dictionary"`, table-like for `"table"`, binary for `"image"`, `"audio"`, `"video"`, `"binary"`)
|
- `data::Any` - Data to serialize (string for `"text"`, JSON-serializable for `"dictionary"`, table-like for `"arrowtable"`, Vector{NamedTuple}/Vector{Dict} for `"jsontable"`, binary for `"image"`, `"audio"`, `"video"`, `"binary"`)
|
||||||
- `payload_type::String` - Target format: "text", "dictionary", "table", "image", "audio", "video", "binary"
|
- `payload_type::String` - Target format: "text", "dictionary", "arrowtable", "jsontable", "image", "audio", "video", "binary"
|
||||||
|
|
||||||
# Return:
|
# Return:
|
||||||
- `Vector{UInt8}` - Binary representation of the serialized data
|
- `Vector{UInt8}` - Binary representation of the serialized data
|
||||||
@@ -569,9 +597,13 @@ text_bytes = _serialize_data(text_data, "text")
|
|||||||
json_data = Dict("name" => "Alice", "age" => 30)
|
json_data = Dict("name" => "Alice", "age" => 30)
|
||||||
json_bytes = _serialize_data(json_data, "dictionary")
|
json_bytes = _serialize_data(json_data, "dictionary")
|
||||||
|
|
||||||
# Table serialization with a DataFrame (recommended for tabular data)
|
# Arrow table serialization with a DataFrame (recommended for tabular data)
|
||||||
df = DataFrame(id = 1:3, name = ["Alice", "Bob", "Charlie"], score = [95, 88, 92])
|
df = DataFrame(id = 1:3, name = ["Alice", "Bob", "Charlie"], score = [95, 88, 92])
|
||||||
table_bytes = _serialize_data(df, "table")
|
arrow_bytes = _serialize_data(df, "arrowtable")
|
||||||
|
|
||||||
|
# JSON table serialization - Vector{NamedTuple} or Vector{Dict}
|
||||||
|
rows = [Dict("id" => 1, "name" => "Alice"), Dict("id" => 2, "name" => "Bob")]
|
||||||
|
json_bytes = _serialize_data(rows, "jsontable")
|
||||||
|
|
||||||
# Image data (Vector{UInt8})
|
# Image data (Vector{UInt8})
|
||||||
image_bytes = UInt8[1, 2, 3] # Image bytes
|
image_bytes = UInt8[1, 2, 3] # Image bytes
|
||||||
@@ -622,10 +654,30 @@ function _serialize_data(data::Any, payload_type::String)
|
|||||||
json_str = JSON.json(data) # Convert Julia data to JSON string
|
json_str = JSON.json(data) # Convert Julia data to JSON string
|
||||||
json_str_bytes = Vector{UInt8}(json_str) # Convert JSON string to bytes
|
json_str_bytes = Vector{UInt8}(json_str) # Convert JSON string to bytes
|
||||||
return json_str_bytes
|
return json_str_bytes
|
||||||
elseif payload_type == "table" # Table data - convert to Arrow IPC stream
|
elseif payload_type == "arrowtable" # Arrow table data - convert to Arrow IPC stream
|
||||||
io = IOBuffer() # Create in-memory buffer
|
io = IOBuffer() # Create in-memory buffer
|
||||||
Arrow.write(io, data) # Write data as Arrow IPC stream to buffer
|
Arrow.write(io, data) # Write data as Arrow IPC stream to buffer
|
||||||
return take!(io) # Return the buffer contents as bytes
|
return take!(io) # Return the buffer contents as bytes
|
||||||
|
elseif payload_type == "jsontable" # JSON table data - convert to JSON
|
||||||
|
# data can be Vector{NamedTuple}, Vector{Dict}, or DataFrame
|
||||||
|
# If DataFrame, convert to Vector{Dict} first
|
||||||
|
if isa(data, DataFrame)
|
||||||
|
# Convert DataFrame to Vector{Dict} (row-oriented)
|
||||||
|
rows = []
|
||||||
|
for i in 1:nrow(data)
|
||||||
|
row_dict = Dict()
|
||||||
|
for col in names(data)
|
||||||
|
row_dict[String(col)] = data[i, col]
|
||||||
|
end
|
||||||
|
push!(rows, row_dict)
|
||||||
|
end
|
||||||
|
json_str = JSON.json(rows)
|
||||||
|
return Vector{UInt8}(json_str)
|
||||||
|
else
|
||||||
|
# Already Vector{NamedTuple} or Vector{Dict}
|
||||||
|
json_str = JSON.json(data)
|
||||||
|
return Vector{UInt8}(json_str)
|
||||||
|
end
|
||||||
elseif payload_type == "image" # Image data - treat as binary
|
elseif payload_type == "image" # Image data - treat as binary
|
||||||
if isa(data, Vector{UInt8})
|
if isa(data, Vector{UInt8})
|
||||||
return data # Return binary data directly
|
return data # Return binary data directly
|
||||||
@@ -881,24 +933,25 @@ end
|
|||||||
|
|
||||||
""" _deserialize_data - Deserialize bytes to data based on type
|
""" _deserialize_data - Deserialize bytes to data based on type
|
||||||
This internal function converts serialized bytes back to Julia data based on type.
|
This internal function converts serialized bytes back to Julia data based on type.
|
||||||
It handles "text" (string), "dictionary" (JSON deserialization), "table" (Arrow IPC deserialization),
|
It handles "text" (string), "dictionary" (JSON deserialization), "arrowtable" (Arrow IPC deserialization),
|
||||||
"image" (binary data), "audio" (binary data), "video" (binary data), and "binary" (binary data).
|
"jsontable" (JSON deserialization), "image" (binary data), "audio" (binary data), "video" (binary data), and "binary" (binary data).
|
||||||
|
|
||||||
# Function Workflow:
|
# Function Workflow:
|
||||||
1. Validates the data type against supported formats
|
1. Validates the data type against supported formats
|
||||||
2. Converts bytes to appropriate Julia data type based on format
|
2. Converts bytes to appropriate Julia data type based on format
|
||||||
3. For text: converts bytes to string
|
3. For text: converts bytes to string
|
||||||
4. For dictionary: converts bytes to JSON string then parses to Julia object
|
4. For dictionary: converts bytes to JSON string then parses to Julia object
|
||||||
5. For table: reads Arrow IPC format and returns DataFrame
|
5. For arrowtable: reads Arrow IPC format and returns Arrow.Table
|
||||||
6. For image/audio/video/binary: returns bytes directly
|
6. For jsontable: converts bytes to JSON string then parses to Vector{Dict}
|
||||||
|
7. For image/audio/video/binary: returns bytes directly
|
||||||
|
|
||||||
# Arguments:
|
# Arguments:
|
||||||
- `data::Vector{UInt8}` - Serialized data as bytes
|
- `data::Vector{UInt8}` - Serialized data as bytes
|
||||||
- `payload_type::String` - Data type ("text", "dictionary", "table", "image", "audio", "video", "binary")
|
- `payload_type::String` - Data type ("text", "dictionary", "arrowtable", "jsontable", "image", "audio", "video", "binary")
|
||||||
- `correlation_id::String` - Correlation ID for logging
|
- `correlation_id::String` - Correlation ID for logging
|
||||||
|
|
||||||
# Return:
|
# Return:
|
||||||
- Deserialized data (String for "text", DataFrame for "table", JSON data for "dictionary", bytes for "image", "audio", "video", "binary")
|
- Deserialized data (String for "text", Arrow.Table for "arrowtable", Vector{Dict} for "jsontable", JSON data for "dictionary", bytes for "image", "audio", "video", "binary")
|
||||||
|
|
||||||
# Throws:
|
# Throws:
|
||||||
- `Error` if `payload_type` is not one of the supported types
|
- `Error` if `payload_type` is not one of the supported types
|
||||||
@@ -913,9 +966,13 @@ text_data = _deserialize_data(text_bytes, "text", "correlation123")
|
|||||||
json_bytes = UInt8[123, 34, 110, 97, 109, 101, 34, 58, 34, 65, 108, 105, 99, 101, 34, 125] # {"name":"Alice"}
|
json_bytes = UInt8[123, 34, 110, 97, 109, 101, 34, 58, 34, 65, 108, 105, 99, 101, 34, 125] # {"name":"Alice"}
|
||||||
json_data = _deserialize_data(json_bytes, "dictionary", "correlation123")
|
json_data = _deserialize_data(json_bytes, "dictionary", "correlation123")
|
||||||
|
|
||||||
# Arrow IPC data (table)
|
# Arrow IPC data (arrowtable)
|
||||||
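arrow_bytes = Vector{UInt8}([1, 2, 3]) # Placeholder only - real input must be a valid Arrow IPC stream (e.g. bytes produced by Arrow.write)
|
arrow_bytes = Vector{UInt8}([1, 2, 3]) # Placeholder only - real input must be a valid Arrow IPC stream (e.g. bytes produced by Arrow.write)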
|
||||||
table_data = _deserialize_data(arrow_bytes, "table", "correlation123")
|
arrow_table = _deserialize_data(arrow_bytes, "arrowtable", "correlation123")
|
||||||
|
|
||||||
|
# JSON table data (jsontable)
|
||||||
|
json_table_bytes = UInt8[91, 123, 34, 105, 100, 34, 58, 49, 44, 34, 110, 97, 109, 101, 34, 58, 34, 65, 108, 105, 99, 101, 34, 125, 93] # [{"id":1,"name":"Alice"}]
|
||||||
|
json_table = _deserialize_data(json_table_bytes, "jsontable", "correlation123")
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
function _deserialize_data(
|
function _deserialize_data(
|
||||||
@@ -928,10 +985,13 @@ function _deserialize_data(
|
|||||||
elseif payload_type == "dictionary" # JSON data - deserialize
|
elseif payload_type == "dictionary" # JSON data - deserialize
|
||||||
json_str = String(data) # Convert bytes to string
|
json_str = String(data) # Convert bytes to string
|
||||||
return JSON.parse(json_str) # Parse JSON string to JSON object
|
return JSON.parse(json_str) # Parse JSON string to JSON object
|
||||||
elseif payload_type == "table" # Table data - deserialize Arrow IPC stream
|
elseif payload_type == "arrowtable" # Arrow table data - deserialize Arrow IPC stream
|
||||||
io = IOBuffer(data) # Create buffer from bytes
|
io = IOBuffer(data) # Create buffer from bytes
|
||||||
df = Arrow.Table(io) # Read Arrow IPC format from buffer
|
table = Arrow.Table(io) # Read Arrow IPC format from buffer
|
||||||
return df # Return DataFrame
|
return table # Return Arrow.Table
|
||||||
|
elseif payload_type == "jsontable" # JSON table data - deserialize JSON
|
||||||
|
json_str = String(data) # Convert bytes to string
|
||||||
|
return JSON.parse(json_str) # Parse JSON string to Vector{Dict}
|
||||||
elseif payload_type == "image" # Image data - return binary
|
elseif payload_type == "image" # Image data - return binary
|
||||||
return data # Return bytes directly
|
return data # Return bytes directly
|
||||||
elseif payload_type == "audio" # Audio data - return binary
|
elseif payload_type == "audio" # Audio data - return binary
|
||||||
@@ -945,6 +1005,16 @@ function _deserialize_data(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
""" rows_to_columns_dict - Convert row-oriented dictionaries to a column-oriented dictionary

Takes a vector of row dictionaries (one dictionary per table row) and returns a single
dictionary that maps every column key to the vector of that column's values, in row order.

# Function Workflow:
1. Returns an empty, typed dictionary when there are no rows
2. Collects the union of keys over ALL rows (not only the first row), so a column that
   first appears in a later row is not silently dropped
3. Builds one value vector per key, inserting `missing` where a row lacks that key

# Arguments:
    - `rows::AbstractVector{<:AbstractDict{K}}` - Row dictionaries sharing the key type `K`
      (e.g. `Vector{Dict{Symbol,Any}}` or `Vector{Dict{String,Any}}`)

# Return:
    - Dictionary keyed by `K` whose values are column vectors with `length(rows)` elements;
      `Dict{K,Vector{Any}}()` when `rows` is empty
"""
function rows_to_columns_dict(rows::AbstractVector{<:AbstractDict{K}}) where {K}
    # An empty generator would produce an untyped Dict{Any,Any}; return a typed one instead
    isempty(rows) && return Dict{K,Vector{Any}}()

    # Gather every key that occurs in any row so ragged rows keep all of their columns
    column_keys = Set{K}()
    for row in rows
        union!(column_keys, keys(row))
    end

    # Build column-oriented dictionary; absent cells become `missing`
    return Dict(
        key => [get(row, key, missing) for row in rows]
        for key in column_keys
    )
end
|
||||||
|
|
||||||
""" plik_oneshot_upload - Upload a single file to a plik server using one-shot mode
|
""" plik_oneshot_upload - Upload a single file to a plik server using one-shot mode
|
||||||
This function uploads a raw byte array to a plik server in one-shot mode (no upload session).
|
This function uploads a raw byte array to a plik server in one-shot mode (no upload session).
|
||||||
@@ -1106,18 +1176,4 @@ end
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
end # module
|
end # module
|
||||||
|
|||||||
Reference in New Issue
Block a user