adding jsontable
This commit is contained in:
@@ -55,7 +55,8 @@ All three platforms expose the same high-level API:
|
||||
|------|-------|------------|-------------------|
|
||||
| `text` | `String` | `string` | `str` |
|
||||
| `dictionary` | `Dict`, `NamedTuple` | `Object`, `Array` | `dict`, `list` |
|
||||
| `table` | `DataFrame`, `Arrow.Table` | `Array<Object>` (input) → `Buffer` (Arrow IPC) | `pandas.DataFrame`, `bytes` (Arrow IPC) |
|
||||
| `arrowtable` | `DataFrame`, `Arrow.Table` | `Array<Object>` (input) → `Buffer` (Arrow IPC) | `pandas.DataFrame`, `bytes` (Arrow IPC) |
|
||||
| `jsontable` | `Vector{NamedTuple}`, `Vector{Dict}` | `Array<Object>` | `list[dict]`, `list` |
|
||||
| `image` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes`, `bytearray` |
|
||||
| `audio` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes`, `bytearray` |
|
||||
| `video` | `Vector{UInt8}` | `Uint8Array`, `Buffer` | `bytes`, `bytearray` |
|
||||
@@ -236,13 +237,23 @@ flowchart TB
|
||||
},
|
||||
{
|
||||
"id": "uuid4",
|
||||
"dataname": "large_table",
|
||||
"payload_type": "table",
|
||||
"dataname": "large_arrow_table",
|
||||
"payload_type": "arrowtable",
|
||||
"transport": "link",
|
||||
"encoding": "none",
|
||||
"encoding": "arrow-ipc",
|
||||
"size": 524288,
|
||||
"data": "http://localhost:8080/file/UPLOAD_ID/FILE_ID/data.arrow",
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"id": "uuid4",
|
||||
"dataname": "json_table",
|
||||
"payload_type": "jsontable",
|
||||
"transport": "direct",
|
||||
"encoding": "json",
|
||||
"size": 1024,
|
||||
"data": "[{\"id\": 1, \"name\": \"Alice\"}, {\"id\": 2, \"name\": \"Bob\"}]",
|
||||
"metadata": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -255,11 +266,11 @@ flowchart TB
|
||||
{
|
||||
"id": "uuid4",
|
||||
"dataname": "login_image",
|
||||
"payload_type": "image | dictionary | table | text | audio | video | binary",
|
||||
"payload_type": "image | dictionary | arrowtable | jsontable | text | audio | video | binary",
|
||||
"transport": "direct | link",
|
||||
"encoding": "none | json | base64 | arrow-ipc",
|
||||
"size": 15433,
|
||||
"data": "base64-encoded-string | http-url",
|
||||
"data": "base64-encoded-string | http-url | json-string",
|
||||
"metadata": {
|
||||
"checksum": "sha256_hash"
|
||||
}
|
||||
@@ -278,25 +289,25 @@ flowchart TB
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ For each payload: │
|
||||
│ 1. Extract type from tuple/array │
|
||||
│ 1. Extract type from tuple/array │
|
||||
│ 2. Serialize based on type │
|
||||
│ 3. Check payload size │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
┌───────────┴────────────┐
|
||||
▼ ▼
|
||||
┌──────────────┐ ┌──────────────┐
|
||||
│ Direct Path │ │ Link Path │
|
||||
│ (< 1MB) │ │ (>= 1MB) │
|
||||
│ │ │ │
|
||||
│ • Serialize │ │ • Serialize │
|
||||
│ to buffer │ │ to buffer │
|
||||
│ • Base64 │ │ • Upload to │
|
||||
│ encode │ │ HTTP Server│
|
||||
│ • Publish to │ │ • Publish to │
|
||||
│ NATS │ │ NATS with │
|
||||
│ (in msg) │ │ URL │
|
||||
└──────────────┘ └──────────────┘
|
||||
┌───────────┴────────────┐
|
||||
▼ ▼
|
||||
┌──────────────┐ ┌──────────────┐
|
||||
│ Direct Path │ │ Link Path │
|
||||
│ (< 1MB) │ │ (>= 1MB) │
|
||||
│ │ │ │
|
||||
│ • Serialize │ │ • Serialize │
|
||||
│ to buffer │ │ to buffer │
|
||||
│ • Base64/JSON│ │ • Upload to │
|
||||
│ encode │ │ HTTP Server│
|
||||
│ • Publish to │ │ • Publish to │
|
||||
│ NATS │ │ NATS with │
|
||||
│ (in msg) │ │ URL │
|
||||
└──────────────┘ └──────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
@@ -422,6 +433,41 @@ function smartreceive(
|
||||
)::JSON.Object{String, Any}
|
||||
```
|
||||
|
||||
#### Serialization Logic for Tables
|
||||
|
||||
```julia
|
||||
"""
    _serialize_table_data(data, payload_type) -> Vector{UInt8}

Encode table `data` into raw bytes according to `payload_type`.

- `"arrowtable"`: `data` is written with `Arrow.write` into an in-memory buffer.
- `"jsontable"`: `data` is rendered with `JSON.json` and the string's bytes are returned.

# Throws
- `ArgumentError` if `payload_type` is neither `"arrowtable"` nor `"jsontable"`.
"""
function _serialize_table_data(data::Any, payload_type::String)::Vector{UInt8}
    # Arrow path: write into an in-memory buffer and hand back its bytes.
    if payload_type == "arrowtable"
        io = IOBuffer()
        Arrow.write(io, data)
        return take!(io)
    end

    # JSON path: the bytes of the JSON text.
    if payload_type == "jsontable"
        return Vector{UInt8}(JSON.json(data))
    end

    throw(ArgumentError("Unknown payload_type: $payload_type"))
end
|
||||
|
||||
"""
    _deserialize_table_data(data, payload_type)

Decode the raw bytes of a table payload according to `payload_type`.

- `"arrowtable"`: `data` is read as Apache Arrow IPC and returned as an `Arrow.Table`.
- `"jsontable"`: `data` is converted to a `String` and returned as parsed by `JSON.parse`.

`data` is left unmodified in both cases.

# Throws
- `ArgumentError` if `payload_type` is neither `"arrowtable"` nor `"jsontable"`.
"""
function _deserialize_table_data(data::Vector{UInt8}, payload_type::String)::Any
    if payload_type == "arrowtable"
        # Arrow.jl reads IPC data through the `Arrow.Table` constructor, which
        # accepts the byte vector directly (there is no `Arrow.read`/`Buffer`).
        return Arrow.Table(data)
    elseif payload_type == "jsontable"
        # `String(::Vector{UInt8})` takes ownership of the vector and empties it;
        # copy first so the caller's buffer stays intact.
        json_str = String(copy(data))
        return JSON.parse(json_str)
    else
        throw(ArgumentError("Unknown payload_type: $payload_type"))
    end
end
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### JavaScript Implementation
|
||||
@@ -541,7 +587,7 @@ class NATSClient {
|
||||
| Package | Purpose |
|
||||
|---------|---------|
|
||||
| `nats` | Core NATS functionality (nats.js) |
|
||||
| `uuid` | UUID generation |
|
||||
| `crypto` (built-in) | UUID generation (Node.js) |
|
||||
| `node-fetch` or `axios` | HTTP client for file server |
|
||||
| `apache-arrow` | Arrow IPC serialization |
|
||||
|
||||
@@ -550,7 +596,7 @@ class NATSClient {
|
||||
| Package | Purpose |
|
||||
|---------|---------|
|
||||
| `nats` | Browser-compatible NATS client |
|
||||
| `uuid` | UUID generation |
|
||||
| `crypto` (built-in) | UUID generation (browser) |
|
||||
| `fetch` (native) | HTTP client for file server |
|
||||
| `apache-arrow` | Arrow IPC serialization |
|
||||
|
||||
@@ -615,6 +661,43 @@ async function fetchWithBackoff(url, max_retries, base_delay, max_delay, correla
|
||||
}
|
||||
```
|
||||
|
||||
#### Serialization Logic for Tables
|
||||
|
||||
```javascript
|
||||
/**
 * Serialize table data to bytes according to `payload_type`.
 *
 * - "arrowtable": `data` (an array of row objects, or an existing Arrow
 *   table) is encoded as an Apache Arrow IPC stream.
 * - "jsontable": `data` is JSON-stringified and UTF-8 encoded.
 *
 * @param {Array<Object>|Object} data - Rows to serialize.
 * @param {string} payload_type - "arrowtable" or "jsontable".
 * @returns {Promise<Uint8Array>} The serialized bytes.
 * @throws {Error} If `payload_type` is not a known table type.
 */
async function serializeTableData(data, payload_type) {
    if (payload_type === "arrowtable") {
        // tableFromJSON infers the schema from the row objects, so no
        // hand-written schema is needed; an existing Table is used as-is.
        const table = data instanceof arrow.Table ? data : arrow.tableFromJSON(data);
        // tableToIPC returns a Uint8Array holding the IPC stream.
        return arrow.tableToIPC(table, "stream");
    } else if (payload_type === "jsontable") {
        const jsonStr = JSON.stringify(data);
        return new TextEncoder().encode(jsonStr);
    } else {
        throw new Error(`Unknown payload_type: ${payload_type}`);
    }
}
|
||||
|
||||
/**
 * Deserialize table bytes according to `payload_type`.
 *
 * - "arrowtable": `data` is read as Apache Arrow IPC and returned as an
 *   Arrow table.
 * - "jsontable": `data` is UTF-8 decoded and parsed as JSON.
 *
 * @param {Uint8Array} data - The serialized table bytes.
 * @param {string} payload_type - "arrowtable" or "jsontable".
 * @returns {Promise<Object|Array<Object>>} The decoded table.
 * @throws {Error} If `payload_type` is not a known table type.
 */
async function deserializeTableData(data, payload_type) {
    if (payload_type === "arrowtable") {
        // tableFromIPC is the apache-arrow reader for IPC bytes.
        return arrow.tableFromIPC(data);
    } else if (payload_type === "jsontable") {
        const jsonStr = new TextDecoder().decode(data);
        return JSON.parse(jsonStr);
    } else {
        throw new Error(`Unknown payload_type: ${payload_type}`);
    }
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Python/MicroPython Implementation
|
||||
@@ -906,6 +989,56 @@ async def fetch_with_backoff(
|
||||
pass
|
||||
```
|
||||
|
||||
#### Serialization Logic for Tables
|
||||
|
||||
```python
|
||||
def serialize_table_data(data: Any, payload_type: str) -> bytes:
    """Serialize table data to bytes according to ``payload_type``.

    Args:
        data: A ``pandas.DataFrame`` for ``"arrowtable"``, or a list of dicts
            for ``"jsontable"``.
        payload_type: ``"arrowtable"`` or ``"jsontable"``.

    Returns:
        The serialized bytes: an uncompressed Arrow IPC stream for
        ``"arrowtable"``, UTF-8 encoded JSON text for ``"jsontable"``.

    Raises:
        TypeError: If ``data`` has the wrong type for ``payload_type``.
        ValueError: If ``payload_type`` is not a known table type.
    """
    if payload_type == "arrowtable":
        # Imported here so the jsontable path does not require pyarrow.
        import pyarrow as pa

        if not isinstance(data, pd.DataFrame):
            raise TypeError("Expected pandas DataFrame for arrowtable")

        table = pa.Table.from_pandas(data)
        # Write a plain IPC stream rather than Feather: Feather is the IPC
        # *file* format and is LZ4-compressed by default, which not every
        # Arrow reader on the other platforms can decode.
        sink = pa.BufferOutputStream()
        with pa.ipc.new_stream(sink, table.schema) as writer:
            writer.write_table(table)
        return sink.getvalue().to_pybytes()

    elif payload_type == "jsontable":
        if isinstance(data, list) and all(isinstance(row, dict) for row in data):
            return json.dumps(data).encode('utf-8')
        else:
            raise TypeError("Expected list of dicts for jsontable")

    else:
        raise ValueError(f"Unknown payload_type: {payload_type}")


def deserialize_table_data(data: bytes, payload_type: str) -> Any:
    """Deserialize table bytes according to ``payload_type``.

    Args:
        data: The serialized table bytes.
        payload_type: ``"arrowtable"`` or ``"jsontable"``.

    Returns:
        A ``pandas.DataFrame`` for ``"arrowtable"``; the parsed JSON value
        for ``"jsontable"``.

    Raises:
        ValueError: If ``payload_type`` is not a known table type.
    """
    if payload_type == "arrowtable":
        # Imported here so the jsontable path does not require pyarrow.
        import pyarrow as pa

        # The IPC file format (what Feather V2 writes) starts with the magic
        # bytes "ARROW1"; anything else is treated as an IPC stream. Accepting
        # both keeps previously written Feather payloads readable.
        if data[:6] == b"ARROW1":
            reader = pa.ipc.open_file(data)
        else:
            reader = pa.ipc.open_stream(data)
        return reader.read_all().to_pandas()

    elif payload_type == "jsontable":
        json_str = data.decode('utf-8')
        return json.loads(json_str)

    else:
        raise ValueError(f"Unknown payload_type: {payload_type}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Platform Comparison Matrix
|
||||
@@ -917,6 +1050,9 @@ async def fetch_with_backoff(
|
||||
| **Type Safety** | ✅ Strong | ⚠️ (TypeScript) | ✅ (Type hints) | ❌ |
|
||||
| **Memory Management** | ✅ GC | ✅ GC | ✅ GC | ⚠️ (Manual) |
|
||||
| **Arrow IPC** | ✅ Native | ✅ (`apache-arrow`) | ✅ (pyarrow) | ❌ |
|
||||
| **JSON Serialization** | ✅ (JSON.jl) | ✅ (native) | ✅ (json) | ✅ (json) |
|
||||
| **arrowtable Support** | ✅ | ✅ | ✅ | ❌ |
|
||||
| **jsontable Support** | ✅ | ✅ | ✅ | ✅ |
|
||||
| **Direct Transport** | ✅ | ✅ | ✅ | ✅ |
|
||||
| **Link Transport** | ✅ | ✅ | ✅ | ⚠️ (Limited) |
|
||||
| **Handler Functions** | ✅ | ✅ | ✅ | ✅ |
|
||||
@@ -948,7 +1084,11 @@ function _serialize_data(data::Dict, payload_type::String)
|
||||
end
|
||||
|
||||
function _serialize_data(data::DataFrame, payload_type::String)
|
||||
# Table handling
|
||||
# Table handling - arrowtable
|
||||
end
|
||||
|
||||
# Stub method for row-oriented tables given as a vector of named tuples.
# The element type is written `<:NamedTuple` because Julia type parameters are
# invariant: a literal such as `[(id = 1,), (id = 2,)]` has a concrete
# `NamedTuple{...}` element type and would not match `Vector{NamedTuple}`.
function _serialize_data(data::Vector{<:NamedTuple}, payload_type::String)
    # Table handling - jsontable
end
|
||||
```
|
||||
|
||||
@@ -979,7 +1119,7 @@ function generateUUID() {
|
||||
}
|
||||
|
||||
async function serializeData(data, payload_type) {
|
||||
// Serialization logic
|
||||
// Serialization logic for arrowtable and jsontable
|
||||
}
|
||||
```
|
||||
|
||||
@@ -1028,9 +1168,9 @@ def smartreceive(msg, **kwargs):
|
||||
|
||||
| Platform | Code |
|
||||
|----------|------|
|
||||
| **Julia** | ```julia<br>df = DataFrame(id=1:1000000, value=rand(1000000))<br>env, env_json_str = smartsend("analysis", [("table", df, "table")])``` |
|
||||
| **JavaScript** | ```javascript<br>const df = [{ id: 1, value: 0.5 }, ...];<br>[env, env_json_str] = await smartsend("analysis", [["table", df, "table"]]);``` |
|
||||
| **Python** | ```python<br>import pandas as pd<br>df = pd.DataFrame({"id": range(1000000), "value": np.random.rand(1000000)})<br>env, env_json_str = await smartsend("analysis", [("table", df, "table")])``` |
|
||||
| **Julia** | ```julia<br>df = DataFrame(id=1:1000000, value=rand(1000000))<br>env, env_json_str = smartsend("analysis", [("table_data", df, "arrowtable")])``` |
|
||||
| **JavaScript** | ```javascript<br>const df = [{ id: 1, value: 0.5 }, ...];<br>[env, env_json_str] = await smartsend("analysis", [["table_data", df, "arrowtable"]]);``` |
|
||||
| **Python** | ```python<br>import pandas as pd<br>df = pd.DataFrame({"id": range(1000000), "value": np.random.rand(1000000)})<br>env, env_json_str = await smartsend("analysis", [("table_data", df, "arrowtable")])``` |
|
||||
|
||||
### Scenario 3: Chat System (Multi-Payload)
|
||||
|
||||
@@ -1040,6 +1180,29 @@ def smartreceive(msg, **kwargs):
|
||||
| **JavaScript** | ```javascript<br>const chat = [["text", "Hello!", "text"], ["image", imgBuffer, "image"]];<br>[env, env_json_str] = await smartsend("chat", chat);``` |
|
||||
| **Python** | ```python<br>chat = [("text", "Hello!", "text"), ("image", img_bytes, "image")]<br>env, env_json_str = await smartsend("chat", chat)``` |
|
||||
|
||||
### Scenario 4: JSON Table Transfer (Cross-Platform)
|
||||
|
||||
| Platform | Code |
|
||||
|----------|------|
|
||||
| **Julia** | ```julia<br>rows = [Dict("id" => 1, "name" => "Alice"), Dict("id" => 2, "name" => "Bob")]<br>env, env_json_str = smartsend("data", [("users", rows, "jsontable")])``` |
|
||||
| **JavaScript** | ```javascript<br>const users = [{ id: 1, name: "Alice" }, { id: 2, name: "Bob" }];<br>[env, env_json_str] = await smartsend("data", [["users", users, "jsontable"]]);``` |
|
||||
| **Python** | ```python<br>users = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]<br>env, env_json_str = await smartsend("data", [("users", users, "jsontable")])``` |
|
||||
|
||||
### Scenario 5: Smart Transport Selection
|
||||
|
||||
The `smartsend` function automatically selects the transport method based on payload size:
|
||||
|
||||
- **Direct Transport (< 1MB)**: Payload is serialized and embedded directly in the NATS message
|
||||
- `arrowtable`: Serialized to Arrow IPC, base64 encoded
|
||||
- `jsontable`: Serialized to JSON, embedded as a JSON string (`"encoding": "json"`)
|
||||
- `dictionary`: Serialized to JSON, base64 encoded
|
||||
- `text`: Serialized to UTF-8, base64 encoded
|
||||
- `image/audio/video/binary`: Base64 encoded
|
||||
|
||||
- **Link Transport (>= 1MB)**: Payload is uploaded to HTTP file server, URL embedded in message
|
||||
- All types supported
|
||||
- Receiver downloads from URL and deserializes
|
||||
|
||||
---
|
||||
|
||||
## Performance Considerations (Cross-Platform)
|
||||
@@ -1080,6 +1243,13 @@ All platforms use correlation IDs for distributed tracing:
|
||||
[timestamp] [Correlation: abc123] Message published to subject
|
||||
```
|
||||
|
||||
### Serialization Performance Comparison
|
||||
|
||||
| Format | Use Case | Pros | Cons |
|
||||
|--------|----------|------|------|
|
||||
| `arrowtable` | Large tabular data | Fast, zero-copy, schema-preserving | Binary format, requires Arrow library |
|
||||
| `jsontable` | Small/medium tabular data | Human-readable, universal support | Slower, larger size, no schema |
|
||||
|
||||
---
|
||||
|
||||
## Testing Strategy (Cross-Platform)
|
||||
@@ -1092,12 +1262,15 @@ All platforms use correlation IDs for distributed tracing:
|
||||
| **Deserialization** | `test/test_julia_text_receiver.jl` | `test/test_js_text_receiver.js` | `test/test_py_text_receiver.py` |
|
||||
| **Large Payload** | `test/test_julia_file_sender.jl` | `test/test_js_file_sender.js` | `test/test_py_file_sender.py` |
|
||||
| **Multi-Payload** | `test/test_julia_mix_payloads_sender.jl` | `test/test_js_mix_payloads_sender.js` | `test/test_py_mix_payloads_sender.py` |
|
||||
| **Arrow Table** | `test/test_julia_table_sender.jl` | `test/test_js_table_sender.js` | `test/test_py_table_sender.py` |
|
||||
|
||||
### Integration Tests
|
||||
|
||||
- NATS server communication
|
||||
- File server upload/download
|
||||
- Cross-platform message exchange
|
||||
- Arrow table serialization/deserialization
|
||||
- JSON table serialization/deserialization
|
||||
|
||||
---
|
||||
|
||||
@@ -1134,6 +1307,16 @@ This cross-platform NATS bridge provides:
|
||||
- Python: Class-based design with type hints
|
||||
3. **Message Format Consistency**: Identical `msg_envelope_v1` and `msg_payload_v1` JSON schemas
|
||||
4. **Handler Abstraction**: File server operations abstracted through configurable handlers
|
||||
5. **Platform-Specific Optimizations**: Arrow IPC support in desktop platforms, streaming support in MicroPython
|
||||
5. **Platform-Specific Optimizations**:
|
||||
- **Arrow IPC** (`arrowtable`): Efficient binary format for large tabular data
|
||||
- **JSON** (`jsontable`): Universal human-readable format for smaller tables
|
||||
- Streaming support in MicroPython
|
||||
|
||||
The Julia implementation serves as the **ground truth** for API design and behavior, while JavaScript and Python implementations maintain interface parity while leveraging their respective language idioms.
|
||||
The Julia implementation serves as the **ground truth** for API design and behavior, while JavaScript and Python implementations maintain interface parity while leveraging their respective language idioms.
|
||||
|
||||
### Datatype Summary
|
||||
|
||||
| Datatype | Serialization | Use Case | Encoding |
|
||||
|----------|---------------|----------|----------|
|
||||
| `arrowtable` | Apache Arrow IPC | Large tabular data, schema-preserving | `arrow-ipc` → `base64` |
|
||||
| `jsontable` | JSON | Small/medium tabular data, human-readable | `json` → `base64` |
|
||||
|
||||
Reference in New Issue
Block a user