From 6e2fccd04ef0989af597ce0fc7e9982c8b998a66 Mon Sep 17 00:00:00 2001 From: narawat Date: Sun, 8 Mar 2026 13:43:26 +0700 Subject: [PATCH] remove column oriented json --- docs/implementation.md | 183 +++-------------------------------------- src/NATSBridge.jl | 2 +- 2 files changed, 11 insertions(+), 174 deletions(-) diff --git a/docs/implementation.md b/docs/implementation.md index ed21567..b8d3ceb 100644 --- a/docs/implementation.md +++ b/docs/implementation.md @@ -335,160 +335,6 @@ env, env_json_str = NATSBridge.smartsend( --- -## Row-Oriented vs Column-Oriented Data Structures - -Different platforms use different internal representations for tabular data. Understanding these differences is crucial for proper serialization/deserialization when using `jsontable` and `arrowtable` datatypes. - -### Data Structure Comparison - -| Platform | Table Structure | Orientation | -|----------|-----------------|-------------| -| **Julia (DataFrame)** | `Dict{String, Vector}` | Column-oriented | -| **Python (pandas)** | `dict[str, list]` | Column-oriented | -| **JavaScript** | `Array` | Row-oriented | -| **MicroPython** | `list[list]` | Row-oriented | - -### Column-Oriented (Julia DataFrame, Python pandas) - -In column-oriented structures, each column is stored as a separate array/vector: - -**Julia Example:** -```julia -# Create dictionary with column vectors -dict = Dict("customer age" => [15, 20, 25], - "first name" => ["Rohit", "Rahul", "Akshat"]) - -# Convert to DataFrame -df = DataFrame(dict) -println(df) -# Output: -# 3×2 DataFrame -# Row ┆ customer age ┆ first name -# ┆ Int64 ┆ String -# ─────┼──────────────┼──────────── -# 1 ┆ 15 ┆ "Rohit" -# 2 ┆ 20 ┆ "Rahul" -# 3 ┆ 25 ┆ "Akshat" -``` - -**Python Example:** -```python -# Create dictionary with column lists -data = { - "Name": ["Alice", "Bob", "Charlie"], - "Age": [25, 30, 35], - "Score": [88.5, 92.0, 79.5] -} - -# Convert to DataFrame -df = pd.DataFrame(data) -print(df) -# Output: -# Name Age Score -# 0 Alice 25 88.5 -# 1 Bob 30 92.0 -# 2 Charlie 35 79.5 -``` - -### Row-Oriented (JavaScript, MicroPython) - -In row-oriented structures, each row is stored as a separate object/array: - -**JavaScript Example:** -```javascript -// Array of objects (row-oriented) -const users = [ - { Name: "Alice", Age: 25, Score: 88.5 }, - { Name: "Bob", Age: 30, Score: 92.0 }, - { Name: "Charlie", Age: 35, Score: 79.5 } -]; -``` - -**MicroPython Example:** -```python -# List of lists (row-oriented) -users = [ - ["Alice", 25, 88.5], - ["Bob", 30, 92.0], - ["Charlie", 35, 79.5] -] -``` - -### Cross-Platform Conversion for jsontable - -When sending `jsontable` across platforms, the system performs automatic conversion between row-oriented and column-oriented formats: - -**Sending from Julia/Python (column-oriented) to JS/MicroPython (row-oriented):** -1. Convert column-oriented dict to row-oriented array of objects -2. Serialize to JSON -3. Send with `payload_type = "jsontable"` - -**Receiving from JS/MicroPython (row-oriented) to Julia/Python (column-oriented):** -1. Deserialize JSON to row-oriented array of objects -2. Convert to column-oriented dict -3. Create DataFrame from column-oriented dict - -**Example: Julia to JavaScript** -```julia -# Julia side - column-oriented DataFrame -df = DataFrame( - "Name" => ["Alice", "Bob", "Charlie"], - "Age" => [25, 30, 35], - "Score" => [88.5, 92.0, 79.5] -) - -# smartsend automatically converts to row-oriented JSON -env, env_json_str = smartsend( - "/data", - [("users", df, "jsontable")] -) -# JSON sent: [{"Name":"Alice","Age":25,"Score":88.5}, ...] -``` - -```javascript -// JavaScript side - receives row-oriented array -const [env, env_json_str] = await NATSBridge.smartsend( - "/data", - [["users", users, "jsontable"]] -); -// users is already row-oriented: [{Name: "Alice", Age: 25, ...}, ...] -``` - -**Example: JavaScript to Julia** -```javascript -// JavaScript side - row-oriented array -const users = [ - { Name: "Alice", Age: 25, Score: 88.5 }, - { Name: "Bob", Age: 30, Score: 92.0 } -]; - -const [env, env_json_str] = await NATSBridge.smartsend( - "/data", - [["users", users, "jsontable"]] -); -``` - -```julia -# Julia side - receives and converts to column-oriented DataFrame -env = smartreceive(msg; fileserver_download_handler=_fetch_with_backoff) -# The jsontable is automatically converted to DataFrame -for (dataname, data, type) in env["payloads"] - if type == "jsontable" - # data is now a DataFrame with column-oriented structure - println(data) - # Output: - # 2×3 DataFrame - # Row ┆ Name ┆ Age ┆ Score - # ┆ String ┆ Int64 ┆ Float64 - # ─────┼────────┼──────┼─────── - # 1 ┆ Alice ┆ 25 ┆ 88.5 - # 2 ┆ Bob ┆ 30 ┆ 92.0 - end -end -``` - ---- - ## Architecture ### Cross-Platform Claim-Check Pattern @@ -949,7 +795,7 @@ function _serialize_data(data::Any, payload_type::String) Arrow.write(io, data) return take!(io) elseif payload_type == "jsontable" - # Convert column-oriented to row-oriented JSON + # Serialize to JSON # data is Vector{NamedTuple} or Vector{Dict} json_str = JSON.json(data) return Vector{UInt8}(json_str) @@ -1005,7 +851,7 @@ function _deserialize_data( return arrow_table elseif payload_type == "jsontable" # Deserialize from JSON format - # Returns Vector{NamedTuple} (column-oriented compatible) + # Returns Vector{NamedTuple} or Vector{Dict} json_str = String(data) parsed = JSON.parse(json_str) return parsed @@ -1288,7 +1134,6 @@ async function serializeData(data, payload_type) { return Buffer.from(jsonStr, 'utf8'); } else if (payload_type === 'arrowtable') { // Convert Array to Arrow IPC - // data is row-oriented: [{id: 1, name: "Alice"}, ...] if (!Array.isArray(data) || data.length === 0) { throw new Error('arrowtable data must be a non-empty array of objects'); } @@ -1312,7 +1157,6 @@ async function serializeData(data, payload_type) { // Read buffer return writer.toBuffer(); } else if (payload_type === 'jsontable') { - // data is already row-oriented Array // Serialize directly to JSON const jsonStr = JSON.stringify(data); return Buffer.from(jsonStr, 'utf8'); @@ -1367,7 +1211,7 @@ async function deserializeData(data, payload_type, correlation_id) { const table = arrow.tableFromRawBytes(buffer); return table; } else if (payload_type === 'jsontable') { - // Deserialize from JSON - returns Array (row-oriented) + // Deserialize from JSON - returns Array const jsonStr = Buffer.from(data).toString('utf8'); return JSON.parse(jsonStr); } else if (payload_type === 'image') { @@ -1712,7 +1556,7 @@ def _serialize_data(data: Any, payload_type: str) -> bytes: buf = io.BytesIO() import pandas as pd if isinstance(data, pd.DataFrame): - # Column-oriented DataFrame to Arrow + # Serialize DataFrame to Arrow table = arrow.Table.from_pandas(data) sink = arrow.ipc.new_file(buf) arrow.ipc.write_table(table, sink) @@ -1721,7 +1565,6 @@ def _serialize_data(data: Any, payload_type: str) -> bytes: else: raise Error('arrowtable data must be a pandas DataFrame') elif payload_type == 'jsontable': - # data is list[dict] or list (row-oriented) # Serialize directly to JSON json_str = json.dumps(data) return json_str.encode('utf-8') @@ -1781,7 +1624,7 @@ def _deserialize_data(data: bytes, payload_type: str, correlation_id: str) -> An reader = arrow.ipc.open_file(buf) return reader.read_all().to_pandas() elif payload_type == 'jsontable': - # Deserialize from JSON - returns list[dict] (row-oriented) + # Deserialize from JSON - returns list[dict] json_str = data.decode('utf-8') return json.loads(json_str) elif payload_type == 'image': @@ -1915,8 +1758,8 @@ DEFAULT_BROKER_URL = "nats://localhost:4222" DEFAULT_FILESERVER_URL = "http://localhost:8080" MAX_PAYLOAD_SIZE = 50000 # Hard limit -# Note: MicroPython uses list[list] for jsontable (row-oriented) -# No DataFrame support - data is always row-oriented +# Note: MicroPython uses list[list] for jsontable +# No DataFrame support class NATSBridge: @@ -2031,7 +1874,7 @@ class NATSBridge: elif payload_type == 'dictionary': return json.dumps(data).encode('utf-8') elif payload_type == 'jsontable': - # data is list[list] (row-oriented) + # Serialize list of lists to JSON return json.dumps(data).encode('utf-8') elif payload_type in ('image', 'audio', 'video', 'binary'): return bytes(data) @@ -2045,7 +1888,7 @@ class NATSBridge: elif payload_type == 'dictionary': return json.loads(data.decode('utf-8')) elif payload_type == 'jsontable': - # Returns list[list] (row-oriented) + # Returns list of lists return json.loads(data.decode('utf-8')) elif payload_type in ('image', 'audio', 'video', 'binary'): return data @@ -2207,11 +2050,6 @@ python3 test/test_py_text_receiver.py - Avoid large payloads - Use `jsontable` instead of `arrowtable` (arrowtable not supported) -5. **Row-Oriented vs Column-Oriented Conversion Issues** - - Julia/Python: DataFrames are column-oriented; when sending `jsontable`, they are converted to row-oriented JSON - - JavaScript/MicroPython: Data is natively row-oriented - - When receiving `jsontable` in Julia/Python, JSON is automatically converted back to column-oriented DataFrame - --- ## Summary @@ -2226,10 +2064,9 @@ This cross-platform NATS bridge provides: - **MicroPython**: Synchronous API, memory-constrained optimizations 3. **Message Format Consistency**: Identical JSON schemas across all platforms 4. **Handler Abstraction**: File server operations abstracted through configurable handlers -5. **Platform-Specific Optimizations**: +5. **Platform-Specific Optimizations**: - **Arrow IPC** (`arrowtable`): Efficient binary format for large tabular data (not supported in MicroPython) - **JSON** (`jsontable`): Universal human-readable format for smaller tables (works in all platforms) -6. **Row-Oriented ↔ Column-Oriented Conversion**: Automatic conversion between row-oriented (JS, MicroPython) and column-oriented (Julia DataFrame, Python pandas) formats when using `jsontable` The Julia implementation in [`src/NATSBridge.jl`](src/NATSBridge.jl:1) serves as the ground truth for API design and behavior. diff --git a/src/NATSBridge.jl b/src/NATSBridge.jl index a698e32..9a0976d 100644 --- a/src/NATSBridge.jl +++ b/src/NATSBridge.jl @@ -38,7 +38,7 @@ # - Input: DataFrame, Arrow.Table # - Encoding: arrow-ipc # - `jsontable`: JSON format for human-readable tabular data -# - Input: Vector{NamedTuple}, Vector{Dict} (column-oriented compatible) +# - Input: Vector{NamedTuple}, Vector{Dict} # - Encoding: json module NATSBridge