adding jsontable
This commit is contained in:
@@ -31,7 +31,15 @@
|
||||
# [(dataname1, data1, type1), (dataname2, data2, type2), ...]
|
||||
# ```
|
||||
#
|
||||
# Supported types: "text", "dictionary", "table", "image", "audio", "video", "binary"
|
||||
# Supported types: "text", "dictionary", "arrowtable", "jsontable", "image", "audio", "video", "binary"
|
||||
#
|
||||
# Table Datatypes:
|
||||
# - `arrowtable`: Apache Arrow IPC format for efficient binary serialization
|
||||
# - Input: DataFrame, Arrow.Table
|
||||
# - Encoding: arrow-ipc
|
||||
# - `jsontable`: JSON format for human-readable tabular data
|
||||
# - Input: Vector{NamedTuple}, Vector{Dict}, or DataFrame (serialized as row-oriented records)
|
||||
# - Encoding: json
|
||||
|
||||
module NATSBridge
|
||||
|
||||
@@ -51,7 +59,7 @@ It supports both direct transport (base64-encoded data) and link transport (URL-
|
||||
# Arguments:
|
||||
- `id::String` - Unique identifier for this payload (e.g., "uuid4")
|
||||
- `dataname::String` - Name of the payload (e.g., "login_image")
|
||||
- `payload_type::String` - Payload type: "text", "dictionary", "table", "image", "audio", "video", "binary"
|
||||
- `payload_type::String` - Payload type: "text", "dictionary", "arrowtable", "jsontable", "image", "audio", "video", "binary"
|
||||
- `transport::String` - Transport method: "direct" or "link"
|
||||
- `encoding::String` - Encoding method: "none", "json", "base64", "arrow-ipc"
|
||||
- `size::Integer` - Size of the payload in bytes (e.g., 15433)
|
||||
@@ -100,7 +108,7 @@ payload = msg_payload_v1(
|
||||
struct msg_payload_v1
|
||||
id::String # id of this payload e.g. "uuid4"
|
||||
dataname::String # name of this payload e.g. "login_image"
|
||||
payload_type::String # this payload type. Can be "text", "dictionary", "table", "image", "audio", "video", "binary"
|
||||
payload_type::String # this payload type. Can be "text", "dictionary", "arrowtable", "jsontable", "image", "audio", "video", "binary"
|
||||
transport::String # transport method: "direct" or "link"
|
||||
encoding::String # encoding method: "none", "json", "base64", "arrow-ipc"
|
||||
size::Integer # data size in bytes e.g. 15433
|
||||
@@ -363,7 +371,7 @@ Each payload can have a different type, enabling mixed-content messages (e.g., c
|
||||
- `data::AbstractArray{Tuple{String, Any, String}}` - List of (dataname, data, type) tuples to send
|
||||
- `dataname::String` - Name of the payload
|
||||
- `data::Any` - The actual data to send
|
||||
- `payload_type::String` - Payload type: "text", "dictionary", "table", "image", "audio", "video", "binary"
|
||||
- `payload_type::String` - Payload type: "text", "dictionary", "arrowtable", "jsontable", "image", "audio", "video", "binary"
|
||||
- No standalone `type` parameter - type is specified per payload
|
||||
|
||||
# Keyword Arguments:
|
||||
@@ -399,11 +407,15 @@ env, msg_json = smartsend("my.subject", [("dataname1", data, "dictionary")])
|
||||
# Send multiple payloads in one message with different types
|
||||
data1 = Dict("key1" => "value1")
|
||||
data2 = rand(10_000) # Small array
|
||||
env, msg_json = smartsend("my.subject", [("dataname1", data1, "dictionary"), ("dataname2", data2, "table")])
|
||||
env, msg_json = smartsend("my.subject", [("dataname1", data1, "dictionary"), ("dataname2", data2, "arrowtable")])
|
||||
|
||||
# Send a large array using fileserver upload
|
||||
data = rand(10_000_000) # ~80 MB
|
||||
env, msg_json = smartsend("large.data", [("large_table", data, "table")])
|
||||
env, msg_json = smartsend("large.data", [("large_arrow_table", data, "arrowtable")])
|
||||
|
||||
# Send jsontable (JSON format)
|
||||
rows = [Dict("id" => 1, "name" => "Alice"), Dict("id" => 2, "name" => "Bob")]
|
||||
env, msg_json = smartsend("json.data", [("users", rows, "jsontable")])
|
||||
|
||||
# Mixed content (e.g., chat with text and image)
|
||||
env, msg_json = smartsend("chat.subject", [
|
||||
@@ -424,13 +436,12 @@ function smartsend(
|
||||
fileserver_upload_handler::Function = plik_oneshot_upload, # a function to handle uploading data to specific HTTP fileserver
|
||||
size_threshold::Int = DEFAULT_SIZE_THRESHOLD,
|
||||
|
||||
#=
|
||||
Generate a globally unique identifier (UUID) at the start of the request.
|
||||
This ID must remain constant and immutable as it propagates through every
|
||||
stage of the execution pipeline. It serves as the end-to-end ID for
|
||||
distributed tracing, enabling the correlation of all logs, metrics, and
|
||||
errors across the system back to this specific request instance.
|
||||
=#
|
||||
# Generate a globally unique identifier (UUID) at the start of the request.
|
||||
# This ID must remain constant and immutable as it propagates through every
|
||||
# stage of the execution pipeline. It serves as the end-to-end ID for
|
||||
# distributed tracing, enabling the correlation of all logs, metrics, and
|
||||
# errors across the system back to this specific request instance.
|
||||
|
||||
correlation_id::String = string(uuid4()),
|
||||
|
||||
msg_purpose::String = "chat",
|
||||
@@ -463,6 +474,14 @@ function smartsend(
|
||||
payload_b64 = Base64.base64encode(payload_bytes) # Encode bytes as base64 string
|
||||
log_trace(correlation_id, "Using direct transport for $payload_size bytes") # Log transport choice
|
||||
|
||||
# Determine encoding based on payload_type
|
||||
encoding = "base64"
|
||||
if payload_type == "jsontable"
|
||||
encoding = "json"
|
||||
elseif payload_type == "arrowtable"
|
||||
encoding = "arrow-ipc"
|
||||
end
|
||||
|
||||
# Create msg_payload_v1 for direct transport
|
||||
payload = msg_payload_v1(
|
||||
payload_b64,
|
||||
@@ -470,7 +489,7 @@ function smartsend(
|
||||
id = string(uuid4()),
|
||||
dataname = dataname,
|
||||
transport = "direct",
|
||||
encoding = "base64",
|
||||
encoding = encoding,
|
||||
size = payload_size,
|
||||
metadata = Dict{String, Any}("payload_bytes" => payload_size)
|
||||
)
|
||||
@@ -481,7 +500,7 @@ function smartsend(
|
||||
|
||||
# Upload to HTTP server
|
||||
response = fileserver_upload_handler(fileserver_url, dataname, payload_bytes)
|
||||
|
||||
|
||||
if response["status"] != 200 # Check if upload was successful
|
||||
error("Failed to upload data to fileserver: $(response["status"])") # Throw error if upload failed
|
||||
end
|
||||
@@ -489,6 +508,14 @@ function smartsend(
|
||||
url = response["url"] # URL for the uploaded data
|
||||
log_trace(correlation_id, "Uploaded to URL: $url") # Log successful upload
|
||||
|
||||
# Determine encoding based on payload_type
|
||||
encoding = "none"
|
||||
if payload_type == "jsontable"
|
||||
encoding = "json"
|
||||
elseif payload_type == "arrowtable"
|
||||
encoding = "arrow-ipc"
|
||||
end
|
||||
|
||||
# Create msg_payload_v1 for link transport
|
||||
payload = msg_payload_v1(
|
||||
url,
|
||||
@@ -496,7 +523,7 @@ function smartsend(
|
||||
id = string(uuid4()),
|
||||
dataname = dataname,
|
||||
transport = "link",
|
||||
encoding = "none",
|
||||
encoding = encoding,
|
||||
size = payload_size,
|
||||
metadata = Dict{String, Any}()
|
||||
)
|
||||
@@ -543,12 +570,13 @@ It supports multiple serialization formats for different data types.
|
||||
2. Converts data to binary representation according to format rules
|
||||
3. For text: converts string to UTF-8 bytes
|
||||
4. For dictionary: serializes as JSON then converts to bytes
|
||||
5. For table: uses Arrow.jl to write as IPC stream
|
||||
6. For image/audio/video/binary: returns binary data directly
|
||||
5. For arrowtable: uses Arrow.jl to write as IPC stream
|
||||
6. For jsontable: converts to JSON then to bytes
|
||||
7. For image/audio/video/binary: returns binary data directly
|
||||
|
||||
# Arguments:
|
||||
- `data::Any` - Data to serialize (string for `"text"`, JSON-serializable for `"dictionary"`, table-like for `"table"`, binary for `"image"`, `"audio"`, `"video"`, `"binary"`)
|
||||
- `payload_type::String` - Target format: "text", "dictionary", "table", "image", "audio", "video", "binary"
|
||||
- `data::Any` - Data to serialize (string for `"text"`, JSON-serializable for `"dictionary"`, table-like for `"arrowtable"`, Vector{NamedTuple}/Vector{Dict}/DataFrame for `"jsontable"`, binary for `"image"`, `"audio"`, `"video"`, `"binary"`)
|
||||
- `payload_type::String` - Target format: "text", "dictionary", "arrowtable", "jsontable", "image", "audio", "video", "binary"
|
||||
|
||||
# Return:
|
||||
- `Vector{UInt8}` - Binary representation of the serialized data
|
||||
@@ -569,9 +597,13 @@ text_bytes = _serialize_data(text_data, "text")
|
||||
json_data = Dict("name" => "Alice", "age" => 30)
|
||||
json_bytes = _serialize_data(json_data, "dictionary")
|
||||
|
||||
# Table serialization with a DataFrame (recommended for tabular data)
|
||||
# Arrow table serialization with a DataFrame (recommended for tabular data)
|
||||
df = DataFrame(id = 1:3, name = ["Alice", "Bob", "Charlie"], score = [95, 88, 92])
|
||||
table_bytes = _serialize_data(df, "table")
|
||||
arrow_bytes = _serialize_data(df, "arrowtable")
|
||||
|
||||
# JSON table serialization - Vector{NamedTuple} or Vector{Dict}
|
||||
rows = [Dict("id" => 1, "name" => "Alice"), Dict("id" => 2, "name" => "Bob")]
|
||||
json_bytes = _serialize_data(rows, "jsontable")
|
||||
|
||||
# Image data (Vector{UInt8})
|
||||
image_bytes = UInt8[1, 2, 3] # Image bytes
|
||||
@@ -622,10 +654,30 @@ function _serialize_data(data::Any, payload_type::String)
|
||||
json_str = JSON.json(data) # Convert Julia data to JSON string
|
||||
json_str_bytes = Vector{UInt8}(json_str) # Convert JSON string to bytes
|
||||
return json_str_bytes
|
||||
elseif payload_type == "table" # Table data - convert to Arrow IPC stream
|
||||
elseif payload_type == "arrowtable" # Arrow table data - convert to Arrow IPC stream
|
||||
io = IOBuffer() # Create in-memory buffer
|
||||
Arrow.write(io, data) # Write data as Arrow IPC stream to buffer
|
||||
return take!(io) # Return the buffer contents as bytes
|
||||
elseif payload_type == "jsontable" # JSON table data - convert to JSON
|
||||
# data can be Vector{NamedTuple}, Vector{Dict}, or DataFrame
|
||||
# If DataFrame, convert to Vector{Dict} first
|
||||
if isa(data, DataFrame)
|
||||
# Convert DataFrame to Vector{Dict} (row-oriented)
|
||||
rows = []
|
||||
for i in 1:nrow(data)
|
||||
row_dict = Dict()
|
||||
for col in names(data)
|
||||
row_dict[String(col)] = data[i, col]
|
||||
end
|
||||
push!(rows, row_dict)
|
||||
end
|
||||
json_str = JSON.json(rows)
|
||||
return Vector{UInt8}(json_str)
|
||||
else
|
||||
# Already Vector{NamedTuple} or Vector{Dict}
|
||||
json_str = JSON.json(data)
|
||||
return Vector{UInt8}(json_str)
|
||||
end
|
||||
elseif payload_type == "image" # Image data - treat as binary
|
||||
if isa(data, Vector{UInt8})
|
||||
return data # Return binary data directly
|
||||
@@ -881,24 +933,25 @@ end
|
||||
|
||||
""" _deserialize_data - Deserialize bytes to data based on type
|
||||
This internal function converts serialized bytes back to Julia data based on type.
|
||||
It handles "text" (string), "dictionary" (JSON deserialization), "table" (Arrow IPC deserialization),
|
||||
"image" (binary data), "audio" (binary data), "video" (binary data), and "binary" (binary data).
|
||||
It handles "text" (string), "dictionary" (JSON deserialization), "arrowtable" (Arrow IPC deserialization),
|
||||
"jsontable" (JSON deserialization), "image" (binary data), "audio" (binary data), "video" (binary data), and "binary" (binary data).
|
||||
|
||||
# Function Workflow:
|
||||
1. Validates the data type against supported formats
|
||||
2. Converts bytes to appropriate Julia data type based on format
|
||||
3. For text: converts bytes to string
|
||||
4. For dictionary: converts bytes to JSON string then parses to Julia object
|
||||
5. For table: reads Arrow IPC format and returns DataFrame
|
||||
6. For image/audio/video/binary: returns bytes directly
|
||||
5. For arrowtable: reads Arrow IPC format and returns Arrow.Table
|
||||
6. For jsontable: converts bytes to JSON string then parses to Vector{Dict}
|
||||
7. For image/audio/video/binary: returns bytes directly
|
||||
|
||||
# Arguments:
|
||||
- `data::Vector{UInt8}` - Serialized data as bytes
|
||||
- `payload_type::String` - Data type ("text", "dictionary", "table", "image", "audio", "video", "binary")
|
||||
- `payload_type::String` - Data type ("text", "dictionary", "arrowtable", "jsontable", "image", "audio", "video", "binary")
|
||||
- `correlation_id::String` - Correlation ID for logging
|
||||
|
||||
# Return:
|
||||
- Deserialized data (String for "text", DataFrame for "table", JSON data for "dictionary", bytes for "image", "audio", "video", "binary")
|
||||
- Deserialized data (String for "text", Arrow.Table for "arrowtable", Vector{Dict} for "jsontable", JSON data for "dictionary", bytes for "image", "audio", "video", "binary")
|
||||
|
||||
# Throws:
|
||||
- `Error` if `payload_type` is not one of the supported types
|
||||
@@ -913,9 +966,13 @@ text_data = _deserialize_data(text_bytes, "text", "correlation123")
|
||||
json_bytes = UInt8[123, 34, 110, 97, 109, 101, 34, 58, 34, 65, 108, 105, 99, 101, 125] # {"name":"Alice"}
|
||||
json_data = _deserialize_data(json_bytes, "dictionary", "correlation123")
|
||||
|
||||
# Arrow IPC data (table)
|
||||
# Arrow IPC data (arrowtable)
|
||||
arrow_bytes = Vector{UInt8}([1, 2, 3]) # Arrow IPC bytes
|
||||
table_data = _deserialize_data(arrow_bytes, "table", "correlation123")
|
||||
arrow_table = _deserialize_data(arrow_bytes, "arrowtable", "correlation123")
|
||||
|
||||
# JSON table data (jsontable)
|
||||
json_table_bytes = UInt8[91, 123, 34, 105, 100, 34, 58, 49, 44, 34, 110, 97, 109, 101, 34, 58, 34, 65, 108, 105, 99, 101, 34, 125] # [{"id":1,"name":"Alice"}]
|
||||
json_table = _deserialize_data(json_table_bytes, "jsontable", "correlation123")
|
||||
```
|
||||
"""
|
||||
function _deserialize_data(
|
||||
@@ -928,10 +985,13 @@ function _deserialize_data(
|
||||
elseif payload_type == "dictionary" # JSON data - deserialize
|
||||
json_str = String(data) # Convert bytes to string
|
||||
return JSON.parse(json_str) # Parse JSON string to JSON object
|
||||
elseif payload_type == "table" # Table data - deserialize Arrow IPC stream
|
||||
elseif payload_type == "arrowtable" # Arrow table data - deserialize Arrow IPC stream
|
||||
io = IOBuffer(data) # Create buffer from bytes
|
||||
df = Arrow.Table(io) # Read Arrow IPC format from buffer
|
||||
return df # Return DataFrame
|
||||
table = Arrow.Table(io) # Read Arrow IPC format from buffer
|
||||
return table # Return Arrow.Table
|
||||
elseif payload_type == "jsontable" # JSON table data - deserialize JSON
|
||||
json_str = String(data) # Convert bytes to string
|
||||
return JSON.parse(json_str) # Parse JSON string to Vector{Dict}
|
||||
elseif payload_type == "image" # Image data - return binary
|
||||
return data # Return bytes directly
|
||||
elseif payload_type == "audio" # Audio data - return binary
|
||||
@@ -945,6 +1005,16 @@ function _deserialize_data(
|
||||
end
|
||||
end
|
||||
|
||||
""" rows_to_columns_dict - Convert row-oriented records into a column-oriented dictionary
Takes a vector of rows (each row a key => value collection such as a `Dict` or a
`NamedTuple`) and returns a dictionary that maps every key to the vector of that
key's values, in row order.

# Function Workflow:
1. Returns an empty `Dict{Symbol,Vector{Any}}` when `rows` is empty
2. Collects the distinct keys found across all rows (not only the first row)
3. Builds one column per key; a row that lacks a key contributes `missing`

# Arguments:
    - `rows::AbstractVector` - Rows to pivot. Each element must support `keys(row)`
      and `get(row, key, default)` (e.g. `Dict{Symbol,Any}`, `Dict{String,Any}`,
      `NamedTuple`). A `Vector{Any}` of dictionaries is accepted as well.

# Return:
    - `Dict` - Column name => vector of values, one entry per input row

# Example
```julia
rows = [Dict("id" => 1, "name" => "Alice"), Dict("id" => 2)]
cols = rows_to_columns_dict(rows)
# cols["id"] is [1, 2]; cols["name"] is ["Alice", missing]
```
"""
function rows_to_columns_dict(rows::AbstractVector)
    # No rows means no columns; the empty return type is kept from the
    # original Symbol-keyed implementation for backward compatibility.
    isempty(rows) && return Dict{Symbol,Vector{Any}}()

    # Gather keys from every row so a column that first appears in a later
    # row is not silently dropped.
    column_keys = unique(key for row in rows for key in keys(row))

    # Build the column-oriented dictionary; absent cells become `missing`.
    return Dict(
        key => [get(row, key, missing) for row in rows]
        for key in column_keys
    )
end
|
||||
|
||||
""" plik_oneshot_upload - Upload a single file to a plik server using one-shot mode
|
||||
This function uploads a raw byte array to a plik server in one-shot mode (no upload session).
|
||||
@@ -970,19 +1040,19 @@ retrieves an upload ID and token, then uploads the file data as multipart form d
|
||||
- `"url"` - Full URL to download the uploaded file
|
||||
|
||||
# Example
|
||||
```jldoctest
|
||||
using HTTP, JSON
|
||||
```jldoctest
|
||||
using HTTP, JSON
|
||||
|
||||
fileserver_url = "http://localhost:8080"
|
||||
dataname = "test.txt"
|
||||
data = Vector{UInt8}("hello world")
|
||||
fileserver_url = "http://localhost:8080"
|
||||
dataname = "test.txt"
|
||||
data = Vector{UInt8}("hello world")
|
||||
|
||||
# Upload to local plik server
|
||||
result = plik_oneshot_upload(fileserver_url, dataname, data)
|
||||
# Upload to local plik server
|
||||
result = plik_oneshot_upload(fileserver_url, dataname, data)
|
||||
|
||||
# Access the result as a Dict
|
||||
# result["status"], result["uploadid"], result["fileid"], result["url"]
|
||||
```
|
||||
# Access the result as a Dict
|
||||
# result["status"], result["uploadid"], result["fileid"], result["url"]
|
||||
```
|
||||
"""
|
||||
function plik_oneshot_upload(file_server_url::String, dataname::String, data::Vector{UInt8})
|
||||
|
||||
@@ -1106,18 +1176,4 @@ end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
end # module
|
||||
|
||||
Reference in New Issue
Block a user