This time, I've come across a Python library called Construct, described as "a powerful declarative parser (and builder) for binary data". It has quickly proven to be a very powerful and expressive tool: I was able to rewrite an older binary schema (for Geometry) in no time, without needing any of the advanced techniques Construct offers.
The code snippet below shows the schema. Note the build_geometry wrapper, which builds the data in two passes in order to obtain the total data size and the individual offsets for the jump table. It's not the most elegant solution I could imagine, but it's pretty straightforward. I especially like the concept of Anchor, which captures the actual stream position, so one doesn't need to calculate offsets manually, which tends to be quite error-prone. This is a huge help!
Code: Select all
#!/usr/bin/env python
# /*
# (c) 2014 +++ Filip Stoklas, aka FipS, http://www.4FipS.com +++
# THIS CODE IS FREE - LICENSED UNDER THE MIT LICENSE
# ARTICLE URL: http://forums.4fips.com/viewtopic.php?f=3&t=1205
# */
from construct import *
# Binary schema of a geometry blob. Sections appear in the stream in this
# order: header, jump table, vertex format, vertex data. All multi-byte
# integers are little-endian (ULInt*).
#
# The "_anchor_*" fields occupy no bytes; each one records the stream
# position at the point where it appears, which build_geometry() uses to
# fill in header.size and the jump table offsets.
Geometry = Struct("geometry",
    Struct("header",
        ULInt32("size"),  # total size of the blob in bytes
        Const(Bytes("magic", 6), "FS-GEO"),
        ULInt8("major_ver"),
        ULInt8("minor_ver"),
    ),
    # Byte offsets (from the start of the blob) of the sections below.
    Struct("jump_table",
        ULInt32("vertex_format"),
        ULInt32("vertex_data"),
    ),
    Anchor("_anchor_vertex_format"),
    Struct("vertex_format",
        ULInt8("num_elems"),
        Array(lambda ctx: ctx.num_elems,
            Struct("elems",
                # Values descend by component count within each base type
                # (4, 3, 2, 1), matching the C++ vertex_element::type enum.
                Enum(ULInt8("type"),
                    float_4 = 0,
                    float_3 = 1,
                    float_2 = 2,
                    float_1 = 3,
                    uint8_4 = 4,
                    uint8_3 = 5,
                    uint8_2 = 6,
                    uint8_1 = 7,
                ),
                Enum(ULInt8("semantics"),
                    position = 0,
                    color = 1,
                    normal = 2,
                    texcoord0 = 3,
                    texcoord1 = 4,
                ),
            ),
        ),
    ),
    Anchor("_anchor_vertex_data"),
    Struct("vertex_data",
        ULInt32("num_bytes"),
        # Raw payload whose length is given by the preceding num_bytes field.
        Bytes("bytes", lambda ctx: ctx.num_bytes),
    ),
    Anchor("_anchor_end"),  # position after the last byte == total size
)
def build_geometry(container):
    """Serialize *container* with the Geometry schema, patching size and offsets.

    The data is built in two passes. The first pass builds the blob with
    whatever placeholder values the caller supplied and parses it back, so
    that the Anchor fields hold the actual stream positions. The second pass
    copies those positions into ``header.size`` and the jump table and
    builds the final blob.

    Returns the serialized geometry data.
    """
    # 1st pass: round-trip through build/parse to capture the anchor positions.
    parsed = Geometry.parse(Geometry.build(container))

    # 2nd pass: set size & offsets from the captured positions, then rebuild.
    parsed.header.size = parsed._anchor_end
    offsets = parsed.jump_table
    offsets.vertex_format = parsed._anchor_vertex_format
    offsets.vertex_data = parsed._anchor_vertex_data
    return Geometry.build(parsed)
geom = build_geometry(Container(
header = Container(
size = 0, # set in the 2nd pass
magic = "FS-GEO",
major_ver = 1,
minor_ver = 0,
),
jump_table = Container(
vertex_format = 0, # set in the 2nd pass
vertex_data = 0, # set in the 2nd pass
),
_anchor_vertex_format = 0, # capture stream pos, set automatically
vertex_format = Container(
num_elems = 2,
elems = [
Container(type = "float_3", semantics = "position"),
Container(type = "uint8_4", semantics = "color"),
]
),
_anchor_vertex_data = 0, # capture stream pos, set automatically
vertex_data = Container(
num_bytes = 7,
bytes = "data...",
),
_anchor_end = 0, # capture stream pos, set automatically
))
print Geometry.parse(geom)
print "\nbinary dump:"
print " ".join("{:02X}".format(ord(c)) for c in geom)
Code: Select all
Container:
header = Container:
size = 36
magic = 'FS-GEO'
major_ver = 1
minor_ver = 0
jump_table = Container:
vertex_format = 20
vertex_data = 25
vertex_format = Container:
num_elems = 2
elems = [
Container:
type = 'float_3'
semantics = 'position'
Container:
type = 'uint8_4'
semantics = 'color'
]
vertex_data = Container:
num_bytes = 7
bytes = 'data...'
binary dump:
24 00 00 00 46 53 2D 47 45 4F 01 00 14 00 00 00 19 00 00 00 02 01 00 04 01 07 00 00 00 64 61 74 61 2E 2E 2E
Code: Select all
// View over a serialized geometry blob. The nested structs mirror the
// on-disk sections one-to-one, so the constructor can point straight into
// the supplied bytes instead of decoding them field by field.
class geometry_view
{
public:
    // File header; field order must match the serialized layout exactly.
    struct header
    {
        uint32_t size;      // total size of the blob in bytes
        uint8_t magic[6];   // "FS-GEO"
        uint8_t major_ver;  // serialized before minor_ver
        uint8_t minor_ver;
    };

    // Byte offsets of the sections, relative to the start of the blob.
    struct jump_table
    {
        uint32_t vertex_format;
        uint32_t vertex_data;
    };

    // One entry of the vertex format: element type plus its semantics.
    struct vertex_element
    {
        enum type : uint8_t
        {
            float_4, float_3, float_2, float_1,
            uint8_4, uint8_3, uint8_2, uint8_1,
        };

        enum semantics : uint8_t
        {
            position, color, normal, texcoord0, texcoord1
        };

        type type;
        semantics sema;
    };

    explicit geometry_view(bytes_ref data);

    const header & header() const { return *_header; }
    const jump_table & jump_table() const { return *_jump_table; }
    array_ref<vertex_element> vertex_elements() const { return _vertex_elems; }
    bytes_ref vertex_bytes() const { return _vertex_bytes; }

private:
    // Serialized vertex format section: a count followed by that many
    // elements. 'elems' is declared with one entry only; the real length
    // is num_elems.
    struct vertex_format
    {
        uint8_t num_elems;
        const vertex_element elems[1];
    };

    // Serialized vertex data section: a byte count followed by that many
    // bytes. 'bytes' is declared with one entry only; the real length is
    // num_bytes.
    struct vertex_data
    {
        uint32_t num_bytes;
        uint8_t bytes[1];
    };

    // NOTE: the constructor's initializers read earlier members, so the
    // declaration order below is significant.
    const struct header *_header;
    const struct jump_table *_jump_table;
    const vertex_format *_vertex_format;
    array_ref<vertex_element> _vertex_elems;
    const vertex_data *_vertex_data;
    bytes_ref _vertex_bytes;
};
Code: Select all
// Sets up the view by casting into 'data' in place: the header is taken from
// the start of the buffer, the jump table from directly after it, and the
// vertex format / vertex data sections from the offsets stored in the jump
// table.
//
// Each initializer reads members set by the previous ones (_jump_table, then
// _vertex_format, then _vertex_data), so it relies on the members being
// declared in this same order in the class.
//
// NOTE(review): nothing here checks the jump table offsets or the element /
// byte counts against the size of 'data' - presumably the input is trusted;
// confirm with callers.
// NOTE(review): the vertex_data offset is not necessarily 4-byte aligned (it
// is 25 in the sample dump), so reading num_bytes through the cast pointer
// may be a misaligned access - confirm the target platforms tolerate it.
geometry_view::geometry_view(bytes_ref data):
_header(reinterpret_cast<const struct header *>(data.data())),
// The jump table immediately follows the header.
_jump_table(reinterpret_cast<const struct jump_table *>(data.data() + sizeof(struct header))),
_vertex_format(reinterpret_cast<const vertex_format *>(data.data() + _jump_table->vertex_format)),
// The declared one-element array is used as the start of num_elems entries.
_vertex_elems(_vertex_format->elems, _vertex_format->num_elems),
_vertex_data(reinterpret_cast<const vertex_data *>(data.data() + _jump_table->vertex_data)),
// Likewise, 'bytes' is the start of num_bytes payload bytes.
_vertex_bytes(_vertex_data->bytes, _vertex_data->num_bytes)
{
}